Alvis-Convert
view release on metacpan or search on metacpan
t/test-data/to-split/29.xml view on Meta::CPAN
<meta name="dc:type">text/html</meta>
</metaData>
<links>
<outlinks>
<link type="a">
<anchorText>Alexander</anchorText>
<location>http://www.mobileread.com</location>
</link>
</outlinks>
</links>
</acquisition>
<linguisticAnalysis>
<semantic_unit_level>
<semantic_unit><named_entity><form>Google Inc</form><named_entity_type>comp</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Google</form><named_entity_type>comp</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Google France</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Google</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
</semantic_unit_level>
</linguisticAnalysis>
</documentRecord>
<documentRecord id="F4CB2ADCF31D43697F2ED6B9412A4E8D" xmlns="http://alvis.info/enriched/">
<acquisition>
<acquisitionData>
<modifiedDate>1145958751520</modifiedDate>
<httpServer>Apache/2.0</httpServer>
<urls>
<url>http://google.weblogsinc.com/2006/04/22/google-stock-up-8/</url>
</urls>
</acquisitionData>
<canonicalDocument>
<section>Google's stock rose 6% in after hours trading on Friday. According to the NY Times, Google's profit rose 60% in the first quarter. Google has responded by saying that the market share increase might be in relation to increased uses o...
<metaData>
<meta name="title">Google Stock up 8%</meta>
<meta name="dc:type">text/html</meta>
</metaData>
<links>
<outlinks>
<link type="a">
<anchorText>stock price</anchorText>
<location>http://finance.google.com/finance?q=goog&amp;btnG=Search</location>
</link>
</outlinks>
</links>
</acquisition>
<linguisticAnalysis>
<semantic_unit_level>
<semantic_unit><named_entity><form>Google</form><named_entity_type>comp</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Google Earth</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Google Maps</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Google</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Google Search</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Google Video</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
</semantic_unit_level>
</linguisticAnalysis>
</documentRecord>
<documentRecord id="7F0D97BDACC9D73DA79364ADF93A9080" xmlns="http://alvis.info/enriched/">
<acquisition>
<acquisitionData>
<modifiedDate>1144768340466</modifiedDate>
<httpServer>Apache/1.3.28 (Unix) mod_gzip/1.3.26.1a PHP/4.3.10 mod_ssl/2.8.15 OpenSSL/0.9.7c</httpServer>
<urls>
<url>http://www.seroundtable.com/archives/003639.html</url>
</urls>
</acquisitionData>
<canonicalDocument>
<section>There is a DigitalPoint Forum thread named that discusses a neat PageRank tool at http://www.webmastereyes.com/. The PageRank tool is different from others, in that it will enable you to plug in a URL and it will then place graphical...
<metaData>
<meta name="title">New Google PageRank Tool Plots PR Values Overlays On Page</meta>
<meta name="dc:date">Tue, 11 Apr 2006 12:40:49 GMT</meta>
<meta name="dc:type">text/html</meta>
</metaData>
<links>
<outlinks>
<link type="a">
<anchorText>http://www.webmastereyes.com/</anchorText>
<location>http://www.webmastereyes.com/</location>
</link>
<link type="a">
<anchorText>thread</anchorText>
<location>http://forums.digitalpoint.com/showthread.php?t=74054</location>
</link>
<link type="a">
<anchorText>DigitalPoint Forums</anchorText>
<location>http://forums.digitalpoint.com/showthread.php?t=74054</location>
</link>
</outlinks>
</links>
</acquisition>
<linguisticAnalysis>
<semantic_unit_level>
<semantic_unit><named_entity><form>Google</form><named_entity_type>comp</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Digital</form><named_entity_type>comp</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Google</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Google PageRank</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
</semantic_unit_level>
</linguisticAnalysis>
</documentRecord>
<documentRecord id="E25E5DBF90E6C6A3CDF200F61F6A20E6" xmlns="http://alvis.info/enriched/">
<acquisition>
<acquisitionData>
<modifiedDate>1150315246240</modifiedDate>
<httpServer>Apache/1.3.36 (Unix) mod_fastcgi/2.4.2 mod_auth_passthrough/1.8 mod_log_bytes/1.2 mod_bwlimited/1.4 PHP/4.4.2 FrontPage/5.0.2.2635.SR1.2 mod_ssl/2.8.27 OpenSSL/0.9.7a</httpServer>
<urls>
<url>http://www.searchenginejournal.com/?p=3530</url>
</urls>
</acquisitionData>
<canonicalDocument>
<section>RSS - Things That Make You Go Hmmm Why doesn’t the new Yahoo Spark Blog publish an RSS feed? Of any kind? Not even an “add to my Yahoo” button? Why can’t I subscribe to the Technorati Hot Tags widget that’s (supposedly) upd...
<metaData>
<meta name="title">RSS - Things That Make You Go Hmmm</meta>
<meta name="dc:type">text/html; charset=utf-8</meta>
</metaData>
<links>
<outlinks>
<link type="a">
<anchorText>Technorati Hot Tags</anchorText>
<location>http://www.technorati.com/tags/</location>
</link>
( run in 0.557 second using v1.01-cache-2.11-cpan-39bf76dae61 )