Alvis-Convert
view release on metacpan or search on metacpan
t/test-data/to-split/29.xml view on Meta::CPAN
<link type="a">
<anchorText>Yahoo News</anchorText>
<location>http://news.yahoo.com/</location>
</link>
<link type="a">
<anchorText>stock quote service</anchorText>
<location>http://www.google.com/help/features.html#stock</location>
</link>
<link type="a">
<anchorText>Google Blog
Search</anchorText>
<location>http://blogsearch.google.com/</location>
</link>
<link type="a">
<anchorText>Yahoo Finance</anchorText>
<location>http://finance.yahoo.com/</location>
</link>
<link type="a">
<anchorText>Google News</anchorText>
<location>http://news.google.com/</location>
</link>
</outlinks>
</links>
</acquisition>
<linguisticAnalysis>
<semantic_unit_level>
<semantic_unit><named_entity><form>Chris Sherman</form><named_entity_type>person</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Yahoo</form><named_entity_type>comp</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>NetRatings</form><named_entity_type>comp</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Google</form><named_entity_type>comp</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Moreover</form><named_entity_type>comp</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>MSN</form><named_entity_type>comp</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Blogger</form><named_entity_type>comp</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Google Finance</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Google Toolbar</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Gmail</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Google</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>MSN</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Yahoo News</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Yahoo Finance</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Google Blog</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>MSN MoneyCentral</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Flash</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>way Google</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Google Blog Search</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Google News</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
</semantic_unit_level>
</linguisticAnalysis>
</documentRecord>
<documentRecord id="48FFC0A03C2756C583F6D80C9E527393" xmlns="http://alvis.info/enriched/">
<acquisition>
<acquisitionData>
<modifiedDate>1142422246164</modifiedDate>
<httpServer>Apache/1.3.33 (Unix)</httpServer>
<urls>
<url>http://blog.outer-court.com/archive/2006-03-15-n42.html</url>
</urls>
</acquisitionData>
<canonicalDocument>
<section>Google releases their desktop search tool in an updated version today. Among some bugfixes, there’s a new Quick Search box. Hit Ctrl twice to make it appear in the middle of your desktop, and then search for anything – your compu...
<metaData>
<meta name="title">Google Desktop's Quick Search Box</meta>
<meta name="dc:date">Wed, 15 Mar 2006 11:20:57 GMT</meta>
<meta name="dc:type">text/html</meta>
</metaData>
<links>
<outlinks>
<link type="a">
<anchorText>Quick Search box</anchorText>
<location>http://desktop.google.com/features.html#quicksearch</location>
</link>
</outlinks>
</links>
</acquisition>
<linguisticAnalysis>
<semantic_unit_level>
<semantic_unit><named_entity><form>Brin</form><named_entity_type>person</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Google</form><named_entity_type>comp</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Google</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Google Desktop</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
</semantic_unit_level>
</linguisticAnalysis>
</documentRecord>
<documentRecord id="18C9FD35812DFC4D4CCF0FD6AC1646BC" xmlns="http://alvis.info/enriched/">
<acquisition>
<acquisitionData>
<modifiedDate>1149133052555</modifiedDate>
<httpServer>Apache/1.3.33 (Unix)</httpServer>
<urls>
<url>http://blog.outer-court.com/archive/2006-05-30-n12.html</url>
</urls>
</acquisitionData>
<canonicalDocument>
<section>Some bloggers are complaining that Google didn’t have a Memorial day logo yesterday. Memorial Day “commemorates U.S. men and women who have died in military service,”Wikipedia explains. From a comment at Newsbusters by Warner T...
<metaData>
<meta name="title">Complaints Due to Lack of Google Memorial Day Logo</meta>
<meta name="dc:date">Thu, 01 Jun 2006 02:44:56 GMT</meta>
<meta name="dc:type">text/html</meta>
</metaData>
<links>
<outlinks>
<link type="a">
<anchorText>it’s good the way it is</anchorText>
<location>http://blog.lewrockwell.com/lewrw/archives/010666.html</location>
</link>
<link type="a">
<anchorText>Wikipedia</anchorText>
<location>http://en.wikipedia.org/wiki/Memorial_Day</location>
</link>
<link type="a">
<anchorText>a comment at Newsbusters</anchorText>
<location>http://newsbusters.org/node/5580</location>
</link>
<link type="a">
<anchorText>Some bloggers</anchorText>
<location>http://technorati.com/search/google%20memorial</location>
</link>
</outlinks>
</links>
</acquisition>
<linguisticAnalysis>
<semantic_unit_level>
<semantic_unit><named_entity><form>Google</form><named_entity_type>comp</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Wikipedia</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
<semantic_unit><named_entity><form>Google</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
</semantic_unit_level>
</linguisticAnalysis>
</documentRecord>
( run in 0.670 second using v1.01-cache-2.11-cpan-39bf76dae61 )