Alvis-Convert

 view release on metacpan or  search on metacpan

t/test-data/original/3/189-2.xml  view on Meta::CPAN

<documentRecord id="706D87D704893B9A3B40C28AC3620991" xmlns="http://alvis.info/enriched/">
    <acquisition>
      <acquisitionData>
        <modifiedDate>1141065628996</modifiedDate>
        <httpServer>Apache</httpServer>
        <urls>
          <url>http://blog.searchenginewatch.com/blog/0505</url>
        </urls>
      </acquisitionData>
      <canonicalDocument>        
        <section>If you've been wanting an option to keyword search your Gmail using Google's desktop search tool, your wait is over. A 105K plug-in was made available late last week on the GDS site called, "Larry's Gmail Indexer" (beta). It uses GMa...
      <metaData>
        <meta name="title">Search Your Gmail with Google Desktop Search</meta>
        <meta name="dc:date">2005-05-01</meta>
        <meta name="dc:type">text/html</meta>
      </metaData>
      <links>
        <outlinks>
          <link type="a">
            <anchorText>desktop search</anchorText>
            <location>http://desktop.google.com</location>
          </link>
          <link type="a">
            <anchorText>"Larry's Gmail Indexer"</anchorText>
            <location>http://desktop.google.com/plugins/gmail.html</location>
          </link>
          <link type="a">
            <anchorText>on his site</anchorText>
            <location>http://www.trivex.net/</location>
          </link>
        </outlinks>
      </links>
    </acquisition>
  </documentRecord>
</documentCollection>

t/test-data/to-split/29.xml  view on Meta::CPAN

<documentRecord id="48FFC0A03C2756C583F6D80C9E527393" xmlns="http://alvis.info/enriched/">
    <acquisition>
      <acquisitionData>
        <modifiedDate>1142422246164</modifiedDate>
        <httpServer>Apache/1.3.33 (Unix)</httpServer>
        <urls>
          <url>http://blog.outer-court.com/archive/2006-03-15-n42.html</url>
        </urls>
      </acquisitionData>
      <canonicalDocument>        
        <section>Google releases their desktop search tool in an updated version today. Among some bugfixes, there’s a new Quick Search box. Hit Ctrl twice to make it appear in the middle of your desktop, and then search for anything – your compu...
      <metaData>
        <meta name="title">Google Desktop's Quick Search Box</meta>
        <meta name="dc:date">Wed, 15 Mar 2006 11:20:57 GMT</meta>
        <meta name="dc:type">text/html</meta>
      </metaData>
      <links>
        <outlinks>
          <link type="a">
            <anchorText>Quick Search box</anchorText>
            <location>http://desktop.google.com/features.html#quicksearch</location>
          </link>
        </outlinks>
      </links>
    </acquisition>
  <linguisticAnalysis>
    <semantic_unit_level>
      <semantic_unit><named_entity><form>Brin</form><named_entity_type>person</named_entity_type></named_entity></semantic_unit>
      <semantic_unit><named_entity><form>Google</form><named_entity_type>comp</named_entity_type></named_entity></semantic_unit>
      <semantic_unit><named_entity><form>Google</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
      <semantic_unit><named_entity><form>Google Desktop</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>



( run in 0.764 second using v1.01-cache-2.11-cpan-299005ec8e3 )