Alvis-Convert

 view release on metacpan or  search on metacpan

t/test-data/to-split/29.xml  view on Meta::CPAN

        <meta name="dc:type">text/html</meta>
      </metaData>
      <links>
        <outlinks>
          <link type="a">
            <anchorText>Alexander</anchorText>
            <location>http://www.mobileread.com</location>
          </link>
        </outlinks>
      </links>
    </acquisition>
  <linguisticAnalysis>
    <semantic_unit_level>
      <semantic_unit><named_entity><form>Google Inc</form><named_entity_type>comp</named_entity_type></named_entity></semantic_unit>
      <semantic_unit><named_entity><form>Google</form><named_entity_type>comp</named_entity_type></named_entity></semantic_unit>
      <semantic_unit><named_entity><form>Google France</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
      <semantic_unit><named_entity><form>Google</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
    </semantic_unit_level>
  </linguisticAnalysis>

  </documentRecord>
<documentRecord id="F4CB2ADCF31D43697F2ED6B9412A4E8D" xmlns="http://alvis.info/enriched/">
    <acquisition>
      <acquisitionData>
        <modifiedDate>1145958751520</modifiedDate>
        <httpServer>Apache/2.0</httpServer>
        <urls>
          <url>http://google.weblogsinc.com/2006/04/22/google-stock-up-8/</url>
        </urls>
      </acquisitionData>
      <canonicalDocument>        
        <section>Google's stock rose 6% in after hours trading on Friday. According to the NY Times, Google's profit rose 60% in the first quarter. Google has responded by saying that the market share increase might be in relation to increased uses o...
      <metaData>
        <meta name="title">Google Stock up 8%</meta>
        <meta name="dc:type">text/html</meta>
      </metaData>
      <links>
        <outlinks>
          <link type="a">
            <anchorText>stock price</anchorText>
            <location>http://finance.google.com/finance?q=goog&amp;btnG=Search</location>
          </link>
        </outlinks>
      </links>
    </acquisition>
  <linguisticAnalysis>
    <semantic_unit_level>
      <semantic_unit><named_entity><form>Google</form><named_entity_type>comp</named_entity_type></named_entity></semantic_unit>
      <semantic_unit><named_entity><form>Google Earth</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
      <semantic_unit><named_entity><form>Google Maps</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
      <semantic_unit><named_entity><form>Google</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
      <semantic_unit><named_entity><form>Google Search</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
      <semantic_unit><named_entity><form>Google Video</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
    </semantic_unit_level>
  </linguisticAnalysis>

  </documentRecord>
<documentRecord id="7F0D97BDACC9D73DA79364ADF93A9080" xmlns="http://alvis.info/enriched/">
    <acquisition>
      <acquisitionData>
        <modifiedDate>1144768340466</modifiedDate>
        <httpServer>Apache/1.3.28 (Unix) mod_gzip/1.3.26.1a PHP/4.3.10 mod_ssl/2.8.15 OpenSSL/0.9.7c</httpServer>
        <urls>
          <url>http://www.seroundtable.com/archives/003639.html</url>
        </urls>
      </acquisitionData>
      <canonicalDocument>        
        <section>There is a DigitalPoint Forum thread named that discusses a neat PageRank tool at http://www.webmastereyes.com/. The PageRank tool is different from others, in that it will enable you to plug in a URL and it will then place graphical...
      <metaData>
        <meta name="title">New Google PageRank Tool Plots PR Values Overlays On Page</meta>
        <meta name="dc:date">Tue, 11 Apr 2006 12:40:49 GMT</meta>
        <meta name="dc:type">text/html</meta>
      </metaData>
      <links>
        <outlinks>
          <link type="a">
            <anchorText>http://www.webmastereyes.com/</anchorText>
            <location>http://www.webmastereyes.com/</location>
          </link>
          <link type="a">
            <anchorText>thread</anchorText>
            <location>http://forums.digitalpoint.com/showthread.php?t=74054</location>
          </link>
          <link type="a">
            <anchorText>DigitalPoint Forums</anchorText>
            <location>http://forums.digitalpoint.com/showthread.php?t=74054</location>
          </link>
        </outlinks>
      </links>
    </acquisition>
  <linguisticAnalysis>
    <semantic_unit_level>
      <semantic_unit><named_entity><form>Google</form><named_entity_type>comp</named_entity_type></named_entity></semantic_unit>
      <semantic_unit><named_entity><form>Digital</form><named_entity_type>comp</named_entity_type></named_entity></semantic_unit>
      <semantic_unit><named_entity><form>Google</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
      <semantic_unit><named_entity><form>Google PageRank</form><named_entity_type>soft</named_entity_type></named_entity></semantic_unit>
    </semantic_unit_level>
  </linguisticAnalysis>

  </documentRecord>
<documentRecord id="E25E5DBF90E6C6A3CDF200F61F6A20E6" xmlns="http://alvis.info/enriched/">
    <acquisition>
      <acquisitionData>
        <modifiedDate>1150315246240</modifiedDate>
        <httpServer>Apache/1.3.36 (Unix) mod_fastcgi/2.4.2 mod_auth_passthrough/1.8 mod_log_bytes/1.2 mod_bwlimited/1.4 PHP/4.4.2 FrontPage/5.0.2.2635.SR1.2 mod_ssl/2.8.27 OpenSSL/0.9.7a</httpServer>
        <urls>
          <url>http://www.searchenginejournal.com/?p=3530</url>
        </urls>
      </acquisitionData>
      <canonicalDocument>        
        <section>RSS - Things That Make You Go Hmmm Why doesn’t the new Yahoo Spark Blog publish an RSS feed? Of any kind? Not even an “add to my Yahoo” button? Why can’t I subscribe to the Technorati Hot Tags widget that’s (supposedly) upd...
      <metaData>
        <meta name="title">RSS - Things That Make You Go Hmmm</meta>
        <meta name="dc:type">text/html; charset=utf-8</meta>
      </metaData>
      <links>
        <outlinks>
          <link type="a">
            <anchorText>Technorati Hot Tags</anchorText>
            <location>http://www.technorati.com/tags/</location>
          </link>



( run in 0.557 second using v1.01-cache-2.11-cpan-39bf76dae61 )