Alvis-Convert
view release on metacpan or search on metacpan
lib/Alvis/Canonical.pm view on Meta::CPAN
#########################################################################
#
# Private methods
#
######################################################################
sub _contents2canDoc
{
my $self=shift;
my $contents=shift; # contains relevant HTML markup
my $header=shift; # will be updated with information like links
my $source_encoding=shift;
my $can_doc;
if ($DEBUG)
{
open(F,">candoc.cleanNXMLSafe");
print F $contents;
close(F);
}
t/test-data/to-split/29.xml view on Meta::CPAN
<documentRecord id="48FFC0A03C2756C583F6D80C9E527393" xmlns="http://alvis.info/enriched/">
<acquisition>
<acquisitionData>
<modifiedDate>1142422246164</modifiedDate>
<httpServer>Apache/1.3.33 (Unix)</httpServer>
<urls>
<url>http://blog.outer-court.com/archive/2006-03-15-n42.html</url>
</urls>
</acquisitionData>
<canonicalDocument>
<section>Google releases their desktop search tool in an updated version today. Among some bugfixes, there’s a new Quick Search box. Hit Ctrl twice to make it appear in the middle of your desktop, and then search for anything — your compu...
<metaData>
<meta name="title">Google Desktop's Quick Search Box</meta>
<meta name="dc:date">Wed, 15 Mar 2006 11:20:57 GMT</meta>
<meta name="dc:type">text/html</meta>
</metaData>
<links>
<outlinks>
<link type="a">
<anchorText>Quick Search box</anchorText>
<location>http://desktop.google.com/features.html#quicksearch</location>
t/test-data/to-split/29.xml view on Meta::CPAN
<documentRecord id="E25E5DBF90E6C6A3CDF200F61F6A20E6" xmlns="http://alvis.info/enriched/">
<acquisition>
<acquisitionData>
<modifiedDate>1150315246240</modifiedDate>
<httpServer>Apache/1.3.36 (Unix) mod_fastcgi/2.4.2 mod_auth_passthrough/1.8 mod_log_bytes/1.2 mod_bwlimited/1.4 PHP/4.4.2 FrontPage/5.0.2.2635.SR1.2 mod_ssl/2.8.27 OpenSSL/0.9.7a</httpServer>
<urls>
<url>http://www.searchenginejournal.com/?p=3530</url>
</urls>
</acquisitionData>
<canonicalDocument>
<section>RSS - Things That Make You Go Hmmm Why doesn’t the new Yahoo Spark Blog publish an RSS feed? Of any kind? Not even an “add to my Yahoo” button? Why can’t I subscribe to the Technorati Hot Tags widget that’s (supposedly) upd...
<metaData>
<meta name="title">RSS - Things That Make You Go Hmmm</meta>
<meta name="dc:type">text/html; charset=utf-8</meta>
</metaData>
<links>
<outlinks>
<link type="a">
<anchorText>Technorati Hot Tags</anchorText>
<location>http://www.technorati.com/tags/</location>
</link>
( run in 0.251 second using v1.01-cache-2.11-cpan-05444aca049 )