Alvis-Convert
view release on metacpan or search on metacpan
lib/Alvis/Convert.pm view on Meta::CPAN
$self->{canonicalConverter}->HTML($html,
{title=>1,
baseURL=>1,
sourceEncoding=>$src_enc});
if (!defined($can_doc))
{
$self->_set_err_state($ERR_CANDOC_CONV,
$self->{canonicalConverter}->errmsg());
return undef;
}
if (!defined($meta->get('title')))
{
$meta->set('title',$header->{title});
}
if (!defined($meta->get('url')))
{
$self->_set_err_state($ERR_NO_URL);
return undef;
}
else
{
if (!defined($meta->get('baseURL')))
{
if (defined($header->{baseURL}))
{
$meta->set('baseURL',$header->{baseURL});
}
else
{
my $base_URL=$meta->get('url');
$base_URL=~s/\/[^\/]+?$/\//isgo;
$meta->set('baseURL',$base_URL);
}
}
}
my $links=Alvis::Document::Links->new();
if (!defined($links))
{
$self->_set_err_state($ERR_LINKS);
return undef;
}
for my $link (@{$header->{links}})
{
my ($url,$text,$type);
if (exists($link->{url}))
{
$url=$link->{url};
}
if (exists($link->{text}))
{
$text=$link->{text};
}
if (exists($link->{type}))
{
if ($link->{type}=~/^\s*a\s*$/isgo)
{
$type='a';
}
elsif ($link->{type}=~/^\s*i?frame\s*$/isgo)
{
$type='frame';
}
elsif ($link->{type}=~/^\s*img\s*$/isgo)
{
$type='img';
}
}
if (!$links->add($url,$text,$type))
{
$self->_set_err_state($ERR_LINK_ADD,
$links->errmsg());
return undef;
}
}
my $alvisXML=
$self->{documentAssembler}->assemble({canDoc=>$can_doc,
links=>$links,
meta=>$meta,
origText=>$html});
if (!defined($alvisXML))
{
$self->_set_err_state($ERR_ASSEMBLE,
$self->{documentAssembler}->errmsg());
return undef;
}
return $alvisXML;
}
sub newsXML
{
my $self=shift;
my $newsXML=shift;
my $meta_txt=shift;
my $orig_txt=shift;
$self->_set_err_state($ERR_OK);
my $meta=Alvis::Document::Meta->new(text=>$meta_txt);
if (!defined($meta))
{
$self->_set_err_state($ERR_META,
"Meta text:\"$meta_txt\".");
return undef;
}
my @alvisXMLs=();
my $articles=$self->_parse_newsXML($newsXML);
if (!defined($articles))
{
$self->_set_err_state($ERR_NEWS_XML_PARSE);
return undef;
}
for my $article (@$articles)
{
my ($text,$iso_date,$title,$links)=@$article;
if (!defined($text))
{
( run in 1.345 second using v1.01-cache-2.11-cpan-e1769b4cff6 )