Alvis-Convert
view release on metacpan or search on metacpan
lib/Alvis/Canonical.pm view on Meta::CPAN
my $html=shift;
my $opts=shift; # if a title/base URL is wished for as well, they are
# returned in a header hash with the same keys
#
# title
# baseURL
# sourceEncoding
$self->_set_err_state($ERR_OK); # clean the slate
my ($title,$baseURL,$src_enc);
$title=$opts->{title} if exists($opts->{title} );
$baseURL=$opts->{title} if exists($opts->{baseURL} );
$src_enc=$self->{sourceEncoding};
$src_enc=$opts->{sourceEncoding} if exists($opts->{sourceEncoding} );
my ($contents,$header)=
$self->{htmlConverter}->clean($html,
{title=>$title,
baseURL=>$baseURL,
sourceEncoding=>$src_enc});
if (!defined($contents))
{
$self->_set_err_state($ERR_HTML_CONV,"In HTML converter: " .
$self->{htmlConverter}->errmsg());
return (undef,$header)
}
if ($DEBUG)
{
open(F,">candoc.cleaned");
print F $contents;
close(F);
}
# To safeguard the element contents with regard to XML
$contents=$self->_make_txt_XML_safe($contents);
# Here goes
my $can_doc=$self->_contents2canDoc($contents,$header,$src_enc);
if (!defined($can_doc))
{
$self->_set_err_state($ERR_CONT2CAN_DOC);
return (undef,$header);
}
return ($can_doc,$header);
}
#########################################################################
#
# Private methods
#
######################################################################
# _contents2canDoc($contents,$header,$source_encoding)
#
# Runs the cleaned, XML-safe contents through the conversion pipeline,
# in this order:
#
#   _handle_sections -> _handle_lists -> _handle_links
#                    -> _to_alvis -> _pretty_print
#
# Parameters:
#   $contents        - contains relevant HTML markup
#   $header          - header hash, handed to _handle_links (which is
#                      expected to update it with information like links)
#   $source_encoding - handed to _handle_sections
#
# Returns whatever the final _pretty_print call returns.
#
# When $DEBUG is true, a snapshot of every stage is written to a
# 'candoc.*' file in the current working directory. Dumping is
# best-effort: a file that cannot be written is reported with warn()
# and the conversion carries on.
sub _contents2canDoc
{
    my $self=shift;
    my $contents=shift; # contains relevant HTML markup
    my $header=shift;   # will be updated with information like links
    my $source_encoding=shift;

    # Writes $text to $file. Uses a lexical handle and three-argument
    # open so that no package-wide bareword handle is clobbered, and
    # checks open/close so that a failed dump is visible.
    my $dump=sub
    {
        my ($file,$text)=@_;

        my $fh;
        if (!open($fh,'>',$file))
        {
            warn "Cannot open debug dump '$file' for writing: $!";
            return;
        }
        print {$fh} $text;
        close($fh)
            or warn "Cannot close debug dump '$file': $!";
        return;
    };

    # Dumps an intermediate document. It is first rendered with
    # _to_alvis and _pretty_print, exactly as the final result is,
    # so that the per-stage dumps can be compared with each other.
    my $dump_rendered=sub
    {
        my ($file,$doc)=@_;

        my $rendered=$self->_to_alvis($doc);
        $rendered=$self->_pretty_print($rendered);
        $dump->($file,$rendered);
        return;
    };

    $dump->("candoc.cleanNXMLSafe",$contents) if $DEBUG;

    # Convert in order of importance to the structure
    my $can_doc=$self->_handle_sections($contents,$source_encoding);
    $dump_rendered->("candoc.aftersections",$can_doc) if $DEBUG;

    $can_doc=$self->_handle_lists($can_doc);
    $dump_rendered->("candoc.afterlists",$can_doc) if $DEBUG;

    $can_doc=$self->_handle_links($can_doc,$header);
    $dump_rendered->("candoc.afterlinks",$can_doc) if $DEBUG;

    # OK, time to put some make-up on and go out
    $can_doc=$self->_to_alvis($can_doc);
    # Already in Alvis form here, so only the pretty-printing is needed
    $dump->("candoc.aftertoalvis",$self->_pretty_print($can_doc)) if $DEBUG;

    $can_doc=$self->_pretty_print($can_doc);
    $dump->("candoc.afterprettyprint",$can_doc) if $DEBUG;

    return $can_doc;
}
( run in 1.348 second using v1.01-cache-2.11-cpan-140bd7fdf52 )