Alvis-Convert
view release on metacpan or search on metacpan
lib/Alvis/Canonical.pm view on Meta::CPAN
$ERR_CONT2CAN_DOC=>"Converting the HTML's contents failed",
$ERR_NO_HTML_CLEAN=>"Unable to instantiate the HTML cleaner",
$ERR_MISFORMED_REL_URL=>"Misformed relative URL",
$ERR_REL_URL_VS_BASE_MISMATCH=>"Cannot match a relative URL " .
"and the URL base"
);
# Records an error condition on the object.
#
#   $errcode - one of the keys of %ErrMsgs (required). $ERR_OK resets the
#              accumulated error text to the empty string.
#   $errmsg  - optional extra detail appended after the canned message.
#
# For any code other than $ERR_OK the canned message (and the detail, if
# given) is *appended* to $self->{errstr}, each preceded by a single space,
# so successive errors accumulate until the state is reset.
#
# Dies with a stack trace (confess) when $errcode is undefined or is not a
# key of %ErrMsgs.
sub _set_err_state
{
    my ($self, $errcode, $errmsg) = @_;

    defined($errcode)
        or confess("set_err_state() called with an undefined argument.");

    unless (exists($ErrMsgs{$errcode}))
    {
        confess("Internal error: set_err_state() called with an " .
                "unrecognized argument ($errcode).");
    }

    # Success: wipe whatever error text had been collected so far.
    if ($errcode == $ERR_OK)
    {
        return $self->{errstr} = "";
    }

    # Failure: accumulate the canned text, then the caller's detail.
    $self->{errstr} .= " " . $ErrMsgs{$errcode};
    if (defined($errmsg))
    {
        $self->{errstr} .= " " . $errmsg;
    }
}
# Accessor: returns the error text currently stored in $self->{errstr}
# (whatever _set_err_state has accumulated since the last reset).
sub errmsg
{
    my ($self) = @_;

    return $self->{errstr};
}
#############################################################################
#
# Methods
#
##############################################################################
# Constructor. May be called as a class method or on an existing instance
# (in which case the new object is blessed into the instance's class).
#
# All arguments are passed through unchanged to _init(). After
# initialisation the error state is reset and two Alvis::HTML helper
# objects are created:
#
#   $self->{htmlConverter}  - built with alvisKeep=>0
#   $self->{htmlTagCleaner} - built with alvisKeep=>1
#
# Both receive the same remaining options, including the convertCharEnts,
# convertNumEnts and sourceEncoding values held in $self.
#
# Returns the new object, or undef if either helper could not be
# instantiated.
sub new
{
    my $proto=shift;
    my $class=ref($proto)||$proto;

    my $self={};
    bless($self,$class);

    $self->_init(@_);
    $self->_set_err_state($ERR_OK);

    # Options common to both Alvis::HTML instances. Kept as an ordered list
    # so each constructor sees its arguments in the same order as before.
    my @shared_opts=(alvisRemove=>1,
                     obsolete=>1,
                     proprietary=>1,
                     xhtml=>1,
                     wml=>1,
                     keepAll=>1,
                     assertHTML=>0,
                     convertCharEnts=>$self->{convertCharEnts},
                     convertNumEnts=>$self->{convertNumEnts},
                     sourceEncoding=>$self->{sourceEncoding}
                    );

    # Removes uninteresting HTML tags, fixes the interesting tags and
    # converts natural-language-relevant character entities (<= #255) and
    # UTF-8 numerical entities to characters, if wanted.
    $self->{htmlConverter}=Alvis::HTML->new(alvisKeep=>0,@shared_opts);
    if (!defined($self->{htmlConverter}))
    {
        # There is no object to ask for details here: calling ->errmsg()
        # on the undefined value would die instead of letting us return
        # undef, so only the error code is recorded.
        $self->_set_err_state($ERR_NO_HTML_CONV);
        return undef;
    }

    #
    # Used for removing all HTML tags from parts of the document
    # that don't allow any (like section titles)
    #
    $self->{htmlTagCleaner}=Alvis::HTML->new(alvisKeep=>1,@shared_opts);
    if (!defined($self->{htmlTagCleaner}))
    {
        # Same as above: no instance exists, so no detail message.
        $self->_set_err_state($ERR_NO_HTML_CLEAN);
        return undef;
    }

    return $self;
}
sub _init
{
my $self=shift;
( run in 1.445 second using v1.01-cache-2.11-cpan-39bf76dae61 )