Alvis-Convert

 view release on metacpan or  search on metacpan

lib/Alvis/Canonical.pm  view on Meta::CPAN

	     $ERR_CONT2CAN_DOC=>"Converting the HTML's contents failed",
	     $ERR_NO_HTML_CLEAN=>"Unable to instantiate the HTML cleaner",
	     $ERR_MISFORMED_REL_URL=>"Misformed relative URL",
	     $ERR_REL_URL_VS_BASE_MISMATCH=>"Cannot match a relative URL " .
	     "and the URL base"
   );

#
# Records an error state on the object.
#
#   $errcode  - one of the keys of %ErrMsgs (required)
#   $errmsg   - optional free-form detail appended after the canned message
#
# Passing $ERR_OK resets the accumulated error string to the empty string;
# any other recognized code appends its canned message (and the optional
# detail), each preceded by a single space. An undefined or unrecognized
# code is treated as a programming error and confess()es.
#
sub _set_err_state
{
    my ($self,$errcode,$errmsg)=(shift,shift,shift);

    defined($errcode)
        or confess("set_err_state() called with an undefined argument.");

    exists($ErrMsgs{$errcode})
        or confess("Internal error: set_err_state() called with an " .
                   "unrecognized argument ($errcode).");

    if ($errcode==$ERR_OK)
    {
        # A clean state wipes whatever had accumulated so far.
        $self->{errstr}="";
    }
    else
    {
        # Errors pile up: canned text first, then the caller's detail.
        $self->{errstr}.=" " . $ErrMsgs{$errcode};
        $self->{errstr}.=" " . $errmsg if defined($errmsg);
    }
}

#
# Returns the error text accumulated on this object so far
# (whatever is currently stored under the 'errstr' key).
#
sub errmsg
{
    my ($self)=@_;

    my $accumulated=$self->{errstr};

    return $accumulated;
}

#############################################################################
#
#      Methods
#
##############################################################################
 
#
# Constructor. May be called as a class method or on an existing instance
# (in which case the new object is blessed into the instance's class).
#
# All arguments are handed unchanged to _init(). After initialisation the
# error state is reset and two Alvis::HTML helper objects are created and
# stored under the 'htmlConverter' and 'htmlTagCleaner' keys.
#
# Returns the new object, or undef if either helper could not be
# instantiated.
#
# NOTE(review): on failure the error state is recorded on the object that
# is then discarded, so the caller has no handle to read it through
# errmsg() -- confirm whether a class-level error channel is wanted.
#
sub new
{
    my $proto=shift;

    my $class=ref($proto)||$proto;
    my $self={};
    bless($self,$class);

    $self->_init(@_);

    $self->_set_err_state($ERR_OK);

    # Options common to both helpers. Kept as an ordered list (not a hash)
    # so Alvis::HTML->new() sees the arguments in the original order.
    my @sharedOptions=(alvisRemove=>1,
		       obsolete=>1,
		       proprietary=>1,
		       xhtml=>1,
		       wml=>1,
		       keepAll=>1,
		       assertHTML=>0,
		       convertCharEnts=>$self->{convertCharEnts},
		       convertNumEnts=>$self->{convertNumEnts},
		       sourceEncoding=>$self->{sourceEncoding}
		       );

    # Removes uninteresting HTML tags, fixes the interesting tags and
    # converts natural language relevant <=#255 character entities to
    # characters and UTF-8 numerical entities to characters if wanted
    $self->{htmlConverter}=Alvis::HTML->new(alvisKeep=>0,@sharedOptions);
    if (!defined($self->{htmlConverter}))
    {
	# The helper does not exist here, so there is no object to ask for
	# a detailed message; calling ->errmsg() on undef would die instead
	# of letting us return undef. Record the error code alone.
	$self->_set_err_state($ERR_NO_HTML_CONV);
	return undef;
    }

    #
    # Used for removing all HTML tags from parts of the document
    # that don't allow any (like section titles)
    #
    $self->{htmlTagCleaner}=Alvis::HTML->new(alvisKeep=>1,@sharedOptions);
    if (!defined($self->{htmlTagCleaner}))
    {
	# Same reasoning as above: no object, hence no detail message.
	$self->_set_err_state($ERR_NO_HTML_CLEAN);
	return undef;
    }

    return $self;
}

sub _init
{
    my $self=shift;



( run in 1.445 second using v1.01-cache-2.11-cpan-39bf76dae61 )