Alvis-Convert

 view release on metacpan or  search on metacpan

lib/Alvis/HTML.pm  view on Meta::CPAN

	     $ERR_TARGET_CONV=>"Converting to the output encoding failed.",
	     $ERR_SRC_NOT_IN_UTF8=>"The source is not in UTF-8.",
	     $ERR_GUESS_ENC_UTF8_CONV=>"Guessing the source encoding and " .
	     "then converting it to UTF-8 failed."
	     );

#
# Updates the error text kept in $self->{errstr}.
#
#   $errcode  one of the keys of %ErrMsgs (mandatory)
#   $errmsg   optional free-form detail appended after the canned message
#
# $ERR_OK empties the error text. Any other known code appends
# " <canned message>" and, when $errmsg is defined, " <$errmsg>" to whatever
# is already stored, so consecutive failures accumulate.
#
# Confesses (dies with a stack trace) when $errcode is undefined or is not
# a key of %ErrMsgs.
#
sub _set_err_state
{
    my ($self,$errcode,$errmsg)=@_;

    # Guard: a code is mandatory.
    unless (defined($errcode))
    {
        confess("set_err_state() called with an undefined argument.");
    }

    # Guard: the code has to be one we have a message for.
    unless (exists($ErrMsgs{$errcode}))
    {
        confess("Internal error: set_err_state() called with an " .
                "unrecognized argument ($errcode).");
    }

    # Success wipes the slate.
    if ($errcode==$ERR_OK)
    {
        return $self->{errstr}="";
    }

    # Failure: canned message first, then the optional detail.
    $self->{errstr}.=" " . $ErrMsgs{$errcode};
    if (defined($errmsg))
    {
        $self->{errstr}.=" " . $errmsg;
    }
}

#
# Accessor: returns the error text currently stored in $self->{errstr}.
#
sub errmsg
{
    my ($self)=@_;

    my $stored=$self->{errstr};

    return $stored;
}

#
# Resets the stored error text ($self->{errstr}) to the empty string.
#
sub clearerr
{
    my ($self)=@_;

    return $self->{errstr}="";
}

#
# Constructor.
#
# May be invoked on the class name or on an existing instance; in the
# latter case the new object is blessed into the instance's class.
#
# Arguments: an optional list of key => value pairs that is handed to
# _init(), which stores them over the built-in defaults.
#
# Returns the new object, or undef if the Alvis::Document::Encoding
# helper could not be created.
#
sub new
{
    my $proto=shift;

    my $class=ref($proto)||$proto;
    my $self=bless({},$class);

    $self->_init(@_);

    # Always installed after _init(), so a caller-supplied 'encodingWiz'
    # argument is replaced by this freshly built helper.
    $self->{encodingWiz}=Alvis::Document::Encoding->new();
    if (!defined($self->{encodingWiz}))
    {
        # NOTE(review): the message is recorded on the object that is
        # about to be discarded, so the caller cannot read it through
        # errmsg().  Kept as is to leave the interface unchanged.
        $self->_set_err_state($ERR_ENC);

        # Explicit undef (not a bare return) so list-context callers keep
        # receiving a one-element list, exactly as before.
        return undef;
    }

    $self->_set_err_state($ERR_OK);

    return $self;
}

#
# Fills in the object's option fields.
#
# Every option is first set to its module-level default ($DEF_*).  Any
# key => value pairs passed in are then copied into the object verbatim,
# overriding the defaults; keys are not validated, so arbitrary extra
# fields may be stored as well.
#
sub _init
{
    my $self=shift;

    $self->{assertHTML}=$DEF_ASSERT_HTML;
    $self->{keepAll}=$DEF_KEEP_ALL;
    $self->{assertSourceAssumptions}=$DEF_SRC_ASS;
    $self->{convertCharEnts}=$DEF_CONVERT_CHAR_ENTS;
    $self->{convertNumEnts}=$DEF_CONVERT_NUM_ENTS;
    $self->{cleanWhitespace}=$DEF_CLEAN_WS;
    $self->{sourceEncoding}=$DEF_SRC_ENCODING;

    # Plain boolean test on the array: defined(@array) is a fatal
    # compile-time error from Perl 5.22 onwards.
    if (@_)
    {
        my %args=@_;
        # Hash slice: keys() and values() walk %args in the same order.
        @$self{ keys %args }=values(%args);
    }
}

#############################################################################
#
#      Public methods
#
##############################################################################
 
#
# Returns (<contents as text>,<header hash ref>)
#
sub clean
{
    my $self=shift;
    my $html=shift;
    my $opts=shift;    # if a title/base URL is wished for as well
                       # returned in a header hash with keys
                       # title, baseURL

    my %header=(title=>undef,
		baseURL=>undef);

    $self->_set_err_state($ERR_OK);  # clean the slate

    # Make it utf-8 if not already
    my $src_enc;
    if ($opts->{sourceEncoding})
    {



( run in 0.548 second using v1.01-cache-2.11-cpan-39bf76dae61 )