Alvis-Convert
view release on metacpan or search on metacpan
lib/Alvis/Document/Encoding.pm view on Meta::CPAN
57095=>1, 57096=>1, 57097=>1, 57098=>1, 57099=>1, 57100=>1, 57101=>1,
57102=>1, 57103=>1, 57104=>1, 57105=>1, 57106=>1, 57107=>1, 57108=>1,
57109=>1, 57110=>1, 57111=>1, 57112=>1, 57113=>1, 57114=>1, 57115=>1,
57116=>1, 57117=>1, 57118=>1, 57119=>1, 57120=>1, 57121=>1, 57122=>1,
57123=>1, 57124=>1, 57125=>1, 57126=>1, 57127=>1, 57128=>1, 57129=>1,
57130=>1, 57131=>1, 57132=>1, 57133=>1, 57134=>1, 57135=>1, 57136=>1,
57137=>1, 57138=>1, 57139=>1, 57140=>1, 57141=>1, 57142=>1, 57143=>1,
57144=>1, 57145=>1, 57146=>1, 57147=>1, 57148=>1, 57149=>1, 57150=>1,
57151=>1, 57152=>1, 57153=>1, 57154=>1, 57155=>1, 57156=>1, 57157=>1,
57158=>1, 57159=>1, 57160=>1, 57161=>1, 57162=>1, 57163=>1, 57164=>1,
57165=>1, 57166=>1, 57167=>1, 57168=>1, 57169=>1, 57170=>1, 57171=>1,
57172=>1, 57173=>1, 57174=>1, 57175=>1, 57176=>1, 57177=>1, 57178=>1,
57179=>1, 57180=>1, 57181=>1, 57182=>1, 57183=>1, 57184=>1, 57185=>1,
57186=>1, 57187=>1, 57188=>1, 57189=>1, 57190=>1, 57191=>1, 57192=>1,
57193=>1, 57194=>1, 57195=>1, 57196=>1, 57197=>1, 57198=>1, 57199=>1,
57200=>1, 57201=>1, 57202=>1, 57203=>1, 57204=>1, 57205=>1, 57206=>1,
57207=>1, 57208=>1, 57209=>1, 57210=>1, 57211=>1, 57212=>1, 57213=>1,
57214=>1, 57215=>1, 57216=>1, 57217=>1, 57218=>1, 57219=>1, 57220=>1,
57221=>1, 57222=>1, 57223=>1, 57224=>1, 57225=>1, 57226=>1, 57227=>1,
57228=>1, 57229=>1, 57230=>1, 57231=>1, 57232=>1, 57233=>1, 57234=>1,
57235=>1, 57236=>1, 57237=>1, 57238=>1, 57239=>1, 57240=>1, 57241=>1,
57242=>1, 57243=>1, 57244=>1, 57245=>1, 57246=>1, 57247=>1, 57248=>1,
57249=>1, 57250=>1, 57251=>1, 57252=>1, 57253=>1, 57254=>1, 57255=>1,
57256=>1, 57257=>1, 57258=>1, 57259=>1, 57260=>1, 57261=>1, 57262=>1,
57263=>1, 57264=>1, 57265=>1, 57266=>1, 57267=>1, 57268=>1, 57269=>1,
57270=>1, 57271=>1, 57272=>1, 57273=>1, 57274=>1, 57275=>1, 57276=>1,
57277=>1, 57278=>1, 57279=>1, 57280=>1, 57281=>1, 57282=>1, 57283=>1,
57284=>1, 57285=>1, 57286=>1, 57287=>1, 57288=>1, 57289=>1, 57290=>1,
57291=>1, 57292=>1, 57293=>1, 57294=>1, 57295=>1, 57296=>1, 57297=>1,
57298=>1, 57299=>1, 57300=>1, 57301=>1, 57302=>1, 57303=>1, 57304=>1,
57305=>1, 57306=>1, 57307=>1, 57308=>1, 57309=>1, 57310=>1, 57311=>1,
57312=>1, 57313=>1, 57314=>1, 57315=>1, 57316=>1, 57317=>1, 57318=>1,
57319=>1, 57320=>1, 57321=>1, 57322=>1, 57323=>1, 57324=>1, 57325=>1,
57326=>1, 57327=>1, 57328=>1, 57329=>1, 57330=>1, 57331=>1, 57332=>1,
57333=>1, 57334=>1, 57335=>1, 57336=>1, 57337=>1, 57338=>1, 57339=>1,
57340=>1, 57341=>1, 57342=>1, 57343=>1, 64976=>1, 64977=>1, 64978=>1,
64979=>1, 64980=>1, 64981=>1, 64982=>1, 64983=>1, 64984=>1, 64985=>1,
64986=>1, 64987=>1, 64988=>1, 64989=>1, 64990=>1, 64991=>1, 64992=>1,
64993=>1, 64994=>1, 64995=>1, 64996=>1, 64997=>1, 64998=>1, 64999=>1,
65000=>1, 65001=>1, 65002=>1, 65003=>1, 65004=>1, 65005=>1, 65006=>1,
65007=>1, 65534=>1, 65535=>1, 131070=>1, 131071=>1, 196606=>1, 196607=>1,
262142=>1, 262143=>1, 327678=>1, 327679=>1, 393214=>1, 393215=>1, 458750=>1,
458751=>1, 524286=>1, 524287=>1, 589822=>1, 589823=>1, 655358=>1, 655359=>1,
720894=>1, 720895=>1, 786430=>1, 786431=>1, 851966=>1, 851967=>1, 917502=>1,
917503=>1, 983038=>1, 983039=>1, 1048574=>1, 1048575=>1, 1114110=>1, 1114111=>1,
);
#############################################################################
#
# Error message stuff
#
#############################################################################
# File-scoped error string holder (not referenced in this section of the file).
my $ErrStr;

# Symbolic error codes. They are numbered consecutively starting at 0, in
# exactly the order listed here, so the range on the right-hand side must
# contain one value per name.
my (
    $ERR_OK,
    $ERR_ILLEGAL_CODE,
    $ERR_DOC,
    $ERR_DOC_TYPE,
    $ERR_DOC_SUB_TYPE,
    $ERR_BOM,
    $ERR_FIRST_CHARS,
    $ERR_META,
    $ERR_XML,
    $ERR_GUESS,
    $ERR_WRONG_GUESS,
    $ERR_ILLEGAL_CHAR,
    $ERR_DOC_TYPE_WIZARD,
    $ERR_TYPE_GUESS,
    $ERR_ENCODE_GUESS,
    $ERR_GUESS_AND_CONVERT,
    $ERR_UNABLE_TO_GUESS,
) = (0 .. 16);

# Maps each error code to its fixed message text. _set_err_state() consults
# this table both to validate a code and to build the error string.
my %ErrMsgs = (
    $ERR_OK                => "",
    $ERR_ILLEGAL_CODE      => "Illegal UTF-8 code.",
    $ERR_DOC               => "No document text.",
    $ERR_DOC_TYPE          => "No document type.",
    $ERR_DOC_SUB_TYPE      => "No document subtype.",
    $ERR_BOM               => "Byte order mark recognition failed miserably.",
    $ERR_FIRST_CHARS       => "Guessing from the first characters " .
                              "failed miserably.",
    $ERR_META              => "Guessing from the meta information " .
                              "failed miserably.",
    $ERR_XML               => "Guessing from XML format failed miserably.",
    $ERR_GUESS             => "Unable to guess at the encoding.",
    $ERR_WRONG_GUESS       => "This pair does not convert:",
    $ERR_ILLEGAL_CHAR      => "Illegal character in supposedly UTF-8 " .
                              "result.",
    $ERR_DOC_TYPE_WIZARD   => "Instantiating Alvis::Document::Type",
    $ERR_TYPE_GUESS        => "Guessing the document type failed.",
    $ERR_ENCODE_GUESS      => "Encode::Guess failed.",
    $ERR_GUESS_AND_CONVERT => "Guessing an encoding and then " .
                              "converting failed.",
    $ERR_UNABLE_TO_GUESS   => "Unable to guess at encoding name " .
                              "corrections.",
);
# Records an error on the object.
#
#   $errcode - one of the $ERR_* codes; must be defined and be a key of
#              %ErrMsgs, otherwise confess() is called.
#   $errmsg  - optional extra detail appended after the canned message.
#
# Passing $ERR_OK resets the object's error string to "". Any other code
# APPENDS " <canned message>" (and " <detail>" when given) to the existing
# error string, so several errors can pile up until the next reset.
sub _set_err_state
{
    my ($self, $errcode, $errmsg) = @_;

    defined($errcode)
        or confess("set_err_state() called with an undefined argument.");

    exists($ErrMsgs{$errcode})
        or confess("Internal error: set_err_state() called with an " .
                   "unrecognized argument ($errcode).");

    if ($errcode != $ERR_OK)
    {
        # Accumulate: canned text first, then the caller-supplied detail.
        $self->{errstr} .= " " . $ErrMsgs{$errcode};
        $self->{errstr} .= " " . $errmsg if defined($errmsg);
    }
    else
    {
        # "No error" wipes whatever has been accumulated so far.
        $self->{errstr} = "";
    }
}
# Accessor: returns the object's current error string (the 'errstr' slot),
# as built up by _set_err_state().
sub errmsg
{
    my ($self) = @_;

    return $self->{errstr};
}
#############################################################################
#
lib/Alvis/Document/Encoding.pm view on Meta::CPAN
}
if ($typo=~/^\s*(?:utf|uft)-?7\s*$/isgo)
{
push(@possibilities,'UTF-7');
}
if ($typo=~/^\s*macintosh\s*$/isgo)
{
for (my $i=1; $i<=11; $i++)
{
push(@possibilities,"iso-8859-$i");
}
push(@possibilities,'viscii');
}
if ($typo=~/^\s*iso-8559-(\d)\s*$/isgo)
{
push(@possibilities,"iso-8859-$1");
}
if ($typo=~/^\s*iso-8895-(\d)\s*$/isgo)
{
push(@possibilities,"iso-8859-$1");
}
if ($typo=~/^\s*(?:utf|uft)-?16be\s*$/isgo)
{
push(@possibilities,'UTF-16BE');
}
if ($typo=~/^\s*(?:utf|uft)-?16le\s*$/isgo)
{
push(@possibilities,'UTF-16LE');
}
return @possibilities;
}
#########################################################################
#
# Private methods
#
##########################################################################
#
# HTML::Encoding has a nasty bug
#
# Collects encoding guesses for an HTML document using HTML::Encoding.
#
#   $text - the raw document text; must be defined and non-empty.
#
# Returns the list of guesses (possibly empty). Missing text sets $ERR_DOC
# and returns an empty list. Exceptions thrown by HTML::Encoding are trapped
# and recorded through _set_err_state(); they do not abort the guessing.
#
# Strategy:
#   1. Byte order mark: if it yields anything, that answer is returned as is.
#   2. First characters of the text.
#   3. Meta element lookup, tried once per candidate from step 2 (or once
#      per HTML::Encoding default encoding when step 2 found nothing).
#      Results are placed in front of the earlier guesses.
sub _HTML
{
    my ($self, $text) = @_;

    unless (defined($text) && length($text) >= 1)
    {
        $self->_set_err_state($ERR_DOC);
        return ();
    }

    # Step 1: byte order mark.
    my @guesses;
    eval
    {
        @guesses =
            HTML::Encoding::encoding_from_byte_order_mark($text, xhtml => 0);
    };
    $self->_set_err_state($ERR_BOM, "$@") if $@;

    return @guesses if scalar(@guesses);

    # Step 2: first characters. (Original note: sanity check to exclude
    # e.g. UTF-32.)
    eval
    {
        @guesses = HTML::Encoding::encoding_from_first_chars($text);
    };
    $self->_set_err_state($ERR_FIRST_CHARS, "$@") if $@;

    # Step 3: decide which encodings to assume while looking for a meta
    # element. This is a copy, so growing @guesses below does not affect
    # the iteration.
    my @tries = scalar(@guesses)
        ? @guesses
        : @{$HTML::Encoding::DEFAULT_ENCODINGS};

    foreach my $candidate (@tries)
    {
        # HTML::Encoding is a bit imperfect: skip the UTF-16/UTF-32 family.
        next if ($candidate=~/^\s*UTF-(?:16|32)((?:B|L)E)?\s*$/isgo);

        my @meta_hits;
        eval
        {
            @meta_hits =
                HTML::Encoding::encoding_from_meta_element($text, $candidate);
        };
        $self->_set_err_state($ERR_META, "$@") if $@;

        # Meta-derived guesses take precedence over everything found so far.
        unshift(@guesses, @meta_hits);
    }

    return @guesses;
}
sub _XHTML
{
my $self=shift;
my $text=shift;
if (!defined($text) || length($text)<1)
{
$self->_set_err_state($ERR_DOC);
( run in 1.161 second using v1.01-cache-2.11-cpan-13bb782fe5a )