Alvis-NLPPlatform
view release on metacpan or search on metacpan
lib/Alvis/NLPPlatform/Convert.pm view on Meta::CPAN
282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320
# Ask the converter for the Alvis rendering of the HTML text, passing the
# accompanying $meta_txt along with it.
my $alvisXML = $Alvis_converter->HTML($html_txt, $meta_txt);

# An undefined result is treated as a conversion failure: emit a warning
# that includes the converter's own error message, call clearerr() on the
# converter, and hand status 2 back to the caller.
unless (defined $alvisXML) {
    my $failure = "Obtaining the Alvis version of the "
        . "HTML version of an article failed. "
        . $Alvis_converter->errmsg();
    warn $failure;
    $Alvis_converter->clearerr();
    return 2;
}
# my $e=Alvis::Document::Encoding->new();
# my $type_guesser=Alvis::Document::Type->new();
# my ($doc_type,$doc_sub_type)=$type_guesser->guess($alvisXML);
# my $doc_encoding=$e->guess_and_convert($alvisXML,$doc_type,$doc_sub_type, "UTF-8");
# if (!defined($doc_encoding))
# {
# die('Cannot guess. ' . $e->errmsg());
# }
# print STDERR "$doc_type,$doc_sub_type,$doc_encoding\n";
# print STDERR $e->guess($alvisXML);
# warn "Checking the encoding\n";
# if (!Encode::is_utf8($alvisXML)) {
# warn "Not a UTF-8, assume to be a latin-1 document\n";
# print STDERR "Converting in UTF8...\n";
# Encode::from_to($alvisXML, "iso-8859-1", "UTF-8");
# print STDERR "done\n";
# }
# print STDERR $alvisXML;
# exit;
# my $decoder = Encode::Guess->guess_encoding($alvisXML, /UTF-8/);
# if (!ref($decoder)) {
# warn "Not a UTF-8, assume to be a latin-1 document\n";
# print STDERR "Converting in UTF8...\n";
# $alvisXML = $decoder->decode($alvisXML);
# # Encode::from_to($alvisXML, "iso-8859-1", "UTF-8");
# print STDERR "done\n";
# } else {
# warn "Document is already in UTF-8 :-)\n";
# }
( run in 0.279 second using v1.01-cache-2.11-cpan-e9199f4ba4c )