Alvis-NLPPlatform

 view release on metacpan or  search on metacpan

lib/Alvis/NLPPlatform/Convert.pm  view on Meta::CPAN

    # Ask the converter object for the Alvis XML rendering of the document.
    # NOTE(review): $html_txt / $meta_txt are presumably the raw HTML and its
    # metadata text -- they are set outside this fragment; confirm at the caller.
    my $alvisXML = $Alvis_converter->HTML($html_txt, $meta_txt);

    # An undefined result is treated as a conversion failure: report it,
    # reset the converter's error state, and hand back 2 to the caller.
    unless (defined $alvisXML) {
        # errmsg() must be read before clearerr() is called below.
        warn "Obtaining the Alvis version of the "
            . "HTML version of an article failed. "
            . $Alvis_converter->errmsg();
        $Alvis_converter->clearerr();
        return 2;
    }
#  	my $e=Alvis::Document::Encoding->new();
# 	my $type_guesser=Alvis::Document::Type->new();
# 	my ($doc_type,$doc_sub_type)=$type_guesser->guess($alvisXML);
# 	my $doc_encoding=$e->guess_and_convert($alvisXML,$doc_type,$doc_sub_type, "UTF-8");
# 	if (!defined($doc_encoding))
# 	{
# 	    die('Cannot guess. ' . $e->errmsg());
# 	}
# 	print STDERR "$doc_type,$doc_sub_type,$doc_encoding\n";
# 	print STDERR $e->guess($alvisXML);
#     warn "Checking the encoding\n";
#     if (!Encode::is_utf8($alvisXML)) {
# 	warn "Not a UTF-8, assume to be a latin-1 document\n";
# 	print STDERR "Converting in UTF8...\n";
# 	Encode::from_to($alvisXML, "iso-8859-1", "UTF-8");
# 	print STDERR "done\n";
#     }
#  	print STDERR $alvisXML;
#  	exit;
	
#   my $decoder = Encode::Guess->guess_encoding($alvisXML, /UTF-8/);
#     if (!ref($decoder)) {
# 	warn "Not a UTF-8, assume to be a latin-1 document\n";
# 	print STDERR "Converting in UTF8...\n";
# 	$alvisXML = $decoder->decode($alvisXML);
# # 	Encode::from_to($alvisXML, "iso-8859-1", "UTF-8");
# 	print STDERR "done\n";
#     } else {
# 	warn "Document is already in UTF-8 :-)\n";
#     }



( run in 0.245 second using v1.01-cache-2.11-cpan-9b1e4054eb1 )