Alvis-NLPPlatform

 view release on metacpan or  search on metacpan

lib/Alvis/NLPPlatform/Convert.pm  view on Meta::CPAN

282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
    # Ask the Alvis converter object to convert the HTML text, together with
    # its accompanying metadata text, and keep the result in $alvisXML.
    my $alvisXML=$Alvis_converter->HTML($html_txt,$meta_txt);
 
    # An undefined result is treated as a conversion failure: warn with the
    # converter's own error message appended, call clearerr() on the
    # converter (presumably resetting its stored error -- confirm), and
    # return 2 to the caller.
    # NOTE(review): the meaning of the return value 2 is not visible in this
    # fragment; verify against the enclosing sub and its callers.
    if (!defined($alvisXML))
    {
        warn "Obtaining the Alvis version of the " .
            "HTML version of an article failed. " . $Alvis_converter->errmsg();
        $Alvis_converter->clearerr();
        return 2;
    }
#       my $e=Alvis::Document::Encoding->new();
#       my $type_guesser=Alvis::Document::Type->new();
#       my ($doc_type,$doc_sub_type)=$type_guesser->guess($alvisXML);
#       my $doc_encoding=$e->guess_and_convert($alvisXML,$doc_type,$doc_sub_type, "UTF-8");
#       if (!defined($doc_encoding))
#       {
#           die('Cannot guess. ' . $e->errmsg());
#       }
#       print STDERR "$doc_type,$doc_sub_type,$doc_encoding\n";
#       print STDERR $e->guess($alvisXML);
#     warn "Checking the encoding\n";
#     if (!Encode::is_utf8($alvisXML)) {
#       warn "Not a UTF-8, assume to be a latin-1 document\n";
#       print STDERR "Converting in UTF8...\n";
#       Encode::from_to($alvisXML, "iso-8859-1", "UTF-8");
#       print STDERR "done\n";
#     }
#       print STDERR $alvisXML;
#       exit;
         
#   my $decoder = Encode::Guess->guess_encoding($alvisXML, /UTF-8/);
#     if (!ref($decoder)) {
#       warn "Not a UTF-8, assume to be a latin-1 document\n";
#       print STDERR "Converting in UTF8...\n";
#       $alvisXML = $decoder->decode($alvisXML);
# #     Encode::from_to($alvisXML, "iso-8859-1", "UTF-8");
#       print STDERR "done\n";
#     } else {
#       warn "Document is already in UTF-8 :-)\n";
#     }



( run in 0.279 second using v1.01-cache-2.11-cpan-e9199f4ba4c )