Alvis-NLPPlatform

 view release on metacpan or  search on metacpan

lib/Alvis/NLPPlatform/Convert.pm  view on Meta::CPAN

    my $AlvisConv = shift ;
    my $config = shift;
    my $mm = shift;

    warn "Openning $outfile\n";
    open OUTFILE, ">$outfile";
    binmode(OUTFILE, ":utf8");
    print OUTFILE $outdata;
    close OUTFILE;
    return &conversion_file_to_alvis_xml($outfile, $AlvisConv, $config, $mm);
    
}


# Runs the external stylesheet command configured for an XML namespace on a
# file and captures what the command writes on its standard output.
#
# Parameters:
#   $file   - path of the file, appended as last argument of the command
#   $xmlns  - key looked up in $config->{"CONVERTERS"}->{"STYLESHEET"};
#             when there is no entry for it, the "default" entry is used
#   $config - configuration hash reference
#
# Returns the list ($xmlns, $outdata): the key that was actually used
# (possibly "default") and the captured output.  $outdata is undef when no
# command is configured for the key or when the command could not be started.
sub applying_stylesheet
{
    my $file = shift;
    my $xmlns = shift;
    my $config = shift;

    # maybe change for XML::DOM::Lite::XSLT engine

    my $stylesheets = $config->{"CONVERTERS"}->{"STYLESHEET"} || {};

    if (!defined $xmlns || !exists $stylesheets->{$xmlns}) {
        $xmlns = "default";
    }

    my $stylesheet = $stylesheets->{$xmlns};
    if (!defined $stylesheet || $stylesheet eq "") {
        # Without a configured command the shell would be asked to execute
        # the input file itself, so give up instead.
        warn "No stylesheet command is configured for $xmlns\n";
        return ($xmlns, undef);
    }
    print STDERR "Applying the stylesheet : " . $stylesheet . "\n";

    # The configured command is a shell command line (it may carry its own
    # options), so it is kept as is; only the file name is protected, with
    # POSIX single quotes, so that spaces or shell metacharacters in the
    # name are passed through as one literal argument.
    my $command = $stylesheet;
    if (defined $file && $file ne "") {
        my $quoted_file = $file;
        $quoted_file =~ s/'/'\\''/g;
        $command .= " '" . $quoted_file . "'";
    }

    my $outdata = `$command`;
    if ($? != 0) {
        # Best effort: report the failure, still hand back what was captured.
        warn "The stylesheet command failed (status $?): $command\n";
    }

    return ($xmlns, $outdata);
}

# Determines the MIME type of a file.
#
# Parameters:
#   $file - name of the file to inspect
#   $mm   - object providing a checktype_filename($file) method
#           (presumably a File::MMagic instance - confirm against the caller)
#
# The type reported by $mm is overridden from the file extension for
# ".ppt" and ".xls" files, any ";..." suffix is removed from the type, and
# ".tex" files reported as "message/rfc822" or "x-system/x-unix" are typed
# "text/x-tex".  Progress messages are written on STDERR.
#
# Returns the resulting type string (undef if $mm reported no type and no
# extension rule applied).
sub get_type_file
{
    my $file = shift;
    my $mm = shift;

    print STDERR "Determining the type of the file " . $file . ": ";

    my $type = $mm->checktype_filename($file);

    # The dot is escaped so that only a real extension matches
    # (e.g. "notappt" is not a PowerPoint file).
    if ($file =~ /\.ppt$/i) {
        $type = "application/powerpoint";
        warn "Getting the type thanks to the extension\n";
    }
    if ($file =~ /\.xls$/i) {
        $type = "application/vnd.ms-excel";
        warn "Getting the type thanks to the extension\n";
    }
    # if msword may be it should be relevant to check the extension, to better determine the type
    if (defined $type) {
        $type =~ s/;.*//;
        # The ";..." suffix has just been stripped, so the bare type is
        # compared; the test is on the detected type, not on the file name.
        if (($type eq "message/rfc822") || ($type eq "x-system/x-unix")) {
            if ($file =~ /\.tex$/i) {
                $type = "text/x-tex";
                warn "Getting the type thanks to the extension\n";
            }
        }
    }
    print STDERR "Type file: " . (defined $type ? $type : "") . "\n";
    return($type);

}

# Creates and initialises the Alvis::Convert object used for the conversions.
#
# Parameters:
#   $config - configuration hash reference; $config->{"ALVISTMP"}, when
#             defined, is used as output root directory, otherwise the
#             current directory "." is used
#
# Returns the Alvis::Convert object, after init_output() has been called on
# it and its outputN field has been set to the first number $i for which
# "$ODir/0/$i.alvis" is not an existing file.
sub html2alvis_init
{
    my $config = shift;

    my $NPerOurDir = 1000;
    my $MetaEncoding = 'UTF-8';            # alternative: 'iso-8859-1'
    my $HTMLEncoding = undef;              # alternative: 'iso-8859-1'
    my $HTMLEncodingFromMeta = 'utf-8';
    my $IncOrigDoc = 1;

    my $ODir = defined $config->{"ALVISTMP"} ? $config->{"ALVISTMP"} : ".";

    warn "Outdir is $ODir\n";

    print STDERR "Initialisation of the Alvis converter ...";

    my $C = Alvis::Convert->new(outputRootDir => $ODir,
                                outputNPerSubdir => $NPerOurDir,
                                outputAtSameLocation => 0,
                                metaEncoding => $MetaEncoding,
                                sourceEncoding => $HTMLEncoding,
                                includeOriginalDocument => $IncOrigDoc,
                                sourceEncodingFromMeta => $HTMLEncodingFromMeta);

    $C->init_output();

    # Skip the numbers whose output file already exists in subdirectory 0
    # (presumably so that earlier output is not overwritten - confirm).
    my $i = 0;
    while (-f "$ODir/0/$i.alvis") {
        $i++;
    }
    warn "Starting  at $i\n";
    $C->{outputN} = $i;
    print STDERR "done\n";
    return($C);
}

sub html2alvis
{
    my $filename = shift;
    my $Alvis_converter = shift;
    my $config = shift;

    print STDERR "Converting $filename to ALVIS XML format\n";

    my $meta_txt = &make_meta($filename);



( run in 0.596 second using v1.01-cache-2.11-cpan-39bf76dae61 )