Alvis-NLPPlatform

 view release on metacpan or  search on metacpan

lib/Alvis/NLPPlatform.pm  view on Meta::CPAN


    for($i=0;$i <scalar(@records); $i++) {
	if ($i == $#records) {
	    $Alvis::NLPPlatform::last_doc = 1;
	}
	$rec = $records[$i];
	($id,$docR)=@$rec;
	warn "Process document $id\n";

	open FILETMP_OUT, ">$tmpfile";
	binmode(FILETMP_OUT, ":utf8");
#	binmode(FILETMP_OUT);
#      print FILETMP_OUT Encode::decode_utf8($doc);
	Alvis::NLPPlatform::platform_reset();
	$render_time = Alvis::NLPPlatform::standalone_main($config, $docR, \*FILETMP_OUT, 1); #${$tab_docs_xml->[$doc_num]}[1] ; ${$ref_doc}[1]
	close(FILETMP_OUT);

	open FILETMP_OUT, "$tmpfile" or die "No such file or directory\n";
	@cur_doc = <FILETMP_OUT>;
	$j = 0;
	while(($j< scalar @cur_doc) && ($cur_doc[$j] !~ s/\@RENDER_TIME_NOT_SET\@/$render_time/)) {
	    $j++;

lib/Alvis/NLPPlatform.pm  view on Meta::CPAN

	
	if ($connection_retry ==0) {
	    die "Timeout. Could not create socket: $! \n";
	}
#     $sock=new IO::Socket::INET( PeerAddr => $nlp_host,
# 				PeerPort => $nlp_port,
# 				Proto => 'tcp');

#     die "Could not create socket: $!\n" unless $sock;
	$sock -> autoflush(1); ###############
 	binmode($sock, ":utf8");
	print STDERR `date`;
	print STDERR "Established connection to server.\n";
	
	print STDERR "Requesting document...";
	print $sock "REQUEST\n";
	print STDERR "done.\n";

	print STDERR "Receiving document...\n";

# SENDING $id

lib/Alvis/NLPPlatform.pm  view on Meta::CPAN

					Proto => 'tcp');
	    
	    warn "Could not create socket: $! \n" unless $sock;
	    $connection_retry--;
	    sleep(1);
	} while(!defined($sock) && ($connection_retry >0));
	
	if ($connection_retry ==0) {
	    die "Timeout. Could not create socket: $! \n";
	}
	binmode $sock, ":utf8";
	
	print STDERR "Established connection to server.\n";
	
	print STDERR "Giving back annotated document...\n";
	# Communication with the server
	print $sock "GIVEBACK\n$id\n";
	
	# Save to XML file

	print STDERR "\tRendering XML...  ";

lib/Alvis/NLPPlatform.pm  view on Meta::CPAN


	warn "Could not create socket: $! \n" unless $sock;
	$connection_retry--;
	sleep(1);
    } while(!defined($sock) && ($connection_retry >0));

    if ($connection_retry ==0) {
	die "Timeout. Could not create socket: $! \n";
    }
    $sock -> autoflush(1); ###############
    binmode $sock, ":utf8";


    print STDERR "Established connection to server.\n";

    print STDERR "Sending aborting message\n";

    print $sock "ABORTING\n$id\n";

    print STDERR "Aborting message sent\n";

lib/Alvis/NLPPlatform.pm  view on Meta::CPAN

    my $sub_dir;
    my %processing_id;

    while(1){
	warn "beginning of the loop\n";
	# await client connection
	if ($client_sock=$sock->accept()) {
	    warn "Accepting a connection\n";
	    if (fork() == 0) {
		close($sock);
		binmode($client_sock, ":utf8");
		my ($client_port,$client_iaddr) = sockaddr_in(getpeername($client_sock));
		warn "Getting information about remote host\n";
		$name=gethostbyaddr($client_iaddr,AF_INET);
		&disp_log($name,"Client (".inet_ntoa($client_iaddr).":".$client_port.") has connected.");
		$client_sock -> autoflush(1); ###############
		
		##############################
		# CLIENT HANDLING CODE
		my $line;
		$line=<$client_sock>;

lib/Alvis/NLPPlatform/Convert.pm  view on Meta::CPAN

# Write $outdata to $outfile through a UTF-8 output layer, then hand the
# file to conversion_file_to_alvis_xml() and return whatever it returns.
#
# Positional arguments:
#   $outdata   - text to write into the file
#   $outfile   - path of the file to create (truncated if it already exists)
#   $AlvisConv - passed through unchanged to conversion_file_to_alvis_xml()
#   $config    - passed through unchanged to conversion_file_to_alvis_xml()
#   $mm        - passed through unchanged to conversion_file_to_alvis_xml()
#
# Dies if the file cannot be opened, written or closed, so that the
# conversion step is never run on a missing or partially written file.
sub outputting_empty_xmlns_file
{
    my ($outdata, $outfile, $AlvisConv, $config, $mm) = @_;

    warn "Openning $outfile\n";

    # Three-argument open with a lexical handle: the file name can no longer
    # be interpreted as part of the mode, and the handle is not a global.
    open my $out_fh, '>', $outfile
	or die "Cannot open $outfile for writing: $!\n";
    binmode($out_fh, ":utf8");

    print {$out_fh} $outdata
	or die "Cannot write to $outfile: $!\n";

    # Buffered write errors only surface at close time, so check it too.
    close $out_fh
	or die "Cannot close $outfile: $!\n";

    return conversion_file_to_alvis_xml($outfile, $AlvisConv, $config, $mm);
}


sub applying_stylesheet
{
    my $file = shift;

lib/Alvis/NLPPlatform/Convert.pm  view on Meta::CPAN

#     return 0;
}

sub outputting_alvis_from_file
{
    my $alvisfile = shift;
    my $Alvis_converter = shift;
    my $config = shift;

    open ALVISFILE, $alvisfile or die "No such file: $alvisfile\n";
#       binmode(ALVISFILE, ":utf8");
    binmode ALVISFILE; # XXXX

    local $/ = undef;

    my $alvisfile_data = <ALVISFILE>;
    close ALVISFILE;

    my $docs = Alvis::NLPPlatform::Document::get_documentRecords($alvisfile_data);

#     print STDERR "doc_list : $docs\n";

lib/Alvis/NLPPlatform/Convert.pm  view on Meta::CPAN

					      loglevel => 10)
	or die "can't create ALVIS write-pipe for port '" . $config->{"alvis_connection"}->{"HARVESTER_PORT"} . "': $!";

    my $tmp_spool_dir = $outputRootDir . "/0";

    opendir DIR, $tmp_spool_dir;
    while($xmlfile = readdir DIR) {

	if (($xmlfile ne ".") && ($xmlfile ne "..")) {
	    open XMLFILE, "$tmp_spool_dir/$xmlfile" or die "Cannot open such file ($xmlfile)\n";
	    binmode(XMLFILE, ":utf8");
	    $xml_rec_doc = "";
	    while($line = <XMLFILE>) {
		$xml_rec_doc .= $line;
	    }
	    $pipe_out->write($xml_rec_doc);
	    close XMLFILE;
	    unlink "$tmp_spool_dir/$xmlfile";
	}
    }
    closedir(DIR);

lib/Alvis/NLPPlatform/Document.pm  view on Meta::CPAN

# use YAML qw( Dump );

sub getnamespace
{
    my $file = shift;

    my $line;
    my $xmlns = undef;

    open FILE, $file;
    binmode(FILE);

    while(($line=<FILE>)){
	if ($line =~ /xmlns=\"?([^\"]+)\"?/) {
            $xmlns = $1;
	    next;
        }
    };
    close FILE;

    return($xmlns);

lib/Alvis/NLPPlatform/Document.pm  view on Meta::CPAN

	$doc=$Parser->parse_file($xmlalvisfile);
    };
    if (!$@)
    {
	if ($doc)
	{
	    my $xmlalvisdata = &get_language($doc);


	    open OUTPUT_FILE, ">$outfile";
	    binmode(OUTPUT_FILE, ":utf8");
	    print OUTPUT_FILE "$xmlalvisdata\n";
	    close(OUTPUT_FILE);
	    return($outfile);
	}
	else
	{
	    warn "Parsing the doc failed.\n";
	}
    } else {
	warn "Parsing the doc failed.\n";

lib/Alvis/NLPPlatform/NLPWrappers.pm  view on Meta::CPAN

	$tok_ct=~s/\\n/\\n /go;
	$tok_ct=~s/\\r/\\r /go;
	$tok_ct=~s/\\t/\\t /go;
	$corpus.=$tok_ct;
	push @tab_tokens,$tok_ct;
    }

    $corpus_filename = $h_config->{'TMPFILE'} . ".corpus_en.txt";
    
    open CORPUS,">$corpus_filename";
#     binmode(CORPUS,":utf8");


    print CORPUS Encode::encode_utf8($corpus);
    close CORPUS;

    print STDERR "done\n";
    
    my $command_line;
    if($Alvis::NLPPlatform::Annotation::ALVISLANGUAGE eq "FR"){
	$command_line = $h_config->{'NLP_tools'}->{'NETAG_FR'} . " $corpus_filename 2>> " . $Alvis::NLPPlatform::ALVISLOGFILE;

lib/Alvis/NLPPlatform/NLPWrappers.pm  view on Meta::CPAN

    #`$command_line` && print STDERR "FAILED TO EXECUTE \"$command_line\": &!\n";
    `$command_line`;
    $Alvis::NLPPlatform::ALVISDEBUG || unlink $corpus_filename;
    @Alvis::NLPPlatform::en_start=();
    @Alvis::NLPPlatform::en_end=();
    @Alvis::NLPPlatform::en_type=();

    $result_filename = $h_config->{'TMPFILE'} . ".corpus_en.tag.txt";

    open REN,"<$result_filename"  or warn "Can't open the file $result_filename";
    binmode REN;
    while($line=<REN>){
	($NE_type, $NE_start, $NE_end) = split /\t/, $line;
# 	$line=~m/(.+)\s+([0-9]+)\s+([0-9]+)/;
# 	$NE_type = $1;
# 	$NE_start = $2;
# 	$NE_end = $3;
	push @Alvis::NLPPlatform::en_type,$NE_type;
	if ((exists($h_config->{'XML_INPUT'}->{"PRESERVEWHITESPACE"})) && ($h_config->{'XML_INPUT'}->{"PRESERVEWHITESPACE"})) {
	    push @Alvis::NLPPlatform::en_start,($NE_start-1);
	    push @Alvis::NLPPlatform::en_end,($NE_end-1);

lib/Alvis/NLPPlatform/NLPWrappers.pm  view on Meta::CPAN


####
    print STDERR "  Word segmentation...    ";
    my $content;
#     open CORPUS,">:utf8",$h_config->{'TMPFILE'} . ".corpus.tmp";

    $corpus_filename = $h_config->{'TMPFILE'} . ".corpus_word.tmp";
    $result_filename = $h_config->{'TMPFILE'} . ".words.tmp";

    open CORPUS,">$corpus_filename";
#    binmode(CORPUS);
#     binmode(CORPUS, ":utf8");
    foreach $token(Alvis::NLPPlatform::Annotation::sort(\%Alvis::NLPPlatform::hash_tokens)){
	$content=$Alvis::NLPPlatform::hash_tokens{$token};
	$content=~s/\\n/\n/og;
	$content=~s/\\t/\t/og;
	$content=~s/\\r/\r/og;
	#Encode::decode_utf8("Å“")
#	$content =~ s/\x{65}/oe/g;

	Alvis::NLPPlatform::XMLEntities::decode($content);
#  	Encode::from_to($content, "utf8", "iso-8859-1");

lib/Alvis/NLPPlatform/NLPWrappers.pm  view on Meta::CPAN


    if($Alvis::NLPPlatform::Annotation::ALVISLANGUAGE eq "FR"){
	$command_line = $h_config->{"NLP_tools"}->{'WORDSEG_FR'} . " < $corpus_filename > $result_filename 2>> " . $Alvis::NLPPlatform::ALVISLOGFILE;
    }else{
	$command_line = $h_config->{"NLP_tools"}->{'WORDSEG_EN'} . " < $corpus_filename > $result_filename 2>> ". $Alvis::NLPPlatform::ALVISLOGFILE;
    }

    `$command_line`;
    
    open(MOTS, $result_filename) or warn "Can't open the file $result_filename";;
#    binmode(MOTS,":utf8");
     binmode(MOTS);
    
    $token_id=1;
    $word_id=1;
    
    $token_id_str = "token$token_id";
    while($proposedword=<MOTS>)
    {
#	$proposedword = Encode::encode_utf8($proposedword);
	$word_id_str = "word$word_id";
#	if ($proposedword !~ /^[\s ]*\n$/o) {

lib/Alvis/NLPPlatform/NLPWrappers.pm  view on Meta::CPAN

    my $word_id_str;
    my $word_punct_id_str;

    my @words;

    $corpus_filename = $h_config->{'TMPFILE'} . ".corpus_pos.tmp";
    $result_filename = $h_config->{'TMPFILE'} . ".tags.tmp";

    print STDERR "  Part-Of-Speech tagging..";
    open CORPUS,">$corpus_filename";
#      binmode(CORPUS,":encoding(latin1)");
    # TH - 16/07/2007 - replacement of hash_words by hash_words_punct

    my $fullcontent = "";
    foreach $word (Alvis::NLPPlatform::Annotation::sort(\%Alvis::NLPPlatform::hash_words_punct)){
	$cont=$Alvis::NLPPlatform::hash_words_punct{$word};
  	$fullcontent .= Encode::encode("iso-8859-1", $cont, Encode::FB_DEFAULT);
  	$fullcontent .= "\n";
#   	Encode::from_to($cont, "utf8", "iso-8859-1");
# 	$fullcontent .= "$cont\n";
    }

lib/Alvis/NLPPlatform/NLPWrappers.pm  view on Meta::CPAN


    my $command_line;
    if($Alvis::NLPPlatform::Annotation::ALVISLANGUAGE eq "FR"){
	$command_line = $h_config->{'NLP_tools'}->{'POSTAG_FR'} . " < $corpus_filename  > $result_filename 2>> " . $Alvis::NLPPlatform::ALVISLOGFILE;
    }else{
	$command_line = $h_config->{'NLP_tools'}->{'POSTAG_EN'} . " < $corpus_filename  > $result_filename 2>> " . $Alvis::NLPPlatform::ALVISLOGFILE;
    }
    `$command_line`;

    open TAGS,"<$result_filename";
    binmode(TAGS); #, ":encoding(latin9)");
    $word_id=0;

    my $decal = 0;
    my $wordecal;
    my $word_punct_id = 1;
    $word_punct_id_str = "word$word_punct_id";  

    while ($line = <TAGS>) {
	# Read $Alvis::NLPPlatform::hash_words_punct{"word$word_punct"}
#	Encode::from_to($line, "iso-8859-9", "utf8");

lib/Alvis/NLPPlatform/NLPWrappers.pm  view on Meta::CPAN

    push @{$doc_hash->{"log_processing1"}->{"comments"}},  "Found POS Tags: " . $word_id ;
}

# sub pos_tag # WRAPPER FOR BRILL
# {
#     my $word;
#     my $cont;

#     print STDERR "   Part-Of-Speech tagging...";
#     open CORPUS,">$TMPFILE.corpus.tmp";
#     binmode(CORPUS,":utf8");
#     foreach $word(sort Alvis::NLPPlatform::Annotation::sort_keys keys %Alvis::NLPPlatform::hash_words){
# 	$cont=$Alvis::NLPPlatform::hash_words{$word};
# 	print CORPUS "$cont ";
# 	if($cont eq "."){
# 	    print CORPUS "\n";
# 	}
#     }
#     close CORPUS;
# }

lib/Alvis/NLPPlatform/NLPWrappers.pm  view on Meta::CPAN

    my ($class, $h_config, $doc_hash) = @_;

    print STDERR "  Semantic tagging...     ";

    my $in_fn = $h_config->{'TMPFILE'} . ".ast.in";

    if($Alvis::NLPPlatform::Annotation::ALVISLANGUAGE eq "FR"){
	# French parser command line
    }else{
	open DOC,">$in_fn";
	binmode(DOC,":utf8");
	Alvis::NLPPlatform::Annotation::render_xml($doc_hash, \*DOC, 1);
	close DOC;
    
	my $cmdline = $h_config->{'NLP_tools'}->{'SEMTAG_EN'} . " $in_fn > " . $h_config->{'TMPFILE'} . ".ast.out 2>> " . $Alvis::NLPPlatform::ALVISLOGFILE;
#  	print STDERR "$cmdline\n";
	
 	`$cmdline`;
	$Alvis::NLPPlatform::ALVISDEBUG || unlink $h_config->{'TMPFILE'} . ".ast.in";
	$Alvis::NLPPlatform::ALVISDEBUG || unlink $h_config->{'TMPFILE'} . ".ast.out";
	# $semtagout == doc XML enriched-document

lib/Alvis/NLPPlatform/UserNLPWrappers.pm  view on Meta::CPAN

    my $min;
    my $max;

    my $btw_start;
    my $btw_end;
    my $token;
    my $sentence_cont;

    print STDERR "  Performing term extraction... \n";
    open CORPUS, ">>" . $h_config->{"TMPFILE"} . ".corpus.yatea.tmp";
    binmode(CORPUS, ":utf8");

    print CORPUS $Alvis::NLPPlatform::Annotation::document_record_id . "\tDOCUMENT\t" . $Alvis::NLPPlatform::Annotation::document_record_id . "\n" ;

    &PrintOutputTreeTagger($h_config, $doc_hash, \*CORPUS);

    close CORPUS;

#     if ((exists $h_config->{"XML_OUTPUT"}->{"YATEA"}) && ($h_config->{"XML_OUTPUT"}->{"YATEA"} == 1)) {
# 	%$doc_hash = ();
# 	%Alvis::NLPPlatform::hash_tokens = ();

lib/Alvis/NLPPlatform/UserNLPWrappers.pm  view on Meta::CPAN


    my $line = "";
    my $sentence_counter = 0;
    my $linkage_counter = 0;
    
    my @linkage_output;
    
#     my $line_prec = "";

    open INFILE, $infile or die "No such file $infile\n";
    binmode INFILE;
    open OUTFILE, ">$outfile" or die "No such file $outfile\n";
 
    # puts the text on only one line
    do {
	# We first remove the outputting input 
	while((defined  ($line = <INFILE>)) && ($line !~ /^\+\+\+\+Time/o)) {
# 	print $line;
# 	    $line_prec = $line;
	};



( run in 0.613 second using v1.01-cache-2.11-cpan-3cd7ad12f66 )