view release on metacpan or search on metacpan
lib/Alvis/NLPPlatform.pm view on Meta::CPAN
for($i=0;$i <scalar(@records); $i++) {
if ($i == $#records) {
$Alvis::NLPPlatform::last_doc = 1;
}
$rec = $records[$i];
($id,$docR)=@$rec;
warn "Process document $id\n";
open FILETMP_OUT, ">$tmpfile";
binmode(FILETMP_OUT, ":utf8");
# binmode(FILETMP_OUT);
# print FILETMP_OUT Encode::decode_utf8($doc);
Alvis::NLPPlatform::platform_reset();
$render_time = Alvis::NLPPlatform::standalone_main($config, $docR, \*FILETMP_OUT, 1); #${$tab_docs_xml->[$doc_num]}[1] ; ${$ref_doc}[1]
close(FILETMP_OUT);
open FILETMP_OUT, "$tmpfile" or die "No such file or directory\n";
@cur_doc = <FILETMP_OUT>;
$j = 0;
while(($j< scalar @cur_doc) && ($cur_doc[$j] !~ s/\@RENDER_TIME_NOT_SET\@/$render_time/)) {
$j++;
lib/Alvis/NLPPlatform.pm view on Meta::CPAN
if ($connection_retry ==0) {
die "Timeout. Could not create socket: $! \n";
}
# $sock=new IO::Socket::INET( PeerAddr => $nlp_host,
# PeerPort => $nlp_port,
# Proto => 'tcp');
# die "Could not create socket: $!\n" unless $sock;
$sock -> autoflush(1); ###############
binmode($sock, ":utf8");
print STDERR `date`;
print STDERR "Established connection to server.\n";
print STDERR "Requesting document...";
print $sock "REQUEST\n";
print STDERR "done.\n";
print STDERR "Receiving document...\n";
# SENDING $id
lib/Alvis/NLPPlatform.pm view on Meta::CPAN
Proto => 'tcp');
warn "Could not create socket: $! \n" unless $sock;
$connection_retry--;
sleep(1);
} while(!defined($sock) && ($connection_retry >0));
if ($connection_retry ==0) {
die "Timeout. Could not create socket: $! \n";
}
binmode $sock, ":utf8";
print STDERR "Established connection to server.\n";
print STDERR "Giving back annotated document...\n";
# Communitation with the server
print $sock "GIVEBACK\n$id\n";
# Save to XML file
print STDERR "\tRendering XML... ";
lib/Alvis/NLPPlatform.pm view on Meta::CPAN
warn "Could not create socket: $! \n" unless $sock;
$connection_retry--;
sleep(1);
} while(!defined($sock) && ($connection_retry >0));
if ($connection_retry ==0) {
die "Timeout. Could not create socket: $! \n";
}
$sock -> autoflush(1); ###############
binmode $sock, ":utf8";
print STDERR "Established connection to server.\n";
print STDERR "Sending aborting message\n";
print $sock "ABORTING\n$id\n";
print STDERR "Aborting message sent\n";
lib/Alvis/NLPPlatform.pm view on Meta::CPAN
my $sub_dir;
my %processing_id;
while(1){
warn "beginning of the loop\n";
# await client connection
if ($client_sock=$sock->accept()) {
warn "Accepting a connection\n";
if (fork() == 0) {
close($sock);
binmode($client_sock, ":utf8");
my ($client_port,$client_iaddr) = sockaddr_in(getpeername($client_sock));
warn "Getting information about remote host\n";
$name=gethostbyaddr($client_iaddr,AF_INET);
&disp_log($name,"Client (".inet_ntoa($client_iaddr).":".$client_port.") has connected.");
$client_sock -> autoflush(1); ###############
##############################
# CLIENT HANDLING CODE
my $line;
$line=<$client_sock>;
lib/Alvis/NLPPlatform/Convert.pm view on Meta::CPAN
# outputting_empty_xmlns_file($outdata, $outfile, $AlvisConv, $config, $mm)
#
# Writes $outdata to $outfile through a :utf8 output layer, then hands the
# file to conversion_file_to_alvis_xml() and returns whatever that call
# returns.
#
# Parameters:
#   $outdata   - text to store in the file
#   $outfile   - path of the file to create (an existing file is truncated)
#   $AlvisConv - forwarded unchanged to conversion_file_to_alvis_xml()
#   $config    - forwarded unchanged to conversion_file_to_alvis_xml()
#   $mm        - forwarded unchanged to conversion_file_to_alvis_xml()
#
# Dies if the file cannot be opened, written or closed, so the conversion
# step is never run against a missing or partially written file.
sub outputting_empty_xmlns_file
{
    my $outdata = shift;
    my $outfile = shift;
    my $AlvisConv = shift ;
    my $config = shift;
    my $mm = shift;

    warn "Openning $outfile\n";

    # Three-argument open keeps the mode separate from the file name, so
    # characters such as '>' or '|' in $outfile cannot alter how the file
    # is opened.  The lexical handle avoids sharing a global OUTFILE
    # bareword with other subroutines.
    open(my $out_fh, '>:utf8', $outfile)
        or die "Cannot open $outfile for writing: $!\n";

    print {$out_fh} $outdata
        or die "Cannot write to $outfile: $!\n";

    # Buffered write errors (disk full, ...) only surface at close time.
    close($out_fh)
        or die "Cannot close $outfile: $!\n";

    return &conversion_file_to_alvis_xml($outfile, $AlvisConv, $config, $mm);
}
sub applying_stylesheet
{
my $file = shift;
lib/Alvis/NLPPlatform/Convert.pm view on Meta::CPAN
# return 0;
}
sub outputting_alvis_from_file
{
my $alvisfile = shift;
my $Alvis_converter = shift;
my $config = shift;
open ALVISFILE, $alvisfile or die "No such file: $alvisfile\n";
# binmode(ALVISFILE, ":utf8");
binmode ALVISFILE; # XXXX
local $/ = undef;
my $alvisfile_data = <ALVISFILE>;
close ALVISFILE;
my $docs = Alvis::NLPPlatform::Document::get_documentRecords($alvisfile_data);
# print STDERR "doc_list : $docs\n";
lib/Alvis/NLPPlatform/Convert.pm view on Meta::CPAN
loglevel => 10)
or die "can't create ALVIS write-pipe for port '" . $config->{"alvis_connection"}->{"HARVESTER_PORT"} . "': $!";
my $tmp_spool_dir = $outputRootDir . "/0";
opendir DIR, $tmp_spool_dir;
while($xmlfile = readdir DIR) {
if (($xmlfile ne ".") && ($xmlfile ne "..")) {
open XMLFILE, "$tmp_spool_dir/$xmlfile" or die "Cannot open such file ($xmlfile)\n";
binmode(XMLFILE, ":utf8");
$xml_rec_doc = "";
while($line = <XMLFILE>) {
$xml_rec_doc .= $line;
}
$pipe_out->write($xml_rec_doc);
close XMLFILE;
unlink "$tmp_spool_dir/$xmlfile";
}
}
closedir(DIR);
lib/Alvis/NLPPlatform/Document.pm view on Meta::CPAN
# use YAML qw( Dump );
# getnamespace($file)
#
# Scans $file line by line for an "xmlns=" declaration and returns the
# captured value, or undef when no line matches.
#
# Parameters:
#   $file - path of the file to scan
#
# Returns: the text captured after "xmlns=" on the LAST matching line
# (the loop does not stop at the first match), or undef.
sub getnamespace
{
my $file = shift;
my $line;
my $xmlns = undef;
# NOTE(review): the result of open is not checked; if it fails the read
# loop below yields no lines and the function returns undef.
open FILE, $file;
# Read the file as raw bytes (no encoding layer).
binmode(FILE);
while(($line=<FILE>)){
# Capture the run of characters after "xmlns=" (and an optional opening
# double quote) up to the next double quote.  Only double quotes are
# treated as delimiters by this pattern.
if ($line =~ /xmlns=\"?([^\"]+)\"?/) {
$xmlns = $1;
# 'next' simply moves on to the following line, so a later match
# overwrites an earlier one.
next;
}
};
close FILE;
return($xmlns);
lib/Alvis/NLPPlatform/Document.pm view on Meta::CPAN
$doc=$Parser->parse_file($xmlalvisfile);
};
if (!$@)
{
if ($doc)
{
my $xmlalvisdata = &get_language($doc);
open OUTPUT_FILE, ">$outfile";
binmode(OUTPUT_FILE, ":utf8");
print OUTPUT_FILE "$xmlalvisdata\n";
close(OUTPUT_FILE);
return($outfile);
}
else
{
warn "Parsing the doc failed.\n";
}
} else {
warn "Parsing the doc failed.\n";
lib/Alvis/NLPPlatform/NLPWrappers.pm view on Meta::CPAN
$tok_ct=~s/\\n/\\n /go;
$tok_ct=~s/\\r/\\r /go;
$tok_ct=~s/\\t/\\t /go;
$corpus.=$tok_ct;
push @tab_tokens,$tok_ct;
}
$corpus_filename = $h_config->{'TMPFILE'} . ".corpus_en.txt";
open CORPUS,">$corpus_filename";
# binmode(CORPUS,":utf8");
print CORPUS Encode::encode_utf8($corpus);
close CORPUS;
print STDERR "done\n";
my $command_line;
if($Alvis::NLPPlatform::Annotation::ALVISLANGUAGE eq "FR"){
$command_line = $h_config->{'NLP_tools'}->{'NETAG_FR'} . " $corpus_filename 2>> " . $Alvis::NLPPlatform::ALVISLOGFILE;
lib/Alvis/NLPPlatform/NLPWrappers.pm view on Meta::CPAN
#`$command_line` && print STDERR "FAILED TO EXECUTE \"$command_line\": &!\n";
`$command_line`;
$Alvis::NLPPlatform::ALVISDEBUG || unlink $corpus_filename;
@Alvis::NLPPlatform::en_start=();
@Alvis::NLPPlatform::en_end=();
@Alvis::NLPPlatform::en_type=();
$result_filename = $h_config->{'TMPFILE'} . ".corpus_en.tag.txt";
open REN,"<$result_filename" or warn "Can't open the file $result_filename";
binmode REN;
while($line=<REN>){
($NE_type, $NE_start, $NE_end) = split /\t/, $line;
# $line=~m/(.+)\s+([0-9]+)\s+([0-9]+)/;
# $NE_type = $1;
# $NE_start = $2;
# $NE_end = $3;
push @Alvis::NLPPlatform::en_type,$NE_type;
if ((exists($h_config->{'XML_INPUT'}->{"PRESERVEWHITESPACE"})) && ($h_config->{'XML_INPUT'}->{"PRESERVEWHITESPACE"})) {
push @Alvis::NLPPlatform::en_start,($NE_start-1);
push @Alvis::NLPPlatform::en_end,($NE_end-1);
lib/Alvis/NLPPlatform/NLPWrappers.pm view on Meta::CPAN
####
print STDERR " Word segmentation... ";
my $content;
# open CORPUS,">:utf8",$h_config->{'TMPFILE'} . ".corpus.tmp";
$corpus_filename = $h_config->{'TMPFILE'} . ".corpus_word.tmp";
$result_filename = $h_config->{'TMPFILE'} . ".words.tmp";
open CORPUS,">$corpus_filename";
# binmode(CORPUS);
# binmode(CORPUS, ":utf8");
foreach $token(Alvis::NLPPlatform::Annotation::sort(\%Alvis::NLPPlatform::hash_tokens)){
$content=$Alvis::NLPPlatform::hash_tokens{$token};
$content=~s/\\n/\n/og;
$content=~s/\\t/\t/og;
$content=~s/\\r/\r/og;
#Encode::decode_utf8("Å")
# $content =~ s/\x{65}/oe/g;
Alvis::NLPPlatform::XMLEntities::decode($content);
# Encode::from_to($content, "utf8", "iso-8859-1");
lib/Alvis/NLPPlatform/NLPWrappers.pm view on Meta::CPAN
if($Alvis::NLPPlatform::Annotation::ALVISLANGUAGE eq "FR"){
$command_line = $h_config->{"NLP_tools"}->{'WORDSEG_FR'} . " < $corpus_filename > $result_filename 2>> " . $Alvis::NLPPlatform::ALVISLOGFILE;
}else{
$command_line = $h_config->{"NLP_tools"}->{'WORDSEG_EN'} . " < $corpus_filename > $result_filename 2>> ". $Alvis::NLPPlatform::ALVISLOGFILE;
}
`$command_line`;
open(MOTS, $result_filename) or warn "Can't open the file $result_filename";;
# binmode(MOTS,":utf8");
binmode(MOTS);
$token_id=1;
$word_id=1;
$token_id_str = "token$token_id";
while($proposedword=<MOTS>)
{
# $proposedword = Encode::encode_utf8($proposedword);
$word_id_str = "word$word_id";
# if ($proposedword !~ /^[\s ]*\n$/o) {
lib/Alvis/NLPPlatform/NLPWrappers.pm view on Meta::CPAN
my $word_id_str;
my $word_punct_id_str;
my @words;
$corpus_filename = $h_config->{'TMPFILE'} . ".corpus_pos.tmp";
$result_filename = $h_config->{'TMPFILE'} . ".tags.tmp";
print STDERR " Part-Of-Speech tagging..";
open CORPUS,">$corpus_filename";
# binmode(CORPUS,":encoding(latin1)");
# TH - 16/07/2007 - replacement of hash_words by hash_words_punct
my $fullcontent = "";
foreach $word (Alvis::NLPPlatform::Annotation::sort(\%Alvis::NLPPlatform::hash_words_punct)){
$cont=$Alvis::NLPPlatform::hash_words_punct{$word};
$fullcontent .= Encode::encode("iso-8859-1", $cont, Encode::FB_DEFAULT);
$fullcontent .= "\n";
# Encode::from_to($cont, "utf8", "iso-8859-1");
# $fullcontent .= "$cont\n";
}
lib/Alvis/NLPPlatform/NLPWrappers.pm view on Meta::CPAN
my $command_line;
if($Alvis::NLPPlatform::Annotation::ALVISLANGUAGE eq "FR"){
$command_line = $h_config->{'NLP_tools'}->{'POSTAG_FR'} . " < $corpus_filename > $result_filename 2>> " . $Alvis::NLPPlatform::ALVISLOGFILE;
}else{
$command_line = $h_config->{'NLP_tools'}->{'POSTAG_EN'} . " < $corpus_filename > $result_filename 2>> " . $Alvis::NLPPlatform::ALVISLOGFILE;
}
`$command_line`;
open TAGS,"<$result_filename";
binmode(TAGS); #, ":encoding(latin9)");
$word_id=0;
my $decal = 0;
my $wordecal;
my $word_punct_id = 1;
$word_punct_id_str = "word$word_punct_id";
while ($line = <TAGS>) {
# Read $Alvis::NLPPlatform::hash_words_punct{"word$word_punct"}
# Encode::from_to($line, "iso-8859-9", "utf8");
lib/Alvis/NLPPlatform/NLPWrappers.pm view on Meta::CPAN
push @{$doc_hash->{"log_processing1"}->{"comments"}}, "Found POS Tags: " . $word_id ;
}
# sub pos_tag # WRAPPER FOR BRILL
# {
# my $word;
# my $cont;
# print STDERR " Part-Of-Speech tagging...";
# open CORPUS,">$TMPFILE.corpus.tmp";
# binmode(CORPUS,":utf8");
# foreach $word(sort Alvis::NLPPlatform::Annotation::sort_keys keys %Alvis::NLPPlatform::hash_words){
# $cont=$Alvis::NLPPlatform::hash_words{$word};
# print CORPUS "$cont ";
# if($cont eq "."){
# print CORPUS "\n";
# }
# }
# close CORPUS;
# }
lib/Alvis/NLPPlatform/NLPWrappers.pm view on Meta::CPAN
my ($class, $h_config, $doc_hash) = @_;
print STDERR " Semantic tagging... ";
my $in_fn = $h_config->{'TMPFILE'} . ".ast.in";
if($Alvis::NLPPlatform::Annotation::ALVISLANGUAGE eq "FR"){
# French parser command line
}else{
open DOC,">$in_fn";
binmode(DOC,":utf8");
Alvis::NLPPlatform::Annotation::render_xml($doc_hash, \*DOC, 1);
close DOC;
my $cmdline = $h_config->{'NLP_tools'}->{'SEMTAG_EN'} . " $in_fn > " . $h_config->{'TMPFILE'} . ".ast.out 2>> " . $Alvis::NLPPlatform::ALVISLOGFILE;
# print STDERR "$cmdline\n";
`$cmdline`;
$Alvis::NLPPlatform::ALVISDEBUG || unlink $h_config->{'TMPFILE'} . ".ast.in";
$Alvis::NLPPlatform::ALVISDEBUG || unlink $h_config->{'TMPFILE'} . ".ast.out";
# $semtagout == doc XML enriched-document
lib/Alvis/NLPPlatform/UserNLPWrappers.pm view on Meta::CPAN
my $min;
my $max;
my $btw_start;
my $btw_end;
my $token;
my $sentence_cont;
print STDERR " Performing term extraction... \n";
open CORPUS, ">>" . $h_config->{"TMPFILE"} . ".corpus.yatea.tmp";
binmode(CORPUS, ":utf8");
print CORPUS $Alvis::NLPPlatform::Annotation::document_record_id . "\tDOCUMENT\t" . $Alvis::NLPPlatform::Annotation::document_record_id . "\n" ;
&PrintOutputTreeTagger($h_config, $doc_hash, \*CORPUS);
close CORPUS;
# if ((exists $h_config->{"XML_OUTPUT"}->{"YATEA"}) && ($h_config->{"XML_OUTPUT"}->{"YATEA"} == 1)) {
# %$doc_hash = ();
# %Alvis::NLPPlatform::hash_tokens = ();
lib/Alvis/NLPPlatform/UserNLPWrappers.pm view on Meta::CPAN
my $line = "";
my $sentence_counter = 0;
my $linkage_counter = 0;
my @linkage_output;
# my $line_prec = "";
open INFILE, $infile or die "No such file $infile\n";
binmode INFILE;
open OUTFILE, ">$outfile" or die "No such file $outfile\n";
# puts the text on only one line
do {
# We first remove the outputting input
while((defined ($line = <INFILE>)) && ($line !~ /^\+\+\+\+Time/o)) {
# print $line;
# $line_prec = $line;
};