view release on metacpan or search on metacpan
Alvis::Pipeline => 0,
Alvis::Convert => 0,
XML::LibXML => 0,
IO::File => 0,
IO::Socket::INET => 0,
Getopt::Long => 0,
Pod::Usage => 0,
Config::General => 2.30,
Sys::Hostname => 0,
Fcntl => 0,
utf8 => 0,
File::Touch => 0,
File::Basename => 0,
Encode => 0,
Fcntl => 0,
File::Path => 0,
File::MMagic => 0,
File::Basename => 0,
Lingua::Identify => 0,
XML::Parser => 0,
Parse::Yapp => 0,
Lingua::Identify: 0
Parse::Yapp: 0
Pod::Usage: 0
Sys::Hostname: 0
Test::More: 0
Time::HiRes: 0
XML::LibXML: 0
XML::Parser: 0
XML::Parser::PerlSAX: 0
perl: 5.005
utf8: 0
build_requires:
Module::Build: 0.28
provides:
Alvis::NLPPlatform:
file: lib/Alvis/NLPPlatform.pm
version: 0.6
Alvis::NLPPlatform::Annotation:
file: lib/Alvis/NLPPlatform/Annotation.pm
version: 0
Alvis::NLPPlatform::Canonical:
Alvis::Pipeline
Alvis::Convert
XML::LibXML
IO::File
IO::Socket::INET
Getopt::Long
Pod::Usage
Config::General
Sys::Hostname
Fcntl
utf8
File::Touch
File::Basename
Encode
Fcntl
File::Path
File::MMagic
File::Basename
Lingua::Identify
XML::Parser
Parse::Yapp
lib/Alvis/NLPPlatform.pm view on Meta::CPAN
for($i=0;$i <scalar(@records); $i++) {
if ($i == $#records) {
$Alvis::NLPPlatform::last_doc = 1;
}
$rec = $records[$i];
($id,$docR)=@$rec;
warn "Process document $id\n";
open FILETMP_OUT, ">$tmpfile";
binmode(FILETMP_OUT, ":utf8");
# binmode(FILETMP_OUT);
# print FILETMP_OUT Encode::decode_utf8($doc);
Alvis::NLPPlatform::platform_reset();
$render_time = Alvis::NLPPlatform::standalone_main($config, $docR, \*FILETMP_OUT, 1); #${$tab_docs_xml->[$doc_num]}[1] ; ${$ref_doc}[1]
close(FILETMP_OUT);
open FILETMP_OUT, "$tmpfile" or die "No such file or directory\n";
@cur_doc = <FILETMP_OUT>;
$j = 0;
while(($j< scalar @cur_doc) && ($cur_doc[$j] !~ s/\@RENDER_TIME_NOT_SET\@/$render_time/)) {
$j++;
}
lib/Alvis/NLPPlatform.pm view on Meta::CPAN
if ($connection_retry ==0) {
die "Timeout. Could not create socket: $! \n";
}
# $sock=new IO::Socket::INET( PeerAddr => $nlp_host,
# PeerPort => $nlp_port,
# Proto => 'tcp');
# die "Could not create socket: $!\n" unless $sock;
$sock -> autoflush(1); ###############
binmode($sock, ":utf8");
print STDERR `date`;
print STDERR "Established connection to server.\n";
print STDERR "Requesting document...";
print $sock "REQUEST\n";
print STDERR "done.\n";
print STDERR "Receiving document...\n";
# SENDING $id
lib/Alvis/NLPPlatform.pm view on Meta::CPAN
Proto => 'tcp');
warn "Could not create socket: $! \n" unless $sock;
$connection_retry--;
sleep(1);
} while(!defined($sock) && ($connection_retry >0));
if ($connection_retry ==0) {
die "Timeout. Could not create socket: $! \n";
}
binmode $sock, ":utf8";
print STDERR "Established connection to server.\n";
print STDERR "Giving back annotated document...\n";
# Communication with the server
print $sock "GIVEBACK\n$id\n";
# Save to XML file
print STDERR "\tRendering XML... ";
lib/Alvis/NLPPlatform.pm view on Meta::CPAN
warn "Could not create socket: $! \n" unless $sock;
$connection_retry--;
sleep(1);
} while(!defined($sock) && ($connection_retry >0));
if ($connection_retry ==0) {
die "Timeout. Could not create socket: $! \n";
}
$sock -> autoflush(1); ###############
binmode $sock, ":utf8";
print STDERR "Established connection to server.\n";
print STDERR "Sending aborting message\n";
print $sock "ABORTING\n$id\n";
print STDERR "Aborting message sent\n";
lib/Alvis/NLPPlatform.pm view on Meta::CPAN
my $sub_dir;
my %processing_id;
while(1){
warn "beginning of the loop\n";
# await client connection
if ($client_sock=$sock->accept()) {
warn "Accepting a connection\n";
if (fork() == 0) {
close($sock);
binmode($client_sock, ":utf8");
my ($client_port,$client_iaddr) = sockaddr_in(getpeername($client_sock));
warn "Getting information about remote host\n";
$name=gethostbyaddr($client_iaddr,AF_INET);
&disp_log($name,"Client (".inet_ntoa($client_iaddr).":".$client_port.") has connected.");
$client_sock -> autoflush(1); ###############
##############################
# CLIENT HANDLING CODE
my $line;
$line=<$client_sock>;
lib/Alvis/NLPPlatform.pm view on Meta::CPAN
foreach my $rec_out (@records) {
$pipe_out->write($xmlhead . $rec_out . $xmlfoot);
}
}
if (defined($id))
{
warn "Received\t$n\t$id\n";
`date`;
if (defined(open(I,">:utf8",$config{"ALVISTMP"} . "/${id}.xml")))
{
print I $xml;
close(I);
}
else
{
die("Unable to open " . $config{"ALVISTMP"} . "/${id}.xml for writing.");
}
my $xml2 = $xml;
lib/Alvis/NLPPlatform.pm view on Meta::CPAN
chomp $id;
&disp_log($name,"Annotated document ID: $id");
# Recording the annotation document (local)
$sub_dir=&sub_dir_from_id($id);
if ($config{"NLP_misc"}->{"SAVE_IN_OUTDIR"}) {
mkpath( $config{"alvis_connection"}->{"OUTDIR"} . "/$sub_dir");
}
my $xml = "";
if (($config{"NLP_misc"}->{"SAVE_IN_OUTDIR"} == 0) || (defined(open(O,">:utf8", $config{"alvis_connection"}->{"OUTDIR"} . "/$sub_dir/${id}.xml"))))
{
while((defined $sock) && ($line=<$client_sock>) && ($line ne "<DONE>\n")) {
# recording the annotation document (local)
# building xml string for sending to the next step
$xml .= $line;
# print STDERR $line;
}
# print STDERR $line;
# get the RENDER TIME
if ((defined $sock) && ($line = <$client_sock>) && ($line eq "RENDER TIME\n")) {
lib/Alvis/NLPPlatform/Annotation.pm view on Meta::CPAN
# print_Annotation($descriptor, $string)
#
# Sends $string, after decoding it from UTF-8 octets with
# Encode::decode_utf8, to the destination designated by $descriptor:
#
#   * a filehandle, given either as an unblessed GLOB reference (\*FH or
#     a lexical handle) or as any object derived from IO::Handle
#     (IO::Socket::INET, IO::File, ...): the decoded text is printed;
#   * a SCALAR reference: the decoded text is appended to the referenced
#     string.
#
# The decoded text is a character string, so a handle destination
# presumably needs an encoding layer such as ":utf8" set by the caller
# (NOTE(review): confirm for every caller).
#
# A reference of any other type cannot receive output: a warning is
# issued and nothing is written.  A $descriptor that is not a reference
# at all is a fatal error: a message is printed on STDERR and the
# process exits with status -1.
#
# Returns 1.
sub print_Annotation
{
    my ($descriptor, $string) = @_;

    my $ref_type = ref($descriptor);

    unless ($ref_type) {
        print STDERR "Critical error: descriptor is not a reference at all.\n";
        exit(-1);
    }

    # Loaded here so that the block does not depend on the file preamble.
    require Scalar::Util;

    # Exact class names are fragile: accept every IO::Handle subclass so
    # that IO::File objects or other socket classes are not silently
    # ignored.
    my $is_handle = ($ref_type eq "GLOB")
        || ($ref_type eq "IO::Socket::INET")
        || (Scalar::Util::blessed($descriptor)
            && $descriptor->isa("IO::Handle"));

    if ($is_handle) {
        print $descriptor Encode::decode_utf8($string);
    }
    elsif ($ref_type eq "SCALAR") {
        $$descriptor .= Encode::decode_utf8($string);
    }
    else {
        # Previously such descriptors were dropped without any trace.
        warn "print_Annotation: unsupported descriptor type '$ref_type', nothing written\n";
    }

    return(1);
}
1;
__END__
=head1 NAME
lib/Alvis/NLPPlatform/Convert.pm view on Meta::CPAN
# if (defined $config->{"CONVERTERS"}->{"SPOOLDIR"}) {
# $ODIR = $config->{"CONVERTERS"}->{"SPOOLDIR"};
# } else {
# if (defined $config->{"alvis_connection"}->{"SPOOLDIR"}) {
# $ODIR = $config->{"alvis_connection"}->{"SPOOLDIR"};
# } else {
use strict;
use warnings;
use utf8;
no utf8;
use Alvis::NLPPlatform::Document;
use File::MMagic;
use File::Basename;
use File::Path qw(mkpath);
use File::Touch;
use Data::Dumper;
use Cwd;
lib/Alvis/NLPPlatform/Convert.pm view on Meta::CPAN
# outputting_empty_xmlns_file($outdata, $outfile, $AlvisConv, $config, $mm)
#
# Writes $outdata to the file $outfile through a ":utf8" output layer,
# then hands that file to conversion_file_to_alvis_xml together with
# $AlvisConv, $config and $mm.
#
# Dies if $outfile cannot be opened for writing or cannot be closed
# cleanly (buffered write errors only surface at close time); otherwise
# the converter would be run on a missing or stale file.
#
# Returns whatever conversion_file_to_alvis_xml returns.
sub outputting_empty_xmlns_file
{
    my ($outdata, $outfile, $AlvisConv, $config, $mm) = @_;

    warn "Openning $outfile\n";

    # Three-argument open with a lexical handle: the file name can no
    # longer be interpreted as part of the mode, and the handle does not
    # leak into the package namespace.
    open(my $out_fh, ">:utf8", $outfile)
        or die "Cannot open $outfile for writing: $!\n";
    print $out_fh $outdata;
    close($out_fh)
        or die "Cannot close $outfile: $!\n";

    return &conversion_file_to_alvis_xml($outfile, $AlvisConv, $config, $mm);
}
sub applying_stylesheet
{
my $file = shift;
lib/Alvis/NLPPlatform/Convert.pm view on Meta::CPAN
my $filename = shift;
my $Alvis_converter = shift;
my $config = shift;
print STDERR "Converting $filename to ALVIS XML format\n";
my $meta_txt = &make_meta($filename);
my $html_txt=$Alvis_converter->read_HTML($filename);
# print STDERR "==>" . utf8::is_utf8($html_txt) . "\n";
if (!defined($html_txt))
{
warn "Reading the HTML for basename \"$filename\" failed. " .
$Alvis_converter->errmsg();
$Alvis_converter->clearerr();
return (1);;
}
# print STDERR $html_txt;
lib/Alvis/NLPPlatform/Convert.pm view on Meta::CPAN
# my $type_guesser=Alvis::Document::Type->new();
# my ($doc_type,$doc_sub_type)=$type_guesser->guess($alvisXML);
# my $doc_encoding=$e->guess_and_convert($alvisXML,$doc_type,$doc_sub_type, "UTF-8");
# if (!defined($doc_encoding))
# {
# die('Cannot guess. ' . $e->errmsg());
# }
# print STDERR "$doc_type,$doc_sub_type,$doc_encoding\n";
# print STDERR $e->guess($alvisXML);
# warn "Checking the encoding\n";
# if (!Encode::is_utf8($alvisXML)) {
# warn "Not a UTF-8, assume to be a latin-1 document\n";
# print STDERR "Converting in UTF8...\n";
# Encode::from_to($alvisXML, "iso-8859-1", "UTF-8");
# print STDERR "done\n";
# }
# print STDERR $alvisXML;
# exit;
# my $decoder = Encode::Guess->guess_encoding($alvisXML, /UTF-8/);
# if (!ref($decoder)) {
lib/Alvis/NLPPlatform/Convert.pm view on Meta::CPAN
# return 0;
}
sub outputting_alvis_from_file
{
my $alvisfile = shift;
my $Alvis_converter = shift;
my $config = shift;
open ALVISFILE, $alvisfile or die "No such file: $alvisfile\n";
# binmode(ALVISFILE, ":utf8");
binmode ALVISFILE; # XXXX
local $/ = undef;
my $alvisfile_data = <ALVISFILE>;
close ALVISFILE;
my $docs = Alvis::NLPPlatform::Document::get_documentRecords($alvisfile_data);
# print STDERR "doc_list : $docs\n";
lib/Alvis/NLPPlatform/Convert.pm view on Meta::CPAN
loglevel => 10)
or die "can't create ALVIS write-pipe for port '" . $config->{"alvis_connection"}->{"HARVESTER_PORT"} . "': $!";
my $tmp_spool_dir = $outputRootDir . "/0";
opendir DIR, $tmp_spool_dir;
while($xmlfile = readdir DIR) {
if (($xmlfile ne ".") && ($xmlfile ne "..")) {
open XMLFILE, "$tmp_spool_dir/$xmlfile" or die "Cannot open such file ($xmlfile)\n";
binmode(XMLFILE, ":utf8");
$xml_rec_doc = "";
while($line = <XMLFILE>) {
$xml_rec_doc .= $line;
}
$pipe_out->write($xml_rec_doc);
close XMLFILE;
unlink "$tmp_spool_dir/$xmlfile";
}
}
closedir(DIR);
lib/Alvis/NLPPlatform/Document.pm view on Meta::CPAN
$doc=$Parser->parse_file($xmlalvisfile);
};
if (!$@)
{
if ($doc)
{
my $xmlalvisdata = &get_language($doc);
open OUTPUT_FILE, ">$outfile";
binmode(OUTPUT_FILE, ":utf8");
print OUTPUT_FILE "$xmlalvisdata\n";
close(OUTPUT_FILE);
return($outfile);
}
else
{
warn "Parsing the doc failed.\n";
}
} else {
warn "Parsing the doc failed.\n";
lib/Alvis/NLPPlatform/MyReceiver.pm view on Meta::CPAN
package Alvis::NLPPlatform::MyReceiver;
#use Data::Dumper;
use strict;
use warnings;
use XML::Parser::PerlSAX;
# use utf8;
use Alvis::NLPPlatform::XMLEntities;
use Data::Dumper;
our $VERSION=$Alvis::NLPPlatform::VERSION;
###
### Package
###
lib/Alvis/NLPPlatform/NLPWrappers.pm view on Meta::CPAN
$tok_ct=~s/\\n/\\n /go;
$tok_ct=~s/\\r/\\r /go;
$tok_ct=~s/\\t/\\t /go;
$corpus.=$tok_ct;
push @tab_tokens,$tok_ct;
}
$corpus_filename = $h_config->{'TMPFILE'} . ".corpus_en.txt";
open CORPUS,">$corpus_filename";
# binmode(CORPUS,":utf8");
print CORPUS Encode::encode_utf8($corpus);
close CORPUS;
print STDERR "done\n";
my $command_line;
if($Alvis::NLPPlatform::Annotation::ALVISLANGUAGE eq "FR"){
$command_line = $h_config->{'NLP_tools'}->{'NETAG_FR'} . " $corpus_filename 2>> " . $Alvis::NLPPlatform::ALVISLOGFILE;
} else {
$command_line = $h_config->{'NLP_tools'}->{'NETAG_EN'} . " $corpus_filename 2>> " . $Alvis::NLPPlatform::ALVISLOGFILE;
}
lib/Alvis/NLPPlatform/NLPWrappers.pm view on Meta::CPAN
my $corpus_filename;
my $result_filename;
my $token_id_str;
my $word_id_str;
####
print STDERR " Word segmentation... ";
my $content;
# open CORPUS,">:utf8",$h_config->{'TMPFILE'} . ".corpus.tmp";
$corpus_filename = $h_config->{'TMPFILE'} . ".corpus_word.tmp";
$result_filename = $h_config->{'TMPFILE'} . ".words.tmp";
open CORPUS,">$corpus_filename";
# binmode(CORPUS);
# binmode(CORPUS, ":utf8");
foreach $token(Alvis::NLPPlatform::Annotation::sort(\%Alvis::NLPPlatform::hash_tokens)){
$content=$Alvis::NLPPlatform::hash_tokens{$token};
$content=~s/\\n/\n/og;
$content=~s/\\t/\t/og;
$content=~s/\\r/\r/og;
#Encode::decode_utf8("Å")
# $content =~ s/\x{65}/oe/g;
Alvis::NLPPlatform::XMLEntities::decode($content);
# Encode::from_to($content, "utf8", "iso-8859-1");
print CORPUS Encode::encode("iso-8859-1", $content, Encode::FB_DEFAULT);
# print CORPUS $content;
}
close CORPUS;
if($Alvis::NLPPlatform::Annotation::ALVISLANGUAGE eq "FR"){
$command_line = $h_config->{"NLP_tools"}->{'WORDSEG_FR'} . " < $corpus_filename > $result_filename 2>> " . $Alvis::NLPPlatform::ALVISLOGFILE;
}else{
$command_line = $h_config->{"NLP_tools"}->{'WORDSEG_EN'} . " < $corpus_filename > $result_filename 2>> ". $Alvis::NLPPlatform::ALVISLOGFILE;
}
`$command_line`;
open(MOTS, $result_filename) or warn "Can't open the file $result_filename";;
# binmode(MOTS,":utf8");
binmode(MOTS);
$token_id=1;
$word_id=1;
$token_id_str = "token$token_id";
while($proposedword=<MOTS>)
{
# $proposedword = Encode::encode_utf8($proposedword);
$word_id_str = "word$word_id";
# if ($proposedword !~ /^[\s ]*\n$/o) {
if ($proposedword !~ /^[\s\x{A0}]*\n$/o) {
chomp $proposedword;
# print STDERR $proposedword;
$current_word="";
$doc_hash->{$word_id_str}={};
$doc_hash->{$word_id_str}->{'id'}=$word_id_str;
$doc_hash->{$word_id_str}->{'datatype'}='word';
lib/Alvis/NLPPlatform/NLPWrappers.pm view on Meta::CPAN
print STDERR " Part-Of-Speech tagging..";
open CORPUS,">$corpus_filename";
# binmode(CORPUS,":encoding(latin1)");
# TH - 16/07/2007 - replacement of hash_words by hash_words_punct
my $fullcontent = "";
foreach $word (Alvis::NLPPlatform::Annotation::sort(\%Alvis::NLPPlatform::hash_words_punct)){
$cont=$Alvis::NLPPlatform::hash_words_punct{$word};
$fullcontent .= Encode::encode("iso-8859-1", $cont, Encode::FB_DEFAULT);
$fullcontent .= "\n";
# Encode::from_to($cont, "utf8", "iso-8859-1");
# $fullcontent .= "$cont\n";
}
print CORPUS $fullcontent;
close CORPUS;
my $command_line;
if($Alvis::NLPPlatform::Annotation::ALVISLANGUAGE eq "FR"){
$command_line = $h_config->{'NLP_tools'}->{'POSTAG_FR'} . " < $corpus_filename > $result_filename 2>> " . $Alvis::NLPPlatform::ALVISLOGFILE;
}else{
$command_line = $h_config->{'NLP_tools'}->{'POSTAG_EN'} . " < $corpus_filename > $result_filename 2>> " . $Alvis::NLPPlatform::ALVISLOGFILE;
lib/Alvis/NLPPlatform/NLPWrappers.pm view on Meta::CPAN
binmode(TAGS); #, ":encoding(latin9)");
$word_id=0;
my $decal = 0;
my $wordecal;
my $word_punct_id = 1;
$word_punct_id_str = "word$word_punct_id";
while ($line = <TAGS>) {
# Read $Alvis::NLPPlatform::hash_words_punct{"word$word_punct"}
# Encode::from_to($line, "iso-8859-9", "utf8");
$line = Encode::decode("latin9",$line);
chomp $line;
($inflected, $tag, $lemma) = split /\t/, $line;
$word_id = $word_punct_id + $decal;
$word_id_str = "word$word_id";
if ((!exists $Alvis::NLPPlatform::hash_words{$word_id_str}) || ($Alvis::NLPPlatform::hash_words_punct{$word_punct_id_str} ne $Alvis::NLPPlatform::hash_words{$word_id_str})) {
# it is not a word
# punctuation, delay incrementation of index "decal"
lib/Alvis/NLPPlatform/NLPWrappers.pm view on Meta::CPAN
push @{$doc_hash->{"log_processing1"}->{"comments"}}, "Found POS Tags: " . $word_id ;
}
# sub pos_tag # WRAPPER FOR BRILL
# {
# my $word;
# my $cont;
# print STDERR " Part-Of-Speech tagging...";
# open CORPUS,">$TMPFILE.corpus.tmp";
# binmode(CORPUS,":utf8");
# foreach $word(sort Alvis::NLPPlatform::Annotation::sort_keys keys %Alvis::NLPPlatform::hash_words){
# $cont=$Alvis::NLPPlatform::hash_words{$word};
# print CORPUS "$cont ";
# if($cont eq "."){
# print CORPUS "\n";
# }
# }
# close CORPUS;
# }
lib/Alvis/NLPPlatform/NLPWrappers.pm view on Meta::CPAN
my ($class, $h_config, $doc_hash) = @_;
print STDERR " Semantic tagging... ";
my $in_fn = $h_config->{'TMPFILE'} . ".ast.in";
if($Alvis::NLPPlatform::Annotation::ALVISLANGUAGE eq "FR"){
# French parser command line
}else{
open DOC,">$in_fn";
binmode(DOC,":utf8");
Alvis::NLPPlatform::Annotation::render_xml($doc_hash, \*DOC, 1);
close DOC;
my $cmdline = $h_config->{'NLP_tools'}->{'SEMTAG_EN'} . " $in_fn > " . $h_config->{'TMPFILE'} . ".ast.out 2>> " . $Alvis::NLPPlatform::ALVISLOGFILE;
# print STDERR "$cmdline\n";
`$cmdline`;
$Alvis::NLPPlatform::ALVISDEBUG || unlink $h_config->{'TMPFILE'} . ".ast.in";
$Alvis::NLPPlatform::ALVISDEBUG || unlink $h_config->{'TMPFILE'} . ".ast.out";
# $semtagout == doc XML enriched-document
lib/Alvis/NLPPlatform/UserNLPWrappers.pm view on Meta::CPAN
my $min;
my $max;
my $btw_start;
my $btw_end;
my $token;
my $sentence_cont;
print STDERR " Performing term extraction... \n";
open CORPUS, ">>" . $h_config->{"TMPFILE"} . ".corpus.yatea.tmp";
binmode(CORPUS, ":utf8");
print CORPUS $Alvis::NLPPlatform::Annotation::document_record_id . "\tDOCUMENT\t" . $Alvis::NLPPlatform::Annotation::document_record_id . "\n" ;
&PrintOutputTreeTagger($h_config, $doc_hash, \*CORPUS);
close CORPUS;
# if ((exists $h_config->{"XML_OUTPUT"}->{"YATEA"}) && ($h_config->{"XML_OUTPUT"}->{"YATEA"} == 1)) {
# %$doc_hash = ();
# %Alvis::NLPPlatform::hash_tokens = ();
lib/Alvis/NLPPlatform/UserNLPWrappers.pm view on Meta::CPAN
# $sentences_cont .= "</command>";
# $sentences_cont = "<command string=\"ask\">\n" . $sentences_cont;
# $sentences_cont .= "</command>";
# $sentences_cont = "<command string=\"walls\">\n" . $sentences_cont;
# $sentences_cont .= "</command>";
# $sentences_cont = "<command string=\"union\">\n" . $sentences_cont;
# $sentences_cont .= "</command>";
open CORPUS, ">" . $h_config->{"TMPFILE"} . ".corpus.tmp";
print CORPUS Encode::encode_utf8($sentences_cont);
# print CORPUS $sentences_cont;
close CORPUS;
my $command_line;
my $command_line2;
my $command_line3;
if($Alvis::NLPPlatform::Annotation::ALVISLANGUAGE eq "FR"){
# French parser command line
}else{
$command_line = $h_config->{'NLP_tools'}->{'SYNTACTIC_ANALYSIS_EN'} . " < " . $h_config->{'TMPFILE'} . ".corpus.tmp > " . $h_config->{'TMPFILE'} . ".result.tmp.1 2>> " . $Alvis::NLPPlatform::ALVISLOGFILE;