Alvis-NLPPlatform
view release on metacpan - search on metacpan
view release on metacpan or search on metacpan
lib/Alvis/NLPPlatform.pm view on Meta::CPAN
if (defined $config->{"CONVERTERS"}) {
print STDERR " Section INPUT CONVERTERS\n";
my %Converter_vars = ("SupplMagicFile" => "File for Additional Definition of Magic Number",
);
foreach $var (keys %Converter_vars) {
if (defined $config->{"CONVERTERS"}->{$var}) {
print STDERR "\t" . $Converter_vars{$var} . " : " . $config->{"CONVERTERS"}->{$var} . "\n";
}
}
print STDERR "\tRecognized formats:\n";
$Converter_vars{"STYLESHEET"} = 1;
my $format;
foreach $format (keys %{$config->{"CONVERTERS"}}) {
if (!exists($Converter_vars{$format})) {
print STDERR "\t\t$format\n";
}
}
}
}
# client($rcfile)
#
# Main loop of a NLP client.  Loads the configuration from $rcfile, then
# repeats forever:
#   1. connect to the NLP server and send "REQUEST";
#   2. read the "SENDING <id>" and "SIZE <n>" headers, then the document
#      itself up to the "<DONE>" line, and answer "ACK";
#   3. load the XML, record the loading time and run client_main() on it;
#   4. reconnect, send "GIVEBACK", the document id, the rendered XML,
#      "<DONE>" and the rendering time, then wait for the server's "ACK".
#
# The package variables $nlp_host, $nlp_port, $connection_retry and $id are
# written here and read by sigint_handler().
#
# Parameter : $rcfile - configuration file given to load_config().
# Returns   : the last rendering time; the statement is kept for interface
#             compatibility but the loop above it never terminates.
# Dies      : when the server cannot be reached within the configured number
#             of attempts.
sub client
{
    my ($rcfile) = @_;

    my %config = Alvis::NLPPlatform::load_config($rcfile);

    $nlp_host = $config{"NLP_connection"}->{"SERVER"};
    $nlp_port = $config{"NLP_connection"}->{"PORT"};
    $connection_retry=$config{"alvis_connection"}->{"RETRY_CONNECTION"};

    my $line;
    my $doc_xml_size;
    my $doc_xml;
    my $sock;
    my $time_render;

  DOCUMENT:
    while(1) {
        # SIGINT is ignored while talking to the server, so that the
        # exchange is never interrupted half-way.
        $SIG{'INT'}='IGNORE';

        $connection_retry=$config{"alvis_connection"}->{"RETRY_CONNECTION"};
        $sock = _client_connect();

        print STDERR `date`;
        print STDERR "Established connection to server.\n";

        print STDERR "Requesting document...";
        print $sock "REQUEST\n";
        print STDERR "done.\n";

        print STDERR "Receiving document...\n";

        # Both headers are read into lexicals first: the package variable
        # $id is only overwritten once a valid identifier was received.
        my $received_id = _client_read_header($sock, qr/SENDING ([^\n]+)\n/);
        my $received_size;
        if (defined $received_id) {
            print STDERR "GETTING $received_id\n";
            $received_size = _client_read_header($sock, qr/SIZE ([^\n]+)\n/);
        }

        if (!defined $received_size) {
            # The exchange cannot be continued on this connection: drop it
            # and start again with a fresh request, instead of going on
            # with a closed socket and stale values.
            warn "Document header not received, requesting again\n";
            close $sock;
            sleep(1);    # do not hammer a misbehaving server
            next DOCUMENT;
        }

        $id           = $received_id;
        $doc_xml_size = $received_size;

        print STDERR "READING $doc_xml_size bytes\n";

        $doc_xml = "";
        print STDERR length($doc_xml) . "\r";
        while (defined($line = <$sock>) && ($line ne "<DONE>\n")) {
            print STDERR length($doc_xml) . "\r";
            $doc_xml .= $line;
        }
        if (length($doc_xml) > $doc_xml_size) {
            warn "Received more bytes than expected\n";
        }
        print STDERR length($doc_xml) . "\n";
        print STDERR "\n";
        print STDERR "READING $id done.\n";

        print STDERR "Sending ACK...";
        print $sock "ACK\n";
        print STDERR "done.\n";
        close $sock;

        # During the processing itself, SIGINT notifies the server that the
        # document is abandoned (see sigint_handler).
        $SIG{'INT'} = \&sigint_handler;

        print STDERR "Processing $id";

        my $doc_hash;

        Alvis::NLPPlatform::starttimer();
        $doc_hash=Alvis::NLPPlatform::Annotation::load_xml($doc_xml, \%config);
        my $time_load = Alvis::NLPPlatform::endtimer();

        # Recording computing data (time and entity size)
        $doc_hash->{"log_processing0"}->{"datatype"}="log_processing";
        $doc_hash->{"log_processing0"}->{"log_id"} = "time";
        $doc_hash->{"log_processing1"}->{"datatype"}="log_processing";
        $doc_hash->{"log_processing1"}->{"log_id"} = "element_size";

        # XML loading time
        $doc_hash->{"log_processing0"}->{"comments"} = [];
        push @{$doc_hash->{"log_processing0"}->{"comments"}}, "XML loading Time : $time_load";
        $doc_hash->{"log_processing1"}->{"comments"} = [];

        $doc_hash = Alvis::NLPPlatform::client_main($doc_hash, \%config);

        # Again, no interruption while the result is given back; the handler
        # in place is restored once the exchange is over.
        my $sig_handler = $SIG{'INT'};
        $SIG{'INT'}='IGNORE';

        $connection_retry=$config{"alvis_connection"}->{"RETRY_CONNECTION"};
        $sock = _client_connect();

        print STDERR "Established connection to server.\n";

        print STDERR "Giving back annotated document...\n";
        # Communication with the server
        print $sock "GIVEBACK\n$id\n";

        # The XML document is rendered directly on the socket
        print STDERR "\tRendering XML...  ";
        starttimer();
        $time_render = 0;
        # The rendering time is not known before the rendering is over: a
        # placeholder is stored in the document and the actual value is sent
        # separately below.
        push @{$doc_hash->{"log_processing0"}->{"comments"}},  "XML rendering Time : \@RENDER_TIME_NOT_SET\@";

        Alvis::NLPPlatform::Annotation::render_xml($doc_hash, $sock, 1,\%config);
        $time_render+=endtimer();

        print STDERR "done\n";

        print $sock "<DONE>\n";
        print STDERR "done.\n";

        # the render time is sent
        print $sock "RENDER TIME\n$time_render\n";

        print STDERR "Awaiting acknowledgement...";
        while (defined($line = <$sock>)) {
            chomp $line;
            last if (uc($line) =~ /ACK/);
        }
        print STDERR "OK.\n";
        close($sock);

        # restore the normal behaviour
        $SIG{'INT'} = $sig_handler;
        print STDERR "Closed connection to server.\n";
    }
    return($time_render);
}

# _client_connect()
#
# Opens a TCP connection to $nlp_host:$nlp_port.  The package variable
# $connection_retry is the number of attempts still allowed; it is
# decremented at each attempt and must be initialised by the caller.  At
# least one attempt is always made, and one second is waited after each
# failed attempt.
#
# Returns : the connected socket, with autoflush and the :utf8 layer set.
# Dies    : when no attempt succeeded.  The test is made on the socket and
#           not on the counter, so that a success at the last attempt is
#           not reported as a timeout.
sub _client_connect
{
    my $sock;
    my $error = '';

    do {
        $sock = IO::Socket::INET->new( PeerAddr => $nlp_host,
                                       PeerPort => $nlp_port,
                                       Proto    => 'tcp');
        $connection_retry--;
        if (!$sock) {
            $error = $!;
            warn "Could not create socket: $error \n";
            sleep(1);
        }
    } while(!defined($sock) && ($connection_retry > 0));

    if (!defined($sock)) {
        die "Timeout. Could not create socket: $error \n";
    }

    $sock->autoflush(1);
    binmode($sock, ":utf8");

    return($sock);
}

# _client_read_header($sock, $pattern)
#
# Reads one line on $sock, echoes it on STDERR, upper-cases it and matches
# it against $pattern.
#
# Returns : the first capture of $pattern (taken from the upper-cased line),
#           or undef when the connection is closed or when the line does not
#           match (a warning is issued in the latter case).
sub _client_read_header
{
    my ($sock, $pattern) = @_;

    my $line = <$sock>;
    return unless defined $line;

    print STDERR "$line";
    $line = uc $line;
    if ($line =~ $pattern) {
        return($1);
    }
    warn "Out of protocol message\n";
    return;
}
# sigint_handler($signal)
#
# SIGINT handler installed by client() while a document is being processed.
# It connects to the NLP server ($nlp_host:$nlp_port), sends "ABORTING"
# followed by the identifier of the current document ($id), waits for the
# server's "ACK" and terminates the process.
#
# The number of connection attempts is whatever is left in the package
# variable $connection_retry; at least one attempt is always made.
#
# Parameter : $signal - name of the received signal (not used).
# Dies      : when the server cannot be reached.
sub sigint_handler {
    my ($signal) = @_;

    my $sock;
    my $error = '';

    warn "Receiving SIGINT -- Aborting NL processing\n";

    do {
        $sock = IO::Socket::INET->new( PeerAddr => $nlp_host,
                                       PeerPort => $nlp_port,
                                       Proto    => 'tcp');
        $connection_retry--;
        if (!$sock) {
            $error = $!;
            warn "Could not create socket: $error \n";
            sleep(1);
        }
    } while(!defined($sock) && ($connection_retry > 0));

    # The failure test is made on the socket itself: testing the counter
    # against 0 reports a timeout when the last attempt succeeded, and lets
    # an undefined socket through when the counter went below 0.
    if (!defined($sock)) {
        die "Timeout. Could not create socket: $error \n";
    }

    $sock->autoflush(1);
    binmode $sock, ":utf8";
    print STDERR "Established connection to server.\n";

    print STDERR "Sending aborting message\n";
    print $sock "ABORTING\n$id\n";
    print STDERR "Aborting message sent\n";

    print STDERR "Awaiting acknowledgement...";
    my $line;
    while (defined($line = <$sock>)) {
        chomp $line;
        last if (uc($line) =~ /ACK/);
    }
    print STDERR "OK.\n";
    close($sock);

    exit;
}
sub server
{
my ($rcfile) = @_;
print STDERR "config File : $rcfile \n";
my %config = Alvis::NLPPlatform::load_config($rcfile);
$nlp_host = $config{"NLP_connection"}->{"SERVER"};
$nlp_port = $config{"NLP_connection"}->{"PORT"};
$connection_retry = $config{"alvis_connection"}->{"RETRY_CONNECTION"};
# print STDERR Dumper(\%config);
my $charset = 'UTF-8';
# header and footer
my $xmlhead="<?xml version=\"1.0\" encoding=\"$charset\"?>\n<documentCollection xmlns=\"http://alvis.info/enriched/\" version=\"1.1\">\n";
my $xmlfoot="</documentCollection>\n";
# connection to the crawler
my $pipe = new Alvis::Pipeline::Read(port => $config{"alvis_connection"}->{"HARVESTER_PORT"}, spooldir => $config{"alvis_connection"}->{"SPOOLDIR"},
loglevel=>10)
or die "can't create read-pipe on port " . $config{"alvis_connection"}->{"HARVESTER_PORT"} . ": $!";
$|=1;
touch($config{"ALVISTMP"} . "/.proc_id");
view all matches for this distribution
view release on metacpan - search on metacpan
( run in 1.798 second using v1.00-cache-2.02-grep-82fe00e-cpan-48ebf85a1963 )