Alvis-NLPPlatform

 view release on metacpan or  search on metacpan

lib/Alvis/NLPPlatform.pm  view on Meta::CPAN

    if (defined $config->{"CONVERTERS"}) {
	print STDERR "  Section INPUT CONVERTERS\n";

	my %Converter_vars = ("SupplMagicFile" => "File for Additional Definition of Magic Number",
	);
	foreach $var (keys %Converter_vars) {
	    if (defined $config->{"CONVERTERS"}->{$var}) { 
		print STDERR "\t" . $Converter_vars{$var} . " : " . $config->{"CONVERTERS"}->{$var} . "\n";
	    }
	}
	print STDERR "\tRecognized formats:\n";
	$Converter_vars{"STYLESHEET"} = 1;
	my $format;
	foreach $format (keys %{$config->{"CONVERTERS"}}) {
	    if (!exists($Converter_vars{$format})) {
		print STDERR "\t\t$format\n";
	    }
	}

    }
    
}

# Run the NLP client loop: repeatedly fetch a document from the NLP server,
# annotate it with client_main(), and give the annotated document back.
#
# Parameters:
#   $rcfile - configuration file name, passed to load_config().
#
# Exchange with the server as implemented below (one TCP connection per
# phase):
#   fetch    : send "REQUEST"; read "SENDING <id>", then "SIZE <n>", then the
#              document lines up to a "<DONE>" line; answer "ACK".
#   giveback : send "GIVEBACK\n<id>", the rendered XML, "<DONE>", then
#              "RENDER TIME\n<seconds>"; wait for a line containing "ACK".
#
# Side effects: assigns the file-level variables $nlp_host, $nlp_port,
# $connection_retry and $id (all of them are read by sigint_handler), and
# installs sigint_handler as the SIGINT handler while a document is being
# processed. SIGINT is ignored while talking to the server.
#
# Dies if no connection can be established within the configured number of
# attempts. The loop has no normal exit, so the trailing return is only
# reached if that ever changes.
sub client
{
    my ($rcfile) = @_;

    my %config = Alvis::NLPPlatform::load_config($rcfile);

    $nlp_host = $config{"NLP_connection"}->{"SERVER"};
    $nlp_port = $config{"NLP_connection"}->{"PORT"};
    $connection_retry = $config{"alvis_connection"}->{"RETRY_CONNECTION"};

    my $line;
    my $doc_xml_size;
    my $doc_xml;
    my $sock = 0;
    my $time_render;
    my $sig_handler = "";

  REQUEST:
    while (1) {

        # Do not let SIGINT interrupt the exchange with the server
        # (an interrupted exchange should crash the server).
        $sig_handler = $SIG{'INT'};
        $SIG{'INT'} = 'IGNORE'; # to prevent zombification

        $connection_retry = $config{"alvis_connection"}->{"RETRY_CONNECTION"};
        do {
            $sock = IO::Socket::INET->new(PeerAddr => $nlp_host,
                                          PeerPort => $nlp_port,
                                          Proto    => 'tcp');

            warn "Could not create socket: $! \n" unless $sock;
            $connection_retry--;
            sleep(1);
        } while (!defined($sock) && ($connection_retry > 0));

        # Test the socket itself, not the counter: the counter also reaches 0
        # when the last allowed attempt succeeds, and it never equals 0 when
        # the configured retry count is 0 or negative.
        if (!defined($sock)) {
            die "Timeout. Could not create socket: $! \n";
        }

        $sock->autoflush(1);
        binmode($sock, ":utf8");
        print STDERR `date`;
        print STDERR "Established connection to server.\n";

        print STDERR "Requesting document...";
        print $sock "REQUEST\n";
        print STDERR "done.\n";

        print STDERR "Receiving document...\n";

        # Expect "SENDING <id>". The line is upper-cased before matching, so
        # the captured id is upper-cased as well.
        while ($line = <$sock>) {
            print STDERR "$line";
            $line = uc $line;
            if ($line =~ /SENDING ([^\n]+)\n/) {
                $id = $1;
                last;
            }

            # Anything else: drop this connection and start a new request.
            # (A bare "next" here would only re-test the inner loop and read
            # from the socket that has just been closed.)
            warn "Out of protocol message\n";
            close $sock;
            $SIG{'INT'} = defined($sig_handler) ? $sig_handler : 'DEFAULT';
            next REQUEST;
        }

        print STDERR "GETTING $id\n";

        # Expect "SIZE <n>": the announced size of the document.
        while ($line = <$sock>) {
            print STDERR "$line";
            $line = uc $line;
            if ($line =~ /SIZE ([^\n]+)\n/) {
                $doc_xml_size = $1;
                last;
            }

            warn "Out of protocol message\n";
            close $sock;
            $SIG{'INT'} = defined($sig_handler) ? $sig_handler : 'DEFAULT';
            next REQUEST;
        }

        # Read the document line by line until the "<DONE>" marker. The
        # announced size is only used for the sanity warning below.
        print STDERR "READING $doc_xml_size bytes\n";
        $doc_xml = "";
        print STDERR length($doc_xml) . "\r";
        while ((defined $sock) && ($line = <$sock>) && ($line ne "<DONE>\n")) {
            print STDERR length($doc_xml) . "\r";
            $doc_xml .= $line;
        }
        if (length($doc_xml) > $doc_xml_size) {
            warn "Received more bytes than expected\n";
        }
        print STDERR length($doc_xml) . "\n";
        print STDERR "\n";
        print STDERR "READING $id done.\n";
        print STDERR "Sending ACK...";
        print $sock "ACK\n";
        print STDERR "done.\n";

        close $sock;

        # While the document is processed, SIGINT reports the abort to the
        # server through sigint_handler.
        $SIG{'INT'} = \&sigint_handler;

        print STDERR "Processing $id";

        my $doc_hash;

        Alvis::NLPPlatform::starttimer();
        $doc_hash = Alvis::NLPPlatform::Annotation::load_xml($doc_xml, \%config);
        my $time_load = Alvis::NLPPlatform::endtimer();

        # Recording computing data (time and entity size)
        $doc_hash->{"log_processing0"}->{"datatype"} = "log_processing";
        $doc_hash->{"log_processing0"}->{"log_id"} = "time";
        $doc_hash->{"log_processing1"}->{"datatype"} = "log_processing";
        $doc_hash->{"log_processing1"}->{"log_id"} = "element_size";

        # Each log entry gets its own, initially empty, list of comments;
        # the XML loading time is the first "time" comment.
        $doc_hash->{"log_processing0"}->{"comments"} = [];
        push @{$doc_hash->{"log_processing0"}->{"comments"}}, "XML loading Time : $time_load";

        $doc_hash->{"log_processing1"}->{"comments"} = [];

        $doc_hash = Alvis::NLPPlatform::client_main($doc_hash, \%config);

        # Back to talking to the server: ignore SIGINT again.
        $sig_handler = $SIG{'INT'};
        $SIG{'INT'} = 'IGNORE'; # to prevent zombification

        $connection_retry = $config{"alvis_connection"}->{"RETRY_CONNECTION"};
        do {
            $sock = IO::Socket::INET->new(PeerAddr => $nlp_host,
                                          PeerPort => $nlp_port,
                                          Proto    => 'tcp');

            warn "Could not create socket: $! \n" unless $sock;
            $connection_retry--;
            sleep(1);
        } while (!defined($sock) && ($connection_retry > 0));

        # Same reasoning as for the first connection: check the socket.
        if (!defined($sock)) {
            die "Timeout. Could not create socket: $! \n";
        }
        binmode $sock, ":utf8";

        print STDERR "Established connection to server.\n";

        print STDERR "Giving back annotated document...\n";
        # Communication with the server
        print $sock "GIVEBACK\n$id\n";

        # The XML is rendered directly onto the socket.
        print STDERR "\tRendering XML...  ";

        starttimer();
        $time_render = 0;
        # The rendering time is not known before rendering, so a placeholder
        # is written into the document; the measured value is sent separately
        # after the "<DONE>" marker.
        push @{$doc_hash->{"log_processing0"}->{"comments"}}, "XML rendering Time : \@RENDER_TIME_NOT_SET\@";
        Alvis::NLPPlatform::Annotation::render_xml($doc_hash, $sock, 1, \%config);
        $time_render += endtimer();

        # TODO : recording the xml rendering time
        print STDERR "done\n";

        print $sock "<DONE>\n";

        print STDERR "done.\n";

        # the render time is sent
        print $sock "RENDER TIME\n$time_render\n";

        # Wait for a line containing "ACK" (case-insensitive, since the line
        # is upper-cased first) or for the server to close the connection.
        print STDERR "Awaiting acknowledgement...";
        while ($line = <$sock>) {
            chomp $line;
            $line = uc $line;
            last if $line =~ /ACK/;
        }
        print STDERR "OK.\n";

        close($sock);

        # restore the handler that was active before the giveback phase
        $SIG{'INT'} = $sig_handler;
        print STDERR "Closed connection to server.\n";
    }
    return($time_render);
}


# SIGINT handler used while a document is being processed: tells the NLP
# server that processing of the current document is aborted, then exits.
#
# Parameters:
#   $signal - the signal name passed by Perl to the handler (unused).
#
# Reads the file-level variables $nlp_host, $nlp_port, $connection_retry and
# $id, which client() assigns. $connection_retry is used with whatever value
# it currently holds, and at least one connection attempt is always made.
#
# Sends "ABORTING\n<id>" to the server and waits for a line containing "ACK"
# (or for the server to close the connection) before exiting.
#
# Dies if no connection to the server can be established.
sub sigint_handler {

    my ($signal) = @_;
    my $sock;

    warn "Receiving SIGINT -- Aborting NL processing\n";

    do {
        $sock = IO::Socket::INET->new(PeerAddr => $nlp_host,
                                      PeerPort => $nlp_port,
                                      Proto    => 'tcp');

        warn "Could not create socket: $! \n" unless $sock;
        $connection_retry--;
        sleep(1);
    } while (!defined($sock) && ($connection_retry > 0));

    # Test the socket itself, not the counter: the counter also reaches 0
    # when the last allowed attempt succeeds, and it can already be <= 0 on
    # entry, in which case a failed attempt would go unnoticed.
    if (!defined($sock)) {
        die "Timeout. Could not create socket: $! \n";
    }
    $sock->autoflush(1);
    binmode $sock, ":utf8";

    print STDERR "Established connection to server.\n";

    print STDERR "Sending aborting message\n";

    print $sock "ABORTING\n$id\n";

    print STDERR "Aborting message sent\n";

    print STDERR "Awaiting acknowledgement...";
    my $line;
    while ($line = <$sock>) {
        chomp $line;
        $line = uc $line;
        last if $line =~ /ACK/;
    }
    print STDERR "OK.\n";

    close($sock);
    exit;
}


sub server 
{
    my ($rcfile) = @_;

    print STDERR "config File : $rcfile \n";

    my %config = Alvis::NLPPlatform::load_config($rcfile);

     $nlp_host = $config{"NLP_connection"}->{"SERVER"};
     $nlp_port = $config{"NLP_connection"}->{"PORT"};
     $connection_retry = $config{"alvis_connection"}->{"RETRY_CONNECTION"};
#    print STDERR Dumper(\%config);

    my $charset = 'UTF-8';

    #  header and footer

    my $xmlhead="<?xml version=\"1.0\" encoding=\"$charset\"?>\n<documentCollection xmlns=\"http://alvis.info/enriched/\" version=\"1.1\">\n";
    my $xmlfoot="</documentCollection>\n";

    # connection to the crawler

    my $pipe = new Alvis::Pipeline::Read(port => $config{"alvis_connection"}->{"HARVESTER_PORT"}, spooldir => $config{"alvis_connection"}->{"SPOOLDIR"},
					 loglevel=>10)
	or die "can't create read-pipe on port " . $config{"alvis_connection"}->{"HARVESTER_PORT"} . ": $!";

    $|=1;

    touch($config{"ALVISTMP"} . "/.proc_id");

 view all matches for this distribution
 view release on metacpan -  search on metacpan

( run in 1.798 second using v1.00-cache-2.02-grep-82fe00e-cpan-48ebf85a1963 )