Alvis-NLPPlatform
view release on metacpan or search on metacpan
lib/Alvis/NLPPlatform/UserNLPWrappers.pm view on Meta::CPAN
my $token_start;
my $token_end;
my $relation;
my $left_wall;
my $right_wall;
my $relation_id;
my @arr_tokens;
my $last_token;
my $wordidshift=0;
my $phrase_idx=$Alvis::NLPPlatform::Annotation::phrase_idx;
my $word;
my $word_cont;
my $word_id;
my $i;
my $sentences_cont="";
my @tab_word_punct;
my @tab_word;
my $idx_tab_word_punct=1;
my $idx_tab_word=1;
my @tab_mapping;
# print out words+punct and fill in a tab
push @tab_word_punct," ";
push @tab_word," ";
my $decal=1;
my $searchterm;
my $sti;
my $word_np;
my @tab_tmp;
my $tmp_sp;
my $spi=0;
my $termsfound=0;
my $stubs=0;
my $skip=0;
my @tab_start_term=();
my @tab_end_term=();
my $constituents;
my $nb_constituents;
my $min;
my $max;
my $btw_start;
my $btw_end;
my $token;
my $sentence_cont;
print STDERR " Performing term extraction... \n";
open CORPUS, ">>" . $h_config->{"TMPFILE"} . ".corpus.yatea.tmp";
binmode(CORPUS, ":utf8");
print CORPUS $Alvis::NLPPlatform::Annotation::document_record_id . "\tDOCUMENT\t" . $Alvis::NLPPlatform::Annotation::document_record_id . "\n" ;
&PrintOutputTreeTagger($h_config, $doc_hash, \*CORPUS);
close CORPUS;
# if ((exists $h_config->{"XML_OUTPUT"}->{"YATEA"}) && ($h_config->{"XML_OUTPUT"}->{"YATEA"} == 1)) {
# %$doc_hash = ();
# %Alvis::NLPPlatform::hash_tokens = ();
# %Alvis::NLPPlatform::hash_words = ();
# %Alvis::NLPPlatform::hash_words_punct = ();
# %Alvis::NLPPlatform::hash_sentences = ();
# %Alvis::NLPPlatform::hash_postags = ();
# %Alvis::NLPPlatform::hash_named_entities = ();
# %Alvis::NLPPlatform::hash_lemmas = ();
# $Alvis::NLPPlatform::number_of_words = 0;
# $Alvis::NLPPlatform::number_of_sentences = 0;
# $Alvis::NLPPlatform::nb_relations = 0;
# $Alvis::NLPPlatform::dont_annotate = 0;
# @Alvis::NLPPlatform::word_start = ();
# @Alvis::NLPPlatform::word_end = ();
# @Alvis::NLPPlatform::en_start = ();
# @Alvis::NLPPlatform::en_end = ();
# @Alvis::NLPPlatform::en_type = ();
# @Alvis::NLPPlatform::en_tokens_start = ();
# @Alvis::NLPPlatform::en_tokens_end = ();
# %Alvis::NLPPlatform::en_tokens_hash = ();
# }
if ( $Alvis::NLPPlatform::last_doc == 0) {
return(1);
}
require Lingua::YaTeA::Corpus;
require Lingua::YaTeA;
my %config_yatea = Lingua::YaTeA::load_config($h_config->{'NLP_tools'}->{'YATEARC'});
my $yatea = Lingua::YaTeA->new($config_yatea{"OPTIONS"}, \%config_yatea);
if (defined $h_config->{'NLP_tools'}->{'YATEAOUTPUT'}) {
print STDERR "\nYaTeA output defined is " . $h_config->{'NLP_tools'}->{'YATEAOUTPUT'} . "\n\n";
$yatea->getOptionSet->addOption("output-path", $h_config->{'NLP_tools'}->{'YATEAOUTPUT'});
} else {
print STDERR "\nNo YaTeA output defined\n\n";
$yatea->getOptionSet->addOption("output-path", $h_config->{"ALVISTMP"});
}
my $corpus_path = $h_config->{"TMPFILE"} . ".corpus.yatea.tmp";
my $corpus = Lingua::YaTeA::Corpus->new($corpus_path,$yatea->getOptionSet,$yatea->getMessageSet);
########################################################################
lib/Alvis/NLPPlatform/UserNLPWrappers.pm view on Meta::CPAN
if(($kref>0)&&($tmp1_within==1)&&($Alvis::NLPPlatform::found_terms_phr[($kref-1)]!=-666)){
$doc_hash->{"syntactic_relation$relation_id"}->{'refid_head'}->{"refid_phrase"}="phrase".$Alvis::NLPPlatform::found_terms_phr[($kref-1)];
# print STDERR "\n\nSize: ".scalar @Alvis::NLPPlatform::found_terms_phr."\n";
# print STDERR "Index: $kref\n\n";
}else{
$doc_hash->{"syntactic_relation$relation_id"}->{'refid_head'}->{"refid_word"}="word".$tab_mapping[($token_start+$wordidshift)];
}
# $doc_hash->{"syntactic_relation$relation_id"}->{'refid_head'}="word".$tab_mapping[($token_start+$wordidshift)];
$doc_hash->{"syntactic_relation$relation_id"}->{'refid_modifier'} = {};
$doc_hash->{"syntactic_relation$relation_id"}->{'refid_modifier'}->{'datatype'}="refid_modifier";
if(($kref>0)&&($tmp2_within==1)&&($Alvis::NLPPlatform::found_terms_phr[($kref-1)]!=-666)){
$doc_hash->{"syntactic_relation$relation_id"}->{'refid_modifier'}->{"refid_phrase"}="phrase".$Alvis::NLPPlatform::found_terms_phr[($kref-1)];
# print STDERR "\n\nIndex: $kref\n\n";
}else{
$doc_hash->{"syntactic_relation$relation_id"}->{'refid_modifier'}->{"refid_word"}="word".$tab_mapping[($token_end+$wordidshift)];
}
# $doc_hash->{"syntactic_relation$relation_id"}->{'refid_modifier'}="word".$tab_mapping[($token_end+$wordidshift)];
$relation_id++;
}
}
}
}
# trash everything and continue the loop
$insentence=0;
$wordidshift+=$last_token-1;
}
}
}
close CONSTITUENT_OUTPUT;
close SYN_RES;
# print STDERR $h_config->{'TMPFILE'}. ".corpus.tmp" . "\n";
$Alvis::NLPPlatform::ALVISDEBUG || unlink $h_config->{'TMPFILE'}. ".corpus.tmp";
$Alvis::NLPPlatform::ALVISDEBUG || unlink $h_config->{'TMPFILE'} . ".result.tmp";
$Alvis::NLPPlatform::nb_relations=$relation_id-1;
$Alvis::NLPPlatform::Annotation::phrase_idx=$phrase_idx;
print STDERR "done - Found $Alvis::NLPPlatform::nb_relations relations, $termsfound full terms, $nb_constituents constituents.\n";
push @{$doc_hash->{"log_processing1"}->{"comments"}}, "Found Syntactic Relations : " . $Alvis::NLPPlatform::nb_relations;
}
# clean_bioLG($infile, $outfile)
#
# Condenses the parser dump stored in $infile (presumably raw BioLG output,
# judging by the sub name -- confirm against the caller) and writes the
# result to $outfile.
#
# The input is scanned sentence by sentence:
#   * everything up to a line starting with "++++Time" is discarded;
#   * for each linkage, lines are discarded up to the first line starting
#     with "[", then lines are collected up to (excluding) a "diagram"
#     line, then lines are discarded up to "%%EndDocument", then one more
#     line is discarded;
#   * another linkage follows when the next line starts with "%!PS-Adobe";
#   * the rest of the sentence is discarded up to the line
#     "Press RETURN for the next linkage.".
#
# For every sentence the output holds "[Sentence N]" followed, for each
# linkage, by "[Linkage M]", the collected lines, an empty line, and the
# line that ended the linkage loop followed by an extra newline.
#
# Parameters:
#   $infile  - path of the file to read (read with binmode applied)
#   $outfile - path of the file to create/overwrite
#
# Returns 0. Dies if either file cannot be opened or if the output file
# cannot be closed (i.e. flushed) successfully.
sub clean_bioLG {
    my ($infile, $outfile) = @_;

    my $line;
    my $sentence_counter = 0;
    my $linkage_counter  = 0;
    my @linkage_output;

    # Three-argument open with lexical handles: the file names are taken
    # literally (no whitespace stripping, no mode characters interpreted)
    # and no package-global bareword handle is clobbered.
    open(my $in_fh, '<', $infile)
        or die "Cannot open $infile for reading: $!\n";
    binmode $in_fh;
    open(my $out_fh, '>', $outfile)
        or die "Cannot open $outfile for writing: $!\n";

    do {
        # Discard the echoed input up to the next "++++Time" marker.
        while (defined($line = <$in_fh>) && $line !~ /^\+\+\+\+Time/) {
            # nothing
        }

        if (defined $line && $line =~ /^\+\+\+\+Time/) {
            $linkage_counter = 0;
            @linkage_output  = ();

            do {
                # Discard the postscript output until the constituent part,
                # which starts with "[".
                while (defined($line = <$in_fh>) && $line !~ /^\[/) {
                    # nothing
                }

                # Collect the constituent part up to the "diagram" line.
                # At end of input $line is undef; store an empty string so
                # that the entry is always defined.
                $linkage_output[$linkage_counter] = defined $line ? $line : '';
                while (defined($line = <$in_fh>) && $line ne "diagram\n") {
                    $linkage_output[$linkage_counter] .= $line;
                }

                # Discard the following postscript part.
                while (defined($line = <$in_fh>) && $line ne "%%EndDocument\n") {
                    # nothing
                }

                # One more line is deliberately consumed and dropped here,
                # before the loop condition reads the line that decides
                # whether another linkage follows.
                $line = <$in_fh>;
                $linkage_output[$linkage_counter] .= "\n";
                $linkage_counter++;
            } while (defined($line = <$in_fh>) && $line =~ /^%!PS-Adobe/);

            # Emit the sentence. $line is the line that ended the linkage
            # loop; it is repeated after every linkage (empty at end of
            # input, which avoids interpolating undef).
            print {$out_fh} "[Sentence " . $sentence_counter . "]\n";
            $sentence_counter++;

            my $trailer = defined $line ? $line : '';
            for my $idx (0 .. $#linkage_output) {
                print {$out_fh} "[Linkage " . $idx . "]\n";
                print {$out_fh} $linkage_output[$idx];
                print {$out_fh} "$trailer\n";
            }

            # Discard whatever remains of this sentence.
            while (defined($line = <$in_fh>)
                && $line ne "Press RETURN for the next linkage.\n") {
                # nothing
            }
        }
    } while (defined($line = <$in_fh>));

    close $in_fh;
    # Buffered write errors only surface at close time, so check it.
    close($out_fh)
        or die "Cannot close $outfile: $!\n";

    return 0;
}
( run in 0.799 second using v1.01-cache-2.11-cpan-39bf76dae61 )