Lingua-Ogmios
view release on metacpan or search on metacpan
lib/Lingua/Ogmios/NLPWrappers/LexicalSyntacticPatterns.pm view on Meta::CPAN
my $term2;
my @terms1;
my @terms2;
my $document;
my $relation;
warn "[LOG] LexicalSyntacticPatterns\n";
$lang = $self->_documentSet->[0]->getAnnotations->getLanguage;
warn "open " . $self->_input_filename . " (2)\n";
open FILEINPUT, "<:utf8", $self->_input_filename or die "No such file " . $self->_input_filename;
# open FILEINPUT, "<:utf8", "/home/thierry/Recherche/Projets/2011AIR-REACH/Data/GuidesV2/ogmios.metis.30920.LexicalSyntacticPatterns.in" or die "No such file " . "/home/thierry/Recherche/Projets/2011AIR-REACH/Data/GuidesV2/ogmios.metis.30920.Lexi...
# open FILEINPUT, "<:utf8", "/export/home/limbio/hamon/Research/Projets/2011AIR-REACH/Data/GuidesV2/ogmios.metis.30920.LexicalSyntacticPatterns.in" or die "No such file " . "/export/home/limbio/hamon/Research/Projets/2011AIR-REACH/Data/GuidesV2/o...
# $lang = $self->_documentSet->[0]->getAnnotations->getLanguage;
while ($sentence = <FILEINPUT>) {
chomp $sentence;
# warn "--> $sentence\n";
foreach $pattern (@{$self->{"Resources"}->{$lang}->{"PATTERNS"}->[1]}) {
# warn $pattern->{'pattern'} . "\n";
# foreach $sentence (@{$self->_input_hash->{'sentences'}}) {
if ($sentence =~ m!$pattern->{'pattern'}!) {
# warn "OK\n";
# warn $pattern->{'element1'}->{'role'} . ": " . $+{$pattern->{'element1'}->{'ID'}} . "\n";
# warn $pattern->{'element2'}->{'role'} . ": " . $+{$pattern->{'element2'}->{'ID'}} . "\n";
if ($pattern->{'element1'}->{'type'} eq "list") {
($document, @terms1) = $self->_extractTermsFromList($lang, $+{$pattern->{'element1'}->{'ID'}});
} else {
($term1, $document) = $self->_extractInfos($+{$pattern->{'element1'}->{'ID'}});
@terms1 = ($term1);
}
if ($pattern->{'element2'}->{'type'} eq "list") {
($document, @terms2) = $self->_extractTermsFromList($lang, $+{$pattern->{'element2'}->{'ID'}});
} else {
($term2, $document) = $self->_extractInfos($+{$pattern->{'element2'}->{'ID'}});
@terms2 = ($term2);
}
foreach $term1 (@terms1) {
foreach $term2 (@terms2) {
$relation = $self->_addRelation($document, $pattern->{'relation'} . " (" . $pattern->{'element1'}->{'role'} . "-" . $pattern->{'element2'}->{'role'} . ")", [$term1, $term2]);
if (defined $relation) {
# warn "==>" . $relation->getId . " " . $pattern->{'relation'} . "\n";
push @{$self->_addedElements_array}, [$relation, $document->getId];
}
}
}
}
}
}
close FILEINPUT;
warn "[LOG]\n";
}
# Resolve one "/"-separated token description into a term unit and the
# document it belongs to.
#
# $string is split on "/" into six fields.  Only three of them are used:
#   - field 2: a tag; the literal value "term" means the token already
#              refers to an existing semantic unit,
#   - field 4: the identifier of the element to look up,
#   - field 5: the index of the document inside $self->_documentSet.
#
# When the tag is "term", the semantic unit with that identifier is
# fetched from the document's semantic-unit level.  Otherwise the
# identifier is looked up at the word level, a new term is built from that
# word (its form, plus a reference to the word itself) and that new term is
# added to the document's annotations.
#
# Returns the list ($termUnit, $document) -- term first, document second.
sub _extractInfos {
    my ($self, $string) = @_;

    # Six slots are kept (unused ones as undef) so the implicit split limit
    # is unchanged.
    my (undef, $tag, undef, $element_id, $doc_index, undef) = split m!/!, $string;

    my $document = $self->_documentSet->[$doc_index];

    # Already a term: just look it up.
    if ($tag eq "term") {
        my $known_term = $document->getAnnotations->getSemanticUnitLevel->getElementById($element_id);
        return($known_term, $document);
    }

    # Plain word: wrap it in a freshly created term and register that term.
    my $word = $document->getAnnotations->getWordLevel->getElementById($element_id);
    my $new_term = Lingua::Ogmios::Annotations::SemanticUnit->newTerm(
        {
            'form'       => $word->getForm,
            'refid_word' => $word,
        }
    );
    $document->getAnnotations->addSemanticUnit($new_term);

    return($new_term, $document);
}
# Collect every term described inside a list string.
#
# The term pattern is read from $self->{"Resources"}->{$lang}->{"PATTERNS"}
# at index 2.  $list is scanned left to right for successive matches of that
# pattern, each optionally preceded by a single space.  Every matched token
# is handed to _extractInfos, and the term units it returns are accumulated
# in match order.
#
# Returns the list ($document, @terms) -- document first, then the terms.
# $document is the one returned for the last match; it is undef when
# nothing in $list matches.
sub _extractTermsFromList {
    my ($self, $lang, $list) = @_;

    my $term_pattern = $self->{"Resources"}{$lang}{"PATTERNS"}[2];

    my $last_document;
    my @collected;

    # Scalar-context /g iteration: only the outer capture ($1) is consumed,
    # whatever groups the configured pattern may contain itself.
    while ($list =~ / ?($term_pattern)/gc) {
        my $matched = $1;
        (my $term_unit, $last_document) = $self->_extractInfos($matched);
        push @collected, $term_unit;
    }

    return($last_document, @collected);
}
sub _inputLexicalSyntacticPatterns {
my ($self) = @_;
$self->getTimer->startsLap("making input");
warn "[LOG] making LexicalSyntacticPatterns input\n";
$self->_input_hash->{'sentences'} = {};
warn "open " . $self->_input_filename . "(1)\n";
open FILEINPUT, ">:utf8", $self->_input_filename or die "No such file " . $self->_input_filename;
$self->_makeTaggedSentences($self->_input_hash->{'sentences'}, \*FILEINPUT, "/", " ");
close FILEINPUT;
# $self->getTimer->_printTimesBySteps;
( run in 0.453 second using v1.01-cache-2.11-cpan-71847e10f99 )