Lingua-Ogmios

 view release on metacpan or  search on metacpan

lib/Lingua/Ogmios/NLPWrappers/LexicalSyntacticPatterns.pm  view on Meta::CPAN

    my $term2;
    my @terms1;
    my @terms2;
    my $document;
    my $relation;

    warn "[LOG] LexicalSyntacticPatterns\n";

    $lang = $self->_documentSet->[0]->getAnnotations->getLanguage;
    warn "open " . $self->_input_filename . " (2)\n";

    open FILEINPUT, "<:utf8", $self->_input_filename or die "No such file " . $self->_input_filename;

    # open FILEINPUT, "<:utf8", "/home/thierry/Recherche/Projets/2011AIR-REACH/Data/GuidesV2/ogmios.metis.30920.LexicalSyntacticPatterns.in" or die "No such file " . "/home/thierry/Recherche/Projets/2011AIR-REACH/Data/GuidesV2/ogmios.metis.30920.Lexi...
    # open FILEINPUT, "<:utf8", "/export/home/limbio/hamon/Research/Projets/2011AIR-REACH/Data/GuidesV2/ogmios.metis.30920.LexicalSyntacticPatterns.in" or die "No such file " . "/export/home/limbio/hamon/Research/Projets/2011AIR-REACH/Data/GuidesV2/o...

#	$lang = $self->_documentSet->[0]->getAnnotations->getLanguage;

    while ($sentence = <FILEINPUT>) {
	chomp $sentence;
	# warn "--> $sentence\n";
	foreach $pattern (@{$self->{"Resources"}->{$lang}->{"PATTERNS"}->[1]}) {
	    # warn $pattern->{'pattern'} . "\n";
	    # foreach $sentence (@{$self->_input_hash->{'sentences'}}) {
	    if ($sentence =~ m!$pattern->{'pattern'}!) {
		# warn "OK\n";
		# warn $pattern->{'element1'}->{'role'} . ": " . $+{$pattern->{'element1'}->{'ID'}} . "\n";
		# warn $pattern->{'element2'}->{'role'} . ": " . $+{$pattern->{'element2'}->{'ID'}} . "\n";
		if ($pattern->{'element1'}->{'type'} eq "list") {
		    ($document, @terms1) = $self->_extractTermsFromList($lang, $+{$pattern->{'element1'}->{'ID'}});
		} else {
		    ($term1, $document) = $self->_extractInfos($+{$pattern->{'element1'}->{'ID'}});
		    @terms1 = ($term1);
		}
		if ($pattern->{'element2'}->{'type'} eq "list") {
		    ($document, @terms2) = $self->_extractTermsFromList($lang, $+{$pattern->{'element2'}->{'ID'}});
		} else {
		    ($term2, $document) = $self->_extractInfos($+{$pattern->{'element2'}->{'ID'}});
		    @terms2 = ($term2);
		}
		foreach $term1 (@terms1) {
		    foreach $term2 (@terms2) {
			$relation = $self->_addRelation($document, $pattern->{'relation'} . " (" . $pattern->{'element1'}->{'role'} . "-" . $pattern->{'element2'}->{'role'} . ")", [$term1, $term2]);
			if (defined $relation) {
			    # warn "==>" . $relation->getId . " " . $pattern->{'relation'} . "\n";
			    push @{$self->_addedElements_array}, [$relation, $document->getId];
			}
		    }
		}
	    }
	}
    }
    close FILEINPUT;

    warn "[LOG]\n";
}

# Resolve one slash-separated token description into a term unit.
#
# $string is split on "/" into positional fields; only three are used here:
# the 2nd (a tag), the 4th (an element id) and the 5th (an index into the
# document set). The remaining fields are ignored by this method.
#
# When the tag is "term", the semantic unit with that id is looked up in the
# document's semantic unit level. Otherwise the id is looked up in the word
# level and a new term is created from that word and registered with the
# document's annotations.
#
# Returns the list ($termUnit, $document).
sub _extractInfos {
    my ($self, $string) = @_;

    my (undef, $tag, undef, $element_id, $doc_index) = split m!/!, $string;

    my $document = $self->_documentSet->[$doc_index];

    # The token already denotes a term: fetch the existing semantic unit.
    if ($tag eq "term") {
        my $known_unit = $document->getAnnotations->getSemanticUnitLevel->getElementById($element_id);
        return($known_unit, $document);
    }

    # Plain word: wrap it in a freshly created term and attach that term
    # to the document it came from.
    my $word = $document->getAnnotations->getWordLevel->getElementById($element_id);
    my $created_unit = Lingua::Ogmios::Annotations::SemanticUnit->newTerm(
        {
            'form'       => $word->getForm,
            'refid_word' => $word,
        }
    );
    $document->getAnnotations->addSemanticUnit($created_unit);

    return($created_unit, $document);
}

# Collect the term units described by a list string.
#
# The term pattern for $lang is read from the third entry of the language's
# "PATTERNS" resource. $list is scanned left to right for successive matches
# of that pattern (each optionally preceded by a single space); every matched
# substring is handed to _extractInfos and the resulting term is collected.
#
# Returns ($document, @terms), where $document is the one reported for the
# last matched substring (undef when nothing matched).
sub _extractTermsFromList {
    my ($self, $lang, $list) = @_;

    my $term_pattern = $self->{"Resources"}->{$lang}->{"PATTERNS"}->[2];
    my @terms;
    my $document;

    while ($list =~ / ?($term_pattern)/gc) {
        # Copy the capture before calling out, so later regex activity
        # cannot disturb it.
        my $matched_token = $1;
        (my $term, $document) = $self->_extractInfos($matched_token);
        push @terms, $term;
    }

    return($document, @terms);
}


sub _inputLexicalSyntacticPatterns {
    my ($self) = @_;

    $self->getTimer->startsLap("making input");
    warn "[LOG] making LexicalSyntacticPatterns input\n";

    $self->_input_hash->{'sentences'} = {};

    warn "open " . $self->_input_filename . "(1)\n";

    open FILEINPUT, ">:utf8", $self->_input_filename or die "No such file " . $self->_input_filename;

    $self->_makeTaggedSentences($self->_input_hash->{'sentences'}, \*FILEINPUT, "/", " ");

    close FILEINPUT;
#    $self->getTimer->_printTimesBySteps;



( run in 0.453 second using v1.01-cache-2.11-cpan-71847e10f99 )