Alvis-NLPPlatform

 view release on metacpan or  search on metacpan

lib/Alvis/NLPPlatform/NLPWrappers.pm  view on Meta::CPAN

    my $sent;
    my $term_regex;
    my $term;
    my $phrase_idx=1;
    my $canonical_form;
    my %corpus;
    my %lc_corpus;
    my $sent_id;
    my $command_line;
    my %corpus_index;
    my %idtrm_select;
    my @tab_results;
    my $semtag;

    my $token_start;
    my $token_end;
    my $offset_start;
    my $offset_end;
    my $offset;

    my $semantic_unit_id_str;

lib/Alvis/NLPPlatform/NLPWrappers.pm  view on Meta::CPAN



    # Term list loading
    #
    # Select the term list, its regex list and the configuration key that
    # match the document language: "FR" picks the French resources, any
    # other language value falls back to the English ones.
    my ($lang_term_list, $lang_regex_list, $term_list_key) =
        ($Alvis::NLPPlatform::Annotation::ALVISLANGUAGE eq "FR")
        ? (\@term_list_FR, \@regex_term_list_FR, 'TERM_LIST_FR')
        : (\@term_list_EN, \@regex_term_list_EN, 'TERM_LIST_EN');

    # The term list file is read (and the regex list derived from it) only
    # while the selected list is still empty.
    unless (@{$lang_term_list}) {
        Alvis::TermTagger::load_TermList($h_config->{'NLP_misc'}->{$term_list_key}, $lang_term_list);
        Alvis::TermTagger::get_Regex_TermList($lang_term_list, $lang_regex_list);
    }

    # Index the lower-cased corpus, select the candidate terms, then tag them;
    # the tagging results are collected in %tabh_sent_terms.
    Alvis::TermTagger::corpus_Indexing(\%lc_corpus, \%corpus_index);
    Alvis::TermTagger::term_Selection(\%corpus_index, $lang_term_list, \%idtrm_select);
    Alvis::TermTagger::term_tagging_offset_tab($lang_term_list, $lang_regex_list, \%idtrm_select, \%corpus, \%tabh_sent_terms);

    # Empty the working hashes now that tagging is done.
    %lc_corpus    = ();
    %corpus_index = ();
    %idtrm_select = ();
    %corpus       = ();

# TODO: take into account the case where a term appears more than once in a sentence

    $i=0;
    for $key (keys %tabh_sent_terms) {
	$sent = $tabh_sent_terms{$key}->[0];
	$term = $tabh_sent_terms{$key}->[1];
	$term_regex = $term;
 	$term_regex =~ s/ /\[ \n\]+/go;



( run in 1.075 second using v1.01-cache-2.11-cpan-49f99fa48dc )