Alvis-NLPPlatform
view release on metacpan or search on metacpan
lib/Alvis/NLPPlatform/NLPWrappers.pm view on Meta::CPAN
# Lexical working variables.
# NOTE(review): the trailing notes below describe how the same-named variables
# are used by the term list loading code of this file; that code is presumably
# in the same scope as these declarations -- confirm against the full source.
# Variables without a note are not used in any code visible to this review.
my $sent;           # receives element [0] of a %tabh_sent_terms entry
my $term_regex;     # copy of $term in which each space is replaced by "[ \n]+"
my $term;           # receives element [1] of a %tabh_sent_terms entry
my $phrase_idx=1;   # starts at 1
my $canonical_form;
my %corpus;         # passed to Alvis::TermTagger::term_tagging_offset_tab
my %lc_corpus;      # passed to Alvis::TermTagger::corpus_Indexing; presumably a lower-cased corpus -- TODO confirm
my $sent_id;
my $command_line;
my %corpus_index;   # passed to corpus_Indexing and term_Selection
my %idtrm_select;   # passed to term_Selection and term_tagging_offset_tab
my @tab_results;
my $semtag;
my $token_start;
my $token_end;
my $offset_start;
my $offset_end;
my $offset;
my $semantic_unit_id_str;
lib/Alvis/NLPPlatform/NLPWrappers.pm view on Meta::CPAN
# Term list loading and term tagging.
# The FR term list, regex list and config key are used when ALVISLANGUAGE is
# "FR"; the EN ones are used for every other value.
{
    my ($list_cfg_key, $lang_terms, $lang_regexes) =
        ($Alvis::NLPPlatform::Annotation::ALVISLANGUAGE eq "FR")
        ? ('TERM_LIST_FR', \@term_list_FR, \@regex_term_list_FR)
        : ('TERM_LIST_EN', \@term_list_EN, \@regex_term_list_EN);

    # Skip loading when the selected term list already has entries.
    unless (@{$lang_terms}) {
        Alvis::TermTagger::load_TermList($h_config->{'NLP_misc'}->{$list_cfg_key}, $lang_terms);
        Alvis::TermTagger::get_Regex_TermList($lang_terms, $lang_regexes);
    }

    # Same call sequence for both languages; only the term/regex lists differ.
    Alvis::TermTagger::corpus_Indexing(\%lc_corpus, \%corpus_index);
    Alvis::TermTagger::term_Selection(\%corpus_index, $lang_terms, \%idtrm_select);
    Alvis::TermTagger::term_tagging_offset_tab($lang_terms, $lang_regexes, \%idtrm_select, \%corpus, \%tabh_sent_terms);
}

# Empty the working hashes now that the tagging calls have returned.
for my $work_hash (\%lc_corpus, \%corpus_index, \%idtrm_select, \%corpus) {
    %{$work_hash} = ();
}
# TODO: take into account the case where terms appear at least twice in a sentence
$i=0;
# Walk every entry of %tabh_sent_terms (the hash handed to
# Alvis::TermTagger::term_tagging_offset_tab earlier in this file).
for $key (keys %tabh_sent_terms) {
# Each entry is an array reference: element [0] goes to $sent, element [1] to $term.
$sent = $tabh_sent_terms{$key}->[0];
$term = $tabh_sent_terms{$key}->[1];
# Derive a pattern string from the term: every single space becomes the
# character class "[ \n]+" (one or more spaces or newlines).
# The /o flag has no effect here because the pattern interpolates no variable.
# NOTE(review): the other characters of $term are copied unescaped; if
# $term_regex is later used as a regex, metacharacters in the term would be
# interpreted -- confirm against the rest of the loop.
$term_regex = $term;
$term_regex =~ s/ /\[ \n\]+/go;
( run in 1.075 second using v1.01-cache-2.11-cpan-49f99fa48dc )