view release on metacpan or search on metacpan
- Correction in the Yatea wrapper in the handling of the output file.
0.4 - Correction in the function sigint handler : nlp_host and
nlp_port are now declared as global.
- Correction in the TermTagging : language switch was well
taken into account
- Correction in the management of the ".proc_id" file
- correction in the computing of the xml rendering time
(the variable is set to zero ;-)
- stderr is redirected to a log file when NLP tools are called
- addition of a variable DEBUG defining a debug mode (temporary
files are not removed)
- alvis-nlp-standalone can read a file given in argument or on
the STDIN stream
- Documentation of the modules and scripts is gathered at the
end of each file
- Addition of DTD and XSD files in the documentation (etc
directory)
- Additional functionality: Loading files in various formats
(PDF, LaTeX, Word, etc.) before carrying out linguistic
annotations.
examples/InputDocument.xml view on Meta::CPAN
<section>unparseable_id()</section> unparseable_id($id)
<section>The method checks if the id has been parsed or not. If not, it prints a warning.</section></section></section>
<section title="PLATFORM CONFIGURATION">
<section>PLATFORM CONFIGURATION</section>
<section>The configuration file of the NLP Platform is composed of global variables and divided into several sections:</section>
<section>Global variables.
<section>The two mandatory variables are ALVISTMP and PRESERVEWHITESPACE (in the XML_INPUT section).</section>
<section>
<section>ALVISTMP : it defines the temporary directory used during the annotation process. The files are recorded in (XML files and input/output of the NLP tools) during the annotation step. It must be writable to the user the process...
<section>
<section>DEBUG : this variable indicates if the NLP platform is run in a debug mode or not. The value are 1 (debug mode) or 0 (no debug mode). Default value is 0. The main consequence of the debug mode is to keep the temporary file.</...
<section>Additional variables and environment variables can be used if they are interpolated in the configuration file. For instance, in the default configuration file, we add</section>
<section>
<section>PLATFORM_ROOT : directory where the NLP tools and resources are installed.</section></section>
<section>
<section>NLP_tools_root : root directory where the NLP tools are installed</section></section>
<section>
<section>AWK : path for awk</section></section>
<section>
<section>SEMTAG_EN_DIR : directory where the semantic tagger is installed</section></section>
<section>
lib/Alvis/NLPPlatform.pm view on Meta::CPAN
=item *
C<ALVISTMP> : it defines the temporary directory used during the
annotation process. The files (XML files and input/output of the
NLP tools) are recorded in it during the annotation step. It must
be writable by the user the process is running as.
=item *
C<DEBUG> : this variable indicates whether the NLP platform is run in
debug mode or not. The value is 1 (debug mode) or 0 (no debug
mode). Default value is 0. The main consequence of the debug mode is
to keep the temporary files.
=back
Additional variables and environment variables can be used if they
are interpolated in the configuration file. For instance, in the
default configuration file, we add
=over
lib/Alvis/NLPPlatform/ParseConstituents.pm view on Meta::CPAN
use Alvis::NLPPlatform;
use Data::Dumper;
use warnings;
our $VERSION=$Alvis::NLPPlatform::VERSION;
# File-scoped state shared by the grammar actions of this parser.

# NOTE(review): not referenced in the visible actions; presumably the
# document hash handed in by the caller -- confirm.
my $doc_hash;
# Offset added to the constituent counter when building "phrase<N>" ids.
my $decal_phrase_idx;
# Any value other than 0 enables the "*** DEBUG ***" traces on STDERR.
my $debug_mode=0;
# Current nesting level: incremented when a constituent is opened,
# decremented when one is closed.
my $lconst = 0;
# Scalar reference (always dereferenced as $$nconst) to the running
# constituent counter.
my $nconst;
# Indexed by nesting level: number of the constituent open at that level.
my @tab_nconst;
# my @tab_type;
# my @tab_string;
# Array reference indexed by constituent number: type of the constituent,
# stored when it is opened and compared again when it is closed.
my $tab_type_ref;
# Array reference indexed by constituent number: array ref of the
# "phrase<N>" / "word<N>" ids collected for that constituent.
my $tab_string_ref;
# Last word token that was recorded; a token equal to it is not recorded
# a second time.
my $lastword="";
lib/Alvis/NLPPlatform/ParseConstituents.pm view on Meta::CPAN
if($lconst>0){
push @{$$tab_string_ref[$tab_nconst[$lconst]]} , "phrase" . ($decal_phrase_idx+$$nconst+1) ;
}
$lconst++;
$$nconst++;
$tab_nconst[$lconst]=$$nconst;
# get type
$$tab_type_ref[$tab_nconst[$lconst]]=$_[1];
print STDERR "*** DEBUG *** Opened constituent $$nconst with type ".$_[1]."\n" unless ($debug_mode==0);
}
],
[#Rule 13
'open', 2,
sub
#line 78 "lib/Alvis/NLPPlatform/ParseConstituents.yp"
{ $_[0]->YYErrok }
],
[#Rule 14
'close', 1,
sub
#line 81 "lib/Alvis/NLPPlatform/ParseConstituents.yp"
{
# check type
print STDERR $_[1] unless ($debug_mode==0);
if($_[1] ne $$tab_type_ref[$tab_nconst[$lconst]]){
print STDERR "Error found at level $lconst: types don't match!\n";
exit 0;
}
# remove ending space
# $$tab_string_ref[$tab_nconst[$lconst]] =~ s/\s+$//sgo;
# close constituent
print STDERR "*** DEBUG *** Closing constituent $tab_nconst[$lconst]\n" unless ($debug_mode==0);
$lconst--;
}
],
[#Rule 15
'close', 2,
sub
#line 94 "lib/Alvis/NLPPlatform/ParseConstituents.yp"
{ $_[0]->YYErrok }
],
[#Rule 16
lib/Alvis/NLPPlatform/ParseConstituents.pm view on Meta::CPAN
'chunk', 2,
sub
#line 98 "lib/Alvis/NLPPlatform/ParseConstituents.yp"
{ $_[0]->YYErrok }
],
[#Rule 19
'word', 1,
sub
#line 101 "lib/Alvis/NLPPlatform/ParseConstituents.yp"
{
print STDERR "*** DEBUG *** Found string '".$_[1] . "'\n" unless ($debug_mode==0);
if((defined $$tab_string_ref[$tab_nconst[$lconst]])
&&(scalar(@{$$tab_string_ref[$tab_nconst[$lconst]]}) != 0)){
print STDERR "*** DEBUG *** Appended to previously found string\n" unless ($debug_mode==0);
# $$tab_string_ref[$tab_nconst[$lconst]].=$_[0]->text;
if(($_[1] eq $lastword) || ($_[1]=~/^\./)){
}else{
push @{$$tab_string_ref[$tab_nconst[$lconst]]}, "word" . $$word_id_np_ref ;
$$word_id_np_ref++;
$lastword=$_[1];
}
}else{
# $$tab_string_ref[$tab_nconst[$lconst]]=$_[0]->text;
if(!(($_[1] eq $lastword)||($_[1] =~ /^\./))){
lib/Alvis/NLPPlatform/ParseConstituents.yp view on Meta::CPAN
use Alvis::NLPPlatform;
use Data::Dumper;
use warnings;
our $VERSION=$Alvis::NLPPlatform::VERSION;
# File-scoped state shared by the grammar actions below.

# NOTE(review): not referenced in the visible actions; presumably the
# document hash handed in by the caller -- confirm.
my $doc_hash;
# Offset added to the constituent counter when building "phrase<N>" ids.
my $decal_phrase_idx;
# Any value other than 0 enables the "*** DEBUG ***" traces on STDERR.
my $debug_mode=0;
# Current nesting level: incremented by the open rule, decremented by
# the close rule.
my $lconst = 0;
# Scalar reference (always dereferenced as $$nconst) to the running
# constituent counter.
my $nconst;
# Indexed by nesting level: number of the constituent open at that level.
my @tab_nconst;
# my @tab_type;
# my @tab_string;
# Array reference indexed by constituent number: type of the constituent,
# stored by the open rule and compared again by the close rule.
my $tab_type_ref;
# Array reference indexed by constituent number: array ref of the
# "phrase<N>" / "word<N>" ids collected for that constituent.
my $tab_string_ref;
# Last word token that was recorded; a token equal to it is not recorded
# a second time.
my $lastword="";
lib/Alvis/NLPPlatform/ParseConstituents.yp view on Meta::CPAN
if($lconst>0){
push @{$$tab_string_ref[$tab_nconst[$lconst]]} , "phrase" . ($decal_phrase_idx+$$nconst+1) ;
}
$lconst++;
$$nconst++;
$tab_nconst[$lconst]=$$nconst;
# get type
$$tab_type_ref[$tab_nconst[$lconst]]=$_[1];
print STDERR "*** DEBUG *** Opened constituent $$nconst with type ".$_[1]."\n" unless ($debug_mode==0);
}
| error '\nopen: ' { $_[0]->YYErrok }
;
close: CLOSE {
# Action run when a CLOSE token is reduced: it verifies that the closing
# type matches the type of the constituent open at the current nesting
# level, then leaves that level.
# $_[1] is the semantic value of the CLOSE token, used here as the
# constituent type.
# Debug mode only: echo the closing type.
print STDERR $_[1] unless ($debug_mode==0);
# The type stored when the constituent was opened must be the same as
# the closing type.
if($_[1] ne $$tab_type_ref[$tab_nconst[$lconst]]){
print STDERR "Error found at level $lconst: types don't match!\n";
# NOTE(review): this ends the whole process, and with exit status 0
# although a mismatch is an error -- confirm this is intended.
exit 0;
}
# Disabled: trailing whitespace removal, left over from the time the
# constituent content was stored as a string.
# $$tab_string_ref[$tab_nconst[$lconst]] =~ s/\s+$//sgo;
# Close the constituent: go back to the enclosing nesting level.
print STDERR "*** DEBUG *** Closing constituent $tab_nconst[$lconst]\n" unless ($debug_mode==0);
$lconst--;
}
| error '\nclose: ' { $_[0]->YYErrok }
;
chunk: word chunk | word
| error '\nchunk: ' { $_[0]->YYErrok }
;
word: WORD {
print STDERR "*** DEBUG *** Found string '".$_[1] . "'\n" unless ($debug_mode==0);
if((defined $$tab_string_ref[$tab_nconst[$lconst]])
&&(scalar(@{$$tab_string_ref[$tab_nconst[$lconst]]}) != 0)){
print STDERR "*** DEBUG *** Appended to previously found string\n" unless ($debug_mode==0);
# $$tab_string_ref[$tab_nconst[$lconst]].=$_[0]->text;
if(($_[1] eq $lastword) || ($_[1]=~/^\./)){
}else{
push @{$$tab_string_ref[$tab_nconst[$lconst]]}, "word" . $$word_id_np_ref ;
$$word_id_np_ref++;
$lastword=$_[1];
}
}else{
# $$tab_string_ref[$tab_nconst[$lconst]]=$_[0]->text;
if(!(($_[1] eq $lastword)||($_[1] =~ /^\./))){
lib/Alvis/NLPPlatform/UserNLPWrappers.pm view on Meta::CPAN
print STDERR $Lingua::YaTeA::process_counter++ . ") " . ($yatea->getMessageSet->getMessage('PARSING')->getContent($yatea->getOptionSet->getDisplayLanguage)) . "\n";
$phrase_set->parseProgressively($yatea->getTagSet,$yatea->getOptionSet->getParsingDirection,$yatea->getParsingPatternSet,$yatea->getChunkingDataSet,$corpus->getLexicon,$corpus->getSentenceSet,$yatea->getMessageSet,$yatea->getOptionSet->getDisplay...
$phrase_set->addTermCandidates($yatea->getOptionSet);
print STDERR $Lingua::YaTeA::process_counter++ . ") " . ($yatea->getMessageSet->getMessage('RESULTS')->getContent($yatea->getOptionSet->getDisplayLanguage)) . "\n";
# coments to keep
print STDERR "\t-" . ($yatea->getMessageSet->getMessage('DISPLAY_RAW')->getContent($yatea->getOptionSet->getDisplayLanguage)) . "\'". $corpus->getOutputFileSet->getFile('debug')->getPath . "'\n";
$phrase_set->printPhrases(FileHandle->new(">" . $corpus->getOutputFileSet->getFile('debug')->getPath));
$phrase_set->printUnparsable($corpus->getOutputFileSet->getFile('unparsable'));
print STDERR "\t-" . ($yatea->getMessageSet->getMessage('DISPLAY_TC_XML')->getContent($yatea->getOptionSet->getDisplayLanguage)) . "\'". $corpus->getOutputFileSet->getFile('candidates')->getPath . "'\n";
#
if ((exists $h_config->{"XML_OUTPUT"}->{"YATEA"}) && ($h_config->{"XML_OUTPUT"}->{"YATEA"} == 1)) {
$phrase_set->printTermCandidatesXML("stdout",$yatea->getTagSet);
exit;
} else {
# $phrase_set->printTermCandidatesXML($corpus->getOutputFileSet->getFile("candidates"),$yatea->getTagSet);
lib/Alvis/NLPPlatform/UserNLPWrappers.pm view on Meta::CPAN
return $decal_phrase_idx+$csti-1;
}
sub parse_constituents_old {
my $constituents=$_[0];
my $tmpptr=$_[1];
my $decal_phrase_idx=$_[1];
my $doc_hash=$_[2];
my $lexer;
my $debug_mode=0;
my $lconst=0;
my $nconst=0;
my @tab_nconst;
my @tab_type;
my @tab_string;
my $word_count=$word_id_np;
my $lastword="";
lib/Alvis/NLPPlatform/UserNLPWrappers.pm view on Meta::CPAN
}
$lconst++;
$nconst++;
$tab_nconst[$lconst]=$nconst;
# get type
$tab_type[$tab_nconst[$lconst]]=$1;
$lexer->end('type1');
$lexer->start('string');
print STDERR "*** DEBUG *** Opened constituent $nconst with type ".$1."\n" unless ($debug_mode==0);
},
# CLOSE
'type2','([A-Z]+)\]', sub {
# check type
if($1 ne $tab_type[$tab_nconst[$lconst]]){
print STDERR "Error found at level $lconst: types don't match!\n";
$lexer->end('ALL');
exit 0;
}
# remove ending space
$tab_string[$tab_nconst[$lconst]]=~s/\s+$//sgo;
# close constituent
print STDERR "*** DEBUG *** Closing constituent $tab_nconst[$lconst]\n" unless ($debug_mode==0);
$lconst--;
},
# STRING
'string','[^\s]+\s', sub {
print STDERR "*** DEBUG *** Found string '".$_[0]->text."'\n" unless ($debug_mode==0);
if((defined $tab_string[$tab_nconst[$lconst]])&&($tab_string[$tab_nconst[$lconst]] ne "")){
print STDERR "*** DEBUG *** Appended to previously found string\n" unless ($debug_mode==0);
# $tab_string[$tab_nconst[$lconst]].=$_[0]->text;
if(($_[0]->text eq $lastword) || ($_[0]->text=~/^\./)){
}else{
$tab_string[$tab_nconst[$lconst]].="word$word_id_np ";
$word_id_np++;
$lastword=$_[0]->text;
}
}else{
# $tab_string[$tab_nconst[$lconst]]=$_[0]->text;
if(!(($_[0]->text eq $lastword)||($_[0]->text=~/^\./))){