view release on metacpan or search on metacpan
data/vrt/VeryShortStories.vrt view on Meta::CPAN
system NN system
, , ,
which WDT which
had VBD have
replaced VBN replace
automobiles NNS automobile
when WRB when
most RBS most
of IN of
the DT the
global JJ global
oil NN oil
reserves NNS reserve
were VBD be
depleted VBN deplete
) ) )
. SENT .
</s>
<s>
While IN while
he PP he
$CWB::BinDir; # directory for CWB binaries (executable programs)
$CWB::DefaultRegistry; # compiled-in default registry directory
=cut
# make package configuration variables available
# (values are copied from CWB::Config once, when this module is loaded)
our $Prefix = $CWB::Config::Prefix; # this doesn't say much, as individual install directories may have been overwritten
our $BinDir = $CWB::Config::BinDir; # directory for CWB binaries (executable programs)
our $DefaultRegistry = $CWB::Config::Registry; # compiled-in default registry directory
# global variables: full paths to CWB tools
# NOTE: each path interpolates $BinDir at the time of assignment, so changing
# $BinDir afterwards does NOT update these variables; override them individually.
our $Config = "$BinDir/cwb-config";
our $SEncode = "$BinDir/cwb-s-encode";
our $SDecode = "$BinDir/cwb-s-decode";
our $Encode = "$BinDir/cwb-encode";
our $Decode = "$BinDir/cwb-decode";
our $Lexdecode = "$BinDir/cwb-lexdecode";
our $Makeall = "$BinDir/cwb-makeall";
our $DescribeCorpus = "$BinDir/cwb-describe-corpus";
our $Itoa = "$BinDir/cwb-itoa";
our $Atoi = "$BinDir/cwb-atoi";
With the default setting of 0, B<CWB::Shell::Cmd()> will B<die> if the
error level is 5 or greater. In the B<extra paranoid> setting (+1), it
will almost always B<die> (error level 2 or greater). In the B<less paranoid>
setting (-1) only an error level of 6 (i.e. failure to execute the shell
command) will cause the script to abort.
=cut
# paranoia level: 0 = default, +1 = extra paranoid, -1 = less paranoid
our $Paranoid = 0;
# use global variables and sub to handle warn/die situations
# NOTE(review): presumably these hold the status and command line of the most
# recent shell command so the error handler can report them -- confirm against
# the code that sets them (not visible in this excerpt)
our $return_status = 0;
our $current_cmd = "";
# internal function: raise error (according to current Paranoid setting)
# Error $errlevel, $message [, $message ...];
# error levels are:
# LVL <less p.> <normal> <extra-p.>
# 6 : fatal fatal fatal
# 5 : warn fatal fatal
# 4 : warn warn fatal
lib/CWB/CEQL/Parser.pm view on Meta::CPAN
Create parser object I<$grammar> for the specified grammar (which must be a
class derived from B<CWB::CEQL::Parser>). Note that the parser itself is not
reentrant, but multiple parsers for the same grammar can be run in parallel.
The return value I<$grammar> is an object of class B<MyGrammar>.
=cut
sub new {
  my ($class) = @_;
  ## Initial parser state: slots set to undef are only filled in while a
  ## parse is running, the others start out as empty containers.
  my %state = (
    'PARAM_DEFAULTS' => {},    # globally set default values for parameters
    'PARAM'          => undef, # working copies of parameters during parse
    'INPUT'          => undef, # input string (defined while parsing)
    'ERROR'          => undef, # error message generated by last parse (undef = no error)
    'CALLSTACK'      => [],    # call stack for backtrace in case of error
    'GROUPS'         => undef, # group structure for shift-reduce parser (undef if not active)
    'GROUPSTACK'     => undef, # stack of nested bracketing groups (undef if not active)
  );
  ## the blessed hashref is the return value (object of the requested class)
  return bless(\%state, $class);
}
lib/CWB/CEQL/Parser.pm view on Meta::CPAN
if (not defined $result) {
my $error = $@;
chomp($error); # remove trailing newline
$error = "parse of '' ".$self->{INPUT}." '' returned no result (reason unknown)"
if $error eq "";
$error =~ s/\s*\n\s*/ **::** /g;
$self->{ERROR} = $error;
}
$self->{INPUT} = undef; # no active parse
$self->{PARAM} = undef; # restore global parameter values (PARAM_DEFAULTS)
return $result; # undef if parse failed
}
=item I<@lines_of_text> = I<$grammar>->B<ErrorMessage>;
If the last parse failed, returns a detailed error message and backtrace of
the callstack as a list of text lines (without newlines). Otherwise, returns
an empty list.
=cut
lib/CWB/CEQL/Parser.pm view on Meta::CPAN
(B<GetParam>). The parameter I<$name> must have been defined by the grammar
class (which I<$grammar> is an instance of) and should be described in the
grammar's documentation.
=cut
## $grammar->SetParam($name, $value)
## Assign a new value to an existing grammar parameter.  Croaks if called with
## the wrong number of arguments or if the parameter has not been defined.
## Returns the value of the final assignment.
sub SetParam {
  if (@_ != 3) {
    croak 'Usage: $grammar->SetParam($name, $value)';
  }
  my ($self, $name, $value) = @_;
  ## while a parse is active (INPUT defined) operate on the working copy,
  ## otherwise on the global parameter values (user level)
  my $param_set;
  if (defined $self->{INPUT}) {
    $param_set = $self->{PARAM};
  }
  else {
    $param_set = $self->{PARAM_DEFAULTS};
  }
  if (not exists $param_set->{$name}) {
    croak "CWB::CEQL::Parser: parameter '$name' does not exist";
  }
  $param_set->{$name} = $value;
}
sub GetParam {
croak 'Usage: $grammar->GetParam($name)'
unless @_ == 2;
my ($self, $name) = @_;
lib/CWB/CEQL/Parser.pm view on Meta::CPAN
parameterized grammar. If it is used in a rule body, the new parameter
will be created in the working copy of the parameter set and will only be
available during the current parse.
=cut
## $self->NewParam($name, $default_value)
## Create a new grammar parameter with the given initial value.  Dies with a
## full backtrace (confess) on wrong argument count or if the parameter
## already exists.  Returns the value of the final assignment.
sub NewParam {
  if (@_ != 3) {
    confess 'Usage: $self->NewParam($name, $default_value)';
  }
  my ($self, $name, $value) = @_;
  ## while a parse is active (INPUT defined) operate on the working copy,
  ## otherwise on the global parameter values (user level)
  my $param_set;
  if (defined $self->{INPUT}) {
    $param_set = $self->{PARAM};
  }
  else {
    $param_set = $self->{PARAM_DEFAULTS};
  }
  if (exists $param_set->{$name}) {
    confess "CWB::CEQL::Parser: parameter '$name' already exists, cannot create with NewParam()";
  }
  $param_set->{$name} = $value;
}
=item I<$result> = I<$self>->B<Call>(I<$rule>, I<$input>);
Apply rule I<$rule> to input string I<$input>. The return value I<$result>
depends on the grammar rule, but is usually a string containing a translated
lib/CWB/CEQL/Parser.pm view on Meta::CPAN
=head2 Internal structure of CWB::CEQL::Parser objects
A DPP parser object (i.e. an object that belongs to B<CWB::CEQL::Parser> or
one of its subclasses) is a data structure (hashref) with the following
variables:
=over 4
=item PARAM_DEFAULTS
A hashref containing the global values of grammar parameters, i.e. values set
by the main program for this parser object or the default values defined by
the grammar class.
=item PARAM
Working copy of the grammar parameters, which is used while parsing and may be
modified by grammar rules without affecting the global values. During a
parse, the B<NewParam>, B<SetParam> and B<GetParam> methods operate on this
working copy.
The C<PARAM> variable is re-initialised before each parse with a shallow copy of
the C<PARAM_DEFAULTS> hashref. Therefore, care has to be taken when modifying
complex parameter values within grammar rules, as the changes will affect the
global values in C<PARAM_DEFAULTS>. If complex values need to be changed
internally, the grammar rule should always update the parameter with
B<SetParam> and a deep copy of the previous parameter value.
=item INPUT
The current input string passed to the B<Parse> method. This variable is
mostly used to indicate whether the parser is currently active or not (e.g. in
order to avoid nested B<Parse> calls).
=item ERROR
lib/CWB/CQP.pm view on Meta::CPAN
CWB::CQP - Interact with a CQP process running in the background
=head1 SYNOPSIS
B<TODO: Update synopsis!>
use CWB::CQP;
# start CQP server process in the background
$cqp = new CWB::CQP;
$cqp = new CWB::CQP("-r /corpora/registry", "-I /global/init.cqp");
# check for specified or newer CQP version
$ok = $cqp->check_version($major, $minor, $beta);
# execute CQP command (blocking mode) and check for error
@lines = $cqp->exec($my_cmd);
unless ($cqp->ok) {
@cqp_error_message = $cqp->error_message;
my_error_handler();
}
lib/CWB/CQP.pm view on Meta::CPAN
use sigtrap qw(die PIPE); # catch write errors to background CQP process
## $SIG{'CHLD'} = 'IGNORE'; # it would be nice to reap child processes automatically, but this seems to mess up closing pipes
use CWB;
use Carp;
use FileHandle;
use IPC::Open3;
use IO::Select;
## package global variables
our @CQP_options = "-c"; # always run CQP in child mode
our $CQP_version = "2.2.101"; # required version of CQP (checked at startup)
=head1 METHODS
The following methods are available:
=over 4
=item I<$cqp> = B<new> CWB::CQP;
lib/CWB/Encoder.pm view on Meta::CPAN
my $dir = $self->{DIR}; # check/create data directory
croak "CWB::Encoder: Data directory has not been set.\n"
unless $dir;
if (-d $dir) {
croak "CWB::Encoder: Data directory already exists (overwriting not enabled).\n"
unless $overwrite;
print "Cleaning up data directory $dir ...\n"
if $self->{VERBOSE};
my $dh = new DirHandle $dir;
my @files = grep {-f $_} (glob("$dir/*"), glob("$dir/.*"));
my ($file, $filename);
while (defined($filename = $dh->read)) {
$file = "$dir/$filename";
next unless -f $file; # skip subdirectories etc.
unlink $file;
carp "CWB::Encoder: Can't delete file $file (trying to continue).\n"
if -f $file;
print STDERR "CWB::Encoder: deleting file $file\n"
if $self->{DEBUG};
}
script/cwb-align-import view on Meta::CPAN
pod2usage(-msg => "(Type 'cwb-align-import -h' for more information.)",
-exitval => 1, -verbose => 0) if $ok and @ARGV == 0;
pod2usage(-msg => "SYNTAX ERROR.",
-exitval => 2, -verbose => 0)
unless $ok and @ARGV == 1;
die "Flags -l1, -l2, -s and -k must be specified if -nh option is used.\n"
if $Opt_NH and not($Opt_Source and $Opt_Target and $Opt_Grid and $Opt_Key);
$Opt_Empty = 1 if $Opt_Prune; # -p implies -e
}
## global variables
our ($C1_id, $C2_id, $C1_lc, $C2_lc, $S_id); # source and target corpus name (with lowercase variant) and alignment grid
our ($align_file, $FH); # alignment file and file handle
our ($key_pattern); # pattern used to generate keys that identify regions in the alignment grid
our (%R1, %R2); # hashes mapping keys to [start, end] regions, in source and target corpus
our @Beads; # list of alignment beads, with entries [$l1_start, $l1_end, $l2_start, $l2_end, ($annot)]
SETUP:
{
$align_file = shift @ARGV;
$FH = CWB::OpenFile $align_file;
script/cwb-regedit view on Meta::CPAN
$norm = ":prop" if /^:pr(op)?$/i;
$norm = ":add" if /^:add$/i;
$norm = lc($cmd) if /^:[psa]$/i;
$norm = ":del" if /^:d(el(ete)?)?$/i;
$norm = ":list" if /^:l(ist)?$/i;
}
return $norm;
}
## get a command block from the command line (command plus any number of non-command arguments)
## result is stored in global variables $COMMAND and @ARGS; returns FALSE at end of input
sub get_block {
$COMMAND = "";
@ARGS = ();
return 0
unless @ARGV > 0;
die "Syntax error: expected command, got '$ARGV[0]'\n"
unless is_command();
$COMMAND = match_command();
die "Syntax error: unknown command '$ARGV[0]'\n"
unless $COMMAND;
t/11_cwb_file.t view on Meta::CPAN
}
else {
diag("couldn't open file '$filename'");
}
ok($ok, $name);
}
## try to write & read compressed and uncompressed files
sub test_read_write_file {
my ($ext, $two_arg) = @_;
$tempfile = "/tmp/test_CWB_$$.$ext"; # set global variable for END{} cleanup
my $name = "write/read .$ext file (".(($two_arg) ? 1 : 2)."-argument form)";
my $fh = undef;
my $ok = 0;
if ($two_arg) {
$fh = CWB::OpenFile ">", $tempfile;
}
else {
$fh = CWB::OpenFile "> $tempfile";
}
if ($fh) {
t/31_cqp_dickens.t.dont_run view on Meta::CPAN
# Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl test.pl'
BEGIN { $| = 1; print "Running tests for CWB/CQP.pm:\n"; }
# step 1: load the CQP/Perl library
END {
  # remove temporary corpus files left behind by the test run
  unlink glob "tmp/DICKENS:*";
  # report the first failed stage (if any) and exit with an error status:
  # the flags are set by the main script once the respective step succeeded
  unless ($loaded) {
    print "!! can't load CWB::CQP module (aborted)\n";
    exit 1;
  }
  unless ($deleted) {
    print "!! error deleting CWB::CQP object\n";
    exit 1;
  }
}
t/31_cqp_dickens.t.dont_run view on Meta::CPAN
}
print " ok\n";
# step 11: save results (named queries) to disk
print " - saving named queries ";
for ($i = 0; $i < 8; $i++) {
print ".";
$CQP->exec("save A$i");
}
print " ok\n";
@saved = glob "tmp/DICKENS:A*";
if (@saved != 8) {
print "!! can't find saved query results on disk\n";
exit 1;
}
# step 12: exit and re-start CQP
undef $CQP;
$deleted = 1;
print " - CQP process terminated\n";
$CQP = new CWB::CQP ("-I data/files/init.cqp");