Alt-CWB-ambs
view release on metacpan or search on metacpan
my $name = $basedir.$prefix.".$$".$suffix;
my $num = 1;
while (-e $name) { # choose unique name in case file already exists
$name = $basedir.$prefix.".$$-".$num.$suffix;
$num++;
}
my $fh = CWB::OpenFile "> $name";
$self->{NAME} = $name;
$self->{FH} = $fh;
$self->{STATUS} = "W"; # W = writing, F = finished, R = reading, D = deleted
return bless($self, $class);
}
# Destructor: invoke the object's close method unless its STATUS field is
# already "D" (per the constructor's status legend, D = deleted).
sub DESTROY {
    my ($self) = @_;
    $self->close
        if $self->{STATUS} ne "D";
}
=item $tf->close;
INFO => undef, # info file (optional, but highly recommended)
PROPERTIES => [], # corpus properties ([property, value] pairs)
ATT => {}, # attributes (att => 'p' / 's' / 'a')
ATT_PATH => {}, # data paths for attributs
SERIALIZE => [], # order in which attributes are listed in the registry entry
COMMENTS => {}, # comments and/or blank lines preceding each content line
# (att => [comment1, comment2, ...], ':NAME' => [...], '::property' => ...)
LINECOMMENT => {}, # line comments on content lines (att => comment, ':NAME' => comment, ...)
FILENAME => undef, # filename of registry file (if loaded from file)
};
bless($self, $class);
# if filename was specified, try loading registry entry (searches in registry directories if necessary)
if (defined $filename) {
if ($filename !~ /\// and not -f $filename) {
my @dirs = CWB::RegistryDirectory();
my @files = grep { -f $_ } map { "$_/".lc($filename) } @dirs; # corpus ID may be specified in uppercase
return $self->error("Found multiple registry entries for corpus ".uc($filename).":", @files)
if @files > 1;
$filename = shift @files
if @files;
lib/CWB/CEQL.pm view on Meta::CPAN
# Constructor: create a CWB::CEQL::Parser object, register the grammar
# parameters below with their initial values, and re-bless the object into
# the requested class.
sub new {
    my $class = shift;
    # Use explicit class-method syntax: the indirect-object form
    # ("new CWB::CEQL::Parser") is resolved heuristically by the Perl parser
    # and is ambiguous inside a package that defines its own new().
    my $self = CWB::CEQL::Parser->new;
    $self->NewParam("pos_attribute", "pos");
    $self->NewParam("lemma_attribute", "lemma");
    $self->NewParam("simple_pos", undef);
    $self->NewParam("simple_pos_attribute", undef);
    $self->NewParam("s_attributes", { "s" => 1 });
    $self->NewParam("default_ignore_case", 1);
    $self->NewParam("default_ignore_diac", 0);
    return bless($self, $class);
}
=item I<$cqp_query> = I<$CEQL>->B<Parse>(I<$simple_query>);
Parses simple query in CEQL syntax and returns equivalent CQP code. If there
is a syntax error in I<$simple_query> or parsing fails for some other reason,
an B<undef>ined value is returned.
=item @text_lines = I<$CEQL>->B<ErrorMessage>;
lib/CWB/CEQL/Parser.pm view on Meta::CPAN
A typical skeleton of a DPP grammar with parameters looks as follows:
package MyGrammar;
use base 'CWB::CEQL::Parser';
# Constructor: create a parser object, register the grammar's parameters,
# and re-bless it into the grammar class.
sub new {
    my $class = shift;
    # Explicit method call; avoids the heuristically parsed indirect-object
    # form ("new CWB::CEQL::Parser") in a package that defines its own new().
    my $self = CWB::CEQL::Parser->new;
    $self->NewParam("pos_attribute", "pos");
    return bless($self, $class);
}
# Grammar rule: translate a POS tag into a CQP constraint on the attribute
# named by the "pos_attribute" parameter.
# Dies with an error message if $input is not made up solely of uppercase
# ASCII letters and digits.
sub pos_tag {
    my ($self, $input) = @_;
    my $pos_att = $self->GetParam("pos_attribute");
    # \A ... \z anchor the whole string; a plain "$" would also match before
    # a trailing newline and let "TAG\n" slip into the generated constraint.
    die "'$input' does not appear to be a valid POS tag\n"
        unless $input =~ /\A[A-Z0-9]+\z/;
    return "$pos_att = '$input'"; # CQP constraint for POS tag
}
lib/CWB/CEQL/Parser.pm view on Meta::CPAN
my $class = shift;
my $self = {
'PARAM_DEFAULTS' => {}, # globally set default values for parameters
'PARAM' => undef, # working copies of parameters during parse
'INPUT' => undef, # input string (defined while parsing)
'ERROR' => undef, # error message generated by last parse (undef = no error)
'CALLSTACK' => [], # call stack for backtrace in case of error
'GROUPS' => undef, # group structure for shift-reduce parser (undef if not active)
'GROUPSTACK' => undef, # stack of nested bracketing groups (undef if not active)
};
bless($self, $class);
}
=item I<$result> = I<$grammar>->B<Parse>(I<$string> [, I<$rule>]);
Parse input string I<$string> as a constituent of type I<$rule> (if
unspecified, the C<default> rule will be used). The return value I<$result>
is typically a string containing the transformed query, but may also be an
arbitrary data structure or object (such as a parse tree for I<$input>).
Consult the relevant grammar documentation for details. If parsing fails,
B<undef> is returned.
lib/CWB/CEQL/Parser.pm view on Meta::CPAN
confess "CWB::CEQL::Parser: parameter '$name' already exists, cannot create with NewParam()"
if exists $param_set->{$name};
$param_set->{$name} = $value;
}
=item I<$result> = I<$self>->B<Call>(I<$rule>, I<$input>);
Apply rule I<$rule> to input string I<$input>. The return value I<$result>
depends on the grammar rule, but is usually a string containing a translated
version of I<$input>. Grammar rules may also annotate this string with
B<attributes> or by B<bless>ing it into a custom class, or return a complex
data structure such as a parse tree for I<$input>. The caller has to be aware
what kind of value I<$rule> returns.
Note that B<Call> never returns B<undef>. In case of an error, the entire
parse is aborted.
=cut
sub Call {
confess 'Usage: $result = $self->Call($rule, $input);'
lib/CWB/CEQL/String.pm view on Meta::CPAN
=cut
# Constructor: wrap $value (and an optional $type) in a new object with an
# empty attribute table.
#   $class - class to bless the object into
#   $value - stored in the VALUE field
#   $type  - stored in the TYPE field; undef if not specified
sub new {
    my $class = shift;
    my ($value, $type) = @_;
    my %object = (
        VALUE     => $value,
        TYPE      => $type,
        ATTRIBUTE => {},
    );
    return bless(\%object, $class);
}
=item I<$string> = I<$obj>->B<value>;
=item I<$string> = "I<$obj>";
Return string value of B<CWB::CEQL::String> object I<$obj>. Overloading
ensures that this value is accessed automatically if I<$obj> is used in a
string context (such as interpolation).
lib/CWB/CEQL/String.pm view on Meta::CPAN
=cut
# Return a copy of the object, blessed into the same class as the original.
# VALUE and TYPE are copied as-is; the ATTRIBUTE hash is duplicated one level
# deep, so adding or removing attributes on the copy leaves the original alone.
sub copy {
    my ($self) = @_;
    my %attributes = %{$self->{ATTRIBUTE}};
    my %clone = (
        VALUE     => $self->{VALUE},
        TYPE      => $self->{TYPE},
        ATTRIBUTE => \%attributes,
    );
    return bless(\%clone, ref $self);
}
=item I<$result> = I<$obj>->B<cmp>(I<$obj2> [, I<$reverse>]);
The B<cmp> method implements string comparison operators for
B<CWB::CEQL::String> objects. The second operand I<$obj2> must either be a
plain string or another B<CWB::CEQL::String> object. If the optional argument
I<$reverse> is TRUE, the comparison is reversed (so a string as first operand
can be compared with a B<CWB::CEQL::String> object).
lib/CWB/CQP.pm view on Meta::CPAN
## debugging (prints more or less everything on stdout)
$self->{'debug'} = 0;
## select vectors for CQP output (stdout, stderr, stdout|stderr)
$self->{'select_err'} = new IO::Select($err);
$self->{'select_out'} = new IO::Select($out);
$self->{'select_any'} = new IO::Select($err, $out);
## CQP object setup complete
bless($self, $class);
## the following command will collect and ignore any output which may have been produced during startup
$self->exec("set PrettyPrint off"); # pretty-printing should be turned off for non-interactive use
return $self;
}
=item B<undef> I<$cqp>;
Exit CQP background process gracefully by issuing an C<exit;> command.
lib/CWB/Encoder.pm view on Meta::CPAN
$comp = $attr = ""; # reset to check for syntax errors
}
elsif (/Type:\s+([A-Z])/) {
carp "CWB::Indexer: Missing attribute name in output of cwb-describe-corpus $name (skipped).\n"
unless $attr;
$self->{TYPES}->{$attr} = $1;
}
# all other lines are ignored
}
return bless($self, $class);
}
=item $idx->group($group);
=item $idx->perm($permission);
Optional group membership and access permissions for newly created
files (otherwise, neither B<chgrp> nor B<chmod> will be called). Note
that I<$permission> must be a string rather than an octal number (as
for the built-in B<chmod> function). Indexing will fail if the
lib/CWB/Encoder.pm view on Meta::CPAN
PERM => undef, # permissions for created files
OVERWRITE => undef, # can I overwrite existing files?
MEMORY => 75, # passed to CWB::Indexer
VALIDATE => 1, # passed to CWB::Indexer
ENTITIES => 1, # whether to decode XML entities (and skip comments etc.)
UNDEF_SYMBOL => "", # string to insert for missing values of p-attributes
VERBOSE => 0, # print some progress information (stdout)
DEBUG => 0,
PIPE => undef, # pipe to cwb-encode (for encode_pipe() method)
};
bless($self, $class);
$self->name(shift)
if @_;
return $self;
}
=item $enc->name($corpus);
Change the CWB name of a corpus after the encoder object I<$enc> has been created.
Has to be used if the constructor was called without arguments.
t/44_ceql_bncweb.t view on Meta::CPAN
"STOP" => "STOP",
"UNC" => "UNC",
};
$self->SetParam("simple_pos", $table);
$self->SetParam("simple_pos_attribute", "class");
my %xml_tags = map { $_ => 1 } # list of s-attribute regions in the BNC version used by BNCweb
(qw(text u div head quote sp speaker stage lg l list label item note bibl corr hi trunc p s mw), # from CWB registry file
# nested attributes are accepted, but should perhaps better be inserted automagically
qw(div1 div2 div3 quote1 list1 list2 item1 item2 hi1 p1 p2));
$self->SetParam("s_attributes", \%xml_tags);
return bless($self, $class);
}
# BNCweb::CEQL expects its input to be in the canonical BNCweb encoding, i.e. Latin-1 + HTML entities;
# the "default" rule first converts the input to a Perl Unicode string, and then re-encodes the resulting CQP query in Latin-1
sub default {
my ($self, $input) = @_;
my $unicode = decode("iso-8859-1", $input);
##-- # the real implementation uses the HTML::Entities module to decode HTML entities
##-- decode_entities($unicode);
# here, dummy rules covering all entities in the test suite help us to avoid a dependency on the non-standard HTML::Entities module
( run in 2.747 seconds using v1.01-cache-2.11-cpan-de7293f3b23 )