Alt-CWB-ambs
view release on metacpan or search on metacpan
lib/CWB/CEQL.pm view on Meta::CPAN
=over 4
=item C<proximity_query>
A proximity query searches for combinations of words within a certain distance
of each other, specified either as a number of tokens (I<numeric distance>) or
as co-occurrence within an s-attribute region (I<structural distance>). The
C<proximity_query> rule splits its input into a sequence of token patterns,
distance operators and parentheses used for grouping. Shorthand notation for
word sequences is expanded (e.g. C<as long as> into C<<< as >>1>> long >>2>>
as >>>), and then the C<proximity_expression> rule is applied to each item in
turn. A shift-reduce algorithm in C<proximity_expression> reduces the
resulting list into a single CQP query (using the undocumented "MU" notation).
=cut
sub proximity_query {
my ($self, $input) = @_;
$input =~ s/(?<!\\)([()])/\t$1\t/g; # separate parentheses and distance operators with TABs
$input =~ s/(?<!\\)((<<|>>)[^<>\\ ]*(<<|>>))/\t$1\t/g;
$input =~ s/^\s+//; $input =~ s/\s+$//; # strip leading/trailing whitespace
my @items = split /\s*\t\s*/, $input; # split on TABs into proximity operators, parentheses, token expressions (removes extra whitespace)
# pre-process shorthand notation for word sequences (such as "as long as")
@items = map {
if (/\s/) {
my @shorthand = split " ";
my @expanded = ("(", $shorthand[0]);
foreach my $i (1 .. $#shorthand) {
push @expanded, ">>$i,$i>>", $shorthand[$i];
}
push @expanded, ")";
@expanded;
}
else {
$_; # single token expressions, distance operators and parentheses are passed through
}
} @items;
# now apply proximity_expression rule to each item, which should eventually return a single term
my @query = $self->Apply("proximity_expression", @items);
die "incomplete proximity query: expected another term after distance operator\n"
if @query == 2 and $query[1]->type eq "Op";
confess "shift-reduce parsing with **proximity_expression** failed to return a single term"
script/cwb-regedit view on Meta::CPAN
foreach my $a (@atts) {
$reg->delete_attribute($a);
}
}
## add attributes of specified type
sub add_attributes {
my $type = shift;
my @atts = @_;
if ($type eq "s") { # automatic expansion of s-attribute specs in cwb-encode format
my @expanded = ();
foreach my $spec (@_) {
if ($spec =~ /[:+]/) {
die "Syntax error in s-attribute specifier '$spec'. Aborted.\n"
unless $spec =~ /^([a-z0-9_-]+):([0-9])(\+([a-z0-9_+-]+))?$/;
my $base = $1;
my $recursion = $2;
my @xmlatt = ($4) ? split /\+/, $4 : ();
foreach my $i ("", 1 .. $recursion) {
foreach my $ext ("", map {"_$_"} @xmlatt) {
push @expanded, "$base$ext$i";
}
}
}
else {
push @expanded, $spec;
}
}
@atts = @expanded;
}
my @invalid = grep {not /^[a-z_][a-z0-9_-]*$/} @atts;
die "Error in :add :$type command: invalid attribute name(s) [@invalid]. Aborted.\n"
if @invalid;
print "Adding $type-attributes: @atts\n";
foreach my $a (@atts) {
my $exist = $reg->attribute($a);
if ($exist) {
die "Error: attribute '$a' already declared as $exist-attribute. Aborted.\n"
unless $exist eq $type;
( run in 1.460 second using v1.01-cache-2.11-cpan-97f6503c9c8 )