Alt-CWB-ambs

 view release on metacpan or  search on metacpan

lib/CWB/CEQL.pm  view on Meta::CPAN


=over 4

=item C<proximity_query>

A proximity query searches for combinations of words within a certain distance
of each other, specified either as a number of tokens (I<numeric distance>) or
as co-occurrence within an s-attribute region (I<structural distance>).  The
C<proximity_query> rule splits its input into a sequence of token patterns,
distance operators and parentheses used for grouping.  Shorthand notation for
word sequences is expanded (e.g. C<as long as> into C<<< as >>1>> long >>2>>
as >>>), and then the C<proximity_expression> rule is applied to each item in
turn.  A shift-reduce algorithm in C<proximity_expression> reduces the
resulting list into a single CQP query (using the undocumented "MU" notation).

=cut

sub proximity_query {
  my ($self, $input) = @_;
  $input =~ s/(?<!\\)([()])/\t$1\t/g; # separate parentheses and distance operators with TABs
  $input =~ s/(?<!\\)((<<|>>)[^<>\\ ]*(<<|>>))/\t$1\t/g;
  $input =~ s/^\s+//; $input =~ s/\s+$//; # strip leading/trailing whitespace
  my @items = split /\s*\t\s*/, $input; # split on TABs into proximity operators, parentheses, token expressions (removes extra whitespace)
  # pre-process shorthand notation for word sequences (such as "as long as")
  @items = map {
    if (/\s/) {
      my @shorthand = split " ";
      my @expanded =  ("(", $shorthand[0]);
      foreach my $i (1 .. $#shorthand) {
        push @expanded, ">>$i,$i>>", $shorthand[$i];
      }
      push @expanded, ")";
      @expanded;
    }
    else {
      $_; # single token expressions, distance operators and parentheses are passed through
    }
  } @items;
  # now apply proximity_expression rule to each item, which should eventually return a single term
  my @query = $self->Apply("proximity_expression", @items);
  die "incomplete proximity query: expected another term after distance operator\n"
    if @query == 2 and $query[1]->type eq "Op";
  confess "shift-reduce parsing with **proximity_expression** failed to return a single term"

script/cwb-regedit  view on Meta::CPAN

  foreach my $a (@atts) {
    $reg->delete_attribute($a);
  }
}

## add attributes of specified type
sub add_attributes {
  my $type = shift;
  my @atts = @_;
  if ($type eq "s") {           # automatic expansion of s-attribute specs in cwb-encode format
    my @expanded = ();
    foreach my $spec (@_) {
      if ($spec =~ /[:+]/) {
        die "Syntax error in s-attribute specifier '$spec'. Aborted.\n"
          unless $spec =~ /^([a-z0-9_-]+):([0-9])(\+([a-z0-9_+-]+))?$/;
        my $base = $1;
        my $recursion = $2;
        my @xmlatt = ($4) ? split /\+/, $4 : ();
        foreach my $i ("", 1 .. $recursion) {
          foreach my $ext ("", map {"_$_"} @xmlatt) {
            push @expanded, "$base$ext$i";
          }
        }
      }
      else {
        push @expanded, $spec;
      }
    }
    @atts = @expanded;
  }
  my @invalid = grep {not /^[a-z_][a-z0-9_-]*$/} @atts;
  die "Error in :add :$type command: invalid attribute name(s) [@invalid]. Aborted.\n"
    if @invalid;
  print "Adding $type-attributes: @atts\n";
  foreach my $a (@atts) {
    my $exist = $reg->attribute($a);
    if ($exist) {
      die "Error: attribute '$a' already declared as $exist-attribute. Aborted.\n"
        unless $exist eq $type;



( run in 1.460 second using v1.01-cache-2.11-cpan-97f6503c9c8 )