Alt-CWB-ambs

 view release on metacpan or  search on metacpan

t/40_ceql_parser.t  view on Meta::CPAN

            qr/syntax error/);
check_error('42 +',
            qr/syntax error/);

# next test is T27

## helper routines for testing automatic translation results
sub check_query {
  my ($query, $expected, $rule) = @_;
  my $result = undef;
  my $msg = "parse string ``$query''";
  if (defined $rule) {
    $result = $grammar->Parse($query, $rule);
    $msg .= " as $rule";
  }
  else {
    $result = $grammar->Parse($query);
  }
  if (defined $result) {
    is($result, $expected, $msg);
  }
  else {
    fail($msg);
    foreach ($grammar->ErrorMessage) { diag($_) };
  }
}

sub check_error {
  my ($query, $err_regexp, $rule) = @_;
  my $result = undef;
  my $msg = "find syntax error in string ``$query''";
  if (defined $rule) {
    $result = $grammar->Parse($query, $rule);
    $msg .= " as $rule";
  }
  else {
    $result = $grammar->Parse($query);
  }
  if (defined $result) {
    fail($msg);
  }
  else {
    like($grammar->HtmlErrorMessage, $err_regexp, $msg);
  }
}


########## BEGIN 'SimpleQuery' grammar (from CWB::CEQL::Parser manpage)

package SimpleQuery;
use base 'CWB::CEQL::Parser';

sub wildcard_expression {
  my ($self, $input) = @_;
  return _wildcard_to_regexp($input);
}

# note leading underscore for internal subroutine (this is not a method!)
sub _wildcard_to_regexp {
  my $s = quotemeta(shift);
  $s =~ s/\\[?]/./g;  # wildcards will also have been escaped with a backslash
  $s =~ s/\\([*+])/.$1/g;  # works for wildcards * and +
  return $s;
}

sub wordform_pattern {
  my ($self, $input) = @_;
  die "the wordform pattern ''$input'' must not contain whitespace or double quotes\n"
    if $input =~ /\s|\"/;
  if ($input =~ /^\/(.+)\/$/) {
    my $regexp = $1; # regular expression query: simply wrap in double quotes
    return "\"$regexp\"";
  }
  elsif ($input =~ /^\/|\/$/) {
    die "missing ''/'' at start/end of pattern: did you intend to use a regular expression?\n";
  }
  else {
    if ($input =~ /[?*+]/) {
      my $regexp = $self->Call("wildcard_expression", $input); # call subrule
      return "\"$regexp\"";
    }
    else {
      return "\"$input\"\%l";
    }
  }
}

sub wordform_sequence {
  my ($self, $input) = @_;
  my @items = split " ", $input;
  my @cqp_patterns = $self->Apply("wordform_pattern", @items);
  return "@cqp_patterns";
}

sub simple_query {
  my ($self, $input) = @_;
  my @items = split " ", $input;
  my @cqp_tokens = $self->Apply("simple_query_item", @items);
  return "@cqp_tokens";
}

# need to define single rule to parse all items of a list with nested bracketing
sub simple_query_item {
  my ($self, $item) = @_;
  # opening delimiter: (
  if ($item eq "(") {
    $self->BeginGroup();
    return "";  # opening delimiter should not become part of group output
  }
  # alternatives separator: | (only within nested group)
  elsif ($item eq "|") {
    die "a group of alternatives (|) must be enclosed in parentheses\n"
      unless $self->NestingLevel > 0; # | metacharacter is not allowed at top level
    return "|";
  }
  # closing delimiter: ) with optional quantifier
  elsif ($item =~ /^\)([?*+]?)$/) {
    my $quantifier = $1;
    my @cqp_tokens = $self->EndGroup();
    die "empty groups '( )' are not allowed\n"
      unless @cqp_tokens > 0;



( run in 1.114 second using v1.01-cache-2.11-cpan-524268b4103 )