Alt-CWB-ambs
view release on metacpan or search on metacpan
t/40_ceql_parser.t view on Meta::CPAN
qr/syntax error/);
check_error('42 +',
qr/syntax error/);
# next test is T27
## helper routines for testing automatic translation results
sub check_query {
my ($query, $expected, $rule) = @_;
my $result = undef;
my $msg = "parse string ``$query''";
if (defined $rule) {
$result = $grammar->Parse($query, $rule);
$msg .= " as $rule";
}
else {
$result = $grammar->Parse($query);
}
if (defined $result) {
is($result, $expected, $msg);
}
else {
fail($msg);
foreach ($grammar->ErrorMessage) { diag($_) };
}
}
sub check_error {
my ($query, $err_regexp, $rule) = @_;
my $result = undef;
my $msg = "find syntax error in string ``$query''";
if (defined $rule) {
$result = $grammar->Parse($query, $rule);
$msg .= " as $rule";
}
else {
$result = $grammar->Parse($query);
}
if (defined $result) {
fail($msg);
}
else {
like($grammar->HtmlErrorMessage, $err_regexp, $msg);
}
}
########## BEGIN 'SimpleQuery' grammar (from CWB::CEQL::Parser manpage)
package SimpleQuery;
use base 'CWB::CEQL::Parser';
sub wildcard_expression {
my ($self, $input) = @_;
return _wildcard_to_regexp($input);
}
# note leading underscore for internal subroutine (this is not a method!)
sub _wildcard_to_regexp {
my $s = quotemeta(shift);
$s =~ s/\\[?]/./g; # wildcards will also have been escaped with a backslash
$s =~ s/\\([*+])/.$1/g; # works for wildcards * and +
return $s;
}
sub wordform_pattern {
my ($self, $input) = @_;
die "the wordform pattern ''$input'' must not contain whitespace or double quotes\n"
if $input =~ /\s|\"/;
if ($input =~ /^\/(.+)\/$/) {
my $regexp = $1; # regular expression query: simply wrap in double quotes
return "\"$regexp\"";
}
elsif ($input =~ /^\/|\/$/) {
die "missing ''/'' at start/end of pattern: did you intend to use a regular expression?\n";
}
else {
if ($input =~ /[?*+]/) {
my $regexp = $self->Call("wildcard_expression", $input); # call subrule
return "\"$regexp\"";
}
else {
return "\"$input\"\%l";
}
}
}
sub wordform_sequence {
my ($self, $input) = @_;
my @items = split " ", $input;
my @cqp_patterns = $self->Apply("wordform_pattern", @items);
return "@cqp_patterns";
}
sub simple_query {
my ($self, $input) = @_;
my @items = split " ", $input;
my @cqp_tokens = $self->Apply("simple_query_item", @items);
return "@cqp_tokens";
}
# need to define single rule to parse all items of a list with nested bracketing
sub simple_query_item {
my ($self, $item) = @_;
# opening delimiter: (
if ($item eq "(") {
$self->BeginGroup();
return ""; # opening delimiter should not become part of group output
}
# alternatives separator: | (only within nested group)
elsif ($item eq "|") {
die "a group of alternatives (|) must be enclosed in parentheses\n"
unless $self->NestingLevel > 0; # | metacharacter is not allowed at top level
return "|";
}
# closing delimiter: ) with optional quantifier
elsif ($item =~ /^\)([?*+]?)$/) {
my $quantifier = $1;
my @cqp_tokens = $self->EndGroup();
die "empty groups '( )' are not allowed\n"
unless @cqp_tokens > 0;
( run in 1.114 second using v1.01-cache-2.11-cpan-524268b4103 )