CWB
view release on metacpan or search on metacpan
t/31_cqp_queries.t view on Meta::CPAN
is(@rows, 54, "count by lemma produces correct number of items"); # T27
%expected_counts = qw(be 7 have 5 watch 3 make 2);
is_deeply(rows2hash([@rows[0 .. 3]], 2, 0), \%expected_counts, "count by lemma produces correct frequency counts"); # T28
# advanced group and count
$cqp->exec("PP = /pp[]");
($n_matches) = $cqp->exec("size PP");
ok($n_matches > 500, "simple PP query"); # T29
@rows = $cqp->exec_rows("count PP by lemma cut 3");
%expected_counts = ("in front" => 4, "on the screen" => 3);
is_deeply(rows2hash(\@rows, 2, 0), \%expected_counts, "count PPs by lemma with cut"); # T30
@rows = $cqp->exec_rows("count PP by lemma on match[1] .. matchend cut 6");
%expected_counts = ("the girl" => 7, "the screen" => 6);
is_deeply(rows2hash(\@rows, 2, 0), \%expected_counts, "count PPs by lemma without first token"); # T31
@rows = $cqp->exec_rows("group PP matchend lemma by match word cut 3");
%expected_counts = ("in front" => 4, "on screen" => 4, "of window" => 4, "at time" => 3);
@rows = map { [$_->[0]." ".$_->[1], $_->[2]] } @rows;
is_deeply(rows2hash(\@rows), \%expected_counts, "group prep:noun pairs from PPs with cut"); # T32
@rows = $cqp->exec_rows("group PP matchend lemma by match word within s cut 3");
@rows = map { [$_->[0]." ".$_->[1], $_->[2]] } @rows;
is_deeply(rows2hash(\@rows), \%expected_counts, "group prep:noun pairs from PPs within s"); # T33
@rows = $cqp->exec_rows("group PP matchend lemma by match word within story cut 3");
@rows = map { [$_->[0]." ".$_->[1], $_->[2]] } @rows;
is_deeply(rows2hash(\@rows), {"at time" => 3}, "group prep:noun pairs from PPs within story");
# dump/undump and subqueries
$cqp->exec("NP = /np[]");
($n_matches) = $cqp->exec("size NP");
ok($n_matches > 1200, "simple NP query"); # T35
$cqp->exec("sort NP by word \%c");
@rows = $cqp->exec("tabulate NP 10 20 match lemma");
ok((not grep { $_ ne "a" } @rows), "alphabetical sort (plausibility check)"); # T36
my @dump = map { [$_->[0], $_->[1]] } $cqp->dump("NP");
$cqp->undump("NP_copy", @dump);
my @rows1 = $cqp->exec("tabulate NP match .. matchend lemma");
my @rows2 = $cqp->exec("tabulate NP_copy match .. matchend lemma");
is_deeply(\@rows1, \@rows2, "undump preserves sort order"); # T37
@dump = map {
my ($s, $e) = @$_;
$s = ($s > 0) ? $s - 1 : $s; # expand matches by one token on the left
[$s, $e];
} @dump;
$cqp->undump("NP_mod", @dump);
$cqp->exec("NP_mod"); # subquery on modified undump
$cqp->exec("PP_subquery = <match> [pos='IN|TO'] []* [pos='NN.*'] </match>");
$cqp->exec("VSS");
$cqp->exec("Diff1 = diff PP PP_subquery"); # check that query results are identical
$cqp->exec("Diff2 = diff PP_subquery PP");
my ($n1) = $cqp->exec("size Diff1");
my ($n2) = $cqp->exec("size Diff2");
ok($n1 == 0 && $n2 == 0, "modified undump + subquery gives expected result"); # T38
# asynchronous execution with run() / getline()
$cqp->run("tabulate NP match .. matchend lemma");
@rows = ();
while (my $row = $cqp->getline) {
push @rows, $row;
}
is_deeply(\@rows, \@rows1, "asynchronous execution (tabulate command)"); # T39
ok((not defined $cqp->ready), "asychronous execution has completed"); # T40
# progress bar handler
my @progress_data = ();
$cqp->set_progress_handler(sub { my $perc = shift; push @progress_data, $perc if $perc > 0 });
$cqp->progress_on;
$cqp->exec("Temp = [pos='IN'] /np[]");
is_deeply(\@progress_data, [ 1 .. 100 ], "progress handler works correctly"); # T41
$cqp->progress_off;
# matching strategy modifier T42âT43
my $query = "[pos='JJ.*'] [pos='NNS?']+";
$cqp->exec("JN0 = $query");
$cqp->exec("JN1 = (?longest) $query");
$cqp->exec("set MatchingStrategy longest");
$cqp->exec("JN2 = $query");
$cqp->exec("set MatchingStrategy standard");
my @JN1 = $cqp->dump("JN1");
my @JN2 = $cqp->dump("JN2");
$cqp->exec("JDiff = diff JN2 JN0");
my ($n_diff) = $cqp->exec("size JDiff");
is_deeply(\@JN1, \@JN2, "matching strategy modifier works (?longest)");
ok($n_diff > 0, "confirm that matching strategy makes a difference");
# corpus position lookup T44-T45
$cqp->exec("CP1 = [_ = 666] []{2}");
my @result = $cqp->dump("CP1");
is_deeply(\@result, [[666, 668, -1, -1]], "corpus position lookup with [_ = 666]");
$cqp->exec("CP2 = [lemma = 'elephant'] []{0,10} [_ >= 8038]"); # should also work in earlier versions
@result = $cqp->dump("CP2");
is_deeply(\@result, [[8031, 8038, -1, -1]], "corpus position test with ... [_ >= 8038]");
# strlen() built-in function T46-T50
for my $corpus (qw(GOETHE_LATIN1 GOETHE_UTF8)) {
$cqp->exec($corpus);
$cqp->exec("G1 = [word = '.*chen' & strlen(word) = 7]");
$cqp->exec("G2 = [word = '.*chen' & strlen(word) >= 8]");
my ($n1) = $cqp->exec("size G1");
my ($n2) = $cqp->exec("size G2");
ok($n1 == 1, "strlen() test works for corpus $corpus");
ok($n2 == 0, "negative strlen() test works for corpus $corpus");
}
$cqp->exec("VSS");
$cqp->exec("G3 = [lemma = 'time'] :: strlen(match.story_title) <= 5");
my ($n) = $cqp->exec("size G3");
ok($n == 10, "strlen() test works for s-attribute annotation");
# validate MU Queries against corresponding finite-state queries T51-T54
$cqp->exec("Union_MU = MU(union [lemma = 'coffee'] [lemma = 'elephant'])");
$cqp->exec("Union_CQL = ([lemma = 'coffee'] | [lemma = 'elephant'])");
ok(matches_eq("Union_MU", "Union_CQL"), "MU(union ...) corresponds to basic query");
$cqp->exec("Meet3_MU = MU(meet 'of' 'the' -3 3)");
$cqp->exec("Meet3_CQL = 'of'");
$cqp->exec("set Meet3_CQL target nearest 'the' within 3 words");
$cqp->exec("delete Meet3_CQL without target");
ok(matches_eq("Meet3_MU", "Meet3_CQL"), "MU(meet ... -3 3) corresponds to basic query");
$cqp->exec("MeetS_MU = MU(meet 'elephant' 'garden' s)");
$cqp->exec("MeetS_CQL = 'elephant'");
( run in 2.498 seconds using v1.01-cache-2.11-cpan-524268b4103 )