validate results from the CPAN

Alt-CWB-CL-ambs

  ($src_start, $src_end, $target_start, $target_end) 
      = $french->alg2cpos($alg);                 # returns empty list on error
      # or use convenience function $french->cpos2alg2cpos($cpos);


  # Feature sets (used as values of CWB::CL::Attribute and CWB::CL::AttStruc)
  $np_f = $corpus->attribute("np_feat", 's');    # p- and s-attributes can store feature sets
  $fs_string = $np_f->cpos2str($cpos);           # feature sets are encoded as strings
  $fs = CWB::CL::set2hash($fs_string);           # expand feature set into hash (reference)
  if (exists $fs->{"paren"}) { ... }
  $fs1 = CWB::CL::make_set("|proper|nogen|");    # validate feature set or construct from string
  $fs2 = CWB::CL::make_set("paren nogen proper", 'split');
  $fs  = CWB::CL::set_intersection($fs1, $fs2);  # intersection of feature set values
  $n   = CWB::CL::set_size($fs);                 # size of feature set


=head1 DESCRIPTION

Sorry, there is no full description for this module yet, since the 
B<CWB Corpus Library>, on which B<CWB::CL> is based, does not have 
complete documentation.

t/01_vss.t view on Meta::CPAN

# T12 -- $f is set earlier in the script; it must equal the expected count of 14
is($f, 14, q{frequency of 'elephant'});
diag(qq{- 'elephant' occurs $f times in VSS corpus});

# -- search lexicon with regular expression
our $regex = "[a-z]+(ally|ily)";
# Perl counterpart of $regex, used to validate the returned words.  It is
# anchored with \A ... \z so the whole word has to match ("$" would also
# accept a trailing newline); /i goes with the "c" flag passed to regex2id
# below (presumably case-insensitive matching, as with %c in CQP).
our $perl_regex = qr/\A(?:${regex})\z/i;
our @id = $Word->regex2id($regex, "c"); # same as `` "[a-z]+(ally|ily)" %c; '' in CQP
our $n_types = @id;
is($n_types, 24, "match regular expressions against lexicon"); # T13
our @words = $Word->id2str(@id);
# compare the two counts with is() so that a failure reports both numbers
is(scalar @words, scalar @id, "map matching IDs to words");
our @errors = grep {not /$perl_regex/} @words; # words rejected by the Perl regexp
ok(@errors == 0, "regular expression matches are correct");
diag("- these words should not have matched: @errors")
  if @errors;

# -- total frequency of the matching IDs, obtained in two different ways
our $total_f = $Word->idlist2freq(@id);  # one call for the whole ID list
our $sum_f = 0;
$sum_f += $_ for $Word->id2freq(@id);    # add up the per-ID frequencies instead
is($total_f, $sum_f, "total frequency counts are consistent"); # T16
is($total_f, 37, "total frequency of matching words");
diag(qq{- regexp "$regex"\%c matches $n_types types, $total_f tokens});

# -- look up corpus positions in index
our @cpos = $Word->idlist2cpos(@id);
is(@cpos+0, $total_f, "index lookup returns correct number of corpus positions"); # T18
# every returned position must carry a word that matches the Perl regexp
@errors = grep {$Word->cpos2str($_) !~ /$perl_regex/} @cpos;
ok(@errors == 0, "index entries are correct");
diag("- these corpus positions should not have been in the index: @errors")
  if @errors;
# Show at most the first five entries.  The slice is clamped to the number of
# positions actually returned, so that a short (or empty) result -- i.e. the
# failure case this diagnostic is meant to help with -- does not hand undef
# corpus positions to cpos2str().
our @first5 = @cpos
  ? $Word->cpos2str(@cpos[0 .. ($#cpos < 4 ? $#cpos : 4)])
  : ();
diag("- index entries: ".join(", ", @first5, "..."));

# -- number of regions (s-attributes), taken from max_struc of each attribute
our $n_sentences = $S->max_struc();
our $n_stories   = $StoryTitle->max_struc();
is($n_sentences, 459, 'number of sentences'); # T20
is($n_stories,   6,   'number of stories');

( run in 0.284 second using v1.01-cache-2.11-cpan-a5abf4f5562 )