Lucy
view release on metacpan or search on metacpan
buildlib/Lucy/Test/TestUtils.pm view on Meta::CPAN
my $content_type = Lucy::Plan::FullTextType->new(
analyzer => $analyzer,
highlightable => 1,
);
my $url_type = Lucy::Plan::StringType->new( indexed => 0, );
my $cat_type = Lucy::Plan::StringType->new;
$schema->spec_field( name => 'title', type => $title_type );
$schema->spec_field( name => 'content', type => $content_type );
$schema->spec_field( name => 'url', type => $url_type );
$schema->spec_field( name => 'category', type => $cat_type );
return $schema;
}
# Build the persistent on-disk test index at persistent_test_index_loc().
#
# The index is written in three separate Indexer sessions, each ending with
# a commit:
#   1. 10001 filler docs whose content is "zz0" .. "zz10000";
#   2. one doc per entry returned by get_uscon_docs(), using each entry's
#      `bodytext` value as the content;
#   3. 1001 docs of random lowercase "words", followed by an optimize.
#
# Any existing index at that location is truncated by the first session.
# The return value is whatever the final commit returns.
sub create_uscon_index {
    my $folder
        = Lucy::Store::FSFolder->new( path => persistent_test_index_loc() );

    # Session 1: create/truncate the index and add the filler docs.
    my $indexer = Lucy::Index::Indexer->new(
        schema   => _uscon_schema(),
        index    => $folder,
        truncate => 1,
        create   => 1,
    );
    $indexer->add_doc( { content => "zz$_" } ) for ( 0 .. 10000 );
    $indexer->commit;

    # Drop the finished Indexer before opening the next one.
    # NOTE(review): presumably this releases its hold on the index -- confirm.
    undef $indexer;

    # Session 2: add the real source documents.
    $indexer = Lucy::Index::Indexer->new( index => $folder );
    my $source_docs = get_uscon_docs();
    $indexer->add_doc( { content => $_->{bodytext} } )
        for values %$source_docs;
    $indexer->commit;
    undef $indexer;

    # Session 3: add docs made of random words.  Each doc has 0-19 words of
    # 10-19 letters; every word is followed by a single space.
    $indexer = Lucy::Index::Indexer->new( index => $folder );
    my @chars = ( 'a' .. 'z' );
    for ( 0 .. 1000 ) {
        my $content = '';
        for my $num_words ( 1 .. int( rand(20) ) ) {
            for ( 1 .. ( int( rand(10) ) + 10 ) ) {

                # Scalar element access; rand() yields a fractional index
                # that Perl truncates to an integer in 0 .. $#chars.
                $content .= $chars[ rand(@chars) ];
            }
            $content .= ' ';
        }
        $indexer->add_doc( { content => $content } );
    }
    $indexer->optimize;
    $indexer->commit;
}
# Produce three strings useful for verifying UTF-8 integrity, in this order:
#   1. a one-character string holding U+263A;
#   2. a copy of (1) after _utf8_off() has cleared its UTF8 flag;
#   3. a copy of (2) after utf8::upgrade() has been applied to it.
sub utf8_test_strings {
    my $flagged = "\x{263a}";

    # Copy first, then modify the copy in place, so each earlier string is
    # left untouched.
    _utf8_off( my $unflagged = $flagged );
    utf8::upgrade( my $upgraded = $unflagged );

    return ( $flagged, $unflagged, $upgraded );
}
# Check an Analyzer's transform, transform_text, and split methods against
# the same expectation.
#
#   $analyzer - object under test
#   $source   - text fed to each method
#   $expected - array ref of the token texts each method should yield
#   $message  - label appended to each of the three test descriptions
#
# Emits three Test::More::is_deeply checks ("analyze", "transform_text",
# and "split").
sub test_analyzer {
    my ( $analyzer, $source, $expected, $message ) = @_;

    # Drain an inversion, gathering the text of every token it yields.
    my $texts_of = sub {
        my $stream = shift;
        my @texts;
        while ( my $tok = $stream->next ) {
            push @texts, $tok->get_text;
        }
        return \@texts;
    };

    my $seed        = Lucy::Analysis::Inversion->new( text => $source );
    my $transformed = $analyzer->transform($seed);
    Test::More::is_deeply( $texts_of->($transformed),
        $expected, "analyze: $message" );

    my $from_text = $analyzer->transform_text($source);
    Test::More::is_deeply( $texts_of->($from_text),
        $expected, "transform_text: $message" );

    my @pieces = @{ $analyzer->split($source) };
    Test::More::is_deeply( \@pieces, $expected, "split: $message" );
}
# Extract all doc ids from a collector (a SortCollector, per the original
# note) by calling its pop_match_docs method.
#
# Returns a list of two array refs holding the same doc ids:
#   1. ordered by descending score, ties broken by ascending doc id;
#   2. ordered by ascending doc id.
sub doc_ids_from_td_coll {
    my $collector  = shift;
    my $match_docs = $collector->pop_match_docs;

    # Highest score first; equal scores fall back to the lower doc id so
    # the ordering is deterministic.
    my @by_score_then_id = map { $_->get_doc_id }
        sort {
               $b->get_score  <=> $a->get_score
            || $a->get_doc_id <=> $b->get_doc_id
        } @$match_docs;

    my @by_id = sort { $a <=> $b } @by_score_then_id;
    return ( \@by_score_then_id, \@by_id );
}
# Generate the multiples of $interval that are strictly less than $max:
# $interval, 2 * $interval, 3 * $interval, ...
#
# Returns an array ref, which is empty when $interval is not below $max.
# Croaks when $interval is zero or negative while still below $max, because
# stepping by such an interval would never reach $max.
sub modulo_set {
    my ( $interval, $max ) = @_;
    my @out;

    # Nothing qualifies when the very first value is already at/over $max.
    return \@out unless $interval < $max;

    if ( $interval <= 0 ) {
        require Carp;
        Carp::croak(
            "modulo_set: interval must be positive (got $interval)");
    }

    # Accumulate rather than multiply so results match repeated addition.
    for ( my $doc = $interval; $doc < $max; $doc += $interval ) {
        push @out, $doc;
    }
    return \@out;
}
1;
__END__
( run in 1.065 second using v1.01-cache-2.11-cpan-71847e10f99 )