KinoSearch

 view release on metacpan or  search on metacpan

buildlib/KinoSearch/Test/TestUtils.pm  view on Meta::CPAN

        };
    }

    return \%docs;
}

sub create_uscon_index {
    my $folder = KinoSearch::Store::FSFolder->new(
        path => persistent_test_index_loc() );
    my $schema  = KinoSearch::Test::USConSchema->new;
    my $indexer = KinoSearch::Index::Indexer->new(
        schema   => $schema,
        index    => $folder,
        truncate => 1,
        create   => 1,
    );

    $indexer->add_doc( { content => "zz$_" } ) for ( 0 .. 10000 );
    $indexer->commit;
    undef $indexer;

    $indexer = KinoSearch::Index::Indexer->new(
        schema => $schema,
        index  => $folder,
    );
    my $source_docs = get_uscon_docs();
    $indexer->add_doc( { content => $_->{bodytext} } )
        for values %$source_docs;
    $indexer->commit;
    undef $indexer;

    $indexer = KinoSearch::Index::Indexer->new(
        schema => $schema,
        index  => $folder,
    );
    my @chars = ( 'a' .. 'z' );
    for ( 0 .. 1000 ) {
        my $content = '';
        for my $num_words ( 1 .. int( rand(20) ) ) {
            for ( 1 .. ( int( rand(10) ) + 10 ) ) {
                $content .= @chars[ rand(@chars) ];
            }
            $content .= ' ';
        }
        $indexer->add_doc( { content => $content } );
    }
    $indexer->optimize;
    $indexer->commit;
}

# Return 3 strings useful for verifying UTF-8 integrity.
sub utf8_test_strings {
    my $smiley       = "\x{263a}";
    my $not_a_smiley = $smiley;
    _utf8_off($not_a_smiley);
    my $frowny = $not_a_smiley;
    utf8::upgrade($frowny);
    return ( $smiley, $not_a_smiley, $frowny );
}

# Verify an Analyzer's transform, transform_text, and split methods.
sub test_analyzer {
    my ( $analyzer, $source, $expected, $message ) = @_;

    my $inversion = KinoSearch::Analysis::Inversion->new( text => $source );
    $inversion = $analyzer->transform($inversion);
    my @got;
    while ( my $token = $inversion->next ) {
        push @got, $token->get_text;
    }
    Test::More::is_deeply( \@got, $expected, "analyze: $message" );

    $inversion = $analyzer->transform_text($source);
    @got       = ();
    while ( my $token = $inversion->next ) {
        push @got, $token->get_text;
    }
    Test::More::is_deeply( \@got, $expected, "transform_text: $message" );

    @got = @{ $analyzer->split($source) };
    Test::More::is_deeply( \@got, $expected, "split: $message" );
}

# Extract all doc nums from a SortCollector.  Return two sorted array refs:
# by_score and by_id.
sub doc_ids_from_td_coll {
    my $collector = shift;
    my @by_score;
    my $match_docs = $collector->pop_match_docs;
    my @by_score_then_id = map { $_->get_doc_id }
        sort {
               $b->get_score <=> $a->get_score
            || $a->get_doc_id <=> $b->get_doc_id
        } @$match_docs;
    my @by_id = sort { $a <=> $b } @by_score_then_id;
    return ( \@by_score_then_id, \@by_id );
}

# Use a modulus to generate a set of numbers.
sub modulo_set {
    my ( $interval, $max ) = @_;
    my @out;
    for ( my $doc = $interval; $doc < $max; $doc += $interval ) {
        push @out, $doc;
    }
    return \@out;
}

1;

__END__

__COPYRIGHT__

Copyright 2005-2011 Marvin Humphrey

This program is free software; you can redistribute it and/or modify
under the same terms as Perl itself.



( run in 1.079 second using v1.01-cache-2.11-cpan-71847e10f99 )