Bio-Grep

 view release on metacpan or  search on metacpan

examples/Benchmarks.tt  view on Meta::CPAN

=head1 BENCHMARKS

[% cpuinfo %], 4GB RAM. [% osname %]. Perl [% perl %].

[% filenameCDNA %] (Arabidopsis cDNA FASTA file, 63MB). 

Bio::Grep [% biogrepv %]. 

=head2 Database Generation

Average over [% iterationsdb %] iterations.

  GUUGle         : [% guugle_dbgen %] sec
  Agrep/RE       : [% agrep_dbgen  %] sec
  Vmatch (-pl 3) : [% vmatch_dbgen %] sec

  
=head2 Mismatches

Query: C<ugacagaagagagugagcac> (revcom)

Average over [% iterations %] iterations.

=over

=item B<No mismatches (exact matching):>

  Agrep (Wu-Manber):  [%  agrep_mm_0_0 %] sec
  Vmatch           :  [% vmatch_mm_0_0 %] sec
  RE               :  [%     re_mm_0_0 %] sec
  Vmatch (-online) :  [% vmatch_mm_0_1 %] sec
  GUUGle           : [% guugle_mm_0_0 %] sec
  Agrep (TRE)      : [% agrep_tre_mm_0_0 %] sec

Note that C<Vmatch> needs one slow run to load the suffix arrays into memory
(the values are averages over [% iterations %] iterations). Also note that
GUUGle allows GU mismatches.

=item B<One mismatch:>

  Vmatch           :  [% vmatch_mm_1_0 %] sec
  Agrep (Wu-Manber):  [%  agrep_mm_1_0 %] sec
  Vmatch (-online) :  [% vmatch_mm_1_1 %] sec
  Agrep (TRE)      :  [% agrep_tre_mm_1_0 %] sec
  GUUGle           :       n/a
  RE               :       n/a

examples/benchmark.pl  view on Meta::CPAN

);

# Everything collected here is handed to the template as its variable stash.
my %results;

# The query is written with RNA letters; convert every 'u' to 't' before
# it is passed to the back-ends.
(my $query = 'ugacagaagagagugagcac') =~ tr{u}{t};

# Start time of the measurement currently in progress.
my $time;
my $VERBOSE      = 0;
my $filenameCDNA = 'TAIR8_cdna_20080412';

# Search iterations, database-generation iterations and the highest
# mismatch count to benchmark. A debug run does the minimum of each.
my ($iterations, $iterationsdb, $maxmm) = $DEBUG ? (1, 1, 1) : (20, 2, 5);

#goto CREATETMP;
# Benchmark database generation.
#
# For every back-end in %be the database is rebuilt from scratch
# $iterationsdb times. The average wall-clock time per build is stored in
# %results under "<backend>_dbgen", formatted with two decimals. Note that the
# measured interval also covers removing and recreating the data directory.
DB:
for my $backend (sort keys %be) {
    # A lexical loop variable is used on purpose: $a and $b are the package
    # variables reserved for sort comparators and should not be reused.
    my $sbe      = $be{$backend};
    my $datapath = 'data' . $backend;

    $time = [gettimeofday];
    for my $i (1..$iterationsdb) {
        # Start each iteration with an empty data directory. The list form
        # of system() bypasses the shell, so the path is never re-parsed.
        system('rm', '-rf', "$datapath/") == 0
            or die "Cannot remove $datapath/ (rm exit status $?)\n";
        mkdir $datapath
            or die "Cannot create $datapath: $!\n";

        $sbe->generate_database({
                datapath      => $datapath,
                file          => "examples/$filenameCDNA",
                prefix_length => 3,
            });
    }

    # Average seconds per database build.
    $results{"${backend}_dbgen"} = sprintf("%.2f",
        (tv_interval($time)/$iterationsdb));
    warn "$backend took " . $results{"${backend}_dbgen"} . " seconds\n";
}

# Benchmark the searches.
#
# For every back-end in %be and every mismatch count from 0 to $maxmm, the
# same query is searched $iterations times and the elapsed time is measured.
MM:
for $b (sort keys %be) {
    my $sbe = $be{$b};
    # Only the 'vmatch' back-end gets a second pass ($online == 1).
    # NOTE(review): presumably that pass switches on Vmatch's online mode in
    # the part of the search() call that is not visible here -- confirm.
    my $loop_counter = 0;
    $loop_counter = 1 if $b eq 'vmatch';
    for my $online ( 0 .. $loop_counter) {
    for my $mm (0..$maxmm) {
        # A back-end without the MISMATCHES feature is only measured for
        # $mm == 0; afterwards move straight on to the next back-end.
        next MM if !defined $sbe->features->{MISMATCHES} && $mm > 0;
        $time = [gettimeofday];
        for my $i (1..$iterations) {
            # Progress indicator: one dot for every fifth iteration.
            print "." if ($i % 5 == 0);
            # %showdesc and $gu are not used in the lines visible here.
            # NOTE(review): presumably both are passed to search() in the
            # elided part of the call -- confirm.
            my %showdesc;
            %showdesc = ( showdesc => 100) if $b eq 'vmatch'; 
            my $gu = 1;
            $gu = 0 if $b eq 'guugle';
            # The search runs inside eval, so an exception thrown by the
            # back-end does not abort the benchmark.
            eval { $sbe->search({
            query              => $query,
            mismatches         => $mm,
            reverse_complement => 1,
            datapath => 'data' . $b,

examples/benchmark.pl  view on Meta::CPAN

            }); };
            # $@ is not inspected after the eval in the lines shown here, so
            # a failed search is not reported.
            # Fetch every result and collect the sequence ids; this happens
            # inside the timed interval.
            my @ids;
            while (my $res = $sbe->next_res) {
                push @ids, $res->sequence->id;
            }    
            warn scalar(@ids). " results.\n" if $VERBOSE;
        }   
        warn 'Is TRE? ' . $sbe->is_tre_agrep() if $b =~/agrep/;
        
        # Average seconds per search, stored under
        # "<backend>_mm_<mismatches>_<online>" (e.g. vmatch_mm_0_1).
        $results{"${b}_mm_${mm}_$online"} = sprintf("%.2f",
            tv_interval($time)/$iterations);
        warn "$b (mm $mm) took " . $results{"${b}_mm_${mm}_$online"} . " seconds\n";
    }     
    }
}    

# Add the description of the machine and of the benchmark parameters to
# %results, then render the benchmark POD from its template.
CREATETMP:
my $info = Sys::Info->new;

# Each call is wrapped in scalar() so that it is evaluated in scalar
# context even though it appears inside a list.
@results{qw(cpuinfo perl osname)} = (
    scalar( $info->device('CPU')->identify ),
    scalar( $info->perl_long() ),
    scalar( $info->os->name( long => 1 ) ),
);

@results{qw(filenameCDNA biogrepv iterations iterationsdb)} = (
    $filenameCDNA,
    $Bio::Grep::VERSION,
    $iterations,
    $iterationsdb,
);

# Template in, %results as variables, generated POD out.
$template->process(
    'examples/Benchmarks.tt',
    \%results,
    'lib/Bio/Grep/Benchmarks.pod',
) or die $template->error(), "\n";

lib/Bio/Grep/Benchmarks.pod  view on Meta::CPAN

=head1 BENCHMARKS

4 x Intel(R) Core(TM)2 Quad CPU Q9400  @ 2.66GHz, 4GB RAM. Fedora Linux 2.6.27.38-170.2.113.fc10.i686.PAE (kernel: 2.6.27.38-170.2.113.fc10.i686.PAE). Perl 5.10.0.0.

TAIR8_cdna_20080412 (Arabidopsis cDNA FASTA file, 63MB). 

Bio::Grep 0.10.6. 

=head2 Database Generation

Average over 2 iterations.

  GUUGle         : 2.88 sec
  Agrep/RE       : 10.69 sec
  Vmatch (-pl 3) : 135.32 sec

  
=head2 Mismatches

Query: C<ugacagaagagagugagcac> (revcom)

Average over 20 iterations.

=over

=item B<No mismatches (exact matching):>

  Vmatch           :  0.02 sec
  Agrep (Wu-Manber):  0.22 sec
  RE               :  1.66 sec
  Vmatch (-online) :  3.80 sec
  GUUGle           :  6.18 sec
  Agrep (TRE)      : 10.22 sec

Note that C<Vmatch> needs one slow run to load the suffix arrays into memory
(the values are averages over 20 iterations). Also note that GUUGle
allows GU mismatches.

=item B<One mismatch:>

  Vmatch           :  0.05 sec
  Agrep (Wu-Manber):  0.98 sec
  Vmatch (-online) :  3.85 sec
  Agrep (TRE)      : 35.26 sec
  GUUGle           :       n/a
  RE               :       n/a



( run in 1.740 second using v1.01-cache-2.11-cpan-96521ef73a4 )