BioPerl
view release on metacpan or search on metacpan
Bio/SearchIO/blast.pm view on Meta::CPAN
Usage : my $obj = Bio::SearchIO::blast->new(%args);
Function: Builds a new Bio::SearchIO::blast object
Returns : Bio::SearchIO::blast
Args : Key-value pairs:
-fh/-file => filehandle/filename to BLAST file
-format => 'blast'
-report_type => 'blastx', 'tblastn', etc -- only for bl2seq
reports when you want to distinguish between
tblastn and blastx reports (this only controls
where the frame information is put - on the query
or subject object).
-inclusion_threshold => e-value threshold for inclusion in the
PSI-BLAST score matrix model (blastpgp)
-signif => float or scientific notation number to be used
as a P- or Expect value cutoff
-score => integer or scientific notation number to be used
as a blast score value cutoff
-bits => integer or scientific notation number to be used
as a bit score value cutoff
-hit_filter => reference to a function to be used for
filtering hits based on arbitrary criteria.
All hits of each BLAST report must satisfy
this criterion to be retained.
If a hit fails this test, it is ignored.
This function should take a
Bio::Search::Hit::BlastHit.pm object as its first
argument and return true
if the hit should be retained.
Sample filter function:
-hit_filter => sub { $hit = shift;
$hit->gaps == 0; },
(Note: -filt_func is synonymous with -hit_filter)
-overlap => integer. The amount of overlap to permit between
adjacent HSPs when tiling HSPs. A reasonable value is 2.
Default = $Bio::SearchIO::blast::MAX_HSP_OVERLAP.
The following criteria are not yet supported:
(these are probably best applied within this module rather than in the
event handler since they would permit the parser to take some shortcuts.)
-check_all_hits => boolean. Check all hits for significance against
significance criteria. Default = false.
If false, stops processing hits after the first
non-significant hit or the first hit that fails
the hit_filter call. This speeds parsing,
taking advantage of the fact that the hits
are processed in the order they appear in the report.
-min_query_len => integer to be used as a minimum for query sequence length.
Reports with query sequences below this length will
not be processed. Default = no minimum length.
-best => boolean. Only process the best hit of each report;
default = false.
=cut
# Set up a newly constructed blast parser object.
#
# Args   : the same key-value argument list that was given to the constructor.
# Effect : runs the parent class's _initialize, builds and attaches an
#          iteration-aware event handler from the same arguments, and stores
#          the -report_type argument (when defined) in $self->{'_reporttype'}.
sub _initialize {
    my $self = shift;
    my @args = @_;

    $self->SUPER::_initialize(@args);

    # BLAST reports may contain iterations (PSI-BLAST), so a specialized
    # result event builder is used. Every argument is forwarded to it as-is.
    # Known issue carried over from the original notes: new default object
    # factories should be set when the caller supplies none.
    my $builder = Bio::SearchIO::IteratedSearchResultEventBuilder->new(@args);

    # Caching of the handler is left to attach_EventHandler (moved there on
    # 2006-04-26) so that the cache is refreshed whenever the handler is reset.
    $self->attach_EventHandler($builder);

    # Only REPORT_TYPE is extracted from the arguments here.
    my ($report_type) = $self->_rearrange( ['REPORT_TYPE'], @args );
    $self->{'_reporttype'} = $report_type if defined $report_type;
}
# Install $handler as the event handler for this parser.
#
# Args    : $handler - the event handler object to attach.
# Effect  : delegates to the parent class's attach_EventHandler, then stores
#           the handler in $self->{'_handler_cache'}.
# Returns : nothing (bare return).
sub attach_EventHandler {
    my $self    = shift;
    my $handler = shift;

    $self->SUPER::attach_EventHandler($handler);

    # Optimization: the handler is used a lot during the parse, so keep a
    # direct reference to it on the object.
    $self->{'_handler_cache'} = $handler;

    return;
}
=head2 next_result
Title : next_result
Usage : my $result = $searchio->next_result;
Function: Returns the next Result from a search
Returns : Bio::Search::Result::ResultI object
Args : none
=cut
sub next_result {
my ($self) = @_;
my $v = $self->verbose;
my $data = '';
my $flavor = '';
$self->{'_seentop'} = 0; # start next report at top
my ( $reporttype, $seenquery, $reportline, $reportversion );
my ( $seeniteration, $found_again );
my $incl_threshold = $self->inclusion_threshold;
my $bl2seq_fix;
$self->start_document(); # let the fun begin...
my (@hit_signifs);
my $gapped_stats = 0; # for switching between gapped/ungapped
# lambda, K, H
Bio/SearchIO/blast.pm view on Meta::CPAN
if ($reporttype =~ /RPS-BLAST/) {
$reporttype .= '(BLASTP)'; # default RPS-BLAST type
}
$reportline = $_; # to fix the fact that RPS-BLAST output is wrong
$self->element(
{
'Name' => 'BlastOutput_program',
'Data' => $reporttype
}
);
$self->element(
{
'Name' => 'BlastOutput_version',
'Data' => $reportversion
}
);
$self->element(
{
'Name' => 'BlastOutput_inclusion-threshold',
'Data' => $incl_threshold
}
);
}
# parse the BLAST algorithm reference
elsif(/^Reference:\s+(.*)$/) {
# want to preserve newlines for the BLAST algorithm reference
my $algorithm_reference = "$1\n";
$_ = $self->_readline;
# while the current line, does not match an empty line, a RID:, a
# Database:, or a query definition line (Query=) we are still
# looking at the algorithm_reference, append it to what we parsed so
# far
while($_ !~ /^$/ && $_ !~ /^RID:/ && $_ !~ /^Database:/ && $_ !~ /^Query=/) {
$algorithm_reference .= "$_";
$_ = $self->_readline;
}
# if we exited the while loop, we saw an empty line, a RID:, a
# Database:, or a Query= line, so push it back
$self->_pushback($_);
$self->element(
{
'Name' => 'BlastOutput_algorithm-reference',
'Data' => $algorithm_reference
}
);
}
# parse BLAST RID (Request ID)
elsif(/^RID:\s+(.*)$/) {
my $rid = $1;
$self->element(
{
'Name' => 'BlastOutput_rid',
'Data' => $rid
}
);
}
# added Windows workaround for bug 1985
elsif (/^(Searching|Results from round)/) {
next unless $1 =~ /Results from round/;
$self->debug("blast.pm: Possible psi blast iterations found...\n");
$self->in_element('hsp')
&& $self->end_element( { 'Name' => 'Hsp' } );
$self->in_element('hit')
&& $self->end_element( { 'Name' => 'Hit' } );
if ( defined $seeniteration ) {
$self->within_element('iteration')
&& $self->end_element( { 'Name' => 'Iteration' } );
$self->start_element( { 'Name' => 'Iteration' } );
}
else {
$self->start_element( { 'Name' => 'Iteration' } );
}
$seeniteration = 1;
}
elsif (/^Query=\s*(.*)$/) {
my $q = $1;
$self->debug("blast.pm: Query= found...$_\n");
my $size = 0;
if ( defined $seenquery ) {
$self->_pushback($_);
$self->_pushback($reportline) if $reportline;
last PARSER;
}
if ( !defined $reporttype ) {
$self->_start_blastoutput;
if ( defined $seeniteration ) {
$self->in_element('iteration')
&& $self->end_element( { 'Name' => 'Iteration' } );
$self->start_element( { 'Name' => 'Iteration' } );
}
else {
$self->start_element( { 'Name' => 'Iteration' } );
}
$seeniteration = 1;
}
$seenquery = $q;
$_ = $self->_readline;
while ( defined($_) ) {
if (/^Database:/) {
$self->_pushback($_);
last;
}
# below line fixes length issue with BLAST v2.2.13; still works
# with BLAST v2.2.12
if ( /\((\-?[\d,]+)\s+letters.*\)/ || /^Length=(\-?[\d,]+)/ ) {
$size = $1;
$size =~ s/,//g;
last;
}
else {
# bug 2391
$q .= ($q =~ /\w$/ && $_ =~ /^\w/) ? " $_" : $_;
$q =~ s/\s+/ /g; # this catches the newline as well
$q =~ s/^ | $//g;
}
$_ = $self->_readline;
}
chomp($q);
( run in 0.568 second using v1.01-cache-2.11-cpan-71847e10f99 )