BioPerl
view release on metacpan or search on metacpan
Bio/SearchIO/blasttable.pm view on Meta::CPAN
Title : new
Usage : my $obj = Bio::SearchIO::blasttable->new();
Function: Builds a new Bio::SearchIO::blasttable object
Returns : an instance of Bio::SearchIO::blasttable
Args :
=cut
sub _initialize {
my ($self,@args) = @_;
$self->SUPER::_initialize(@args);
my ($pname) = $self->_rearrange([qw(PROGRAM_NAME)],
@args);
$self->program_name($pname || $DefaultProgramName);
$self->_eventHandler->register_factory('result', Bio::Search::Result::ResultFactory->new(-type => 'Bio::Search::Result::GenericResult'));
$self->_eventHandler->register_factory('hit', Bio::Search::Hit::HitFactory->new(-type => 'Bio::Search::Hit::GenericHit'));
$self->_eventHandler->register_factory('hsp', Bio::Search::HSP::HSPFactory->new(-type => 'Bio::Search::HSP::GenericHSP'));
}
=head2 next_result
Title : next_result
Usage : my $result = $parser->next_result
Function: Parse the next result from the data stream
Returns : L<Bio::Search::Result::ResultI>
Args : none
=cut
sub next_result{
my ($self) = @_;
my ($lastquery,$lasthit);
local $/ = "\n";
local $_;
my ($alg, $ver);
while( defined ($_ = $self->_readline) ) {
# WU-BLAST -mformat 3 only
if(m{^#\s((?:\S+?)?BLAST[NPX])\s(\d+\.\d+.+\d{4}\])}) {
($alg, $ver) = ($1, $2);
# only one header for whole file with WU-BLAST
# so $alg and $ver won't get set properly for
# each result
$self->program_name($alg) if $alg;
$self->element({'Name' => 'Result_version',
'Data' => $ver}) if $ver;
next;
}
# -m 9 only
elsif(m{^#\s+((?:\S+?)?BLAST[NPX])\s+(.+)}) {
($alg, $ver) = ($1, $2);
next;
}
next if /^#/ || /^\s*$/;
my @fields = split;
next if @fields == 1;
my ($qname,$hname, $percent_id, $hsp_len, $mismatches,$gapsm,
$qstart,$qend,$hstart,$hend,$evalue,$bits);
# WU-BLAST-specific
my ($num_scores, $raw_score, $identities, $positives, $percent_pos,
$qgap_blocks,$qgaps, $sgap_blocks, $sgaps, $qframe,
$sframe);
# NCBI -m8 and -m9
if (@fields == 12) {
($qname,$hname, $percent_id, $hsp_len, $mismatches,$gapsm,
$qstart,$qend,$hstart,$hend,$evalue,$bits) = @fields;
# NCBI -m8 and -m9, v 2.2.18+
} elsif (@fields == 13) {
($qname, $hname, $percent_id, $percent_pos, $hsp_len, $mismatches, $gapsm,
$qstart,$qend,$hstart,$hend,$evalue,$bits) = @fields;
}
# WU-BLAST -mformat 2 and 3
elsif ((@fields == 22) or (@fields == 24)) {
($qname,$hname,$evalue,$num_scores, $bits, $raw_score, $hsp_len,
$identities, $positives,$mismatches, $percent_id, $percent_pos,
$qgap_blocks, $qgaps, $sgap_blocks, $sgaps, $qframe, $qstart,
$qend, $sframe, $hstart,$hend,) = @fields;
# we need total gaps in the alignment
$gapsm=$qgaps+$sgaps;
}
if (@fields == 12 || @fields == 13) {
# need to determine total gaps in the alignment for NCBI output
# since NCBI reports number of gapopens and NOT total gaps
my $qlen = abs($qstart - $qend) + 1;
my $querygaps = $hsp_len - $qlen;
my $hlen = abs($hstart - $hend) + 1;
my $hitgaps = $hsp_len - $hlen;
$gapsm = $querygaps + $hitgaps;
}
# Remember Jim's code is 0 based
if( defined $lastquery &&
$lastquery ne $qname ) {
$self->end_element({'Name' => 'Hit'});
$self->end_element({'Name' => 'Result'});
$self->_pushback($_);
return $self->end_document;
} elsif( ! defined $lastquery ) {
$self->{'_result_count'}++;
$self->start_element({'Name' => 'Result'});
$self->element({'Name' => 'Result_program',
'Data' => $alg || $self->program_name});
$self->element({'Name' => 'Result_version',
'Data' => $ver}) if $ver;
$self->element({'Name' => 'Result_query-def',
'Data' => $qname});
$self->start_element({'Name' => 'Hit'});
$self->element({'Name' => 'Hit_id',
'Data' => $hname});
# we'll store the 1st hsp bits as the hit bits
$self->element({'Name' => 'Hit_bits',
'Data' => $bits});
# we'll store the 1st hsp value as the hit evalue
$self->element({'Name' => 'Hit_signif',
'Data' => $evalue});
} elsif( $lasthit ne $hname ) {
if( $self->in_element('hit') ) {
$self->end_element({'Name' => 'Hit'});
}
$self->start_element({'Name' => 'Hit'});
$self->element({'Name' => 'Hit_id',
'Data' => $hname});
# we'll store the 1st hsp bits as the hit bits
$self->element({'Name' => 'Hit_bits',
'Data' => $bits});
# we'll store the 1st hsp value as the hit evalue
$self->element({'Name' => 'Hit_signif',
'Data' => $evalue});
}
my $identical = $hsp_len - $mismatches - $gapsm;
# If $positives value is absent, try to recover it from $percent_pos,
# this is better than letting the program to assume "conserved == identical"
if (not defined $positives and defined $percent_pos) {
$positives = sprintf "%d", ($percent_pos * $hsp_len / 100);
}
$self->start_element({'Name' => 'Hsp'});
$self->element({'Name' => 'Hsp_evalue',
'Data' => $evalue});
$self->element({'Name' => 'Hsp_bit-score',
'Data' => $bits});
$self->element({'Name' => 'Hsp_identity',
'Data' => $identical});
$self->element({'Name' => 'Hsp_positive',
'Data' => $positives});
$self->element({'Name' => 'Hsp_gaps',
'Data' => $gapsm});
$self->element({'Name' => 'Hsp_query-from',
'Data' => $qstart});
$self->element({'Name' => 'Hsp_query-to',
'Data' => $qend});
$self->element({'Name' => 'Hsp_hit-from',
'Data' => $hstart });
$self->element({'Name' => 'Hsp_hit-to',
'Data' => $hend });
$self->element({'Name' => 'Hsp_align-len',
'Data' => $hsp_len});
$self->end_element({'Name' => 'Hsp'});
$lastquery = $qname;
$lasthit = $hname;
}
# fencepost
if( defined $lasthit && defined $lastquery ) {
if( $self->in_element('hit') ) {
$self->end_element({'Name' => 'Hit'});
}
$self->end_element({'Name' => 'Result'});
return $self->end_document;
}
}
=head2 start_element
Title : start_element
Usage : $eventgenerator->start_element
Function: Handles a start element event
Returns : none
Args : hashref with at least 2 keys 'Data' and 'Name'
=cut
sub start_element{
my ($self,$data) = @_;
# we currently don't care about attributes
my $nm = $data->{'Name'};
if( my $type = $MODEMAP{$nm} ) {
$self->_mode($type);
if( $self->_will_handle($type) ) {
my $func = sprintf("start_%s",lc $type);
$self->_eventHandler->$func($data->{'Attributes'});
}
unshift @{$self->{'_elements'}}, $type;
}
( run in 0.514 second using v1.01-cache-2.11-cpan-39bf76dae61 )