BioPerl

 view release on metacpan or  search on metacpan

Bio/SearchIO/blasttable.pm  view on Meta::CPAN

 Title   : new
 Usage   : my $obj = Bio::SearchIO::blasttable->new();
 Function: Builds a new Bio::SearchIO::blasttable object 
 Returns : an instance of Bio::SearchIO::blasttable
 Args    :


=cut

sub _initialize {
    my ($self,@args) = @_;
    $self->SUPER::_initialize(@args);

    my ($pname) = $self->_rearrange([qw(PROGRAM_NAME)],
				    @args);
    $self->program_name($pname || $DefaultProgramName);
    $self->_eventHandler->register_factory('result', Bio::Search::Result::ResultFactory->new(-type => 'Bio::Search::Result::GenericResult'));
    $self->_eventHandler->register_factory('hit', Bio::Search::Hit::HitFactory->new(-type => 'Bio::Search::Hit::GenericHit'));
    $self->_eventHandler->register_factory('hsp', Bio::Search::HSP::HSPFactory->new(-type => 'Bio::Search::HSP::GenericHSP'));
}


=head2 next_result

 Title   : next_result
 Usage   : my $result = $parser->next_result
 Function: Parse the next result from the data stream
 Returns : L<Bio::Search::Result::ResultI>
 Args    : none


=cut

sub next_result{
   my ($self) = @_;
   my ($lastquery,$lasthit);
   local $/ = "\n";
   local $_;
   my ($alg, $ver);
   while( defined ($_ = $self->_readline) ) {
	  # WU-BLAST -mformat 3 only
	  if(m{^#\s((?:\S+?)?BLAST[NPX])\s(\d+\.\d+.+\d{4}\])}) {
            ($alg, $ver) = ($1, $2);
			# only one header for whole file with WU-BLAST
			# so $alg and $ver won't get set properly for
			# each result
			$self->program_name($alg) if $alg;
			$self->element({'Name' => 'Result_version',
					   		'Data' => $ver}) if $ver;
            next;
	  }
      # -m 9 only
      elsif(m{^#\s+((?:\S+?)?BLAST[NPX])\s+(.+)}) {
            ($alg, $ver) = ($1, $2);
            next;
       }
       next if /^#/ || /^\s*$/;

	  my @fields = split;
      next if @fields == 1;
	  my ($qname,$hname, $percent_id, $hsp_len, $mismatches,$gapsm,
	      $qstart,$qend,$hstart,$hend,$evalue,$bits);
	  # WU-BLAST-specific
	  my ($num_scores, $raw_score, $identities, $positives, $percent_pos,
	      $qgap_blocks,$qgaps, $sgap_blocks, $sgaps, $qframe,
	      $sframe);
	  # NCBI -m8 and -m9
	  if (@fields == 12) {
	      ($qname,$hname, $percent_id, $hsp_len, $mismatches,$gapsm,
	       $qstart,$qend,$hstart,$hend,$evalue,$bits) = @fields;
	  # NCBI -m8 and -m9, v 2.2.18+
	  } elsif (@fields == 13) {
          ($qname, $hname, $percent_id, $percent_pos, $hsp_len, $mismatches, $gapsm,
	       $qstart,$qend,$hstart,$hend,$evalue,$bits) = @fields;
      }
	  # WU-BLAST -mformat 2 and 3
	  elsif ((@fields == 22) or (@fields == 24)) {
	      ($qname,$hname,$evalue,$num_scores, $bits, $raw_score, $hsp_len,
	       $identities, $positives,$mismatches, $percent_id, $percent_pos,
	       $qgap_blocks, $qgaps, $sgap_blocks, $sgaps, $qframe, $qstart,
	       $qend, $sframe, $hstart,$hend,) = @fields;
	      # we need total gaps in the alignment
	      $gapsm=$qgaps+$sgaps;
	  }

       if (@fields == 12 || @fields == 13) {
          # need to determine total gaps in the alignment for NCBI output
          # since NCBI reports number of gapopens and NOT total gaps
          my $qlen      = abs($qstart - $qend) + 1;
          my $querygaps = $hsp_len - $qlen;
          my $hlen      = abs($hstart - $hend) + 1;
          my $hitgaps   = $hsp_len - $hlen;
          $gapsm = $querygaps + $hitgaps;
       }

       # Remember Jim's code is 0 based
       if( defined $lastquery && 
	   $lastquery ne $qname ) {
	   $self->end_element({'Name' => 'Hit'});
	   $self->end_element({'Name' => 'Result'});
	   $self->_pushback($_);
	   return $self->end_document;
       } elsif( ! defined $lastquery ) {
	   $self->{'_result_count'}++;
	   $self->start_element({'Name' => 'Result'});
	   $self->element({'Name' => 'Result_program',
			   'Data' => $alg || $self->program_name});
       $self->element({'Name' => 'Result_version',
			   'Data' => $ver}) if $ver;
	   $self->element({'Name' => 'Result_query-def',
			   'Data' => $qname});
	   $self->start_element({'Name' => 'Hit'});
	   $self->element({'Name' => 'Hit_id',
			   'Data' => $hname});
	   # we'll store the 1st hsp bits as the hit bits
	   $self->element({'Name' => 'Hit_bits',			   
			   'Data' => $bits});	   
           # we'll store the 1st hsp value as the hit evalue
	   $self->element({'Name' => 'Hit_signif',			   
			   'Data' => $evalue});
	   
       } elsif( $lasthit ne $hname ) {
	   if( $self->in_element('hit') ) {	       
	       $self->end_element({'Name' => 'Hit'});
	   }
	   $self->start_element({'Name' => 'Hit'});
	   $self->element({'Name' => 'Hit_id',
			   'Data' => $hname});
	   # we'll store the 1st hsp bits as the hit bits
	   $self->element({'Name' => 'Hit_bits',			   
			   'Data' => $bits});	   
           # we'll store the 1st hsp value as the hit evalue
	   $self->element({'Name' => 'Hit_signif',			   
			   'Data' => $evalue});
       }
       my $identical = $hsp_len - $mismatches - $gapsm;
       # If $positives value is absent, try to recover it from $percent_pos,
       # this is better than letting the program to assume "conserved == identical"
       if (not defined $positives and defined $percent_pos) {
	   $positives = sprintf "%d", ($percent_pos * $hsp_len / 100);
       }
       $self->start_element({'Name' => 'Hsp'});
       $self->element({'Name' => 'Hsp_evalue',			   
		       'Data' => $evalue});       
       $self->element({'Name' => 'Hsp_bit-score',
		       'Data' => $bits});
       $self->element({'Name' => 'Hsp_identity',
		       'Data' => $identical});
       $self->element({'Name' => 'Hsp_positive',
		       'Data' => $positives});
       $self->element({'Name' => 'Hsp_gaps',
		       'Data' => $gapsm});
       $self->element({'Name' => 'Hsp_query-from',
		       'Data' => $qstart});
       $self->element({'Name' => 'Hsp_query-to',
		       'Data' => $qend});

       $self->element({'Name' => 'Hsp_hit-from',
		       'Data' => $hstart });
       $self->element({'Name' => 'Hsp_hit-to',
		       'Data' => $hend });
       $self->element({'Name' => 'Hsp_align-len',
		       'Data' => $hsp_len});
       $self->end_element({'Name' => 'Hsp'});
       $lastquery = $qname;
       $lasthit   = $hname;
   }
   # fencepost
   if( defined $lasthit && defined $lastquery ) {
       if( $self->in_element('hit') ) {
	   $self->end_element({'Name' => 'Hit'});
       }
       $self->end_element({'Name' => 'Result'});
       return $self->end_document;
   }
}

=head2 start_element

 Title   : start_element
 Usage   : $eventgenerator->start_element
 Function: Handles a start element event
 Returns : none
 Args    : hashref with at least 2 keys 'Data' and 'Name'


=cut

sub start_element{
   my ($self,$data) = @_;
    # we currently don't care about attributes
    my $nm = $data->{'Name'};    
   if( my $type = $MODEMAP{$nm} ) {
	$self->_mode($type);
	if( $self->_will_handle($type) ) {
	    my $func = sprintf("start_%s",lc $type);
	    $self->_eventHandler->$func($data->{'Attributes'});
	}						 
	unshift @{$self->{'_elements'}}, $type;
    }



( run in 0.514 second using v1.01-cache-2.11-cpan-39bf76dae61 )