FAST

 view release on metacpan or  search on metacpan

lib/FAST/Bio/SearchIO/IteratedSearchResultEventBuilder.pm  view on Meta::CPAN

                                 -interface => 'FAST::Bio::Search::Iteration::IterationI'));
    return $self;
}


#Initializes parameters used during parsing of Blast reports.
sub _init_parse_params {

    my ($self, @args) = @_;
    # -FILT_FUNC has been replaced by -HIT_FILTER.
    # Leaving -FILT_FUNC in place for backward compatibility
    my($ithresh, $signif, $score, $bits, $hit_filter, $filt_func) =
           $self->_rearrange([qw(INCLUSION_THRESHOLD
                                 SIGNIF SCORE BITS HIT_FILTER FILT_FUNC
                                )], @args);

    $self->inclusion_threshold( defined($ithresh) ? $ithresh : $DEFAULT_INCLUSION_THRESHOLD);
    my $hit_filt = $hit_filter || $filt_func;
    defined $hit_filter && $self->hit_filter($hit_filt);
    defined $signif     && $self->max_significance($signif);
    defined $score      && $self->min_score($score);
    defined $bits       && $self->min_bits($bits);
}

=head2 will_handle

 Title   : will_handle
 Usage   : if( $handler->will_handle($event_type) ) { ... }
 Function: Tests if this event builder knows how to process a specific event
 Returns : boolean
 Args    : event type name


=cut

sub will_handle{
   my ($self,$type) = @_;
   # these are the events we recognize
   return ( $type eq 'hsp' || $type eq 'hit' || $type eq 'result' || $type eq 'iteration' ||
            $type eq 'newhits' || $type eq 'oldhits' );
}

=head2 SAX methods

=cut

=head2 start_result

 Title   : start_result
 Usage   : $handler->start_result($resulttype)
 Function: Begins a result event cycle
 Returns : none 
 Args    : Type of Report

=cut

sub start_result {
   my $self = shift;
   #print STDERR "ISREB: start_result()\n";
   $self->SUPER::start_result(@_);
   $self->{'_iterations'} = [];
   $self->{'_iteration_count'} = 0;
   $self->{'_old_hit_names'} = undef;
   $self->{'_hit_names_below'} = undef;
   return;
}

=head2 end_result

 Title   : end_result
 Usage   : my @results = $parser->end_result
 Function: Finishes a result handler cycle 
 Returns : A FAST::Bio::Search::Result::ResultI
 Args    : none

=cut

sub end_result {
    my ($self,$type,$data) = @_;
    #print STDERR "ISREB: end_result\n";
    ## How is runid getting set? Purpose?
    if( defined $data->{'runid'} &&
        $data->{'runid'} !~ /^\s+$/ ) {        

        if( $data->{'runid'} !~ /^lcl\|/) { 
            $data->{"RESULT-query_name"}= $data->{'runid'};
        } else { 
            ($data->{"RESULT-query_name"},$data->{"RESULT-query_description"}) = 
                split(/\s+/,$data->{"RESULT-query_description"},2);
        }
        
        if( my @a = split(/\|/,$data->{'RESULT-query_name'}) ) {
            my $acc = pop @a ; # this is for accession |1234|gb|AAABB1.1|AAABB1
            # this is for |123|gb|ABC1.1|
            $acc = pop @a if( ! defined $acc || $acc =~ /^\s+$/);
            $data->{"RESULT-query_accession"}= $acc;
        }
        delete $data->{'runid'};
    }
    my %args = map { my $v = $data->{$_}; s/RESULT//; ($_ => $v); } 
               grep { /^RESULT/ } keys %{$data};
    
    $args{'-algorithm'} =  uc( $args{'-algorithm_name'} || 
                               $data->{'RESULT-algorithm_name'} || $type);

    $args{'-iterations'} = $self->{'_iterations'};

    my $result = $self->factory('result')->create_object(%args);
    $result->hit_factory($self->factory('hit'));
    $self->{'_iterations'} = [];
    return $result;
}


# Title   : _add_hit (private function for internal use only)
# Purpose : Applies hit filtering and calls _store_hit if it passes filtering.
# Argument: FAST::Bio::Search::Hit::HitI object 

sub _add_hit {
    my ($self, $hit) = @_;
	
    my $hit_name = uc($hit->{-name});
    my $hit_signif = $hit->{-significance};
    my $ithresh = $self->{'_inclusion_threshold'};
	
    # Test significance using custom function (if supplied)
    my $add_hit = 1;
	
    my $hit_filter = $self->{'_hit_filter'};
	
    if($hit_filter) {
        # since &hit_filter is out of our control and would expect a HitI object,
        # we're forced to make one for it
        $hit = $self->factory('hit')->create_object(%{$hit});
        $add_hit = 0 unless &$hit_filter($hit);
    } else {
        if($self->{'_confirm_significance'}) {
            $add_hit = 0 unless $hit_signif <= $self->{'_max_significance'};
        }
        if($self->{'_confirm_score'}) {
            my $hit_score = $hit->{-score} || $hit->{-hsps}->[0]->{-score};
            $add_hit = 0 unless $hit_score >= $self->{'_min_score'};
        }
        if($self->{'_confirm_bits'}) {
            my $hit_bits = $hit->{-bits} || $hit->{-hsps}->[0]->{-bits};
            $add_hit = 0 unless $hit_bits >= $self->{'_min_bits'};
        }
    }
	
    $add_hit && $self->_store_hit($hit, $hit_name, $hit_signif);
    # Building hit lookup hashes for determining if the hit is old/new and 
    # above/below threshold.
    $self->{'_old_hit_names'}->{$hit_name}++;
    $self->{'_hit_names_below'}->{$hit_name}++ if $hit_signif <= $ithresh;
}

# Title   : _store_hit (private function for internal use only)
# Purpose : Collects hit objects into defined sets that are useful for 
#           analyzing PSI-blast results.
#           These are ultimately added to the iteration object in end_iteration().
#
# Strategy:
#   Primary split = old vs. new
#   Secondary split = below vs. above threshold
#   1. Has this hit occurred in a previous iteration?
#   1.1. If yes, was it below threshold?
#   1.1.1. If yes, ---> [oldhits_below] 
#   1.1.2. If no, is it now below threshold?
#   1.1.2.1. If yes, ---> [oldhits_newly_below] 
#   1.1.2.2. If no, ---> [oldhits_not_below] 
#   1.2. If no, is it below threshold?
#   1.2.1. If yes, ---> [newhits_below] 
#   1.2.2. If no, ---> [newhits_not_below] 
#   1.2.3. If don't know (no inclusion threshold data), ---> [newhits_unclassified] 
#   Note: As long as there's a default inclusion threshold, 
#         there won't be an unclassified set.
#
# For the first iteration, it might be nice to detect non-PSI blast reports
# and put the hits in the unclassified set.
# However, it shouldn't matter where the hits get put for the first iteration
# for non-PSI blast reports since they'll get flattened out in the
# result and iteration search objects.


sub _store_hit {
    my ($self, $hit, $hit_name, $hit_signif) = @_;

    my $ithresh = $self->{'_inclusion_threshold'};
    
    # This is the assumption leading to Bug 1986. The assumption here is that
    # the hit name is unique (and thus new), therefore any subsequent encounters
    # with a hit containing the same name are filed as old hits. This isn't
    # always true (see the bug report for a few examples). Adding an explicit
    # check for the presence of iterations, adding to new hits otherwise.
    
    if (exists $self->{'_old_hit_names'}->{$hit_name}
        && scalar @{$self->{_iterations}}) {
        if (exists $self->{'_hit_names_below'}->{$hit_name}) {
            push @{$self->{'_oldhits_below'}}, $hit;
        } elsif ($hit_signif <= $ithresh) {
            push @{$self->{'_oldhits_newly_below'}}, $hit;
        } else {
            push @{$self->{'_oldhits_not_below'}}, $hit;
        }
    } else {
        if ($hit_signif <= $ithresh) {
            push @{$self->{'_newhits_below'}}, $hit;
        } else {
            push @{$self->{'_newhits_not_below'}}, $hit;
        }
    }
    $self->{'_hitcount'}++;
}

=head2 start_iteration

 Title   : start_iteration
 Usage   : $handler->start_iteration()
 Function: Starts an Iteration event cycle
 Returns : none
 Args    : type of event and associated hashref

=cut

sub start_iteration {
    my ($self,$type) = @_;

    #print STDERR "ISREB: start_iteration()\n";
    $self->{'_iteration_count'}++;

    # Reset arrays for the various classes of hits.
#    $self->{'_newhits_unclassified'}     = [];
    $self->{'_newhits_below'}        = [];
    $self->{'_newhits_not_below'}    = [];
    $self->{'_oldhits_below'}        = [];
    $self->{'_oldhits_newly_below'}  = [];
    $self->{'_oldhits_not_below'}    = [];
    $self->{'_hitcount'} = 0;
    return;
}


=head2 end_iteration

 Title   : end_iteration
 Usage   : $handler->end_iteration()
 Function: Ends an Iteration event cycle
 Returns : FAST::Bio::Search::Iteration object
 Args    : type of event and associated hashref


=cut

sub end_iteration {
    my ($self,$type,$data) = @_;   

    # print STDERR "ISREB: end_iteration()\n";

    my %args = map { my $v = $data->{$_}; s/ITERATION//; ($_ => $v); } 
    grep { /^ITERATION/ } keys %{$data};

    $args{'-number'} = $self->{'_iteration_count'};
    $args{'-oldhits_below'} = $self->{'_oldhits_below'};
    $args{'-oldhits_newly_below'} = $self->{'_oldhits_newly_below'};
    $args{'-oldhits_not_below'} = $self->{'_oldhits_not_below'};
    $args{'-newhits_below'} = $self->{'_newhits_below'};
    $args{'-newhits_not_below'} = $self->{'_newhits_not_below'};
    $args{'-hit_factory'} = $self->factory('hit');

    my $it = $self->factory('iteration')->create_object(%args);
    push @{$self->{'_iterations'}}, $it;
    return $it;
}

=head2 max_significance

 Usage     : $obj->max_significance();
 Purpose   : Set/Get the P or Expect value used as significance screening cutoff.
             This is the value of the -signif parameter supplied to new().
             Hits with P or E-value above this are skipped.
 Returns   : Scientific notation number with this format: 1.0e-05.
 Argument  : Number (sci notation, float, integer) (when setting)
 Throws    : FAST::Bio::Root::BadParameter exception if the supplied argument is
           : not a valid number.
 Comments  : Screening of significant hits uses the data provided on the
           : description line. For NCBI BLAST1 and WU-BLAST, this data 
           : is P-value. for NCBI BLAST2 it is an Expect value.

=cut

sub max_significance {
    my $self = shift;
    if (@_) {
        my $sig = shift;
        if( $sig =~ /[^\d.e-]/ or $sig <= 0) {
            $self->throw(-class => 'FAST::Bio::Root::BadParameter',
                         -text => "Invalid significance value: $sig\n".
                         "Must be a number greater than zero.",
                         -value=>$sig);
        }
        $self->{'_confirm_significance'} = 1;
        $self->{'_max_significance'} = $sig;
    }
    sprintf "%.1e", $self->{'_max_significance'};
}


=head2 signif

Synonym for L<max_significance()|max_significance>

=cut

sub signif { shift->max_significance }

=head2 min_score

 Usage     : $obj->min_score();
 Purpose   : Gets the Blast score used as screening cutoff.
             This is the value of the -score parameter supplied to new().
             Hits with scores below this are skipped.
 Returns   : Integer (or undef if not set)
 Argument  : Integer (when setting)
 Throws    : FAST::Bio::Root::BadParameter exception if the supplied argument is
           : not a valid number.
 Comments  : Screening of significant hits uses the data provided on the
           : description line. 

=cut

sub min_score {



( run in 0.556 second using v1.01-cache-2.11-cpan-71847e10f99 )