BioPerl

 view release on metacpan or  search on metacpan

Bio/SearchIO/IteratedSearchResultEventBuilder.pm  view on Meta::CPAN

    # so that we can set different default factories than are set by the super class.
    $self->register_factory('result', $resultF ||
                            Bio::Factory::ObjectFactory->new(
                                -type      => 'Bio::Search::Result::BlastResult',
                                -interface => 'Bio::Search::Result::ResultI'));

    $self->register_factory('hit', $hitF ||
                            Bio::Factory::ObjectFactory->new(
                                -type      => 'Bio::Search::Hit::BlastHit',
                                -interface => 'Bio::Search::Hit::HitI'));

    $self->register_factory('hsp', $hspF ||
                            Bio::Factory::ObjectFactory->new(
                                -type      => 'Bio::Search::HSP::GenericHSP',
                                -interface => 'Bio::Search::HSP::HSPI'));

    # TODO: Change this to BlastIteration (maybe)
    $self->register_factory('iteration', $iterationF ||
                            Bio::Factory::ObjectFactory->new(
                                -type      => 'Bio::Search::Iteration::GenericIteration',
                                -interface => 'Bio::Search::Iteration::IterationI'));

    return $self;
}

=head2 will_handle

 Title   : will_handle
 Usage   : if( $handler->will_handle($event_type) ) { ... }
 Function: Tests if this event builder knows how to process a specific event
 Returns : boolean
 Args    : event type name

=cut

sub will_handle{
   my ($self,$type) = @_;
   # these are the events we recognize
   return (   $type eq 'hsp' || $type eq 'hit' || $type eq 'result'
           || $type eq 'iteration' || $type eq 'newhits' || $type eq 'oldhits' );
}

=head2 SAX methods

=cut

=head2 start_result

 Title   : start_result
 Usage   : $handler->start_result($resulttype)
 Function: Begins a result event cycle
 Returns : none
 Args    : Type of Report

=cut

sub start_result {
   my $self = shift;
   #print STDERR "ISREB: start_result()\n";
   $self->SUPER::start_result(@_);
   $self->{'_iterations'} = [];
   $self->{'_iteration_count'} = 0;
   $self->{'_old_hit_names'} = undef;
   $self->{'_hit_names_below'} = undef;
   return;
}

=head2 start_iteration

 Title   : start_iteration
 Usage   : $handler->start_iteration()
 Function: Starts an Iteration event cycle
 Returns : none
 Args    : type of event and associated hashref

=cut

sub start_iteration {
    my ($self,$type) = @_;

    #print STDERR "ISREB: start_iteration()\n";
    $self->{'_iteration_count'}++;

    # Reset arrays for the various classes of hits.
#    $self->{'_newhits_unclassified'}     = [];
    $self->{'_newhits_below'}        = [];
    $self->{'_newhits_not_below'}    = [];
    $self->{'_oldhits_below'}        = [];
    $self->{'_oldhits_newly_below'}  = [];
    $self->{'_oldhits_not_below'}    = [];
    $self->{'_hitcount'} = 0;
    return;
}


=head2 end_iteration

 Title   : end_iteration
 Usage   : $handler->end_iteration()
 Function: Ends an Iteration event cycle
 Returns : Bio::Search::Iteration object
 Args    : type of event and associated hashref

=cut

sub end_iteration {
    my ($self,$type,$data) = @_;

    # print STDERR "ISREB: end_iteration()\n";

    my %args = map { my $v = $data->{$_}; s/ITERATION//; ($_ => $v); }
    grep { /^ITERATION/ } keys %{$data};

    $args{'-number'} = $self->{'_iteration_count'};
    $args{'-oldhits_below'} = $self->{'_oldhits_below'};
    $args{'-oldhits_newly_below'} = $self->{'_oldhits_newly_below'};
    $args{'-oldhits_not_below'} = $self->{'_oldhits_not_below'};
    $args{'-newhits_below'} = $self->{'_newhits_below'};
    $args{'-newhits_not_below'} = $self->{'_newhits_not_below'};
    $args{'-hit_factory'} = $self->factory('hit');

    my $it = $self->factory('iteration')->create_object(%args);
    push @{$self->{'_iterations'}}, $it;
    return $it;
}

# Title   : _add_hit (private function for internal use only)
# Purpose : Applies hit filtering and calls _store_hit if it passes filtering.
# Argument: Bio::Search::Hit::HitI object

sub _add_hit {
    my ($self, $hit) = @_;

    my $hit_name   = uc($hit->{-name});
    my $hit_signif = $hit->{-significance};
    my $ithresh    = $self->{'_inclusion_threshold'};

    # Test significance using custom function (if supplied)
    my $add_hit = 1;

    my $hit_filter = $self->{'_hit_filter'};

    if($hit_filter) {
        # since &hit_filter is out of our control and would expect a HitI object,
        # we're forced to make one for it
        $hit = $self->factory('hit')->create_object(%{$hit});
        $add_hit = 0 unless &$hit_filter($hit);
    }
    else {
        if($self->{'_confirm_significance'}) {
            $add_hit = 0 unless $hit_signif <= $self->{'_max_significance'};
        }
        if($self->{'_confirm_score'}) {
            my $hit_score = $hit->{-score} || $hit->{-hsps}->[0]->{-score};
            $add_hit = 0 unless $hit_score >= $self->{'_min_score'};
        }
        if($self->{'_confirm_bits'}) {
            my $hit_bits = $hit->{-bits} || $hit->{-hsps}->[0]->{-bits};
            $add_hit = 0 unless $hit_bits >= $self->{'_min_bits'};
        }
    }

    $add_hit && $self->_store_hit($hit, $hit_name, $hit_signif);
    # Building hit lookup hashes for determining if the hit is old/new and
    # above/below threshold.
    $self->{'_old_hit_names'}->{$hit_name}++;
    $self->{'_hit_names_below'}->{$hit_name}++ if $hit_signif <= $ithresh;
}

# Title   : _store_hit (private function for internal use only)
# Purpose : Collects hit objects into defined sets that are useful for
#           analyzing PSI-blast results.
#           These are ultimately added to the iteration object in end_iteration().
#
# Strategy:
#   Primary split = old vs. new
#   Secondary split = below vs. above threshold
#   1. Has this hit occurred in a previous iteration?
#   1.1. If yes, was it below threshold?
#   1.1.1. If yes, ---> [oldhits_below]
#   1.1.2. If no, is it now below threshold?
#   1.1.2.1. If yes, ---> [oldhits_newly_below]
#   1.1.2.2. If no, ---> [oldhits_not_below]
#   1.2. If no, is it below threshold?
#   1.2.1. If yes, ---> [newhits_below]
#   1.2.2. If no, ---> [newhits_not_below]
#   1.2.3. If don't know (no inclusion threshold data), ---> [newhits_unclassified]
#   Note: As long as there's a default inclusion threshold,
#         there won't be an unclassified set.
#
# For the first iteration, it might be nice to detect non-PSI blast reports
# and put the hits in the unclassified set.
# However, it shouldn't matter where the hits get put for the first iteration
# for non-PSI blast reports since they'll get flattened out in the
# result and iteration search objects.

sub _store_hit {
    my ($self, $hit, $hit_name, $hit_signif) = @_;

    my $ithresh = $self->{'_inclusion_threshold'};

    # This is the assumption leading to Bug 1986. The assumption here is that
    # the hit name is unique (and thus new), therefore any subsequent encounters
    # with a hit containing the same name are filed as old hits. This isn't
    # always true (see the bug report for a few examples). Adding an explicit
    # check for the presence of iterations, adding to new hits otherwise.

    if (exists $self->{'_old_hit_names'}->{$hit_name}
        && scalar @{$self->{_iterations}}) {
        if (exists $self->{'_hit_names_below'}->{$hit_name}) {
            push @{$self->{'_oldhits_below'}}, $hit;
        } elsif ($hit_signif <= $ithresh) {
            push @{$self->{'_oldhits_newly_below'}}, $hit;
        } else {
            push @{$self->{'_oldhits_not_below'}}, $hit;
        }
    } else {
        if ($hit_signif <= $ithresh) {
            push @{$self->{'_newhits_below'}}, $hit;
        } else {
            push @{$self->{'_newhits_not_below'}}, $hit;
        }
    }
    $self->{'_hitcount'}++;
}

1;



( run in 0.630 second using v1.01-cache-2.11-cpan-71847e10f99 )