FAST
view release on metacpan or search on metacpan
lib/FAST/Bio/SearchIO/IteratedSearchResultEventBuilder.pm view on Meta::CPAN
-interface => 'FAST::Bio::Search::Iteration::IterationI'));
return $self;
}
#Initializes parameters used during parsing of Blast reports.
sub _init_parse_params {
my ($self, @args) = @_;
# -FILT_FUNC has been replaced by -HIT_FILTER.
# Leaving -FILT_FUNC in place for backward compatibility
my($ithresh, $signif, $score, $bits, $hit_filter, $filt_func) =
$self->_rearrange([qw(INCLUSION_THRESHOLD
SIGNIF SCORE BITS HIT_FILTER FILT_FUNC
)], @args);
$self->inclusion_threshold( defined($ithresh) ? $ithresh : $DEFAULT_INCLUSION_THRESHOLD);
my $hit_filt = $hit_filter || $filt_func;
defined $hit_filter && $self->hit_filter($hit_filt);
defined $signif && $self->max_significance($signif);
defined $score && $self->min_score($score);
defined $bits && $self->min_bits($bits);
}
=head2 will_handle
Title : will_handle
Usage : if( $handler->will_handle($event_type) ) { ... }
Function: Tests if this event builder knows how to process a specific event
Returns : boolean
Args : event type name
=cut
sub will_handle{
my ($self,$type) = @_;
# these are the events we recognize
return ( $type eq 'hsp' || $type eq 'hit' || $type eq 'result' || $type eq 'iteration' ||
$type eq 'newhits' || $type eq 'oldhits' );
}
=head2 SAX methods
=cut
=head2 start_result
Title : start_result
Usage : $handler->start_result($resulttype)
Function: Begins a result event cycle
Returns : none
Args : Type of Report
=cut
sub start_result {
my $self = shift;
#print STDERR "ISREB: start_result()\n";
$self->SUPER::start_result(@_);
$self->{'_iterations'} = [];
$self->{'_iteration_count'} = 0;
$self->{'_old_hit_names'} = undef;
$self->{'_hit_names_below'} = undef;
return;
}
=head2 end_result
Title : end_result
Usage : my @results = $parser->end_result
Function: Finishes a result handler cycle
Returns : A FAST::Bio::Search::Result::ResultI
Args : none
=cut
sub end_result {
my ($self,$type,$data) = @_;
#print STDERR "ISREB: end_result\n";
## How is runid getting set? Purpose?
if( defined $data->{'runid'} &&
$data->{'runid'} !~ /^\s+$/ ) {
if( $data->{'runid'} !~ /^lcl\|/) {
$data->{"RESULT-query_name"}= $data->{'runid'};
} else {
($data->{"RESULT-query_name"},$data->{"RESULT-query_description"}) =
split(/\s+/,$data->{"RESULT-query_description"},2);
}
if( my @a = split(/\|/,$data->{'RESULT-query_name'}) ) {
my $acc = pop @a ; # this is for accession |1234|gb|AAABB1.1|AAABB1
# this is for |123|gb|ABC1.1|
$acc = pop @a if( ! defined $acc || $acc =~ /^\s+$/);
$data->{"RESULT-query_accession"}= $acc;
}
delete $data->{'runid'};
}
my %args = map { my $v = $data->{$_}; s/RESULT//; ($_ => $v); }
grep { /^RESULT/ } keys %{$data};
$args{'-algorithm'} = uc( $args{'-algorithm_name'} ||
$data->{'RESULT-algorithm_name'} || $type);
$args{'-iterations'} = $self->{'_iterations'};
my $result = $self->factory('result')->create_object(%args);
$result->hit_factory($self->factory('hit'));
$self->{'_iterations'} = [];
return $result;
}
# Title : _add_hit (private function for internal use only)
# Purpose : Applies hit filtering and calls _store_hit if it passes filtering.
# Argument: FAST::Bio::Search::Hit::HitI object
sub _add_hit {
my ($self, $hit) = @_;
my $hit_name = uc($hit->{-name});
my $hit_signif = $hit->{-significance};
my $ithresh = $self->{'_inclusion_threshold'};
# Test significance using custom function (if supplied)
my $add_hit = 1;
my $hit_filter = $self->{'_hit_filter'};
if($hit_filter) {
# since &hit_filter is out of our control and would expect a HitI object,
# we're forced to make one for it
$hit = $self->factory('hit')->create_object(%{$hit});
$add_hit = 0 unless &$hit_filter($hit);
} else {
if($self->{'_confirm_significance'}) {
$add_hit = 0 unless $hit_signif <= $self->{'_max_significance'};
}
if($self->{'_confirm_score'}) {
my $hit_score = $hit->{-score} || $hit->{-hsps}->[0]->{-score};
$add_hit = 0 unless $hit_score >= $self->{'_min_score'};
}
if($self->{'_confirm_bits'}) {
my $hit_bits = $hit->{-bits} || $hit->{-hsps}->[0]->{-bits};
$add_hit = 0 unless $hit_bits >= $self->{'_min_bits'};
}
}
$add_hit && $self->_store_hit($hit, $hit_name, $hit_signif);
# Building hit lookup hashes for determining if the hit is old/new and
# above/below threshold.
$self->{'_old_hit_names'}->{$hit_name}++;
$self->{'_hit_names_below'}->{$hit_name}++ if $hit_signif <= $ithresh;
}
# Title : _store_hit (private function for internal use only)
# Purpose : Collects hit objects into defined sets that are useful for
# analyzing PSI-blast results.
# These are ultimately added to the iteration object in end_iteration().
#
# Strategy:
# Primary split = old vs. new
# Secondary split = below vs. above threshold
# 1. Has this hit occurred in a previous iteration?
# 1.1. If yes, was it below threshold?
# 1.1.1. If yes, ---> [oldhits_below]
# 1.1.2. If no, is it now below threshold?
# 1.1.2.1. If yes, ---> [oldhits_newly_below]
# 1.1.2.2. If no, ---> [oldhits_not_below]
# 1.2. If no, is it below threshold?
# 1.2.1. If yes, ---> [newhits_below]
# 1.2.2. If no, ---> [newhits_not_below]
# 1.2.3. If don't know (no inclusion threshold data), ---> [newhits_unclassified]
# Note: As long as there's a default inclusion threshold,
# there won't be an unclassified set.
#
# For the first iteration, it might be nice to detect non-PSI blast reports
# and put the hits in the unclassified set.
# However, it shouldn't matter where the hits get put for the first iteration
# for non-PSI blast reports since they'll get flattened out in the
# result and iteration search objects.
sub _store_hit {
my ($self, $hit, $hit_name, $hit_signif) = @_;
my $ithresh = $self->{'_inclusion_threshold'};
# This is the assumption leading to Bug 1986. The assumption here is that
# the hit name is unique (and thus new), therefore any subsequent encounters
# with a hit containing the same name are filed as old hits. This isn't
# always true (see the bug report for a few examples). Adding an explicit
# check for the presence of iterations, adding to new hits otherwise.
if (exists $self->{'_old_hit_names'}->{$hit_name}
&& scalar @{$self->{_iterations}}) {
if (exists $self->{'_hit_names_below'}->{$hit_name}) {
push @{$self->{'_oldhits_below'}}, $hit;
} elsif ($hit_signif <= $ithresh) {
push @{$self->{'_oldhits_newly_below'}}, $hit;
} else {
push @{$self->{'_oldhits_not_below'}}, $hit;
}
} else {
if ($hit_signif <= $ithresh) {
push @{$self->{'_newhits_below'}}, $hit;
} else {
push @{$self->{'_newhits_not_below'}}, $hit;
}
}
$self->{'_hitcount'}++;
}
=head2 start_iteration
Title : start_iteration
Usage : $handler->start_iteration()
Function: Starts an Iteration event cycle
Returns : none
Args : type of event and associated hashref
=cut
sub start_iteration {
my ($self,$type) = @_;
#print STDERR "ISREB: start_iteration()\n";
$self->{'_iteration_count'}++;
# Reset arrays for the various classes of hits.
# $self->{'_newhits_unclassified'} = [];
$self->{'_newhits_below'} = [];
$self->{'_newhits_not_below'} = [];
$self->{'_oldhits_below'} = [];
$self->{'_oldhits_newly_below'} = [];
$self->{'_oldhits_not_below'} = [];
$self->{'_hitcount'} = 0;
return;
}
=head2 end_iteration
Title : end_iteration
Usage : $handler->end_iteration()
Function: Ends an Iteration event cycle
Returns : FAST::Bio::Search::Iteration object
Args : type of event and associated hashref
=cut
sub end_iteration {
my ($self,$type,$data) = @_;
# print STDERR "ISREB: end_iteration()\n";
my %args = map { my $v = $data->{$_}; s/ITERATION//; ($_ => $v); }
grep { /^ITERATION/ } keys %{$data};
$args{'-number'} = $self->{'_iteration_count'};
$args{'-oldhits_below'} = $self->{'_oldhits_below'};
$args{'-oldhits_newly_below'} = $self->{'_oldhits_newly_below'};
$args{'-oldhits_not_below'} = $self->{'_oldhits_not_below'};
$args{'-newhits_below'} = $self->{'_newhits_below'};
$args{'-newhits_not_below'} = $self->{'_newhits_not_below'};
$args{'-hit_factory'} = $self->factory('hit');
my $it = $self->factory('iteration')->create_object(%args);
push @{$self->{'_iterations'}}, $it;
return $it;
}
=head2 max_significance
Usage : $obj->max_significance();
Purpose : Set/Get the P or Expect value used as significance screening cutoff.
This is the value of the -signif parameter supplied to new().
Hits with P or E-value above this are skipped.
Returns : Scientific notation number with this format: 1.0e-05.
Argument : Number (sci notation, float, integer) (when setting)
Throws : FAST::Bio::Root::BadParameter exception if the supplied argument is
: not a valid number.
Comments : Screening of significant hits uses the data provided on the
: description line. For NCBI BLAST1 and WU-BLAST, this data
: is P-value. for NCBI BLAST2 it is an Expect value.
=cut
sub max_significance {
my $self = shift;
if (@_) {
my $sig = shift;
if( $sig =~ /[^\d.e-]/ or $sig <= 0) {
$self->throw(-class => 'FAST::Bio::Root::BadParameter',
-text => "Invalid significance value: $sig\n".
"Must be a number greater than zero.",
-value=>$sig);
}
$self->{'_confirm_significance'} = 1;
$self->{'_max_significance'} = $sig;
}
sprintf "%.1e", $self->{'_max_significance'};
}
=head2 signif
Synonym for L<max_significance()|max_significance>
=cut
sub signif { shift->max_significance }
=head2 min_score
Usage : $obj->min_score();
Purpose : Gets the Blast score used as screening cutoff.
This is the value of the -score parameter supplied to new().
Hits with scores below this are skipped.
Returns : Integer (or undef if not set)
Argument : Integer (when setting)
Throws : FAST::Bio::Root::BadParameter exception if the supplied argument is
: not a valid number.
Comments : Screening of significant hits uses the data provided on the
: description line.
=cut
sub min_score {
( run in 0.556 second using v1.01-cache-2.11-cpan-71847e10f99 )