BioPerl
view release on metacpan or search on metacpan
Bio/Search/HSP/GenericHSP.pm view on Meta::CPAN
=head2 Support
Please direct usage questions or support issues to the mailing list:
I<bioperl-l@bioperl.org>
rather than to the module maintainer directly. Many experienced and
reponsive experts will be able look at the problem and quickly
address it. Please include a thorough description of the problem
with code and data examples if at all possible.
=head2 Reporting Bugs
Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via the
web:
https://github.com/bioperl/bioperl-live/issues
=head1 AUTHOR - Jason Stajich and Steve Chervitz
Email jason-at-bioperl.org
Email sac-at-bioperl.org
=head1 CONTRIBUTORS
Sendu Bala, bix@sendu.me.uk
=head1 APPENDIX
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _
=cut
# Let the code begin...
package Bio::Search::HSP::GenericHSP;
use strict;
use Bio::Root::Root;
use Bio::SeqFeature::Similarity;
use base qw(Bio::Search::HSP::HSPI);
=head2 new
Title : new
Usage : my $obj = Bio::Search::HSP::GenericHSP->new();
Function: Builds a new Bio::Search::HSP::GenericHSP object
Returns : Bio::Search::HSP::GenericHSP
Args : -algorithm => algorithm used (BLASTP, TBLASTX, FASTX, etc)
-evalue => evalue
-pvalue => pvalue
-bits => bit value for HSP
-score => score value for HSP (typically z-score but depends on
analysis)
-hsp_length=> Length of the HSP (including gaps)
-identical => # of residues that that matched identically
-percent_identity => (optional) percent identity
-conserved => # of residues that matched conservatively
(only protein comparisons;
conserved == identical in nucleotide comparisons)
-hsp_gaps => # of gaps in the HSP
-query_gaps => # of gaps in the query in the alignment
-hit_gaps => # of gaps in the subject in the alignment
-query_name => HSP Query sequence name (if available)
-query_start => HSP Query start (in original query sequence coords)
-query_end => HSP Query end (in original query sequence coords)
-query_length=> total length of the query sequence
-query_seq => query sequence portion of the HSP
-query_desc => textual description of the query
-hit_name => HSP Hit sequence name (if available)
-hit_start => HSP Hit start (in original hit sequence coords)
-hit_end => HSP Hit end (in original hit sequence coords)
-hit_length => total length of the hit sequence
-hit_seq => hit sequence portion of the HSP
-hit_desc => textual description of the hit
-homology_seq=> homology sequence for the HSP
-hit_frame => hit frame (only if hit is translated protein)
-query_frame => query frame (only if query is translated protein)
-rank => HSP rank
-links => HSP links information (WU-BLAST only)
-hsp_group => HSP Group informat (WU-BLAST only)
-gap_symbol => symbol representing a gap (default = '-')
-hit_features=> string of features found in or near HSP hit
region (reported in some BLAST text output,
v. 2.2.13 and up)
-stranded => If the algorithm isn't known (i.e. defaults to
'generic'), setting this will indicate start/end
coordinates are to be used to determine the strand
for 'query', 'subject', 'hit', 'both', or 'none'
(default = 'none')
=cut
sub new {
my($class,%args) = @_;
# don't pass anything to SUPER; complex hierarchy results in lots of work
# for nothing
my $self = $class->SUPER::new();
# for speed, don't use _rearrange and just store all input data directly
# with no method calls and no work done. work can be carried
# out just-in-time later if desired
while (my ($arg, $value) = each %args) {
$arg =~ tr/a-z\055/A-Z/d;
$self->{$arg} = $value;
}
my $bits = $self->{BITS};
defined $self->{VERBOSE} && $self->verbose($self->{VERBOSE});
if (exists $self->{GAP_SYMBOL}) {
# not checking anything else but the length (must be 1 as only one gap
# symbol allowed currently); can add in support for symbol checks or
# multiple symbols later if needed
$self->throw("Gap symbol must be of length 1") if
CORE::length($self->{GAP_SYMBOL}) != 1;
Bio/Search/HSP/GenericHSP.pm view on Meta::CPAN
$value = $previous = '' unless defined $value;
$self->{PP_SEQ} = $value;
# do some housekeeping so we know when to
# re-run _calculate_seq_positions
$self->{'_sequenceschanged'} = 1;
}
return $previous;
}
=head2 length
Title : length
Usage : my $len = $hsp->length( ['query'|'hit'|'total'] );
Function : Returns the length of the query or hit in the alignment
(without gaps)
or the aggregate length of the HSP (including gaps;
this may be greater than either hit or query )
Returns : integer
Args : arg 1: 'query' = length of query seq (without gaps)
'hit' = length of hit seq (without gaps) (synonyms: sbjct, subject)
'total' = length of alignment (with gaps)
default = 'total'
arg 2: [optional] integer length value to set for specific type
=cut
sub length {
my $self = shift;
my $type = shift;
$type = 'total' unless defined $type;
$type = lc $type;
if( $type =~ /^q/i ) {
return $self->query()->length(shift);
} elsif( $type =~ /^(hit|subject|sbjct)/ ) {
return $self->hit()->length(shift);
} else {
my $v = shift;
if( defined $v ) {
$self->{HSP_LENGTH} = $v;
}
return $self->{HSP_LENGTH};
}
return 0; # should never get here
}
=head2 hsp_length
Title : hsp_length
Usage : my $len = $hsp->hsp_length()
Function: shortcut length('hsp')
Returns : floating point between 0 and 100
Args : none
=cut
sub hsp_length { return shift->length('hsp', shift); }
=head2 percent_identity
Title : percent_identity
Usage : my $percentid = $hsp->percent_identity()
Function: Returns the calculated percent identity for an HSP
Returns : floating point between 0 and 100
Args : none
=cut
sub percent_identity {
my $self = shift;
unless ($self->{_did_prepi}) {
$self->_pre_pi;
}
return $self->SUPER::percent_identity(@_);
}
=head2 frame
Title : frame
Usage : my ($qframe, $hframe) = $hsp->frame('list',$queryframe,$subjectframe)
Function: Set the Frame for both query and subject and insure that
they agree.
This overrides the frame() method implementation in
FeaturePair.
Returns : array of query and subject frame if return type wants an array
or query frame if defined or subject frame if not defined
Args : 'hit' or 'subject' or 'sbjct' to retrieve the frame of the subject (default)
'query' to retrieve the query frame
'list' or 'array' to retrieve both query and hit frames together
Note : Frames are stored in the GFF way (0-2) not 1-3
as they are in BLAST (negative frames are deduced by checking
the strand of the query or hit)
=cut
# Note: changed 4/19/08 - bug 2485
#
# frame() is supposed to be a getter/setter (as is implied by the Function desc
# above; this is also consistent with Bio::SeqFeature::SimilarityPair). Also,
# the API is not consistent with the other HSP/SimilarityPair methods such as
# strand(), start(), end(), etc.
#
# In order to make this consistent with other methods all work is now done
# when the features are instantiated and not delayed. We compromise by
# defaulting back 'to hit' w/o passed args. Setting is now allowed.
sub frame {
my $self = shift;
my $val = shift;
if (!defined $val) {
# unfortunately, w/o args we need to warn about API changes
$self->warn("API for frame() has changed.\n".
"Please refer to documentation for Bio::Search::HSP::GenericHSP;\n".
"returning query frame");
$val = 'query';
}
$val =~ s/^\s+//;
if( $val =~ /^q/i ) {
return $self->query->frame(@_);
} elsif( $val =~ /^hi|^s/i ) {
return $self->hit->frame(@_);
} elsif ( $val =~ /^list|array/i ) {
return ($self->query->frame($_[0]),
$self->hit->frame($_[1]) );
} elsif ( $val =~ /^\d+$/) {
# old API i.e. frame($query_frame, $hit_frame). This catches all but one
# case, where no arg is passed (so the hit is wanted).
$self->warn("API for frame() has changed.\n".
"Please refer to documentation for Bio::Search::HSP::GenericHSP");
wantarray ?
return ($self->query->frame($val),
$self->hit->frame(@_) ) :
return $self->hit->frame($val,@_);
Bio/Search/HSP/GenericHSP.pm view on Meta::CPAN
$strand = 1;
}
else {
$strand = undef;
}
}
else {
if ($hitfactor) {
$strand = -1;
}
else {
$strand = undef;
}
($hs,$he) = ( $he,$hs); # reverse subject: start bigger than end
}
my $sim2 = $self->{_sim2} || Bio::SeqFeature::Similarity->new();
$sim2->start($hs);
$sim2->end($he);
$sim2->significance($self->{EVALUE});
$sim2->bits($self->{BITS});
$sim2->score($self->{SCORE});
$sim2->strand($strand);
$sim2->seq_id($self->{HIT_NAME});
$sim2->seqlength($self->{HIT_LENGTH});
$sim2->source_tag($self->{ALGORITHM});
$sim2->seqdesc($self->{HIT_DESC});
$sim2->add_tag_value('meta', $self->{META}) if $self->can('meta');
my $hframe = $self->{HIT_FRAME};
if (defined $strand && !defined $hframe && $hitfactor) {
$hframe = ( $hs % 3 ) * $strand;
}
elsif (!defined $strand) {
$hframe = 0;
}
if( $hframe =~ /^([+-])?([0-3])/ ) {
my $dir = $1 || '+';
if($hframe && (($dir eq '-' && $strand >= 0) || ($dir eq '+' && $strand <= 0)) ) {
$self->warn("Subject frame ($hframe) did not match strand of subject ($strand)");
}
$hframe = $2 != 0 ? $2 - 1 : $2;
} else {
$self->warn("Unknown subject frame ($hframe)");
$hframe = 0;
}
$sim2->frame($hframe);
$self->SUPER::feature2($sim2);
$self->{_created_sff} = 1;
$self->{_making_sff} = 0;
}
# before calling the num_* methods
sub _pre_similar_stats {
my $self = shift;
my $identical = $self->{IDENTICAL};
my $conserved = $self->{CONSERVED};
my $percent_id = $self->{PERCENT_IDENTITY};
if (! defined $identical) {
if (! defined $percent_id) {
$self->warn("Did not defined the number of identical matches or overall percent identity in the HSP; assuming 0");
$identical = 0;
}
else {
$identical = sprintf("%.0f",$percent_id * $self->{HSP_LENGTH});
}
}
if (! defined $conserved) {
$self->warn("Did not define the number of conserved matches in the HSP; assuming conserved == identical ($identical)")
if( $self->{ALGORITHM} !~ /^((FAST|BLAST)N)|EXONERATE|SIM4|AXT|PSL|BLAT|BLASTZ|WABA/oi);
$conserved = $identical;
}
$self->{IDENTICAL} = $identical;
$self->{CONSERVED} = $conserved;
$self->{_did_presimilar} = 1;
}
# before calling the frac_* methods
sub _pre_frac {
my $self = shift;
my $hsp_len = $self->{HSP_LENGTH};
my $hit_len = $self->{HIT_LENGTH};
my $query_len = $self->{QUERY_LENGTH};
my $identical = $self->num_identical;
my $conserved = $self->num_conserved;
$self->{_did_prefrac} = 1;
my $logical;
if( $hsp_len ) {
$self->length('total', $hsp_len);
$logical = $self->_logical_length('total');
$self->frac_identical( 'total', $identical / $hsp_len);
$self->frac_conserved( 'total', $conserved / $hsp_len);
}
if( $hit_len ) {
$logical = $self->_logical_length('hit');
$self->frac_identical( 'hit', $identical / $logical);
$self->frac_conserved( 'hit', $conserved / $logical);
}
if( $query_len ) {
$logical = $self->_logical_length('query');
$self->frac_identical( 'query', $identical / $logical) ;
$self->frac_conserved( 'query', $conserved / $logical);
}
}
# before calling gaps()
# This relies first on passed parameters (parser-dependent), then on gaps
# calculated by seq_inds() (if set), then falls back to directly checking
# for '-' or '.' as a last resort
sub _pre_gaps {
my $self = shift;
my $query_gaps = $self->{QUERY_GAPS};
my $query_seq = $self->{QUERY_SEQ};
my $hit_gaps = $self->{HIT_GAPS};
my $hit_seq = $self->{HIT_SEQ};
my $gaps = $self->{HSP_GAPS};
$self->{_did_pregaps} = 1; # well, we're in the process; avoid recursion
if( defined $query_gaps ) {
$self->gaps('query', $query_gaps);
} elsif( defined $query_seq ) {
my $qg = (defined $self->{'_query_offset'}) ? $self->seq_inds('query','gaps')
: ($self->algorithm eq 'ERPIN') ? scalar( $hit_seq =~ tr/\-//)
: scalar( $query_seq =~ tr/\-\.// ); # HMMER3 and Infernal uses '.' and '-'
my $offset = $self->{'_query_offset'} || 1;
$self->gaps('query', $qg/$offset);
}
if( defined $hit_gaps ) {
$self->gaps('hit', $hit_gaps);
} elsif( defined $hit_seq ) {
my $hg = (defined $self->{'_sbjct_offset'}) ? $self->seq_inds('hit','gaps')
: ($self->algorithm eq 'ERPIN') ? scalar( $hit_seq =~ tr/\-//)
: scalar( $hit_seq =~ tr/\-\.// ); # HMMER3 and Infernal uses '.' and '-'
my $offset = $self->{'_sbjct_offset'} || 1;
$self->gaps('hit', $hg/$offset);
}
if( ! defined $gaps ) {
$gaps = $self->gaps("query") + $self->gaps("hit");
}
$self->gaps('total', $gaps);
}
# before percent_identity
sub _pre_pi {
my $self = shift;
$self->{_did_prepi} = 1;
$self->percent_identity($self->{PERCENT_IDENTITY} || $self->frac_identical('total')*100) if( $self->{HSP_LENGTH} > 0 );
}
1;
( run in 1.290 second using v1.01-cache-2.11-cpan-39bf76dae61 )