Bio-EnsEMBL
view release on metacpan or search on metacpan
lib/Bio/EnsEMBL/BaseAlignFeature.pm view on Meta::CPAN
my $mapped_length;
# explicit if statements to avoid rounding problems
# and make sure we have sane coordinate systems
if( $query_unit == 1 && $hit_unit == 3 ) {
$mapped_length = $length*3;
} elsif( $query_unit == 3 && $hit_unit == 1 ) {
$mapped_length = $length / 3;
} elsif ( $query_unit == 1 && $hit_unit == 1 ) {
$mapped_length = $length;
} else {
throw("Internal error $query_unit $hit_unit, currently only " .
"allowing 1 or 3 ");
}
if( int($mapped_length) != $mapped_length and
($piece =~ /M$/ or $piece =~ /D$/)) {
throw("Internal error with mismapped length of hit, query " .
"$query_unit, hit $hit_unit, length $length");
}
if( $piece =~ /M$/ ) {
#
# MATCH
#
my ( $qstart, $qend);
if( $strand1 == 1 ) {
$qstart = $start1;
$qend = $start1 + $length - 1;
$start1 = $qend + 1;
} else {
$qend = $start1;
$qstart = $start1 - $length + 1;
$start1 = $qstart - 1;
}
my ($hstart, $hend);
if( $strand2 == 1 ) {
$hstart = $start2;
$hend = $start2 + $mapped_length - 1;
$start2 = $hend + 1;
} else {
$hend = $start2;
$hstart = $start2 - $mapped_length + 1;
$start2 = $hstart - 1;
}
push @features, Bio::EnsEMBL::FeaturePair->new
(-SLICE => $self->{'slice'},
-SEQNAME => $self->{'seqname'},
-START => $qstart,
-END => $qend,
-STRAND => $strand1,
-HSLICE => $self->{'hslice'},
-HSEQNAME => $self->{'hseqname'},
-HSTART => $hstart,
-HEND => $hend,
-HSTRAND => $strand2,
-SCORE => $self->{'score'},
-PERCENT_ID => $self->{'percent_id'},
-ANALYSIS => $self->{'analysis'},
-P_VALUE => $self->{'p_value'},
-EXTERNAL_DB_ID => $self->{'external_db_id'},
-HCOVERAGE => $self->{'hcoverage'},
-GROUP_ID => $self->{'group_id'},
-LEVEL_ID => $self->{'level_id'});
# end M cigar bits
} elsif( $piece =~ /I$/ ) {
#
# INSERT
#
if( $strand1 == 1 ) {
$start1 += $length;
} else {
$start1 -= $length;
}
} elsif( $piece =~ /D$/ ) {
#
# DELETION
#
if( $strand2 == 1 ) {
$start2 += $mapped_length;
} else {
$start2 -= $mapped_length;
}
} else {
throw( "Illegal cigar line $string!" );
}
}
return \@features;
}
sub _parse_cigar {
my $self = shift;
if ($self->{'align_type'} eq 'ensembl') {
return $self->_parse_ensembl_cigar();
}
else {
throw("No parsing method implemented for " . $self->{'align_type'});
}
}
=head2 _parse_features
Arg [1] : listref Bio::EnsEMBL::FeaturePair $ungapped_features
Description: creates internal cigar_string and start,end hstart,hend
entries.
Returntype : none, fills in values of self
Exceptions : argument list undergoes many sanity checks - throws under many
invalid conditions
Caller : new
Status : Stable
=cut
sub _parse_features {
my ($self, $features) = @_;
if ($self->{'align_type'} eq 'ensembl') {
$self->_parse_ensembl_features($features);
} else {
throw("No _parse_features method implemented for " . $self->{'align_type'});
}
}
my $message_only_once = 1;
sub _parse_ensembl_features {
my ($self,$features ) = @_;
if (ref($features) ne "ARRAY") {
throw("features must be an array reference not a [".ref($features)."]");
} elsif (scalar(@$features) == 0) {
throw("features array must not be empty");
}
my $query_unit = $self->_query_unit();
my $hit_unit = $self->_hit_unit();
my $strand = $features->[0]->strand;
throw ('FeaturePair needs to have strand == 1 or strand == -1') if(!$strand);
my @f;
#
# Sort the features on their start position
# Ascending order on positive strand, descending on negative strand
#
if( $strand == 1 ) {
@f = sort {$a->start <=> $b->start} @$features;
} else {
@f = sort { $b->start <=> $a->start} @$features;
}
my $hstrand = $f[0]->hstrand;
my $slice = $f[0]->slice();
my $hslice = $f[0]->hslice();
my $name = $slice ? $slice->name() : undef;
my $hname = $f[0]->hseqname;
my $score = $f[0]->score;
my $percent = $f[0]->percent_id;
my $analysis = $f[0]->analysis;
my $pvalue = $f[0]->p_value();
my $external_db_id = $f[0]->external_db_id;
my $hcoverage = $f[0]->hcoverage;
my $group_id = $f[0]->group_id;
my $level_id = $f[0]->level_id;
my $seqname = $f[0]->seqname;
# implicit strand 1 for peptide sequences
$strand ||= 1;
$hstrand ||= 1;
my $ori = $strand * $hstrand;
throw("No features in the array to parse") if(scalar(@f) == 0);
my $prev1; # where last feature q part ended
my $prev2; # where last feature s part ended
my $string;
# Use strandedness info of query and hit to make sure both sets of
# start and end coordinates are oriented the right way around.
my $f1start;
my $f1end;
my $f2end;
my $f2start;
if ($strand == 1) {
$f1start = $f[0]->start;
$f1end = $f[-1]->end;
} else {
$f1start = $f[-1]->start;
$f1end = $f[0]->end;
}
if ($hstrand == 1) {
$f2start = $f[0]->hstart;
$f2end = $f[-1]->hend;
} else {
$f2start = $f[-1]->hstart;
$f2end = $f[0]->hend;
}
#
# Loop through each portion of alignment and construct cigar string
#
foreach my $f (@f) {
#
# Sanity checks
#
if (!$f->isa("Bio::EnsEMBL::FeaturePair")) {
throw("Array element [$f] is not a Bio::EnsEMBL::FeaturePair");
}
if( defined($f->hstrand()) && $f->hstrand() != $hstrand ) {
throw("Inconsistent hstrands in feature array");
}
if( defined($f->strand()) && ($f->strand != $strand)) {
throw("Inconsistent strands in feature array");
}
if ( defined($name) && $name ne $f->slice->name()) {
throw("Inconsistent names in feature array [$name - ".
$f->slice->name()."]");
}
if ( defined($hname) && $hname ne $f->hseqname) {
throw("Inconsistent hit names in feature array [$hname - ".
$f->hseqname . "]");
}
if ( defined($score) && $score ne $f->score) {
throw("Inconsisent scores in feature array [$score - " .
$f->score . "]");
}
if (defined($f->percent_id) && $percent ne $f->percent_id) {
throw("Inconsistent pids in feature array [$percent - " .
$f->percent_id . "]");
}
if(defined($pvalue) && $pvalue != $f->p_value()) {
throw("Inconsistant p_values in feature arraw [$pvalue " .
$f->p_value() . "]");
}
if($seqname && $seqname ne $f->seqname){
throw("Inconsistent seqname in feature array [$seqname - ".
$f->seqname . "]");
}
my $start1 = $f->start; #source sequence alignment start
my $start2 = $f->hstart(); #hit sequence alignment start
#
# More sanity checking
#
if (defined($prev1)) {
if ( $strand == 1 ) {
if ($f->start < $prev1) {
throw("Inconsistent coords in feature array (forward strand).\n" .
"Start [".$f->start()."] in current feature should be greater " .
"than previous feature end [$prev1].");
}
} else {
if ($f->end > $prev1) {
throw("Inconsistent coords in feature array (reverse strand).\n" .
"End [".$f->end() ."] should be less than previous feature " .
"start [$prev1].");
}
}
}
my $length = ($f->end - $f->start + 1); #length of source seq alignment
my $hlength = ($f->hend - $f->hstart + 1); #length of hit seq alignment
# using multiplication to avoid rounding errors, hence the
# switch from query to hit for the ratios
#
# Yet more sanity checking
#
if($query_unit > $hit_unit){
# I am going to make the assumption here that this situation will
# only occur with DnaPepAlignFeatures, this may not be true
my $query_p_length = sprintf "%.0f", ($length/$query_unit);
my $hit_p_length = sprintf "%.0f", ($hlength * $hit_unit);
if( $query_p_length != $hit_p_length) {
throw( "Feature lengths not comparable Lengths:" .$length .
" " . $hlength . " Ratios:" . $query_unit . " " .
$hit_unit );
}
} else{
my $query_d_length = sprintf "%.0f", ($length*$hit_unit);
my $hit_d_length = sprintf "%.0f", ($hlength * $query_unit);
if( $length * $hit_unit != $hlength * $query_unit ) {
throw( "Feature lengths not comparable Lengths:" . $length .
" " . $hlength . " Ratios:" . $query_unit . " " .
$hit_unit );
}
}
lib/Bio/EnsEMBL/BaseAlignFeature.pm view on Meta::CPAN
#there is a deletion
my $gap = $f->hstart - $prev2 - 1;
my $gap2 = int( $gap * $hlengthfactor + 0.5 );
if( $gap2 == 1 ) {
$gap2 = ""; # no need for a number if gap length is 1
}
$string .= "$gap2"."D";
#sanity check, Should not be an insertion and deletion
if($insertion_flag) {
if ($message_only_once) {
warning("Should not be an deletion and insertion on the " .
"same alignment region. cigar_line=$string\n");
$message_only_once = 0;
}
}
}
#shift our position in the hit seq alignment
$prev2 = $f->hend();
} else {
if( ( defined $prev2 ) && ( $f->hend() + 1 < $prev2 )) {
#there is a deletion
my $gap = $prev2 - $f->hend - 1;
my $gap2 = int( $gap * $hlengthfactor + 0.5 );
if( $gap2 == 1 ) {
$gap2 = ""; # no need for a number if gap length is 1
}
$string .= "$gap2"."D";
#sanity check, Should not be an insertion and deletion
if($insertion_flag) {
if ($message_only_once) {
warning("Should not be an deletion and insertion on the " .
"same alignment region. prev2 = $prev2; f->hend() = " .
$f->hend() . "; cigar_line = $string;\n");
$message_only_once = 0;
}
}
}
#shift our position in the hit seq alignment
$prev2 = $f->hstart();
}
my $matchlength = $f->end() - $f->start() + 1;
if( $matchlength == 1 ) {
$matchlength = "";
}
$string .= $matchlength."M";
}
$self->{'start'} = $f1start;
$self->{'end'} = $f1end;
$self->{'seqname'} = $seqname;
$self->{'strand'} = $strand;
$self->{'score'} = $score;
$self->{'percent_id'} = $percent;
$self->{'analysis'} = $analysis;
$self->{'slice'} = $slice;
$self->{'hslice'} = $hslice;
$self->{'hstart'} = $f2start;
$self->{'hend'} = $f2end;
$self->{'hstrand'} = $hstrand;
$self->{'hseqname'} = $hname;
$self->{'cigar_string'} = $string;
$self->{'p_value'} = $pvalue;
$self->{'external_db_id'} = $external_db_id;
$self->{'hcoverage'} = $hcoverage;
$self->{'group_id'} = $group_id;
$self->{'level_id'} = $level_id;
}
=head2 _hit_unit
Args : none
Description: abstract method, overwrite with something that returns
one or three
Returntype : int 1,3
Exceptions : none
Caller : internal
Status : Stable
=cut
sub _hit_unit {
my $self = shift;
throw( "Abstract method call!" );
}
=head2 _query_unit
Args : none
Description: abstract method, overwrite with something that returns
one or three
Returntype : int 1,3
Exceptions : none
Caller : internal
Status : Stable
=cut
sub _query_unit {
my $self = shift;
throw( "Abstract method call!" );
}
=head2 _mdtag_alignment_length
Arg [1] : None
Description: return the alignment length (including indels) based on the mdtag (mdz) string
( run in 0.981 second using v1.01-cache-2.11-cpan-39bf76dae61 )