Bio-EnsEMBL
view release on metacpan or search on metacpan
lib/Bio/EnsEMBL/Transcript.pm view on Meta::CPAN
# Returns a reference to an array holding the exons of this transcript
# that fall inside the translation, in the order given by get_all_Exons.
# The exon holding the translation start and the exon holding the
# translation end are replaced by the result of adjust_start_end() when
# the translation coordinates require a non-zero adjustment; all other
# exons are returned as-is.
#
# Returns a reference to an empty array when the transcript has no
# translation, or when the translation start lies outside its start exon
# (a warning is issued in that case).
sub get_all_translateable_Exons {
    my ($self) = @_;

    # No translation (i.e. pseudogene): nothing is translateable.
    my $translation = $self->translation;
    return [] unless $translation;

    my $first_coding_exon = $translation->start_Exon;
    my $last_coding_exon  = $translation->end_Exon;
    my $t_start           = $translation->start;
    my $cds_end           = $translation->end;

    my @coding;

  EXON:
    for my $exon ( @{ $self->get_all_Exons } ) {

        # Nothing collected yet and this is not the start exon, so we
        # have not reached the translated region.
        next EXON if $exon ne $first_coding_exon and !@coding;

        my $length = $exon->length;
        my ( $trim_front, $trim_back ) = ( 0, 0 );

        # The start exon is shortened at its front so that it begins
        # at the translation start.
        if ( $exon == $first_coding_exon ) {
            if ( $t_start < 1 or $t_start > $length ) {
                warning("WARN: Translation start '$t_start' is outside exon " . $exon->display_id . " length=$length");
                return [];
            }
            $trim_front = $t_start - 1;
        }

        # The end exon is adjusted at its end relative to the
        # translation end. No range check is applied to the
        # translation end here.
        if ( $exon == $last_coding_exon ) {
            $trim_back = $cds_end - $length;
        }

        # Only build an adjusted exon when an adjustment is needed;
        # otherwise the original exon object is reused.
        my $kept = $exon;
        if ( $trim_back || $trim_front ) {
            $kept = $exon->adjust_start_end( $trim_front, $trim_back );
        }
        push @coding, $kept;

        # The end exon closes the translated region.
        last EXON if $exon eq $last_coding_exon;
    }

    return \@coding;
}
=head2 translate
Arg [1] : Boolean, emulate the behavior of old bioperl versions where
an incomplete final codon of 2 characters is padded and guessed
Example : none
Description: Return the peptide for this transcript. A terminal
stop codon, if present, is removed from the peptide.
Does N-padding of non-phase matching exons. It uses
translateable_seq internally. Returns undef if this
Transcript does not have a translation (i.e. pseudogene).
Returntype : Bio::Seq or undef
Exceptions : none
Caller : general
Status : Stable
=cut
sub translate {
    my ( $self, $complete_codon ) = @_;

    # Without a translation (i.e. pseudogene) there is no peptide.
    # An explicit undef is returned, as documented for this method.
    return undef unless defined( $self->translation() );

    # Alternative codon tables (such as the mitochondrial codon table)
    # can be specified for a sequence region via the 'codon_table'
    # attribute of the slice. A list of codon tables and their codes:
    # http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
    my $codon_table_id;
    if ( defined( $self->slice() ) ) {
        my ($attrib) =
            @{ $self->slice()->get_all_Attributes('codon_table') };
        $codon_table_id = $attrib->value() if defined($attrib);
    }

    # Fall back to table 1, the default vertebrate codon table.
    $codon_table_id ||= 1;

    my $codon_table =
        Bio::Tools::CodonTable->new( -id => $codon_table_id );

    my $mrna = $self->translateable_seq();

    # Number of bases left over after the last complete codon.
    my $overhang = CORE::length($mrna) % 3;
    if ( $overhang > 0 ) {
        if ($complete_codon) {
            # Emulate bioperl 1.6.1 and older, which guessed an
            # incomplete final codon: pad the sequence with N up to
            # a whole codon.
            $mrna .= 'N' x ( 3 - $overhang );
        }
        else {
            # Otherwise drop the partial codon so that the behaviour
            # is consistent across bioperl versions.
            $mrna = substr( $mrna, 0, CORE::length($mrna) - $overhang );
        }
    }

    return undef if CORE::length($mrna) < 1;

    my $first_mrna_codon = substr( $mrna, 0,  3 );
    my $last_mrna_codon  = substr( $mrna, -3, 3 );

    # Prefer the translation's display id; fall back to the
    # stringified translation object.
    my $display_id = $self->translation->display_id();
    $display_id = "" . $self->translation() unless $display_id;

    # From BioPerl's perspective the CDS is treated as incomplete, so
    # BioPerl will not
    #  - remove the terminator character from the peptide
    #  - check for terminator characters inside the peptide
    #  - turn the first amino acid into 'M' for a legitimate start
    # In-sequence '*' is not checked here because seq_edits have to be
    # applied after translation.
    my $p_seq = $codon_table->translate( $mrna, 0 );

    # Drop the final '*' when the last codon is a stop codon. Callers
    # wanting the terminal stop should translate translateable_seq
    # themselves.
    chop $p_seq if $codon_table->is_ter_codon($last_mrna_codon);

    # Force the first amino acid to 'M' when the first codon is a
    # start codon in this codon table.
    my $starts_with_met = substr( $p_seq, 0, 1 ) eq 'M';
    if ( !$starts_with_met
        && $codon_table->is_start_codon($first_mrna_codon) )
    {
        $p_seq = 'M' . substr( $p_seq, 1 );
    }

    my $translation = Bio::Seq->new(
        -seq      => $p_seq,
        -alphabet => 'protein',
        -id       => $display_id
    );

    # Let the translation object modify the peptide when edits are
    # enabled on this transcript.
    if ( $self->edits_enabled() ) {
        $self->translation()->modify_translation($translation);
    }

    return $translation;
} ## end sub translate
=head2 seq
Description: Returns a Bio::Seq object which consists of just
: the sequence of the exons concatenated together,
: without messing about with padding with N\'s from
: Exon phases like B<dna_seq> does.
Args : none
( run in 1.382 second using v1.01-cache-2.11-cpan-39bf76dae61 )