Bio-EnsEMBL

 view release on metacpan or  search on metacpan

lib/Bio/EnsEMBL/Transcript.pm  view on Meta::CPAN


sub get_all_translateable_Exons {
  my ( $self ) = @_;

  # Returns a reference to a list holding the exons from the
  # translation's start exon through its end exon, in the order given
  # by get_all_Exons().  The start and end exons are swapped for the
  # result of adjust_start_end() whenever the translation does not
  # cover them completely.  An empty list reference is returned when
  # the transcript has no translation (i.e. pseudogene) or when the
  # translation start lies outside its start exon.

  my $translation = $self->translation;
  return [] unless $translation;

  my $first_coding_exon = $translation->start_Exon;
  my $last_coding_exon  = $translation->end_Exon;
  my $cds_start         = $translation->start;
  my $cds_end           = $translation->end;

  my @coding_exons;

EXON:
  for my $exon ( @{ $self->get_all_Exons } ) {

    # Nothing collected yet and this is not the start exon: we are
    # still upstream of the translated region.
    next EXON if $exon ne $first_coding_exon && !@coding_exons;

    my $exon_length = $exon->length;
    my ( $start_shift, $end_shift ) = ( 0, 0 );

    if ( $exon == $first_coding_exon ) {
      # The translation start is an offset into the start exon and
      # must fall inside it; otherwise give up with a warning.
      if ( $cds_start < 1 || $cds_start > $exon_length ) {
        warning("WARN: Translation start '$cds_start' is outside exon " . $exon->display_id . " length=$exon_length");
        return [];
      }
      $start_shift = $cds_start - 1;
    }

    # The translation end is not range-checked here; the shift is
    # simply the distance from the exon's last base.
    if ( $exon == $last_coding_exon ) {
      $end_shift = $cds_end - $exon_length;
    }

    # Only build a truncated copy when a coordinate really moves;
    # otherwise the original exon object is returned as is.
    my $coding_part = $exon;
    if ( $start_shift || $end_shift ) {
      $coding_part = $exon->adjust_start_end( $start_shift, $end_shift );
    }
    push @coding_exons, $coding_part;

    # The end exon closes the translated region.
    last EXON if $exon eq $last_coding_exon;
  }

  return \@coding_exons;
}


=head2 translate

  Arg [1]    : Boolean, emulate the behavior of old bioperl versions where
               an incomplete final codon of 2 characters is padded and guessed
  Example    : none
  Description: Return the peptide for this transcript.  A stop
               codon at the very end of the coding sequence is
               removed, so the peptide has no trailing '*'.  Does
               N-padding of non-phase matching exons.  It uses
               translateable_seq internally.  Returns undef if
               this Transcript does not have a translation (e.g.
               pseudogene) or if no sequence is left to translate.
  Returntype : Bio::Seq or undef
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub translate {
  my ($self, $complete_codon) = @_;

  # Nothing to do for a transcript without a translation.  The
  # explicit undef (rather than a bare return) is kept so callers in
  # list context keep receiving a single undef element.
  return undef if !defined( $self->translation() );

  # A sequence region may request a non-default codon table (e.g. the
  # mitochondrial one) through a 'codon_table' attribute.  The table
  # codes are listed at:
  # http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
  my $table_id;
  if ( defined( $self->slice() ) ) {
    my ($table_attrib) =
      @{ $self->slice()->get_all_Attributes('codon_table') };
    $table_id = $table_attrib->value() if defined $table_attrib;
  }

  # Fall back to table 1, the default vertebrate codon table.
  my $codon_table = Bio::Tools::CodonTable->new( -id => $table_id || 1 );

  my $cds = $self->translateable_seq();

  # Number of bases left over after the last complete codon.
  my $overhang = CORE::length($cds) % 3;
  if ($overhang) {
    if ($complete_codon) {
      # Emulate bioperl 1.6.1 and older, which guessed an incomplete
      # last codon: pad the sequence with N up to a full codon.
      $cds .= 'N' x ( 3 - $overhang );
    }
    else {
      # Drop the partial codon so that the result is the same across
      # bioperl versions.
      substr( $cds, -$overhang, $overhang, '' );
    }
  }

  return undef if CORE::length($cds) < 1;

  my $leading_codon  = substr( $cds, 0, 3 );
  my $trailing_codon = substr( $cds, -3, 3 );

  # Use the translation's display id; if that is false, fall back to
  # the stringified translation object.
  my $display_id = $self->translation->display_id();
  $display_id ||= "" . $self->translation();

  # The CDS is handed to BioPerl as an incomplete one, so BioPerl
  # itself will not
  # - strip the terminator character from the peptide,
  # - check for terminator characters inside the peptide,
  # - turn the first amino acid into 'M' for a legitimate start codon.
  # In-sequence '*' is not checked for at all, because the seq_edits
  # are only applied after the translation.
  my $peptide = $codon_table->translate( $cds, 0 );

  # A terminal stop codon is not reported in the peptide.  Callers
  # that want the trailing '*' should translate the output of
  # translateable_seq themselves.
  chop $peptide if $codon_table->is_ter_codon($trailing_codon);

  # A legitimate start codon always yields 'M' as first residue.
  if ( substr( $peptide, 0, 1 ) ne 'M'
    && $codon_table->is_start_codon($leading_codon) )
  {
    $peptide = 'M' . substr( $peptide, 1 );
  }

  my $peptide_seq = Bio::Seq->new( -seq      => $peptide,
                                   -alphabet => 'protein',
                                   -id       => $display_id );

  # Let the translation apply its edits to the peptide when enabled.
  $self->translation()->modify_translation($peptide_seq)
    if $self->edits_enabled();

  return $peptide_seq;
} ## end sub translate


=head2 seq

  Description: Returns a Bio::Seq object which consists of just
             : the sequence of the exons concatenated together,
             : without messing about with padding with N\'s from
             : Exon phases like B<dna_seq> does.
  Args       : none



( run in 1.382 second using v1.01-cache-2.11-cpan-39bf76dae61 )