Bio-EnsEMBL

 view release on metacpan or  search on metacpan

lib/Bio/EnsEMBL/DBSQL/TranslationAdaptor.pm  view on Meta::CPAN


  Description: Retrieves all alternative translations associated with a
               particular transcript.  If no alternative translation is
               found, a reference to an empty list is returned.

  Returntype : listref of Bio::EnsEMBL::Translation
  Exceptions : throw on incorrect argument
  Caller     : Transcript
  Status     : Stable

=cut

sub fetch_all_alternative_by_Transcript {
  my ( $self, $transcript ) = @_;

  # Fetches every translation row attached to $transcript whose id differs
  # from the transcript's canonical_translation_id.
  #
  #   Arg [1]  : Bio::EnsEMBL::Transcript $transcript
  #   Returns  : listref of Bio::EnsEMBL::Translation; an empty listref
  #              when the query yields no rows
  #   Throws   : on an argument rejected by assert_ref(), or when a row's
  #              start/end exon is not among the transcript's exons

  assert_ref($transcript, 'Bio::EnsEMBL::Transcript');

  # SQL expressions for the two date columns, as produced by the
  # connection's from_date_to_seconds().
  my $tl_created_date =
    $self->db()->dbc()->from_date_to_seconds('tl.created_date');
  my $tl_modified_date =
    $self->db()->dbc()->from_date_to_seconds('tl.modified_date');

  my $sql =
    sprintf( "SELECT tl.translation_id, tl.start_exon_id, "
      . "tl.end_exon_id, tl.seq_start, tl.seq_end, "
      . "tl.stable_id, tl.version, %s, %s "
      . "FROM translation tl "
      . "JOIN transcript t "
      . "ON (t.transcript_id = tl.transcript_id) "
      . "WHERE tl.transcript_id = ? "
      . "AND tl.translation_id != t.canonical_translation_id",
    $tl_created_date, $tl_modified_date );

  my $transcript_id = $transcript->dbID();
  my $sth           = $self->prepare($sql);
  $sth->bind_param( 1, $transcript_id, SQL_INTEGER );

  $sth->execute();

  my (
    $translation_id, $start_exon_id, $end_exon_id,
    $seq_start,      $seq_end,       $stable_id,
    $version,        $created_date,  $modified_date
  );

  $sth->bind_columns(
    \(
      $translation_id, $start_exon_id, $end_exon_id,
      $seq_start,      $seq_end,       $stable_id,
      $version,        $created_date,  $modified_date
    ) );

  # dbID => exon lookup.  It is filled in on the first usable row only, so
  # the exon list is walked once rather than once per row, and is not
  # touched at all when the query returns nothing.
  my $exon_by_id;

  # Get all alternative translations.
  my $translations = [];
  while ( $sth->fetch() ) {
    if ( !defined($translation_id) ) { next }

    if ( !defined($exon_by_id) ) {
      $exon_by_id = {};
      foreach my $exon ( @{ $transcript->get_all_Exons() } ) {
        my $exon_id = $exon->dbID();
        # Later exons overwrite earlier ones with the same dbID, which
        # matches the outcome of a full scan without an early exit.
        if ( defined($exon_id) ) { $exon_by_id->{$exon_id} = $exon }
      }
    }

    # A NULL exon id can never match; leave the exon undefined so the
    # check below reports it.
    my $start_exon =
      defined($start_exon_id) ? $exon_by_id->{$start_exon_id} : undef;
    my $end_exon =
      defined($end_exon_id) ? $exon_by_id->{$end_exon_id} : undef;

    if ( !( defined($start_exon) && defined($end_exon) ) ) {
      throw(
        sprintf(
          "Could not find start or end exon in transcript_id=%d\n",
          $transcript->dbID() ) );
    }

    my $translation =
      Bio::EnsEMBL::Translation->new_fast( {
                             'dbID'          => $translation_id,
                             'adaptor'       => $self,
                             'start'         => $seq_start,
                             'end'           => $seq_end,
                             'start_exon'    => $start_exon,
                             'end_exon'      => $end_exon,
                             'stable_id'     => $stable_id,
                             'version'       => $version,
                             # false date values (0, '', NULL) become undef
                             'created_date'  => $created_date || undef,
                             'modified_date' => $modified_date || undef,
                           } );

    $translation->transcript($transcript);

    push( @{$translations}, $translation );

  } ## end while ( $sth->fetch() )

  return $translations;
} ## end sub fetch_all_alternative_by_Transcript

=head2 fetch_by_Transcript

  Arg [1]    : Bio::EnsEMBL::Transcript $transcript
  Example    : $tl = $translation_adaptor->fetch_by_Transcript($transcript);
  Description: Retrieves a Translation via its associated transcript.
               If the Translation is not found, undef is returned.
  Returntype : Bio::EnsEMBL::Translation
  Exceptions : throw on incorrect argument
  Caller     : Transcript
  Status     : Stable

=cut

sub fetch_by_Transcript {
  my ( $self, $transcript ) = @_;

  # Returns the translation whose id equals the canonical_translation_id
  # of $transcript, or undef when the query produces no such row.

  assert_ref( $transcript, 'Bio::EnsEMBL::Transcript' );

  # SQL expressions for the two date columns, as produced by the
  # connection's from_date_to_seconds().
  my $created_expr =
    $self->db()->dbc()->from_date_to_seconds('tl.created_date');
  my $modified_expr =
    $self->db()->dbc()->from_date_to_seconds('tl.modified_date');

  my $template = join( '',
    "SELECT tl.translation_id, tl.start_exon_id, ",
    "tl.end_exon_id, tl.seq_start, tl.seq_end, ",
    "tl.stable_id, tl.version, %s, %s ",
    "FROM translation tl ",
    "JOIN transcript tr ",
    "ON (tl.translation_id = tr.canonical_translation_id) ",
    "WHERE tr.transcript_id = ?" );
  my $query = sprintf( $template, $created_expr, $modified_expr );

  my $sth = $self->prepare($query);
  $sth->bind_param( 1, $transcript->dbID(), SQL_INTEGER );
  $sth->execute();

  # Only the first row is read; an empty result leaves every column undef.
  my %col;
  @col{
    qw( translation_id start_exon_id end_exon_id
        seq_start      seq_end       stable_id
        version        created_date  modified_date )
  } = $sth->fetchrow_array();
  $sth->finish();

  return undef unless defined( $col{'translation_id'} );

  # Scan every exon of the transcript (no early exit), so the last exon
  # with a matching dbID is the one kept.
  my ( $first_exon, $last_exon );
  for my $candidate ( @{ $transcript->get_all_Exons() } ) {
    my $candidate_id = $candidate->dbID();
    $first_exon = $candidate if $candidate_id == $col{'start_exon_id'};
    $last_exon  = $candidate if $candidate_id == $col{'end_exon_id'};
  }

  if ( !defined($first_exon) || !defined($last_exon) ) {
    my $message =
      sprintf( "Could not find start or end exon in transcript_id=%d\n",
        $transcript->dbID() );
    throw($message);
  }

  my $translation = Bio::EnsEMBL::Translation->new_fast( {
      'dbID'          => $col{'translation_id'},
      'adaptor'       => $self,
      'stable_id'     => $col{'stable_id'},
      'version'       => $col{'version'},
      'start'         => $col{'seq_start'},
      'end'           => $col{'seq_end'},
      'start_exon'    => $first_exon,
      'end_exon'      => $last_exon,
      # false date values (0, '', NULL) become undef
      'created_date'  => $col{'created_date'} || undef,
      'modified_date' => $col{'modified_date'} || undef,
  } );

  $translation->transcript($transcript);

  return $translation;
} ## end sub fetch_by_Transcript



=head2 fetch_all_by_external_name

  Arg [1]    : string $external_name
               The external identifier for the translation(s) to be
               obtained.
  Arg [2]    : (optional) string $external_db_name
               The name of the external database from which the
               identifier originates.
  Arg [3]    : Boolean override. Force SQL regex matching for users
               who really do want to find all 'NM%'
  Example    : my @translations =
                  @{ $trl_adaptor->fetch_all_by_external_name('BRCA2') };
               my @many_translations = 
                  @{ $trl_adaptor->fetch_all_by_external_name('BRCA%') };
  Description: Retrieves a list of translations fetched via an
               external identifier.  Note that this may not be a
               particularly useful method, because translations
               do not make much sense out of the context of
               their transcript.  It may be better to use the
               TranscriptAdaptor::fetch_all_by_external_name instead.
               SQL wildcards % and _ are supported in the $external_name,
               but their use is somewhat restricted for performance reasons.
               Users who really do want % and _ in the first three characters
               should use argument 3 to prevent optimisations.
  Returntype : reference to a list of Translations
  Exceptions : none
  Caller     : general

lib/Bio/EnsEMBL/DBSQL/TranslationAdaptor.pm  view on Meta::CPAN

    throw("reference to list of Transcripts argument is required");
  }

  return [] if(!@$transcripts);

  my %trans_hash = map {$_->dbID() => $_} @$transcripts;
  my @id_list = keys %trans_hash;

  my @out;

  # mysql is faster and we ensure that we do not exceed the max query size by
  # splitting large queries into smaller queries of 200 ids
  my $max_size = 200;

  my ( $transcript_id, $translation_id, $start_exon_id, $end_exon_id,
       $seq_start, $seq_end, $stable_id, $version, 
       $created_date, $modified_date );

  my %ex_hash;

  while(@id_list) {
    my @ids;
    if(@id_list > $max_size) {
      @ids = splice(@id_list, 0, $max_size);
    } else {
      @ids = splice(@id_list, 0);
    }

    my $id_str;
    if(@ids > 1)  {
      $id_str = " IN (" . join(',', @ids). ")";
    } else {
      $id_str = " = " . $ids[0];
    }
    
    my $canonical_lookup = $self->dbc()->sql_helper()->execute_into_hash(
      -SQL => 'SELECT transcript_id, canonical_translation_id FROM transcript WHERE transcript_id '.$id_str
    );

    my $created_date = $self->db->dbc->from_date_to_seconds("tl.created_date");
    my $modified_date = $self->db->dbc->from_date_to_seconds("tl.modified_date");

    my $sth = $self->prepare
      ("SELECT tl.transcript_id, tl.translation_id, tl.start_exon_id,
           tl.end_exon_id, tl.seq_start, tl.seq_end,
           tl.stable_id, tl.version, " . $created_date . "," .
       $modified_date . 
       " FROM translation tl
         WHERE tl.transcript_id $id_str");

    $sth->execute();

    $sth->bind_columns( \$transcript_id, \$translation_id, \$start_exon_id, \$end_exon_id,
                        \$seq_start, \$seq_end, \$stable_id, \$version,
			\$created_date, \$modified_date );

    while($sth->fetch()) {
      my ($start_exon, $end_exon);

      # this will load all the exons whenever we load the translation
      # but I guess thats ok ....

      my $tr = $trans_hash{$transcript_id};

      foreach my $exon (@{$tr->get_all_Exons()}) {
        if(!$start_exon && $exon->dbID() == $start_exon_id ) {
          $start_exon = $exon;
          last if($end_exon);
        }

        if(!$end_exon && $exon->dbID() == $end_exon_id ) {
          $end_exon = $exon;
          last if($start_exon);
        }
      }

      unless($start_exon && $end_exon) {
        throw("Could not find start or end exon in transcript\n");
      }

      my $tl =  Bio::EnsEMBL::Translation->new
        (-dbID => $translation_id,
         -seq_start => $seq_start,
         -seq_end => $seq_end,
         -start_exon => $start_exon,
         -end_exon => $end_exon,
         -stable_id => $stable_id,
         -version => $version,
	 -created_date => $created_date || undef,
	 -modified_date => $modified_date || undef);

      # Calling the new method will set $tl->version to '1' if $version is not defined.
      # But if the version in the database is NULL, $version will be undef; and so we
      # need to override the default version of '1', and set it back to undef.
      $tl->{version} = undef unless defined $version;
      
      $tl->adaptor($self);
      my $canonical_translation_id = $canonical_lookup->{$transcript_id};
      $tr->translation($tl) if $translation_id == $canonical_translation_id;

      push @out, $tl;
    }
  }

  return \@out;
}


=head2 fetch_all

  Example     : $translations = $translation_adaptor->fetch_all();
  Description : Retrieves all canonical and alternative translations 
                stored in the database.
  Returntype  : listref of Bio::EnsEMBL::Translation
  Caller      : general
  Status      : At Risk

=cut

sub fetch_all {
  my ($self) = @_;



( run in 0.920 second using v1.01-cache-2.11-cpan-5837b0d9d2c )