Bio-EnsEMBL

 view release on metacpan or  search on metacpan

lib/Bio/EnsEMBL/Utils/VegaCuration/Transcript.pm  view on Meta::CPAN

$Bio::EnsEMBL::Utils::VegaCuration::Transcript::VERSION = '114.0.0';
use strict;
use warnings;
no warnings 'uninitialized';
use vars qw(@ISA);

use Bio::EnsEMBL::Utils::VegaCuration::Gene;
use Data::Dumper;

@ISA = qw(Bio::EnsEMBL::Utils::VegaCuration::Gene);


=head2 find_non_overlaps

   Args       : arrayref of B::E::Transcripts
   Example    : find_non_overlaps($all_transcripts)
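   Description: identifies pairs of transcripts where the first ends before
                the second starts, i.e. transcripts that do not overlap
   Returntype : arrayref of stable IDs (both IDs of each non-overlapping pair
                are added, so an ID can appear more than once)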
   Exceptions : none

=cut

sub find_non_overlaps {
  my ($self, $all_transcripts) = @_;

  # Flat list of stable IDs; each qualifying pair contributes two entries
  # (first transcript, then second), so IDs may repeat.
  my @stable_ids;

  # Compare every transcript against every transcript in the same list,
  # preserving the order in which the caller supplied them.
  for my $upstream (@{$all_transcripts}) {
    for my $downstream (@{$all_transcripts}) {

      # Only pairs where the first transcript ends strictly before the
      # second one starts are recorded.
      next unless $upstream->end() < $downstream->start();

      push @stable_ids, $upstream->stable_id, $downstream->stable_id;
    }
  }

  return \@stable_ids;
}

=head2 check_remarks_and_update_names

   Arg[1]     : B::E::Gene (with potentially duplicated transcript names)
   Arg[2]     : counter 1 (no. of patched genes)
   Arg[3]     : counter 2 (no. of patched transcripts)
   Example    : $support->check_remarks_and_update_names($gene,\$c1,\$c2)
   Description: - checks remarks and patches transcripts with identical names according to
                CDS and length
   Returntype : true | false (depending on whether patched or not), counter1, counter2

=cut

sub check_remarks_and_update_names {
  my $self = shift;
  my ($gene,$gene_c,$trans_c) = @_;
  my $action = ($self->param('dry_run')) ? 'Would add' : 'Added';
  my $aa  = $gene->adaptor->db->get_AttributeAdaptor;
  my $dbh = $gene->adaptor->db->dbc->db_handle;

  #get list of IDs that have previously been sent to annotators
  my $seen_genes = $self->get_havana_fragmented_loci_comments;

  my $gsi    = $gene->stable_id;
  my $gid    = $gene->dbID;
  my $g_name;
  my $study_more = 1;
  eval {
    $g_name = $gene->display_xref->display_id;
  };	
  if ($@) {
    $g_name = $gene->get_all_Attributes('name')->[0]->value;
  }

  #get existing gene remarks
  my $remarks = [ map {$_->value} @{$gene->get_all_Attributes('remark')} ];

  #shout if there is no remark to identify this as being fragmented
  if ( grep {$_ eq 'fragmented locus' } @$remarks) {
    $study_more = 0;
  }
  else {
    $self->log_warning("Gene $gsi should have a fragmented locus remark\n");
  }

  ##patch transcript names according to length and CDS
  $gene_c++;

  #separate coding and non_coding transcripts
  my $coding_trans = [];
  my $noncoding_trans = [];
  foreach my $trans ( @{$gene->get_all_Transcripts()} ) {
    if ($trans->translate) {
      push @$coding_trans, $trans;
    }
    else {
      push @$noncoding_trans, $trans;
    }
  }

  #sort transcripts coding > non-coding, then on length
  my $c = 0;
  $self->log("\nPatching names according to CDS and length:\n",1);
  foreach my $array_ref ($coding_trans,$noncoding_trans) {
    foreach my $trans ( sort { $b->length <=> $a->length } @$array_ref ) {
      $trans_c++;
      my $tsi = $trans->stable_id;
      my $t_name;
      eval {
	$t_name = $trans->display_xref->display_id;
      };	
      if ($@) {
	$t_name = $trans->get_all_Attributes('name')->[0]->value;
      }
      $c++;
      my $ext = sprintf("%03d", $c);
      my $new_name = $g_name.'-'.$ext;
      $self->log(sprintf("%-20s%-3s%-20s", "$t_name ", "-->", "$new_name")."\n",1);
      if (! $self->param('dry_run')) {
	
	# update transcript display xref
	$dbh->do(qq(UPDATE xref x, external_db edb
                       SET x.display_label  = "$new_name"
                     WHERE x.external_db_id = edb.external_db_id
                       AND x.dbprimary_acc  = "$tsi"



( run in 0.478 second using v1.01-cache-2.11-cpan-5735350b133 )