Bio-EnsEMBL
view release on metacpan or search on metacpan
lib/Bio/EnsEMBL/Utils/VegaCuration/Transcript.pm view on Meta::CPAN
$Bio::EnsEMBL::Utils::VegaCuration::Transcript::VERSION = '114.0.0';
use strict;
use warnings;
no warnings 'uninitialized';
use vars qw(@ISA);
use Bio::EnsEMBL::Utils::VegaCuration::Gene;
use Data::Dumper;
@ISA = qw(Bio::EnsEMBL::Utils::VegaCuration::Gene);
=head2 find_non_overlaps

  Args       : arrayref of B::E::Transcripts
  Example    : find_non_overlaps($all_transcripts)
  Description: compares every transcript against every transcript in the list
               and, for each ordered pair where the first ends before the
               second starts, records the stable IDs of both
  Returntype : arrayref of stable IDs, laid out as consecutive
               (first, second) pairs; an ID is repeated once for every
               pair it takes part in
  Exceptions : none

=cut

sub find_non_overlaps {
  my ($self, $all_transcripts) = @_;

  my @pair_ids;
  for my $upstream (@$all_transcripts) {
    # every transcript that starts strictly after $upstream has ended
    my @downstream = grep { $upstream->end < $_->start } @$all_transcripts;

    # keep the (upstream, downstream) ordering for each pair found
    push @pair_ids, map { ($upstream->stable_id, $_->stable_id) } @downstream;
  }

  return \@pair_ids;
}
=head2 check_remarks_and_update_names
Arg[1] : B::E::Gene (with potentially duplicated transcript names)
Arg[2] : counter 1 (no. of patched genes)
Arg[3] : counter 2 (no. of patched transcripts)
Example : $support->check_remarks_and_update_names($gene,\$c1,\$c2)
Description: - checks remarks and patches transcripts with identical names according to
CDS and length
Returntype : true | false (depending on whether patched or not), counter1, counter2
=cut
sub check_remarks_and_update_names {
my $self = shift;
my ($gene,$gene_c,$trans_c) = @_;
my $action = ($self->param('dry_run')) ? 'Would add' : 'Added';
my $aa = $gene->adaptor->db->get_AttributeAdaptor;
my $dbh = $gene->adaptor->db->dbc->db_handle;
#get list of IDs that have previously been sent to annotators
my $seen_genes = $self->get_havana_fragmented_loci_comments;
my $gsi = $gene->stable_id;
my $gid = $gene->dbID;
my $g_name;
my $study_more = 1;
eval {
$g_name = $gene->display_xref->display_id;
};
if ($@) {
$g_name = $gene->get_all_Attributes('name')->[0]->value;
}
#get existing gene remarks
my $remarks = [ map {$_->value} @{$gene->get_all_Attributes('remark')} ];
#shout if there is no remark to identify this as being fragmented
if ( grep {$_ eq 'fragmented locus' } @$remarks) {
$study_more = 0;
}
else {
$self->log_warning("Gene $gsi should have a fragmented locus remark\n");
}
##patch transcript names according to length and CDS
$gene_c++;
#separate coding and non_coding transcripts
my $coding_trans = [];
my $noncoding_trans = [];
foreach my $trans ( @{$gene->get_all_Transcripts()} ) {
if ($trans->translate) {
push @$coding_trans, $trans;
}
else {
push @$noncoding_trans, $trans;
}
}
#sort transcripts coding > non-coding, then on length
my $c = 0;
$self->log("\nPatching names according to CDS and length:\n",1);
foreach my $array_ref ($coding_trans,$noncoding_trans) {
foreach my $trans ( sort { $b->length <=> $a->length } @$array_ref ) {
$trans_c++;
my $tsi = $trans->stable_id;
my $t_name;
eval {
$t_name = $trans->display_xref->display_id;
};
if ($@) {
$t_name = $trans->get_all_Attributes('name')->[0]->value;
}
$c++;
my $ext = sprintf("%03d", $c);
my $new_name = $g_name.'-'.$ext;
$self->log(sprintf("%-20s%-3s%-20s", "$t_name ", "-->", "$new_name")."\n",1);
if (! $self->param('dry_run')) {
# update transcript display xref
$dbh->do(qq(UPDATE xref x, external_db edb
SET x.display_label = "$new_name"
WHERE x.external_db_id = edb.external_db_id
AND x.dbprimary_acc = "$tsi"
( run in 0.478 second using v1.01-cache-2.11-cpan-5735350b133 )