Bio-ToolBox
view release on metacpan or search on metacpan
lib/Bio/ToolBox/Parser/ucsc/builder.pm view on Meta::CPAN
if ( $self->gene_name ne $self->name2 ) {
$transcript->add_tag_value( 'Alias', $self->gene_name );
}
# adjust the primary_tag and biotype values as necessary
if ( exists $ensembldata->{ $self->name } ) {
my $t = $ensembldata->{ $self->name }->[1] || q();
if ( $t =~ /protein.coding/xi ) {
$transcript->primary_tag('mRNA');
$transcript->add_tag_value( 'biotype', $t );
}
elsif ( $t =~ /(?: rna | transcript )/xi ) {
$transcript->primary_tag($t); # this is redundant????
$transcript->add_tag_value( 'biotype', $t );
}
elsif ($t) {
$transcript->primary_tag('transcript');
$transcript->add_tag_value( 'biotype', $t );
}
}
# update extra attributes as necessary
$self->update_attributes($transcript);
# add transcript specific attributes
if ( defined $self->completeness ) {
$transcript->add_tag_value( 'completeness', $self->completeness );
}
if ( defined $self->status ) {
$transcript->add_tag_value( 'status', $self->status );
}
# add the exons
if ( $ucsc->do_exon ) {
$self->add_exons( $transcript, $gene );
}
# add CDS, UTRs, and codons if necessary
if ( $self->cdsStart - 1 != $self->cdsEnd ) {
if ( $ucsc->do_utr ) {
$self->add_utrs( $transcript, $gene );
}
if ( $ucsc->do_codon ) {
$self->add_codons( $transcript, $gene );
}
if ( $ucsc->do_cds ) {
$self->add_cds($transcript);
}
}
# record the type of transcript
$ucsc->{counts}{ $transcript->primary_tag } += 1;
# transcript is complete
return $transcript;
}
# update_attributes( $seqf )
#
# Copies the optional annotation carried by this builder object onto the
# given feature object. Every value is routed through add_unique_attribute(),
# so a value that the feature already holds under the same tag is not added
# a second time.
#
#   $seqf - feature object to annotate; it is only handed on to
#           add_unique_attribute()
#
# Each attribute is skipped when its source value is false (undefined,
# empty, or 0). The return value is whatever the final statement yields and
# is not meaningful.
sub update_attributes {
    my ( $self, $seqf ) = @_;

    # descriptive note, taken from the note() accessor
    $self->add_unique_attribute( $seqf, 'Note', $self->note )
        if $self->note;

    # RefSeq identifier, recorded as a database cross reference
    $self->add_unique_attribute( $seqf, 'Dbxref', 'RefSeq:' . $self->refseq )
        if $self->refseq;

    # The remaining values are read straight from the object hash; the
    # exists() test comes first so a missing key is simply skipped.

    # SwissProt identifier, recorded as a database cross reference
    $self->add_unique_attribute( $seqf, 'Dbxref', 'Swiss-Prot:' . $self->{spid} )
        if exists $self->{spid} and $self->{spid};

    # SwissProt display identifier, recorded under its own tag
    $self->add_unique_attribute( $seqf, 'swiss-prot_display_id', $self->{spdid} )
        if exists $self->{spdid} and $self->{spdid};

    # NCBI protein accession; note it shares the 'RefSeq:' prefix used above
    $self->add_unique_attribute( $seqf, 'Dbxref', 'RefSeq:' . $self->{protacc} )
        if exists $self->{protacc} and $self->{protacc};
}
# add_unique_attribute( $seqf, $tag, $value )
#
# Adds $value under $tag on the feature $seqf unless an identical value
# (compared as a string with eq) is already stored under that tag.
#
#   $seqf  - object providing get_tag_values($tag) and
#            add_tag_value($tag, $value)
#   $tag   - attribute name to inspect and extend
#   $value - attribute value to add if it is not already present
#
# Returns 0 when the value was already present; otherwise returns whatever
# add_tag_value() returns.
sub add_unique_attribute {
    my ( $self, $seqf, $tag, $value ) = @_;

    # stop at the first identical value already recorded for this tag
    for my $existing ( $seqf->get_tag_values($tag) ) {
        return 0 if $existing eq $value;
    }

    # nothing matched, so the value is new for this tag
    return $seqf->add_tag_value( $tag, $value );
}
sub add_exons {
my ( $self, $transcript, $gene ) = @_;
my $ucsc = $self->ucsc;
# Add the exons
EXON_LOOP:
for ( my $i = 0; $i < $self->exonCount; $i++ ) {
# first look for existing
if ( $ucsc->share and $gene ) {
my $exon = $self->find_existing_subfeature(
$gene, 'exon',
$self->exonStarts->[$i],
$self->exonEnds->[$i]
);
if ($exon) {
( run in 1.599 second using v1.01-cache-2.11-cpan-140bd7fdf52 )