BioPerl
view release on metacpan or search on metacpan
Bio/SeqIO/game/gameWriter.pm view on Meta::CPAN
sub _write_feature {
my ($self, $feat, $bare) = @_;
my $writer = $self->{writer};
my $id;
for ( 'standard_name', $feat->primary_tag, 'ID' ) {
$id = $self->_find_name($feat, $_ );
last if $id;
}
$id ||= $feat->primary_tag . '_' . ++$self->{$feat->primary_tag}->{id};
unless ( $bare ) {
$writer->startTag('annotation', id => $id);
$self->_element('name', $id);
$self->_element('type', $feat->primary_tag);
}
$writer->startTag('feature_set', id => $id);
$self->_element('name', $id);
$self->_element('type', $feat->primary_tag);
$self->_render_tags( $feat,
\&_render_date_tags,
\&_render_comment_tags,
\&_render_tags_as_properties
);
$self->_feature_span($id, $feat);
$writer->endTag('feature_set');
$writer->endTag('annotation') unless $bare;
}
=head2 _write_gene
Title : _write_gene
Usage : $self->_write_gene($feature)
Function: internal method for rendering gene containment hierarchies into
a nested <annotation> element
Returns : nothing
Args : a nested Bio::SeqFeature::Generic gene feature
Note : A nested gene hierarchy (gene->mRNA->CDS->exon) is expected. If other gene
subfeatures occur as level one subfeatures (same level as mRNA subfeats)
an attempt will be made to link them to transcripts via the 'standard_name'
qualifier
=cut
sub _write_gene {
my ($self, $feat) = @_;
my $writer = $self->{writer};
my $str = $feat->strand;
my $id = $self->_find_name($feat, 'standard_name')
|| $self->_find_name($feat, 'gene')
|| $self->_find_name($feat, $feat->primary_tag)
|| $self->_find_name($feat, 'locus_tag')
|| $self->_find_name($feat, 'symbol')
|| $self->throw(<<EOM."Feature name was: '".($feat->display_name || 'not set')."'");
Could not find a gene/feature ID, feature must have a primary tag or a tag
with one of the names: 'standard_name', 'gene', 'locus_tag', or 'symbol'.
EOM
my $gid = $self->_find_name($feat, 'gene') || $id;
$writer->startTag('annotation', id => $id);
$self->_element('name', $gid);
$self->_element('type', $feat->primary_tag);
$self->_render_tags( $feat,
\&_render_date_tags,
\&_render_dbxref_tags,
\&_render_comment_tags,
\&_render_tags_as_properties,
);
my @genes;
if ( $feat->primary_tag eq 'gene' ) {
@genes = ($feat);
}
else {
# we are in a gene container; gene must then be one level down
@genes = grep { $_->primary_tag eq 'gene' } $feat->get_SeqFeatures;
}
for my $g ( @genes ) {
my $id ||= $self->_find_name($g, 'standard_name')
|| $self->_find_name($g, 'gene')
|| $self->_find_name($feat, 'locus_tag')
|| $self->_find_name($feat, 'symbol')
|| $self->throw("Could not find a gene ID");
my $gid ||= $self->_find_name($g, 'gene') || $self->_find_name($g);
$writer->startTag('gene', association => 'IS');
$self->_element('name', $gid);
$writer->endTag('gene');
my $proteins;
my @mRNAs = grep { $_->primary_tag =~ /mRNA|transcript/ } $g->get_SeqFeatures;
my @other_stuff = grep { $_->primary_tag !~ /mRNA|transcript/ } $g->get_SeqFeatures;
my @variants = ('A' .. 'Z');
for my $mRNA (@mRNAs) {
my ($sn, @units);
# if the mRNA is a generic transcript, it must be a non-spliced RNA gene
# Make a synthetic exon to help build a hierarchy. We have to assume that
# the location is not segmented (otherwise it should be a mRNA)
if ( $mRNA->primary_tag eq 'transcript') {
my $exon = Bio::SeqFeature::Generic->new ( -primary => 'exon' );
$exon->location($mRNA->location);
$mRNA->add_SeqFeature($exon);
}
# no subfeats? Huh? revert to generic feature
unless ( $mRNA->get_SeqFeatures ) {
$self->_write_feature($mRNA, 1); # 1 flag writes the bare feature
# with no annotation wrapper
next;
}
my $name = $self->_find_name($mRNA, $mRNA->primary_tag)
|| $self->_find_name($mRNA, 'standard_name');
my %attributes;
my ($cds) = grep { $_->primary_tag eq 'CDS' } $mRNA->get_SeqFeatures;
# make sure we have the right CDS for alternatively spliced genes
# This is meant to deal with sequences from flattened game annotations,
# where both the mRNA and CDS have split locations
if ( $cds && @mRNAs > 1 && $name ) {
$cds = $self->_check_cds($cds, $name);
}
elsif ( $cds && @mRNAs == 1 ) {
# The mRNA/CDS pairing must be right. Get the transcript name from the CDS
if ( $cds->has_tag('standard_name') ) {
($name) = $cds->get_tag_values('standard_name');
}
}
if ( !$name ) {
# assign a name to the transcript if it has no 'standard_name' binder
$name = $id . '-R' . (shift @variants);
}
my $pname;
if ( $cds ) {
($sn) = $cds->get_tag_values('standard_name')
if $cds->has_tag('standard_name');
($sn) ||= $cds->get_tag_values('mRNA')
if $cds->has_tag('mRNA');
# the protein needs a name
my $psn = $self->protein_id($cds, $sn);
$self->{curr_pname} = $psn;
Bio/SeqIO/game/gameWriter.pm view on Meta::CPAN
$feat->add_tag_value('protein_id', $psn);
}
# define the translation offset
my ($c_start, $c_end);
if ( $cds->has_tag('codon_start') ){
($c_start) = $cds->get_tag_values('codon_start');
$cds->remove_tag('codon_start');
}
else {
$c_start = 1;
}
my $cs = Bio::SeqFeature::Generic->new;
if ( $c_start == 1 ) {
$c_start = $cds->strand > 0 ? $cds->start : $cds->end;
}
if ( $cds->strand < 1 ) {
$c_end = $c_start;
$c_start = $c_start - 2;
}
else {
$c_end = $c_start + 2;
}
$cs->start($c_start);
$cs->end($c_end);
$cs->strand($cds->strand);
$cs->primary_tag('start_codon');
$cs->add_tag_value( 'standard_name' => $name );
push @units, $cs;
if ( $cds->has_tag('problem') ) {
my ($val) = $cds->get_tag_values('problem');
$cds->remove_tag('problem');
$attributes{problem} = $val;
}
my ($aa) = $cds->get_tag_values('translation')
if $cds->has_tag('translation');
if ( $aa && $psn ) {
$cds->remove_tag('translation');
my %add_seq = ();
$add_seq{residues} = $aa;
$add_seq{header} = ['seq',
id => $psn,
length => length $aa,
type => 'aa' ];
if ( $cds->has_tag('product_desc') ) {
($add_seq{desc}) = $cds->get_tag_values('product_desc');
$cds->remove_tag('product_desc');
}
unless ( $add_seq{desc} && $add_seq{desc} =~ /cds_boundaries/ ) {
my $start = $cds->start;
my $end = $cds->end;
my $str = $cds->strand;
my $acc = $self->{seq}->accession || $self->{seq}->display_id;
$str = $str < 0 ? '[-]' : '';
$add_seq{desc} = "translation from_gene[$gid] " .
"cds_boundaries:(" . $acc .
":$start..$end$str) transcript_info:[$name]";
}
$self->{add_seqs} ||= [];
push @{$self->{add_seqs}}, \%add_seq;
}
}
$writer->startTag('feature_set', id => $name);
$self->_element('name', $name);
$self->_element('type', 'transcript');
$self->_render_tags($_,
\&_render_date_tags,
\&_render_comment_tags,
\&_render_tags_as_properties,
) for ( $mRNA, ($cds) || () );
# any UTR's, etc associated with this transcript?
for my $thing ( @other_stuff ) {
if ( $thing->has_tag('standard_name') ) {
my ($v) = $thing->get_tag_values('standard_name');
if ( $v eq $sn ) {
push @units, $thing;
}
}
}
# add the exons
push @units, grep { $_->primary_tag eq 'exon' } $mRNA->get_SeqFeatures;
@units = sort { $a->start <=> $b->start } @units;
my $count = 0;
if ( $str < 0 ) {
@units = reverse @units;
}
for my $unit ( @units ) {
if ( $unit->primary_tag eq 'exon' ) {
my $ename = $id;
$ename .= ':' . ++$count;
$self->_feature_span($ename, $unit);
}
elsif ( $unit->primary_tag eq 'start_codon' ) {
$self->_feature_span(($sn || $gid), $unit, $self->{curr_pname});
}
else {
my $uname = $unit->primary_tag . ":$id";
$self->_feature_span($uname, $unit);
}
}
$self->{curr_pname} = '';
$writer->endTag('feature_set');
}
$self->{other_stuff} = \@other_stuff;
}
$writer->endTag('annotation');
# add the protein sequences
for ( @{$self->{add_seqs}} ) {
my %h = %$_;
$writer->startTag(@{$h{header}});
my @desc = split /\s+/, $h{desc};
my $desc = '';
for my $word (@desc) {
my ($lastline) = $desc =~ /.*^(.+)$/sm;
$lastline ||= '';
$desc .= length $lastline < 50 ? " $word " : "\n $word ";
}
$self->_element('description', "\n $desc\n ");
my $aa = $h{residues};
$aa =~ s/(\w{60})/$1\n /g;
$aa =~ s/\n\s+$//m;
$aa = "\n " . $aa . "\n ";
$self->_element('residues', $aa);
$writer->endTag('seq');
$self->{add_seqs} = [];
}
# Is there anything else associated with the gene? We have to write other
# features as stand-alone annotations or apollo will assume they are
# transcripts
for my $thing ( @{$self->{other_stuff}} ) {
next if $thing->has_tag('standard_name');
$self->_write_feature($thing);
}
$self->{other_stuff} = [];
}
=head2 _check_cds
Title : _check_cds
Usage : $self->_check_cds($cds, $name)
Function: internal method to check if the CDS associated with an mRNA is
the correct alternative splice variant
Returns : a Bio::SeqFeature::Generic CDS object
Args : the CDS object plus the transcript\'s 'standard_name'
Note : this method only works if alternatively spliced transcripts are bound
together by a 'standard_name' or 'mRNA' qualifier. If none is present,
we will hope that the exons were derived from a segmented RNA or a CDS
with no associated mRNA feature. Neither of these two cases would be
( run in 0.936 second using v1.01-cache-2.11-cpan-97f6503c9c8 )