BioPerl

 view release on metacpan or  search on metacpan

Bio/SeqIO/game/gameWriter.pm  view on Meta::CPAN


sub _write_feature {
    my ($self, $feat, $bare) = @_;
    my $writer = $self->{writer};
    my $id;

    for ( 'standard_name', $feat->primary_tag, 'ID' ) {
	$id = $self->_find_name($feat, $_ );
	last if $id;
    } 

    $id ||= $feat->primary_tag . '_' . ++$self->{$feat->primary_tag}->{id};

    unless ( $bare ) {
	$writer->startTag('annotation', id => $id); 
	$self->_element('name', $id);
	$self->_element('type', $feat->primary_tag);
    }

    $writer->startTag('feature_set', id => $id);
    $self->_element('name', $id);
    $self->_element('type', $feat->primary_tag);
    $self->_render_tags( $feat,
			 \&_render_date_tags,
			 \&_render_comment_tags,
			 \&_render_tags_as_properties
		       );
    $self->_feature_span($id, $feat);
    $writer->endTag('feature_set');
    $writer->endTag('annotation') unless $bare;
}

=head2 _write_gene

 Title   : _write_gene
 Usage   : $self->_write_gene($feature)
 Function: internal method for rendering gene containment hierarchies into 
           a nested <annotation> element 
 Returns : nothing
 Args    : a nested Bio::SeqFeature::Generic gene feature
 Note    : A nested gene hierarchy (gene->mRNA->CDS->exon) is expected.  If other gene 
           subfeatures occur as level one subfeatures (same level as mRNA subfeats) 
           an attempt will be made to link them to transcripts via the 'standard_name'
           qualifier

=cut

sub _write_gene {
    my ($self, $feat) = @_;
    my $writer = $self->{writer};
    my $str = $feat->strand;
    my $id = $self->_find_name($feat, 'standard_name')
          || $self->_find_name($feat, 'gene')
	  || $self->_find_name($feat, $feat->primary_tag)
	  || $self->_find_name($feat, 'locus_tag') 
	  || $self->_find_name($feat, 'symbol')
          || $self->throw(<<EOM."Feature name was: '".($feat->display_name || 'not set')."'");
Could not find a gene/feature ID, feature must have a primary tag or a tag
with one of the names: 'standard_name', 'gene', 'locus_tag', or 'symbol'.
EOM
    my $gid = $self->_find_name($feat, 'gene') || $id;

    $writer->startTag('annotation', id => $id);
    $self->_element('name', $gid);
    $self->_element('type', $feat->primary_tag);
    $self->_render_tags( $feat,
			 \&_render_date_tags,
			 \&_render_dbxref_tags,
			 \&_render_comment_tags,
			 \&_render_tags_as_properties,
		       );
    
    my @genes;
    
    if ( $feat->primary_tag eq 'gene' ) {
	@genes = ($feat);
    }
    else {
	# we are in a gene container; gene must then be one level down
	@genes = grep { $_->primary_tag eq 'gene' } $feat->get_SeqFeatures;
    }

    for my $g ( @genes ) {
	my $id ||= $self->_find_name($g, 'standard_name')
               || $self->_find_name($g, 'gene') 
	       || $self->_find_name($feat, 'locus_tag')
               || $self->_find_name($feat, 'symbol')
               || $self->throw("Could not find a gene ID");
	my $gid ||= $self->_find_name($g, 'gene') || $self->_find_name($g);

	$writer->startTag('gene', association => 'IS');
        $self->_element('name', $gid);
        $writer->endTag('gene');

        my $proteins;
	my @mRNAs = grep { $_->primary_tag =~ /mRNA|transcript/ } $g->get_SeqFeatures;
	my @other_stuff = grep { $_->primary_tag !~ /mRNA|transcript/ } $g->get_SeqFeatures;
	my @variants = ('A' .. 'Z');

	for my $mRNA (@mRNAs) {
	    my ($sn, @units);
            # if the mRNA is a generic transcript, it must be a non-spliced RNA gene
            # Make a synthetic exon to help build a hierarchy.  We have to assume that
            # the location is not segmented (otherwise it should be a mRNA)
	    if ( $mRNA->primary_tag eq 'transcript') {
		my $exon = Bio::SeqFeature::Generic->new ( -primary => 'exon' );
		$exon->location($mRNA->location);
		$mRNA->add_SeqFeature($exon);
	    }

            # no subfeats? Huh? revert to generic feature
	    unless ( $mRNA->get_SeqFeatures ) {
		$self->_write_feature($mRNA, 1); # 1 flag writes the bare feature
                                                 # with no annotation wrapper
		next;
	    }

	    my $name = $self->_find_name($mRNA, $mRNA->primary_tag) 
                     || $self->_find_name($mRNA, 'standard_name');

	    my %attributes;
            my ($cds) = grep { $_->primary_tag eq 'CDS' } $mRNA->get_SeqFeatures;

	    # make sure we have the right CDS for alternatively spliced genes
	    # This is meant to deal with sequences from flattened game annotations, 
	    # where both the mRNA and CDS have split locations
	    if ( $cds && @mRNAs > 1 && $name ) {
		$cds = $self->_check_cds($cds, $name);
	    }
	    elsif ( $cds && @mRNAs == 1 ) {
		# The mRNA/CDS pairing must be right. Get the transcript name from the CDS
		if ( $cds->has_tag('standard_name') ) {
		    ($name) = $cds->get_tag_values('standard_name');
                }
	    }
	    
	    if ( !$name ) {
		# assign a name to the transcript if it has no 'standard_name' binder
		$name = $id . '-R' . (shift @variants);
	    }

            my $pname;

	    if ( $cds ) {
		($sn) = $cds->get_tag_values('standard_name')
		    if $cds->has_tag('standard_name');
		($sn) ||= $cds->get_tag_values('mRNA')
		   if $cds->has_tag('mRNA');

		# the protein needs a name
		my $psn = $self->protein_id($cds, $sn);
                $self->{curr_pname} = $psn;

Bio/SeqIO/game/gameWriter.pm  view on Meta::CPAN

		    $feat->add_tag_value('protein_id', $psn);
		}

                # define the translation offset
		my ($c_start, $c_end);
		if ( $cds->has_tag('codon_start') ){
		    ($c_start) = $cds->get_tag_values('codon_start');
		    $cds->remove_tag('codon_start');
		}
		else {
		    $c_start = 1;
		}
		my $cs  = Bio::SeqFeature::Generic->new;
		if ( $c_start == 1 ) {
		    $c_start = $cds->strand > 0 ? $cds->start : $cds->end;
		}
		if ( $cds->strand < 1 ) {
		    $c_end = $c_start;
		    $c_start = $c_start - 2;
		}
		else {
		    $c_end = $c_start + 2;
		}
		$cs->start($c_start);
		$cs->end($c_end);
		$cs->strand($cds->strand);
		$cs->primary_tag('start_codon');
		$cs->add_tag_value( 'standard_name' => $name );
		push @units, $cs;


		if ( $cds->has_tag('problem') ) {
		    my ($val) = $cds->get_tag_values('problem');
		    $cds->remove_tag('problem');
		    $attributes{problem} = $val;
		}
		
		my ($aa) = $cds->get_tag_values('translation')
		    if $cds->has_tag('translation');
		
		if ( $aa && $psn ) {
		    $cds->remove_tag('translation');
		    my %add_seq = ();
		    $add_seq{residues} = $aa;
		    $add_seq{header} = ['seq',
					id     => $psn,
					length => length $aa,
					type   => 'aa' ];
		    
		    if ( $cds->has_tag('product_desc') ) {
			($add_seq{desc}) = $cds->get_tag_values('product_desc');
			$cds->remove_tag('product_desc');
		    }
		    
		    unless ( $add_seq{desc} && $add_seq{desc} =~ /cds_boundaries/ ) {
			my $start = $cds->start;
			my $end   = $cds->end;
			my $str   = $cds->strand;
			my $acc   = $self->{seq}->accession || $self->{seq}->display_id;
			$str = $str < 0 ? '[-]' : '';
			$add_seq{desc}  = "translation from_gene[$gid] " .
			    "cds_boundaries:(" . $acc . 
			    ":$start..$end$str) transcript_info:[$name]";
		    }
		    $self->{add_seqs} ||= [];
		    push @{$self->{add_seqs}}, \%add_seq;
		}
	    }

	    
	    $writer->startTag('feature_set', id => $name);
	    $self->_element('name', $name);
	    $self->_element('type', 'transcript');
	    $self->_render_tags($_,
				\&_render_date_tags,
				\&_render_comment_tags,
				\&_render_tags_as_properties,
			       ) for ( $mRNA, ($cds) || () );
	     
	    # any UTR's, etc associated with this transcript?
	    for my $thing ( @other_stuff ) {
		if ( $thing->has_tag('standard_name') ) {
		    my ($v)  = $thing->get_tag_values('standard_name');
		    if ( $v eq $sn ) {
			push @units, $thing;
		    }
		}
	    }
	    
	    # add the exons
	    push @units, grep { $_->primary_tag eq 'exon' } $mRNA->get_SeqFeatures;
	    @units = sort { $a->start <=> $b->start } @units;

	    my $count  = 0;
	    
	    if ( $str < 0 ) {
		@units = reverse @units;
	    }
            
	    for my $unit ( @units ) {
		if ( $unit->primary_tag eq 'exon' ) {
		    my $ename = $id;
		    $ename .= ':' . ++$count;
		    $self->_feature_span($ename, $unit);
		}
		elsif ( $unit->primary_tag eq 'start_codon' ) {
		    $self->_feature_span(($sn || $gid), $unit, $self->{curr_pname});
		}
		else {
		    my $uname = $unit->primary_tag . ":$id";
		    $self->_feature_span($uname, $unit);
		}
	    }
	    $self->{curr_pname} = '';
	    $writer->endTag('feature_set');
	}
	
	$self->{other_stuff} = \@other_stuff;
    }    
    
    $writer->endTag('annotation');

    # add the protein sequences
    for ( @{$self->{add_seqs}} ) {
	my %h = %$_;
	$writer->startTag(@{$h{header}});
	my @desc = split /\s+/, $h{desc};
	my $desc = '';
	for my $word (@desc) {
	    my ($lastline) = $desc =~ /.*^(.+)$/sm;
	    $lastline ||= '';
	    $desc .= length $lastline < 50 ? " $word " : "\n      $word ";
	}
        $self->_element('description', "\n     $desc\n    ");

	my $aa = $h{residues};
	$aa =~ s/(\w{60})/$1\n      /g;
	$aa =~ s/\n\s+$//m;
	$aa = "\n      " . $aa . "\n    ";
	$self->_element('residues', $aa);
	$writer->endTag('seq');
	$self->{add_seqs} = [];
    }
    
    # Is there anything else associated with the gene?  We have to write other
    # features as stand-alone annotations or apollo will assume they are
    # transcripts
    for my $thing ( @{$self->{other_stuff}} ) {
	next if $thing->has_tag('standard_name');
	$self->_write_feature($thing);
    }
    $self->{other_stuff} = [];
}


=head2 _check_cds

 Title   : _check_cds
 Usage   : $self->_check_cds($cds, $name)
 Function: internal method to check if the CDS associated with an mRNA is
           the correct alternative splice variant
 Returns : a Bio::SeqFeature::Generic CDS object
 Args    : the CDS object plus the transcript\'s 'standard_name'
 Note    : this method only works if alternatively spliced transcripts are bound
           together by a 'standard_name' or 'mRNA' qualifier.  If none is present, 
           we will hope that the exons were derived from a segmented RNA or a CDS 
           with no associated mRNA feature.  Neither of these two cases would be 



( run in 0.936 second using v1.01-cache-2.11-cpan-97f6503c9c8 )