BioPerl

 view release on metacpan or  search on metacpan

Bio/SeqIO/kegg.pm  view on Meta::CPAN

use Bio::Annotation::DBLink;

use base qw(Bio::SeqIO);

# _initialize
#
# Per-instance setup hook: forwards the constructor arguments to the
# parent class, resets the table of key-decoding functions, and makes
# sure the stream has a sequence factory to build objects with.
#
# Args    : the argument list received by the constructor
# Returns : nothing meaningful (value of the last statement evaluated)
sub _initialize {
	my ($self, @ctor_args) = @_;

	# Let the parent class process the constructor arguments first.
	$self->SUPER::_initialize(@ctor_args);

	# Start with an empty table of functions for decoding keys.
	$self->{'_func_ftunit_hash'} = {};

	# Only install a default factory when none has been set yet.
	if (not defined $self->sequence_factory) {
		my @factory_opts = (
			-verbose => $self->verbose(),
			-type    => 'Bio::Seq::RichSeq',
		);
		$self->sequence_factory(Bio::Seq::SeqFactory->new(@factory_opts));
	}
}

=head2 next_seq

 Title   : next_seq
 Usage   : $seq = $stream->next_seq()
 Function: returns the next sequence in the stream
 Returns : Bio::Seq::RichSeq object
 Args    : none

=cut

sub next_seq {
	my ($self,@args) = @_;
	my $builder = $self->sequence_builder();
	my $seq;
	my %params;

	my $buffer;
	my (@acc, @features);
	my ($display_id, $annotation);
	my $species;

	# initialize; we may come here because of starting over
	@features = ();
	$annotation = undef;
	@acc = ();
	$species = undef;
	%params = (-verbose => $self->verbose); # reset hash
	local($/) = "///\n";

	$buffer = $self->_readline();

	return if( !defined $buffer ); # end of file
	$buffer =~ /^ENTRY/ ||
	  $self->throw("KEGG stream with bad ENTRY line. Not KEGG in my book. Got $buffer'");

	my %FIELDS;
	my @chunks = split /\n(?=\S)/, $buffer;

	foreach my $chunk (@chunks){
		my($key) = $chunk =~ /^(\S+)/;
		$FIELDS{$key} = $chunk;
	}

	# changing to split method to get entry_ids that include
	# sequence version like Whatever.1
	my(undef,$entry_id,$entry_seqtype,$entry_species) =
	  split(' ',$FIELDS{ENTRY});

	my($name);
	if ($FIELDS{NAME}) {
          ($name) = $FIELDS{NAME} =~ /^NAME\s+(.+)$/;
	}

        my( $definition, $aa_length, $aa_seq, $nt_length, $nt_seq );

        if(( exists $FIELDS{DEFINITION} ) and ( $FIELDS{DEFINITION} =~ /^DEFINITION/ )) {
          ($definition) = $FIELDS{DEFINITION} =~ /^DEFINITION\s+(.+)$/s;
          $definition =~ s/\s+/ /gs;
        }
        if(( exists $FIELDS{AASEQ} ) and ( $FIELDS{AASEQ} =~ /^AASEQ/ )) {
          ($aa_length,$aa_seq) = $FIELDS{AASEQ} =~ /^AASEQ\s+(\d+)\n(.+)$/s;
          $aa_seq =~ s/\s+//g;
        }
        if(( exists  $FIELDS{NTSEQ} ) and ( $FIELDS{NTSEQ} =~ /^NTSEQ/ )) {
          ($nt_length,$nt_seq) = $FIELDS{NTSEQ} =~ /^NTSEQ\s+(\d+)\n(.+)$/s;
          $nt_seq =~ s/\s+//g;
        }

	$annotation = Bio::Annotation::Collection->new();

	$annotation->add_Annotation('description',
						Bio::Annotation::Comment->new(-text => $definition));

	$annotation->add_Annotation('aa_seq',
						Bio::Annotation::Comment->new(-text => $aa_seq));

	my($ortholog_db,$ortholog_id,$ortholog_desc);
	if ($FIELDS{ORTHOLOG}) {
		($ortholog_db,$ortholog_id,$ortholog_desc) = $FIELDS{ORTHOLOG}
		  =~ /^ORTHOLOG\s+(\S+):\s+(\S+)\s+(.*?)$/;

        $annotation->add_Annotation('dblink',Bio::Annotation::DBLink->new(
                     -database => $ortholog_db,
                     -primary_id => $ortholog_id,
                     -comment => $ortholog_desc) );
  }

  if($FIELDS{MOTIF}){
     $FIELDS{MOTIF} =~ s/^MOTIF\s+//;
     while($FIELDS{MOTIF} =~/\s*?(\S+):\s+(.+?)$/mg){
         my $db = $1;
         my $ids = $2;
         foreach my $id (split(/\s+/, $ids)){

     $annotation->add_Annotation('dblink',Bio::Annotation::DBLink->new(
              -database =>$db,
              -primary_id => $id,
              -comment => "")   );
        }
     }
  }

  if($FIELDS{PATHWAY}) {
     $FIELDS{PATHWAY} =~ s/^PATHWAY\s+//;



( run in 0.405 second using v1.01-cache-2.11-cpan-71847e10f99 )