BioPerl
view release on metacpan or search on metacpan
Bio/SeqIO/kegg.pm view on Meta::CPAN
use Bio::Annotation::DBLink;
use base qw(Bio::SeqIO);
# _initialize
#
# Per-instance setup hook for this KEGG stream: forwards the constructor
# arguments to the parent class initializer, resets the per-object
# '_func_ftunit_hash' table, and installs a default sequence factory when
# the object does not have one yet.
#
# Args    : the argument list received by the constructor (passed through
#           unchanged to SUPER::_initialize)
sub _initialize {
    my $self = shift;
    my @args = @_;

    $self->SUPER::_initialize(@args);

    # Table of functions used for decoding keys; starts out empty.
    $self->{'_func_ftunit_hash'} = {};

    # Only supply a factory when none is already set on the object, so an
    # existing one is never overwritten. The default builds
    # Bio::Seq::RichSeq objects and inherits this stream's verbosity.
    if (!defined $self->sequence_factory) {
        my @factory_args = (
            -verbose => $self->verbose(),
            -type    => 'Bio::Seq::RichSeq',
        );
        $self->sequence_factory(Bio::Seq::SeqFactory->new(@factory_args));
    }
}
=head2 next_seq
Title : next_seq
Usage : $seq = $stream->next_seq()
Function: returns the next sequence in the stream
Returns : Bio::Seq::RichSeq object, or nothing (undef in scalar context) once the end of the stream is reached
Args :
=cut
sub next_seq {
my ($self,@args) = @_;
my $builder = $self->sequence_builder();
my $seq;
my %params;
my $buffer;
my (@acc, @features);
my ($display_id, $annotation);
my $species;
# initialize; we may come here because of starting over
@features = ();
$annotation = undef;
@acc = ();
$species = undef;
%params = (-verbose => $self->verbose); # reset hash
local($/) = "///\n";
$buffer = $self->_readline();
return if( !defined $buffer ); # end of file
$buffer =~ /^ENTRY/ ||
$self->throw("KEGG stream with bad ENTRY line. Not KEGG in my book. Got $buffer'");
my %FIELDS;
my @chunks = split /\n(?=\S)/, $buffer;
foreach my $chunk (@chunks){
my($key) = $chunk =~ /^(\S+)/;
$FIELDS{$key} = $chunk;
}
# changing to split method to get entry_ids that include
# sequence version like Whatever.1
my(undef,$entry_id,$entry_seqtype,$entry_species) =
split(' ',$FIELDS{ENTRY});
my($name);
if ($FIELDS{NAME}) {
($name) = $FIELDS{NAME} =~ /^NAME\s+(.+)$/;
}
my( $definition, $aa_length, $aa_seq, $nt_length, $nt_seq );
if(( exists $FIELDS{DEFINITION} ) and ( $FIELDS{DEFINITION} =~ /^DEFINITION/ )) {
($definition) = $FIELDS{DEFINITION} =~ /^DEFINITION\s+(.+)$/s;
$definition =~ s/\s+/ /gs;
}
if(( exists $FIELDS{AASEQ} ) and ( $FIELDS{AASEQ} =~ /^AASEQ/ )) {
($aa_length,$aa_seq) = $FIELDS{AASEQ} =~ /^AASEQ\s+(\d+)\n(.+)$/s;
$aa_seq =~ s/\s+//g;
}
if(( exists $FIELDS{NTSEQ} ) and ( $FIELDS{NTSEQ} =~ /^NTSEQ/ )) {
($nt_length,$nt_seq) = $FIELDS{NTSEQ} =~ /^NTSEQ\s+(\d+)\n(.+)$/s;
$nt_seq =~ s/\s+//g;
}
$annotation = Bio::Annotation::Collection->new();
$annotation->add_Annotation('description',
Bio::Annotation::Comment->new(-text => $definition));
$annotation->add_Annotation('aa_seq',
Bio::Annotation::Comment->new(-text => $aa_seq));
my($ortholog_db,$ortholog_id,$ortholog_desc);
if ($FIELDS{ORTHOLOG}) {
($ortholog_db,$ortholog_id,$ortholog_desc) = $FIELDS{ORTHOLOG}
=~ /^ORTHOLOG\s+(\S+):\s+(\S+)\s+(.*?)$/;
$annotation->add_Annotation('dblink',Bio::Annotation::DBLink->new(
-database => $ortholog_db,
-primary_id => $ortholog_id,
-comment => $ortholog_desc) );
}
if($FIELDS{MOTIF}){
$FIELDS{MOTIF} =~ s/^MOTIF\s+//;
while($FIELDS{MOTIF} =~/\s*?(\S+):\s+(.+?)$/mg){
my $db = $1;
my $ids = $2;
foreach my $id (split(/\s+/, $ids)){
$annotation->add_Annotation('dblink',Bio::Annotation::DBLink->new(
-database =>$db,
-primary_id => $id,
-comment => "") );
}
}
}
if($FIELDS{PATHWAY}) {
$FIELDS{PATHWAY} =~ s/^PATHWAY\s+//;
( run in 0.405 second using v1.01-cache-2.11-cpan-71847e10f99 )