FAST
view release on metacpan or search on metacpan
lib/FAST/Bio/SeqIO/embldriver.pm view on Meta::CPAN
RC => 'COMMENT',
RG => 'CONSRTM',
RP => 'POSITION',
RX => 'CROSSREF',
RT => 'TITLE',
RL => 'JOURNAL',
AS => 'ASSEMBLYINFO', # Third party annotation
);
my %DELIM = (
#CC => "\n",
#DR => "\n",
#DT => "\n",
);
# signals to process what's in the hash prior to next round
# these should be changed to map secondary data
my %PRIMARY = map {$_ => 1} qw(ID AC DT DE SV KW OS RN AH DR FH CC SQ FT WGS CON ANN TPA //);
sub _initialize {
my($self,@args) = @_;
$self->SUPER::_initialize(@args);
my $handler = $self->_rearrange([qw(HANDLER)],@args);
# hash for functions for decoding keys.
$handler ? $self->seqhandler($handler) :
$self->seqhandler(FAST::Bio::SeqIO::Handler::GenericRichSeqHandler->new(
-format => 'embl',
-verbose => $self->verbose,
-builder => $self->sequence_builder
));
#
if( ! defined $self->sequence_factory ) {
$self->sequence_factory(FAST::Bio::Seq::SeqFactory->new
(-verbose => $self->verbose(),
-type => 'FAST::Bio::Seq::RichSeq'));
}
}
=head2 next_seq
Title : next_seq
Usage : $seq = $stream->next_seq()
Function: returns the next sequence in the stream
Returns : FAST::Bio::Seq object
Args :
=cut
sub next_seq {
my $self = shift;
my $hobj = $self->seqhandler;
local($/) = "\n";
my ($featkey, $qual, $annkey, $delim, $seqdata);
my $lastann = '';
my $ct = 0;
PARSER:
while(defined(my $line = $self->_readline)) {
next PARSER if $line =~ m{^\s*$};
chomp $line;
my ($ann,$data) = split m{\s{2,3}}, $line , 2;
next PARSER if ($ann eq 'XX' || $ann eq 'FH');
if ($ann) {
$data ||='';
if ($ann eq 'FT') {
# seqfeatures
if ($data =~ m{^(\S+)\s+([^\n]+)}) {
$hobj->data_handler($seqdata) if $seqdata;
$seqdata = ();
($seqdata->{FEATURE_KEY}, $data) = ($1, $2);
$seqdata->{NAME} = $ann;
$qual = 'LOCATION';
} elsif ($data =~ m{^\s+/([^=]+)=?(.+)?}) {
($qual, $data) = ($1, $2 ||'');
$ct = (exists $seqdata->{$qual}) ?
((ref($seqdata->{$qual})) ? scalar(@{ $seqdata->{$qual} }) : 1)
: 0 ;
}
$data =~ s{^\s+}{};
$data =~ tr{"}{}d; # we don't care about quotes yet...
my $delim = ($FTQUAL_NO_QUOTE{$qual}) ? '' : ' ';
if ($ct == 0) {
$seqdata->{$qual} .= ($seqdata->{$qual}) ?
$delim.$data :
$data;
} else {
if (!ref($seqdata->{$qual})) {
$seqdata->{$qual} = [$seqdata->{$qual}];
}
(exists $seqdata->{$qual}->[$ct]) ?
(($seqdata->{$qual}->[$ct]) .= $delim.$data) :
(($seqdata->{$qual}->[$ct]) .= $data);
}
} else {
# simple annotations
$data =~ s{;$}{};
last PARSER if $ann eq '//';
if ($ann ne $lastann) {
if (!$SEC{$ann} && $seqdata) {
$hobj->data_handler($seqdata);
# can't use undef here; it can lead to subtle mem leaks
$seqdata = ();
}
$annkey = (!$SEC{$ann}) ? 'DATA' : # primary data
$SEC{$ann};
$seqdata->{'NAME'} = $ann if !$SEC{$ann};
}
# toss the data for SQ lines; this needs to be done after the
# call to the data handler
next PARSER if $ann eq 'SQ';
my $delim = $DELIM{$ann} || ' ';
$seqdata->{$annkey} .= ($seqdata->{$annkey}) ?
$delim.$data : $data;
$lastann = $ann;
}
} else {
# this should only be sequence (fingers crossed!)
SEQUENCE:
while (defined ($line = $self->_readline)) {
if (index($line, '//') == 0) {
$data =~ tr{0-9 \n}{}d;
$seqdata->{DATA} = $data;
#$self->debug(Dumper($seqdata));
$hobj->data_handler($seqdata);
$seqdata = ();
last PARSER;
} else {
$data .= $line;
$line = undef;
}
}
}
}
$hobj->data_handler($seqdata) if $seqdata;
$seqdata = ();
return $hobj->build_sequence;
}
sub next_chunk {
my $self = shift;
my $ct = 0;
PARSER:
while(defined(my $line = $self->_readline)) {
next if $line =~ m{^\s*$};
chomp $line;
my ($ann,$data) = split m{\s{2,3}}, $line , 2;
$data ||= '';
$self->debug("Ann: [$ann]\n\tData: [$data]\n");
last PARSER if $ann =~ m{//};
}
}
=head2 write_seq
Title : write_seq
Usage : $stream->write_seq($seq)
Function: writes the $seq object (must be seq) to the stream
Returns : 1 for success and 0 for error
Args : array of 1 to n FAST::Bio::SeqI objects
=cut
sub write_seq {
shift->throw("Use FAST::Bio::SeqIO::embl for output");
# maybe make a Writer class as well????
}
=head2 seqhandler
Title : seqhandler
Usage : $stream->seqhandler($handler)
Function: Get/Set teh FAST::Bio::Seq::HandlerBaseI object
Returns : FAST::Bio::Seq::HandlerBaseI
Args : FAST::Bio::Seq::HandlerBaseI
=cut
sub seqhandler {
my ($self, $handler) = @_;
if ($handler) {
$self->throw("Not a FAST::Bio::HandlerBaseI") unless
ref($handler) && $handler->isa("FAST::Bio::HandlerBaseI");
$self->{'_seqhandler'} = $handler;
}
return $self->{'_seqhandler'};
}
1;
__END__
( run in 0.689 second using v1.01-cache-2.11-cpan-71847e10f99 )