FAST

 view release on metacpan or  search on metacpan

lib/FAST/Bio/SeqIO/embldriver.pm  view on Meta::CPAN

    RC      => 'COMMENT',
    RG      => 'CONSRTM',
    RP      => 'POSITION',
    RX      => 'CROSSREF',
    RT      => 'TITLE',
    RL      => 'JOURNAL',
    AS      => 'ASSEMBLYINFO',  # Third party annotation    
    );

my %DELIM = (
    #CC      => "\n",
    #DR      => "\n",
    #DT      => "\n",
            );

# signals to process what's in the hash prior to next round
# these should be changed to map secondary data
my %PRIMARY = map {$_ => 1} qw(ID AC DT DE SV KW OS RN AH DR FH CC SQ FT WGS CON ANN TPA //);

sub _initialize {
    my($self,@args) = @_;

    $self->SUPER::_initialize(@args);
    my $handler = $self->_rearrange([qw(HANDLER)],@args);
    # hash for functions for decoding keys.
    $handler ? $self->seqhandler($handler) :
    $self->seqhandler(FAST::Bio::SeqIO::Handler::GenericRichSeqHandler->new(
                    -format => 'embl',
                    -verbose => $self->verbose,
                    -builder => $self->sequence_builder
                    ));
    #
    if( ! defined $self->sequence_factory ) {
        $self->sequence_factory(FAST::Bio::Seq::SeqFactory->new
                (-verbose => $self->verbose(),
                 -type => 'FAST::Bio::Seq::RichSeq'));
    }
}

=head2 next_seq

 Title   : next_seq
 Usage   : $seq = $stream->next_seq()
 Function: returns the next sequence in the stream
 Returns : FAST::Bio::Seq object
 Args    :

=cut

sub next_seq {
    my $self = shift;
    my $hobj = $self->seqhandler;
    local($/) = "\n";
    my ($featkey, $qual, $annkey, $delim, $seqdata);
    my $lastann = '';
    my $ct = 0;
    PARSER:
    while(defined(my $line = $self->_readline)) {
        next PARSER if $line =~ m{^\s*$};
        chomp $line;
        my ($ann,$data) = split m{\s{2,3}}, $line , 2;
        next PARSER if ($ann eq 'XX' || $ann eq 'FH');
        if ($ann) {
            $data ||='';
            if ($ann eq 'FT') {
                # seqfeatures
                if ($data =~ m{^(\S+)\s+([^\n]+)}) {
                    $hobj->data_handler($seqdata) if $seqdata;
                    $seqdata = ();
                    ($seqdata->{FEATURE_KEY}, $data) = ($1, $2);
                    $seqdata->{NAME} = $ann;
                    $qual = 'LOCATION';
                } elsif ($data =~ m{^\s+/([^=]+)=?(.+)?}) {
                    ($qual, $data) = ($1, $2 ||'');
                    $ct = (exists $seqdata->{$qual}) ? 
                        ((ref($seqdata->{$qual}))  ? scalar(@{ $seqdata->{$qual} }) : 1)
                        : 0 ;
                }
                $data =~ s{^\s+}{};
                $data =~ tr{"}{}d; # we don't care about quotes yet...
                my $delim = ($FTQUAL_NO_QUOTE{$qual}) ? '' : ' ';
                if ($ct == 0) {
                    $seqdata->{$qual} .= ($seqdata->{$qual}) ?
                        $delim.$data :
                        $data;
                } else {
                    if (!ref($seqdata->{$qual})) {
                        $seqdata->{$qual} = [$seqdata->{$qual}];
                    }
                    (exists $seqdata->{$qual}->[$ct]) ?
                        (($seqdata->{$qual}->[$ct]) .= $delim.$data) :
                        (($seqdata->{$qual}->[$ct]) .= $data);
                }
            } else {
                # simple annotations
                $data =~ s{;$}{};
                last PARSER if $ann eq '//';
                if ($ann ne $lastann) {
                    if (!$SEC{$ann} && $seqdata) {
                        $hobj->data_handler($seqdata);
                        # can't use undef here; it can lead to subtle mem leaks
                        $seqdata = ();
                    }
                    $annkey = (!$SEC{$ann})    ? 'DATA'     : # primary data
                              $SEC{$ann};
                    $seqdata->{'NAME'} = $ann if !$SEC{$ann};
                }
                
                # toss the data for SQ lines; this needs to be done after the
                # call to the data handler
                
                next PARSER if $ann eq 'SQ';
                my $delim = $DELIM{$ann} || ' ';
                $seqdata->{$annkey} .= ($seqdata->{$annkey}) ?
                    $delim.$data : $data;
                $lastann = $ann;
            } 
        } else {
            # this should only be sequence (fingers crossed!)
            SEQUENCE:
            while (defined ($line = $self->_readline)) {
                if (index($line, '//') == 0) {
                    $data =~ tr{0-9 \n}{}d;
                    $seqdata->{DATA} = $data;
                    #$self->debug(Dumper($seqdata));
                    $hobj->data_handler($seqdata);
                    $seqdata = ();
                    last PARSER;
                } else {                        
                    $data .= $line;
                    $line = undef;
                }
            }
        }
    }
    $hobj->data_handler($seqdata) if $seqdata;
    $seqdata = ();
    return $hobj->build_sequence;
}

sub next_chunk {
    my $self = shift;
    my $ct = 0;
    PARSER:
    while(defined(my $line = $self->_readline)) {
        next if $line =~ m{^\s*$};
        chomp $line;
        my ($ann,$data) = split m{\s{2,3}}, $line , 2;
        $data ||= '';
        $self->debug("Ann: [$ann]\n\tData: [$data]\n");
        last PARSER if $ann =~ m{//};
    }
}

=head2 write_seq

 Title   : write_seq
 Usage   : $stream->write_seq($seq)
 Function: writes the $seq object (must be seq) to the stream
 Returns : 1 for success and 0 for error
 Args    : array of 1 to n FAST::Bio::SeqI objects

=cut

sub write_seq {
    shift->throw("Use FAST::Bio::SeqIO::embl for output");
    # maybe make a Writer class as well????
}

=head2 seqhandler

 Title   : seqhandler
 Usage   : $stream->seqhandler($handler)
 Function: Get/Set teh FAST::Bio::Seq::HandlerBaseI object
 Returns : FAST::Bio::Seq::HandlerBaseI 
 Args    : FAST::Bio::Seq::HandlerBaseI 

=cut

sub seqhandler {
    my ($self, $handler) = @_;
    if ($handler) {
        $self->throw("Not a FAST::Bio::HandlerBaseI") unless
        ref($handler) && $handler->isa("FAST::Bio::HandlerBaseI");
        $self->{'_seqhandler'} = $handler;
    }
    return $self->{'_seqhandler'};
}

1;

__END__



( run in 0.689 second using v1.01-cache-2.11-cpan-71847e10f99 )