BioPerl

 view release on metacpan or  search on metacpan

Bio/DB/Expression/geo.pm  view on Meta::CPAN


=cut

sub get_platforms {
  my ($self,@args) = @_;

  my $doc = $self->_get_url( URL_PLATFORMS );
  $doc =~ s!^.+?>Release date<.+?</tr>(.+)</table>!$1!gs;

  my @platforms = ();
  my @records = split m!</tr>\s+<tr>!, $doc;

  foreach my $record ( @records ) {
    my ($platform_acc,$name,$tax_acc,$contact_acc,$contact_name) =
      $record =~ m!acc\.cgi\?acc=(.+?)".+?<td.+?>(.+?)<.+?<td.+?>.+?<.+?<td.+?>.+?href=".+?id=(.+?)".+?<td.+?OpenSubmitter\((\d+?)\).+?>(.+?)<!s;
    next unless $platform_acc;

    my $platform = Bio::Expression::Platform->new(
                                                  -accession => $platform_acc,
                                                  -name => $name,
                                                  -_taxon_id => $tax_acc,

Bio/SearchIO/erpin.pm  view on Meta::CPAN

                                        $acc ? $acc : $id,
                    'Hit_def'       => $desc
                    });
            }
            $lasthit = $id;
        } elsif ( (index($line, 'FW') == 0) || (index($line, 'RC') == 0)) {
            my ($str, $hn, $pos, $score, $eval) = split ' ', $line;
            if ($minscore < $score) {
                $self->start_element({'Name' => 'Hsp'});
                
                my ($start, $end) = split m{\.\.}, $pos, 2;
                ($start, $end) = ($end, $start) if ($str eq 'RC');
                $line = $self->_readline;
                chomp $line;
                $self->element_hash({
                    'Hsp_stranded'     => 'HIT',
                    'Hsp_hit-from'     => $start,
                    'Hsp_hit-to'       => $end,
                    'Hsp_score'        => $score,
                    'Hsp_bit-score'    => $score,
                    'Hsp_evalue'       => $eval,

Bio/SeqIO/Handler/GenericRichSeqHandler.pm  view on Meta::CPAN

sub _generic_seq {
    my ($self, $data) = @_;
    $self->{'_params'}->{'-seq'} = $data->{DATA};
}

####################### RAW DATA HANDLERS #######################

# GenBank LOCUS line
sub _genbank_locus {
    my ($self, $data) = @_;
    my (@tokens) = split m{\s+}, $data->{DATA};
    my $display_id = shift @tokens;
    $self->{'_params'}->{'-display_id'} = $display_id;
    my $seqlength = shift @tokens;
    if (exists $VALID_ALPHABET{$seqlength}) {
        # moved one token too far.  No locus name?
        $self->warn("Bad LOCUS name?  Changing [".$self->{'_params'}->{'-display_id'}.
                    "] to 'unknown' and length to ".$self->{'_params'}->{'-display_id'});
        $self->{'_params'}->{'-length'} = $self->{'_params'}->{'-display_id'};
        $self->{'_params'}->{'-display_id'} = 'unknown';
        # add token back...

Bio/SeqIO/Handler/GenericRichSeqHandler.pm  view on Meta::CPAN

            $self->annotation_collection->add_Annotation($update);
        }
        push @{ $self->{'_params'}->{'-dates'} }, $date;
    }
}

# UniProt/SwissProt DT lines
sub _swiss_date {
    my ($self, $data) = @_;
    # swissprot
    my @dls = split m{\n}, $data->{DATA};
    for my $dl (@dls) {
        my ($date, $version) = split(' ', $dl, 2);
        $date =~ tr{,}{}d; # remove comma if new version    
        if ($version =~ m{\(Rel\. (\d+), Last sequence update\)} || # old
            $version =~ m{sequence version (\d+)\.}) { #new
        my $update = Bio::Annotation::SimpleValue->new(
                                    -tagname    => 'seq_update',
                                    -value      => $1
                                    );
        $self->annotation_collection->add_Annotation($update);

Bio/SeqIO/Handler/GenericRichSeqHandler.pm  view on Meta::CPAN

        push @{ $self->{'_params'}->{'-dates'} }, $date;
    }
}

# GenBank KEYWORDS line
# EMBL KW line
# UniProt/SwissProt KW line
sub _generic_keywords {
    my ($self, $data) = @_;
    $data->{DATA} =~ s{\.$}{};
    my @kw = split m{\s*\;\s*}xo ,$data->{DATA};
    $self->{'_params'}->{'-keywords'} = \@kw;
}

# GenBank DEFINITION line
# EMBL DE line
# UniProt/SwissProt DE line
sub _generic_description {
    my ($self, $data) = @_;
    $self->{'_params'}->{'-desc'} = $data->{DATA};
}

# GenBank ACCESSION line
# EMBL AC line
# UniProt/SwissProt AC line
sub _generic_accession {
    my ($self, $data) = @_;
    my @accs = split m{[\s;]+}, $data->{DATA};
    $self->{'_params'}->{'-accession_number'} = shift @accs;
    $self->{'_params'}->{'-secondary_accessions'} = \@accs if @accs;
}

####################### SPECIES HANDLERS #######################

# uses Bio::Species
# GenBank SOURCE, ORGANISM lines
# EMBL O* lines
# UniProt/SwissProt O* lines

Bio/SeqIO/Handler/GenericRichSeqHandler.pm  view on Meta::CPAN

        }
        if ($data->{CROSSREF} && $data->{CROSSREF} =~ m{NCBI_TaxID=(\d+)}) {
            $taxid = $1;
        }
    }
    
    my ($sl, $class, $sci_name) = ($data->{DATA},
                                   $data->{CLASSIFICATION},
                                   $data->{ORGANISM} || '');
    my ($organelle,$abbr_name, $common);    
    my @class = reverse split m{\s*;\s*}, $class;
    # have to treat swiss different from everything else...
    if ($sl =~ m{^(mitochondrion|chloroplast|plastid)?   # GenBank format
                \s*(.*?)
                \s*(?: \( (.*?) \) )?\.?$ 
         }xmso ){ 
        ($organelle, $abbr_name, $common) = ($1, $2, $3); # optional
    } else {
        $abbr_name = $sl;	# nothing caught; this is a backup!
    }
    # there is no 'abbreviated name' for EMBL

Bio/SeqIO/embldriver.pm  view on Meta::CPAN

    my $self = shift;
    my $hobj = $self->seqhandler;
    local($/) = "\n";
    my ($featkey, $qual, $annkey, $delim, $seqdata);
    my $lastann = '';
    my $ct = 0;
    PARSER:
    while(defined(my $line = $self->_readline)) {
        next PARSER if $line =~ m{^\s*$};
        chomp $line;
        my ($ann,$data) = split m{\s{2,3}}, $line , 2;
        next PARSER if ($ann eq 'XX' || $ann eq 'FH');
        if ($ann) {
            $data ||='';
            if ($ann eq 'FT') {
                # seqfeatures
                if ($data =~ m{^(\S+)\s+([^\n]+)}) {
                    $hobj->data_handler($seqdata) if $seqdata;
                    $seqdata = ();
                    ($seqdata->{FEATURE_KEY}, $data) = ($1, $2);
                    $seqdata->{NAME} = $ann;

Bio/SeqIO/embldriver.pm  view on Meta::CPAN

    return $hobj->build_sequence;
}

sub next_chunk {
    my $self = shift;
    my $ct = 0;
    PARSER:
    while(defined(my $line = $self->_readline)) {
        next if $line =~ m{^\s*$};
        chomp $line;
        my ($ann,$data) = split m{\s{2,3}}, $line , 2;
        $data ||= '';
        $self->debug("Ann: [$ann]\n\tData: [$data]\n");
        last PARSER if $ann =~ m{//};
    }
}

=head2 write_seq

 Title   : write_seq
 Usage   : $stream->write_seq($seq)

Bio/SeqIO/gbdriver.pm  view on Meta::CPAN

# Title   : _process_features
# Usage   : $self->_process_features($seqdata)
# Function: Process feature data chunk into usable bits
# Returns : 
# Args    : data chunk
#
#=cut

sub _process_features {
    my ($self, $seqdata) = @_;
    my @ftlines = split m{\n}, $seqdata->{DATA};
    delete $seqdata->{DATA};
    # don't deal with balancing quotes for now; just get rid of them...
    # Should we worry about checking whether these are balanced
    # for round-tripping tests?
    map { s{"}{}g } @ftlines;
    # all sfs start with the location...
    my $qual = 'LOCATION';
    my $ct = 0;
    for my $qualdata (@ftlines) {
        if ($qualdata =~ m{^/([^=]+)=?(.+)?}) {

Bio/SeqIO/kegg.pm  view on Meta::CPAN

	  $self->throw("KEGG stream with bad ENTRY line. Not KEGG in my book. Got $buffer'");

	my %FIELDS;
	my @chunks = split /\n(?=\S)/, $buffer;

	foreach my $chunk (@chunks){
		my($key) = $chunk =~ /^(\S+)/;
		$FIELDS{$key} = $chunk;
	}

	# changing to split method to get entry_ids that include
	# sequence version like Whatever.1
	my(undef,$entry_id,$entry_seqtype,$entry_species) =
	  split(' ',$FIELDS{ENTRY});

	my($name);
	if ($FIELDS{NAME}) {
          ($name) = $FIELDS{NAME} =~ /^NAME\s+(.+)$/;
	}

        my( $definition, $aa_length, $aa_seq, $nt_length, $nt_seq );



( run in 4.984 seconds using v1.01-cache-2.11-cpan-483215c6ad5 )