view release on metacpan or search on metacpan
Bio/DB/Expression/geo.pm view on Meta::CPAN
=cut
sub get_platforms {
my ($self,@args) = @_;
my $doc = $self->_get_url( URL_PLATFORMS );
$doc =~ s!^.+?>Release date<.+?</tr>(.+)</table>!$1!gs;
my @platforms = ();
my @records = split m!</tr>\s+<tr>!, $doc;
foreach my $record ( @records ) {
my ($platform_acc,$name,$tax_acc,$contact_acc,$contact_name) =
$record =~ m!acc\.cgi\?acc=(.+?)".+?<td.+?>(.+?)<.+?<td.+?>.+?<.+?<td.+?>.+?href=".+?id=(.+?)".+?<td.+?OpenSubmitter\((\d+?)\).+?>(.+?)<!s;
next unless $platform_acc;
my $platform = Bio::Expression::Platform->new(
-accession => $platform_acc,
-name => $name,
-_taxon_id => $tax_acc,
Bio/SearchIO/erpin.pm view on Meta::CPAN
$acc ? $acc : $id,
'Hit_def' => $desc
});
}
$lasthit = $id;
} elsif ( (index($line, 'FW') == 0) || (index($line, 'RC') == 0)) {
my ($str, $hn, $pos, $score, $eval) = split ' ', $line;
if ($minscore < $score) {
$self->start_element({'Name' => 'Hsp'});
my ($start, $end) = split m{\.\.}, $pos, 2;
($start, $end) = ($end, $start) if ($str eq 'RC');
$line = $self->_readline;
chomp $line;
$self->element_hash({
'Hsp_stranded' => 'HIT',
'Hsp_hit-from' => $start,
'Hsp_hit-to' => $end,
'Hsp_score' => $score,
'Hsp_bit-score' => $score,
'Hsp_evalue' => $eval,
Bio/SeqIO/Handler/GenericRichSeqHandler.pm view on Meta::CPAN
sub _generic_seq {
my ($self, $data) = @_;
$self->{'_params'}->{'-seq'} = $data->{DATA};
}
####################### RAW DATA HANDLERS #######################
# GenBank LOCUS line
sub _genbank_locus {
my ($self, $data) = @_;
my (@tokens) = split m{\s+}, $data->{DATA};
my $display_id = shift @tokens;
$self->{'_params'}->{'-display_id'} = $display_id;
my $seqlength = shift @tokens;
if (exists $VALID_ALPHABET{$seqlength}) {
# moved one token too far. No locus name?
$self->warn("Bad LOCUS name? Changing [".$self->{'_params'}->{'-display_id'}.
"] to 'unknown' and length to ".$self->{'_params'}->{'-display_id'});
$self->{'_params'}->{'-length'} = $self->{'_params'}->{'-display_id'};
$self->{'_params'}->{'-display_id'} = 'unknown';
# add token back...
Bio/SeqIO/Handler/GenericRichSeqHandler.pm view on Meta::CPAN
$self->annotation_collection->add_Annotation($update);
}
push @{ $self->{'_params'}->{'-dates'} }, $date;
}
}
# UniProt/SwissProt DT lines
sub _swiss_date {
my ($self, $data) = @_;
# swissprot
my @dls = split m{\n}, $data->{DATA};
for my $dl (@dls) {
my ($date, $version) = split(' ', $dl, 2);
$date =~ tr{,}{}d; # remove comma if new version
if ($version =~ m{\(Rel\. (\d+), Last sequence update\)} || # old
$version =~ m{sequence version (\d+)\.}) { #new
my $update = Bio::Annotation::SimpleValue->new(
-tagname => 'seq_update',
-value => $1
);
$self->annotation_collection->add_Annotation($update);
Bio/SeqIO/Handler/GenericRichSeqHandler.pm view on Meta::CPAN
push @{ $self->{'_params'}->{'-dates'} }, $date;
}
}
# GenBank KEYWORDS line
# EMBL KW line
# UniProt/SwissProt KW line
sub _generic_keywords {
my ($self, $data) = @_;
$data->{DATA} =~ s{\.$}{};
my @kw = split m{\s*\;\s*}xo ,$data->{DATA};
$self->{'_params'}->{'-keywords'} = \@kw;
}
# GenBank DEFINITION line
# EMBL DE line
# UniProt/SwissProt DE line
sub _generic_description {
my ($self, $data) = @_;
$self->{'_params'}->{'-desc'} = $data->{DATA};
}
# GenBank ACCESSION line
# EMBL AC line
# UniProt/SwissProt AC line
sub _generic_accession {
my ($self, $data) = @_;
my @accs = split m{[\s;]+}, $data->{DATA};
$self->{'_params'}->{'-accession_number'} = shift @accs;
$self->{'_params'}->{'-secondary_accessions'} = \@accs if @accs;
}
####################### SPECIES HANDLERS #######################
# uses Bio::Species
# GenBank SOURCE, ORGANISM lines
# EMBL O* lines
# UniProt/SwissProt O* lines
Bio/SeqIO/Handler/GenericRichSeqHandler.pm view on Meta::CPAN
}
if ($data->{CROSSREF} && $data->{CROSSREF} =~ m{NCBI_TaxID=(\d+)}) {
$taxid = $1;
}
}
my ($sl, $class, $sci_name) = ($data->{DATA},
$data->{CLASSIFICATION},
$data->{ORGANISM} || '');
my ($organelle,$abbr_name, $common);
my @class = reverse split m{\s*;\s*}, $class;
# have to treat swiss different from everything else...
if ($sl =~ m{^(mitochondrion|chloroplast|plastid)? # GenBank format
\s*(.*?)
\s*(?: \( (.*?) \) )?\.?$
}xmso ){
($organelle, $abbr_name, $common) = ($1, $2, $3); # optional
} else {
$abbr_name = $sl; # nothing caught; this is a backup!
}
# there is no 'abbreviated name' for EMBL
Bio/SeqIO/embldriver.pm view on Meta::CPAN
my $self = shift;
my $hobj = $self->seqhandler;
local($/) = "\n";
my ($featkey, $qual, $annkey, $delim, $seqdata);
my $lastann = '';
my $ct = 0;
PARSER:
while(defined(my $line = $self->_readline)) {
next PARSER if $line =~ m{^\s*$};
chomp $line;
my ($ann,$data) = split m{\s{2,3}}, $line , 2;
next PARSER if ($ann eq 'XX' || $ann eq 'FH');
if ($ann) {
$data ||='';
if ($ann eq 'FT') {
# seqfeatures
if ($data =~ m{^(\S+)\s+([^\n]+)}) {
$hobj->data_handler($seqdata) if $seqdata;
$seqdata = ();
($seqdata->{FEATURE_KEY}, $data) = ($1, $2);
$seqdata->{NAME} = $ann;
Bio/SeqIO/embldriver.pm view on Meta::CPAN
return $hobj->build_sequence;
}
sub next_chunk {
my $self = shift;
my $ct = 0;
PARSER:
while(defined(my $line = $self->_readline)) {
next if $line =~ m{^\s*$};
chomp $line;
my ($ann,$data) = split m{\s{2,3}}, $line , 2;
$data ||= '';
$self->debug("Ann: [$ann]\n\tData: [$data]\n");
last PARSER if $ann =~ m{//};
}
}
=head2 write_seq
Title : write_seq
Usage : $stream->write_seq($seq)
Bio/SeqIO/gbdriver.pm view on Meta::CPAN
# Title : _process_features
# Usage : $self->_process_features($seqdata)
# Function: Process feature data chunk into usable bits
# Returns :
# Args : data chunk
#
#=cut
sub _process_features {
my ($self, $seqdata) = @_;
my @ftlines = split m{\n}, $seqdata->{DATA};
delete $seqdata->{DATA};
# don't deal with balancing quotes for now; just get rid of them...
# Should we worry about checking whether these are balanced
# for round-tripping tests?
map { s{"}{}g } @ftlines;
# all sfs start with the location...
my $qual = 'LOCATION';
my $ct = 0;
for my $qualdata (@ftlines) {
if ($qualdata =~ m{^/([^=]+)=?(.+)?}) {
Bio/SeqIO/kegg.pm view on Meta::CPAN
$self->throw("KEGG stream with bad ENTRY line. Not KEGG in my book. Got $buffer'");
my %FIELDS;
my @chunks = split /\n(?=\S)/, $buffer;
foreach my $chunk (@chunks){
my($key) = $chunk =~ /^(\S+)/;
$FIELDS{$key} = $chunk;
}
# changing to split method to get entry_ids that include
# sequence version like Whatever.1
my(undef,$entry_id,$entry_seqtype,$entry_species) =
split(' ',$FIELDS{ENTRY});
my($name);
if ($FIELDS{NAME}) {
($name) = $FIELDS{NAME} =~ /^NAME\s+(.+)$/;
}
my( $definition, $aa_length, $aa_seq, $nt_length, $nt_seq );