BioPerl-Run
view release on metacpan or search on metacpan
lib/Bio/DB/SoapEUtilities/FetchAdaptor/seq.pm view on Meta::CPAN
}
# features
my $feats;
if ($self->builder->want_slot('features')) {
$feats = _read_features($stem,$som,$self->locfac,$get);
$self->builder->add_slot_value(
-features => $feats
);
}
# organism data
if ( $self->builder->want_slot('species') && $get_tl->('source') ) {
my $sp = _read_species($get);
if ($sp && !$sp->ncbi_taxid) {
my ($src) = grep { $_->primary_tag eq 'source' } @$feats;
if ($src) {
foreach my $val ($src->get_tag_values('db_xref')) {
$sp->ncbi_taxid(substr($val,6)) if index($val,"taxon:") == 0;
}
}
}
$self->builder->add_slot_value( -species => $sp );
}
}
else {
$self->throw("FetchAdaptor::seq : unrecognized result elt type '$t', can't parse");
}
($self->{_idx})++;
return $self->builder->make_object;
}
# mostly ripped from Bio::SeqIO::genbank...
sub _read_species {
my ($get) = @_;
my @unkn_names = ('other', 'unknown organism', 'not specified', 'not shown',
'Unspecified', 'Unknown', 'None', 'unclassified',
'unidentified organism', 'not supplied');
# dictionary of synonyms for taxid 32644
my @unkn_genus = ('unknown','unclassified','uncultured','unidentified');
# all above can be part of valid species name
my( $sub_species, $species, $genus, $sci_name, $common,
$abbr_name, $organelle);
$sci_name = $get->('organism') || return;
# parse out organelle, common name, abbreviated name if present;
# this should catch everything, but falls back to
# entire GBSeq_taxonomy element just in case
if ($get->('source') =~ m{^
(mitochondrion|chloroplast|plastid)?
\s*(.*?)
\s*(?: \( (.*?) \) )?\.?
$}xms ) {
($organelle, $abbr_name, $common) = ($1, $2, $3); # optional
} else {
$abbr_name = $get->('source'); # nothing caught; this is a backup!
}
# Convert data in classification lines into classification array.
my @class = split(/; /, $get->('taxonomy'));
# do we have a genus?
my $possible_genus = quotemeta($class[-1])
. ($class[-2] ? "|" . quotemeta($class[-2]) : '');
if ($sci_name =~ /^($possible_genus)/) {
$genus = $1;
($species) = $sci_name =~ /^$genus\s+(.+)/;
}
else {
$species = $sci_name;
}
# is this organism of rank species or is it lower?
# (we don't catch everything lower than species, but it doesn't matter -
# this is just so we abide by previous behaviour whilst not calling a
# species a subspecies)
if ($species && $species =~ /subsp\.|var\./) {
($species, $sub_species) = $species =~ /(.+)\s+((?:subsp\.|var\.).+)/;
}
# Don't make a species object if it's empty or "Unknown" or "None"
# return unless $genus and $genus !~ /^(Unknown|None)$/oi;
# Don't make a species object if it belongs to taxid 32644
my $src = $get->('source');
return unless ($species || $genus) and
!grep { $_ eq $src } @unkn_names;
# Bio::Species array needs array in Species -> Kingdom direction
push(@class, $sci_name);
@class = reverse @class;
my $make = Bio::Species->new();
$make->scientific_name($sci_name);
$make->classification(@class) if @class > 0;
$make->common_name( $common ) if $common;
$make->name('abbreviated', $abbr_name) if $abbr_name;
$make->organelle($organelle) if $organelle;
return $make;
}
sub next_seq { shift->next_obj }
sub _read_references {
my ($stem, $som) = @_;
my @ret;
for ( my $i = 1; $som->valueof($stem."/GBSeq_references/[$i]"); $i++ ) {
my $get = sub {
$som->valueof($stem."/GBSeq_references/[$i]/GBReference_".shift )
};
my %params;
$params{'-title'} = $get->('title');
$params{'-pubmed'} = $get->('pubmed');
$params{'-medline'} = $get->('pubmed');
$params{'-journal'} = $get->('journal');
$params{'-comment'} = $get->('remark');
( run in 1.462 second using v1.01-cache-2.11-cpan-39bf76dae61 )