BioPerl-Run
view release on metacpan or search on metacpan
lib/Bio/DB/SoapEUtilities/FetchAdaptor/species.pm view on Meta::CPAN
$self->SUPER::_initialize(@args);
# my ($builder, $seqfac ) = $self->_rearrange( [qw(SEQBUILDER
# SEQFACTORY)], @args );
$self->{'_obj_class'} = 'Bio::Species' ;
$self->{'_idx'} = 1;
1;
}
sub rewind { shift->{'_idx'} = 1 }
sub obj_class { shift->{'_obj_class'} }
sub next_species { shift->next_obj }
sub next_obj {
my $self = shift;
my $stem = "//TaxaSet/[".$self->{'_idx'}."]";
# my $stem = "//Body/".$self->result->result_type."/[".$self->{'_idx'}."]";
my $som = $self->result->som;
return unless defined $som->valueof($stem);
my $get = sub { $som->valueof("$stem/".shift) };
my $toplev = $som->valueof("$stem");
my $get_tl = sub { $toplev->{ shift @_ } };
my $sp = _read_species($get_tl);
$self->warn("FetchAdaptor::species - parse error, no Bio::Species returned") unless $sp;
($self->{_idx})++;
return $sp;
}
1;
# mostly ripped from Bio::SeqIO::genbank...
sub _read_species {
my ($get) = @_;
my @unkn_names = ('other', 'unknown organism', 'not specified', 'not shown',
'Unspecified', 'Unknown', 'None', 'unclassified',
'unidentified organism', 'not supplied');
# dictionary of synonyms for taxid 32644
my @unkn_genus = ('unknown','unclassified','uncultured','unidentified');
# all above can be part of valid species name
my( $sub_species, $species, $genus, $sci_name, $common,
$abbr_name, $organelle);
$sci_name = $get->('ScientificName') || return;
# no "source" elt like gb format./maj
# parse out organelle, common name, abbreviated name if present;
# this should catch everything, but falls back to
# entire GBSeq_taxonomy element just in case
# if ($get->('source') =~ m{^
# (mitochondrion|chloroplast|plastid)?
# \s*(.*?)
# \s*(?: \( (.*?) \) )?\.?
# $}xms ) {
# ($organelle, $abbr_name, $common) = ($1, $2, $3); # optional
# } else {
# $abbr_name = $get->('source'); # nothing caught; this is a backup!
# }
# # Convert data in classification lines into classification array.
my @class = split(/; /, $get->('Lineage'));
# do we have a genus?
my $possible_genus = quotemeta($class[-1])
. ($class[-2] ? "|" . quotemeta($class[-2]) : '');
if ($sci_name =~ /^($possible_genus)/) {
$genus = $1;
($species) = $sci_name =~ /^$genus\s+(.+)/;
}
else {
$species = $sci_name;
}
# is this organism of rank species or is it lower?
# (we don't catch everything lower than species, but it doesn't matter -
# this is just so we abide by previous behaviour whilst not calling a
# species a subspecies)
if ($species && $species =~ /subsp\.|var\./) {
($species, $sub_species) = $species =~ /(.+)\s+((?:subsp\.|var\.).+)/;
}
# Don't make a species object if it's empty or "Unknown" or "None"
# return unless $genus and $genus !~ /^(Unknown|None)$/oi;
# Don't make a species object if it belongs to taxid 32644
my $src = $get->('ScientificName');
return unless ($species || $genus) and
!grep { $_ eq $src } @unkn_names;
# Bio::Species array needs array in Species -> Kingdom direction
push(@class, $sci_name);
@class = reverse @class;
my $make = Bio::Species->new();
$make->scientific_name($sci_name);
$make->classification(@class) if @class > 0;
$make->common_name( $get->('CommonName'));
$make->name('abbreviated', $abbr_name) if $abbr_name;
$make->organelle($organelle) if $organelle;
$make->ncbi_taxid( $get->('TaxId') );
$make->division( $get->('Division') );
return $make;
}
1;
( run in 3.838 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )