BioPerl-Run

 view release on metacpan or  search on metacpan

lib/Bio/DB/SoapEUtilities/FetchAdaptor/species.pm  view on Meta::CPAN

    $self->SUPER::_initialize(@args);
#    my ($builder, $seqfac ) = $self->_rearrange( [qw(SEQBUILDER
#                                                     SEQFACTORY)], @args );
    $self->{'_obj_class'} = 'Bio::Species' ; 
    $self->{'_idx'} = 1;
    1;
}

sub rewind { shift->{'_idx'} = 1 }

sub obj_class { shift->{'_obj_class'} }

sub next_species { shift->next_obj }

sub next_obj {
    my $self = shift;
    my $stem = "//TaxaSet/[".$self->{'_idx'}."]";
#    my $stem = "//Body/".$self->result->result_type."/[".$self->{'_idx'}."]";
    my $som = $self->result->som;

    return unless defined $som->valueof($stem);
    my $get = sub { $som->valueof("$stem/".shift) };
    my $toplev = $som->valueof("$stem");
    my $get_tl = sub { $toplev->{ shift @_ } };
    my $sp = _read_species($get_tl);
    $self->warn("FetchAdaptor::species - parse error, no Bio::Species returned") unless $sp;
    ($self->{_idx})++;
    return $sp;
}
1;

# mostly ripped from Bio::SeqIO::genbank...

sub _read_species {
    my ($get) = @_;
    
    my @unkn_names = ('other', 'unknown organism', 'not specified', 'not shown',
		      'Unspecified', 'Unknown', 'None', 'unclassified',
		      'unidentified organism', 'not supplied');
    # dictionary of synonyms for taxid 32644
    my @unkn_genus = ('unknown','unclassified','uncultured','unidentified');
    # all above can be part of valid species name

    my( $sub_species, $species, $genus, $sci_name, $common, 
         $abbr_name, $organelle);

    $sci_name = $get->('ScientificName') || return;

# no "source" elt like gb format./maj

    # parse out organelle, common name, abbreviated name if present;
    # this should catch everything, but falls back to
    # entire GBSeq_taxonomy element just in case
#     if ($get->('source') =~ m{^
# 		              (mitochondrion|chloroplast|plastid)?
# 		              \s*(.*?)
# 		              \s*(?: \( (.*?) \) )?\.?
# 		              $}xms ) { 
#         ($organelle, $abbr_name, $common) = ($1, $2, $3); # optional
#     } else {
#         $abbr_name = $get->('source'); # nothing caught; this is a backup!
#     }

#     # Convert data in classification lines into classification array.
    my @class = split(/; /, $get->('Lineage'));

    # do we have a genus?
    my $possible_genus =  quotemeta($class[-1])
       . ($class[-2] ? "|" . quotemeta($class[-2]) : '');
    if ($sci_name =~ /^($possible_genus)/) {
	$genus = $1;
	($species) = $sci_name =~ /^$genus\s+(.+)/;
    }
    else {
	$species = $sci_name;
    }

    # is this organism of rank species or is it lower?
    # (we don't catch everything lower than species, but it doesn't matter -
    # this is just so we abide by previous behaviour whilst not calling a
    # species a subspecies)
    if ($species && $species =~ /subsp\.|var\./) {
	($species, $sub_species) = $species =~ /(.+)\s+((?:subsp\.|var\.).+)/;
    }

    # Don't make a species object if it's empty or "Unknown" or "None"
    # return unless $genus and  $genus !~ /^(Unknown|None)$/oi;
    # Don't make a species object if it belongs to taxid 32644
    my $src = $get->('ScientificName');
    return unless ($species || $genus) and 
	!grep { $_ eq $src } @unkn_names;

    # Bio::Species array needs array in Species -> Kingdom direction
    push(@class, $sci_name);
    @class = reverse @class;

    my $make = Bio::Species->new();
    $make->scientific_name($sci_name);
    $make->classification(@class) if @class > 0;
    $make->common_name( $get->('CommonName'));
    $make->name('abbreviated', $abbr_name) if $abbr_name;
    $make->organelle($organelle) if $organelle;
    $make->ncbi_taxid( $get->('TaxId') );
    $make->division( $get->('Division') );
    return $make;
}

1;



( run in 3.838 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )