BioPerl-Run

 view release on metacpan or  search on metacpan

lib/Bio/DB/SoapEUtilities/FetchAdaptor/seq.pm  view on Meta::CPAN

	}

	# features
	my $feats;
	if ($self->builder->want_slot('features')) {
	    $feats = _read_features($stem,$som,$self->locfac,$get);
	    $self->builder->add_slot_value(
		-features => $feats
		);
	}
	
	# organism data
	if ( $self->builder->want_slot('species') && $get_tl->('source') ) {
	    my $sp = _read_species($get);
	    if ($sp && !$sp->ncbi_taxid) {
		my ($src) = grep { $_->primary_tag eq 'source' } @$feats;
		if ($src) {
		    foreach my $val ($src->get_tag_values('db_xref')) {
			$sp->ncbi_taxid(substr($val,6)) if index($val,"taxon:") == 0;
		    }
		}
	    }
	    $self->builder->add_slot_value( -species => $sp );
	}
    }
    else {
	$self->throw("FetchAdaptor::seq : unrecognized result elt type '$t', can't parse");
    }
    
    ($self->{_idx})++;
    return $self->builder->make_object;
}

# mostly ripped from Bio::SeqIO::genbank...

sub _read_species {
    my ($get) = @_;
    
    my @unkn_names = ('other', 'unknown organism', 'not specified', 'not shown',
		      'Unspecified', 'Unknown', 'None', 'unclassified',
		      'unidentified organism', 'not supplied');
    # dictionary of synonyms for taxid 32644
    my @unkn_genus = ('unknown','unclassified','uncultured','unidentified');
    # all above can be part of valid species name

    my( $sub_species, $species, $genus, $sci_name, $common, 
         $abbr_name, $organelle);

    $sci_name = $get->('organism') || return;

    # parse out organelle, common name, abbreviated name if present;
    # this should catch everything, but falls back to
    # entire GBSeq_taxonomy element just in case
    if ($get->('source') =~ m{^
		              (mitochondrion|chloroplast|plastid)?
		              \s*(.*?)
		              \s*(?: \( (.*?) \) )?\.?
		              $}xms ) { 
        ($organelle, $abbr_name, $common) = ($1, $2, $3); # optional
    } else {
        $abbr_name = $get->('source'); # nothing caught; this is a backup!
    }

    # Convert data in classification lines into classification array.
    my @class = split(/; /, $get->('taxonomy'));

    # do we have a genus?
    my $possible_genus =  quotemeta($class[-1])
       . ($class[-2] ? "|" . quotemeta($class[-2]) : '');
    if ($sci_name =~ /^($possible_genus)/) {
	$genus = $1;
	($species) = $sci_name =~ /^$genus\s+(.+)/;
    }
    else {
	$species = $sci_name;
    }

    # is this organism of rank species or is it lower?
    # (we don't catch everything lower than species, but it doesn't matter -
    # this is just so we abide by previous behaviour whilst not calling a
    # species a subspecies)
    if ($species && $species =~ /subsp\.|var\./) {
	($species, $sub_species) = $species =~ /(.+)\s+((?:subsp\.|var\.).+)/;
    }

    # Don't make a species object if it's empty or "Unknown" or "None"
    # return unless $genus and  $genus !~ /^(Unknown|None)$/oi;
    # Don't make a species object if it belongs to taxid 32644
    my $src = $get->('source');
    return unless ($species || $genus) and 
	!grep { $_ eq $src } @unkn_names;

    # Bio::Species array needs array in Species -> Kingdom direction
    push(@class, $sci_name);
    @class = reverse @class;

    my $make = Bio::Species->new();
    $make->scientific_name($sci_name);
    $make->classification(@class) if @class > 0;
    $make->common_name( $common ) if $common;
    $make->name('abbreviated', $abbr_name) if $abbr_name;
    $make->organelle($organelle) if $organelle;

    return $make;
}

sub next_seq { shift->next_obj }

sub _read_references {
    my ($stem, $som) = @_;
    my @ret;
    for ( my $i = 1; $som->valueof($stem."/GBSeq_references/[$i]"); $i++ ) {
	my $get = sub { 
	    $som->valueof($stem."/GBSeq_references/[$i]/GBReference_".shift ) 
	};
	my %params;
	$params{'-title'} = $get->('title');
	$params{'-pubmed'} = $get->('pubmed');
	$params{'-medline'} = $get->('pubmed');
	$params{'-journal'} = $get->('journal');
	$params{'-comment'} = $get->('remark');



( run in 1.462 second using v1.01-cache-2.11-cpan-39bf76dae61 )