BioPerl
view release on metacpan or search on metacpan
Bio/DB/NCBIHelper.pm view on Meta::CPAN
=head1 DESCRIPTION
Provides a single place to set up some common methods for querying NCBI
web databases. This module just centralizes the methods for
constructing a URL for querying NCBI GenBank and NCBI GenPept and the
common HTML stripping done in L</postprocess_data>.
The base NCBI query URL used is:
https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi
=head1 FEEDBACK
=head2 Mailing Lists
User feedback is an integral part of the
evolution of this and other Bioperl modules. Send
your comments and suggestions preferably to one
of the Bioperl mailing lists. Your participation
is much appreciated.
bioperl-l@bioperl.org - General discussion
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
=head2 Support
Please direct usage questions or support issues to the mailing list:
I<bioperl-l@bioperl.org>
rather than to the module maintainer directly. Many experienced and
responsive experts will be able to look at the problem and quickly
address it. Please include a thorough description of the problem
with code and data examples if at all possible.
=head2 Reporting Bugs
Report bugs to the Bioperl bug tracking system to
help us keep track of the bugs and their resolution.
Bug reports can be submitted via the web.
https://github.com/bioperl/bioperl-live/issues
=head1 AUTHOR - Jason Stajich
Email jason@bioperl.org
=head1 APPENDIX
The rest of the documentation details each of the
object methods. Internal methods are usually
preceded with a _
=cut
# Let the code begin...
package Bio::DB::NCBIHelper;
use strict;
use Bio::DB::Query::GenBank;
use HTTP::Request::Common;
use URI;
use Bio::Root::IO;
use Bio::DB::RefSeq;
use URI::Escape qw(uri_unescape);
use base qw(Bio::DB::WebDBSeqI Bio::Root::Root);
# Scheme and host that get_request() prepends to a %CGILOCATION path when
# it builds the request URL.
our $HOSTBASE = 'https://eutils.ncbi.nlm.nih.gov';
# NOTE(review): not referenced in the visible part of this file; presumably an
# upper bound on the number of entries fetched per request - confirm.
our $MAX_ENTRIES = 19000;
# NOTE(review): not referenced in the visible part of this file; presumably the
# pause between successive requests, in seconds - confirm.
our $REQUEST_DELAY = 3;
# Retrieval mode => two-element array ref [ HTTP method, CGI path ].
# get_request() appends element [1] to $HOSTBASE to form the URL and checks
# whether element [0] is 'post' to decide if the request is sent as a POST.
# Only 'batch' uses POST (epost.fcgi); every other mode is a GET to efetch.fcgi.
our %CGILOCATION = (
'batch' => [ 'post' => '/entrez/eutils/epost.fcgi' ],
'query' => [ 'get' => '/entrez/eutils/efetch.fcgi' ],
'single' => [ 'get' => '/entrez/eutils/efetch.fcgi' ],
'version' => [ 'get' => '/entrez/eutils/efetch.fcgi' ],
'gi' => [ 'get' => '/entrez/eutils/efetch.fcgi' ],
'webenv' => [ 'get' => '/entrez/eutils/efetch.fcgi' ]
);
# Maps a short format name to a longer format name.
# NOTE(review): not used in the visible part of this file; presumably the keys
# are NCBI return types and the values are sequence parser formats - verify
# against the code that reads this hash.
our %FORMATMAP = (
'gb' => 'genbank',
'gp' => 'genbank',
'fasta' => 'fasta',
'asn.1' => 'entrezgene',
'gbwithparts' => 'genbank',
);
# Value returned by default_format().
our $DEFAULTFORMAT = 'gb';
=head2 new

 Title   : new
 Usage   : my $db = Bio::DB::NCBIHelper->new(@args);
 Function: Builds the object through the parent constructor, then applies
           the retrieval options recognised by this class by calling the
           accessor of the same purpose for each one that was supplied.
 Returns : the newly constructed object
 Args    : Named arguments, extracted with _rearrange() (all optional; the
           full argument list is also forwarded to the parent constructor):
             SEQ_START        passed to seq_start() when true
             SEQ_STOP         passed to seq_stop() when true
             NO_REDIRECT      passed to no_redirect() when true
             REDIRECT_REFSEQ  passed to redirect_refseq() when true
             STRAND           passed to strand() when true
             COMPLEXITY       passed to complexity() when defined and
                              between 0 and 4 inclusive; any other value
                              is silently ignored

=cut

sub new {
    my ( $class, @args ) = @_;
    my $self = $class->SUPER::new(@args);

    # Order here fixes the order of the values returned by _rearrange().
    my @option_names
        = qw(SEQ_START SEQ_STOP NO_REDIRECT REDIRECT_REFSEQ COMPLEXITY STRAND);
    my ( $seq_start, $seq_stop, $no_redirect, $redirect, $complexity, $strand )
        = $self->_rearrange( \@option_names, @args );

    # These options are only stored when they carry a true value.
    $self->seq_start($seq_start)      if $seq_start;
    $self->seq_stop($seq_stop)        if $seq_stop;
    $self->no_redirect($no_redirect)  if $no_redirect;
    $self->redirect_refseq($redirect) if $redirect;
    $self->strand($strand)            if $strand;

    # Complexity is tested with defined() so that a value of zero is accepted.
    if ( defined $complexity && $complexity >= 0 && $complexity <= 4 ) {
        $self->complexity($complexity);
    }

    return $self;
}
=head2 get_params

 Title   : get_params
 Usage   : my %params = $self->get_params($mode)
 Function: Returns the key,value pairs to be passed to the NCBI database
           for either the 'batch' or the 'single' sequence retrieval
           method. This base implementation is only a placeholder: it
           always throws, so a subclass has to provide its own version.
 Returns : a key,value pair hash (from the subclass implementation)
 Args    : 'single' or 'batch' mode for retrieval
 Throws  : always, with "subclass did not implement get_params"

=cut

sub get_params {
    # The retrieval mode argument is accepted but not looked at here.
    my ($self) = @_;
    return $self->throw('subclass did not implement get_params');
}
=head2 default_format

 Title   : default_format
 Usage   : my $format = $self->default_format
 Function: Returns the default sequence format for this module, as held
           in the package variable $DEFAULTFORMAT.
 Returns : string
 Args    : none

=cut

sub default_format {
    # The invocant is not consulted; the value comes from the package variable.
    my $format = $DEFAULTFORMAT;
    return $format;
}
=head2 get_request
Title : get_request
Usage : my $url = $self->get_request
Function: HTTP::Request
Returns :
Args : %qualifiers = a hash of qualifiers (ids, format, etc)
=cut
sub get_request {
my ( $self, @qualifiers ) = @_;
my ( $mode, $uids, $format, $query, $seq_start, $seq_stop, $strand,
$complexity )
= $self->_rearrange(
[qw(MODE UIDS FORMAT QUERY SEQ_START SEQ_STOP STRAND COMPLEXITY)],
@qualifiers );
$mode = lc $mode;
($format) = $self->request_format() unless ( defined $format );
if ( !defined $mode || $mode eq '' ) { $mode = 'single'; }
my %params = $self->get_params($mode);
if ( !%params ) {
$self->throw(
"must specify a valid retrieval mode 'single' or 'batch' not '$mode'"
);
}
my $url = URI->new( $HOSTBASE . $CGILOCATION{$mode}[1] );
unless ( $mode eq 'webenv' || defined $uids || defined $query ) {
$self->throw("Must specify a query or list of uids to fetch");
}
if ( $query && $query->can('cookie') ) {
@params{ 'WebEnv', 'query_key' } = $query->cookie;
$params{'db'} = $query->db;
}
elsif ($query) {
$params{'id'} = join ',', $query->ids;
}
# for batch retrieval, non-query style
elsif ( $mode eq 'webenv' && $self->can('cookie') ) {
@params{ 'WebEnv', 'query_key' } = $self->cookie;
}
elsif ($uids) {
if ( ref($uids) =~ /array/i ) {
$uids = join( ",", @$uids );
}
$params{'id'} = $uids;
}
$seq_start && ( $params{'seq_start'} = $seq_start );
$seq_stop && ( $params{'seq_stop'} = $seq_stop );
$strand && ( $params{'strand'} = $strand );
if ( defined $complexity && ( $seq_start || $seq_stop || $strand ) ) {
$self->warn(
"Complexity set to $complexity; seq_start and seq_stop may not work!"
) if ( $complexity != 1 && ( $seq_start || $seq_stop ) );
$self->warn(
"Complexity set to 0; expect strange results with strand set to 2"
) if ( $complexity == 0 && $strand == 2 && $format eq 'fasta' );
}
defined $complexity && ( $params{'complexity'} = $complexity );
$params{'rettype'} = $format unless $mode eq 'batch';
# for now, 'post' is batch retrieval
if ( $CGILOCATION{$mode}[0] eq 'post' ) {
my $response = $self->ua->request( POST $url, [%params] );
( run in 0.438 second using v1.01-cache-2.11-cpan-39bf76dae61 )