BioPerl

 view release on metacpan or  search on metacpan

Bio/DB/NCBIHelper.pm  view on Meta::CPAN

=head1 DESCRIPTION

Provides a single place to setup some common methods for querying NCBI
web databases.  This module just centralizes the methods for
constructing a URL for querying NCBI GenBank and NCBI GenPept and the
common HTML stripping done in L</postprocess_data>().

The base NCBI query URL used is:
https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi

=head1 FEEDBACK

=head2 Mailing Lists

User feedback is an integral part of the
evolution of this and other Bioperl modules. Send
your comments and suggestions preferably to one
of the Bioperl mailing lists. Your participation
is much appreciated.

  bioperl-l@bioperl.org                  - General discussion
  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists

=head2 Support

Please direct usage questions or support issues to the mailing list:

I<bioperl-l@bioperl.org>

rather than to the module maintainer directly. Many experienced and
responsive experts will be able to look at the problem and quickly
address it. Please include a thorough description of the problem
with code and data examples if at all possible.

=head2 Reporting Bugs

Report bugs to the Bioperl bug tracking system to
help us keep track of the bugs and their resolution.
Bug reports can be submitted via the web.

  https://github.com/bioperl/bioperl-live/issues

=head1 AUTHOR - Jason Stajich

Email jason@bioperl.org

=head1 APPENDIX

The rest of the documentation details each of the
object methods. Internal methods are usually
preceded with a _

=cut

# Let the code begin...

package Bio::DB::NCBIHelper;
use strict;

use Bio::DB::Query::GenBank;
use HTTP::Request::Common;
use URI;
use Bio::Root::IO;
use Bio::DB::RefSeq;
use URI::Escape qw(uri_unescape);

use base qw(Bio::DB::WebDBSeqI Bio::Root::Root);

# Scheme and host that get_request() prepends to a %CGILOCATION path when
# it builds the request URL.
our $HOSTBASE = 'https://eutils.ncbi.nlm.nih.gov';

# NOTE(review): presumably the maximum number of entries per request and the
# pause (in seconds?) between requests -- neither is read in this part of the
# file, so confirm against the code that uses them.
our $MAX_ENTRIES = 19000;
our $REQUEST_DELAY = 3;

# Retrieval mode => [ HTTP method, CGI path ].  get_request() looks the mode
# up here: element 1 is appended to $HOSTBASE, and element 0 equal to 'post'
# selects a POST request.
our %CGILOCATION = (
        'batch'   => [ 'post' => '/entrez/eutils/epost.fcgi' ],
        'query'   => [ 'get'  => '/entrez/eutils/efetch.fcgi' ],
        'single'  => [ 'get'  => '/entrez/eutils/efetch.fcgi' ],
        'version' => [ 'get'  => '/entrez/eutils/efetch.fcgi' ],
        'gi'      => [ 'get'  => '/entrez/eutils/efetch.fcgi' ],
        'webenv'  => [ 'get'  => '/entrez/eutils/efetch.fcgi' ]
    );

# Short format code => format name.
# NOTE(review): the values look like sequence-parser format names keyed by
# the code sent to NCBI -- confirm against the code that reads this map.
our %FORMATMAP = (
        'gb'          => 'genbank',
        'gp'          => 'genbank',
        'fasta'       => 'fasta',
        'asn.1'       => 'entrezgene',
        'gbwithparts' => 'genbank',
    );

# Value handed back by default_format().
our $DEFAULTFORMAT = 'gb';

=head2 new

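 Title   : new
 Usage   : my $db = $class->new(@args)
 Function: Constructor. Passes all arguments to the parent constructor,
           then applies any of the optional settings listed under Args.
 Returns : the newly constructed object
 Args    : optional named parameters, as recognized by _rearrange:
           SEQ_START, SEQ_STOP, NO_REDIRECT, REDIRECT_REFSEQ, STRAND,
           and COMPLEXITY (applied only when between 0 and 4)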

=cut

sub new {
    my ( $class, @args ) = @_;

    # The parent constructor receives the full, untouched argument list.
    my $self = $class->SUPER::new(@args);

    # Extract the settings this class handles itself from the same list.
    my ( $start, $stop, $skip_redirect, $refseq_redirect, $cplx, $strand_val ) =
        $self->_rearrange(
        [ qw(SEQ_START SEQ_STOP NO_REDIRECT REDIRECT_REFSEQ COMPLEXITY STRAND) ],
        @args
        );

    # Each of these is applied only when a true value was supplied.
    $self->seq_start($start)                  if $start;
    $self->seq_stop($stop)                    if $stop;
    $self->no_redirect($skip_redirect)        if $skip_redirect;
    $self->redirect_refseq($refseq_redirect)  if $refseq_redirect;
    $self->strand($strand_val)                if $strand_val;

    # Complexity is checked with defined() so that 0 is accepted; a value
    # outside 0..4 is silently left unset.
    if ( defined $cplx && $cplx >= 0 && $cplx <= 4 ) {
        $self->complexity($cplx);
    }

    return $self;
}


=head2 get_params

 Title   : get_params
 Usage   : my %params = $self->get_params($mode)
 Function: returns key,value pairs to be passed to NCBI database
           for either 'batch' or 'single' sequence retrieval method
 Returns : a key,value pair hash
 Args    : 'single' or 'batch' mode for retrieval

=cut

sub get_params {
    # Abstract method: this base implementation never returns.  The
    # retrieval mode passed by the caller is not examined here.
    my $self = shift;
    $self->throw("subclass did not implement get_params");
}

=head2 default_format

 Title   : default_format
 Usage   : my $format = $self->default_format
 Function: returns default sequence format for this module
 Returns : string
 Args    : none

=cut

sub default_format {
    # The invocant and any arguments are ignored; the answer is always the
    # package-level default.
    my $fallback_format = $DEFAULTFORMAT;
    return $fallback_format;
}

=head2 get_request

 Title   : get_request
 Usage   : my $url = $self->get_request
 Function: HTTP::Request
 Returns :
 Args    : %qualifiers = a hash of qualifiers (ids, format, etc)

=cut

sub get_request {
    my ( $self, @qualifiers ) = @_;
    my ( $mode, $uids, $format, $query, $seq_start, $seq_stop, $strand,
        $complexity )
        = $self->_rearrange(
        [qw(MODE UIDS FORMAT QUERY SEQ_START SEQ_STOP STRAND COMPLEXITY)],
        @qualifiers );
    $mode = lc $mode;
    ($format) = $self->request_format() unless ( defined $format );
    if ( !defined $mode || $mode eq '' ) { $mode = 'single'; }
    my %params = $self->get_params($mode);
    if ( !%params ) {
        $self->throw(
            "must specify a valid retrieval mode 'single' or 'batch' not '$mode'"
        );
    }
    my $url = URI->new( $HOSTBASE . $CGILOCATION{$mode}[1] );
    unless ( $mode eq 'webenv' || defined $uids || defined $query ) {
        $self->throw("Must specify a query or list of uids to fetch");
    }
    if ( $query && $query->can('cookie') ) {
        @params{ 'WebEnv', 'query_key' } = $query->cookie;
        $params{'db'} = $query->db;
    }
    elsif ($query) {
        $params{'id'} = join ',', $query->ids;
    }

    # for batch retrieval, non-query style
    elsif ( $mode eq 'webenv' && $self->can('cookie') ) {
        @params{ 'WebEnv', 'query_key' } = $self->cookie;
    }
    elsif ($uids) {
        if ( ref($uids) =~ /array/i ) {
            $uids = join( ",", @$uids );
        }
        $params{'id'} = $uids;
    }
    $seq_start && ( $params{'seq_start'} = $seq_start );
    $seq_stop  && ( $params{'seq_stop'}  = $seq_stop );
    $strand    && ( $params{'strand'}    = $strand );
    if ( defined $complexity && ( $seq_start || $seq_stop || $strand ) ) {
        $self->warn(
            "Complexity set to $complexity; seq_start and seq_stop may not work!"
        ) if ( $complexity != 1 && ( $seq_start || $seq_stop ) );
        $self->warn(
            "Complexity set to 0; expect strange results with strand set to 2"
        ) if ( $complexity == 0 && $strand == 2 && $format eq 'fasta' );
    }
    defined $complexity && ( $params{'complexity'} = $complexity );
    $params{'rettype'} = $format unless $mode eq 'batch';

    # for now, 'post' is batch retrieval
    if ( $CGILOCATION{$mode}[0] eq 'post' ) {
        my $response = $self->ua->request( POST $url, [%params] );



( run in 0.438 second using v1.01-cache-2.11-cpan-39bf76dae61 )