BioPerl

 view release on metacpan or  search on metacpan

Bio/Tools/Run/RemoteBlast.pm  view on Meta::CPAN

For a description of the many CGI parameters see:
https://www.ncbi.nlm.nih.gov/BLAST/Doc/urlapi.html

Various additional options and input formats are available.

=head1 FEEDBACK

=head2 Mailing Lists

User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to one
of the Bioperl mailing lists.  Your participation is much appreciated.

  bioperl-l@bioperl.org                  - General discussion
  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists

=head2 Support 

Please direct usage questions or support issues to the mailing list:

I<bioperl-l@bioperl.org>

rather than to the module maintainer directly. Many experienced and 
responsive experts will be able to look at the problem and quickly 
address it. Please include a thorough description of the problem 
with code and data examples if at all possible.

=head2 Reporting Bugs

Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution.  Bug reports can be submitted via the
web:

  https://github.com/bioperl/bioperl-live/issues

=head1 AUTHOR 

Please do NOT contact Jason directly about this module.  Please post to
the bioperl mailing list (L</FEEDBACK>). If you would like to be the
official maintainer of this module, please volunteer on the list and
we will make it official in this POD.

First written by Jason Stajich, many others have helped keep it running.

=head1 APPENDIX

The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _

=cut

package Bio::Tools::Run::RemoteBlast;

use strict;
use warnings;

use Bio::SeqIO;
use IO::String;
use Bio::SearchIO;
use LWP;
use HTTP::Request::Common;
use Bio::Root::Version;

# Status codes returned by retrieve_blast() when it does not return a
# Bio::SearchIO object (see the retrieve_blast POD).  NOT_FINISHED means the
# job is still pending; the ERR_* values identify the kind of failure.
# NOTE(review): the ERR_* values are distinct powers of two, presumably so
# they can be told apart / combined as bit flags -- confirm against callers.
use constant {
    NOT_FINISHED => 0,
    ERR_QBSTATUS => 1,
    ERR_NOCONTENT => 2, 
    ERR_HTTPFAIL => 4,
    ERR_QBNONSPEC => 8
};
   
# Bio::Root::IO is-a Bio::Root::Root
use base qw(Bio::Root::IO Exporter);

# The status constants are exported by default so that calling code can
# compare the return value of retrieve_blast() against them by name.
our @EXPORT = qw( NOT_FINISHED ERR_QBSTATUS ERR_NOCONTENT ERR_HTTPFAIL ERR_QBNONSPEC );
# Module version, taken from the BioPerl distribution version.
our $MODVERSION = $Bio::Root::Version::VERSION;
# NCBI BLAST CGI endpoint (presumably the default used by get_url_base;
# that method is not visible here).
our $URLBASE = 'https://blast.ncbi.nlm.nih.gov/Blast.cgi';

# In GET/PUTPARAMS the values are regexes which validate the input.
#
# %PUTPARAMS maps each parameter name accepted when submitting a search to a
# regex (stored as a string) that a supplied value has to match.
# NOTE(review): the code that applies these patterns is not visible here; the
# patterns are written so that they are correct when applied anchored
# (^...$) -- confirm against the parameter setters.
#
# Fixes relative to the previous table:
#   * (DB_)GENETIC_CODE: 10 was rejected although the documented range is
#     1..16 (the old pattern used 1[1-6]).
#   * GAPCOSTS / I_THRESH: the fractional part was mandatory, so plain
#     integers such as the common gap costs "11 1" were rejected.
#   * PERC_IDENT: at least two digits were required, so 0-9 were rejected.
our %PUTPARAMS = (
    'AUTO_FORMAT'       => '(Off|(Semi|Full)auto)',     # Off, Semiauto, Fullauto
    'COMPOSITION_BASED_STATISTICS'      => '(0|1)',     # yes, no on NCBI's site, but actually binary 0/1
    'DATABASE'  =>  '.*',
    'DB_GENETIC_CODE' => '([1-9]|1[0-6]|2[12])',    # 1..16,21,22
    'DISPLAY_SORT'   => '\d',
    'ENDPOINTS' => '(yes|no)',                  # yes,no
    'ENTREZ_QUERY'      => '.*',
    'EXPECT'    => '\d+(\.\d+)?([eE]-\d+)?',    # Positive double
    'FILTER'    => '[LRm]',                     # L or R or m
    'GAPCOSTS'  => '-?\d+(\.\d+)?\s+-?\d+(\.\d+)?',
                                    # Two space separated numbers; the
                                    # fractional part is optional ("11 1")
    'GENETIC_CODE'      => '([1-9]|1[0-6]|2[12])',      # 1..16,21,22
    'HITLIST_SIZE'      => '\d+',                       # Positive integer
    'I_THRESH'  => '-?\d+(\.\d+)?([eE]-\d+)?',  # float, fraction optional
    'LAYOUT'    => '(One|Two)Windows?',         # onewindow, twowindows
    'LCASE_MASK'        => '(yes|no)',                  # yes, no
    'MATRIX_NAME'       => '.*',
    'NUCL_PENALTY'      => '-\d+',                      # Negative integer
    'NUCL_REWARD'       => '-?\d+',                     # Integer
    'OTHER_ADVANCED' => '.*',
    'PERC_IDENT'        => '\d+',                       # Non-negative integer percentage;
                                                        # single digits are accepted too
    'PHI_PATTERN'       => '.*',
    'PROGRAM'   => 't?blast[pnx]',
                                    # tblastp, tblastn, tblastx, blastp, blastn, blastx
    'QUERY'             => '.*',
    'QUERY_FILE'        => '.*',
    'QUERY_BELIEVE_DEFLINE'     => '(yes|no)',          # yes, no
    'QUERY_FROM'        => '\d+',                       # Positive integer
    'QUERY_TO'  => '\d+',                       # Positive integer
    'SEARCHSP_EFF'      => '\d+',                       # Positive integer
    'SERVICE'   => '(plain|p[sh]i|(rps|mega)blast)',
                                    # plain,psi,phi,rpsblast,megablast
    'SHORT_QUERY_ADJUST' => '(true|false)',
    'THRESHOLD' => '-?\d+',                     # Integer
    'UNGAPPED_ALIGNMENT' => '(yes|no)',         # yes, no
    'WORD_SIZE' => '\d+'                        # Positive integer
                                      );
our %GETPARAMS = (
    'ALIGNMENTS'        => '\d+',                       # Positive integer
    'ALIGNMENT_VIEW' =>
              '(Pairwise|(Flat)?QueryAnchored(NoIdentities)?|Tabular)',

Bio/Tools/Run/RemoteBlast.pm  view on Meta::CPAN

        #If query has a fasta header, the output has the query line.
        $header{'QUERY'} = ">".(defined $seq->display_id() ? $seq->display_id() : "").
                " ".(defined $seq->desc() ? $seq->desc() : "")."\n".$seq->seq();
        my $request = POST $url_base, [%header];
        $self->debug($request->as_string) if ( $self->verbose > 1);
        my $response = $self->ua->request( $request);

        if( $response->is_success ) {
            my @subdata = split(/\n/, $response->content );
            my $count = 0;
            foreach ( @subdata ) {
        if( /^\s+RID\s+=\s+(\S+)/ ) {
            $count++;
            #$self->debug("RID: $1\n");
            $self->add_rid($1);
        } elsif (/^\s+RTOE\s+=\s+(.*$)/) {
            $self->{rtoe} = $1;
            $count++;
        }
        last if $count >= 2;
            }
            if( $count == 0 ) {
        $self->warn("req was ". $request->as_string() . "\n");
        $self->warn(join('', @subdata));
            }           
            $tcount += $count;
        } else {
            # should try and be a little more verbose here
            $self->warn("req was ". $request->as_string() . "\n" .
                        $response->error_as_HTML);
            $tcount = -1;
                }
    }
    return $tcount;
}

=head2 retrieve_blast

 Title   : retrieve_blast
 Usage   : my $blastreport = $blastfactory->retrieve_blast($rid);
 Function: Attempts to retrieve a blast report from remote blast queue.
           First asks the server for the SearchInfo status of the RID;
           only when the status is READY is the report downloaded to a
           temporary file and wrapped in a Bio::SearchIO parser.
 Returns : scalar int (constant) or Bio::SearchIO object
           Constants:
            NOT_FINISHED (= 0)   : 'job not finished' (also returned when
                                   the status is WAITING/UNKNOWN or no
                                   status line could be found)
            code on error:
              ERR_QBSTATUS (= 1) : status line reports ERROR or FAILED
              ERR_NOCONTENT (= 2): HTTP request successful, but no content
                                   returned
              ERR_HTTPFAIL (= 4) : HTTP request failed (status query or
                                   report download)
              ERR_QBNONSPEC (= 8): return line matches 'ERROR' (not status
                                   line); not produced by this method at
                                   present
 Args    : Remote Blast ID (RID)

=cut

sub retrieve_blast {
    my($self, $rid) = @_;
    my $url_base = $self->get_url_base;
    my %hdr = %RETRIEVALHEADER;
    $hdr{'RID'} = $rid;

    # Step 1: ask for the job status only (SearchInfo), not the report.
    my $status_req = HTTP::Request->new(
            GET => $url_base."?CMD=Get&FORMAT_OBJECT=SearchInfo&RID=$rid",
            );
    #$self->debug("SearchInfo request is " . $status_req->as_string());
    my $status_resp = $self->ua->request($status_req);
    if( ! $status_resp->is_success ) {
        $self->warn($status_resp->error_as_HTML);
        return ERR_HTTPFAIL;
    }

    # The match is case-insensitive, so normalise the captured status before
    # comparing it.  An empty string stands for "no status line found", which
    # keeps the comparisons below free of undef warnings.
    my $status = '';
    if( $status_resp->content =~ /Status=(WAITING|ERROR|FAILED|UNKNOWN|READY)/i ) {
        $status = uc $1;
    } else {
        $self->warn("Error: No status reported\n");
    }

    if( $status eq 'ERROR' ) {
        $self->warn("Server Error");
        return ERR_QBSTATUS;
    }
    if( $status eq 'FAILED' ) {
        $self->warn("Request Failed");
        return ERR_QBSTATUS;
    }
    # WAITING, UNKNOWN and "no status" are all reported as not finished, as
    # before, so existing polling loops keep working.
    return NOT_FINISHED if $status ne 'READY';

    # Step 2: the job is ready; download the report into a temporary file.
    my ($fh,$tempfile) = $self->tempfile();
    close $fh;

    my $report_req = POST $url_base, [%hdr];
    $self->debug("retrieve request is " . $report_req->as_string());
    my $report_resp = $self->ua->request($report_req, $tempfile);

    # Do not hand a failed or empty download to the parser; report it with
    # the documented error codes instead.
    if( ! $report_resp->is_success ) {
        $self->warn($report_resp->error_as_HTML);
        return ERR_HTTPFAIL;
    }
    unless( -s $tempfile ) {
        $self->warn("No content returned for RID $rid\n");
        return ERR_NOCONTENT;
    }

    # Map the configured read method onto a Bio::SearchIO format name.
    my $mthd = $self->readmethod;
    $mthd = ($mthd =~ /blasttable/i) ? 'blasttable' :
            ($mthd =~ /xml/i)        ? 'blastxml'   :
            ($mthd =~ /pull/i)       ? 'blast_pull' :
            'blast';
    my $blastobj = Bio::SearchIO->new(
                -file => $tempfile,
                -format => $mthd);
    ## store filename in object ##
    $self->file($tempfile);
    return $blastobj;
}

=head2 save_output

 Title   : save_output
 Usage   : my $saveoutput = $self->save_output($filename)
 Function: Method to save the blast report
 Returns : 1 (throws error otherwise)
 Args    : string [rid, filename]

=cut

sub save_output {
    my ($self, $filename) = @_;
    if( not defined $filename ) {



( run in 0.862 second using v1.01-cache-2.11-cpan-39bf76dae61 )