BioPerl
view release on metacpan or search on metacpan
Bio/DB/WebDBSeqI.pm view on Meta::CPAN
}
=head2 proxy
Title : proxy
Usage : $httpproxy = $db->proxy('http') or
$db->proxy(['http','ftp'], 'http://myproxy' )
Function: Get/Set a proxy for use of proxy
Returns : a string indicating the proxy
Args : $protocol : an array ref of the protocol(s) to set/get
$proxyurl : url of the proxy to use for the specified protocol
$username : username (if proxy requires authentication)
$password : password (if proxy requires authentication)
=cut
sub proxy {
my ($self,$protocol,$proxy,$username,$password) = @_;
return if ( !defined $self->ua || !defined $protocol
|| !defined $proxy );
$self->authentication($username, $password)
if ($username && $password);
return $self->ua->proxy($protocol,$proxy);
}
=head2 authentication
Title : authentication
Usage : $db->authentication($user,$pass)
Function: Get/Set authentication credentials
Returns : Array of user/pass
Args : Array or user/pass
=cut
sub authentication{
my ($self,$u,$p) = @_;
if( defined $u && defined $p ) {
$self->{'_authentication'} = [ $u,$p];
}
return @{$self->{'_authentication'}};
}
=head2 retrieval_type
Title : retrieval_type
Usage : $self->retrieval_type($type);
my $type = $self->retrieval_type
Function: Get/Set a proxy for retrieval_type (pipeline, io_string or tempfile)
Returns : string representing retrieval type
Args : $value - the value to store
This setting affects how the data stream from the remote web server is
processed and passed to the Bio::SeqIO layer. Three types of retrieval
types are currently allowed:
pipeline Perform a fork in an attempt to begin streaming
while the data is still downloading from the remote
server. Disk, memory and speed efficient, but will
not work on Windows or MacOS 9 platforms.
io_string Store downloaded database entry(s) in memory. Can be
problematic for batch downloads because entire set
of entries must fit in memory. Alll entries must be
downloaded before processing can begin.
tempfile Store downloaded database entry(s) in a temporary file.
All entries must be downloaded before processing can
begin.
The default is pipeline, with automatic fallback to io_string if
pipelining is not available.
=cut
sub retrieval_type {
my ($self, $value) = @_;
if( defined $value ) {
$value = lc $value;
if( ! $RETRIEVAL_TYPES{$value} ) {
$self->warn("invalid retrieval type $value must be one of (" .
join(",", keys %RETRIEVAL_TYPES), ")");
$value = $DEFAULT_RETRIEVAL_TYPE;
}
$self->{'_retrieval_type'} = $value;
}
return $self->{'_retrieval_type'};
}
=head2 url_params
Title : url_params
Usage : my $params = $self->url_params or
$self->url_params($params)
Function: Get/Set the URL parameters for the Web Database
Returns : url parameters for Web Database
Args : $params - parameters to be appended to the URL for the WebDatabase
=cut
sub url_params {
my ($self, $value) = @_;
if( defined $value ) {
$self->{'_urlparams'} = $value;
}
}
=head2 ua
Title : ua
Usage : my $ua = $self->ua or
$self->ua($ua)
Function: Get/Set a LWP::UserAgent for use
Returns : reference to LWP::UserAgent Object
Args : $ua - must be a LWP::UserAgent
=cut
Bio/DB/WebDBSeqI.pm view on Meta::CPAN
sub postprocess_data {
my ( $self, %args) = @_;
return;
}
# private methods
sub _request {
my ($self, $url,$tmpfile) = @_;
my ($resp);
if( defined $tmpfile && $tmpfile ne '' ) {
$resp = $self->ua->request($url, $tmpfile);
} else {
$resp = $self->ua->request($url);
}
if( $resp->is_error ) {
$self->throw("WebDBSeqI Request Error:\n".$resp->as_string);
}
return $resp;
}
#mod_perl-safe replacement for the open(BLEH,'-|') call. if running
#under mod_perl, detects it and closes the child's STDIN and STDOUT
#handles
sub _open_pipe {
my ($self) = @_;
# is mod_perl running? Which API?
my $mp = $self->mod_perl_api;
if($mp and ! our $loaded_apache_sp) {
my $load_api = ($mp == 1) ? 'use Apache::SubProcess': 'use Apache2::SubProcess';
eval $load_api;
$@ and $self->throw("$@\n$load_api module required for running under mod_perl");
$loaded_apache_sp = 1;
}
my $pipe = IO::Pipe->new();
local $SIG{CHLD} = 'IGNORE';
defined(my $pid = fork)
or $self->throw("Couldn't fork: $!");
unless($pid) {
#CHILD
$pipe->writer();
#if we're running under mod_perl, clean up some things after this fork
if ($ENV{MOD_PERL} and my $r = eval{Apache->request} ) {
$r->cleanup_for_exec;
#don't read or write the mod_perl parent's tied filehandles
close STDIN; close STDOUT;
setsid() or $self->throw('Could not detach from parent');
}
} else {
#PARENT
$pipe->reader();
}
return ( $pid, $pipe );
}
# send web request to specified filehandle, or stdout, for streaming purposes
sub _stream_request {
my $self = shift;
my $request = shift;
my $dest_fh = shift || \*STDOUT;
# fork so as to pipe output of fetch process through to
# postprocess_data method call.
my ($child,$fetch) = $self->_open_pipe();
if ($child) {
#PARENT
local ($/) = "//\n"; # assume genbank/swiss format
$| = 1;
my $records = 0;
while (my $record = <$fetch>) {
$records++;
$self->postprocess_data('type' => 'string',
'location' => \$record);
print $dest_fh $record;
}
$/ = "\n"; # reset to be safe;
close $dest_fh; #must explicitly close here, because the hard
#exits don't cloes them for us
}
else {
#CHILD
$| = 1;
my $resp = $self->ua->request($request,
sub { print $fetch $_[0] }
);
if( $resp->is_error ) {
$self->throw("WebDBSeqI Request Error:\n".$resp->as_string);
}
close $fetch; #must explicitly close here, because the hard exists
#don't close them for us
POSIX::_exit(0);
}
}
sub io {
my ($self,$io) = @_;
if(defined($io) || (! exists($self->{'_io'}))) {
$io = Bio::Root::IO->new() unless $io;
$self->{'_io'} = $io;
}
return $self->{'_io'};
}
=head2 delay
Title : delay
Usage : $secs = $self->delay([$secs])
Function: get/set number of seconds to delay between fetches
Returns : number of seconds to delay
Args : new value
NOTE: the default is to use the value specified by delay_policy().
This can be overridden by calling this method, or by passing the
( run in 1.929 second using v1.01-cache-2.11-cpan-140bd7fdf52 )