BioPerl
view release on metacpan or search on metacpan
scripts/DB/bp_biofetch_genbank_proxy.pl view on Meta::CPAN
#!perl
# dbfetch style caching proxy for GenBank
use strict;
use warnings;
use CGI qw(:standard);
use HTTP::Request::Common;
use LWP::UserAgent;
use Cache::FileCache;
use vars qw(%GOT $BUFFER %MAPPING $CACHE);
use constant CACHE_LOCATION => '/usr/tmp/dbfetch_cache';
use constant MAX_SIZE => 100_000_000; # 100 megs, roughly
use constant CACHE_DEPTH => 4;
use constant EXPIRATION => "1 week";
use constant PURGE => "1 hour";
%MAPPING = (genbank => {db=>'nucleotide',
rettype => 'gb'},
genpep => {db=>'protein',
rettype => 'gp'});
# we're doing everything in callbacks, so initialize globals.
$BUFFER = '';
%GOT = ();
print header('text/plain');
param() or print_usage();
my $db = param('db');
my $style = param('style');
my $format = param('format');
my $id = param('id');
my @ids = split /\s+/,$id;
$format = 'genbank' if $format eq 'default'; #h'mmmph
$MAPPING{$db} or error(1=>"Unknown database [$db]");
$style eq 'raw' or error(2=>"Unknown style [$style]");
$format eq 'genbank' or error(3=>"Format [$format] not known for database [$db]");
$CACHE = Cache::FileCache->new({cache_root => CACHE_LOCATION,
default_expires_in => EXPIRATION,
cache_DEPTH => CACHE_DEPTH,
namespace => 'dbfetch',
auto_purge_interval => PURGE});
# handle cached entries
foreach (@ids) {
if (my $obj = $CACHE->get($_)) {
$GOT{$_}++;
print $obj,"//\n";
}
}
# handle the remainder
@ids = grep {!$GOT{$_}} @ids;
if (@ids) {
my $request = POST('https://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi',
[rettype => $MAPPING{$db}{rettype},
db => $MAPPING{$db}{db},
tool => 'bioperl',
retmode => 'text',
usehistory => 'n',
id => join(',',@ids),
]
);
my $ua = LWP::UserAgent->new;
my $response = $ua->request($request,\&callback);
if ($response->is_error) {
my $status = $response->status_line;
error(6 => "HTTP error from GenBank [$status]");
}
}
my @missing_ids = grep {!$GOT{$_}} @ids;
foreach (@missing_ids) {
error(4=>"ID [$_] not found in database [$db]",1);
}
( run in 1.757 second using v1.01-cache-2.11-cpan-39bf76dae61 )