BioPerl

 view release on metacpan or  search on metacpan

scripts/DB/bp_biofetch_genbank_proxy.pl  view on Meta::CPAN

#!perl

# dbfetch style caching proxy for GenBank
use strict;
use warnings;
use CGI qw(:standard);
use HTTP::Request::Common;
use LWP::UserAgent;
use Cache::FileCache;

use vars qw(%GOT $BUFFER %MAPPING $CACHE);

use constant CACHE_LOCATION => '/usr/tmp/dbfetch_cache';
use constant MAX_SIZE   => 100_000_000;  # 100 megs, roughly
use constant CACHE_DEPTH => 4;
use constant EXPIRATION => "1 week";
use constant PURGE      => "1 hour";

%MAPPING = (genbank => {db=>'nucleotide',
			rettype => 'gb'},
	    genpep  => {db=>'protein',
			rettype => 'gp'});
# we're doing everything in callbacks, so initialize globals.
$BUFFER = '';
%GOT    = ();

print header('text/plain');

param() or print_usage();

my $db     = param('db');
my $style  = param('style');
my $format = param('format');
my $id     = param('id');
my @ids    = split /\s+/,$id;

$format = 'genbank' if $format eq 'default';  #h'mmmph

$MAPPING{$db}        or error(1=>"Unknown database [$db]");
$style  eq 'raw'     or error(2=>"Unknown style [$style]");
$format eq 'genbank' or error(3=>"Format [$format] not known for database [$db]");

$CACHE = Cache::FileCache->new({cache_root          => CACHE_LOCATION,
				default_expires_in  => EXPIRATION,
				cache_DEPTH         => CACHE_DEPTH,
				namespace           => 'dbfetch',
				auto_purge_interval => PURGE});

# handle cached entries
foreach (@ids) {
  if (my $obj = $CACHE->get($_)) {
    $GOT{$_}++;
    print $obj,"//\n";
  }
}

# handle the remainder
@ids = grep {!$GOT{$_}} @ids;
if (@ids) {
  my $request = POST('https://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi',
		     [rettype    => $MAPPING{$db}{rettype},
		      db         => $MAPPING{$db}{db},
		      tool       => 'bioperl',
		      retmode    => 'text',
		      usehistory => 'n',
		      id         => join(',',@ids),
		     ]
		    );

  my $ua = LWP::UserAgent->new;
  my $response = $ua->request($request,\&callback);

  if ($response->is_error) {
    my $status = $response->status_line;
    error(6 => "HTTP error from GenBank [$status]");
  }
}

my @missing_ids = grep {!$GOT{$_}} @ids;
foreach (@missing_ids) {
  error(4=>"ID [$_] not found in database [$db]",1);
}



( run in 1.757 second using v1.01-cache-2.11-cpan-39bf76dae61 )