Bio-WebService-LANL-SequenceLocator

 view release on metacpan or  search on metacpan

lib/Bio/WebService/LANL/SequenceLocator.pm  view on Meta::CPAN

    print encode_json(\@sequences);
    
    __END__
    [
       {
          "query" : "sequence_1",
          "query_sequence" : "AGCAATCAGATGGTCAGCCAAAATTGCCCTATAGTGCAGAACATCCAGGGGCAAGTGGTACATCAGGCCATATCACCTAGAACTTTAAATGCA",
          "base_type" : "nucleotide",
          "reverse_complement" : "0",
          "alignment" : "\n Query AGCAATCAGA TGGTCAGCCA AAATTGCCCT ATAGTGCAGA ACATCCAGGG  50\n       ::::::::    ::::::::: ::::: :::: :::::::::: :::::::::: \n  HXB2 AGCAATCA-- -GGTCAGCCA AAATTACCCT ATAGTGCAGA ACATCCAGGG  1208\n\n Query GCAAGTGGTA CAT...
          "hxb2_sequence" : "AGCAATCA---GGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCA",
          "similarity_to_hxb2" : "94.6",
          "start" : "373",
          "end" : "462",
          "genome_start" : "1162",
          "genome_end" : "1251",
          "polyprotein" : "Gag",
          "region_names" : [
             "Gag",
             "p17",
             "p24"
          ],
          "regions" : [
             {
                "cds" : "Gag",
                "aa_from_protein_start" : [ "125", "154" ],
                "na_from_cds_start" : [ "373", "462" ],
                "na_from_hxb2_start" : [ "1162", "1251" ],
                "na_from_query_start" : [ "1", "93" ],
                "protein_translation" : "SNQMVSQNCPIVQNIQGQVVHQAISPRTLNA"
             },
             {
                "cds" : "p17",
                "aa_from_protein_start" : [ "125", "132" ],
                "na_from_cds_start" : [ "373", "396" ],
                "na_from_hxb2_start" : [ "1162", "1185" ],
                "na_from_query_start" : [ "1", "27" ],
                "protein_translation" : "SNQMVSQNC"
             },
             {
                "cds" : "p24",
                "aa_from_protein_start" : [ "1", "22" ],
                "na_from_cds_start" : [ "1", "66" ],
                "na_from_hxb2_start" : [ "1186", "1251" ],
                "na_from_query_start" : [ "28", "93" ],
                "protein_translation" : "PIVQNIQGQVVHQAISPRTLNA"
             }
          ]
       }
    ]

=cut

package Bio::WebService::LANL::SequenceLocator;

use Moo;
use Data::Dumper;
use HTML::LinkExtor;
use HTML::TableExtract;
use HTML::TokeParser;
use HTTP::Request::Common;
use List::AllUtils qw< pairwise part min max >;

# Module version.  Besides the usual packaging role, this value is
# interpolated into the User-Agent product token built by the "agent"
# attribute below.  NOTE(review): the number looks like a YYYYMMDD date
# stamp -- confirm the release numbering scheme before bumping it.
our $VERSION = 20170324;

=head1 METHODS

=head2 new

Returns a new instance of this class.  The C<agent_string> parameter is
optional, but you should provide it to identify yourself to LANL as a
courtesy.  See the L</SYNOPSIS> for an example.

=cut

# Text identifying the caller to LANL.  It is appended to the User-Agent
# product token assembled by the "agent" attribute.  Read-only and built
# lazily: when the constructor is not given a value, the builder yields an
# empty string.
has agent_string => (
    lazy    => 1,
    is      => 'ro',
    builder => sub { return ''; },
);

# HTTP client used for all requests.  Read-only and built lazily on first
# access, so LWP::UserAgent is only loaded when a request is actually made
# (or never, if the caller passes in their own agent object).
#
# The User-Agent header is "<package>/<version> <agent_string>".  When
# agent_string is empty the joined value ends in a space; LWP::UserAgent
# documents that it appends its own default product token to an agent
# string ending in whitespace, so the trailing space is left in place.
has agent => (
    lazy    => 1,
    is      => 'ro',
    builder => sub {
        my ($self) = @_;
        require LWP::UserAgent;

        my $product_token = __PACKAGE__ . "/$VERSION";
        my $ua = LWP::UserAgent->new(
            agent => join(" ", $product_token, $self->agent_string),
        );

        # Pick up proxy settings from the environment.
        $ua->env_proxy;

        return $ua;
    },
);

# Scheme and host of the LANL HIV database site.  Read-only and built
# lazily; the builder supplies the production site when the constructor is
# not given a value.  lanl_endpoint is derived from this value.
has lanl_base => (
    lazy    => 1,
    is      => 'ro',
    builder => sub { return 'https://www.hiv.lanl.gov'; },
);

# Full URL of the locate.cgi script.  Read-only and built lazily by
# appending the CGI path to lanl_base, so overriding lanl_base alone is
# enough to redirect requests to a different host.
has lanl_endpoint => (
    lazy    => 1,
    is      => 'ro',
    builder => sub {
        my ($self) = @_;
        return $self->lanl_base . '/cgi-bin/LOCATE/locate.cgi';
    },
);

# Private, read-only constant holding a placeholder sequence name.
# NOTE(review): judging by the value, it presumably names an extra dummy
# sequence that makes LANL link the tabular result files in its output;
# the code that uses it is outside this excerpt -- confirm there.
has _bogus_slug => (
    default => sub { return 'BOGUS_SEQ_SO_TABULAR_FILES_ARE_LINKED_IN_OUTPUT'; },
    is      => 'ro',
);

sub _request {
    my $self = shift;
    my $req  = shift;
    my $response = $self->agent->request($req);

    if (not $response->is_success) {
        warn sprintf "Request failed: %s %s -> %s\n",
            $req->method, $req->uri, $response->status_line;
        return;



( run in 0.918 second using v1.01-cache-2.11-cpan-5a3173703d6 )