MS

 view release on metacpan or  search on metacpan

lib/MS/Search/DB/Source/uniprot.pm  view on Meta::CPAN

package MS::Search::DB::Source::uniprot;

use strict;
use warnings;

use HTTP::Tiny;
use URI::Escape;
use FileHandle;

# Constructor.
#
# Arguments: the class name (supplied by the `->new` call) followed by a
# flat list of key/value pairs. The pairs are copied into a fresh hash,
# which becomes the object; nothing is validated or defaulted here.
# _fetch_fh() later reads keys such as `proteome`, `taxid`, `ref_only`,
# `reviewed_only` and `include_isoforms` from this hash.
#
# Returns: the new hash-based object blessed into the given class.
sub new {

    my $class    = shift;
    my %settings = @_;

    # Bless a shallow copy so the object never aliases the caller's data.
    return bless { %settings }, $class;

}

sub _fetch_fh {

    my ($self) = @_;


    my ($rdr, $wtr) = FileHandle::pipe;
    my $pid = fork;

    if ($pid) {

        close $wtr;
        return($rdr, $pid);

    }
    else {

        close $rdr;

        my @proteomes;

        if (defined $self->{proteome}) {
            @proteomes = ($self->{proteome});
        }
        elsif (defined $self->{taxid} && ! defined $self->{proteome}) {
            my $ref_only = $self->{ref_only} ? 'true' : 'false';
            my $top_node = $self->{taxid} // die "No taxon specified\n";
            die "Taxon must be NCBI integer ID\n" if ($top_node =~ /\D/);

            #my $list_url = "http://www.uniprot.org/proteomes/?query=reference:$ref_only+taxonomy:$top_node&format=list";
            my $list_url = "https://rest.uniprot.org/proteomes/stream?query=reference:$ref_only+taxonomy_id:$top_node&format=list";

            my $resp = HTTP::Tiny->new->get($list_url);
            die "Failed to fetch proteome list: $resp->{status} $resp->{reason}\n"
                if (! $resp->{success});
            @proteomes = split /\r?\n/, $resp->{content};
        }
        else {
            die "No taxonomy ID or proteome ID given!\n";
        }

        my $fasta;
        my $want;
        my $reviewed = $self->{reviewed_only}
            ? '+AND+reviewed:true'
            : '';
        my $include  = $self->{include_isoforms}
            ? 'yes'
            : 'no';
        for (@proteomes) {
            my $id = uri_escape($_);
            warn "Fetching $id\n";
            #my $fetch_url = "http://www.uniprot.org/uniprot/?query=proteome:$id$reviewed&include=$include&format=fasta";
            my $fetch_url = "https://rest.uniprot.org/uniprotkb/stream?query=proteome:$id$reviewed&include=$include&format=fasta";
            my $resp = HTTP::Tiny->new->get( $fetch_url, { data_callback
                => sub { print {$wtr} $_[0] if ($_[1]->{status} < 300 ) } } );



( run in 0.483 second using v1.01-cache-2.11-cpan-0bb4e1dffa6 )