Bio-RetrieveAssemblies
view release on metacpan or search on metacpan
lib/Bio/RetrieveAssemblies/WGS.pm view on Meta::CPAN
package Bio::RetrieveAssemblies::WGS;
$Bio::RetrieveAssemblies::WGS::VERSION = '1.1.5';
use Moose;
use Bio::RetrieveAssemblies::RefWeak;
with('Bio::RetrieveAssemblies::RemoteSpreadsheetRole');
# ABSTRACT: Get all the WGS assemblies
# Pattern matched (case-insensitively) against the searched columns of each row.
has 'search_term' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# Value placed in the "term" parameter of the download URL.
has 'query' => (
    is      => 'ro',
    isa     => 'Str',
    default => '*',
);

# When true, the URL asks only for records that have annotation.
has 'annotation' => (
    is      => 'ro',
    isa     => 'Bool',
    default => 0,
);

# Index of the row cell holding the accession number.
has 'accession_column_index' => (
    is      => 'ro',
    isa     => 'Int',
    default => 0,
);

# Presumably the header text of the accession column; it is not used in the
# code visible in this file -- confirm against the consuming role.
has 'accession_column_header' => (
    is      => 'ro',
    isa     => 'Str',
    default => "Prefix",
);

# Rows whose organism-type cell differs from this value are filtered out.
has 'organism_type' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'BCT',
);

# Index of the row cell holding the organism type.
has 'organism_type_index' => (
    is      => 'ro',
    isa     => 'Int',
    default => 1,
);

# Indices of the row cells that are tested against 'search_term'.
has 'columns_to_search' => (
    is      => 'ro',
    isa     => 'ArrayRef',
    default => sub { [ 0, 2, 3, 4 ] },
);

# Download URL, built on first use by _build_url.
has 'url' => (
    is      => 'ro',
    isa     => 'Str',
    lazy    => 1,
    builder => '_build_url',
);

# Result hash, built on first use by _build_accessions (not defined in this
# file; presumably supplied by the consumed role -- confirm).
has 'accessions' => (
    is      => 'ro',
    isa     => 'HashRef',
    lazy    => 1,
    builder => '_build_accessions',
);

# Accessions reported by RefWeak; rows with these accessions are filtered out.
has '_refweak_accessions' => (
    is      => 'ro',
    isa     => 'HashRef',
    lazy    => 1,
    builder => '_build__refweak_accessions',
);
# Builder for the lazy '_refweak_accessions' attribute.
#
# Logs that the download is starting, then constructs a
# Bio::RetrieveAssemblies::RefWeak object that shares this object's logger
# and verbose setting, and returns whatever its accessions() method yields.
sub _build__refweak_accessions {
    my $self = shift;

    $self->logger->info("Downloading list of bad assembly accession numbers from RefWeak");

    my $refweak = Bio::RetrieveAssemblies::RefWeak->new(
        logger  => $self->logger,
        verbose => $self->verbose,
    );
    return $refweak->accessions();
}
# Builder for the lazy 'url' attribute.
#
# Returns the download URL with the object's 'query' value placed in the
# "term" parameter. When 'annotation' is true the URL restricts the results
# to records with annotated contigs and scaffolds; otherwise it requests
# everything in the WGS project (i.e. everything bar TSA).
#
# NOTE(review): the non-annotation URL carries both "size=100" and "size=all",
# and the query is inserted without URL escaping -- confirm this is intended.
sub _build_url {
    my ($self) = @_;

    my ( $prefix, $suffix ) = $self->annotation
      ? (
        # Only get files with annotation
        "http://www.ncbi.nlm.nih.gov/Traces/wgs/?page=1&term=",
        "&order=prefix&dir=a&state=live&project=WGS&update_date=any&create_date=any&have_annot_contigs=on&have_annot_scaffolds=on&retmode=text&size=all",
      )
      : (
        # Get everything bar TSA
        "http://www.ncbi.nlm.nih.gov/Traces/wgs/?&size=100&term=",
        "&project=WGS&order=prefix&dir=asc&version=last&state=live&update_date=any&create_date=any&retmode=text&size=all",
      );

    return $prefix . $self->query . $suffix;
}
# Decide whether one row of the downloaded spreadsheet should be discarded.
#
# Arguments:
#   $columns - array reference holding the cells of a single row.
#
# Returns 1 when the row must be filtered out and 0 when it should be kept.
# A row is kept only if its organism-type cell equals 'organism_type', its
# accession is not listed in '_refweak_accessions', and at least one of the
# cells named by 'columns_to_search' matches 'search_term'.
#
# 'search_term' is interpreted as a regular expression and is matched
# case-insensitively.
sub _filter_out_line {
    my ( $self, $columns ) = @_;

    # Rows can be shorter than the configured indices. Missing cells are
    # treated as empty strings so the comparisons below give the same result
    # as before without emitting "uninitialized value" warnings.
    my $organism_type = $columns->[ $self->organism_type_index ];
    $organism_type = '' if ( !defined $organism_type );
    return 1 if ( $organism_type ne $self->organism_type );

    # Check to see if the accession number is in refweak
    my $accession = $columns->[ $self->accession_column_index ];
    $accession = '' if ( !defined $accession );
    return 1 if ( $self->_refweak_accessions->{$accession} );

    # Compile the pattern once instead of on every loop iteration. Wrapping
    # the term in (?:...) keeps the pattern non-empty: Perl treats a match
    # against an empty pattern as "reuse the last successful regex", which
    # would make an empty search term depend on unrelated earlier matches.
    my $search_term  = $self->search_term;
    my $search_regex = qr/(?:$search_term)/i;

    for my $column_index ( @{ $self->columns_to_search } ) {
        my $cell = $columns->[$column_index];
        $cell = '' if ( !defined $cell );
        return 0 if ( $cell =~ $search_regex );
    }
    return 1;
}
# Standard Moose epilogue: mark the class definition as final.
__PACKAGE__->meta->make_immutable;
# Remove the keywords Moose exported into this package (has, with, ...).
no Moose;
# A module file must end with a true value.
1;
__END__
=pod
=encoding UTF-8
=head1 NAME
Bio::RetrieveAssemblies::WGS - Get all the WGS assemblies
=head1 VERSION
version 1.1.5
=head1 SYNOPSIS
Get all the WGS assemblies
use Bio::RetrieveAssemblies::WGS;
my $obj = Bio::RetrieveAssemblies::WGS->new( search_term => 'Salmonella' );
my $accessions = $obj->accessions();    # hash reference
=head1 AUTHOR
Andrew J. Page <ap13@sanger.ac.uk>
=head1 COPYRIGHT AND LICENSE
This software is Copyright (c) 2015 by Wellcome Trust Sanger Institute.
This is free software, licensed under:
The GNU General Public License, Version 3, June 2007
( run in 0.753 second using v1.01-cache-2.11-cpan-5837b0d9d2c )