Bio-RetrieveAssemblies

 view release on metacpan or  search on metacpan

lib/Bio/RetrieveAssemblies.pm  view on Meta::CPAN





# --- Attributes -----------------------------------------------------------
# The first six attributes are read-write; 'args' and 'script_name' are
# read-only and must be passed to the constructor.
# NOTE(review): the 'rw' attributes are presumably overwritten in BUILD from
# the parsed command-line options — confirm against the full BUILD body.

# Free-text search term (Str); no default is declared.
has search_term => (
    is  => 'rw',
    isa => 'Str',
);

# Directory name used for output (Str); defaults to 'downloaded_files'.
has output_directory => (
    is      => 'rw',
    isa     => 'Str',
    default => 'downloaded_files',
);

# Requested file type as a plain string; defaults to 'genbank'.
has file_type => (
    is      => 'rw',
    isa     => 'Str',
    default => 'genbank',
);

# Organism type code (Str); defaults to 'BCT'.
# NOTE(review): presumably a taxonomic division code — confirm.
has organism_type => (
    is      => 'rw',
    isa     => 'Str',
    default => 'BCT',
);

# Query string (Str); defaults to '*'.
has query => (
    is      => 'rw',
    isa     => 'Str',
    default => '*',
);

# Boolean flag; defaults to false (0).
has annotation => (
    is      => 'rw',
    isa     => 'Bool',
    default => 0,
);

# Argument list (ArrayRef) handed to GetOptionsFromArray in BUILD; required.
has args => (
    is       => 'ro',
    isa      => 'ArrayRef',
    required => 1,
);

# Name of the invoking script (Str); required.
has script_name => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

sub BUILD {
    my ($self) = @_;
    my ( $help, $file_type, $output_directory, $organism_type,$query,$annotation,$verbose,$cmd_version );
    GetOptionsFromArray(
        $self->args,
        'p|organism_type=s'    => \$organism_type,
        'f|file_type=s'        => \$file_type,
        'o|output_directory=s' => \$output_directory,

lib/Bio/RetrieveAssemblies/AccessionFile.pm  view on Meta::CPAN

# ABSTRACT: For a given accession get the file of annotation or sequence


# Constrained string type: 'file_type' below accepts only these three values.
enum FileType => [qw(genbank fasta gff)];

# Accession identifier (Str); required at construction time.
has accession => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# Directory name used when composing local file paths; defaults to
# 'downloaded_files'.
has output_directory => (
    is      => 'ro',
    isa     => 'Str',
    default => 'downloaded_files',
);

# One of the FileType values; read-write, defaults to 'genbank'.
has file_type => (
    is      => 'rw',
    isa     => 'FileType',
    default => 'genbank',
);

# URL prefix to which the accession and a file suffix are appended.
# NOTE(review): this is a plain-HTTP endpoint — confirm whether it should
# be HTTPS before changing it, since tests may pin the exact string.
has _base_url => (
    is      => 'ro',
    isa     => 'Str',
    default => 'http://www.ncbi.nlm.nih.gov/Traces/wgs/?download=',
);

# Name of the converter executable; defaults to 'bp_genbank2gff3.pl'.
has _converter_exec => (
    is      => 'ro',
    isa     => 'Str',
    default => 'bp_genbank2gff3.pl',
);

# Lazily built ArrayRef produced by _build_url_to_file.
has url_to_file => (
    is      => 'ro',
    isa     => 'ArrayRef',
    lazy    => 1,
    builder => '_build_url_to_file',
);

# Lazily built output file name (Str) produced by _build_output_filename.
has output_filename => (
    is      => 'ro',
    isa     => 'Str',
    lazy    => 1,
    builder => '_build_output_filename',
);

sub _build_url_to_file {
    my ($self) = @_;

    my @url_to_file;
    if ( $self->file_type eq 'fasta' ) {
        push( @url_to_file, $self->_base_url . $self->accession . '.1.fsa_nt.gz' );
        push( @url_to_file, $self->output_directory . '/' . $self->accession . '.1.fsa_nt.gz' );
    }

lib/Bio/RetrieveAssemblies/RefWeak.pm  view on Meta::CPAN

$Bio::RetrieveAssemblies::RefWeak::VERSION = '1.1.5';
use Moose;
with('Bio::RetrieveAssemblies::RemoteSpreadsheetRole');

# ABSTRACT: Get the blacklist of accession numbers from refweak


# Location of the RefWeak TSV file; read-only, with a fixed default URL.
has url => (
    is      => 'ro',
    isa     => 'Str',
    default => 'https://raw.githubusercontent.com/refweak/refweak/master/refweak.tsv',
);

# Zero-based index of the accession column; defaults to 0.
has accession_column_index => (
    is      => 'ro',
    isa     => 'Int',
    default => 0,
);

# Header text of the accession column; defaults to "accession".
has accession_column_header => (
    is      => 'ro',
    isa     => 'Str',
    default => "accession",
);

# Lazily built HashRef of accessions, produced by _build_accessions.
# NOTE(review): the builder is not defined in this class's visible code;
# presumably it comes from the composed role — confirm.
has accessions => (
    is      => 'ro',
    isa     => 'HashRef',
    lazy    => 1,
    builder => '_build_accessions',
);

__PACKAGE__->meta->make_immutable;
no Moose;
1;

__END__

=pod

=encoding UTF-8

lib/Bio/RetrieveAssemblies/RemoteSpreadsheetRole.pm  view on Meta::CPAN

use Log::Log4perl qw(:easy);
with('Bio::RetrieveAssemblies::LoggingRole');

# ABSTRACT: Role for downloading a spreadsheet


# URL of the spreadsheet (Str); must be supplied by the consumer.
has url => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# Zero-based index of the accession column; defaults to 0.
has accession_column_index => (
    is      => 'ro',
    isa     => 'Int',
    default => 0,
);

# Header text of the accession column (Str); must be supplied.
has accession_column_header => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# Lazily constructed Text::CSV object, created by _build__tsv_parser.
has _tsv_parser => (
    is      => 'ro',
    isa     => 'Text::CSV',
    lazy    => 1,
    builder => '_build__tsv_parser',
);

# Lazily built ArrayRef produced by _build__tsv_content.
has _tsv_content => (
    is      => 'ro',
    isa     => 'ArrayRef',
    lazy    => 1,
    builder => '_build__tsv_content',
);

# File name (Str); defaults to '.spreadsheet_query'.
# NOTE(review): presumably where the downloaded spreadsheet is written —
# confirm against the code that uses it.
has _output_file => (
    is      => 'ro',
    isa     => 'Str',
    default => '.spreadsheet_query',
);

# Builder for the lazy '_tsv_parser' attribute.
#
# Creates a Text::CSV parser in binary mode with a tab as the field
# separator. If construction yields a false value, throws a
# Bio::RetrieveAssemblies::Exceptions::CSVParser exception whose error
# message includes Text::CSV's diagnostic.
#
# Returns the Text::CSV parser object.
sub _build__tsv_parser {
    my ($self) = @_;

    my $parser = Text::CSV->new( { binary => 1, sep_char => "\t" } );

    unless ($parser) {
        Bio::RetrieveAssemblies::Exceptions::CSVParser->throw( error => "Cannot use CSV: " . Text::CSV->error_diag() );
    }

    return $parser;
}

lib/Bio/RetrieveAssemblies/WGS.pm  view on Meta::CPAN

# ABSTRACT: Get all the WGS assemblies


# Search term (Str); required at construction time.
has search_term => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# Query string (Str); defaults to '*'.
has query => (
    is      => 'ro',
    isa     => 'Str',
    default => '*',
);

# Boolean flag; defaults to false (0).
has annotation => (
    is      => 'ro',
    isa     => 'Bool',
    default => 0,
);

# Zero-based index of the accession column; defaults to 0.
has accession_column_index => (
    is      => 'ro',
    isa     => 'Int',
    default => 0,
);

# Header text of the accession column; defaults to "Prefix".
has accession_column_header => (
    is      => 'ro',
    isa     => 'Str',
    default => "Prefix",
);

# Organism type code (Str); defaults to 'BCT'.
# NOTE(review): presumably a taxonomic division code — confirm.
has organism_type => (
    is      => 'ro',
    isa     => 'Str',
    default => 'BCT',
);

# Index of the organism-type column; defaults to 1.
has organism_type_index => (
    is      => 'ro',
    isa     => 'Int',
    default => 1,
);

# Column indices to search; each instance gets a fresh [ 0, 2, 3, 4 ]
# because the default is a code reference.
has columns_to_search => (
    is      => 'ro',
    isa     => 'ArrayRef',
    default => sub { [ 0, 2, 3, 4 ] },
);

# Lazily built spreadsheet URL (Str), produced by _build_url.
has url => (
    is      => 'ro',
    isa     => 'Str',
    lazy    => 1,
    builder => '_build_url',
);

# Lazily built HashRef of accessions, produced by _build_accessions.
has accessions => (
    is      => 'ro',
    isa     => 'HashRef',
    lazy    => 1,
    builder => '_build_accessions',
);

# Lazily built HashRef of RefWeak accessions, produced by
# _build__refweak_accessions.
has _refweak_accessions => (
    is      => 'ro',
    isa     => 'HashRef',
    lazy    => 1,
    builder => '_build__refweak_accessions',
);


# Builder for the lazy '_refweak_accessions' attribute.
#
# Logs an informational message, then constructs a
# Bio::RetrieveAssemblies::RefWeak object that shares this object's logger
# and verbose setting, and returns whatever its accessions() method yields.
sub _build__refweak_accessions {
    my ($self) = @_;

    $self->logger->info("Downloading list of bad assembly accession numbers from RefWeak");

    my $refweak = Bio::RetrieveAssemblies::RefWeak->new(
        logger  => $self->logger,
        verbose => $self->verbose,
    );

    return $refweak->accessions();
}

sub _build_url {
    my ($self) = @_;



( run in 0.738 second using v1.01-cache-2.11-cpan-5f2e87ce722 )