Bio-RetrieveAssemblies
view release on metacpan or search on metacpan
lib/Bio/RetrieveAssemblies/AccessionFile.pm view on Meta::CPAN
package Bio::RetrieveAssemblies::AccessionFile;
$Bio::RetrieveAssemblies::AccessionFile::VERSION = '1.1.5';
use Moose;
use Bio::RetrieveAssemblies::Exceptions;
use File::Path qw(make_path);
use File::Basename;
use File::Copy;
use Data::Validate::URI qw(is_uri);
use Bio::SeqIO; # force dependency on Bio::Perl so that you get bp_genbank2gff3.pl
use Moose::Util::TypeConstraints;
with('Bio::RetrieveAssemblies::LoggingRole');
# ABSTRACT: For a given accession get the file of annotation or sequence
# Allowed values for the 'file_type' attribute.
enum 'FileType', [ 'genbank', 'fasta', 'gff' ];

# Accession to fetch; it is embedded in both the remote and the local file name.
has accession => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# Directory the fetched file is written to.
has output_directory => (
    is      => 'ro',
    isa     => 'Str',
    default => 'downloaded_files',
);

# Requested format; one of the FileType values declared above.
has file_type => (
    is      => 'rw',
    isa     => 'FileType',
    default => 'genbank',
);

# Prefix the remote file name is appended to. download_file() copies instead
# of downloading when the resulting string is not a URI.
has _base_url => (
    is      => 'ro',
    isa     => 'Str',
    default => 'http://www.ncbi.nlm.nih.gov/Traces/wgs/?download=',
);

# Name of the converter executable.
# NOTE(review): presumably used for the GenBank to GFF conversion - confirm
# against the conversion command builder.
has _converter_exec => (
    is      => 'ro',
    isa     => 'Str',
    default => 'bp_genbank2gff3.pl',
);

# Two element array reference: [ source location, local destination path ].
has url_to_file => (
    is      => 'ro',
    isa     => 'ArrayRef',
    lazy    => 1,
    builder => '_build_url_to_file',
);

# Path of the final file: the local destination path, plus '.gff' when
# file_type is 'gff'.
has output_filename => (
    is      => 'ro',
    isa     => 'Str',
    lazy    => 1,
    builder => '_build_output_filename',
);
# Builder for the lazy 'url_to_file' attribute.
#
# Returns an array reference holding two strings:
#   [0] the source location: _base_url followed by the accession and a suffix
#   [1] the local destination: output_directory, '/', the accession and the
#       same suffix
# The suffix is '.1.fsa_nt.gz' when file_type is 'fasta' and '.1.gbff.gz' for
# every other file_type.
sub _build_url_to_file {
    my ($self) = @_;

    my $suffix    = $self->file_type eq 'fasta' ? '.1.fsa_nt.gz' : '.1.gbff.gz';
    my $file_name = $self->accession . $suffix;

    return [
        $self->_base_url . $file_name,
        $self->output_directory . '/' . $file_name,
    ];
}
# Builder for the lazy 'output_filename' attribute.
#
# Returns the local destination path (second element of url_to_file). When
# file_type is 'gff' the string '.gff' is appended to that path.
sub _build_output_filename {
    my ($self) = @_;

    my $downloaded_path = $self->url_to_file->[1];

    return $downloaded_path unless $self->file_type eq "gff";
    return $downloaded_path . '.gff';
}
# Fetch the file for this accession into output_directory.
#
# The output directory is created first. The source (url_to_file->[0]) is then
# either downloaded with wget, when it is a URI, or copied as a local file to
# the destination (url_to_file->[1]). When file_type is 'gff' the fetched file
# is converted afterwards via _convert_gb_to_gff().
#
# Returns 1.
# Throws Bio::RetrieveAssemblies::Exceptions::FileCopyFailed when the local
# copy fails. A failed wget run is reported with a warning but does not throw,
# so callers fetching many accessions keep their existing best-effort behaviour.
sub download_file {
    my ($self) = @_;

    make_path( $self->output_directory );

    my ( $source, $destination ) = @{ $self->url_to_file };

    if ( is_uri($source) ) {
        my $quiet_str = $self->verbose ? "" : "-q";

        $self->logger->info( "Downloading accession: " . $self->accession );
        $self->logger->info( "Download cmd: " . "wget $quiet_str -O " . $destination . " '" . $source . "'" );

        # List-form system() bypasses the shell, so whitespace or shell
        # metacharacters in the output directory or URL cannot break or alter
        # the command. The quiet flag is left out entirely when empty, because
        # an empty string would be handed to wget as a (bogus) argument.
        my @wget_cmd = ( 'wget', ( $quiet_str eq "" ? () : ($quiet_str) ), '-O', $destination, $source );
        system(@wget_cmd);

        # Surface failures instead of ignoring the exit status.
        if ( $? == -1 ) {
            warn "Unable to run wget for " . $source . ": $!\n";
        }
        elsif ( $? != 0 ) {
            warn "wget failed for " . $source . " (wait status $?, exit code " . ( $? >> 8 ) . ")\n";
        }
    }
    else {
        copy( $source, $destination )
          or Bio::RetrieveAssemblies::Exceptions::FileCopyFailed->throw( error => "Copy failed: $!" );
    }

    if ( $self->file_type eq "gff" ) {
        $self->_convert_gb_to_gff();
    }

    return 1;
}
sub _convert_gb_to_gff_cmd {
my ($self) = @_;
my $quiet_str = "--quiet";
if($self->verbose)
{
$quiet_str = "";
( run in 0.768 second using v1.01-cache-2.11-cpan-f56aa216473 )