Bio-RetrieveAssemblies
view release on metacpan or search on metacpan
lib/Bio/RetrieveAssemblies/RemoteSpreadsheetRole.pm view on Meta::CPAN
package Bio::RetrieveAssemblies::RemoteSpreadsheetRole;
$Bio::RetrieveAssemblies::RemoteSpreadsheetRole::VERSION = '1.1.5';
use Moose::Role;
use Text::CSV;
use Data::Validate::URI qw(is_uri);
use File::Slurp::Tiny qw(read_file write_file);
use Bio::RetrieveAssemblies::Exceptions;
use Log::Log4perl qw(:easy);
with('Bio::RetrieveAssemblies::LoggingRole');
# ABSTRACT: Role for downloading a spreadsheet
# Location of the spreadsheet. When it is a valid URI it is downloaded with
# wget; otherwise it is read as a path to a local file.
has 'url' => ( is => 'ro', isa => 'Str', required => 1 );
# Zero-based index of the column that holds the accession in each parsed row.
has 'accession_column_index' => ( is => 'ro', isa => 'Int', default => 0 );
# Header text of the accession column; a row whose accession column equals
# this value is treated as the header row and skipped.
has 'accession_column_header' => ( is => 'ro', isa => 'Str', required => 1 );
# Lazily built Text::CSV parser configured to split on tab characters.
has '_tsv_parser' => ( is => 'ro', isa => 'Text::CSV', lazy => 1, builder => '_build__tsv_parser' );
# Lazily built array reference holding the spreadsheet content, one line per element.
has '_tsv_content' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__tsv_content' );
# File that wget writes the download to; it is deleted once it has been read.
has '_output_file' => ( is => 'ro', isa => 'Str', default => '.spreadsheet_query');
# Builder for the lazy '_tsv_parser' attribute.
#
# Creates a Text::CSV parser in binary mode that uses a tab character as the
# field separator.
#
# Returns: the Text::CSV parser object.
# Throws:  Bio::RetrieveAssemblies::Exceptions::CSVParser when the parser
#          cannot be constructed.
sub _build__tsv_parser {
    my ($self) = @_;

    my $parser_options = { binary => 1, sep_char => "\t" };
    my $parser         = Text::CSV->new($parser_options);

    unless ($parser) {
        Bio::RetrieveAssemblies::Exceptions::CSVParser->throw( error => "Cannot use CSV: " . Text::CSV->error_diag() );
    }

    return $parser;
}
# Builder for the lazy '_tsv_content' attribute.
#
# Fetches the raw spreadsheet text and returns it split into lines. When
# 'url' is a valid URI it is downloaded with wget into '_output_file', read
# back, and the downloaded file is removed again; otherwise 'url' is read as
# a local file.
#
# Returns: ArrayRef of lines. Every run of CR/LF characters is collapsed to
#          a single newline before splitting, so Windows line endings leave
#          no stray "\r" behind.
sub _build__tsv_content {
    my ($self) = @_;
    my $tsv_content = "";

    if ( is_uri( $self->url ) ) {
        $self->logger->info( "Downloading url: " . $self->url );

        # Build an argument list instead of a shell string: the URL is passed
        # to wget verbatim, so quotes or other shell metacharacters in it
        # cannot break out of the command.
        my @cmd = ('wget');
        push @cmd, '-q' unless $self->verbose;
        push @cmd, '-O', $self->_output_file, $self->url;
        $self->logger->info( "Downloading cmd: " . join( ' ', @cmd ) );

        # A failed download is reported but processing carries on with
        # whatever wget left in the output file, as before.
        my $exit_status = system(@cmd);
        if ( $exit_status != 0 ) {
            $self->logger->error( "Download failed (wget status $exit_status) for url: " . $self->url );
        }

        $tsv_content = read_file( $self->_output_file );
        unlink( $self->_output_file );
    }
    else {
        # If it's not a URL, then try opening it as a file.
        $tsv_content = read_file( $self->url );
    }

    # /g is required so that every line ending is normalised, not just the
    # first one.
    $tsv_content =~ s/[\r\n]+/\n/g;
    my @lines = split( /\n/, $tsv_content );
    return \@lines;
}
# Builder that collects the accessions listed in the spreadsheet.
#
# Every line of '_tsv_content' is parsed with '_tsv_parser' and the value in
# column 'accession_column_index' is recorded. A line is skipped when:
#   * it cannot be parsed,
#   * its accession column equals 'accession_column_header' (header row),
#   * its first column is missing, empty, or starts with '#',
#   * '_filter_out_line' returns true for its columns,
#   * it has no value in the accession column (e.g. a short row).
#
# Returns: HashRef whose keys are the accessions; every value is 1.
sub _build_accessions {
    my ($self) = @_;
    my %accessions;

    $self->logger->info("Parsing downloaded file");

    my $lines  = $self->_tsv_content;
    my $parser = $self->_tsv_parser;
    my $index  = $self->accession_column_index;
    my $header = $self->accession_column_header;

    LINE:
    for my $line ( @{$lines} ) {
        if ( !$parser->parse($line) ) {
            $self->logger->info("Skipping line that could not be parsed: $line");
            next LINE;
        }

        my @columns      = $parser->fields();
        my $accession    = $columns[$index];
        my $first_column = $columns[0];

        next LINE if defined $accession && $accession eq $header;
        next LINE if !defined $first_column || $first_column eq '' || $first_column =~ /^#/;
        next LINE if $self->_filter_out_line( \@columns );

        # A row shorter than the accession column has nothing to record;
        # without this guard it would be stored under the empty-string key.
        next LINE if !defined $accession || $accession eq '';

        $accessions{$accession} = 1;
    }

    return \%accessions;
}
( run in 1.025 second using v1.01-cache-2.11-cpan-f56aa216473 )