Bio-MLST-Check
view release on metacpan or search on metacpan
lib/Bio/MLST/CheckMultipleSpecies.pm view on Meta::CPAN
package Bio::MLST::CheckMultipleSpecies;
$Bio::MLST::CheckMultipleSpecies::VERSION = '2.1.1706216';
# ABSTRACT: High throughput multilocus sequence typing (MLST) checking against several MLST databases.
use Moose;
use Bio::MLST::Check;
use Bio::MLST::Databases;
use Parallel::ForkManager;
use File::Temp;
use Cwd;
use Text::CSV;
# ---------------------------------------------------------------------------
# Public attributes
# ---------------------------------------------------------------------------

# Species to search for. Passing an empty array means every available MLST
# database is searched (see _build__species_list).
has 'species' => (
    is       => 'ro',
    isa      => 'ArrayRef',
    required => 1,
);

# Handed to Bio::MLST::Databases as its base_directory when the list of all
# database names has to be looked up.
has 'base_directory' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# Upper bound on the number of parallel processes.
has 'parallel_processes' => (
    is      => 'ro',
    isa     => 'Int',
    default => 1,
);

# When true, search progress and the number of matches are output.
has 'verbose' => (
    is      => 'rw',
    isa     => 'Bool',
    default => 0,
);

# When true, all MLST databases searched are reported.
has 'report_all_mlst_db' => (
    is      => 'rw',
    isa     => 'Bool',
    default => 0,
);

# NOTE(review): presumably restricts reporting to the lowest sequence type;
# the consumer of this flag is not visible here - confirm.
has 'report_lowest_st' => (
    is      => 'rw',
    isa     => 'Bool',
    default => 0,
);

# Input fasta files; forwarded unchanged to Bio::MLST::Check.
has 'raw_input_fasta_files' => (
    is       => 'ro',
    isa      => 'ArrayRef',
    required => 1,
);

# NOTE(review): presumably the makeblastdb executable name/path - confirm.
has 'makeblastdb_exec' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# NOTE(review): presumably the blastn executable name/path - confirm.
has 'blastn_exec' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# NOTE(review): presumably where result files are written - confirm.
has 'output_directory' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# Defaults to 'mlst_results'.
has 'spreadsheet_basename' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'mlst_results',
);

# Fasta output is not supported by this class; _check_fasta_phylip_options
# reports an error when the flag is set.
has 'output_fasta_files' => (
    is      => 'ro',
    isa     => 'Bool',
    default => 0,
);

# Phylip output is not supported by this class; _check_fasta_phylip_options
# reports an error when the flag is set.
has 'output_phylip_files' => (
    is      => 'ro',
    isa     => 'Bool',
    default => 0,
);

# Enabled by default.
# NOTE(review): presumably selects contamination reporting over alternative
# matches in the results; the consumer is not visible here - confirm.
has 'show_contamination_instead_of_alt_matches' => (
    is      => 'ro',
    isa     => 'Bool',
    default => 1,
);

# ---------------------------------------------------------------------------
# Private attributes
# ---------------------------------------------------------------------------

# Sorted list of species/database names, built on first access by
# _build__species_list.
has '_species_list' => (
    is         => 'ro',
    isa        => 'ArrayRef',
    lazy_build => 1,
);

# Temporary directory created inside the current working directory; the
# File::Temp::Dir object is asked to clean itself up (CLEANUP => 1).
has '_working_directory' => (
    is      => 'ro',
    isa     => 'File::Temp::Dir',
    default => sub {
        return File::Temp->newdir( DIR => getcwd(), CLEANUP => 1 );
    },
);
# Builder for the _species_list attribute.
#
# Takes a copy of the caller-supplied species names; when none were supplied
# it falls back to every database name reported by Bio::MLST::Databases for
# base_directory. Returns a reference to a new array holding the names in
# ascending string order.
sub _build__species_list
{
    my $self = shift;

    # Work on a copy so the array behind the 'species' attribute is untouched.
    my @names = @{ $self->species };

    # Nothing requested explicitly: search against all known databases.
    if ( !@names )
    {
        my $databases = Bio::MLST::Databases->new( base_directory => $self->base_directory );
        @names = @{ $databases->database_names };
    }

    # The default sort compares as strings.
    return [ sort @names ];
}
# Delegates the input-file existence check to Bio::MLST::Check.
#
# A Bio::MLST::Check object is created with the real raw_input_fasta_files;
# all of its other constructor arguments are given empty strings here.
# Returns whatever Bio::MLST::Check::input_fasta_files_exist returns.
sub _check_input_files_exist
{
    my $self = shift;

    # Constructor arguments that are only passed as empty placeholders.
    my @placeholder_keys = qw(species base_directory makeblastdb_exec blastn_exec output_directory);

    my $checker = Bio::MLST::Check->new(
        raw_input_fasta_files => $self->raw_input_fasta_files,
        ( map { $_ => '' } @placeholder_keys ),
    );

    return $checker->input_fasta_files_exist;
}
# Rejects fasta/phylip output requests.
#
# Returns 1 when neither output_fasta_files nor output_phylip_files is set.
# Otherwise prints an explanatory message to the currently selected output
# handle and returns 0.
sub _check_fasta_phylip_options
{
    my $self = shift;

    my $unsupported_output_requested = $self->output_fasta_files || $self->output_phylip_files;
    if ( !$unsupported_output_requested )
    {
        return 1;
    }

    # The message text below is part of the literal, so it stays at column 0.
    print qq[
The --output_fasta_files and --output_phylip_files options cannot be used when searching
against more than one MLST database as the alleles searched will differ between species.
To output fasta and phylip files, please search against a single MLST database.\n\n];

    return 0;
}
sub _run_mlst_for_species_list
{
my ($self) = @_;
# set parallel processes - if more species than processes then search input files in parallel.
my $parallel_process_total = $self->parallel_processes;
my $parallel_process_species = @{$self->_species_list} < $self->parallel_processes ? @{$self->_species_list} : $self->parallel_processes;
my $parallel_process_fa_file = int($self->parallel_processes/@{$self->_species_list}) ? int($self->parallel_processes/@{$self->_species_list}) : 1;
# Run for each species - output to csv files named 0001,0002,etc.
( run in 2.254 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )