Bio-MLST-Check
view release on metacpan or search on metacpan
lib/Bio/MLST/Check.pm view on Meta::CPAN
package Bio::MLST::Check;
# ABSTRACT: Multilocus sequence type checking using blast
$Bio::MLST::Check::VERSION = '2.1.1706216';
use Moose;
use Parallel::ForkManager;
use Bio::MLST::ProcessFasta;
use Bio::MLST::Spreadsheet::File;
use Bio::MLST::NormaliseFasta;
use Bio::AlignIO;
use Bio::SimpleAlign;
use File::Temp;
use Cwd;
# ---------------------------------------------------------------------------
# Caller-supplied configuration (all read-only)
# ---------------------------------------------------------------------------

# Required strings; both are forwarded unchanged to Bio::MLST::ProcessFasta.
has [qw(species base_directory)] => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# Required array reference of input files.
# NOTE(review): presumably the source for the lazily built _input_fasta_files
# list below -- the builder is not shown here, confirm.
has raw_input_fasta_files => (
    is       => 'ro',
    isa      => 'ArrayRef',
    required => 1,
);

# Required strings; all three are forwarded to Bio::MLST::ProcessFasta.
has [qw(makeblastdb_exec blastn_exec output_directory)] => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# Optional flag, off by default; forwarded to Bio::MLST::ProcessFasta.
has output_fasta_files => (
    is      => 'ro',
    isa     => 'Bool',
    default => 0,
);

# Optional string, 'mlst_results' unless overridden.
# NOTE(review): looks like the base name of the result spreadsheet -- confirm
# against the code that writes it.
has spreadsheet_basename => (
    is      => 'ro',
    isa     => 'Str',
    default => 'mlst_results',
);

# Optional flag, off by default.
has output_phylip_files => (
    is      => 'ro',
    isa     => 'Bool',
    default => 0,
);

# Optional flag, on by default; forwarded to Bio::MLST::ProcessFasta.
has show_contamination_instead_of_alt_matches => (
    is      => 'ro',
    isa     => 'Bool',
    default => 1,
);

# Optional flag, off by default; forwarded to Bio::MLST::ProcessFasta.
has report_lowest_st => (
    is      => 'ro',
    isa     => 'Bool',
    default => 0,
);

# Process count passed to the Parallel::ForkManager constructor; defaults to 1.
has parallel_processes => (
    is      => 'ro',
    isa     => 'Int',
    default => 1,
);

# ---------------------------------------------------------------------------
# Internal state
# ---------------------------------------------------------------------------

# Accumulators that _generate_spreadsheet_rows appends to, one entry per
# finished child process. Each object starts with its own empty array.
has [qw(
    _spreadsheet_header
    _spreadsheet_allele_numbers_rows
    _spreadsheet_genomic_rows
)] => (
    is      => 'rw',
    isa     => 'ArrayRef',
    default => sub { [] },
);

# List of FASTA files that _generate_spreadsheet_rows iterates over; built on
# first access by _build__input_fasta_files.
has _input_fasta_files => (
    is      => 'ro',
    isa     => 'ArrayRef',
    lazy    => 1,
    builder => '_build__input_fasta_files',
);

# Further per-child accumulators (concatenated names and sequences), also
# appended to by _generate_spreadsheet_rows.
has [qw(_concat_names _concat_sequences)] => (
    is      => 'rw',
    isa     => 'ArrayRef',
    default => sub { [] },
);

# Scratch directory created inside the current working directory at object
# construction; CLEANUP => 1 asks File::Temp to remove it afterwards.
has _working_directory => (
    is      => 'ro',
    isa     => 'File::Temp::Dir',
    default => sub {
        File::Temp->newdir( DIR => getcwd(), CLEANUP => 1 );
    },
);
sub _generate_spreadsheet_rows
{
my($self) = @_;
my $pm = new Parallel::ForkManager($self->parallel_processes);
$pm -> run_on_finish (
sub {
my ($pid, $exit_code, $ident, $exit_signal, $core_dump, $data_structure_reference) = @_;
# retrieve data structure from child
if (defined($data_structure_reference)) { # children are not forced to send anything
my ($header_row, $allele_numbers_row, $genomic_row, $concat_name, $concat_sequence) = @{$data_structure_reference};
push(@{$self->_spreadsheet_header}, $header_row);
push(@{$self->_spreadsheet_allele_numbers_rows}, $allele_numbers_row);
push(@{$self->_spreadsheet_genomic_rows}, $genomic_row);
push(@{$self->_concat_names}, $concat_name);
push(@{$self->_concat_sequences}, $concat_sequence);
} else { # problems occuring during storage or retrieval will throw a warning
print qq|No message received from child process $pid!\n|;
}
}
);
for my $fastafile (@{$self->_input_fasta_files})
{
$pm->start and next; # do the fork
my $output_fasta_obj = Bio::MLST::NormaliseFasta->new(
fasta_filename => $fastafile,
working_directory => $self->_working_directory->dirname()
);
my $fasta_sequence_type_results = Bio::MLST::ProcessFasta->new(
species => $self->species,
base_directory => $self->base_directory,
fasta_file => $output_fasta_obj->processed_fasta_filename(),
makeblastdb_exec => $self->makeblastdb_exec,
blastn_exec => $self->blastn_exec,
output_directory => $self->output_directory,
output_fasta_files => $self->output_fasta_files,
show_contamination_instead_of_alt_matches => $self->show_contamination_instead_of_alt_matches,
report_lowest_st => $self->report_lowest_st
);
my @result_rows;
push(@result_rows, ($fasta_sequence_type_results->_spreadsheet_row_obj->header_row,
$fasta_sequence_type_results->_spreadsheet_row_obj->allele_numbers_row,
$fasta_sequence_type_results->_spreadsheet_row_obj->genomic_row,
$fasta_sequence_type_results->concat_name,
$fasta_sequence_type_results->concat_sequence));
$pm->finish(0,\@result_rows); # do the exit in the child process
}
$pm->wait_all_children;
1;
( run in 0.698 second using v1.01-cache-2.11-cpan-39bf76dae61 )