Bio-MLST-Check

 view release on metacpan or  search on metacpan

lib/Bio/MLST/Check.pm  view on Meta::CPAN

package Bio::MLST::Check;
# ABSTRACT: Multilocus sequence type checking using blast
$Bio::MLST::Check::VERSION = '2.1.1706216';

use Moose;
use Parallel::ForkManager;
use Bio::MLST::ProcessFasta;
use Bio::MLST::Spreadsheet::File;
use Bio::MLST::NormaliseFasta;
use Bio::AlignIO;
use Bio::SimpleAlign;
use File::Temp;
use Cwd;

# ---------------------------------------------------------------------------
# Required constructor arguments.
#
# species, base_directory, makeblastdb_exec, blastn_exec and output_directory
# are handed unchanged to Bio::MLST::ProcessFasta->new for every input file
# (see _generate_spreadsheet_rows).
# ---------------------------------------------------------------------------
has species => (
  is       => 'ro',
  isa      => 'Str',
  required => 1,
);

has base_directory => (
  is       => 'ro',
  isa      => 'Str',
  required => 1,
);

# NOTE(review): presumably the source list from which _input_fasta_files is
# built; the builder is not visible in this part of the file -- confirm.
has raw_input_fasta_files => (
  is       => 'ro',
  isa      => 'ArrayRef',
  required => 1,
);

has makeblastdb_exec => (
  is       => 'ro',
  isa      => 'Str',
  required => 1,
);

has blastn_exec => (
  is       => 'ro',
  isa      => 'Str',
  required => 1,
);

has output_directory => (
  is       => 'ro',
  isa      => 'Str',
  required => 1,
);

# ---------------------------------------------------------------------------
# Optional constructor arguments (each has a default).
# ---------------------------------------------------------------------------

# Forwarded to Bio::MLST::ProcessFasta->new; off unless requested.
has output_fasta_files => (
  is      => 'ro',
  isa     => 'Bool',
  default => 0,
);

# NOTE(review): looks like the base name of the results spreadsheet; the code
# that consumes it is outside this part of the file -- confirm.
has spreadsheet_basename => (
  is      => 'ro',
  isa     => 'Str',
  default => 'mlst_results',
);

# NOTE(review): consumer not visible here; presumably toggles phylip output.
has output_phylip_files => (
  is      => 'ro',
  isa     => 'Bool',
  default => 0,
);

# Forwarded to Bio::MLST::ProcessFasta->new; enabled by default.
has show_contamination_instead_of_alt_matches => (
  is      => 'ro',
  isa     => 'Bool',
  default => 1,
);

# Forwarded to Bio::MLST::ProcessFasta->new; disabled by default.
has report_lowest_st => (
  is      => 'ro',
  isa     => 'Bool',
  default => 0,
);

# Passed to the Parallel::ForkManager constructor in
# _generate_spreadsheet_rows.
has parallel_processes => (
  is      => 'ro',
  isa     => 'Int',
  default => 1,
);

# ---------------------------------------------------------------------------
# Internal state.
#
# The three _spreadsheet_* lists and the two _concat_* lists start empty and
# are appended to, one entry per child process that returned data, by the
# run_on_finish callback in _generate_spreadsheet_rows.
# ---------------------------------------------------------------------------
has _spreadsheet_header => (
  is      => 'rw',
  isa     => 'ArrayRef',
  default => sub { [] },
);

has _spreadsheet_allele_numbers_rows => (
  is      => 'rw',
  isa     => 'ArrayRef',
  default => sub { [] },
);

has _spreadsheet_genomic_rows => (
  is      => 'rw',
  isa     => 'ArrayRef',
  default => sub { [] },
);

# Built on first access by _build__input_fasta_files; this is the list that
# _generate_spreadsheet_rows iterates over.
has _input_fasta_files => (
  is      => 'ro',
  isa     => 'ArrayRef',
  lazy    => 1,
  builder => '_build__input_fasta_files',
);

has _concat_names => (
  is      => 'rw',
  isa     => 'ArrayRef',
  default => sub { [] },
);

has _concat_sequences => (
  is      => 'rw',
  isa     => 'ArrayRef',
  default => sub { [] },
);

# Temporary directory created inside the current working directory with
# CLEANUP => 1; its dirname is given to Bio::MLST::NormaliseFasta as the
# working directory.
has _working_directory => (
  is      => 'ro',
  isa     => 'File::Temp::Dir',
  default => sub {
    return File::Temp->newdir( DIR => getcwd(), CLEANUP => 1 );
  },
);

sub _generate_spreadsheet_rows
{
  my($self) = @_;

  my $pm = new Parallel::ForkManager($self->parallel_processes); 
  $pm -> run_on_finish (
    sub {
      my ($pid, $exit_code, $ident, $exit_signal, $core_dump, $data_structure_reference) = @_;
      # retrieve data structure from child
      if (defined($data_structure_reference)) {  # children are not forced to send anything
        my ($header_row, $allele_numbers_row, $genomic_row, $concat_name, $concat_sequence) = @{$data_structure_reference};
        push(@{$self->_spreadsheet_header}, $header_row);
        push(@{$self->_spreadsheet_allele_numbers_rows}, $allele_numbers_row);
        push(@{$self->_spreadsheet_genomic_rows}, $genomic_row);
        
        push(@{$self->_concat_names}, $concat_name);
        push(@{$self->_concat_sequences}, $concat_sequence);

      } else {  # problems occuring during storage or retrieval will throw a warning
        print qq|No message received from child process $pid!\n|;
    }
    }
  );
  
  for my $fastafile (@{$self->_input_fasta_files})
  {
    $pm->start and next; # do the fork
    
    my $output_fasta_obj = Bio::MLST::NormaliseFasta->new(
      fasta_filename     => $fastafile,
      working_directory  => $self->_working_directory->dirname()
    );
    
    my $fasta_sequence_type_results = Bio::MLST::ProcessFasta->new(
      species            => $self->species,
      base_directory     => $self->base_directory,
      fasta_file         => $output_fasta_obj->processed_fasta_filename(),
      makeblastdb_exec   => $self->makeblastdb_exec,
      blastn_exec        => $self->blastn_exec,
      output_directory   => $self->output_directory,
      output_fasta_files => $self->output_fasta_files,
      show_contamination_instead_of_alt_matches => $self->show_contamination_instead_of_alt_matches,
      report_lowest_st   => $self->report_lowest_st
    );
    my @result_rows;
    push(@result_rows, ($fasta_sequence_type_results->_spreadsheet_row_obj->header_row,
                        $fasta_sequence_type_results->_spreadsheet_row_obj->allele_numbers_row,
                        $fasta_sequence_type_results->_spreadsheet_row_obj->genomic_row,
                        $fasta_sequence_type_results->concat_name,
                        $fasta_sequence_type_results->concat_sequence));
     
    $pm->finish(0,\@result_rows); # do the exit in the child process
  }
  $pm->wait_all_children;
  1;



( run in 0.698 second using v1.01-cache-2.11-cpan-39bf76dae61 )