Bio_AssemblyImprovement

 view release on metacpan or  search on metacpan

lib/Bio/AssemblyImprovement/Assemble/SGA/Main.pm  view on Meta::CPAN

package Bio::AssemblyImprovement::Assemble::SGA::Main;
# ABSTRACT: Run SGA preprocess and error correction steps


use Moose;
use Cwd;
use Cwd 'abs_path';
use File::Copy;
use File::Basename;

use Bio::AssemblyImprovement::Assemble::SGA::PreprocessReads;
use Bio::AssemblyImprovement::Assemble::SGA::IndexAndCorrectReads;

with 'Bio::AssemblyImprovement::Scaffold::SSpace::TempDirectoryRole';
with 'Bio::AssemblyImprovement::Util::ZipFileRole';

# Input files handed on to the SGA preprocessing step (mandatory).
has 'input_files' => (
    is       => 'ro',
    isa      => 'ArrayRef',
    required => 1,
);

#
# Parameters for preprocessing
#
has 'min_length' => (
    is      => 'ro',
    isa     => 'Num',
    default => 51,
);

has 'quality_trim' => (
    is      => 'ro',
    isa     => 'Num',
    default => 3,
);

# Defaults to 2 because the pipeline will almost always send in an
# interleaved fastq file.
has 'pe_mode' => (
    is      => 'ro',
    isa     => 'Num',
    default => 2,
);

#
# Parameters for indexing and correction
#

# BWT construction algorithm: sais or ropebwt.
has 'algorithm' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'ropebwt',
);

# Use this many threads for computation.
has 'threads' => (
    is      => 'ro',
    isa     => 'Num',
    default => 1,
);

# Attempt to correct kmers that are seen less than this many times.
has 'kmer_threshold' => (
    is      => 'ro',
    isa     => 'Num',
    default => 5,
);

has 'kmer_length' => (
    is      => 'ro',
    isa     => 'Num',
    default => 41,
);

# Name of the final results file; the results are always zipped, hence the
# .gz suffix in the default.
has 'output_filename' => (
    is      => 'rw',
    isa     => 'Str',
    default => '_sga_error_corrected.fastq.gz',
);

# Built lazily by _build_output_directory, which falls back to the current
# working directory when no value was supplied.
has 'output_directory' => (
    is      => 'rw',
    isa     => 'Str',
    lazy    => 1,
    builder => '_build_output_directory',
);

# SGA executable passed on to the preprocessing step (mandatory).
has 'sga_exec' => (
    is       => 'rw',
    isa      => 'Str',
    required => 1,
);

# When false (the default), run() redirects external program output to
# /dev/null.
has 'debug' => (
    is      => 'ro',
    isa     => 'Bool',
    default => 0,
);

# Builder for the lazy 'output_directory' attribute.
#
# Returns the current working directory as reported by Cwd::getcwd at the
# moment the attribute is first read; this is the default used when the
# caller did not supply an output directory.
sub _build_output_directory {
    my $self = shift;    # invocant is not consulted; the default is cwd

    return getcwd();
}

# Full path of the final results file: the 'output_directory' attribute and
# the 'output_filename' attribute joined with a single '/'.
sub _final_results_file {
    my $self = shift;

    # Read the directory first, then the file name (same order as the
    # concatenation evaluates them).
    my $results_dir  = $self->output_directory;
    my $results_name = $self->output_filename;

    return $results_dir . '/' . $results_name;
}

# Path of the intermediate preprocessed file: '_sga_preprocessed.fastq'
# inside the directory returned by _temp_directory. run() moves the
# preprocessor output to this location.
sub _intermediate_file {
    my $self = shift;

    my $scratch_dir = $self->_temp_directory;

    return $scratch_dir . '/_sga_preprocessed.fastq';
}

sub run {
    my ($self) = @_;
    my $original_cwd = getcwd();
    $self->output_directory; # Essentially setting output directory to cwd
    
    # Do all the intermediate steps in a temporary directory (which will be cleaned up when object out of scope)
    chdir( $self->_temp_directory ); # Default to temporary directory if alternative not provided
    
    my $stdout_of_program = '';
    $stdout_of_program =  "> /dev/null 2>&1"  if($self->debug == 0);
  
    # SGA preprocess
    my $sga_preprocessor     = Bio::AssemblyImprovement::Assemble::SGA::PreprocessReads->new(
            input_files      => $self->input_files,
            pe_mode			 => $self->pe_mode,
            min_length	     => $self->min_length,
            pe_mode	  		 => $self->pe_mode,
            quality_trim	 => $self->quality_trim,
            output_directory => $self->output_directory,
            sga_exec         => $self->sga_exec,
            debug			 => $self->debug,
    );
    
	$sga_preprocessor->run();
	
	# Move preprocessed file to this temporary directory
	move ( $sga_preprocessor->_output_filename(), $self->_intermediate_file  );
	



( run in 0.841 second using v1.01-cache-2.11-cpan-437f7b0c052 )