Bio_AssemblyImprovement

 view release on metacpan or  search on metacpan

lib/Bio/AssemblyImprovement/Scaffold/SSpace/PreprocessInputFiles.pm  view on Meta::CPAN

package Bio::AssemblyImprovement::Scaffold::SSpace::PreprocessInputFiles;
# ABSTRACT: Make sure the input files are in the correct format, and paths are resolved.


use Moose;
use Cwd 'abs_path';
use File::Basename;
use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
use Bio::AssemblyImprovement::Util::FastaTools;

with 'Bio::AssemblyImprovement::Scaffold::SSpace::TempDirectoryRole';
with 'Bio::AssemblyImprovement::Abacas::DelimiterRole';
with 'Bio::AssemblyImprovement::Util::UnzipFileIfNeededRole';

# --- Caller-supplied inputs ---------------------------------------------

# Assembly file name (mandatory); handed to FastaTools as its input_filename.
has 'input_assembly' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# Optional array ref of file names; each entry is passed through
# _gunzip_file_if_needed when processed_input_files is built.
has 'input_files' => (
    is  => 'ro',
    isa => 'Maybe[ArrayRef]',
);

# Optional reference file name; passed through _gunzip_file_if_needed when
# processed_reference is built.
has 'reference' => (
    is  => 'ro',
    isa => 'Maybe[Str]',
);

# --- Filtering parameters -----------------------------------------------

# First argument given to FastaTools::remove_small_contigs (default 300).
has 'minimum_contig_size_in_assembly' => (
    is      => 'ro',
    isa     => 'Int',
    default => 300,
);

# Second argument given to FastaTools::remove_small_contigs (default 95).
# NOTE(review): presumably a percentage threshold above which filtering is
# skipped -- confirm against FastaTools.
has 'minimum_perc_to_turn_off_filtering' => (
    is      => 'ro',
    isa     => 'Int',
    default => 95,
);

# --- Derived values, computed lazily on first access ----------------------

has 'processed_input_assembly' => (
    is      => 'ro',
    isa     => 'Str',
    lazy    => 1,
    builder => '_build_processed_input_assembly',
);

has 'processed_input_files' => (
    is      => 'ro',
    isa     => 'Maybe[ArrayRef]',
    lazy    => 1,
    builder => '_build_processed_input_files',
);

has 'processed_reference' => (
    is      => 'ro',
    isa     => 'Maybe[Str]',
    lazy    => 1,
    builder => '_build_processed_reference',
);



# Builder for the 'processed_input_files' attribute.
#
# Returns undef when no input_files were supplied. Otherwise returns an array
# ref containing, in the original order, the result of passing each input file
# name (together with the temp directory) to _gunzip_file_if_needed.
sub _build_processed_input_files {
    my ($self) = @_;

    my $raw_files = $self->input_files;
    return undef unless defined $raw_files;

    my @ready_files;
    foreach my $path ( @{$raw_files} ) {
        # The temp directory is looked up per file, so nothing is requested
        # from it when the list is empty.
        my $ready = $self->_gunzip_file_if_needed( $path, $self->_temp_directory );
        push @ready_files, $ready;
    }

    return \@ready_files;
}

# Builder for the 'processed_input_assembly' attribute.
#
# Asks FastaTools to write a copy of the input assembly into the temp
# directory as "<assembly base name>.filtered", calling remove_small_contigs
# with the two configured thresholds, and returns the output_filename reported
# by the object that call yields.
sub _build_processed_input_assembly {
    my ($self) = @_;

    my $assembly_path = $self->input_assembly;

    # fileparse in scalar context yields just the file name without directories.
    my $filtered_name = scalar( fileparse($assembly_path) ) . '.filtered';
    my $filtered_path = join( '/', $self->_temp_directory, $filtered_name );

    my $contig_filter = Bio::AssemblyImprovement::Util::FastaTools->new(
        input_filename  => $assembly_path,
        output_filename => $filtered_path,
    );

    my $filtered = $contig_filter->remove_small_contigs(
        $self->minimum_contig_size_in_assembly,
        $self->minimum_perc_to_turn_off_filtering,
    );

    return $filtered->output_filename;
}

# Builder for the 'processed_reference' attribute.
#
# When a reference was supplied, returns the result of passing it (together
# with the temp directory) to _gunzip_file_if_needed; otherwise returns undef.
sub _build_processed_reference {
    my ($self) = @_;

    my $reference_path = $self->reference;
    if ( defined $reference_path ) {
        return $self->_gunzip_file_if_needed( $reference_path, $self->_temp_directory );
    }

    return undef;
}

no Moose;
__PACKAGE__->meta->make_immutable;
1;

__END__

=pod

=encoding UTF-8

=head1 NAME

Bio::AssemblyImprovement::Scaffold::SSpace::PreprocessInputFiles - Make sure the input files are in the correct format, and paths are resolved.

=head1 VERSION

version 1.160490

=head1 SYNOPSIS

Make sure the input files are in the correct format, and paths are resolved. This object needs to be kept in scope
because it creates temp files which are cleaned up when it goes out of scope.

   use Bio::AssemblyImprovement::Scaffold::SSpace::PreprocessInputFiles;

   my $process_input_files = Bio::AssemblyImprovement::Scaffold::SSpace::PreprocessInputFiles->new(
     input_files => ['abc_1.fastq.gz', 'abc_2.fastq'],
     input_assembly => 'contigs.fa'
   );

   $process_input_files->processed_input_files;



( run in 1.495 second using v1.01-cache-2.11-cpan-437f7b0c052 )