Bio_AssemblyImprovement
view release on metacpan or search on metacpan
lib/Bio/AssemblyImprovement/Scaffold/SSpace/PreprocessInputFiles.pm view on Meta::CPAN
package Bio::AssemblyImprovement::Scaffold::SSpace::PreprocessInputFiles;
# ABSTRACT: Make sure the input files are in the correct format, and paths are resolved.
use Moose;
use Cwd 'abs_path';
use File::Basename;
use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
use Bio::AssemblyImprovement::Util::FastaTools;
with 'Bio::AssemblyImprovement::Scaffold::SSpace::TempDirectoryRole';
with 'Bio::AssemblyImprovement::Abacas::DelimiterRole';
with 'Bio::AssemblyImprovement::Util::UnzipFileIfNeededRole';
# --- Caller-supplied inputs -------------------------------------------------
# Path to the assembly file; the only mandatory constructor argument.
has 'input_assembly' => ( is => 'ro', isa => 'Str', required => 1 );
# Optional array reference of input file names; may be left undefined.
has 'input_files' => ( is => 'ro', isa => 'Maybe[ArrayRef]');
# Optional path to a reference file; may be left undefined.
has 'reference' => ( is => 'ro', isa => 'Maybe[Str]' );
# First argument handed to FastaTools::remove_small_contigs (defaults to 300).
has 'minimum_contig_size_in_assembly' => ( is => 'ro', isa => 'Int', default => 300 );
# Second argument handed to FastaTools::remove_small_contigs (defaults to 95).
# NOTE(review): presumably a percentage threshold above which filtering is
# skipped -- confirm against FastaTools, which is not visible from this file.
has 'minimum_perc_to_turn_off_filtering' => ( is => 'ro', isa => 'Int', default => 95 );
# --- Derived outputs --------------------------------------------------------
# Each of these is lazy: its builder runs on first read, not at construction.
has 'processed_input_assembly' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build_processed_input_assembly' );
has 'processed_input_files' => ( is => 'ro', isa => 'Maybe[ArrayRef]', lazy => 1, builder => '_build_processed_input_files' );
has 'processed_reference' => ( is => 'ro', isa => 'Maybe[Str]', lazy => 1, builder => '_build_processed_reference' );
# Builder for the lazy 'processed_input_files' attribute.
#
# Returns undef when no 'input_files' were supplied. Otherwise returns a
# reference to a new array holding, in the original order, whatever
# _gunzip_file_if_needed yields for each input file when given the
# temporary directory.
# NOTE(review): the helper is not defined in this file -- presumably it comes
# from UnzipFileIfNeededRole and uncompresses gzipped files; confirm there.
sub _build_processed_input_files {
    my ($self) = @_;

    my $raw_files = $self->input_files;
    if ( !defined $raw_files ) {
        # Explicit undef so exactly one value is returned in any context.
        return undef;
    }

    my @unpacked_files;
    foreach my $raw_file ( @{$raw_files} ) {
        push @unpacked_files, $self->_gunzip_file_if_needed( $raw_file, $self->_temp_directory );
    }
    return \@unpacked_files;
}
# Builder for the lazy 'processed_input_assembly' attribute.
#
# Derives an output path "<temp directory>/<assembly file name>.filtered",
# constructs a FastaTools object reading from 'input_assembly' and writing to
# that path, calls remove_small_contigs on it with the two configured
# thresholds, and returns the output_filename of whatever that call yields.
sub _build_processed_input_assembly {
    my ($self) = @_;

    # Scalar-context fileparse gives just the file-name component.
    my $assembly_name = fileparse( $self->input_assembly );
    my $filtered_path = join '/', $self->_temp_directory, "${assembly_name}.filtered";

    my $contig_filter = Bio::AssemblyImprovement::Util::FastaTools->new(
        input_filename  => $self->input_assembly,
        output_filename => $filtered_path,
    );
    my $filter_result = $contig_filter->remove_small_contigs(
        $self->minimum_contig_size_in_assembly,
        $self->minimum_perc_to_turn_off_filtering,
    );
    return $filter_result->output_filename;
}
# Builder for the lazy 'processed_reference' attribute.
#
# When a 'reference' was supplied, returns whatever _gunzip_file_if_needed
# yields for it when given the temporary directory; otherwise returns undef.
# NOTE(review): the helper is not defined in this file -- presumably it comes
# from UnzipFileIfNeededRole; confirm its exact behaviour there.
sub _build_processed_reference {
    my ($self) = @_;

    my $reference_path = $self->reference;
    if ( defined $reference_path ) {
        return $self->_gunzip_file_if_needed( $reference_path, $self->_temp_directory );
    }

    # Explicit undef so exactly one value is returned in any context.
    return undef;
}
# Unimport the Moose keywords (has, with, ...) now that the class is declared.
no Moose;
# Finalize the class definition via its metaclass.
__PACKAGE__->meta->make_immutable;
# A module file must end by returning a true value.
1;
__END__
=pod
=encoding UTF-8
=head1 NAME
Bio::AssemblyImprovement::Scaffold::SSpace::PreprocessInputFiles - Make sure the input files are in the correct format, and paths are resolved.
=head1 VERSION
version 1.160490
=head1 SYNOPSIS
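Make sure the input files are in the correct format, and paths are resolved: the read files and the optional reference are unzipped into a temporary directory if needed, and small contigs are filtered out of the assembly. This object needs to be kept in scope
because it creates temp files which are cleaned up when it goes out of scope.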
use Bio::AssemblyImprovement::Scaffold::SSpace::PreprocessInputFiles;
my $process_input_files = Bio::AssemblyImprovement::Scaffold::SSpace::PreprocessInputFiles->new(
input_files => ['abc_1.fastq.gz', 'abc_2.fastq'],
input_assembly => 'contigs.fa'
);
$process_input_files->processed_input_files;
( run in 1.495 second using v1.01-cache-2.11-cpan-437f7b0c052 )