App-Egaz
view release on metacpan or search on metacpan
lib/App/Egaz/Command/partition.pm view on Meta::CPAN
package App::Egaz::Command::partition;
use strict;
use warnings;
use autodie;
use App::Egaz -command;
use App::Egaz::Common;
# One-line summary of this subcommand (also reused as the first line of
# the long description).
sub abstract {
    my $summary = q{partitions fasta files by size};
    return $summary;
}
# Command-line option specification: two integer options followed by a
# trailing hash of settings.
#   --chunk   : chunk size   (default 10_010_000)
#   --overlap : overlap size (default 10_000)
sub opt_spec {
    return (
        [ q{chunk=i},   q{chunk size},   { default => 10_010_000 } ],
        [ q{overlap=i}, q{overlap size}, { default => 10_000 } ],
        { show_defaults => 1 }
    );
}
# Usage synopsis line for this subcommand.
sub usage_desc {
    my $synopsis = q{egaz partition [options] <infile> [more files]};
    return $synopsis;
}
# Long help text: the capitalised abstract followed by a period, then a
# fixed bullet list of usage notes.
sub description {
    my $headline = ucfirst( abstract() ) . ".\n";

    # Nothing in the notes is interpolated, so a literal heredoc is used.
    my $notes = <<'MARKDOWN';
* Start coordinates of output is 1-based
* <infile> can't be stdin
* There is no --output option, outputs must be in the same directory of inputs
* Each fasta files should contain only one sequence. `faops split-name` can be use to do this.
MARKDOWN

    return $headline . $notes;
}
# Check the positional arguments before execute() runs.
#
#   $self - command object; must provide usage_error()
#   $opt  - parsed options (not used here)
#   $args - array ref of input file paths
#
# Reports through $self->usage_error() when no input file is given, and
# for every argument for which Path::Tiny's is_file is false.
sub validate_args {
    my ( $self, $opt, $args ) = @_;

    my $file_count = scalar @{$args};
    if ( $file_count < 1 ) {
        my $found = join q{}, map { sprintf " [%s]", $_ } @{$args};
        $self->usage_error(
            "This command need one or more input files.\n\tIt found" . $found . ".\n" );
    }

    for my $infile ( @{$args} ) {
        next if Path::Tiny::path($infile)->is_file;
        $self->usage_error("The input file [$infile] doesn't exist.");
    }
}
# For each input fasta file, create empty marker files beside it, named
# "<infile>[start,end]", one per coordinate range of its single sequence.
#
#   $self - command object (not used here)
#   $opt  - parsed options; reads $opt->{chunk} and $opt->{overlap}
#   $args - array ref of input fasta paths
#
# Croaks when a file holds no sequence or more than one sequence.
sub execute {
    my ( $self, $opt, $args ) = @_;

    for my $infile ( @{$args} ) {

        # read_fasta returns a hash ref keyed by sequence name.
        my $seq_of    = App::Fasops::Common::read_fasta($infile);
        my @seq_names = keys %{$seq_of};

        # Without this guard an empty file gives an undefined size and a
        # bogus "<infile>[1,]" marker file is created.
        Carp::croak("No sequence in [$infile]\n")            if @seq_names < 1;
        Carp::croak("More than one sequence in [$infile]\n") if @seq_names > 1;

        my $seq_size = length $seq_of->{ $seq_names[0] };

        if ( $seq_size > $opt->{chunk} + $opt->{overlap} ) {

            # break it up
            # overlap_ranges yields an array ref of [start, end] pairs;
            # presumably consecutive chunks share `overlap` bases (verify
            # against App::Egaz::Common).
            my $ranges
                = App::Egaz::Common::overlap_ranges( 1, $seq_size, $opt->{chunk}, $opt->{overlap} );
            for my $i ( @{$ranges} ) {
                my ( $start, $end ) = @{$i};
                Path::Tiny::path( $infile . "[$start,$end]" )->touch;
            }
        }
        else {
            # Short enough to stay whole: one marker covering the full sequence.
            Path::Tiny::path( $infile . "[1,$seq_size]" )->touch;
        }
    }
}
1;
( run in 1.055 second using v1.01-cache-2.11-cpan-0bb4e1dffa6 )