App-Anchr
view release on metacpan or search on metacpan
lib/App/Anchr/Command/contained.pm view on Meta::CPAN
package App::Anchr::Command::contained;
use strict;
use warnings;
use autodie;
use App::Anchr - command;
use App::Anchr::Common;
use constant abstract => "discard contained super-reads, k-unitigs, or anchors";
# Declare the command-line options understood by `anchr contained`.
#
# Returns a list. Each array reference holds, in order, an option spec string,
# its help text and (where present) a settings hash carrying the default value.
# The trailing hash reference sets show_defaults => 1 for the list as a whole;
# presumably this makes the option parser print defaults in the usage text --
# confirm against Getopt::Long::Descriptive.
sub opt_spec {
    return (
        [ 'outfile|o=s'  => 'output filename, [stdout] for screen' ],
        [ 'len|l=i'      => 'minimal length of overlaps',   { default => 500 } ],
        [ 'idt|i=f'      => 'minimal identity of overlaps', { default => 0.98 } ],
        [ 'proportion=f' => 'nearly contained proportion',  { default => 0.98 } ],
        [ 'prefix=s'     => 'prefix of names',              { default => 'infile' } ],
        [ 'parallel|p=i' => 'number of threads',            { default => 8 } ],
        [ 'verbose|v'    => 'verbose mode' ],
        { show_defaults => 1 },
    );
}
# One-line usage synopsis for this command.
#
# Returns a fixed string naming the command, its options placeholder and the
# input-file arguments (one required, more optional).
sub usage_desc {
    my $usage = 'anchr contained [options] <infile> [more infiles]';
    return $usage;
}
# Long help text for this command.
#
# Returns a two-line string: the `abstract` constant with its first letter
# upper-cased and a full stop appended, then a tab-indented note that the work
# happens in a tempdir and intermediate files are not kept.
sub description {
    return join q{},
        ucfirst(abstract), ".\n",
        "\tAll operations are running in a tempdir and no intermediate files are kept.\n";
}
# Check the positional arguments and fill in a default output filename.
#
# Parameters:
#   $self - the command object; must provide usage_error()
#   $opt  - options, accessed as a hash reference (key: outfile)
#   $args - array reference of input file names
#
# Reports a usage error via $self->usage_error when no input file is given or
# when any named input is not an existing file. If no outfile option is
# present, it is set to the absolute path of the first input with
# ".contained.fasta" appended.
sub validate_args {
    my ( $self, $opt, $args ) = @_;

    # At least one input file is required.
    if ( scalar @{$args} == 0 ) {
        my $found = join q{}, map { sprintf ' [%s]', $_ } @{$args};
        $self->usage_error(
            "This command need one or more input files.\n\tIt found" . $found . ".\n" );
    }

    # Every input must be an existing regular file.
    for my $infile ( @{$args} ) {
        next if Path::Tiny::path($infile)->is_file;
        $self->usage_error("The input file [$infile] doesn't exist.");
    }

    # Derive the output name from the first input when none was supplied.
    if ( not exists $opt->{outfile} ) {
        my $first_input = Path::Tiny::path( $args->[0] )->absolute;
        $opt->{outfile} = $first_input . ".contained.fasta";
    }
}
sub execute {
my ( $self, $opt, $args ) = @_;
# absolute paths as we will chdir to tempdir later
my @infiles;
for my $infile ( @{$args} ) {
push @infiles, Path::Tiny::path($infile)->absolute->stringify;
}
if ( lc $opt->{outfile} ne "stdout" ) {
$opt->{outfile} = Path::Tiny::path( $opt->{outfile} )->absolute->stringify;
}
# record cwd, we'll return there
my $cwd = Path::Tiny->cwd;
my $tempdir = Path::Tiny->tempdir("anchr_contained_XXXXXXXX");
chdir $tempdir;
my $basename = $tempdir->basename();
$basename =~ s/\W+/_/g;
{ # filter short contigs then rename reads as there're duplicated names
for my $i ( 0 .. $#infiles ) {
my $cmd;
( run in 1.982 second using v1.01-cache-2.11-cpan-39bf76dae61 )