App-Dazz

 view release on metacpan or  search on metacpan

lib/App/Dazz/Command/contained.pm  view on Meta::CPAN

package App::Dazz::Command::contained;
use strict;
use warnings;
use autodie;

use App::Dazz - command;
use App::Dazz::Common;

use constant abstract => "discard contained unitigs";

# Declare the command-line options of the `contained` command.
#
# Returns a list: one array ref per option (spec string, help text and,
# where applicable, a hash ref carrying the default value), followed by a
# final hash ref of display settings (show_defaults is switched on).
sub opt_spec {
    my @options = (
        [ "outfile|o=s",  "output filename, [stdout] for screen" ],
        [ "len|l=i",      "minimal length of overlaps",   { default => 500 } ],
        [ "idt|i=f",      "minimal identity of overlaps", { default => 0.98 } ],
        [ "proportion=f", "nearly contained proportion",  { default => 0.98 } ],
        [ "prefix=s",     "prefix of names",              { default => "infile" } ],
        [ "tmp=s",        "user defined tempdir" ],
        [ "parallel|p=i", "number of threads",            { default => 8 } ],
        [ "verbose|v",    "verbose mode" ],
    );

    # Trailing settings hash, kept separate from the per-option entries.
    my %settings = ( show_defaults => 1 );

    return ( @options, \%settings );
}

# Returns the usage synopsis string for this command.
sub usage_desc {
    my $synopsis = "dazz contained [options] <infile> [more infiles]";
    return $synopsis;
}

# Returns the long description of the command: the abstract with its first
# letter upper-cased and a full stop appended, followed by a tab-indented
# note that the work happens in a tempdir.
sub description {
    return join "",
        ucfirst(abstract), ".\n",
        "\tAll operations are running in a tempdir and no intermediate files are kept.\n";
}

# Validate the positional arguments and fill in the default output name.
#
# Parameters:
#   $self - the command object; must provide usage_error()
#   $opt  - parsed options, accessed as a hash ref (outfile may be set here)
#   $args - array ref of input file names
#
# Calls $self->usage_error when no input file is given or when an argument
# fails Path::Tiny's is_file check. When no outfile option was supplied, it
# is set to the absolute path of the first input plus ".contained.fasta".
sub validate_args {
    my ( $self, $opt, $args ) = @_;

    # At least one input file is mandatory.
    unless ( @{$args} ) {
        my $message = join "",
            "This command need one or more input files.\n\tIt found",
            ( map { sprintf " [%s]", $_ } @{$args} ),
            ".\n";
        $self->usage_error($message);
    }

    # Every argument has to name a file.
    for my $infile ( @{$args} ) {
        next if Path::Tiny::path($infile)->is_file;
        $self->usage_error("The input file [$infile] doesn't exist.");
    }

    # Derive the output name from the first input when none was requested.
    if ( !exists $opt->{outfile} ) {
        my $first_input = Path::Tiny::path( $args->[0] )->absolute;
        $opt->{outfile} = $first_input . ".contained.fasta";
    }

}

sub execute {
    my ( $self, $opt, $args ) = @_;

    # absolute paths as we will chdir to tempdir later
    my @infiles;
    for my $infile ( @{$args} ) {
        push @infiles, Path::Tiny::path($infile)->absolute->stringify;
    }

    if ( lc $opt->{outfile} ne "stdout" ) {
        $opt->{outfile} = Path::Tiny::path( $opt->{outfile} )->absolute->stringify;
    }

    # record cwd, we'll return there
    my $cwd = Path::Tiny->cwd;
    my $tempdir;
    if ( $opt->{tmp} ) {
        $tempdir = Path::Tiny->tempdir(
            TEMPLATE => "dazz_contained_XXXXXXXX",
            DIR      => $opt->{tmp},
        );
    }
    else {
        $tempdir = Path::Tiny->tempdir("dazz_contained_XXXXXXXX");
    }
    chdir $tempdir;

    my $basename = $tempdir->basename();
    $basename =~ s/\W+/_/g;

    {    # filter short contigs then rename reads as there're duplicated names
        for my $i ( 0 .. $#infiles ) {
            my $cmd;
            $cmd .= "faops filter -a $opt->{len} -l 0 $infiles[$i] stdout";
            $cmd .= " | faops dazz -p $opt->{prefix}_$i stdin infile.$i.fasta";

            App::Dazz::Common::exec_cmd( $cmd, { verbose => $opt->{verbose}, }, );
        }
    }



( run in 1.525 second using v1.01-cache-2.11-cpan-140bd7fdf52 )