App-Egaz

 view release on metacpan or  search on metacpan

lib/App/Egaz/Command/blastn.pm  view on Meta::CPAN

package App::Egaz::Command::blastn;
use strict;
use warnings;
use autodie;

use App::Egaz -command;
use App::Egaz::Common;

sub abstract {
    return 'blastn wrapper between two fasta files';
}

sub opt_spec {
    return (
        [ "outfile|o=s", "Output filename. [stdout] for screen", { default => "stdout" }, ],
        [ "evalue=f",    "expectation value (E) threshold",      { default => 0.01 }, ],
        [ "wordsize=i",  "length of best perfect match",         { default => 40 }, ],
        [   "outfmt=s", "out format",
            { default => "7 qseqid sseqid qstart qend sstart send qlen slen nident" },
        ],
        [ "tmp=s", "user defined tempdir", ],
        [ "parallel|p=i", "number of threads", { default => 2 }, ],
        [ "verbose|v",    "verbose mode", ],
        { show_defaults => 1, }
    );
}

sub usage_desc {
    return "egaz blastn [options] <infile> <genome.fa>";
}

sub description {
    my $desc;
    $desc .= ucfirst(abstract) . ".\n";
    $desc .= <<'MARKDOWN';

* <infile> should be a blocked fasta file (*.fas)
* <genome.fa> is a multi-sequence fasta file contains genome sequences
* `makeblastdb` and `blastn` should be in $PATH

MARKDOWN

    return $desc;
}

sub validate_args {
    my ( $self, $opt, $args ) = @_;

    if ( @{$args} != 2 ) {
        my $message = "This command need two input files.\n\tIt found";
        $message .= sprintf " [%s]", $_ for @{$args};
        $message .= ".\n";
        $self->usage_error($message);
    }
    for ( @{$args} ) {
        if ( !Path::Tiny::path($_)->is_file ) {
            $self->usage_error("The input file [$_] doesn't exist.");
        }
    }

}

sub execute {
    my ( $self, $opt, $args ) = @_;

    # absolute pathes as we will chdir to tempdir later
    my @infiles;
    for my $infile ( @{$args} ) {
        push @infiles, Path::Tiny::path($infile)->absolute->stringify;
    }

    if ( lc $opt->{outfile} ne "stdout" ) {
        $opt->{outfile} = Path::Tiny::path( $opt->{outfile} )->absolute->stringify;
    }

    # record cwd, we'll return there
    my $cwd = Path::Tiny->cwd;
    my $tempdir;
    if ( $opt->{tmp} ) {
        $tempdir = Path::Tiny->tempdir(
            TEMPLATE => "blastn_XXXXXXXX",
            DIR      => $opt->{tmp},
        );
    }
    else {
        $tempdir = Path::Tiny->tempdir("blastn_XXXXXXXX");
    }
    chdir $tempdir;

    my $basename = $tempdir->basename();
    $basename =~ s/\W+/_/g;

    {    # makeblastdb
        my $cmd = "makeblastdb -dbtype nucl -in $infiles[1] -out $basename -logfile $basename.log";
        App::Egaz::Common::exec_cmd( $cmd, { verbose => $opt->{verbose}, } );
        if ( !$tempdir->child("$basename.nsq")->is_file ) {
            Carp::croak "Failed: makeblastdb\n";
        }
    }

    {    # blastn
        my $cmd
            = sprintf "blastn -task megablast"
            . " -max_target_seqs 20 -culling_limit 20"    # reduce size of reports
            . " -dust no -soft_masking false"             # disable dust and soft masking
            . " -evalue $opt->{evalue} -word_size $opt->{wordsize}"
            . " -outfmt '$opt->{outfmt}'"
            . " -num_threads $opt->{parallel} -db $basename -query $infiles[0]"
            . " -out $basename.blast";

        my $blastn_usage = `blastn -h`;
        if ( $blastn_usage =~ /\-max_hsps int/ ) {
            $cmd .= " -max_hsps 10";                      # Nucleotide-Nucleotide BLAST 2.6.0+
        }
        elsif ( $blastn_usage =~ /\-max_hsps_per_subject int/ ) {
            $cmd .= " -max_hsps_per_subject 10";          # Nucleotide-Nucleotide BLAST 2.2.28+
        }

        App::Egaz::Common::exec_cmd( $cmd, { verbose => $opt->{verbose}, } );
        if ( !$tempdir->child("$basename.blast")->is_file ) {
            Carp::croak "Failed: blastn\n";
        }
    }

    {                                                     # outputs
        if ( lc $opt->{outfile} ne "stdout" ) {
            Path::Tiny::path("$basename.blast")->copy( $opt->{outfile} );
        }
        else {
            print Path::Tiny::path("$basename.blast")->slurp;
        }
    }

    chdir $cwd;
}

1;



( run in 1.642 second using v1.01-cache-2.11-cpan-d8267643d1d )