App-Egaz

 view release on metacpan or  search on metacpan

lib/App/Egaz/Command/lpcnam.pm  view on Meta::CPAN

use App::Egaz -command;
use App::Egaz::Common;

# One-line summary of this command.
# description() reuses it (capitalised) as its opening sentence.
sub abstract {
    my $summary = 'the pipeline of pairwise lav-psl-chain-net-axt-maf';
    return $summary;
}

# Option specification for this command.
#
# Returns a list of [ spec, description, \%extra ] array refs, one per
# command-line option, followed by a trailing hash ref of settings for the
# option parser (here: show_defaults).
sub opt_spec {

    # Default values, gathered in one place so they are easy to spot.
    my %default_for = (
        lineargap => "loose",
        minscore  => 1000,
        parallel  => 2,
    );

    return (
        [ "outdir|o=s" => "Output directory" ],
        [
            "lineargap=s" => "axtChain linearGap, loose or medium",
            { default => $default_for{lineargap} },
        ],
        [
            "minscore=i" => "minimum score for axtChain",
            { default => $default_for{minscore} },
        ],
        [ "tname|t=s" => "target name" ],
        [ "qname|q=s" => "query name" ],
        [ "syn"       => "create .synNet.maf instead of .net.maf" ],
        [
            "parallel|p=i" => "number of threads",
            { default => $default_for{parallel} },
        ],
        [ "verbose|v" => "verbose mode" ],
        { show_defaults => 1 },
    );
}

# Synopsis line: the command name, then the three positional arguments
# (target, query, lav) after the options.
sub usage_desc {
    my $synopsis = "egaz lpcnam [options] <path/target> <path/query> <path/lav>";
    return $synopsis;
}

# Long help text for this command.
#
# Returns a single string: the capitalised abstract followed by a full stop,
# then a Markdown-formatted list of notes on inputs, required external
# binaries and the --lineargap/--minscore settings.
sub description {

    # Opening sentence, derived from abstract() so the two never drift apart.
    my $headline = ucfirst( abstract() ) . ".\n";

    # Non-interpolating heredoc: "$PATH" below is literal text.
    my $notes = <<'MARKDOWN';

* <path/target> and <path/query> are directories containing .fa, chr.sizes and chr.2bit files
* <path/lav> can be a .lav file, lav.tar.gz or a directory containing .lav files
* Many binaries from kent-tools are needed and should be found in $PATH:
    * axtChain
    * chainAntiRepeat
    * chainMergeSort
    * chainPreNet
    * chainNet
    * netSyntenic
    * netChainSubset
    * chainStitchId
    * netSplit
    * netToAxt
    * axtSort
    * axtToMaf
    * netFilter
    * chainSplit
* [Prebuild binaries](http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/)
* `--lineargap` and `--minscore`:
    * Human18vsChimp2 use loose and 1000
    * Human19vsChimp3 use medium and 5000
    * loose is chicken/human linear gap costs
    * medium is mouse/human linear gap costs
* Default names of target and query in .maf are defined by basename of <path/target> and <path/query>

MARKDOWN

    return $headline . $notes;
}

# Validate the positional arguments and fill in missing option values.
#
#   $self - the command object; usage_error() is invoked on it for bad input
#   $opt  - parsed options; outdir, tname and qname may be set here
#   $args - array ref of positional arguments: target, query, lav
#
# Checks that exactly three arguments were given and that each one is an
# existing file or directory, then derives --outdir, --tname and --qname.
sub validate_args {
    my ( $self, $opt, $args ) = @_;

    # Exactly three inputs are required; echo back whatever was received.
    unless ( scalar @{$args} == 3 ) {
        my $found = join "", map { sprintf " [%s]", $_ } @{$args};
        $self->usage_error(
            "This command need three input files/directories.\n\tIt found" . $found . ".\n" );
    }

    # Each input has to be an existing file or directory.
    for my $input ( @{$args} ) {
        next
            if Path::Tiny::path($input)->is_file
            || Path::Tiny::path($input)->is_dir;
        $self->usage_error("The input file/directory [$input] doesn't exist.");
    }

    # set default --outdir
    my $lav = Path::Tiny::path( $args->[2] );

    # A lav *file*: fall back to the current directory unless --outdir is given.
    if ( $lav->is_file and !$opt->{outdir} ) {
        $opt->{outdir} = ".";
        print STDERR "--outdir set to [.]\n" if $opt->{verbose};
    }

    # A lav *directory*: it always becomes the output directory, replacing
    # any --outdir value supplied on the command line.
    if ( $lav->is_dir ) {
        $opt->{outdir} = $args->[2];
        print STDERR "--outdir set to [$args->[2]]\n" if $opt->{verbose};
    }

    # Target/query names default to the basename of the respective path.
    $opt->{tname} ||= Path::Tiny::path( $args->[0] )->basename();
    $opt->{qname} ||= Path::Tiny::path( $args->[1] )->basename();
}

sub execute {
    my ( $self, $opt, $args ) = @_;

    #@type Path::Tiny
    my $outdir = Path::Tiny::path( $opt->{outdir} );
    $outdir->mkpath();

    #----------------------------------------------------------#
    # lav-psl-chain-net-axt section
    #----------------------------------------------------------#
    for my $d (qw{net axtNet}) {
        $outdir->child($d)->mkpath();
    }

    my $gzip_bin = "gzip";
    if ( IPC::Cmd::can_run('pigz') ) {
        $gzip_bin = "pigz -p " . $opt->{parallel};
    }

    #----------------------------#
    # lavToPsl
    #----------------------------#
    {
        my @files;
        if ( Path::Tiny::path( $args->[2] )->is_file ) {
            my $basename = Path::Tiny::path( $args->[2] )->basename();



( run in 0.914 second using v1.01-cache-2.11-cpan-5a3173703d6 )