App-Egaz

 view release on metacpan or  search on metacpan

lib/App/Egaz/Command/template.pm  view on Meta::CPAN

package App::Egaz::Command::template;
use strict;
use warnings;
use autodie;

use App::Egaz -command;
use App::Egaz::Common;

# One-line summary of this command. Also reused by description(), which
# capitalises it to form the first sentence of the long help text.
sub abstract {
    my $summary = 'create pipeline files';
    return $summary;
}

# Command-line option specification for `egaz template`.
#
# Returns a flat list of option descriptors: array refs of
# [ spec, help text, optional settings hash ], followed by a final hash ref
# of global settings. The empty array refs separate groups of options
# (presumably rendered as blank lines in the usage output, per the
# Getopt::Long::Descriptive convention -- confirm against its docs).
sub opt_spec {
    return (

        # --- run mode: exactly one of --multi / --self / --prep -----------
        [   'mode' => 'hidden' => {
                default => 'multi',
                one_of  => [
                    [ 'multi', 'multiple genome alignments, orthologs' ],
                    [ 'self',  'self genome alignments, paralogs' ],
                    [ 'prep',  'prepare sequences' ],
                ],
            },
        ],
        [],

        # --- general options ----------------------------------------------
        [ 'outdir|o=s',   'Output directory',  { default => '.' } ],
        [ 'queue=s',      'QUEUE_NAME',        { default => 'mpi' } ],
        [ 'separate',     'separate each Target-Query groups' ],
        [ 'tmp=s',        'user defined tempdir' ],
        [ 'parallel|p=i', 'number of threads', { default => 2 } ],
        [ 'verbose|v',    'verbose mode' ],
        [],

        # --- alignment options --------------------------------------------
        [   'length=i',
            'minimal length of alignment fragments',
            { default => 1_000 },
        ],
        [ 'partition', 'use partitioned sequences if available' ],
        [   'msa=s',
            'aligning program for refine alignments',
            { default => 'mafft' },
        ],
        [ 'taxon=s', 'taxon.csv for this project' ],
        [ 'aligndb', 'create aligndb scripts' ],
        [],

        # --- multiple-alignment naming, trees and vcf ---------------------
        [ 'multiname=s', 'naming multiply alignment' ],
        [ 'outgroup=s',  'the name of outgroup' ],
        [ 'tree=s',      'a predefined guiding tree for multiz' ],
        [   'order',
            'multiple alignments with original order (using fake_tree.nwk)',
        ],
        [   'fasttree',
            'use FastTree instead of RaxML to create a phylotree',
        ],
        [ 'mash', 'create guiding tree by mash' ],
        [ 'vcf',  'create vcf files' ],
        [],

        # --- circos -------------------------------------------------------
        [ 'circos', 'create circos script' ],
        [],

        # --- sequence preparation -----------------------------------------
        [ 'repeatmasker=s', 'options passed to RepeatMasker' ],
        [ 'perseq=s@',      'split these files by names' ],
        [ 'min=i', 'minimal length of sequences', { default => 5_000 } ],
        [   'about=i',
            'split sequences to chunks about approximate size',
            { default => 5_000_000 },
        ],
        [   'suffix=s@',
            'suffix of wanted files',
            { default => [ '_genomic.fna.gz', '.fsa_nt.gz' ] },
        ],
        [   'exclude=s',
            'regex to exclude some files',
            { default => '_from_' },
        ],

        # Global setting: show each option's default value in the help.
        { show_defaults => 1 },
    );
}

# Usage synopsis line: the command name, an options placeholder, and one
# required plus any number of additional sequence directories.
sub usage_desc {
    my $synopsis = 'egaz template [options] <path/seqdir> [more path/seqdir]';
    return $synopsis;
}

sub description {
    my $desc;
    $desc .= ucfirst(abstract) . ".\n";
    $desc .= <<'MARKDOWN';

* `path/seqdir` are directories containing multiple .fa files that represent genomes

* Each .fa files in `path/target` should contain only one sequences, otherwise second or latter
  sequences will be omitted

* Species/strain names in result files are the basenames of `path/seqdir`

* Default --multiname is the basename of --outdir. This option is for more than one aligning
  combinations

* without --tree, or --mash, the order of multiz stitch is the same as the one from
  command line

* --tree > --order > --mash

* --outgroup uses basename, not full path. *DON'T* set --outgroup to target

* --taxon may also contain unused taxonomy terms, for the construction of chr_length.csv

* --preq is designed for NCBI ASSEMBLY and WGS, `path/seqdir` are directories containing multiple



( run in 0.546 second using v1.01-cache-2.11-cpan-75ffa21a3d4 )