App-Egaz
view release on metacpan or search on metacpan
lib/App/Egaz/Command/template.pm view on Meta::CPAN
package App::Egaz::Command::template;
use strict;
use warnings;
use autodie;
use App::Egaz -command;
use App::Egaz::Common;
# One-line summary of what this command does.
#
# Takes no arguments and returns a plain string. The same text is reused
# (capitalised, with a trailing period) as the first line of description().
sub abstract {
    my $summary = q{create pipeline files};
    return $summary;
}
# Command-line option specification for `egaz template`.
#
# Returns a list of option entries. Each entry is an array ref of the form
# [ spec, help text, { extra settings } ]; an empty array ref separates one
# group of options from the next; the final hash ref holds settings for the
# whole specification.
# NOTE(review): the layout looks like the Getopt::Long::Descriptive format
# consumed by App::Cmd -- confirm against those modules.
sub opt_spec {

    # The three selectable run modes; "multi" is the default.
    my @mode_choices = (
        [ 'multi' => 'multiple genome alignments, orthologs' ],
        [ 'self'  => 'self genome alignments, paralogs' ],
        [ 'prep'  => 'prepare sequences' ],
    );

    # File-name suffixes accepted by --suffix when none are given.
    my @wanted_suffixes = ( '_genomic.fna.gz', '.fsa_nt.gz' );

    # Kept as a literal list so the return value is the same in every
    # calling context.
    return (
        [ 'mode', 'hidden', { default => 'multi', one_of => \@mode_choices } ],
        [],

        # General options.
        [ 'outdir|o=s',   'Output directory',                  { default => '.' } ],
        [ 'queue=s',      'QUEUE_NAME',                        { default => 'mpi' } ],
        [ 'separate',     'separate each Target-Query groups' ],
        [ 'tmp=s',        'user defined tempdir' ],
        [ 'parallel|p=i', 'number of threads',                 { default => 2 } ],
        [ 'verbose|v',    'verbose mode' ],
        [],

        # Alignment options.
        [ 'length=i',  'minimal length of alignment fragments',  { default => 1000 } ],
        [ 'partition', 'use partitioned sequences if available' ],
        [ 'msa=s',     'aligning program for refine alignments', { default => 'mafft' } ],
        [ 'taxon=s',   'taxon.csv for this project' ],
        [ 'aligndb',   'create aligndb scripts' ],
        [],

        # Naming, guiding-tree and vcf options.
        [ 'multiname=s', 'naming multiply alignment' ],
        [ 'outgroup=s',  'the name of outgroup' ],
        [ 'tree=s',      'a predefined guiding tree for multiz' ],
        [ 'order',       'multiple alignments with original order (using fake_tree.nwk)' ],
        [ 'fasttree',    'use FastTree instead of RaxML to create a phylotree' ],
        [ 'mash',        'create guiding tree by mash' ],
        [ 'vcf',         'create vcf files' ],
        [],

        # Circos option.
        [ 'circos', 'create circos script' ],
        [],

        # Sequence selection and splitting options.
        [ 'repeatmasker=s', 'options passed to RepeatMasker' ],
        [ 'perseq=s@',      'split these files by names' ],
        [ 'min=i',          'minimal length of sequences', { default => 5000 } ],
        [   'about=i',
            'split sequences to chunks about approximate size',
            { default => 5000000 },
        ],
        [ 'suffix=s@', 'suffix of wanted files',      { default => \@wanted_suffixes } ],
        [ 'exclude=s', 'regex to exclude some files', { default => '_from_' } ],

        # Settings for the specification as a whole.
        { show_defaults => 1 },
    );
}
# Usage line for this command: one or more sequence directories are given
# as positional arguments after the options.
#
# Takes no arguments and returns the usage text as a plain string.
sub usage_desc {
    my $usage = q{egaz template [options] <path/seqdir> [more path/seqdir]};
    return $usage;
}
sub description {
my $desc;
$desc .= ucfirst(abstract) . ".\n";
$desc .= <<'MARKDOWN';
* `path/seqdir` are directories containing multiple .fa files that represent genomes
* Each .fa files in `path/target` should contain only one sequences, otherwise second or latter
sequences will be omitted
* Species/strain names in result files are the basenames of `path/seqdir`
* Default --multiname is the basename of --outdir. This option is for more than one aligning
combinations
* without --tree, or --mash, the order of multiz stitch is the same as the one from
command line
* --tree > --order > --mash
* --outgroup uses basename, not full path. *DON'T* set --outgroup to target
* --taxon may also contain unused taxonomy terms, for the construction of chr_length.csv
* --preq is designed for NCBI ASSEMBLY and WGS, `path/seqdir` are directories containing multiple
( run in 0.546 second using v1.01-cache-2.11-cpan-75ffa21a3d4 )