App-Egaz
view release on metacpan or search on metacpan
lib/App/Egaz/Command/template.pm view on Meta::CPAN
[ "tree=s", "a predefined guiding tree for multiz", ],
[ "order", "multiple alignments with original order (using fake_tree.nwk)", ],
[ "fasttree", "use FastTree instead of RaxML to create a phylotree", ],
[ "mash", "create guiding tree by mash", ],
[ "vcf", "create vcf files", ],
[],
[ "circos", "create circos script", ],
[],
[ "repeatmasker=s", "options passed to RepeatMasker", ],
[ "perseq=s@", "split these files by names", ],
[ "min=i", "minimal length of sequences", { default => 5000 }, ],
[ "about=i", "split sequences to chunks about approximate size", { default => 5000000 }, ],
[ "suffix=s@",
"suffix of wanted files",
{ default => [ "_genomic.fna.gz", ".fsa_nt.gz" ] },
],
[ "exclude=s", "regex to exclude some files", { default => "_from_" }, ],
{ show_defaults => 1, }
);
}
# Usage synopsis string for the `egaz template` command.
# Takes no arguments and returns a constant string.
sub usage_desc {
    my $synopsis = "egaz template [options] <path/seqdir> [more path/seqdir]";
    return $synopsis;
}
# Build the long description of this command.
#
# The text is the capitalised result of abstract() followed by a full stop,
# then a fixed Markdown bullet list of usage notes.
#
# Returns: the assembled description string.
sub description {

    # Explicit parens: the call no longer relies on abstract() having been
    # declared earlier in the file to be parsed as a function call.
    my $desc = ucfirst( abstract() ) . ".\n";

    # Non-interpolating heredoc; backticks and asterisks are literal Markdown.
    # The option is spelled --perseq to match the option spec.
    $desc .= <<'MARKDOWN';
* `path/seqdir` are directories containing multiple .fa files that represent genomes
* Each .fa files in `path/target` should contain only one sequences, otherwise second or latter
sequences will be omitted
* Species/strain names in result files are the basenames of `path/seqdir`
* Default --multiname is the basename of --outdir. This option is for more than one aligning
combinations
* without --tree, or --mash, the order of multiz stitch is the same as the one from
command line
* --tree > --order > --mash
* --outgroup uses basename, not full path. *DON'T* set --outgroup to target
* --taxon may also contain unused taxonomy terms, for the construction of chr_length.csv
* --perseq is designed for NCBI ASSEMBLY and WGS, `path/seqdir` are directories containing multiple
directories
* By default, `RAxML` is used to produce a phylotree. Turn on `--fasttree` to use FastTree, which is
less accurate and doesn't support outgroups by itself
MARKDOWN
    return $desc;
}
# Check the positional arguments and normalise path-like options.
#
# $self - command object; only its usage_error() method is called here
# $opt  - options hash; reads mode, tree, taxon, outdir, multiname, parallel
#         and writes tree, taxon, outdir, multiname, parallel2
# $args - array ref of input directories
#
# Every failed check is reported through $self->usage_error().
sub validate_args {
    my ( $self, $opt, $args ) = @_;

    # At least one input directory must be supplied.
    if ( @{$args} < 1 ) {
        my $message = join "",
            "This command need one or more directories.\n\tIt found",
            ( map { sprintf " [%s]", $_ } @{$args} ),
            ".\n";
        $self->usage_error($message);
    }

    # Each positional argument has to be an existing directory.
    for my $dir ( @{$args} ) {
        next if Path::Tiny::path($dir)->is_dir;
        $self->usage_error("The input directory [$dir] doesn't exist.");
    }

    if ( $opt->{mode} eq "multi" && @{$args} < 2 ) {
        $self->usage_error("Multiple alignments need at least 2 directories");
    }

    # --tree and --taxon get the same treatment: when given, the file must
    # exist and the option is rewritten to its absolute path.
    for my $key (qw(tree taxon)) {
        next if !$opt->{$key};

        my $file = Path::Tiny::path( $opt->{$key} );
        if ( $file->is_file ) {
            $opt->{$key} = $file->absolute()->stringify();
        }
        else {
            $self->usage_error("The $key file [$opt->{$key}] doesn't exist.");
        }
    }

    # Store outdir as an absolute path string; its basename is the fallback
    # for --multiname.
    my $outdir = Path::Tiny::path( $opt->{outdir} )->absolute();
    $opt->{outdir} = $outdir->stringify();
    $opt->{multiname} ||= $outdir->basename();

    # parallel2 is half of --parallel (integer part), with a floor of 2.
    $opt->{parallel2} = int( $opt->{parallel} / 2 );
    if ( $opt->{parallel2} < 2 ) {
        $opt->{parallel2} = 2;
    }
}
sub execute {
my ( $self, $opt, $args ) = @_;
print STDERR "Create templates for [$opt->{mode}] genome alignments\n";
#----------------------------#
# prepare working dir
#----------------------------#
$opt->{outdir} = Path::Tiny::path( $opt->{outdir} )->absolute();
$opt->{outdir}->mkpath();
$opt->{outdir} = $opt->{outdir}->stringify();
print STDERR "Working directory [$opt->{outdir}]\n";
( run in 0.986 second using v1.01-cache-2.11-cpan-f56aa216473 )