App-Egaz

 view release on metacpan or  search on metacpan

lib/App/Egaz/Command/template.pm  view on Meta::CPAN

        [ "tree=s",      "a predefined guiding tree for multiz", ],
        [ "order",       "multiple alignments with original order (using fake_tree.nwk)", ],
        [ "fasttree", "use FastTree instead of RaxML to create a phylotree", ],
        [ "mash",     "create guiding tree by mash", ],
        [ "vcf",      "create vcf files", ],
        [],
        [ "circos",  "create circos script", ],
        [],
        [ "repeatmasker=s", "options passed to RepeatMasker", ],
        [ "perseq=s@",      "split these files by names", ],
        [ "min=i",   "minimal length of sequences",                      { default => 5000 }, ],
        [ "about=i", "split sequences to chunks about approximate size", { default => 5000000 }, ],
        [   "suffix=s@",
            "suffix of wanted files",
            { default => [ "_genomic.fna.gz", ".fsa_nt.gz" ] },
        ],
        [ "exclude=s", "regex to exclude some files", { default => "_from_" }, ],
        { show_defaults => 1, }
    );
}

sub usage_desc {
    return "egaz template [options] <path/seqdir> [more path/seqdir]";
}

sub description {
    my $desc;
    $desc .= ucfirst(abstract) . ".\n";
    $desc .= <<'MARKDOWN';

* `path/seqdir` are directories containing multiple .fa files that represent genomes

* Each .fa files in `path/target` should contain only one sequences, otherwise second or latter
  sequences will be omitted

* Species/strain names in result files are the basenames of `path/seqdir`

* Default --multiname is the basename of --outdir. This option is for more than one aligning
  combinations

* without --tree, or --mash, the order of multiz stitch is the same as the one from
  command line

* --tree > --order > --mash

* --outgroup uses basename, not full path. *DON'T* set --outgroup to target

* --taxon may also contain unused taxonomy terms, for the construction of chr_length.csv

* --preq is designed for NCBI ASSEMBLY and WGS, `path/seqdir` are directories containing multiple
  directories

* By default, `RAxML` is used to produce a phylotree. Turn on `--fasttree` to use FastTree, which is
  less accurate and doesn't support outgroups by itself

MARKDOWN

    return $desc;
}

sub validate_args {
    my ( $self, $opt, $args ) = @_;

    if ( @{$args} < 1 ) {
        my $message = "This command need one or more directories.\n\tIt found";
        $message .= sprintf " [%s]", $_ for @{$args};
        $message .= ".\n";
        $self->usage_error($message);
    }
    for ( @{$args} ) {
        if ( !Path::Tiny::path($_)->is_dir ) {
            $self->usage_error("The input directory [$_] doesn't exist.");
        }
    }

    if ( $opt->{mode} eq "multi" and @{$args} < 2 ) {
        $self->usage_error("Multiple alignments need at least 2 directories");
    }

    if ( $opt->{tree} ) {
        if ( !Path::Tiny::path( $opt->{tree} )->is_file ) {
            $self->usage_error("The tree file [$opt->{tree}] doesn't exist.");
        }
        else {
            $opt->{tree} = Path::Tiny::path( $opt->{tree} )->absolute()->stringify();
        }
    }

    if ( $opt->{taxon} ) {
        if ( !Path::Tiny::path( $opt->{taxon} )->is_file ) {
            $self->usage_error("The taxon file [$opt->{taxon}] doesn't exist.");
        }
        else {
            $opt->{taxon} = Path::Tiny::path( $opt->{taxon} )->absolute()->stringify();
        }

    }

    $opt->{outdir} = Path::Tiny::path( $opt->{outdir} )->absolute()->stringify();

    if ( !$opt->{multiname} ) {
        $opt->{multiname} = Path::Tiny::path( $opt->{outdir} )->basename();
    }

    $opt->{parallel2} = int( $opt->{parallel} / 2 );
    $opt->{parallel2} = 2 if $opt->{parallel2} < 2;

}

sub execute {
    my ( $self, $opt, $args ) = @_;

    print STDERR "Create templates for [$opt->{mode}] genome alignments\n";

    #----------------------------#
    # prepare working dir
    #----------------------------#
    $opt->{outdir} = Path::Tiny::path( $opt->{outdir} )->absolute();
    $opt->{outdir}->mkpath();
    $opt->{outdir} = $opt->{outdir}->stringify();
    print STDERR "Working directory [$opt->{outdir}]\n";



( run in 0.986 second using v1.01-cache-2.11-cpan-f56aa216473 )