Bio-MUST-Apps-FortyTwo

 view release on metacpan or  search on metacpan

bin/yaml-generator-42.pl  view on Meta::CPAN

# queries etc).
ali_keep_old_new_tags: [% ali_keep_old_new_tags %]

# ===Action to perform when a preexisting lengthened seq is identified===
# Currently, two values are available: 'remove' and 'keep'.
# The option is quite self-explanatory. It is useful when one runs 42 multiple
# times on the same ALIs to repeatedly enrich the same orgs, assuming that
# org banks are updated between runs.
# When not specified, 'ali_keep_lengthened_seqs' internally defaults to
# 'keep'.
ali_keep_lengthened_seqs: [% ali_keep_lengthened_seqs %]

# ===Taxonomic report switch===
# Two values are available: 'on' and 'off'.
# If set to 'on', the lineage of new seqs is inferred by analyzing the
# taxonomy of their ALI closest relatives and one '.tax-report' file is
# generated for each ALI processed (see 'run_mode' above).
# The details of this taxonomic analysis can be fine-tuned by editing the
# other tax_* parameters of this configuration file.
# When not specified, 'tax_reports' internally defaults to 'off'. Yet, the
# YAML wizard automatically sets it to 'on' if 'run_mode' is 'metagenomic'.
[% IF tax_reports -%]
tax_reports: [% tax_reports %]
[% ELSE -%]
tax_reports: on
[% END -%]

[% IF tax_dir -%]
# ===Path to dir holding NCBI Taxonomy database===
# Only required when enabling 'tax_reports' or specifying 'tax_filter'.
# It can be installed using setup-taxdir.pl.
tax_dir: [% tax_dir %]

# ===Min number of relatives to use when inferring taxonomy of new seqs===
# Only meaningful when enabling 'tax_reports' or specifying 'tax_filter'.
# This parameter is a lower bound. The real number will depend both on the
# four thresholds below ('tax_min_ident', 'tax_min_len', 'tax_min_score' and
# 'tax_score_mul') and on the ability of 42 to deduce the taxonomy of each
# individual relative to compute the LCA of the new seq.
# When not specified, 'tax_min_hits' internally defaults to 1.
[% IF tax_min_hits -%]
tax_min_hits: [% tax_min_hits %]
[% ELSE -%]
tax_min_hits: 1
[% END -%]

# ===Max number of relatives to use when inferring taxonomy of new seqs===
# Only meaningful when enabling 'tax_reports' or specifying 'tax_filter'.
# Like 'tax_min_hits' above, this parameter is a bound (here, an upper bound).
# When not specified, 'tax_max_hits' internally defaults to unlimited.
[% IF megan_like -%]
tax_max_hits: 100
[% ELSIF best_hit -%]
tax_max_hits: 1
[% ELSIF tax_max_hits -%]
tax_max_hits: [% tax_max_hits %]
[% END -%]

# ===Min identity of relatives to use when inferring taxonomy of new seqs===
# Only meaningful when enabling 'tax_reports' or specifying 'tax_filter'.
# This parameter is the traditional BLAST 'percent identity' statistics except
# that it is specified as a fractional number (between 0 and 1). It is
# evaluated on the first HSP of potential relatives.
# When not specified, 'tax_min_ident' internally defaults to 0.
[% IF megan_like -%]
tax_min_ident: 0
[% ELSIF best_hit -%]
tax_min_ident: 0
[% ELSIF tax_min_ident -%]
tax_min_ident: [% tax_min_ident %]
[% END -%]

# ===Min length of relatives to use when inferring taxonomy of new seqs===
# Only meaningful when enabling 'tax_reports' or specifying 'tax_filter'.
# This parameter is the traditional BLAST 'alignment length' statistics. It is
# evaluated on the first HSP of potential relatives.
# When not specified, 'tax_min_len' internally defaults to 0.
[% IF megan_like -%]
tax_min_len: 0
[% ELSIF best_hit -%]
tax_min_len: 0
[% ELSIF tax_min_len -%]
tax_min_len: [% tax_min_len %]
[% END -%]

# ===Min bit score of relatives to use when inferring taxonomy of new seqs===
# Only meaningful when enabling 'tax_reports' or specifying 'tax_filter'.
# This parameter is the traditional BLAST 'bit score' statistics. It is
# evaluated on the first HSP of potential relatives.
# When not specified, 'tax_min_score' internally defaults to 0.
[% IF megan_like -%]
tax_min_score: 80
[% ELSIF best_hit -%]
tax_min_score: 0
[% ELSIF tax_min_score -%]
tax_min_score: [% tax_min_score %]
[% END -%]

# ===Bit score reduction allowed when inferring taxonomy of new seqs===
# Only meaningful when enabling 'tax_reports' or specifying 'tax_filter'.
# The allowed bit score reduction of any relative is expressed relatively to
# the score of the FIRST relative (as in MEGAN algorithm).
# When not specified, 'tax_score_mul' internally defaults to 0.
[% IF megan_like -%]
tax_score_mul: 0.95
[% ELSIF best_hit -%]
tax_score_mul: 0
[% ELSIF tax_score_mul -%]
tax_score_mul: [% tax_score_mul %]
[% END -%]
[% END -%]

# ===Path to dir holding transcript BLAST databases===
bank_dir: [% bank_dir %]

# ===Default args applying to all orgs unless otherwise specified===
# Some of these args can be thus specified on a per-org basis below if needed.
# This especially makes sense for 'code'.
defaults:
    # ===Genetic code for translated BLAST searches===
    # When not specified 'code' internally defaults to 1 (standard).

bin/yaml-generator-42.pl  view on Meta::CPAN

                -def => $ARGV{'--aligner_mode'},
                '>';


            # ali_skip_self
            $ARGV{'--ali_skip_self'} = prompt "\nSet patch mode: ",
                -menu => { 'off [default]' => $ARGV{'--ali_skip_self'}, on => 'on' },
                -def => $ARGV{'--ali_skip_self'}, '>';

            # TAXONOMIC FILTER
            unless ($tax_filter) {
                $tax_filter = prompt "\nIs a taxonomic filter needed?", -menu => { Yes => 1, no => 0 };

                if ($tax_filter) {
                    $tf_auto = prompt "\nSet tax filter input: ",
                        -menu =>  { 'from org mapper file' => 0,
                                    "from NCBI's taxonomy - auto + prompt for missing" => 1,
                                    "from NCBI's taxonomy - auto + prompt for all" => 2,
                                  };

                    if ( $tf_auto == 1 ) {
                        @{ $ARGV{'--levels'} } = split ",", ( prompt "\nLevels separated by a coma (no whitespace): ");
                    }
                }

                # TAXONOMIC AFFILIATION
                $tax_filter = prompt "\nIs taxonomic classification needed?", -menu => { Yes => 1, no => 0 };
            }
        }

        else {
            $ARGV{'--tax_reports'} = 'on';
            $ARGV{'--aligner_mode'} = 'off';
            $ARGV{'--merge_orthologues'} = 'off';
        }

        if ($ARGV{'--run_mode'} eq 'metagenomic' || $tax_filter) {

            # TAX DIR
            $ARGV{'--tax_dir'} = prompt4dir("\nEnter path to taxdump directory: ");

            my $tax_aff = prompt "\nChoose taxonomic affiliation mode: ",
                -menu => { 'megan-like' => '--megan_like', 'best-hit' => '--best_hit' },
                -def => 1;
            $ARGV{"$tax_aff"} = 1;

            # BEST_HIT
            if ( $ARGV{'--best_hit'} ) {
                say "The '--best_hit' flag autosets the following parameter:\n'--tax_max_hits' = 1";
                $ARGV{'--tax_max_hits'} = 1;

                my $hit_filtering = prompt "\nSet hit-filtering mode: ",
                    -menu => { 'default values' => 0, 'length/identity' => 'length_identity', 'Bit score' => 'bitscore' };

                if ($hit_filtering eq 'bitscore') {
                    $ARGV{'--tax_min_score'} = prompt "\nSet minimum bit score to consider a hit: ",
                        -must => { 'be an integer' => qr{^[0-9]+\z} },
                        -def  => $ARGV{'--tax_min_score'};
                }
                if ($hit_filtering eq 'length_identity') {
                    $ARGV{'--tax_min_ident'} = prompt "\nSet minimum percentage of identity to consider a hit: ",
                        -must => { 'be a number between 0 and 1' => qr{^[0-1](?:\.\d+)?}xms },
                        -def  => $ARGV{'--tax_min_ident'};

                    $ARGV{'--tax_min_len'} = prompt "\nSet minimum length to consider a hit: ",
                        -must => { 'be an integer' => qr{^[1-9]+} },
                        -def  => $ARGV{'--tax_min_len'};
                }
            }

            # MEGAN_LIKE
            if ($ARGV{'--megan_like'}) {
                say "\nThe '--megan_like' flag autosets the following parameters:\n'--tax_min_score' = 80,\n'--tax_score_mul' = 0.95,\n'--tax_min_ident' = 0,\n'--tax_min_len' = 0.";
                say "\nFeel free to modify the default settings according to your own flavors.";

                $ARGV{'--tax_min_ident'} = 0;
                $ARGV{'--tax_min_len'}   = 0;
                $ARGV{'--tax_min_score'} = 80;
                $ARGV{'--tax_score_mul'} = 0.95;
            }
        }
    }

    # BANKS
    $ARGV{'--bank_dir'}    = prompt4dir("\nEnter path to your candidate genomes directory: ");
    $ARGV{'--bank_suffix'} = prompt4suffix("\nSet candiate banks suffix: ", $ARGV{'--bank_dir'});
    $ARGV{'--bank_mapper'} = prompt4file("\nEnter path to bank mapper file: ");

    # CODE
    $ARGV{'--code'} = prompt "\nGenetic code for translated BLAST searches [default: 1]: ",
    -must => { 'be an integer' => qr{^[1-9]+} },
    -def => $ARGV{'--code'};

}

#### %ARGV

# tol arguments
# Split the '--tol_db' path into its file name ('--tol_bank') and the
# absolute path of its directory ('--tol_bank_dir').
if ( $ARGV{'--tol_db'} ) {
    my $file = Path::Class::File->new($ARGV{'--tol_db'});
    $ARGV{'--tol_bank'}     = $file->basename;
    $ARGV{'--tol_bank_dir'} = $file->dir->absolute->stringify;
}

# convert arguments to values for placeholders
# Every '-', '<' and '>' character is deleted from the option names, so that
# e.g. '--bank_dir' becomes the key 'bank_dir'.
my %vars = map { tr/-<>//dr => $ARGV{$_} } keys %ARGV;

# bank files
# Plain files located directly in '--bank_dir' (no recursion) whose name ends
# with '--bank_suffix' (case-insensitive); paths are relative to '--bank_dir'.
# NOTE(review): the suffix is interpolated into the regex unescaped, so
# metacharacters such as '.' keep their regex meaning -- confirm whether this
# is intended before adding \Q...\E.
my @bank_files = File::Find::Rule
    ->file()
    ->relative()
    ->maxdepth(1)
    ->name( qr{ $ARGV{'--bank_suffix'} $}xmsi )
    ->in($ARGV{'--bank_dir'})
;
# org mapper: from file names or file
# Specifying '--levels' implies '--choose_tax_filter'.
$ARGV{'--choose_tax_filter'} = 1 if $ARGV{'--levels'};
$tf_auto = $ARGV{'--choose_tax_filter'};

# The taxonomy object is only built when a tax filter has to be chosen.
# Declaration and conditional assignment are kept separate because the
# behaviour of 'my $x = ... if COND;' is undefined in Perl (see perlsyn).
my $tax;
$tax = Taxonomy->new_from_cache( tax_dir => $ARGV{'--tax_dir'} ) if $tf_auto;
#### lvl: $ARGV{'--levels'}



( run in 0.656 second using v1.01-cache-2.11-cpan-8f98c5d2c55 )