Bio-MUST-Apps-FortyTwo
view release on metacpan or search on metacpan
bin/yaml-generator-42.pl view on Meta::CPAN
# queries etc).
ali_keep_old_new_tags: [% ali_keep_old_new_tags %]
# ===Action to perform when a preexisting lengthened seq is identified===
# Currently, two values are available: 'remove' and 'keep'.
# The option is quite self-explanatory. It is useful when one runs 42 multiple
# times on the same ALIs to repeatedly enrich the same orgs, assuming that
# org banks are updated between runs.
# When not specified, 'ali_keep_lengthened_seqs' internally defaults to
# 'keep'.
ali_keep_lengthened_seqs: [% ali_keep_lengthened_seqs %]
# ===Taxonomic report switch===
# Two values are available: 'on' and 'off'.
# If set to 'on', the lineage of new seqs is inferred by analyzing the
# taxonomy of their ALI closest relatives and one '.tax-report' file is
# generated for each ALI processed (see 'run_mode' above).
# The details of this taxonomic analysis can be fine-tuned by editing the
# other tax_* parameters of this configuration file.
# When not specified, 'tax_reports' internally defaults to 'off'. Yet, the
# YAML wizard automatically sets it to 'on' if 'run_mode' is 'metagenomic'.
[% IF tax_reports -%]
tax_reports: [% tax_reports %]
[% ELSE -%]
tax_reports: on
[% END -%]
[% IF tax_dir -%]
# ===Path to dir holding NCBI Taxonomy database===
# Only required when enabling 'tax_reports' or specifying 'tax_filter'.
# It can be installed using setup-taxdir.pl.
tax_dir: [% tax_dir %]
# ===Min number of relatives to use when inferring taxonomy of new seqs===
# Only meaningful when enabling 'tax_reports' or specifying 'tax_filter'.
# This parameter is a lower bound. The real number will depend both on the
# four thresholds below ('tax_min_ident', 'tax_min_len', 'tax_min_score' and
# 'tax_score_mul') and on the ability of 42 to deduce the taxonomy of each
# individual relative to compute the LCA of the new seq.
# When not specified, 'tax_min_hits' internally defaults to 1.
[% IF tax_min_hits -%]
tax_min_hits: [% tax_min_hits %]
[% ELSE -%]
tax_min_hits: 1
[% END -%]
# ===Max number of relatives to use when inferring taxonomy of new seqs===
# Only meaningful when enabling 'tax_reports' or specifying 'tax_filter'.
# As for 'tax_min_hits' above, this parameter is an upper bound.
# When not specified, 'tax_max_hits' internally defaults to unlimited.
[% IF megan_like -%]
tax_max_hits: 100
[% ELSIF best_hit -%]
tax_max_hits: 1
[% ELSIF tax_max_hits -%]
tax_max_hits: [% tax_max_hits %]
[% END -%]
# ===Min identity of relatives to use when inferring taxonomy of new seqs===
# Only meaningful when enabling 'tax_reports' or specifying 'tax_filter'.
# This parameter is the traditional BLAST 'percent identity' statistics except
# that it is specified as a fractional number (between 0 and 1). It is
# evaluated on the first HSP of potential relatives.
# When not specified, 'tax_min_ident' internally defaults to 0.
[% IF megan_like -%]
tax_min_ident: 0
[% ELSIF best_hit -%]
tax_min_ident: 0
[% ELSIF tax_min_ident -%]
tax_min_ident: [% tax_min_ident %]
[% END -%]
# ===Min length of relatives to use when inferring taxonomy of new seqs===
# Only meaningful when enabling 'tax_reports' or specifying 'tax_filter'.
# This parameter is the traditional BLAST 'alignment length' statistics. It is
# evaluated on the first HSP of potential relatives.
# When not specified, 'tax_min_len' internally defaults to 0.
[% IF megan_like -%]
tax_min_len: 0
[% ELSIF best_hit -%]
tax_min_len: 0
[% ELSIF tax_min_len -%]
tax_min_len: [% tax_min_len %]
[% END -%]
# ===Min bit score of relatives to use when inferring taxonomy of new seqs===
# Only meaningful when enabling 'tax_reports' or specifying 'tax_filter'.
# This parameter is the traditional BLAST 'bit score' statistics. It is
# evaluated on the first HSP of potential relatives.
# When not specified, 'tax_min_score' internally defaults to 0.
[% IF megan_like -%]
tax_min_score: 80
[% ELSIF best_hit -%]
tax_min_score: 0
[% ELSIF tax_min_score -%]
tax_min_score: [% tax_min_score %]
[% END -%]
# ===Bit score reduction allowed when inferring taxonomy of new seqs===
# Only meaningful when enabling 'tax_reports' or specifying 'tax_filter'.
# The allowed bit score reduction of any relative is expressed relatively to
# the score of the FIRST relative (as in MEGAN algorithm).
# When not specified, 'tax_score_mul' internally defaults to 0.
[% IF megan_like -%]
tax_score_mul: 0.95
[% ELSIF best_hit -%]
tax_score_mul: 0
[% ELSIF tax_score_mul -%]
tax_score_mul: [% tax_score_mul %]
[% END -%]
[% END -%]
# ===Path to dir holding transcript BLAST databases===
bank_dir: [% bank_dir %]
# ===Default args applying to all orgs unless otherwise specified===
# Some of these args can be thus specified on a per-org basis below if needed.
# This especially makes sense for 'code'.
defaults:
# ===Genetic code for translated BLAST searches===
# When not specified 'code' internally defaults to 1 (standard).
bin/yaml-generator-42.pl view on Meta::CPAN
-def => $ARGV{'--aligner_mode'},
'>';
# ali_skip_self
$ARGV{'--ali_skip_self'} = prompt "\nSet patch mode: ",
-menu => { 'off [default]' => $ARGV{'--ali_skip_self'}, on => 'on' },
-def => $ARGV{'--ali_skip_self'}, '>';
# TAXONOMIC FILTER
unless ($tax_filter) {
$tax_filter = prompt "\nIs a taxonomic filter needed?", -menu => { Yes => 1, no => 0 };
if ($tax_filter) {
$tf_auto = prompt "\nSet tax filter input: ",
-menu => { 'from org mapper file' => 0,
"from NCBI's taxonomy - auto + prompt for missing" => 1,
"from NCBI's taxonomy - auto + prompt for all" => 2,
};
if ( $tf_auto == 1 ) {
@{ $ARGV{'--levels'} } = split ",", ( prompt "\nLevels separated by a coma (no whitespace): ");
}
}
# TAXONOMIC AFFILIATION
$tax_filter = prompt "\nIs taxonomic classification needed?", -menu => { Yes => 1, no => 0 };
}
}
else {
$ARGV{'--tax_reports'} = 'on';
$ARGV{'--aligner_mode'} = 'off';
$ARGV{'--merge_orthologues'} = 'off';
}
if ($ARGV{'--run_mode'} eq 'metagenomic' || $tax_filter) {
# TAX DIR
$ARGV{'--tax_dir'} = prompt4dir("\nEnter path to taxdump directory: ");
my $tax_aff = prompt "\nChoose taxonomic affiliation mode: ",
-menu => { 'megan-like' => '--megan_like', 'best-hit' => '--best_hit' },
-def => 1;
$ARGV{"$tax_aff"} = 1;
# BEST_HIT
if ( $ARGV{'--best_hit'} ) {
say "The '--best_hit' flag autosets the following parameter:\n'--tax_max_hits' = 1";
$ARGV{'--tax_max_hits'} = 1;
my $hit_filtering = prompt "\nSet hit-filtering mode: ",
-menu => { 'default values' => 0, 'length/identity' => 'length_identity', 'Bit score' => 'bitscore' };
if ($hit_filtering eq 'bitscore') {
$ARGV{'--tax_min_score'} = prompt "\nSet minimum bit score to consider a hit: ",
-must => { 'be an integer' => qr{^[0-9]+\z} },
-def => $ARGV{'--tax_min_score'};
}
if ($hit_filtering eq 'length_identity') {
$ARGV{'--tax_min_ident'} = prompt "\nSet minimum percentage of identity to consider a hit: ",
-must => { 'be a number between 0 and 1' => qr{^[0-1](?:\.\d+)?}xms },
-def => $ARGV{'--tax_min_ident'};
$ARGV{'--tax_min_len'} = prompt "\nSet minimum length to consider a hit: ",
-must => { 'be an integer' => qr{^[1-9]+} },
-def => $ARGV{'--tax_min_len'};
}
}
# MEGAN_LIKE
if ($ARGV{'--megan_like'}) {
say "\nThe '--megan_like' flag autosets the following parameters:\n'--tax_min_score' = 80,\n'--tax_score_mul' = 0.95,\n'--tax_min_ident' = 0,\n'--tax_min_len' = 0.";
say "\nFeel free to modify the default settings according to your own flavors.";
$ARGV{'--tax_min_ident'} = 0;
$ARGV{'--tax_min_len'} = 0;
$ARGV{'--tax_min_score'} = 80;
$ARGV{'--tax_score_mul'} = 0.95;
}
}
}
# BANKS
$ARGV{'--bank_dir'} = prompt4dir("\nEnter path to your candidate genomes directory: ");
$ARGV{'--bank_suffix'} = prompt4suffix("\nSet candiate banks suffix: ", $ARGV{'--bank_dir'});
$ARGV{'--bank_mapper'} = prompt4file("\nEnter path to bank mapper file: ");
# CODE
$ARGV{'--code'} = prompt "\nGenetic code for translated BLAST searches [default: 1]: ",
-must => { 'be an integer' => qr{^[1-9]+} },
-def => $ARGV{'--code'};
}
#### %ARGV
# tol arguments
# When a TOL database path is given, derive from it the bank name (the file
# basename) and the absolute path of the directory that contains it.
if ( $ARGV{'--tol_db'} ) {
    my $tol_db = Path::Class::File->new( $ARGV{'--tol_db'} );
    $ARGV{'--tol_bank'}     = $tol_db->basename;
    $ARGV{'--tol_bank_dir'} = $tol_db->dir->absolute->stringify;
}

# convert arguments to values for placeholders
# Each key of %vars is an option name with every '-', '<' and '>' removed;
# the value is whatever %ARGV holds for that option.
my %vars = map { tr/-<>//dr => $ARGV{$_} } keys %ARGV;

# bank files
# Collect plain files located directly in the bank directory (depth 1,
# names relative to that directory) whose name ends with the bank suffix,
# compared case-insensitively.
# The suffix is wrapped in \Q...\E so that it is matched literally: without
# it, the '.' of a suffix such as '.nsq' would match any character and, due
# to /x, whitespace or '#' in the suffix would be silently dropped.
my @bank_files = File::Find::Rule
    ->file()
    ->relative()
    ->maxdepth(1)
    ->name( qr{ \Q$ARGV{'--bank_suffix'}\E $}xmsi )
    ->in($ARGV{'--bank_dir'})
;

# org mapper: from file names or file
# Supplying '--levels' implies the automatic (taxonomy-based) tax filter.
$ARGV{'--choose_tax_filter'} = 1 if $ARGV{'--levels'};

# NOTE(review): this unconditionally replaces any value $tf_auto received
# earlier (e.g. from an interactive prompt) -- confirm this is intended.
$tf_auto = $ARGV{'--choose_tax_filter'};

# Only build the taxonomy object when the automatic tax filter is enabled.
# The declaration is kept unconditional on purpose: 'my $x = ... if COND'
# has undefined behaviour according to perlsyn.
my $tax = $tf_auto
    ? Taxonomy->new_from_cache( tax_dir => $ARGV{'--tax_dir'} )
    : undef;
#### lvl: $ARGV{'--levels'}
( run in 0.656 second using v1.01-cache-2.11-cpan-8f98c5d2c55 )