Bio-MUST-Apps-FortyTwo
view release on metacpan or search on metacpan
test/config-42-prot-tax.yaml view on Meta::CPAN
# value (relatively to the previous template). The exact effect of this
# parameter depends on the 'aligner_mode' engine: 'exonerate' will try to use
# the longest template for alignment while 'blast' will use each hit in turn
# (as a fall-back with 'exoblast'). New seqs can thus be added more than once
# to the ALI (with ids *.H1.N, *.H2.N etc).
# When not specified 'ali_cover_mul' internally defaults to 1.1., which means
# that if the BLAST alignment with the second template is at least 110% of the
# BLAST alignment with the first template, both templates are retained.
ali_cover_mul: 1.1
# ===Preservation switch for '#NEW#' tags from preexisting sequences===
# Two values are available: 'on' and 'off'.
# If set to 'on' (default), #NEW# tags will be preserved. Note that
# preexisting new sequences are invisible to 42 (they cannot be used as
# queries etc).
# The value is quoted so that YAML 1.1 loaders keep the literal string 'off'
# instead of resolving it to a boolean.
ali_keep_old_new_tags: 'off'
# ===Action to perform when a preexisting lengthened seq is identified===
# Currently, two values are available: 'remove' and 'keep'.
# The option is quite self-explanatory. It is useful when one runs 42 multiple
# times on the same ALIs to repeatedly enrich the same orgs, assuming that
# org banks are updated between runs.
# When not specified, 'ali_keep_lengthened_seqs' internally defaults to
# 'keep'.
# NOTE(review): the value set below is 'on', not 'keep' or 'remove'; this
# switch presumably takes 'on' (keep) / 'off' (remove) like
# 'ali_keep_old_new_tags' -- confirm against the 42 documentation.
# The value is quoted so that YAML 1.1 loaders keep the literal string 'on'
# instead of resolving it to a boolean.
ali_keep_lengthened_seqs: 'on'
# ===Taxonomic report switch===
# Two values are available: 'on' and 'off'.
# If set to 'on', the lineage of new seqs is inferred by analyzing the
# taxonomy of their ALI closest relatives and one '.tax-report' file is
# generated for each ALI processed (see 'run_mode' above).
# The details of this taxonomic analysis can be fine-tuned by editing the
# other tax_* parameters of this configuration file.
# When not specified, 'tax_reports' internally defaults to 'off'. Yet, the
# YAML generator automatically sets it to 'on' if 'run_mode' is 'metagenomic'.
# The value is quoted so that YAML 1.1 loaders keep the literal string 'on'
# instead of resolving it to a boolean.
tax_reports: 'on'
# ===Path to dir holding NCBI Taxonomy database===
# Only required when enabling 'tax_reports' or specifying 'tax_filter'.
# The database can be installed using setup-taxdir.pl.
tax_dir: test/taxdump-mini/
# ===Min number of relatives to use when inferring taxonomy of new seqs===
# Only meaningful when enabling 'tax_reports' or specifying 'tax_filter'.
# This parameter is a lower bound. The real number will depend both on the
# four thresholds below ('tax_min_ident', 'tax_min_len', 'tax_min_score' and
# 'tax_score_mul') and on the ability of 42 to deduce the taxonomy of each
# individual relative to compute the LCA (lowest common ancestor) of the new
# seq.
# When not specified, 'tax_min_hits' internally defaults to 1.
tax_min_hits: 1
# ===Max number of relatives to use when inferring taxonomy of new seqs===
# Only meaningful when enabling 'tax_reports' or specifying 'tax_filter'.
# As for 'tax_min_hits' above, this parameter is an upper bound.
# When not specified, 'tax_max_hits' internally defaults to unlimited.
tax_max_hits: 1
# ===Min identity of relatives to use when inferring taxonomy of new seqs===
# Only meaningful when enabling 'tax_reports' or specifying 'tax_filter'.
# This parameter is the traditional BLAST 'percent identity' statistic except
# that it is specified as a fractional number (between 0 and 1). It is
# evaluated on the first HSP of potential relatives.
# When not specified, 'tax_min_ident' internally defaults to 0.
tax_min_ident: 0
# ===Min length of relatives to use when inferring taxonomy of new seqs===
# Only meaningful when enabling 'tax_reports' or specifying 'tax_filter'.
# This parameter is the traditional BLAST 'alignment length' statistic. It is
# evaluated on the first HSP of potential relatives.
# When not specified, 'tax_min_len' internally defaults to 0.
tax_min_len: 0
# ===Min bit score of relatives to use when inferring taxonomy of new seqs===
# Only meaningful when enabling 'tax_reports' or specifying 'tax_filter'.
# This parameter is the traditional BLAST 'bit score' statistic. It is
# evaluated on the first HSP of potential relatives.
# When not specified, 'tax_min_score' internally defaults to 0.
tax_min_score: 0
# ===Bit score reduction allowed when inferring taxonomy of new seqs===
# Only meaningful when enabling 'tax_reports' or specifying 'tax_filter'.
# The allowed bit score reduction of any relative is expressed relative to
# the score of the FIRST relative (as in the MEGAN algorithm).
# When not specified, 'tax_score_mul' internally defaults to 0.
tax_score_mul: 0
# ===Path to dir holding transcript BLAST databases===
# NOTE(review): the path below points at a 'proteomes' directory although the
# heading says 'transcript' -- presumably generic generator wording; confirm.
bank_dir: test/candidates/proteomes/
# ===Default args applying to all orgs unless otherwise specified===
# Some of these args can thus be specified on a per-org basis below if needed.
# This especially makes sense for 'code'.
defaults:
  # ===Genetic code for translated BLAST searches===
  # When not specified 'code' internally defaults to 1 (standard).
  # See ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt for other codes.
  # Must stay nested under 'defaults:'; at column 0 it would be parsed as a
  # separate top-level key and 'defaults' would be left empty.
  code: 1
# ===Org-specific args===
# The only mandatory args are 'org' and 'banks'. All other args are taken from
# the 'defaults:' section described above.
# This part can be concatenated on a per-run basis to the previous part, which
# would be the same for several runs. In the future, forty-two might support
# two different configuration files to reflect this conceptual distinction.
# Each list entry is one organism: 'org' is its name and 'banks' lists its
# banks. 'banks' must stay nested inside its entry; at column 0 it would
# become a duplicated top-level key.
orgs:
  - org: Apis mellifera
    banks:
      - Apis_mellifera
  - org: Harpegnathos saltator
    banks:
      - Harpegnathos_saltator
#
# This config file has been generated automatically on 17:31:03 31-Mar-2020.
# We advise not to modify directly this file manually but rather to modify the
# yaml-generator command instead for traceability and reproducibility.
#
#yaml-generator-42.pl --run_mode=phylogenomic --out_suffix=-my-42-prot-tax \
#--queries test/MSAs/queries.idl \
#--evalue=1e-05 --homologues_seg=yes --max_target_seqs=10000 --templates_seg=no \
( run in 1.527 second using v1.01-cache-2.11-cpan-e93a5daba3e )