Bio-MUST-Apps-FortyTwo

 view release on metacpan or  search on metacpan

bin/yaml-generator-42.pl  view on Meta::CPAN

#!/usr/bin/env perl
# PODNAME: yaml-generator-42.pl
# ABSTRACT: Interactive or batch generator for 42 YAML config files
# CONTRIBUTOR: Mick VAN VLIERBERGHE <mvanvlierberghe@doct.uliege.be>

# TODO: add support for wordsize (default 3) in BRH phase?
# TODO: optimize vertical spacing in output (very tricky)

use autodie;
use Modern::Perl '2011';

use Smart::Comments '###';
use Getopt::Euclid qw(:vars);

use Template;
use Path::Class qw(dir file);
use Tie::IxHash;
use List::AllUtils qw(mesh);
use File::Basename;
use File::Find::Rule;

use Bio::MUST::Core;
use aliased 'Bio::MUST::Core::IdList';
use aliased 'Bio::MUST::Core::IdMapper';
use aliased 'Bio::MUST::Core::SeqId';
use aliased 'Bio::MUST::Core::Taxonomy';
use Bio::FastParsers::Constants qw(:files);

use Term::Completion::Path;
use IO::Prompter [
    -verbatim,
    -style => 'blue strong ',
    -must  => { 'be a string' => qr{\S+} }
];
#    bold      => [qw<boldly strong heavy emphasis emphatic highlight highlighted fort forte>],
#    dark      => [qw<darkly dim deep>],
#    faint     => [qw<faintly light soft>],
#    underline => [qw<underlined underscore underscored italic italics>],
#    blink     => [qw<blinking flicker flickering flash flashing>],
#    reverse   => [qw<reversed inverse inverted>],
#    concealed => [qw<hidden blank invisible>],
#    reset     => [qw<normal default standard usual ordinary regular>],
#    bright_   => [qw< bright\s+ vivid\s+ >],
#    red       => [qw< scarlet vermilion crimson ruby cherry cerise cardinal carmine
#                      burgundy claret chestnut copper garnet geranium russet
#                      salmon titian coral cochineal rose cinnamon ginger gules >],
#    yellow    => [qw< gold golden lemon cadmium daffodil mustard primrose tawny
#                      amber aureate canary champagne citrine citron cream goldenrod honey straw >],
#    green     => [qw< olive jade pea emerald lime chartreuse forest sage vert >],
#    cyan      => [qw< aqua aquamarine teal turquoise ultramarine >],
#    blue      => [qw< azure cerulean cobalt indigo navy sapphire >],
#    magenta   => [qw< amaranthine amethyst lavender lilac mauve mulberry orchid periwinkle
#                      plum pomegranate violet purple aubergine cyclamen fuchsia modena puce
#                      purpure >],
#    black     => [qw< charcoal ebon ebony jet obsidian onyx raven sable slate >],
#    white     => [qw< alabaster ash chalk ivory milk pearl silver argent >],

# TODO: check whether IF/ELSE directives are needed in template for tax_ args

# define config file template
my $tt_str = <<'EOT';
# ===Run mode===
# Two values are available: 'phylogenomic' and 'metagenomic'.
# The phylogenomic mode is designed to enrich multiple sequence alignements
# (ALIs) with orthologues for subsequent phylogenomic analysis. In contrast,
# the metagenomic mode is designed to probe contamination in transcriptomic
# data using reference ribosomal protein ALIs; it produces a taxonomic report
# per ALI listing the lineage of each identified orthologous sequence.
# When not specified, 'run_mode' internally defaults to 'phylogenomic'.
run_mode: [% run_mode %]

# ===Suffix to append to infile basenames for deriving outfile names===
# When not specified 'outsuffix' internally defaults to '-42'.
# Use a bare 'out_suffix:' to reuse the ALI name and to preserve the original
# file by appending a .bak extension to its name.
out_suffix: [% out_suffix %]

# ===Orgs from where to select BLAST queries===
# Depending on availability at least one query by family and by org will be
# picked for the 'homologues' and 'references' BLAST rounds.
query_orgs:[% FOREACH org IN query_orgs.sort %]
    - [% org %][% END %]

# ===Optional args for each BLAST step===
# Any valid command-line option can be specified (see NCBIBLAST+ docs).
# Note the hyphens (-) before option names (departing from API consistency).
# -query, -db, -out, -outfmt, -db_gencode, -query_gencode will be ignored as
# they are directly handled by forty-two itself. -max_target_seqs may be
# specified at step 'homologues' to speed up things.
blast_args:
    # TBLASTN vs banks
    homologues:
        -evalue: [% evalue %]
        [% UNLESS SSUrRNA ~%]
        -seg: [% homologues_seg %]



( run in 0.619 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )