Bio-MUST-Apps-Physeter
view release on metacpan - search on metacpan
view release on metacpan or search on metacpan
bin/physeter.pl view on Meta::CPAN
#!/usr/bin/env perl
# PODNAME: physeter.pl
# ABSTRACT: Taxonomic parser for BLAST reports
# CONTRIBUTOR: Valerian LUPO <valerian.lupo@doct.uliege.be>
# CONTRIBUTOR: Mick VAN VLIERBERGHE <mvanvlierberghe@doct.uliege.be>
# CONTRIBUTOR: Luc CORNET <luc.cornet@uliege.be>
use Modern::Perl '2011';
use autodie;
use Smart::Comments '###';
use Getopt::Euclid qw(:vars);
use File::Basename;
use File::Find::Rule;
use File::Slurp;
use Parallel::Batch;
use Path::Class 'file';
use POSIX qw(ceil);
use List::AllUtils qw(apply sum count_by shuffle);
use Tie::IxHash;
use Bio::MUST::Core;
use aliased 'Bio::MUST::Core::Ali';
use aliased 'Bio::MUST::Core::Taxonomy';
use Bio::FastParsers;
use aliased 'Bio::FastParsers::Blast::Table';
# Sanity check on the hit-count options: refuse to run when the minimum
# number of hits exceeds the maximum.
if ($ARGV_tax_min_hits > $ARGV_tax_max_hits) {
    die "--tax-max-hits must be greater or equal to --tax-min-hits; aborting!\n";
}
# build taxonomy object from the cache located under $ARGV_taxdir
my $tax = Taxonomy->new_from_cache(tax_dir => $ARGV_taxdir);

# TODO: fix this (make it optional?)
# build labeler from the user-supplied taxon list
my $labeler = $tax->tax_labeler_from_list($ARGV_taxon_list);

# k-folds method
# $subsets remains undef unless k-fold mode is requested
my $subsets;
if ($ARGV_kfold) {
    # Seed the RNG only when a seed was actually supplied. Test definedness
    # rather than truth so that an explicit seed of 0 is honored as well.
    srand($ARGV_kfold_seed) if defined $ARGV_kfold_seed;
    $subsets = split_db($ARGV_kfold);
}
# Shared state captured by the worker below: the result lines and the
# output handle (autodie makes a failed open fatal).
my @results;
open my $out, '>', $ARGV_outfile;

# Worker run once per input file: process the file, then write whatever is
# in @results to the output file, one entry per line.
# NOTE(review): @results is not filled in this section -- presumably
# process_file() populates it; confirm against its definition.
my $process_job = sub {
    my ($infile) = @_;
    ### Processing: $infile
    process_file($infile);

    # store results
    my $report = join "\n", @results;
    say {$out} $report;
};

# One job per input file, with at most $ARGV_threads jobs at a time.
my %batch_args = (
    maxprocs => $ARGV_threads,
    jobs     => \@ARGV_infiles,
    code     => $process_job,
);
my $batch = Parallel::Batch->new( \%batch_args );

# launch jobs
$batch->run();
# functions
view all matches for this distribution
view release on metacpan - search on metacpan
( run in 2.310 seconds using v1.00-cache-2.02-grep-82fe00e-cpan-72ae3ad1e6da )