Bio-MUST-Apps-Physeter

 view release on metacpan or  search on metacpan

bin/physeter.pl  view on Meta::CPAN

#!/usr/bin/env perl
# PODNAME: physeter.pl
# ABSTRACT: Taxonomic parser for BLAST reports
# CONTRIBUTOR: Valerian LUPO <valerian.lupo@doct.uliege.be>
# CONTRIBUTOR: Mick VAN VLIERBERGHE <mvanvlierberghe@doct.uliege.be>
# CONTRIBUTOR: Luc CORNET <luc.cornet@uliege.be>

use Modern::Perl '2011';
use autodie;

use Smart::Comments '###';

use Getopt::Euclid qw(:vars);

use File::Basename;
use File::Find::Rule;
use File::Slurp;
use Parallel::Batch;
use Path::Class 'file';
use POSIX qw(ceil);
use List::AllUtils qw(apply sum count_by shuffle);
use Tie::IxHash;

use Bio::MUST::Core;
use aliased 'Bio::MUST::Core::Ali';
use aliased 'Bio::MUST::Core::Taxonomy';
use Bio::FastParsers;
use aliased 'Bio::FastParsers::Blast::Table';


# Refuse to run when the hit-count bounds are inverted (min above max).
if ($ARGV_tax_min_hits > $ARGV_tax_max_hits) {
    die <<'EOT';
--tax-max-hits must be greater or equal to --tax-min-hits; aborting!
EOT
}

# Taxonomy object, loaded from the cache located via $ARGV_taxdir.
my $tax = Taxonomy->new_from_cache( tax_dir => $ARGV_taxdir );

# Labeler derived from the taxon list given on the command line.
# TODO: fix this (make it optional?)
my $labeler = $tax->tax_labeler_from_list( $ARGV_taxon_list );

# Optional k-fold mode: $subsets stays undef unless $ARGV_kfold is true.
my $subsets;
if ($ARGV_kfold) {

    # Seed the RNG only when a true seed value was supplied.
    # NOTE(review): a seed of 0 is false and thus never applied -- confirm
    # that this is intended.
    if ($ARGV_kfold_seed) {
        srand $ARGV_kfold_seed;
    }

    $subsets = split_db($ARGV_kfold);
}

# Single output file shared by every job (autodie aborts on a failed open).
open my $out, '>', $ARGV_outfile;

# Lines printed after each job.
# NOTE(review): presumably filled by process_file -- confirm in the sub.
my @results;

# Worker handed to Parallel::Batch; it is called with one job, i.e. one
# entry of @ARGV_infiles.
my $job_code = sub {
    my ($infile) = @_;
    ### Processing: $infile
    process_file($infile);

    # store results
    say {$out} join "\n", @results;
};

# Batch of jobs: one per infile, with $ARGV_threads as the process limit.
my $batch = Parallel::Batch->new( {
    code     => $job_code,
    jobs     => \@ARGV_infiles,
    maxprocs => $ARGV_threads,
} );

# launch jobs
$batch->run();

# functions

 view all matches for this distribution
 view release on metacpan -  search on metacpan

( run in 2.310 seconds using v1.00-cache-2.02-grep-82fe00e-cpan-72ae3ad1e6da )