Bio-MUST-Core
view release on metacpan or search on metacpan
bin/jack-ali-dir.pl view on Meta::CPAN
#!/usr/bin/env perl
# PODNAME: jack-ali-dir.pl
# ABSTRACT: Jackknife a directory of ALI files
use Modern::Perl '2011';
use autodie;
use Getopt::Euclid qw(:vars);
use Smart::Comments;
use File::Find::Rule;
use List::AllUtils qw(shuffle);
use Path::Class qw(file dir);
use POSIX;
use Bio::MUST::Core;
use Bio::MUST::Core::Constants qw(:dirs);
use aliased 'Bio::MUST::Core::Ali';
use aliased 'Bio::MUST::Core::SeqMask';
# TODO: output FASTA or p80 files?
# Process each input directory in turn: collect its ALI files, create a
# matching output directory, then build the requested jackknife replicates.
# NOTE(review): @ARGV_indirs, $ARGV_replicates, $ARGV_width and
# $ARGV_del_const are presumably set by Getopt::Euclid's :vars export from
# this script's POD interface (not visible in this excerpt) -- confirm.
for my $indir (@ARGV_indirs) {
# Smart::Comments is loaded, so the triple-hash line below is an active
# progress message rather than an inert comment; keep it exactly as written.
### Processing: $indir
# Plain files sitting directly in $indir (maxdepth(1) prevents descending
# into subdirectories) whose names match $SUFFICES_FOR{Ali}.
# NOTE(review): %SUFFICES_FOR is presumably exported by
# Bio::MUST::Core::Constants -- confirm.
my @infiles = File::Find::Rule
->file()
->maxdepth(1)
->name( $SUFFICES_FOR{Ali} )
->in($indir)
;
# create output directory named after input directory and settings
# i.e. <indir basename>-jack-<replicates>-<width>, with a '-dc' suffix
# appended when constant-site deletion is enabled
my $dirname = dir($indir)->basename
. "-jack-$ARGV_replicates-$ARGV_width"
. ($ARGV_del_const ? '-dc' : q{})
;
# Only the basename of $indir is used, so the output directory is a bare
# relative name and is not placed inside $indir itself.
my $dir = dir($dirname)->relative;
$dir->mkpath();
# setup replicate numbering format
# $field is ceil(log10(number of replicates)); its use is beyond the end of
# this excerpt -- presumably a zero-padding width for replicate numbers.
my $field = ceil( log($ARGV_replicates) / log(10) );
# build replicates
# Replicates are numbered from 0 to $ARGV_replicates-1.
for my $rep (0..$ARGV_replicates-1) {
# Each replicate draws from its own freshly shuffled copy of the file list.
my @pool = shuffle @infiles;
my @alis;
my $width = 0;
# Consume the shuffled pool one file at a time. The loop stops either when the
# target width is reached (last ALI below) or when the pool is exhausted, in
# which case the accumulated width may be smaller than $ARGV_width.
ALI:
while (my $infile = shift @pool) {
my $ali = Ali->load($infile);
# optionally delete constant sites
# NOTE(review): presumably variable_mask() selects the variable sites and
# apply_mask() modifies $ali in place -- confirm against SeqMask / Ali.
if ($ARGV_del_const) {
my $mask = SeqMask->variable_mask($ali);
$ali->apply_mask($mask);
}
# accumulate Ali files while total width is lower than target width
# The width is added after any masking above, and the Ali that crosses the
# threshold is kept, so the final total can exceed $ARGV_width.
push @alis, $ali;
$width += $ali->width;
last ALI if $width >= $ARGV_width;
( run in 0.321 second using v1.01-cache-2.11-cpan-ec4f86ec37b )