Bio-MUST-Core

 view release on metacpan or  search on metacpan

bin/jack-ali-dir.pl  view on Meta::CPAN

#!/usr/bin/env perl
# PODNAME: jack-ali-dir.pl
# ABSTRACT: Jackknife a directory of ALI files

use Modern::Perl '2011';
use autodie;

use Getopt::Euclid qw(:vars);
use Smart::Comments;

use File::Find::Rule;
use List::AllUtils qw(shuffle);
use Path::Class qw(file dir);
use POSIX;

use Bio::MUST::Core;
use Bio::MUST::Core::Constants qw(:dirs);
use aliased 'Bio::MUST::Core::Ali';
use aliased 'Bio::MUST::Core::SeqMask';


for my $indir (@ARGV_indirs) {

    ### Processing: $indir
    # list the plain files below the input directory whose names match the
    # Ali suffix pattern(s) exported by Bio::MUST::Core::Constants
    my @infiles = File::Find::Rule->file->name( $SUFFICES_FOR{Ali} )
                                  ->in($indir);

    # output directory name: <input basename>-jack-<replicates>-<width>[-dc]
    # (the '-dc' tag is only appended when constant sites are deleted)
    my $dirname = do {
        my $dc_tag = $ARGV_del_const ? '-dc' : q{};
        dir($indir)->basename . "-jack-$ARGV_replicates-$ARGV_width$dc_tag";
    };

    # create the directory, addressed as a relative path
    my $dir = dir($dirname)->relative;
    $dir->mkpath;

    # setup replicate numbering format
    # The field width must accommodate the largest replicate number, i.e. it
    # must equal the number of digits of $ARGV_replicates itself. The former
    # ceil( log(N) / log(10) ) estimate was one digit short whenever N is an
    # exact power of ten (10 => 1, 100 => 2, ...), and any log-based digit
    # count is at the mercy of floating-point rounding. Counting the digits of
    # the integer value is exact and yields the same result as before for all
    # other positive values.
    my $field = length( int $ARGV_replicates );

    # build replicates
    for my $rep (1..$ARGV_replicates) {
        # state for this replicate: selected Alis, their cumulated width, and
        # the input files in random order
        my @alis;
        my $width = 0;
        my @pool  = shuffle @infiles;

        # draw files from the pool until the target width is reached
        # (or until the pool is exhausted)
        ALI:
        while (my $path = shift @pool) {
            my $ali = Ali->load($path);

            # on request, restrict the Ali to the sites of its variable mask
            if ($ARGV_del_const) {
                my $variable_sites = SeqMask->variable_mask($ali);
                $ali->apply_mask($variable_sites);
            }

            # keep this Ali and account for its (possibly reduced) width
            push @alis, $ali;
            $width += $ali->width;

            # stop drawing as soon as enough sites have been collected
            if ($width >= $ARGV_width) {
                last ALI;
            }
        }

 view all matches for this distribution
 view release on metacpan -  search on metacpan

( run in 0.459 second using v1.00-cache-2.02-grep-82fe00e-cpan-2c419f77a38b )