Bio-MUST-Core

 view release on metacpan or  search on metacpan

bin/jack-ali-dir.pl  view on Meta::CPAN

#!/usr/bin/env perl
# PODNAME: jack-ali-dir.pl
# ABSTRACT: Jackknife a directory of ALI files

use Modern::Perl '2011';
use autodie;

use Getopt::Euclid qw(:vars);
use Smart::Comments;

use File::Find::Rule;
use List::AllUtils qw(shuffle);
use Path::Class qw(file dir);
use POSIX;

use Bio::MUST::Core;
use Bio::MUST::Core::Constants qw(:dirs);
use aliased 'Bio::MUST::Core::Ali';
use aliased 'Bio::MUST::Core::SeqMask';

# TODO: output FASTA or p80 files?

for my $indir (@ARGV_indirs) {

    ### Processing: $indir
    my @infiles = File::Find::Rule
        ->file()
        ->maxdepth(1)
        ->name( $SUFFICES_FOR{Ali} )
        ->in($indir)
    ;

    # create output directory named after input directory and settings
    my $dirname = dir($indir)->basename
        . "-jack-$ARGV_replicates-$ARGV_width"
        . ($ARGV_del_const ? '-dc' : q{})
    ;
    my $dir = dir($dirname)->relative;
    $dir->mkpath();

    # set up replicate numbering format
    my $field = ceil( log($ARGV_replicates) / log(10) );

    # build replicates
    for my $rep (0..$ARGV_replicates-1) {
        my @pool = shuffle @infiles;

        my @alis;
        my $width = 0;

        ALI:
        while (my $infile = shift @pool) {
            my $ali = Ali->load($infile);

            # optionally delete constant sites
            if ($ARGV_del_const) {
                my $mask = SeqMask->variable_mask($ali);
                $ali->apply_mask($mask);
            }

            # accumulate Ali files until total width reaches the target width
            push @alis, $ali;
            $width += $ali->width;

            last ALI if $width >= $ARGV_width;



( run in 0.308 second using v1.01-cache-2.11-cpan-ec4f86ec37b )