Bio-MUST-Core

 view release on metacpan or  search on metacpan

bin/classify-ali.pl  view on Meta::CPAN

#!/usr/bin/env perl
# PODNAME: classify-ali.pl
# ABSTRACT: Classify ALI files based on taxonomic filters

use Modern::Perl '2011';
use autodie;

use Config::Any;
use File::Basename;
use File::Find::Rule;
use Getopt::Euclid qw(:vars);
use Path::Class qw(dir file);
use Smart::Comments;

use Bio::MUST::Core;
use Bio::MUST::Core::Constants qw(:dirs);
use aliased 'Bio::MUST::Core::Ali';
use aliased 'Bio::MUST::Core::Taxonomy';


# read configuration file
# Refuse to go further without a config file name: an undefined name would
# only produce warnings here and an unusable classifier further down.
die "Error: no config file specified; aborting...\n"
    unless defined $ARGV_config;

# flatten_to_hash makes load_files return a single hash ref keyed by file name;
# use_ext selects the parser from the file extension.
my $config = Config::Any->load_files( {
    files           => [ $ARGV_config ],
    flatten_to_hash => 1,
    use_ext         => 1,
} );
### config: $config->{$ARGV_config}

# load_files hands back a hash ref even when it skipped the file (e.g. file
# not found or extension without a loader), so testing $config alone can
# never fail; the entry for our file is what tells us the load succeeded.
die "Error: cannot load config file '$ARGV_config'; aborting...\n"
    unless $config && $config->{$ARGV_config};

# build taxonomy and classifier objects
# The taxonomy is built from the cache under --taxdir and the classifier
# is derived from the specification loaded from the config file.
my $tax = Taxonomy->new_from_cache( tax_dir => $ARGV_taxdir );
my $classifier = $tax->tax_classifier( $config->{$ARGV_config} );

for my $indir (@ARGV_indirs) {

    ### Processing: $indir
    my @infiles = File::Find::Rule
        ->file()
        ->name( $SUFFICES_FOR{Ali} )
        ->in($indir)
    ;

    # create output directories named after input directory and categories
    my $outdir = dir($indir)->basename . '-classify';
    for my $cat ( $classifier->all_categories ) {
        my $subdir = dir( $outdir, $cat->label )->relative;
        $subdir->mkpath();
    }

    ALI:
    for my $infile (@infiles) {

        ### Processing: $infile
        my $ali = Ali->load($infile);
        $ali->dont_guess;

        # classify Ali
        my $cat_label = $classifier->classify($ali);
        ### classified to: $cat_label
        next ALI unless $cat_label;

        # store Ali in corresponding directory
        my $subdir = dir($outdir, $cat_label)->relative;

 view all matches for this distribution
 view release on metacpan -  search on metacpan

( run in 1.915 second using v1.00-cache-2.02-grep-82fe00e-cpan-2c419f77a38b )