Bio-MUST-Tools-Mcl

 view release on metacpan or  search on metacpan

bin/classify-mcl-out.pl  view on Meta::CPAN

#!/usr/bin/env perl
# PODNAME: classify-mcl-out.pl
# ABSTRACT: Classify MCL clusters based on taxonomic filters
# CONTRIBUTOR: Arnaud DI FRANCO <arnaud.difranco@gmail.com>

use Modern::Perl '2011';
use autodie;

use Config::Any;
use File::Basename;
use Getopt::Euclid qw(:vars);
use Path::Class;
use Smart::Comments;

use Bio::MUST::Core;
use Bio::MUST::Core::Utils qw(append_suffix);
use aliased 'Bio::MUST::Core::Taxonomy';
use aliased 'Bio::MUST::Core::IdList';


# Script setup: load the classifier configuration named on the command line,
# then build the taxonomy object and the classifier derived from it.
#
# Inputs (package variables exported by Getopt::Euclid's :vars):
#   $ARGV_config - path to the configuration file
#   $ARGV_taxdir - taxonomy directory handed to Taxonomy->new_from_cache
# Results:
#   $config      - hash ref keyed by config filename (flatten_to_hash)
#   $tax         - taxonomy object
#   $classifier  - classifier built from the loaded configuration
# Dies with an explanatory message when no usable configuration is available.

# refuse to go on without a config file name: nothing could be looked up
die "Error: no config file specified; aborting...\n"
    unless defined $ARGV_config;

# read configuration file
my $config = Config::Any->load_files( {
    files           => [ $ARGV_config ],
    flatten_to_hash => 1,
    use_ext         => 1,
} );
### config: $config->{$ARGV_config}

# The value returned above is a reference, hence always true in boolean
# context: testing $config itself can never detect a failure. What matters
# is whether an entry was actually produced for the requested file.
die "Error: cannot load config file '$ARGV_config'; aborting...\n"
    unless ref $config eq 'HASH' && defined $config->{$ARGV_config};

# build taxonomy and classifier objects
my $tax = Taxonomy->new_from_cache( tax_dir => $ARGV_taxdir );
my $classifier = $tax->tax_classifier( $config->{$ARGV_config} );

for my $infile (@ARGV_infiles) {

    ### Processing: $infile
    open my $in, '<', $infile;

    # create directory named after infile
    my ($filename) = fileparse($infile, qr{\.[^.]*}xms);
    my $dir = dir( $filename . '-classify' )->relative;
    $dir->mkpath();

    my %out_for;

    LINE:
    while (my $line = <$in>) {

        # process group line and extract members
        chomp $line;
        my ($group, $memb_str) = $line =~ m/(\S+) \s* : \s* (.*)/xms;
        my @members = split /\s+/xms, $memb_str;
        my $listable = IdList->new( ids => \@members );

        # classify group
        my $cat_label = $classifier->classify($listable);
        next LINE unless $cat_label;
        ### classified: "$group | $cat_label"

        # select adequate outfile based on label
        my $outfile = append_suffix($cat_label, '.txt');
        my $out = ( $out_for{$outfile} //= file($dir, $outfile)->openw );

        # output group line



( run in 1.913 second using v1.01-cache-2.11-cpan-0bb4e1dffa6 )