Bio-MUST-Tools-Mcl
view release on metacpan or search on metacpan
bin/classify-mcl-out.pl view on Meta::CPAN
#!/usr/bin/env perl
# PODNAME: classify-mcl-out.pl
# ABSTRACT: Classify MCL clusters based on taxonomic filters
# CONTRIBUTOR: Arnaud DI FRANCO <arnaud.difranco@gmail.com>
use Modern::Perl '2011';
use autodie;
use Config::Any;
use File::Basename;
use Getopt::Euclid qw(:vars);
use Path::Class;
use Smart::Comments;
use Bio::MUST::Core;
use Bio::MUST::Core::Utils qw(append_suffix);
use aliased 'Bio::MUST::Core::Taxonomy';
use aliased 'Bio::MUST::Core::IdList';
# read configuration file
# The config file is loaded through Config::Any, which selects a parser from
# the file extension (use_ext => 1) and, with flatten_to_hash => 1, returns a
# hash reference keyed by file name.
die "Error: no config file specified; aborting...\n"
    unless defined $ARGV_config;

my $config = Config::Any->load_files( {
    files           => [ $ARGV_config ],
    flatten_to_hash => 1,
    use_ext         => 1,
} );

# Config::Any hands back a hash reference even when nothing could be loaded
# (e.g., the file does not exist), so testing the truth of $config alone can
# never fail. What matters is that an entry for the requested file is present.
die "Error: cannot load config file '$ARGV_config'; aborting...\n"
    unless $config && defined $config->{$ARGV_config};
### config: $config->{$ARGV_config}

# build taxonomy and classifier objects
# The classifier is built from the parsed content of the config file.
my $tax        = Taxonomy->new_from_cache( tax_dir => $ARGV_taxdir );
my $classifier = $tax->tax_classifier( $config->{$ARGV_config} );
for my $infile (@ARGV_infiles) {
### Processing: $infile
open my $in, '<', $infile;
# create directory named after infile
my ($filename) = fileparse($infile, qr{\.[^.]*}xms);
my $dir = dir( $filename . '-classify' )->relative;
$dir->mkpath();
my %out_for;
LINE:
while (my $line = <$in>) {
# process group line and extract members
chomp $line;
my ($group, $memb_str) = $line =~ m/(\S+) \s* : \s* (.*)/xms;
my @members = split /\s+/xms, $memb_str;
my $listable = IdList->new( ids => \@members );
# classify group
my $cat_label = $classifier->classify($listable);
next LINE unless $cat_label;
### classified: "$group | $cat_label"
# select adequate outfile based on label
my $outfile = append_suffix($cat_label, '.txt');
my $out = ( $out_for{$outfile} //= file($dir, $outfile)->openw );
# output group line
( run in 1.913 second using v1.01-cache-2.11-cpan-0bb4e1dffa6 )