Bio-MUST-Tools-Mcl

 view release on metacpan or  search on metacpan

bin/extract-mcl-out.pl  view on Meta::CPAN

#!/usr/bin/env perl
# PODNAME: extract-mcl-out.pl
# ABSTRACT: Extract orthogroups (FASTA files) from MCL clusters

use Modern::Perl '2011';
use autodie;

use File::Basename;
use Getopt::Euclid qw(:vars);
use Path::Class qw(dir file);
use Smart::Comments;

use Bio::MUST::Core;
use Bio::MUST::Core::Constants qw(:files);
use Bio::MUST::Core::Utils qw(change_suffix);
use aliased 'Bio::MUST::Core::Ali::Stash';
use aliased 'Bio::MUST::Core::IdList';


# Load the sequence database given on the command line once, up front;
# every group extracted below is assembled from this single object.
my $db = Stash->load($ARGV_database);

for my $infile (@ARGV_infiles) {

    ### Processing: $infile

    # The output directory takes the name of the input file minus its
    # last dot-suffix, and is created relative to the current directory.
    my ($basename) = fileparse($infile, qr{\.[^.]*}xms);
    my $outdir = dir($basename)->relative;
    $outdir->mkpath();

    # TODO: move this part to some object returning an ordered hash of IdList

    # autodie (enabled at file level) turns a failed open into an exception
    open my $in, '<', $infile;

    LINE:
    while (my $line = <$in>) {
        chomp $line;

        # blank lines and comment lines carry no group
        next LINE if $line =~ $EMPTY_LINE;
        next LINE if $line =~ $COMMENT_LINE;

        # first whitespace-separated field is the group name,
        # all remaining fields are the sequence ids of that group
        my @fields = split /\s+/xms, $line;
        my $group  = shift @fields;
        $group =~ s/:\z//xms;           # drop the colon ending the group name
        my $list = IdList->new( ids => \@fields );

        # build the Ali for this group from the database
        # NOTE(review): dont_guess presumably disables automatic guessing
        # on the Ali before writing -- confirm in Bio::MUST::Core::Ali
        my $ali = $list->reordered_ali($db);
        $ali->dont_guess;

        # one FASTA file per group, written inside the per-infile directory
        my $outpath = file( $outdir, change_suffix($group, '.fasta') );
        $ali->store_fasta( $outpath->stringify );
    }
}

__END__

=pod

=head1 NAME

extract-mcl-out.pl - Extract orthogroups (FASTA files) from MCL clusters

=head1 VERSION



( run in 2.172 seconds using v1.01-cache-2.11-cpan-5735350b133 )