Bio-MUST-Core

 view release on metacpan or  search on metacpan

bin/split-matrix.pl  view on Meta::CPAN

#!/usr/bin/env perl
# PODNAME: split-matrix.pl
# ABSTRACT: Extract individual gene ALIs from a SCaFoS supermatrix

use Modern::Perl '2011';
use autodie;

use File::Basename;
use Getopt::Euclid qw(:vars);
use Path::Class qw(dir file);
use Smart::Comments;

use Bio::MUST::Core;
use Bio::MUST::Core::Constants qw(:files);
use aliased 'Bio::MUST::Core::Ali';
use aliased 'Bio::MUST::Core::SeqMask';


# Split a supermatrix into one alignment file per gene.
#
# Inputs (both exported by Getopt::Euclid through its :vars import):
#   $ARGV_alifile - the alignment file holding the supermatrix
#   @ARGV_infiles - coordinate files; every line that is neither empty nor a
#                   comment holds tab-separated fields: gene, begin, end
#
# For each coordinate file, a directory named after that file (last suffix
# removed) is created relative to the current directory. Each gene listed in
# the file is then cut out of the supermatrix and stored in that directory
# under the gene's name.
#
# Dies (through autodie) when a coordinate file cannot be opened or closed,
# and dies explicitly on a coordinate line lacking one of the three fields.
#
# NOTE: the comment lines introduced by three hash signs are Smart::Comments
# directives; they produce output at run time and must be kept verbatim.

### Gene ALIs extracted from: $ARGV_alifile
my $ali = Ali->load($ARGV_alifile);
   $ali->gapify_seqs;

for my $infile (@ARGV_infiles) {

    ### Processing: $infile

    # create directory named after filename (only the last suffix is removed)
    my ($filename) = fileparse($infile, qr{\.[^.]*}xms);
    my $dir = dir($filename)->relative;
    $dir->mkpath();

    open my $in, '<', $infile;

    LINE:
    while (my $line = <$in>) {
        chomp $line;

        # skip empty lines and other comment lines
        next LINE if $line =~ $EMPTY_LINE
                  || $line =~ $COMMENT_LINE;

        # split the record; fields beyond the third are ignored
        my ($gene, $begin, $end) = split /\t+/xms, $line;

        # Fail early on a malformed record: fewer than three fields leaves
        # $begin/$end undefined, and a line starting with a tab yields an
        # empty gene name (split keeps a leading empty field), which would
        # make the output path point at the directory itself.
        my $is_complete = defined $gene  && length $gene
                       && defined $begin && defined $end;
        if (!$is_complete) {
            die sprintf
                "Malformed coordinate line %d in '%s'"
              . " (expected: gene<TAB>begin<TAB>end): '%s'\n",
                $., $infile, $line;
        }

        # build mask from gene coordinates
        my $mask = SeqMask->blocks2mask( [ [ $begin, $end ] ] );

        # extract gene and filter empty seqs
        # (keeps seqs whose nomiss_seq_len exceeds 1; presumably that is the
        # count of non-missing states -- confirm in Bio::MUST::Core)
        my $masked = $mask->filtered_ali($ali);
        my @seqs = $masked->filter_seqs( sub { $_->nomiss_seq_len > 1 } );
        my $gene_ali = Ali->new( seqs => \@seqs );

        ### Output gene ALI in: $gene
        my $outfile = file($dir, $gene);
        $gene_ali->store($outfile);
    }

    # release the handle explicitly; autodie reports a failing close
    close $in;
}

__END__

=pod

=head1 NAME

split-matrix.pl - Extract individual gene ALIs from a SCaFoS supermatrix

=head1 VERSION

 view all matches for this distribution
 view release on metacpan -  search on metacpan

( run in 0.393 second using v1.00-cache-2.02-grep-82fe00e-cpan-2c419f77a38b )