Bio-MUST-Core
view release on metacpan - search on metacpan
view release on metacpan or search on metacpan
bin/split-matrix.pl view on Meta::CPAN
#!/usr/bin/env perl
# PODNAME: split-matrix.pl
# ABSTRACT: Extract individual gene ALIs from a SCaFoS supermatrix
use Modern::Perl '2011';
use autodie;
use File::Basename;
use Getopt::Euclid qw(:vars);
use Path::Class qw(dir file);
use Smart::Comments;
use Bio::MUST::Core;
use Bio::MUST::Core::Constants qw(:files);
use aliased 'Bio::MUST::Core::Ali';
use aliased 'Bio::MUST::Core::SeqMask';
# Load the supermatrix once; every gene ALI written below is cut out of it.
### Gene ALIs extracted from: $ARGV_alifile
my $ali = Ali->load($ARGV_alifile);
$ali->gapify_seqs;

# Each input file lists genes, one per line, as tab-separated fields:
# gene name, begin coordinate, end coordinate (further fields are ignored).
for my $infile (@ARGV_infiles) {
    ### Processing: $infile

    # the output directory takes the input file's name minus its extension
    my ($basename) = fileparse($infile, qr{\.[^.]*}xms);
    my $outdir = dir($basename)->relative;
    $outdir->mkpath();

    # no explicit error check needed: autodie makes a failing open fatal
    open my $coords_fh, '<', $infile;

    RECORD:
    while ( defined( my $record = <$coords_fh> ) ) {
        chomp $record;

        # blank lines and comment lines carry no coordinates
        next RECORD if $record =~ $EMPTY_LINE;
        next RECORD if $record =~ $COMMENT_LINE;

        # a single [begin, end] block is turned into the mask for this gene
        my ($gene, $begin, $end) = split /\t+/xms, $record;
        my $gene_mask = SeqMask->blocks2mask( [ [ $begin, $end ] ] );

        # apply the mask to the supermatrix, then keep only the seqs
        # whose nomiss_seq_len is greater than 1
        # NOTE(review): a threshold of 1 also drops seqs whose
        # nomiss_seq_len is exactly 1 -- confirm this is intended
        my $gene_view = $gene_mask->filtered_ali($ali);
        my @kept_seqs = $gene_view->filter_seqs(
            sub { $_->nomiss_seq_len > 1 }
        );
        my $gene_ali = Ali->new( seqs => \@kept_seqs );

        # the gene name doubles as the output file name inside $outdir
        ### Output gene ALI in: $gene
        $gene_ali->store( file($outdir, $gene) );
    }
}
__END__
=pod
=head1 NAME
split-matrix.pl - Extract individual gene ALIs from a SCaFoS supermatrix
=head1 VERSION
view all matches for this distribution
view release on metacpan - search on metacpan
( run in 0.393 second using v1.00-cache-2.02-grep-82fe00e-cpan-2c419f77a38b )