Bio-MUST-Core
view release on metacpan or search on metacpan
bin/ali2phylip.pl view on Meta::CPAN
#!/usr/bin/env perl
# PODNAME: ali2phylip.pl
# ABSTRACT: Convert (and filter) ALI files to PHYLIP files for tree building
# CONTRIBUTOR: Arnaud DI FRANCO <arnaud.difranco@gmail.com>
# CONTRIBUTOR: Raphael LEONARD <rleonard@doct.uliege.be>
use Modern::Perl '2011';
use autodie;
use Getopt::Euclid qw(:vars);
use List::Compare;
use Smart::Comments;
use Bio::MUST::Core;
use Bio::MUST::Core::Utils qw(change_suffix);
use aliased 'Bio::MUST::Core::Ali';
use aliased 'Bio::MUST::Core::IdList';
use aliased 'Bio::MUST::Core::IdMapper';
use aliased 'Bio::MUST::Core::SeqMask';
# Optionally set up the test outfile: when $ARGV_test_out is set, open that
# path for writing (autodie raises on failure) and print a single line of
# tab-separated column names. Otherwise $out is left undefined.
my $out;
if ($ARGV_test_out) {

    # column names, grouped by kind; their order defines the header layout
    my @header_fields = (
        qw(file gb bmge pars max min),
        qw(Q0.len Q1.len Q2.len Q3.len Q4.len),
        qw(in.seqs in.sites in.miss),
        qw(out.seqs out.sites out.miss),
    );

    open $out, '>', $ARGV_test_out;
    say {$out} join "\t", @header_fields;
}
for my $infile (@ARGV_infiles) {
### Processing: $infile
my $ali = Ali->load($infile);
$ali->gapify_seqs if $ARGV_from_scafos;
dump_stats($infile, $ali, 'in');
# remove shared gaps (and more if asked to do so)
_apply_mask( $ali, SeqMask->ideal_mask($ali, $ARGV_max_res_drop_site) );
# TODO: allow deleting #NEW# sequences made identical to existing seqs
# of the same org after mask application (to handle 42 mini-inserts)
# apply Gblocks mask
_apply_mask( $ali, SeqMask->gblocks_mask($ali, $ARGV_gb_mask) )
if $ARGV_gb_mask;
# apply BMGE mask
_apply_mask( $ali, SeqMask->bmge_mask($ali, $ARGV_bmge_mask) )
if $ARGV_bmge_mask;
# apply parsimony mask
_apply_mask( $ali, SeqMask->parsimony_mask($ali) )
if $ARGV_pars_mask;
# discard partial sequences and report their ids
if ($ARGV_min_res_seq) {
my @ali_list = map { $_->full_id } $ali->all_seq_ids;
$ali->apply_list( $ali->complete_seq_list($ARGV_min_res_seq) );
my @phy_list = map { $_->full_id } $ali->all_seq_ids;
my $lc = List::Compare->new( { lists => [\@ali_list, \@phy_list] } );
for my $full_id ($lc->get_unique) {
### Discarding seq: $full_id
( run in 1.101 second using v1.01-cache-2.11-cpan-75ffa21a3d4 )