Bio-MUST-Core

 view release on metacpan or  search on metacpan

bin/ali2phylip.pl  view on Meta::CPAN

#!/usr/bin/env perl
# PODNAME: ali2phylip.pl
# ABSTRACT: Convert (and filter) ALI files to PHYLIP files for tree building
# CONTRIBUTOR: Arnaud DI FRANCO <arnaud.difranco@gmail.com>
# CONTRIBUTOR: Raphael LEONARD <rleonard@doct.uliege.be>

use Modern::Perl '2011';
use autodie;

use Getopt::Euclid qw(:vars);
use List::Compare;
use Smart::Comments;

use Bio::MUST::Core;
use Bio::MUST::Core::Utils qw(change_suffix);
use aliased 'Bio::MUST::Core::Ali';
use aliased 'Bio::MUST::Core::IdList';
use aliased 'Bio::MUST::Core::IdMapper';
use aliased 'Bio::MUST::Core::SeqMask';


# Open the test outfile (only when one was requested) and write its
# tab-separated header row; $out is left undefined otherwise.
my $out;
if ($ARGV_test_out) {

    # column names of the header row, in output order
    my @header_fields = (
        qw(file gb bmge pars max min),
        qw(Q0.len Q1.len Q2.len Q3.len Q4.len),
        qw(in.seqs in.sites in.miss),
        qw(out.seqs out.sites out.miss),
    );

    # no explicit error check here: autodie is enabled at the top of the file
    open $out, '>', $ARGV_test_out;
    say {$out} join "\t", @header_fields;
}

for my $infile (@ARGV_infiles) {

    ### Processing: $infile
    my $ali = Ali->load($infile);
    $ali->gapify_seqs if $ARGV_from_scafos;

    dump_stats($infile, $ali, 'in');

    # remove shared gaps (and more if asked to do so)
    _apply_mask( $ali, SeqMask->ideal_mask($ali, $ARGV_max_res_drop_site) );

    # TODO: allow deleting #NEW# sequences made identical to existing seqs
    # of the same org after mask application (to handle 42 mini-inserts)

    # apply Gblocks mask
    _apply_mask( $ali, SeqMask->gblocks_mask($ali, $ARGV_gb_mask) )
        if $ARGV_gb_mask;

    # apply BMGE mask
    _apply_mask( $ali, SeqMask->bmge_mask($ali, $ARGV_bmge_mask) )
        if $ARGV_bmge_mask;

    # apply parsimony mask
    _apply_mask( $ali, SeqMask->parsimony_mask($ali) )
        if $ARGV_pars_mask;

    # discard partial sequences and report their ids
    if ($ARGV_min_res_seq) {
        my @ali_list = map { $_->full_id } $ali->all_seq_ids;
        $ali->apply_list( $ali->complete_seq_list($ARGV_min_res_seq) );
        my @phy_list = map { $_->full_id } $ali->all_seq_ids;
        my $lc = List::Compare->new( { lists => [\@ali_list, \@phy_list] } );
        for my $full_id ($lc->get_unique) {
            ### Discarding seq: $full_id



( run in 1.101 second using v1.01-cache-2.11-cpan-75ffa21a3d4 )