Bio-MUST-Core
view release on metacpan or search on metacpan
bin/ali2phylip.pl view on Meta::CPAN
dump_stats($infile, $ali, 'in');
# remove shared gaps (and more if asked to do so)
_apply_mask( $ali, SeqMask->ideal_mask($ali, $ARGV_max_res_drop_site) );
# TODO: allow deleting #NEW# sequences made identical to existing seqs
# of the same org after mask application (to handle 42 mini-inserts)
# apply Gblocks mask
_apply_mask( $ali, SeqMask->gblocks_mask($ali, $ARGV_gb_mask) )
if $ARGV_gb_mask;
# apply BMGE mask
_apply_mask( $ali, SeqMask->bmge_mask($ali, $ARGV_bmge_mask) )
if $ARGV_bmge_mask;
# apply parsimony mask
_apply_mask( $ali, SeqMask->parsimony_mask($ali) )
if $ARGV_pars_mask;
# discard partial sequences and report their ids
if ($ARGV_min_res_seq) {
my @ali_list = map { $_->full_id } $ali->all_seq_ids;
$ali->apply_list( $ali->complete_seq_list($ARGV_min_res_seq) );
my @phy_list = map { $_->full_id } $ali->all_seq_ids;
my $lc = List::Compare->new( { lists => [\@ali_list, \@phy_list] } );
for my $full_id ($lc->get_unique) {
### Discarding seq: $full_id
}
}
# generate mapping file and map ids
if ($ARGV_map_ids) {
my $idm = $ali->std_mapper;
my $idmfile = change_suffix($infile, '.idm');
$idm->store($idmfile);
$ali->shorten_ids($idm);
}
# optionally delete constant sites
_apply_mask( $ali, SeqMask->variable_mask($ali) )
if $ARGV_del_const;
# export Ali to phylip format (or ALI) format
my $outfile = change_suffix($infile,
$ARGV_ali ? '-a2p.ali' :
$ARGV_p80 ? '.p80' :
'.phy'
);
dump_stats($outfile, $ali, 'out');
# only write actual phylip file if not in test mode
unless ($out) {
my $method = $ARGV_ali ? 'store' : 'store_phylip';
my $args = { clean => 1, $ARGV_p80 ? (short => 0, chunk => -1) : () };
$ali->$method($outfile, $args);
}
}
# wrapper to native methods to transparently handle codon_mask
sub _apply_mask {
my $ali = shift;
my $mask = shift;
if ($ARGV_keep_codons) {
$mask = $mask->codon_mask( {
frame => $ARGV_coding_frame,
max => $ARGV_codon_max_nt_drop,
} );
# frame should be fixed only once
$ARGV_coding_frame = 1;
}
$ali->apply_mask($mask);
return;
}
sub dump_stats {
my $file = shift;
my $ali = shift;
my $mode = shift;
# output tabular stats line when in test mode
if ($out) {
print {$out} join "\t", (
$file,
$ARGV_gb_mask // q{} , $ARGV_bmge_mask // q{},
$ARGV_pars_mask // q{},
$ARGV_max_res_drop_site, $ARGV_min_res_seq,
$ali->seq_len_stats
) if $mode eq 'in';
print {$out} "\t" . join "\t",
$ali->count_seqs, $ali->width, $ali->perc_miss;
print {$out} "\n" if $mode eq 'out';
return;
}
# output regular Smart::Comments stats line
my $str = "$file has " . $ali->count_seqs . ' seqs x ' . $ali->width
. ' sites (' . sprintf('%.2f', $ali->perc_miss) . '% missing states)';
if ($mode eq 'in') {
### [ALI] file: $str
} else {
### [PHY] file: $str
}
return;
}
__END__
=pod
=head1 NAME
ali2phylip.pl - Convert (and filter) ALI files to PHYLIP files for tree building
=head1 VERSION
( run in 1.467 second using v1.01-cache-2.11-cpan-bbb979687b5 )