Bio-MUST-Core

 view release on metacpan or  search on metacpan

bin/ali2phylip.pl  view on Meta::CPAN

    dump_stats($infile, $ali, 'in');

    # remove shared gaps (and more if asked to do so)
    _apply_mask( $ali, SeqMask->ideal_mask($ali, $ARGV_max_res_drop_site) );

    # TODO: allow deleting #NEW# sequences made identical to existing seqs
    # of the same org after mask application (to handle 42 mini-inserts)

    # apply Gblocks mask
    _apply_mask( $ali, SeqMask->gblocks_mask($ali, $ARGV_gb_mask) )
        if $ARGV_gb_mask;

    # apply BMGE mask
    _apply_mask( $ali, SeqMask->bmge_mask($ali, $ARGV_bmge_mask) )
        if $ARGV_bmge_mask;

    # apply parsimony mask
    _apply_mask( $ali, SeqMask->parsimony_mask($ali) )
        if $ARGV_pars_mask;

    # discard partial sequences and report their ids
    if ($ARGV_min_res_seq) {
        my @ali_list = map { $_->full_id } $ali->all_seq_ids;
        $ali->apply_list( $ali->complete_seq_list($ARGV_min_res_seq) );
        my @phy_list = map { $_->full_id } $ali->all_seq_ids;
        my $lc = List::Compare->new( { lists => [\@ali_list, \@phy_list] } );
        for my $full_id ($lc->get_unique) {
            ### Discarding seq: $full_id
        }
    }

    # generate mapping file and map ids
    if ($ARGV_map_ids) {
        my $idm = $ali->std_mapper;
        my $idmfile = change_suffix($infile, '.idm');
        $idm->store($idmfile);
        $ali->shorten_ids($idm);
    }

    # optionally delete constant sites
    _apply_mask( $ali, SeqMask->variable_mask($ali) )
        if $ARGV_del_const;

    # export Ali to phylip format (or ALI) format
    my $outfile = change_suffix($infile,
        $ARGV_ali ? '-a2p.ali' :
        $ARGV_p80 ? '.p80'     :
                    '.phy'
    );

    dump_stats($outfile, $ali, 'out');

    # only write actual phylip file if not in test mode
    unless ($out) {
        my $method = $ARGV_ali ? 'store' : 'store_phylip';
        my $args = { clean => 1, $ARGV_p80 ? (short => 0, chunk => -1) : () };
        $ali->$method($outfile, $args);
    }
}

# wrapper to native methods to transparently handle codon_mask
sub _apply_mask {
    my $ali  = shift;
    my $mask = shift;

    if ($ARGV_keep_codons) {
        $mask = $mask->codon_mask( {
            frame => $ARGV_coding_frame,
              max => $ARGV_codon_max_nt_drop,
        } );

        # frame should be fixed only once
        $ARGV_coding_frame = 1;
    }

    $ali->apply_mask($mask);
    return;
}

sub dump_stats {
    my $file = shift;
    my $ali  = shift;
    my $mode = shift;

    # output tabular stats line when in test mode
    if ($out) {
        print {$out} join "\t", (
            $file,
            $ARGV_gb_mask // q{} , $ARGV_bmge_mask // q{},
            $ARGV_pars_mask // q{},
            $ARGV_max_res_drop_site, $ARGV_min_res_seq,
            $ali->seq_len_stats
        ) if $mode eq 'in';
        print {$out} "\t" . join "\t",
            $ali->count_seqs, $ali->width, $ali->perc_miss;
        print {$out} "\n" if $mode eq 'out';
        return;
    }

    # output regular Smart::Comments stats line
    my $str = "$file has " . $ali->count_seqs . ' seqs x ' . $ali->width
        . ' sites (' . sprintf('%.2f', $ali->perc_miss) . '% missing states)';
    if ($mode eq 'in') {
        ### [ALI] file: $str
    } else {
        ### [PHY] file: $str
    }

    return;
}

__END__

=pod

=head1 NAME

ali2phylip.pl - Convert (and filter) ALI files to PHYLIP files for tree building

=head1 VERSION



( run in 1.467 second using v1.01-cache-2.11-cpan-bbb979687b5 )