Bio-Tools-Phylo-PAML
view release on metacpan or search on metacpan
lib/Bio/Tools/Phylo/PAML/Codeml.pm view on Meta::CPAN
=head1 NAME
Bio::Tools::Phylo::PAML::Codeml - Parses output from the PAML program codeml
=head1 VERSION
version 1.7.3
=head1 SYNOPSIS
#!/usr/bin/perl -Tw
use strict;
use Bio::Tools::Phylo::PAML::Codeml;
# need to specify the output file name (or a fh) (defaults to
# -file => "codeml.mlc"); also, optionally, the directory in which
# the other result files (rst, 2ML.dS, etc) may be found (defaults
# to "./")
my $parser = new Bio::Tools::Phylo::PAML::Codeml::Parser
(-file => "./results/mlc", -dir => "./results/");
# get the first/next result; a Bio::[...]::Codeml::Result object
my $result = $parser->next_result();
# get the sequences used in the analysis; returns Bio::PrimarySeq
# objects (OTU = Operational Taxonomic Unit).
my @otus = $result->get_seqs();
# codon summary: codon usage of each sequence [ arrayref of {
# hashref of counts for each codon } for each sequence and the
# overall sum ], and positional nucleotide distribution [ arrayref
# of { hashref of frequencies for each nucleotide } for each
# sequence and overall frequencies ].
my ($codonusage, $ntdist) = $result->get_codon_summary();
# example manipulations of $codonusage and $ntdist:
printf "There were %d '%s' codons in the first seq (%s)\n",
$codonusage->[0]->{AAA}, 'AAA', $otus[0]->id();
printf "There were %d '%s' codons used in all the sequences\n",
$codonusage->[$#{$codonusage}]->{AAA}, 'AAA';
printf "Nucleotide '%c' was present %g of the time in seq %s\n",
'A', $ntdist->[1]->{A}, $otus[1]->id();
# get Nei & Gojobori dN/dS matrix:
my $NGmatrix = $result->get_NGmatrix();
# get ML-estimated dN/dS matrix, if calculated; this corresponds to
# the runmode = -2, pairwise comparison usage of codeml
my $MLmatrix = $result->get_MLmatrix();
# These matrices are length(@otu) x length(@otu) "strict lower
# triangle" 2D-matrices, which means that the diagonal and
# everything above it is undefined. Each of the defined cells is a
# hashref of estimates for "dN", "dS", "omega" (dN/dS ratio), "t",
# "S" and "N". If a ML matrix, "lnL" will also be defined. Any
# additional ML parameters estimated by the model will be in an
# array ref under "params"; it's up to the user to know which
# position corresponds to which parameter (since PAML doesn't label
# them, and we can't guess very well yet (a TODO I guess).
printf "The omega ratio for sequences %s vs %s was: %g\n",
$otus[0]->id, $otus[1]->id, $MLmatrix->[0]->[1]->{omega};
# with a little work, these matrices could also be passed to
# Bio::Tools::Run::Phylip::Neighbor, or other similar tree-building
# method that accepts a matrix of "distances" (using the LOWTRI
# option):
my $distmat = [ map { [ map { $$_{omega} } @$_ ] } @$MLmatrix ];
# for runmode's other than -2, get tree topology with estimated
# branch lengths; returns a Bio::Tree::TreeI-based tree object with
# added PAML parameters at each node
my $tree = $result->get_tree();
for my $node ($tree->get_nodes()) {
# inspect the tree: the "t" (time) parameter is available via
# $node->branch_length(); all other branch-specific parameters
# ("omega", "dN", etc.) are available via $node->param('omega');
}
# get any general model parameters: kappa (the
# transition/transversion ratio), NSsites model parameters ("p0",
# "p1", "w0", "w1", etc.), etc.
my $params = $result->get_model_params();
printf "M1 params: p0 = %g\tp1 = %g\n", $params->{p0}, $params->{p1};
# for NSsites models, obtain posterior probabilities for membership
# in each class for every position; probabilities correspond to
# classes w0, w1, ... etc.
my @probs = $result->get_posteriors();
# find, say, positively selected sites!
if ($params->{w2} > 1) {
for (my $i = 0; $i < @probs ; $i++) {
if ($probs[$i]->[2] > 0.5) {
# assumes model M1: three w's, w0, w1 and w2 (positive selection)
printf "position %d: (%g prob, %g omega, %g mean w)\n",
$i, $probs[$i]->[2], $params->{w2}, $probs[$i]->[3];
}
}
} else { print "No positive selection found!\n"; }
=head1 DESCRIPTION
This module is used to parse the output from the PAML program codeml.
You can use the Bio::Tools::Run::Phylo::Phylo::PAML::Codeml module to
actually run codeml; this module is only useful to parse the output.
=head1 METHODS
=head2 new
Title : new
Usage : my $obj = new Bio::Tools::Phylo::PAML::Codeml();
Function: Builds a new Bio::Tools::Phylo::PAML::Codeml object
Returns : Bio::Tools::Phylo::PAML::Codeml
Args :
=head2 get_trees
( run in 0.576 second using v1.01-cache-2.11-cpan-8f98c5d2c55 )