Bio-Tools-Phylo-PAML
view release on metacpan or search on metacpan
lib/Bio/Tools/Phylo/PAML.pm view on Meta::CPAN
my @bases = split;
foreach my $str (@bases) {
my ( $base, $freq ) = split( /:/, $str, 2 );
$self->{'_summary'}->{'codonposition'}->[ $pos - 1 ]->{$base} =
$freq;
}
$done = 1 if $pos == 3;
}
}
$done = 0;
while ( defined( $_ = $self->_readline ) ) {
if (/^Nei\s\&\sGojobori|\(A\)\sNei-Gojobori/) {
$self->_pushback($_);
last;
}
last if ($done);
if (/^Codon frequencies under model, for use in evolver/) {
while ( defined( $_ = $self->_readline ) ) {
last if (/^\s+$/);
s/^\s+//;
s/\s+$//;
push @{ $self->{'_summary'}->{'codonfreqs'} }, [split];
}
$done = 1;
}
}
}
sub _parse_aa_freqs {
my ($self) = @_;
my ( $okay, $done, $header ) = ( 0, 0, 0 );
my (@bases);
my $numseqs = scalar @{ $self->{'_summary'}->{'seqs'} || [] };
while ( defined( $_ = $self->_readline ) ) {
if ( /^TREE/ || /^AA distances/ ) {
$self->_pushback($_);
last;
}
last if ($done);
next if ( /^\s+$/ || /^\(Ambiguity/ );
if (/^Frequencies\./) {
$okay = 1;
}
elsif ( !$okay ) { # skip till we see 'Frequencies.
next;
}
elsif ( !$header ) {
s/^\s+//; # remove leading whitespace
@bases = split; # get an array of the all the aa names
$header = 1;
$self->{'_summary'}->{'aafreqs'} = {}; # reset/clear values
next;
}
elsif (
/^\#\s+constant\s+sites\:\s+
(\d+)\s+ # constant sites
\(\s*([\d\.]+)\s*\%\s*\)/x
)
{
$self->{'_summary'}->{'stats'}->{'constant_sites'} = $1;
$self->{'_summary'}->{'stats'}->{'constant_sites_percentage'} = $2;
}
elsif (/^ln\s+Lmax\s+\(unconstrained\)\s+\=\s+(\S+)/x) {
$self->{'_summary'}->{'stats'}->{'loglikelihood'} = $1;
$done = 1; # done for sure
}
else {
my ( $seqname, @freqs ) = split;
my $basect = 0;
foreach my $f (@freqs) {
# this will also store 'Average'
$self->{'_summary'}->{'aafreqs'}->{$seqname}
->{ $bases[ $basect++ ] } = $f;
}
}
}
}
# This is for parsing the automatic tree output
sub _parse_StarDecomposition {
my ($self) = @_;
my %data;
return %data;
}
sub _parse_aa_dists {
my ($self) = @_;
my ( $okay, $seen, $done ) = ( 0, 0, 0 );
my ( %matrix, @names, @values );
my $numseqs = scalar @{ $self->{'_summary'}->{'seqs'} || [] };
my $type = '';
while ( defined( $_ = $self->_readline ) ) {
last if $done;
if (/^TREE/) { $self->_pushback($_); last; }
if (/^\s+$/) {
last if ($seen);
next;
}
if (/^(AA|ML) distances/) {
$okay = 1;
$type = $1;
next;
}
s/\s+$//g; # remove trailing space
if ($okay) {
my ( $seqname, @vl ) = split;
$seen = 1;
my $i = 0;
# hacky workaround to problem with 3.14 aaml
if (
$type eq 'ML'
&& !@names
&& # first entry
@vl
)
{ # not empty
push @names, $self->{'_summary'}->{'seqs'}->[0]->display_id;
lib/Bio/Tools/Phylo/PAML.pm view on Meta::CPAN
my ($okay) = (0);
my %branch_dnds;
my @header;
while ( defined( $_ = $self->_readline ) ) {
next if (/^\s+$/);
next unless ( $okay || /^\s+branch\s+t/ );
if (/^\s+branch\s+(.+)/) {
s/^\s+//;
@header = split( /\s+/, $_ );
$okay = 1;
}
elsif (/^\s*(\d+\.\.\d+)/) {
my $branch = $1;
s/^\s+//;
my $i = 0;
# fancyness just maps the header names like 't' or 'dN'
# into the hash so we get at the end of the day
# 't' => 0.067
# 'dN'=> 0.001
$branch_dnds{$branch} = { map { $header[ $i++ ] => $_ } split };
}
else {
$self->_pushback($_);
last;
}
}
return %branch_dnds;
}
#baseml stuff
sub _parse_nt_freqs {
my ($self) = @_;
my ( $okay, $done, $header ) = ( 0, 0, 0 );
my (@bases);
my $numseqs = scalar @{ $self->{'_summary'}->{'seqs'} || [] };
while ( defined( $_ = $self->_readline ) ) {
if ( /^TREE/ || /^Distances/ ) { $self->_pushback($_); last }
last if ($done);
next if ( /^\s+$/ || /^\(Ambiguity/ );
if (/^Frequencies\./) {
$okay = 1;
}
elsif ( !$okay ) { # skip till we see 'Frequencies.
next;
}
elsif ( !$header ) {
s/^\s+//; # remove leading whitespace
@bases = split; # get an array of the all the aa names
$header = 1;
$self->{'_summary'}->{'ntfreqs'} = {}; # reset/clear values
next;
}
elsif (
/^\#\s+constant\s+sites\:\s+
(\d+)\s+ # constant sites
\(\s*([\d\.]+)\s*\%\s*\)/ox
)
{
$self->{'_summary'}->{'stats'}->{'constant_sites'} = $1;
$self->{'_summary'}->{'stats'}->{'constant_sites_percentage'} = $2;
}
elsif (/^ln\s+Lmax\s+\(unconstrained\)\s+\=\s+(\S+)/ox) {
$self->{'_summary'}->{'stats'}->{'loglikelihood'} = $1;
$done = 1; # done for sure
}
else {
my ( $seqname, @freqs ) = split;
my $basect = 0;
foreach my $f (@freqs) {
# this will also store 'Average'
$self->{'_summary'}->{'ntfreqs'}->{$seqname}
->{ $bases[ $basect++ ] } = $f;
}
}
}
}
sub _parse_nt_dists {
my ($self) = @_;
my ( $okay, $seen, $done ) = ( 0, 0, 0 );
my ( %matrix, @names );
my $numseqs = scalar @{ $self->{'_summary'}->{'seqs'} || [] };
my $type = '';
while ( defined( $_ = $self->_readline ) ) {
if (/^TREE/) { $self->_pushback($_); last; }
last if $done;
next if (/^This matrix is not used in later/);
if (/^\s+$/) {
last if ($seen);
next;
}
if (/^Distances:(\S+)\s+\(([^\)]+)\)\s+\(alpha set at (\-?\d+\.\d+)\)/)
{
$okay = 1;
$type = $1;
next;
}
s/\s+$//g; # remove trailing space
if ($okay) {
my ( $seqname, $vl ) = split( /\s+/, $_, 2 );
$seen = 1;
my $i = 0;
if ( defined $vl ) {
while ( $vl =~ /(\-?\d+\.\d+)\s*\(\s*(\-?\d+\.\d+)\s*\)\s*/g ) {
my ( $kappa, $alpha ) = ( $1, $2 );
$matrix{$seqname}{ $names[$i] } =
$matrix{ $names[$i] }{$seqname} = [ $kappa, $alpha ];
$i++;
}
unless ($i) {
$self->warn("no matches for $vl\n");
}
}
push @names, $seqname;
$matrix{$seqname}->{$seqname} = [ 0, 0 ];
}
$done = 1 if ( scalar @names == $numseqs );
}
( run in 1.902 second using v1.01-cache-2.11-cpan-0d23b851a93 )