Bio-Tools-Phylo-PAML

 view release on metacpan or  search on metacpan

lib/Bio/Tools/Phylo/PAML.pm  view on Meta::CPAN

            my @bases = split;
            foreach my $str (@bases) {
                my ( $base, $freq ) = split( /:/, $str, 2 );
                $self->{'_summary'}->{'codonposition'}->[ $pos - 1 ]->{$base} =
                  $freq;
            }
            $done = 1 if $pos == 3;
        }
    }
    $done = 0;
    while ( defined( $_ = $self->_readline ) ) {
        if (/^Nei\s\&\sGojobori|\(A\)\sNei-Gojobori/) {
            $self->_pushback($_);
            last;
        }
        last if ($done);
        if (/^Codon frequencies under model, for use in evolver/) {
            while ( defined( $_ = $self->_readline ) ) {
                last if (/^\s+$/);
                s/^\s+//;
                s/\s+$//;
                push @{ $self->{'_summary'}->{'codonfreqs'} }, [split];
            }
            $done = 1;
        }
    }
}

sub _parse_aa_freqs {
    my ($self) = @_;
    my ( $okay, $done, $header ) = ( 0, 0, 0 );
    my (@bases);
    my $numseqs = scalar @{ $self->{'_summary'}->{'seqs'} || [] };
    while ( defined( $_ = $self->_readline ) ) {
        if ( /^TREE/ || /^AA distances/ ) {
            $self->_pushback($_);
            last;
        }
        last if ($done);
        next if ( /^\s+$/ || /^\(Ambiguity/ );
        if (/^Frequencies\./) {
            $okay = 1;
        }
        elsif ( !$okay ) {    # skip till we see 'Frequencies.
            next;
        }
        elsif ( !$header ) {
            s/^\s+//;         # remove leading whitespace
            @bases  = split;  # get an array of the all the aa names
            $header = 1;
            $self->{'_summary'}->{'aafreqs'} = {};    # reset/clear values
            next;
        }
        elsif (
            /^\#\s+constant\s+sites\:\s+
         (\d+)\s+ # constant sites
         \(\s*([\d\.]+)\s*\%\s*\)/x
          )
        {
            $self->{'_summary'}->{'stats'}->{'constant_sites'}            = $1;
            $self->{'_summary'}->{'stats'}->{'constant_sites_percentage'} = $2;
        }
        elsif (/^ln\s+Lmax\s+\(unconstrained\)\s+\=\s+(\S+)/x) {
            $self->{'_summary'}->{'stats'}->{'loglikelihood'} = $1;
            $done = 1;    # done for sure
        }
        else {
            my ( $seqname, @freqs ) = split;
            my $basect = 0;
            foreach my $f (@freqs) {

                # this will also store 'Average'
                $self->{'_summary'}->{'aafreqs'}->{$seqname}
                  ->{ $bases[ $basect++ ] } = $f;
            }
        }
    }
}

# This is for parsing the automatic tree output

sub _parse_StarDecomposition {
    my ($self) = @_;
    my %data;

    return %data;
}

sub _parse_aa_dists {
    my ($self) = @_;
    my ( $okay, $seen, $done ) = ( 0, 0, 0 );
    my ( %matrix, @names, @values );
    my $numseqs = scalar @{ $self->{'_summary'}->{'seqs'} || [] };
    my $type = '';
    while ( defined( $_ = $self->_readline ) ) {
        last if $done;
        if (/^TREE/) { $self->_pushback($_); last; }
        if (/^\s+$/) {
            last if ($seen);
            next;
        }
        if (/^(AA|ML) distances/) {
            $okay = 1;
            $type = $1;
            next;
        }
        s/\s+$//g;    # remove trailing space
        if ($okay) {
            my ( $seqname, @vl ) = split;
            $seen = 1;
            my $i = 0;

            # hacky workaround to problem with 3.14 aaml
            if (
                   $type eq 'ML'
                && !@names
                &&    # first entry
                @vl
              )
            {         # not empty
                push @names, $self->{'_summary'}->{'seqs'}->[0]->display_id;

lib/Bio/Tools/Phylo/PAML.pm  view on Meta::CPAN

    my ($okay) = (0);
    my %branch_dnds;
    my @header;
    while ( defined( $_ = $self->_readline ) ) {
        next if (/^\s+$/);
        next unless ( $okay || /^\s+branch\s+t/ );
        if (/^\s+branch\s+(.+)/) {
            s/^\s+//;
            @header = split( /\s+/, $_ );
            $okay = 1;
        }
        elsif (/^\s*(\d+\.\.\d+)/) {
            my $branch = $1;
            s/^\s+//;
            my $i = 0;

            # fancyness just maps the header names like 't' or 'dN'
            # into the hash so we get at the end of the day
            # 't' => 0.067
            # 'dN'=> 0.001
            $branch_dnds{$branch} = { map { $header[ $i++ ] => $_ } split };
        }
        else {
            $self->_pushback($_);
            last;
        }
    }
    return %branch_dnds;
}

#baseml stuff
sub _parse_nt_freqs {
    my ($self) = @_;
    my ( $okay, $done, $header ) = ( 0, 0, 0 );
    my (@bases);
    my $numseqs = scalar @{ $self->{'_summary'}->{'seqs'} || [] };
    while ( defined( $_ = $self->_readline ) ) {
        if ( /^TREE/ || /^Distances/ ) { $self->_pushback($_); last }
        last if ($done);
        next if ( /^\s+$/ || /^\(Ambiguity/ );
        if (/^Frequencies\./) {
            $okay = 1;
        }
        elsif ( !$okay ) {    # skip till we see 'Frequencies.
            next;
        }
        elsif ( !$header ) {
            s/^\s+//;         # remove leading whitespace
            @bases  = split;  # get an array of the all the aa names
            $header = 1;
            $self->{'_summary'}->{'ntfreqs'} = {};    # reset/clear values
            next;
        }
        elsif (
            /^\#\s+constant\s+sites\:\s+
         (\d+)\s+    # constant sites
         \(\s*([\d\.]+)\s*\%\s*\)/ox
          )
        {
            $self->{'_summary'}->{'stats'}->{'constant_sites'}            = $1;
            $self->{'_summary'}->{'stats'}->{'constant_sites_percentage'} = $2;
        }
        elsif (/^ln\s+Lmax\s+\(unconstrained\)\s+\=\s+(\S+)/ox) {
            $self->{'_summary'}->{'stats'}->{'loglikelihood'} = $1;
            $done = 1;    # done for sure
        }
        else {
            my ( $seqname, @freqs ) = split;
            my $basect = 0;
            foreach my $f (@freqs) {

                # this will also store 'Average'
                $self->{'_summary'}->{'ntfreqs'}->{$seqname}
                  ->{ $bases[ $basect++ ] } = $f;
            }
        }
    }
}

sub _parse_nt_dists {
    my ($self) = @_;
    my ( $okay, $seen, $done ) = ( 0, 0, 0 );
    my ( %matrix, @names );
    my $numseqs = scalar @{ $self->{'_summary'}->{'seqs'} || [] };
    my $type = '';
    while ( defined( $_ = $self->_readline ) ) {
        if (/^TREE/) { $self->_pushback($_); last; }
        last if $done;
        next if (/^This matrix is not used in later/);
        if (/^\s+$/) {
            last if ($seen);
            next;
        }
        if (/^Distances:(\S+)\s+\(([^\)]+)\)\s+\(alpha set at (\-?\d+\.\d+)\)/)
        {
            $okay = 1;
            $type = $1;
            next;
        }
        s/\s+$//g;    # remove trailing space
        if ($okay) {
            my ( $seqname, $vl ) = split( /\s+/, $_, 2 );
            $seen = 1;
            my $i = 0;
            if ( defined $vl ) {
                while ( $vl =~ /(\-?\d+\.\d+)\s*\(\s*(\-?\d+\.\d+)\s*\)\s*/g ) {
                    my ( $kappa, $alpha ) = ( $1, $2 );
                    $matrix{$seqname}{ $names[$i] } =
                      $matrix{ $names[$i] }{$seqname} = [ $kappa, $alpha ];

                    $i++;
                }
                unless ($i) {
                    $self->warn("no matches for $vl\n");
                }
            }
            push @names, $seqname;
            $matrix{$seqname}->{$seqname} = [ 0, 0 ];
        }
        $done = 1 if ( scalar @names == $numseqs );
    }



( run in 1.902 second using v1.01-cache-2.11-cpan-0d23b851a93 )