Bio-Tools-Phylo-PAML
view release on metacpan or search on metacpan
bin/bp_pairwise_kaks view on Meta::CPAN
# Tcoffee can't handle '*'
$pseq =~ s/\*//g;
$protein->seq($pseq);
push @prots, $protein;
}
if( @prots < 2 ) {
warn("Need at least 2 cDNA sequences to proceed");
exit(0);
}
local * OUT;
if( $output ) {
open(OUT, ">$output") || die("cannot open output $output for writing");
} else {
*OUT = *STDOUT;
}
my $aa_aln = $aln_factory->align(\@prots);
my $dna_aln = &aa_to_dna_aln($aa_aln, \%seqs);
my @each = $dna_aln->each_seq();
$kaks_factory->alignment($dna_aln);
my ($rc,$parser) = $kaks_factory->run();
if( $rc <= 0 ) {
warn($kaks_factory->error_string,"\n");
exit;
}
my $result = $parser->next_result;
if ($result->version =~ m/3\.15/) {
warn("This script does not work with v3.15 of PAML! Please use 3.14 instead.");
exit(0);
}
my $MLmatrix = $result->get_MLmatrix();
my @otus = $result->get_seqs();
my @pos = map {
my $c= 1;
foreach my $s ( @each ) {
last if( $s->display_id eq $_->display_id );
$c++;
}
$c;
} @otus;
print OUT join("\t", qw(SEQ1 SEQ2 Ka Ks Ka/Ks PROT_PERCENTID CDNA_PERCENTID)), "\n";
for( my $i = 0; $i < (scalar @otus -1) ; $i++) {
for( my $j = $i+1; $j < (scalar @otus); $j++ ) {
my $sub_aa_aln = $aa_aln->select_noncont($pos[$i],$pos[$j]);
my $sub_dna_aln = $dna_aln->select_noncont($pos[$i],$pos[$j]);
print OUT join("\t",
$otus[$i]->display_id,
$otus[$j]->display_id,$MLmatrix->[$i]->[$j]->{'dN'},
$MLmatrix->[$i]->[$j]->{'dS'},
$MLmatrix->[$i]->[$j]->{'omega'},
sprintf("%.2f",$sub_aa_aln->percentage_identity),
sprintf("%.2f",$sub_dna_aln->percentage_identity),
), "\n";
}
}
__END__
=pod
=encoding UTF-8
=head1 NAME
bp_pairwise_kaks - calculate pairwise Ka,Ks for a set of sequences
=head1 VERSION
version 1.7.3
=head1 SYNOPSIS
bp_pairwise_kaks.PLS -i INPUT-cDNA [-f FORMAT] [-o OUTPUT] \
[--msa tcoffee|clustal] [--kaks yn00|codeml]
=head1 DESCRIPTION
This script will take as input a dataset of cDNA sequences, verify
that they contain no stop codons, align them in protein space, project
the alignment back into cDNA, and estimate the Ka (non-synonymous) and
Ks (synonymous) substitutions based on the Maximum Likelihood method
of Yang with the PAML package.
Often there are specific specific parameters you want to run when you
are computing Ka/Ks ratios so consider this script a starting point
and do not rely it on for every situation.
=head1 OPTIONS
=over 4
=item B<-i>, B<--input>
The input file with the cDNA sequences. Must have at least two
sequences, and be in a format that L<Bio::SeqIO> is capable to read.
In addition, if Bio::SeqIO is inable to automatically identify the
format, the B<-f> option should be specified.
Technically not an option, this is a required value.
=item B<-f>, B<--format>
Specify the format of INPUT-cDNA for L<Bio::SeqIO>.
=item B<-o>, B<--output>
Specify the file for output. Defaults to STDOUT.
=item B<--msa>
Program used for alignment, either clustalw or tcoffee. Defaults to
clustalw.
( run in 1.388 second using v1.01-cache-2.11-cpan-0bb4e1dffa6 )