BioPerl
view release on metacpan or search on metacpan
Bio/Align/DNAStatistics.pm view on Meta::CPAN
Returns : a reference to a hash of statistics with keys as
listed in Description.
=cut
sub calc_KaKs_pair {
my ( $self, $aln, $seq1_id, $seq2_id) = @_;
$self->throw("Needs 3 arguments - an alignment object, and 2 sequence ids")
if @_!= 4;
$self->throw ("This calculation needs a Bio::Align::AlignI compatible object, not a [ " . ref($aln) . " ]object") unless $aln->isa('Bio::Align::AlignI');
my @seqs = (
#{id => $seq1_id, seq =>($aln->each_seq_with_id($seq1_id))[0]->seq},
#{id => $seq2_id, seq =>($aln->each_seq_with_id($seq2_id))[0]->seq}
{id => $seq1_id, seq => uc(($aln->each_seq_with_id($seq1_id))[0]->seq)},
{id => $seq2_id, seq => uc(($aln->each_seq_with_id($seq2_id))[0]->seq)}
) ;
if (length($seqs[0]{'seq'}) != length($seqs[1]{'seq'})) {
$self->throw(" aligned sequences must be of equal length!");
}
my $results = [];
$self->_get_av_ds_dn(\@seqs, $results);
return $results;
}
=head2 calc_all_KaKs_pairs
Title : calc_all_KaKs_pairs
Useage : my $results2 = $stats->calc_KaKs_pair($alnobj).
Function : Calculates Nei_gojobori statistics for all pairwise
combinations in sequence.
Arguments: A Bio::Align::ALignI compliant object such as
a Bio::SimpleAlign object.
Returns : A reference to an array of hashes of statistics of
all pairwise comparisons in the alignment.
=cut
sub calc_all_KaKs_pairs {
#returns a multi_element_array with all pairwise comparisons
my ($self,$aln) = @_;
$self->throw ("This calculation needs a Bio::Align::AlignI compatible object, not a [ " . ref($aln) . " ]object") unless $aln->isa('Bio::Align::AlignI');
my @seqs;
for my $seq ($aln->each_seq) {
push @seqs, {id => $seq->display_id, seq=>$seq->seq};
}
my $results ;
$results = $self->_get_av_ds_dn(\@seqs, $results);
return $results;
}
=head2 calc_average_KaKs
Title : calc_average_KaKs.
Useage : my $res= $stats->calc_average_KaKs($alnobj, 1000).
Function : calculates Nei_Gojobori stats for average of all
sequences in the alignment.
Args : A Bio::Align::AlignI compliant object such as a
Bio::SimpleAlign object, number of bootstrap iterations
(default 1000).
Returns : A reference to a hash of statistics as listed in Description.
=cut
sub calc_average_KaKs {
#calculates global value for sequences in alignment using bootstrapping
#this is quite slow (~10 seconds per 3 X 200nt seqs);
my ($self, $aln, $bootstrap_rpt) = @_;
$bootstrap_rpt ||= 1000;
$self->throw ("This calculation needs a Bio::Align::AlignI compatible object, not a [ " . ref($aln) . " ]object") unless $aln->isa('Bio::Align::AlignI');
my @seqs;
for my $seq ($aln->each_seq) {
push @seqs, {id => $seq->display_id, seq=>$seq->seq};
}
my $results ;
my ($ds_orig, $dn_orig) = $self->_get_av_ds_dn(\@seqs);
#print "ds = $ds_orig, dn = $dn_orig\n";
$results = {D_s => $ds_orig, D_n => $dn_orig};
$self->_run_bootstrap(\@seqs, $results, $bootstrap_rpt);
return $results;
}
############## primary internal subs for alignment comparisons ########################
sub _run_bootstrap {
### generates sampled sequences, calculates Ds and Dn values,
### then calculates variance of sampled sequences and add results to results hash
###
my ($self,$seq_ref, $results, $bootstrap_rpt) = @_;
my @seqs = @$seq_ref;
my @btstrp_aoa; # to hold array of array of nucleotides for resampling
my %bootstrap_values = (ds => [], dn =>[]); # to hold list of av values
#1st make alternative array of codons;
my $c = 0;
while ($c < length $seqs[0]{'seq'}) {
for (0..$#seqs) {
push @{$btstrp_aoa[$_]}, substr ($seqs[$_]{'seq'}, $c, 3);
}
$c+=3;
}
for (1..$bootstrap_rpt) {
my $sampled = _resample (\@btstrp_aoa);
my ($ds, $dn) = $self->_get_av_ds_dn ($sampled) ; # is array ref
push @{$bootstrap_values{'ds'}}, $ds;
push @{$bootstrap_values{'dn'}}, $dn;
}
$results->{'D_s_var'} = sampling_variance($bootstrap_values{'ds'});
$results->{'D_n_var'} = sampling_variance($bootstrap_values{'dn'});
$results->{'z_score'} = ($results->{'D_n'} - $results->{'D_s'}) /
sqrt($results->{'D_s_var'} + $results->{'D_n_var'} );
#print "bootstrapped var_syn = $results->{'D_s_var'} \n" ;
#print "bootstrapped var_nc = $results->{'D_n_var'} \n";
#print "z is $results->{'z_score'}\n"; ### end of global set up of/perm look up data
}
sub _resample {
( run in 1.106 second using v1.01-cache-2.11-cpan-71847e10f99 )