BioPerl

 view release on metacpan or  search on metacpan

Bio/Align/DNAStatistics.pm  view on Meta::CPAN

 Returns  : a reference to a hash of statistics with keys as 
            listed in Description.

=cut

sub calc_KaKs_pair {
    my ( $self, $aln, $seq1_id, $seq2_id) = @_;
    $self->throw("Needs 3 arguments - an alignment object, and 2 sequence ids") 
	if @_!= 4;
    $self->throw ("This calculation needs a Bio::Align::AlignI compatible object, not a [ " . ref($aln) . " ]object") unless $aln->isa('Bio::Align::AlignI');
    my @seqs = (
		#{id => $seq1_id, seq =>($aln->each_seq_with_id($seq1_id))[0]->seq},
		#{id => $seq2_id, seq =>($aln->each_seq_with_id($seq2_id))[0]->seq}
		{id => $seq1_id, seq => uc(($aln->each_seq_with_id($seq1_id))[0]->seq)},
                {id => $seq2_id, seq => uc(($aln->each_seq_with_id($seq2_id))[0]->seq)}
	       ) ;
    if (length($seqs[0]{'seq'}) != length($seqs[1]{'seq'})) {
	$self->throw(" aligned sequences must be of equal length!");
    }
    my $results = [];
    $self->_get_av_ds_dn(\@seqs, $results);
    return $results;

}

=head2 calc_all_KaKs_pairs

 Title    : calc_all_KaKs_pairs
 Useage   : my $results2 = $stats->calc_KaKs_pair($alnobj).
 Function : Calculates Nei_gojobori statistics for all pairwise
            combinations in sequence.
 Arguments: A Bio::Align::ALignI compliant object such as
            a Bio::SimpleAlign object.
 Returns  : A reference to an array of hashes of statistics of
            all pairwise comparisons in the alignment.

=cut



sub calc_all_KaKs_pairs {
#returns a multi_element_array with all pairwise comparisons
	my ($self,$aln) = @_;
	$self->throw ("This calculation needs a Bio::Align::AlignI compatible object, not a [ " . ref($aln) . " ]object") unless $aln->isa('Bio::Align::AlignI');
	my @seqs;
	for my $seq ($aln->each_seq) {
		push @seqs, {id => $seq->display_id, seq=>$seq->seq};
		}
	my $results ;
	$results = $self->_get_av_ds_dn(\@seqs, $results);
	return $results;
}

=head2 calc_average_KaKs

 Title    : calc_average_KaKs.  
 Useage   : my $res= $stats->calc_average_KaKs($alnobj, 1000).
 Function : calculates Nei_Gojobori stats for average of all 
            sequences in the alignment.
 Args     : A Bio::Align::AlignI compliant object such as a
            Bio::SimpleAlign object, number of bootstrap iterations
            (default 1000).
 Returns  : A reference to a hash of statistics as listed in Description.

=cut

sub calc_average_KaKs {
#calculates global value for sequences in alignment using bootstrapping
#this is quite slow (~10 seconds per  3 X 200nt seqs); 
    my ($self, $aln, $bootstrap_rpt) = @_;
    $bootstrap_rpt ||= 1000;
    $self->throw ("This calculation needs a Bio::Align::AlignI compatible object, not a [ " . ref($aln) . " ]object") unless $aln->isa('Bio::Align::AlignI');
    my @seqs;
    for my $seq ($aln->each_seq) {
	push @seqs, {id => $seq->display_id, seq=>$seq->seq};
    }
    my $results ;
    my ($ds_orig, $dn_orig) = $self->_get_av_ds_dn(\@seqs);
    #print "ds = $ds_orig, dn = $dn_orig\n";
    $results = {D_s => $ds_orig, D_n => $dn_orig};
    $self->_run_bootstrap(\@seqs, $results, $bootstrap_rpt);
    return $results;
}

############## primary internal subs for alignment comparisons ########################

sub _run_bootstrap {
    ### generates sampled sequences, calculates Ds and Dn values,
    ### then calculates variance of sampled sequences and add results to results hash
    ### 
    my ($self,$seq_ref, $results, $bootstrap_rpt) = @_;	
    my @seqs = @$seq_ref;
    my @btstrp_aoa; # to hold array of array of nucleotides for resampling
    my %bootstrap_values = (ds => [], dn =>[]);	# to hold list of av values 

    #1st make alternative array of codons;
    my $c = 0;
    while ($c < length $seqs[0]{'seq'}) {
	for (0..$#seqs) {
	    push @{$btstrp_aoa[$_]}, substr ($seqs[$_]{'seq'}, $c, 3);
	}
	$c+=3;
    }

    for (1..$bootstrap_rpt) {
	my $sampled = _resample (\@btstrp_aoa);
	my ($ds, $dn) = $self->_get_av_ds_dn ($sampled) ; # is array ref
	push @{$bootstrap_values{'ds'}}, $ds;
	push @{$bootstrap_values{'dn'}}, $dn;
    }	

    $results->{'D_s_var'} = sampling_variance($bootstrap_values{'ds'});
    $results->{'D_n_var'} = sampling_variance($bootstrap_values{'dn'});
    $results->{'z_score'} = 	($results->{'D_n'} - $results->{'D_s'}) / 
	sqrt($results->{'D_s_var'} + $results->{'D_n_var'} ); 
    #print "bootstrapped var_syn = 	$results->{'D_s_var'} \n" ;
    #print "bootstrapped var_nc = 	$results->{'D_n_var'} \n"; 
    #print "z is $results->{'z_score'}\n";	### end of global set up of/perm look up data
}

sub _resample {



( run in 1.106 second using v1.01-cache-2.11-cpan-71847e10f99 )