BioPerl
view release on metacpan or search on metacpan
Bio/Align/DNAStatistics.pm view on Meta::CPAN
$gap[$i][$j]++;
} elsif( $c2 =~ /^$GCChhars$/i ) {
$gc[$i][$j]++;
}
}
$gc[$i][$j] = ( $gc[$i][$j] /
($length - $gap[$i][$j]) );
$j++;
}
$i++;
}
for( $i = 0; $i < $seqct-1; $i++ ) {
# (diagonals) distance is 0 for same sequence
$dist{$names[$i]}->{$names[$i]} = [$i,$i];
$values[$i][$i] = sprintf($precisionstr,0);
for( $j = $i+1; $j < $seqct; $j++ ) {
my $pairwise = $aln->select_noncont($i+1,$j+1);
my $L = $self->pairwise_stats->number_of_comparable_bases($pairwise);
my $P = $self->transitions($pairwise) / $L;
my $Q = $self->transversions($pairwise) / $L;
my $C = $gc[$i][$j] + $gc[$j][$i]-
( 2 * $gc[$i][$j] * $gc[$j][$i] );
if( $P ) {
$P = $P / $C;
}
my $d = -($C * log(1- $P - $Q)) -(0.5* ( 1 - $C) * log(1 - 2 * $Q));
# fwd and rev lookup
$dist{$names[$i]}->{$names[$j]} = [$i,$j];
$dist{$names[$j]}->{$names[$i]} = [$i,$j];
$values[$j][$i] = $values[$i][$j] = sprintf($precisionstr,$d);
# (diagonals) distance is 0 for same sequence
$dist{$names[$j]}->{$names[$j]} = [$j,$j];
$values[$j][$j] = sprintf($precisionstr,0);
}
}
return Bio::Matrix::PhylipDist->new(-program => 'bioperl_DNAstats',
-matrix => \%dist,
-names => \@names,
-values => \@values);
}
=head2 D_F84
Title : D_F84
Usage : my $d = $stat->D_F84($aln)
Function: Calculates D (pairwise distance) between 2 sequences in an
alignment using the Felsenstein 1984 distance model.
Returns : L<Bio::Matrix::PhylipDist>
Args : L<Bio::Align::AlignI> of DNA sequences
[optional] double - gap penalty
=cut
sub D_F84 {
my ($self,$aln,$gappenalty) = @_;
return 0 unless $self->_check_arg($aln);
$self->throw_not_implemented();
# ambiguities ignored at this point
my (@seqs,@names,@values,%dist);
my $seqct = 0;
foreach my $seq ( $aln->each_seq) {
# if there is no name,
my $id = $seq->display_id;
if( ! length($id) || # deal with empty names
$id =~ /^\s+$/ ) {
$id = $seqct+1;
}
push @names, $id;
push @seqs, uc $seq->seq();
$seqct++;
}
my $precisionstr = "%.$Precision"."f";
for( my $i = 0; $i < $seqct-1; $i++ ) {
# (diagonals) distance is 0 for same sequence
$dist{$names[$i]}->{$names[$i]} = [$i,$i];
$values[$i][$i] = sprintf($precisionstr,0);
for( my $j = $i+1; $j < $seqct; $j++ ) {
}
}
}
# Tajima and Nei, Mol. Biol. Evol. 1984, 1, 269.
# Tajima-Nei correction used for multiple substitutions in the calc
# of the distance matrix. Nucleic acids only.
#
# D = p-distance = 1 - (matches/(posns_scored + gaps)
#
# distance = -b * ln(1-D/b)
#
=head2 D_TajimaNei
Title : D_TajimaNei
Usage : my $d = $stat->D_TajimaNei($aln)
Function: Calculates D (pairwise distance) between 2 sequences in an
alignment using the TajimaNei 1984 distance model.
Returns : L<Bio::Matrix::PhylipDist>
Args : Bio::Align::AlignI of DNA sequences
=cut
sub D_TajimaNei{
my ($self,$aln) = @_;
return 0 unless $self->_check_arg($aln);
# ambiguities ignored at this point
my (@seqs,@names,@values,%dist);
my $seqct = 0;
foreach my $seq ( $aln->each_seq) {
# if there is no name,
push @names, $seq->display_id;
push @seqs, uc $seq->seq();
$seqct++;
}
( run in 0.525 second using v1.01-cache-2.11-cpan-39bf76dae61 )