BioPerl

 view release on metacpan or  search on metacpan

Bio/Align/DNAStatistics.pm  view on Meta::CPAN

		   $gap[$i][$j]++;	
	       } elsif( $c2 =~ /^$GCChhars$/i ) {
		   $gc[$i][$j]++;
	       } 
	   }
	   $gc[$i][$j] = ( $gc[$i][$j] / 
			   ($length - $gap[$i][$j]) );
	   $j++;
       }
       $i++;
   }
   
   for( $i = 0; $i < $seqct-1; $i++ ) {
       # (diagonals) distance is 0 for same sequence
       $dist{$names[$i]}->{$names[$i]} = [$i,$i];
       $values[$i][$i] = sprintf($precisionstr,0);
       
       for( $j = $i+1; $j < $seqct; $j++ ) {
	   
	   my $pairwise = $aln->select_noncont($i+1,$j+1);
	   my $L = $self->pairwise_stats->number_of_comparable_bases($pairwise);
	   my $P = $self->transitions($pairwise) / $L;
	   my $Q = $self->transversions($pairwise) / $L;
	   my $C = $gc[$i][$j] + $gc[$j][$i]- 
	       ( 2 * $gc[$i][$j] * $gc[$j][$i] );
	   if( $P ) {
	       $P = $P / $C;
	   }
	   my $d = -($C * log(1- $P - $Q)) -(0.5* ( 1 - $C) * log(1 - 2 * $Q));
           # fwd and rev lookup
	   $dist{$names[$i]}->{$names[$j]} = [$i,$j];
	   $dist{$names[$j]}->{$names[$i]} = [$i,$j];	   
	   $values[$j][$i] = $values[$i][$j] = sprintf($precisionstr,$d);
           # (diagonals) distance is 0 for same sequence
	   $dist{$names[$j]}->{$names[$j]} = [$j,$j];
	   $values[$j][$j] = sprintf($precisionstr,0); 
       }
   }
   return Bio::Matrix::PhylipDist->new(-program => 'bioperl_DNAstats',
				       -matrix  => \%dist,
				       -names   => \@names,
				       -values  => \@values); 

}

=head2 D_F84

 Title   : D_F84
 Usage   : my $d = $stat->D_F84($aln)
 Function: Calculates D (pairwise distance) between 2 sequences in an 
           alignment using the Felsenstein 1984 distance model. 
 Returns : L<Bio::Matrix::PhylipDist>
 Args    : L<Bio::Align::AlignI> of DNA sequences
           [optional] double - gap penalty

=cut

sub D_F84 {
   my ($self,$aln,$gappenalty) = @_;
   return 0 unless $self->_check_arg($aln);
   $self->throw_not_implemented();
   # ambiguities ignored at this point
   my (@seqs,@names,@values,%dist);
   my $seqct = 0;
   foreach my $seq ( $aln->each_seq) {
       # if there is no name, 
       my $id = $seq->display_id;
       if( ! length($id) ||       # deal with empty names
	   $id =~ /^\s+$/ ) {
	   $id = $seqct+1;
       }
       push @names, $id;
       push @seqs, uc $seq->seq();
       $seqct++;
   }

   my $precisionstr = "%.$Precision"."f";

   for( my $i = 0; $i < $seqct-1; $i++ ) {
       # (diagonals) distance is 0 for same sequence
       $dist{$names[$i]}->{$names[$i]} = [$i,$i];
       $values[$i][$i] = sprintf($precisionstr,0);

       for( my $j = $i+1; $j < $seqct; $j++ ) {
       }
   }   
}

# Tajima and Nei, Mol. Biol. Evol. 1984, 1, 269.
#  Tajima-Nei correction used for multiple substitutions in the calc
# of the distance matrix. Nucleic acids only.
#
#  D = p-distance = 1 - (matches/(posns_scored + gaps)
#
#  distance = -b * ln(1-D/b)
#

=head2 D_TajimaNei

 Title   : D_TajimaNei
 Usage   : my $d = $stat->D_TajimaNei($aln)
 Function: Calculates D (pairwise distance) between 2 sequences in an 
           alignment using the TajimaNei 1984 distance model. 
 Returns : L<Bio::Matrix::PhylipDist>
 Args    : Bio::Align::AlignI of DNA sequences


=cut

sub D_TajimaNei{
   my ($self,$aln) = @_;
   return 0 unless $self->_check_arg($aln);
   # ambiguities ignored at this point
   my (@seqs,@names,@values,%dist);
   my $seqct = 0;
   foreach my $seq ( $aln->each_seq) {
       # if there is no name, 
       push @names, $seq->display_id;
       push @seqs, uc $seq->seq();
       $seqct++;
   }



( run in 0.525 second using v1.01-cache-2.11-cpan-39bf76dae61 )