Bio-Phylo
view release on metacpan or search on metacpan
lib/Bio/Phylo/Matrices/MatrixRole.pm view on Meta::CPAN
$self->set_matchchar( $match );
}
return $self->get_matchchar;
}
sub gap_char {
my ( $self, $gap ) = @_;
if ( defined $gap ) {
$self->set_gap( $gap );
}
return $self->get_gap;
}
sub symbol_chars {
my ( $self, $includeextra ) = @_;
my %seen;
for my $row ( @{ $self->get_entities } ) {
my @char = $row->get_char;
$seen{$_} = 1 for @char;
}
return keys %seen if $includeextra;
my $special_values = $self->get_special_symbols;
my %special_keys = map { $_ => 1 } values %{ $special_values };
return grep { ! $special_keys{$_} } keys %seen;
}
sub consensus_string {
my $self = shift;
my $to = $self->get_type_object;
my $ntax = $self->get_ntax;
my $nchar = $self->get_nchar;
my @consensus;
for my $i ( 0 .. $ntax - 1 ) {
my ( @column, %column );
for my $j ( 0 .. $nchar - 1 ) {
$column{ $self->get_by_index($i)->get_by_index($j) } = 1;
}
@column = keys %column;
push @consensus, $to->get_symbol_for_states(@column);
}
return join '', @consensus;
}
sub consensus_iupac {
$logger->warn
}
sub is_flush { 1 }
sub length { shift->get_nchar }
sub maxname_length { $logger->warn }
sub no_residues { $logger->warn }
sub no_sequences {
my $self = shift;
return scalar @{ $self->get_entities };
}
sub percentage_identity { $logger->warn }
# from simplealign
sub average_percentage_identity{
my ($self,@args) = @_;
my @alphabet = ('A','B','C','D','E','F','G','H','I','J','K','L','M',
'N','O','P','Q','R','S','T','U','V','W','X','Y','Z');
my ($len, $total, $subtotal, $divisor, $subdivisor, @seqs, @countHashes);
if (! $self->is_flush()) {
throw 'Generic' => "All sequences in the alignment must be the same length";
}
@seqs = $self->each_seq();
$len = $self->length();
# load the each hash with correct keys for existence checks
for( my $index=0; $index < $len; $index++) {
foreach my $letter (@alphabet) {
$countHashes[$index] = {} if not $countHashes[$index];
$countHashes[$index]->{$letter} = 0;
}
}
foreach my $seq (@seqs) {
my @seqChars = split //, $seq->seq();
for( my $column=0; $column < @seqChars; $column++ ) {
my $char = uc($seqChars[$column]);
if (exists $countHashes[$column]->{$char}) {
$countHashes[$column]->{$char}++;
}
}
}
$total = 0;
$divisor = 0;
for(my $column =0; $column < $len; $column++) {
my %hash = %{$countHashes[$column]};
$subdivisor = 0;
foreach my $res (keys %hash) {
$total += $hash{$res}*($hash{$res} - 1);
$subdivisor += $hash{$res};
}
$divisor += $subdivisor * ($subdivisor - 1);
}
return $divisor > 0 ? ($total / $divisor )*100.0 : 0;
}
# from simplealign
sub overall_percentage_identity{
my ($self, $length_measure) = @_;
my @alphabet = ('A','B','C','D','E','F','G','H','I','J','K','L','M',
'N','O','P','Q','R','S','T','U','V','W','X','Y','Z');
my ($len, $total, @seqs, @countHashes);
my %enum = map {$_ => 1} qw (align short long);
throw 'Generic' => "Unknown argument [$length_measure]"
if $length_measure and not $enum{$length_measure};
$length_measure ||= 'align';
if (! $self->is_flush()) {
throw 'Generic' => "All sequences in the alignment must be the same length";
}
@seqs = $self->each_seq();
$len = $self->length();
# load the each hash with correct keys for existence checks
for( my $index=0; $index < $len; $index++) {
foreach my $letter (@alphabet) {
$countHashes[$index] = {} if not $countHashes[$index];
$countHashes[$index]->{$letter} = 0;
}
}
foreach my $seq (@seqs) {
my @seqChars = split //, $seq->seq();
for( my $column=0; $column < @seqChars; $column++ ) {
my $char = uc($seqChars[$column]);
if (exists $countHashes[$column]->{$char}) {
$countHashes[$column]->{$char}++;
}
}
}
$total = 0;
for(my $column =0; $column < $len; $column++) {
my %hash = %{$countHashes[$column]};
foreach ( values %hash ) {
next if( $_ == 0 );
$total++ if( $_ == scalar @seqs );
last;
}
}
if ($length_measure eq 'short') {
## find the shortest length
$len = 0;
foreach my $seq ($self->each_seq) {
my $count = $seq->seq =~ tr/[A-Za-z]//;
if ($len) {
$len = $count if $count < $len;
} else {
$len = $count;
}
}
}
elsif ($length_measure eq 'long') {
( run in 2.103 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )