Algorithm-Cluster-Thresh
view release on metacpan or search on metacpan
lib/Algorithm/Cluster/Thresh.pm view on Meta::CPAN
package Algorithm::Cluster::Thresh;
BEGIN {
$Algorithm::Cluster::Thresh::VERSION = '0.05';
}
# ABSTRACT: Adds thresholding to hierarchical clustering of Algorithm::Cluster
use Algorithm::Cluster;
# Add new method to standard package:
package Algorithm::Cluster::Tree;
BEGIN {
$Algorithm::Cluster::Tree::VERSION = '0.05';
}
use strict;
use warnings;
use 5.008;
sub cutthresh {
my ($tree, $thresh) = @_;
my @nodecluster;
my @leafcluster;
# Binary tree: number of internal nodes is 1 less than # of leafs
# Last node is the root, walking down the tree
my $icluster = 0;
# Elements in tree
my $length = $tree->length;
# Root node belongs to cluster 0
$nodecluster[$length-1] = $icluster++;
for (my $i = $length-1; $i >= 0; $i--) {
my $node = $tree->get($i);
# print sprintf "%3d %3d %.3f\n", $i,$nodecluster[$i], $node->distance;
my $left = $node->left;
# Nodes are numbered -1,-2,... Leafs are numbered 0,1,2,...
my $leftref = $left < 0 ? \$nodecluster[-$left-1] : \$leafcluster[$left];
my $assigncluster = $nodecluster[$i];
# Left is always the same as the parent node's cluster
$$leftref = $assigncluster;
# print sprintf "\tleft %3d %3d\n", $left, $$leftref;
my $right = $node->right;
# Put right into a new cluster, when thresh not satisfied
if ($node->distance > $thresh) { $assigncluster = $icluster++ }
my $rightref = $right < 0 ? \$nodecluster[-$right-1] : \$leafcluster[$right];
$$rightref = $assigncluster;
# print sprintf "\tright %3d %3d\n", $right, $$rightref;
}
return wantarray ? @leafcluster : \@leafcluster;
}
1;
__END__
=pod
=head1 NAME
Algorithm::Cluster::Thresh - Adds thresholding to hierarchical clustering of Algorithm::Cluster
=head1 VERSION
version 0.05
=head1 SYNOPSIS
use Algorithm::Cluster::Thresh;
# Assuming you have a lower diagonal distance matrix ...
# See L<Algorithm::Cluster> and / or L<Algorithm::DistanceMatrix>
my $distmatrix;
use Algorithm::Cluster qw/treecluster/;
my $tree = treecluster(data=>$distmatrix, method=>'a'); # 'a'verage linkage
# Get your objects and the cluster IDs they belong to
# Clusters are within 5.5 of each other (based on average linkage here)
my $cluster_ids = $tree->cutthresh(5.5);
# Index corresponds to that of the original objects
print 'Object 2 belongs to cluster number ', $cluster_ids->[2], "\n";
=head1 DESCRIPTION
This is a small helper package for L<Algorithm::Cluster>, but not an official
part of it. That manual can be found here:
http://cpansearch.perl.org/src/MDEHOON/Algorithm-Cluster-1.48/doc/cluster.pdf
This package adds a simple method C<$tree->cutthresh(5.5)> to permit clustering
by thresholds, rather than by needing to pre-define the number of clusters to
be created.
This is a Pure Perl module. It's not as efficient as the XS approach, which has
already been submitted as a patch:
https://rt.cpan.org/Public/Bug/Display.html?id=68482
In the meantime, this module provides a Pure Perl implementation.
( run in 2.201 seconds using v1.01-cache-2.11-cpan-524268b4103 )