Algorithm-Cluster-Thresh
view release on metacpan or search on metacpan
t/cutthresh.t view on Meta::CPAN
is (dist(3,7), 8.7, 'dist col major');
# Basic clustering test, inter-cluster distance > 1.5 (depending on method)
my $thresh = 1.5;
# Linkage methods
# http://en.wikipedia.org/wiki/Hierarchical_clustering#Linkage_criteria
# distance between clusters, according to single linkage (mimimum linkage)
# Distance is the min distance between any two cluster elementes
sub minlinkage {
my ($icluster, $jcluster) = @_;
my $min;
foreach my $i (@$icluster) {
foreach my $j (@$jcluster) {
my $dist = dist($i,$j);
if (!defined($min) || $dist < $min) { $min = $dist }
}
}
$min;
}
is(minlinkage([0..5],[6..11]), 1.1, 'minlinkage');
# distance between clusters, according to complete linkage (maximum linkage)
# Distance is the max distance between any two cluster elementes
sub avglinkage {
my ($icluster, $jcluster) = @_;
my $sum = 0;
foreach my $i (@$icluster) {
foreach my $j (@$jcluster) {
my $dist = dist($i,$j);
$sum += $dist;
}
}
$sum / (@$icluster * @$jcluster);
}
ok(avglinkage([0..5],[6..11]) > 5.7361, 'avglinkage');
ok(avglinkage([0..5],[6..11]) < 5.7362, 'avglinkage');
# distance between clusters, according to complete linkage (maximum linkage)
# Distance is the max distance between any two cluster elementes
sub maxlinkage {
my ($icluster, $jcluster) = @_;
my $max;
foreach my $i (@$icluster) {
foreach my $j (@$jcluster) {
my $dist = dist($i,$j);
if (!defined($max) || $dist > $max) { $max = $dist }
}
}
$max;
}
is(maxlinkage([0..5],[6..11]), 19.3, 'maxlinkage');
# single, average (UPGMA), and maximum (complete) linkage
my %dispatch = ('s'=>\&minlinkage,'a'=>\&avglinkage,'m'=>\&maxlinkage);
foreach my $method (keys %dispatch) {
my $tree = Algorithm::Cluster::treecluster(data=>$matrix,method=>$method);
# Test that all data is in the tree
is (scalar(@$matrix) - 1, $tree->length, "tree size ($method)");
# According to current $method
my $clusters = $tree->cutthresh($thresh);
# Tree cointains only internal nodes
# Num of internal nodes is one less than leaf nodes
is (scalar(@$clusters) - 1, $tree->length, "num clusters ($method)");
# Given a cluster id, what data indexes are in it, i.e. reverse map
my @clustermap;
for (my $i = 0; $i < @$clusters; $i++) {
my $cluster = $clusters->[$i];
push @{$clustermap[$cluster]}, $i;
}
# For every pair of clusters,
# verify that inter-cluster distance (given $method) doesn't exceed $thresh
for (my $i = 0; $i < @clustermap - 1; $i++) {
for (my $j = $i+1; $j < @clustermap; $j++) {
# Dispatch table to call appropriate metric, i.e. minlinkage when 's'
my $dist = $dispatch{$method}->($clustermap[$i],$clustermap[$j]);
ok $dist > $thresh,
sprintf "%5.2f < %5.2f for clusters %2d and %2d ($method)",
$dist, $thresh, $i, $j;
}
}
}
done_testing;
( run in 0.489 second using v1.01-cache-2.11-cpan-140bd7fdf52 )