Algorithm-Cluster-Thresh
view release on metacpan or search on metacpan
t/cutthresh.t view on Meta::CPAN
if (!defined($min) || $dist < $min) { $min = $dist }
}
}
$min;
}
is(minlinkage([0..5],[6..11]), 1.1, 'minlinkage');
# distance between clusters, according to complete linkage (maximum linkage)
# Distance is the max distance between any two cluster elementes
sub avglinkage {
my ($icluster, $jcluster) = @_;
my $sum = 0;
foreach my $i (@$icluster) {
foreach my $j (@$jcluster) {
my $dist = dist($i,$j);
$sum += $dist;
}
}
$sum / (@$icluster * @$jcluster);
}
ok(avglinkage([0..5],[6..11]) > 5.7361, 'avglinkage');
ok(avglinkage([0..5],[6..11]) < 5.7362, 'avglinkage');
# distance between clusters, according to complete linkage (maximum linkage)
# Distance is the max distance between any two cluster elementes
sub maxlinkage {
my ($icluster, $jcluster) = @_;
my $max;
foreach my $i (@$icluster) {
foreach my $j (@$jcluster) {
my $dist = dist($i,$j);
if (!defined($max) || $dist > $max) { $max = $dist }
}
}
$max;
}
is(maxlinkage([0..5],[6..11]), 19.3, 'maxlinkage');
# single, average (UPGMA), and maximum (complete) linkage
my %dispatch = ('s'=>\&minlinkage,'a'=>\&avglinkage,'m'=>\&maxlinkage);
foreach my $method (keys %dispatch) {
my $tree = Algorithm::Cluster::treecluster(data=>$matrix,method=>$method);
# Test that all data is in the tree
is (scalar(@$matrix) - 1, $tree->length, "tree size ($method)");
# According to current $method
my $clusters = $tree->cutthresh($thresh);
# Tree cointains only internal nodes
# Num of internal nodes is one less than leaf nodes
is (scalar(@$clusters) - 1, $tree->length, "num clusters ($method)");
# Given a cluster id, what data indexes are in it, i.e. reverse map
my @clustermap;
for (my $i = 0; $i < @$clusters; $i++) {
my $cluster = $clusters->[$i];
push @{$clustermap[$cluster]}, $i;
}
# For every pair of clusters,
# verify that inter-cluster distance (given $method) doesn't exceed $thresh
for (my $i = 0; $i < @clustermap - 1; $i++) {
for (my $j = $i+1; $j < @clustermap; $j++) {
# Dispatch table to call appropriate metric, i.e. minlinkage when 's'
my $dist = $dispatch{$method}->($clustermap[$i],$clustermap[$j]);
ok $dist > $thresh,
sprintf "%5.2f < %5.2f for clusters %2d and %2d ($method)",
$dist, $thresh, $i, $j;
}
}
}
done_testing;
( run in 1.504 second using v1.01-cache-2.11-cpan-13bb782fe5a )