Algorithm-Cluster-Thresh

 view release on metacpan or  search on metacpan

lib/Algorithm/Cluster/Thresh.pm  view on Meta::CPAN

use 5.008;

sub cutthresh {
    my ($tree, $thresh) = @_;   
    my @nodecluster;
    my @leafcluster;
    # Binary tree: number of internal nodes is 1 less than # of leafs
    # Last node is the root, walking down the tree
    my $icluster = 0;
    # Elements in tree
    my $length = $tree->length;
    # Root node belongs to cluster 0
    $nodecluster[$length-1] = $icluster++;
    for (my $i = $length-1; $i >= 0; $i--) {        
        my $node = $tree->get($i);
#        print sprintf "%3d %3d %.3f\n", $i,$nodecluster[$i], $node->distance;
        my $left = $node->left;
        # Nodes are numbered -1,-2,... Leafs are numbered 0,1,2,...
        my $leftref = $left < 0 ? \$nodecluster[-$left-1] : \$leafcluster[$left];
        my $assigncluster = $nodecluster[$i];
        # Left is always the same as the parent node's cluster
        $$leftref = $assigncluster;
#        print sprintf "\tleft  %3d %3d\n", $left, $$leftref;
        my $right = $node->right;

t/cutthresh.t  view on Meta::CPAN

    }
    $max;
}
is(maxlinkage([0..5],[6..11]), 19.3, 'maxlinkage');

# single, average (UPGMA), and maximum (complete) linkage
my %dispatch = ('s'=>\&minlinkage,'a'=>\&avglinkage,'m'=>\&maxlinkage);
foreach my $method (keys %dispatch) {
    my $tree = Algorithm::Cluster::treecluster(data=>$matrix,method=>$method);
    # Test that all data is in the tree
    is (scalar(@$matrix) - 1, $tree->length, "tree size ($method)");

    # According to current $method
    my $clusters = $tree->cutthresh($thresh);
    # Tree cointains only internal nodes
    # Num of internal nodes is one less than leaf nodes
    is (scalar(@$clusters) - 1, $tree->length, "num clusters ($method)");

    # Given a cluster id, what data indexes are in it, i.e. reverse map
    my @clustermap;
    for (my $i = 0; $i < @$clusters; $i++) {
        my $cluster = $clusters->[$i];
        push @{$clustermap[$cluster]}, $i;
    }
    
    # For every pair of clusters, 
    # verify that inter-cluster distance (given $method) doesn't exceed $thresh



( run in 0.547 second using v1.01-cache-2.11-cpan-65fba6d93b7 )