Algorithm-Cluster
view release on metacpan or search on metacpan
src/cluster.c view on Meta::CPAN
if (distance < cutoff) {
const double dweight = exp(exponent*log(1-distance/cutoff));
/* pow() causes a crash on AIX */
result[i] += dweight;
result[j] += dweight;
}
}
}
for (i = 0; i < nelements; i++) result[i] = 1.0/result[i];
return result;
}
/* ******************************************************************** */
int
cuttree(int nelements, const Node* tree, int nclusters, int clusterid[])
/*
Purpose
=======
The cuttree routine takes the output of a hierarchical clustering routine, and
divides the elements in the tree structure into clusters based on the
hierarchical clustering result. The number of clusters is specified by the
user.
Arguments
=========
nelements (input) int
The number of elements that were clustered.
tree (input) Node[nelements-1]
The clustering solution. Each node in the array describes one linking event,
with tree[i].left and tree[i].right representing the elements that were joined.
The original elements are numbered 0..nelements-1, nodes are numbered
-1..-(nelements-1).
nclusters (input) int
The number of clusters to be formed.
clusterid (output) int[nelements]
The number of the cluster to which each element was assigned. Clusters are
numbered 0..nclusters-1 in the left-to-right order in which they appear in the
hierarchical clustering tree. Space for the clusterid array should be allocated
before calling the cuttree routine.
Return value
============
If no errors occur, cuttree returns 1.
If a memory error occurs, cuttree returns 0.
========================================================================
*/
{
int i = -nelements+1; /* top node */
int j;
int k = -1;
int previous = nelements;
const int n = nelements-nclusters; /* number of nodes to join */
int* parents;
if (nclusters == 1) {
for (i = 0; i < nelements; i++) clusterid[i] = 0;
return 1;
}
parents = malloc((nelements-1)*sizeof(int));
if (!parents) return 0;
while (1) {
if (i >= 0) {
clusterid[i] = k;
j = i;
i = previous;
previous = j;
}
else {
j = -i-1;
if (previous == tree[j].left) {
previous = i;
i = tree[j].right;
if (j >= n && (i >= 0 || -i-1 < n)) k++;
}
else if (previous == tree[j].right) {
previous = i;
i = parents[j];
if (i == nelements) break;
}
else {
parents[j] = previous;
previous = i;
i = tree[j].left;
if (j >= n && (i >= 0 || -i-1 < n)) k++;
}
}
}
free(parents);
return 1;
}
/* ******************************************************************** */
static Node*
pclcluster(int nrows, int ncolumns, double** data, int** mask, double weight[],
double** distmatrix, char dist, int transpose)
/*
Purpose
=======
The pclcluster routine performs clustering using pairwise centroid-linking on a
given set of gene expression data, using the distance metric given by dist.
Arguments
=========
nrows (input) int
The number of rows in the gene expression data matrix, equal to the number of
genes.
ncolumns (input) int
The number of columns in the gene expression data matrix, equal to the number
of samples.
data (input) double[nrows][ncolumns]
The array containing the gene expression data.
mask (input) int[nrows][ncolumns]
This array shows which data values are missing. If
mask[i][j] == 0, then data[i][j] is missing.
weight (input) double[ncolumns] if transpose == 0;
double[nrows] otherwise
The weights that are used to calculate the distance. This is equivalent
to including the jth data point weight[j] times in the calculation. The
weights can be non-integer.
transpose (input) int
If transpose == 0, the rows of the matrix are clustered. Otherwise, columns
of the matrix are clustered.
dist (input) char
Defines which distance measure is used, as given by the table:
dist == 'e': Euclidean distance
dist == 'b': City-block distance
dist == 'c': correlation
dist == 'a': absolute value of the correlation
dist == 'u': uncentered correlation
dist == 'x': absolute uncentered correlation
dist == 's': Spearman's rank correlation
dist == 'k': Kendall's tau
For other values of dist, the default (Euclidean distance) is used.
distmatrix (input) double**
The distance matrix. This matrix is precalculated by the calling routine
treecluster. The pclcluster routine modifies the contents of distmatrix, but
( run in 0.735 second using v1.01-cache-2.11-cpan-2398b32b56e )