Algorithm-Cluster
view release on metacpan or search on metacpan
src/cluster.c view on Meta::CPAN
else stddata[i] = 1;
}
}
else {
for (i = 0; i < nelements; i++) {
int n = 0;
for (j = 0; j < ndata; j++) {
if (mask[j][i]) {
double term = data[j][i];
term = term * term;
stddata[i] += term;
n++;
}
}
if (stddata[i] > 0) stddata[i] = sqrt(stddata[i]/n);
else stddata[i] = 1;
}
}
if (transpose == 0) {
dummymask = malloc(nygrid*sizeof(int*));
for (i = 0; i < nygrid; i++) {
dummymask[i] = malloc(ndata*sizeof(int));
for (j = 0; j < ndata; j++) dummymask[i][j] = 1;
}
}
else {
dummymask = malloc(ndata*sizeof(int*));
for (i = 0; i < ndata; i++) {
dummymask[i] = malloc(sizeof(int));
dummymask[i][0] = 1;
}
}
/* Randomly initialize the nodes */
for (ix = 0; ix < nxgrid; ix++) {
for (iy = 0; iy < nygrid; iy++) {
double sum = 0.;
for (i = 0; i < ndata; i++) {
double term = -1.0 + 2.0*uniform();
celldata[ix][iy][i] = term;
sum += term * term;
}
sum = sqrt(sum/ndata);
for (i = 0; i < ndata; i++) celldata[ix][iy][i] /= sum;
}
}
/* Randomize the order in which genes or arrays will be used */
index = malloc(nelements*sizeof(int));
for (i = 0; i < nelements; i++) index[i] = i;
for (i = 0; i < nelements; i++) {
j = (int) (i + (nelements-i)*uniform());
ix = index[j];
index[j] = index[i];
index[i] = ix;
}
/* Start the iteration */
for (iter = 0; iter < niter; iter++) {
int ixbest = 0;
int iybest = 0;
int iobject = iter % nelements;
iobject = index[iobject];
if (transpose == 0) {
double closest = metric(ndata, data, celldata[ixbest], mask,
dummymask, weights, iobject, iybest,
transpose);
double radius = maxradius * (1. - ((double)iter)/((double)niter));
double tau = inittau * (1. - ((double)iter)/((double)niter));
for (ix = 0; ix < nxgrid; ix++) {
for (iy = 0; iy < nygrid; iy++) {
double distance = metric(ndata, data, celldata[ix], mask,
dummymask, weights, iobject, iy,
transpose);
if (distance < closest) {
ixbest = ix;
iybest = iy;
closest = distance;
}
}
}
for (ix = 0; ix < nxgrid; ix++) {
for (iy = 0; iy < nygrid; iy++) {
if (sqrt((ix-ixbest)*(ix-ixbest)+(iy-iybest)*(iy-iybest)) <
radius) {
double sum = 0.;
for (i = 0; i < ndata; i++) {
if (mask[iobject][i] == 0) continue;
celldata[ix][iy][i] +=
tau * (data[iobject][i]/stddata[iobject]
-celldata[ix][iy][i]);
}
for (i = 0; i < ndata; i++) {
double term = celldata[ix][iy][i];
term = term * term;
sum += term;
}
if (sum>0) {
sum = sqrt(sum/ndata);
for (i = 0; i < ndata; i++)
celldata[ix][iy][i] /= sum;
}
}
}
}
}
else {
double closest;
double** celldatavector = malloc(ndata*sizeof(double*));
double radius = maxradius * (1. - ((double)iter)/((double)niter));
double tau = inittau * (1. - ((double)iter)/((double)niter));
for (i = 0; i < ndata; i++)
celldatavector[i] = &(celldata[ixbest][iybest][i]);
closest = metric(ndata, data, celldatavector, mask, dummymask,
weights, iobject, 0, transpose);
for (ix = 0; ix < nxgrid; ix++) {
for (iy = 0; iy < nygrid; iy++) {
double distance;
for (i = 0; i < ndata; i++)
celldatavector[i] = &(celldata[ixbest][iybest][i]);
distance = metric(ndata, data, celldatavector, mask,
dummymask, weights, iobject, 0,
transpose);
if (distance < closest) {
ixbest = ix;
iybest = iy;
closest = distance;
}
}
}
free(celldatavector);
for (ix = 0; ix < nxgrid; ix++) {
for (iy = 0; iy < nygrid; iy++) {
if (sqrt((ix-ixbest)*(ix-ixbest)+(iy-iybest)*(iy-iybest)) <
radius) {
double sum = 0.;
for (i = 0; i < ndata; i++) {
if (mask[i][iobject] == 0) continue;
celldata[ix][iy][i] +=
tau * (data[i][iobject]/stddata[iobject]
-celldata[ix][iy][i]);
}
for (i = 0; i < ndata; i++) {
double term = celldata[ix][iy][i];
term = term * term;
sum += term;
}
if (sum>0) {
sum = sqrt(sum/ndata);
for (i = 0; i < ndata; i++)
celldata[ix][iy][i] /= sum;
}
}
}
}
}
}
if (transpose == 0)
for (i = 0; i < nygrid; i++) free(dummymask[i]);
else
for (i = 0; i < ndata; i++) free(dummymask[i]);
free(dummymask);
free(stddata);
free(index);
}
/* ******************************************************************* */
static void
somassign(int nrows, int ncolumns, double** data, int** mask,
const double weights[], int transpose, int nxgrid, int nygrid,
double*** celldata, char dist, int clusterid[][2])
/* Collect clusterids */
{
const int ndata = (transpose == 0) ? ncolumns : nrows;
int i, j;
/* Set the metric function as indicated by dist */
double (*metric) (int, double**, double**, int**, int**,
const double[], int, int, int) = setmetric(dist);
if (transpose == 0) {
int** dummymask = malloc(nygrid*sizeof(int*));
for (i = 0; i < nygrid; i++) {
dummymask[i] = malloc(ncolumns*sizeof(int));
for (j = 0; j < ncolumns; j++) dummymask[i][j] = 1;
}
for (i = 0; i < nrows; i++) {
int ixbest = 0;
int iybest = 0;
double closest = metric(ndata, data, celldata[ixbest], mask,
dummymask, weights, i, iybest, transpose);
int ix, iy;
for (ix = 0; ix < nxgrid; ix++) {
for (iy = 0; iy < nygrid; iy++) {
double distance = metric(ndata, data, celldata[ix], mask,
dummymask, weights, i, iy,
transpose);
if (distance < closest) {
ixbest = ix;
iybest = iy;
closest = distance;
}
}
}
clusterid[i][0] = ixbest;
clusterid[i][1] = iybest;
}
for (i = 0; i < nygrid; i++) free(dummymask[i]);
free(dummymask);
}
else {
double** celldatavector = malloc(ndata*sizeof(double*));
int** dummymask = malloc(nrows*sizeof(int*));
int ixbest = 0;
int iybest = 0;
for (i = 0; i < nrows; i++) {
dummymask[i] = malloc(sizeof(int));
dummymask[i][0] = 1;
}
for (i = 0; i < ncolumns; i++) {
double closest;
int ix, iy;
for (j = 0; j < ndata; j++)
celldatavector[j] = &(celldata[ixbest][iybest][j]);
closest = metric(ndata, data, celldatavector, mask, dummymask,
weights, i, 0, transpose);
for (ix = 0; ix < nxgrid; ix++) {
for (iy = 0; iy < nygrid; iy++) {
double distance;
for (j = 0; j < ndata; j++)
celldatavector[j] = &(celldata[ix][iy][j]);
distance = metric(ndata, data, celldatavector, mask,
dummymask, weights, i, 0, transpose);
if (distance < closest) {
ixbest = ix;
iybest = iy;
closest = distance;
}
}
}
clusterid[i][0] = ixbest;
clusterid[i][1] = iybest;
}
free(celldatavector);
for (i = 0; i < nrows; i++) free(dummymask[i]);
free(dummymask);
}
}
/* ******************************************************************* */
void
somcluster(int nrows, int ncolumns, double** data, int** mask,
const double weight[], int transpose, int nxgrid, int nygrid,
double inittau, int niter, char dist, double*** celldata,
int clusterid[][2])
/*
Purpose
=======
The somcluster routine implements a self-organizing map (Kohonen) on a
rectangular grid, using a given set of vectors. The distance measure to be
used to find the similarity between genes and nodes is given by dist.
Arguments
=========
nrows (input) int
The number of rows in the data matrix, equal to the number of genes.
ncolumns (input) int
The number of columns in the data matrix, equal to the number of samples.
data (input) double[nrows][ncolumns]
The array containing the gene expression data.
mask (input) int[nrows][ncolumns]
This array shows which data values are missing. If
mask[i][j] == 0, then data[i][j] is missing.
weight (input) double[ncolumns] if transpose == 0;
double[nrows] otherwise
The weights that are used to calculate the distance. This is equivalent
to including the jth data point weight[j] times in the calculation. The
weights can be non-integer.
transpose (input) int
If transpose == 0, the rows (genes) of the matrix are clustered. Otherwise,
columns (samples) of the matrix are clustered.
nxgrid (input) int
The number of grid cells horizontally in the rectangular topology of clusters.
nygrid (input) int
The number of grid cells horizontally in the rectangular topology of clusters.
inittau (input) double
The initial value of tau, representing the neighborhood function.
niter (input) int
The number of iterations to be performed.
( run in 0.719 second using v1.01-cache-2.11-cpan-df04353d9ac )