Algorithm-Cluster

 view release on metacpan or  search on metacpan

src/cluster.c  view on Meta::CPAN


/* ********************************************************************* */

static double
ucorrelation(int n, double** data1, double** data2, int** mask1, int** mask2,
             const double weight[], int index1, int index2, int transpose)
/*
Purpose
=======

The ucorrelation routine calculates the weighted Pearson distance between two
rows or columns, using the uncentered version of the Pearson correlation. In
the uncentered Pearson correlation, a zero mean is used for both vectors even
if the actual mean is nonzero.
This definition yields a semi-metric: d(a,b) >= 0, and d(a,b) = 0 iff a = b.
but the triangular inequality d(a,b) + d(b,c) >= d(a,c) does not hold
(e.g., choose b = a + c).

Arguments
=========

n        (input) int
The number of elements in a row or column. If transpose == 0, then n is the
number of columns; otherwise, n is the number of rows.

data1    (input) double array
The data array containing the first vector.

data2    (input) double array
The data array containing the second vector.

mask1    (input) int array
This array which elements in data1 are missing. If mask1[i][j] == 0, then
data1[i][j] is missing.

mask2    (input) int array
This array which elements in data2 are missing. If mask2[i][j] == 0, then
data2[i][j] is missing.

weight   (input) double[ncolumns] if transpose == 0,
                 double[nrows]    otherwise
The weights that are used to calculate the distance. This is equivalent
to including the jth data point weight[j] times in the calculation. The
weights can be non-integer.

index1   (input) int
Index of the first row or column.

index2   (input) int
Index of the second row or column.

transpose (input) int
If transpose == 0, the distance between two rows in the matrix is calculated.
Otherwise, the distance between two columns in the matrix is calculated.
============================================================================
*/
{
    double result = 0.;
    double denom1 = 0.;
    double denom2 = 0.;
    int flag = 0;

    /* flag will remain zero if no nonzero combinations of mask1 and mask2 are
     * found.
     */
    if (transpose == 0) /* Calculate the distance between two rows */ {
        int i;
        for (i = 0; i < n; i++) {
            if (mask1[index1][i] && mask2[index2][i]) {
                double term1 = data1[index1][i];
                double term2 = data2[index2][i];
                double w = weight[i];
                result += w*term1*term2;
                denom1 += w*term1*term1;
                denom2 += w*term2*term2;
                flag = 1;
            }
        }
    }
    else {
        int i;
        for (i = 0; i < n; i++) {
            if (mask1[i][index1] && mask2[i][index2]) {
                double term1 = data1[i][index1];
                double term2 = data2[i][index2];
                double w = weight[i];
                result += w*term1*term2;
                denom1 += w*term1*term1;
                denom2 += w*term2*term2;
                flag = 1;
            }
        }
    }
    if (!flag) return 0.;
    if (denom1 == 0.) return 1.;
    if (denom2 == 0.) return 1.;
    result = result / sqrt(denom1*denom2);
    result = 1. - result;
    return result;
}

/* ********************************************************************* */

static double
uacorrelation(int n, double** data1, double** data2, int** mask1, int** mask2,
              const double weight[], int index1, int index2, int transpose)
/*
Purpose
=======

The uacorrelation routine calculates the weighted Pearson distance between two
rows or columns, using the absolute value of the uncentered version of the
Pearson correlation. In the uncentered Pearson correlation, a zero mean is used
for both vectors even if the actual mean is nonzero.
This definition yields a semi-metric: d(a,b) >= 0, and d(a,b) = 0 iff a = b.
but the triangular inequality d(a,b) + d(b,c) >= d(a,c) does not hold
(e.g., choose b = a + c).

Arguments
=========

n         (input) int
The number of elements in a row or column. If transpose == 0, then n is the
number of columns; otherwise, n is the number of rows.

data1     (input) double array
The data array containing the first vector.

data2     (input) double array
The data array containing the second vector.

mask1     (input) int array
This array which elements in data1 are missing. If mask1[i][j] == 0, then
data1[i][j] is missing.

mask2     (input) int array
This array which elements in data2 are missing. If mask2[i][j] == 0, then
data2[i][j] is missing.

weight    (input) double[ncolumns] if transpose == 0,
                  double[nrows]    otherwise
The weights that are used to calculate the distance. This is equivalent
to including the jth data point weight[j] times in the calculation. The
weights can be non-integer.

index1    (input) int
Index of the first row or column.

index2    (input) int
Index of the second row or column.

transpose (input) int
If transpose == 0, the distance between two rows in the matrix is calculated.
Otherwise, the distance between two columns in the matrix is calculated.
============================================================================
*/
{
    double result = 0.;
    double denom1 = 0.;
    double denom2 = 0.;
    int flag = 0;
    /* flag will remain zero if no nonzero combinations of mask1 and mask2 are
     * found.
     */

    if (transpose == 0) /* Calculate the distance between two rows */ {
        int i;
        for (i = 0; i < n; i++) {
            if (mask1[index1][i] && mask2[index2][i]) {
                double term1 = data1[index1][i];
                double term2 = data2[index2][i];
                double w = weight[i];
                result += w*term1*term2;
                denom1 += w*term1*term1;
                denom2 += w*term2*term2;
                flag = 1;
            }
        }
    }
    else {
        int i;
        for (i = 0; i < n; i++) {
            if (mask1[i][index1] && mask2[i][index2]) {
                double term1 = data1[i][index1];
                double term2 = data2[i][index2];
                double w = weight[i];
                result += w*term1*term2;
                denom1 += w*term1*term1;
                denom2 += w*term2*term2;
                flag = 1;
            }
        }
    }
    if (!flag) return 0.;
    if (denom1 == 0.) return 1.;
    if (denom2 == 0.) return 1.;
    result = fabs(result) / sqrt(denom1*denom2);
    result = 1. - result;
    return result;
}

/* *********************************************************************    */

static double
spearman(int n, double** data1, double** data2, int** mask1, int** mask2,
         const double weight[], int index1, int index2, int transpose)
/*
Purpose
=======

The spearman routine calculates the Spearman distance between two rows or
columns. The Spearman distance is defined as one minus the Spearman rank
correlation.

Arguments
=========

n         (input) int
The number of elements in a row or column. If transpose == 0, then n is the
number of columns; otherwise, n is the number of rows.

data1     (input) double array
The data array containing the first vector.

data2     (input) double array
The data array containing the second vector.

mask1     (input) int array
This array which elements in data1 are missing. If mask1[i][j] == 0, then
data1[i][j] is missing.

mask2     (input) int array
This array which elements in data2 are missing. If mask2[i][j] == 0, then
data2[i][j] is missing.

weight    (input) double[ncolumns] if transpose == 0,
                  double[nrows]    otherwise
The weights that are used to calculate the distance. This is equivalent
to including the jth data point weight[j] times in the calculation. The
weights can be non-integer.

index1    (input) int
Index of the first row or column.

index2    (input) int
Index of the second row or column.

transpose (input) int
If transpose == 0, the distance between two rows in the matrix is calculated.
Otherwise, the distance between two columns in the matrix is calculated.
============================================================================
*/
{
    int i;

src/cluster.c  view on Meta::CPAN

    result = result / sqrt(denom1*denom2);
    result = 1. - result;
    return result;
}

/* *********************************************************************    */

static double
kendall(int n, double** data1, double** data2, int** mask1, int** mask2,
        const double weight[], int index1, int index2, int transpose)
/*
Purpose
=======

The kendall routine calculates the Kendall distance between two
rows or columns. The Kendall distance is defined as one minus Kendall's tau.

Arguments
=========

n            (input) int
The number of elements in a row or column. If transpose == 0, then n is the
number of columns; otherwise, n is the number of rows.

data1     (input) double array
The data array containing the first vector.

data2     (input) double array
The data array containing the second vector.

mask1     (input) int array
This array which elements in data1 are missing. If mask1[i][j] == 0, then
data1[i][j] is missing.

mask2     (input) int array
This array which elements in data2 are missing. If mask2[i][j] == 0, then
data2[i][j] is missing.

weight    (input) double[ncolumns] if transpose == 0,
                  double[nrows]    otherwise
The weights that are used to calculate the distance. This is equivalent
to including the jth data point weight[j] times in the calculation. The
weights can be non-integer.

index1    (input) int
Index of the first row or column.

index2    (input) int
Index of the second row or column.

transpose (input) int
If transpose == 0, the distance between two rows in the matrix is calculated.
Otherwise, the distance between two columns in the matrix is calculated.
============================================================================
*/
{
    double con = 0;
    double dis = 0;
    double exx = 0;
    double exy = 0;
    int flag = 0;
    /* flag will remain zero if no nonzero combinations of mask1 and mask2 are
     * found.
     */
    double denomx;
    double denomy;
    double tau;
    int i, j;

    if (transpose == 0) {
        for (i = 0; i < n; i++) {
            if (mask1[index1][i] && mask2[index2][i]) {
                for (j = 0; j < i; j++) {
                    if (mask1[index1][j] && mask2[index2][j]) {
                        const double x1 = data1[index1][i];
                        const double x2 = data1[index1][j];
                        const double y1 = data2[index2][i];
                        const double y2 = data2[index2][j];
                        const double w = weight[i] * weight[j];
                        if (x1 < x2 && y1 < y2) con += w;
                        else if (x1 > x2 && y1 > y2) con += w;
                        else if (x1 < x2 && y1 > y2) dis += w;
                        else if (x1 > x2 && y1 < y2) dis += w;
                        else if (x1 == x2 && y1 != y2) exx += w;
                        else if (x1 != x2 && y1 == y2) exy += w;
                        flag = 1;
                    }
                }
            }
        }
    }
    else {
        for (i = 0; i < n; i++) {
            if (mask1[i][index1] && mask2[i][index2]) {
                for (j = 0; j < i; j++) {
                    if (mask1[j][index1] && mask2[j][index2]) {
                        const double x1 = data1[i][index1];
                        const double x2 = data1[j][index1];
                        const double y1 = data2[i][index2];
                        const double y2 = data2[j][index2];
                        const double w = weight[i] * weight[j];
                        if (x1 < x2 && y1 < y2) con += w;
                        else if (x1 > x2 && y1 > y2) con += w;
                        else if (x1 < x2 && y1 > y2) dis += w;
                        else if (x1 > x2 && y1 < y2) dis += w;
                        else if (x1 == x2 && y1 != y2) exx += w;
                        else if (x1 != x2 && y1 == y2) exy += w;
                        flag = 1;
                    }
                }
            }
        }
    }
    if (!flag) return 0.;
    denomx = con + dis + exx;
    denomy = con + dis + exy;
    if (denomx == 0) return 1;
    if (denomy == 0) return 1;
    tau = (con-dis)/sqrt(denomx*denomy);
    return 1.-tau;
}

/* *********************************************************************    */

static double(*setmetric(char dist))
    (int, double**, double**, int**, int**, const double[], int, int, int)
{
    switch(dist) {
        case 'e': return &euclid;
        case 'b': return &cityblock;
        case 'c': return &correlation;
        case 'a': return &acorrelation;
        case 'u': return &ucorrelation;
        case 'x': return &uacorrelation;
        case 's': return &spearman;
        case 'k': return &kendall;
        default: return &euclid;
    }
}

/* *********************************************************************    */

static double
uniform(void)
/*
Purpose
=======

This routine returns a uniform random number between 0.0 and 1.0. Both 0.0
and 1.0 are excluded. This random number generator is described in:

Pierre l'Ecuyer
Efficient and Portable Combined Random Number Generators
Communications of the ACM, Volume 31, Number 6, June 1988, pages 742-749, 774.

The first time this routine is called, it initializes the random number
generator using the current time. First, the current epoch time in seconds is
used as a seed for the random number generator in the C library. The first two
random numbers generated by this generator are used to initialize the random
number generator implemented in this routine.


Arguments
=========

None.


Return value
============

A double-precison number between 0.0 and 1.0.
============================================================================
*/



( run in 0.904 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )