Algorithm-Cluster

 view release on metacpan or  search on metacpan

perl/Cluster.pm  view on Meta::CPAN

    } else {
        return _median([@_]);
    }
}


#------------------------------------------------------
# This function is called by the wrappers for library functions.
# It checks the dimensions of the data, mask and weight parameters.
#
# Return false if any errors are found in the data matrix. 
#
# Detect the dimension (nrows x ncols) of the data matrix,
# and set values in the parameter hash. 
#
# Also check the mask matrix and weight arrays, and set
# the parameters to default values if we find any errors, 
# however, we still return true if we find errors.
#
sub check_matrix_dimensions {
    my ($param, $default) = @_;
    #----------------------------------
    # Check the data matrix
    #
    return unless data_is_valid_matrix($param->{data});
    #----------------------------------
    # Remember the dimensions of the weight array
    #

perl/Cluster.pm  view on Meta::CPAN

    }
    foreach $i (@{ $param->{initialid}}) {
        $counter[$i]++;
    }
    for ($i = 0; $i < $param->{nclusters}; $i++) {
        if ($counter[$i]==0) {
            module_warn("Optional parameter 'initialid' contains empty clusters");
            return;
        }
    }
    # No errors detected
    $param->{npass} = 0;
    return 1;
}

#-------------------------------------------------------------
# Wrapper for the kcluster() function
#
sub kcluster {
    #----------------------------------
    # Define default parameters

perl/Cluster.xs  view on Meta::CPAN

    } else {
        return 0;
    }
}



/* -------------------------------------------------
 * Convert a Perl 2D matrix into a 2D matrix of C doubles.
 * If no data are masked, mask can be passed as NULL.
 * NOTE: on errors this function returns a value greater than zero.
 */
static double**
parse_data(pTHX_ SV * matrix_ref, int** mask) {

    AV * matrix_av;
    SV * row_ref;
    AV * row_av;
    SV * cell;

    int type, i, j, nrows, ncols, n;

perl/Cluster.xs  view on Meta::CPAN

        free(matrix);
        matrix = NULL;
    }

    return matrix;
}


/* -------------------------------------------------
 * Convert a Perl 2D matrix into a 2D matrix of C ints.
 * On errors this function returns a value greater than zero.
 */
static int**
parse_mask(pTHX_ SV * matrix_ref) {

    AV * matrix_av;
    SV * row_ref;
    AV * row_av;
    SV * cell;

    int type, i, j, nrows, ncols, n;

perl/Cluster.xs  view on Meta::CPAN

    for(i = 1; i < nrows; ++i ) {
        free(matrix[i]);
    }

    free(matrix);
}


/* -------------------------------------------------
 * Convert a Perl array into an array of doubles
 * On error, this function returns NULL.
 */
static double*
malloc_row_perl2c_dbl (pTHX_ SV * input, int* np) {

    int i;
    AV* array    = (AV *) SvRV(input);
    const int n  = (int) av_len(array) + 1;
    double* data = malloc(n * sizeof(double)); 
    if (!data) {
        return NULL;

perl/Cluster.xs  view on Meta::CPAN

            free(data);
            return NULL;
        }
    }
    if(np) *np = n;
    return data;
}

/* -------------------------------------------------
 * Convert a Perl array into an array of ints
 * On errors this function returns NULL.
 */
static int*
malloc_row_perl2c_int (pTHX_ SV * input) {

    int i;

    AV* array = (AV *) SvRV(input);
    const int n = (int) av_len(array) + 1;
    int* data = malloc(n*sizeof(int)); 
    if (!data) {

perl/Cluster.xs  view on Meta::CPAN

            free(data);
            return NULL;
        }
    }

    return data;
}

/* -------------------------------------------------
 * Copy a Perl array into an array of ints.
 * If an error occurs, return 0; otherwise return 1.
 */
static int
copy_row_perl2c_int (pTHX_ SV * input, int* output) {

    int i;

    AV* array = (AV *) SvRV(input);
    const int n = (int) av_len(array) + 1;

    /* Loop once for each item in the Perl array,

perl/Cluster.xs  view on Meta::CPAN

    AV * row_av     = (AV *) SvRV(row_ref);
    const int ncols = (int) av_len(row_av) + 1;
    if (ncols==0) return 1;

    return 0;
}


/* -------------------------------------------------
 * Convert the 'data' and 'mask' matrices and the 'weight' array
 * from C to Perl.  Also check for errors, and bail out if there are any.
 */
static int
malloc_matrices(pTHX_
    SV *  weight_ref, double  ** weight, int nweights, 
    SV *  data_ref,   double *** matrix,
    SV *  mask_ref,   int    *** mask,
    int   nrows,      int        ncols
) {

    if(SvROK(mask_ref) && SvTYPE(SvRV(mask_ref)) == SVt_PVAV) { 

perl/Cluster.xs  view on Meta::CPAN

    }
    array = (AV *) SvRV(nodes);
    n = (int) av_len(array) + 1;
    tree = malloc(sizeof(Tree));
    if (tree) {
        tree->n = n;
        tree->nodes = malloc(n*sizeof(Node));
    }
    if (! tree || !tree->nodes) {
        if (tree) free(tree);
        croak("Algorithm::Cluster::Tree::new memory error\n");
    }

    for (i = 0; i < n; i++) {
        Node* node;
        SV* node_ref = *(av_fetch(array, (I32) i, 0)); 
        if (!sv_isa(node_ref, "Algorithm::Cluster::Node")) break;
        node = INT2PTR(Node*,SvIV(SvRV(node_ref)));
        tree->nodes[i].left = node->left;
        tree->nodes[i].right = node->right;
        tree->nodes[i].distance = node->distance;

perl/Cluster.xs  view on Meta::CPAN

    if (!sv_isa(obj, "Algorithm::Cluster::Tree")) {
        croak("sort can only be applied to an Algorithm::Cluster::Tree object");
    }
    tree = INT2PTR(Tree*,SvIV(SvRV(obj)));
    if (order) {
        if(!SvROK(order) || SvTYPE(SvRV(order)) != SVt_PVAV) { 
            croak("Algorithm::Cluster::Tree::sort expects an order array\n");
        }
        values = malloc_row_perl2c_dbl(aTHX_ order, &n);
        if (!values) {
            croak("Algorithm::Cluster::Tree::sort memory error\n");
        }
        if (n != tree->n + 1) {
            free(values);
            croak("sort: size of order array is inconsistent with tree size\n");
        }
    }
    else {
        n = tree->n + 1;
    }
    indices = malloc(n*sizeof(int));
    if (!indices) {
        if(values) free(values);
        croak("sort: insufficient memory");
    }
    /* --------------------------------------------------------------- */
    ok = sorttree(tree->n, tree->nodes, values, indices);
    if(values) free(values);
    /* -- Check for errors flagged by the C routine ------------------ */
    if (!ok) {
        free(indices);
        croak("sort: Error in the sorttree routine");
    }
    for(i=0; i<n; i++) XPUSHs(sv_2mortal(newSVnv(indices[i])));
    free(indices);

void
cut(obj, nclusters=0)
    SV* obj

perl/Cluster.xs  view on Meta::CPAN

    const int ndata = transpose ? nrows : ncols;
    const int nelements = transpose ? ncols : nrows;

    CODE:
    /* ------------------------
     * Don't check the parameters, because we rely on the Perl
     * caller to check most paramters.
     */
    /* ------------------------
     * Convert data and mask matrices and the weight array
     * from C to Perl.  Also check for errors, and ignore the
     * mask or the weight array if there are any errors. 
     */
    if (is_distance_matrix(aTHX_ data_ref)) {
        distancematrix = parse_distance(aTHX_ data_ref, nelements);
        if (!distancematrix) {
                croak("memory allocation failure in _treecluster\n");
        }
    } else {
        int ok;
        ok = malloc_matrices(aTHX_ weight_ref, &weight, ndata, 
                    data_ref,   &matrix,

perl/Cluster.xs  view on Meta::CPAN

    int      npass;
    char *   method;
    char *   dist;
    SV *     initialid_ref;

    PREINIT:
    SV  *    clusterid_ref;
    int *    clusterid;
    int      nobjects;
    int      ndata;
    double   error;
    int      ifound;
    int      ok;

    double  * weight;
    double ** matrix;
    int    ** mask;


    PPCODE:
    /* ------------------------

perl/Cluster.xs  view on Meta::CPAN

        nobjects = ncols;
        ndata = nrows;
    }
    clusterid = malloc(nobjects * sizeof(int) );
    if (!clusterid) {
        croak("memory allocation failure in _kcluster\n");
    }

    /* ------------------------
     * Convert data and mask matrices and the weight array
     * from C to Perl.  Also check for errors, and ignore the
     * mask or the weight array if there are any errors. 
     */
    ok = malloc_matrices( aTHX_ weight_ref, &weight, ndata, 
                data_ref,   &matrix,
                mask_ref,   &mask,  
                nrows,      ncols);
    if (!ok) {
        free(clusterid);
        croak("failed to read input data for _kcluster\n");
    }

perl/Cluster.xs  view on Meta::CPAN

    if (npass==0) {
        copy_row_perl2c_int(aTHX_ initialid_ref, clusterid);
    }

    /* ------------------------
     * Run the library function
     */
    kcluster( 
        nclusters, nrows, ncols, 
        matrix, mask, weight, transpose,
        npass, method[0], dist[0], clusterid,  &error, &ifound
        
    );

    /* ------------------------
     * Convert generated C matrices to Perl matrices
     */
    clusterid_ref =    row_c2perl_int(aTHX_ clusterid, nobjects);

    /* ------------------------
     * Push the new Perl matrices onto the return stack
     */
    XPUSHs(sv_2mortal( clusterid_ref   ));
    XPUSHs(sv_2mortal( newSVnv(error) ));
    XPUSHs(sv_2mortal( newSViv(ifound) ));

    /* ------------------------
     * Free what we've malloc'ed 
     */
    free(clusterid);
    free_matrix_int(mask,     nrows);
    free_matrix_dbl(matrix,   nrows);
    free(weight);

perl/Cluster.xs  view on Meta::CPAN

    int      nobjects;
    SV *     distancematrix_ref;
    int      npass;
    SV *     initialid_ref;


    PREINIT:
    double** distancematrix;
    SV  *    clusterid_ref;
    int *    clusterid;
    double   error;
    int      ifound;



    PPCODE:
    /* ------------------------
     * Don't check the parameters, because we rely on the Perl
     * caller to check most parameters.
     */

    /* ------------------------
     * Malloc space for the return values from the library function
     */
    clusterid = malloc(nobjects * sizeof(int));
    if (!clusterid) {
        croak("memory allocation failure in _kmedoids\n");
    }

    /* ------------------------
     * Convert data and mask matrices and the weight array
     * from C to Perl.  Also check for errors, and ignore the
     * mask or the weight array if there are any errors. 
     */
    distancematrix = parse_distance(aTHX_ distancematrix_ref, nobjects);
    if (!distancematrix) {
        free(clusterid);
            croak("failed to allocate memory for distance matrix in _kmedoids\n");
    }

    /* ------------------------
     * Copy initialid to clusterid, if needed
     */

perl/Cluster.xs  view on Meta::CPAN

    if (npass==0) {
        copy_row_perl2c_int(aTHX_ initialid_ref, clusterid);
    }

    /* ------------------------
     * Run the library function
     */
    kmedoids( 
        nclusters, nobjects, 
        distancematrix, npass, clusterid, 
        &error, &ifound
    );

    if(ifound==-1) {
        free(clusterid);
        free_ragged_matrix_dbl(distancematrix, nobjects);
            croak("memory allocation failure in _kmedoids\n");
    }
    else if(ifound==0) {
        free(clusterid);
        free_ragged_matrix_dbl(distancematrix, nobjects);
            croak("error in input arguments in kmedoids\n");
    }
    else {

        /* ------------------------
         * Convert generated C matrices to Perl matrices
         */
        clusterid_ref =    row_c2perl_int(aTHX_ clusterid, nobjects);

        /* ------------------------
         * Push the new Perl matrices onto the return stack
         */
        XPUSHs(sv_2mortal( clusterid_ref   ));
        XPUSHs(sv_2mortal( newSVnv(error) ));
        XPUSHs(sv_2mortal( newSViv(ifound) ));

    }
    /* ------------------------
     * Free what we've malloc'ed 
     */
    free(clusterid);
    free_ragged_matrix_dbl(distancematrix, nobjects);

    /* Finished _kmedoids() */

perl/Cluster.xs  view on Meta::CPAN

    cluster1 = malloc_row_perl2c_int(aTHX_ cluster1_ref);
    cluster2 = malloc_row_perl2c_int(aTHX_ cluster2_ref);
    if (!cluster1 || !cluster2) {
        if (cluster1) free(cluster1);
        if (cluster2) free(cluster2);
        croak("memory allocation failure in _clusterdistance\n");
    }

    /* ------------------------
     * Convert data and mask matrices and the weight array
     * from C to Perl.  Also check for errors, and ignore the
     * mask or the weight array if there are any errors. 
     * Set nweights to the correct number of weights.
     */
    nweights = (transpose==0) ? ncols : nrows;
    ok = malloc_matrices( aTHX_ weight_ref, &weight, nweights, 
                data_ref,   &matrix,
                mask_ref,   &mask,  
                nrows,      ncols);
    if (!ok) {
        free(cluster1);
        free(cluster2);

perl/Cluster.xs  view on Meta::CPAN

    /* ------------------------
     * Convert cluster index Perl arrays to C arrays
     */
    clusterid = malloc_row_perl2c_int(aTHX_ clusterid_ref);
    if (!clusterid) {
        croak("memory allocation failure in _clustercentroids\n");
    }

    /* ------------------------
     * Convert data and mask matrices and the weight array
     * from C to Perl.  Also check for errors, and ignore the
     * mask or the weight array if there are any errors. 
     * Set nweights to the correct number of weights.
     */
    ok = malloc_matrices( aTHX_ NULL, NULL, 0, 
                data_ref,   &matrix,
                mask_ref,   &mask,  
                nrows,      ncols);
    if (!ok) {
        free(clusterid);
            croak("failed to read input data for _clustercentroids\n");
    }

perl/Cluster.xs  view on Meta::CPAN

        if (transpose==0) {
        nobjects = nrows;
        ndata = ncols;
    } else {
        nobjects = ncols;
        ndata = nrows;
    }

    /* ------------------------
     * Convert data and mask matrices and the weight array
     * from C to Perl.  Also check for errors, and ignore the
     * mask or the weight array if there are any errors. 
     */
    ok = malloc_matrices( aTHX_
        weight_ref, &weight, ndata, 
        data_ref,   &data,
        mask_ref,   &mask,  
        nrows,      ncols
    );
    if (!ok) {
        croak("failed to read input data for _distancematrix");
    }

perl/Cluster.xs  view on Meta::CPAN

    clusterid = malloc(nelements*sizeof(int[2]));
    if (!clusterid) {
        croak("memory allocation failure in _somcluster\n");
    }
    celldata  =  0;
    /* Don't return celldata, for now at least */


    /* ------------------------
     * Convert data and mask matrices and the weight array
     * from C to Perl.  Also check for errors, and ignore the
     * mask or the weight array if there are any errors. 
     * Set nweights to the correct number of weights.
     */
    ok = malloc_matrices( aTHX_ weight_ref, &weight, ndata, 
                data_ref,   &matrix,
                mask_ref,   &mask,  
                nrows,      ncols);
    if (!ok) {
            croak("failed to read input data for _somcluster\n");
    }

perl/Cluster.xs  view on Meta::CPAN

    SV * data_ref;

    PREINIT:
    double** u;
    double** v;
    double* w;
    double* m;
    int i;
    int j;
    int nmin;
    int error;
    SV * mean_ref;
    SV * coordinates_ref;
    SV * pc_ref;
    SV * eigenvalues_ref;

    PPCODE:
    if(SvTYPE(SvRV(data_ref)) != SVt_PVAV) { 
        croak("argument to _pca is not an array reference\n");
    }
    nmin = nrows < ncols ? nrows : ncols;

perl/Cluster.xs  view on Meta::CPAN

    /* -- Calculate the mean of each column ------------------------------ */
    for (j = 0; j < ncols; j++) {
        m[j] = 0.0;
        for (i = 0; i < nrows; i++) m[j] += u[i][j];
        m[j] /= nrows;
    }
    /* -- Subtract the mean of each column ------------------------------- */
    for (i = 0; i < nrows; i++)
        for (j = 0; j < ncols; j++)
            u[i][j] -= m[j];
    error = pca(nrows, ncols, u, v, w);
    if (error==0) {
        /* Convert the C variables to Perl variables */
        mean_ref = row_c2perl_dbl(aTHX_ m, ncols);
        if (nrows >= ncols) {
            coordinates_ref = matrix_c2perl_dbl(aTHX_ u, nrows, ncols);
            pc_ref = matrix_c2perl_dbl(aTHX_ v, nmin, nmin);
        }
        else /* nrows < ncols */ {
            pc_ref = matrix_c2perl_dbl(aTHX_ u, nrows, ncols);
            coordinates_ref = matrix_c2perl_dbl(aTHX_ v, nmin, nmin);
        }
        eigenvalues_ref = row_c2perl_dbl(aTHX_ w, nmin);
    }
    for (i = 0; i < nrows; i++) free(u[i]);
    for (i = 0; i < nmin; i++) free(v[i]);
    free(u);
    free(v);
    free(w);
    free(m);
    if (error==-1)
        croak("Insufficient memory for principal components analysis");
    if (error > 0)
        croak("Singular value decomposition failed to converge");
    /* ------------------------
     * Push the new Perl matrices onto the return stack
     */
    XPUSHs(sv_2mortal(mean_ref));
    XPUSHs(sv_2mortal(coordinates_ref));
    XPUSHs(sv_2mortal(pc_ref));
    XPUSHs(sv_2mortal(eigenvalues_ref));

perl/examples/ex1_kcluster  view on Meta::CPAN

my $j = 0;
my (@orfname,@orfdata,@weight,@mask);

open(DATA,"<$file") or die "Can't open file $file: $!";


#------------------
# Read in the data file, and save the data to @orfdata
# We know that the file is intact and has no holes, 
# so just set the mask to 1 for every item.
# We don't check for errors in this case, because the file
# is short and we can spot errors by eye. 
#
my $firstline = <DATA>;  # Skip the title line
while(<DATA>) {
    chomp(my $line = $_);
    my @field     = split /\t/, $line;
    $orfname[$i]  =   $field[0];
    $orfdata[$i]  = [ @field[2..5] ];
    $mask[$i]     = [ 1,1,1,1 ];
    ++$i;
}

perl/examples/ex1_kcluster  view on Meta::CPAN

    dist      =>       'e',
    data      =>    \@orfdata,
    mask      =>       \@mask,
    weight    =>     \@weight,
);


#------------------
# Here is where we invoke the library function!
#
my ($clusters, $error, $found) = kcluster(%params);
#
#------------------



#------------------
# Create a reverse index of the ORF names,  by cluster ID
#
my %orfname_by_cluster;
$i=0;

perl/examples/ex1_kcluster  view on Meta::CPAN


    print "\t$_\n", foreach( sort { $a cmp $b } @{$orfname_by_cluster{$i} } );
    print "\n";
}


#------------------
# Print out the resulting within-cluster sum of distances.
#
print "------------------\n";
printf("Within-cluster sum of distances:  %f\n\n", $error); 


#------------------
# Try this again with a specified initial clustering solution
#

my @initialid = (0,1,2,3,4,5) x 15;
# choice for the initial clustering; the data file contains 90 genes.

%params = (

perl/examples/ex1_kcluster  view on Meta::CPAN

    method    =>       'a',
    dist      =>       'e',
    data      =>    \@orfdata,
    mask      =>       \@mask,
    weight    =>     \@weight,
    initialid =>  \@initialid,
);

printf("Executing k-means clustering with a specified initial clustering\n");

($clusters, $error, $found) = kcluster(%params);

printf("Within-cluster sum of distances:  %f\n\n", $error); 

perl/examples/ex3_kcluster  view on Meta::CPAN

my $weight2 =  [ 1,1,1,1,1 ];

my %params = (
    nclusters =>         3,
    transpose =>         0,
    npass     =>       100,
    method    =>       'a',
    dist      =>       'e',
);

my ($clusters, $error, $found);
my ($i);

$i=0;
($clusters, $error, $found) = kcluster(
    %params,
    data      =>    $data1,
    mask      =>    $mask1,
    weight    =>  $weight1,
);

printf("\n");
printf("Clustering first data set:\n\n");

$i=0;
foreach(@{$clusters}) {
    printf("Gene %2d belongs to cluster %2d\n",$i++,$_);
}

printf("\n");

printf("Within-cluster sum of distances is %f\n", $error);

printf("\n");
printf("Clustering second data set:\n\n");

($clusters, $error, $found) = kcluster(
    %params,
    data      =>    $data2,
    mask      =>    $mask2,
    weight    =>  $weight2,
);

$i=0;
foreach(@{$clusters}) {
    printf("Gene %2d belongs to cluster %2d\n",$i++,$_);
}

printf("\n");

printf("Within-cluster sum of distances is %f\n", $error);

printf("\n");


__END__

perl/examples/ex8_kmedoids  view on Meta::CPAN

my $j = 0;
my (@orfname,@orfdata,@weight,@mask);

open(DATA,"<$file") or die "Can't open file $file: $!";


#------------------
# Read in the data file, and save the data to @orfdata
# We know that the file is intact and has no holes, 
# so just set the mask to 1 for every item.
# We don't check for errors in this case, because the file
# is short and we can spot errors by eye. 
#
my $firstline = <DATA>;  # Skip the title line
while(<DATA>) {
    chomp(my $line = $_);
    my @field     = split /\t/, $line;
    $orfname[$i]  =   $field[0];
    $orfdata[$i]  = [ @field[2..5] ];
    $mask[$i]     = [ 1,1,1,1 ];
    ++$i;
}

perl/examples/ex8_kmedoids  view on Meta::CPAN

#------------------

my %params2 = (
    nclusters =>         6,
    distances =>   $matrix,
    npass     =>      1000,
);

printf("Executing k-medoids clustering 1000 times, using random initial clusterings\n");

my ($clusters, $error, $found) = kmedoids(%params2);

my $item;
$i = 0;
foreach $item (@{$clusters}) {
    print $i, ": ", $item, "\n";
    ++$i;
}

#------------------
# Print out the resulting within-cluster sum of distances.
#
print "------------------\n";
printf("Within-cluster sum of distances:  %f; solution was found %d times\n\n", $error, $found);
                                                                                
                                                                                
#------------------
# Try this again with a specified initial clustering solution
#

my @initialid = (0,1,2,3,4,5) x 15;
# choice for the initial clustering; the data file contains 90 genes.
                                                                                
%params2 = (
    nclusters =>         6,
    distances =>   $matrix,
    initialid =>  \@initialid,
);
                                                                                
printf("Executing k-medoids clustering with a specified initial clustering\n");
                                                                                
($clusters, $error, $found) = kmedoids(%params2);
                                                                                
printf("Within-cluster sum of distances:  %f\n\n", $error);

perl/t/10_kcluster.t  view on Meta::CPAN

    [ 1, 1 ],
    [ 1, 1 ],
    [ 1, 1 ],
    [ 1, 1 ],
];


#------------------------------------------------------
# Tests
# 
my ($clusters, $centroids, $error, $found);
my ($i,$j);

my %params = (
    nclusters =>         3,
    transpose =>         0,
    method    =>       'a',
    dist      =>       'e',
);

#----------
# test dataset 1
#
($clusters, $error, $found) = Algorithm::Cluster::kcluster(
    %params,
    data      =>    $data1,
    mask      =>    $mask1,
    weight    =>  $weight1,
    npass     =>       100,
);

#----------
# Make sure that the length of @clusters matches the length of @data
ok( scalar @$data1 == scalar @$clusters);

#----------
# Test the cluster coordinates
ok ( $clusters->[ 0] != $clusters->[ 1] );
ok ( $clusters->[ 1] == $clusters->[ 2] );
ok ( $clusters->[ 2] != $clusters->[ 3] );

# Test the within-cluster sum of errors
ok( sprintf ("%7.3f", $error) == '  1.300');


#----------
# test dataset 2
#
$i=0;$j=0;
($clusters, $error, $found) = Algorithm::Cluster::kcluster(
    %params,
    data      =>    $data2,
    mask      =>    $mask2,
    weight    =>  $weight2,
    npass     =>       100,
);


#----------
# Make sure that the length of @clusters matches the length of @data
ok (scalar @$data2 == scalar @$clusters);

#----------
# Test the cluster coordinates
ok ($clusters->[ 0] == $clusters->[ 3]);
ok ($clusters->[ 0] != $clusters->[ 6]);
ok ($clusters->[ 0] != $clusters->[ 9]);
ok ($clusters->[11] == $clusters->[12]);

# Test the within-cluster sum of errors
ok ( sprintf ("%7.3f", $error) == '  1.012');

#----------
# test kcluster with initial cluster assignments
#
$initialid = [0,1,2,0,1,2,0,1,2,0,1,2,0];

($clusters, $error, $found) = Algorithm::Cluster::kcluster(
    %params,
    data      =>     $data2,
    mask      =>     $mask2,
    weight    =>   $weight2,
    npass     =>          1, 
    initialid => $initialid, 
);

#----------
# Test the cluster coordinates

perl/t/10_kcluster.t  view on Meta::CPAN

ok ( $clusters->[ 4] == 2 );
ok ( $clusters->[ 5] == 2 );
ok ( $clusters->[ 6] == 0 );
ok ( $clusters->[ 7] == 0 );
ok ( $clusters->[ 8] == 2 );
ok ( $clusters->[ 9] == 1 );
ok ( $clusters->[10] == 1 );
ok ( $clusters->[11] == 1 );
ok ( $clusters->[12] == 1 );

# Test the within-cluster sum of errors
ok ( sprintf ("%7.3f", $error) == '  3.036' );
   
ok ($found == 1 );
                 
__END__

perl/t/14_kmedoids.t  view on Meta::CPAN

    [ 2.1,  7.7,  2.7,  1.9,  1.8,  5.7,  3.4,  5.2],
    [ 1.6,  1.8,  9.2,  8.7,  3.4, 16.8,  4.2,  1.3,  5.0],
    [ 2.7,  3.7,  5.5,  5.5,  1.9, 11.5,  2.0,  1.7,  2.1,  3.1],
    [10.0, 19.3,  1.0,  3.7,  9.1,  1.2,  9.3, 15.7,  6.3, 16.0, 11.5]
];

#------------------------------------------------------
# Tests
# 

my ($clusters, $error, $found);

#------------------------------------------------------
# Test with repeated runs of the k-medoids algorithm
# 

my %params1 = (
        nclusters =>         4,
        distances =>   $matrix,
        npass     =>     10000,
);
                                                                                
($clusters, $error, $found) = Algorithm::Cluster::kmedoids(%params1);

#----------
# Make sure that the length of @clusters matches the length of @data
is (scalar @$matrix, scalar @$clusters );

#----------
# Test the cluster assignments
is ($clusters->[ 0], 9);
is ($clusters->[ 1], 9);
is ($clusters->[ 2], 2);
is ($clusters->[ 3], 2);
is ($clusters->[ 4], 4);
is ($clusters->[ 5], 5);
is ($clusters->[ 6], 4);
is ($clusters->[ 7], 9);
is ($clusters->[ 8], 4);
is ($clusters->[ 9], 9);
is ($clusters->[10], 4);
is ($clusters->[11], 2);

# Test the within-cluster sum of errors
is (sprintf ("%7.3f", $error), ' 11.600');


#------------------------------------------------------
# Test the k-medoids algorithm with a specified initial clustering
# 

$initialid = [0,0,1,1,1,2,2,2,3,3,3,3];

my %params2 = (
    nclusters =>         4,
    distances =>   $matrix,
    npass     =>         1,
    initialid => $initialid,
);
                                                                                
($clusters, $error, $found) = Algorithm::Cluster::kmedoids(%params2);

#----------
# Make sure that the length of @clusters matches the length of @data
is (scalar @$matrix, scalar @$clusters );

#----------
# Test the cluster assignments
is ($clusters->[ 0], 9);
is ($clusters->[ 1], 9);
is ($clusters->[ 2], 2);
is ($clusters->[ 3], 2);
is ($clusters->[ 4], 4);
is ($clusters->[ 5], 2);
is ($clusters->[ 6], 6);
is ($clusters->[ 7], 9);
is ($clusters->[ 8], 4);
is ($clusters->[ 9], 9);
is ($clusters->[10], 4);
is ($clusters->[11], 2);

# Test the within-cluster sum of errors
is (sprintf ("%7.3f", $error), " 13.000");

perl/t/16_pca.t  view on Meta::CPAN

is (sprintf("%7.4f", $pc->[0][3]), " 0.2615");
is (sprintf("%7.4f", $pc->[1][0]), " 0.0507");
is (sprintf("%7.4f", $pc->[1][1]), " 0.6862");
is (sprintf("%7.4f", $pc->[1][2]), " 0.1382");
is (sprintf("%7.4f", $pc->[1][3]), " 0.1978");
is (sprintf("%7.4f", $pc->[2][0]), "-0.6300");
is (sprintf("%7.4f", $pc->[2][1]), " 0.0912");
is (sprintf("%7.4f", $pc->[2][2]), " 0.0456");
is (sprintf("%7.4f", $pc->[2][3]), "-0.6746");
# The last eigenvalue is zero. The corresponding eigenvector is strongly
# affected by roundoff error, so we don't test it. For PCA, it doesn't matter
# since the coordinates along this eigenvector are zero anyway.
is (sprintf("%7.4f", $eigenvalues->[0]), " 6.7679");
is (sprintf("%7.4f", $eigenvalues->[1]), " 3.0109");
is (sprintf("%7.4f", $eigenvalues->[2]), " 1.8776");
is (sprintf("%7.4f", abs($eigenvalues->[3])), " 0.0000");

src/cluster.c  view on Meta::CPAN

}

/* ********************************************************************** */

static double*
getrank(int n, const double data[], const double weight[])
/* Calculates the ranks of the elements in the array data. Two elements with
 * the same value get the same rank, equal to the average of the ranks had the
 * elements different values. The ranks are returned as a newly allocated
 * array that should be freed by the calling routine. If getrank fails due to
 * a memory allocation error, it returns NULL.
 */
{
    int i, j, k, l;
    double* rank;
    int* index;
    double total = 0.0;
    double subtotal;
    double current;
    double value;

src/cluster.c  view on Meta::CPAN

 *
 * On output:
 *
 * the routine returns an integer ierr equal to
 *  0:  to indicate a normal return,
 *  k:  if the k-th singular value has not been determined after 30 iterations,
 * -1:  if memory allocation fails.
 *
 * w  contains the n (non-negative) singular values of a (the
 *    diagonal elements of s). they are unordered.
 *    If an error exit is made, the singular values should be correct for
 *    indices ierr+1, ierr+2, ..., n.
 *
 * u  contains the matrix u (orthogonal column vectors) of the decomposition.
 *    If an error exit is made, the columns of u corresponding to indices of
 *    correct singular values should be correct.
 *                         t
 * vt contains the matrix v (orthogonal) of the decomposition.
 *    If an error exit is made, the columns of v corresponding to indices of
 *    correct singular values should be correct.
 *
 *
 * Questions and comments should be directed to B. S. Garbow,
 * Applied Mathematics division, Argonne National Laboratory
 *
 * Modified to eliminate machep
 *
 * Translated to C by Michiel de Hoon, Human Genome Center,
 * University of Tokyo, for inclusion in the C Clustering Library.

src/cluster.c  view on Meta::CPAN


The arrays u, v, and w are sorted according to eigenvalue, with the largest
eigenvalues appearing first.

The function returns 0 if successful, -1 if memory allocation fails, and a
positive integer if the singular value decomposition fails to converge.
*/
{
    int i;
    int j;
    int error;
    int* index = malloc(ncolumns*sizeof(int));
    double* temp = malloc(ncolumns*sizeof(double));

    if (!index || !temp) {
        if (index) free(index);
        if (temp) free(temp);
        return -1;
    }
    error = svd(nrows, ncolumns, u, w, v);
    if (error == 0) {
        if (nrows >= ncolumns) {
            for (j = 0; j < ncolumns; j++) {
                const double s = w[j];
                for (i = 0; i < nrows; i++) u[i][j] *= s;
            }
            sort(ncolumns, w, index);
            for (i = 0; i < ncolumns/2; i++) {
                j = index[i];
                index[i] = index[ncolumns-1-i];
                index[ncolumns-1-i] = j;

src/cluster.c  view on Meta::CPAN

            for (j = 0; j < nrows; j++) {
                for (i = 0; i < nrows; i++) temp[i] = v[j][index[i]];
                for (i = 0; i < nrows; i++) v[j][i] = temp[i];
            }
            for (i = 0; i < nrows; i++) temp[i] = w[index[i]];
            for (i = 0; i < nrows; i++) w[i] = temp[i];
        }
    }
    free(index);
    free(temp);
    return error;
}

/* ********************************************************************* */

static double
euclid(int n, double** data1, double** data2, int** mask1, int** mask2,
       const double weight[], int index1, int index2, int transpose)

/*
Purpose

src/cluster.c  view on Meta::CPAN

                denom1 += w*term1*term1;
                denom2 += w*term2*term2;
                tweight += w;
            }
        }
    }
    if (!tweight) return 0; /* usually due to empty clusters */
    result -= sum1 * sum2 / tweight;
    denom1 -= sum1 * sum1 / tweight;
    denom2 -= sum2 * sum2 / tweight;
    if (denom1 <= 0) return 1; /* include '<' to deal with roundoff errors */
    if (denom2 <= 0) return 1; /* include '<' to deal with roundoff errors */
    result = result / sqrt(denom1*denom2);
    result = 1. - result;
    return result;
}

/* ********************************************************************* */

static double
acorrelation(int n, double** data1, double** data2, int** mask1, int** mask2,
             const double weight[], int index1, int index2, int transpose)

src/cluster.c  view on Meta::CPAN

                denom1 += w*term1*term1;
                denom2 += w*term2*term2;
                tweight += w;
            }
        }
    }
    if (!tweight) return 0; /* usually due to empty clusters */
    result -= sum1 * sum2 / tweight;
    denom1 -= sum1 * sum1 / tweight;
    denom2 -= sum2 * sum2 / tweight;
    if (denom1 <= 0) return 1; /* include '<' to deal with roundoff errors */
    if (denom2 <= 0) return 1; /* include '<' to deal with roundoff errors */
    result = fabs(result) / sqrt(denom1*denom2);
    result = 1. - result;
    return result;
}

/* ********************************************************************* */

static double
ucorrelation(int n, double** data1, double** data2, int** mask1, int** mask2,
             const double weight[], int index1, int index2, int transpose)

src/cluster.c  view on Meta::CPAN

    double result = 0.;
    double denom1 = 0.;
    double denom2 = 0.;
    double sum1 = 0.;
    double sum2 = 0.;
    double totalweight = 0.;
    double* tdata1;
    double* tdata2;

    tdata1 = malloc(n*sizeof(double));
    if (!tdata1) return 0.0; /* Memory allocation error */
    tdata2 = malloc(n*sizeof(double));
    if (!tdata2) /* Memory allocation error */ {
        free(tdata1);
        return 0.0;
    }
    if (transpose == 0) {
        for (i = 0; i < n; i++) {
            if (mask1[index1][i] && mask2[index2][i]) {
                tdata1[m] = data1[index1][i];
                tdata2[m] = data2[index2][i];
                m++;
            }

src/cluster.c  view on Meta::CPAN

    }
    if (m == 0) {
        free(tdata1);
        free(tdata2);
        return 0;
    }
    rank1 = getrank(m, tdata1, weight);
    free(tdata1);
    if (!rank1) {
        free(tdata2);
        return 0.0; /* Memory allocation error */
    }
    rank2 = getrank(m, tdata2, weight);
    free(tdata2);
    if (!rank2) /* Memory allocation error */ {
        free(rank1);
        return 0.0;
    }
    for (i = 0; i < m; i++) {
        const double term1 = rank1[i];
        const double term2 = rank2[i];
        const double w = weight[i];
        sum1 += term1 * w;
        sum2 += term2 * w;
        result += term1 * term2 * w;

src/cluster.c  view on Meta::CPAN

    /* Note: denom1 and denom2 cannot be calculated directly from the number
     * of elements. If two elements have the same rank, the squared sum of
     * their ranks will change.
     */
    free(rank1);
    free(rank2);
    if (!totalweight) return 0; /* usually due to empty clusters */
    result -= sum1 * sum2 / totalweight;
    denom1 -= sum1 * sum1 / totalweight;
    denom2 -= sum2 * sum2 / totalweight;
    if (denom1 <= 0) return 1; /* include '<' to deal with roundoff errors */
    if (denom2 <= 0) return 1; /* include '<' to deal with roundoff errors */
    result = result / sqrt(denom1*denom2);
    result = 1. - result;
    return result;
}

/* *********************************************************************    */

static double
kendall(int n, double** data1, double** data2, int** mask1, int** mask2,
        const double weight[], int index1, int index2, int transpose)

src/cluster.c  view on Meta::CPAN

method     (input) char
For method == 'a', the centroid is defined as the mean over all elements
belonging to a cluster for each dimension.
For method == 'm', the centroid is defined as the median over all elements
belonging to a cluster for each dimension.

Return value
============

The function returns an integer to indicate success or failure. If a
memory error occurs, or if method is not 'm' or 'a', getclustercentroids
returns 0. If successful, getclustercentroids returns 1.
========================================================================
*/
{
    switch(method) {
        case 'm': {
            const int nelements = (transpose == 0) ? nrows : ncolumns;
            double* cache = malloc(nelements*sizeof(double));
            if (!cache) return 0;
            getclustermedians(nclusters, nrows, ncolumns, data, mask,

src/cluster.c  view on Meta::CPAN

            return 1;
        }
    }
    return 0;
}

/* ********************************************************************* */

void
getclustermedoids(int nclusters, int nelements, double** distance,
    int clusterid[], int centroids[], double errors[])
/*
Purpose
=======

The getclustermedoids routine calculates the cluster centroids, given to which
cluster each element belongs. The centroid is defined as the element with the
smallest sum of distances to the other elements.

Arguments
=========

src/cluster.c  view on Meta::CPAN

The distance matrix. To save space, the distance matrix is given in the
form of a ragged array. The distance matrix is symmetric and has zeros
on the diagonal. See distancematrix for a description of the content.

clusterid    (output) int[nelements]
The cluster number to which each element belongs.

centroid     (output) int[nclusters]
The index of the element that functions as the centroid for each cluster.

errors       (output) double[nclusters]
The within-cluster sum of distances between the items and the cluster
centroid.

========================================================================
*/
{
    int i, j, k;

    for (j = 0; j < nclusters; j++) errors[j] = DBL_MAX;
    for (i = 0; i < nelements; i++) {
        double d = 0.0;
        j = clusterid[i];
        for (k = 0; k < nelements; k++) {
            if (i == k || clusterid[k]!=j) continue;
            d += (i < k ? distance[k][i] : distance[i][k]);
            if (d > errors[j]) break;
        }
        if (d < errors[j]) {
            errors[j] = d;
            centroids[j] = i;
        }
    }
}

/* ********************************************************************* */

static int
kmeans(int nclusters, int nrows, int ncolumns, double** data, int** mask,
    double weight[], int transpose, int npass, char dist,
    double** cdata, int** cmask, int clusterid[], double* error,
    int tclusterid[], int counts[], int mapping[])
{
    int i, j, k;
    const int nelements = (transpose == 0) ? nrows : ncolumns;
    const int ndata = (transpose == 0) ? ncolumns : nrows;
    int ifound = 1;
    int ipass = 0;
    /* Set the metric function as indicated by dist */
    double (*metric) (int, double**, double**, int**, int**,
                      const double[], int, int, int) = setmetric(dist);

    /* Save the clustering solution periodically and check if it reappears */
    int* saved = malloc(nelements*sizeof(int));
    if (saved == NULL) return -1;

    *error = DBL_MAX;

    do {
        double total = DBL_MAX;
        int counter = 0;
        int period = 10;

        /* Perform the EM algorithm.
         * First, randomly assign elements to clusters. */
        if (npass != 0) randomassign(nclusters, nelements, tclusterid);

src/cluster.c  view on Meta::CPAN

            if (total >= previous) break;
            /* total >= previous is FALSE on some machines even if total and
             * previous are bitwise identical. */
            for (i = 0; i < nelements; i++)
                if (saved[i]!=tclusterid[i]) break;
            if (i == nelements)
                break; /* Identical solution found; break out of this loop */
        }

        if (npass <= 1) {
            *error = total;
            break;
        }

        for (i = 0; i < nclusters; i++) mapping[i] = -1;
        for (i = 0; i < nelements; i++) {
            j = tclusterid[i];
            k = clusterid[i];
            if (mapping[k] == -1) mapping[k] = j;
            else if (mapping[k] != j) {
                if (total < *error) {
                    ifound = 1;
                    *error = total;
                    for (j = 0; j < nelements; j++)
                        clusterid[j] = tclusterid[j];
                }
                break;
            }
        }
        if (i == nelements) ifound++; /* break statement not encountered */
    } while (++ipass < npass);

    free(saved);
    return ifound;
}

/* ---------------------------------------------------------------------- */

static int
kmedians(int nclusters, int nrows, int ncolumns, double** data, int** mask,
    double weight[], int transpose, int npass, char dist,
    double** cdata, int** cmask, int clusterid[], double* error,
    int tclusterid[], int counts[], int mapping[], double cache[])
{
    int i, j, k;
    const int nelements = (transpose == 0) ? nrows : ncolumns;
    const int ndata = (transpose == 0) ? ncolumns : nrows;
    int ifound = 1;
    int ipass = 0;
    int* saved;
    /* Set the metric function as indicated by dist */
    double (*metric) (int, double**, double**, int**, int**,
                      const double[], int, int, int) = setmetric(dist);

    /* Save the clustering solution periodically and check if it reappears */
    saved = malloc(nelements*sizeof(int));
    if (saved == NULL) return -1;

    *error = DBL_MAX;

    do {
        double total = DBL_MAX;
        int counter = 0;
        int period = 10;

        /* Perform the EM algorithm.
         * First, randomly assign elements to clusters. */
        if (npass != 0) randomassign(nclusters, nelements, tclusterid);

src/cluster.c  view on Meta::CPAN

            if (total >= previous) break;
            /* total >= previous is FALSE on some machines even if total and
             * previous are bitwise identical. */
            for (i = 0; i < nelements; i++)
                if (saved[i]!=tclusterid[i]) break;
            if (i == nelements)
                break; /* Identical solution found; break out of this loop */
        }

        if (npass <= 1) {
            *error = total;
            break;
        }

        for (i = 0; i < nclusters; i++) mapping[i] = -1;
        for (i = 0; i < nelements; i++) {
            j = tclusterid[i];
            k = clusterid[i];
            if (mapping[k] == -1) mapping[k] = j;
            else if (mapping[k] != j) {
                if (total < *error) {
                    ifound = 1;
                    *error = total;
                    for (j = 0; j < nelements; j++)
                        clusterid[j] = tclusterid[j];
                }
                break;
            }
        }
        if (i == nelements) ifound++; /* break statement not encountered */
    } while (++ipass < npass);

    free(saved);
    return ifound;
}

/* ********************************************************************* */

void
kcluster(int nclusters, int nrows, int ncolumns, double** data, int** mask,
    double weight[], int transpose, int npass, char method, char dist,
    int clusterid[], double* error, int* ifound)
/*
Purpose
=======

The kcluster routine performs k-means or k-median clustering on a given set of
elements, using the specified distance measure. The number of clusters is given
by the user. Multiple passes are being made to find the optimal clustering
solution, each time starting from a different initial clustering.


src/cluster.c  view on Meta::CPAN

dist == 'k': Kendall's tau
For other values of dist, the default (Euclidean distance) is used.

clusterid  (output; input) int[nrows] if transpose == 0
                           int[ncolumns] otherwise
The cluster number to which a gene or microarray was assigned. If npass == 0,
then on input clusterid contains the initial clustering assignment from which
the clustering algorithm starts. On output, it contains the clustering solution
that was found.

error      (output) double*
The sum of distances to the cluster center of each item in the optimal k-means
clustering solution that was found.

ifound     (output) int*
The number of times the optimal clustering solution was
found. The value of ifound is at least 1; its maximum value is npass. If the
number of clusters is larger than the number of elements being clustered,
*ifound is set to 0 as an error code. If a memory allocation error occurs,
*ifound is set to -1.

========================================================================
*/
{
    const int nelements = (transpose == 0) ? nrows : ncolumns;
    const int ndata = (transpose == 0) ? ncolumns : nrows;

    int i;
    int ok;

src/cluster.c  view on Meta::CPAN

            free(mapping);
        }
        return;
    }

    if (method == 'm') {
        double* cache = malloc(nelements*sizeof(double));
        if (cache) {
            *ifound = kmedians(nclusters, nrows, ncolumns, data, mask, weight,
                               transpose, npass, dist, cdata, cmask, clusterid,
                               error, tclusterid, counts, mapping, cache);
            free(cache);
        }
    }
    else
        *ifound = kmeans(nclusters, nrows, ncolumns, data, mask, weight,
                         transpose, npass, dist, cdata, cmask, clusterid,
                         error, tclusterid, counts, mapping);

    /* Deallocate temporarily used space */
    if (npass > 1) {
        free(mapping);
        free(tclusterid);
    }

    if (transpose == 0) freedatamask(nclusters, cdata, cmask);
    else freedatamask(ndata, cdata, cmask);

    free(counts);
}

/* *********************************************************************** */

void
kmedoids(int nclusters, int nelements, double** distmatrix, int npass,
    int clusterid[], double* error, int* ifound)
/*
Purpose
=======

The kmedoids routine performs k-medoids clustering on a given set of elements,
using the distance matrix and the number of clusters passed by the user.
Multiple passes are being made to find the optimal clustering solution, each
time starting from a different initial clustering.


src/cluster.c  view on Meta::CPAN

clusterid  (output; input) int[nelements]
On input, if npass == 0, then clusterid contains the initial clustering
assignment from which the clustering algorithm starts; all numbers in clusterid
should be between zero and nelements-1 inclusive. If npass != 0, clusterid is
ignored on input.
On output, clusterid contains the clustering solution that was found: clusterid
contains the number of the cluster to which each item was assigned. On output,
the number of a cluster is defined as the item number of the centroid of the
cluster.

error      (output) double
The sum of distances to the cluster center of each item in the optimal
k-medoids clustering solution that was found.

ifound     (output) int
If kmedoids is successful: the number of times the optimal clustering solution
was found. The value of ifound is at least 1; its maximum value is npass.
If the user requested more clusters than elements available, ifound is set
to 0. If kmedoids fails due to a memory allocation error, ifound is set to -1.

========================================================================
*/
{
    int i, j, icluster;
    int* tclusterid;
    int* saved;
    int* centroids;
    double* errors;
    int ipass = 0;

    if (nelements < nclusters) {
        *ifound = 0;
        return;
    } /* More clusters asked for than elements available */

    *ifound = -1;

    /* Save the clustering solution periodically and check if it reappears */
    saved = malloc(nelements*sizeof(int));
    if (saved == NULL) return;

    centroids = malloc(nclusters*sizeof(int));
    if (!centroids) {
        free(saved);
        return;
    }

    errors = malloc(nclusters*sizeof(double));
    if (!errors) {
        free(saved);
        free(centroids);
        return;
    }

    /* Find out if the user specified an initial clustering */
    if (npass <= 1) tclusterid = clusterid;
    else {
        tclusterid = malloc(nelements*sizeof(int));
        if (!tclusterid) {
            free(saved);
            free(centroids);
            free(errors);
            return;
        }
        for (i = 0; i < nelements; i++) clusterid[i] = -1;
    }

    *error = DBL_MAX;
    do /* Start the loop */ {
        double total = DBL_MAX;
        int counter = 0;
        int period = 10;

        if (npass != 0) randomassign(nclusters, nelements, tclusterid);
        while (1) {
            double previous = total;
            total = 0.0;

            if (counter % period == 0) {
                /* Save the current cluster assignments */
                for (i = 0; i < nelements; i++) saved[i] = tclusterid[i];
                if (period < INT_MAX / 2) period *= 2;
            }
            counter++;

            /* Find the center */
            getclustermedoids(nclusters, nelements, distmatrix, tclusterid,
                              centroids, errors);

            for (i = 0; i < nelements; i++) {
                /* Find the closest cluster */
                double distance = DBL_MAX;
                for (icluster = 0; icluster < nclusters; icluster++) {
                    double tdistance;
                    j = centroids[icluster];
                    if (i == j) {
                        distance = 0.0;
                        tclusterid[i] = icluster;

src/cluster.c  view on Meta::CPAN

            /* total >= previous is FALSE on some machines even if total and
             * previous are bitwise identical. */
            for (i = 0; i < nelements; i++)
                if (saved[i] != tclusterid[i]) break;
            if (i == nelements)
                break; /* Identical solution found; break out of this loop */
        }

        if (npass <= 1) {
            *ifound = 1;
            *error = total;
            /* Replace by the centroid in each cluster. */
            for (j = 0; j < nelements; j++) {
                clusterid[j] = centroids[tclusterid[j]];
            }
            break;
        }

        for (i = 0; i < nelements; i++) {
            if (clusterid[i]!=centroids[tclusterid[i]]) {
                if (total < *error) {
                    *ifound = 1;
                    *error = total;
                    /* Replace by the centroid in each cluster. */
                    for (j = 0; j < nelements; j++) {
                        clusterid[j] = centroids[tclusterid[j]];
                    }
                }
                break;
            }
        }
        if (i == nelements) (*ifound)++; /* break statement not encountered */
    } while (++ipass < npass);

    /* Deallocate temporarily used space */
    if (npass > 1) free(tclusterid);

    free(saved);
    free(centroids);
    free(errors);
}

/* ******************************************************************** */

void
distancematrix(int nrows, int ncolumns, double** data, int** mask,
    double weights[], char dist, int transpose, double** matrix)
/*
Purpose
=======

src/cluster.c  view on Meta::CPAN


clusterid    (output) int[nelements]
The number of the cluster to which each element was assigned. Clusters are
numbered 0..nclusters-1 in the left-to-right order in which they appear in the
hierarchical clustering tree. Space for the clusterid array should be allocated
before calling the cuttree routine.

Return value
============

If no errors occur, cuttree returns 1.
If a memory error occurs, cuttree returns 0.

========================================================================
*/
{
    int i = -nelements+1; /* top node */
    int j;
    int k = -1;
    int previous = nelements;
    const int n = nelements-nclusters; /* number of nodes to join */
    int* parents;

src/cluster.c  view on Meta::CPAN

does not deallocate it.

Return value
============

A pointer to a newly allocated array of Node structs, describing the
hierarchical clustering solution consisting of nelements-1 nodes. Depending
on whether genes (rows) or samples (columns) were clustered, nelements is
equal to nrows or ncolumns. See src/cluster.h for a description of the Node
structure.
If a memory error occurs, pclcluster returns NULL.
========================================================================
*/
{
    int i, j;
    const int nelements = (transpose == 0) ? nrows : ncolumns;
    int inode;
    const int ndata = transpose ? nrows : ncolumns;
    const int nnodes = nelements - 1;
    Node* result;
    double** newdata;

src/cluster.c  view on Meta::CPAN



Return value
============

A pointer to a newly allocated array of Node structs, describing the
hierarchical clustering solution consisting of nelements-1 nodes. Depending
on whether genes (rows) or samples (columns) were clustered, nelements is
equal to nrows or ncolumns. See src/cluster.h for a description of the Node
structure.
If a memory error occurs, pslcluster returns NULL.

========================================================================
*/
{
    int i, j, k;
    const int nelements = transpose ? ncolumns : nrows;
    const int nnodes = nelements - 1;
    int* vector;
    double* temp;
    int* index;

src/cluster.c  view on Meta::CPAN

zero. The distance matrix will be modified by this routine.

Return value
============

A pointer to a newly allocated array of Node structs, describing the
hierarchical clustering solution consisting of nelements-1 nodes. Depending on
whether genes (rows) or samples (columns) were clustered, nelements is equal
to nrows or ncolumns. See src/cluster.h for a description of the Node
structure.
If a memory error occurs, pmlcluster returns NULL.
========================================================================
*/
{
    int j;
    int n;
    int* clusterid;
    Node* result;

    clusterid = malloc(nelements*sizeof(int));
    if (!clusterid) return NULL;

src/cluster.c  view on Meta::CPAN

zero. The distance matrix will be modified by this routine.

Return value
============

A pointer to a newly allocated array of Node structs, describing the
hierarchical clustering solution consisting of nelements-1 nodes. Depending on
whether genes (rows) or samples (columns) were clustered, nelements is equal
to nrows or ncolumns. See src/cluster.h for a description of the Node
structure.
If a memory error occurs, palcluster returns NULL.
========================================================================
*/
{
    int j;
    int n;
    int* clusterid;
    int* number;
    Node* result;

    clusterid = malloc(nelements*sizeof(int));

src/cluster.c  view on Meta::CPAN

    double weight[], int transpose, char dist, char method,
    double** distmatrix)
/*
Purpose
=======

The treecluster routine performs hierarchical clustering using pairwise
single-, maximum-, centroid-, or average-linkage, as defined by method, on a
given set of gene expression data, using the distance metric given by dist.
If successful, the function returns a pointer to a newly allocated Tree struct
containing the hierarchical clustering solution, and NULL if a memory error
occurs. The pointer should be freed by the calling routine to prevent memory
leaks.

Arguments
=========

nrows      (input) int
The number of rows in the data matrix, equal to the number of genes.

ncolumns   (input) int

src/cluster.c  view on Meta::CPAN

treecluster.

Return value
============

A pointer to a newly allocated array of Node structs, describing the
hierarchical clustering solution consisting of nelements-1 nodes. Depending on
whether genes (rows) or samples (columns) were clustered, nelements is equal
to nrows or ncolumns. See src/cluster.h for a description of the Node
structure.
If a memory error occurs, treecluster returns NULL.

========================================================================
*/
{
    Node* result = NULL;
    const int nelements = (transpose == 0) ? nrows : ncolumns;
    const int ldistmatrix = (distmatrix == NULL && method != 's') ? 1 : 0;

    if (nelements < 2) return NULL;

src/cluster.c  view on Meta::CPAN

order   (input) double[nnodes+1]
The preferred order of the items.

indices (output) int*
The indices of each item after sorting, with item i appearing at indices[i]
after sorting.

Return value
============

If no errors occur, sorttree returns 1.
If a memory error occurs, sorttree returns 0.

========================================================================
*/

{
    int i;
    int index;
    int i1, i2;
    double order1, order2;
    int counts1, counts2;

src/cluster.h  view on Meta::CPAN

  double weight[], int n1, int n2, int index1[], int index2[], char dist,
  char method, int transpose);
void distancematrix(int ngenes, int ndata, double** data, int** mask,
  double* weight, char dist, int transpose, double** distances);

/* Chapter 3 */
int getclustercentroids(int nclusters, int nrows, int ncolumns,
  double** data, int** mask, int clusterid[], double** cdata, int** cmask,
  int transpose, char method);
void getclustermedoids(int nclusters, int nelements, double** distance,
  int clusterid[], int centroids[], double errors[]);
void kcluster(int nclusters, int ngenes, int ndata, double** data,
  int** mask, double weight[], int transpose, int npass, char method, char dist,
  int clusterid[], double* error, int* ifound);
void kmedoids(int nclusters, int nelements, double** distance,
  int npass, int clusterid[], double* error, int* ifound);

/* Chapter 4 */
typedef struct {int left; int right; double distance;} Node;
/*
 * A Node struct describes a single node in a tree created by hierarchical
 * clustering. The tree can be represented by an array of n Node structs,
 * where n is the number of elements minus one. The integers left and right
 * in each Node struct refer to the two elements or subnodes that are joined
 * in this node. The original elements are numbered 0..nelements-1, and the
 * nodes -1..-(nelements-1). For each node, distance contains the distance



( run in 0.685 second using v1.01-cache-2.11-cpan-65fba6d93b7 )