Algorithm-Cluster

 view release on metacpan or  search on metacpan

perl/Cluster.xs  view on Meta::CPAN

#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"

/* The Perl include files perl.h redefines malloc and free. Here, we need the
 * usual malloc and free, defined in stdlib.h. So we undefine the ones in
 * perl.h.
 */

#ifdef malloc
#undef malloc
#endif
#ifdef free
#undef free
#endif

#include <stdlib.h>

#include "../src/cluster.h"


typedef struct {Node* nodes; int n;} Tree;

/* -------------------------------------------------
 * Using the warnings registry, check to see if warnings
 * are enabled for the Algorithm::Cluster module.
 */
static int
warnings_enabled(pTHX) {

    dSP;

    I32 count;
    bool isEnabled; 
    SV * mysv;

    ENTER ;
    SAVETMPS;
    PUSHMARK(SP) ;
    XPUSHs(sv_2mortal(newSVpv("Algorithm::Cluster",18)));
    PUTBACK ;

    count = perl_call_pv("warnings::enabled", G_SCALAR) ;

    if (count != 1) croak("No arguments returned from call_pv()\n") ;

    mysv = POPs; 
    isEnabled = (bool) SvTRUE(mysv); 

    PUTBACK ;
    FREETMPS ;
    LEAVE ;

    return isEnabled;
}

/* -------------------------------------------------
 * Create a row of doubles, initialized to a value
 */
static double*
malloc_row_dbl(pTHX_ int ncols, double val) {

    int j;
    double * row;

    row = malloc(ncols * sizeof(double) );
    if (!row) {
        return NULL;
    }

    for (j = 0; j < ncols; j++) { 
        row[j] = val;
    }
    return row;
}

/* -------------------------------------------------
 * Only coerce to a double if we already know it's 
 * an integer or double, or a string which is actually numeric.
 * Don't blindly run the macro SvNV, because that will coerce 
 * a non-numeric string to be a double of value 0.0, 
 * and we do not want that to happen, because if we test it again, 
 * it will then appear to be a valid double value. 
 */
static int
extract_double_from_scalar(pTHX_ SV * mysv, double * number) {

    if (SvPOKp(mysv) && SvLEN(mysv)) {  

        /* This function is not in the public perl API */
        if (Perl_looks_like_number(aTHX_ mysv)) {
            *number = SvNV( mysv );
            return 1;
        } else {
            return 0;
        } 
    } else if (SvNIOK(mysv)) {  
        *number = SvNV( mysv );
        return 1;
    } else {
        return 0;
    }
}



/* -------------------------------------------------
 * Convert a Perl 2D matrix into a 2D matrix of C doubles.
 * If no data are masked, mask can be passed as NULL.
 * NOTE: on errors this function returns a value greater than zero.

perl/Cluster.xs  view on Meta::CPAN

        AV* row_av  = (AV *) SvRV(row_ref);
        matrix[i] = malloc(i * sizeof(double));
        if (!matrix[i]) {
            break;
        }
        /* Loop once for each cell in the row. */
        for (j=0; j < i; j++) { 
            double num;
            SV* cell = *(av_fetch(row_av, (I32) j, 0)); 
            if(extract_double_from_scalar(aTHX_ cell,&num) > 0) {    
                matrix[i][j] = num;
            } else {
                if(warnings_enabled(aTHX))
                    Perl_warn(aTHX_ 
                        "Row %d col %d: Value is not "
                                                "a number.\n", i, j);
                break;
            }
        }
    }

    if (i < nobjects) {
        nobjects = i+1;
        for (i = 1; i < nobjects; i++) free(matrix[i]);
        free(matrix);
        matrix = NULL;
    }

    return matrix;
}

/******************************************************************************/
/**                                                                          **/
/** XS code begins here                                                      **/
/**                                                                          **/
/******************************************************************************/
/******************************************************************************/

MODULE = Algorithm::Cluster PACKAGE = Algorithm::Cluster::Node
PROTOTYPES: ENABLE

SV*
new (class, left, right, distance)
    char* class
    int left
    int right
    double distance
    PREINIT:
    Node* node;
    SV* obj;
    CODE:
    node = malloc(sizeof(Node));
    RETVAL = newSViv(0);
    obj = newSVrv(RETVAL, class);
    node->left = left;
    node->right = right;
    node->distance = distance;

    sv_setiv(obj, PTR2IV(node));
    SvREADONLY_on(obj);
    OUTPUT:
    RETVAL


int
left (obj)
    SV* obj
    CODE:
    RETVAL = (INT2PTR(Node*,SvIV(SvRV(obj))))->left;
    OUTPUT:
    RETVAL

int
right (obj)
    SV* obj
    CODE:
    RETVAL = (INT2PTR(Node*,SvIV(SvRV(obj))))->right;
    OUTPUT:
    RETVAL

double
distance (obj)
    SV* obj
    CODE:
    RETVAL = (INT2PTR(Node*,SvIV(SvRV(obj))))->distance;
    OUTPUT:
    RETVAL

void
set_left (obj, left)
    SV* obj
    int left
    PREINIT:
    Node* node;
    CODE:
    if (!sv_isa(obj, "Algorithm::Cluster::Node")) {
        croak("set_left should be applied to an Algorithm::Cluster::Node object");
    }
    node = INT2PTR(Node*,SvIV(SvRV(obj)));
    node->left = left;

void
set_right (obj, right)
    SV* obj
    int right
    PREINIT:
    Node* node;
    CODE:
    if (!sv_isa(obj, "Algorithm::Cluster::Node")) {
        croak("set_right should be applied to an Algorithm::Cluster::Node object");
    }
    node = INT2PTR(Node*,SvIV(SvRV(obj)));
    node->right = right;

void
set_distance (obj, distance)
    SV* obj
    double distance
    PREINIT:
    Node* node;
    CODE:
    if (!sv_isa(obj, "Algorithm::Cluster::Node")) {
        croak("set_distance should be applied to an Algorithm::Cluster::Node object");
    }
    node = INT2PTR(Node*,SvIV(SvRV(obj)));
    node->distance = distance;

void DESTROY (obj)
    SV* obj
    PREINIT:
    I32* temp;
    Node* node;
    PPCODE:
    temp = PL_markstack_ptr++;
    node = INT2PTR(Node*, SvIV(SvRV(obj)));
    free(node);
    if (PL_markstack_ptr != temp) {
        /* truly void, because dXSARGS not invoked */
        PL_markstack_ptr = temp;
        XSRETURN_EMPTY;
        /* return empty stack */
    }  /* must have used dXSARGS; list context implied */
    return;  /* assume stack size is correct */


MODULE = Algorithm::Cluster PACKAGE = Algorithm::Cluster::Tree

perl/Cluster.xs  view on Meta::CPAN

    }
    if (! tree || !tree->nodes) {
        if (tree) free(tree);
        croak("Algorithm::Cluster::Tree::new memory error\n");
    }

    for (i = 0; i < n; i++) {
        Node* node;
        SV* node_ref = *(av_fetch(array, (I32) i, 0)); 
        if (!sv_isa(node_ref, "Algorithm::Cluster::Node")) break;
        node = INT2PTR(Node*,SvIV(SvRV(node_ref)));
        tree->nodes[i].left = node->left;
        tree->nodes[i].right = node->right;
        tree->nodes[i].distance = node->distance;
    }

    if (i < n) {
        /* break encountered */
        free(tree->nodes);
        free(tree);
        croak("Algorithm::Cluster::Tree::new expects an array of nodes\n");
    }

    flag = malloc((2*n+1)*sizeof(int));
    if(flag) {
         int j;
        for (i = 0; i < 2*n+1; i++) flag[i] = 0;
        for (i = 0; i < n; i++) {
            j = tree->nodes[i].left;
            if (j < 0) {
                j = -j-1;
                if (j>=i) break;
            }
            else j+=n;
            if (flag[j]) break;
            flag[j] = 1;
            j = tree->nodes[i].right;
            if (j < 0) {
                j = -j-1;
                if (j>=i) break;
            }
            else j+=n;
            if (flag[j]) break;
            flag[j] = 1;
        }
        free(flag);
    }

    if (!flag || i < n) {
        /* break encountered */
        free(tree->nodes);
        free(tree);
        croak("the array of nodes passed to Algorithm::Cluster::Tree::new does not represent a valid tree\n");
    }

    RETVAL = newSViv(0);
    obj = newSVrv(RETVAL, class);
    sv_setiv(obj, PTR2IV(tree));
    SvREADONLY_on(obj);

    OUTPUT:
    RETVAL

int
length (obj)
    SV* obj
    CODE:
    RETVAL = (INT2PTR(Tree*,SvIV(SvRV(obj))))->n;
    OUTPUT:
    RETVAL


SV *
get (obj, index)
    SV* obj
    int index
    PREINIT:
    Tree* tree;
    Node* node;
    SV* scalar;
    CODE:
    tree = INT2PTR(Tree*,SvIV(SvRV(obj)));
    if (index < 0 || index >= tree->n) {
        croak("Index out of bounds in Algorithm::Cluster::Tree::get\n");
    }
    RETVAL = newSViv(0);
    scalar = newSVrv(RETVAL, "Algorithm::Cluster::Node");
    node = malloc(sizeof(Node));
    if (!node) {
        croak("Memory allocation failure in Algorithm::Cluster::Tree::get\n");
    }
    node->left = tree->nodes[index].left;
    node->right = tree->nodes[index].right;
    node->distance = tree->nodes[index].distance;
    sv_setiv(scalar, PTR2IV(node));
    SvREADONLY_on(scalar);
    OUTPUT:
    RETVAL

void
scale(obj)
    SV* obj
    PREINIT:
    int i;
    int n;
    Tree* tree;
    Node* nodes;
    double maximum;
    CODE:
    if (!sv_isa(obj, "Algorithm::Cluster::Tree")) {
        croak("scale can only be applied to an Algorithm::Cluster::Tree object");
    }
    tree = INT2PTR(Tree*,SvIV(SvRV(obj)));
    n = tree->n;
    nodes = tree->nodes;
    maximum = DBL_MIN;
    for (i = 0; i < n; i++) {
        double distance = nodes[i].distance;
        if (distance > maximum) maximum = distance;
    }
    if (maximum!=0.0) {
        for (i = 0; i < n; i++) nodes[i].distance /= maximum;
    }


void
sort(obj, order = NULL)
    SV* obj
    SV* order 

    PREINIT:
    int i;
    int n;
    Tree* tree;
    int* indices;
    double* values = NULL;
    int ok;

    PPCODE:
    if (!sv_isa(obj, "Algorithm::Cluster::Tree")) {
        croak("sort can only be applied to an Algorithm::Cluster::Tree object");
    }
    tree = INT2PTR(Tree*,SvIV(SvRV(obj)));
    if (order) {
        if(!SvROK(order) || SvTYPE(SvRV(order)) != SVt_PVAV) { 
            croak("Algorithm::Cluster::Tree::sort expects an order array\n");
        }
        values = malloc_row_perl2c_dbl(aTHX_ order, &n);
        if (!values) {
            croak("Algorithm::Cluster::Tree::sort memory error\n");
        }
        if (n != tree->n + 1) {
            free(values);
            croak("sort: size of order array is inconsistent with tree size\n");
        }
    }
    else {

perl/Cluster.xs  view on Meta::CPAN

    int i;
    int n;
    Tree* tree;
    int* clusterid;
    PPCODE:
    if (!sv_isa(obj, "Algorithm::Cluster::Tree")) {
        croak("cut can only be applied to an Algorithm::Cluster::Tree object\n");
    }
    tree = INT2PTR(Tree*,SvIV(SvRV(obj)));
    n = tree->n + 1;
    if (nclusters < 0) {
        croak("cut: Requested number of clusters should be positive\n");
    }
    if (nclusters > n) {
        croak("cut: More clusters requested than items available\n");
    }
    if (nclusters == 0) {
        nclusters = n;
    }
    clusterid = malloc(n*sizeof(int));
    if (!clusterid) {
        croak("cut: Insufficient memory\n");
    }
    ok = cuttree(n, tree->nodes, nclusters, clusterid);
    if (!ok) {
        free(clusterid);
        croak("cut: Error in the cuttree routine\n");
    }
    for(i=0; i<n; i++) XPUSHs(sv_2mortal(newSVnv(clusterid[i])));
    free(clusterid);


void DESTROY (obj)
    SV* obj
    PREINIT:
    I32* temp;
    Tree* tree;
    PPCODE:
    temp = PL_markstack_ptr++;
    tree = INT2PTR(Tree*, SvIV(SvRV(obj)));
    free(tree->nodes);
    free(tree);
    if (PL_markstack_ptr != temp) {
        /* truly void, because dXSARGS not invoked */
        PL_markstack_ptr = temp;
        XSRETURN_EMPTY;
        /* return empty stack */
    }  /* must have used dXSARGS; list context implied */
    return;  /* assume stack size is correct */


MODULE = Algorithm::Cluster    PACKAGE = Algorithm::Cluster
PROTOTYPES: ENABLE


SV *
_version()
    CODE:
    RETVAL = newSVpv( CLUSTERVERSION , 0);

    OUTPUT:
    RETVAL



SV *
_mean(input)
    SV * input;

    PREINIT:
    int array_length;
    double * data;  /* one-dimensional array of doubles */

    CODE:
    if(SvTYPE(SvRV(input)) != SVt_PVAV) { 
        XSRETURN_UNDEF;
    }

    data = malloc_row_perl2c_dbl (aTHX_ input, &array_length);
    if (data) {
        RETVAL = newSVnv( mean(array_length, data) );
        free(data);
    } else {
        croak("memory allocation failure in _mean\n");
    }

    OUTPUT:
    RETVAL


SV *
_median(input)
    SV * input;

    PREINIT:
    int array_length;
    double * data;  /* one-dimensional array of doubles */

    CODE:
    if(SvTYPE(SvRV(input)) != SVt_PVAV) { 
        XSRETURN_UNDEF;
    }

    data = malloc_row_perl2c_dbl (aTHX_ input, &array_length);
    if (data) {
        RETVAL = newSVnv( median(array_length, data) );
        free(data);
    } else {
        croak("memory allocation failure in _median\n");
    }

    OUTPUT:
    RETVAL


SV *
_treecluster(nrows,ncols,data_ref,mask_ref,weight_ref,transpose,dist,method)
    int      nrows;
    int      ncols;
    SV *     data_ref;
    SV *     mask_ref;
    SV *     weight_ref;
    int      transpose;
    char *   dist;
    char *   method;

    PREINIT:
    Node*    nodes;

    double  * weight = NULL;
    double ** matrix = NULL;
    int    ** mask   = NULL;
    double ** distancematrix = NULL;
    const int ndata = transpose ? nrows : ncols;
    const int nelements = transpose ? ncols : nrows;

    CODE:
    /* ------------------------
     * Don't check the parameters, because we rely on the Perl
     * caller to check most paramters.
     */
    /* ------------------------
     * Convert data and mask matrices and the weight array
     * from C to Perl.  Also check for errors, and ignore the
     * mask or the weight array if there are any errors. 
     */
    if (is_distance_matrix(aTHX_ data_ref)) {
        distancematrix = parse_distance(aTHX_ data_ref, nelements);
        if (!distancematrix) {
                croak("memory allocation failure in _treecluster\n");
        }
    } else {
        int ok;
        ok = malloc_matrices(aTHX_ weight_ref, &weight, ndata, 
                    data_ref,   &matrix,
                    mask_ref,   &mask,  
                    nrows,      ncols);
        if (!ok) {
            croak("failed to read input data for _treecluster\n");
        }
    }

    /* ------------------------
     * Run the library function
     */
    nodes = treecluster(nrows, ncols, matrix, mask, weight, transpose,
                dist[0], method[0], distancematrix);

    /* ------------------------
     * Check result to make sure we didn't run into memory problems
     */
    if(!nodes) {
        /* treecluster failed due to insufficient memory */
        if (matrix) {
            free_matrix_int(mask,     nrows);
            free_matrix_dbl(matrix,   nrows);
            free(weight);
        } else {
            free_ragged_matrix_dbl(distancematrix, nelements);
        }
        croak("memory allocation failure in treecluster\n");
    }
    else {

        /* ------------------------
          * Convert generated C matrices to Perl matrices
          */
        const int n = nelements-1;
        int i;
        SV* obj;
        Tree* tree;
        RETVAL = newSViv(0);
        obj = newSVrv(RETVAL, "Algorithm::Cluster::Tree");
        tree = malloc(sizeof(Tree));
        if (!tree)
            croak("Memory allocation failure in Algorithm::Cluster::Tree\n");
        tree->n = n;
        tree->nodes = malloc(n*sizeof(Node));
        if (!tree->nodes) {
            free(tree);
            croak("Memory allocation failure in Algorithm::Cluster::Tree\n");
        }
        sv_setiv(obj, PTR2IV(tree));
        SvREADONLY_on(obj);
        for(i=0; i<n; i++) {
            tree->nodes[i].left = nodes[i].left;
            tree->nodes[i].right = nodes[i].right;
            tree->nodes[i].distance = nodes[i].distance;
        }
        free(nodes);
    }

    /* ------------------------
     * Free what we've malloc'ed 
     */
    if (matrix) {
        free_matrix_int(mask,     nrows);
        free_matrix_dbl(matrix,   nrows);
        free(weight);
    } else {
        free_ragged_matrix_dbl(distancematrix, nelements);
    }

    /* Finished _treecluster() */
    OUTPUT:
    RETVAL


void
_kcluster(nclusters,nrows,ncols,data_ref,mask_ref,weight_ref,transpose,npass,method,dist,initialid_ref)
    int      nclusters;
    int      nrows;
    int      ncols;
    SV *     data_ref;
    SV *     mask_ref;
    SV *     weight_ref;
    int      transpose;
    int      npass;
    char *   method;
    char *   dist;
    SV *     initialid_ref;

    PREINIT:
    SV  *    clusterid_ref;
    int *    clusterid;
    int      nobjects;
    int      ndata;
    double   error;
    int      ifound;
    int      ok;

    double  * weight;
    double ** matrix;
    int    ** mask;


    PPCODE:
    /* ------------------------
     * Don't check the parameters, because we rely on the Perl
     * caller to check most parameters.
     */

    /* ------------------------
     * Malloc space for the return values from the library function
     */
    if (transpose==0) {
        nobjects = nrows;
        ndata = ncols;
    } else {
        nobjects = ncols;
        ndata = nrows;
    }
    clusterid = malloc(nobjects * sizeof(int) );
    if (!clusterid) {
        croak("memory allocation failure in _kcluster\n");
    }

    /* ------------------------
     * Convert data and mask matrices and the weight array
     * from C to Perl.  Also check for errors, and ignore the
     * mask or the weight array if there are any errors. 
     */
    ok = malloc_matrices( aTHX_ weight_ref, &weight, ndata, 
                data_ref,   &matrix,
                mask_ref,   &mask,  

perl/Cluster.xs  view on Meta::CPAN

    int ok;

    CODE:

    /* ------------------------
     * Don't check the parameters, because we rely on the Perl
     * caller to check most paramters.
     */

    /* ------------------------
     * Convert cluster index Perl arrays to C arrays
     */
    cluster1 = malloc_row_perl2c_int(aTHX_ cluster1_ref);
    cluster2 = malloc_row_perl2c_int(aTHX_ cluster2_ref);
    if (!cluster1 || !cluster2) {
        if (cluster1) free(cluster1);
        if (cluster2) free(cluster2);
        croak("memory allocation failure in _clusterdistance\n");
    }

    /* ------------------------
     * Convert data and mask matrices and the weight array
     * from C to Perl.  Also check for errors, and ignore the
     * mask or the weight array if there are any errors. 
     * Set nweights to the correct number of weights.
     */
    nweights = (transpose==0) ? ncols : nrows;
    ok = malloc_matrices( aTHX_ weight_ref, &weight, nweights, 
                data_ref,   &matrix,
                mask_ref,   &mask,  
                nrows,      ncols);
    if (!ok) {
        free(cluster1);
        free(cluster2);
            croak("failed to read input data for _clusterdistance\n");
    }

    /* ------------------------
     * Run the library function
     */
    distance = clusterdistance( 
        nrows, ncols, 
        matrix, mask, weight,
        cluster1_len, cluster2_len, cluster1, cluster2,
        dist[0], method[0], transpose
    );

    RETVAL = distance;

    /* ------------------------
     * Free what we've malloc'ed 
     */
    free_matrix_int(mask,     nrows);
    free_matrix_dbl(matrix,   nrows);
    free(weight);
    free(cluster1);
    free(cluster2);

    /* Finished _clusterdistance() */

    OUTPUT:
    RETVAL



void
_clustercentroids(nclusters,nrows,ncols,data_ref,mask_ref,clusterid_ref,transpose,method)
    int      nclusters;
    int      nrows;
    int      ncols;
    SV *     data_ref;
    SV *     mask_ref;
    SV *     clusterid_ref;
    int      transpose;
    char *   method;

    PREINIT:
    SV  *    cdata_ref;
    SV  *    cmask_ref;
    int     * clusterid;

    double ** matrix;
    int    ** mask;

    double ** cdata;
    int    ** cmask;
    int       cnrows = 0; /* Initialize to make the compiler shut up */
    int       cncols = 0; /* Initialize to make the compiler shut up */

    int i;
    int ok;

    PPCODE:

    /* ------------------------
     * Don't check the parameters, because we rely on the Perl
     * caller to check most paramters.
     */
        if (transpose==0)
        {    cnrows = nclusters;
             cncols = ncols;
        }
        else if (transpose==1)
        {    cnrows = nrows;
             cncols = nclusters;
        }

    /* ------------------------
     * Convert cluster index Perl arrays to C arrays
     */
    clusterid = malloc_row_perl2c_int(aTHX_ clusterid_ref);
    if (!clusterid) {
        croak("memory allocation failure in _clustercentroids\n");
    }

    /* ------------------------
     * Convert data and mask matrices and the weight array
     * from C to Perl.  Also check for errors, and ignore the
     * mask or the weight array if there are any errors. 
     * Set nweights to the correct number of weights.
     */



( run in 0.701 second using v1.01-cache-2.11-cpan-e1769b4cff6 )