AI-MaxEntropy
view release on metacpan - search on metacpan
view release on metacpan or search on metacpan
AI-MaxEntropy.xs view on Meta::CPAN
/**************************************************************************
* XS of AI:MaxEntropy
* -> by Laye Suen
**************************************************************************/
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include "ppport.h"
/* Macros for debugging */
/* uncomment the line below to enable tracing and timing */
/*#define __ENABLE_TRACING__*/
#ifdef __ENABLE_TRACING__
#include "time.h"
#define TRACE(msg) \
printf(_fn); printf(": "); printf(msg); \
printf(": %0.10f s\n", 1.0 * (clock() - _t) / CLOCKS_PER_SEC); \
fflush(stdout); _t = clock()
#define dTRACE(fn) clock_t _t = clock(); char* _fn = fn
#else
#define TRACE(msg)
#define dTRACE
#endif
/* Other macros */
#define hvref_fetch(hvref, key) \
hv_fetch((HV*)SvRV(hvref), key, strlen(key), 0)
#define hvref_exists(hvref, key) \
hv_exists((HV*)SvRV(hvref), key, strlen(key))
#define hvref_store(hvref, key, value) \
hv_store((HV*)SvRV(hvref), key, strlen(key), value, 0)
#define hvref_delete(hvref, key) \
hv_delete((HV*)SvRV(hvref), key, strlen(key), G_DISCARD)
/* internal structures */
struct samples_t {
int s_num;
int* x_len;
int** x;
int* y;
double* w;
};
struct f_map_t {
int y_num;
int** lambda_idx;
};
/**************************************************************************
* EXPORTED XSUBS
**************************************************************************/
MODULE = AI::MaxEntropy PACKAGE = AI::MaxEntropy
void
_neg_log_likelihood(lambda_in, step, self, OUTLIST SV* f, OUTLIST SV* g)
AV* lambda_in
SV* step
SV* self
PREINIT:
dTRACE("_neg_log_likelihood");
/* fetch the pre-cached samples and f_map */
SV* _c = *hvref_fetch(self, "_c");
struct samples_t* samples =
INT2PTR(struct samples_t*, SvIV(*hvref_fetch(_c, "samples")));
struct f_map_t* f_map =
INT2PTR(struct f_map_t*, SvIV(*hvref_fetch(_c, "f_map")));
int** lambda_idx = f_map->lambda_idx;
/* fetch other useful data */
SV* smoother = *hvref_fetch(self, "smoother");
int x_num = SvIV(*hvref_fetch(self, "x_num"));
int y_num = SvIV(*hvref_fetch(self, "y_num"));
int f_num = SvIV(*hvref_fetch(self, "f_num"));
/* intermediate variables */
AV* av_d_log_lh;
char* smoother_type;
int i, j, x, y, lambda_i;
double log_lh, sum_exp_lambda_f, sigma, fxy;
double* lambda_f = (double*)malloc(sizeof(double) * y_num);
double* exp_lambda_f = (double*)malloc(sizeof(double) * y_num);
double* d_log_lh = (double*)malloc(sizeof(double) * f_num);
double* lambda = (double*)malloc(sizeof(double) * f_num);
CODE:
/* initialize */
TRACE("enter");
for (i = 0; i < f_num; i++)
lambda[i] = SvNV(*av_fetch(lambda_in, i, 0));
log_lh = 0;
for (i = 0; i < f_num; i++) d_log_lh[i] = 0;
TRACE("finish initializing");
/* calculate log likelihood and its gradient */
for (i = 0; i < samples->s_num; i++) {
/* log likelihood */
for (sum_exp_lambda_f = 0, y = 0; y < y_num; y++) {
for (lambda_f[y] = 0, j = 0; j < samples->x_len[i]; j++) {
lambda_i = lambda_idx[y][samples->x[i][j]];
if (lambda_i != -1) lambda_f[y] += lambda[lambda_i];
}
sum_exp_lambda_f += (exp_lambda_f[y] = exp(lambda_f[y]));
}
log_lh += samples->w[i] *
(lambda_f[samples->y[i]] - log(sum_exp_lambda_f));
/* gradient */
for (y = 0; y < y_num; y++) {
fxy = (y == samples->y[i] ? 1.0 : 0.0);
for (j = 0; j < samples->x_len[i]; j++) {
lambda_i = lambda_idx[y][samples->x[i][j]];
if (lambda_i != -1)
d_log_lh[lambda_i] += samples->w[i] *
(fxy - exp_lambda_f[y] / sum_exp_lambda_f);
}
}
}
TRACE("finish log likelihood and gradient");
/* smoothing */
if (SvOK(smoother) && hvref_exists(smoother, "type")) {
smoother_type = SvPV_nolen(*hvref_fetch(smoother, "type"));
if (strcmp(smoother_type, "gaussian") == 0) {
sigma = SvOK(*hvref_fetch(smoother, "sigma")) ?
SvNV(*hvref_fetch(smoother, "sigma")) : 1.0;
for (i = 0; i < f_num; i++) {
log_lh -= (lambda[i] * lambda[i]) / (2 * sigma * sigma);
d_log_lh[i] -= lambda[i] / (sigma * sigma);
}
}
}
TRACE("finish smoothing");
/* negate the value and finish */
log_lh = -log_lh;
av_d_log_lh = newAV();
av_extend(av_d_log_lh, f_num - 1);
for (i = 0; i < f_num; i++)
av_store(av_d_log_lh, i, newSVnv(-d_log_lh[i]));
f = sv_2mortal(newSVnv(log_lh));
g = sv_2mortal(newRV_noinc((SV*)av_d_log_lh));
TRACE("leave");
CLEANUP:
free(lambda_f);
free(exp_lambda_f);
free(d_log_lh);
free(lambda);
SV*
_apply_gis(self, progress_cb, epsilon)
SV* self
SV* progress_cb
double epsilon
PREINIT:
dSP;
dTRACE("_apply_gis");
/* fetch the pre-cached samples and f_map */
SV* _c = *hvref_fetch(self, "_c");
struct samples_t* samples =
INT2PTR(struct samples_t*, SvIV(*hvref_fetch(_c, "samples")));
struct f_map_t* f_map =
INT2PTR(struct f_map_t*, SvIV(*hvref_fetch(_c, "f_map")));
int** lambda_idx = f_map->lambda_idx;
/* fetch other useful data */
AV* f_freq = (AV*)SvRV(*hvref_fetch(self, "f_freq"));
int x_num = SvIV(*hvref_fetch(self, "x_num"));
int y_num = SvIV(*hvref_fetch(self, "y_num"));
int f_num = SvIV(*hvref_fetch(self, "f_num"));
int af_num = SvIV(*hvref_fetch(self, "af_num"));
/* intermediate variables */
SV *sv_r;
AV *av_lambda, *av_d_lambda;
int i, j, k, y, lambda_i, r;
double sum_exp_lambda_f, pxy;
double d_lambda_norm, lambda_norm;
double* p_f = (double*)malloc(sizeof(double) * f_num);
double* p1_f = (double*)malloc(sizeof(double) * f_num);
double* lambda = (double*)malloc(sizeof(double) * f_num);
double* d_lambda = (double*)malloc(sizeof(double) * f_num);
double* exp_lambda_f = (double*)malloc(sizeof(double) * y_num);
CODE:
TRACE("enter");
/* initiate lambda */
for (i = 0; i < f_num; i++) lambda[i] = 0;
/* initiate p(f) */
for (j = 0; j < y_num; j++)
for (i = 0; i < x_num; i++) {
lambda_i = lambda_idx[j][i];
if (lambda_i != -1) p_f[lambda_i] = SvNV(
*av_fetch((AV*)SvRV(*av_fetch(f_freq, j, 0)), i, 0)) +
1e-5;
}
/* iterate */
k = 0;
do {
TRACE("iteration");
/* get p1(f) for current lambda */
for (i = 0; i < f_num; i++) p1_f[i] = 0;
for (i = 0; i < samples->s_num; i++) {
for (sum_exp_lambda_f = 0, y = 0; y < y_num; y++) {
for (exp_lambda_f[y] = 0, j = 0; j < af_num; j++) {
lambda_i = lambda_idx[y][samples->x[i][j]];
if (lambda_i != -1)
exp_lambda_f[y] += lambda[lambda_i];
}
exp_lambda_f[y] = exp(exp_lambda_f[y]);
sum_exp_lambda_f += exp_lambda_f[y];
}
for (y = 0; y < y_num; y++) {
pxy = exp_lambda_f[y] / sum_exp_lambda_f;
for (j = 0; j < af_num; j++) {
lambda_i = lambda_idx[y][samples->x[i][j]];
if (lambda_i != -1)
p1_f[lambda_i] += pxy * samples->w[i];
}
}
}
/* lambda = lambda + d_lambda */
d_lambda_norm = 0;
lambda_norm = 0;
for (i = 0; i < f_num; i++) {
d_lambda[i] = (1.0 / af_num) * log(p_f[i] / p1_f[i]);
lambda[i] += d_lambda[i];
d_lambda_norm += d_lambda[i] * d_lambda[i];
lambda_norm += lambda[i] * lambda[i];
}
d_lambda_norm = sqrt(d_lambda_norm);
lambda_norm = sqrt(lambda_norm);
/* call progress_cb if defined */
if (SvOK(progress_cb) && SvROK(progress_cb) &&
SvTYPE(SvRV(progress_cb)) == SVt_PVCV) {
TRACE("call progress_cb");
av_lambda = newAV();
av_d_lambda = newAV();
av_extend(av_lambda, f_num - 1);
av_extend(av_d_lambda, f_num - 1);
for (i = 0; i < f_num; i++) {
av_store(av_lambda, i, newSVnv(lambda[i]));
av_store(av_d_lambda, i, newSVnv(d_lambda[i]));
}
ENTER;
SAVETMPS;
PUSHMARK(SP);
XPUSHs(sv_2mortal(newSViv(k)));
XPUSHs(sv_2mortal(newRV_noinc((SV*)av_lambda)));
XPUSHs(sv_2mortal(newRV_noinc((SV*)av_d_lambda)));
XPUSHs(sv_2mortal(newSVnv(lambda_norm)));
XPUSHs(sv_2mortal(newSVnv(d_lambda_norm)));
PUTBACK;
call_sv(progress_cb, G_ARRAY);
SPAGAIN;
sv_r = POPs;
r = SvIV(sv_r);
PUTBACK;
FREETMPS;
LEAVE;
while (SvREFCNT(sv_r) > 0) { SvREFCNT_dec(sv_r); }
if (r != 0) break;
}
k++;
} while (d_lambda_norm > lambda_norm * epsilon);
/* finish */
av_lambda = newAV();
av_extend(av_lambda, f_num - 1);
for (i = 0; i < f_num; i++)
av_store(av_lambda, i, newSVnv(lambda[i]));
RETVAL = newRV_noinc((SV*)av_lambda);
TRACE("leave");
OUTPUT:
RETVAL
CLEANUP:
free(p_f);
free(p1_f);
free(lambda);
free(d_lambda);
free(exp_lambda_f);
void
_cache_samples(self)
SV* self
PREINIT:
SV* _c = *hvref_fetch(self, "_c");
AV* samples = (AV*)SvRV(*hvref_fetch(self, "samples"));
AV *sample, *x;
struct samples_t* ss =
(struct samples_t*)malloc(sizeof(struct samples_t));;
int i, j;
CODE:
ss->s_num = av_len(samples) + 1;
ss->x_len = (int*)malloc(sizeof(int) * ss->s_num);
ss->x = (int**)malloc(sizeof(int*) * ss->s_num);
ss->y = (int*)malloc(sizeof(int) * ss->s_num);
ss->w = (double*)malloc(sizeof(double) * ss->s_num);
for (i = 0; i < ss->s_num; i++) {
sample = (AV*)SvRV(*av_fetch(samples, i, 0));
x = (AV*)SvRV(*av_fetch(sample, 0, 0));
ss->x_len[i] = av_len(x) + 1;
ss->x[i] = (int*)malloc(sizeof(int) * ss->x_len[i]);
for (j = 0; j < ss->x_len[i]; j++)
ss->x[i][j] = SvIV(*av_fetch(x, j, 0));
ss->y[i] = SvIV(*av_fetch(sample, 1, 0));
ss->w[i] = SvNV(*av_fetch(sample, 2, 0));
}
hvref_store(_c, "samples", newSViv(PTR2IV(ss)));
void
_free_cache_samples(self)
SV* self
PREINIT:
SV* _c = *hvref_fetch(self, "_c");
struct samples_t* ss =
INT2PTR(struct samples_t*, SvIV(*hvref_fetch(_c, "samples")));
int i;
CODE:
free(ss->x_len);
for (i = 0; i < ss->s_num; i++) free(ss->x[i]);
free(ss->x);
free(ss->y);
free(ss->w);
free(ss);
hvref_delete(_c, "samples");
void
_cache_f_map(self)
SV* self
PREINIT:
SV* _c = *hvref_fetch(self, "_c");
AV* f_map = (AV*)SvRV(*hvref_fetch(self, "f_map"));
AV* f_map_y;
struct f_map_t* fm =
(struct f_map_t*)malloc(sizeof(struct f_map_t));;
int i, j, x_num;
CODE:
fm->y_num = av_len(f_map) + 1;
fm->lambda_idx = (int**)malloc(sizeof(int*) * fm->y_num);
for (j = 0; j < fm->y_num; j++) {
f_map_y = (AV*)SvRV(*av_fetch(f_map, j, 0));
x_num = av_len(f_map_y) + 1;
fm->lambda_idx[j] = (int*)malloc(sizeof(int) * x_num);
for (i = 0; i < x_num; i++)
fm->lambda_idx[j][i] = SvIV(*av_fetch(f_map_y, i, 0));
}
hvref_store(_c, "f_map", newSVuv(PTR2IV(fm)));
void
_free_cache_f_map(self)
SV* self
PREINIT:
SV* _c = *hvref_fetch(self, "_c");
struct f_map_t* fm =
INT2PTR(struct f_map_t*, SvIV(*hvref_fetch(_c, "f_map")));
int i;
CODE:
for (i = 0; i < fm->y_num; i++) free(fm->lambda_idx[i]);
free(fm->lambda_idx);
free(fm);
hvref_delete(_c, "f_map");
view all matches for this distributionview release on metacpan - search on metacpan
( run in 0.470 second using v1.00-cache-2.02-grep-82fe00e-cpan-2c419f77a38b )