Stats-LikeR
view release on metacpan or search on metacpan
if (!SvROK(rv) || SvTYPE(SvRV(rv)) != SVt_PVHV) croak("hoh2hoa: every value must be a hash ref (hash of hashes)");
av_push(rows_av, newSVsv(hv_iterkeysv(e)));
}
}
SSize_t nrows = av_len(rows_av) + 1;
if (nrows > 1) qsort(AvARRAY(rows_av), (size_t)nrows, sizeof(SV*), h2h_keycmp);
// 4. discover the union of inner keys. Each new column gets an empty array
// in the result straight away so step 5 can just push into it.
{
HE *restrict e;
hv_iterinit(in_hv);
while ((e = hv_iternext(in_hv))) {
HV *restrict row = (HV*)SvRV(hv_iterval(in_hv, e));
HE *restrict ie;
hv_iterinit(row);
while ((ie = hv_iternext(row))) {
SV *restrict ck = hv_iterkeysv(ie);
if (!hv_exists_ent(seen, ck, 0)) {
(void)hv_store_ent(seen, ck, &PL_sv_yes, 0);
av_push(cols_av, newSVsv(ck));
(void)hv_store_ent(out_hv, ck, newRV_noinc((SV*)newAV()), 0);
}
}
}
}
SSize_t ncols = av_len(cols_av) + 1;
// 5. walk the rows in sorted order; for every column push the cell (a copy)
// or the fill value, so each column ends up exactly nrows long.
for (SSize_t r = 0; r < nrows; r++) {
SV *restrict rk = *av_fetch(rows_av, r, 0);
HE *restrict rhe = hv_fetch_ent(in_hv, rk, 0, 0);
HV *restrict row = (HV*)SvRV(HeVAL(rhe));
for (SSize_t c = 0; c < ncols; c++) {
SV *restrict ck = *av_fetch(cols_av, c, 0);
HE *restrict che = hv_fetch_ent(row, ck, 0, 0);
SV *restrict src = che ? HeVAL(che) : NULL;
SV *restrict cell = (src && SvOK(src)) ? newSVsv(src) : (fill ? newSVsv(fill) : newSV(0));
HE *restrict colhe = hv_fetch_ent(out_hv, ck, 0, 0);
av_push((AV*)SvRV(HeVAL(colhe)), cell);
}
}
// 6. optional row-names column: the sorted labels under the requested name.
if (rn_sv) {
if (hv_exists_ent(out_hv, rn_sv, 0)) croak("hoh2hoa: row.names column '%s' collides with an existing column", SvPV_nolen(rn_sv));
AV *restrict rn_av = newAV();
for (SSize_t r = 0; r < nrows; r++) av_push(rn_av, newSVsv(*av_fetch(rows_av, r, 0)));
(void)hv_store_ent(out_hv, rn_sv, newRV_noinc((SV*)rn_av), 0);
}
// 7. tidy up the scratch structures (the result keeps its own copies).
SvREFCNT_dec((SV*)rows_av);
SvREFCNT_dec((SV*)cols_av);
SvREFCNT_dec((SV*)seen);
RETVAL = newRV_noinc((SV*)out_hv);
}
OUTPUT:
RETVAL
void filter(df, pred)
SV *df
SV *pred
PPCODE:
{
if (!df || !SvROK(df))
croak("filter: first argument must be a HASH or ARRAY reference (a data frame)");
bool is_code = (pred && SvROK(pred) && SvTYPE(SvRV(pred)) == SVt_PVCV);
if (!is_code && (!pred || !SvROK(pred) || SvTYPE(SvRV(pred)) != SVt_PVHV))
croak("filter: second argument must be a CODE ref or a predicate built with col()");
SV *restrict ref = SvRV(df);
SV *restrict result;
if (SvTYPE(ref) == SVt_PVAV) {
// ----- Array of Hashes: keep matching row hashrefs (shared, not copied) -----
AV *restrict in = (AV*)ref;
AV *restrict out = newAV();
SSize_t n = av_len(in) + 1, i;
filt_ctx ctx; ctx.is_aoh = 1; ctx.data_hv = NULL; ctx.idx = 0;
for (i = 0; i < n; i++) {
SV **restrict rp = av_fetch(in, i, 0);
if (!rp || !*rp || !SvROK(*rp) || SvTYPE(SvRV(*rp)) != SVt_PVHV) {
SvREFCNT_dec((SV*)out);
croak("filter: array data frame must hold HASH references; element %ld is not one", (long)i);
}
bool keep;
if (is_code) keep = filt_call(aTHX_ pred, *rp);
else { ctx.row_hv = (HV*)SvRV(*rp); keep = filt_eval(aTHX_ pred, &ctx); }
if (keep) av_push(out, SvREFCNT_inc_simple_NN(*rp));
}
result = newRV_noinc((SV*)out);
} else if (SvTYPE(ref) == SVt_PVHV) {
// ----- Hash of Arrays: keep matching row indices across every column -----
HV *restrict in = (HV*)ref;
I32 ncols = hv_iterinit(in);
if (ncols <= 0) {
result = newRV_noinc((SV*)newHV());
} else {
char **restrict names = (char**)safemalloc(ncols * sizeof(char*));
STRLEN *restrict nlens = (STRLEN*)safemalloc(ncols * sizeof(STRLEN));
AV **restrict inav = (AV**)safemalloc(ncols * sizeof(AV*));
AV **restrict outav = (AV**)safemalloc(ncols * sizeof(AV*));
HV *restrict out = newHV();
SSize_t maxrows = 0, i;
I32 c = 0, cc;
HE *restrict e;
while ((e = hv_iternext(in)) && c < ncols) {
STRLEN klen;
char *restrict k = HePV(e, klen);
SV *restrict v = HeVAL(e);
if (!v || !SvROK(v) || SvTYPE(SvRV(v)) != SVt_PVAV) {
safefree(names); safefree(nlens); safefree(inav); safefree(outav);
SvREFCNT_dec((SV*)out);
croak("filter: hash data frame must hold ARRAY references (a hash of arrays); column '%s' is not one", k);
}
AV *restrict a = (AV*)SvRV(v);
SSize_t len = av_len(a) + 1;
if (len > maxrows) maxrows = len;
names[c] = k; nlens[c] = klen; inav[c] = a;
outav[c] = newAV();
hv_store(out, k, klen, newRV_noinc((SV*)outav[c]), 0);
c++;
}
filt_ctx ctx; ctx.is_aoh = 0; ctx.row_hv = NULL; ctx.data_hv = in;
for (i = 0; i < maxrows; i++) {
hv_store_ent(expected_hv, *col_key_sv, newSVnv(E), 0);
stat += ((O - E) * (O - E)) / E;
}
expected_ref = newRV_noinc((SV*)expected_hv);
}
df = c - 1;
}
// Memory Cleanup for Matrices/Arrays
if (obs_matrix) {
for (unsigned int i = 0; i < r; i++) {
safefree(obs_matrix[i]);
}
safefree(obs_matrix);
}
if (obs_array) safefree(obs_array);
if (row_keys) SvREFCNT_dec(row_keys);
if (col_keys) SvREFCNT_dec(col_keys);
NV p_val = get_p_value(stat, df);
// 3. Build the top-level results Hash (mimicking R's htest structure)
HV*restrict results = newHV();
HV*restrict statistic_hv = newHV();
hv_store(statistic_hv, "X-squared", 9, newSVnv(stat), 0);
hv_store(results, "statistic", 9, newRV_noinc((SV*)statistic_hv), 0);
HV*restrict parameter_hv = newHV();
hv_store(parameter_hv, "df", 2, newSViv(df), 0);
hv_store(results, "parameter", 9, newRV_noinc((SV*)parameter_hv), 0);
hv_store(results, "p.value", 7, newSVnv(p_val), 0);
hv_store(results, "expected", 8, expected_ref, 0);
hv_store(results, "observed", 8, SvREFCNT_inc(data_ref), 0);
if (input_type == SVt_PVAV) {
hv_store(results, "data.name", 9, newSVpv("Perl ArrayRef", 0), 0);
} else {
hv_store(results, "data.name", 9, newSVpv("Perl HashRef", 0), 0);
}
if (is_2d) {
if (yates) {
hv_store(results, "method", 6, newSVpv("Pearson's Chi-squared test with Yates' continuity correction", 0), 0);
} else {
hv_store(results, "method", 6, newSVpv("Pearson's Chi-squared test", 0), 0);
}
} else {
hv_store(results, "method", 6, newSVpv("Chi-squared test for given probabilities", 0), 0);
}
RETVAL = newRV_noinc((SV*)results);
}
OUTPUT:
RETVAL
PROTOTYPES: ENABLE
void write_table(...)
PPCODE:
{
SV *restrict data_sv = NULL;
SV *restrict file_sv = NULL;
unsigned int arg_idx = 0;
// Mimic the Perl shift logic
if (arg_idx < items && SvROK(ST(arg_idx))) {
int type = SvTYPE(SvRV(ST(arg_idx)));
if (type == SVt_PVHV || type == SVt_PVAV) {
data_sv = ST(arg_idx);
arg_idx++;
}
}
// Only consume a positional file argument if it is a plain string that is
// NOT one of the named option keys. Otherwise write_table(data=>..., file=>...)
// would grab the literal string "data" as the filename.
if (arg_idx < items) {
SV *restrict cand = ST(arg_idx);
if (SvOK(cand) && !SvROK(cand)) {
const char *restrict k = SvPV_nolen(cand);
if (!(strEQ(k, "data") || strEQ(k, "file") || strEQ(k, "col.names") ||
strEQ(k, "row.names") || strEQ(k, "sep") || strEQ(k, "delim") ||
strEQ(k, "undef.val"))) {
file_sv = cand;
arg_idx++;
}
}
}
const char *restrict sep = ",";
bool explicit_sep = 0; // Track if delimiter was manually specified
// CHANGED: default undef cells to a true empty value ("") instead of NULL.
// With print_string_row emitting zero-length fields bare (no quotes), an
// undef cell now prints as nothing at all: a,,c -- not a,'',c or a,"",c.
// 'undef.val' => 'NA' (etc.) still overrides this.
const char *restrict undef_val = "";
SV *restrict row_names_sv = sv_2mortal(newSViv(1));
SV *restrict col_names_sv = NULL;
// Read the remaining Hash-style arguments
for (; arg_idx < items; arg_idx += 2) {
if (arg_idx + 1 >= items) croak("write_table: Odd number of arguments passed");
const char *restrict key = SvPV_nolen(ST(arg_idx));
SV *restrict val = ST(arg_idx + 1);
if (strEQ(key, "data")) data_sv = val;
else if (strEQ(key, "col.names")) col_names_sv = val;
else if (strEQ(key, "file")) file_sv = val;
else if (strEQ(key, "row.names")) row_names_sv = val;
// Check for either "sep" or "delim" and mark as explicitly provided
else if (strEQ(key, "sep") || strEQ(key, "delim")) {
sep = SvPV_nolen(val);
explicit_sep = 1;
}
// FIX: 'undef.val' => undef used to call SvPV_nolen(&PL_sv_undef)
// (warning + empty string by accident); make it explicit.
else if (strEQ(key, "undef.val")) undef_val = SvOK(val) ? SvPV_nolen(val) : "";
else croak("write_table: Unknown arguments passed: %s", key);
}
if (!data_sv || !SvROK(data_sv)) {
croak("write_table: 'data' must be a HASH or ARRAY reference\n");
}
SV *restrict data_ref = SvRV(data_sv);
if (SvTYPE(data_ref) != SVt_PVHV && SvTYPE(data_ref) != SVt_PVAV) {
else
do_exact = SvTRUE(exact_sv) ? 1 : 0;
if (do_exact) {
statistic = S_stat;
p_value = spearman_exact_pvalue(S_stat, n, alternative);
} else {
NV r = estimate;
/* NOTE: R silently ignores continuity correction for Spearman.
* The adjustment below is non-standard; a warning is emitted
* so callers are not silently misled. */
if (continuity) {
warn("cor_test: continuity correction is not defined for Spearman in R and is ignored here");
}
/* BUG FIX: guard divide-by-zero when |r| == 1 exactly */
NV denom_t = 1.0 - r * r;
if (denom_t <= 0.0)
statistic = (r > 0.0) ? INFINITY : -INFINITY;
else
statistic = r * sqrt((NV)(n - 2) / denom_t);
p_value = get_t_pvalue(statistic, (NV)(n - 2), alternative);
}
Safefree(rank_x);
Safefree(rank_y);
} else {
Safefree(x);
Safefree(y);
croak("Unknown method '%s': must be 'pearson', 'kendall', or 'spearman'", method);
}
Safefree(x);
Safefree(y);
rhv = newHV();
hv_stores(rhv, "estimate", newSVnv(estimate));
hv_stores(rhv, "p.value", newSVnv(p_value));
hv_stores(rhv, "statistic", newSVnv(statistic));
hv_stores(rhv, "method", newSVpv(method, 0));
hv_stores(rhv, "alternative", newSVpv(alternative, 0));
if (is_pearson) {
hv_stores(rhv, "parameter", newSVnv(df));
AV *restrict ci_av = newAV();
av_push(ci_av, newSVnv(ci_lower));
av_push(ci_av, newSVnv(ci_upper));
hv_stores(rhv, "conf.int", newRV_noinc((SV*)ci_av));
}
RETVAL = newRV_noinc((SV*)rhv);
}
OUTPUT:
RETVAL
void shapiro_test(data)
SV *data
PREINIT:
AV *restrict av;
HV *restrict ret_hash;
size_t n_raw, n = 0;
NV *restrict x, w = 0.0, p_val = 0.0, mean = 0.0, ssq = 0.0;
PPCODE:
if (!SvROK(data) || SvTYPE(SvRV(data)) != SVt_PVAV) {
croak("Expected an array reference");
}
av = (AV *)SvRV(data);
n_raw = av_len(av) + 1;
Newx(x, n_raw, NV);
// Extract variables and calculate mean (skipping undefined/NaN values)
for (size_t i = 0; i < n_raw; i++) {
SV **restrict elem = av_fetch(av, i, 0);
if (elem && SvOK(*elem)) {
NV val = SvNV(*elem);
if (!isnan(val)) {
x[n] = val;
mean += val;
n++;
}
}
}
if (n < 3 || n > 5000) {
Safefree(x);
croak("Sample size must be between 3 and 5000 (R's limit)");
}
mean /= n;
// Calculate Sum of Squares
for (size_t i = 0; i < n; i++) {
ssq += (x[i] - mean) * (x[i] - mean);
}
if (ssq == 0.0) {
Safefree(x);
croak("Data is perfectly constant; cannot compute Shapiro-Wilk test");
}
qsort(x, n, sizeof(NV), compare_doubles);
// --- Core AS R94 Algorithm: Weights and Statistic W
if (n == 3) {
NV a_val = 0.7071067811865475; // sqrt(1/2)
NV b_val = a_val * (x[2] - x[0]);
w = (b_val * b_val) / ssq;
if (w < 0.75) w = 0.75;
// Exact P-value for n=3
p_val = 1.90985931710274 * (asin(sqrt(w)) - 1.04719755119660);
} else {
NV *restrict m, *restrict a;
NV sum_m2 = 0.0, b_val = 0.0;
Newx(m, n, NV);
Newx(a, n, NV);
for (size_t i = 0; i < n; i++) {
m[i] = inverse_normal_cdf((i + 1.0 - 0.375) / (n + 0.25));
sum_m2 += m[i] * m[i];
}
NV u = 1.0 / sqrt((NV)n);
NV a_n = -2.706056*pow(u,5) + 4.434685*pow(u,4) - 2.071190*pow(u,3) - 0.147981*pow(u,2) + 0.221157*u + m[n-1]/sqrt(sum_m2);
a[n-1] = a_n;
a[0] = -a_n;
if (n == 4 || n == 5) {
NV eps = (sum_m2 - 2.0 * m[n-1]*m[n-1]) / (1.0 - 2.0 * a_n*a_n);
q = (1.0 - gamma) * x[j] + gamma * x[j + 1];
}
// --- Format hash key with Epsilon guarding ---
char key[32];
double pct = (double)(p * 100.0); // Safe to cast to double just for formatting
double pct_rounded = floor(pct + 0.5); // C89 safe rounding
// Use 1e-9 epsilon check instead of strict integer equality
if (fabs(pct - pct_rounded) < 1e-9) {
snprintf(key, sizeof(key), "%.0f%%", pct_rounded);
} else {
snprintf(key, sizeof(key), "%.1f%%", pct);
}
hv_store(res_hv, key, strlen(key), newSVnv(q), 0);
}
Safefree(x); Safefree(probs);
RETVAL = newRV_noinc((SV*)res_hv);
}
OUTPUT:
RETVAL
NV mean(...)
PROTOTYPE: @
INIT:
NV total = 0;
size_t count = 0;
CODE:
for (size_t i = 0; i < items; i++) {
SV* restrict arg = ST(i);
if (SvROK(arg) && SvTYPE(SvRV(arg)) == SVt_PVAV) {
AV* restrict av = (AV*)SvRV(arg);
size_t len = av_len(av) + 1;
for (size_t j = 0; j < len; j++) {
SV** restrict tv = av_fetch(av, j, 0);
if (tv && SvOK(*tv)) {
total += SvNV(*tv);
count++;
} else {
croak("mean: undefined value at array ref index %zu (argument %zu)", j, i);
}
}
} else if (SvOK(arg)) {
total += SvNV(arg);
count++;
} else {
croak("mean: undefined value at argument index %zu", i);
}
}
if (count == 0) croak("mean needs >= 1 element");
RETVAL = total / count;
OUTPUT:
RETVAL
void mode(...)
PROTOTYPE: @
PREINIT:
HV *restrict counts;
HV *restrict originals;
size_t max_count = 0, arg_count = 0;
HE *restrict he;
PPCODE:
/* counts: string(value) -> occurrence count */
/* originals: string(value) -> SV* first-seen original */
counts = (HV *)sv_2mortal((SV *)newHV());
originals = (HV *)sv_2mortal((SV *)newHV());
for (size_t i = 0; i < items; i++) {
SV *restrict arg = ST(i);
if (SvROK(arg) && SvTYPE(SvRV(arg)) == SVt_PVAV) {
AV *restrict av = (AV *)SvRV(arg);
size_t len = av_len(av) + 1;
for (size_t j = 0; j < len; j++) {
SV **restrict tv = av_fetch(av, j, 0);
if (tv && SvOK(*tv)) {
STRLEN klen;
const char *restrict key = SvPV(*tv, klen);
SV **restrict slot = hv_fetch(counts, key, klen, 1);
if (!slot) croak("mode: internal hash error");
size_t cnt = SvOK(*slot) ? SvIV(*slot) + 1 : 1;
sv_setiv(*slot, cnt);
if (cnt > max_count) max_count = cnt;
if (cnt == 1)
hv_store(originals, key, klen, newSVsv(*tv), 0);
arg_count++;
} else {
croak("mode: undefined value at array ref index %zu (argument %zu)", j, i);
}
}
} else if (SvOK(arg)) {
STRLEN klen;
const char *restrict key = SvPV(arg, klen);
SV **restrict slot = hv_fetch(counts, key, klen, 1);
if (!slot) croak("mode: internal hash error");
size_t cnt = SvOK(*slot) ? SvIV(*slot) + 1 : 1;
sv_setiv(*slot, cnt);
if (cnt > max_count) max_count = cnt;
if (cnt == 1)
hv_store(originals, key, klen, newSVsv(arg), 0);
arg_count++;
} else {
croak("mode: undefined value at argument index %zu", i);
}
}
if (arg_count == 0)
croak("mode needs >= 1 element");
hv_iterinit(counts);
while ((he = hv_iternext(counts))) {
if (SvIV(hv_iterval(counts, he)) == max_count) {
STRLEN klen;
const char *restrict key = HePV(he, klen);
SV **restrict orig = hv_fetch(originals, key, klen, 0);
mXPUSHs(orig ? newSVsv(*orig) : newSVpvn(key, klen));
}
}
NV sum(...)
PROTOTYPE: @
INIT:
NV total = 0;
}
p_val = get_t_pvalue(t_stat, df, alternative);
NV alpha = 1.0 - conf_level, t_crit, ci_lower, ci_upper;
if (strcmp(alternative, "less") == 0) {
t_crit = qt_tail(df, alpha);
ci_lower = -INFINITY;
ci_upper = cint_est + t_crit * std_err;
} else if (strcmp(alternative, "greater") == 0) {
t_crit = qt_tail(df, alpha);
ci_lower = cint_est - t_crit * std_err;
ci_upper = INFINITY;
} else {
t_crit = qt_tail(df, alpha / 2.0);
ci_lower = cint_est - t_crit * std_err;
ci_upper = cint_est + t_crit * std_err;
}
AV*restrict conf_int = newAV();
av_push(conf_int, newSVnv(ci_lower));
av_push(conf_int, newSVnv(ci_upper));
hv_store(results, "statistic", 9, newSVnv(t_stat), 0);
hv_store(results, "df", 2, newSVnv(df), 0);
hv_store(results, "p_value", 7, newSVnv(p_val), 0);
hv_store(results, "conf_int", 8, newRV_noinc((SV*)conf_int), 0);
RETVAL = newRV_noinc((SV*)results);
}
OUTPUT:
RETVAL
void p_adjust(SV* p_sv, const char* method = "holm")
INIT:
if (!SvROK(p_sv) || SvTYPE(SvRV(p_sv)) != SVt_PVAV) {
croak("p_adjust: first argument must be an ARRAY reference of p-values");
}
AV *restrict p_av = (AV*)SvRV(p_sv);
size_t n = av_len(p_av) + 1;
// Handle empty input
if (n == 0) {
XSRETURN_EMPTY;
}
// Normalize method string
char meth[64];
strncpy(meth, method, 63); meth[63] = '\0';
for(unsigned short int i = 0; meth[i]; i++) meth[i] = tolower(meth[i]);
// Resolve aliases
if (strstr(meth, "benjamini") && strstr(meth, "hochberg")) strcpy(meth, "bh");
if (strstr(meth, "benjamini") && strstr(meth, "yekutieli")) strcpy(meth, "by");
if (strcmp(meth, "fdr") == 0) strcpy(meth, "bh");
// Allocate C memory
PVal *restrict arr;
NV *restrict adj;
Newx(arr, n, PVal);
Newx(adj, n, NV);
for (size_t i = 0; i < n; i++) {
SV**restrict tv = av_fetch(p_av, i, 0);
arr[i].p = (tv && SvOK(*tv)) ? SvNV(*tv) : 1.0;
arr[i].orig_idx = i;
}
// Sort ascending (Stable sort using original index)
qsort(arr, n, sizeof(PVal), cmp_pval);
PPCODE:
if (strcmp(meth, "bonferroni") == 0) {
for (size_t i = 0; i < n; i++) {
NV v = arr[i].p * n;
adj[arr[i].orig_idx] = (v < 1.0) ? v : 1.0;
}
} else if (strcmp(meth, "holm") == 0) {
NV cummax = 0.0;
for (size_t i = 0; i < n; i++) {
NV v = arr[i].p * (n - i);
if (v > cummax) cummax = v;
adj[arr[i].orig_idx] = (cummax < 1.0) ? cummax : 1.0;
}
} else if (strcmp(meth, "hochberg") == 0) {
NV cummin = 1.0;
for (ssize_t i = n - 1; i >= 0; i--) {
NV v = arr[i].p * (n - i);
if (v < cummin) cummin = v;
adj[arr[i].orig_idx] = (cummin < 1.0) ? cummin : 1.0;
}
} else if (strcmp(meth, "bh") == 0) {
NV cummin = 1.0;
for (ssize_t i = n - 1; i >= 0; i--) {
NV v = arr[i].p * n / (i + 1.0);
if (v < cummin) cummin = v;
adj[arr[i].orig_idx] = (cummin < 1.0) ? cummin : 1.0;
}
} else if (strcmp(meth, "by") == 0) {
NV q = 0.0;
for (size_t i = 1; i <= n; i++) q += 1.0 / i;
NV cummin = 1.0;
for (ssize_t i = n - 1; i >= 0; i--) {
NV v = arr[i].p * n / (i + 1.0) * q;
if (v < cummin) cummin = v;
adj[arr[i].orig_idx] = (cummin < 1.0) ? cummin : 1.0;
}
} else if (strcmp(meth, "hommel") == 0) {
NV *restrict pa, *restrict q_arr;
Newx(pa, n, NV);
Newx(q_arr, n, NV);
// Initial: min(n * p[i] / (i + 1))
NV min_val = n * arr[0].p;
for (size_t i = 1; i < n; i++) {
NV temp = (n * arr[i].p) / (i + 1.0);
if (temp < min_val) {
min_val = temp;
}
}
// pa <- q <- rep(min, n)
for (size_t i = 0; i < n; i++) {
pa[i] = min_val;
q_arr[i] = min_val;
}
for (size_t j = n - 1; j >= 2; j--) {
ssize_t n_mj = n - j; // Max index for 'ij'. Length is n_mj + 1
ssize_t i2_len = j - 1; // Length of 'i2
// Calculate q1 = min(j * p[i2] / (2:j))
NV q1 = (j * arr[n_mj + 1].p) / 2.0;
for (size_t k = 1; k < i2_len; k++) {
NV temp_q1 = (j * arr[n_mj + 1 + k].p) / (2.0 + k);
if (temp_q1 < q1) {
symmetric = 1;
}
if (nrows < 2)
croak("cor: need at least 2 observations (got %lu)", nrows);
// -- build cache for symmetric case: compute upper triangle, store results, mirror to lower triangle
AV*restrict result_av = newAV();
av_extend(result_av, ncols_x - 1);
// Allocate per-row AVs up front so we can fill them in order
AV **restrict rows_out;
Newx(rows_out, ncols_x, AV*);
for (size_t i = 0; i < ncols_x; i++) {
rows_out[i] = newAV();
av_extend(rows_out[i], ncols_y - 1);
}
if (symmetric) {
/* Upper triangle + diagonal, then mirror. r_cache[i][j] (j >= i) holds the computed value. */
NV **restrict r_cache;
Newx(r_cache, ncols_x, NV*);
for (size_t i = 0; i < ncols_x; i++)
Newx(r_cache[i], ncols_x, NV);
for (size_t i = 0; i < ncols_x; i++) {
r_cache[i][i] = 1.0; // diagonal
for (size_t j = i + 1; j < ncols_x; j++) {
NV r = compute_cor(col_x[i], col_x[j], nrows, method);
r_cache[i][j] = r;
r_cache[j][i] = r; // symmetry
}
}
// fill output AoA from cache
for (size_t i = 0; i < ncols_x; i++)
for (size_t j = 0; j < ncols_x; j++)
av_store(rows_out[i], j, newSVnv(r_cache[i][j]));
for (size_t i = 0; i < ncols_x; i++) Safefree(r_cache[i]);
Safefree(r_cache); r_cache = NULL;
} else {
// cross-correlation: every (i,j) pair is independent
for (size_t i = 0; i < ncols_x; i++)
for (size_t j = 0; j < ncols_y; j++)
av_store(rows_out[i], j, newSVnv(compute_cor(col_x[i], col_y[j], nrows, method)));
}
// push row AVs into result
for (size_t i = 0; i < ncols_x; i++)
av_store(result_av, i, newRV_noinc((SV*)rows_out[i]));
Safefree(rows_out); rows_out = NULL;
// -- free column arrays -------------------------------------
for (size_t j = 0; j < ncols_x; j++) Safefree(col_x[j]);
Safefree(col_x); col_x = NULL;
if (!symmetric) {
for (size_t j = 0; j < ncols_y; j++) Safefree(col_y[j]);
Safefree(col_y);
}
RETVAL = newRV_noinc((SV*)result_av);
}
OUTPUT:
RETVAL
void scale(...)
PROTOTYPE: @
PPCODE:
{
bool do_center_mean = TRUE, do_scale_sd = TRUE;
NV center_val = 0.0, scale_val = 1.0;
size_t data_items = items;
// 1. Parse Options Hash (if it exists as the last argument)
if (items > 0) {
SV*restrict last_arg = ST(items - 1);
if (SvROK(last_arg) && SvTYPE(SvRV(last_arg)) == SVt_PVHV) {
data_items = items - 1; // Exclude hash from data processing
HV*restrict opt_hv = (HV*)SvRV(last_arg);
// --- Parse 'center'
SV**restrict center_sv = hv_fetch(opt_hv, "center", 6, 0);
if (center_sv) {
SV*restrict val_sv = *center_sv;
if (!SvOK(val_sv)) {
do_center_mean = FALSE; center_val = 0.0;
} else {
char *restrict str = SvPV_nolen(val_sv);
/* Trap booleans and empty strings before numeric checks */
if (strcasecmp(str, "mean") == 0 || strcasecmp(str, "true") == 0 || strcmp(str, "1") == 0) {
do_center_mean = TRUE;
} else if (strcasecmp(str, "none") == 0 || strcasecmp(str, "false") == 0 || strcmp(str, "0") == 0 || strcmp(str, "") == 0) {
do_center_mean = FALSE; center_val = 0.0;
} else if (looks_like_number(val_sv)) {
do_center_mean = FALSE; center_val = SvNV(val_sv);
} else if (SvTRUE(val_sv)) {
do_center_mean = TRUE;
} else {
do_center_mean = FALSE; center_val = 0.0;
}
}
}
// --- Parse 'scale' ---
SV**restrict scale_sv = hv_fetch(opt_hv, "scale", 5, 0);
if (scale_sv) {
SV*restrict val_sv = *scale_sv;
if (!SvOK(val_sv)) {
do_scale_sd = FALSE; scale_val = 1.0;
} else {
char *restrict str = SvPV_nolen(val_sv);
if (strcasecmp(str, "sd") == 0 || strcasecmp(str, "true") == 0 || strcmp(str, "1") == 0) {
do_scale_sd = TRUE;
} else if (strcasecmp(str, "none") == 0 || strcasecmp(str, "false") == 0 || strcmp(str, "0") == 0 || strcmp(str, "") == 0) {
do_scale_sd = FALSE; scale_val = 1.0;
} else if (looks_like_number(val_sv)) {
do_scale_sd = FALSE; scale_val = SvNV(val_sv);
if (scale_val == 0.0) scale_val = 1.0; /* Prevent Division By Zero */
} else if (SvTRUE(val_sv)) {
do_scale_sd = TRUE;
} else {
do_scale_sd = FALSE; scale_val = 1.0;
}
}
}
}
}
// 2. Detect if the input is a Matrix (Array of Arrays)
bool is_matrix = FALSE;
if (data_items == 1) {
SV*restrict first_arg = ST(0);
}
for (j = 0; j < p; j++) {
hv_store(coef_hv, exp_terms[j], strlen(exp_terms[j]), newSVnv(beta[j]), 0);
av_push(terms_av, newSVpv(exp_terms[j], 0));
HV *restrict row_hv = newHV();
if (aliased[j]) {
hv_store(row_hv, "Estimate", 8, newSVpv("NaN", 0), 0);
hv_store(row_hv, "Std. Error", 10, newSVpv("NaN", 0), 0);
hv_store(row_hv, "t value", 7, newSVpv("NaN", 0), 0);
hv_store(row_hv, "Pr(>|t|)", 8, newSVpv("NaN", 0), 0);
} else {
NV se = sqrt(rse_sq * XtX[j * p + j]);
NV t_val = (se > 0.0) ? (beta[j] / se) : (INFINITY * (beta[j] >= 0.0 ? 1.0 : -1.0));
NV p_val = get_t_pvalue(t_val, df_res, "two.sided");
hv_store(row_hv, "Estimate", 8, newSVnv(beta[j]), 0);
hv_store(row_hv, "Std. Error", 10, newSVnv(se), 0);
hv_store(row_hv, "t value", 7, newSVnv(t_val), 0);
hv_store(row_hv, "Pr(>|t|)", 8, newSVnv(p_val), 0);
}
hv_store(summary_hv, exp_terms[j], strlen(exp_terms[j]), newRV_noinc((SV*)row_hv), 0);
}
hv_store(res_hv, "coefficients", 12, newRV_noinc((SV*)coef_hv), 0);
hv_store(res_hv, "fitted.values", 13, newRV_noinc((SV*)fitted_hv), 0);
hv_store(res_hv, "residuals", 9, newRV_noinc((SV*)resid_hv), 0);
hv_store(res_hv, "df.residual", 11, newSVuv(df_res), 0);
hv_store(res_hv, "rank", 4, newSVuv(final_rank), 0);
hv_store(res_hv, "rss", 3, newSVnv(rss), 0);
hv_store(res_hv, "summary", 7, newRV_noinc((SV*)summary_hv),0);
hv_store(res_hv, "terms", 5, newRV_noinc((SV*)terms_av), 0);
hv_store(res_hv, "r.squared", 9, newSVnv(r_squared), 0);
hv_store(res_hv, "adj.r.squared", 13, newSVnv(adj_r_squared), 0);
if (!isnan(f_stat)) {
AV *fstat_av = newAV();
av_push(fstat_av, newSVnv(f_stat));
av_push(fstat_av, newSViv(numdf));
av_push(fstat_av, newSViv(df_res));
hv_store(res_hv, "fstatistic", 10, newRV_noinc((SV*)fstat_av), 0);
hv_store(res_hv, "f.pvalue", 8, newSVnv(f_pvalue), 0);
}
// Deep Cleanup
for (i = 0; i < num_terms; i++) Safefree(terms[i]); Safefree(terms);
for (i = 0; i < num_uniq; i++) Safefree(uniq_terms[i]); Safefree(uniq_terms);
for (j = 0; j < p_exp; j++) {
Safefree(exp_terms[j]);
if (is_dummy[j]) { Safefree(dummy_base[j]); Safefree(dummy_level[j]); }
}
Safefree(exp_terms); Safefree(is_dummy); Safefree(dummy_base); Safefree(dummy_level);
Safefree(X); Safefree(Y); Safefree(XtX); Safefree(XtY);
Safefree(beta); Safefree(aliased);
if (row_hashes) Safefree(row_hashes);
RETVAL = newRV_noinc((SV*)res_hv);
}
OUTPUT:
RETVAL
void seq(from, to, by = 1.0)
NV from
NV to
NV by
PPCODE:
{
//Handle the zero 'by' case
if (by == 0.0) {
if (from == to) {
EXTEND(SP, 1);
mPUSHn(from);
XSRETURN(1);
} else {
croak("invalid 'by' argument: cannot be zero when from != to");
}
}
// Check for wrong direction / infinite loop
if ((from < to && by < 0.0) || (from > to && by > 0.0)) {
croak("wrong sign in 'by' argument");
}
/* * Calculate number of elements.
* R uses a small epsilon (like 1e-10) to avoid dropping the last
* element due to floating point inaccuracies.
*/
NV n_elements_d = (to - from) / by;
if (n_elements_d < 0.0) n_elements_d = 0.0;
size_t n_elements = (n_elements_d + 1e-10) + 1;
// Pre-extend the stack to avoid reallocating inside the loop
EXTEND(SP, n_elements);
for (size_t i = 0; i < n_elements; i++) {
mPUSHn(from + i * by);
}
XSRETURN(n_elements);
}
SV* rnorm(...)
CODE:
{
// Auto-seed the PRNG if the Perl script hasn't done so yet
AUTO_SEED_PRNG();
size_t n = 0;
NV mean = 0.0, sd = 1.0;
int arg_start = 0;
// Check if the first argument is a simple integer (rnorm(33))
if (items > 0 && SvIOK(ST(0)) && (items == 1 || items % 2 != 0)) {
n = (unsigned int)SvUV(ST(0));
arg_start = 1; // Start parsing named arguments from the second element
}
// --- Parse remaining named arguments from the flat stack ---
if ((items - arg_start) % 2 != 0) {
croak("Usage: rnorm(n), rnorm(n => 10, mean => 0, sd => 1), or rnorm(33, mean => 0)");
}
for (int i = arg_start; i < items; i += 2) {
const char* restrict key = SvPV_nolen(ST(i));
SV* restrict val = ST(i + 1);
if (strEQ(key, "n")) n = (unsigned int)SvUV(val);
else if (strEQ(key, "mean")) mean = SvNV(val);
else if (strEQ(key, "sd")) sd = SvNV(val);
else croak("rnorm: unknown argument '%s'", key);
}
if (sd < 0.0) croak("rnorm: standard deviation must be non-negative");
AV *restrict result_av = newAV();
( run in 1.000 second using v1.01-cache-2.11-cpan-5511b514fd6 )