Stats-LikeR

 view release on metacpan or  search on metacpan

LikeR.xs  view on Meta::CPAN

				if (!SvROK(rv) || SvTYPE(SvRV(rv)) != SVt_PVHV) croak("hoh2hoa: every value must be a hash ref (hash of hashes)");
				av_push(rows_av, newSVsv(hv_iterkeysv(e)));
			}
		}
		SSize_t nrows = av_len(rows_av) + 1;
		if (nrows > 1) qsort(AvARRAY(rows_av), (size_t)nrows, sizeof(SV*), h2h_keycmp);
		// 4. discover the union of inner keys. Each new column gets an empty array
		//    in the result straight away so step 5 can just push into it.
		{
			HE *restrict e;
			hv_iterinit(in_hv);
			while ((e = hv_iternext(in_hv))) {
				HV *restrict row = (HV*)SvRV(hv_iterval(in_hv, e));
				HE *restrict ie;
				hv_iterinit(row);
				while ((ie = hv_iternext(row))) {
					SV *restrict ck = hv_iterkeysv(ie);
					if (!hv_exists_ent(seen, ck, 0)) {
						(void)hv_store_ent(seen, ck, &PL_sv_yes, 0);
						av_push(cols_av, newSVsv(ck));
						(void)hv_store_ent(out_hv, ck, newRV_noinc((SV*)newAV()), 0);
					}
				}
			}
		}
		SSize_t ncols = av_len(cols_av) + 1;
		// 5. walk the rows in sorted order; for every column push the cell (a copy)
		//    or the fill value, so each column ends up exactly nrows long.
		for (SSize_t r = 0; r < nrows; r++) {
			SV *restrict rk = *av_fetch(rows_av, r, 0);
			HE *restrict rhe = hv_fetch_ent(in_hv, rk, 0, 0);
			HV *restrict row = (HV*)SvRV(HeVAL(rhe));
			for (SSize_t c = 0; c < ncols; c++) {
				SV *restrict ck = *av_fetch(cols_av, c, 0);
				HE *restrict che = hv_fetch_ent(row, ck, 0, 0);
				SV *restrict src = che ? HeVAL(che) : NULL;
				SV *restrict cell = (src && SvOK(src)) ? newSVsv(src) : (fill ? newSVsv(fill) : newSV(0));
				HE *restrict colhe = hv_fetch_ent(out_hv, ck, 0, 0);
				av_push((AV*)SvRV(HeVAL(colhe)), cell);
			}
		}
		// 6. optional row-names column: the sorted labels under the requested name.
		if (rn_sv) {
			if (hv_exists_ent(out_hv, rn_sv, 0)) croak("hoh2hoa: row.names column '%s' collides with an existing column", SvPV_nolen(rn_sv));
			AV *restrict rn_av = newAV();
			for (SSize_t r = 0; r < nrows; r++) av_push(rn_av, newSVsv(*av_fetch(rows_av, r, 0)));
			(void)hv_store_ent(out_hv, rn_sv, newRV_noinc((SV*)rn_av), 0);
		}
		// 7. tidy up the scratch structures (the result keeps its own copies).
		SvREFCNT_dec((SV*)rows_av);
		SvREFCNT_dec((SV*)cols_av);
		SvREFCNT_dec((SV*)seen);
		RETVAL = newRV_noinc((SV*)out_hv);
	}
	OUTPUT:
		RETVAL

void filter(df, pred)
	SV *df
	SV *pred
PPCODE:
{
	if (!df || !SvROK(df))
		croak("filter: first argument must be a HASH or ARRAY reference (a data frame)");
	bool is_code = (pred && SvROK(pred) && SvTYPE(SvRV(pred)) == SVt_PVCV);
	if (!is_code && (!pred || !SvROK(pred) || SvTYPE(SvRV(pred)) != SVt_PVHV))
		croak("filter: second argument must be a CODE ref or a predicate built with col()");
	SV *restrict ref = SvRV(df);
	SV *restrict result;
	if (SvTYPE(ref) == SVt_PVAV) {
		// ----- Array of Hashes: keep matching row hashrefs (shared, not copied) -----
		AV *restrict in = (AV*)ref;
		AV *restrict out = newAV();
		SSize_t n = av_len(in) + 1, i;
		filt_ctx ctx; ctx.is_aoh = 1; ctx.data_hv = NULL; ctx.idx = 0;
		for (i = 0; i < n; i++) {
			SV **restrict rp = av_fetch(in, i, 0);
			if (!rp || !*rp || !SvROK(*rp) || SvTYPE(SvRV(*rp)) != SVt_PVHV) {
				SvREFCNT_dec((SV*)out);
				croak("filter: array data frame must hold HASH references; element %ld is not one", (long)i);
			}
			bool keep;
			if (is_code) keep = filt_call(aTHX_ pred, *rp);
			else { ctx.row_hv = (HV*)SvRV(*rp); keep = filt_eval(aTHX_ pred, &ctx); }
			if (keep) av_push(out, SvREFCNT_inc_simple_NN(*rp));
		}
		result = newRV_noinc((SV*)out);
	} else if (SvTYPE(ref) == SVt_PVHV) {
		// ----- Hash of Arrays: keep matching row indices across every column -----
		HV *restrict in = (HV*)ref;
		I32 ncols = hv_iterinit(in);
		if (ncols <= 0) {
			result = newRV_noinc((SV*)newHV());
		} else {
			char   **restrict names = (char**)safemalloc(ncols * sizeof(char*));
			STRLEN  *restrict nlens = (STRLEN*)safemalloc(ncols * sizeof(STRLEN));
			AV     **restrict inav  = (AV**)safemalloc(ncols * sizeof(AV*));
			AV     **restrict outav = (AV**)safemalloc(ncols * sizeof(AV*));
			HV *restrict out = newHV();
			SSize_t maxrows = 0, i;
			I32 c = 0, cc;
			HE *restrict e;
			while ((e = hv_iternext(in)) && c < ncols) {
				STRLEN klen;
				char *restrict k = HePV(e, klen);
				SV *restrict v = HeVAL(e);
				if (!v || !SvROK(v) || SvTYPE(SvRV(v)) != SVt_PVAV) {
					safefree(names); safefree(nlens); safefree(inav); safefree(outav);
					SvREFCNT_dec((SV*)out);
					croak("filter: hash data frame must hold ARRAY references (a hash of arrays); column '%s' is not one", k);
				}
				AV *restrict a = (AV*)SvRV(v);
				SSize_t len = av_len(a) + 1;
				if (len > maxrows) maxrows = len;
				names[c] = k; nlens[c] = klen; inav[c] = a;
				outav[c] = newAV();
				hv_store(out, k, klen, newRV_noinc((SV*)outav[c]), 0);
				c++;
			}
			filt_ctx ctx; ctx.is_aoh = 0; ctx.row_hv = NULL; ctx.data_hv = in;
			for (i = 0; i < maxrows; i++) {

LikeR.xs  view on Meta::CPAN

				hv_store_ent(expected_hv, *col_key_sv, newSVnv(E), 0);
				stat += ((O - E) * (O - E)) / E;
			}
			expected_ref = newRV_noinc((SV*)expected_hv);
		}
		df = c - 1;
	}

	// Memory Cleanup for Matrices/Arrays
	if (obs_matrix) {
		for (unsigned int i = 0; i < r; i++) {
			safefree(obs_matrix[i]);
		}
		safefree(obs_matrix);
	}
	if (obs_array) safefree(obs_array);
	if (row_keys) SvREFCNT_dec(row_keys);
	if (col_keys) SvREFCNT_dec(col_keys);

	NV p_val = get_p_value(stat, df);

	// 3. Build the top-level results Hash (mimicking R's htest structure)
	HV*restrict results = newHV();

	HV*restrict statistic_hv = newHV();
	hv_store(statistic_hv, "X-squared", 9, newSVnv(stat), 0);
	hv_store(results, "statistic", 9, newRV_noinc((SV*)statistic_hv), 0);

	HV*restrict parameter_hv = newHV();
	hv_store(parameter_hv, "df", 2, newSViv(df), 0);
	hv_store(results, "parameter", 9, newRV_noinc((SV*)parameter_hv), 0);

	hv_store(results, "p.value", 7, newSVnv(p_val), 0);
	hv_store(results, "expected", 8, expected_ref, 0);
	hv_store(results, "observed", 8, SvREFCNT_inc(data_ref), 0);

	if (input_type == SVt_PVAV) {
		hv_store(results, "data.name", 9, newSVpv("Perl ArrayRef", 0), 0);
	} else {
		hv_store(results, "data.name", 9, newSVpv("Perl HashRef", 0), 0);
	}

	if (is_2d) {
		if (yates) {
			hv_store(results, "method", 6, newSVpv("Pearson's Chi-squared test with Yates' continuity correction", 0), 0);
		} else {
			hv_store(results, "method", 6, newSVpv("Pearson's Chi-squared test", 0), 0);
		}
	} else {
		hv_store(results, "method", 6, newSVpv("Chi-squared test for given probabilities", 0), 0);
	}

	RETVAL = newRV_noinc((SV*)results);
}
OUTPUT:
	RETVAL

PROTOTYPES: ENABLE

void write_table(...)
PPCODE:
{
	SV *restrict data_sv = NULL;
	SV *restrict file_sv = NULL;
	unsigned int arg_idx = 0;
	// Mimic the Perl shift logic
	if (arg_idx < items && SvROK(ST(arg_idx))) {
		int type = SvTYPE(SvRV(ST(arg_idx)));
		if (type == SVt_PVHV || type == SVt_PVAV) {
			data_sv = ST(arg_idx);
			arg_idx++;
		}
	}
// Only consume a positional file argument if it is a plain string that is
// NOT one of the named option keys. Otherwise write_table(data=>..., file=>...)
// would grab the literal string "data" as the filename.
	if (arg_idx < items) {
		SV *restrict cand = ST(arg_idx);
		if (SvOK(cand) && !SvROK(cand)) {
			const char *restrict k = SvPV_nolen(cand);
			if (!(strEQ(k, "data") || strEQ(k, "file") || strEQ(k, "col.names") ||
				  strEQ(k, "row.names") || strEQ(k, "sep") || strEQ(k, "delim") ||
				  strEQ(k, "undef.val"))) {
				file_sv = cand;
				arg_idx++;
			}
		}
	}
	const char *restrict sep = ",";
	bool explicit_sep = 0; // Track if delimiter was manually specified
	// CHANGED: default undef cells to a true empty value ("") instead of NULL.
	// With print_string_row emitting zero-length fields bare (no quotes), an
	// undef cell now prints as nothing at all: a,,c -- not a,'',c or a,"",c.
	// 'undef.val' => 'NA' (etc.) still overrides this.
	const char *restrict undef_val = "";
	SV *restrict row_names_sv = sv_2mortal(newSViv(1));
	SV *restrict col_names_sv = NULL;
	// Read the remaining Hash-style arguments
	for (; arg_idx < items; arg_idx += 2) {
		if (arg_idx + 1 >= items) croak("write_table: Odd number of arguments passed");
		const char *restrict key = SvPV_nolen(ST(arg_idx));
		SV *restrict val = ST(arg_idx + 1);
		if (strEQ(key, "data")) data_sv = val;
		else if (strEQ(key, "col.names")) col_names_sv = val;
		else if (strEQ(key, "file")) file_sv = val;
		else if (strEQ(key, "row.names")) row_names_sv = val;
		// Check for either "sep" or "delim" and mark as explicitly provided
		else if (strEQ(key, "sep") || strEQ(key, "delim")) {
			sep = SvPV_nolen(val);
			explicit_sep = 1;
		}
		// FIX: 'undef.val' => undef used to call SvPV_nolen(&PL_sv_undef)
		// (warning + empty string by accident); make it explicit.
		else if (strEQ(key, "undef.val")) undef_val = SvOK(val) ? SvPV_nolen(val) : "";
		else croak("write_table: Unknown arguments passed: %s", key);
	}
	if (!data_sv || !SvROK(data_sv)) {
		croak("write_table: 'data' must be a HASH or ARRAY reference\n");
	}
	SV *restrict data_ref = SvRV(data_sv);
	if (SvTYPE(data_ref) != SVt_PVHV && SvTYPE(data_ref) != SVt_PVAV) {

LikeR.xs  view on Meta::CPAN

	  else
		  do_exact = SvTRUE(exact_sv) ? 1 : 0;

	  if (do_exact) {
		  statistic = S_stat;
		  p_value   = spearman_exact_pvalue(S_stat, n, alternative);
	  } else {
		  NV r = estimate;
		  /* NOTE: R silently ignores continuity correction for Spearman.
		   * The adjustment below is non-standard; a warning is emitted
		   * so callers are not silently misled. */
		  if (continuity) {
			  warn("cor_test: continuity correction is not defined for Spearman in R and is ignored here");
		  }
		  /* BUG FIX: guard divide-by-zero when |r| == 1 exactly */
		  NV denom_t = 1.0 - r * r;
		  if (denom_t <= 0.0)
			  statistic = (r > 0.0) ? INFINITY : -INFINITY;
		  else
			  statistic = r * sqrt((NV)(n - 2) / denom_t);
		  p_value = get_t_pvalue(statistic, (NV)(n - 2), alternative);
	  }
	  Safefree(rank_x);
	  Safefree(rank_y);

	} else {
	  Safefree(x);
	  Safefree(y);
	  croak("Unknown method '%s': must be 'pearson', 'kendall', or 'spearman'", method);
	}

	Safefree(x);
	Safefree(y);

	rhv = newHV();
	hv_stores(rhv, "estimate",    newSVnv(estimate));
	hv_stores(rhv, "p.value",     newSVnv(p_value));
	hv_stores(rhv, "statistic",   newSVnv(statistic));
	hv_stores(rhv, "method",      newSVpv(method, 0));
	hv_stores(rhv, "alternative", newSVpv(alternative, 0));
	if (is_pearson) {
	  hv_stores(rhv, "parameter", newSVnv(df));
	  AV *restrict ci_av = newAV();
	  av_push(ci_av, newSVnv(ci_lower));
	  av_push(ci_av, newSVnv(ci_upper));
	  hv_stores(rhv, "conf.int", newRV_noinc((SV*)ci_av));
	}

	RETVAL = newRV_noinc((SV*)rhv);
}
OUTPUT:
	RETVAL

void shapiro_test(data)
	SV *data
PREINIT:
	AV *restrict av;
	HV *restrict ret_hash;
	size_t n_raw, n = 0;
	NV *restrict x, w = 0.0, p_val = 0.0, mean = 0.0, ssq = 0.0;
PPCODE:
	if (!SvROK(data) || SvTYPE(SvRV(data)) != SVt_PVAV) {
	  croak("Expected an array reference");
	}

	av = (AV *)SvRV(data);
	n_raw = av_len(av) + 1;

	Newx(x, n_raw, NV);

	// Extract variables and calculate mean (skipping undefined/NaN values)
	for (size_t i = 0; i < n_raw; i++) {
	  SV **restrict elem = av_fetch(av, i, 0);
	  if (elem && SvOK(*elem)) {
		   NV val = SvNV(*elem);
		   if (!isnan(val)) {
			   x[n] = val;
			   mean += val;
			   n++;
		   }
	  }
	}

	if (n < 3 || n > 5000) {
	  Safefree(x);
	  croak("Sample size must be between 3 and 5000 (R's limit)");
	}

	mean /= n;
	// Calculate Sum of Squares
	for (size_t i = 0; i < n; i++) {
	  ssq += (x[i] - mean) * (x[i] - mean);
	}
	if (ssq == 0.0) {
	  Safefree(x);
	  croak("Data is perfectly constant; cannot compute Shapiro-Wilk test");
	}
	qsort(x, n, sizeof(NV), compare_doubles);
	// --- Core AS R94 Algorithm: Weights and Statistic W
	if (n == 3) {
	  NV a_val = 0.7071067811865475; // sqrt(1/2)
	  NV b_val = a_val * (x[2] - x[0]);
	  w = (b_val * b_val) / ssq;
	  if (w < 0.75) w = 0.75; 
	  // Exact P-value for n=3
	  p_val = 1.90985931710274 * (asin(sqrt(w)) - 1.04719755119660);
	} else {
		NV *restrict m, *restrict a;
		NV sum_m2 = 0.0, b_val = 0.0;
		Newx(m, n, NV);
		Newx(a, n, NV);
		for (size_t i = 0; i < n; i++) {
			m[i] = inverse_normal_cdf((i + 1.0 - 0.375) / (n + 0.25));
			sum_m2 += m[i] * m[i];
		}
		NV u = 1.0 / sqrt((NV)n);
		NV a_n = -2.706056*pow(u,5) + 4.434685*pow(u,4) - 2.071190*pow(u,3) - 0.147981*pow(u,2) + 0.221157*u + m[n-1]/sqrt(sum_m2);
		a[n-1] = a_n;
		a[0]   = -a_n;
		if (n == 4 || n == 5) {
			NV eps = (sum_m2 - 2.0 * m[n-1]*m[n-1]) / (1.0 - 2.0 * a_n*a_n);

LikeR.xs  view on Meta::CPAN

				 q = (1.0 - gamma) * x[j] + gamma * x[j + 1];
			}
			// --- Format hash key with Epsilon guarding ---
			char key[32];
			double pct = (double)(p * 100.0); // Safe to cast to double just for formatting
			double pct_rounded = floor(pct + 0.5); // C89 safe rounding
			// Use 1e-9 epsilon check instead of strict integer equality
			if (fabs(pct - pct_rounded) < 1e-9) {
				 snprintf(key, sizeof(key), "%.0f%%", pct_rounded);
			} else {
				 snprintf(key, sizeof(key), "%.1f%%", pct);
			}
			
			hv_store(res_hv, key, strlen(key), newSVnv(q), 0);
		}
		Safefree(x); Safefree(probs);
		RETVAL = newRV_noinc((SV*)res_hv);
	}
	OUTPUT:
	  RETVAL

NV mean(...)
	PROTOTYPE: @
	INIT:
	  NV total = 0;
	  size_t count = 0;
	CODE:
		for (size_t i = 0; i < items; i++) {
			SV* restrict arg = ST(i);
			if (SvROK(arg) && SvTYPE(SvRV(arg)) == SVt_PVAV) {
				AV* restrict av = (AV*)SvRV(arg);
				size_t len = av_len(av) + 1;
				for (size_t j = 0; j < len; j++) {
					 SV** restrict tv = av_fetch(av, j, 0);
					 if (tv && SvOK(*tv)) {
						 total += SvNV(*tv);
						 count++;
					 } else {
						 croak("mean: undefined value at array ref index %zu (argument %zu)", j, i);
					 }
				}
			} else if (SvOK(arg)) {
				 total += SvNV(arg);
				 count++;
			} else {
				 croak("mean: undefined value at argument index %zu", i);
			}
		}
		if (count == 0) croak("mean needs >= 1 element");
		RETVAL = total / count;
	OUTPUT:
	  RETVAL

void mode(...)
	PROTOTYPE: @
	PREINIT:
	HV *restrict counts;
	HV *restrict originals;
	size_t max_count = 0, arg_count = 0;
	HE *restrict he;
	PPCODE:
	/* counts:    string(value) -> occurrence count */
	/* originals: string(value) -> SV* first-seen original */
	counts    = (HV *)sv_2mortal((SV *)newHV());
	originals = (HV *)sv_2mortal((SV *)newHV());

	for (size_t i = 0; i < items; i++) {
		SV *restrict arg = ST(i);
		if (SvROK(arg) && SvTYPE(SvRV(arg)) == SVt_PVAV) {
			AV *restrict av = (AV *)SvRV(arg);
			size_t len = av_len(av) + 1;
			for (size_t j = 0; j < len; j++) {
				SV **restrict tv = av_fetch(av, j, 0);
				if (tv && SvOK(*tv)) {
					STRLEN klen;
					const char *restrict key = SvPV(*tv, klen);
					SV **restrict slot = hv_fetch(counts, key, klen, 1);
					if (!slot) croak("mode: internal hash error");
					size_t cnt = SvOK(*slot) ? SvIV(*slot) + 1 : 1;
					sv_setiv(*slot, cnt);
					if (cnt > max_count) max_count = cnt;
					if (cnt == 1)
						 hv_store(originals, key, klen, newSVsv(*tv), 0);
					arg_count++;
				} else {
					croak("mode: undefined value at array ref index %zu (argument %zu)", j, i);
				}
			}
		} else if (SvOK(arg)) {
			STRLEN klen;
			const char *restrict key = SvPV(arg, klen);
			SV **restrict slot = hv_fetch(counts, key, klen, 1);
			if (!slot) croak("mode: internal hash error");
			size_t cnt = SvOK(*slot) ? SvIV(*slot) + 1 : 1;
			sv_setiv(*slot, cnt);
			if (cnt > max_count) max_count = cnt;
			if (cnt == 1)
			  hv_store(originals, key, klen, newSVsv(arg), 0);
			arg_count++;
		} else {
			croak("mode: undefined value at argument index %zu", i);
		}
	}

	if (arg_count == 0)
		croak("mode needs >= 1 element");

	hv_iterinit(counts);
	while ((he = hv_iternext(counts))) {
		if (SvIV(hv_iterval(counts, he)) == max_count) {
			STRLEN klen;
			const char *restrict key = HePV(he, klen);
			SV **restrict orig = hv_fetch(originals, key, klen, 0);
			mXPUSHs(orig ? newSVsv(*orig) : newSVpvn(key, klen));
		}
	}

NV sum(...)
	PROTOTYPE: @
	INIT:
		NV total = 0;

LikeR.xs  view on Meta::CPAN

		}
		p_val = get_t_pvalue(t_stat, df, alternative);
		NV alpha = 1.0 - conf_level, t_crit, ci_lower, ci_upper;
		if (strcmp(alternative, "less") == 0) {
			t_crit   = qt_tail(df, alpha);
			ci_lower = -INFINITY;
			ci_upper = cint_est + t_crit * std_err;
		} else if (strcmp(alternative, "greater") == 0) {
			t_crit   = qt_tail(df, alpha);
			ci_lower = cint_est - t_crit * std_err;
			ci_upper = INFINITY;
		} else {
			t_crit   = qt_tail(df, alpha / 2.0);
			ci_lower = cint_est - t_crit * std_err;
			ci_upper = cint_est + t_crit * std_err;
		}
		AV*restrict conf_int = newAV();
		av_push(conf_int, newSVnv(ci_lower));
		av_push(conf_int, newSVnv(ci_upper));
		hv_store(results, "statistic", 9, newSVnv(t_stat), 0);
		hv_store(results, "df",        2, newSVnv(df),     0);
		hv_store(results, "p_value",   7, newSVnv(p_val),  0);
		hv_store(results, "conf_int",  8, newRV_noinc((SV*)conf_int), 0);
		RETVAL = newRV_noinc((SV*)results);
	}
	OUTPUT:
		RETVAL

void p_adjust(SV* p_sv, const char* method = "holm")
	INIT:
		if (!SvROK(p_sv) || SvTYPE(SvRV(p_sv)) != SVt_PVAV) {
			croak("p_adjust: first argument must be an ARRAY reference of p-values");
		}
		AV *restrict p_av = (AV*)SvRV(p_sv);
		size_t n = av_len(p_av) + 1;
		// Handle empty input
		if (n == 0) {
			XSRETURN_EMPTY;
		}
		// Normalize method string
		char meth[64];
		strncpy(meth, method, 63); meth[63] = '\0';
		for(unsigned short int i = 0; meth[i]; i++) meth[i] = tolower(meth[i]);
		// Resolve aliases
		if (strstr(meth, "benjamini") && strstr(meth, "hochberg")) strcpy(meth, "bh");
		if (strstr(meth, "benjamini") && strstr(meth, "yekutieli")) strcpy(meth, "by");
		if (strcmp(meth, "fdr") == 0) strcpy(meth, "bh");
		// Allocate C memory
		PVal *restrict arr;
		NV *restrict adj;
		Newx(arr, n, PVal);
		Newx(adj, n, NV);

		for (size_t i = 0; i < n; i++) {
			SV**restrict tv = av_fetch(p_av, i, 0);
			arr[i].p = (tv && SvOK(*tv)) ? SvNV(*tv) : 1.0;
			arr[i].orig_idx = i;
		}
		// Sort ascending (Stable sort using original index)
		qsort(arr, n, sizeof(PVal), cmp_pval);
	PPCODE:
		if (strcmp(meth, "bonferroni") == 0) {
			for (size_t i = 0; i < n; i++) {
				NV v = arr[i].p * n;
				adj[arr[i].orig_idx] = (v < 1.0) ? v : 1.0;
			}
		} else if (strcmp(meth, "holm") == 0) {
			NV cummax = 0.0;
			for (size_t i = 0; i < n; i++) {
				 NV v = arr[i].p * (n - i);
				 if (v > cummax) cummax = v;
				 adj[arr[i].orig_idx] = (cummax < 1.0) ? cummax : 1.0;
			}
		} else if (strcmp(meth, "hochberg") == 0) {
			NV cummin = 1.0;
			for (ssize_t i = n - 1; i >= 0; i--) {
				 NV v = arr[i].p * (n - i);
				 if (v < cummin) cummin = v;
				 adj[arr[i].orig_idx] = (cummin < 1.0) ? cummin : 1.0;
			}
		} else if (strcmp(meth, "bh") == 0) {
			NV cummin = 1.0;
			for (ssize_t i = n - 1; i >= 0; i--) {
				NV v = arr[i].p * n / (i + 1.0);
				if (v < cummin) cummin = v;
				adj[arr[i].orig_idx] = (cummin < 1.0) ? cummin : 1.0;
			}
		} else if (strcmp(meth, "by") == 0) {
			NV q = 0.0;
			for (size_t i = 1; i <= n; i++) q += 1.0 / i;
			NV cummin = 1.0;
			for (ssize_t i = n - 1; i >= 0; i--) {
				NV v = arr[i].p * n / (i + 1.0) * q;
				if (v < cummin) cummin = v;
				adj[arr[i].orig_idx] = (cummin < 1.0) ? cummin : 1.0;
			}
		} else if (strcmp(meth, "hommel") == 0) {
			NV *restrict pa, *restrict q_arr;
			Newx(pa, n, NV);
			Newx(q_arr, n, NV);
			// Initial: min(n * p[i] / (i + 1))
			NV min_val = n * arr[0].p;
			for (size_t i = 1; i < n; i++) {
				NV temp = (n * arr[i].p) / (i + 1.0);
				if (temp < min_val) {
				   min_val = temp;
				}
			}
			// pa <- q <- rep(min, n)
			for (size_t i = 0; i < n; i++) {
				 pa[i] = min_val;
				 q_arr[i] = min_val;
			}
			for (size_t j = n - 1; j >= 2; j--) {
				 ssize_t n_mj = n - j;       // Max index for 'ij'. Length is n_mj + 1
				 ssize_t i2_len = j - 1;     // Length of 'i2
				 // Calculate q1 = min(j * p[i2] / (2:j))
				 NV q1 = (j * arr[n_mj + 1].p) / 2.0;
				 for (size_t k = 1; k < i2_len; k++) {
					 NV temp_q1 = (j * arr[n_mj + 1 + k].p) / (2.0 + k);
					 if (temp_q1 < q1) {

LikeR.xs  view on Meta::CPAN

			symmetric = 1;
		}
		if (nrows < 2)
			croak("cor: need at least 2 observations (got %lu)", nrows);
		// -- build cache for symmetric case: compute upper triangle, store results, mirror to lower triangle
		AV*restrict result_av = newAV();
		av_extend(result_av, ncols_x - 1);
		// Allocate per-row AVs up front so we can fill them in order
		AV **restrict rows_out;
		Newx(rows_out, ncols_x, AV*);
		for (size_t i = 0; i < ncols_x; i++) {
			rows_out[i] = newAV();
			av_extend(rows_out[i], ncols_y - 1);
		}
		if (symmetric) {
		/* Upper triangle + diagonal, then mirror. r_cache[i][j] (j >= i) holds the computed value. */
			NV **restrict r_cache;
			Newx(r_cache, ncols_x, NV*);
			for (size_t i = 0; i < ncols_x; i++)
				 Newx(r_cache[i], ncols_x, NV);

			for (size_t i = 0; i < ncols_x; i++) {
				 r_cache[i][i] = 1.0; // diagonal
				 for (size_t j = i + 1; j < ncols_x; j++) {
					 NV r = compute_cor(col_x[i], col_x[j], nrows, method);
					 r_cache[i][j] = r;
					 r_cache[j][i] = r; // symmetry
				 }
			}
			// fill output AoA from cache
			for (size_t i = 0; i < ncols_x; i++)
				 for (size_t j = 0; j < ncols_x; j++)
					 av_store(rows_out[i], j, newSVnv(r_cache[i][j]));

			for (size_t i = 0; i < ncols_x; i++) Safefree(r_cache[i]);
			Safefree(r_cache); r_cache = NULL;
		} else {
			// cross-correlation: every (i,j) pair is independent
			for (size_t i = 0; i < ncols_x; i++)
				for (size_t j = 0; j < ncols_y; j++)
					av_store(rows_out[i], j, newSVnv(compute_cor(col_x[i], col_y[j], nrows, method)));
		}
		// push row AVs into result
		for (size_t i = 0; i < ncols_x; i++)
			av_store(result_av, i, newRV_noinc((SV*)rows_out[i]));
		Safefree(rows_out); rows_out = NULL;
		// -- free column arrays -------------------------------------
		for (size_t j = 0; j < ncols_x; j++) Safefree(col_x[j]);
		Safefree(col_x); col_x = NULL;
		if (!symmetric) {
			for (size_t j = 0; j < ncols_y; j++) Safefree(col_y[j]);
			Safefree(col_y);
		}
		RETVAL = newRV_noinc((SV*)result_av);
	}
	OUTPUT:
		RETVAL

void scale(...)
	PROTOTYPE: @
	PPCODE:
	{
		bool do_center_mean = TRUE, do_scale_sd = TRUE;
		NV center_val = 0.0, scale_val = 1.0;
		size_t data_items = items;
		// 1. Parse Options Hash (if it exists as the last argument)
		if (items > 0) {
			SV*restrict last_arg = ST(items - 1);
			if (SvROK(last_arg) && SvTYPE(SvRV(last_arg)) == SVt_PVHV) {
				data_items = items - 1; // Exclude hash from data processing
				HV*restrict opt_hv = (HV*)SvRV(last_arg);
				// --- Parse 'center'
				SV**restrict center_sv = hv_fetch(opt_hv, "center", 6, 0);
				if (center_sv) {
				  SV*restrict val_sv = *center_sv;
				  if (!SvOK(val_sv)) {
						do_center_mean = FALSE; center_val = 0.0;
				  } else {
						char *restrict str = SvPV_nolen(val_sv);
						/* Trap booleans and empty strings before numeric checks */
						if (strcasecmp(str, "mean") == 0 || strcasecmp(str, "true") == 0 || strcmp(str, "1") == 0) {
							 do_center_mean = TRUE;
						} else if (strcasecmp(str, "none") == 0 || strcasecmp(str, "false") == 0 || strcmp(str, "0") == 0 || strcmp(str, "") == 0) {
							 do_center_mean = FALSE; center_val = 0.0;
						} else if (looks_like_number(val_sv)) {
							 do_center_mean = FALSE; center_val = SvNV(val_sv);
						} else if (SvTRUE(val_sv)) {
							 do_center_mean = TRUE;
						} else {
							 do_center_mean = FALSE; center_val = 0.0;
						}
				  }
				}
				// --- Parse 'scale' ---
				SV**restrict scale_sv = hv_fetch(opt_hv, "scale", 5, 0);
				if (scale_sv) {
				  SV*restrict val_sv = *scale_sv;
				  if (!SvOK(val_sv)) {
						do_scale_sd = FALSE; scale_val = 1.0;
				  } else {
						char *restrict str = SvPV_nolen(val_sv);
						if (strcasecmp(str, "sd") == 0 || strcasecmp(str, "true") == 0 || strcmp(str, "1") == 0) {
							 do_scale_sd = TRUE;
						} else if (strcasecmp(str, "none") == 0 || strcasecmp(str, "false") == 0 || strcmp(str, "0") == 0 || strcmp(str, "") == 0) {
							 do_scale_sd = FALSE; scale_val = 1.0;
						} else if (looks_like_number(val_sv)) {
							 do_scale_sd = FALSE; scale_val = SvNV(val_sv);
							 if (scale_val == 0.0) scale_val = 1.0; /* Prevent Division By Zero */
						} else if (SvTRUE(val_sv)) {
							 do_scale_sd = TRUE;
						} else {
							 do_scale_sd = FALSE; scale_val = 1.0;
						}
				  }
				}
			}
		}
		// 2. Detect if the input is a Matrix (Array of Arrays)
		bool is_matrix = FALSE;
		if (data_items == 1) {
			SV*restrict first_arg = ST(0);

LikeR.xs  view on Meta::CPAN

	}
	for (j = 0; j < p; j++) {
	  hv_store(coef_hv, exp_terms[j], strlen(exp_terms[j]), newSVnv(beta[j]), 0);
	  av_push(terms_av, newSVpv(exp_terms[j], 0));
	  HV *restrict row_hv = newHV();
	  if (aliased[j]) {
		   hv_store(row_hv, "Estimate",   8,  newSVpv("NaN", 0), 0);
		   hv_store(row_hv, "Std. Error", 10, newSVpv("NaN", 0), 0);
		   hv_store(row_hv, "t value",    7,  newSVpv("NaN", 0), 0);
		   hv_store(row_hv, "Pr(>|t|)",   8,  newSVpv("NaN", 0), 0);
	  } else {
		   NV se    = sqrt(rse_sq * XtX[j * p + j]);
		   NV t_val = (se > 0.0) ? (beta[j] / se) : (INFINITY * (beta[j] >= 0.0 ? 1.0 : -1.0));
		   NV p_val = get_t_pvalue(t_val, df_res, "two.sided");
		   hv_store(row_hv, "Estimate",   8,  newSVnv(beta[j]), 0);
		   hv_store(row_hv, "Std. Error", 10, newSVnv(se),      0);
		   hv_store(row_hv, "t value",    7,  newSVnv(t_val),   0);
		   hv_store(row_hv, "Pr(>|t|)",   8,  newSVnv(p_val),   0);
	  }
	  hv_store(summary_hv, exp_terms[j], strlen(exp_terms[j]), newRV_noinc((SV*)row_hv), 0);
	}
	hv_store(res_hv, "coefficients",  12, newRV_noinc((SV*)coef_hv),   0);
	hv_store(res_hv, "fitted.values", 13, newRV_noinc((SV*)fitted_hv), 0);
	hv_store(res_hv, "residuals",      9, newRV_noinc((SV*)resid_hv),  0);
	hv_store(res_hv, "df.residual",   11, newSVuv(df_res),             0);
	hv_store(res_hv, "rank",           4, newSVuv(final_rank),         0);
	hv_store(res_hv, "rss",            3, newSVnv(rss),                0);
	hv_store(res_hv, "summary",        7, newRV_noinc((SV*)summary_hv),0);
	hv_store(res_hv, "terms",          5, newRV_noinc((SV*)terms_av),  0);
	hv_store(res_hv, "r.squared",      9, newSVnv(r_squared),          0);
	hv_store(res_hv, "adj.r.squared", 13, newSVnv(adj_r_squared),      0);
	if (!isnan(f_stat)) {
	  AV *fstat_av = newAV();
	  av_push(fstat_av, newSVnv(f_stat));
	  av_push(fstat_av, newSViv(numdf));
	  av_push(fstat_av, newSViv(df_res));
	  hv_store(res_hv, "fstatistic", 10, newRV_noinc((SV*)fstat_av), 0);
	  hv_store(res_hv, "f.pvalue",    8, newSVnv(f_pvalue),          0);
	}
	// Deep Cleanup
	for (i = 0; i < num_terms; i++) Safefree(terms[i]); Safefree(terms);
	for (i = 0; i < num_uniq; i++) Safefree(uniq_terms[i]); Safefree(uniq_terms);
	for (j = 0; j < p_exp; j++) {
	  Safefree(exp_terms[j]);
	  if (is_dummy[j]) { Safefree(dummy_base[j]); Safefree(dummy_level[j]); }
	}
	Safefree(exp_terms); Safefree(is_dummy); Safefree(dummy_base); Safefree(dummy_level);
	Safefree(X); Safefree(Y); Safefree(XtX); Safefree(XtY);
	Safefree(beta); Safefree(aliased);
	if (row_hashes) Safefree(row_hashes);

	RETVAL = newRV_noinc((SV*)res_hv);
}
OUTPUT:
	RETVAL

void seq(from, to, by = 1.0)
	NV from
	NV to
	NV by
PPCODE:
	{
		//Handle the zero 'by' case
		if (by == 0.0) {
			if (from == to) {
				 EXTEND(SP, 1);
				 mPUSHn(from);
				 XSRETURN(1);
			} else {
				 croak("invalid 'by' argument: cannot be zero when from != to");
			}
		}
		// Check for wrong direction / infinite loop
		if ((from < to && by < 0.0) || (from > to && by > 0.0)) {
			croak("wrong sign in 'by' argument");
		}
		/* * Calculate number of elements. 
		* R uses a small epsilon (like 1e-10) to avoid dropping the last 
		* element due to floating point inaccuracies.
		*/
		NV n_elements_d = (to - from) / by;
		if (n_elements_d < 0.0) n_elements_d = 0.0;
		size_t n_elements = (n_elements_d + 1e-10) + 1;
		// Pre-extend the stack to avoid reallocating inside the loop
		EXTEND(SP, n_elements);
		for (size_t i = 0; i < n_elements; i++) {
			mPUSHn(from + i * by);
		}
		XSRETURN(n_elements);
	}

SV* rnorm(...)
	CODE:
	{
	  // Auto-seed the PRNG if the Perl script hasn't done so yet
	  AUTO_SEED_PRNG();
	  size_t n = 0;
	  NV mean = 0.0, sd = 1.0;
	  int arg_start = 0;
	  // Check if the first argument is a simple integer (rnorm(33))
	  if (items > 0 && SvIOK(ST(0)) && (items == 1 || items % 2 != 0)) {
		   n = (unsigned int)SvUV(ST(0));
		   arg_start = 1; // Start parsing named arguments from the second element
	  }

	  // --- Parse remaining named arguments from the flat stack ---
	  if ((items - arg_start) % 2 != 0) {
		   croak("Usage: rnorm(n), rnorm(n => 10, mean => 0, sd => 1), or rnorm(33, mean => 0)");
	  }

	  for (int i = arg_start; i < items; i += 2) {
		   const char* restrict key = SvPV_nolen(ST(i));
		   SV* restrict val = ST(i + 1);

		   if      (strEQ(key, "n"))    n    = (unsigned int)SvUV(val);
		   else if (strEQ(key, "mean")) mean = SvNV(val);
		   else if (strEQ(key, "sd"))   sd   = SvNV(val);
		   else croak("rnorm: unknown argument '%s'", key);
	  }
	  if (sd < 0.0) croak("rnorm: standard deviation must be non-negative");
	  AV *restrict result_av = newAV();



( run in 1.000 second using v1.01-cache-2.11-cpan-5511b514fd6 )