Ancient
view release on metacpan or search on metacpan
xs/nvec/nvec.c view on Meta::CPAN
#endif
return ptr;
}
/*
 * Release a buffer previously obtained for aligned storage (vec_destroy
 * passes a Vec's data pointer here). A NULL pointer is accepted and ignored.
 *
 * The release strategy is selected at compile time:
 *   - Windows:            _aligned_free()
 *   - Apple / POSIX 2001: plain free()
 *   - otherwise:          the pointer stored in the slot immediately before
 *                         `ptr` is what gets passed to free()
 * NOTE(review): these conditions presumably mirror the matching allocator,
 * which is not visible from here - keep the two in sync.
 */
static void vec_free_aligned(void *ptr) {
    if (ptr) {
#if defined(_WIN32) || defined(_WIN64)
        _aligned_free(ptr);
#elif defined(__APPLE__) || _POSIX_C_SOURCE >= 200112L
        free(ptr);
#else
        /* The word just below the block holds the address to hand to free(). */
        void **slot = (void **)ptr - 1;
        free(*slot);
#endif
    }
}
/* ============================================
Vec Lifecycle
============================================ */
/*
 * Allocate a new Vec with room for `capacity` doubles and a length of 0.
 *
 * capacity: requested element count. Croaks when it is negative or larger
 *           than VEC_MAX_SIZE / sizeof(double).
 * Returns:  a Vec zero-filled by Newxz whose data buffer comes from
 *           vec_alloc_aligned(); data is left NULL when capacity is 0.
 * Croaks:   also when the data buffer cannot be allocated; the Vec struct is
 *           released first so it does not leak.
 *
 * NOTE(review): the messages print capacity through a (long) cast; if IV is
 * wider than long on some target the printed value may be truncated. Perl's
 * IVdf format would avoid that - confirm before changing.
 */
static Vec* vec_create(pTHX_ IV capacity) {
    Vec *vec;

    /* Reject element counts that make no sense or could overflow. */
    if (capacity < 0) {
        croak("vec: negative capacity %ld", (long)capacity);
    }
    if (capacity > VEC_MAX_SIZE / (IV)sizeof(double)) {
        croak("vec: capacity %ld exceeds maximum safe size", (long)capacity);
    }

    Newxz(vec, 1, Vec);

    if (capacity > 0) {
        size_t nbytes = (size_t)capacity * sizeof(double);
        double *buf = (double*)vec_alloc_aligned(nbytes);

        if (!buf) {
            /* Give the struct back before croak() unwinds. */
            Safefree(vec);
            croak("vec: failed to allocate %ld elements", (long)capacity);
        }
        vec->data = buf;
    }

    vec->capacity = capacity;
    vec->len = 0;
    vec->flags = 0;
    return vec;
}
/*
 * Free a Vec together with its data buffer.
 * A NULL Vec is ignored; a Vec without a data buffer only has its struct freed.
 */
static void vec_destroy(pTHX_ Vec *v) {
    if (!v) {
        return;
    }
    if (v->data) {
        vec_free_aligned(v->data);
    }
    Safefree(v);
}
/*
 * Magic callback that destroys the Vec stored in the magic's mg_ptr.
 * (Presumably installed as the free hook of vec_vtbl - the vtable itself is
 * not visible here.)
 *
 * sv: the SV carrying the magic; not used.
 * mg: the magic entry whose mg_ptr holds the Vec.
 * Always returns 0.
 */
static int vec_mg_free(pTHX_ SV *sv, MAGIC *mg) {
    vec_destroy(aTHX_ (Vec*)mg->mg_ptr);
    return 0;
}
/*
 * Magic dup callback: currently a no-op that copies nothing and returns 0.
 *
 * mg:    the magic entry being duplicated; not used.
 * param: clone parameters; not used.
 *
 * NOTE(review): because the Vec is not deep-copied, an interpreter clone
 * would presumably end up sharing the same Vec pointer with the original -
 * verify how vec_vtbl and the magic flags are set up before relying on this
 * under ithreads.
 */
static int vec_mg_dup(pTHX_ MAGIC *mg, CLONE_PARAMS *param) {
    /* Both arguments are intentionally ignored; mark them to keep -Wextra quiet. */
    PERL_UNUSED_ARG(mg);
    PERL_UNUSED_ARG(param);
    /* For threads: would need to deep-copy */
    return 0;
}
/* ============================================
Vec Object Wrapping
============================================ */
/*
 * Wrap a Vec in a blessed Perl reference.
 *
 * A fresh SV is created, the Vec pointer is attached to it as PERL_MAGIC_ext
 * magic tied to vec_vtbl, and a reference to that SV (taken without an extra
 * refcount increment) is blessed into package "nvec".
 *
 * v: the Vec to expose to Perl; stored as the magic's mg_ptr.
 * Returns the blessed reference.
 */
static SV* vec_wrap(pTHX_ Vec *v) {
    SV *holder = newSV(0);
    SV *ref;

    /* Length 0 makes sv_magicext keep the raw pointer instead of copying it. */
    sv_magicext(holder, NULL, PERL_MAGIC_ext, &vec_vtbl, (char*)v, 0);

    ref = newRV_noinc(holder);
    sv_bless(ref, gv_stashpv("nvec", GV_ADD));
    return ref;
}
/*
 * Recover the Vec behind a Perl-side object.
 *
 * sv: expected to be a blessed reference derived from "nvec".
 * Returns the Vec pointer stored in the referent's vec_vtbl ext magic.
 * Croaks with "Not a vec object" when sv is not such an object, and with
 * "Corrupted vec object" when the referent carries no matching magic.
 */
static Vec* vec_from_sv(pTHX_ SV *sv) {
    MAGIC *magic;

    if (!(sv_isobject(sv) && sv_derived_from(sv, "nvec"))) {
        croak("Not a vec object");
    }

    /* The magic lives on the referent, not on the reference itself. */
    magic = mg_findext(SvRV(sv), PERL_MAGIC_ext, &vec_vtbl);
    if (!magic) {
        croak("Corrupted vec object");
    }
    return (Vec*)magic->mg_ptr;
}
/* ============================================
SIMD Implementations - ADD
============================================ */
static void vec_add_impl(double *c, const double *a, const double *b, IV n) {
IV i = 0;
#if VEC_USE_NEON
for (; i + 2 <= n; i += 2) {
float64x2_t va = vld1q_f64(a + i);
float64x2_t vb = vld1q_f64(b + i);
vst1q_f64(c + i, vaddq_f64(va, vb));
}
#elif VEC_USE_AVX || VEC_USE_AVX2
for (; i + 4 <= n; i += 4) {
__m256d va = _mm256_load_pd(a + i);
__m256d vb = _mm256_load_pd(b + i);
_mm256_store_pd(c + i, _mm256_add_pd(va, vb));
}
#elif VEC_USE_SSE2
for (; i + 2 <= n; i += 2) {
__m128d va = _mm_load_pd(a + i);
__m128d vb = _mm_load_pd(b + i);
_mm_store_pd(c + i, _mm_add_pd(va, vb));
}
#endif
( run in 0.595 second using v1.01-cache-2.11-cpan-f889d44b568 )