Ancient

 view release on metacpan or  search on metacpan

xs/nvec/nvec.c  view on Meta::CPAN

#endif
    return ptr;
}

/*
 * Release a buffer through the deallocator selected at compile time.
 *
 *   - Windows:            _aligned_free()
 *   - Apple / POSIX 2001: plain free()
 *   - anything else:      the pointer-sized slot directly in front of `ptr`
 *                         is read and that address is passed to free()
 *                         (presumably stored there by the matching
 *                         allocator, which is not fully visible here).
 *
 * A NULL `ptr` is ignored on every platform.
 */
static void vec_free_aligned(void *ptr) {
    if (ptr != NULL) {
#if defined(_WIN32) || defined(_WIN64)
        _aligned_free(ptr);
#elif defined(__APPLE__) || _POSIX_C_SOURCE >= 200112L
        free(ptr);
#else
        /* Slot just before the user pointer holds the address to free. */
        void **slot = (void **)ptr - 1;
        free(*slot);
#endif
    }
}

/* ============================================
   Vec Lifecycle
   ============================================ */

/*
 * Allocate a new, empty Vec with room for `capacity` doubles.
 *
 * capacity  Number of elements to reserve. Zero is allowed; in that case
 *           no element buffer is allocated and `data` stays NULL.
 *
 * Returns the new Vec with len == 0 and the requested capacity.
 *
 * Croaks when `capacity` is negative, when it exceeds
 * VEC_MAX_SIZE / sizeof(double), or when the element buffer cannot be
 * allocated (the Vec struct is released before croaking in that case).
 */
static Vec* vec_create(pTHX_ IV capacity) {
    Vec *v;

    /* Validate capacity to prevent overflow in the byte-size computation.
     * IVdf is used instead of "%ld" + (long) cast so the reported value is
     * not truncated where IV is wider than long (e.g. 64-bit Windows). */
    if (capacity < 0) {
        croak("vec: negative capacity %" IVdf, capacity);
    }
    if (capacity > VEC_MAX_SIZE / (IV)sizeof(double)) {
        croak("vec: capacity %" IVdf " exceeds maximum safe size", capacity);
    }

    /* Newxz zero-fills the struct, so len, flags and data start at 0/NULL. */
    Newxz(v, 1, Vec);
    if (capacity > 0) {
        v->data = (double*)vec_alloc_aligned((size_t)capacity * sizeof(double));
        if (!v->data) {
            /* Release the struct first: croak does not return. */
            Safefree(v);
            croak("vec: failed to allocate %" IVdf " elements", capacity);
        }
    }
    v->capacity = capacity;
    return v;
}

/*
 * Release a Vec: its element buffer (if any) via vec_free_aligned(),
 * then the struct itself via Safefree(). A NULL `v` is a no-op.
 */
static void vec_destroy(pTHX_ Vec *v) {
    if (v == NULL) {
        return;
    }

    if (v->data != NULL) {
        vec_free_aligned(v->data);
    }
    Safefree(v);
}

/*
 * Magic "free" hook: destroys the Vec stored in the magic's mg_ptr.
 * The owning SV itself is not touched. Always returns 0.
 */
static int vec_mg_free(pTHX_ SV *sv, MAGIC *mg) {
    PERL_UNUSED_ARG(sv);
    vec_destroy(aTHX_ (Vec*)mg->mg_ptr);
    return 0;
}

/*
 * Magic "dup" hook (thread cloning). Currently a stub: it performs no
 * copy and returns 0, so a cloned magic would still point at the same
 * Vec. Proper thread support would need to deep-copy the Vec here.
 *
 * NOTE(review): vec_wrap() attaches the magic without setting any dup
 * flag in the code visible here, so confirm whether this hook is ever
 * invoked before relying on it.
 */
static int vec_mg_dup(pTHX_ MAGIC *mg, CLONE_PARAMS *param) {
    PERL_UNUSED_ARG(mg);
    PERL_UNUSED_ARG(param);
    return 0;
}

/* ============================================
   Vec Object Wrapping
   ============================================ */

/*
 * Wrap a Vec in a Perl object.
 *
 * A fresh SV carries the Vec pointer as PERL_MAGIC_ext magic (tagged with
 * vec_vtbl); a reference to that SV is blessed into package "nvec" and
 * returned. The reference takes over the only refcount of the inner SV
 * (newRV_noinc).
 */
static SV* vec_wrap(pTHX_ Vec *v) {
    SV *holder = newSV(0);
    SV *ref;

    /* mg_len of 0: mg_ptr stores the Vec pointer itself, not a copy. */
    sv_magicext(holder, NULL, PERL_MAGIC_ext, &vec_vtbl, (char*)v, 0);

    ref = newRV_noinc(holder);
    return sv_bless(ref, gv_stashpv("nvec", GV_ADD));
}

/*
 * Extract the Vec behind a Perl-side object.
 *
 * sv  Expected to be a blessed reference derived from "nvec".
 *
 * Returns the Vec pointer stored in the referent's vec_vtbl ext magic.
 * Croaks with "Not a vec object" if `sv` is not such an object, and with
 * "Corrupted vec object" if the referent carries no matching magic.
 */
static Vec* vec_from_sv(pTHX_ SV *sv) {
    SV *referent;
    MAGIC *magic;

    if (!(sv_isobject(sv) && sv_derived_from(sv, "nvec"))) {
        croak("Not a vec object");
    }

    referent = SvRV(sv);
    magic = mg_findext(referent, PERL_MAGIC_ext, &vec_vtbl);
    if (magic == NULL) {
        croak("Corrupted vec object");
    }

    return (Vec*)magic->mg_ptr;
}

/* ============================================
   SIMD Implementations - ADD
   ============================================ */

static void vec_add_impl(double *c, const double *a, const double *b, IV n) {
    IV i = 0;
    
#if VEC_USE_NEON
    for (; i + 2 <= n; i += 2) {
        float64x2_t va = vld1q_f64(a + i);
        float64x2_t vb = vld1q_f64(b + i);
        vst1q_f64(c + i, vaddq_f64(va, vb));
    }
#elif VEC_USE_AVX || VEC_USE_AVX2
    for (; i + 4 <= n; i += 4) {
        __m256d va = _mm256_load_pd(a + i);
        __m256d vb = _mm256_load_pd(b + i);
        _mm256_store_pd(c + i, _mm256_add_pd(va, vb));
    }
#elif VEC_USE_SSE2
    for (; i + 2 <= n; i += 2) {
        __m128d va = _mm_load_pd(a + i);
        __m128d vb = _mm_load_pd(b + i);
        _mm_store_pd(c + i, _mm_add_pd(va, vb));
    }
#endif



( run in 0.595 second using v1.01-cache-2.11-cpan-f889d44b568 )