C-sparse

 view release on metacpan or  search on metacpan

src/sparse-0.4.4/perl/t/target-arm/neon_helper.c  view on Meta::CPAN

        x = 0x7fff; \
        SET_QC(); \
    } else { \
        x = -x; \
    }} while (0)
uint32_t HELPER(neon_qneg_s16)(CPUARMState *env, uint32_t x)
{
    neon_s16 vec;
    NEON_UNPACK(neon_s16, vec, x);
    DO_QNEG16(vec.v1);
    DO_QNEG16(vec.v2);
    NEON_PACK(neon_s16, x, vec);
    return x;
}
#undef DO_QNEG16

uint32_t HELPER(neon_qabs_s32)(CPUARMState *env, uint32_t x)
{
    if (x == SIGNBIT) {
        SET_QC();
        x = ~SIGNBIT;
    } else if ((int32_t)x < 0) {
        x = -x;
    }
    return x;
}

uint32_t HELPER(neon_qneg_s32)(CPUARMState *env, uint32_t x)
{
    if (x == SIGNBIT) {
        SET_QC();
        x = ~SIGNBIT;
    } else {
        x = -x;
    }
    return x;
}

/* NEON Float helpers.  */
uint32_t HELPER(neon_min_f32)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    return float32_val(float32_min(make_float32(a), make_float32(b), fpst));
}

uint32_t HELPER(neon_max_f32)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    return float32_val(float32_max(make_float32(a), make_float32(b), fpst));
}

uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    float32 f0 = make_float32(a);
    float32 f1 = make_float32(b);
    return float32_val(float32_abs(float32_sub(f0, f1, fpst)));
}

/* Floating point comparisons produce an integer result.
 * Note that EQ doesn't signal InvalidOp for QNaNs but GE and GT do.
 * Softfloat routines return 0/1, which we convert to the 0/-1 Neon requires.
 */
uint32_t HELPER(neon_ceq_f32)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    return -float32_eq_quiet(make_float32(a), make_float32(b), fpst);
}

uint32_t HELPER(neon_cge_f32)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    return -float32_le(make_float32(b), make_float32(a), fpst);
}

uint32_t HELPER(neon_cgt_f32)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    return -float32_lt(make_float32(b), make_float32(a), fpst);
}

uint32_t HELPER(neon_acge_f32)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    float32 f0 = float32_abs(make_float32(a));
    float32 f1 = float32_abs(make_float32(b));
    return -float32_le(f1, f0, fpst);
}

uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    float32 f0 = float32_abs(make_float32(a));
    float32 f1 = float32_abs(make_float32(b));
    return -float32_lt(f1, f0, fpst);
}

#define ELEM(V, N, SIZE) (((V) >> ((N) * (SIZE))) & ((1ull << (SIZE)) - 1))

void HELPER(neon_qunzip8)(CPUARMState *env, uint32_t rd, uint32_t rm)
{
    uint64_t zm0 = float64_val(env->vfp.regs[rm]);
    uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
    uint64_t zd0 = float64_val(env->vfp.regs[rd]);
    uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
    uint64_t d0 = ELEM(zd0, 0, 8) | (ELEM(zd0, 2, 8) << 8)
        | (ELEM(zd0, 4, 8) << 16) | (ELEM(zd0, 6, 8) << 24)
        | (ELEM(zd1, 0, 8) << 32) | (ELEM(zd1, 2, 8) << 40)
        | (ELEM(zd1, 4, 8) << 48) | (ELEM(zd1, 6, 8) << 56);
    uint64_t d1 = ELEM(zm0, 0, 8) | (ELEM(zm0, 2, 8) << 8)
        | (ELEM(zm0, 4, 8) << 16) | (ELEM(zm0, 6, 8) << 24)
        | (ELEM(zm1, 0, 8) << 32) | (ELEM(zm1, 2, 8) << 40)
        | (ELEM(zm1, 4, 8) << 48) | (ELEM(zm1, 6, 8) << 56);
    uint64_t m0 = ELEM(zd0, 1, 8) | (ELEM(zd0, 3, 8) << 8)
        | (ELEM(zd0, 5, 8) << 16) | (ELEM(zd0, 7, 8) << 24)
        | (ELEM(zd1, 1, 8) << 32) | (ELEM(zd1, 3, 8) << 40)
        | (ELEM(zd1, 5, 8) << 48) | (ELEM(zd1, 7, 8) << 56);
    uint64_t m1 = ELEM(zm0, 1, 8) | (ELEM(zm0, 3, 8) << 8)
        | (ELEM(zm0, 5, 8) << 16) | (ELEM(zm0, 7, 8) << 24)
        | (ELEM(zm1, 1, 8) << 32) | (ELEM(zm1, 3, 8) << 40)
        | (ELEM(zm1, 5, 8) << 48) | (ELEM(zm1, 7, 8) << 56);



( run in 1.312 second using v1.01-cache-2.11-cpan-5a3173703d6 )