C-sparse
view release on metacpan or search on metacpan
src/sparse-0.4.4/perl/t/target-arm/neon_helper.c view on Meta::CPAN
x = 0x7fff; \
SET_QC(); \
} else { \
x = -x; \
}} while (0)
/*
 * Negate both signed 16-bit lanes packed into x.
 *
 * x is unpacked into a neon_s16, DO_QNEG16 is applied to lane v1 and then
 * to lane v2, and the lanes are packed back into the 32-bit return value.
 * DO_QNEG16 either negates the lane or replaces it with 0x7fff and calls
 * SET_QC().
 */
uint32_t HELPER(neon_qneg_s16)(CPUARMState *env, uint32_t x)
{
    neon_s16 lanes;

    NEON_UNPACK(neon_s16, lanes, x);

    DO_QNEG16(lanes.v1);
    DO_QNEG16(lanes.v2);

    NEON_PACK(neon_s16, x, lanes);
    return x;
}
#undef DO_QNEG16
/*
 * Absolute value of x interpreted as a signed 32-bit integer, with one
 * special case: when x equals SIGNBIT, SET_QC() is invoked and ~SIGNBIT is
 * returned instead of a negated value.
 *
 * Any other input with the sign bit set is negated; everything else is
 * returned unchanged.
 */
uint32_t HELPER(neon_qabs_s32)(CPUARMState *env, uint32_t x)
{
    if (x == SIGNBIT) {
        /* Special-cased input: flag it and return the substitute value. */
        SET_QC();
        return ~SIGNBIT;
    }

    if ((int32_t)x < 0) {
        return -x;
    }

    return x;
}
/*
 * Negate the 32-bit value x, with one special case: when x equals SIGNBIT,
 * SET_QC() is invoked and ~SIGNBIT is returned instead of the negation.
 */
uint32_t HELPER(neon_qneg_s32)(CPUARMState *env, uint32_t x)
{
    if (x != SIGNBIT) {
        /* Ordinary case: plain two's-complement negation. */
        return -x;
    }

    SET_QC();
    return ~SIGNBIT;
}
/* NEON Float helpers. */
uint32_t HELPER(neon_min_f32)(uint32_t a, uint32_t b, void *fpstp)
{
float_status *fpst = fpstp;
return float32_val(float32_min(make_float32(a), make_float32(b), fpst));
}
uint32_t HELPER(neon_max_f32)(uint32_t a, uint32_t b, void *fpstp)
{
float_status *fpst = fpstp;
return float32_val(float32_max(make_float32(a), make_float32(b), fpst));
}
uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b, void *fpstp)
{
float_status *fpst = fpstp;
float32 f0 = make_float32(a);
float32 f1 = make_float32(b);
return float32_val(float32_abs(float32_sub(f0, f1, fpst)));
}
/* Floating point comparisons produce an integer result.
* Note that EQ doesn't signal InvalidOp for QNaNs but GE and GT do.
* Softfloat routines return 0/1, which we convert to the 0/-1 Neon requires.
*/
uint32_t HELPER(neon_ceq_f32)(uint32_t a, uint32_t b, void *fpstp)
{
float_status *fpst = fpstp;
return -float32_eq_quiet(make_float32(a), make_float32(b), fpst);
}
uint32_t HELPER(neon_cge_f32)(uint32_t a, uint32_t b, void *fpstp)
{
float_status *fpst = fpstp;
return -float32_le(make_float32(b), make_float32(a), fpst);
}
uint32_t HELPER(neon_cgt_f32)(uint32_t a, uint32_t b, void *fpstp)
{
float_status *fpst = fpstp;
return -float32_lt(make_float32(b), make_float32(a), fpst);
}
uint32_t HELPER(neon_acge_f32)(uint32_t a, uint32_t b, void *fpstp)
{
float_status *fpst = fpstp;
float32 f0 = float32_abs(make_float32(a));
float32 f1 = float32_abs(make_float32(b));
return -float32_le(f1, f0, fpst);
}
uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b, void *fpstp)
{
float_status *fpst = fpstp;
float32 f0 = float32_abs(make_float32(a));
float32 f1 = float32_abs(make_float32(b));
return -float32_lt(f1, f0, fpst);
}
/*
 * Extract element number N from V, where elements are SIZE bits wide and
 * element 0 occupies the least significant bits: V is shifted right by
 * N * SIZE bits and the low SIZE bits are kept.
 *
 * The mask is built from an unsigned long long 1, so SIZE must be smaller
 * than the width of unsigned long long.
 */
#define ELEM(V, N, SIZE) \
    (((1ull << (SIZE)) - 1) & ((V) >> ((SIZE) * (N))))
void HELPER(neon_qunzip8)(CPUARMState *env, uint32_t rd, uint32_t rm)
{
uint64_t zm0 = float64_val(env->vfp.regs[rm]);
uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
uint64_t zd0 = float64_val(env->vfp.regs[rd]);
uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
uint64_t d0 = ELEM(zd0, 0, 8) | (ELEM(zd0, 2, 8) << 8)
| (ELEM(zd0, 4, 8) << 16) | (ELEM(zd0, 6, 8) << 24)
| (ELEM(zd1, 0, 8) << 32) | (ELEM(zd1, 2, 8) << 40)
| (ELEM(zd1, 4, 8) << 48) | (ELEM(zd1, 6, 8) << 56);
uint64_t d1 = ELEM(zm0, 0, 8) | (ELEM(zm0, 2, 8) << 8)
| (ELEM(zm0, 4, 8) << 16) | (ELEM(zm0, 6, 8) << 24)
| (ELEM(zm1, 0, 8) << 32) | (ELEM(zm1, 2, 8) << 40)
| (ELEM(zm1, 4, 8) << 48) | (ELEM(zm1, 6, 8) << 56);
uint64_t m0 = ELEM(zd0, 1, 8) | (ELEM(zd0, 3, 8) << 8)
| (ELEM(zd0, 5, 8) << 16) | (ELEM(zd0, 7, 8) << 24)
| (ELEM(zd1, 1, 8) << 32) | (ELEM(zd1, 3, 8) << 40)
| (ELEM(zd1, 5, 8) << 48) | (ELEM(zd1, 7, 8) << 56);
uint64_t m1 = ELEM(zm0, 1, 8) | (ELEM(zm0, 3, 8) << 8)
| (ELEM(zm0, 5, 8) << 16) | (ELEM(zm0, 7, 8) << 24)
| (ELEM(zm1, 1, 8) << 32) | (ELEM(zm1, 3, 8) << 40)
| (ELEM(zm1, 5, 8) << 48) | (ELEM(zm1, 7, 8) << 56);
( run in 1.312 second using v1.01-cache-2.11-cpan-5a3173703d6 )