NaN results from the CPAN

C-sparse
                }
            }

            /* Load the initial operands.  */
            if (op == 15) {
                switch (rn) {
                case 16:
                case 17:
                    /* Integer source */
                    gen_mov_F0_vreg(0, rm);
                    break;
                case 8:
                case 9:
                    /* Compare */
                    gen_mov_F0_vreg(dp, rd);
                    gen_mov_F1_vreg(dp, rm);
                    break;
                case 10:
                case 11:
                    /* Compare with zero */
                    gen_mov_F0_vreg(dp, rd);
                    gen_vfp_F1_ld0(dp);
                    break;
                case 20:
                case 21:
                case 22:
                case 23:
                case 28:
                case 29:
                case 30:
                case 31:
                    /* Source and destination the same.  */
                    gen_mov_F0_vreg(dp, rd);
                    break;
                case 4:
                case 5:
                case 6:
                case 7:
                    /* VCVTB, VCVTT: only present with the halfprec extension,
                     * UNPREDICTABLE if bit 8 is set (we choose to UNDEF)
                     */
                    if (dp || !arm_feature(env, ARM_FEATURE_VFP_FP16)) {
                        return 1;
                    }
                    /* Otherwise fall through */
                default:
                    /* One source operand.  */
                    gen_mov_F0_vreg(dp, rm);
                    break;
                }
            } else {
                /* Two source operands.  */
                gen_mov_F0_vreg(dp, rn);
                gen_mov_F1_vreg(dp, rm);
            }

            for (;;) {
                /* Perform the calculation.  */
                switch (op) {
                case 0: /* VMLA: fd + (fn * fm) */
                    /* Note that order of inputs to the add matters for NaNs */
                    gen_vfp_F1_mul(dp);
                    gen_mov_F0_vreg(dp, rd);
                    gen_vfp_add(dp);
                    break;
                case 1: /* VMLS: fd + -(fn * fm) */
                    gen_vfp_mul(dp);
                    gen_vfp_F1_neg(dp);
                    gen_mov_F0_vreg(dp, rd);
                    gen_vfp_add(dp);
                    break;
                case 2: /* VNMLS: -fd + (fn * fm) */
                    /* Note that it isn't valid to replace (-A + B) with (B - A)
                     * or similar plausible looking simplifications
                     * because this will give wrong results for NaNs.
                     */
                    gen_vfp_F1_mul(dp);
                    gen_mov_F0_vreg(dp, rd);
                    gen_vfp_neg(dp);
                    gen_vfp_add(dp);
                    break;
                case 3: /* VNMLA: -fd + -(fn * fm) */
                    gen_vfp_mul(dp);
                    gen_vfp_F1_neg(dp);
                    gen_mov_F0_vreg(dp, rd);
                    gen_vfp_neg(dp);
                    gen_vfp_add(dp);
                    break;
                case 4: /* mul: fn * fm */
                    gen_vfp_mul(dp);
                    break;
                case 5: /* nmul: -(fn * fm) */
                    gen_vfp_mul(dp);
                    gen_vfp_neg(dp);
                    break;
                case 6: /* add: fn + fm */
                    gen_vfp_add(dp);
                    break;
                case 7: /* sub: fn - fm */
                    gen_vfp_sub(dp);
                    break;
                case 8: /* div: fn / fm */
                    gen_vfp_div(dp);
                    break;
                case 10: /* VFNMA : fd = muladd(-fd,  fn, fm) */
                case 11: /* VFNMS : fd = muladd(-fd, -fn, fm) */
                case 12: /* VFMA  : fd = muladd( fd,  fn, fm) */
                case 13: /* VFMS  : fd = muladd( fd, -fn, fm) */
                    /* These are fused multiply-add, and must be done as one
                     * floating point operation with no rounding between the
                     * multiplication and addition steps.
                     * NB that doing the negations here as separate steps is
                     * correct : an input NaN should come out with its sign bit
                     * flipped if it is a negated-input.
                     */
                    if (!arm_feature(env, ARM_FEATURE_VFP4)) {
                        return 1;
                    }
                    if (dp) {
                        TCGv_ptr fpst;
                        TCGv_i64 frd;
                        if (op & 1) {
                            /* VFNMS, VFMS */
                            gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
                        }
                        frd = tcg_temp_new_i64();
                        tcg_gen_ld_f64(frd, cpu_env, vfp_reg_offset(dp, rd));
                        if (op & 2) {
                            /* VFNMA, VFNMS */
                            gen_helper_vfp_negd(frd, frd);
                        }
                        fpst = get_fpstatus_ptr(0);
                        gen_helper_vfp_muladdd(cpu_F0d, cpu_F0d,
                                               cpu_F1d, frd, fpst);
                        tcg_temp_free_ptr(fpst);
                        tcg_temp_free_i64(frd);
                    } else {
                        TCGv_ptr fpst;
                        TCGv_i32 frd;
                        if (op & 1) {
                            /* VFNMS, VFMS */
                            gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
                        }
                        frd = tcg_temp_new_i32();
                        tcg_gen_ld_f32(frd, cpu_env, vfp_reg_offset(dp, rd));
                        if (op & 2) {
                            gen_helper_vfp_negs(frd, frd);
                        }
                        fpst = get_fpstatus_ptr(0);
                        gen_helper_vfp_muladds(cpu_F0s, cpu_F0s,
                                               cpu_F1s, frd, fpst);
                        tcg_temp_free_ptr(fpst);
                        tcg_temp_free_i32(frd);
                    }
                    break;
                case 14: /* fconst */
                    if (!arm_feature(env, ARM_FEATURE_VFP3))
                      return 1;

                    n = (insn << 12) & 0x80000000;
                    i = ((insn >> 12) & 0x70) | (insn & 0xf);
                    if (dp) {
                        if (i & 0x40)
                            i |= 0x3f80;
                        else
                            i |= 0x4000;
                        n |= i << 16;
                        tcg_gen_movi_i64(cpu_F0d, ((uint64_t)n) << 32);
                    } else {
                        if (i & 0x40)
                            i |= 0x780;
                        else
                            i |= 0x800;
( run in 1.343 second using v1.01-cache-2.11-cpan-8f98c5d2c55 )