diff --git a/target-mips/cpu.h b/target-mips/cpu.h
index e747bc8..3ec867a 100644
--- a/target-mips/cpu.h
+++ b/target-mips/cpu.h
@@ -171,20 +171,14 @@
 struct CPUMIPSState {
     TCState active_tc;
 
-    /* temporary hack for FP globals */
-#ifndef USE_HOST_FLOAT_REGS
-    fpr_t ft0;
-    fpr_t ft1;
-    fpr_t ft2;
-#endif
     CPUMIPSMVPContext *mvp;
     CPUMIPSTLBContext *tlb;
     CPUMIPSFPUContext *fpu;
     uint32_t current_tc;
 
     uint32_t SEGBITS;
-    target_ulong SEGMask;
     uint32_t PABITS;
+    target_ulong SEGMask;
     target_ulong PAMask;
 
     int32_t CP0_Index;
diff --git a/target-mips/exec.h b/target-mips/exec.h
index a7014ee..8600e79 100644
--- a/target-mips/exec.h
+++ b/target-mips/exec.h
@@ -10,29 +10,6 @@
 
 register struct CPUMIPSState *env asm(AREG0);
 
-#if defined (USE_HOST_FLOAT_REGS)
-#error "implement me."
-#else
-#define FDT0 (env->ft0.fd)
-#define FDT1 (env->ft1.fd)
-#define FDT2 (env->ft2.fd)
-#define FST0 (env->ft0.fs[FP_ENDIAN_IDX])
-#define FST1 (env->ft1.fs[FP_ENDIAN_IDX])
-#define FST2 (env->ft2.fs[FP_ENDIAN_IDX])
-#define FSTH0 (env->ft0.fs[!FP_ENDIAN_IDX])
-#define FSTH1 (env->ft1.fs[!FP_ENDIAN_IDX])
-#define FSTH2 (env->ft2.fs[!FP_ENDIAN_IDX])
-#define DT0 (env->ft0.d)
-#define DT1 (env->ft1.d)
-#define DT2 (env->ft2.d)
-#define WT0 (env->ft0.w[FP_ENDIAN_IDX])
-#define WT1 (env->ft1.w[FP_ENDIAN_IDX])
-#define WT2 (env->ft2.w[FP_ENDIAN_IDX])
-#define WTH0 (env->ft0.w[!FP_ENDIAN_IDX])
-#define WTH1 (env->ft1.w[!FP_ENDIAN_IDX])
-#define WTH2 (env->ft2.w[!FP_ENDIAN_IDX])
-#endif
-
 #include "cpu.h"
 #include "exec-all.h"
 
diff --git a/target-mips/helper.h b/target-mips/helper.h
index 7dfc259..9d4ca82 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -159,67 +159,85 @@
 DEF_HELPER(target_ulong, do_cfc1, (uint32_t reg))
 DEF_HELPER(void, do_ctc1, (target_ulong t0, uint32_t reg))
 
-DEF_HELPER(void, do_float_cvtd_s, (void))
-DEF_HELPER(void, do_float_cvtd_w, (void))
-DEF_HELPER(void, do_float_cvtd_l, (void))
-DEF_HELPER(void, do_float_cvtl_d, (void))
-DEF_HELPER(void, do_float_cvtl_s, (void))
-DEF_HELPER(void, do_float_cvtps_pw, (void))
-DEF_HELPER(void, do_float_cvtpw_ps, (void))
-DEF_HELPER(void, do_float_cvts_d, (void))
-DEF_HELPER(void, do_float_cvts_w, (void))
-DEF_HELPER(void, do_float_cvts_l, (void))
-DEF_HELPER(void, do_float_cvts_pl, (void))
-DEF_HELPER(void, do_float_cvts_pu, (void))
-DEF_HELPER(void, do_float_cvtw_s, (void))
-DEF_HELPER(void, do_float_cvtw_d, (void))
+DEF_HELPER(uint64_t, do_float_cvtd_s, (uint32_t fst0))
+DEF_HELPER(uint64_t, do_float_cvtd_w, (uint32_t wt0))
+DEF_HELPER(uint64_t, do_float_cvtd_l, (uint64_t dt0))
+DEF_HELPER(uint64_t, do_float_cvtl_d, (uint64_t fdt0))
+DEF_HELPER(uint64_t, do_float_cvtl_s, (uint32_t fst0))
+DEF_HELPER(uint64_t, do_float_cvtps_pw, (uint64_t dt0))
+DEF_HELPER(uint64_t, do_float_cvtpw_ps, (uint64_t fdt0))
+DEF_HELPER(uint32_t, do_float_cvts_d, (uint64_t fdt0))
+DEF_HELPER(uint32_t, do_float_cvts_w, (uint32_t wt0))
+DEF_HELPER(uint32_t, do_float_cvts_l, (uint64_t dt0))
+DEF_HELPER(uint32_t, do_float_cvts_pl, (uint32_t wt0))
+DEF_HELPER(uint32_t, do_float_cvts_pu, (uint32_t wth0))
+DEF_HELPER(uint32_t, do_float_cvtw_s, (uint32_t fst0))
+DEF_HELPER(uint32_t, do_float_cvtw_d, (uint64_t fdt0))
 
-DEF_HELPER(void, do_float_addr_ps, (void))
-DEF_HELPER(void, do_float_mulr_ps, (void))
+DEF_HELPER(uint64_t, do_float_addr_ps, (uint64_t fdt0, uint64_t fdt1))
+DEF_HELPER(uint64_t, do_float_mulr_ps, (uint64_t fdt0, uint64_t fdt1))
 
-#define FOP_PROTO(op)                            \
-DEF_HELPER(void, do_float_ ## op ## _s, (void))  \
-DEF_HELPER(void, do_float_ ## op ## _d, (void))
+#define FOP_PROTO(op)                                          \
+DEF_HELPER(uint64_t, do_float_ ## op ## l_s, (uint32_t fst0))  \
+DEF_HELPER(uint64_t, do_float_ ## op ## l_d, (uint64_t fdt0))  \
+DEF_HELPER(uint32_t, do_float_ ## op ## w_s, (uint32_t fst0))  \
+DEF_HELPER(uint32_t, do_float_ ## op ## w_d, (uint64_t fdt0))
+FOP_PROTO(round)
+FOP_PROTO(trunc)
+FOP_PROTO(ceil)
+FOP_PROTO(floor)
+#undef FOP_PROTO
+
+#define FOP_PROTO(op)                                          \
+DEF_HELPER(uint32_t, do_float_ ## op ## _s, (uint32_t fst0))   \
+DEF_HELPER(uint64_t, do_float_ ## op ## _d, (uint64_t fdt0))
 FOP_PROTO(sqrt)
-FOP_PROTO(roundl)
-FOP_PROTO(roundw)
-FOP_PROTO(truncl)
-FOP_PROTO(truncw)
-FOP_PROTO(ceill)
-FOP_PROTO(ceilw)
-FOP_PROTO(floorl)
-FOP_PROTO(floorw)
 FOP_PROTO(rsqrt)
 FOP_PROTO(recip)
 #undef FOP_PROTO
 
-#define FOP_PROTO(op)                            \
-DEF_HELPER(void, do_float_ ## op ## _s, (void))  \
-DEF_HELPER(void, do_float_ ## op ## _d, (void))  \
-DEF_HELPER(void, do_float_ ## op ## _ps, (void))
+#define FOP_PROTO(op)                                          \
+DEF_HELPER(uint32_t, do_float_ ## op ## _s, (uint32_t fst0))   \
+DEF_HELPER(uint64_t, do_float_ ## op ## _d, (uint64_t fdt0))   \
+DEF_HELPER(uint64_t, do_float_ ## op ## _ps, (uint64_t fdt0))
+FOP_PROTO(abs)
+FOP_PROTO(chs)
+FOP_PROTO(recip1)
+FOP_PROTO(rsqrt1)
+#undef FOP_PROTO
+
+#define FOP_PROTO(op)                                                       \
+DEF_HELPER(uint32_t, do_float_ ## op ## _s, (uint32_t fst0, uint32_t fst2)) \
+DEF_HELPER(uint64_t, do_float_ ## op ## _d, (uint64_t fdt0, uint64_t fdt2)) \
+DEF_HELPER(uint64_t, do_float_ ## op ## _ps, (uint64_t fdt0, uint64_t fdt2))
 FOP_PROTO(add)
 FOP_PROTO(sub)
 FOP_PROTO(mul)
 FOP_PROTO(div)
-FOP_PROTO(abs)
-FOP_PROTO(chs)
+FOP_PROTO(recip2)
+FOP_PROTO(rsqrt2)
+#undef FOP_PROTO
+
+#define FOP_PROTO(op)                                                       \
+DEF_HELPER(uint32_t, do_float_ ## op ## _s, (uint32_t fst0, uint32_t fst1,  \
+                                             uint32_t fst2))                \
+DEF_HELPER(uint64_t, do_float_ ## op ## _d, (uint64_t fdt0, uint64_t fdt1,  \
+                                             uint64_t fdt2))                \
+DEF_HELPER(uint64_t, do_float_ ## op ## _ps, (uint64_t fdt0, uint64_t fdt1, \
+                                              uint64_t fdt2))
 FOP_PROTO(muladd)
 FOP_PROTO(mulsub)
 FOP_PROTO(nmuladd)
 FOP_PROTO(nmulsub)
-FOP_PROTO(recip1)
-FOP_PROTO(recip2)
-FOP_PROTO(rsqrt1)
-FOP_PROTO(rsqrt2)
 #undef FOP_PROTO
 
-#define FOP_PROTO(op)                            \
-DEF_HELPER(void, do_cmp_d_ ## op, (long cc))     \
-DEF_HELPER(void, do_cmpabs_d_ ## op, (long cc))  \
-DEF_HELPER(void, do_cmp_s_ ## op, (long cc))     \
-DEF_HELPER(void, do_cmpabs_s_ ## op, (long cc))  \
-DEF_HELPER(void, do_cmp_ps_ ## op, (long cc))    \
-DEF_HELPER(void, do_cmpabs_ps_ ## op, (long cc))
+#define FOP_PROTO(op)                                                        \
+DEF_HELPER(void, do_cmp_d_ ## op, (uint64_t fdt0, uint64_t fdt1, int cc))    \
+DEF_HELPER(void, do_cmpabs_d_ ## op, (uint64_t fdt0, uint64_t fdt1, int cc)) \
+DEF_HELPER(void, do_cmp_s_ ## op, (uint32_t fst0, uint32_t fst1, int cc))    \
+DEF_HELPER(void, do_cmpabs_s_ ## op, (uint32_t fst0, uint32_t fst1, int cc)) \
+DEF_HELPER(void, do_cmp_ps_ ## op, (uint64_t fdt0, uint64_t fdt1, int cc))   \
+DEF_HELPER(void, do_cmpabs_ps_ ## op, (uint64_t fdt0, uint64_t fdt1, int cc))
 FOP_PROTO(f)
 FOP_PROTO(un)
 FOP_PROTO(eq)
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index 3098563..008fb2c 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -2121,380 +2121,555 @@
    "d" suffix, 32bit integer "w", 64bit integer "l", paired single "ps",
    paired single lower "pl", paired single upper "pu".  */
 
-#define FLOAT_OP(name, p) void do_float_##name##_##p(void)
-
 /* unary operations, modifying fp status  */
-#define FLOAT_UNOP(name)  \
-FLOAT_OP(name, d)         \
-{                         \
-    FDT2 = float64_ ## name(FDT0, &env->fpu->fp_status); \
-}                         \
-FLOAT_OP(name, s)         \
-{                         \
-    FST2 = float32_ ## name(FST0, &env->fpu->fp_status); \
-}
-FLOAT_UNOP(sqrt)
-#undef FLOAT_UNOP
-
-FLOAT_OP(cvtd, s)
+uint64_t do_float_sqrt_d(uint64_t fdt0)
 {
-    set_float_exception_flags(0, &env->fpu->fp_status);
-    FDT2 = float32_to_float64(FST0, &env->fpu->fp_status);
-    update_fcr31();
-}
-FLOAT_OP(cvtd, w)
-{
-    set_float_exception_flags(0, &env->fpu->fp_status);
-    FDT2 = int32_to_float64(WT0, &env->fpu->fp_status);
-    update_fcr31();
-}
-FLOAT_OP(cvtd, l)
-{
-    set_float_exception_flags(0, &env->fpu->fp_status);
-    FDT2 = int64_to_float64(DT0, &env->fpu->fp_status);
-    update_fcr31();
-}
-FLOAT_OP(cvtl, d)
-{
-    set_float_exception_flags(0, &env->fpu->fp_status);
-    DT2 = float64_to_int64(FDT0, &env->fpu->fp_status);
-    update_fcr31();
-    if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
-        DT2 = FLOAT_SNAN64;
-}
-FLOAT_OP(cvtl, s)
-{
-    set_float_exception_flags(0, &env->fpu->fp_status);
-    DT2 = float32_to_int64(FST0, &env->fpu->fp_status);
-    update_fcr31();
-    if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
-        DT2 = FLOAT_SNAN64;
+    return float64_sqrt(fdt0, &env->fpu->fp_status);
 }
 
-FLOAT_OP(cvtps, pw)
+uint32_t do_float_sqrt_s(uint32_t fst0)
 {
-    set_float_exception_flags(0, &env->fpu->fp_status);
-    FST2 = int32_to_float32(WT0, &env->fpu->fp_status);
-    FSTH2 = int32_to_float32(WTH0, &env->fpu->fp_status);
-    update_fcr31();
-}
-FLOAT_OP(cvtpw, ps)
-{
-    set_float_exception_flags(0, &env->fpu->fp_status);
-    WT2 = float32_to_int32(FST0, &env->fpu->fp_status);
-    WTH2 = float32_to_int32(FSTH0, &env->fpu->fp_status);
-    update_fcr31();
-    if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
-        WT2 = FLOAT_SNAN32;
-}
-FLOAT_OP(cvts, d)
-{
-    set_float_exception_flags(0, &env->fpu->fp_status);
-    FST2 = float64_to_float32(FDT0, &env->fpu->fp_status);
-    update_fcr31();
-}
-FLOAT_OP(cvts, w)
-{
-    set_float_exception_flags(0, &env->fpu->fp_status);
-    FST2 = int32_to_float32(WT0, &env->fpu->fp_status);
-    update_fcr31();
-}
-FLOAT_OP(cvts, l)
-{
-    set_float_exception_flags(0, &env->fpu->fp_status);
-    FST2 = int64_to_float32(DT0, &env->fpu->fp_status);
-    update_fcr31();
-}
-FLOAT_OP(cvts, pl)
-{
-    set_float_exception_flags(0, &env->fpu->fp_status);
-    WT2 = WT0;
-    update_fcr31();
-}
-FLOAT_OP(cvts, pu)
-{
-    set_float_exception_flags(0, &env->fpu->fp_status);
-    WT2 = WTH0;
-    update_fcr31();
-}
-FLOAT_OP(cvtw, s)
-{
-    set_float_exception_flags(0, &env->fpu->fp_status);
-    WT2 = float32_to_int32(FST0, &env->fpu->fp_status);
-    update_fcr31();
-    if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
-        WT2 = FLOAT_SNAN32;
-}
-FLOAT_OP(cvtw, d)
-{
-    set_float_exception_flags(0, &env->fpu->fp_status);
-    WT2 = float64_to_int32(FDT0, &env->fpu->fp_status);
-    update_fcr31();
-    if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
-        WT2 = FLOAT_SNAN32;
+    return float32_sqrt(fst0, &env->fpu->fp_status);
 }
 
-FLOAT_OP(roundl, d)
+uint64_t do_float_cvtd_s(uint32_t fst0)
 {
+    uint64_t fdt2;
+
+    set_float_exception_flags(0, &env->fpu->fp_status);
+    fdt2 = float32_to_float64(fst0, &env->fpu->fp_status);
+    update_fcr31();
+    return fdt2;
+}
+
+/* Reset the flags, run int32_to_float64() on wt0, then update_fcr31(). */
+uint64_t do_float_cvtd_w(uint32_t wt0)
+{
+    uint64_t result;
+    set_float_exception_flags(0, &env->fpu->fp_status);
+    result = int32_to_float64(wt0, &env->fpu->fp_status);
+    update_fcr31();
+    return result;
+}
+
+/* Reset the flags, run int64_to_float64() on dt0, then update_fcr31(). */
+uint64_t do_float_cvtd_l(uint64_t dt0)
+{
+    uint64_t result;
+    set_float_exception_flags(0, &env->fpu->fp_status);
+    result = int64_to_float64(dt0, &env->fpu->fp_status);
+    update_fcr31();
+    return result;
+}
+
+/* float64_to_int64(fdt0); FLOAT_SNAN64 on an overflow/invalid FCR31 cause. */
+uint64_t do_float_cvtl_d(uint64_t fdt0)
+{
+    uint64_t result;
+    set_float_exception_flags(0, &env->fpu->fp_status);
+    result = float64_to_int64(fdt0, &env->fpu->fp_status);
+    update_fcr31();
+    if (!(GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID)))
+        return result;
+    return FLOAT_SNAN64;
+}
+
+/* float32_to_int64(fst0); FLOAT_SNAN64 on an overflow/invalid FCR31 cause. */
+uint64_t do_float_cvtl_s(uint32_t fst0)
+{
+    uint64_t result;
+    set_float_exception_flags(0, &env->fpu->fp_status);
+    result = float32_to_int64(fst0, &env->fpu->fp_status);
+    update_fcr31();
+    if (!(GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID)))
+        return result;
+    return FLOAT_SNAN64;
+}
+
+/* Runs int32_to_float32() on each 32-bit half of dt0 and repacks them. */
+uint64_t do_float_cvtps_pw(uint64_t dt0)
+{
+    uint32_t lo, hi;
+
+    set_float_exception_flags(0, &env->fpu->fp_status);
+    lo = int32_to_float32(dt0 & 0xffffffff, &env->fpu->fp_status);
+    hi = int32_to_float32(dt0 >> 32, &env->fpu->fp_status);
+    update_fcr31();
+    return ((uint64_t)hi << 32) | lo;
+}
+
+/* float32_to_int32() per half; both become FLOAT_SNAN32 on overflow/invalid. */
+uint64_t do_float_cvtpw_ps(uint64_t fdt0)
+{
+    uint32_t lo;
+    uint32_t hi;
+
+    set_float_exception_flags(0, &env->fpu->fp_status);
+    lo = float32_to_int32(fdt0 & 0xffffffff, &env->fpu->fp_status);
+    hi = float32_to_int32(fdt0 >> 32, &env->fpu->fp_status);
+    update_fcr31();
+    if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID)) {
+        lo = hi = FLOAT_SNAN32;
+    }
+    return ((uint64_t)hi << 32) | lo;
+}
+
+/* Reset the flags, run float64_to_float32() on fdt0, then update_fcr31(). */
+uint32_t do_float_cvts_d(uint64_t fdt0)
+{
+    uint32_t result;
+    set_float_exception_flags(0, &env->fpu->fp_status);
+    result = float64_to_float32(fdt0, &env->fpu->fp_status);
+    update_fcr31();
+    return result;
+}
+
+/* Reset the flags, run int32_to_float32() on wt0, then update_fcr31(). */
+uint32_t do_float_cvts_w(uint32_t wt0)
+{
+    uint32_t result;
+    set_float_exception_flags(0, &env->fpu->fp_status);
+    result = int32_to_float32(wt0, &env->fpu->fp_status);
+    update_fcr31();
+    return result;
+}
+
+/* Reset the flags, run int64_to_float32() on dt0, then update_fcr31(). */
+uint32_t do_float_cvts_l(uint64_t dt0)
+{
+    uint32_t result;
+    set_float_exception_flags(0, &env->fpu->fp_status);
+    result = int64_to_float32(dt0, &env->fpu->fp_status);
+    update_fcr31();
+    return result;
+}
+
+/* Returns wt0 unchanged; flags are still reset and update_fcr31() called. */
+uint32_t do_float_cvts_pl(uint32_t wt0)
+{
+    uint32_t result = wt0;
+
+    set_float_exception_flags(0, &env->fpu->fp_status);
+    update_fcr31();
+    return result;
+}
+
+/* Returns wth0 unchanged; flags are still reset and update_fcr31() called. */
+uint32_t do_float_cvts_pu(uint32_t wth0)
+{
+    uint32_t result = wth0;
+
+    set_float_exception_flags(0, &env->fpu->fp_status);
+    update_fcr31();
+    return result;
+}
+
+/* float32_to_int32(fst0); FLOAT_SNAN32 on an overflow/invalid FCR31 cause. */
+uint32_t do_float_cvtw_s(uint32_t fst0)
+{
+    uint32_t result;
+    set_float_exception_flags(0, &env->fpu->fp_status);
+    result = float32_to_int32(fst0, &env->fpu->fp_status);
+    update_fcr31();
+    if (!(GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID)))
+        return result;
+    return FLOAT_SNAN32;
+}
+
+/* float64_to_int32(fdt0); FLOAT_SNAN32 on an overflow/invalid FCR31 cause. */
+uint32_t do_float_cvtw_d(uint64_t fdt0)
+{
+    uint32_t result;
+    set_float_exception_flags(0, &env->fpu->fp_status);
+    result = float64_to_int32(fdt0, &env->fpu->fp_status);
+    update_fcr31();
+    if (!(GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID)))
+        return result;
+    return FLOAT_SNAN32;
+}
+
+uint64_t do_float_roundl_d(uint64_t fdt0)
+{
+    uint64_t dt2;
+
     set_float_rounding_mode(float_round_nearest_even, &env->fpu->fp_status);
-    DT2 = float64_to_int64(FDT0, &env->fpu->fp_status);
+    dt2 = float64_to_int64(fdt0, &env->fpu->fp_status);
     RESTORE_ROUNDING_MODE;
     update_fcr31();
     if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
-        DT2 = FLOAT_SNAN64;
-}
-FLOAT_OP(roundl, s)
-{
-    set_float_rounding_mode(float_round_nearest_even, &env->fpu->fp_status);
-    DT2 = float32_to_int64(FST0, &env->fpu->fp_status);
-    RESTORE_ROUNDING_MODE;
-    update_fcr31();
-    if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
-        DT2 = FLOAT_SNAN64;
-}
-FLOAT_OP(roundw, d)
-{
-    set_float_rounding_mode(float_round_nearest_even, &env->fpu->fp_status);
-    WT2 = float64_to_int32(FDT0, &env->fpu->fp_status);
-    RESTORE_ROUNDING_MODE;
-    update_fcr31();
-    if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
-        WT2 = FLOAT_SNAN32;
-}
-FLOAT_OP(roundw, s)
-{
-    set_float_rounding_mode(float_round_nearest_even, &env->fpu->fp_status);
-    WT2 = float32_to_int32(FST0, &env->fpu->fp_status);
-    RESTORE_ROUNDING_MODE;
-    update_fcr31();
-    if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
-        WT2 = FLOAT_SNAN32;
+        dt2 = FLOAT_SNAN64;
+    return dt2;
 }
 
-FLOAT_OP(truncl, d)
+uint64_t do_float_roundl_s(uint32_t fst0)
 {
-    DT2 = float64_to_int64_round_to_zero(FDT0, &env->fpu->fp_status);
+    uint64_t dt2;
+
+    set_float_rounding_mode(float_round_nearest_even, &env->fpu->fp_status);
+    dt2 = float32_to_int64(fst0, &env->fpu->fp_status);
+    RESTORE_ROUNDING_MODE;
     update_fcr31();
     if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
-        DT2 = FLOAT_SNAN64;
-}
-FLOAT_OP(truncl, s)
-{
-    DT2 = float32_to_int64_round_to_zero(FST0, &env->fpu->fp_status);
-    update_fcr31();
-    if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
-        DT2 = FLOAT_SNAN64;
-}
-FLOAT_OP(truncw, d)
-{
-    WT2 = float64_to_int32_round_to_zero(FDT0, &env->fpu->fp_status);
-    update_fcr31();
-    if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
-        WT2 = FLOAT_SNAN32;
-}
-FLOAT_OP(truncw, s)
-{
-    WT2 = float32_to_int32_round_to_zero(FST0, &env->fpu->fp_status);
-    update_fcr31();
-    if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
-        WT2 = FLOAT_SNAN32;
+        dt2 = FLOAT_SNAN64;
+    return dt2;
 }
 
-FLOAT_OP(ceill, d)
+uint32_t do_float_roundw_d(uint64_t fdt0)
 {
-    set_float_rounding_mode(float_round_up, &env->fpu->fp_status);
-    DT2 = float64_to_int64(FDT0, &env->fpu->fp_status);
+    uint32_t wt2;
+
+    set_float_rounding_mode(float_round_nearest_even, &env->fpu->fp_status);
+    wt2 = float64_to_int32(fdt0, &env->fpu->fp_status);
     RESTORE_ROUNDING_MODE;
     update_fcr31();
     if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
-        DT2 = FLOAT_SNAN64;
-}
-FLOAT_OP(ceill, s)
-{
-    set_float_rounding_mode(float_round_up, &env->fpu->fp_status);
-    DT2 = float32_to_int64(FST0, &env->fpu->fp_status);
-    RESTORE_ROUNDING_MODE;
-    update_fcr31();
-    if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
-        DT2 = FLOAT_SNAN64;
-}
-FLOAT_OP(ceilw, d)
-{
-    set_float_rounding_mode(float_round_up, &env->fpu->fp_status);
-    WT2 = float64_to_int32(FDT0, &env->fpu->fp_status);
-    RESTORE_ROUNDING_MODE;
-    update_fcr31();
-    if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
-        WT2 = FLOAT_SNAN32;
-}
-FLOAT_OP(ceilw, s)
-{
-    set_float_rounding_mode(float_round_up, &env->fpu->fp_status);
-    WT2 = float32_to_int32(FST0, &env->fpu->fp_status);
-    RESTORE_ROUNDING_MODE;
-    update_fcr31();
-    if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
-        WT2 = FLOAT_SNAN32;
+        wt2 = FLOAT_SNAN32;
+    return wt2;
 }
 
-FLOAT_OP(floorl, d)
+uint32_t do_float_roundw_s(uint32_t fst0)
 {
-    set_float_rounding_mode(float_round_down, &env->fpu->fp_status);
-    DT2 = float64_to_int64(FDT0, &env->fpu->fp_status);
+    uint32_t wt2;
+
+    set_float_rounding_mode(float_round_nearest_even, &env->fpu->fp_status);
+    wt2 = float32_to_int32(fst0, &env->fpu->fp_status);
     RESTORE_ROUNDING_MODE;
     update_fcr31();
     if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
-        DT2 = FLOAT_SNAN64;
+        wt2 = FLOAT_SNAN32;
+    return wt2;
 }
-FLOAT_OP(floorl, s)
+
+uint64_t do_float_truncl_d(uint64_t fdt0)
 {
-    set_float_rounding_mode(float_round_down, &env->fpu->fp_status);
-    DT2 = float32_to_int64(FST0, &env->fpu->fp_status);
-    RESTORE_ROUNDING_MODE;
+    uint64_t dt2;
+
+    dt2 = float64_to_int64_round_to_zero(fdt0, &env->fpu->fp_status);
     update_fcr31();
     if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
-        DT2 = FLOAT_SNAN64;
+        dt2 = FLOAT_SNAN64;
+    return dt2;
 }
-FLOAT_OP(floorw, d)
+
+uint64_t do_float_truncl_s(uint32_t fst0)
 {
-    set_float_rounding_mode(float_round_down, &env->fpu->fp_status);
-    WT2 = float64_to_int32(FDT0, &env->fpu->fp_status);
-    RESTORE_ROUNDING_MODE;
+    uint64_t dt2;
+
+    dt2 = float32_to_int64_round_to_zero(fst0, &env->fpu->fp_status);
     update_fcr31();
     if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
-        WT2 = FLOAT_SNAN32;
+        dt2 = FLOAT_SNAN64;
+    return dt2;
 }
-FLOAT_OP(floorw, s)
+
+uint32_t do_float_truncw_d(uint64_t fdt0)
 {
-    set_float_rounding_mode(float_round_down, &env->fpu->fp_status);
-    WT2 = float32_to_int32(FST0, &env->fpu->fp_status);
+    uint32_t wt2;
+
+    wt2 = float64_to_int32_round_to_zero(fdt0, &env->fpu->fp_status);
+    update_fcr31();
+    if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
+        wt2 = FLOAT_SNAN32;
+    return wt2;
+}
+
+uint32_t do_float_truncw_s(uint32_t fst0)
+{
+    uint32_t wt2;
+
+    wt2 = float32_to_int32_round_to_zero(fst0, &env->fpu->fp_status);
+    update_fcr31();
+    if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
+        wt2 = FLOAT_SNAN32;
+    return wt2;
+}
+
+uint64_t do_float_ceill_d(uint64_t fdt0)
+{
+    uint64_t dt2;
+
+    set_float_rounding_mode(float_round_up, &env->fpu->fp_status);
+    dt2 = float64_to_int64(fdt0, &env->fpu->fp_status);
     RESTORE_ROUNDING_MODE;
     update_fcr31();
     if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
-        WT2 = FLOAT_SNAN32;
+        dt2 = FLOAT_SNAN64;
+    return dt2;
+}
+
+uint64_t do_float_ceill_s(uint32_t fst0)
+{
+    uint64_t dt2;
+
+    set_float_rounding_mode(float_round_up, &env->fpu->fp_status);
+    dt2 = float32_to_int64(fst0, &env->fpu->fp_status);
+    RESTORE_ROUNDING_MODE;
+    update_fcr31();
+    if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
+        dt2 = FLOAT_SNAN64;
+    return dt2;
+}
+
+uint32_t do_float_ceilw_d(uint64_t fdt0)
+{
+    uint32_t wt2;
+
+    set_float_rounding_mode(float_round_up, &env->fpu->fp_status);
+    wt2 = float64_to_int32(fdt0, &env->fpu->fp_status);
+    RESTORE_ROUNDING_MODE;
+    update_fcr31();
+    if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
+        wt2 = FLOAT_SNAN32;
+    return wt2;
+}
+
+uint32_t do_float_ceilw_s(uint32_t fst0)
+{
+    uint32_t wt2;
+
+    set_float_rounding_mode(float_round_up, &env->fpu->fp_status);
+    wt2 = float32_to_int32(fst0, &env->fpu->fp_status);
+    RESTORE_ROUNDING_MODE;
+    update_fcr31();
+    if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
+        wt2 = FLOAT_SNAN32;
+    return wt2;
+}
+
+uint64_t do_float_floorl_d(uint64_t fdt0)
+{
+    uint64_t dt2;
+
+    set_float_rounding_mode(float_round_down, &env->fpu->fp_status);
+    dt2 = float64_to_int64(fdt0, &env->fpu->fp_status);
+    RESTORE_ROUNDING_MODE;
+    update_fcr31();
+    if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
+        dt2 = FLOAT_SNAN64;
+    return dt2;
+}
+
+uint64_t do_float_floorl_s(uint32_t fst0)
+{
+    uint64_t dt2;
+
+    set_float_rounding_mode(float_round_down, &env->fpu->fp_status);
+    dt2 = float32_to_int64(fst0, &env->fpu->fp_status);
+    RESTORE_ROUNDING_MODE;
+    update_fcr31();
+    if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
+        dt2 = FLOAT_SNAN64;
+    return dt2;
+}
+
+uint32_t do_float_floorw_d(uint64_t fdt0)
+{
+    uint32_t wt2;
+
+    set_float_rounding_mode(float_round_down, &env->fpu->fp_status);
+    wt2 = float64_to_int32(fdt0, &env->fpu->fp_status);
+    RESTORE_ROUNDING_MODE;
+    update_fcr31();
+    if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
+        wt2 = FLOAT_SNAN32;
+    return wt2;
+}
+
+uint32_t do_float_floorw_s(uint32_t fst0)
+{
+    uint32_t wt2;
+
+    set_float_rounding_mode(float_round_down, &env->fpu->fp_status);
+    wt2 = float32_to_int32(fst0, &env->fpu->fp_status);
+    RESTORE_ROUNDING_MODE;
+    update_fcr31();
+    if (GET_FP_CAUSE(env->fpu->fcr31) & (FP_OVERFLOW | FP_INVALID))
+        wt2 = FLOAT_SNAN32;
+    return wt2;
 }
 
 /* unary operations, not modifying fp status  */
-#define FLOAT_UNOP(name)  \
-FLOAT_OP(name, d)         \
-{                         \
-    FDT2 = float64_ ## name(FDT0);   \
-}                         \
-FLOAT_OP(name, s)         \
-{                         \
-    FST2 = float32_ ## name(FST0);   \
-}                         \
-FLOAT_OP(name, ps)        \
-{                         \
-    FST2 = float32_ ## name(FST0);   \
-    FSTH2 = float32_ ## name(FSTH0); \
+#define FLOAT_UNOP(name)                                       \
+uint64_t do_float_ ## name ## _d(uint64_t fdt0)                \
+{                                                              \
+    return float64_ ## name(fdt0);                             \
+}                                                              \
+uint32_t do_float_ ## name ## _s(uint32_t fst0)                \
+{                                                              \
+    return float32_ ## name(fst0);                             \
+}                                                              \
+uint64_t do_float_ ## name ## _ps(uint64_t fdt0)               \
+{                                                              \
+    uint32_t wt0;                                              \
+    uint32_t wth0;                                             \
+                                                               \
+    wt0 = float32_ ## name(fdt0 & 0XFFFFFFFF);                 \
+    wth0 = float32_ ## name(fdt0 >> 32);                       \
+    return ((uint64_t)wth0 << 32) | wt0;                       \
 }
 FLOAT_UNOP(abs)
 FLOAT_UNOP(chs)
 #undef FLOAT_UNOP
 
 /* MIPS specific unary operations */
-FLOAT_OP(recip, d)
+uint64_t do_float_recip_d(uint64_t fdt0)
 {
+    uint64_t fdt2;
+
     set_float_exception_flags(0, &env->fpu->fp_status);
-    FDT2 = float64_div(FLOAT_ONE64, FDT0, &env->fpu->fp_status);
+    fdt2 = float64_div(FLOAT_ONE64, fdt0, &env->fpu->fp_status);
     update_fcr31();
-}
-FLOAT_OP(recip, s)
-{
-    set_float_exception_flags(0, &env->fpu->fp_status);
-    FST2 = float32_div(FLOAT_ONE32, FST0, &env->fpu->fp_status);
-    update_fcr31();
+    return fdt2;
 }
 
-FLOAT_OP(rsqrt, d)
+uint32_t do_float_recip_s(uint32_t fst0)
 {
+    uint32_t fst2;
+
     set_float_exception_flags(0, &env->fpu->fp_status);
-    FDT2 = float64_sqrt(FDT0, &env->fpu->fp_status);
-    FDT2 = float64_div(FLOAT_ONE64, FDT2, &env->fpu->fp_status);
+    fst2 = float32_div(FLOAT_ONE32, fst0, &env->fpu->fp_status);
     update_fcr31();
-}
-FLOAT_OP(rsqrt, s)
-{
-    set_float_exception_flags(0, &env->fpu->fp_status);
-    FST2 = float32_sqrt(FST0, &env->fpu->fp_status);
-    FST2 = float32_div(FLOAT_ONE32, FST2, &env->fpu->fp_status);
-    update_fcr31();
+    return fst2;
 }
 
-FLOAT_OP(recip1, d)
+uint64_t do_float_rsqrt_d(uint64_t fdt0)
 {
+    uint64_t fdt2;
+
     set_float_exception_flags(0, &env->fpu->fp_status);
-    FDT2 = float64_div(FLOAT_ONE64, FDT0, &env->fpu->fp_status);
+    fdt2 = float64_sqrt(fdt0, &env->fpu->fp_status);
+    fdt2 = float64_div(FLOAT_ONE64, fdt2, &env->fpu->fp_status);
     update_fcr31();
-}
-FLOAT_OP(recip1, s)
-{
-    set_float_exception_flags(0, &env->fpu->fp_status);
-    FST2 = float32_div(FLOAT_ONE32, FST0, &env->fpu->fp_status);
-    update_fcr31();
-}
-FLOAT_OP(recip1, ps)
-{
-    set_float_exception_flags(0, &env->fpu->fp_status);
-    FST2 = float32_div(FLOAT_ONE32, FST0, &env->fpu->fp_status);
-    FSTH2 = float32_div(FLOAT_ONE32, FSTH0, &env->fpu->fp_status);
-    update_fcr31();
+    return fdt2;
 }
 
-FLOAT_OP(rsqrt1, d)
+uint32_t do_float_rsqrt_s(uint32_t fst0)
 {
+    uint32_t fst2;
+
     set_float_exception_flags(0, &env->fpu->fp_status);
-    FDT2 = float64_sqrt(FDT0, &env->fpu->fp_status);
-    FDT2 = float64_div(FLOAT_ONE64, FDT2, &env->fpu->fp_status);
+    fst2 = float32_sqrt(fst0, &env->fpu->fp_status);
+    fst2 = float32_div(FLOAT_ONE32, fst2, &env->fpu->fp_status);
     update_fcr31();
+    return fst2;
 }
-FLOAT_OP(rsqrt1, s)
+
+uint64_t do_float_recip1_d(uint64_t fdt0)
 {
+    uint64_t fdt2;
+
     set_float_exception_flags(0, &env->fpu->fp_status);
-    FST2 = float32_sqrt(FST0, &env->fpu->fp_status);
-    FST2 = float32_div(FLOAT_ONE32, FST2, &env->fpu->fp_status);
+    fdt2 = float64_div(FLOAT_ONE64, fdt0, &env->fpu->fp_status);
     update_fcr31();
+    return fdt2;
 }
-FLOAT_OP(rsqrt1, ps)
+
+uint32_t do_float_recip1_s(uint32_t fst0)
 {
+    uint32_t fst2;
+
     set_float_exception_flags(0, &env->fpu->fp_status);
-    FST2 = float32_sqrt(FST0, &env->fpu->fp_status);
-    FSTH2 = float32_sqrt(FSTH0, &env->fpu->fp_status);
-    FST2 = float32_div(FLOAT_ONE32, FST2, &env->fpu->fp_status);
-    FSTH2 = float32_div(FLOAT_ONE32, FSTH2, &env->fpu->fp_status);
+    fst2 = float32_div(FLOAT_ONE32, fst0, &env->fpu->fp_status);
     update_fcr31();
+    return fst2;
 }
 
+/* Divides FLOAT_ONE32 by each 32-bit half of fdt0 and repacks the pair. */
+uint64_t do_float_recip1_ps(uint64_t fdt0)
+{
+    uint32_t lo, hi;
+
+    set_float_exception_flags(0, &env->fpu->fp_status);
+    lo = float32_div(FLOAT_ONE32, fdt0 & 0xffffffff, &env->fpu->fp_status);
+    hi = float32_div(FLOAT_ONE32, fdt0 >> 32, &env->fpu->fp_status);
+    update_fcr31();
+    return ((uint64_t)hi << 32) | lo;
+}
+
+/* FLOAT_ONE64 divided by float64_sqrt(fdt0); update_fcr31() afterwards. */
+uint64_t do_float_rsqrt1_d(uint64_t fdt0)
+{
+    uint64_t root, result;
+    set_float_exception_flags(0, &env->fpu->fp_status);
+    root = float64_sqrt(fdt0, &env->fpu->fp_status);
+    result = float64_div(FLOAT_ONE64, root, &env->fpu->fp_status);
+    update_fcr31();
+    return result;
+}
+
+/* FLOAT_ONE32 divided by float32_sqrt(fst0); update_fcr31() afterwards. */
+uint32_t do_float_rsqrt1_s(uint32_t fst0)
+{
+    uint32_t root, result;
+    set_float_exception_flags(0, &env->fpu->fp_status);
+    root = float32_sqrt(fst0, &env->fpu->fp_status);
+    result = float32_div(FLOAT_ONE32, root, &env->fpu->fp_status);
+    update_fcr31();
+    return result;
+}
+
+/* Per 32-bit half of fdt0: float32_sqrt(), then FLOAT_ONE32 / that root. */
+uint64_t do_float_rsqrt1_ps(uint64_t fdt0)
+{
+    uint32_t lo, hi;
+
+    set_float_exception_flags(0, &env->fpu->fp_status);
+    lo = float32_sqrt(fdt0 & 0xffffffff, &env->fpu->fp_status);
+    hi = float32_sqrt(fdt0 >> 32, &env->fpu->fp_status);
+    lo = float32_div(FLOAT_ONE32, lo, &env->fpu->fp_status);
+    hi = float32_div(FLOAT_ONE32, hi, &env->fpu->fp_status);
+    update_fcr31();
+    return ((uint64_t)hi << 32) | lo;
+}
+
+#define FLOAT_OP(name, p) void do_float_##name##_##p(void)
+
 /* binary operations */
-#define FLOAT_BINOP(name) \
-FLOAT_OP(name, d)         \
-{                         \
+#define FLOAT_BINOP(name)                                          \
+uint64_t do_float_ ## name ## _d(uint64_t fdt0, uint64_t fdt1)     \
+{                                                                  \
+    uint64_t dt2;                                                  \
+                                                                   \
     set_float_exception_flags(0, &env->fpu->fp_status);            \
-    FDT2 = float64_ ## name (FDT0, FDT1, &env->fpu->fp_status);    \
+    dt2 = float64_ ## name (fdt0, fdt1, &env->fpu->fp_status);     \
     update_fcr31();                                                \
     if (GET_FP_CAUSE(env->fpu->fcr31) & FP_INVALID)                \
-        DT2 = FLOAT_QNAN64;                                        \
-}                         \
-FLOAT_OP(name, s)         \
-{                         \
+        dt2 = FLOAT_QNAN64;                                        \
+    return dt2;                                                    \
+}                                                                  \
+                                                                   \
+uint32_t do_float_ ## name ## _s(uint32_t fst0, uint32_t fst1)     \
+{                                                                  \
+    uint32_t wt2;                                                  \
+                                                                   \
     set_float_exception_flags(0, &env->fpu->fp_status);            \
-    FST2 = float32_ ## name (FST0, FST1, &env->fpu->fp_status);    \
+    wt2 = float32_ ## name (fst0, fst1, &env->fpu->fp_status);     \
     update_fcr31();                                                \
     if (GET_FP_CAUSE(env->fpu->fcr31) & FP_INVALID)                \
-        WT2 = FLOAT_QNAN32;                                        \
-}                         \
-FLOAT_OP(name, ps)        \
-{                         \
+        wt2 = FLOAT_QNAN32;                                        \
+    return wt2;                                                    \
+}                                                                  \
+                                                                   \
+uint64_t do_float_ ## name ## _ps(uint64_t fdt0, uint64_t fdt1)    \
+{                                                                  \
+    uint32_t fst0 = fdt0 & 0XFFFFFFFF;                             \
+    uint32_t fsth0 = fdt0 >> 32;                                   \
+    uint32_t fst1 = fdt1 & 0XFFFFFFFF;                             \
+    uint32_t fsth1 = fdt1 >> 32;                                   \
+    uint32_t wt2;                                                  \
+    uint32_t wth2;                                                 \
+                                                                   \
     set_float_exception_flags(0, &env->fpu->fp_status);            \
-    FST2 = float32_ ## name (FST0, FST1, &env->fpu->fp_status);    \
-    FSTH2 = float32_ ## name (FSTH0, FSTH1, &env->fpu->fp_status); \
-    update_fcr31();       \
+    wt2 = float32_ ## name (fst0, fst1, &env->fpu->fp_status);     \
+    wth2 = float32_ ## name (fsth0, fsth1, &env->fpu->fp_status);  \
+    update_fcr31();                                                \
     if (GET_FP_CAUSE(env->fpu->fcr31) & FP_INVALID) {              \
-        WT2 = FLOAT_QNAN32;                                        \
-        WTH2 = FLOAT_QNAN32;                                       \
-    }                     \
+        wt2 = FLOAT_QNAN32;                                        \
+        wth2 = FLOAT_QNAN32;                                       \
+    }                                                              \
+    return ((uint64_t)wth2 << 32) | wt2;                           \
 }
+
 FLOAT_BINOP(add)
 FLOAT_BINOP(sub)
 FLOAT_BINOP(mul)
@@ -2502,146 +2677,210 @@
 #undef FLOAT_BINOP
 
 /* ternary operations */
-#define FLOAT_TERNOP(name1, name2) \
-FLOAT_OP(name1 ## name2, d)        \
-{                                  \
-    FDT0 = float64_ ## name1 (FDT0, FDT1, &env->fpu->fp_status);    \
-    FDT2 = float64_ ## name2 (FDT0, FDT2, &env->fpu->fp_status);    \
-}                                  \
-FLOAT_OP(name1 ## name2, s)        \
-{                                  \
-    FST0 = float32_ ## name1 (FST0, FST1, &env->fpu->fp_status);    \
-    FST2 = float32_ ## name2 (FST0, FST2, &env->fpu->fp_status);    \
-}                                  \
-FLOAT_OP(name1 ## name2, ps)       \
-{                                  \
-    FST0 = float32_ ## name1 (FST0, FST1, &env->fpu->fp_status);    \
-    FSTH0 = float32_ ## name1 (FSTH0, FSTH1, &env->fpu->fp_status); \
-    FST2 = float32_ ## name2 (FST0, FST2, &env->fpu->fp_status);    \
-    FSTH2 = float32_ ## name2 (FSTH0, FSTH2, &env->fpu->fp_status); \
+#define FLOAT_TERNOP(name1, name2)                                        \
+uint64_t do_float_ ## name1 ## name2 ## _d(uint64_t fdt0, uint64_t fdt1,  \
+                                           uint64_t fdt2)                 \
+{                                                                         \
+    fdt0 = float64_ ## name1 (fdt0, fdt1, &env->fpu->fp_status);          \
+    return float64_ ## name2 (fdt0, fdt2, &env->fpu->fp_status);          \
+}                                                                         \
+                                                                          \
+uint32_t do_float_ ## name1 ## name2 ## _s(uint32_t fst0, uint32_t fst1,  \
+                                           uint32_t fst2)                 \
+{                                                                         \
+    fst0 = float32_ ## name1 (fst0, fst1, &env->fpu->fp_status);          \
+    return float32_ ## name2 (fst0, fst2, &env->fpu->fp_status);          \
+}                                                                         \
+                                                                          \
+uint64_t do_float_ ## name1 ## name2 ## _ps(uint64_t fdt0, uint64_t fdt1, \
+                                            uint64_t fdt2)                \
+{                                                                         \
+    uint32_t fst0 = fdt0 & 0XFFFFFFFF;                                    \
+    uint32_t fsth0 = fdt0 >> 32;                                          \
+    uint32_t fst1 = fdt1 & 0XFFFFFFFF;                                    \
+    uint32_t fsth1 = fdt1 >> 32;                                          \
+    uint32_t fst2 = fdt2 & 0XFFFFFFFF;                                    \
+    uint32_t fsth2 = fdt2 >> 32;                                          \
+                                                                          \
+    fst0 = float32_ ## name1 (fst0, fst1, &env->fpu->fp_status);          \
+    fsth0 = float32_ ## name1 (fsth0, fsth1, &env->fpu->fp_status);       \
+    fst2 = float32_ ## name2 (fst0, fst2, &env->fpu->fp_status);          \
+    fsth2 = float32_ ## name2 (fsth0, fsth2, &env->fpu->fp_status);       \
+    return ((uint64_t)fsth2 << 32) | fst2;                                \
 }
+
 FLOAT_TERNOP(mul, add)
 FLOAT_TERNOP(mul, sub)
 #undef FLOAT_TERNOP
 
 /* negated ternary operations */
-#define FLOAT_NTERNOP(name1, name2) \
-FLOAT_OP(n ## name1 ## name2, d)    \
-{                                   \
-    FDT0 = float64_ ## name1 (FDT0, FDT1, &env->fpu->fp_status);    \
-    FDT2 = float64_ ## name2 (FDT0, FDT2, &env->fpu->fp_status);    \
-    FDT2 = float64_chs(FDT2);       \
-}                                   \
-FLOAT_OP(n ## name1 ## name2, s)    \
-{                                   \
-    FST0 = float32_ ## name1 (FST0, FST1, &env->fpu->fp_status);    \
-    FST2 = float32_ ## name2 (FST0, FST2, &env->fpu->fp_status);    \
-    FST2 = float32_chs(FST2);       \
-}                                   \
-FLOAT_OP(n ## name1 ## name2, ps)   \
-{                                   \
-    FST0 = float32_ ## name1 (FST0, FST1, &env->fpu->fp_status);    \
-    FSTH0 = float32_ ## name1 (FSTH0, FSTH1, &env->fpu->fp_status); \
-    FST2 = float32_ ## name2 (FST0, FST2, &env->fpu->fp_status);    \
-    FSTH2 = float32_ ## name2 (FSTH0, FSTH2, &env->fpu->fp_status); \
-    FST2 = float32_chs(FST2);       \
-    FSTH2 = float32_chs(FSTH2);     \
+#define FLOAT_NTERNOP(name1, name2)                                       \
+uint64_t do_float_n ## name1 ## name2 ## _d(uint64_t fdt0, uint64_t fdt1, \
+                                           uint64_t fdt2)                 \
+{                                                                         \
+    fdt0 = float64_ ## name1 (fdt0, fdt1, &env->fpu->fp_status);          \
+    fdt2 = float64_ ## name2 (fdt0, fdt2, &env->fpu->fp_status);          \
+    return float64_chs(fdt2);                                             \
+}                                                                         \
+                                                                          \
+uint32_t do_float_n ## name1 ## name2 ## _s(uint32_t fst0, uint32_t fst1, \
+                                           uint32_t fst2)                 \
+{                                                                         \
+    fst0 = float32_ ## name1 (fst0, fst1, &env->fpu->fp_status);          \
+    fst2 = float32_ ## name2 (fst0, fst2, &env->fpu->fp_status);          \
+    return float32_chs(fst2);                                             \
+}                                                                         \
+                                                                          \
+uint64_t do_float_n ## name1 ## name2 ## _ps(uint64_t fdt0, uint64_t fdt1,\
+                                           uint64_t fdt2)                 \
+{                                                                         \
+    uint32_t fst0 = fdt0 & 0XFFFFFFFF;                                    \
+    uint32_t fsth0 = fdt0 >> 32;                                          \
+    uint32_t fst1 = fdt1 & 0XFFFFFFFF;                                    \
+    uint32_t fsth1 = fdt1 >> 32;                                          \
+    uint32_t fst2 = fdt2 & 0XFFFFFFFF;                                    \
+    uint32_t fsth2 = fdt2 >> 32;                                          \
+                                                                          \
+    fst0 = float32_ ## name1 (fst0, fst1, &env->fpu->fp_status);          \
+    fsth0 = float32_ ## name1 (fsth0, fsth1, &env->fpu->fp_status);       \
+    fst2 = float32_ ## name2 (fst0, fst2, &env->fpu->fp_status);          \
+    fsth2 = float32_ ## name2 (fsth0, fsth2, &env->fpu->fp_status);       \
+    fst2 = float32_chs(fst2);                                             \
+    fsth2 = float32_chs(fsth2);                                           \
+    return ((uint64_t)fsth2 << 32) | fst2;                                \
 }
+
 FLOAT_NTERNOP(mul, add)
 FLOAT_NTERNOP(mul, sub)
 #undef FLOAT_NTERNOP
 
 /* MIPS specific binary operations */
-FLOAT_OP(recip2, d)
+uint64_t do_float_recip2_d(uint64_t fdt0, uint64_t fdt2)
 {
     set_float_exception_flags(0, &env->fpu->fp_status);
-    FDT2 = float64_mul(FDT0, FDT2, &env->fpu->fp_status);
-    FDT2 = float64_chs(float64_sub(FDT2, FLOAT_ONE64, &env->fpu->fp_status));
+    fdt2 = float64_mul(fdt0, fdt2, &env->fpu->fp_status);
+    fdt2 = float64_chs(float64_sub(fdt2, FLOAT_ONE64, &env->fpu->fp_status));
     update_fcr31();
-}
-FLOAT_OP(recip2, s)
-{
-    set_float_exception_flags(0, &env->fpu->fp_status);
-    FST2 = float32_mul(FST0, FST2, &env->fpu->fp_status);
-    FST2 = float32_chs(float32_sub(FST2, FLOAT_ONE32, &env->fpu->fp_status));
-    update_fcr31();
-}
-FLOAT_OP(recip2, ps)
-{
-    set_float_exception_flags(0, &env->fpu->fp_status);
-    FST2 = float32_mul(FST0, FST2, &env->fpu->fp_status);
-    FSTH2 = float32_mul(FSTH0, FSTH2, &env->fpu->fp_status);
-    FST2 = float32_chs(float32_sub(FST2, FLOAT_ONE32, &env->fpu->fp_status));
-    FSTH2 = float32_chs(float32_sub(FSTH2, FLOAT_ONE32, &env->fpu->fp_status));
-    update_fcr31();
+    return fdt2;
 }
 
-FLOAT_OP(rsqrt2, d)
+uint32_t do_float_recip2_s(uint32_t fst0, uint32_t fst2)
 {
     set_float_exception_flags(0, &env->fpu->fp_status);
-    FDT2 = float64_mul(FDT0, FDT2, &env->fpu->fp_status);
-    FDT2 = float64_sub(FDT2, FLOAT_ONE64, &env->fpu->fp_status);
-    FDT2 = float64_chs(float64_div(FDT2, FLOAT_TWO64, &env->fpu->fp_status));
+    fst2 = float32_mul(fst0, fst2, &env->fpu->fp_status);
+    fst2 = float32_chs(float32_sub(fst2, FLOAT_ONE32, &env->fpu->fp_status));
     update_fcr31();
-}
-FLOAT_OP(rsqrt2, s)
-{
-    set_float_exception_flags(0, &env->fpu->fp_status);
-    FST2 = float32_mul(FST0, FST2, &env->fpu->fp_status);
-    FST2 = float32_sub(FST2, FLOAT_ONE32, &env->fpu->fp_status);
-    FST2 = float32_chs(float32_div(FST2, FLOAT_TWO32, &env->fpu->fp_status));
-    update_fcr31();
-}
-FLOAT_OP(rsqrt2, ps)
-{
-    set_float_exception_flags(0, &env->fpu->fp_status);
-    FST2 = float32_mul(FST0, FST2, &env->fpu->fp_status);
-    FSTH2 = float32_mul(FSTH0, FSTH2, &env->fpu->fp_status);
-    FST2 = float32_sub(FST2, FLOAT_ONE32, &env->fpu->fp_status);
-    FSTH2 = float32_sub(FSTH2, FLOAT_ONE32, &env->fpu->fp_status);
-    FST2 = float32_chs(float32_div(FST2, FLOAT_TWO32, &env->fpu->fp_status));
-    FSTH2 = float32_chs(float32_div(FSTH2, FLOAT_TWO32, &env->fpu->fp_status));
-    update_fcr31();
+    return fst2;
 }
 
-FLOAT_OP(addr, ps)
+uint64_t do_float_recip2_ps(uint64_t fdt0, uint64_t fdt2)
 {
+    uint32_t fst0 = fdt0 & 0XFFFFFFFF;
+    uint32_t fsth0 = fdt0 >> 32;
+    uint32_t fst2 = fdt2 & 0XFFFFFFFF;
+    uint32_t fsth2 = fdt2 >> 32;
+
     set_float_exception_flags(0, &env->fpu->fp_status);
-    FST2 = float32_add (FST0, FSTH0, &env->fpu->fp_status);
-    FSTH2 = float32_add (FST1, FSTH1, &env->fpu->fp_status);
+    fst2 = float32_mul(fst0, fst2, &env->fpu->fp_status);
+    fsth2 = float32_mul(fsth0, fsth2, &env->fpu->fp_status);
+    fst2 = float32_chs(float32_sub(fst2, FLOAT_ONE32, &env->fpu->fp_status));
+    fsth2 = float32_chs(float32_sub(fsth2, FLOAT_ONE32, &env->fpu->fp_status));
     update_fcr31();
+    return ((uint64_t)fsth2 << 32) | fst2;
 }
 
-FLOAT_OP(mulr, ps)
+uint64_t do_float_rsqrt2_d(uint64_t fdt0, uint64_t fdt2)
 {
     set_float_exception_flags(0, &env->fpu->fp_status);
-    FST2 = float32_mul (FST0, FSTH0, &env->fpu->fp_status);
-    FSTH2 = float32_mul (FST1, FSTH1, &env->fpu->fp_status);
+    fdt2 = float64_mul(fdt0, fdt2, &env->fpu->fp_status);
+    fdt2 = float64_sub(fdt2, FLOAT_ONE64, &env->fpu->fp_status);
+    fdt2 = float64_chs(float64_div(fdt2, FLOAT_TWO64, &env->fpu->fp_status));
     update_fcr31();
+    return fdt2;
+}
+
+uint32_t do_float_rsqrt2_s(uint32_t fst0, uint32_t fst2)
+{
+    set_float_exception_flags(0, &env->fpu->fp_status);
+    fst2 = float32_mul(fst0, fst2, &env->fpu->fp_status);
+    fst2 = float32_sub(fst2, FLOAT_ONE32, &env->fpu->fp_status);
+    fst2 = float32_chs(float32_div(fst2, FLOAT_TWO32, &env->fpu->fp_status));
+    update_fcr31();
+    return fst2;
+}
+
+uint64_t do_float_rsqrt2_ps(uint64_t fdt0, uint64_t fdt2)
+{
+    uint32_t fst0 = fdt0 & 0XFFFFFFFF;
+    uint32_t fsth0 = fdt0 >> 32;
+    uint32_t fst2 = fdt2 & 0XFFFFFFFF;
+    uint32_t fsth2 = fdt2 >> 32;
+
+    set_float_exception_flags(0, &env->fpu->fp_status);
+    fst2 = float32_mul(fst0, fst2, &env->fpu->fp_status);
+    fsth2 = float32_mul(fsth0, fsth2, &env->fpu->fp_status);
+    fst2 = float32_sub(fst2, FLOAT_ONE32, &env->fpu->fp_status);
+    fsth2 = float32_sub(fsth2, FLOAT_ONE32, &env->fpu->fp_status);
+    fst2 = float32_chs(float32_div(fst2, FLOAT_TWO32, &env->fpu->fp_status));
+    fsth2 = float32_chs(float32_div(fsth2, FLOAT_TWO32, &env->fpu->fp_status));
+    update_fcr31();
+    return ((uint64_t)fsth2 << 32) | fst2;
+}
+
+uint64_t do_float_addr_ps(uint64_t fdt0, uint64_t fdt1)
+{
+    uint32_t fst0 = fdt0 & 0XFFFFFFFF;
+    uint32_t fsth0 = fdt0 >> 32;
+    uint32_t fst1 = fdt1 & 0XFFFFFFFF;
+    uint32_t fsth1 = fdt1 >> 32;
+    uint32_t fst2;
+    uint32_t fsth2;
+
+    set_float_exception_flags(0, &env->fpu->fp_status);
+    fst2 = float32_add (fst0, fsth0, &env->fpu->fp_status);
+    fsth2 = float32_add (fst1, fsth1, &env->fpu->fp_status);
+    update_fcr31();
+    return ((uint64_t)fsth2 << 32) | fst2;
+}
+
+uint64_t do_float_mulr_ps(uint64_t fdt0, uint64_t fdt1)
+{
+    uint32_t fst0 = fdt0 & 0XFFFFFFFF;
+    uint32_t fsth0 = fdt0 >> 32;
+    uint32_t fst1 = fdt1 & 0XFFFFFFFF;
+    uint32_t fsth1 = fdt1 >> 32;
+    uint32_t fst2;
+    uint32_t fsth2;
+
+    set_float_exception_flags(0, &env->fpu->fp_status);
+    fst2 = float32_mul (fst0, fsth0, &env->fpu->fp_status);
+    fsth2 = float32_mul (fst1, fsth1, &env->fpu->fp_status);
+    update_fcr31();
+    return ((uint64_t)fsth2 << 32) | fst2;
 }
 
 /* compare operations */
-#define FOP_COND_D(op, cond)                   \
-void do_cmp_d_ ## op (long cc)                 \
-{                                              \
-    int c = cond;                              \
-    update_fcr31();                            \
-    if (c)                                     \
-        SET_FP_COND(cc, env->fpu);             \
-    else                                       \
-        CLEAR_FP_COND(cc, env->fpu);           \
-}                                              \
-void do_cmpabs_d_ ## op (long cc)              \
-{                                              \
-    int c;                                     \
-    FDT0 = float64_abs(FDT0);                  \
-    FDT1 = float64_abs(FDT1);                  \
-    c = cond;                                  \
-    update_fcr31();                            \
-    if (c)                                     \
-        SET_FP_COND(cc, env->fpu);             \
-    else                                       \
-        CLEAR_FP_COND(cc, env->fpu);           \
+#define FOP_COND_D(op, cond)                                   \
+void do_cmp_d_ ## op (uint64_t fdt0, uint64_t fdt1, int cc)    \
+{                                                              \
+    int c = cond;                                              \
+    update_fcr31();                                            \
+    if (c)                                                     \
+        SET_FP_COND(cc, env->fpu);                             \
+    else                                                       \
+        CLEAR_FP_COND(cc, env->fpu);                           \
+}                                                              \
+void do_cmpabs_d_ ## op (uint64_t fdt0, uint64_t fdt1, int cc) \
+{                                                              \
+    int c;                                                     \
+    fdt0 = float64_abs(fdt0);                                  \
+    fdt1 = float64_abs(fdt1);                                  \
+    c = cond;                                                  \
+    update_fcr31();                                            \
+    if (c)                                                     \
+        SET_FP_COND(cc, env->fpu);                             \
+    else                                                       \
+        CLEAR_FP_COND(cc, env->fpu);                           \
 }
 
 int float64_is_unordered(int sig, float64 a, float64 b STATUS_PARAM)
@@ -2660,46 +2899,46 @@
 
 /* NOTE: the comma operator will make "cond" to eval to false,
  * but float*_is_unordered() is still called. */
-FOP_COND_D(f,   (float64_is_unordered(0, FDT1, FDT0, &env->fpu->fp_status), 0))
-FOP_COND_D(un,  float64_is_unordered(0, FDT1, FDT0, &env->fpu->fp_status))
-FOP_COND_D(eq,  !float64_is_unordered(0, FDT1, FDT0, &env->fpu->fp_status) && float64_eq(FDT0, FDT1, &env->fpu->fp_status))
-FOP_COND_D(ueq, float64_is_unordered(0, FDT1, FDT0, &env->fpu->fp_status)  || float64_eq(FDT0, FDT1, &env->fpu->fp_status))
-FOP_COND_D(olt, !float64_is_unordered(0, FDT1, FDT0, &env->fpu->fp_status) && float64_lt(FDT0, FDT1, &env->fpu->fp_status))
-FOP_COND_D(ult, float64_is_unordered(0, FDT1, FDT0, &env->fpu->fp_status)  || float64_lt(FDT0, FDT1, &env->fpu->fp_status))
-FOP_COND_D(ole, !float64_is_unordered(0, FDT1, FDT0, &env->fpu->fp_status) && float64_le(FDT0, FDT1, &env->fpu->fp_status))
-FOP_COND_D(ule, float64_is_unordered(0, FDT1, FDT0, &env->fpu->fp_status)  || float64_le(FDT0, FDT1, &env->fpu->fp_status))
+FOP_COND_D(f,   (float64_is_unordered(0, fdt1, fdt0, &env->fpu->fp_status), 0))
+FOP_COND_D(un,  float64_is_unordered(0, fdt1, fdt0, &env->fpu->fp_status))
+FOP_COND_D(eq,  !float64_is_unordered(0, fdt1, fdt0, &env->fpu->fp_status) && float64_eq(fdt0, fdt1, &env->fpu->fp_status))
+FOP_COND_D(ueq, float64_is_unordered(0, fdt1, fdt0, &env->fpu->fp_status)  || float64_eq(fdt0, fdt1, &env->fpu->fp_status))
+FOP_COND_D(olt, !float64_is_unordered(0, fdt1, fdt0, &env->fpu->fp_status) && float64_lt(fdt0, fdt1, &env->fpu->fp_status))
+FOP_COND_D(ult, float64_is_unordered(0, fdt1, fdt0, &env->fpu->fp_status)  || float64_lt(fdt0, fdt1, &env->fpu->fp_status))
+FOP_COND_D(ole, !float64_is_unordered(0, fdt1, fdt0, &env->fpu->fp_status) && float64_le(fdt0, fdt1, &env->fpu->fp_status))
+FOP_COND_D(ule, float64_is_unordered(0, fdt1, fdt0, &env->fpu->fp_status)  || float64_le(fdt0, fdt1, &env->fpu->fp_status))
 /* NOTE: the comma operator will make "cond" to eval to false,
  * but float*_is_unordered() is still called. */
-FOP_COND_D(sf,  (float64_is_unordered(1, FDT1, FDT0, &env->fpu->fp_status), 0))
-FOP_COND_D(ngle,float64_is_unordered(1, FDT1, FDT0, &env->fpu->fp_status))
-FOP_COND_D(seq, !float64_is_unordered(1, FDT1, FDT0, &env->fpu->fp_status) && float64_eq(FDT0, FDT1, &env->fpu->fp_status))
-FOP_COND_D(ngl, float64_is_unordered(1, FDT1, FDT0, &env->fpu->fp_status)  || float64_eq(FDT0, FDT1, &env->fpu->fp_status))
-FOP_COND_D(lt,  !float64_is_unordered(1, FDT1, FDT0, &env->fpu->fp_status) && float64_lt(FDT0, FDT1, &env->fpu->fp_status))
-FOP_COND_D(nge, float64_is_unordered(1, FDT1, FDT0, &env->fpu->fp_status)  || float64_lt(FDT0, FDT1, &env->fpu->fp_status))
-FOP_COND_D(le,  !float64_is_unordered(1, FDT1, FDT0, &env->fpu->fp_status) && float64_le(FDT0, FDT1, &env->fpu->fp_status))
-FOP_COND_D(ngt, float64_is_unordered(1, FDT1, FDT0, &env->fpu->fp_status)  || float64_le(FDT0, FDT1, &env->fpu->fp_status))
+FOP_COND_D(sf,  (float64_is_unordered(1, fdt1, fdt0, &env->fpu->fp_status), 0))
+FOP_COND_D(ngle,float64_is_unordered(1, fdt1, fdt0, &env->fpu->fp_status))
+FOP_COND_D(seq, !float64_is_unordered(1, fdt1, fdt0, &env->fpu->fp_status) && float64_eq(fdt0, fdt1, &env->fpu->fp_status))
+FOP_COND_D(ngl, float64_is_unordered(1, fdt1, fdt0, &env->fpu->fp_status)  || float64_eq(fdt0, fdt1, &env->fpu->fp_status))
+FOP_COND_D(lt,  !float64_is_unordered(1, fdt1, fdt0, &env->fpu->fp_status) && float64_lt(fdt0, fdt1, &env->fpu->fp_status))
+FOP_COND_D(nge, float64_is_unordered(1, fdt1, fdt0, &env->fpu->fp_status)  || float64_lt(fdt0, fdt1, &env->fpu->fp_status))
+FOP_COND_D(le,  !float64_is_unordered(1, fdt1, fdt0, &env->fpu->fp_status) && float64_le(fdt0, fdt1, &env->fpu->fp_status))
+FOP_COND_D(ngt, float64_is_unordered(1, fdt1, fdt0, &env->fpu->fp_status)  || float64_le(fdt0, fdt1, &env->fpu->fp_status))
 
-#define FOP_COND_S(op, cond)                   \
-void do_cmp_s_ ## op (long cc)                 \
-{                                              \
-    int c = cond;                              \
-    update_fcr31();                            \
-    if (c)                                     \
-        SET_FP_COND(cc, env->fpu);             \
-    else                                       \
-        CLEAR_FP_COND(cc, env->fpu);           \
-}                                              \
-void do_cmpabs_s_ ## op (long cc)              \
-{                                              \
-    int c;                                     \
-    FST0 = float32_abs(FST0);                  \
-    FST1 = float32_abs(FST1);                  \
-    c = cond;                                  \
-    update_fcr31();                            \
-    if (c)                                     \
-        SET_FP_COND(cc, env->fpu);             \
-    else                                       \
-        CLEAR_FP_COND(cc, env->fpu);           \
+#define FOP_COND_S(op, cond)                                   \
+void do_cmp_s_ ## op (uint32_t fst0, uint32_t fst1, int cc)    \
+{                                                              \
+    int c = cond;                                              \
+    update_fcr31();                                            \
+    if (c)                                                     \
+        SET_FP_COND(cc, env->fpu);                             \
+    else                                                       \
+        CLEAR_FP_COND(cc, env->fpu);                           \
+}                                                              \
+void do_cmpabs_s_ ## op (uint32_t fst0, uint32_t fst1, int cc) \
+{                                                              \
+    int c;                                                     \
+    fst0 = float32_abs(fst0);                                  \
+    fst1 = float32_abs(fst1);                                  \
+    c = cond;                                                  \
+    update_fcr31();                                            \
+    if (c)                                                     \
+        SET_FP_COND(cc, env->fpu);                             \
+    else                                                       \
+        CLEAR_FP_COND(cc, env->fpu);                           \
 }
 
 flag float32_is_unordered(int sig, float32 a, float32 b STATUS_PARAM)
@@ -2718,93 +2957,98 @@
 
 /* NOTE: the comma operator will make "cond" to eval to false,
  * but float*_is_unordered() is still called. */
-FOP_COND_S(f,   (float32_is_unordered(0, FST1, FST0, &env->fpu->fp_status), 0))
-FOP_COND_S(un,  float32_is_unordered(0, FST1, FST0, &env->fpu->fp_status))
-FOP_COND_S(eq,  !float32_is_unordered(0, FST1, FST0, &env->fpu->fp_status) && float32_eq(FST0, FST1, &env->fpu->fp_status))
-FOP_COND_S(ueq, float32_is_unordered(0, FST1, FST0, &env->fpu->fp_status)  || float32_eq(FST0, FST1, &env->fpu->fp_status))
-FOP_COND_S(olt, !float32_is_unordered(0, FST1, FST0, &env->fpu->fp_status) && float32_lt(FST0, FST1, &env->fpu->fp_status))
-FOP_COND_S(ult, float32_is_unordered(0, FST1, FST0, &env->fpu->fp_status)  || float32_lt(FST0, FST1, &env->fpu->fp_status))
-FOP_COND_S(ole, !float32_is_unordered(0, FST1, FST0, &env->fpu->fp_status) && float32_le(FST0, FST1, &env->fpu->fp_status))
-FOP_COND_S(ule, float32_is_unordered(0, FST1, FST0, &env->fpu->fp_status)  || float32_le(FST0, FST1, &env->fpu->fp_status))
+FOP_COND_S(f,   (float32_is_unordered(0, fst1, fst0, &env->fpu->fp_status), 0))
+FOP_COND_S(un,  float32_is_unordered(0, fst1, fst0, &env->fpu->fp_status))
+FOP_COND_S(eq,  !float32_is_unordered(0, fst1, fst0, &env->fpu->fp_status) && float32_eq(fst0, fst1, &env->fpu->fp_status))
+FOP_COND_S(ueq, float32_is_unordered(0, fst1, fst0, &env->fpu->fp_status)  || float32_eq(fst0, fst1, &env->fpu->fp_status))
+FOP_COND_S(olt, !float32_is_unordered(0, fst1, fst0, &env->fpu->fp_status) && float32_lt(fst0, fst1, &env->fpu->fp_status))
+FOP_COND_S(ult, float32_is_unordered(0, fst1, fst0, &env->fpu->fp_status)  || float32_lt(fst0, fst1, &env->fpu->fp_status))
+FOP_COND_S(ole, !float32_is_unordered(0, fst1, fst0, &env->fpu->fp_status) && float32_le(fst0, fst1, &env->fpu->fp_status))
+FOP_COND_S(ule, float32_is_unordered(0, fst1, fst0, &env->fpu->fp_status)  || float32_le(fst0, fst1, &env->fpu->fp_status))
 /* NOTE: the comma operator will make "cond" to eval to false,
  * but float*_is_unordered() is still called. */
-FOP_COND_S(sf,  (float32_is_unordered(1, FST1, FST0, &env->fpu->fp_status), 0))
-FOP_COND_S(ngle,float32_is_unordered(1, FST1, FST0, &env->fpu->fp_status))
-FOP_COND_S(seq, !float32_is_unordered(1, FST1, FST0, &env->fpu->fp_status) && float32_eq(FST0, FST1, &env->fpu->fp_status))
-FOP_COND_S(ngl, float32_is_unordered(1, FST1, FST0, &env->fpu->fp_status)  || float32_eq(FST0, FST1, &env->fpu->fp_status))
-FOP_COND_S(lt,  !float32_is_unordered(1, FST1, FST0, &env->fpu->fp_status) && float32_lt(FST0, FST1, &env->fpu->fp_status))
-FOP_COND_S(nge, float32_is_unordered(1, FST1, FST0, &env->fpu->fp_status)  || float32_lt(FST0, FST1, &env->fpu->fp_status))
-FOP_COND_S(le,  !float32_is_unordered(1, FST1, FST0, &env->fpu->fp_status) && float32_le(FST0, FST1, &env->fpu->fp_status))
-FOP_COND_S(ngt, float32_is_unordered(1, FST1, FST0, &env->fpu->fp_status)  || float32_le(FST0, FST1, &env->fpu->fp_status))
+FOP_COND_S(sf,  (float32_is_unordered(1, fst1, fst0, &env->fpu->fp_status), 0))
+FOP_COND_S(ngle,float32_is_unordered(1, fst1, fst0, &env->fpu->fp_status))
+FOP_COND_S(seq, !float32_is_unordered(1, fst1, fst0, &env->fpu->fp_status) && float32_eq(fst0, fst1, &env->fpu->fp_status))
+FOP_COND_S(ngl, float32_is_unordered(1, fst1, fst0, &env->fpu->fp_status)  || float32_eq(fst0, fst1, &env->fpu->fp_status))
+FOP_COND_S(lt,  !float32_is_unordered(1, fst1, fst0, &env->fpu->fp_status) && float32_lt(fst0, fst1, &env->fpu->fp_status))
+FOP_COND_S(nge, float32_is_unordered(1, fst1, fst0, &env->fpu->fp_status)  || float32_lt(fst0, fst1, &env->fpu->fp_status))
+FOP_COND_S(le,  !float32_is_unordered(1, fst1, fst0, &env->fpu->fp_status) && float32_le(fst0, fst1, &env->fpu->fp_status))
+FOP_COND_S(ngt, float32_is_unordered(1, fst1, fst0, &env->fpu->fp_status)  || float32_le(fst0, fst1, &env->fpu->fp_status))
 
-#define FOP_COND_PS(op, condl, condh)          \
-void do_cmp_ps_ ## op (long cc)                \
-{                                              \
-    int cl = condl;                            \
-    int ch = condh;                            \
-    update_fcr31();                            \
-    if (cl)                                    \
-        SET_FP_COND(cc, env->fpu);             \
-    else                                       \
-        CLEAR_FP_COND(cc, env->fpu);           \
-    if (ch)                                    \
-        SET_FP_COND(cc + 1, env->fpu);         \
-    else                                       \
-        CLEAR_FP_COND(cc + 1, env->fpu);       \
-}                                              \
-void do_cmpabs_ps_ ## op (long cc)             \
-{                                              \
-    int cl, ch;                                \
-    FST0 = float32_abs(FST0);                  \
-    FSTH0 = float32_abs(FSTH0);                \
-    FST1 = float32_abs(FST1);                  \
-    FSTH1 = float32_abs(FSTH1);                \
-    cl = condl;                                \
-    ch = condh;                                \
-    update_fcr31();                            \
-    if (cl)                                    \
-        SET_FP_COND(cc, env->fpu);             \
-    else                                       \
-        CLEAR_FP_COND(cc, env->fpu);           \
-    if (ch)                                    \
-        SET_FP_COND(cc + 1, env->fpu);         \
-    else                                       \
-        CLEAR_FP_COND(cc + 1, env->fpu);       \
+#define FOP_COND_PS(op, condl, condh)                           \
+void do_cmp_ps_ ## op (uint64_t fdt0, uint64_t fdt1, int cc)    \
+{                                                               \
+    uint32_t fst0 = fdt0 & 0XFFFFFFFF;                          \
+    uint32_t fsth0 = fdt0 >> 32;                                \
+    uint32_t fst1 = fdt1 & 0XFFFFFFFF;                          \
+    uint32_t fsth1 = fdt1 >> 32;                                \
+    int cl = condl;                                             \
+    int ch = condh;                                             \
+    /* Plain compare: operands are used as-is (no abs). */      \
+    update_fcr31();                                             \
+    if (cl)                                                     \
+        SET_FP_COND(cc, env->fpu);                              \
+    else                                                        \
+        CLEAR_FP_COND(cc, env->fpu);                            \
+    if (ch)                                                     \
+        SET_FP_COND(cc + 1, env->fpu);                          \
+    else                                                        \
+        CLEAR_FP_COND(cc + 1, env->fpu);                        \
+}                                                               \
+void do_cmpabs_ps_ ## op (uint64_t fdt0, uint64_t fdt1, int cc) \
+{                                                               \
+    uint32_t fst0 = float32_abs(fdt0 & 0XFFFFFFFF);             \
+    uint32_t fsth0 = float32_abs(fdt0 >> 32);                   \
+    uint32_t fst1 = float32_abs(fdt1 & 0XFFFFFFFF);             \
+    uint32_t fsth1 = float32_abs(fdt1 >> 32);                   \
+    int cl = condl;                                             \
+    int ch = condh;                                             \
+    /* Absolute compare: float32_abs applied above. */          \
+    update_fcr31();                                             \
+    if (cl)                                                     \
+        SET_FP_COND(cc, env->fpu);                              \
+    else                                                        \
+        CLEAR_FP_COND(cc, env->fpu);                            \
+    if (ch)                                                     \
+        SET_FP_COND(cc + 1, env->fpu);                          \
+    else                                                        \
+        CLEAR_FP_COND(cc + 1, env->fpu);                        \
 }
 
 /* NOTE: the comma operator will make "cond" to eval to false,
  * but float*_is_unordered() is still called. */
-FOP_COND_PS(f,   (float32_is_unordered(0, FST1, FST0, &env->fpu->fp_status), 0),
-                 (float32_is_unordered(0, FSTH1, FSTH0, &env->fpu->fp_status), 0))
-FOP_COND_PS(un,  float32_is_unordered(0, FST1, FST0, &env->fpu->fp_status),
-                 float32_is_unordered(0, FSTH1, FSTH0, &env->fpu->fp_status))
-FOP_COND_PS(eq,  !float32_is_unordered(0, FST1, FST0, &env->fpu->fp_status)   && float32_eq(FST0, FST1, &env->fpu->fp_status),
-                 !float32_is_unordered(0, FSTH1, FSTH0, &env->fpu->fp_status) && float32_eq(FSTH0, FSTH1, &env->fpu->fp_status))
-FOP_COND_PS(ueq, float32_is_unordered(0, FST1, FST0, &env->fpu->fp_status)    || float32_eq(FST0, FST1, &env->fpu->fp_status),
-                 float32_is_unordered(0, FSTH1, FSTH0, &env->fpu->fp_status)  || float32_eq(FSTH0, FSTH1, &env->fpu->fp_status))
-FOP_COND_PS(olt, !float32_is_unordered(0, FST1, FST0, &env->fpu->fp_status)   && float32_lt(FST0, FST1, &env->fpu->fp_status),
-                 !float32_is_unordered(0, FSTH1, FSTH0, &env->fpu->fp_status) && float32_lt(FSTH0, FSTH1, &env->fpu->fp_status))
-FOP_COND_PS(ult, float32_is_unordered(0, FST1, FST0, &env->fpu->fp_status)    || float32_lt(FST0, FST1, &env->fpu->fp_status),
-                 float32_is_unordered(0, FSTH1, FSTH0, &env->fpu->fp_status)  || float32_lt(FSTH0, FSTH1, &env->fpu->fp_status))
-FOP_COND_PS(ole, !float32_is_unordered(0, FST1, FST0, &env->fpu->fp_status)   && float32_le(FST0, FST1, &env->fpu->fp_status),
-                 !float32_is_unordered(0, FSTH1, FSTH0, &env->fpu->fp_status) && float32_le(FSTH0, FSTH1, &env->fpu->fp_status))
-FOP_COND_PS(ule, float32_is_unordered(0, FST1, FST0, &env->fpu->fp_status)    || float32_le(FST0, FST1, &env->fpu->fp_status),
-                 float32_is_unordered(0, FSTH1, FSTH0, &env->fpu->fp_status)  || float32_le(FSTH0, FSTH1, &env->fpu->fp_status))
+FOP_COND_PS(f,   (float32_is_unordered(0, fst1, fst0, &env->fpu->fp_status), 0),
+                 (float32_is_unordered(0, fsth1, fsth0, &env->fpu->fp_status), 0))
+FOP_COND_PS(un,  float32_is_unordered(0, fst1, fst0, &env->fpu->fp_status),
+                 float32_is_unordered(0, fsth1, fsth0, &env->fpu->fp_status))
+FOP_COND_PS(eq,  !float32_is_unordered(0, fst1, fst0, &env->fpu->fp_status)   && float32_eq(fst0, fst1, &env->fpu->fp_status),
+                 !float32_is_unordered(0, fsth1, fsth0, &env->fpu->fp_status) && float32_eq(fsth0, fsth1, &env->fpu->fp_status))
+FOP_COND_PS(ueq, float32_is_unordered(0, fst1, fst0, &env->fpu->fp_status)    || float32_eq(fst0, fst1, &env->fpu->fp_status),
+                 float32_is_unordered(0, fsth1, fsth0, &env->fpu->fp_status)  || float32_eq(fsth0, fsth1, &env->fpu->fp_status))
+FOP_COND_PS(olt, !float32_is_unordered(0, fst1, fst0, &env->fpu->fp_status)   && float32_lt(fst0, fst1, &env->fpu->fp_status),
+                 !float32_is_unordered(0, fsth1, fsth0, &env->fpu->fp_status) && float32_lt(fsth0, fsth1, &env->fpu->fp_status))
+FOP_COND_PS(ult, float32_is_unordered(0, fst1, fst0, &env->fpu->fp_status)    || float32_lt(fst0, fst1, &env->fpu->fp_status),
+                 float32_is_unordered(0, fsth1, fsth0, &env->fpu->fp_status)  || float32_lt(fsth0, fsth1, &env->fpu->fp_status))
+FOP_COND_PS(ole, !float32_is_unordered(0, fst1, fst0, &env->fpu->fp_status)   && float32_le(fst0, fst1, &env->fpu->fp_status),
+                 !float32_is_unordered(0, fsth1, fsth0, &env->fpu->fp_status) && float32_le(fsth0, fsth1, &env->fpu->fp_status))
+FOP_COND_PS(ule, float32_is_unordered(0, fst1, fst0, &env->fpu->fp_status)    || float32_le(fst0, fst1, &env->fpu->fp_status),
+                 float32_is_unordered(0, fsth1, fsth0, &env->fpu->fp_status)  || float32_le(fsth0, fsth1, &env->fpu->fp_status))
 /* NOTE: the comma operator will make "cond" to eval to false,
  * but float*_is_unordered() is still called. */
-FOP_COND_PS(sf,  (float32_is_unordered(1, FST1, FST0, &env->fpu->fp_status), 0),
-                 (float32_is_unordered(1, FSTH1, FSTH0, &env->fpu->fp_status), 0))
-FOP_COND_PS(ngle,float32_is_unordered(1, FST1, FST0, &env->fpu->fp_status),
-                 float32_is_unordered(1, FSTH1, FSTH0, &env->fpu->fp_status))
-FOP_COND_PS(seq, !float32_is_unordered(1, FST1, FST0, &env->fpu->fp_status)   && float32_eq(FST0, FST1, &env->fpu->fp_status),
-                 !float32_is_unordered(1, FSTH1, FSTH0, &env->fpu->fp_status) && float32_eq(FSTH0, FSTH1, &env->fpu->fp_status))
-FOP_COND_PS(ngl, float32_is_unordered(1, FST1, FST0, &env->fpu->fp_status)    || float32_eq(FST0, FST1, &env->fpu->fp_status),
-                 float32_is_unordered(1, FSTH1, FSTH0, &env->fpu->fp_status)  || float32_eq(FSTH0, FSTH1, &env->fpu->fp_status))
-FOP_COND_PS(lt,  !float32_is_unordered(1, FST1, FST0, &env->fpu->fp_status)   && float32_lt(FST0, FST1, &env->fpu->fp_status),
-                 !float32_is_unordered(1, FSTH1, FSTH0, &env->fpu->fp_status) && float32_lt(FSTH0, FSTH1, &env->fpu->fp_status))
-FOP_COND_PS(nge, float32_is_unordered(1, FST1, FST0, &env->fpu->fp_status)    || float32_lt(FST0, FST1, &env->fpu->fp_status),
-                 float32_is_unordered(1, FSTH1, FSTH0, &env->fpu->fp_status)  || float32_lt(FSTH0, FSTH1, &env->fpu->fp_status))
-FOP_COND_PS(le,  !float32_is_unordered(1, FST1, FST0, &env->fpu->fp_status)   && float32_le(FST0, FST1, &env->fpu->fp_status),
-                 !float32_is_unordered(1, FSTH1, FSTH0, &env->fpu->fp_status) && float32_le(FSTH0, FSTH1, &env->fpu->fp_status))
-FOP_COND_PS(ngt, float32_is_unordered(1, FST1, FST0, &env->fpu->fp_status)    || float32_le(FST0, FST1, &env->fpu->fp_status),
-                 float32_is_unordered(1, FSTH1, FSTH0, &env->fpu->fp_status)  || float32_le(FSTH0, FSTH1, &env->fpu->fp_status))
+FOP_COND_PS(sf,  (float32_is_unordered(1, fst1, fst0, &env->fpu->fp_status), 0),
+                 (float32_is_unordered(1, fsth1, fsth0, &env->fpu->fp_status), 0))
+FOP_COND_PS(ngle,float32_is_unordered(1, fst1, fst0, &env->fpu->fp_status),
+                 float32_is_unordered(1, fsth1, fsth0, &env->fpu->fp_status))
+FOP_COND_PS(seq, !float32_is_unordered(1, fst1, fst0, &env->fpu->fp_status)   && float32_eq(fst0, fst1, &env->fpu->fp_status),
+                 !float32_is_unordered(1, fsth1, fsth0, &env->fpu->fp_status) && float32_eq(fsth0, fsth1, &env->fpu->fp_status))
+FOP_COND_PS(ngl, float32_is_unordered(1, fst1, fst0, &env->fpu->fp_status)    || float32_eq(fst0, fst1, &env->fpu->fp_status),
+                 float32_is_unordered(1, fsth1, fsth0, &env->fpu->fp_status)  || float32_eq(fsth0, fsth1, &env->fpu->fp_status))
+FOP_COND_PS(lt,  !float32_is_unordered(1, fst1, fst0, &env->fpu->fp_status)   && float32_lt(fst0, fst1, &env->fpu->fp_status),
+                 !float32_is_unordered(1, fsth1, fsth0, &env->fpu->fp_status) && float32_lt(fsth0, fsth1, &env->fpu->fp_status))
+FOP_COND_PS(nge, float32_is_unordered(1, fst1, fst0, &env->fpu->fp_status)    || float32_lt(fst0, fst1, &env->fpu->fp_status),
+                 float32_is_unordered(1, fsth1, fsth0, &env->fpu->fp_status)  || float32_lt(fsth0, fsth1, &env->fpu->fp_status))
+FOP_COND_PS(le,  !float32_is_unordered(1, fst1, fst0, &env->fpu->fp_status)   && float32_le(fst0, fst1, &env->fpu->fp_status),
+                 !float32_is_unordered(1, fsth1, fsth0, &env->fpu->fp_status) && float32_le(fsth0, fsth1, &env->fpu->fp_status))
+FOP_COND_PS(ngt, float32_is_unordered(1, fst1, fst0, &env->fpu->fp_status)    || float32_le(fst0, fst1, &env->fpu->fp_status),
+                 float32_is_unordered(1, fsth1, fsth0, &env->fpu->fp_status)  || float32_le(fsth0, fsth1, &env->fpu->fp_status))
diff --git a/target-mips/translate.c b/target-mips/translate.c
index f923263..bdc0f7b 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -425,9 +425,6 @@
 /* global register indices */
 static TCGv cpu_env, bcond, btarget, current_fpu;
 
-/* FPU TNs, global for now. */
-static TCGv fpu32_T[3], fpu64_T[3], fpu32h_T[3];
-
 #include "gen-icount.h"
 
 static inline void tcg_gen_helper_0_i(void *func, TCGv arg)
@@ -723,36 +720,39 @@
     tcg_temp_free(r_tmp2);
 }
 
-#define FOP_CONDS(type, fmt)                                              \
-static GenOpFunc1 * fcmp ## type ## _ ## fmt ## _table[16] = {            \
-    do_cmp ## type ## _ ## fmt ## _f,                                     \
-    do_cmp ## type ## _ ## fmt ## _un,                                    \
-    do_cmp ## type ## _ ## fmt ## _eq,                                    \
-    do_cmp ## type ## _ ## fmt ## _ueq,                                   \
-    do_cmp ## type ## _ ## fmt ## _olt,                                   \
-    do_cmp ## type ## _ ## fmt ## _ult,                                   \
-    do_cmp ## type ## _ ## fmt ## _ole,                                   \
-    do_cmp ## type ## _ ## fmt ## _ule,                                   \
-    do_cmp ## type ## _ ## fmt ## _sf,                                    \
-    do_cmp ## type ## _ ## fmt ## _ngle,                                  \
-    do_cmp ## type ## _ ## fmt ## _seq,                                   \
-    do_cmp ## type ## _ ## fmt ## _ngl,                                   \
-    do_cmp ## type ## _ ## fmt ## _lt,                                    \
-    do_cmp ## type ## _ ## fmt ## _nge,                                   \
-    do_cmp ## type ## _ ## fmt ## _le,                                    \
-    do_cmp ## type ## _ ## fmt ## _ngt,                                   \
-};                                                                        \
-static inline void gen_cmp ## type ## _ ## fmt(int n, long cc)            \
-{                                                                         \
-    tcg_gen_helper_0_i(fcmp ## type ## _ ## fmt ## _table[n], cc);        \
+typedef void (fcmp_fun32)(uint32_t, uint32_t, int);
+typedef void (fcmp_fun64)(uint64_t, uint64_t, int);
+
+#define FOP_CONDS(fcmp_fun, type, arg0, arg1, fmt)                            \
+static fcmp_fun * fcmp ## type ## _ ## fmt ## _table[16] = {                  \
+    do_cmp ## type ## _ ## fmt ## _f,                                         \
+    do_cmp ## type ## _ ## fmt ## _un,                                        \
+    do_cmp ## type ## _ ## fmt ## _eq,                                        \
+    do_cmp ## type ## _ ## fmt ## _ueq,                                       \
+    do_cmp ## type ## _ ## fmt ## _olt,                                       \
+    do_cmp ## type ## _ ## fmt ## _ult,                                       \
+    do_cmp ## type ## _ ## fmt ## _ole,                                       \
+    do_cmp ## type ## _ ## fmt ## _ule,                                       \
+    do_cmp ## type ## _ ## fmt ## _sf,                                        \
+    do_cmp ## type ## _ ## fmt ## _ngle,                                      \
+    do_cmp ## type ## _ ## fmt ## _seq,                                       \
+    do_cmp ## type ## _ ## fmt ## _ngl,                                       \
+    do_cmp ## type ## _ ## fmt ## _lt,                                        \
+    do_cmp ## type ## _ ## fmt ## _nge,                                       \
+    do_cmp ## type ## _ ## fmt ## _le,                                        \
+    do_cmp ## type ## _ ## fmt ## _ngt,                                       \
+};                                                                            \
+static inline void gen_cmp ## type ## _ ## fmt(int n, arg0 a, arg1 b, int cc) \
+{                                                                             \
+    tcg_gen_helper_0_2i(fcmp ## type ## _ ## fmt ## _table[n], a, b, cc);     \
 }
 
-FOP_CONDS(, d)
-FOP_CONDS(abs, d)
-FOP_CONDS(, s)
-FOP_CONDS(abs, s)
-FOP_CONDS(, ps)
-FOP_CONDS(abs, ps)
+FOP_CONDS(fcmp_fun64, , uint64_t, uint64_t, d)
+FOP_CONDS(fcmp_fun64, abs, uint64_t, uint64_t, d)
+FOP_CONDS(fcmp_fun32, , uint32_t, uint32_t, s)
+FOP_CONDS(fcmp_fun32, abs, uint32_t, uint32_t, s)
+FOP_CONDS(fcmp_fun64, , uint64_t, uint64_t, ps)
+FOP_CONDS(fcmp_fun64, abs, uint64_t, uint64_t, ps)
 #undef FOP_CONDS
 
 /* Tests */
@@ -1234,23 +1234,43 @@
        memory access. */
     switch (opc) {
     case OPC_LWC1:
-        tcg_gen_qemu_ld32s(fpu32_T[0], t0, ctx->mem_idx);
-        gen_store_fpr32(fpu32_T[0], ft);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+
+            tcg_gen_qemu_ld32s(fp0, t0, ctx->mem_idx);
+            gen_store_fpr32(fp0, ft);
+            tcg_temp_free(fp0);
+        }
         opn = "lwc1";
         break;
     case OPC_SWC1:
-        gen_load_fpr32(fpu32_T[0], ft);
-        tcg_gen_qemu_st32(fpu32_T[0], t0, ctx->mem_idx);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, ft);
+            tcg_gen_qemu_st32(fp0, t0, ctx->mem_idx);
+            tcg_temp_free(fp0);
+        }
         opn = "swc1";
         break;
     case OPC_LDC1:
-        tcg_gen_qemu_ld64(fpu64_T[0], t0, ctx->mem_idx);
-        gen_store_fpr64(ctx, fpu64_T[0], ft);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+
+            tcg_gen_qemu_ld64(fp0, t0, ctx->mem_idx);
+            gen_store_fpr64(ctx, fp0, ft);
+            tcg_temp_free(fp0);
+        }
         opn = "ldc1";
         break;
     case OPC_SDC1:
-        gen_load_fpr64(ctx, fpu64_T[0], ft);
-        tcg_gen_qemu_st64(fpu64_T[0], t0, ctx->mem_idx);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, ft);
+            tcg_gen_qemu_st64(fp0, t0, ctx->mem_idx);
+            tcg_temp_free(fp0);
+        }
         opn = "sdc1";
         break;
     default:
@@ -5318,11 +5338,17 @@
     case 2:
         /* XXX: For now we support only a single FPU context. */
         if (h == 0) {
-            gen_load_fpr32(fpu32_T[0], rt);
-            tcg_gen_ext_i32_tl(t0, fpu32_T[0]);
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, rt);
+            tcg_gen_ext_i32_tl(t0, fp0);
+            tcg_temp_free(fp0);
         } else {
-            gen_load_fpr32h(fpu32h_T[0], rt);
-            tcg_gen_ext_i32_tl(t0, fpu32h_T[0]);
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32h(fp0, rt);
+            tcg_gen_ext_i32_tl(t0, fp0);
+            tcg_temp_free(fp0);
         }
         break;
     case 3:
@@ -5487,11 +5513,17 @@
     case 2:
         /* XXX: For now we support only a single FPU context. */
         if (h == 0) {
-            tcg_gen_trunc_tl_i32(fpu32_T[0], t0);
-            gen_store_fpr32(fpu32_T[0], rd);
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+
+            tcg_gen_trunc_tl_i32(fp0, t0);
+            gen_store_fpr32(fp0, rd);
+            tcg_temp_free(fp0);
         } else {
-            tcg_gen_trunc_tl_i32(fpu32h_T[0], t0);
-            gen_store_fpr32h(fpu32h_T[0], rd);
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+
+            tcg_gen_trunc_tl_i32(fp0, t0);
+            gen_store_fpr32h(fp0, rd);
+            tcg_temp_free(fp0);
         }
         break;
     case 3:
@@ -5873,15 +5905,25 @@
 
     switch (opc) {
     case OPC_MFC1:
-        gen_load_fpr32(fpu32_T[0], fs);
-        tcg_gen_ext_i32_tl(t0, fpu32_T[0]);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            tcg_gen_ext_i32_tl(t0, fp0);
+            tcg_temp_free(fp0);
+        }
         gen_store_gpr(t0, rt);
         opn = "mfc1";
         break;
     case OPC_MTC1:
         gen_load_gpr(t0, rt);
-        tcg_gen_trunc_tl_i32(fpu32_T[0], t0);
-        gen_store_fpr32(fpu32_T[0], fs);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+
+            tcg_gen_trunc_tl_i32(fp0, t0);
+            gen_store_fpr32(fp0, fs);
+            tcg_temp_free(fp0);
+        }
         opn = "mtc1";
         break;
     case OPC_CFC1:
@@ -5895,27 +5937,47 @@
         opn = "ctc1";
         break;
     case OPC_DMFC1:
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        tcg_gen_mov_tl(t0, fpu64_T[0]);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            tcg_gen_mov_tl(t0, fp0);
+            tcg_temp_free(fp0);
+        }
         gen_store_gpr(t0, rt);
         opn = "dmfc1";
         break;
     case OPC_DMTC1:
         gen_load_gpr(t0, rt);
-        tcg_gen_mov_tl(fpu64_T[0], t0);
-        gen_store_fpr64(ctx, fpu64_T[0], fs);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+
+            tcg_gen_mov_tl(fp0, t0);
+            gen_store_fpr64(ctx, fp0, fs);
+            tcg_temp_free(fp0);
+        }
         opn = "dmtc1";
         break;
     case OPC_MFHC1:
-        gen_load_fpr32h(fpu32h_T[0], fs);
-        tcg_gen_ext_i32_tl(t0, fpu32h_T[0]);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32h(fp0, fs);
+            tcg_gen_ext_i32_tl(t0, fp0);
+            tcg_temp_free(fp0);
+        }
         gen_store_gpr(t0, rt);
         opn = "mfhc1";
         break;
     case OPC_MTHC1:
         gen_load_gpr(t0, rt);
-        tcg_gen_trunc_tl_i32(fpu32h_T[0], t0);
-        gen_store_fpr32h(fpu32h_T[0], fs);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+
+            tcg_gen_trunc_tl_i32(fp0, t0);
+            gen_store_fpr32h(fp0, fs);
+            tcg_temp_free(fp0);
+        }
         opn = "mthc1";
         break;
     default:
@@ -5967,11 +6029,13 @@
     tcg_temp_free(t0);
 }
 
-static inline void gen_movcf_s (int cc, int tf)
+static inline void gen_movcf_s (int fs, int fd, int cc, int tf)
 {
     uint32_t ccbit;
     int cond;
-    TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32);
+    TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_I32);
+    TCGv fp0 = tcg_temp_local_new(TCG_TYPE_I32);
+    TCGv fp1 = tcg_temp_local_new(TCG_TYPE_I32);
     int l1 = gen_new_label();
 
     if (cc)
@@ -5984,19 +6048,26 @@
     else
         cond = TCG_COND_NE;
 
+    gen_load_fpr32(fp0, fs);
+    gen_load_fpr32(fp1, fd);
     tcg_gen_ld_i32(r_tmp1, current_fpu, offsetof(CPUMIPSFPUContext, fcr31));
     tcg_gen_andi_i32(r_tmp1, r_tmp1, ccbit);
     tcg_gen_brcondi_i32(cond, r_tmp1, 0, l1);
-    tcg_gen_movi_i32(fpu32_T[2], fpu32_T[0]);
+    tcg_gen_mov_i32(fp1, fp0);
+    tcg_temp_free(fp0);
     gen_set_label(l1);
     tcg_temp_free(r_tmp1);
+    gen_store_fpr32(fp1, fd);
+    tcg_temp_free(fp1);
 }
 
-static inline void gen_movcf_d (int cc, int tf)
+static inline void gen_movcf_d (DisasContext *ctx, int fs, int fd, int cc, int tf)
 {
     uint32_t ccbit;
     int cond;
-    TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32);
+    TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_I32);
+    TCGv fp0 = tcg_temp_local_new(TCG_TYPE_I64);
+    TCGv fp1 = tcg_temp_local_new(TCG_TYPE_I64);
     int l1 = gen_new_label();
 
     if (cc)
@@ -6009,19 +6080,28 @@
     else
         cond = TCG_COND_NE;
 
+    gen_load_fpr64(ctx, fp0, fs);
+    gen_load_fpr64(ctx, fp1, fd);
     tcg_gen_ld_i32(r_tmp1, current_fpu, offsetof(CPUMIPSFPUContext, fcr31));
     tcg_gen_andi_i32(r_tmp1, r_tmp1, ccbit);
     tcg_gen_brcondi_i32(cond, r_tmp1, 0, l1);
-    tcg_gen_movi_i64(fpu64_T[2], fpu64_T[0]);
+    tcg_gen_mov_i64(fp1, fp0);
+    tcg_temp_free(fp0);
     gen_set_label(l1);
     tcg_temp_free(r_tmp1);
+    gen_store_fpr64(ctx, fp1, fd);
+    tcg_temp_free(fp1);
 }
 
-static inline void gen_movcf_ps (int cc, int tf)
+static inline void gen_movcf_ps (int fs, int fd, int cc, int tf)
 {
     int cond;
     TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_I32);
     TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_I32);
+    TCGv fp0 = tcg_temp_local_new(TCG_TYPE_I32);
+    TCGv fph0 = tcg_temp_local_new(TCG_TYPE_I32);
+    TCGv fp1 = tcg_temp_local_new(TCG_TYPE_I32);
+    TCGv fph1 = tcg_temp_local_new(TCG_TYPE_I32);
     int l1 = gen_new_label();
     int l2 = gen_new_label();
 
@@ -6030,18 +6110,28 @@
     else
         cond = TCG_COND_NE;
 
+    gen_load_fpr32(fp0, fs);
+    gen_load_fpr32h(fph0, fs);
+    gen_load_fpr32(fp1, fd);
+    gen_load_fpr32h(fph1, fd);
     get_fp_cond(r_tmp1);
     tcg_gen_shri_i32(r_tmp1, r_tmp1, cc);
     tcg_gen_andi_i32(r_tmp2, r_tmp1, 0x1);
     tcg_gen_brcondi_i32(cond, r_tmp2, 0, l1);
-    tcg_gen_movi_i32(fpu32_T[2], fpu32_T[0]);
+    tcg_gen_mov_i32(fp1, fp0);
+    tcg_temp_free(fp0);
     gen_set_label(l1);
     tcg_gen_andi_i32(r_tmp2, r_tmp1, 0x2);
     tcg_gen_brcondi_i32(cond, r_tmp2, 0, l2);
-    tcg_gen_movi_i32(fpu32h_T[2], fpu32h_T[0]);
+    tcg_gen_mov_i32(fph1, fph0);
+    tcg_temp_free(fph0);
     gen_set_label(l2);
     tcg_temp_free(r_tmp1);
     tcg_temp_free(r_tmp2);
+    gen_store_fpr32(fp1, fd);
+    gen_store_fpr32h(fph1, fd);
+    tcg_temp_free(fp1);
+    tcg_temp_free(fph1);
 }
 
 
@@ -6090,224 +6180,381 @@
 
     switch (ctx->opcode & FOP(0x3f, 0x1f)) {
     case FOP(0, 16):
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32(fpu32_T[1], ft);
-        tcg_gen_helper_0_0(do_float_add_s);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(fp1, ft);
+            tcg_gen_helper_1_2(do_float_add_s, fp0, fp0, fp1);
+            tcg_temp_free(fp1);
+            gen_store_fpr32(fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "add.s";
         optype = BINOP;
         break;
     case FOP(1, 16):
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32(fpu32_T[1], ft);
-        tcg_gen_helper_0_0(do_float_sub_s);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(fp1, ft);
+            tcg_gen_helper_1_2(do_float_sub_s, fp0, fp0, fp1);
+            tcg_temp_free(fp1);
+            gen_store_fpr32(fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "sub.s";
         optype = BINOP;
         break;
     case FOP(2, 16):
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32(fpu32_T[1], ft);
-        tcg_gen_helper_0_0(do_float_mul_s);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(fp1, ft);
+            tcg_gen_helper_1_2(do_float_mul_s, fp0, fp0, fp1);
+            tcg_temp_free(fp1);
+            gen_store_fpr32(fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "mul.s";
         optype = BINOP;
         break;
     case FOP(3, 16):
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32(fpu32_T[1], ft);
-        tcg_gen_helper_0_0(do_float_div_s);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(fp1, ft);
+            tcg_gen_helper_1_2(do_float_div_s, fp0, fp0, fp1);
+            tcg_temp_free(fp1);
+            gen_store_fpr32(fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "div.s";
         optype = BINOP;
         break;
     case FOP(4, 16):
-        gen_load_fpr32(fpu32_T[0], fs);
-        tcg_gen_helper_0_0(do_float_sqrt_s);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            tcg_gen_helper_1_1(do_float_sqrt_s, fp0, fp0);
+            gen_store_fpr32(fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "sqrt.s";
         break;
     case FOP(5, 16):
-        gen_load_fpr32(fpu32_T[0], fs);
-        tcg_gen_helper_0_0(do_float_abs_s);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            tcg_gen_helper_1_1(do_float_abs_s, fp0, fp0);
+            gen_store_fpr32(fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "abs.s";
         break;
     case FOP(6, 16):
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_store_fpr32(fpu32_T[0], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            gen_store_fpr32(fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "mov.s";
         break;
     case FOP(7, 16):
-        gen_load_fpr32(fpu32_T[0], fs);
-        tcg_gen_helper_0_0(do_float_chs_s);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            tcg_gen_helper_1_1(do_float_chs_s, fp0, fp0);
+            gen_store_fpr32(fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "neg.s";
         break;
     case FOP(8, 16):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        tcg_gen_helper_0_0(do_float_roundl_s);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp32 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp64 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr32(fp32, fs);
+            tcg_gen_helper_1_1(do_float_roundl_s, fp64, fp32);
+            tcg_temp_free(fp32);
+            gen_store_fpr64(ctx, fp64, fd);
+            tcg_temp_free(fp64);
+        }
         opn = "round.l.s";
         break;
     case FOP(9, 16):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        tcg_gen_helper_0_0(do_float_truncl_s);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp32 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp64 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr32(fp32, fs);
+            tcg_gen_helper_1_1(do_float_truncl_s, fp64, fp32);
+            tcg_temp_free(fp32);
+            gen_store_fpr64(ctx, fp64, fd);
+            tcg_temp_free(fp64);
+        }
         opn = "trunc.l.s";
         break;
     case FOP(10, 16):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        tcg_gen_helper_0_0(do_float_ceill_s);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp32 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp64 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr32(fp32, fs);
+            tcg_gen_helper_1_1(do_float_ceill_s, fp64, fp32);
+            tcg_temp_free(fp32);
+            gen_store_fpr64(ctx, fp64, fd);
+            tcg_temp_free(fp64);
+        }
         opn = "ceil.l.s";
         break;
     case FOP(11, 16):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        tcg_gen_helper_0_0(do_float_floorl_s);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp32 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp64 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr32(fp32, fs);
+            tcg_gen_helper_1_1(do_float_floorl_s, fp64, fp32);
+            tcg_temp_free(fp32);
+            gen_store_fpr64(ctx, fp64, fd);
+            tcg_temp_free(fp64);
+        }
         opn = "floor.l.s";
         break;
     case FOP(12, 16):
-        gen_load_fpr32(fpu32_T[0], fs);
-        tcg_gen_helper_0_0(do_float_roundw_s);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            tcg_gen_helper_1_1(do_float_roundw_s, fp0, fp0);
+            gen_store_fpr32(fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "round.w.s";
         break;
     case FOP(13, 16):
-        gen_load_fpr32(fpu32_T[0], fs);
-        tcg_gen_helper_0_0(do_float_truncw_s);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            tcg_gen_helper_1_1(do_float_truncw_s, fp0, fp0);
+            gen_store_fpr32(fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "trunc.w.s";
         break;
     case FOP(14, 16):
-        gen_load_fpr32(fpu32_T[0], fs);
-        tcg_gen_helper_0_0(do_float_ceilw_s);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            tcg_gen_helper_1_1(do_float_ceilw_s, fp0, fp0);
+            gen_store_fpr32(fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "ceil.w.s";
         break;
     case FOP(15, 16):
-        gen_load_fpr32(fpu32_T[0], fs);
-        tcg_gen_helper_0_0(do_float_floorw_s);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            tcg_gen_helper_1_1(do_float_floorw_s, fp0, fp0);
+            gen_store_fpr32(fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "floor.w.s";
         break;
     case FOP(17, 16):
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32(fpu32_T[2], fd);
-        gen_movcf_s((ft >> 2) & 0x7, ft & 0x1);
-        gen_store_fpr32(fpu32_T[2], fd);
+        gen_movcf_s(fs, fd, (ft >> 2) & 0x7, ft & 0x1);
         opn = "movcf.s";
         break;
     case FOP(18, 16):
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32(fpu32_T[2], fd);
         {
             int l1 = gen_new_label();
             TCGv t0 = tcg_temp_local_new(TCG_TYPE_TL);
+            TCGv fp0 = tcg_temp_local_new(TCG_TYPE_I32);
 
             gen_load_gpr(t0, ft);
             tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, l1);
             tcg_temp_free(t0);
-            tcg_gen_mov_i32(fpu32_T[2], fpu32_T[0]);
+            gen_load_fpr32(fp0, fs);
+            gen_store_fpr32(fp0, fd);
+            tcg_temp_free(fp0);
             gen_set_label(l1);
         }
-        gen_store_fpr32(fpu32_T[2], fd);
         opn = "movz.s";
         break;
     case FOP(19, 16):
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32(fpu32_T[2], fd);
         {
             int l1 = gen_new_label();
             TCGv t0 = tcg_temp_local_new(TCG_TYPE_TL);
+            TCGv fp0 = tcg_temp_local_new(TCG_TYPE_I32);
 
             gen_load_gpr(t0, ft);
             tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, l1);
             tcg_temp_free(t0);
-            tcg_gen_mov_i32(fpu32_T[2], fpu32_T[0]);
+            gen_load_fpr32(fp0, fs);
+            gen_store_fpr32(fp0, fd);
+            tcg_temp_free(fp0);
             gen_set_label(l1);
         }
-        gen_store_fpr32(fpu32_T[2], fd);
         opn = "movn.s";
         break;
     case FOP(21, 16):
         check_cop1x(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        tcg_gen_helper_0_0(do_float_recip_s);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            tcg_gen_helper_1_1(do_float_recip_s, fp0, fp0);
+            gen_store_fpr32(fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "recip.s";
         break;
     case FOP(22, 16):
         check_cop1x(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        tcg_gen_helper_0_0(do_float_rsqrt_s);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            tcg_gen_helper_1_1(do_float_rsqrt_s, fp0, fp0);
+            gen_store_fpr32(fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "rsqrt.s";
         break;
     case FOP(28, 16):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32(fpu32_T[2], fd);
-        tcg_gen_helper_0_0(do_float_recip2_s);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(fp1, ft);
+            tcg_gen_helper_1_2(do_float_recip2_s, fp0, fp0, fp1);
+            tcg_temp_free(fp1);
+            gen_store_fpr32(fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "recip2.s";
         break;
     case FOP(29, 16):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        tcg_gen_helper_0_0(do_float_recip1_s);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            tcg_gen_helper_1_1(do_float_recip1_s, fp0, fp0);
+            gen_store_fpr32(fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "recip1.s";
         break;
     case FOP(30, 16):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        tcg_gen_helper_0_0(do_float_rsqrt1_s);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            tcg_gen_helper_1_1(do_float_rsqrt1_s, fp0, fp0);
+            gen_store_fpr32(fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "rsqrt1.s";
         break;
     case FOP(31, 16):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32(fpu32_T[2], ft);
-        tcg_gen_helper_0_0(do_float_rsqrt2_s);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(fp1, ft);
+            tcg_gen_helper_1_2(do_float_rsqrt2_s, fp0, fp0, fp1);
+            tcg_temp_free(fp1);
+            gen_store_fpr32(fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "rsqrt2.s";
         break;
     case FOP(33, 16):
         check_cp1_registers(ctx, fd);
-        gen_load_fpr32(fpu32_T[0], fs);
-        tcg_gen_helper_0_0(do_float_cvtd_s);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp32 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp64 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr32(fp32, fs);
+            tcg_gen_helper_1_1(do_float_cvtd_s, fp64, fp32);
+            tcg_temp_free(fp32);
+            gen_store_fpr64(ctx, fp64, fd);
+            tcg_temp_free(fp64);
+        }
         opn = "cvt.d.s";
         break;
     case FOP(36, 16):
-        gen_load_fpr32(fpu32_T[0], fs);
-        tcg_gen_helper_0_0(do_float_cvtw_s);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            tcg_gen_helper_1_1(do_float_cvtw_s, fp0, fp0);
+            gen_store_fpr32(fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "cvt.w.s";
         break;
     case FOP(37, 16):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        tcg_gen_helper_0_0(do_float_cvtl_s);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp32 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp64 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr32(fp32, fs);
+            tcg_gen_helper_1_1(do_float_cvtl_s, fp64, fp32);
+            tcg_temp_free(fp32);
+            gen_store_fpr64(ctx, fp64, fd);
+            tcg_temp_free(fp64);
+        }
         opn = "cvt.l.s";
         break;
     case FOP(38, 16):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32(fpu32_T[1], ft);
-        tcg_gen_extu_i32_i64(fpu64_T[0], fpu32_T[0]);
-        tcg_gen_extu_i32_i64(fpu64_T[1], fpu32_T[1]);
-        tcg_gen_shli_i64(fpu64_T[1], fpu64_T[1], 32);
-        tcg_gen_or_i64(fpu64_T[2], fpu64_T[0], fpu64_T[1]);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp64_0 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp64_1 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp32_0 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp32_1 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp32_0, fs);
+            gen_load_fpr32(fp32_1, ft);
+            tcg_gen_extu_i32_i64(fp64_0, fp32_0);
+            tcg_gen_extu_i32_i64(fp64_1, fp32_1);
+            tcg_temp_free(fp32_0);
+            tcg_temp_free(fp32_1);
+            tcg_gen_shli_i64(fp64_1, fp64_1, 32);
+            tcg_gen_or_i64(fp64_0, fp64_0, fp64_1);
+            tcg_temp_free(fp64_1);
+            gen_store_fpr64(ctx, fp64_0, fd);
+            tcg_temp_free(fp64_0);
+        }
         opn = "cvt.ps.s";
         break;
     case FOP(48, 16):
@@ -6326,217 +6573,351 @@
     case FOP(61, 16):
     case FOP(62, 16):
     case FOP(63, 16):
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32(fpu32_T[1], ft);
-        if (ctx->opcode & (1 << 6)) {
-            check_cop1x(ctx);
-            gen_cmpabs_s(func-48, cc);
-            opn = condnames_abs[func-48];
-        } else {
-            gen_cmp_s(func-48, cc);
-            opn = condnames[func-48];
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(fp1, ft);
+            if (ctx->opcode & (1 << 6)) {
+                check_cop1x(ctx);
+                gen_cmpabs_s(func-48, fp0, fp1, cc);
+                opn = condnames_abs[func-48];
+            } else {
+                gen_cmp_s(func-48, fp0, fp1, cc);
+                opn = condnames[func-48];
+            }
+            tcg_temp_free(fp0);
+            tcg_temp_free(fp1);
         }
         break;
     case FOP(0, 17):
         check_cp1_registers(ctx, fs | ft | fd);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        gen_load_fpr64(ctx, fpu64_T[1], ft);
-        tcg_gen_helper_0_0(do_float_add_d);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            gen_load_fpr64(ctx, fp1, ft);
+            tcg_gen_helper_1_2(do_float_add_d, fp0, fp0, fp1);
+            tcg_temp_free(fp1);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "add.d";
         optype = BINOP;
         break;
     case FOP(1, 17):
         check_cp1_registers(ctx, fs | ft | fd);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        gen_load_fpr64(ctx, fpu64_T[1], ft);
-        tcg_gen_helper_0_0(do_float_sub_d);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            gen_load_fpr64(ctx, fp1, ft);
+            tcg_gen_helper_1_2(do_float_sub_d, fp0, fp0, fp1);
+            tcg_temp_free(fp1);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "sub.d";
         optype = BINOP;
         break;
     case FOP(2, 17):
         check_cp1_registers(ctx, fs | ft | fd);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        gen_load_fpr64(ctx, fpu64_T[1], ft);
-        tcg_gen_helper_0_0(do_float_mul_d);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            gen_load_fpr64(ctx, fp1, ft);
+            tcg_gen_helper_1_2(do_float_mul_d, fp0, fp0, fp1);
+            tcg_temp_free(fp1);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "mul.d";
         optype = BINOP;
         break;
     case FOP(3, 17):
         check_cp1_registers(ctx, fs | ft | fd);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        gen_load_fpr64(ctx, fpu64_T[1], ft);
-        tcg_gen_helper_0_0(do_float_div_d);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            gen_load_fpr64(ctx, fp1, ft);
+            tcg_gen_helper_1_2(do_float_div_d, fp0, fp0, fp1);
+            tcg_temp_free(fp1);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "div.d";
         optype = BINOP;
         break;
     case FOP(4, 17):
         check_cp1_registers(ctx, fs | fd);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        tcg_gen_helper_0_0(do_float_sqrt_d);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            tcg_gen_helper_1_1(do_float_sqrt_d, fp0, fp0);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "sqrt.d";
         break;
     case FOP(5, 17):
         check_cp1_registers(ctx, fs | fd);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        tcg_gen_helper_0_0(do_float_abs_d);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            tcg_gen_helper_1_1(do_float_abs_d, fp0, fp0);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "abs.d";
         break;
     case FOP(6, 17):
         check_cp1_registers(ctx, fs | fd);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        gen_store_fpr64(ctx, fpu64_T[0], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "mov.d";
         break;
     case FOP(7, 17):
         check_cp1_registers(ctx, fs | fd);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        tcg_gen_helper_0_0(do_float_chs_d);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            tcg_gen_helper_1_1(do_float_chs_d, fp0, fp0);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "neg.d";
         break;
     case FOP(8, 17):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        tcg_gen_helper_0_0(do_float_roundl_d);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            tcg_gen_helper_1_1(do_float_roundl_d, fp0, fp0);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "round.l.d";
         break;
     case FOP(9, 17):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        tcg_gen_helper_0_0(do_float_truncl_d);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            tcg_gen_helper_1_1(do_float_truncl_d, fp0, fp0);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "trunc.l.d";
         break;
     case FOP(10, 17):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        tcg_gen_helper_0_0(do_float_ceill_d);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            tcg_gen_helper_1_1(do_float_ceill_d, fp0, fp0);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "ceil.l.d";
         break;
     case FOP(11, 17):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        tcg_gen_helper_0_0(do_float_floorl_d);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            tcg_gen_helper_1_1(do_float_floorl_d, fp0, fp0);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "floor.l.d";
         break;
     case FOP(12, 17):
         check_cp1_registers(ctx, fs);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        tcg_gen_helper_0_0(do_float_roundw_d);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp32 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp64 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp64, fs);
+            tcg_gen_helper_1_1(do_float_roundw_d, fp32, fp64);
+            tcg_temp_free(fp64);
+            gen_store_fpr32(fp32, fd);
+            tcg_temp_free(fp32);
+        }
         opn = "round.w.d";
         break;
     case FOP(13, 17):
         check_cp1_registers(ctx, fs);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        tcg_gen_helper_0_0(do_float_truncw_d);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp32 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp64 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp64, fs);
+            tcg_gen_helper_1_1(do_float_truncw_d, fp32, fp64);
+            tcg_temp_free(fp64);
+            gen_store_fpr32(fp32, fd);
+            tcg_temp_free(fp32);
+        }
         opn = "trunc.w.d";
         break;
     case FOP(14, 17):
         check_cp1_registers(ctx, fs);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        tcg_gen_helper_0_0(do_float_ceilw_d);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp32 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp64 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp64, fs);
+            tcg_gen_helper_1_1(do_float_ceilw_d, fp32, fp64);
+            tcg_temp_free(fp64);
+            gen_store_fpr32(fp32, fd);
+            tcg_temp_free(fp32);
+        }
         opn = "ceil.w.d";
         break;
     case FOP(15, 17):
         check_cp1_registers(ctx, fs);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        tcg_gen_helper_0_0(do_float_floorw_d);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp32 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp64 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp64, fs);
+            tcg_gen_helper_1_1(do_float_floorw_d, fp32, fp64);
+            tcg_temp_free(fp64);
+            gen_store_fpr32(fp32, fd);
+            tcg_temp_free(fp32);
+        }
         opn = "floor.w.d";
         break;
     case FOP(17, 17):
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        gen_load_fpr64(ctx, fpu64_T[2], fd);
-        gen_movcf_d((ft >> 2) & 0x7, ft & 0x1);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        gen_movcf_d(ctx, fs, fd, (ft >> 2) & 0x7, ft & 0x1);
         opn = "movcf.d";
         break;
     case FOP(18, 17):
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        gen_load_fpr64(ctx, fpu64_T[2], fd);
         {
             int l1 = gen_new_label();
             TCGv t0 = tcg_temp_local_new(TCG_TYPE_TL);
+            TCGv fp0 = tcg_temp_local_new(TCG_TYPE_I64);
 
             gen_load_gpr(t0, ft);
             tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, l1);
             tcg_temp_free(t0);
-            tcg_gen_mov_i64(fpu64_T[2], fpu64_T[0]);
+            gen_load_fpr64(ctx, fp0, fs);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
             gen_set_label(l1);
         }
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
         opn = "movz.d";
         break;
     case FOP(19, 17):
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        gen_load_fpr64(ctx, fpu64_T[2], fd);
         {
             int l1 = gen_new_label();
             TCGv t0 = tcg_temp_local_new(TCG_TYPE_TL);
+            TCGv fp0 = tcg_temp_local_new(TCG_TYPE_I64);
 
             gen_load_gpr(t0, ft);
             tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, l1);
             tcg_temp_free(t0);
-            tcg_gen_mov_i64(fpu64_T[2], fpu64_T[0]);
+            gen_load_fpr64(ctx, fp0, fs);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
             gen_set_label(l1);
         }
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
         opn = "movn.d";
         break;
     case FOP(21, 17):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        tcg_gen_helper_0_0(do_float_recip_d);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            tcg_gen_helper_1_1(do_float_recip_d, fp0, fp0);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "recip.d";
         break;
     case FOP(22, 17):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        tcg_gen_helper_0_0(do_float_rsqrt_d);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            tcg_gen_helper_1_1(do_float_rsqrt_d, fp0, fp0);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "rsqrt.d";
         break;
     case FOP(28, 17):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        gen_load_fpr64(ctx, fpu64_T[2], ft);
-        tcg_gen_helper_0_0(do_float_recip2_d);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            gen_load_fpr64(ctx, fp1, ft);
+            tcg_gen_helper_1_2(do_float_recip2_d, fp0, fp0, fp1);
+            tcg_temp_free(fp1);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "recip2.d";
         break;
     case FOP(29, 17):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        tcg_gen_helper_0_0(do_float_recip1_d);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            tcg_gen_helper_1_1(do_float_recip1_d, fp0, fp0);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "recip1.d";
         break;
     case FOP(30, 17):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        tcg_gen_helper_0_0(do_float_rsqrt1_d);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            tcg_gen_helper_1_1(do_float_rsqrt1_d, fp0, fp0);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "rsqrt1.d";
         break;
     case FOP(31, 17):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        gen_load_fpr64(ctx, fpu64_T[2], ft);
-        tcg_gen_helper_0_0(do_float_rsqrt2_d);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            gen_load_fpr64(ctx, fp1, ft);
+            tcg_gen_helper_1_2(do_float_rsqrt2_d, fp0, fp0, fp1);
+            tcg_temp_free(fp1);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "rsqrt2.d";
         break;
     case FOP(48, 17):
@@ -6555,303 +6936,434 @@
     case FOP(61, 17):
     case FOP(62, 17):
     case FOP(63, 17):
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        gen_load_fpr64(ctx, fpu64_T[1], ft);
-        if (ctx->opcode & (1 << 6)) {
-            check_cop1x(ctx);
-            check_cp1_registers(ctx, fs | ft);
-            gen_cmpabs_d(func-48, cc);
-            opn = condnames_abs[func-48];
-        } else {
-            check_cp1_registers(ctx, fs | ft);
-            gen_cmp_d(func-48, cc);
-            opn = condnames[func-48];
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            gen_load_fpr64(ctx, fp1, ft);
+            if (ctx->opcode & (1 << 6)) {
+                check_cop1x(ctx);
+                check_cp1_registers(ctx, fs | ft);
+                gen_cmpabs_d(func-48, fp0, fp1, cc);
+                opn = condnames_abs[func-48];
+            } else {
+                check_cp1_registers(ctx, fs | ft);
+                gen_cmp_d(func-48, fp0, fp1, cc);
+                opn = condnames[func-48];
+            }
+            tcg_temp_free(fp0);
+            tcg_temp_free(fp1);
         }
         break;
     case FOP(32, 17):
         check_cp1_registers(ctx, fs);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        tcg_gen_helper_0_0(do_float_cvts_d);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp32 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp64 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp64, fs);
+            tcg_gen_helper_1_1(do_float_cvts_d, fp32, fp64);
+            tcg_temp_free(fp64);
+            gen_store_fpr32(fp32, fd);
+            tcg_temp_free(fp32);
+        }
         opn = "cvt.s.d";
         break;
     case FOP(36, 17):
         check_cp1_registers(ctx, fs);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        tcg_gen_helper_0_0(do_float_cvtw_d);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp32 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp64 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp64, fs);
+            tcg_gen_helper_1_1(do_float_cvtw_d, fp32, fp64);
+            tcg_temp_free(fp64);
+            gen_store_fpr32(fp32, fd);
+            tcg_temp_free(fp32);
+        }
         opn = "cvt.w.d";
         break;
     case FOP(37, 17):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        tcg_gen_helper_0_0(do_float_cvtl_d);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            tcg_gen_helper_1_1(do_float_cvtl_d, fp0, fp0);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "cvt.l.d";
         break;
     case FOP(32, 20):
-        gen_load_fpr32(fpu32_T[0], fs);
-        tcg_gen_helper_0_0(do_float_cvts_w);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            tcg_gen_helper_1_1(do_float_cvts_w, fp0, fp0);
+            gen_store_fpr32(fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "cvt.s.w";
         break;
     case FOP(33, 20):
         check_cp1_registers(ctx, fd);
-        gen_load_fpr32(fpu32_T[0], fs);
-        tcg_gen_helper_0_0(do_float_cvtd_w);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp32 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp64 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr32(fp32, fs);
+            tcg_gen_helper_1_1(do_float_cvtd_w, fp64, fp32);
+            tcg_temp_free(fp32);
+            gen_store_fpr64(ctx, fp64, fd);
+            tcg_temp_free(fp64);
+        }
         opn = "cvt.d.w";
         break;
     case FOP(32, 21):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        tcg_gen_helper_0_0(do_float_cvts_l);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp32 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp64 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp64, fs);
+            tcg_gen_helper_1_1(do_float_cvts_l, fp32, fp64);
+            tcg_temp_free(fp64);
+            gen_store_fpr32(fp32, fd);
+            tcg_temp_free(fp32);
+        }
         opn = "cvt.s.l";
         break;
     case FOP(33, 21):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        tcg_gen_helper_0_0(do_float_cvtd_l);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            tcg_gen_helper_1_1(do_float_cvtd_l, fp0, fp0);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "cvt.d.l";
         break;
     case FOP(38, 20):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32h(fpu32h_T[0], fs);
-        tcg_gen_helper_0_0(do_float_cvtps_pw);
-        gen_store_fpr32(fpu32_T[2], fd);
-        gen_store_fpr32h(fpu32h_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            tcg_gen_helper_1_1(do_float_cvtps_pw, fp0, fp0);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "cvt.ps.pw";
         break;
     case FOP(0, 22):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32h(fpu32h_T[0], fs);
-        gen_load_fpr32(fpu32_T[1], ft);
-        gen_load_fpr32h(fpu32h_T[1], ft);
-        tcg_gen_helper_0_0(do_float_add_ps);
-        gen_store_fpr32(fpu32_T[2], fd);
-        gen_store_fpr32h(fpu32h_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            gen_load_fpr64(ctx, fp1, ft);
+            tcg_gen_helper_1_2(do_float_add_ps, fp0, fp0, fp1);
+            tcg_temp_free(fp1);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "add.ps";
         break;
     case FOP(1, 22):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32h(fpu32h_T[0], fs);
-        gen_load_fpr32(fpu32_T[1], ft);
-        gen_load_fpr32h(fpu32h_T[1], ft);
-        tcg_gen_helper_0_0(do_float_sub_ps);
-        gen_store_fpr32(fpu32_T[2], fd);
-        gen_store_fpr32h(fpu32h_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            gen_load_fpr64(ctx, fp1, ft);
+            tcg_gen_helper_1_2(do_float_sub_ps, fp0, fp0, fp1);
+            tcg_temp_free(fp1);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "sub.ps";
         break;
     case FOP(2, 22):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32h(fpu32h_T[0], fs);
-        gen_load_fpr32(fpu32_T[1], ft);
-        gen_load_fpr32h(fpu32h_T[1], ft);
-        tcg_gen_helper_0_0(do_float_mul_ps);
-        gen_store_fpr32(fpu32_T[2], fd);
-        gen_store_fpr32h(fpu32h_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            gen_load_fpr64(ctx, fp1, ft);
+            tcg_gen_helper_1_2(do_float_mul_ps, fp0, fp0, fp1);
+            tcg_temp_free(fp1);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "mul.ps";
         break;
     case FOP(5, 22):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32h(fpu32h_T[0], fs);
-        tcg_gen_helper_0_0(do_float_abs_ps);
-        gen_store_fpr32(fpu32_T[2], fd);
-        gen_store_fpr32h(fpu32h_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            tcg_gen_helper_1_1(do_float_abs_ps, fp0, fp0);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "abs.ps";
         break;
     case FOP(6, 22):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32h(fpu32h_T[0], fs);
-        gen_store_fpr32(fpu32_T[0], fd);
-        gen_store_fpr32h(fpu32h_T[0], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "mov.ps";
         break;
     case FOP(7, 22):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32h(fpu32h_T[0], fs);
-        tcg_gen_helper_0_0(do_float_chs_ps);
-        gen_store_fpr32(fpu32_T[2], fd);
-        gen_store_fpr32h(fpu32h_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            tcg_gen_helper_1_1(do_float_chs_ps, fp0, fp0);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "neg.ps";
         break;
     case FOP(17, 22):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32h(fpu32h_T[0], fs);
-        gen_load_fpr32(fpu32_T[2], fd);
-        gen_load_fpr32h(fpu32h_T[2], fd);
-        gen_movcf_ps((ft >> 2) & 0x7, ft & 0x1);
-        gen_store_fpr32(fpu32_T[2], fd);
-        gen_store_fpr32h(fpu32h_T[2], fd);
+        gen_movcf_ps(fs, fd, (ft >> 2) & 0x7, ft & 0x1);
         opn = "movcf.ps";
         break;
     case FOP(18, 22):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32h(fpu32h_T[0], fs);
-        gen_load_fpr32(fpu32_T[2], fd);
-        gen_load_fpr32h(fpu32h_T[2], fd);
         {
             int l1 = gen_new_label();
             TCGv t0 = tcg_temp_local_new(TCG_TYPE_TL);
+            TCGv fp0 = tcg_temp_local_new(TCG_TYPE_I32);
+            TCGv fph0 = tcg_temp_local_new(TCG_TYPE_I32);
 
             gen_load_gpr(t0, ft);
             tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, l1);
             tcg_temp_free(t0);
-            tcg_gen_mov_i32(fpu32_T[2], fpu32_T[0]);
-            tcg_gen_mov_i32(fpu32h_T[2], fpu32h_T[0]);
+            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32h(fph0, fs);
+            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32h(fph0, fd);
+            tcg_temp_free(fp0);
+            tcg_temp_free(fph0);
             gen_set_label(l1);
         }
-        gen_store_fpr32(fpu32_T[2], fd);
-        gen_store_fpr32h(fpu32h_T[2], fd);
         opn = "movz.ps";
         break;
     case FOP(19, 22):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32h(fpu32h_T[0], fs);
-        gen_load_fpr32(fpu32_T[2], fd);
-        gen_load_fpr32h(fpu32h_T[2], fd);
         {
             int l1 = gen_new_label();
             TCGv t0 = tcg_temp_local_new(TCG_TYPE_TL);
+            TCGv fp0 = tcg_temp_local_new(TCG_TYPE_I32);
+            TCGv fph0 = tcg_temp_local_new(TCG_TYPE_I32);
 
             gen_load_gpr(t0, ft);
             tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, l1);
             tcg_temp_free(t0);
-            tcg_gen_mov_i32(fpu32_T[2], fpu32_T[0]);
-            tcg_gen_mov_i32(fpu32h_T[2], fpu32h_T[0]);
+            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32h(fph0, fs);
+            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32h(fph0, fd);
+            tcg_temp_free(fp0);
+            tcg_temp_free(fph0);
             gen_set_label(l1);
         }
-        gen_store_fpr32(fpu32_T[2], fd);
-        gen_store_fpr32h(fpu32h_T[2], fd);
         opn = "movn.ps";
         break;
     case FOP(24, 22):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], ft);
-        gen_load_fpr32h(fpu32h_T[0], ft);
-        gen_load_fpr32(fpu32_T[1], fs);
-        gen_load_fpr32h(fpu32h_T[1], fs);
-        tcg_gen_helper_0_0(do_float_addr_ps);
-        gen_store_fpr32(fpu32_T[2], fd);
-        gen_store_fpr32h(fpu32h_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, ft);
+            gen_load_fpr64(ctx, fp1, fs);
+            tcg_gen_helper_1_2(do_float_addr_ps, fp0, fp0, fp1);
+            tcg_temp_free(fp1);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "addr.ps";
         break;
     case FOP(26, 22):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], ft);
-        gen_load_fpr32h(fpu32h_T[0], ft);
-        gen_load_fpr32(fpu32_T[1], fs);
-        gen_load_fpr32h(fpu32h_T[1], fs);
-        tcg_gen_helper_0_0(do_float_mulr_ps);
-        gen_store_fpr32(fpu32_T[2], fd);
-        gen_store_fpr32h(fpu32h_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, ft);
+            gen_load_fpr64(ctx, fp1, fs);
+            tcg_gen_helper_1_2(do_float_mulr_ps, fp0, fp0, fp1);
+            tcg_temp_free(fp1);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "mulr.ps";
         break;
     case FOP(28, 22):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32h(fpu32h_T[0], fs);
-        gen_load_fpr32(fpu32_T[2], fd);
-        gen_load_fpr32h(fpu32h_T[2], fd);
-        tcg_gen_helper_0_0(do_float_recip2_ps);
-        gen_store_fpr32(fpu32_T[2], fd);
-        gen_store_fpr32h(fpu32h_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            gen_load_fpr64(ctx, fp1, fd);
+            tcg_gen_helper_1_2(do_float_recip2_ps, fp0, fp0, fp1);
+            tcg_temp_free(fp1);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "recip2.ps";
         break;
     case FOP(29, 22):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32h(fpu32h_T[0], fs);
-        tcg_gen_helper_0_0(do_float_recip1_ps);
-        gen_store_fpr32(fpu32_T[2], fd);
-        gen_store_fpr32h(fpu32h_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            tcg_gen_helper_1_1(do_float_recip1_ps, fp0, fp0);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "recip1.ps";
         break;
     case FOP(30, 22):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32h(fpu32h_T[0], fs);
-        tcg_gen_helper_0_0(do_float_rsqrt1_ps);
-        gen_store_fpr32(fpu32_T[2], fd);
-        gen_store_fpr32h(fpu32h_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            tcg_gen_helper_1_1(do_float_rsqrt1_ps, fp0, fp0);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "rsqrt1.ps";
         break;
     case FOP(31, 22):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32h(fpu32h_T[0], fs);
-        gen_load_fpr32(fpu32_T[2], ft);
-        gen_load_fpr32h(fpu32h_T[2], ft);
-        tcg_gen_helper_0_0(do_float_rsqrt2_ps);
-        gen_store_fpr32(fpu32_T[2], fd);
-        gen_store_fpr32h(fpu32h_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            gen_load_fpr64(ctx, fp1, ft);
+            tcg_gen_helper_1_2(do_float_rsqrt2_ps, fp0, fp0, fp1);
+            tcg_temp_free(fp1);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "rsqrt2.ps";
         break;
     case FOP(32, 22):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32h(fpu32h_T[0], fs);
-        tcg_gen_helper_0_0(do_float_cvts_pu);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32h(fp0, fs);
+            tcg_gen_helper_1_1(do_float_cvts_pu, fp0, fp0);
+            gen_store_fpr32(fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "cvt.s.pu";
         break;
     case FOP(36, 22):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32h(fpu32h_T[0], fs);
-        tcg_gen_helper_0_0(do_float_cvtpw_ps);
-        gen_store_fpr32(fpu32_T[2], fd);
-        gen_store_fpr32h(fpu32h_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            tcg_gen_helper_1_1(do_float_cvtpw_ps, fp0, fp0);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "cvt.pw.ps";
         break;
     case FOP(40, 22):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        tcg_gen_helper_0_0(do_float_cvts_pl);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            tcg_gen_helper_1_1(do_float_cvts_pl, fp0, fp0);
+            gen_store_fpr32(fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "cvt.s.pl";
         break;
     case FOP(44, 22):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32(fpu32_T[1], ft);
-        gen_store_fpr32h(fpu32_T[0], fd);
-        gen_store_fpr32(fpu32_T[1], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(fp1, ft);
+            gen_store_fpr32h(fp0, fd);
+            gen_store_fpr32(fp1, fd);
+            tcg_temp_free(fp0);
+            tcg_temp_free(fp1);
+        }
         opn = "pll.ps";
         break;
     case FOP(45, 22):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32h(fpu32h_T[1], ft);
-        gen_store_fpr32(fpu32h_T[1], fd);
-        gen_store_fpr32h(fpu32_T[0], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32h(fp1, ft);
+            gen_store_fpr32(fp1, fd);
+            gen_store_fpr32h(fp0, fd);
+            tcg_temp_free(fp0);
+            tcg_temp_free(fp1);
+        }
         opn = "plu.ps";
         break;
     case FOP(46, 22):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32h(fpu32h_T[0], fs);
-        gen_load_fpr32(fpu32_T[1], ft);
-        gen_store_fpr32(fpu32_T[1], fd);
-        gen_store_fpr32h(fpu32h_T[0], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32h(fp0, fs);
+            gen_load_fpr32(fp1, ft);
+            gen_store_fpr32(fp1, fd);
+            gen_store_fpr32h(fp0, fd);
+            tcg_temp_free(fp0);
+            tcg_temp_free(fp1);
+        }
         opn = "pul.ps";
         break;
     case FOP(47, 22):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32h(fpu32h_T[0], fs);
-        gen_load_fpr32h(fpu32h_T[1], ft);
-        gen_store_fpr32(fpu32h_T[1], fd);
-        gen_store_fpr32h(fpu32h_T[0], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32h(fp0, fs);
+            gen_load_fpr32h(fp1, ft);
+            gen_store_fpr32(fp1, fd);
+            gen_store_fpr32h(fp0, fd);
+            tcg_temp_free(fp0);
+            tcg_temp_free(fp1);
+        }
         opn = "puu.ps";
         break;
     case FOP(48, 22):
@@ -6871,16 +7383,21 @@
     case FOP(62, 22):
     case FOP(63, 22):
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32h(fpu32h_T[0], fs);
-        gen_load_fpr32(fpu32_T[1], ft);
-        gen_load_fpr32h(fpu32h_T[1], ft);
-        if (ctx->opcode & (1 << 6)) {
-            gen_cmpabs_ps(func-48, cc);
-            opn = condnames_abs[func-48];
-        } else {
-            gen_cmp_ps(func-48, cc);
-            opn = condnames[func-48];
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            gen_load_fpr64(ctx, fp1, ft);
+            if (ctx->opcode & (1 << 6)) {
+                gen_cmpabs_ps(func-48, fp0, fp1, cc);
+                opn = condnames_abs[func-48];
+            } else {
+                gen_cmp_ps(func-48, fp0, fp1, cc);
+                opn = condnames[func-48];
+            }
+            tcg_temp_free(fp0);
+            tcg_temp_free(fp1);
         }
         break;
     default:
@@ -6924,44 +7441,74 @@
     switch (opc) {
     case OPC_LWXC1:
         check_cop1x(ctx);
-        tcg_gen_qemu_ld32s(fpu32_T[0], t0, ctx->mem_idx);
-        gen_store_fpr32(fpu32_T[0], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+
+            tcg_gen_qemu_ld32s(fp0, t0, ctx->mem_idx);
+            gen_store_fpr32(fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "lwxc1";
         break;
     case OPC_LDXC1:
         check_cop1x(ctx);
         check_cp1_registers(ctx, fd);
-        tcg_gen_qemu_ld64(fpu64_T[0], t0, ctx->mem_idx);
-        gen_store_fpr64(ctx, fpu64_T[0], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+
+            tcg_gen_qemu_ld64(fp0, t0, ctx->mem_idx);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "ldxc1";
         break;
     case OPC_LUXC1:
         check_cp1_64bitmode(ctx);
         tcg_gen_andi_tl(t0, t0, ~0x7);
-        tcg_gen_qemu_ld64(fpu64_T[0], t0, ctx->mem_idx);
-        gen_store_fpr64(ctx, fpu64_T[0], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+
+            tcg_gen_qemu_ld64(fp0, t0, ctx->mem_idx);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free(fp0);
+        }
         opn = "luxc1";
         break;
     case OPC_SWXC1:
         check_cop1x(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        tcg_gen_qemu_st32(fpu32_T[0], t0, ctx->mem_idx);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            tcg_gen_qemu_st32(fp0, t0, ctx->mem_idx);
+            tcg_temp_free(fp0);
+        }
         opn = "swxc1";
         store = 1;
         break;
     case OPC_SDXC1:
         check_cop1x(ctx);
         check_cp1_registers(ctx, fs);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        tcg_gen_qemu_st64(fpu64_T[0], t0, ctx->mem_idx);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            tcg_gen_qemu_st64(fp0, t0, ctx->mem_idx);
+            tcg_temp_free(fp0);
+        }
         opn = "sdxc1";
         store = 1;
         break;
     case OPC_SUXC1:
         check_cp1_64bitmode(ctx);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
         tcg_gen_andi_tl(t0, t0, ~0x7);
-        tcg_gen_qemu_st64(fpu64_T[0], t0, ctx->mem_idx);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            tcg_gen_qemu_st64(fp0, t0, ctx->mem_idx);
+            tcg_temp_free(fp0);
+        }
         opn = "suxc1";
         store = 1;
         break;
@@ -6988,162 +7535,260 @@
         check_cp1_64bitmode(ctx);
         {
             TCGv t0 = tcg_temp_local_new(TCG_TYPE_TL);
+            TCGv fp0 = tcg_temp_local_new(TCG_TYPE_I32);
+            TCGv fph0 = tcg_temp_local_new(TCG_TYPE_I32);
+            TCGv fp1 = tcg_temp_local_new(TCG_TYPE_I32);
+            TCGv fph1 = tcg_temp_local_new(TCG_TYPE_I32);
             int l1 = gen_new_label();
             int l2 = gen_new_label();
 
             gen_load_gpr(t0, fr);
             tcg_gen_andi_tl(t0, t0, 0x7);
-            gen_load_fpr32(fpu32_T[0], fs);
-            gen_load_fpr32h(fpu32h_T[0], fs);
-            gen_load_fpr32(fpu32_T[1], ft);
-            gen_load_fpr32h(fpu32h_T[1], ft);
+            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32h(fph0, fs);
+            gen_load_fpr32(fp1, ft);
+            gen_load_fpr32h(fph1, ft);
 
             tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, l1);
-            tcg_gen_mov_i32(fpu32_T[2], fpu32_T[0]);
-            tcg_gen_mov_i32(fpu32h_T[2], fpu32h_T[0]);
+            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32h(fph0, fd);
             tcg_gen_br(l2);
             gen_set_label(l1);
             tcg_gen_brcondi_tl(TCG_COND_NE, t0, 4, l2);
             tcg_temp_free(t0);
 #ifdef TARGET_WORDS_BIGENDIAN
-            tcg_gen_mov_i32(fpu32h_T[2], fpu32_T[0]);
-            tcg_gen_mov_i32(fpu32_T[2], fpu32h_T[1]);
+            gen_store_fpr32(fph1, fd);
+            gen_store_fpr32h(fp0, fd);
 #else
-            tcg_gen_mov_i32(fpu32h_T[2], fpu32_T[1]);
-            tcg_gen_mov_i32(fpu32_T[2], fpu32h_T[0]);
+            gen_store_fpr32(fph0, fd);
+            gen_store_fpr32h(fp1, fd);
 #endif
             gen_set_label(l2);
+            tcg_temp_free(fp0);
+            tcg_temp_free(fph0);
+            tcg_temp_free(fp1);
+            tcg_temp_free(fph1);
         }
-        gen_store_fpr32(fpu32_T[2], fd);
-        gen_store_fpr32h(fpu32h_T[2], fd);
         opn = "alnv.ps";
         break;
     case OPC_MADD_S:
         check_cop1x(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32(fpu32_T[1], ft);
-        gen_load_fpr32(fpu32_T[2], fr);
-        tcg_gen_helper_0_0(do_float_muladd_s);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp2 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(fp1, ft);
+            gen_load_fpr32(fp2, fr);
+            tcg_gen_helper_1_3(do_float_muladd_s, fp2, fp0, fp1, fp2);
+            tcg_temp_free(fp0);
+            tcg_temp_free(fp1);
+            gen_store_fpr32(fp2, fd);
+            tcg_temp_free(fp2);
+        }
         opn = "madd.s";
         break;
     case OPC_MADD_D:
         check_cop1x(ctx);
         check_cp1_registers(ctx, fd | fs | ft | fr);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        gen_load_fpr64(ctx, fpu64_T[1], ft);
-        gen_load_fpr64(ctx, fpu64_T[2], fr);
-        tcg_gen_helper_0_0(do_float_muladd_d);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp2 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            gen_load_fpr64(ctx, fp1, ft);
+            gen_load_fpr64(ctx, fp2, fr);
+            tcg_gen_helper_1_3(do_float_muladd_d, fp2, fp0, fp1, fp2);
+            tcg_temp_free(fp0);
+            tcg_temp_free(fp1);
+            gen_store_fpr64(ctx, fp2, fd);
+            tcg_temp_free(fp2);
+        }
         opn = "madd.d";
         break;
     case OPC_MADD_PS:
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32h(fpu32h_T[0], fs);
-        gen_load_fpr32(fpu32_T[1], ft);
-        gen_load_fpr32h(fpu32h_T[1], ft);
-        gen_load_fpr32(fpu32_T[2], fr);
-        gen_load_fpr32h(fpu32h_T[2], fr);
-        tcg_gen_helper_0_0(do_float_muladd_ps);
-        gen_store_fpr32(fpu32_T[2], fd);
-        gen_store_fpr32h(fpu32h_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp2 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            gen_load_fpr64(ctx, fp1, ft);
+            gen_load_fpr64(ctx, fp2, fr);
+            tcg_gen_helper_1_3(do_float_muladd_ps, fp2, fp0, fp1, fp2);
+            tcg_temp_free(fp0);
+            tcg_temp_free(fp1);
+            gen_store_fpr64(ctx, fp2, fd);
+            tcg_temp_free(fp2);
+        }
         opn = "madd.ps";
         break;
     case OPC_MSUB_S:
         check_cop1x(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32(fpu32_T[1], ft);
-        gen_load_fpr32(fpu32_T[2], fr);
-        tcg_gen_helper_0_0(do_float_mulsub_s);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp2 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(fp1, ft);
+            gen_load_fpr32(fp2, fr);
+            tcg_gen_helper_1_3(do_float_mulsub_s, fp2, fp0, fp1, fp2);
+            tcg_temp_free(fp0);
+            tcg_temp_free(fp1);
+            gen_store_fpr32(fp2, fd);
+            tcg_temp_free(fp2);
+        }
         opn = "msub.s";
         break;
     case OPC_MSUB_D:
         check_cop1x(ctx);
         check_cp1_registers(ctx, fd | fs | ft | fr);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        gen_load_fpr64(ctx, fpu64_T[1], ft);
-        gen_load_fpr64(ctx, fpu64_T[2], fr);
-        tcg_gen_helper_0_0(do_float_mulsub_d);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp2 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            gen_load_fpr64(ctx, fp1, ft);
+            gen_load_fpr64(ctx, fp2, fr);
+            tcg_gen_helper_1_3(do_float_mulsub_d, fp2, fp0, fp1, fp2);
+            tcg_temp_free(fp0);
+            tcg_temp_free(fp1);
+            gen_store_fpr64(ctx, fp2, fd);
+            tcg_temp_free(fp2);
+        }
         opn = "msub.d";
         break;
     case OPC_MSUB_PS:
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32h(fpu32h_T[0], fs);
-        gen_load_fpr32(fpu32_T[1], ft);
-        gen_load_fpr32h(fpu32h_T[1], ft);
-        gen_load_fpr32(fpu32_T[2], fr);
-        gen_load_fpr32h(fpu32h_T[2], fr);
-        tcg_gen_helper_0_0(do_float_mulsub_ps);
-        gen_store_fpr32(fpu32_T[2], fd);
-        gen_store_fpr32h(fpu32h_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp2 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            gen_load_fpr64(ctx, fp1, ft);
+            gen_load_fpr64(ctx, fp2, fr);
+            tcg_gen_helper_1_3(do_float_mulsub_ps, fp2, fp0, fp1, fp2);
+            tcg_temp_free(fp0);
+            tcg_temp_free(fp1);
+            gen_store_fpr64(ctx, fp2, fd);
+            tcg_temp_free(fp2);
+        }
         opn = "msub.ps";
         break;
     case OPC_NMADD_S:
         check_cop1x(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32(fpu32_T[1], ft);
-        gen_load_fpr32(fpu32_T[2], fr);
-        tcg_gen_helper_0_0(do_float_nmuladd_s);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp2 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(fp1, ft);
+            gen_load_fpr32(fp2, fr);
+            tcg_gen_helper_1_3(do_float_nmuladd_s, fp2, fp0, fp1, fp2);
+            tcg_temp_free(fp0);
+            tcg_temp_free(fp1);
+            gen_store_fpr32(fp2, fd);
+            tcg_temp_free(fp2);
+        }
         opn = "nmadd.s";
         break;
     case OPC_NMADD_D:
         check_cop1x(ctx);
         check_cp1_registers(ctx, fd | fs | ft | fr);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        gen_load_fpr64(ctx, fpu64_T[1], ft);
-        gen_load_fpr64(ctx, fpu64_T[2], fr);
-        tcg_gen_helper_0_0(do_float_nmuladd_d);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp2 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            gen_load_fpr64(ctx, fp1, ft);
+            gen_load_fpr64(ctx, fp2, fr);
+            tcg_gen_helper_1_3(do_float_nmuladd_d, fp2, fp0, fp1, fp2);
+            tcg_temp_free(fp0);
+            tcg_temp_free(fp1);
+            gen_store_fpr64(ctx, fp2, fd);
+            tcg_temp_free(fp2);
+        }
         opn = "nmadd.d";
         break;
     case OPC_NMADD_PS:
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32h(fpu32h_T[0], fs);
-        gen_load_fpr32(fpu32_T[1], ft);
-        gen_load_fpr32h(fpu32h_T[1], ft);
-        gen_load_fpr32(fpu32_T[2], fr);
-        gen_load_fpr32h(fpu32h_T[2], fr);
-        tcg_gen_helper_0_0(do_float_nmuladd_ps);
-        gen_store_fpr32(fpu32_T[2], fd);
-        gen_store_fpr32h(fpu32h_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp2 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            gen_load_fpr64(ctx, fp1, ft);
+            gen_load_fpr64(ctx, fp2, fr);
+            tcg_gen_helper_1_3(do_float_nmuladd_ps, fp2, fp0, fp1, fp2);
+            tcg_temp_free(fp0);
+            tcg_temp_free(fp1);
+            gen_store_fpr64(ctx, fp2, fd);
+            tcg_temp_free(fp2);
+        }
         opn = "nmadd.ps";
         break;
     case OPC_NMSUB_S:
         check_cop1x(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32(fpu32_T[1], ft);
-        gen_load_fpr32(fpu32_T[2], fr);
-        tcg_gen_helper_0_0(do_float_nmulsub_s);
-        gen_store_fpr32(fpu32_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I32);
+            TCGv fp2 = tcg_temp_new(TCG_TYPE_I32);
+
+            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(fp1, ft);
+            gen_load_fpr32(fp2, fr);
+            tcg_gen_helper_1_3(do_float_nmulsub_s, fp2, fp0, fp1, fp2);
+            tcg_temp_free(fp0);
+            tcg_temp_free(fp1);
+            gen_store_fpr32(fp2, fd);
+            tcg_temp_free(fp2);
+        }
         opn = "nmsub.s";
         break;
     case OPC_NMSUB_D:
         check_cop1x(ctx);
         check_cp1_registers(ctx, fd | fs | ft | fr);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        gen_load_fpr64(ctx, fpu64_T[1], ft);
-        gen_load_fpr64(ctx, fpu64_T[2], fr);
-        tcg_gen_helper_0_0(do_float_nmulsub_d);
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp2 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            gen_load_fpr64(ctx, fp1, ft);
+            gen_load_fpr64(ctx, fp2, fr);
+            tcg_gen_helper_1_3(do_float_nmulsub_d, fp2, fp0, fp1, fp2);
+            tcg_temp_free(fp0);
+            tcg_temp_free(fp1);
+            gen_store_fpr64(ctx, fp2, fd);
+            tcg_temp_free(fp2);
+        }
         opn = "nmsub.d";
         break;
     case OPC_NMSUB_PS:
         check_cp1_64bitmode(ctx);
-        gen_load_fpr32(fpu32_T[0], fs);
-        gen_load_fpr32h(fpu32h_T[0], fs);
-        gen_load_fpr32(fpu32_T[1], ft);
-        gen_load_fpr32h(fpu32h_T[1], ft);
-        gen_load_fpr32(fpu32_T[2], fr);
-        gen_load_fpr32h(fpu32h_T[2], fr);
-        tcg_gen_helper_0_0(do_float_nmulsub_ps);
-        gen_store_fpr32(fpu32_T[2], fd);
-        gen_store_fpr32h(fpu32h_T[2], fd);
+        {
+            TCGv fp0 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp1 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp2 = tcg_temp_new(TCG_TYPE_I64);
+
+            gen_load_fpr64(ctx, fp0, fs);
+            gen_load_fpr64(ctx, fp1, ft);
+            gen_load_fpr64(ctx, fp2, fr);
+            tcg_gen_helper_1_3(do_float_nmulsub_ps, fp2, fp0, fp1, fp2);
+            tcg_temp_free(fp0);
+            tcg_temp_free(fp1);
+            gen_store_fpr64(ctx, fp2, fd);
+            tcg_temp_free(fp2);
+        }
         opn = "nmsub.ps";
         break;
     default:
@@ -7994,9 +8639,6 @@
     fpu_fprintf(f, "CP1 FCR0 0x%08x  FCR31 0x%08x  SR.FR %d  fp_status 0x%08x(0x%02x)\n",
                 env->fpu->fcr0, env->fpu->fcr31, is_fpu64, env->fpu->fp_status,
                 get_float_exception_flags(&env->fpu->fp_status));
-    fpu_fprintf(f, "FT0: "); printfpr(&env->ft0);
-    fpu_fprintf(f, "FT1: "); printfpr(&env->ft1);
-    fpu_fprintf(f, "FT2: "); printfpr(&env->ft2);
     for (i = 0; i < 32; (is_fpu64) ? i++ : (i += 2)) {
         fpu_fprintf(f, "%3s: ", fregnames[i]);
         printfpr(&env->fpu->fpr[i]);
@@ -8102,16 +8744,6 @@
 #define DEF_HELPER(ret, name, params) tcg_register_helper(name, #name);
 #include "helper.h"
 
-    fpu32_T[0] = tcg_global_mem_new(TCG_TYPE_I32, TCG_AREG0, offsetof(CPUState, ft0.w[FP_ENDIAN_IDX]), "WT0");
-    fpu32_T[1] = tcg_global_mem_new(TCG_TYPE_I32, TCG_AREG0, offsetof(CPUState, ft1.w[FP_ENDIAN_IDX]), "WT1");
-    fpu32_T[2] = tcg_global_mem_new(TCG_TYPE_I32, TCG_AREG0, offsetof(CPUState, ft2.w[FP_ENDIAN_IDX]), "WT2");
-    fpu64_T[0] = tcg_global_mem_new(TCG_TYPE_I64, TCG_AREG0, offsetof(CPUState, ft0.d), "DT0");
-    fpu64_T[1] = tcg_global_mem_new(TCG_TYPE_I64, TCG_AREG0, offsetof(CPUState, ft1.d), "DT1");
-    fpu64_T[2] = tcg_global_mem_new(TCG_TYPE_I64, TCG_AREG0, offsetof(CPUState, ft2.d), "DT2");
-    fpu32h_T[0] = tcg_global_mem_new(TCG_TYPE_I32, TCG_AREG0, offsetof(CPUState, ft0.w[!FP_ENDIAN_IDX]), "WTH0");
-    fpu32h_T[1] = tcg_global_mem_new(TCG_TYPE_I32, TCG_AREG0, offsetof(CPUState, ft1.w[!FP_ENDIAN_IDX]), "WTH1");
-    fpu32h_T[2] = tcg_global_mem_new(TCG_TYPE_I32, TCG_AREG0, offsetof(CPUState, ft2.w[!FP_ENDIAN_IDX]), "WTH2");
-
     inited = 1;
 }
 
