Merge branch 'rth/vis2' of git://repo.or.cz/qemu/rth

* 'rth/vis2' of git://repo.or.cz/qemu/rth:
  target-sparc: Implement FALIGNDATA inline.
  target-sparc: Implement BMASK/BSHUFFLE.
  target-sparc: Implement ALIGNADDR* inline.
  target-sparc: Implement EDGE* instructions.
  target-sparc: Implement fpack{16,32,fix}.
  target-sparc: Implement PDIST.
  target-sparc: Do exceptions management fully inside the helpers.
  target-sparc: Change fpr representation to doubles.
  target-sparc: Undo cpu_fpr rename.
  target-sparc: Extract float128 move to a function.
  target-sparc: Extract common code for floating-point operations.
  target-sparc: Make FPU/VIS helpers const when possible.
  target-sparc: Pass float64 parameters instead of dt0/1 temporaries.
  target-sparc: Add accessors for double-precision fpr access.
  target-sparc: Mark fprs dirty in store accessor.
  target-sparc: Add accessors for single-precision fpr access.
diff --git a/gdbstub.c b/gdbstub.c
index 4009058..a25f404 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -814,7 +814,11 @@
 #if defined(TARGET_ABI32) || !defined(TARGET_SPARC64)
     if (n < 64) {
         /* fprs */
-        GET_REG32(*((uint32_t *)&env->fpr[n - 32]));
+        if (n & 1) {
+            GET_REG32(env->fpr[(n - 32) / 2].l.lower);
+        } else {
+            GET_REG32(env->fpr[(n - 32) / 2].l.upper);
+        }
     }
     /* Y, PSR, WIM, TBR, PC, NPC, FPSR, CPSR */
     switch (n) {
@@ -831,15 +835,15 @@
 #else
     if (n < 64) {
         /* f0-f31 */
-        GET_REG32(*((uint32_t *)&env->fpr[n - 32]));
+        if (n & 1) {
+            GET_REG32(env->fpr[(n - 32) / 2].l.lower);
+        } else {
+            GET_REG32(env->fpr[(n - 32) / 2].l.upper);
+        }
     }
     if (n < 80) {
         /* f32-f62 (double width, even numbers only) */
-        uint64_t val;
-
-        val = (uint64_t)*((uint32_t *)&env->fpr[(n - 64) * 2 + 32]) << 32;
-        val |= *((uint32_t *)&env->fpr[(n - 64) * 2 + 33]);
-        GET_REG64(val);
+        GET_REG64(env->fpr[(n - 32) / 2].ll);
     }
     switch (n) {
     case 80: GET_REGL(env->pc);
@@ -878,7 +882,12 @@
 #if defined(TARGET_ABI32) || !defined(TARGET_SPARC64)
     else if (n < 64) {
         /* fprs */
-        *((uint32_t *)&env->fpr[n - 32]) = tmp;
+        /* f0-f31 */
+        if (n & 1) {
+            env->fpr[(n - 32) / 2].l.lower = tmp;
+        } else {
+            env->fpr[(n - 32) / 2].l.upper = tmp;
+        }
     } else {
         /* Y, PSR, WIM, TBR, PC, NPC, FPSR, CPSR */
         switch (n) {
@@ -896,12 +905,16 @@
 #else
     else if (n < 64) {
         /* f0-f31 */
-        env->fpr[n] = ldfl_p(mem_buf);
+        tmp = ldl_p(mem_buf);
+        if (n & 1) {
+            env->fpr[(n - 32) / 2].l.lower = tmp;
+        } else {
+            env->fpr[(n - 32) / 2].l.upper = tmp;
+        }
         return 4;
     } else if (n < 80) {
         /* f32-f62 (double width, even numbers only) */
-        *((uint32_t *)&env->fpr[(n - 64) * 2 + 32]) = tmp >> 32;
-        *((uint32_t *)&env->fpr[(n - 64) * 2 + 33]) = tmp;
+        env->fpr[(n - 32) / 2].ll = tmp;
     } else {
         switch (n) {
         case 80: env->pc = tmp; break;
diff --git a/linux-user/signal.c b/linux-user/signal.c
index 40c5eb1..f3b767e 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -2296,12 +2296,14 @@
      */
     err |= __get_user(env->fprs, &(ucp->tuc_mcontext.mc_fpregs.mcfpu_fprs));
     {
-        uint32_t *src, *dst;
-        src = ucp->tuc_mcontext.mc_fpregs.mcfpu_fregs.sregs;
-        dst = env->fpr;
-        /* XXX: check that the CPU storage is the same as user context */
-        for (i = 0; i < 64; i++, dst++, src++)
-            err |= __get_user(*dst, src);
+        uint32_t *src = ucp->tuc_mcontext.mc_fpregs.mcfpu_fregs.sregs;
+        for (i = 0; i < 64; i++, src++) {
+            if (i & 1) {
+                err |= __get_user(env->fpr[i/2].l.lower, src);
+            } else {
+                err |= __get_user(env->fpr[i/2].l.upper, src);
+            }
+        }
     }
     err |= __get_user(env->fsr,
                       &(ucp->tuc_mcontext.mc_fpregs.mcfpu_fsr));
@@ -2390,12 +2392,14 @@
     err |= __put_user(i7, &(mcp->mc_i7));
 
     {
-        uint32_t *src, *dst;
-        src = env->fpr;
-        dst = ucp->tuc_mcontext.mc_fpregs.mcfpu_fregs.sregs;
-        /* XXX: check that the CPU storage is the same as user context */
-        for (i = 0; i < 64; i++, dst++, src++)
-            err |= __put_user(*src, dst);
+        uint32_t *dst = ucp->tuc_mcontext.mc_fpregs.mcfpu_fregs.sregs;
+        for (i = 0; i < 64; i++, dst++) {
+            if (i & 1) {
+                err |= __put_user(env->fpr[i/2].l.lower, dst);
+            } else {
+                err |= __put_user(env->fpr[i/2].l.upper, dst);
+            }
+        }
     }
     err |= __put_user(env->fsr, &(mcp->mc_fpregs.mcfpu_fsr));
     err |= __put_user(env->gsr, &(mcp->mc_fpregs.mcfpu_gsr));
diff --git a/monitor.c b/monitor.c
index ffda0fe..d13bd15 100644
--- a/monitor.c
+++ b/monitor.c
@@ -3471,55 +3471,55 @@
 #endif
     { "tbr", offsetof(CPUState, tbr) },
     { "fsr", offsetof(CPUState, fsr) },
-    { "f0", offsetof(CPUState, fpr[0]) },
-    { "f1", offsetof(CPUState, fpr[1]) },
-    { "f2", offsetof(CPUState, fpr[2]) },
-    { "f3", offsetof(CPUState, fpr[3]) },
-    { "f4", offsetof(CPUState, fpr[4]) },
-    { "f5", offsetof(CPUState, fpr[5]) },
-    { "f6", offsetof(CPUState, fpr[6]) },
-    { "f7", offsetof(CPUState, fpr[7]) },
-    { "f8", offsetof(CPUState, fpr[8]) },
-    { "f9", offsetof(CPUState, fpr[9]) },
-    { "f10", offsetof(CPUState, fpr[10]) },
-    { "f11", offsetof(CPUState, fpr[11]) },
-    { "f12", offsetof(CPUState, fpr[12]) },
-    { "f13", offsetof(CPUState, fpr[13]) },
-    { "f14", offsetof(CPUState, fpr[14]) },
-    { "f15", offsetof(CPUState, fpr[15]) },
-    { "f16", offsetof(CPUState, fpr[16]) },
-    { "f17", offsetof(CPUState, fpr[17]) },
-    { "f18", offsetof(CPUState, fpr[18]) },
-    { "f19", offsetof(CPUState, fpr[19]) },
-    { "f20", offsetof(CPUState, fpr[20]) },
-    { "f21", offsetof(CPUState, fpr[21]) },
-    { "f22", offsetof(CPUState, fpr[22]) },
-    { "f23", offsetof(CPUState, fpr[23]) },
-    { "f24", offsetof(CPUState, fpr[24]) },
-    { "f25", offsetof(CPUState, fpr[25]) },
-    { "f26", offsetof(CPUState, fpr[26]) },
-    { "f27", offsetof(CPUState, fpr[27]) },
-    { "f28", offsetof(CPUState, fpr[28]) },
-    { "f29", offsetof(CPUState, fpr[29]) },
-    { "f30", offsetof(CPUState, fpr[30]) },
-    { "f31", offsetof(CPUState, fpr[31]) },
+    { "f0", offsetof(CPUState, fpr[0].l.upper) },
+    { "f1", offsetof(CPUState, fpr[0].l.lower) },
+    { "f2", offsetof(CPUState, fpr[1].l.upper) },
+    { "f3", offsetof(CPUState, fpr[1].l.lower) },
+    { "f4", offsetof(CPUState, fpr[2].l.upper) },
+    { "f5", offsetof(CPUState, fpr[2].l.lower) },
+    { "f6", offsetof(CPUState, fpr[3].l.upper) },
+    { "f7", offsetof(CPUState, fpr[3].l.lower) },
+    { "f8", offsetof(CPUState, fpr[4].l.upper) },
+    { "f9", offsetof(CPUState, fpr[4].l.lower) },
+    { "f10", offsetof(CPUState, fpr[5].l.upper) },
+    { "f11", offsetof(CPUState, fpr[5].l.lower) },
+    { "f12", offsetof(CPUState, fpr[6].l.upper) },
+    { "f13", offsetof(CPUState, fpr[6].l.lower) },
+    { "f14", offsetof(CPUState, fpr[7].l.upper) },
+    { "f15", offsetof(CPUState, fpr[7].l.lower) },
+    { "f16", offsetof(CPUState, fpr[8].l.upper) },
+    { "f17", offsetof(CPUState, fpr[8].l.lower) },
+    { "f18", offsetof(CPUState, fpr[9].l.upper) },
+    { "f19", offsetof(CPUState, fpr[9].l.lower) },
+    { "f20", offsetof(CPUState, fpr[10].l.upper) },
+    { "f21", offsetof(CPUState, fpr[10].l.lower) },
+    { "f22", offsetof(CPUState, fpr[11].l.upper) },
+    { "f23", offsetof(CPUState, fpr[11].l.lower) },
+    { "f24", offsetof(CPUState, fpr[12].l.upper) },
+    { "f25", offsetof(CPUState, fpr[12].l.lower) },
+    { "f26", offsetof(CPUState, fpr[13].l.upper) },
+    { "f27", offsetof(CPUState, fpr[13].l.lower) },
+    { "f28", offsetof(CPUState, fpr[14].l.upper) },
+    { "f29", offsetof(CPUState, fpr[14].l.lower) },
+    { "f30", offsetof(CPUState, fpr[15].l.upper) },
+    { "f31", offsetof(CPUState, fpr[15].l.lower) },
 #ifdef TARGET_SPARC64
-    { "f32", offsetof(CPUState, fpr[32]) },
-    { "f34", offsetof(CPUState, fpr[34]) },
-    { "f36", offsetof(CPUState, fpr[36]) },
-    { "f38", offsetof(CPUState, fpr[38]) },
-    { "f40", offsetof(CPUState, fpr[40]) },
-    { "f42", offsetof(CPUState, fpr[42]) },
-    { "f44", offsetof(CPUState, fpr[44]) },
-    { "f46", offsetof(CPUState, fpr[46]) },
-    { "f48", offsetof(CPUState, fpr[48]) },
-    { "f50", offsetof(CPUState, fpr[50]) },
-    { "f52", offsetof(CPUState, fpr[52]) },
-    { "f54", offsetof(CPUState, fpr[54]) },
-    { "f56", offsetof(CPUState, fpr[56]) },
-    { "f58", offsetof(CPUState, fpr[58]) },
-    { "f60", offsetof(CPUState, fpr[60]) },
-    { "f62", offsetof(CPUState, fpr[62]) },
+    { "f32", offsetof(CPUState, fpr[16]) },
+    { "f34", offsetof(CPUState, fpr[17]) },
+    { "f36", offsetof(CPUState, fpr[18]) },
+    { "f38", offsetof(CPUState, fpr[19]) },
+    { "f40", offsetof(CPUState, fpr[20]) },
+    { "f42", offsetof(CPUState, fpr[21]) },
+    { "f44", offsetof(CPUState, fpr[22]) },
+    { "f46", offsetof(CPUState, fpr[23]) },
+    { "f48", offsetof(CPUState, fpr[24]) },
+    { "f50", offsetof(CPUState, fpr[25]) },
+    { "f52", offsetof(CPUState, fpr[26]) },
+    { "f54", offsetof(CPUState, fpr[27]) },
+    { "f56", offsetof(CPUState, fpr[28]) },
+    { "f58", offsetof(CPUState, fpr[29]) },
+    { "f60", offsetof(CPUState, fpr[30]) },
+    { "f62", offsetof(CPUState, fpr[31]) },
     { "asi", offsetof(CPUState, asi) },
     { "pstate", offsetof(CPUState, pstate) },
     { "cansave", offsetof(CPUState, cansave) },
diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h
index 25b4f1a..38a7074 100644
--- a/target-sparc/cpu.h
+++ b/target-sparc/cpu.h
@@ -3,16 +3,17 @@
 
 #include "config.h"
 #include "qemu-common.h"
+#include "bswap.h"
 
 #if !defined(TARGET_SPARC64)
 #define TARGET_LONG_BITS 32
-#define TARGET_FPREGS 32
+#define TARGET_DPREGS 16
 #define TARGET_PAGE_BITS 12 /* 4k */
 #define TARGET_PHYS_ADDR_SPACE_BITS 36
 #define TARGET_VIRT_ADDR_SPACE_BITS 32
 #else
 #define TARGET_LONG_BITS 64
-#define TARGET_FPREGS 64
+#define TARGET_DPREGS 32
 #define TARGET_PAGE_BITS 13 /* 8k */
 #define TARGET_PHYS_ADDR_SPACE_BITS 41
 # ifdef TARGET_ABI32
@@ -395,7 +396,7 @@
 
     uint32_t psr;      /* processor state register */
     target_ulong fsr;      /* FPU state register */
-    float32 fpr[TARGET_FPREGS];  /* floating point registers */
+    CPU_DoubleU fpr[TARGET_DPREGS];  /* floating point registers */
     uint32_t cwp;      /* index of current register window (extracted
                           from PSR) */
 #if !defined(TARGET_SPARC64) || defined(TARGET_ABI32)
@@ -463,7 +464,6 @@
     uint64_t prom_addr;
 #endif
     /* temporary float registers */
-    float64 dt0, dt1;
     float128 qt0, qt1;
     float_status fp_status;
 #if defined(TARGET_SPARC64)
diff --git a/target-sparc/cpu_init.c b/target-sparc/cpu_init.c
index 6954800..c7269b5 100644
--- a/target-sparc/cpu_init.c
+++ b/target-sparc/cpu_init.c
@@ -813,11 +813,11 @@
         }
     }
     cpu_fprintf(f, "\nFloating Point Registers:\n");
-    for (i = 0; i < TARGET_FPREGS; i++) {
+    for (i = 0; i < TARGET_DPREGS; i++) {
         if ((i & 3) == 0) {
-            cpu_fprintf(f, "%%f%02d:", i);
+            cpu_fprintf(f, "%%f%02d:", i * 2);
         }
-        cpu_fprintf(f, " %016f", *(float *)&env->fpr[i]);
+        cpu_fprintf(f, " %016" PRIx64, env->fpr[i].ll);
         if ((i & 3) == 3) {
             cpu_fprintf(f, "\n");
         }
diff --git a/target-sparc/fop_helper.c b/target-sparc/fop_helper.c
index 23502f3..c7a2512 100644
--- a/target-sparc/fop_helper.c
+++ b/target-sparc/fop_helper.c
@@ -20,26 +20,74 @@
 #include "cpu.h"
 #include "helper.h"
 
-#define DT0 (env->dt0)
-#define DT1 (env->dt1)
 #define QT0 (env->qt0)
 #define QT1 (env->qt1)
 
+static void check_ieee_exceptions(CPUState *env)
+{
+    target_ulong status;
+
+    status = get_float_exception_flags(&env->fp_status);
+    if (status) {
+        /* Copy IEEE 754 flags into FSR */
+        if (status & float_flag_invalid) {
+            env->fsr |= FSR_NVC;
+        }
+        if (status & float_flag_overflow) {
+            env->fsr |= FSR_OFC;
+        }
+        if (status & float_flag_underflow) {
+            env->fsr |= FSR_UFC;
+        }
+        if (status & float_flag_divbyzero) {
+            env->fsr |= FSR_DZC;
+        }
+        if (status & float_flag_inexact) {
+            env->fsr |= FSR_NXC;
+        }
+
+        if ((env->fsr & FSR_CEXC_MASK) & ((env->fsr & FSR_TEM_MASK) >> 23)) {
+            /* Unmasked exception, generate a trap */
+            env->fsr |= FSR_FTT_IEEE_EXCP;
+            helper_raise_exception(env, TT_FP_EXCP);
+        } else {
+            /* Accumulate exceptions */
+            env->fsr |= (env->fsr & FSR_CEXC_MASK) << 5;
+        }
+    }
+}
+
+static inline void clear_float_exceptions(CPUState *env)
+{
+    set_float_exception_flags(0, &env->fp_status);
+}
+
 #define F_HELPER(name, p) void helper_f##name##p(CPUState *env)
 
 #define F_BINOP(name)                                           \
-    float32 helper_f ## name ## s (CPUState * env, float32 src1,\
+    float32 helper_f ## name ## s (CPUState *env, float32 src1, \
                                    float32 src2)                \
     {                                                           \
-        return float32_ ## name (src1, src2, &env->fp_status);  \
+        float32 ret;                                            \
+        clear_float_exceptions(env);                            \
+        ret = float32_ ## name (src1, src2, &env->fp_status);   \
+        check_ieee_exceptions(env);                             \
+        return ret;                                             \
     }                                                           \
-    F_HELPER(name, d)                                           \
+    float64 helper_f ## name ## d (CPUState * env, float64 src1,\
+                                   float64 src2)                \
     {                                                           \
-        DT0 = float64_ ## name (DT0, DT1, &env->fp_status);     \
+        float64 ret;                                            \
+        clear_float_exceptions(env);                            \
+        ret = float64_ ## name (src1, src2, &env->fp_status);   \
+        check_ieee_exceptions(env);                             \
+        return ret;                                             \
     }                                                           \
     F_HELPER(name, q)                                           \
     {                                                           \
+        clear_float_exceptions(env);                            \
         QT0 = float128_ ## name (QT0, QT1, &env->fp_status);    \
+        check_ieee_exceptions(env);                             \
     }
 
 F_BINOP(add);
@@ -48,18 +96,24 @@
 F_BINOP(div);
 #undef F_BINOP
 
-void helper_fsmuld(CPUState *env, float32 src1, float32 src2)
+float64 helper_fsmuld(CPUState *env, float32 src1, float32 src2)
 {
-    DT0 = float64_mul(float32_to_float64(src1, &env->fp_status),
+    float64 ret;
+    clear_float_exceptions(env);
+    ret = float64_mul(float32_to_float64(src1, &env->fp_status),
                       float32_to_float64(src2, &env->fp_status),
                       &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
-void helper_fdmulq(CPUState *env)
+void helper_fdmulq(CPUState *env, float64 src1, float64 src2)
 {
-    QT0 = float128_mul(float64_to_float128(DT0, &env->fp_status),
-                       float64_to_float128(DT1, &env->fp_status),
+    clear_float_exceptions(env);
+    QT0 = float128_mul(float64_to_float128(src1, &env->fp_status),
+                       float64_to_float128(src2, &env->fp_status),
                        &env->fp_status);
+    check_ieee_exceptions(env);
 }
 
 float32 helper_fnegs(float32 src)
@@ -68,9 +122,9 @@
 }
 
 #ifdef TARGET_SPARC64
-F_HELPER(neg, d)
+float64 helper_fnegd(float64 src)
 {
-    DT0 = float64_chs(DT1);
+    return float64_chs(src);
 }
 
 F_HELPER(neg, q)
@@ -82,98 +136,158 @@
 /* Integer to float conversion.  */
 float32 helper_fitos(CPUState *env, int32_t src)
 {
-    return int32_to_float32(src, &env->fp_status);
+    /* Inexact error possible converting int to float.  */
+    float32 ret;
+    clear_float_exceptions(env);
+    ret = int32_to_float32(src, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
-void helper_fitod(CPUState *env, int32_t src)
+float64 helper_fitod(CPUState *env, int32_t src)
 {
-    DT0 = int32_to_float64(src, &env->fp_status);
+    /* No possible exceptions converting int to double.  */
+    return int32_to_float64(src, &env->fp_status);
 }
 
 void helper_fitoq(CPUState *env, int32_t src)
 {
+    /* No possible exceptions converting int to long double.  */
     QT0 = int32_to_float128(src, &env->fp_status);
 }
 
 #ifdef TARGET_SPARC64
-float32 helper_fxtos(CPUState *env)
+float32 helper_fxtos(CPUState *env, int64_t src)
 {
-    return int64_to_float32(*((int64_t *)&DT1), &env->fp_status);
+    float32 ret;
+    clear_float_exceptions(env);
+    ret = int64_to_float32(src, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
-F_HELPER(xto, d)
+float64 helper_fxtod(CPUState *env, int64_t src)
 {
-    DT0 = int64_to_float64(*((int64_t *)&DT1), &env->fp_status);
+    float64 ret;
+    clear_float_exceptions(env);
+    ret = int64_to_float64(src, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
-F_HELPER(xto, q)
+void helper_fxtoq(CPUState *env, int64_t src)
 {
-    QT0 = int64_to_float128(*((int64_t *)&DT1), &env->fp_status);
+    /* No possible exceptions converting long long to long double.  */
+    QT0 = int64_to_float128(src, &env->fp_status);
 }
 #endif
 #undef F_HELPER
 
 /* floating point conversion */
-float32 helper_fdtos(CPUState *env)
+float32 helper_fdtos(CPUState *env, float64 src)
 {
-    return float64_to_float32(DT1, &env->fp_status);
+    float32 ret;
+    clear_float_exceptions(env);
+    ret = float64_to_float32(src, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
-void helper_fstod(CPUState *env, float32 src)
+float64 helper_fstod(CPUState *env, float32 src)
 {
-    DT0 = float32_to_float64(src, &env->fp_status);
+    float64 ret;
+    clear_float_exceptions(env);
+    ret = float32_to_float64(src, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
 float32 helper_fqtos(CPUState *env)
 {
-    return float128_to_float32(QT1, &env->fp_status);
+    float32 ret;
+    clear_float_exceptions(env);
+    ret = float128_to_float32(QT1, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
 void helper_fstoq(CPUState *env, float32 src)
 {
+    clear_float_exceptions(env);
     QT0 = float32_to_float128(src, &env->fp_status);
+    check_ieee_exceptions(env);
 }
 
-void helper_fqtod(CPUState *env)
+float64 helper_fqtod(CPUState *env)
 {
-    DT0 = float128_to_float64(QT1, &env->fp_status);
+    float64 ret;
+    clear_float_exceptions(env);
+    ret = float128_to_float64(QT1, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
-void helper_fdtoq(CPUState *env)
+void helper_fdtoq(CPUState *env, float64 src)
 {
-    QT0 = float64_to_float128(DT1, &env->fp_status);
+    clear_float_exceptions(env);
+    QT0 = float64_to_float128(src, &env->fp_status);
+    check_ieee_exceptions(env);
 }
 
 /* Float to integer conversion.  */
 int32_t helper_fstoi(CPUState *env, float32 src)
 {
-    return float32_to_int32_round_to_zero(src, &env->fp_status);
+    int32_t ret;
+    clear_float_exceptions(env);
+    ret = float32_to_int32_round_to_zero(src, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
-int32_t helper_fdtoi(CPUState *env)
+int32_t helper_fdtoi(CPUState *env, float64 src)
 {
-    return float64_to_int32_round_to_zero(DT1, &env->fp_status);
+    int32_t ret;
+    clear_float_exceptions(env);
+    ret = float64_to_int32_round_to_zero(src, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
 int32_t helper_fqtoi(CPUState *env)
 {
-    return float128_to_int32_round_to_zero(QT1, &env->fp_status);
+    int32_t ret;
+    clear_float_exceptions(env);
+    ret = float128_to_int32_round_to_zero(QT1, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
 #ifdef TARGET_SPARC64
-void helper_fstox(CPUState *env, float32 src)
+int64_t helper_fstox(CPUState *env, float32 src)
 {
-    *((int64_t *)&DT0) = float32_to_int64_round_to_zero(src, &env->fp_status);
+    int64_t ret;
+    clear_float_exceptions(env);
+    ret = float32_to_int64_round_to_zero(src, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
-void helper_fdtox(CPUState *env)
+int64_t helper_fdtox(CPUState *env, float64 src)
 {
-    *((int64_t *)&DT0) = float64_to_int64_round_to_zero(DT1, &env->fp_status);
+    int64_t ret;
+    clear_float_exceptions(env);
+    ret = float64_to_int64_round_to_zero(src, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
-void helper_fqtox(CPUState *env)
+int64_t helper_fqtox(CPUState *env)
 {
-    *((int64_t *)&DT0) = float128_to_int64_round_to_zero(QT1, &env->fp_status);
+    int64_t ret;
+    clear_float_exceptions(env);
+    ret = float128_to_int64_round_to_zero(QT1, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 #endif
 
@@ -183,9 +297,9 @@
 }
 
 #ifdef TARGET_SPARC64
-void helper_fabsd(CPUState *env)
+float64 helper_fabsd(float64 src)
 {
-    DT0 = float64_abs(DT1);
+    return float64_abs(src);
 }
 
 void helper_fabsq(CPUState *env)
@@ -196,17 +310,27 @@
 
 float32 helper_fsqrts(CPUState *env, float32 src)
 {
-    return float32_sqrt(src, &env->fp_status);
+    float32 ret;
+    clear_float_exceptions(env);
+    ret = float32_sqrt(src, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
-void helper_fsqrtd(CPUState *env)
+float64 helper_fsqrtd(CPUState *env, float64 src)
 {
-    DT0 = float64_sqrt(DT1, &env->fp_status);
+    float64 ret;
+    clear_float_exceptions(env);
+    ret = float64_sqrt(src, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
 void helper_fsqrtq(CPUState *env)
 {
+    clear_float_exceptions(env);
     QT0 = float128_sqrt(QT1, &env->fp_status);
+    check_ieee_exceptions(env);
 }
 
 #define GEN_FCMP(name, size, reg1, reg2, FS, E)                         \
@@ -245,8 +369,8 @@
             break;                                                      \
         }                                                               \
     }
-#define GEN_FCMPS(name, size, FS, E)                                    \
-    void glue(helper_, name)(CPUState *env, float32 src1, float32 src2) \
+#define GEN_FCMP_T(name, size, FS, E)                                   \
+    void glue(helper_, name)(CPUState *env, size src1, size src2)       \
     {                                                                   \
         env->fsr &= FSR_FTT_NMASK;                                      \
         if (E && (glue(size, _is_any_nan)(src1) ||                      \
@@ -282,80 +406,42 @@
         }                                                               \
     }
 
-GEN_FCMPS(fcmps, float32, 0, 0);
-GEN_FCMP(fcmpd, float64, DT0, DT1, 0, 0);
+GEN_FCMP_T(fcmps, float32, 0, 0);
+GEN_FCMP_T(fcmpd, float64, 0, 0);
 
-GEN_FCMPS(fcmpes, float32, 0, 1);
-GEN_FCMP(fcmped, float64, DT0, DT1, 0, 1);
+GEN_FCMP_T(fcmpes, float32, 0, 1);
+GEN_FCMP_T(fcmped, float64, 0, 1);
 
 GEN_FCMP(fcmpq, float128, QT0, QT1, 0, 0);
 GEN_FCMP(fcmpeq, float128, QT0, QT1, 0, 1);
 
 #ifdef TARGET_SPARC64
-GEN_FCMPS(fcmps_fcc1, float32, 22, 0);
-GEN_FCMP(fcmpd_fcc1, float64, DT0, DT1, 22, 0);
+GEN_FCMP_T(fcmps_fcc1, float32, 22, 0);
+GEN_FCMP_T(fcmpd_fcc1, float64, 22, 0);
 GEN_FCMP(fcmpq_fcc1, float128, QT0, QT1, 22, 0);
 
-GEN_FCMPS(fcmps_fcc2, float32, 24, 0);
-GEN_FCMP(fcmpd_fcc2, float64, DT0, DT1, 24, 0);
+GEN_FCMP_T(fcmps_fcc2, float32, 24, 0);
+GEN_FCMP_T(fcmpd_fcc2, float64, 24, 0);
 GEN_FCMP(fcmpq_fcc2, float128, QT0, QT1, 24, 0);
 
-GEN_FCMPS(fcmps_fcc3, float32, 26, 0);
-GEN_FCMP(fcmpd_fcc3, float64, DT0, DT1, 26, 0);
+GEN_FCMP_T(fcmps_fcc3, float32, 26, 0);
+GEN_FCMP_T(fcmpd_fcc3, float64, 26, 0);
 GEN_FCMP(fcmpq_fcc3, float128, QT0, QT1, 26, 0);
 
-GEN_FCMPS(fcmpes_fcc1, float32, 22, 1);
-GEN_FCMP(fcmped_fcc1, float64, DT0, DT1, 22, 1);
+GEN_FCMP_T(fcmpes_fcc1, float32, 22, 1);
+GEN_FCMP_T(fcmped_fcc1, float64, 22, 1);
 GEN_FCMP(fcmpeq_fcc1, float128, QT0, QT1, 22, 1);
 
-GEN_FCMPS(fcmpes_fcc2, float32, 24, 1);
-GEN_FCMP(fcmped_fcc2, float64, DT0, DT1, 24, 1);
+GEN_FCMP_T(fcmpes_fcc2, float32, 24, 1);
+GEN_FCMP_T(fcmped_fcc2, float64, 24, 1);
 GEN_FCMP(fcmpeq_fcc2, float128, QT0, QT1, 24, 1);
 
-GEN_FCMPS(fcmpes_fcc3, float32, 26, 1);
-GEN_FCMP(fcmped_fcc3, float64, DT0, DT1, 26, 1);
+GEN_FCMP_T(fcmpes_fcc3, float32, 26, 1);
+GEN_FCMP_T(fcmped_fcc3, float64, 26, 1);
 GEN_FCMP(fcmpeq_fcc3, float128, QT0, QT1, 26, 1);
 #endif
-#undef GEN_FCMPS
-
-void helper_check_ieee_exceptions(CPUState *env)
-{
-    target_ulong status;
-
-    status = get_float_exception_flags(&env->fp_status);
-    if (status) {
-        /* Copy IEEE 754 flags into FSR */
-        if (status & float_flag_invalid) {
-            env->fsr |= FSR_NVC;
-        }
-        if (status & float_flag_overflow) {
-            env->fsr |= FSR_OFC;
-        }
-        if (status & float_flag_underflow) {
-            env->fsr |= FSR_UFC;
-        }
-        if (status & float_flag_divbyzero) {
-            env->fsr |= FSR_DZC;
-        }
-        if (status & float_flag_inexact) {
-            env->fsr |= FSR_NXC;
-        }
-
-        if ((env->fsr & FSR_CEXC_MASK) & ((env->fsr & FSR_TEM_MASK) >> 23)) {
-            /* Unmasked exception, generate a trap */
-            env->fsr |= FSR_FTT_IEEE_EXCP;
-            helper_raise_exception(env, TT_FP_EXCP);
-        } else {
-            /* Accumulate exceptions */
-            env->fsr |= (env->fsr & FSR_CEXC_MASK) << 5;
-        }
-    }
-}
-
-void helper_clear_float_exceptions(CPUState *env)
-{
-    set_float_exception_flags(0, &env->fp_status);
-}
+#undef GEN_FCMP_T
+#undef GEN_FCMP
 
 static inline void set_fsr(CPUState *env)
 {
diff --git a/target-sparc/helper.h b/target-sparc/helper.h
index 615ddef..faaf8dc 100644
--- a/target-sparc/helper.h
+++ b/target-sparc/helper.h
@@ -16,8 +16,7 @@
 DEF_HELPER_2(wrccr, void, env, tl)
 DEF_HELPER_1(rdcwp, tl, env)
 DEF_HELPER_2(wrcwp, void, env, tl)
-DEF_HELPER_3(array8, tl, env, tl, tl)
-DEF_HELPER_3(alignaddr, tl, env, tl, tl)
+DEF_HELPER_FLAGS_2(array8, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl, tl)
 DEF_HELPER_1(popc, tl, tl)
 DEF_HELPER_3(ldda_asi, void, tl, int, int)
 DEF_HELPER_4(ldf_asi, void, tl, int, int, int)
@@ -39,8 +38,6 @@
 DEF_HELPER_3(udiv_cc, tl, env, tl, tl)
 DEF_HELPER_3(sdiv, tl, env, tl, tl)
 DEF_HELPER_3(sdiv_cc, tl, env, tl, tl)
-DEF_HELPER_2(stdf, void, tl, int)
-DEF_HELPER_2(lddf, void, tl, int)
 DEF_HELPER_2(ldqf, void, tl, int)
 DEF_HELPER_2(stqf, void, tl, int)
 #if !defined(CONFIG_USER_ONLY) || defined(TARGET_SPARC64)
@@ -48,33 +45,31 @@
 DEF_HELPER_4(st_asi, void, tl, i64, int, int)
 #endif
 DEF_HELPER_2(ldfsr, void, env, i32)
-DEF_HELPER_1(check_ieee_exceptions, void, env)
-DEF_HELPER_1(clear_float_exceptions, void, env)
-DEF_HELPER_1(fabss, f32, f32)
+DEF_HELPER_FLAGS_1(fabss, TCG_CALL_CONST | TCG_CALL_PURE, f32, f32)
 DEF_HELPER_2(fsqrts, f32, env, f32)
-DEF_HELPER_1(fsqrtd, void, env)
+DEF_HELPER_2(fsqrtd, f64, env, f64)
 DEF_HELPER_3(fcmps, void, env, f32, f32)
-DEF_HELPER_1(fcmpd, void, env)
+DEF_HELPER_3(fcmpd, void, env, f64, f64)
 DEF_HELPER_3(fcmpes, void, env, f32, f32)
-DEF_HELPER_1(fcmped, void, env)
+DEF_HELPER_3(fcmped, void, env, f64, f64)
 DEF_HELPER_1(fsqrtq, void, env)
 DEF_HELPER_1(fcmpq, void, env)
 DEF_HELPER_1(fcmpeq, void, env)
 #ifdef TARGET_SPARC64
 DEF_HELPER_2(ldxfsr, void, env, i64)
-DEF_HELPER_1(fabsd, void, env)
+DEF_HELPER_FLAGS_1(fabsd, TCG_CALL_CONST | TCG_CALL_PURE, f64, f64)
 DEF_HELPER_3(fcmps_fcc1, void, env, f32, f32)
 DEF_HELPER_3(fcmps_fcc2, void, env, f32, f32)
 DEF_HELPER_3(fcmps_fcc3, void, env, f32, f32)
-DEF_HELPER_1(fcmpd_fcc1, void, env)
-DEF_HELPER_1(fcmpd_fcc2, void, env)
-DEF_HELPER_1(fcmpd_fcc3, void, env)
+DEF_HELPER_3(fcmpd_fcc1, void, env, f64, f64)
+DEF_HELPER_3(fcmpd_fcc2, void, env, f64, f64)
+DEF_HELPER_3(fcmpd_fcc3, void, env, f64, f64)
 DEF_HELPER_3(fcmpes_fcc1, void, env, f32, f32)
 DEF_HELPER_3(fcmpes_fcc2, void, env, f32, f32)
 DEF_HELPER_3(fcmpes_fcc3, void, env, f32, f32)
-DEF_HELPER_1(fcmped_fcc1, void, env)
-DEF_HELPER_1(fcmped_fcc2, void, env)
-DEF_HELPER_1(fcmped_fcc3, void, env)
+DEF_HELPER_3(fcmped_fcc1, void, env, f64, f64)
+DEF_HELPER_3(fcmped_fcc2, void, env, f64, f64)
+DEF_HELPER_3(fcmped_fcc3, void, env, f64, f64)
 DEF_HELPER_1(fabsq, void, env)
 DEF_HELPER_1(fcmpq_fcc1, void, env)
 DEF_HELPER_1(fcmpq_fcc2, void, env)
@@ -86,77 +81,88 @@
 DEF_HELPER_2(raise_exception, void, env, int)
 DEF_HELPER_0(shutdown, void)
 #define F_HELPER_0_1(name) DEF_HELPER_1(f ## name, void, env)
-#define F_HELPER_DQ_0_1(name)                   \
-    F_HELPER_0_1(name ## d);                    \
-    F_HELPER_0_1(name ## q)
 
-F_HELPER_DQ_0_1(add);
-F_HELPER_DQ_0_1(sub);
-F_HELPER_DQ_0_1(mul);
-F_HELPER_DQ_0_1(div);
+DEF_HELPER_3(faddd, f64, env, f64, f64)
+DEF_HELPER_3(fsubd, f64, env, f64, f64)
+DEF_HELPER_3(fmuld, f64, env, f64, f64)
+DEF_HELPER_3(fdivd, f64, env, f64, f64)
+F_HELPER_0_1(addq)
+F_HELPER_0_1(subq)
+F_HELPER_0_1(mulq)
+F_HELPER_0_1(divq)
 
 DEF_HELPER_3(fadds, f32, env, f32, f32)
 DEF_HELPER_3(fsubs, f32, env, f32, f32)
 DEF_HELPER_3(fmuls, f32, env, f32, f32)
 DEF_HELPER_3(fdivs, f32, env, f32, f32)
 
-DEF_HELPER_3(fsmuld, void, env, f32, f32)
-F_HELPER_0_1(dmulq);
+DEF_HELPER_3(fsmuld, f64, env, f32, f32)
+DEF_HELPER_3(fdmulq, void, env, f64, f64);
 
-DEF_HELPER_1(fnegs, f32, f32)
-DEF_HELPER_2(fitod, void, env, s32)
+DEF_HELPER_FLAGS_1(fnegs, TCG_CALL_CONST | TCG_CALL_PURE, f32, f32)
+DEF_HELPER_2(fitod, f64, env, s32)
 DEF_HELPER_2(fitoq, void, env, s32)
 
 DEF_HELPER_2(fitos, f32, env, s32)
 
 #ifdef TARGET_SPARC64
-DEF_HELPER_1(fnegd, void, env)
+DEF_HELPER_FLAGS_1(fnegd, TCG_CALL_CONST | TCG_CALL_PURE, f64, f64)
 DEF_HELPER_1(fnegq, void, env)
-DEF_HELPER_1(fxtos, i32, env)
-F_HELPER_DQ_0_1(xto);
+DEF_HELPER_2(fxtos, f32, env, s64)
+DEF_HELPER_2(fxtod, f64, env, s64)
+DEF_HELPER_2(fxtoq, void, env, s64)
 #endif
-DEF_HELPER_1(fdtos, f32, env)
-DEF_HELPER_2(fstod, void, env, f32)
+DEF_HELPER_2(fdtos, f32, env, f64)
+DEF_HELPER_2(fstod, f64, env, f32)
 DEF_HELPER_1(fqtos, f32, env)
 DEF_HELPER_2(fstoq, void, env, f32)
-F_HELPER_0_1(qtod);
-F_HELPER_0_1(dtoq);
+DEF_HELPER_1(fqtod, f64, env)
+DEF_HELPER_2(fdtoq, void, env, f64)
 DEF_HELPER_2(fstoi, s32, env, f32)
-DEF_HELPER_1(fdtoi, s32, env)
+DEF_HELPER_2(fdtoi, s32, env, f64)
 DEF_HELPER_1(fqtoi, s32, env)
 #ifdef TARGET_SPARC64
-DEF_HELPER_2(fstox, void, env, i32)
-F_HELPER_0_1(dtox);
-F_HELPER_0_1(qtox);
-F_HELPER_0_1(aligndata);
+DEF_HELPER_2(fstox, s64, env, f32)
+DEF_HELPER_2(fdtox, s64, env, f64)
+DEF_HELPER_1(fqtox, s64, env)
 
-F_HELPER_0_1(pmerge);
-F_HELPER_0_1(mul8x16);
-F_HELPER_0_1(mul8x16al);
-F_HELPER_0_1(mul8x16au);
-F_HELPER_0_1(mul8sux16);
-F_HELPER_0_1(mul8ulx16);
-F_HELPER_0_1(muld8sux16);
-F_HELPER_0_1(muld8ulx16);
-F_HELPER_0_1(expand);
-#define VIS_HELPER(name)                                 \
-    F_HELPER_0_1(name##16);                              \
-    DEF_HELPER_3(f ## name ## 16s, i32, env, i32, i32)   \
-    F_HELPER_0_1(name##32);                              \
-    DEF_HELPER_3(f ## name ## 32s, i32, env, i32, i32)
+DEF_HELPER_FLAGS_2(fpmerge, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fmul8x16, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fmul8x16al, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fmul8x16au, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fmul8sux16, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fmul8ulx16, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fmuld8sux16, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fmuld8ulx16, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fexpand, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_3(pdist, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fpack16, TCG_CALL_CONST | TCG_CALL_PURE, i32, i64, i64)
+DEF_HELPER_FLAGS_3(fpack32, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fpackfix, TCG_CALL_CONST | TCG_CALL_PURE, i32, i64, i64)
+DEF_HELPER_FLAGS_3(bshuffle, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64, i64)
+#define VIS_HELPER(name)                                                 \
+    DEF_HELPER_FLAGS_2(f ## name ## 16, TCG_CALL_CONST | TCG_CALL_PURE,  \
+                       i64, i64, i64)                                    \
+    DEF_HELPER_FLAGS_2(f ## name ## 16s, TCG_CALL_CONST | TCG_CALL_PURE, \
+                       i32, i32, i32)                                    \
+    DEF_HELPER_FLAGS_2(f ## name ## 32, TCG_CALL_CONST | TCG_CALL_PURE,  \
+                       i64, i64, i64)                                    \
+    DEF_HELPER_FLAGS_2(f ## name ## 32s, TCG_CALL_CONST | TCG_CALL_PURE, \
+                       i32, i32, i32)
 
 VIS_HELPER(padd);
 VIS_HELPER(psub);
-#define VIS_CMPHELPER(name)                              \
-    DEF_HELPER_1(f##name##16, i64, env);                 \
-    DEF_HELPER_1(f##name##32, i64, env)
+#define VIS_CMPHELPER(name)                                              \
+    DEF_HELPER_FLAGS_2(f##name##16, TCG_CALL_CONST | TCG_CALL_PURE,      \
+                       i64, i64, i64)                                    \
+    DEF_HELPER_FLAGS_2(f##name##32, TCG_CALL_CONST | TCG_CALL_PURE,      \
+                       i64, i64, i64)
 VIS_CMPHELPER(cmpgt);
 VIS_CMPHELPER(cmpeq);
 VIS_CMPHELPER(cmple);
 VIS_CMPHELPER(cmpne);
 #endif
 #undef F_HELPER_0_1
-#undef F_HELPER_DQ_0_1
 #undef VIS_HELPER
 #undef VIS_CMPHELPER
 DEF_HELPER_1(compute_psr, void, env);
diff --git a/target-sparc/ldst_helper.c b/target-sparc/ldst_helper.c
index 1fb3996..b59707e 100644
--- a/target-sparc/ldst_helper.c
+++ b/target-sparc/ldst_helper.c
@@ -66,8 +66,6 @@
 #endif
 #endif
 
-#define DT0 (env->dt0)
-#define DT1 (env->dt1)
 #define QT0 (env->qt0)
 #define QT1 (env->qt1)
 
@@ -2047,7 +2045,7 @@
 void helper_ldf_asi(target_ulong addr, int asi, int size, int rd)
 {
     unsigned int i;
-    CPU_DoubleU u;
+    target_ulong val;
 
     helper_check_align(addr, 3);
     addr = asi_address_mask(env, asi, addr);
@@ -2062,13 +2060,11 @@
             return;
         }
         helper_check_align(addr, 0x3f);
-        for (i = 0; i < 16; i++) {
-            *(uint32_t *)&env->fpr[rd++] = helper_ld_asi(addr, asi & 0x8f, 4,
-                                                         0);
-            addr += 4;
+        for (i = 0; i < 8; i++, rd += 2, addr += 8) {
+            env->fpr[rd/2].ll = helper_ld_asi(addr, asi & 0x8f, 8, 0);
         }
-
         return;
+
     case 0x16: /* UA2007 Block load primary, user privilege */
     case 0x17: /* UA2007 Block load secondary, user privilege */
     case 0x1e: /* UA2007 Block load primary LE, user privilege */
@@ -2082,13 +2078,11 @@
             return;
         }
         helper_check_align(addr, 0x3f);
-        for (i = 0; i < 16; i++) {
-            *(uint32_t *)&env->fpr[rd++] = helper_ld_asi(addr, asi & 0x19, 4,
-                                                         0);
-            addr += 4;
+        for (i = 0; i < 8; i++, rd += 2, addr += 4) {
+            env->fpr[rd/2].ll = helper_ld_asi(addr, asi & 0x19, 8, 0);
         }
-
         return;
+
     default:
         break;
     }
@@ -2096,20 +2090,19 @@
     switch (size) {
     default:
     case 4:
-        *((uint32_t *)&env->fpr[rd]) = helper_ld_asi(addr, asi, size, 0);
+        val = helper_ld_asi(addr, asi, size, 0);
+        if (rd & 1) {
+            env->fpr[rd/2].l.lower = val;
+        } else {
+            env->fpr[rd/2].l.upper = val;
+        }
         break;
     case 8:
-        u.ll = helper_ld_asi(addr, asi, size, 0);
-        *((uint32_t *)&env->fpr[rd++]) = u.l.upper;
-        *((uint32_t *)&env->fpr[rd++]) = u.l.lower;
+        env->fpr[rd/2].ll = helper_ld_asi(addr, asi, size, 0);
         break;
     case 16:
-        u.ll = helper_ld_asi(addr, asi, 8, 0);
-        *((uint32_t *)&env->fpr[rd++]) = u.l.upper;
-        *((uint32_t *)&env->fpr[rd++]) = u.l.lower;
-        u.ll = helper_ld_asi(addr + 8, asi, 8, 0);
-        *((uint32_t *)&env->fpr[rd++]) = u.l.upper;
-        *((uint32_t *)&env->fpr[rd++]) = u.l.lower;
+        env->fpr[rd/2].ll = helper_ld_asi(addr, asi, 8, 0);
+        env->fpr[rd/2 + 1].ll = helper_ld_asi(addr + 8, asi, 8, 0);
         break;
     }
 }
@@ -2117,8 +2110,7 @@
 void helper_stf_asi(target_ulong addr, int asi, int size, int rd)
 {
     unsigned int i;
-    target_ulong val = 0;
-    CPU_DoubleU u;
+    target_ulong val;
 
     helper_check_align(addr, 3);
     addr = asi_address_mask(env, asi, addr);
@@ -2135,10 +2127,8 @@
             return;
         }
         helper_check_align(addr, 0x3f);
-        for (i = 0; i < 16; i++) {
-            val = *(uint32_t *)&env->fpr[rd++];
-            helper_st_asi(addr, val, asi & 0x8f, 4);
-            addr += 4;
+        for (i = 0; i < 8; i++, rd += 2, addr += 8) {
+            helper_st_asi(addr, env->fpr[rd/2].ll, asi & 0x8f, 8);
         }
 
         return;
@@ -2155,10 +2145,8 @@
             return;
         }
         helper_check_align(addr, 0x3f);
-        for (i = 0; i < 16; i++) {
-            val = *(uint32_t *)&env->fpr[rd++];
-            helper_st_asi(addr, val, asi & 0x19, 4);
-            addr += 4;
+        for (i = 0; i < 8; i++, rd += 2, addr += 8) {
+            helper_st_asi(addr, env->fpr[rd/2].ll, asi & 0x19, 8);
         }
 
         return;
@@ -2169,20 +2157,19 @@
     switch (size) {
     default:
     case 4:
-        helper_st_asi(addr, *(uint32_t *)&env->fpr[rd], asi, size);
+        if (rd & 1) {
+            val = env->fpr[rd/2].l.lower;
+        } else {
+            val = env->fpr[rd/2].l.upper;
+        }
+        helper_st_asi(addr, val, asi, size);
         break;
     case 8:
-        u.l.upper = *(uint32_t *)&env->fpr[rd++];
-        u.l.lower = *(uint32_t *)&env->fpr[rd++];
-        helper_st_asi(addr, u.ll, asi, size);
+        helper_st_asi(addr, env->fpr[rd/2].ll, asi, size);
         break;
     case 16:
-        u.l.upper = *(uint32_t *)&env->fpr[rd++];
-        u.l.lower = *(uint32_t *)&env->fpr[rd++];
-        helper_st_asi(addr, u.ll, asi, 8);
-        u.l.upper = *(uint32_t *)&env->fpr[rd++];
-        u.l.lower = *(uint32_t *)&env->fpr[rd++];
-        helper_st_asi(addr + 8, u.ll, asi, 8);
+        helper_st_asi(addr, env->fpr[rd/2].ll, asi, 8);
+        helper_st_asi(addr + 8, env->fpr[rd/2 + 1].ll, asi, 8);
         break;
     }
 }
@@ -2214,56 +2201,6 @@
 }
 #endif /* TARGET_SPARC64 */
 
-void helper_stdf(target_ulong addr, int mem_idx)
-{
-    helper_check_align(addr, 7);
-#if !defined(CONFIG_USER_ONLY)
-    switch (mem_idx) {
-    case MMU_USER_IDX:
-        stfq_user(addr, DT0);
-        break;
-    case MMU_KERNEL_IDX:
-        stfq_kernel(addr, DT0);
-        break;
-#ifdef TARGET_SPARC64
-    case MMU_HYPV_IDX:
-        stfq_hypv(addr, DT0);
-        break;
-#endif
-    default:
-        DPRINTF_MMU("helper_stdf: need to check MMU idx %d\n", mem_idx);
-        break;
-    }
-#else
-    stfq_raw(address_mask(env, addr), DT0);
-#endif
-}
-
-void helper_lddf(target_ulong addr, int mem_idx)
-{
-    helper_check_align(addr, 7);
-#if !defined(CONFIG_USER_ONLY)
-    switch (mem_idx) {
-    case MMU_USER_IDX:
-        DT0 = ldfq_user(addr);
-        break;
-    case MMU_KERNEL_IDX:
-        DT0 = ldfq_kernel(addr);
-        break;
-#ifdef TARGET_SPARC64
-    case MMU_HYPV_IDX:
-        DT0 = ldfq_hypv(addr);
-        break;
-#endif
-    default:
-        DPRINTF_MMU("helper_lddf: need to check MMU idx %d\n", mem_idx);
-        break;
-    }
-#else
-    DT0 = ldfq_raw(address_mask(env, addr));
-#endif
-}
-
 void helper_ldqf(target_ulong addr, int mem_idx)
 {
     /* XXX add 128 bit load */
diff --git a/target-sparc/machine.c b/target-sparc/machine.c
index 56ae041..235b088 100644
--- a/target-sparc/machine.c
+++ b/target-sparc/machine.c
@@ -21,13 +21,9 @@
         qemu_put_betls(f, &env->regbase[i]);
 
     /* FPU */
-    for(i = 0; i < TARGET_FPREGS; i++) {
-        union {
-            float32 f;
-            uint32_t i;
-        } u;
-        u.f = env->fpr[i];
-        qemu_put_be32(f, u.i);
+    for (i = 0; i < TARGET_DPREGS; i++) {
+        qemu_put_be32(f, env->fpr[i].l.upper);
+        qemu_put_be32(f, env->fpr[i].l.lower);
     }
 
     qemu_put_betls(f, &env->pc);
@@ -128,13 +124,9 @@
         qemu_get_betls(f, &env->regbase[i]);
 
     /* FPU */
-    for(i = 0; i < TARGET_FPREGS; i++) {
-        union {
-            float32 f;
-            uint32_t i;
-        } u;
-        u.i = qemu_get_be32(f);
-        env->fpr[i] = u.f;
+    for (i = 0; i < TARGET_DPREGS; i++) {
+        env->fpr[i].l.upper = qemu_get_be32(f);
+        env->fpr[i].l.lower = qemu_get_be32(f);
     }
 
     qemu_get_betls(f, &env->pc);
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 383fd9c..9318540 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -63,7 +63,7 @@
 static TCGv_i32 cpu_tmp32;
 static TCGv_i64 cpu_tmp64;
 /* Floating point registers */
-static TCGv_i32 cpu_fpr[TARGET_FPREGS];
+static TCGv_i64 cpu_fpr[TARGET_DPREGS];
 
 static target_ulong gen_opc_npc[OPC_BUF_SIZE];
 static target_ulong gen_opc_jump_pc[2];
@@ -82,6 +82,8 @@
     uint32_t cc_op;  /* current CC operation */
     struct TranslationBlock *tb;
     sparc_def_t *def;
+    TCGv_i32 t32[3];
+    int n_t32;
 } DisasContext;
 
 // This function uses non-native bit order
@@ -114,67 +116,116 @@
 
 #define IS_IMM (insn & (1<<13))
 
+static inline void gen_update_fprs_dirty(int rd)
+{
+#if defined(TARGET_SPARC64)
+    tcg_gen_ori_i32(cpu_fprs, cpu_fprs, (rd < 32) ? 1 : 2);
+#endif
+}
+
 /* floating point registers moves */
-static void gen_op_load_fpr_DT0(unsigned int src)
+static TCGv_i32 gen_load_fpr_F(DisasContext *dc, unsigned int src)
 {
-    tcg_gen_st_i32(cpu_fpr[src], cpu_env, offsetof(CPUSPARCState, dt0) +
-                   offsetof(CPU_DoubleU, l.upper));
-    tcg_gen_st_i32(cpu_fpr[src + 1], cpu_env, offsetof(CPUSPARCState, dt0) +
-                   offsetof(CPU_DoubleU, l.lower));
+#if TCG_TARGET_REG_BITS == 32
+    if (src & 1) {
+        return TCGV_LOW(cpu_fpr[src / 2]);
+    } else {
+        return TCGV_HIGH(cpu_fpr[src / 2]);
+    }
+#else
+    if (src & 1) {
+        return MAKE_TCGV_I32(GET_TCGV_I64(cpu_fpr[src / 2]));
+    } else {
+        TCGv_i32 ret = tcg_temp_local_new_i32();
+        TCGv_i64 t = tcg_temp_new_i64();
+
+        tcg_gen_shri_i64(t, cpu_fpr[src / 2], 32);
+        tcg_gen_trunc_i64_i32(ret, t);
+        tcg_temp_free_i64(t);
+
+        dc->t32[dc->n_t32++] = ret;
+        assert(dc->n_t32 <= ARRAY_SIZE(dc->t32));
+
+        return ret;
+    }
+#endif
 }
 
-static void gen_op_load_fpr_DT1(unsigned int src)
+static void gen_store_fpr_F(DisasContext *dc, unsigned int dst, TCGv_i32 v)
 {
-    tcg_gen_st_i32(cpu_fpr[src], cpu_env, offsetof(CPUSPARCState, dt1) +
-                   offsetof(CPU_DoubleU, l.upper));
-    tcg_gen_st_i32(cpu_fpr[src + 1], cpu_env, offsetof(CPUSPARCState, dt1) +
-                   offsetof(CPU_DoubleU, l.lower));
+#if TCG_TARGET_REG_BITS == 32
+    if (dst & 1) {
+        tcg_gen_mov_i32(TCGV_LOW(cpu_fpr[dst / 2]), v);
+    } else {
+        tcg_gen_mov_i32(TCGV_HIGH(cpu_fpr[dst / 2]), v);
+    }
+#else
+    TCGv_i64 t = MAKE_TCGV_I64(GET_TCGV_I32(v));
+    tcg_gen_deposit_i64(cpu_fpr[dst / 2], cpu_fpr[dst / 2], t,
+                        (dst & 1 ? 0 : 32), 32);
+#endif
+    gen_update_fprs_dirty(dst);
 }
 
-static void gen_op_store_DT0_fpr(unsigned int dst)
+static TCGv_i32 gen_dest_fpr_F(void)
 {
-    tcg_gen_ld_i32(cpu_fpr[dst], cpu_env, offsetof(CPUSPARCState, dt0) +
-                   offsetof(CPU_DoubleU, l.upper));
-    tcg_gen_ld_i32(cpu_fpr[dst + 1], cpu_env, offsetof(CPUSPARCState, dt0) +
-                   offsetof(CPU_DoubleU, l.lower));
+    return cpu_tmp32;
+}
+
+static TCGv_i64 gen_load_fpr_D(DisasContext *dc, unsigned int src)
+{
+    src = DFPREG(src);
+    return cpu_fpr[src / 2];
+}
+
+static void gen_store_fpr_D(DisasContext *dc, unsigned int dst, TCGv_i64 v)
+{
+    dst = DFPREG(dst);
+    tcg_gen_mov_i64(cpu_fpr[dst / 2], v);
+    gen_update_fprs_dirty(dst);
+}
+
+static TCGv_i64 gen_dest_fpr_D(void)
+{
+    return cpu_tmp64;
 }
 
 static void gen_op_load_fpr_QT0(unsigned int src)
 {
-    tcg_gen_st_i32(cpu_fpr[src], cpu_env, offsetof(CPUSPARCState, qt0) +
-                   offsetof(CPU_QuadU, l.upmost));
-    tcg_gen_st_i32(cpu_fpr[src + 1], cpu_env, offsetof(CPUSPARCState, qt0) +
-                   offsetof(CPU_QuadU, l.upper));
-    tcg_gen_st_i32(cpu_fpr[src + 2], cpu_env, offsetof(CPUSPARCState, qt0) +
-                   offsetof(CPU_QuadU, l.lower));
-    tcg_gen_st_i32(cpu_fpr[src + 3], cpu_env, offsetof(CPUSPARCState, qt0) +
-                   offsetof(CPU_QuadU, l.lowest));
+    tcg_gen_st_i64(cpu_fpr[src / 2], cpu_env, offsetof(CPUSPARCState, qt0) +
+                   offsetof(CPU_QuadU, ll.upper));
+    tcg_gen_st_i64(cpu_fpr[src/2 + 1], cpu_env, offsetof(CPUSPARCState, qt0) +
+                   offsetof(CPU_QuadU, ll.lower));
 }
 
 static void gen_op_load_fpr_QT1(unsigned int src)
 {
-    tcg_gen_st_i32(cpu_fpr[src], cpu_env, offsetof(CPUSPARCState, qt1) +
-                   offsetof(CPU_QuadU, l.upmost));
-    tcg_gen_st_i32(cpu_fpr[src + 1], cpu_env, offsetof(CPUSPARCState, qt1) +
-                   offsetof(CPU_QuadU, l.upper));
-    tcg_gen_st_i32(cpu_fpr[src + 2], cpu_env, offsetof(CPUSPARCState, qt1) +
-                   offsetof(CPU_QuadU, l.lower));
-    tcg_gen_st_i32(cpu_fpr[src + 3], cpu_env, offsetof(CPUSPARCState, qt1) +
-                   offsetof(CPU_QuadU, l.lowest));
+    tcg_gen_st_i64(cpu_fpr[src / 2], cpu_env, offsetof(CPUSPARCState, qt1) +
+                   offsetof(CPU_QuadU, ll.upper));
+    tcg_gen_st_i64(cpu_fpr[src/2 + 1], cpu_env, offsetof(CPUSPARCState, qt1) +
+                   offsetof(CPU_QuadU, ll.lower));
 }
 
 static void gen_op_store_QT0_fpr(unsigned int dst)
 {
-    tcg_gen_ld_i32(cpu_fpr[dst], cpu_env, offsetof(CPUSPARCState, qt0) +
-                   offsetof(CPU_QuadU, l.upmost));
-    tcg_gen_ld_i32(cpu_fpr[dst + 1], cpu_env, offsetof(CPUSPARCState, qt0) +
-                   offsetof(CPU_QuadU, l.upper));
-    tcg_gen_ld_i32(cpu_fpr[dst + 2], cpu_env, offsetof(CPUSPARCState, qt0) +
-                   offsetof(CPU_QuadU, l.lower));
-    tcg_gen_ld_i32(cpu_fpr[dst + 3], cpu_env, offsetof(CPUSPARCState, qt0) +
-                   offsetof(CPU_QuadU, l.lowest));
+    tcg_gen_ld_i64(cpu_fpr[dst / 2], cpu_env, offsetof(CPUSPARCState, qt0) +
+                   offsetof(CPU_QuadU, ll.upper));
+    tcg_gen_ld_i64(cpu_fpr[dst/2 + 1], cpu_env, offsetof(CPUSPARCState, qt0) +
+                   offsetof(CPU_QuadU, ll.lower));
 }
 
+#ifdef TARGET_SPARC64
+static void gen_move_Q(unsigned int rd, unsigned int rs)
+{
+    rd = QFPREG(rd);
+    rs = QFPREG(rs);
+
+    tcg_gen_mov_i64(cpu_fpr[rd / 2], cpu_fpr[rs / 2]);
+    tcg_gen_mov_i64(cpu_fpr[rd / 2 + 1], cpu_fpr[rs / 2 + 1]);
+    gen_update_fprs_dirty(rd);
+}
+#endif
+
 /* moves */
 #ifdef CONFIG_USER_ONLY
 #define supervisor(dc) 0
@@ -1419,20 +1470,20 @@
     }
 }
 
-static inline void gen_op_fcmpd(int fccno)
+static inline void gen_op_fcmpd(int fccno, TCGv_i64 r_rs1, TCGv_i64 r_rs2)
 {
     switch (fccno) {
     case 0:
-        gen_helper_fcmpd(cpu_env);
+        gen_helper_fcmpd(cpu_env, r_rs1, r_rs2);
         break;
     case 1:
-        gen_helper_fcmpd_fcc1(cpu_env);
+        gen_helper_fcmpd_fcc1(cpu_env, r_rs1, r_rs2);
         break;
     case 2:
-        gen_helper_fcmpd_fcc2(cpu_env);
+        gen_helper_fcmpd_fcc2(cpu_env, r_rs1, r_rs2);
         break;
     case 3:
-        gen_helper_fcmpd_fcc3(cpu_env);
+        gen_helper_fcmpd_fcc3(cpu_env, r_rs1, r_rs2);
         break;
     }
 }
@@ -1473,20 +1524,20 @@
     }
 }
 
-static inline void gen_op_fcmped(int fccno)
+static inline void gen_op_fcmped(int fccno, TCGv_i64 r_rs1, TCGv_i64 r_rs2)
 {
     switch (fccno) {
     case 0:
-        gen_helper_fcmped(cpu_env);
+        gen_helper_fcmped(cpu_env, r_rs1, r_rs2);
         break;
     case 1:
-        gen_helper_fcmped_fcc1(cpu_env);
+        gen_helper_fcmped_fcc1(cpu_env, r_rs1, r_rs2);
         break;
     case 2:
-        gen_helper_fcmped_fcc2(cpu_env);
+        gen_helper_fcmped_fcc2(cpu_env, r_rs1, r_rs2);
         break;
     case 3:
-        gen_helper_fcmped_fcc3(cpu_env);
+        gen_helper_fcmped_fcc3(cpu_env, r_rs1, r_rs2);
         break;
     }
 }
@@ -1516,9 +1567,9 @@
     gen_helper_fcmps(cpu_env, r_rs1, r_rs2);
 }
 
-static inline void gen_op_fcmpd(int fccno)
+static inline void gen_op_fcmpd(int fccno, TCGv_i64 r_rs1, TCGv_i64 r_rs2)
 {
-    gen_helper_fcmpd(cpu_env);
+    gen_helper_fcmpd(cpu_env, r_rs1, r_rs2);
 }
 
 static inline void gen_op_fcmpq(int fccno)
@@ -1531,9 +1582,9 @@
     gen_helper_fcmpes(cpu_env, r_rs1, r_rs2);
 }
 
-static inline void gen_op_fcmped(int fccno)
+static inline void gen_op_fcmped(int fccno, TCGv_i64 r_rs1, TCGv_i64 r_rs2)
 {
-    gen_helper_fcmped(cpu_env);
+    gen_helper_fcmped(cpu_env, r_rs1, r_rs2);
 }
 
 static inline void gen_op_fcmpeq(int fccno)
@@ -1570,21 +1621,313 @@
     return 0;
 }
 
-static inline void gen_update_fprs_dirty(int rd)
-{
-#if defined(TARGET_SPARC64)
-    tcg_gen_ori_i32(cpu_fprs, cpu_fprs, (rd < 32) ? 1 : 2);
-#endif
-}
-
 static inline void gen_op_clear_ieee_excp_and_FTT(void)
 {
     tcg_gen_andi_tl(cpu_fsr, cpu_fsr, FSR_FTT_CEXC_NMASK);
 }
 
-static inline void gen_clear_float_exceptions(void)
+static inline void gen_fop_FF(DisasContext *dc, int rd, int rs,
+                              void (*gen)(TCGv_i32, TCGv_ptr, TCGv_i32))
 {
-    gen_helper_clear_float_exceptions(cpu_env);
+    TCGv_i32 dst, src;
+
+    src = gen_load_fpr_F(dc, rs);
+    dst = gen_dest_fpr_F();
+
+    gen(dst, cpu_env, src);
+
+    gen_store_fpr_F(dc, rd, dst);
+}
+
+static inline void gen_ne_fop_FF(DisasContext *dc, int rd, int rs,
+                                 void (*gen)(TCGv_i32, TCGv_i32))
+{
+    TCGv_i32 dst, src;
+
+    src = gen_load_fpr_F(dc, rs);
+    dst = gen_dest_fpr_F();
+
+    gen(dst, src);
+
+    gen_store_fpr_F(dc, rd, dst);
+}
+
+static inline void gen_fop_FFF(DisasContext *dc, int rd, int rs1, int rs2,
+                        void (*gen)(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32))
+{
+    TCGv_i32 dst, src1, src2;
+
+    src1 = gen_load_fpr_F(dc, rs1);
+    src2 = gen_load_fpr_F(dc, rs2);
+    dst = gen_dest_fpr_F();
+
+    gen(dst, cpu_env, src1, src2);
+
+    gen_store_fpr_F(dc, rd, dst);
+}
+
+#ifdef TARGET_SPARC64
+static inline void gen_ne_fop_FFF(DisasContext *dc, int rd, int rs1, int rs2,
+                                  void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
+{
+    TCGv_i32 dst, src1, src2;
+
+    src1 = gen_load_fpr_F(dc, rs1);
+    src2 = gen_load_fpr_F(dc, rs2);
+    dst = gen_dest_fpr_F();
+
+    gen(dst, src1, src2);
+
+    gen_store_fpr_F(dc, rd, dst);
+}
+#endif
+
+static inline void gen_fop_DD(DisasContext *dc, int rd, int rs,
+                              void (*gen)(TCGv_i64, TCGv_ptr, TCGv_i64))
+{
+    TCGv_i64 dst, src;
+
+    src = gen_load_fpr_D(dc, rs);
+    dst = gen_dest_fpr_D();
+
+    gen(dst, cpu_env, src);
+
+    gen_store_fpr_D(dc, rd, dst);
+}
+
+#ifdef TARGET_SPARC64
+static inline void gen_ne_fop_DD(DisasContext *dc, int rd, int rs,
+                                 void (*gen)(TCGv_i64, TCGv_i64))
+{
+    TCGv_i64 dst, src;
+
+    src = gen_load_fpr_D(dc, rs);
+    dst = gen_dest_fpr_D();
+
+    gen(dst, src);
+
+    gen_store_fpr_D(dc, rd, dst);
+}
+#endif
+
+static inline void gen_fop_DDD(DisasContext *dc, int rd, int rs1, int rs2,
+                        void (*gen)(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64))
+{
+    TCGv_i64 dst, src1, src2;
+
+    src1 = gen_load_fpr_D(dc, rs1);
+    src2 = gen_load_fpr_D(dc, rs2);
+    dst = gen_dest_fpr_D();
+
+    gen(dst, cpu_env, src1, src2);
+
+    gen_store_fpr_D(dc, rd, dst);
+}
+
+#ifdef TARGET_SPARC64
+static inline void gen_ne_fop_DDD(DisasContext *dc, int rd, int rs1, int rs2,
+                                  void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
+{
+    TCGv_i64 dst, src1, src2;
+
+    src1 = gen_load_fpr_D(dc, rs1);
+    src2 = gen_load_fpr_D(dc, rs2);
+    dst = gen_dest_fpr_D();
+
+    gen(dst, src1, src2);
+
+    gen_store_fpr_D(dc, rd, dst);
+}
+
+static inline void gen_gsr_fop_DDD(DisasContext *dc, int rd, int rs1, int rs2,
+                           void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
+{
+    TCGv_i64 dst, src1, src2;
+
+    src1 = gen_load_fpr_D(dc, rs1);
+    src2 = gen_load_fpr_D(dc, rs2);
+    dst = gen_dest_fpr_D();
+
+    gen(dst, cpu_gsr, src1, src2);
+
+    gen_store_fpr_D(dc, rd, dst);
+}
+
+static inline void gen_ne_fop_DDDD(DisasContext *dc, int rd, int rs1, int rs2,
+                           void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
+{
+    TCGv_i64 dst, src0, src1, src2;
+
+    src1 = gen_load_fpr_D(dc, rs1);
+    src2 = gen_load_fpr_D(dc, rs2);
+    src0 = gen_load_fpr_D(dc, rd);
+    dst = gen_dest_fpr_D();
+
+    gen(dst, src0, src1, src2);
+
+    gen_store_fpr_D(dc, rd, dst);
+}
+#endif
+
+static inline void gen_fop_QQ(DisasContext *dc, int rd, int rs,
+                              void (*gen)(TCGv_ptr))
+{
+    gen_op_load_fpr_QT1(QFPREG(rs));
+
+    gen(cpu_env);
+
+    gen_op_store_QT0_fpr(QFPREG(rd));
+    gen_update_fprs_dirty(QFPREG(rd));
+}
+
+#ifdef TARGET_SPARC64
+static inline void gen_ne_fop_QQ(DisasContext *dc, int rd, int rs,
+                                 void (*gen)(TCGv_ptr))
+{
+    gen_op_load_fpr_QT1(QFPREG(rs));
+
+    gen(cpu_env);
+
+    gen_op_store_QT0_fpr(QFPREG(rd));
+    gen_update_fprs_dirty(QFPREG(rd));
+}
+#endif
+
+static inline void gen_fop_QQQ(DisasContext *dc, int rd, int rs1, int rs2,
+                               void (*gen)(TCGv_ptr))
+{
+    gen_op_load_fpr_QT0(QFPREG(rs1));
+    gen_op_load_fpr_QT1(QFPREG(rs2));
+
+    gen(cpu_env);
+
+    gen_op_store_QT0_fpr(QFPREG(rd));
+    gen_update_fprs_dirty(QFPREG(rd));
+}
+
+static inline void gen_fop_DFF(DisasContext *dc, int rd, int rs1, int rs2,
+                        void (*gen)(TCGv_i64, TCGv_ptr, TCGv_i32, TCGv_i32))
+{
+    TCGv_i64 dst;
+    TCGv_i32 src1, src2;
+
+    src1 = gen_load_fpr_F(dc, rs1);
+    src2 = gen_load_fpr_F(dc, rs2);
+    dst = gen_dest_fpr_D();
+
+    gen(dst, cpu_env, src1, src2);
+
+    gen_store_fpr_D(dc, rd, dst);
+}
+
+static inline void gen_fop_QDD(DisasContext *dc, int rd, int rs1, int rs2,
+                               void (*gen)(TCGv_ptr, TCGv_i64, TCGv_i64))
+{
+    TCGv_i64 src1, src2;
+
+    src1 = gen_load_fpr_D(dc, rs1);
+    src2 = gen_load_fpr_D(dc, rs2);
+
+    gen(cpu_env, src1, src2);
+
+    gen_op_store_QT0_fpr(QFPREG(rd));
+    gen_update_fprs_dirty(QFPREG(rd));
+}
+
+#ifdef TARGET_SPARC64
+static inline void gen_fop_DF(DisasContext *dc, int rd, int rs,
+                              void (*gen)(TCGv_i64, TCGv_ptr, TCGv_i32))
+{
+    TCGv_i64 dst;
+    TCGv_i32 src;
+
+    src = gen_load_fpr_F(dc, rs);
+    dst = gen_dest_fpr_D();
+
+    gen(dst, cpu_env, src);
+
+    gen_store_fpr_D(dc, rd, dst);
+}
+#endif
+
+static inline void gen_ne_fop_DF(DisasContext *dc, int rd, int rs,
+                                 void (*gen)(TCGv_i64, TCGv_ptr, TCGv_i32))
+{
+    TCGv_i64 dst;
+    TCGv_i32 src;
+
+    src = gen_load_fpr_F(dc, rs);
+    dst = gen_dest_fpr_D();
+
+    gen(dst, cpu_env, src);
+
+    gen_store_fpr_D(dc, rd, dst);
+}
+
+static inline void gen_fop_FD(DisasContext *dc, int rd, int rs,
+                              void (*gen)(TCGv_i32, TCGv_ptr, TCGv_i64))
+{
+    TCGv_i32 dst;
+    TCGv_i64 src;
+
+    src = gen_load_fpr_D(dc, rs);
+    dst = gen_dest_fpr_F();
+
+    gen(dst, cpu_env, src);
+
+    gen_store_fpr_F(dc, rd, dst);
+}
+
+static inline void gen_fop_FQ(DisasContext *dc, int rd, int rs,
+                              void (*gen)(TCGv_i32, TCGv_ptr))
+{
+    TCGv_i32 dst;
+
+    gen_op_load_fpr_QT1(QFPREG(rs));
+    dst = gen_dest_fpr_F();
+
+    gen(dst, cpu_env);
+
+    gen_store_fpr_F(dc, rd, dst);
+}
+
+static inline void gen_fop_DQ(DisasContext *dc, int rd, int rs,
+                              void (*gen)(TCGv_i64, TCGv_ptr))
+{
+    TCGv_i64 dst;
+
+    gen_op_load_fpr_QT1(QFPREG(rs));
+    dst = gen_dest_fpr_D();
+
+    gen(dst, cpu_env);
+
+    gen_store_fpr_D(dc, rd, dst);
+}
+
+static inline void gen_ne_fop_QF(DisasContext *dc, int rd, int rs,
+                                 void (*gen)(TCGv_ptr, TCGv_i32))
+{
+    TCGv_i32 src;
+
+    src = gen_load_fpr_F(dc, rs);
+
+    gen(cpu_env, src);
+
+    gen_op_store_QT0_fpr(QFPREG(rd));
+    gen_update_fprs_dirty(QFPREG(rd));
+}
+
+static inline void gen_ne_fop_QD(DisasContext *dc, int rd, int rs,
+                                 void (*gen)(TCGv_ptr, TCGv_i64))
+{
+    TCGv_i64 src;
+
+    src = gen_load_fpr_D(dc, rs);
+
+    gen(cpu_env, src);
+
+    gen_op_store_QT0_fpr(QFPREG(rd));
+    gen_update_fprs_dirty(QFPREG(rd));
 }
 
 /* asi moves */
@@ -1878,6 +2221,148 @@
 
     tcg_temp_free_i32(r_tl);
 }
+
+static void gen_edge(DisasContext *dc, TCGv dst, TCGv s1, TCGv s2,
+                     int width, bool cc, bool left)
+{
+    TCGv lo1, lo2, t1, t2;
+    uint64_t amask, tabl, tabr;
+    int shift, imask, omask;
+
+    if (cc) {
+        tcg_gen_mov_tl(cpu_cc_src, s1);
+        tcg_gen_mov_tl(cpu_cc_src2, s2);
+        tcg_gen_sub_tl(cpu_cc_dst, s1, s2);
+        tcg_gen_movi_i32(cpu_cc_op, CC_OP_SUB);
+        dc->cc_op = CC_OP_SUB;
+    }
+
+    /* Theory of operation: there are two tables, left and right (not to
+       be confused with the left and right versions of the opcode).  These
+       are indexed by the low 3 bits of the inputs.  To make things "easy",
+       these tables are loaded into two constants, TABL and TABR below.
+       The operation index = (input & imask) << shift calculates the index
+       into the constant, while val = (table >> index) & omask calculates
+       the value we're looking for.  */
+    switch (width) {
+    case 8:
+        imask = 0x7;
+        shift = 3;
+        omask = 0xff;
+        if (left) {
+            tabl = 0x80c0e0f0f8fcfeffULL;
+            tabr = 0xff7f3f1f0f070301ULL;
+        } else {
+            tabl = 0x0103070f1f3f7fffULL;
+            tabr = 0xfffefcf8f0e0c080ULL;
+        }
+        break;
+    case 16:
+        imask = 0x6;
+        shift = 1;
+        omask = 0xf;
+        if (left) {
+            tabl = 0x8cef;
+            tabr = 0xf731;
+        } else {
+            tabl = 0x137f;
+            tabr = 0xfec8;
+        }
+        break;
+    case 32:
+        imask = 0x4;
+        shift = 0;
+        omask = 0x3;
+        if (left) {
+            tabl = (2 << 2) | 3;
+            tabr = (3 << 2) | 1;
+        } else {
+            tabl = (1 << 2) | 3;
+            tabr = (3 << 2) | 2;
+        }
+        break;
+    default:
+        abort();
+    }
+
+    lo1 = tcg_temp_new();
+    lo2 = tcg_temp_new();
+    tcg_gen_andi_tl(lo1, s1, imask);
+    tcg_gen_andi_tl(lo2, s2, imask);
+    tcg_gen_shli_tl(lo1, lo1, shift);
+    tcg_gen_shli_tl(lo2, lo2, shift);
+
+    t1 = tcg_const_tl(tabl);
+    t2 = tcg_const_tl(tabr);
+    tcg_gen_shr_tl(lo1, t1, lo1);
+    tcg_gen_shr_tl(lo2, t2, lo2);
+    tcg_gen_andi_tl(dst, lo1, omask);
+    tcg_gen_andi_tl(lo2, lo2, omask);
+
+    amask = -8;
+    if (AM_CHECK(dc)) {
+        amask &= 0xffffffffULL;
+    }
+    tcg_gen_andi_tl(s1, s1, amask);
+    tcg_gen_andi_tl(s2, s2, amask);
+
+    /* We want to compute
+        dst = (s1 == s2 ? lo1 : lo1 & lo2).
+       We've already done dst = lo1, so this reduces to
+        dst &= (s1 == s2 ? -1 : lo2)
+       Which we perform by
+        lo2 |= -(s1 == s2)
+        dst &= lo2
+    */
+    tcg_gen_setcond_tl(TCG_COND_EQ, t1, s1, s2);
+    tcg_gen_neg_tl(t1, t1);
+    tcg_gen_or_tl(lo2, lo2, t1);
+    tcg_gen_and_tl(dst, dst, lo2);
+
+    tcg_temp_free(lo1);
+    tcg_temp_free(lo2);
+    tcg_temp_free(t1);
+    tcg_temp_free(t2);
+}
+
+static void gen_alignaddr(TCGv dst, TCGv s1, TCGv s2, bool left)
+{
+    TCGv tmp = tcg_temp_new();
+
+    tcg_gen_add_tl(tmp, s1, s2);
+    tcg_gen_andi_tl(dst, tmp, -8);
+    if (left) {
+        tcg_gen_neg_tl(tmp, tmp);
+    }
+    tcg_gen_deposit_tl(cpu_gsr, cpu_gsr, tmp, 0, 3);
+
+    tcg_temp_free(tmp);
+}
+
+static void gen_faligndata(TCGv dst, TCGv gsr, TCGv s1, TCGv s2)
+{
+    TCGv t1, t2, shift;
+
+    t1 = tcg_temp_new();
+    t2 = tcg_temp_new();
+    shift = tcg_temp_new();
+
+    tcg_gen_andi_tl(shift, gsr, 7);
+    tcg_gen_shli_tl(shift, shift, 3);
+    tcg_gen_shl_tl(t1, s1, shift);
+
+    /* A shift of 64 does not produce 0 in TCG.  Divide this into a
+       shift of (up to 63) followed by a constant shift of 1.  */
+    tcg_gen_xori_tl(shift, shift, 63);
+    tcg_gen_shr_tl(t2, s2, shift);
+    tcg_gen_shri_tl(t2, t2, 1);
+
+    tcg_gen_or_tl(dst, t1, t2);
+
+    tcg_temp_free(t1);
+    tcg_temp_free(t2);
+    tcg_temp_free(shift);
+}
 #endif
 
 #define CHECK_IU_FEATURE(dc, FEATURE)                      \
@@ -1892,6 +2377,8 @@
 {
     unsigned int insn, opc, rs1, rs2, rd;
     TCGv cpu_src1, cpu_src2, cpu_tmp1, cpu_tmp2;
+    TCGv_i32 cpu_src1_32, cpu_src2_32, cpu_dst_32;
+    TCGv_i64 cpu_src1_64, cpu_src2_64, cpu_dst_64;
     target_long simm;
 
     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
@@ -2369,350 +2856,162 @@
                 save_state(dc, cpu_cond);
                 switch (xop) {
                 case 0x1: /* fmovs */
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    cpu_src1_32 = gen_load_fpr_F(dc, rs2);
+                    gen_store_fpr_F(dc, rd, cpu_src1_32);
                     break;
                 case 0x5: /* fnegs */
-                    gen_helper_fnegs(cpu_fpr[rd], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FF(dc, rd, rs2, gen_helper_fnegs);
                     break;
                 case 0x9: /* fabss */
-                    gen_helper_fabss(cpu_fpr[rd], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FF(dc, rd, rs2, gen_helper_fabss);
                     break;
                 case 0x29: /* fsqrts */
                     CHECK_FPU_FEATURE(dc, FSQRT);
-                    gen_clear_float_exceptions();
-                    gen_helper_fsqrts(cpu_tmp32, cpu_env, cpu_fpr[rs2]);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_tmp32);
-                    gen_update_fprs_dirty(rd);
+                    gen_fop_FF(dc, rd, rs2, gen_helper_fsqrts);
                     break;
                 case 0x2a: /* fsqrtd */
                     CHECK_FPU_FEATURE(dc, FSQRT);
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fsqrtd(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_fop_DD(dc, rd, rs2, gen_helper_fsqrtd);
                     break;
                 case 0x2b: /* fsqrtq */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_op_load_fpr_QT1(QFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fsqrtq(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_QT0_fpr(QFPREG(rd));
-                    gen_update_fprs_dirty(QFPREG(rd));
+                    gen_fop_QQ(dc, rd, rs2, gen_helper_fsqrtq);
                     break;
                 case 0x41: /* fadds */
-                    gen_clear_float_exceptions();
-                    gen_helper_fadds(cpu_tmp32, cpu_env, cpu_fpr[rs1],
-                                     cpu_fpr[rs2]);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_tmp32);
-                    gen_update_fprs_dirty(rd);
+                    gen_fop_FFF(dc, rd, rs1, rs2, gen_helper_fadds);
                     break;
                 case 0x42: /* faddd */
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_faddd(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_fop_DDD(dc, rd, rs1, rs2, gen_helper_faddd);
                     break;
                 case 0x43: /* faddq */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_op_load_fpr_QT0(QFPREG(rs1));
-                    gen_op_load_fpr_QT1(QFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_faddq(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_QT0_fpr(QFPREG(rd));
-                    gen_update_fprs_dirty(QFPREG(rd));
+                    gen_fop_QQQ(dc, rd, rs1, rs2, gen_helper_faddq);
                     break;
                 case 0x45: /* fsubs */
-                    gen_clear_float_exceptions();
-                    gen_helper_fsubs(cpu_tmp32, cpu_env, cpu_fpr[rs1],
-                                     cpu_fpr[rs2]);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_tmp32);
-                    gen_update_fprs_dirty(rd);
+                    gen_fop_FFF(dc, rd, rs1, rs2, gen_helper_fsubs);
                     break;
                 case 0x46: /* fsubd */
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fsubd(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_fop_DDD(dc, rd, rs1, rs2, gen_helper_fsubd);
                     break;
                 case 0x47: /* fsubq */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_op_load_fpr_QT0(QFPREG(rs1));
-                    gen_op_load_fpr_QT1(QFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fsubq(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_QT0_fpr(QFPREG(rd));
-                    gen_update_fprs_dirty(QFPREG(rd));
+                    gen_fop_QQQ(dc, rd, rs1, rs2, gen_helper_fsubq);
                     break;
                 case 0x49: /* fmuls */
                     CHECK_FPU_FEATURE(dc, FMUL);
-                    gen_clear_float_exceptions();
-                    gen_helper_fmuls(cpu_tmp32, cpu_env, cpu_fpr[rs1],
-                                     cpu_fpr[rs2]);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_tmp32);
-                    gen_update_fprs_dirty(rd);
+                    gen_fop_FFF(dc, rd, rs1, rs2, gen_helper_fmuls);
                     break;
                 case 0x4a: /* fmuld */
                     CHECK_FPU_FEATURE(dc, FMUL);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fmuld(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_fop_DDD(dc, rd, rs1, rs2, gen_helper_fmuld);
                     break;
                 case 0x4b: /* fmulq */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
                     CHECK_FPU_FEATURE(dc, FMUL);
-                    gen_op_load_fpr_QT0(QFPREG(rs1));
-                    gen_op_load_fpr_QT1(QFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fmulq(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_QT0_fpr(QFPREG(rd));
-                    gen_update_fprs_dirty(QFPREG(rd));
+                    gen_fop_QQQ(dc, rd, rs1, rs2, gen_helper_fmulq);
                     break;
                 case 0x4d: /* fdivs */
-                    gen_clear_float_exceptions();
-                    gen_helper_fdivs(cpu_tmp32, cpu_env, cpu_fpr[rs1],
-                                     cpu_fpr[rs2]);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_tmp32);
-                    gen_update_fprs_dirty(rd);
+                    gen_fop_FFF(dc, rd, rs1, rs2, gen_helper_fdivs);
                     break;
                 case 0x4e: /* fdivd */
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fdivd(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_fop_DDD(dc, rd, rs1, rs2, gen_helper_fdivd);
                     break;
                 case 0x4f: /* fdivq */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_op_load_fpr_QT0(QFPREG(rs1));
-                    gen_op_load_fpr_QT1(QFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fdivq(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_QT0_fpr(QFPREG(rd));
-                    gen_update_fprs_dirty(QFPREG(rd));
+                    gen_fop_QQQ(dc, rd, rs1, rs2, gen_helper_fdivq);
                     break;
                 case 0x69: /* fsmuld */
                     CHECK_FPU_FEATURE(dc, FSMULD);
-                    gen_clear_float_exceptions();
-                    gen_helper_fsmuld(cpu_env, cpu_fpr[rs1], cpu_fpr[rs2]);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_fop_DFF(dc, rd, rs1, rs2, gen_helper_fsmuld);
                     break;
                 case 0x6e: /* fdmulq */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fdmulq(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_QT0_fpr(QFPREG(rd));
-                    gen_update_fprs_dirty(QFPREG(rd));
+                    gen_fop_QDD(dc, rd, rs1, rs2, gen_helper_fdmulq);
                     break;
                 case 0xc4: /* fitos */
-                    gen_clear_float_exceptions();
-                    gen_helper_fitos(cpu_tmp32, cpu_env, cpu_fpr[rs2]);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_tmp32);
-                    gen_update_fprs_dirty(rd);
+                    gen_fop_FF(dc, rd, rs2, gen_helper_fitos);
                     break;
                 case 0xc6: /* fdtos */
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fdtos(cpu_tmp32, cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_tmp32);
-                    gen_update_fprs_dirty(rd);
+                    gen_fop_FD(dc, rd, rs2, gen_helper_fdtos);
                     break;
                 case 0xc7: /* fqtos */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_op_load_fpr_QT1(QFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fqtos(cpu_tmp32, cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_tmp32);
-                    gen_update_fprs_dirty(rd);
+                    gen_fop_FQ(dc, rd, rs2, gen_helper_fqtos);
                     break;
                 case 0xc8: /* fitod */
-                    gen_helper_fitod(cpu_env, cpu_fpr[rs2]);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DF(dc, rd, rs2, gen_helper_fitod);
                     break;
                 case 0xc9: /* fstod */
-                    gen_helper_fstod(cpu_env, cpu_fpr[rs2]);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DF(dc, rd, rs2, gen_helper_fstod);
                     break;
                 case 0xcb: /* fqtod */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_op_load_fpr_QT1(QFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fqtod(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_fop_DQ(dc, rd, rs2, gen_helper_fqtod);
                     break;
                 case 0xcc: /* fitoq */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_helper_fitoq(cpu_env, cpu_fpr[rs2]);
-                    gen_op_store_QT0_fpr(QFPREG(rd));
-                    gen_update_fprs_dirty(QFPREG(rd));
+                    gen_ne_fop_QF(dc, rd, rs2, gen_helper_fitoq);
                     break;
                 case 0xcd: /* fstoq */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_helper_fstoq(cpu_env, cpu_fpr[rs2]);
-                    gen_op_store_QT0_fpr(QFPREG(rd));
-                    gen_update_fprs_dirty(QFPREG(rd));
+                    gen_ne_fop_QF(dc, rd, rs2, gen_helper_fstoq);
                     break;
                 case 0xce: /* fdtoq */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fdtoq(cpu_env);
-                    gen_op_store_QT0_fpr(QFPREG(rd));
-                    gen_update_fprs_dirty(QFPREG(rd));
+                    gen_ne_fop_QD(dc, rd, rs2, gen_helper_fdtoq);
                     break;
                 case 0xd1: /* fstoi */
-                    gen_clear_float_exceptions();
-                    gen_helper_fstoi(cpu_tmp32, cpu_env, cpu_fpr[rs2]);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_tmp32);
-                    gen_update_fprs_dirty(rd);
+                    gen_fop_FF(dc, rd, rs2, gen_helper_fstoi);
                     break;
                 case 0xd2: /* fdtoi */
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fdtoi(cpu_tmp32, cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_tmp32);
-                    gen_update_fprs_dirty(rd);
+                    gen_fop_FD(dc, rd, rs2, gen_helper_fdtoi);
                     break;
                 case 0xd3: /* fqtoi */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_op_load_fpr_QT1(QFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fqtoi(cpu_tmp32, cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_tmp32);
-                    gen_update_fprs_dirty(rd);
+                    gen_fop_FQ(dc, rd, rs2, gen_helper_fqtoi);
                     break;
 #ifdef TARGET_SPARC64
                 case 0x2: /* V9 fmovd */
-                    tcg_gen_mov_i32(cpu_fpr[DFPREG(rd)], cpu_fpr[DFPREG(rs2)]);
-                    tcg_gen_mov_i32(cpu_fpr[DFPREG(rd) + 1],
-                                    cpu_fpr[DFPREG(rs2) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    cpu_src1_64 = gen_load_fpr_D(dc, rs2);
+                    gen_store_fpr_D(dc, rd, cpu_src1_64);
                     break;
                 case 0x3: /* V9 fmovq */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    tcg_gen_mov_i32(cpu_fpr[QFPREG(rd)], cpu_fpr[QFPREG(rs2)]);
-                    tcg_gen_mov_i32(cpu_fpr[QFPREG(rd) + 1],
-                                    cpu_fpr[QFPREG(rs2) + 1]);
-                    tcg_gen_mov_i32(cpu_fpr[QFPREG(rd) + 2],
-                                    cpu_fpr[QFPREG(rs2) + 2]);
-                    tcg_gen_mov_i32(cpu_fpr[QFPREG(rd) + 3],
-                                    cpu_fpr[QFPREG(rs2) + 3]);
-                    gen_update_fprs_dirty(QFPREG(rd));
+                    gen_move_Q(rd, rs2);
                     break;
                 case 0x6: /* V9 fnegd */
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fnegd(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DD(dc, rd, rs2, gen_helper_fnegd);
                     break;
                 case 0x7: /* V9 fnegq */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_op_load_fpr_QT1(QFPREG(rs2));
-                    gen_helper_fnegq(cpu_env);
-                    gen_op_store_QT0_fpr(QFPREG(rd));
-                    gen_update_fprs_dirty(QFPREG(rd));
+                    gen_ne_fop_QQ(dc, rd, rs2, gen_helper_fnegq);
                     break;
                 case 0xa: /* V9 fabsd */
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fabsd(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DD(dc, rd, rs2, gen_helper_fabsd);
                     break;
                 case 0xb: /* V9 fabsq */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_op_load_fpr_QT1(QFPREG(rs2));
-                    gen_helper_fabsq(cpu_env);
-                    gen_op_store_QT0_fpr(QFPREG(rd));
-                    gen_update_fprs_dirty(QFPREG(rd));
+                    gen_ne_fop_QQ(dc, rd, rs2, gen_helper_fabsq);
                     break;
                 case 0x81: /* V9 fstox */
-                    gen_clear_float_exceptions();
-                    gen_helper_fstox(cpu_env, cpu_fpr[rs2]);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_fop_DF(dc, rd, rs2, gen_helper_fstox);
                     break;
                 case 0x82: /* V9 fdtox */
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fdtox(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_fop_DD(dc, rd, rs2, gen_helper_fdtox);
                     break;
                 case 0x83: /* V9 fqtox */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_op_load_fpr_QT1(QFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fqtox(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_fop_DQ(dc, rd, rs2, gen_helper_fqtox);
                     break;
                 case 0x84: /* V9 fxtos */
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fxtos(cpu_tmp32, cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_tmp32);
-                    gen_update_fprs_dirty(rd);
+                    gen_fop_FD(dc, rd, rs2, gen_helper_fxtos);
                     break;
                 case 0x88: /* V9 fxtod */
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fxtod(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_fop_DD(dc, rd, rs2, gen_helper_fxtod);
                     break;
                 case 0x8c: /* V9 fxtoq */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fxtoq(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_QT0_fpr(QFPREG(rd));
-                    gen_update_fprs_dirty(QFPREG(rd));
+                    gen_ne_fop_QD(dc, rd, rs2, gen_helper_fxtoq);
                     break;
 #endif
                 default:
@@ -2738,8 +3037,8 @@
                     cpu_src1 = get_src1(insn, cpu_src1);
                     tcg_gen_brcondi_tl(gen_tcg_cond_reg[cond], cpu_src1,
                                        0, l1);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    cpu_src1_32 = gen_load_fpr_F(dc, rs2);
+                    gen_store_fpr_F(dc, rd, cpu_src1_32);
                     gen_set_label(l1);
                     break;
                 } else if ((xop & 0x11f) == 0x006) { // V9 fmovdr
@@ -2750,9 +3049,8 @@
                     cpu_src1 = get_src1(insn, cpu_src1);
                     tcg_gen_brcondi_tl(gen_tcg_cond_reg[cond], cpu_src1,
                                        0, l1);
-                    tcg_gen_mov_i32(cpu_fpr[DFPREG(rd)], cpu_fpr[DFPREG(rs2)]);
-                    tcg_gen_mov_i32(cpu_fpr[DFPREG(rd) + 1], cpu_fpr[DFPREG(rs2) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    cpu_src1_64 = gen_load_fpr_D(dc, rs2);
+                    gen_store_fpr_D(dc, rd, cpu_src1_64);
                     gen_set_label(l1);
                     break;
                 } else if ((xop & 0x11f) == 0x007) { // V9 fmovqr
@@ -2764,11 +3062,7 @@
                     cpu_src1 = get_src1(insn, cpu_src1);
                     tcg_gen_brcondi_tl(gen_tcg_cond_reg[cond], cpu_src1,
                                        0, l1);
-                    tcg_gen_mov_i32(cpu_fpr[QFPREG(rd)], cpu_fpr[QFPREG(rs2)]);
-                    tcg_gen_mov_i32(cpu_fpr[QFPREG(rd) + 1], cpu_fpr[QFPREG(rs2) + 1]);
-                    tcg_gen_mov_i32(cpu_fpr[QFPREG(rd) + 2], cpu_fpr[QFPREG(rs2) + 2]);
-                    tcg_gen_mov_i32(cpu_fpr[QFPREG(rd) + 3], cpu_fpr[QFPREG(rs2) + 3]);
-                    gen_update_fprs_dirty(QFPREG(rd));
+                    gen_move_Q(rd, rs2);
                     gen_set_label(l1);
                     break;
                 }
@@ -2786,8 +3080,8 @@
                         gen_fcond(r_cond, fcc, cond);                   \
                         tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond,         \
                                            0, l1);                      \
-                        tcg_gen_mov_i32(cpu_fpr[rd], cpu_fpr[rs2]);     \
-                        gen_update_fprs_dirty(rd);                      \
+                        cpu_src1_32 = gen_load_fpr_F(dc, rs2);          \
+                        gen_store_fpr_F(dc, rd, cpu_src1_32);           \
                         gen_set_label(l1);                              \
                         tcg_temp_free(r_cond);                          \
                     }
@@ -2802,11 +3096,8 @@
                         gen_fcond(r_cond, fcc, cond);                   \
                         tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond,         \
                                            0, l1);                      \
-                        tcg_gen_mov_i32(cpu_fpr[DFPREG(rd)],            \
-                                        cpu_fpr[DFPREG(rs2)]);          \
-                        tcg_gen_mov_i32(cpu_fpr[DFPREG(rd) + 1],        \
-                                        cpu_fpr[DFPREG(rs2) + 1]);      \
-                        gen_update_fprs_dirty(DFPREG(rd));              \
+                        cpu_src1_64 = gen_load_fpr_D(dc, rs2);          \
+                        gen_store_fpr_D(dc, rd, cpu_src1_64);           \
                         gen_set_label(l1);                              \
                         tcg_temp_free(r_cond);                          \
                     }
@@ -2821,15 +3112,7 @@
                         gen_fcond(r_cond, fcc, cond);                   \
                         tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond,         \
                                            0, l1);                      \
-                        tcg_gen_mov_i32(cpu_fpr[QFPREG(rd)],            \
-                                        cpu_fpr[QFPREG(rs2)]);          \
-                        tcg_gen_mov_i32(cpu_fpr[QFPREG(rd) + 1],        \
-                                        cpu_fpr[QFPREG(rs2) + 1]);      \
-                        tcg_gen_mov_i32(cpu_fpr[QFPREG(rd) + 2],        \
-                                        cpu_fpr[QFPREG(rs2) + 2]);      \
-                        tcg_gen_mov_i32(cpu_fpr[QFPREG(rd) + 3],        \
-                                        cpu_fpr[QFPREG(rs2) + 3]);      \
-                        gen_update_fprs_dirty(QFPREG(rd));              \
+                        gen_move_Q(rd, rs2);                            \
                         gen_set_label(l1);                              \
                         tcg_temp_free(r_cond);                          \
                     }
@@ -2887,8 +3170,8 @@
                         gen_cond(r_cond, icc, cond, dc);                \
                         tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond,         \
                                            0, l1);                      \
-                        tcg_gen_mov_i32(cpu_fpr[rd], cpu_fpr[rs2]);     \
-                        gen_update_fprs_dirty(rd);                      \
+                        cpu_src1_32 = gen_load_fpr_F(dc, rs2);          \
+                        gen_store_fpr_F(dc, rd, cpu_src1_32);           \
                         gen_set_label(l1);                              \
                         tcg_temp_free(r_cond);                          \
                     }
@@ -2903,10 +3186,8 @@
                         gen_cond(r_cond, icc, cond, dc);                \
                         tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond,         \
                                            0, l1);                      \
-                        tcg_gen_mov_i32(cpu_fpr[DFPREG(rd)],            \
-                                        cpu_fpr[DFPREG(rs2)]);          \
-                        tcg_gen_mov_i32(cpu_fpr[DFPREG(rd) + 1],        \
-                                        cpu_fpr[DFPREG(rs2) + 1]);      \
+                        cpu_src1_64 = gen_load_fpr_D(dc, rs2);          \
+                        gen_store_fpr_D(dc, rd, cpu_src1_64);           \
                         gen_update_fprs_dirty(DFPREG(rd));              \
                         gen_set_label(l1);                              \
                         tcg_temp_free(r_cond);                          \
@@ -2922,15 +3203,7 @@
                         gen_cond(r_cond, icc, cond, dc);                \
                         tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond,         \
                                            0, l1);                      \
-                        tcg_gen_mov_i32(cpu_fpr[QFPREG(rd)],            \
-                                        cpu_fpr[QFPREG(rs2)]);          \
-                        tcg_gen_mov_i32(cpu_fpr[QFPREG(rd) + 1],        \
-                                        cpu_fpr[QFPREG(rs2) + 1]);      \
-                        tcg_gen_mov_i32(cpu_fpr[QFPREG(rd) + 2],        \
-                                        cpu_fpr[QFPREG(rs2) + 2]);      \
-                        tcg_gen_mov_i32(cpu_fpr[QFPREG(rd) + 3],        \
-                                        cpu_fpr[QFPREG(rs2) + 3]);      \
-                        gen_update_fprs_dirty(QFPREG(rd));              \
+                        gen_move_Q(rd, rs2);                            \
                         gen_set_label(l1);                              \
                         tcg_temp_free(r_cond);                          \
                     }
@@ -2960,12 +3233,14 @@
 #undef FMOVQCC
 #endif
                     case 0x51: /* fcmps, V9 %fcc */
-                        gen_op_fcmps(rd & 3, cpu_fpr[rs1], cpu_fpr[rs2]);
+                        cpu_src1_32 = gen_load_fpr_F(dc, rs1);
+                        cpu_src2_32 = gen_load_fpr_F(dc, rs2);
+                        gen_op_fcmps(rd & 3, cpu_src1_32, cpu_src2_32);
                         break;
                     case 0x52: /* fcmpd, V9 %fcc */
-                        gen_op_load_fpr_DT0(DFPREG(rs1));
-                        gen_op_load_fpr_DT1(DFPREG(rs2));
-                        gen_op_fcmpd(rd & 3);
+                        cpu_src1_64 = gen_load_fpr_D(dc, rs1);
+                        cpu_src2_64 = gen_load_fpr_D(dc, rs2);
+                        gen_op_fcmpd(rd & 3, cpu_src1_64, cpu_src2_64);
                         break;
                     case 0x53: /* fcmpq, V9 %fcc */
                         CHECK_FPU_FEATURE(dc, FLOAT128);
@@ -2974,12 +3249,14 @@
                         gen_op_fcmpq(rd & 3);
                         break;
                     case 0x55: /* fcmpes, V9 %fcc */
-                        gen_op_fcmpes(rd & 3, cpu_fpr[rs1], cpu_fpr[rs2]);
+                        cpu_src1_32 = gen_load_fpr_F(dc, rs1);
+                        cpu_src2_32 = gen_load_fpr_F(dc, rs2);
+                        gen_op_fcmpes(rd & 3, cpu_src1_32, cpu_src2_32);
                         break;
                     case 0x56: /* fcmped, V9 %fcc */
-                        gen_op_load_fpr_DT0(DFPREG(rs1));
-                        gen_op_load_fpr_DT1(DFPREG(rs2));
-                        gen_op_fcmped(rd & 3);
+                        cpu_src1_64 = gen_load_fpr_D(dc, rs1);
+                        cpu_src2_64 = gen_load_fpr_D(dc, rs2);
+                        gen_op_fcmped(rd & 3, cpu_src1_64, cpu_src2_64);
                         break;
                     case 0x57: /* fcmpeq, V9 %fcc */
                         CHECK_FPU_FEATURE(dc, FLOAT128);
@@ -3819,31 +4096,101 @@
 
                 switch (opf) {
                 case 0x000: /* VIS I edge8cc */
+                    CHECK_FPU_FEATURE(dc, VIS1);
+                    gen_movl_reg_TN(rs1, cpu_src1);
+                    gen_movl_reg_TN(rs2, cpu_src2);
+                    gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 8, 1, 0);
+                    gen_movl_TN_reg(rd, cpu_dst);
+                    break;
                 case 0x001: /* VIS II edge8n */
+                    CHECK_FPU_FEATURE(dc, VIS2);
+                    gen_movl_reg_TN(rs1, cpu_src1);
+                    gen_movl_reg_TN(rs2, cpu_src2);
+                    gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 8, 0, 0);
+                    gen_movl_TN_reg(rd, cpu_dst);
+                    break;
                 case 0x002: /* VIS I edge8lcc */
+                    CHECK_FPU_FEATURE(dc, VIS1);
+                    gen_movl_reg_TN(rs1, cpu_src1);
+                    gen_movl_reg_TN(rs2, cpu_src2);
+                    gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 8, 1, 1);
+                    gen_movl_TN_reg(rd, cpu_dst);
+                    break;
                 case 0x003: /* VIS II edge8ln */
+                    CHECK_FPU_FEATURE(dc, VIS2);
+                    gen_movl_reg_TN(rs1, cpu_src1);
+                    gen_movl_reg_TN(rs2, cpu_src2);
+                    gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 8, 0, 1);
+                    gen_movl_TN_reg(rd, cpu_dst);
+                    break;
                 case 0x004: /* VIS I edge16cc */
+                    CHECK_FPU_FEATURE(dc, VIS1);
+                    gen_movl_reg_TN(rs1, cpu_src1);
+                    gen_movl_reg_TN(rs2, cpu_src2);
+                    gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 16, 1, 0);
+                    gen_movl_TN_reg(rd, cpu_dst);
+                    break;
                 case 0x005: /* VIS II edge16n */
+                    CHECK_FPU_FEATURE(dc, VIS2);
+                    gen_movl_reg_TN(rs1, cpu_src1);
+                    gen_movl_reg_TN(rs2, cpu_src2);
+                    gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 16, 0, 0);
+                    gen_movl_TN_reg(rd, cpu_dst);
+                    break;
                 case 0x006: /* VIS I edge16lcc */
+                    CHECK_FPU_FEATURE(dc, VIS1);
+                    gen_movl_reg_TN(rs1, cpu_src1);
+                    gen_movl_reg_TN(rs2, cpu_src2);
+                    gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 16, 1, 1);
+                    gen_movl_TN_reg(rd, cpu_dst);
+                    break;
                 case 0x007: /* VIS II edge16ln */
+                    CHECK_FPU_FEATURE(dc, VIS2);
+                    gen_movl_reg_TN(rs1, cpu_src1);
+                    gen_movl_reg_TN(rs2, cpu_src2);
+                    gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 16, 0, 1);
+                    gen_movl_TN_reg(rd, cpu_dst);
+                    break;
                 case 0x008: /* VIS I edge32cc */
+                    CHECK_FPU_FEATURE(dc, VIS1);
+                    gen_movl_reg_TN(rs1, cpu_src1);
+                    gen_movl_reg_TN(rs2, cpu_src2);
+                    gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 32, 1, 0);
+                    gen_movl_TN_reg(rd, cpu_dst);
+                    break;
                 case 0x009: /* VIS II edge32n */
+                    CHECK_FPU_FEATURE(dc, VIS2);
+                    gen_movl_reg_TN(rs1, cpu_src1);
+                    gen_movl_reg_TN(rs2, cpu_src2);
+                    gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 32, 0, 0);
+                    gen_movl_TN_reg(rd, cpu_dst);
+                    break;
                 case 0x00a: /* VIS I edge32lcc */
+                    CHECK_FPU_FEATURE(dc, VIS1);
+                    gen_movl_reg_TN(rs1, cpu_src1);
+                    gen_movl_reg_TN(rs2, cpu_src2);
+                    gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 32, 1, 1);
+                    gen_movl_TN_reg(rd, cpu_dst);
+                    break;
                 case 0x00b: /* VIS II edge32ln */
-                    // XXX
-                    goto illegal_insn;
+                    CHECK_FPU_FEATURE(dc, VIS2);
+                    gen_movl_reg_TN(rs1, cpu_src1);
+                    gen_movl_reg_TN(rs2, cpu_src2);
+                    gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 32, 0, 1);
+                    gen_movl_TN_reg(rd, cpu_dst);
+                    break;
                 case 0x010: /* VIS I array8 */
                     CHECK_FPU_FEATURE(dc, VIS1);
                     cpu_src1 = get_src1(insn, cpu_src1);
                     gen_movl_reg_TN(rs2, cpu_src2);
-                    gen_helper_array8(cpu_dst, cpu_env, cpu_src1, cpu_src2);
+                    gen_helper_array8(cpu_dst, cpu_src1, cpu_src2);
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
                 case 0x012: /* VIS I array16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
                     cpu_src1 = get_src1(insn, cpu_src1);
                     gen_movl_reg_TN(rs2, cpu_src2);
-                    gen_helper_array8(cpu_dst, cpu_env, cpu_src1, cpu_src2);
+                    gen_helper_array8(cpu_dst, cpu_src1, cpu_src2);
                     tcg_gen_shli_i64(cpu_dst, cpu_dst, 1);
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
@@ -3851,7 +4198,7 @@
                     CHECK_FPU_FEATURE(dc, VIS1);
                     cpu_src1 = get_src1(insn, cpu_src1);
                     gen_movl_reg_TN(rs2, cpu_src2);
-                    gen_helper_array8(cpu_dst, cpu_env, cpu_src1, cpu_src2);
+                    gen_helper_array8(cpu_dst, cpu_src1, cpu_src2);
                     tcg_gen_shli_i64(cpu_dst, cpu_dst, 2);
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
@@ -3859,424 +4206,317 @@
                     CHECK_FPU_FEATURE(dc, VIS1);
                     cpu_src1 = get_src1(insn, cpu_src1);
                     gen_movl_reg_TN(rs2, cpu_src2);
-                    gen_helper_alignaddr(cpu_dst, cpu_env, cpu_src1, cpu_src2);
+                    gen_alignaddr(cpu_dst, cpu_src1, cpu_src2, 0);
+                    gen_movl_TN_reg(rd, cpu_dst);
+                    break;
+                case 0x01a: /* VIS I alignaddrl */
+                    CHECK_FPU_FEATURE(dc, VIS1);
+                    cpu_src1 = get_src1(insn, cpu_src1);
+                    gen_movl_reg_TN(rs2, cpu_src2);
+                    gen_alignaddr(cpu_dst, cpu_src1, cpu_src2, 1);
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
                 case 0x019: /* VIS II bmask */
-                case 0x01a: /* VIS I alignaddrl */
-                    // XXX
-                    goto illegal_insn;
+                    CHECK_FPU_FEATURE(dc, VIS2);
+                    cpu_src1 = get_src1(insn, cpu_src1);
+                    cpu_src2 = get_src1(insn, cpu_src2);
+                    tcg_gen_add_tl(cpu_dst, cpu_src1, cpu_src2);
+                    tcg_gen_deposit_tl(cpu_gsr, cpu_gsr, cpu_dst, 32, 32);
+                    gen_movl_TN_reg(rd, cpu_dst);
+                    break;
                 case 0x020: /* VIS I fcmple16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fcmple16(cpu_dst, cpu_env);
+                    cpu_src1_64 = gen_load_fpr_D(dc, rs1);
+                    cpu_src2_64 = gen_load_fpr_D(dc, rs2);
+                    gen_helper_fcmple16(cpu_dst, cpu_src1_64, cpu_src2_64);
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
                 case 0x022: /* VIS I fcmpne16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fcmpne16(cpu_dst, cpu_env);
+                    cpu_src1_64 = gen_load_fpr_D(dc, rs1);
+                    cpu_src2_64 = gen_load_fpr_D(dc, rs2);
+                    gen_helper_fcmpne16(cpu_dst, cpu_src1_64, cpu_src2_64);
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
                 case 0x024: /* VIS I fcmple32 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fcmple32(cpu_dst, cpu_env);
+                    cpu_src1_64 = gen_load_fpr_D(dc, rs1);
+                    cpu_src2_64 = gen_load_fpr_D(dc, rs2);
+                    gen_helper_fcmple32(cpu_dst, cpu_src1_64, cpu_src2_64);
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
                 case 0x026: /* VIS I fcmpne32 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fcmpne32(cpu_dst, cpu_env);
+                    cpu_src1_64 = gen_load_fpr_D(dc, rs1);
+                    cpu_src2_64 = gen_load_fpr_D(dc, rs2);
+                    gen_helper_fcmpne32(cpu_dst, cpu_src1_64, cpu_src2_64);
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
                 case 0x028: /* VIS I fcmpgt16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fcmpgt16(cpu_dst, cpu_env);
+                    cpu_src1_64 = gen_load_fpr_D(dc, rs1);
+                    cpu_src2_64 = gen_load_fpr_D(dc, rs2);
+                    gen_helper_fcmpgt16(cpu_dst, cpu_src1_64, cpu_src2_64);
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
                 case 0x02a: /* VIS I fcmpeq16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fcmpeq16(cpu_dst, cpu_env);
+                    cpu_src1_64 = gen_load_fpr_D(dc, rs1);
+                    cpu_src2_64 = gen_load_fpr_D(dc, rs2);
+                    gen_helper_fcmpeq16(cpu_dst, cpu_src1_64, cpu_src2_64);
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
                 case 0x02c: /* VIS I fcmpgt32 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fcmpgt32(cpu_dst, cpu_env);
+                    cpu_src1_64 = gen_load_fpr_D(dc, rs1);
+                    cpu_src2_64 = gen_load_fpr_D(dc, rs2);
+                    gen_helper_fcmpgt32(cpu_dst, cpu_src1_64, cpu_src2_64);
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
                 case 0x02e: /* VIS I fcmpeq32 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fcmpeq32(cpu_dst, cpu_env);
+                    cpu_src1_64 = gen_load_fpr_D(dc, rs1);
+                    cpu_src2_64 = gen_load_fpr_D(dc, rs2);
+                    gen_helper_fcmpeq32(cpu_dst, cpu_src1_64, cpu_src2_64);
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
                 case 0x031: /* VIS I fmul8x16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fmul8x16(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fmul8x16);
                     break;
                 case 0x033: /* VIS I fmul8x16au */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fmul8x16au(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fmul8x16au);
                     break;
                 case 0x035: /* VIS I fmul8x16al */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fmul8x16al(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fmul8x16al);
                     break;
                 case 0x036: /* VIS I fmul8sux16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fmul8sux16(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fmul8sux16);
                     break;
                 case 0x037: /* VIS I fmul8ulx16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fmul8ulx16(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fmul8ulx16);
                     break;
                 case 0x038: /* VIS I fmuld8sux16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fmuld8sux16(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fmuld8sux16);
                     break;
                 case 0x039: /* VIS I fmuld8ulx16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fmuld8ulx16(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fmuld8ulx16);
                     break;
                 case 0x03a: /* VIS I fpack32 */
+                    CHECK_FPU_FEATURE(dc, VIS1);
+                    gen_gsr_fop_DDD(dc, rd, rs1, rs2, gen_helper_fpack32);
+                    break;
                 case 0x03b: /* VIS I fpack16 */
+                    CHECK_FPU_FEATURE(dc, VIS1);
+                    cpu_src1_64 = gen_load_fpr_D(dc, rs2);
+                    cpu_dst_32 = gen_dest_fpr_F();
+                    gen_helper_fpack16(cpu_dst_32, cpu_gsr, cpu_src1_64);
+                    gen_store_fpr_F(dc, rd, cpu_dst_32);
+                    break;
                 case 0x03d: /* VIS I fpackfix */
+                    CHECK_FPU_FEATURE(dc, VIS1);
+                    cpu_src1_64 = gen_load_fpr_D(dc, rs2);
+                    cpu_dst_32 = gen_dest_fpr_F();
+                    gen_helper_fpackfix(cpu_dst_32, cpu_gsr, cpu_src1_64);
+                    gen_store_fpr_F(dc, rd, cpu_dst_32);
+                    break;
                 case 0x03e: /* VIS I pdist */
-                    // XXX
-                    goto illegal_insn;
+                    CHECK_FPU_FEATURE(dc, VIS1);
+                    gen_ne_fop_DDDD(dc, rd, rs1, rs2, gen_helper_pdist);
+                    break;
                 case 0x048: /* VIS I faligndata */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_faligndata(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_gsr_fop_DDD(dc, rd, rs1, rs2, gen_faligndata);
                     break;
                 case 0x04b: /* VIS I fpmerge */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fpmerge(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fpmerge);
                     break;
                 case 0x04c: /* VIS II bshuffle */
-                    // XXX
-                    goto illegal_insn;
+                    CHECK_FPU_FEATURE(dc, VIS2);
+                    gen_gsr_fop_DDD(dc, rd, rs1, rs2, gen_helper_bshuffle);
+                    break;
                 case 0x04d: /* VIS I fexpand */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fexpand(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fexpand);
                     break;
                 case 0x050: /* VIS I fpadd16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fpadd16(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fpadd16);
                     break;
                 case 0x051: /* VIS I fpadd16s */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_helper_fpadd16s(cpu_fpr[rd], cpu_env,
-                                        cpu_fpr[rs1], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FFF(dc, rd, rs1, rs2, gen_helper_fpadd16s);
                     break;
                 case 0x052: /* VIS I fpadd32 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fpadd32(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fpadd32);
                     break;
                 case 0x053: /* VIS I fpadd32s */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_helper_fpadd32s(cpu_fpr[rd], cpu_env,
-                                        cpu_fpr[rs1], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FFF(dc, rd, rs1, rs2, tcg_gen_add_i32);
                     break;
                 case 0x054: /* VIS I fpsub16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fpsub16(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fpsub16);
                     break;
                 case 0x055: /* VIS I fpsub16s */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_helper_fpsub16s(cpu_fpr[rd], cpu_env,
-                                        cpu_fpr[rs1], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FFF(dc, rd, rs1, rs2, gen_helper_fpsub16s);
                     break;
                 case 0x056: /* VIS I fpsub32 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fpsub32(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fpsub32);
                     break;
                 case 0x057: /* VIS I fpsub32s */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_helper_fpsub32s(cpu_fpr[rd], cpu_env,
-                                        cpu_fpr[rs1], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FFF(dc, rd, rs1, rs2, tcg_gen_sub_i32);
                     break;
                 case 0x060: /* VIS I fzero */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_movi_i32(cpu_fpr[DFPREG(rd)], 0);
-                    tcg_gen_movi_i32(cpu_fpr[DFPREG(rd) + 1], 0);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    cpu_dst_64 = gen_dest_fpr_D();
+                    tcg_gen_movi_i64(cpu_dst_64, 0);
+                    gen_store_fpr_D(dc, rd, cpu_dst_64);
                     break;
                 case 0x061: /* VIS I fzeros */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_movi_i32(cpu_fpr[rd], 0);
-                    gen_update_fprs_dirty(rd);
+                    cpu_dst_32 = gen_dest_fpr_F();
+                    tcg_gen_movi_i32(cpu_dst_32, 0);
+                    gen_store_fpr_F(dc, rd, cpu_dst_32);
                     break;
                 case 0x062: /* VIS I fnor */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_nor_i32(cpu_fpr[DFPREG(rd)], cpu_fpr[DFPREG(rs1)],
-                                    cpu_fpr[DFPREG(rs2)]);
-                    tcg_gen_nor_i32(cpu_fpr[DFPREG(rd) + 1],
-                                    cpu_fpr[DFPREG(rs1) + 1],
-                                    cpu_fpr[DFPREG(rs2) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, tcg_gen_nor_i64);
                     break;
                 case 0x063: /* VIS I fnors */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_nor_i32(cpu_fpr[rd], cpu_fpr[rs1], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FFF(dc, rd, rs1, rs2, tcg_gen_nor_i32);
                     break;
                 case 0x064: /* VIS I fandnot2 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_andc_i32(cpu_fpr[DFPREG(rd)], cpu_fpr[DFPREG(rs1)],
-                                     cpu_fpr[DFPREG(rs2)]);
-                    tcg_gen_andc_i32(cpu_fpr[DFPREG(rd) + 1],
-                                     cpu_fpr[DFPREG(rs1) + 1],
-                                     cpu_fpr[DFPREG(rs2) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, tcg_gen_andc_i64);
                     break;
                 case 0x065: /* VIS I fandnot2s */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_andc_i32(cpu_fpr[rd], cpu_fpr[rs1], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FFF(dc, rd, rs1, rs2, tcg_gen_andc_i32);
                     break;
                 case 0x066: /* VIS I fnot2 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_not_i32(cpu_fpr[DFPREG(rd)], cpu_fpr[DFPREG(rs2)]);
-                    tcg_gen_not_i32(cpu_fpr[DFPREG(rd) + 1],
-                                    cpu_fpr[DFPREG(rs2) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DD(dc, rd, rs2, tcg_gen_not_i64);
                     break;
                 case 0x067: /* VIS I fnot2s */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_not_i32(cpu_fpr[rd], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FF(dc, rd, rs2, tcg_gen_not_i32);
                     break;
                 case 0x068: /* VIS I fandnot1 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_andc_i32(cpu_fpr[DFPREG(rd)], cpu_fpr[DFPREG(rs2)],
-                                     cpu_fpr[DFPREG(rs1)]);
-                    tcg_gen_andc_i32(cpu_fpr[DFPREG(rd) + 1],
-                                     cpu_fpr[DFPREG(rs2) + 1],
-                                     cpu_fpr[DFPREG(rs1) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs2, rs1, tcg_gen_andc_i64);
                     break;
                 case 0x069: /* VIS I fandnot1s */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_andc_i32(cpu_fpr[rd], cpu_fpr[rs2], cpu_fpr[rs1]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FFF(dc, rd, rs2, rs1, tcg_gen_andc_i32);
                     break;
                 case 0x06a: /* VIS I fnot1 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_not_i32(cpu_fpr[DFPREG(rd)], cpu_fpr[DFPREG(rs1)]);
-                    tcg_gen_not_i32(cpu_fpr[DFPREG(rd) + 1],
-                                    cpu_fpr[DFPREG(rs1) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DD(dc, rd, rs1, tcg_gen_not_i64);
                     break;
                 case 0x06b: /* VIS I fnot1s */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_not_i32(cpu_fpr[rd], cpu_fpr[rs1]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FF(dc, rd, rs1, tcg_gen_not_i32);
                     break;
                 case 0x06c: /* VIS I fxor */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_xor_i32(cpu_fpr[DFPREG(rd)], cpu_fpr[DFPREG(rs1)],
-                                    cpu_fpr[DFPREG(rs2)]);
-                    tcg_gen_xor_i32(cpu_fpr[DFPREG(rd) + 1],
-                                    cpu_fpr[DFPREG(rs1) + 1],
-                                    cpu_fpr[DFPREG(rs2) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, tcg_gen_xor_i64);
                     break;
                 case 0x06d: /* VIS I fxors */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_xor_i32(cpu_fpr[rd], cpu_fpr[rs1], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FFF(dc, rd, rs1, rs2, tcg_gen_xor_i32);
                     break;
                 case 0x06e: /* VIS I fnand */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_nand_i32(cpu_fpr[DFPREG(rd)], cpu_fpr[DFPREG(rs1)],
-                                     cpu_fpr[DFPREG(rs2)]);
-                    tcg_gen_nand_i32(cpu_fpr[DFPREG(rd) + 1],
-                                     cpu_fpr[DFPREG(rs1) + 1],
-                                     cpu_fpr[DFPREG(rs2) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, tcg_gen_nand_i64);
                     break;
                 case 0x06f: /* VIS I fnands */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_nand_i32(cpu_fpr[rd], cpu_fpr[rs1], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FFF(dc, rd, rs1, rs2, tcg_gen_nand_i32);
                     break;
                 case 0x070: /* VIS I fand */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_and_i32(cpu_fpr[DFPREG(rd)], cpu_fpr[DFPREG(rs1)],
-                                    cpu_fpr[DFPREG(rs2)]);
-                    tcg_gen_and_i32(cpu_fpr[DFPREG(rd) + 1],
-                                    cpu_fpr[DFPREG(rs1) + 1],
-                                    cpu_fpr[DFPREG(rs2) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, tcg_gen_and_i64);
                     break;
                 case 0x071: /* VIS I fands */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_and_i32(cpu_fpr[rd], cpu_fpr[rs1], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FFF(dc, rd, rs1, rs2, tcg_gen_and_i32);
                     break;
                 case 0x072: /* VIS I fxnor */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_xori_i32(cpu_tmp32, cpu_fpr[DFPREG(rs2)], -1);
-                    tcg_gen_xor_i32(cpu_fpr[DFPREG(rd)], cpu_tmp32,
-                                    cpu_fpr[DFPREG(rs1)]);
-                    tcg_gen_xori_i32(cpu_tmp32, cpu_fpr[DFPREG(rs2) + 1], -1);
-                    tcg_gen_xor_i32(cpu_fpr[DFPREG(rd) + 1], cpu_tmp32,
-                                    cpu_fpr[DFPREG(rs1) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, tcg_gen_eqv_i64);
                     break;
                 case 0x073: /* VIS I fxnors */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_xori_i32(cpu_tmp32, cpu_fpr[rs2], -1);
-                    tcg_gen_xor_i32(cpu_fpr[rd], cpu_tmp32, cpu_fpr[rs1]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FFF(dc, rd, rs1, rs2, tcg_gen_eqv_i32);
                     break;
                 case 0x074: /* VIS I fsrc1 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_mov_i32(cpu_fpr[DFPREG(rd)], cpu_fpr[DFPREG(rs1)]);
-                    tcg_gen_mov_i32(cpu_fpr[DFPREG(rd) + 1],
-                                    cpu_fpr[DFPREG(rs1) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    cpu_src1_64 = gen_load_fpr_D(dc, rs1);
+                    gen_store_fpr_D(dc, rd, cpu_src1_64);
                     break;
                 case 0x075: /* VIS I fsrc1s */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_fpr[rs1]);
-                    gen_update_fprs_dirty(rd);
+                    cpu_src1_32 = gen_load_fpr_F(dc, rs1);
+                    gen_store_fpr_F(dc, rd, cpu_src1_32);
                     break;
                 case 0x076: /* VIS I fornot2 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_orc_i32(cpu_fpr[DFPREG(rd)], cpu_fpr[DFPREG(rs1)],
-                                    cpu_fpr[DFPREG(rs2)]);
-                    tcg_gen_orc_i32(cpu_fpr[DFPREG(rd) + 1],
-                                    cpu_fpr[DFPREG(rs1) + 1],
-                                    cpu_fpr[DFPREG(rs2) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, tcg_gen_orc_i64);
                     break;
                 case 0x077: /* VIS I fornot2s */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_orc_i32(cpu_fpr[rd], cpu_fpr[rs1], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FFF(dc, rd, rs1, rs2, tcg_gen_orc_i32);
                     break;
                 case 0x078: /* VIS I fsrc2 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs2));
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    cpu_src1_64 = gen_load_fpr_D(dc, rs2);
+                    gen_store_fpr_D(dc, rd, cpu_src1_64);
                     break;
                 case 0x079: /* VIS I fsrc2s */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    cpu_src1_32 = gen_load_fpr_F(dc, rs2);
+                    gen_store_fpr_F(dc, rd, cpu_src1_32);
                     break;
                 case 0x07a: /* VIS I fornot1 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_orc_i32(cpu_fpr[DFPREG(rd)], cpu_fpr[DFPREG(rs2)],
-                                    cpu_fpr[DFPREG(rs1)]);
-                    tcg_gen_orc_i32(cpu_fpr[DFPREG(rd) + 1],
-                                    cpu_fpr[DFPREG(rs2) + 1],
-                                    cpu_fpr[DFPREG(rs1) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs2, rs1, tcg_gen_orc_i64);
                     break;
                 case 0x07b: /* VIS I fornot1s */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_orc_i32(cpu_fpr[rd], cpu_fpr[rs2], cpu_fpr[rs1]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FFF(dc, rd, rs2, rs1, tcg_gen_orc_i32);
                     break;
                 case 0x07c: /* VIS I for */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_or_i32(cpu_fpr[DFPREG(rd)], cpu_fpr[DFPREG(rs1)],
-                                   cpu_fpr[DFPREG(rs2)]);
-                    tcg_gen_or_i32(cpu_fpr[DFPREG(rd) + 1],
-                                   cpu_fpr[DFPREG(rs1) + 1],
-                                   cpu_fpr[DFPREG(rs2) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, tcg_gen_or_i64);
                     break;
                 case 0x07d: /* VIS I fors */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_or_i32(cpu_fpr[rd], cpu_fpr[rs1], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FFF(dc, rd, rs1, rs2, tcg_gen_or_i32);
                     break;
                 case 0x07e: /* VIS I fone */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_movi_i32(cpu_fpr[DFPREG(rd)], -1);
-                    tcg_gen_movi_i32(cpu_fpr[DFPREG(rd) + 1], -1);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    cpu_dst_64 = gen_dest_fpr_D();
+                    tcg_gen_movi_i64(cpu_dst_64, -1);
+                    gen_store_fpr_D(dc, rd, cpu_dst_64);
                     break;
                 case 0x07f: /* VIS I fones */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_movi_i32(cpu_fpr[rd], -1);
-                    gen_update_fprs_dirty(rd);
+                    cpu_dst_32 = gen_dest_fpr_F();
+                    tcg_gen_movi_i32(cpu_dst_32, -1);
+                    gen_store_fpr_F(dc, rd, cpu_dst_32);
                     break;
                 case 0x080: /* VIS I shutdown */
                 case 0x081: /* VIS II siam */
@@ -4659,8 +4899,9 @@
                 case 0x20:      /* ldf, load fpreg */
                     gen_address_mask(dc, cpu_addr);
                     tcg_gen_qemu_ld32u(cpu_tmp0, cpu_addr, dc->mem_idx);
-                    tcg_gen_trunc_tl_i32(cpu_fpr[rd], cpu_tmp0);
-                    gen_update_fprs_dirty(rd);
+                    cpu_dst_32 = gen_dest_fpr_F();
+                    tcg_gen_trunc_tl_i32(cpu_dst_32, cpu_tmp0);
+                    gen_store_fpr_F(dc, rd, cpu_dst_32);
                     break;
                 case 0x21:      /* ldfsr, V9 ldxfsr */
 #ifdef TARGET_SPARC64
@@ -4694,16 +4935,10 @@
                     }
                     break;
                 case 0x23:      /* lddf, load double fpreg */
-                    {
-                        TCGv_i32 r_const;
-
-                        r_const = tcg_const_i32(dc->mem_idx);
-                        gen_address_mask(dc, cpu_addr);
-                        gen_helper_lddf(cpu_addr, r_const);
-                        tcg_temp_free_i32(r_const);
-                        gen_op_store_DT0_fpr(DFPREG(rd));
-                        gen_update_fprs_dirty(DFPREG(rd));
-                    }
+                    gen_address_mask(dc, cpu_addr);
+                    cpu_dst_64 = gen_dest_fpr_D();
+                    tcg_gen_qemu_ld64(cpu_dst_64, cpu_addr, dc->mem_idx);
+                    gen_store_fpr_D(dc, rd, cpu_dst_64);
                     break;
                 default:
                     goto illegal_insn;
@@ -4810,7 +5045,8 @@
                 switch (xop) {
                 case 0x24: /* stf, store fpreg */
                     gen_address_mask(dc, cpu_addr);
-                    tcg_gen_ext_i32_tl(cpu_tmp0, cpu_fpr[rd]);
+                    cpu_src1_32 = gen_load_fpr_F(dc, rd);
+                    tcg_gen_ext_i32_tl(cpu_tmp0, cpu_src1_32);
                     tcg_gen_qemu_st32(cpu_tmp0, cpu_addr, dc->mem_idx);
                     break;
                 case 0x25: /* stfsr, V9 stxfsr */
@@ -4853,15 +5089,9 @@
 #endif
 #endif
                 case 0x27: /* stdf, store double fpreg */
-                    {
-                        TCGv_i32 r_const;
-
-                        gen_op_load_fpr_DT0(DFPREG(rd));
-                        r_const = tcg_const_i32(dc->mem_idx);
-                        gen_address_mask(dc, cpu_addr);
-                        gen_helper_stdf(cpu_addr, r_const);
-                        tcg_temp_free_i32(r_const);
-                    }
+                    gen_address_mask(dc, cpu_addr);
+                    cpu_src1_64 = gen_load_fpr_D(dc, rd);
+                    tcg_gen_qemu_st64(cpu_src1_64, cpu_addr, dc->mem_idx);
                     break;
                 default:
                     goto illegal_insn;
@@ -4996,6 +5226,13 @@
  egress:
     tcg_temp_free(cpu_tmp1);
     tcg_temp_free(cpu_tmp2);
+    if (dc->n_t32 != 0) {
+        int i;
+        for (i = dc->n_t32 - 1; i >= 0; --i) {
+            tcg_temp_free_i32(dc->t32[i]);
+        }
+        dc->n_t32 = 0;
+    }
 }
 
 static inline void gen_intermediate_code_internal(TranslationBlock * tb,
@@ -5095,6 +5332,7 @@
     tcg_temp_free_i64(cpu_tmp64);
     tcg_temp_free_i32(cpu_tmp32);
     tcg_temp_free(cpu_tmp0);
+
     if (tb->cflags & CF_LAST_IO)
         gen_io_end();
     if (!dc->is_br) {
@@ -5159,15 +5397,11 @@
         "g6",
         "g7",
     };
-    static const char * const fregnames[64] = {
-        "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7",
-        "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15",
-        "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
-        "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31",
-        "f32", "f33", "f34", "f35", "f36", "f37", "f38", "f39",
-        "f40", "f41", "f42", "f43", "f44", "f45", "f46", "f47",
-        "f48", "f49", "f50", "f51", "f52", "f53", "f54", "f55",
-        "f56", "f57", "f58", "f59", "f60", "f61", "f62", "f63",
+    static const char * const fregnames[32] = {
+        "f0", "f2", "f4", "f6", "f8", "f10", "f12", "f14",
+        "f16", "f18", "f20", "f22", "f24", "f26", "f28", "f30",
+        "f32", "f34", "f36", "f38", "f40", "f42", "f44", "f46",
+        "f48", "f50", "f52", "f54", "f56", "f58", "f60", "f62",
     };
 
     /* init various static tables */
@@ -5237,14 +5471,16 @@
         cpu_tbr = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, tbr),
                                      "tbr");
 #endif
-        for (i = 1; i < 8; i++)
+        for (i = 1; i < 8; i++) {
             cpu_gregs[i] = tcg_global_mem_new(TCG_AREG0,
                                               offsetof(CPUState, gregs[i]),
                                               gregnames[i]);
-        for (i = 0; i < TARGET_FPREGS; i++)
-            cpu_fpr[i] = tcg_global_mem_new_i32(TCG_AREG0,
+        }
+        for (i = 0; i < TARGET_DPREGS; i++) {
+            cpu_fpr[i] = tcg_global_mem_new_i64(TCG_AREG0,
                                                 offsetof(CPUState, fpr[i]),
                                                 fregnames[i]);
+        }
 
         /* register helpers */
 
diff --git a/target-sparc/vis_helper.c b/target-sparc/vis_helper.c
index a22c10b..a992c29 100644
--- a/target-sparc/vis_helper.c
+++ b/target-sparc/vis_helper.c
@@ -20,11 +20,6 @@
 #include "cpu.h"
 #include "helper.h"
 
-#define DT0 (env->dt0)
-#define DT1 (env->dt1)
-#define QT0 (env->qt0)
-#define QT1 (env->qt1)
-
 /* This function uses non-native bit order */
 #define GET_FIELD(X, FROM, TO)                                  \
     ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1))
@@ -33,8 +28,7 @@
 #define GET_FIELD_SP(X, FROM, TO)               \
     GET_FIELD(X, 63 - (TO), 63 - (FROM))
 
-target_ulong helper_array8(CPUState *env, target_ulong pixel_addr,
-                           target_ulong cubesize)
+target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
 {
     return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) |
         (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) |
@@ -47,29 +41,6 @@
         GET_FIELD_SP(pixel_addr, 11, 12);
 }
 
-target_ulong helper_alignaddr(CPUState *env, target_ulong addr,
-                              target_ulong offset)
-{
-    uint64_t tmp;
-
-    tmp = addr + offset;
-    env->gsr &= ~7ULL;
-    env->gsr |= tmp & 7ULL;
-    return tmp & ~7ULL;
-}
-
-void helper_faligndata(CPUState *env)
-{
-    uint64_t tmp;
-
-    tmp = (*((uint64_t *)&DT0)) << ((env->gsr & 7) * 8);
-    /* on many architectures a shift of 64 does nothing */
-    if ((env->gsr & 7) != 0) {
-        tmp |= (*((uint64_t *)&DT1)) >> (64 - (env->gsr & 7) * 8);
-    }
-    *((uint64_t *)&DT0) = tmp;
-}
-
 #ifdef HOST_WORDS_BIGENDIAN
 #define VIS_B64(n) b[7 - (n)]
 #define VIS_W64(n) w[3 - (n)]
@@ -102,12 +73,12 @@
     float32 f;
 } VIS32;
 
-void helper_fpmerge(CPUState *env)
+uint64_t helper_fpmerge(uint64_t src1, uint64_t src2)
 {
     VIS64 s, d;
 
-    s.d = DT0;
-    d.d = DT1;
+    s.ll = src1;
+    d.ll = src2;
 
     /* Reverse calculation order to handle overlap */
     d.VIS_B64(7) = s.VIS_B64(3);
@@ -119,16 +90,16 @@
     d.VIS_B64(1) = s.VIS_B64(0);
     /* d.VIS_B64(0) = d.VIS_B64(0); */
 
-    DT0 = d.d;
+    return d.ll;
 }
 
-void helper_fmul8x16(CPUState *env)
+uint64_t helper_fmul8x16(uint64_t src1, uint64_t src2)
 {
     VIS64 s, d;
     uint32_t tmp;
 
-    s.d = DT0;
-    d.d = DT1;
+    s.ll = src1;
+    d.ll = src2;
 
 #define PMUL(r)                                                 \
     tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B64(r);       \
@@ -143,16 +114,16 @@
     PMUL(3);
 #undef PMUL
 
-    DT0 = d.d;
+    return d.ll;
 }
 
-void helper_fmul8x16al(CPUState *env)
+uint64_t helper_fmul8x16al(uint64_t src1, uint64_t src2)
 {
     VIS64 s, d;
     uint32_t tmp;
 
-    s.d = DT0;
-    d.d = DT1;
+    s.ll = src1;
+    d.ll = src2;
 
 #define PMUL(r)                                                 \
     tmp = (int32_t)d.VIS_SW64(1) * (int32_t)s.VIS_B64(r);       \
@@ -167,16 +138,16 @@
     PMUL(3);
 #undef PMUL
 
-    DT0 = d.d;
+    return d.ll;
 }
 
-void helper_fmul8x16au(CPUState *env)
+uint64_t helper_fmul8x16au(uint64_t src1, uint64_t src2)
 {
     VIS64 s, d;
     uint32_t tmp;
 
-    s.d = DT0;
-    d.d = DT1;
+    s.ll = src1;
+    d.ll = src2;
 
 #define PMUL(r)                                                 \
     tmp = (int32_t)d.VIS_SW64(0) * (int32_t)s.VIS_B64(r);       \
@@ -191,16 +162,16 @@
     PMUL(3);
 #undef PMUL
 
-    DT0 = d.d;
+    return d.ll;
 }
 
-void helper_fmul8sux16(CPUState *env)
+uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2)
 {
     VIS64 s, d;
     uint32_t tmp;
 
-    s.d = DT0;
-    d.d = DT1;
+    s.ll = src1;
+    d.ll = src2;
 
 #define PMUL(r)                                                         \
     tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8);       \
@@ -215,16 +186,16 @@
     PMUL(3);
 #undef PMUL
 
-    DT0 = d.d;
+    return d.ll;
 }
 
-void helper_fmul8ulx16(CPUState *env)
+uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2)
 {
     VIS64 s, d;
     uint32_t tmp;
 
-    s.d = DT0;
-    d.d = DT1;
+    s.ll = src1;
+    d.ll = src2;
 
 #define PMUL(r)                                                         \
     tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2));        \
@@ -239,16 +210,16 @@
     PMUL(3);
 #undef PMUL
 
-    DT0 = d.d;
+    return d.ll;
 }
 
-void helper_fmuld8sux16(CPUState *env)
+uint64_t helper_fmuld8sux16(uint64_t src1, uint64_t src2)
 {
     VIS64 s, d;
     uint32_t tmp;
 
-    s.d = DT0;
-    d.d = DT1;
+    s.ll = src1;
+    d.ll = src2;
 
 #define PMUL(r)                                                         \
     tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8);       \
@@ -262,16 +233,16 @@
     PMUL(0);
 #undef PMUL
 
-    DT0 = d.d;
+    return d.ll;
 }
 
-void helper_fmuld8ulx16(CPUState *env)
+uint64_t helper_fmuld8ulx16(uint64_t src1, uint64_t src2)
 {
     VIS64 s, d;
     uint32_t tmp;
 
-    s.d = DT0;
-    d.d = DT1;
+    s.ll = src1;
+    d.ll = src2;
 
 #define PMUL(r)                                                         \
     tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2));        \
@@ -285,42 +256,41 @@
     PMUL(0);
 #undef PMUL
 
-    DT0 = d.d;
+    return d.ll;
 }
 
-void helper_fexpand(CPUState *env)
+uint64_t helper_fexpand(uint64_t src1, uint64_t src2)
 {
     VIS32 s;
     VIS64 d;
 
-    s.l = (uint32_t)(*(uint64_t *)&DT0 & 0xffffffff);
-    d.d = DT1;
+    s.l = (uint32_t)src1;
+    d.ll = src2;
     d.VIS_W64(0) = s.VIS_B32(0) << 4;
     d.VIS_W64(1) = s.VIS_B32(1) << 4;
     d.VIS_W64(2) = s.VIS_B32(2) << 4;
     d.VIS_W64(3) = s.VIS_B32(3) << 4;
 
-    DT0 = d.d;
+    return d.ll;
 }
 
 #define VIS_HELPER(name, F)                             \
-    void name##16(CPUState *env)                        \
+    uint64_t name##16(uint64_t src1, uint64_t src2)     \
     {                                                   \
         VIS64 s, d;                                     \
                                                         \
-        s.d = DT0;                                      \
-        d.d = DT1;                                      \
+        s.ll = src1;                                    \
+        d.ll = src2;                                    \
                                                         \
         d.VIS_W64(0) = F(d.VIS_W64(0), s.VIS_W64(0));   \
         d.VIS_W64(1) = F(d.VIS_W64(1), s.VIS_W64(1));   \
         d.VIS_W64(2) = F(d.VIS_W64(2), s.VIS_W64(2));   \
         d.VIS_W64(3) = F(d.VIS_W64(3), s.VIS_W64(3));   \
                                                         \
-        DT0 = d.d;                                      \
+        return d.ll;                                    \
     }                                                   \
                                                         \
-    uint32_t name##16s(CPUState *env, uint32_t src1,    \
-                       uint32_t src2)                   \
+    uint32_t name##16s(uint32_t src1, uint32_t src2)    \
     {                                                   \
         VIS32 s, d;                                     \
                                                         \
@@ -333,21 +303,20 @@
         return d.l;                                     \
     }                                                   \
                                                         \
-    void name##32(CPUState *env)                        \
+    uint64_t name##32(uint64_t src1, uint64_t src2)     \
     {                                                   \
         VIS64 s, d;                                     \
                                                         \
-        s.d = DT0;                                      \
-        d.d = DT1;                                      \
+        s.ll = src1;                                    \
+        d.ll = src2;                                    \
                                                         \
         d.VIS_L64(0) = F(d.VIS_L64(0), s.VIS_L64(0));   \
         d.VIS_L64(1) = F(d.VIS_L64(1), s.VIS_L64(1));   \
                                                         \
-        DT0 = d.d;                                      \
+        return d.ll;                                    \
     }                                                   \
                                                         \
-    uint32_t name##32s(CPUState *env, uint32_t src1,    \
-                       uint32_t src2)                   \
+    uint32_t name##32s(uint32_t src1, uint32_t src2)    \
     {                                                   \
         VIS32 s, d;                                     \
                                                         \
@@ -365,12 +334,12 @@
 VIS_HELPER(helper_fpsub, FSUB)
 
 #define VIS_CMPHELPER(name, F)                                    \
-    uint64_t name##16(CPUState *env)                              \
+    uint64_t name##16(uint64_t src1, uint64_t src2)               \
     {                                                             \
         VIS64 s, d;                                               \
                                                                   \
-        s.d = DT0;                                                \
-        d.d = DT1;                                                \
+        s.ll = src1;                                              \
+        d.ll = src2;                                              \
                                                                   \
         d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0;     \
         d.VIS_W64(0) |= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0;    \
@@ -381,12 +350,12 @@
         return d.ll;                                              \
     }                                                             \
                                                                   \
-    uint64_t name##32(CPUState *env)                              \
+    uint64_t name##32(uint64_t src1, uint64_t src2)               \
     {                                                             \
         VIS64 s, d;                                               \
                                                                   \
-        s.d = DT0;                                                \
-        d.d = DT1;                                                \
+        s.ll = src1;                                              \
+        d.ll = src2;                                              \
                                                                   \
         d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0;     \
         d.VIS_L64(0) |= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0;    \
@@ -404,3 +373,117 @@
 VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
 VIS_CMPHELPER(helper_fcmple, FCMPLE)
 VIS_CMPHELPER(helper_fcmpne, FCMPNE)
+
+uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2)
+{
+    int i;
+    for (i = 0; i < 8; i++) {
+        int s1, s2;
+
+        s1 = (src1 >> (56 - (i * 8))) & 0xff;
+        s2 = (src2 >> (56 - (i * 8))) & 0xff;
+
+        /* Absolute value of difference. */
+        s1 -= s2;
+        if (s1 < 0) {
+            s1 = -s1;
+        }
+
+        sum += s1;
+    }
+
+    return sum;
+}
+
+uint32_t helper_fpack16(uint64_t gsr, uint64_t rs2)
+{
+    int scale = (gsr >> 3) & 0xf;
+    uint32_t ret = 0;
+    int byte;
+
+    for (byte = 0; byte < 4; byte++) {
+        uint32_t val;
+        int16_t src = rs2 >> (byte * 16);
+        int32_t scaled = src << scale;
+        int32_t from_fixed = scaled >> 7;
+
+        val = (from_fixed < 0 ?  0 :
+               from_fixed > 255 ?  255 : from_fixed);
+
+        ret |= val << (8 * byte);
+    }
+
+    return ret;
+}
+
+uint64_t helper_fpack32(uint64_t gsr, uint64_t rs1, uint64_t rs2)
+{
+    int scale = (gsr >> 3) & 0x1f;
+    uint64_t ret = 0;
+    int word;
+
+    ret = (rs1 << 8) & ~(0x000000ff000000ffULL);
+    for (word = 0; word < 2; word++) {
+        uint64_t val;
+        int32_t src = rs2 >> (word * 32);
+        int64_t scaled = (int64_t)src << scale;
+        int64_t from_fixed = scaled >> 23;
+
+        val = (from_fixed < 0 ? 0 :
+               (from_fixed > 255) ? 255 : from_fixed);
+
+        ret |= val << (32 * word);
+    }
+
+    return ret;
+}
+
+uint32_t helper_fpackfix(uint64_t gsr, uint64_t rs2)
+{
+    int scale = (gsr >> 3) & 0x1f;
+    uint32_t ret = 0;
+    int word;
+
+    for (word = 0; word < 2; word++) {
+        uint32_t val;
+        int32_t src = rs2 >> (word * 32);
+        int64_t scaled = src << scale;
+        int64_t from_fixed = scaled >> 16;
+
+        val = (from_fixed < -32768 ? -32768 :
+               from_fixed > 32767 ?  32767 : from_fixed);
+
+        ret |= (val & 0xffff) << (word * 16);
+    }
+
+    return ret;
+}
+
+uint64 helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2)
+{
+    union {
+        uint64_t ll[2];
+        uint8_t b[16];
+    } s;
+    VIS64 r;
+    uint32_t i, mask, host;
+
+    /* Set up S such that we can index across all of the bytes.  */
+#ifdef HOST_WORDS_BIGENDIAN
+    s.ll[0] = src1;
+    s.ll[1] = src2;
+    host = 0;
+#else
+    s.ll[1] = src1;
+    s.ll[0] = src2;
+    host = 15;
+#endif
+    mask = gsr >> 32;
+
+    for (i = 0; i < 8; ++i) {
+        unsigned e = (mask >> (28 - i*4)) & 0xf;
+        r.VIS_B64(i) = s.b[e ^ host];
+    }
+
+    return r.ll;
+}