mips: upstream integration.

Still doesn't fix emulator64-mips though.

Change-Id: I58ad8a3001b779527c85d1f86053ab580ee68784
diff --git a/Makefile.target b/Makefile.target
index fe5a338..70fb7b6 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -216,10 +216,13 @@
 
 ifeq ($(EMULATOR_TARGET_ARCH), mips)
 common_LOCAL_SRC_FILES += \
-    target-mips/op_helper.c \
+    target-mips/dsp_helper.c \
     target-mips/helper.c \
+    target-mips/lmi_helper.c \
+    target-mips/machine.c \
+    target-mips/op_helper.c \
     target-mips/translate.c \
-    target-mips/machine.c
+
 endif  # EMULATOR_TARGET_ARCH == mips
 
 common_LOCAL_SRC_FILES += fpu/softfloat.c
diff --git a/hw/mips/mips_int.c b/hw/mips/mips_int.c
index 6fc3d05..0f4d4e4 100644
--- a/hw/mips/mips_int.c
+++ b/hw/mips/mips_int.c
@@ -35,7 +35,7 @@
     cpu_mips_update_irq(env);
 }
 
-void cpu_mips_irq_init_cpu(CPUOldState *env)
+void cpu_mips_irq_init_cpu(CPUMIPSState *env)
 {
     qemu_irq *qi;
     int i;
@@ -45,3 +45,12 @@
         env->irq[i] = qi[i];
     }
 }
+
+/* Forward level to env->irq[irq]; lines outside 0..2 are silently ignored. */
+void cpu_mips_soft_irq(CPUMIPSState *env, int irq, int level)
+{
+    /* Only lines 0, 1 and 2 are accepted here.  */
+    if (irq >= 0 && irq <= 2) {
+        qemu_set_irq(env->irq[irq], level);
+    }
+}
diff --git a/target-mips/cpu.h b/target-mips/cpu.h
index 6ca15a0..90e544d 100644
--- a/target-mips/cpu.h
+++ b/target-mips/cpu.h
@@ -698,6 +698,37 @@
     *flags = env->hflags & (MIPS_HFLAG_TMASK | MIPS_HFLAG_BMASK);
 }
 
+static inline int mips_vpe_active(CPUMIPSState *env)
+{
+    /* Returns 1 when all checks below pass, 0 as soon as one fails.  */
+
+    /* The VPE has to be enabled ...  */
+    if (!(env->mvp->CP0_MVPControl & (1 << CP0MVPCo_EVP))) {
+        return 0;
+    }
+    /* ... and activated.  */
+    if (!(env->CP0_VPEConf0 & (1 << CP0VPEC0_VPA))) {
+        return 0;
+    }
+
+    /* Finally the VPE needs an active thread context.
+
+       The CPU model is assumed to reschedule threads internally when
+       the active one goes to sleep, so if no thread is available the
+       active TC is itself asleep and the whole VPE can be turned
+       off.  */
+    if (!(env->active_tc.CP0_TCStatus & (1 << CP0TCSt_A))) {
+        /* TC is not activated.  */
+        return 0;
+    }
+    if (env->active_tc.CP0_TCHalt & 1) {
+        /* TC is in halt state.  */
+        return 0;
+    }
+
+    return 1;
+}
+
 static inline void cpu_set_tls(CPUMIPSState *env, target_ulong newtls)
 {
     env->tls_value = newtls;
diff --git a/target-mips/dsp_helper.c b/target-mips/dsp_helper.c
new file mode 100644
index 0000000..a2f46d9
--- /dev/null
+++ b/target-mips/dsp_helper.c
@@ -0,0 +1,3755 @@
+/*
+ * MIPS ASE DSP Instruction emulation helpers for QEMU.
+ *
+ * Copyright (c) 2012  Jia Liu <proljc@gmail.com>
+ *                     Dongxue Zhang <elta.era@gmail.com>
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "cpu.h"
+#include "helper.h"
+#include "qemu/bitops.h"
+
+/* As the byte ordering doesn't matter, i.e. all columns are treated
+   identically, these unions can be used directly.  */
+typedef union {
+    uint8_t  ub[4];
+    int8_t   sb[4];
+    uint16_t uh[2];
+    int16_t  sh[2];
+    uint32_t uw[1];
+    int32_t  sw[1];
+} DSP32Value;
+
+typedef union {
+    uint8_t  ub[8];
+    int8_t   sb[8];
+    uint16_t uh[4];
+    int16_t  sh[4];
+    uint32_t uw[2];
+    int32_t  sw[2];
+    uint64_t ul[1];
+    int64_t  sl[1];
+} DSP64Value;
+
+/*** MIPS DSP internal functions begin ***/
+#define MIPSDSP_ABS(x) (((x) >= 0) ? (x) : -(x)) /* evaluates x twice */
+#define MIPSDSP_OVERFLOW_ADD(a, b, c, d) (~((a) ^ (b)) & ((a) ^ (c)) & (d))
+#define MIPSDSP_OVERFLOW_SUB(a, b, c, d) (((a) ^ (b)) & ((a) ^ (c)) & (d))
+
+static inline void set_DSPControl_overflow_flag(uint32_t flag, int position,
+                                                CPUMIPSState *env)
+{
+    env->active_tc.DSPControl |= (target_ulong)flag << position;
+}
+
+static inline void set_DSPControl_carryflag(bool flag, CPUMIPSState *env)
+{
+    env->active_tc.DSPControl &= ~(1 << 13);
+    env->active_tc.DSPControl |= flag << 13;
+}
+
+static inline uint32_t get_DSPControl_carryflag(CPUMIPSState *env)
+{
+    return (env->active_tc.DSPControl >> 13) & 0x01;
+}
+
+static inline void set_DSPControl_24(uint32_t flag, int len, CPUMIPSState *env)
+{
+    uint32_t filter;
+    /* Inverted mask of the len-bit field at bit 24.  Built from unsigned
+       operands: with int, len == 8 would shift a 1 into the sign bit.  */
+    filter = ~(((0x01u << len) - 1) << 24);
+    /* Clear the field, then OR in flag (flag is not masked to len bits).  */
+    env->active_tc.DSPControl &= filter;
+    env->active_tc.DSPControl |= (target_ulong)flag << 24;
+}
+
+static inline uint32_t get_DSPControl_24(int len, CPUMIPSState *env)
+{
+    uint32_t filter;
+    /* Low len bits set; unsigned so the shift stays defined up to len 31.  */
+    filter = (0x01u << len) - 1;
+    /* Extract the len-bit field of DSPControl that starts at bit 24.  */
+    return (env->active_tc.DSPControl >> 24) & filter;
+}
+
+static inline void set_DSPControl_pos(uint32_t pos, CPUMIPSState *env)
+{
+    target_ulong dspc;
+
+    dspc = env->active_tc.DSPControl;
+#ifndef TARGET_MIPS64
+    dspc = dspc & 0xFFFFFFC0;
+    dspc |= (pos & 0x3F);
+#else
+    dspc = dspc & 0xFFFFFF80;
+    dspc |= (pos & 0x7F);
+#endif
+    env->active_tc.DSPControl = dspc;
+}
+
+static inline uint32_t get_DSPControl_pos(CPUMIPSState *env)
+{
+    target_ulong dspc;
+    uint32_t pos;
+
+    dspc = env->active_tc.DSPControl;
+
+#ifndef TARGET_MIPS64
+    pos = dspc & 0x3F;
+#else
+    pos = dspc & 0x7F;
+#endif
+
+    return pos;
+}
+
+static inline void set_DSPControl_efi(uint32_t flag, CPUMIPSState *env)
+{
+    env->active_tc.DSPControl &= 0xFFFFBFFF;
+    env->active_tc.DSPControl |= (target_ulong)flag << 14;
+}
+
+#define DO_MIPS_SAT_ABS(size)                                          \
+static inline int##size##_t mipsdsp_sat_abs##size(int##size##_t a,         \
+                                                  CPUMIPSState *env)   \
+{                                                                      \
+    if (a == INT##size##_MIN) {                                        \
+        set_DSPControl_overflow_flag(1, 20, env);                      \
+        return INT##size##_MAX;                                        \
+    } else {                                                           \
+        return MIPSDSP_ABS(a);                                         \
+    }                                                                  \
+}
+DO_MIPS_SAT_ABS(8)
+DO_MIPS_SAT_ABS(16)
+DO_MIPS_SAT_ABS(32)
+#undef DO_MIPS_SAT_ABS
+
+/* get sum value */
+static inline int16_t mipsdsp_add_i16(int16_t a, int16_t b, CPUMIPSState *env)
+{
+    int16_t tempI;
+
+    tempI = a + b;
+
+    if (MIPSDSP_OVERFLOW_ADD(a, b, tempI, 0x8000)) {
+        set_DSPControl_overflow_flag(1, 20, env);
+    }
+
+    return tempI;
+}
+
+static inline int16_t mipsdsp_sat_add_i16(int16_t a, int16_t b,
+                                          CPUMIPSState *env)
+{
+    int16_t tempS;
+
+    tempS = a + b;
+
+    if (MIPSDSP_OVERFLOW_ADD(a, b, tempS, 0x8000)) {
+        if (a > 0) {
+            tempS = 0x7FFF;
+        } else {
+            tempS = 0x8000;
+        }
+        set_DSPControl_overflow_flag(1, 20, env);
+    }
+
+    return tempS;
+}
+
+static inline int32_t mipsdsp_sat_add_i32(int32_t a, int32_t b,
+                                          CPUMIPSState *env)
+{
+    int32_t tempI;
+    /* Add as unsigned: signed overflow is undefined, wraparound is not.  */
+    tempI = (int32_t)((uint32_t)a + (uint32_t)b);
+    /* Same-sign operands, different-sign sum: clamp and set flag bit 20.  */
+    if (MIPSDSP_OVERFLOW_ADD(a, b, tempI, 0x80000000)) {
+        if (a > 0) {
+            tempI = INT32_MAX;
+        } else {
+            tempI = INT32_MIN;
+        }
+        set_DSPControl_overflow_flag(1, 20, env);
+    }
+
+    return tempI;
+}
+
+static inline uint8_t mipsdsp_add_u8(uint8_t a, uint8_t b, CPUMIPSState *env)
+{
+    uint16_t temp;
+
+    temp = (uint16_t)a + (uint16_t)b;
+
+    if (temp & 0x0100) {
+        set_DSPControl_overflow_flag(1, 20, env);
+    }
+
+    return temp & 0xFF;
+}
+
+static inline uint16_t mipsdsp_add_u16(uint16_t a, uint16_t b,
+                                       CPUMIPSState *env)
+{
+    uint32_t temp;
+
+    temp = (uint32_t)a + (uint32_t)b;
+
+    if (temp & 0x00010000) {
+        set_DSPControl_overflow_flag(1, 20, env);
+    }
+
+    return temp & 0xFFFF;
+}
+
+static inline uint8_t mipsdsp_sat_add_u8(uint8_t a, uint8_t b,
+                                         CPUMIPSState *env)
+{
+    /* Widen first so the carry out of bit 7 survives in bit 8.  */
+    const uint16_t sum = (uint16_t)a + (uint16_t)b;
+
+    if ((sum & 0x0100) == 0) {
+        /* No carry: the low byte is the exact result.  */
+        return sum & 0xFF;
+    }
+
+    /* Carry out: record overflow in DSPControl bit 20 and clamp to 0xFF.  */
+    set_DSPControl_overflow_flag(1, 20, env);
+
+    return 0xFF;
+}
+
+static inline uint16_t mipsdsp_sat_add_u16(uint16_t a, uint16_t b,
+                                           CPUMIPSState *env)
+{
+    /* Widen first so the carry out of bit 15 survives in bit 16.  */
+    const uint32_t sum = (uint32_t)a + (uint32_t)b;
+
+    if ((sum & 0x00010000) == 0) {
+        /* No carry: the low half-word is the exact result.  */
+        return sum & 0xFFFF;
+    }
+
+    /* Carry out: record overflow in DSPControl bit 20, clamp to 0xFFFF.  */
+    set_DSPControl_overflow_flag(1, 20, env);
+
+    return 0xFFFF;
+}
+
+static inline int32_t mipsdsp_sat32_acc_q31(int32_t acc, int32_t a,
+                                            CPUMIPSState *env)
+{
+    int64_t temp;
+    int32_t temp32, temp31, result;
+    int64_t temp_sum;
+
+#ifndef TARGET_MIPS64
+    temp = ((uint64_t)env->active_tc.HI[acc] << 32) |
+           (uint64_t)env->active_tc.LO[acc];
+#else
+    temp = (uint64_t)env->active_tc.LO[acc];
+#endif
+
+    temp_sum = (int64_t)a + temp;
+
+    temp32 = (temp_sum >> 32) & 0x01;
+    temp31 = (temp_sum >> 31) & 0x01;
+    result = temp_sum & 0xFFFFFFFF;
+
+    if (temp32 != temp31) {
+        if (temp32 == 0) {
+            result = 0x7FFFFFFF;
+        } else {
+            result = 0x80000000;
+        }
+        set_DSPControl_overflow_flag(1, 16 + acc, env);
+    }
+
+    return result;
+}
+
+/* a[0] is LO, a[1] is HI. */
+static inline void mipsdsp_sat64_acc_add_q63(int64_t *ret,
+                                             int32_t ac,
+                                             int64_t *a,
+                                             CPUMIPSState *env)
+{
+    bool temp64;
+
+    ret[0] = env->active_tc.LO[ac] + a[0];
+    ret[1] = env->active_tc.HI[ac] + a[1];
+
+    if (((uint64_t)ret[0] < (uint64_t)env->active_tc.LO[ac]) &&
+        ((uint64_t)ret[0] < (uint64_t)a[0])) {
+        ret[1] += 1;
+    }
+    temp64 = ret[1] & 1;
+    if (temp64 != ((ret[0] >> 63) & 0x01)) {
+        if (temp64) {
+            ret[0] = (0x01ull << 63);
+            ret[1] = ~0ull;
+        } else {
+            ret[0] = (0x01ull << 63) - 1;
+            ret[1] = 0x00;
+        }
+        set_DSPControl_overflow_flag(1, 16 + ac, env);
+    }
+}
+
+static inline void mipsdsp_sat64_acc_sub_q63(int64_t *ret,
+                                             int32_t ac,
+                                             int64_t *a,
+                                             CPUMIPSState *env)
+{
+    bool temp64;
+
+    ret[0] = env->active_tc.LO[ac] - a[0];
+    ret[1] = env->active_tc.HI[ac] - a[1];
+
+    if ((uint64_t)ret[0] > (uint64_t)env->active_tc.LO[ac]) {
+        ret[1] -= 1;
+    }
+    temp64 = ret[1] & 1;
+    if (temp64 != ((ret[0] >> 63) & 0x01)) {
+        if (temp64) {
+            ret[0] = (0x01ull << 63);
+            ret[1] = ~0ull;
+        } else {
+            ret[0] = (0x01ull << 63) - 1;
+            ret[1] = 0x00;
+        }
+        set_DSPControl_overflow_flag(1, 16 + ac, env);
+    }
+}
+
+static inline int32_t mipsdsp_mul_i16_i16(int16_t a, int16_t b,
+                                          CPUMIPSState *env)
+{
+    int32_t temp;
+
+    temp = (int32_t)a * (int32_t)b;
+
+    if ((temp > (int)0x7FFF) || (temp < (int)0xFFFF8000)) {
+        set_DSPControl_overflow_flag(1, 21, env);
+    }
+    temp &= 0x0000FFFF;
+
+    return temp;
+}
+
+static inline int32_t mipsdsp_mul_u16_u16(int32_t a, int32_t b)
+{
+    return (int32_t)((uint32_t)a * (uint32_t)b); /* no signed-overflow UB */
+}
+
+static inline int32_t mipsdsp_mul_i32_i32(int32_t a, int32_t b)
+{
+    return (int32_t)((uint32_t)a * (uint32_t)b); /* low 32 bits, no UB */
+}
+
+static inline int32_t mipsdsp_sat16_mul_i16_i16(int16_t a, int16_t b,
+                                                CPUMIPSState *env)
+{
+    int32_t temp;
+
+    temp = (int32_t)a * (int32_t)b;
+
+    if (temp > (int)0x7FFF) {
+        temp = 0x00007FFF;
+        set_DSPControl_overflow_flag(1, 21, env);
+    } else if (temp < (int)0xffff8000) {
+        temp = 0xFFFF8000;
+        set_DSPControl_overflow_flag(1, 21, env);
+    }
+    temp &= 0x0000FFFF;
+
+    return temp;
+}
+
+static inline int32_t mipsdsp_mul_q15_q15_overflowflag21(uint16_t a, uint16_t b,
+                                                         CPUMIPSState *env)
+{
+    int32_t temp;
+    /* Doubling 0x8000 * 0x8000 cannot fit int32_t: clamp, set flag bit 21.  */
+    if ((a == 0x8000) && (b == 0x8000)) {
+        temp = 0x7FFFFFFF;
+        set_DSPControl_overflow_flag(1, 21, env);
+    } else {
+        /* "* 2", not "<< 1": left-shifting a negative int is undefined.  */
+        temp = ((int16_t)a * (int16_t)b) * 2;
+    }
+    return temp;
+}
+
+/* right shift */
+static inline uint8_t mipsdsp_rshift_u8(uint8_t a, target_ulong mov)
+{
+    return a >> mov;
+}
+
+static inline uint16_t mipsdsp_rshift_u16(uint16_t a, target_ulong mov)
+{
+    return a >> mov;
+}
+
+static inline int8_t mipsdsp_rashift8(int8_t a, target_ulong mov)
+{
+    return a >> mov;
+}
+
+static inline int16_t mipsdsp_rashift16(int16_t a, target_ulong mov)
+{
+    return a >> mov;
+}
+
+static inline int32_t mipsdsp_rashift32(int32_t a, target_ulong mov)
+{
+    return a >> mov;
+}
+
+static inline int16_t mipsdsp_rshift1_add_q16(int16_t a, int16_t b)
+{
+    int32_t temp;
+
+    temp = (int32_t)a + (int32_t)b;
+
+    return (temp >> 1) & 0xFFFF;
+}
+
+/* round right shift */
+static inline int16_t mipsdsp_rrshift1_add_q16(int16_t a, int16_t b)
+{
+    int32_t temp;
+
+    temp = (int32_t)a + (int32_t)b;
+    temp += 1;
+
+    return (temp >> 1) & 0xFFFF;
+}
+
+static inline int32_t mipsdsp_rshift1_add_q32(int32_t a, int32_t b)
+{
+    int64_t temp;
+
+    temp = (int64_t)a + (int64_t)b;
+
+    return (temp >> 1) & 0xFFFFFFFF;
+}
+
+static inline int32_t mipsdsp_rrshift1_add_q32(int32_t a, int32_t b)
+{
+    int64_t temp;
+
+    temp = (int64_t)a + (int64_t)b;
+    temp += 1;
+
+    return (temp >> 1) & 0xFFFFFFFF;
+}
+
+static inline uint8_t mipsdsp_rshift1_add_u8(uint8_t a, uint8_t b)
+{
+    uint16_t temp;
+
+    temp = (uint16_t)a + (uint16_t)b;
+
+    return (temp >> 1) & 0x00FF;
+}
+
+static inline uint8_t mipsdsp_rrshift1_add_u8(uint8_t a, uint8_t b)
+{
+    uint16_t temp;
+
+    temp = (uint16_t)a + (uint16_t)b + 1;
+
+    return (temp >> 1) & 0x00FF;
+}
+
+static inline uint8_t mipsdsp_rshift1_sub_u8(uint8_t a, uint8_t b)
+{
+    uint16_t temp;
+
+    temp = (uint16_t)a - (uint16_t)b;
+
+    return (temp >> 1) & 0x00FF;
+}
+
+static inline uint8_t mipsdsp_rrshift1_sub_u8(uint8_t a, uint8_t b)
+{
+    uint16_t temp;
+
+    temp = (uint16_t)a - (uint16_t)b + 1;
+
+    return (temp >> 1) & 0x00FF;
+}
+
+/* p[0]: acc >> (shift - 1), or acc << 1 if shift == 0.  p[1]: acc bit 63. */
+static inline void mipsdsp_rndrashift_short_acc(int64_t *p,
+                                                int32_t ac,
+                                                int32_t shift,
+                                                CPUMIPSState *env)
+{
+    int64_t acc;
+    /* acc = HI in bits 63..32, low 32 bits of LO in bits 31..0.  */
+    acc = ((int64_t)env->active_tc.HI[ac] << 32) |
+          ((int64_t)env->active_tc.LO[ac] & 0xFFFFFFFF);
+    p[0] = (shift == 0) ? (acc << 1) : (acc >> (shift - 1));
+    p[1] = (acc >> 63) & 0x01;
+}
+
+/* 128 bits long. p[0] is LO, p[1] is HI */
+static inline void mipsdsp_rashift_acc(uint64_t *p,
+                                       uint32_t ac,
+                                       uint32_t shift,
+                                       CPUMIPSState *env)
+{
+    uint64_t tempB, tempA;
+
+    tempB = env->active_tc.HI[ac];
+    tempA = env->active_tc.LO[ac];
+    shift = shift & 0x1F;
+
+    if (shift == 0) {
+        p[1] = tempB;
+        p[0] = tempA;
+    } else {
+        p[0] = (tempB << (64 - shift)) | (tempA >> shift);
+        p[1] = (int64_t)tempB >> shift;
+    }
+}
+
+/* 128 bits long. p[0] is LO, p[1] is HI , p[2] is sign of HI.*/
+static inline void mipsdsp_rndrashift_acc(uint64_t *p,
+                                          uint32_t ac,
+                                          uint32_t shift,
+                                          CPUMIPSState *env)
+{
+    uint64_t hi, lo;
+
+    hi = env->active_tc.HI[ac];
+    lo = env->active_tc.LO[ac];
+    shift = shift & 0x3F;
+
+    /* Unsigned words: shifting LO right must not drag sign bits into p[].  */
+    p[2] = ((int64_t)hi < 0) ? ~0ull : 0x0;
+    if (shift == 0) {
+        p[1] = (hi << 1) | (lo >> 63);
+        p[0] = lo << 1;
+    } else if (shift == 1) {
+        /* Net shift of zero; "hi << 64" below would be undefined.  */
+        p[1] = hi;
+        p[0] = lo;
+    } else {
+        p[0] = (hi << (65 - shift)) | (lo >> (shift - 1));
+        p[1] = (int64_t)hi >> (shift - 1);
+    }
+}
+
+static inline int32_t mipsdsp_mul_q15_q15(int32_t ac, uint16_t a, uint16_t b,
+                                          CPUMIPSState *env)
+{
+    int32_t temp;
+    /* Doubling 0x8000 * 0x8000 cannot fit int32_t: clamp, flag bit 16 + ac. */
+    if ((a == 0x8000) && (b == 0x8000)) {
+        temp = 0x7FFFFFFF;
+        set_DSPControl_overflow_flag(1, 16 + ac, env);
+    } else {
+        /* "* 2", not "<< 1": left-shifting a negative int is undefined.  */
+        temp = ((int16_t)a * (int16_t)b) * 2;
+    }
+    return temp;
+}
+
+static inline int64_t mipsdsp_mul_q31_q31(int32_t ac, uint32_t a, uint32_t b,
+                                          CPUMIPSState *env)
+{
+    uint64_t temp;
+    /* Doubling INT32_MIN * INT32_MIN would give 2^63: clamp and set flag.  */
+    if ((a == 0x80000000) && (b == 0x80000000)) {
+        temp = (0x01ull << 63) - 1;
+        set_DSPControl_overflow_flag(1, 16 + ac, env);
+    } else {
+        /* "* 2", not "<< 1": left-shifting a negative value is undefined.  */
+        temp = ((int64_t)(int32_t)a * (int32_t)b) * 2;
+    }
+    return temp;
+}
+
+static inline uint16_t mipsdsp_mul_u8_u8(uint8_t a, uint8_t b)
+{
+    return (uint16_t)a * (uint16_t)b;
+}
+
+static inline uint16_t mipsdsp_mul_u8_u16(uint8_t a, uint16_t b,
+                                          CPUMIPSState *env)
+{
+    /* 8 x 16 bit unsigned product, at most 24 bits wide.  */
+    const uint32_t product = (uint32_t)a * (uint32_t)b;
+
+    if (product <= 0x0000FFFF) {
+        return product;
+    }
+    /* Does not fit in 16 bits: set DSPControl bit 21 and saturate.  */
+    set_DSPControl_overflow_flag(1, 21, env);
+    return 0xFFFF;
+}
+
+static inline uint64_t mipsdsp_mul_u32_u32(uint32_t a, uint32_t b)
+{
+    return (uint64_t)a * (uint64_t)b;
+}
+
+static inline int16_t mipsdsp_rndq15_mul_q15_q15(uint16_t a, uint16_t b,
+                                                 CPUMIPSState *env)
+{
+    uint32_t temp;
+    /* 0x8000 * 0x8000 is the one saturating input pair; sets flag bit 21.  */
+    if ((a == 0x8000) && (b == 0x8000)) {
+        temp = 0x7FFF0000;
+        set_DSPControl_overflow_flag(1, 21, env);
+    } else {
+        /* "* 2", not "<< 1": left-shifting a negative int is undefined.  */
+        temp = ((int16_t)a * (int16_t)b) * 2;
+        temp = temp + 0x00008000;
+    }
+    return (temp & 0xFFFF0000) >> 16;
+}
+
+static inline int32_t mipsdsp_sat16_mul_q15_q15(uint16_t a, uint16_t b,
+                                                CPUMIPSState *env)
+{
+    int32_t temp;
+    /* 0x8000 * 0x8000 is the one saturating input pair; sets flag bit 21.  */
+    if ((a == 0x8000) && (b == 0x8000)) {
+        temp = 0x7FFF0000;
+        set_DSPControl_overflow_flag(1, 21, env);
+    } else {
+        /* "* 2", not "<< 1": left-shifting a negative int is undefined.  */
+        temp = (int16_t)a * (int16_t)b;
+        temp = temp * 2;
+    }
+    return (temp >> 16) & 0x0000FFFF;
+}
+
+static inline uint16_t mipsdsp_trunc16_sat16_round(int32_t a,
+                                                   CPUMIPSState *env)
+{
+    uint16_t temp;
+
+
+    /*
+     * The value 0x00008000 will be added to the input Q31 value, and the code
+     * needs to check if the addition causes an overflow. Since a positive value
+     * is added, overflow can happen in one direction only.
+     */
+    if (a > 0x7FFF7FFF) {
+        temp = 0x7FFF;
+        set_DSPControl_overflow_flag(1, 22, env);
+    } else {
+        temp = ((a + 0x8000) >> 16) & 0xFFFF;
+    }
+
+    return temp;
+}
+
+static inline uint8_t mipsdsp_sat8_reduce_precision(uint16_t a,
+                                                    CPUMIPSState *env)
+{
+    /* Low 15 bits of the input; bit 15 is tested separately below.  */
+    const uint16_t magnitude = a & 0x7FFF;
+
+    /* Bit 15 set: clamp to 0x00 and set DSPControl bit 22.  */
+    if ((a >> 15) & 0x01) {
+        set_DSPControl_overflow_flag(1, 22, env);
+        return 0x00;
+    }
+
+    /* Magnitude above 0x7F80: clamp to 0xFF and set DSPControl bit 22.  */
+    if (magnitude > 0x7F80) {
+        set_DSPControl_overflow_flag(1, 22, env);
+        return 0xFF;
+    }
+
+    /* Otherwise keep bits 14..7; the return type narrows this to 8 bits.  */
+    return (magnitude >> 7) & 0xFFFF;
+}
+
+static inline uint8_t mipsdsp_lshift8(uint8_t a, uint8_t s, CPUMIPSState *env)
+{
+    uint8_t discard;
+
+    if (s != 0) {
+        discard = a >> (8 - s);
+
+        if (discard != 0x00) {
+            set_DSPControl_overflow_flag(1, 22, env);
+        }
+    }
+    return a << s;
+}
+
+static inline uint16_t mipsdsp_lshift16(uint16_t a, uint8_t s,
+                                        CPUMIPSState *env)
+{
+    uint16_t discard;
+
+    if (s != 0) {
+        discard = (int16_t)a >> (15 - s);
+
+        if ((discard != 0x0000) && (discard != 0xFFFF)) {
+            set_DSPControl_overflow_flag(1, 22, env);
+        }
+    }
+    return a << s;
+}
+
+
+static inline uint32_t mipsdsp_lshift32(uint32_t a, uint8_t s,
+                                        CPUMIPSState *env)
+{
+    uint32_t discard;
+    /* a << s; sets DSPControl bit 22 unless discard is all 0s or all 1s.  */
+    if (s == 0) {
+        return a;
+    } else {
+        discard = (int32_t)a >> (31 - (s - 1));
+        /* NOTE(review): spans top s bits; lshift16 spans s + 1 - confirm.  */
+        if ((discard != 0x00000000) && (discard != 0xFFFFFFFF)) {
+            set_DSPControl_overflow_flag(1, 22, env);
+        }
+        return a << s;
+    }
+}
+
+static inline uint16_t mipsdsp_sat16_lshift(uint16_t a, uint8_t s,
+                                            CPUMIPSState *env)
+{
+    uint8_t  sign;
+    uint16_t discard;
+
+    if (s == 0) {
+        return a;
+    } else {
+        sign = (a >> 15) & 0x01;
+        if (sign != 0) {
+            discard = (((0x01 << (16 - s)) - 1) << s) |
+                      ((a >> (14 - (s - 1))) & ((0x01 << s) - 1));
+        } else {
+            discard = a >> (14 - (s - 1));
+        }
+
+        if ((discard != 0x0000) && (discard != 0xFFFF)) {
+            set_DSPControl_overflow_flag(1, 22, env);
+            return (sign == 0) ? 0x7FFF : 0x8000;
+        } else {
+            return a << s;
+        }
+    }
+}
+
+static inline uint32_t mipsdsp_sat32_lshift(uint32_t a, uint8_t s,
+                                            CPUMIPSState *env)
+{
+    uint8_t  sign;
+    uint32_t discard;
+    /* Saturating a << s; on overflow also sets DSPControl bit 22.  */
+    if (s == 0) {
+        return a;
+    } else {
+        sign = (a >> 31) & 0x01;
+        if (sign != 0) {
+            /* Unsigned masks: the same shifts on int overflow (undefined).  */
+            discard = (((0x01u << (32 - s)) - 1) << s) |
+                      ((a >> (30 - (s - 1))) & ((0x01u << s) - 1));
+        } else {
+            discard = a >> (30 - (s - 1));
+        }
+        if ((discard != 0x00000000) && (discard != 0xFFFFFFFF)) {
+            set_DSPControl_overflow_flag(1, 22, env);
+            return (sign == 0) ? 0x7FFFFFFF : 0x80000000;
+        } else {
+            return a << s;
+        }
+    }
+}
+
+static inline uint8_t mipsdsp_rnd8_rashift(uint8_t a, uint8_t s)
+{
+    uint32_t temp;
+
+    if (s == 0) {
+        temp = (uint32_t)a << 1;
+    } else {
+        temp = (int32_t)(int8_t)a >> (s - 1);
+    }
+
+    return (temp + 1) >> 1;
+}
+
+static inline uint16_t mipsdsp_rnd16_rashift(uint16_t a, uint8_t s)
+{
+    uint32_t temp;
+
+    if (s == 0) {
+        temp = (uint32_t)a << 1;
+    } else {
+        temp = (int32_t)(int16_t)a >> (s - 1);
+    }
+
+    return (temp + 1) >> 1;
+}
+
+static inline uint32_t mipsdsp_rnd32_rashift(uint32_t a, uint8_t s)
+{
+    int64_t temp;
+
+    if (s == 0) {
+        temp = (uint64_t)a << 1;
+    } else {
+        temp = (int64_t)(int32_t)a >> (s - 1);
+    }
+    temp += 1;
+
+    return (temp >> 1) & 0xFFFFFFFFull;
+}
+
+static inline uint16_t mipsdsp_sub_i16(int16_t a, int16_t b, CPUMIPSState *env)
+{
+    int16_t  temp;
+
+    temp = a - b;
+    if (MIPSDSP_OVERFLOW_SUB(a, b, temp, 0x8000)) {
+        set_DSPControl_overflow_flag(1, 20, env);
+    }
+
+    return temp;
+}
+
+static inline uint16_t mipsdsp_sat16_sub(int16_t a, int16_t b,
+                                         CPUMIPSState *env)
+{
+    int16_t  temp;
+
+    temp = a - b;
+    if (MIPSDSP_OVERFLOW_SUB(a, b, temp, 0x8000)) {
+        if (a >= 0) {
+            temp = 0x7FFF;
+        } else {
+            temp = 0x8000;
+        }
+        set_DSPControl_overflow_flag(1, 20, env);
+    }
+
+    return temp;
+}
+
+static inline uint32_t mipsdsp_sat32_sub(int32_t a, int32_t b,
+                                         CPUMIPSState *env)
+{
+    int32_t  temp;
+    /* Subtract as unsigned: signed overflow is undefined, wraparound is not. */
+    temp = (int32_t)((uint32_t)a - (uint32_t)b);
+    if (MIPSDSP_OVERFLOW_SUB(a, b, temp, 0x80000000)) {
+        if (a >= 0) {
+            temp = INT32_MAX;
+        } else {
+            temp = INT32_MIN;
+        }
+        set_DSPControl_overflow_flag(1, 20, env);
+    }
+    /* Clamped on overflow (DSPControl bit 20 set), exact otherwise.  */
+    return temp & 0xFFFFFFFFull;
+}
+
+static inline uint16_t mipsdsp_rshift1_sub_q16(int16_t a, int16_t b)
+{
+    int32_t  temp;
+
+    temp = (int32_t)a - (int32_t)b;
+
+    return (temp >> 1) & 0x0000FFFF;
+}
+
+static inline uint16_t mipsdsp_rrshift1_sub_q16(int16_t a, int16_t b)
+{
+    int32_t  temp;
+
+    temp = (int32_t)a - (int32_t)b;
+    temp += 1;
+
+    return (temp >> 1) & 0x0000FFFF;
+}
+
+static inline uint32_t mipsdsp_rshift1_sub_q32(int32_t a, int32_t b)
+{
+    int64_t  temp;
+
+    temp = (int64_t)a - (int64_t)b;
+
+    return (temp >> 1) & 0xFFFFFFFFull;
+}
+
+static inline uint32_t mipsdsp_rrshift1_sub_q32(int32_t a, int32_t b)
+{
+    int64_t  temp;
+
+    temp = (int64_t)a - (int64_t)b;
+    temp += 1;
+
+    return (temp >> 1) & 0xFFFFFFFFull;
+}
+
+static inline uint16_t mipsdsp_sub_u16_u16(uint16_t a, uint16_t b,
+                                           CPUMIPSState *env)
+{
+    uint8_t  temp16;
+    uint32_t temp;
+
+    temp = (uint32_t)a - (uint32_t)b;
+    temp16 = (temp >> 16) & 0x01;
+    if (temp16 == 1) {
+        set_DSPControl_overflow_flag(1, 20, env);
+    }
+    return temp & 0x0000FFFF;
+}
+
+static inline uint16_t mipsdsp_satu16_sub_u16_u16(uint16_t a, uint16_t b,
+                                                  CPUMIPSState *env)
+{
+    /*
+     * Subtract in 32 bits: when b > a the result wraps and bit 16 of the
+     * difference is set, which is what the borrow test below looks at.
+     */
+    const uint32_t diff = (uint32_t)a - (uint32_t)b;
+
+    if ((diff >> 16) & 0x01) {
+        /* Borrow: set DSPControl bit 20 and clamp to zero.  */
+        set_DSPControl_overflow_flag(1, 20, env);
+        return 0x0000;
+    }
+    return diff & 0x0000FFFF;
+}
+
+static inline uint8_t mipsdsp_sub_u8(uint8_t a, uint8_t b, CPUMIPSState *env)
+{
+    uint8_t  temp8;
+    uint16_t temp;
+
+    temp = (uint16_t)a - (uint16_t)b;
+    temp8 = (temp >> 8) & 0x01;
+    if (temp8 == 1) {
+        set_DSPControl_overflow_flag(1, 20, env);
+    }
+
+    return temp & 0x00FF;
+}
+
+static inline uint8_t mipsdsp_satu8_sub(uint8_t a, uint8_t b, CPUMIPSState *env)
+{
+    /* Keep 16 bits of the difference: bit 8 is set exactly when b > a.  */
+    const uint16_t diff = (uint16_t)a - (uint16_t)b;
+
+    if ((diff >> 8) & 0x01) {
+        /* Borrow: set DSPControl bit 20 and clamp to zero.  */
+        set_DSPControl_overflow_flag(1, 20, env);
+        return 0x00;
+    }
+
+    /* No borrow, so the difference already fits in a byte.  */
+    return diff & 0x00FF;
+}
+
+static inline uint32_t mipsdsp_sub32(int32_t a, int32_t b, CPUMIPSState *env)
+{
+    int32_t temp;
+    /* Subtract as unsigned: signed overflow is undefined, wraparound is not. */
+    temp = (int32_t)((uint32_t)a - (uint32_t)b);
+    if (MIPSDSP_OVERFLOW_SUB(a, b, temp, 0x80000000)) {
+        set_DSPControl_overflow_flag(1, 20, env);
+    }
+    /* Wrapped difference; DSPControl bit 20 records a signed overflow.  */
+    return temp;
+}
+
+static inline int32_t mipsdsp_add_i32(int32_t a, int32_t b, CPUMIPSState *env)
+{
+    int32_t temp;
+    /* Add as unsigned: signed overflow is undefined, wraparound is not.  */
+    temp = (int32_t)((uint32_t)a + (uint32_t)b);
+
+    if (MIPSDSP_OVERFLOW_ADD(a, b, temp, 0x80000000)) {
+        set_DSPControl_overflow_flag(1, 20, env);
+    }
+    /* Wrapped sum; DSPControl bit 20 records a signed overflow.  */
+    return temp;
+}
+
+static inline int32_t mipsdsp_cmp_eq(int32_t a, int32_t b)
+{
+    return a == b;
+}
+
+static inline int32_t mipsdsp_cmp_le(int32_t a, int32_t b)
+{
+    return a <= b;
+}
+
+static inline int32_t mipsdsp_cmp_lt(int32_t a, int32_t b)
+{
+    return a < b;
+}
+
+static inline int32_t mipsdsp_cmpu_eq(uint32_t a, uint32_t b)
+{
+    return a == b;
+}
+
+static inline int32_t mipsdsp_cmpu_le(uint32_t a, uint32_t b)
+{
+    return a <= b;
+}
+
+static inline int32_t mipsdsp_cmpu_lt(uint32_t a, uint32_t b)
+{
+    return a < b;
+}
+/*** MIPS DSP internal functions end ***/
+
+#define MIPSDSP_LHI 0xFFFFFFFF00000000ull
+#define MIPSDSP_LLO 0x00000000FFFFFFFFull
+#define MIPSDSP_HI  0xFFFF0000
+#define MIPSDSP_LO  0x0000FFFF
+#define MIPSDSP_Q3  0xFF000000
+#define MIPSDSP_Q2  0x00FF0000
+#define MIPSDSP_Q1  0x0000FF00
+#define MIPSDSP_Q0  0x000000FF
+
+#define MIPSDSP_SPLIT32_8(num, a, b, c, d)  \
+    do {                                    \
+        a = (num >> 24) & MIPSDSP_Q0;       \
+        b = (num >> 16) & MIPSDSP_Q0;       \
+        c = (num >> 8) & MIPSDSP_Q0;        \
+        d = num & MIPSDSP_Q0;               \
+    } while (0)
+
+#define MIPSDSP_SPLIT32_16(num, a, b)       \
+    do {                                    \
+        a = (num >> 16) & MIPSDSP_LO;       \
+        b = num & MIPSDSP_LO;               \
+    } while (0)
+
+#define MIPSDSP_RETURN32_8(a, b, c, d)  ((target_long)(int32_t) \
+                                         (((uint32_t)a << 24) | \
+                                         (((uint32_t)b << 16) | \
+                                         (((uint32_t)c << 8) |  \
+                                          ((uint32_t)d & 0xFF)))))
+#define MIPSDSP_RETURN32_16(a, b)       ((target_long)(int32_t) \
+                                         (((uint32_t)a << 16) | \
+                                          ((uint32_t)b & 0xFFFF)))
+
+#ifdef TARGET_MIPS64
+#define MIPSDSP_SPLIT64_16(num, a, b, c, d)  \
+    do {                                     \
+        a = (num >> 48) & MIPSDSP_LO;        \
+        b = (num >> 32) & MIPSDSP_LO;        \
+        c = (num >> 16) & MIPSDSP_LO;        \
+        d = num & MIPSDSP_LO;                \
+    } while (0)
+
+#define MIPSDSP_SPLIT64_32(num, a, b)       \
+    do {                                    \
+        a = (num >> 32) & MIPSDSP_LLO;      \
+        b = num & MIPSDSP_LLO;              \
+    } while (0)
+
+#define MIPSDSP_RETURN64_16(a, b, c, d) (((uint64_t)a << 48) | \
+                                         ((uint64_t)b << 32) | \
+                                         ((uint64_t)c << 16) | \
+                                         (uint64_t)d)
+#define MIPSDSP_RETURN64_32(a, b)       (((uint64_t)a << 32) | (uint64_t)b)
+#endif
+
+/** DSP Arithmetic Sub-class insns **/
+#define MIPSDSP32_UNOP_ENV(name, func, element)                            \
+target_ulong helper_##name(target_ulong rt, CPUMIPSState *env)             \
+{                                                                          \
+    DSP32Value dt;                                                         \
+    unsigned int i;                                                     \
+                                                                           \
+    dt.sw[0] = rt;                                                         \
+                                                                           \
+    for (i = 0; i < ARRAY_SIZE(dt.element); i++) {                         \
+        dt.element[i] = mipsdsp_##func(dt.element[i], env);                \
+    }                                                                      \
+                                                                           \
+    return (target_long)dt.sw[0];                                          \
+}
+MIPSDSP32_UNOP_ENV(absq_s_ph, sat_abs16, sh)
+MIPSDSP32_UNOP_ENV(absq_s_qb, sat_abs8, sb)
+MIPSDSP32_UNOP_ENV(absq_s_w, sat_abs32, sw)
+#undef MIPSDSP32_UNOP_ENV
+
+#if defined(TARGET_MIPS64)
+#define MIPSDSP64_UNOP_ENV(name, func, element)                            \
+target_ulong helper_##name(target_ulong rt, CPUMIPSState *env)             \
+{                                                                          \
+    DSP64Value dt;                                                         \
+    unsigned int i;                                                        \
+    /* View rt through sl[0], then map mipsdsp_##func over each lane. */   \
+    dt.sl[0] = rt;                                                         \
+                                                                           \
+    for (i = 0; i < ARRAY_SIZE(dt.element); i++) {                         \
+        dt.element[i] = mipsdsp_##func(dt.element[i], env);                \
+    }                                                                      \
+    /* sl[0] is returned as-is (no cast). */                               \
+    return dt.sl[0];                                                       \
+}
+MIPSDSP64_UNOP_ENV(absq_s_ob, sat_abs8, sb)
+MIPSDSP64_UNOP_ENV(absq_s_qh, sat_abs16, sh)
+MIPSDSP64_UNOP_ENV(absq_s_pw, sat_abs32, sw)
+#undef MIPSDSP64_UNOP_ENV
+#endif
+
+#define MIPSDSP32_BINOP(name, func, element)                               \
+target_ulong helper_##name(target_ulong rs, target_ulong rt)               \
+{                                                                          \
+    DSP32Value ds, dt;                                                     \
+    unsigned int i;                                                        \
+    /* Overlay rs and rt via sw[0]; ds collects the per-lane results. */   \
+    ds.sw[0] = rs;                                                         \
+    dt.sw[0] = rt;                                                         \
+                                                                           \
+    for (i = 0; i < ARRAY_SIZE(ds.element); i++) {                         \
+        ds.element[i] = mipsdsp_##func(ds.element[i], dt.element[i]);      \
+    }                                                                      \
+    /* Return sw[0] cast to target_long. */                                \
+    return (target_long)ds.sw[0];                                          \
+}
+MIPSDSP32_BINOP(addqh_ph, rshift1_add_q16, sh);
+MIPSDSP32_BINOP(addqh_r_ph, rrshift1_add_q16, sh);
+MIPSDSP32_BINOP(addqh_r_w, rrshift1_add_q32, sw);
+MIPSDSP32_BINOP(addqh_w, rshift1_add_q32, sw);
+MIPSDSP32_BINOP(adduh_qb, rshift1_add_u8, ub);
+MIPSDSP32_BINOP(adduh_r_qb, rrshift1_add_u8, ub);
+MIPSDSP32_BINOP(subqh_ph, rshift1_sub_q16, sh);
+MIPSDSP32_BINOP(subqh_r_ph, rrshift1_sub_q16, sh);
+MIPSDSP32_BINOP(subqh_r_w, rrshift1_sub_q32, sw);
+MIPSDSP32_BINOP(subqh_w, rshift1_sub_q32, sw);
+#undef MIPSDSP32_BINOP
+
+#define MIPSDSP32_BINOP_ENV(name, func, element)                           \
+target_ulong helper_##name(target_ulong rs, target_ulong rt,               \
+                           CPUMIPSState *env)                              \
+{                                                                          \
+    DSP32Value ds, dt;                                                     \
+    unsigned int i;                                                        \
+    /* Overlay rs and rt via sw[0]; ds collects the per-lane results. */   \
+    ds.sw[0] = rs;                                                         \
+    dt.sw[0] = rt;                                                         \
+                                                                           \
+    for (i = 0 ; i < ARRAY_SIZE(ds.element); i++) {                        \
+        ds.element[i] = mipsdsp_##func(ds.element[i], dt.element[i], env); \
+    }                                                                      \
+    /* Return sw[0] cast to target_long; env is only passed to func. */    \
+    return (target_long)ds.sw[0];                                          \
+}
+MIPSDSP32_BINOP_ENV(addq_ph, add_i16, sh)
+MIPSDSP32_BINOP_ENV(addq_s_ph, sat_add_i16, sh)
+MIPSDSP32_BINOP_ENV(addq_s_w, sat_add_i32, sw);
+MIPSDSP32_BINOP_ENV(addu_ph, add_u16, sh)
+MIPSDSP32_BINOP_ENV(addu_qb, add_u8, ub);
+MIPSDSP32_BINOP_ENV(addu_s_ph, sat_add_u16, sh)
+MIPSDSP32_BINOP_ENV(addu_s_qb, sat_add_u8, ub);
+MIPSDSP32_BINOP_ENV(subq_ph, sub_i16, sh);
+MIPSDSP32_BINOP_ENV(subq_s_ph, sat16_sub, sh);
+MIPSDSP32_BINOP_ENV(subq_s_w, sat32_sub, sw);
+MIPSDSP32_BINOP_ENV(subu_ph, sub_u16_u16, sh);
+MIPSDSP32_BINOP_ENV(subu_qb, sub_u8, ub);
+MIPSDSP32_BINOP_ENV(subu_s_ph, satu16_sub_u16_u16, sh);
+MIPSDSP32_BINOP_ENV(subu_s_qb, satu8_sub, ub);
+#undef MIPSDSP32_BINOP_ENV
+
+#ifdef TARGET_MIPS64
+#define MIPSDSP64_BINOP(name, func, element)                               \
+target_ulong helper_##name(target_ulong rs, target_ulong rt)               \
+{                                                                          \
+    DSP64Value ds, dt;                                                     \
+    unsigned int i;                                                        \
+    /* Overlay rs and rt via sl[0]; ds collects the per-lane results. */   \
+    ds.sl[0] = rs;                                                         \
+    dt.sl[0] = rt;                                                         \
+                                                                           \
+    for (i = 0 ; i < ARRAY_SIZE(ds.element); i++) {                        \
+        ds.element[i] = mipsdsp_##func(ds.element[i], dt.element[i]);      \
+    }                                                                      \
+    /* sl[0] is returned as-is (no cast). */                               \
+    return ds.sl[0];                                                       \
+}
+MIPSDSP64_BINOP(adduh_ob, rshift1_add_u8, ub);
+MIPSDSP64_BINOP(adduh_r_ob, rrshift1_add_u8, ub);
+MIPSDSP64_BINOP(subuh_ob, rshift1_sub_u8, ub);
+MIPSDSP64_BINOP(subuh_r_ob, rrshift1_sub_u8, ub);
+#undef MIPSDSP64_BINOP
+
+#define MIPSDSP64_BINOP_ENV(name, func, element)                           \
+target_ulong helper_##name(target_ulong rs, target_ulong rt,               \
+                           CPUMIPSState *env)                              \
+{                                                                          \
+    DSP64Value ds, dt;                                                     \
+    unsigned int i;                                                        \
+    /* element must match the lane type func expects (ub for add_u8). */   \
+    ds.sl[0] = rs;                                                         \
+    dt.sl[0] = rt;                                                         \
+                                                                           \
+    for (i = 0 ; i < ARRAY_SIZE(ds.element); i++) {                        \
+        ds.element[i] = mipsdsp_##func(ds.element[i], dt.element[i], env); \
+    }                                                                      \
+    /* sl[0] is returned as-is (no cast). */                               \
+    return ds.sl[0];                                                       \
+}
+MIPSDSP64_BINOP_ENV(addq_pw, add_i32, sw);
+MIPSDSP64_BINOP_ENV(addq_qh, add_i16, sh);
+MIPSDSP64_BINOP_ENV(addq_s_pw, sat_add_i32, sw);
+MIPSDSP64_BINOP_ENV(addq_s_qh, sat_add_i16, sh);
+MIPSDSP64_BINOP_ENV(addu_ob, add_u8, ub);        /* byte lanes */
+MIPSDSP64_BINOP_ENV(addu_qh, add_u16, uh);
+MIPSDSP64_BINOP_ENV(addu_s_ob, sat_add_u8, ub);  /* byte lanes */
+MIPSDSP64_BINOP_ENV(addu_s_qh, sat_add_u16, uh);
+MIPSDSP64_BINOP_ENV(subq_pw, sub32, sw);
+MIPSDSP64_BINOP_ENV(subq_qh, sub_i16, sh);
+MIPSDSP64_BINOP_ENV(subq_s_pw, sat32_sub, sw);
+MIPSDSP64_BINOP_ENV(subq_s_qh, sat16_sub, sh);
+MIPSDSP64_BINOP_ENV(subu_ob, sub_u8, ub);        /* byte lanes */
+MIPSDSP64_BINOP_ENV(subu_qh, sub_u16_u16, uh);
+MIPSDSP64_BINOP_ENV(subu_s_ob, satu8_sub, ub);   /* byte lanes */
+MIPSDSP64_BINOP_ENV(subu_s_qh, satu16_sub_u16_u16, uh);
+#undef MIPSDSP64_BINOP_ENV
+
+#endif
+
+#define SUBUH_QB(name, var) \
+target_ulong helper_##name##_qb(target_ulong rs, target_ulong rt) \
+{                                                                 \
+    uint8_t rs3, rs2, rs1, rs0;                                   \
+    uint8_t rt3, rt2, rt1, rt0;                                   \
+    uint8_t tempD, tempC, tempB, tempA;                           \
+    /* Each byte: (rs - rt + var) >> 1, truncated to 8 bits. */   \
+    MIPSDSP_SPLIT32_8(rs, rs3, rs2, rs1, rs0);                    \
+    MIPSDSP_SPLIT32_8(rt, rt3, rt2, rt1, rt0);                    \
+                                                                  \
+    tempD = ((uint16_t)rs3 - (uint16_t)rt3 + var) >> 1;           \
+    tempC = ((uint16_t)rs2 - (uint16_t)rt2 + var) >> 1;           \
+    tempB = ((uint16_t)rs1 - (uint16_t)rt1 + var) >> 1;           \
+    tempA = ((uint16_t)rs0 - (uint16_t)rt0 + var) >> 1;           \
+    /* Sign-extend the 32-bit result, as helper_addsc does. */    \
+    return (target_long)(int32_t)(((uint32_t)tempD << 24) |       \
+        ((uint32_t)tempC << 16) | ((uint32_t)tempB << 8) | tempA); \
+}
+
+SUBUH_QB(subuh, 0);
+SUBUH_QB(subuh_r, 1);
+
+#undef SUBUH_QB
+
+target_ulong helper_addsc(target_ulong rs, target_ulong rt, CPUMIPSState *env)
+{
+    uint64_t temp, tempRs, tempRt;
+    bool flag;
+    /* Add rs and rt, each masked with MIPSDSP_LLO, in 64-bit arithmetic. */
+    tempRs = (uint64_t)rs & MIPSDSP_LLO;
+    tempRt = (uint64_t)rt & MIPSDSP_LLO;
+
+    temp = tempRs + tempRt;
+    flag = (temp & 0x0100000000ull) >> 32;  /* bit 32 of the sum */
+    set_DSPControl_carryflag(flag, env);
+    /* The masked sum goes through int32_t before widening to target_long. */
+    return (target_long)(int32_t)(temp & MIPSDSP_LLO);
+}
+
+target_ulong helper_addwc(target_ulong rs, target_ulong rt, CPUMIPSState *env)
+{
+    uint32_t rd;
+    int32_t temp32, temp31;
+    int64_t tempL;
+    /* Signed 32-bit rs + rt + get_DSPControl_carryflag(), done in 64 bits. */
+    tempL = (int64_t)(int32_t)rs + (int64_t)(int32_t)rt +
+        get_DSPControl_carryflag(env);
+    temp31 = (tempL >> 31) & 0x01;
+    temp32 = (tempL >> 32) & 0x01;
+    /* Bits 31 and 32 differ when the sum does not fit in a signed 32 bits. */
+    if (temp31 != temp32) {
+        set_DSPControl_overflow_flag(1, 20, env);
+    }
+
+    rd = tempL & MIPSDSP_LLO;
+    /* rd goes through int32_t before widening to target_long. */
+    return (target_long)(int32_t)rd;
+}
+
+target_ulong helper_modsub(target_ulong rs, target_ulong rt)
+{
+    int32_t decr;
+    uint16_t lastindex;
+    target_ulong rd;
+    /* rt supplies decr (rt & MIPSDSP_Q0) and lastindex (from rt >> 8). */
+    decr = rt & MIPSDSP_Q0;
+    lastindex = (rt >> 8) & MIPSDSP_LO;
+    /* If rs & MIPSDSP_LLO is zero return lastindex, otherwise rs - decr. */
+    if ((rs & MIPSDSP_LLO) == 0x00000000) {
+        rd = (target_ulong)lastindex;
+    } else {
+        rd = rs - decr;
+    }
+
+    return rd;
+}
+
+target_ulong helper_raddu_w_qb(target_ulong rs)
+{
+    target_ulong ret = 0;
+    DSP32Value ds;
+    unsigned int i;
+    /* Store rs in ds.uw[0], then return the plain sum of ds.ub[0..3]. */
+    ds.uw[0] = rs;
+    for (i = 0; i < 4; i++) {
+        ret += ds.ub[i];
+    }
+    return ret;
+}
+
+#if defined(TARGET_MIPS64)
+target_ulong helper_raddu_l_ob(target_ulong rs)
+{
+    target_ulong ret = 0;
+    DSP64Value ds;
+    unsigned int i;
+    /* Store rs in ds.ul[0], then return the plain sum of ds.ub[0..7]. */
+    ds.ul[0] = rs;
+    for (i = 0; i < 8; i++) {
+        ret += ds.ub[i];
+    }
+    return ret;
+}
+#endif
+
+#define PRECR_QB_PH(name, a, b)\
+target_ulong helper_##name##_qb_ph(target_ulong rs, target_ulong rt) \
+{                                                                    \
+    uint8_t tempD, tempC, tempB, tempA;                              \
+    /* Take the fields at bit offsets a and b of rs, then of rt. */  \
+    tempD = (rs >> a) & MIPSDSP_Q0;                                  \
+    tempC = (rs >> b) & MIPSDSP_Q0;                                  \
+    tempB = (rt >> a) & MIPSDSP_Q0;                                  \
+    tempA = (rt >> b) & MIPSDSP_Q0;                                  \
+    /* The rs fields are passed first, the rt fields last. */        \
+    return MIPSDSP_RETURN32_8(tempD, tempC, tempB, tempA);           \
+}
+
+PRECR_QB_PH(precr, 16, 0);
+PRECR_QB_PH(precrq, 24, 8);
+
+#undef PRECR_QB_PH
+
+target_ulong helper_precr_sra_ph_w(uint32_t sa, target_ulong rs,
+                                   target_ulong rt)
+{
+    uint16_t tempB, tempA;
+    /* Shift rt and rs right by sa as int32_t values; keep 16 bits of each. */
+    tempB = ((int32_t)rt >> sa) & MIPSDSP_LO;
+    tempA = ((int32_t)rs >> sa) & MIPSDSP_LO;
+    /* The half taken from rt is passed first, the one from rs second. */
+    return MIPSDSP_RETURN32_16(tempB, tempA);
+}
+
+target_ulong helper_precr_sra_r_ph_w(uint32_t sa,
+                                     target_ulong rs, target_ulong rt)
+{
+    uint64_t tempB, tempA;
+    /* Rounding form: keep one extra bit, add 1, then drop that bit. */
+    /* sa == 0 is special-cased: (sa - 1) would be a bad shift count. */
+    if (sa == 0) {
+        tempB = (rt & MIPSDSP_LO) << 1;
+        tempA = (rs & MIPSDSP_LO) << 1;
+    } else {
+        tempB = (int64_t)((int32_t)rt >> (sa - 1)) + 1;  /* 64-bit add: */
+        tempA = (int64_t)((int32_t)rs >> (sa - 1)) + 1;  /* no int overflow */
+    }
+    rt = (((tempB >> 1) & MIPSDSP_LO) << 16) | ((tempA >> 1) & MIPSDSP_LO);
+    /* rt's half sits above rs's half; widen through int32_t. */
+    return (target_long)(int32_t)rt;
+}
+
+target_ulong helper_precrq_ph_w(target_ulong rs, target_ulong rt)
+{
+    uint16_t tempB, tempA;
+    /* Take (x & MIPSDSP_HI) >> 16 from rs and from rt. */
+    tempB = (rs & MIPSDSP_HI) >> 16;
+    tempA = (rt & MIPSDSP_HI) >> 16;
+    /* The half taken from rs is passed first, the one from rt second. */
+    return MIPSDSP_RETURN32_16(tempB, tempA);
+}
+
+target_ulong helper_precrq_rs_ph_w(target_ulong rs, target_ulong rt,
+                                   CPUMIPSState *env)
+{
+    uint16_t tempB, tempA;
+    /* Reduce rs and rt to 16 bits via mipsdsp_trunc16_sat16_round(). */
+    tempB = mipsdsp_trunc16_sat16_round(rs, env);
+    tempA = mipsdsp_trunc16_sat16_round(rt, env);
+    /* The half derived from rs is passed first, the one from rt second. */
+    return MIPSDSP_RETURN32_16(tempB, tempA);
+}
+
+#if defined(TARGET_MIPS64)
+target_ulong helper_precr_ob_qh(target_ulong rs, target_ulong rt)
+{
+    uint8_t rs6, rs4, rs2, rs0;
+    uint8_t rt6, rt4, rt2, rt0;
+    uint64_t temp;
+    /* Fields at bit offsets 48/32/16/0 of each operand, stored as uint8_t. */
+    rs6 = (rs >> 48) & MIPSDSP_Q0;
+    rs4 = (rs >> 32) & MIPSDSP_Q0;
+    rs2 = (rs >> 16) & MIPSDSP_Q0;
+    rs0 = rs & MIPSDSP_Q0;
+    rt6 = (rt >> 48) & MIPSDSP_Q0;
+    rt4 = (rt >> 32) & MIPSDSP_Q0;
+    rt2 = (rt >> 16) & MIPSDSP_Q0;
+    rt0 = rt & MIPSDSP_Q0;
+    /* The rs bytes fill bits 63:32 of the result, the rt bytes bits 31:0. */
+    temp = ((uint64_t)rs6 << 56) | ((uint64_t)rs4 << 48) |
+           ((uint64_t)rs2 << 40) | ((uint64_t)rs0 << 32) |
+           ((uint64_t)rt6 << 24) | ((uint64_t)rt4 << 16) |
+           ((uint64_t)rt2 << 8) | (uint64_t)rt0;
+
+    return temp;
+}
+
+#define PRECR_QH_PW(name, var) \
+target_ulong helper_precr_##name##_qh_pw(target_ulong rs, target_ulong rt, \
+                                    uint32_t sa)                      \
+{                                                                     \
+    uint16_t rs3, rs2, rs1, rs0;                                      \
+    uint16_t rt3, rt2, rt1, rt0;                                      \
+    uint16_t tempD, tempC, tempB, tempA;                              \
+    /* var is 0 for sra and 1 for sra_r (see the uses below). */      \
+    MIPSDSP_SPLIT64_16(rs, rs3, rs2, rs1, rs0);                       \
+    MIPSDSP_SPLIT64_16(rt, rt3, rt2, rt1, rt0);                       \
+                                                                      \
+    /* When sa = 0, we use rt2, rt0, rs2, rs0;                        \
+     * when sa != 0, we use rt3, rt1, rs3, rs1. */                    \
+    if (sa == 0) {                                                    \
+        tempD = rt2 << var;                                           \
+        tempC = rt0 << var;                                           \
+        tempB = rs2 << var;                                           \
+        tempA = rs0 << var;                                           \
+    } else {                                                          \
+        tempD = (((int16_t)rt3 >> sa) + var) >> var;                  \
+        tempC = (((int16_t)rt1 >> sa) + var) >> var;                  \
+        tempB = (((int16_t)rs3 >> sa) + var) >> var;                  \
+        tempA = (((int16_t)rs1 >> sa) + var) >> var;                  \
+    }                                                                 \
+    /* NOTE(review): sa == 0 never undoes the << var; check spec. */  \
+    return MIPSDSP_RETURN64_16(tempD, tempC, tempB, tempA);           \
+}
+
+PRECR_QH_PW(sra, 0);
+PRECR_QH_PW(sra_r, 1);
+
+#undef PRECR_QH_PW
+
+target_ulong helper_precrq_ob_qh(target_ulong rs, target_ulong rt)
+{
+    uint8_t rs6, rs4, rs2, rs0;
+    uint8_t rt6, rt4, rt2, rt0;
+    uint64_t temp;
+    /* Fields at bit offsets 56/40/24/8 of each operand, stored as uint8_t. */
+    rs6 = (rs >> 56) & MIPSDSP_Q0;
+    rs4 = (rs >> 40) & MIPSDSP_Q0;
+    rs2 = (rs >> 24) & MIPSDSP_Q0;
+    rs0 = (rs >> 8) & MIPSDSP_Q0;
+    rt6 = (rt >> 56) & MIPSDSP_Q0;
+    rt4 = (rt >> 40) & MIPSDSP_Q0;
+    rt2 = (rt >> 24) & MIPSDSP_Q0;
+    rt0 = (rt >> 8) & MIPSDSP_Q0;
+    /* The rs bytes fill bits 63:32 of the result, the rt bytes bits 31:0. */
+    temp = ((uint64_t)rs6 << 56) | ((uint64_t)rs4 << 48) |
+           ((uint64_t)rs2 << 40) | ((uint64_t)rs0 << 32) |
+           ((uint64_t)rt6 << 24) | ((uint64_t)rt4 << 16) |
+           ((uint64_t)rt2 << 8) | (uint64_t)rt0;
+
+    return temp;
+}
+
+target_ulong helper_precrq_qh_pw(target_ulong rs, target_ulong rt)
+{
+    uint16_t tempD, tempC, tempB, tempA;
+    /* 16-bit fields at bit offsets 48 and 16 of rs, then of rt. */
+    tempD = (rs >> 48) & MIPSDSP_LO;
+    tempC = (rs >> 16) & MIPSDSP_LO;
+    tempB = (rt >> 48) & MIPSDSP_LO;
+    tempA = (rt >> 16) & MIPSDSP_LO;
+    /* Packed as rs fields (bits 63:32) followed by rt fields (bits 31:0). */
+    return MIPSDSP_RETURN64_16(tempD, tempC, tempB, tempA);
+}
+
+target_ulong helper_precrq_rs_qh_pw(target_ulong rs, target_ulong rt,
+                                    CPUMIPSState *env)
+{
+    uint32_t rs2, rs0;
+    uint32_t rt2, rt0;
+    uint16_t tempD, tempC, tempB, tempA;
+    /* Split each operand into the words at bit offsets 32 and 0. */
+    rs2 = (rs >> 32) & MIPSDSP_LLO;
+    rs0 = rs & MIPSDSP_LLO;
+    rt2 = (rt >> 32) & MIPSDSP_LLO;
+    rt0 = rt & MIPSDSP_LLO;
+    /* Reduce every word to 16 bits via mipsdsp_trunc16_sat16_round(). */
+    tempD = mipsdsp_trunc16_sat16_round(rs2, env);
+    tempC = mipsdsp_trunc16_sat16_round(rs0, env);
+    tempB = mipsdsp_trunc16_sat16_round(rt2, env);
+    tempA = mipsdsp_trunc16_sat16_round(rt0, env);
+    /* Packed as rs results (bits 63:32) followed by rt results (31:0). */
+    return MIPSDSP_RETURN64_16(tempD, tempC, tempB, tempA);
+}
+
+target_ulong helper_precrq_pw_l(target_ulong rs, target_ulong rt)
+{
+    uint32_t tempB, tempA;
+    /* Take the word at bit offset 32 of rs and of rt. */
+    tempB = (rs >> 32) & MIPSDSP_LLO;
+    tempA = (rt >> 32) & MIPSDSP_LLO;
+    /* The word from rs becomes bits 63:32, the word from rt bits 31:0. */
+    return MIPSDSP_RETURN64_32(tempB, tempA);
+}
+#endif
+
+target_ulong helper_precrqu_s_qb_ph(target_ulong rs, target_ulong rt,
+                                    CPUMIPSState *env)
+{
+    uint8_t  tempD, tempC, tempB, tempA;
+    uint16_t rsh, rsl, rth, rtl;
+    /* Split rs and rt into a high (HI mask >> 16) and a low (LO mask) half. */
+    rsh = (rs & MIPSDSP_HI) >> 16;
+    rsl =  rs & MIPSDSP_LO;
+    rth = (rt & MIPSDSP_HI) >> 16;
+    rtl =  rt & MIPSDSP_LO;
+    /* Reduce every half to 8 bits via mipsdsp_sat8_reduce_precision(). */
+    tempD = mipsdsp_sat8_reduce_precision(rsh, env);
+    tempC = mipsdsp_sat8_reduce_precision(rsl, env);
+    tempB = mipsdsp_sat8_reduce_precision(rth, env);
+    tempA = mipsdsp_sat8_reduce_precision(rtl, env);
+    /* The rs results are passed first, the rt results last. */
+    return MIPSDSP_RETURN32_8(tempD, tempC, tempB, tempA);
+}
+
+#if defined(TARGET_MIPS64)
+target_ulong helper_precrqu_s_ob_qh(target_ulong rs, target_ulong rt,
+                                    CPUMIPSState *env)
+{
+    int i;
+    uint16_t rs3, rs2, rs1, rs0;
+    uint16_t rt3, rt2, rt1, rt0;
+    uint8_t temp[8];
+    uint64_t result;
+
+    result = 0;
+
+    MIPSDSP_SPLIT64_16(rs, rs3, rs2, rs1, rs0);
+    MIPSDSP_SPLIT64_16(rt, rt3, rt2, rt1, rt0);
+    /* rs results go to temp[7..4], rt results to temp[3..0]. */
+    temp[7] = mipsdsp_sat8_reduce_precision(rs3, env);
+    temp[6] = mipsdsp_sat8_reduce_precision(rs2, env);
+    temp[5] = mipsdsp_sat8_reduce_precision(rs1, env);
+    temp[4] = mipsdsp_sat8_reduce_precision(rs0, env);
+    temp[3] = mipsdsp_sat8_reduce_precision(rt3, env);
+    temp[2] = mipsdsp_sat8_reduce_precision(rt2, env);
+    temp[1] = mipsdsp_sat8_reduce_precision(rt1, env);
+    temp[0] = mipsdsp_sat8_reduce_precision(rt0, env);
+    /* temp[i] becomes byte i (bits 8*i+7 : 8*i) of the result. */
+    for (i = 0; i < 8; i++) {
+        result |= (uint64_t)temp[i] << (8 * i);
+    }
+
+    return result;
+}
+
+#define PRECEQ_PW(name, a, b) \
+target_ulong helper_preceq_pw_##name(target_ulong rt) \
+{                                                       \
+    uint16_t tempB, tempA;                              \
+    uint32_t tempBI, tempAI;                            \
+    /* 16-bit fields at bit offsets a and b of rt. */   \
+    tempB = (rt >> a) & MIPSDSP_LO;                     \
+    tempA = (rt >> b) & MIPSDSP_LO;                     \
+    /* Move each field to bits 31:16 of a word. */      \
+    tempBI = (uint32_t)tempB << 16;                     \
+    tempAI = (uint32_t)tempA << 16;                     \
+                                                        \
+    return MIPSDSP_RETURN64_32(tempBI, tempAI);         \
+}
+
+PRECEQ_PW(qhl, 48, 32);
+PRECEQ_PW(qhr, 16, 0);
+PRECEQ_PW(qhla, 48, 16);
+PRECEQ_PW(qhra, 32, 0);
+
+#undef PRECEQ_PW
+
+#endif
+
+#define PRECEQU_PH(name, a, b) \
+target_ulong helper_precequ_ph_##name(target_ulong rt) \
+{                                                        \
+    uint16_t tempB, tempA;                               \
+    /* rt fields at offsets a and b (Q0 mask). */        \
+    tempB = (rt >> a) & MIPSDSP_Q0;                      \
+    tempA = (rt >> b) & MIPSDSP_Q0;                      \
+    /* Each field is then shifted left by 7. */          \
+    tempB = tempB << 7;                                  \
+    tempA = tempA << 7;                                  \
+                                                         \
+    return MIPSDSP_RETURN32_16(tempB, tempA);            \
+}
+
+PRECEQU_PH(qbl, 24, 16);
+PRECEQU_PH(qbr, 8, 0);
+PRECEQU_PH(qbla, 24, 8);
+PRECEQU_PH(qbra, 16, 0);
+
+#undef PRECEQU_PH
+
+#if defined(TARGET_MIPS64)
+#define PRECEQU_QH(name, a, b, c, d) \
+target_ulong helper_precequ_qh_##name(target_ulong rt)       \
+{                                                            \
+    uint16_t tempD, tempC, tempB, tempA;                     \
+    /* rt fields at offsets a, b, c and d (Q0 mask). */      \
+    tempD = (rt >> a) & MIPSDSP_Q0;                          \
+    tempC = (rt >> b) & MIPSDSP_Q0;                          \
+    tempB = (rt >> c) & MIPSDSP_Q0;                          \
+    tempA = (rt >> d) & MIPSDSP_Q0;                          \
+    /* Each field is then shifted left by 7. */              \
+    tempD = tempD << 7;                                      \
+    tempC = tempC << 7;                                      \
+    tempB = tempB << 7;                                      \
+    tempA = tempA << 7;                                      \
+                                                             \
+    return MIPSDSP_RETURN64_16(tempD, tempC, tempB, tempA);  \
+}
+
+PRECEQU_QH(obl, 56, 48, 40, 32);
+PRECEQU_QH(obr, 24, 16, 8, 0);
+PRECEQU_QH(obla, 56, 40, 24, 8);
+PRECEQU_QH(obra, 48, 32, 16, 0);
+
+#undef PRECEQU_QH
+
+#endif
+
+#define PRECEU_PH(name, a, b) \
+target_ulong helper_preceu_ph_##name(target_ulong rt) \
+{                                                     \
+    uint16_t tempB, tempA;                            \
+    /* rt fields at offsets a and b (Q0 mask). */     \
+    tempB = (rt >> a) & MIPSDSP_Q0;                   \
+    tempA = (rt >> b) & MIPSDSP_Q0;                   \
+    /* The fields are packed without any shift. */    \
+    return MIPSDSP_RETURN32_16(tempB, tempA);         \
+}
+
+PRECEU_PH(qbl, 24, 16);
+PRECEU_PH(qbr, 8, 0);
+PRECEU_PH(qbla, 24, 8);
+PRECEU_PH(qbra, 16, 0);
+
+#undef PRECEU_PH
+
+#if defined(TARGET_MIPS64)
+#define PRECEU_QH(name, a, b, c, d) \
+target_ulong helper_preceu_qh_##name(target_ulong rt)        \
+{                                                            \
+    uint16_t tempD, tempC, tempB, tempA;                     \
+    /* rt fields at offsets a, b, c and d (Q0 mask). */      \
+    tempD = (rt >> a) & MIPSDSP_Q0;                          \
+    tempC = (rt >> b) & MIPSDSP_Q0;                          \
+    tempB = (rt >> c) & MIPSDSP_Q0;                          \
+    tempA = (rt >> d) & MIPSDSP_Q0;                          \
+    /* The fields are packed without any shift. */           \
+    return MIPSDSP_RETURN64_16(tempD, tempC, tempB, tempA);  \
+}
+
+PRECEU_QH(obl, 56, 48, 40, 32);
+PRECEU_QH(obr, 24, 16, 8, 0);
+PRECEU_QH(obla, 56, 40, 24, 8);
+PRECEU_QH(obra, 48, 32, 16, 0);
+
+#undef PRECEU_QH
+
+#endif
+
+/** DSP GPR-Based Shift Sub-class insns **/
+#define SHIFT_QB(name, func) \
+target_ulong helper_##name##_qb(target_ulong sa, target_ulong rt) \
+{                                                                    \
+    uint8_t rt3, rt2, rt1, rt0;                                      \
+    /* Only the low 3 bits of sa are used as the shift amount. */    \
+    sa = sa & 0x07;                                                  \
+                                                                     \
+    MIPSDSP_SPLIT32_8(rt, rt3, rt2, rt1, rt0);                       \
+    /* Apply mipsdsp_##func to each of the four split bytes. */      \
+    rt3 = mipsdsp_##func(rt3, sa);                                   \
+    rt2 = mipsdsp_##func(rt2, sa);                                   \
+    rt1 = mipsdsp_##func(rt1, sa);                                   \
+    rt0 = mipsdsp_##func(rt0, sa);                                   \
+                                                                     \
+    return MIPSDSP_RETURN32_8(rt3, rt2, rt1, rt0);                   \
+}
+
+#define SHIFT_QB_ENV(name, func) \
+target_ulong helper_##name##_qb(target_ulong sa, target_ulong rt,\
+                                CPUMIPSState *env) \
+{                                                                    \
+    uint8_t rt3, rt2, rt1, rt0;                                      \
+    /* Same as SHIFT_QB, but env is forwarded to mipsdsp_##func. */  \
+    sa = sa & 0x07;                                                  \
+                                                                     \
+    MIPSDSP_SPLIT32_8(rt, rt3, rt2, rt1, rt0);                       \
+                                                                     \
+    rt3 = mipsdsp_##func(rt3, sa, env);                              \
+    rt2 = mipsdsp_##func(rt2, sa, env);                              \
+    rt1 = mipsdsp_##func(rt1, sa, env);                              \
+    rt0 = mipsdsp_##func(rt0, sa, env);                              \
+                                                                     \
+    return MIPSDSP_RETURN32_8(rt3, rt2, rt1, rt0);                   \
+}
+
+SHIFT_QB_ENV(shll, lshift8);
+SHIFT_QB(shrl, rshift_u8);
+
+SHIFT_QB(shra, rashift8);
+SHIFT_QB(shra_r, rnd8_rashift);
+
+#undef SHIFT_QB
+#undef SHIFT_QB_ENV
+
+#if defined(TARGET_MIPS64)
+#define SHIFT_OB(name, func) \
+target_ulong helper_##name##_ob(target_ulong rt, target_ulong sa) \
+{                                                                        \
+    int i;                                                               \
+    uint8_t rt_t[8];                                                     \
+    uint64_t temp;                                                       \
+    /* Only the low 3 bits of sa are used as the shift amount. */        \
+    sa = sa & 0x07;                                                      \
+    temp = 0;                                                            \
+    /* Shift byte i of rt and put it back at byte i of temp. */          \
+    for (i = 0; i < 8; i++) {                                            \
+        rt_t[i] = (rt >> (8 * i)) & MIPSDSP_Q0;                          \
+        rt_t[i] = mipsdsp_##func(rt_t[i], sa);                           \
+        temp |= (uint64_t)rt_t[i] << (8 * i);                            \
+    }                                                                    \
+                                                                         \
+    return temp;                                                         \
+}
+
+#define SHIFT_OB_ENV(name, func) \
+target_ulong helper_##name##_ob(target_ulong rt, target_ulong sa, \
+                                CPUMIPSState *env)                       \
+{                                                                        \
+    int i;                                                               \
+    uint8_t rt_t[8];                                                     \
+    uint64_t temp;                                                       \
+    /* Same as SHIFT_OB, but env is forwarded to mipsdsp_##func. */      \
+    sa = sa & 0x07;                                                      \
+    temp = 0;                                                            \
+                                                                         \
+    for (i = 0; i < 8; i++) {                                            \
+        rt_t[i] = (rt >> (8 * i)) & MIPSDSP_Q0;                          \
+        rt_t[i] = mipsdsp_##func(rt_t[i], sa, env);                      \
+        temp |= (uint64_t)rt_t[i] << (8 * i);                            \
+    }                                                                    \
+                                                                         \
+    return temp;                                                         \
+}
+
+SHIFT_OB_ENV(shll, lshift8);
+SHIFT_OB(shrl, rshift_u8);
+
+SHIFT_OB(shra, rashift8);
+SHIFT_OB(shra_r, rnd8_rashift);
+
+#undef SHIFT_OB
+#undef SHIFT_OB_ENV
+
+#endif
+
+#define SHIFT_PH(name, func) \
+target_ulong helper_##name##_ph(target_ulong sa, target_ulong rt, \
+                                CPUMIPSState *env)                \
+{                                                                 \
+    uint16_t rth, rtl;                                            \
+    /* Only the low 4 bits of sa are used as the shift amount. */ \
+    sa = sa & 0x0F;                                               \
+                                                                  \
+    MIPSDSP_SPLIT32_16(rt, rth, rtl);                             \
+    /* Both halves of rt get the same shift. */                   \
+    rth = mipsdsp_##func(rth, sa, env);                           \
+    rtl = mipsdsp_##func(rtl, sa, env);                           \
+                                                                  \
+    return MIPSDSP_RETURN32_16(rth, rtl);                         \
+}
+
+SHIFT_PH(shll, lshift16);
+SHIFT_PH(shll_s, sat16_lshift);
+
+#undef SHIFT_PH
+
+#if defined(TARGET_MIPS64)
+#define SHIFT_QH(name, func) \
+target_ulong helper_##name##_qh(target_ulong rt, target_ulong sa) \
+{                                                                 \
+    uint16_t rt3, rt2, rt1, rt0;                                  \
+    /* Only the low 4 bits of sa are used as the shift amount. */ \
+    sa = sa & 0x0F;                                               \
+                                                                  \
+    MIPSDSP_SPLIT64_16(rt, rt3, rt2, rt1, rt0);                   \
+    /* All four split fields of rt get the same shift. */         \
+    rt3 = mipsdsp_##func(rt3, sa);                                \
+    rt2 = mipsdsp_##func(rt2, sa);                                \
+    rt1 = mipsdsp_##func(rt1, sa);                                \
+    rt0 = mipsdsp_##func(rt0, sa);                                \
+                                                                  \
+    return MIPSDSP_RETURN64_16(rt3, rt2, rt1, rt0);               \
+}
+
+#define SHIFT_QH_ENV(name, func) \
+target_ulong helper_##name##_qh(target_ulong rt, target_ulong sa, \
+                                CPUMIPSState *env)                \
+{                                                                 \
+    uint16_t rt3, rt2, rt1, rt0;                                  \
+    /* Same as SHIFT_QH, but env is forwarded to the helper. */   \
+    sa = sa & 0x0F;                                               \
+                                                                  \
+    MIPSDSP_SPLIT64_16(rt, rt3, rt2, rt1, rt0);                   \
+                                                                  \
+    rt3 = mipsdsp_##func(rt3, sa, env);                           \
+    rt2 = mipsdsp_##func(rt2, sa, env);                           \
+    rt1 = mipsdsp_##func(rt1, sa, env);                           \
+    rt0 = mipsdsp_##func(rt0, sa, env);                           \
+                                                                  \
+    return MIPSDSP_RETURN64_16(rt3, rt2, rt1, rt0);               \
+}
+
+SHIFT_QH_ENV(shll, lshift16);
+SHIFT_QH_ENV(shll_s, sat16_lshift);
+
+SHIFT_QH(shrl, rshift_u16);
+SHIFT_QH(shra, rashift16);
+SHIFT_QH(shra_r, rnd16_rashift);
+
+#undef SHIFT_QH
+#undef SHIFT_QH_ENV
+
+#endif
+
+#define SHIFT_W(name, func) \
+target_ulong helper_##name##_w(target_ulong sa, target_ulong rt) \
+{                                                                       \
+    /* Shift rt by sa & 0x1F and return the result via int32_t. */      \
+    uint32_t shifted;                                                   \
+                                                                        \
+    shifted = mipsdsp_##func(rt, sa & 0x1F);                            \
+                                                                        \
+    return (target_long)(int32_t)shifted;                               \
+}
+
+#define SHIFT_W_ENV(name, func) \
+target_ulong helper_##name##_w(target_ulong sa, target_ulong rt, \
+                               CPUMIPSState *env) \
+{                                                                       \
+    /* Shift rt by sa & 0x1F; func additionally receives env. */        \
+    uint32_t shifted;                                                   \
+                                                                        \
+    sa &= 0x1F;                                                         \
+    shifted = mipsdsp_##func(rt, sa, env);                              \
+    return (target_long)(int32_t)shifted;                               \
+}
+
+SHIFT_W_ENV(shll_s, sat32_lshift);
+SHIFT_W(shra_r, rnd32_rashift);
+
+#undef SHIFT_W
+#undef SHIFT_W_ENV
+
+#if defined(TARGET_MIPS64)
+#define SHIFT_PW(name, func) \
+target_ulong helper_##name##_pw(target_ulong rt, target_ulong sa) \
+{                                                                 \
+    /* Shift both 32-bit words of rt by the low 5 bits of sa. */  \
+    uint32_t word1, word0;                                        \
+                                                                  \
+    sa &= 0x1F;                                                   \
+    MIPSDSP_SPLIT64_32(rt, word1, word0);                         \
+    word1 = mipsdsp_##func(word1, sa);                            \
+    word0 = mipsdsp_##func(word0, sa);                            \
+                                                                  \
+    return MIPSDSP_RETURN64_32(word1, word0);                     \
+}
+
+#define SHIFT_PW_ENV(name, func) \
+target_ulong helper_##name##_pw(target_ulong rt, target_ulong sa, \
+                                CPUMIPSState *env)                \
+{                                                                 \
+    /* Shift both 32-bit words of rt by sa & 0x1F (with env). */  \
+    uint32_t word1, word0;                                        \
+                                                                  \
+    sa &= 0x1F;                                                   \
+    MIPSDSP_SPLIT64_32(rt, word1, word0);                         \
+    word1 = mipsdsp_##func(word1, sa, env);                       \
+    word0 = mipsdsp_##func(word0, sa, env);                       \
+                                                                  \
+    return MIPSDSP_RETURN64_32(word1, word0);                     \
+}
+
+SHIFT_PW_ENV(shll, lshift32);
+SHIFT_PW_ENV(shll_s, sat32_lshift);
+
+SHIFT_PW(shra, rashift32);
+SHIFT_PW(shra_r, rnd32_rashift);
+
+#undef SHIFT_PW
+#undef SHIFT_PW_ENV
+
+#endif
+
+#define SHIFT_PH(name, func) \
+target_ulong helper_##name##_ph(target_ulong sa, target_ulong rt) \
+{                                                                    \
+    /* Shift both 16-bit halves of rt by the low 4 bits of sa. */    \
+    uint16_t half1, half0;                                           \
+                                                                     \
+    sa &= 0x0F;                                                      \
+    MIPSDSP_SPLIT32_16(rt, half1, half0);                            \
+                                                                     \
+    half1 = mipsdsp_##func(half1, sa);                               \
+    half0 = mipsdsp_##func(half0, sa);                               \
+                                                                     \
+    return MIPSDSP_RETURN32_16(half1, half0);                        \
+}
+
+SHIFT_PH(shrl, rshift_u16);
+SHIFT_PH(shra, rashift16);
+SHIFT_PH(shra_r, rnd16_rashift);
+
+#undef SHIFT_PH
+
+/** DSP Multiply Sub-class insns **/
+/* The return value is made up of two 16-bit results.
+ * FIXME: give the macro a better name.
+ */
+#define MUL_RETURN32_16_PH(name, func, \
+                           rsmov1, rsmov2, rsfilter, \
+                           rtmov1, rtmov2, rtfilter) \
+target_ulong helper_##name(target_ulong rs, target_ulong rt, \
+                           CPUMIPSState *env)                \
+{                                                            \
+    /* func is applied to two (rs field, rt field) pairs. */ \
+    uint16_t rs_1 = (rs >> rsmov1) & rsfilter;               \
+    uint16_t rs_2 = (rs >> rsmov2) & rsfilter;               \
+    uint16_t rt_1 = (rt >> rtmov1) & rtfilter;               \
+    uint16_t rt_2 = (rt >> rtmov2) & rtfilter;               \
+    uint16_t out_1, out_2;                                   \
+                                                             \
+    out_1 = mipsdsp_##func(rs_1, rt_1, env);                 \
+    out_2 = mipsdsp_##func(rs_2, rt_2, env);                 \
+                                                             \
+    return MIPSDSP_RETURN32_16(out_1, out_2);                \
+}
+
+MUL_RETURN32_16_PH(muleu_s_ph_qbl, mul_u8_u16, \
+                      24, 16, MIPSDSP_Q0, \
+                      16, 0, MIPSDSP_LO);
+MUL_RETURN32_16_PH(muleu_s_ph_qbr, mul_u8_u16, \
+                      8, 0, MIPSDSP_Q0, \
+                      16, 0, MIPSDSP_LO);
+MUL_RETURN32_16_PH(mulq_rs_ph, rndq15_mul_q15_q15, \
+                      16, 0, MIPSDSP_LO, \
+                      16, 0, MIPSDSP_LO);
+MUL_RETURN32_16_PH(mul_ph, mul_i16_i16, \
+                      16, 0, MIPSDSP_LO, \
+                      16, 0, MIPSDSP_LO);
+MUL_RETURN32_16_PH(mul_s_ph, sat16_mul_i16_i16, \
+                      16, 0, MIPSDSP_LO, \
+                      16, 0, MIPSDSP_LO);
+MUL_RETURN32_16_PH(mulq_s_ph, sat16_mul_q15_q15, \
+                      16, 0, MIPSDSP_LO, \
+                      16, 0, MIPSDSP_LO);
+
+#undef MUL_RETURN32_16_PH
+
+#define MUL_RETURN32_32_ph(name, func, movbits) \
+target_ulong helper_##name(target_ulong rs, target_ulong rt, \
+                                  CPUMIPSState *env)         \
+{                                                            \
+    /* Run func on the 16-bit fields at bit movbits. */      \
+    int16_t lhs = (rs >> movbits) & MIPSDSP_LO;              \
+    int16_t rhs = (rt >> movbits) & MIPSDSP_LO;              \
+    int32_t result;                                          \
+                                                             \
+    result = mipsdsp_##func(lhs, rhs, env);                  \
+                                                             \
+    return (target_long)(int32_t)result;                     \
+}
+
+MUL_RETURN32_32_ph(muleq_s_w_phl, mul_q15_q15_overflowflag21, 16);
+MUL_RETURN32_32_ph(muleq_s_w_phr, mul_q15_q15_overflowflag21, 0);
+
+#undef MUL_RETURN32_32_ph
+
+#define MUL_VOID_PH(name, use_ac_env) \
+void helper_##name(uint32_t ac, target_ulong rs, target_ulong rt,        \
+                          CPUMIPSState *env)                             \
+{                                                                        \
+    int16_t rsh, rsl, rth, rtl;                                          \
+    int32_t tempB, tempA;                                                \
+    int64_t acc, dotp;                                                   \
+    /* Adds (tempB - tempA) to the accumulator held in HI[ac]:LO[ac]. */ \
+    MIPSDSP_SPLIT32_16(rs, rsh, rsl);                                    \
+    MIPSDSP_SPLIT32_16(rt, rth, rtl);                                    \
+    /* use_ac_env == 1 selects the q15 multiply, else the u16 one. */    \
+    if (use_ac_env == 1) {                                               \
+        tempB = mipsdsp_mul_q15_q15(ac, rsh, rth, env);                  \
+        tempA = mipsdsp_mul_q15_q15(ac, rsl, rtl, env);                  \
+    } else {                                                             \
+        tempB = mipsdsp_mul_u16_u16(rsh, rth);                           \
+        tempA = mipsdsp_mul_u16_u16(rsl, rtl);                           \
+    }                                                                    \
+                                                                         \
+    dotp = (int64_t)tempB - (int64_t)tempA;                              \
+    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |                      \
+          ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);               \
+    dotp = dotp + acc;                                                   \
+    env->active_tc.HI[ac] = (target_long)(int32_t)                       \
+                            ((dotp & MIPSDSP_LHI) >> 32);                \
+    env->active_tc.LO[ac] = (target_long)(int32_t)(dotp & MIPSDSP_LLO);  \
+}
+
+MUL_VOID_PH(mulsaq_s_w_ph, 1);
+MUL_VOID_PH(mulsa_w_ph, 0);
+
+#undef MUL_VOID_PH
+
+#if defined(TARGET_MIPS64)
+#define MUL_RETURN64_16_QH(name, func, \
+                           rsmov1, rsmov2, rsmov3, rsmov4, rsfilter, \
+                           rtmov1, rtmov2, rtmov3, rtmov4, rtfilter) \
+target_ulong helper_##name(target_ulong rs, target_ulong rt,         \
+                           CPUMIPSState *env)                        \
+{                                                                    \
+    /* Per lane: extract an rs and an rt field, then call func. */   \
+    uint16_t rs_lane, rt_lane, out3, out2, out1, out0;               \
+                                                                     \
+    rs_lane = (rs >> rsmov1) & rsfilter;                             \
+    rt_lane = (rt >> rtmov1) & rtfilter;                             \
+    out3 = mipsdsp_##func(rs_lane, rt_lane, env);                    \
+                                                                     \
+    rs_lane = (rs >> rsmov2) & rsfilter;                             \
+    rt_lane = (rt >> rtmov2) & rtfilter;                             \
+    out2 = mipsdsp_##func(rs_lane, rt_lane, env);                    \
+                                                                     \
+    rs_lane = (rs >> rsmov3) & rsfilter;                             \
+    rt_lane = (rt >> rtmov3) & rtfilter;                             \
+    out1 = mipsdsp_##func(rs_lane, rt_lane, env);                    \
+                                                                     \
+    rs_lane = (rs >> rsmov4) & rsfilter;                             \
+    rt_lane = (rt >> rtmov4) & rtfilter;                             \
+    out0 = mipsdsp_##func(rs_lane, rt_lane, env);                    \
+    return MIPSDSP_RETURN64_16(out3, out2, out1, out0);              \
+}
+
+MUL_RETURN64_16_QH(muleu_s_qh_obl, mul_u8_u16, \
+                   56, 48, 40, 32, MIPSDSP_Q0, \
+                   48, 32, 16, 0, MIPSDSP_LO);
+MUL_RETURN64_16_QH(muleu_s_qh_obr, mul_u8_u16, \
+                   24, 16, 8, 0, MIPSDSP_Q0, \
+                   48, 32, 16, 0, MIPSDSP_LO);
+MUL_RETURN64_16_QH(mulq_rs_qh, rndq15_mul_q15_q15, \
+                   48, 32, 16, 0, MIPSDSP_LO, \
+                   48, 32, 16, 0, MIPSDSP_LO);
+
+#undef MUL_RETURN64_16_QH
+
+#define MUL_RETURN64_32_QH(name, \
+                           rsmov1, rsmov2, \
+                           rtmov1, rtmov2) \
+target_ulong helper_##name(target_ulong rs, target_ulong rt, \
+                           CPUMIPSState *env)                \
+{                                                            \
+    /* Packs two mul_q15_q15 results into a 64-bit word: */  \
+    /* first one in the upper half, second in the lower. */  \
+    uint16_t rs_1 = (rs >> rsmov1) & MIPSDSP_LO;             \
+    uint16_t rs_2 = (rs >> rsmov2) & MIPSDSP_LO;             \
+    uint16_t rt_1 = (rt >> rtmov1) & MIPSDSP_LO;             \
+    uint16_t rt_2 = (rt >> rtmov2) & MIPSDSP_LO;             \
+    uint32_t upper, lower;                                   \
+                                                             \
+    /* NOTE(review): ac is hard-coded to 5; confirm why. */  \
+    upper = mipsdsp_mul_q15_q15(5, rs_1, rt_1, env);         \
+    lower = mipsdsp_mul_q15_q15(5, rs_2, rt_2, env);         \
+                                                             \
+    return ((uint64_t)upper << 32) | (uint64_t)lower;        \
+}
+
+MUL_RETURN64_32_QH(muleq_s_pw_qhl, 48, 32, 48, 32);
+MUL_RETURN64_32_QH(muleq_s_pw_qhr, 16, 0, 16, 0);
+
+#undef MUL_RETURN64_32_QH
+
+void helper_mulsaq_s_w_qh(target_ulong rs, target_ulong rt, uint32_t ac,
+                          CPUMIPSState *env)
+{
+    int16_t rs3, rs2, rs1, rs0;
+    int16_t rt3, rt2, rt1, rt0;
+    int32_t tempD, tempC, tempB, tempA;
+    int64_t acc[2];
+    int64_t temp[2];
+    int64_t temp_sum;
+    /* Adds (D - C) + (B - A) of four q15 products to HI[ac]:LO[ac]. */
+    MIPSDSP_SPLIT64_16(rs, rs3, rs2, rs1, rs0);
+    MIPSDSP_SPLIT64_16(rt, rt3, rt2, rt1, rt0);
+
+    tempD = mipsdsp_mul_q15_q15(ac, rs3, rt3, env);
+    tempC = mipsdsp_mul_q15_q15(ac, rs2, rt2, env);
+    tempB = mipsdsp_mul_q15_q15(ac, rs1, rt1, env);
+    tempA = mipsdsp_mul_q15_q15(ac, rs0, rt0, env);
+    /* Widen before subtracting: int32_t arithmetic could overflow here. */
+    temp[0] = ((int64_t)tempD - (int64_t)tempC) +
+              ((int64_t)tempB - (int64_t)tempA);
+    /* The sum fits in 34 bits, so bit 33 is its sign (extended below). */
+    if (((temp[0] >> 33) & 0x01) == 0) {
+        temp[1] = 0x00;
+    } else {
+        temp[1] = ~0ull;
+    }
+
+    acc[0] = env->active_tc.LO[ac];
+    acc[1] = env->active_tc.HI[ac];
+    /* 128-bit add: a carry out of the low 64 bits is moved into acc[1]. */
+    temp_sum = (uint64_t)acc[0] + (uint64_t)temp[0];
+    if (((uint64_t)temp_sum < (uint64_t)acc[0]) &&
+       ((uint64_t)temp_sum < (uint64_t)temp[0])) {
+        acc[1] += 1;
+    }
+    acc[0] = temp_sum;
+    acc[1] += temp[1];
+
+    env->active_tc.HI[ac] = acc[1];
+    env->active_tc.LO[ac] = acc[0];
+}
+#endif
+
+#define DP_QB(name, func, is_add, rsmov1, rsmov2, rtmov1, rtmov2) \
+void helper_##name(uint32_t ac, target_ulong rs, target_ulong rt,        \
+                   CPUMIPSState *env)                                    \
+{                                                                        \
+    uint8_t rs3, rs2;                                                    \
+    uint8_t rt3, rt2;                                                    \
+    uint16_t tempB, tempA;                                               \
+    uint64_t tempC, dotp;                                                \
+    /* Add (is_add) or subtract tempB + tempA to/from HI[ac]:LO[ac]. */  \
+    rs3 = (rs >> rsmov1) & MIPSDSP_Q0;                                   \
+    rs2 = (rs >> rsmov2) & MIPSDSP_Q0;                                   \
+    rt3 = (rt >> rtmov1) & MIPSDSP_Q0;                                   \
+    rt2 = (rt >> rtmov2) & MIPSDSP_Q0;                                   \
+    tempB = mipsdsp_##func(rs3, rt3);                                    \
+    tempA = mipsdsp_##func(rs2, rt2);                                    \
+    dotp = (int64_t)tempB + (int64_t)tempA;                              \
+    if (is_add) {                                                        \
+        tempC = (((uint64_t)env->active_tc.HI[ac] << 32) |               \
+                 ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO))        \
+            + dotp;                                                      \
+    } else {                                                             \
+        tempC = (((uint64_t)env->active_tc.HI[ac] << 32) |               \
+                 ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO))        \
+            - dotp;                                                      \
+    }                                                                    \
+    /* HI and LO are each written back through an int32_t cast. */       \
+    env->active_tc.HI[ac] = (target_long)(int32_t)                       \
+                            ((tempC & MIPSDSP_LHI) >> 32);               \
+    env->active_tc.LO[ac] = (target_long)(int32_t)(tempC & MIPSDSP_LLO); \
+}
+
+DP_QB(dpau_h_qbl, mul_u8_u8, 1, 24, 16, 24, 16);
+DP_QB(dpau_h_qbr, mul_u8_u8, 1, 8, 0, 8, 0);
+DP_QB(dpsu_h_qbl, mul_u8_u8, 0, 24, 16, 24, 16);
+DP_QB(dpsu_h_qbr, mul_u8_u8, 0, 8, 0, 8, 0);
+
+#undef DP_QB
+
+#if defined(TARGET_MIPS64)
+#define DP_OB(name, add_sub, \
+              rsmov1, rsmov2, rsmov3, rsmov4, \
+              rtmov1, rtmov2, rtmov3, rtmov4) \
+void helper_##name(target_ulong rs, target_ulong rt, uint32_t ac,       \
+                       CPUMIPSState *env)                               \
+{                                                                       \
+    uint8_t rsD, rsC, rsB, rsA;                                         \
+    uint8_t rtD, rtC, rtB, rtA;                                         \
+    uint16_t tempD, tempC, tempB, tempA;                                \
+    uint64_t temp[2];                                                   \
+    uint64_t acc[2];                                                    \
+    uint64_t temp_sum;                                                  \
+    /* In temp[] and acc[], [0] is the LO part and [1] the HI part. */  \
+    temp[0] = 0;                                                        \
+    temp[1] = 0;                                                        \
+                                                                        \
+    rsD = (rs >> rsmov1) & MIPSDSP_Q0;                                  \
+    rsC = (rs >> rsmov2) & MIPSDSP_Q0;                                  \
+    rsB = (rs >> rsmov3) & MIPSDSP_Q0;                                  \
+    rsA = (rs >> rsmov4) & MIPSDSP_Q0;                                  \
+    rtD = (rt >> rtmov1) & MIPSDSP_Q0;                                  \
+    rtC = (rt >> rtmov2) & MIPSDSP_Q0;                                  \
+    rtB = (rt >> rtmov3) & MIPSDSP_Q0;                                  \
+    rtA = (rt >> rtmov4) & MIPSDSP_Q0;                                  \
+                                                                        \
+    tempD = mipsdsp_mul_u8_u8(rsD, rtD);                                \
+    tempC = mipsdsp_mul_u8_u8(rsC, rtC);                                \
+    tempB = mipsdsp_mul_u8_u8(rsB, rtB);                                \
+    tempA = mipsdsp_mul_u8_u8(rsA, rtA);                                \
+    /* Sum of the four products, each zero-extended to 64 bits. */      \
+    temp[0] = (uint64_t)tempD + (uint64_t)tempC +                       \
+      (uint64_t)tempB + (uint64_t)tempA;                                \
+                                                                        \
+    acc[0] = env->active_tc.LO[ac];                                     \
+    acc[1] = env->active_tc.HI[ac];                                     \
+    /* add_sub selects add-with-carry or subtract-with-borrow. */       \
+    if (add_sub) {                                                      \
+        temp_sum = acc[0] + temp[0];                                    \
+        if (((uint64_t)temp_sum < (uint64_t)acc[0]) &&                  \
+            ((uint64_t)temp_sum < (uint64_t)temp[0])) {                 \
+            acc[1] += 1;                                                \
+        }                                                               \
+        temp[0] = temp_sum;                                             \
+        temp[1] = acc[1] + temp[1];                                     \
+    } else {                                                            \
+        temp_sum = acc[0] - temp[0];                                    \
+        if ((uint64_t)temp_sum > (uint64_t)acc[0]) {                    \
+            acc[1] -= 1;                                                \
+        }                                                               \
+        temp[0] = temp_sum;                                             \
+        temp[1] = acc[1] - temp[1];                                     \
+    }                                                                   \
+                                                                        \
+    env->active_tc.HI[ac] = temp[1];                                    \
+    env->active_tc.LO[ac] = temp[0];                                    \
+}
+
+DP_OB(dpau_h_obl, 1, 56, 48, 40, 32, 56, 48, 40, 32);
+DP_OB(dpau_h_obr, 1, 24, 16, 8, 0, 24, 16, 8, 0);
+DP_OB(dpsu_h_obl, 0, 56, 48, 40, 32, 56, 48, 40, 32);
+DP_OB(dpsu_h_obr, 0, 24, 16, 8, 0, 24, 16, 8, 0);
+
+#undef DP_OB
+#endif
+
+#define DP_NOFUNC_PH(name, is_add, rsmov1, rsmov2, rtmov1, rtmov2)             \
+void helper_##name(uint32_t ac, target_ulong rs, target_ulong rt,              \
+                   CPUMIPSState *env)                                          \
+{                                                                              \
+    int16_t rsB, rsA, rtB, rtA;                                                \
+    int32_t  tempA, tempB;                                                     \
+    int64_t  acc;                                                              \
+    /* Adds or subtracts rsB * rtB + rsA * rtA to/from HI[ac]:LO[ac]. */       \
+    rsB = (rs >> rsmov1) & MIPSDSP_LO;                                         \
+    rsA = (rs >> rsmov2) & MIPSDSP_LO;                                         \
+    rtB = (rt >> rtmov1) & MIPSDSP_LO;                                         \
+    rtA = (rt >> rtmov2) & MIPSDSP_LO;                                         \
+    /* The operands are int16_t; each product is formed in int32_t. */         \
+    tempB = (int32_t)rsB * (int32_t)rtB;                                       \
+    tempA = (int32_t)rsA * (int32_t)rtA;                                       \
+                                                                               \
+    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |                            \
+          ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);                     \
+                                                                               \
+    if (is_add) {                                                              \
+        acc = acc + ((int64_t)tempB + (int64_t)tempA);                         \
+    } else {                                                                   \
+        acc = acc - ((int64_t)tempB + (int64_t)tempA);                         \
+    }                                                                          \
+                                                                               \
+    env->active_tc.HI[ac] = (target_long)(int32_t)((acc & MIPSDSP_LHI) >> 32); \
+    env->active_tc.LO[ac] = (target_long)(int32_t)(acc & MIPSDSP_LLO);         \
+}
+
+DP_NOFUNC_PH(dpa_w_ph, 1, 16, 0, 16, 0);
+DP_NOFUNC_PH(dpax_w_ph, 1, 16, 0, 0, 16);
+DP_NOFUNC_PH(dps_w_ph, 0, 16, 0, 16, 0);
+DP_NOFUNC_PH(dpsx_w_ph, 0, 16, 0, 0, 16);
+#undef DP_NOFUNC_PH
+
+#define DP_HASFUNC_PH(name, is_add, rsmov1, rsmov2, rtmov1, rtmov2) \
+void helper_##name(uint32_t ac, target_ulong rs, target_ulong rt,   \
+                   CPUMIPSState *env)                      \
+{                                                          \
+    int16_t rsB, rsA, rtB, rtA;                            \
+    int32_t tempB, tempA;                                  \
+    int64_t acc, dotp;                                     \
+    /* Accumulates two mipsdsp_mul_q15_q15 products. */    \
+    rsB = (rs >> rsmov1) & MIPSDSP_LO;                     \
+    rsA = (rs >> rsmov2) & MIPSDSP_LO;                     \
+    rtB = (rt >> rtmov1) & MIPSDSP_LO;                     \
+    rtA = (rt >> rtmov2) & MIPSDSP_LO;                     \
+                                                           \
+    tempB = mipsdsp_mul_q15_q15(ac, rsB, rtB, env);        \
+    tempA = mipsdsp_mul_q15_q15(ac, rsA, rtA, env);        \
+                                                           \
+    dotp = (int64_t)tempB + (int64_t)tempA;                \
+    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |        \
+          ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO); \
+    /* is_add picks acc + dotp, otherwise acc - dotp. */   \
+    if (is_add) {                                          \
+        acc = acc + dotp;                                  \
+    } else {                                               \
+        acc = acc - dotp;                                  \
+    }                                                      \
+                                                           \
+    env->active_tc.HI[ac] = (target_long)(int32_t)         \
+        ((acc & MIPSDSP_LHI) >> 32);                       \
+    env->active_tc.LO[ac] = (target_long)(int32_t)         \
+        (acc & MIPSDSP_LLO);                               \
+}
+
+DP_HASFUNC_PH(dpaq_s_w_ph, 1, 16, 0, 16, 0);
+DP_HASFUNC_PH(dpaqx_s_w_ph, 1, 16, 0, 0, 16);
+DP_HASFUNC_PH(dpsq_s_w_ph, 0, 16, 0, 16, 0);
+DP_HASFUNC_PH(dpsqx_s_w_ph, 0, 16, 0, 0, 16);
+
+#undef DP_HASFUNC_PH
+
+#define DP_128OPERATION_PH(name, is_add) \
+void helper_##name(uint32_t ac, target_ulong rs, target_ulong rt, \
+                          CPUMIPSState *env)                             \
+{                                                                        \
+    int16_t rsh, rsl, rth, rtl;                                          \
+    int32_t tempB, tempA, tempC62_31, tempC63;                           \
+    int64_t acc, dotp, tempC;                                            \
+    /* Accumulates the products (rsh, rtl) and (rsl, rth), clamped. */   \
+    MIPSDSP_SPLIT32_16(rs, rsh, rsl);                                    \
+    MIPSDSP_SPLIT32_16(rt, rth, rtl);                                    \
+                                                                         \
+    tempB = mipsdsp_mul_q15_q15(ac, rsh, rtl, env);                      \
+    tempA = mipsdsp_mul_q15_q15(ac, rsl, rth, env);                      \
+                                                                         \
+    dotp = (int64_t)tempB + (int64_t)tempA;                              \
+    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |                      \
+          ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);               \
+    if (is_add) {                                                        \
+        tempC = acc + dotp;                                              \
+    } else {                                                             \
+        tempC = acc - dotp;                                              \
+    }                                                                    \
+    tempC63 = (tempC >> 63) & 0x01;                                      \
+    tempC62_31 = (tempC >> 31) & 0xFFFFFFFF;                             \
+    /* Bits 63..31 must all agree, else clamp and flag 16 + ac. */       \
+    if ((tempC63 == 0) && (tempC62_31 != 0x00000000)) {                  \
+        tempC = 0x7FFFFFFF;                                              \
+        set_DSPControl_overflow_flag(1, 16 + ac, env);                   \
+    }                                                                    \
+                                                                         \
+    if ((tempC63 == 1) && (tempC62_31 != 0xFFFFFFFF)) {                  \
+        tempC = (int64_t)(int32_t)0x80000000;                            \
+        set_DSPControl_overflow_flag(1, 16 + ac, env);                   \
+    }                                                                    \
+                                                                         \
+    env->active_tc.HI[ac] = (target_long)(int32_t)                       \
+        ((tempC & MIPSDSP_LHI) >> 32);                                   \
+    env->active_tc.LO[ac] = (target_long)(int32_t)                       \
+        (tempC & MIPSDSP_LLO);                                           \
+}
+
+DP_128OPERATION_PH(dpaqx_sa_w_ph, 1);
+DP_128OPERATION_PH(dpsqx_sa_w_ph, 0);
+
+#undef DP_128OPERATION_PH
+
+#if defined(TARGET_MIPS64)
+#define DP_QH(name, is_add, use_ac_env) \
+void helper_##name(target_ulong rs, target_ulong rt, uint32_t ac,    \
+                   CPUMIPSState *env)                                \
+{                                                                    \
+    int32_t rs3, rs2, rs1, rs0;                                      \
+    int32_t rt3, rt2, rt1, rt0;                                      \
+    int32_t tempD, tempC, tempB, tempA;                              \
+    int64_t acc[2];                                                  \
+    int64_t temp[2];                                                 \
+    int64_t temp_sum;                                                \
+    /* In temp[]/acc[], [0] is the LO half and [1] the HI half. */   \
+    MIPSDSP_SPLIT64_16(rs, rs3, rs2, rs1, rs0);                      \
+    MIPSDSP_SPLIT64_16(rt, rt3, rt2, rt1, rt0);                      \
+    /* NOTE(review): lanes are int32_t, not int16_t - confirm. */    \
+    if (use_ac_env) {                                                \
+        tempD = mipsdsp_mul_q15_q15(ac, rs3, rt3, env);              \
+        tempC = mipsdsp_mul_q15_q15(ac, rs2, rt2, env);              \
+        tempB = mipsdsp_mul_q15_q15(ac, rs1, rt1, env);              \
+        tempA = mipsdsp_mul_q15_q15(ac, rs0, rt0, env);              \
+    } else {                                                         \
+        tempD = mipsdsp_mul_u16_u16(rs3, rt3);                       \
+        tempC = mipsdsp_mul_u16_u16(rs2, rt2);                       \
+        tempB = mipsdsp_mul_u16_u16(rs1, rt1);                       \
+        tempA = mipsdsp_mul_u16_u16(rs0, rt0);                       \
+    }                                                                \
+                                                                     \
+    temp[0] = (int64_t)tempD + (int64_t)tempC +                      \
+              (int64_t)tempB + (int64_t)tempA;                       \
+    /* temp[1] sign-extends the 64-bit sum held in temp[0]. */       \
+    if (temp[0] >= 0) {                                              \
+        temp[1] = 0;                                                 \
+    } else {                                                         \
+        temp[1] = ~0ull;                                             \
+    }                                                                \
+                                                                     \
+    acc[1] = env->active_tc.HI[ac];                                  \
+    acc[0] = env->active_tc.LO[ac];                                  \
+    /* Carry (add) or borrow (subtract) is applied to acc[1]. */     \
+    if (is_add) {                                                    \
+        temp_sum = acc[0] + temp[0];                                 \
+        if (((uint64_t)temp_sum < (uint64_t)acc[0]) &&               \
+            ((uint64_t)temp_sum < (uint64_t)temp[0])) {              \
+            acc[1] = acc[1] + 1;                                     \
+        }                                                            \
+        temp[0] = temp_sum;                                          \
+        temp[1] = acc[1] + temp[1];                                  \
+    } else {                                                         \
+        temp_sum = acc[0] - temp[0];                                 \
+        if ((uint64_t)temp_sum > (uint64_t)acc[0]) {                 \
+            acc[1] = acc[1] - 1;                                     \
+        }                                                            \
+        temp[0] = temp_sum;                                          \
+        temp[1] = acc[1] - temp[1];                                  \
+    }                                                                \
+                                                                     \
+    env->active_tc.HI[ac] = temp[1];                                 \
+    env->active_tc.LO[ac] = temp[0];                                 \
+}
+
+DP_QH(dpa_w_qh, 1, 0);
+DP_QH(dpaq_s_w_qh, 1, 1);
+DP_QH(dps_w_qh, 0, 0);
+DP_QH(dpsq_s_w_qh, 0, 1);
+
+#undef DP_QH
+
+#endif
+/* DP_L_W: saturating HI:LO +/- one mipsdsp_mul_q31_q31() product of rs, rt. */
+#define DP_L_W(name, is_add) \
+void helper_##name(uint32_t ac, target_ulong rs, target_ulong rt,      \
+                   CPUMIPSState *env)                                  \
+{                                                                      \
+    int32_t temp63;                                                    \
+    int64_t dotp, acc;                                                 \
+    uint64_t temp;                                                     \
+    bool overflow;                                                     \
+    /* acc is HI << 32 combined with LO masked by MIPSDSP_LLO. */      \
+    dotp = mipsdsp_mul_q31_q31(ac, rs, rt, env);                       \
+    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |                    \
+          ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);             \
+    if (is_add) {                                                      \
+        temp = acc + dotp;                                             \
+        overflow = MIPSDSP_OVERFLOW_ADD((uint64_t)acc, (uint64_t)dotp, \
+                                        temp, (0x01ull << 63));        \
+    } else {                                                           \
+        temp = acc - dotp;                                             \
+        overflow = MIPSDSP_OVERFLOW_SUB((uint64_t)acc, (uint64_t)dotp, \
+                                        temp, (0x01ull << 63));        \
+    }                                                                  \
+    /* Overflow: clamp to the bound opposite the wrapped sign. */      \
+    if (overflow) {                                                    \
+        temp63 = (temp >> 63) & 0x01;                                  \
+        if (temp63 == 1) {                                             \
+            temp = (0x01ull << 63) - 1;                                \
+        } else {                                                       \
+            temp = 0x01ull << 63;                                      \
+        }                                                              \
+        /* Flag index 16 + ac goes to the DSPControl helper. */        \
+        set_DSPControl_overflow_flag(1, 16 + ac, env);                 \
+    }                                                                  \
+    /* Store both halves, each sign-extended from 32 bits. */          \
+    env->active_tc.HI[ac] = (target_long)(int32_t)                     \
+        ((temp & MIPSDSP_LHI) >> 32);                                  \
+    env->active_tc.LO[ac] = (target_long)(int32_t)                     \
+        (temp & MIPSDSP_LLO);                                          \
+}
+/* is_add = 1 selects the addition branch, 0 the subtraction branch. */
+DP_L_W(dpaq_sa_l_w, 1);
+DP_L_W(dpsq_sa_l_w, 0);
+
+#undef DP_L_W
+
+#if defined(TARGET_MIPS64)
+#define DP_L_PW(name, func) \
+void helper_##name(target_ulong rs, target_ulong rt, uint32_t ac, \
+                   CPUMIPSState *env)                             \
+{                                                                 \
+    int32_t rs1, rs0;                                             \
+    int32_t rt1, rt0;                                             \
+    int64_t tempB[2], tempA[2];                                   \
+    int64_t temp[2];                                              \
+    int64_t acc[2];                                               \
+    int64_t temp_sum;                                             \
+    /* Two lane products summed, then mipsdsp_<func>() on ac. */  \
+    temp[0] = 0;                                                  \
+    temp[1] = 0;                                                  \
+                                                                  \
+    MIPSDSP_SPLIT64_32(rs, rs1, rs0);                             \
+    MIPSDSP_SPLIT64_32(rt, rt1, rt0);                             \
+    /* Products of the matching lanes: rs1*rt1 and rs0*rt0. */    \
+    tempB[0] = mipsdsp_mul_q31_q31(ac, rs1, rt1, env);            \
+    tempA[0] = mipsdsp_mul_q31_q31(ac, rs0, rt0, env);            \
+    /* Sign-extend each product into its upper word [1]. */       \
+    if (tempB[0] >= 0) {                                          \
+        tempB[1] = 0x00;                                          \
+    } else {                                                      \
+        tempB[1] = ~0ull;                                         \
+    }                                                             \
+                                                                  \
+    if (tempA[0] >= 0) {                                          \
+        tempA[1] = 0x00;                                          \
+    } else {                                                      \
+        tempA[1] = ~0ull;                                         \
+    }                                                             \
+    /* 128-bit add: carry out of the low word goes to temp[1]. */ \
+    temp_sum = tempB[0] + tempA[0];                               \
+    if (((uint64_t)temp_sum < (uint64_t)tempB[0]) &&              \
+        ((uint64_t)temp_sum < (uint64_t)tempA[0])) {              \
+        temp[1] += 1;                                             \
+    }                                                             \
+    temp[0] = temp_sum;                                           \
+    temp[1] += tempB[1] + tempA[1];                               \
+    /* Helper is expected to fill acc, stored below as HI:LO. */  \
+    mipsdsp_##func(acc, ac, temp, env);                           \
+                                                                  \
+    env->active_tc.HI[ac] = acc[1];                               \
+    env->active_tc.LO[ac] = acc[0];                               \
+}
+
+DP_L_PW(dpaq_sa_l_pw, sat64_acc_add_q63);
+DP_L_PW(dpsq_sa_l_pw, sat64_acc_sub_q63);
+
+#undef DP_L_PW
+/* helper_mulsaq_s_l_pw: ac += (rs1 x rt1) - (rs0 x rt0) on the HI:LO pair. */
+void helper_mulsaq_s_l_pw(target_ulong rs, target_ulong rt, uint32_t ac,
+                          CPUMIPSState *env)
+{
+    int32_t rs1, rs0;
+    int32_t rt1, rt0;
+    int64_t tempB[2], tempA[2];
+    int64_t temp[2];
+    int64_t acc[2];
+    int64_t temp_sum;
+
+    rs1 = (rs >> 32) & MIPSDSP_LLO;
+    rs0 = rs & MIPSDSP_LLO;
+    rt1 = (rt >> 32) & MIPSDSP_LLO;
+    rt0 = rt & MIPSDSP_LLO;
+    /* High words multiply together (tempB), low words together (tempA). */
+    tempB[0] = mipsdsp_mul_q31_q31(ac, rs1, rt1, env);
+    tempA[0] = mipsdsp_mul_q31_q31(ac, rs0, rt0, env);
+    /* Sign-extend each 64-bit product into an upper word [1]. */
+    if (tempB[0] >= 0) {
+        tempB[1] = 0x00;
+    } else {
+        tempB[1] = ~0ull;
+    }
+
+    if (tempA[0] >= 0) {
+        tempA[1] = 0x00;
+    } else {
+        tempA[1] = ~0ull;
+    }
+
+    acc[0] = env->active_tc.LO[ac];
+    acc[1] = env->active_tc.HI[ac];
+    /* temp = tempB - tempA; a low-word borrow is taken from tempB[1]. */
+    temp_sum = tempB[0] - tempA[0];
+    if ((uint64_t)temp_sum > (uint64_t)tempB[0]) {
+        tempB[1] -= 1;
+    }
+    temp[0] = temp_sum;
+    temp[1] = tempB[1] - tempA[1];
+    /* The upper word collapses to all zeros or all ones from its bit 0. */
+    if ((temp[1] & 0x01) == 0) {
+        temp[1] = 0x00;
+    } else {
+        temp[1] = ~0ull;
+    }
+    /* acc += temp, carrying from the low word into the high word. */
+    temp_sum = acc[0] + temp[0];
+    if (((uint64_t)temp_sum < (uint64_t)acc[0]) &&
+       ((uint64_t)temp_sum < (uint64_t)temp[0])) {
+        acc[1] += 1;
+    }
+    acc[0] = temp_sum;
+    acc[1] += temp[1];
+
+    env->active_tc.HI[ac] = acc[1];
+    env->active_tc.LO[ac] = acc[0];
+}
+#endif
+/* MAQ_S_W: add the mipsdsp_mul_q15_q15() product at bit 'mov' to HI:LO[ac]. */
+#define MAQ_S_W(name, mov) \
+void helper_##name(uint32_t ac, target_ulong rs, target_ulong rt, \
+                   CPUMIPSState *env)                             \
+{                                                                 \
+    int16_t lhs = (rs >> mov) & MIPSDSP_LO;                       \
+    int16_t rhs = (rt >> mov) & MIPSDSP_LO;                       \
+    int32_t product;                                              \
+    int64_t sum, acc64;                                           \
+                                                                  \
+    product = mipsdsp_mul_q15_q15(ac, lhs, rhs, env);             \
+    acc64 = ((uint64_t)env->active_tc.HI[ac] << 32) |             \
+            ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);      \
+    sum = (int64_t)product + acc64;                               \
+    /* Store the two halves back, each sign-extended. */          \
+    env->active_tc.HI[ac] = (target_long)(int32_t)                \
+        ((sum & MIPSDSP_LHI) >> 32);                              \
+    env->active_tc.LO[ac] = (target_long)(int32_t)                \
+        (sum & MIPSDSP_LLO);                                      \
+}
+
+MAQ_S_W(maq_s_w_phl, 16);
+MAQ_S_W(maq_s_w_phr, 0);
+
+#undef MAQ_S_W
+/* MAQ_SA_W: halfword product run through mipsdsp_sat32_acc_q31() for ac. */
+#define MAQ_SA_W(name, mov) \
+void helper_##name(uint32_t ac, target_ulong rs, target_ulong rt,        \
+                   CPUMIPSState *env)                                    \
+{                                                                        \
+    int16_t lhs = (rs >> mov) & MIPSDSP_LO;                              \
+    int16_t rhs = (rt >> mov) & MIPSDSP_LO;                              \
+    int32_t result;                                                      \
+    int64_t wide;                                                        \
+                                                                         \
+    result = mipsdsp_mul_q15_q15(ac, lhs, rhs, env);                     \
+    result = mipsdsp_sat32_acc_q31(ac, result, env);                     \
+    wide = (int64_t)result;                                              \
+    env->active_tc.HI[ac] =                                              \
+        (target_long)(int32_t)((wide & MIPSDSP_LHI) >> 32);              \
+    env->active_tc.LO[ac] =                                              \
+        (target_long)(int32_t)(wide & MIPSDSP_LLO);                      \
+}
+
+MAQ_SA_W(maq_sa_w_phl, 16);
+MAQ_SA_W(maq_sa_w_phr, 0);
+
+#undef MAQ_SA_W
+/* MULQ_W: doubled 32x32 product plus addvar; the upper 32 bits are returned. */
+#define MULQ_W(name, addvar) \
+target_ulong helper_##name(target_ulong rs, target_ulong rt,   \
+                           CPUMIPSState *env)                  \
+{                                                              \
+    int32_t lhs = rs & MIPSDSP_LLO;                            \
+    int32_t rhs = rt & MIPSDSP_LLO;                            \
+    int32_t high;                                              \
+    int64_t wide;                                              \
+                                                               \
+    /* Only 0x80000000 * 0x80000000 is clamped and flagged. */ \
+    if ((lhs != 0x80000000) || (rhs != 0x80000000)) {          \
+        wide  = ((int64_t)lhs * (int64_t)rhs) << 1;            \
+        wide += addvar;                                        \
+    } else {                                                   \
+        wide = 0x7FFFFFFF00000000ull;                          \
+        set_DSPControl_overflow_flag(1, 21, env);              \
+    }                                                          \
+    /* Keep the upper word, sign-extended on return. */        \
+    high = (wide & MIPSDSP_LHI) >> 32;                         \
+                                                               \
+    return (target_long)(int32_t)high;                         \
+}
+
+MULQ_W(mulq_s_w, 0);
+MULQ_W(mulq_rs_w, 0x80000000ull);
+
+#undef MULQ_W
+
+#if defined(TARGET_MIPS64)
+/* MAQ_S_W_QH: ac += mipsdsp_mul_q15_q15() of the halfwords at bit 'mov'. */
+#define MAQ_S_W_QH(name, mov) \
+void helper_##name(target_ulong rs, target_ulong rt, uint32_t ac, \
+                   CPUMIPSState *env)                             \
+{                                                                 \
+    int16_t rs_t, rt_t;                                           \
+    int32_t temp_mul;                                             \
+    int64_t temp[2];                                              \
+    int64_t acc[2];                                               \
+    int64_t temp_sum;                                             \
+                                                                  \
+    temp[0] = 0;                                                  \
+    temp[1] = 0;                                                  \
+                                                                  \
+    rs_t = (rs >> mov) & MIPSDSP_LO;                              \
+    rt_t = (rt >> mov) & MIPSDSP_LO;                              \
+    temp_mul = mipsdsp_mul_q15_q15(ac, rs_t, rt_t, env);          \
+    /* Widen the 32-bit product to a [low, high] word pair. */    \
+    temp[0] = (int64_t)temp_mul;                                  \
+    if (temp[0] >= 0) {                                           \
+        temp[1] = 0x00;                                           \
+    } else {                                                      \
+        temp[1] = ~0ull;                                          \
+    }                                                             \
+                                                                  \
+    acc[0] = env->active_tc.LO[ac];                               \
+    acc[1] = env->active_tc.HI[ac];                               \
+    /* 128-bit add: a low-word carry is added to acc[1]. */       \
+    temp_sum = acc[0] + temp[0];                                  \
+    if (((uint64_t)temp_sum < (uint64_t)acc[0]) &&                \
+        ((uint64_t)temp_sum < (uint64_t)temp[0])) {               \
+        acc[1] += 1;                                              \
+    }                                                             \
+    acc[0] = temp_sum;                                            \
+    acc[1] += temp[1];                                            \
+                                                                  \
+    env->active_tc.HI[ac] = acc[1];                               \
+    env->active_tc.LO[ac] = acc[0];                               \
+}
+
+MAQ_S_W_QH(maq_s_w_qhll, 48);
+MAQ_S_W_QH(maq_s_w_qhlr, 32);
+MAQ_S_W_QH(maq_s_w_qhrl, 16);
+MAQ_S_W_QH(maq_s_w_qhrr, 0);
+
+#undef MAQ_S_W_QH
+/* MAQ_SA_W (MIPS64): halfword product via mipsdsp_sat32_acc_q31() into ac. */
+#define MAQ_SA_W(name, mov) \
+void helper_##name(target_ulong rs, target_ulong rt, uint32_t ac, \
+                   CPUMIPSState *env)                             \
+{                                                                 \
+    int16_t lhs = (rs >> mov) & MIPSDSP_LO;                       \
+    int16_t rhs = (rt >> mov) & MIPSDSP_LO;                       \
+    int32_t result;                                               \
+    int64_t lo_word, hi_word;                                     \
+                                                                  \
+    result = mipsdsp_mul_q15_q15(ac, lhs, rhs, env);              \
+    result = mipsdsp_sat32_acc_q31(ac, result, env);              \
+                                                                  \
+    /* Widen the result: the high word is all ones when the */    \
+    /* 32-bit value is negative and zero otherwise. */            \
+    lo_word = (int64_t)(int32_t)result;                           \
+    hi_word = 0x00;                                               \
+    if (lo_word < 0) {                                            \
+        hi_word = ~0ull;                                          \
+    }                                                             \
+                                                                  \
+    env->active_tc.HI[ac] = hi_word;                              \
+    env->active_tc.LO[ac] = lo_word;                              \
+}
+
+MAQ_SA_W(maq_sa_w_qhll, 48);
+MAQ_SA_W(maq_sa_w_qhlr, 32);
+MAQ_SA_W(maq_sa_w_qhrl, 16);
+MAQ_SA_W(maq_sa_w_qhrr, 0);
+
+#undef MAQ_SA_W
+/* MAQ_S_L_PW: ac += mipsdsp_mul_q31_q31() of the 32-bit fields at bit 'mov'. */
+#define MAQ_S_L_PW(name, mov) \
+void helper_##name(target_ulong rs, target_ulong rt, uint32_t ac, \
+                   CPUMIPSState *env)                             \
+{                                                                 \
+    int32_t rs_t, rt_t;                                           \
+    int64_t temp[2];                                              \
+    int64_t acc[2];                                               \
+    int64_t temp_sum;                                             \
+                                                                  \
+    temp[0] = 0;                                                  \
+    temp[1] = 0;                                                  \
+                                                                  \
+    rs_t = (rs >> mov) & MIPSDSP_LLO;                             \
+    rt_t = (rt >> mov) & MIPSDSP_LLO;                             \
+    /* 64-bit product, sign-extended into temp[1]. */             \
+    temp[0] = mipsdsp_mul_q31_q31(ac, rs_t, rt_t, env);           \
+    if (temp[0] >= 0) {                                           \
+        temp[1] = 0x00;                                           \
+    } else {                                                      \
+        temp[1] = ~0ull;                                          \
+    }                                                             \
+                                                                  \
+    acc[0] = env->active_tc.LO[ac];                               \
+    acc[1] = env->active_tc.HI[ac];                               \
+    /* 128-bit add: a low-word carry is added to acc[1]. */       \
+    temp_sum = acc[0] + temp[0];                                  \
+    if (((uint64_t)temp_sum < (uint64_t)acc[0]) &&                \
+        ((uint64_t)temp_sum < (uint64_t)temp[0])) {               \
+        acc[1] += 1;                                              \
+    }                                                             \
+    acc[0] = temp_sum;                                            \
+    acc[1] += temp[1];                                            \
+                                                                  \
+    env->active_tc.HI[ac] = acc[1];                               \
+    env->active_tc.LO[ac] = acc[0];                               \
+}
+
+MAQ_S_L_PW(maq_s_l_pwl, 32);
+MAQ_S_L_PW(maq_s_l_pwr, 0);
+
+#undef MAQ_S_L_PW
+/* DM_OPERATE: HI:LO[ac] +/- (func(rs1, rt1) + func(rs0, rt0)), 128 bits wide. */
+#define DM_OPERATE(name, func, is_add, sigext) \
+void helper_##name(target_ulong rs, target_ulong rt, uint32_t ac,    \
+                  CPUMIPSState *env)                                 \
+{                                                                    \
+    int32_t rs1, rs0;                                                \
+    int32_t rt1, rt0;                                                \
+    int64_t tempBL[2], tempAL[2];                                    \
+    int64_t acc[2];                                                  \
+    int64_t temp[2];                                                 \
+    int64_t temp_sum;                                                \
+                                                                     \
+    temp[0] = 0x00;                                                  \
+    temp[1] = 0x00;                                                  \
+                                                                     \
+    MIPSDSP_SPLIT64_32(rs, rs1, rs0);                                \
+    MIPSDSP_SPLIT64_32(rt, rt1, rt0);                                \
+    /* sigext picks sign- or zero-extension of the products. */      \
+    if (sigext) {                                                    \
+        tempBL[0] = (int64_t)mipsdsp_##func(rs1, rt1);               \
+        tempAL[0] = (int64_t)mipsdsp_##func(rs0, rt0);               \
+                                                                     \
+        if (tempBL[0] >= 0) {                                        \
+            tempBL[1] = 0x0;                                         \
+        } else {                                                     \
+            tempBL[1] = ~0ull;                                       \
+        }                                                            \
+                                                                     \
+        if (tempAL[0] >= 0) {                                        \
+            tempAL[1] = 0x0;                                         \
+        } else {                                                     \
+            tempAL[1] = ~0ull;                                       \
+        }                                                            \
+    } else {                                                         \
+        tempBL[0] = mipsdsp_##func(rs1, rt1);                        \
+        tempAL[0] = mipsdsp_##func(rs0, rt0);                        \
+        tempBL[1] = 0;                                               \
+        tempAL[1] = 0;                                               \
+    }                                                                \
+                                                                     \
+    acc[1] = env->active_tc.HI[ac];                                  \
+    acc[0] = env->active_tc.LO[ac];                                  \
+    /* temp = tempBL + tempAL, with carry into temp[1]. */           \
+    temp_sum = tempBL[0] + tempAL[0];                                \
+    if (((uint64_t)temp_sum < (uint64_t)tempBL[0]) &&                \
+        ((uint64_t)temp_sum < (uint64_t)tempAL[0])) {                \
+        temp[1] += 1;                                                \
+    }                                                                \
+    temp[0] = temp_sum;                                              \
+    temp[1] += tempBL[1] + tempAL[1];                                \
+    /* Combine with acc; a low-word carry/borrow adjusts acc[1]. */  \
+    if (is_add) {                                                    \
+        temp_sum = acc[0] + temp[0];                                 \
+        if (((uint64_t)temp_sum < (uint64_t)acc[0]) &&               \
+            ((uint64_t)temp_sum < (uint64_t)temp[0])) {              \
+            acc[1] += 1;                                             \
+        }                                                            \
+        temp[0] = temp_sum;                                          \
+        temp[1] = acc[1] + temp[1];                                  \
+    } else {                                                         \
+        temp_sum = acc[0] - temp[0];                                 \
+        if ((uint64_t)temp_sum > (uint64_t)acc[0]) {                 \
+            acc[1] -= 1;                                             \
+        }                                                            \
+        temp[0] = temp_sum;                                          \
+        temp[1] = acc[1] - temp[1];                                  \
+    }                                                                \
+                                                                     \
+    env->active_tc.HI[ac] = temp[1];                                 \
+    env->active_tc.LO[ac] = temp[0];                                 \
+}
+
+DM_OPERATE(dmadd, mul_i32_i32, 1, 1);
+DM_OPERATE(dmaddu, mul_u32_u32, 1, 0);
+DM_OPERATE(dmsub, mul_i32_i32, 0, 1);
+DM_OPERATE(dmsubu, mul_u32_u32, 0, 0);
+#undef DM_OPERATE
+#endif
+
+/** DSP Bit/Manipulation Sub-class insns **/
+target_ulong helper_bitrev(target_ulong rt)
+{
+    /* Reverse the order of the 16 low bits of (rt & MIPSDSP_LO). */
+    int32_t remaining = rt & MIPSDSP_LO;
+    uint32_t reversed = 0;
+    int count = 16;
+
+    while (count-- > 0) {
+        /* Shift the next low source bit in at the bottom. */
+        reversed <<= 1;
+        reversed |= remaining & 1;
+        remaining >>= 1;
+    }
+    return (target_ulong)reversed;
+}
+/* BIT_INSV: deposit rs into rt using pos/size fields read from DSPControl. */
+#define BIT_INSV(name, posfilter, ret_type)                     \
+target_ulong helper_##name(CPUMIPSState *env, target_ulong rs,  \
+                           target_ulong rt)                     \
+{                                                               \
+    uint32_t pos, size, msb, lsb;                               \
+    uint32_t const sizefilter = 0x3F;                           \
+    target_ulong temp;                                          \
+    target_ulong dspc;                                          \
+                                                                \
+    dspc = env->active_tc.DSPControl;                           \
+    /* pos: DSPControl & posfilter; size: bits 12..7. */        \
+    pos  = dspc & posfilter;                                    \
+    size = (dspc >> 7) & sizefilter;                            \
+    /* size == 0: msb < lsb, or msb wraps when pos == 0. */     \
+    msb  = pos + size - 1;                                      \
+    lsb  = pos;                                                 \
+    /* Reject fields that end past the top register bit. */     \
+    if (lsb > msb || (msb >= TARGET_LONG_BITS)) {               \
+        return rt;                                              \
+    }                                                           \
+    /* deposit64() needs pos + size <= 64 at this point. */     \
+    temp = deposit64(rt, pos, size, rs);                        \
+                                                                \
+    return (target_long)(ret_type)temp;                         \
+}
+
+BIT_INSV(insv, 0x1F, int32_t);
+#ifdef TARGET_MIPS64
+BIT_INSV(dinsv, 0x7F, target_long);
+#endif
+
+#undef BIT_INSV
+
+
+/** DSP Compare-Pick Sub-class insns **/
+#define CMP_HAS_RET(name, func, split_num, filter, bit_size) \
+target_ulong helper_##name(target_ulong rs, target_ulong rt) \
+{                                                       \
+    /* Bit n of the result is the compare of lane n. */ \
+    uint32_t mask = 0;                                  \
+    uint32_t lhs, rhs;                                  \
+    uint8_t hit;                                        \
+    int lane;                                           \
+                                                        \
+    for (lane = 0; lane < split_num; lane++) {          \
+        lhs = (rs >> (bit_size * lane)) & filter;       \
+        rhs = (rt >> (bit_size * lane)) & filter;       \
+        hit = mipsdsp_##func(lhs, rhs);                 \
+        mask |= hit << lane;                            \
+    }                                                   \
+    return (target_ulong)mask;                          \
+}
+
+CMP_HAS_RET(cmpgu_eq_qb, cmpu_eq, 4, MIPSDSP_Q0, 8);
+CMP_HAS_RET(cmpgu_lt_qb, cmpu_lt, 4, MIPSDSP_Q0, 8);
+CMP_HAS_RET(cmpgu_le_qb, cmpu_le, 4, MIPSDSP_Q0, 8);
+
+#ifdef TARGET_MIPS64
+CMP_HAS_RET(cmpgu_eq_ob, cmpu_eq, 8, MIPSDSP_Q0, 8);
+CMP_HAS_RET(cmpgu_lt_ob, cmpu_lt, 8, MIPSDSP_Q0, 8);
+CMP_HAS_RET(cmpgu_le_ob, cmpu_le, 8, MIPSDSP_Q0, 8);
+#endif
+
+#undef CMP_HAS_RET
+
+/* CMP_NO_RET: per-lane compare; the bit mask goes to set_DSPControl_24(). */
+#define CMP_NO_RET(name, func, split_num, filter, bit_size) \
+void helper_##name(target_ulong rs, target_ulong rt,        \
+                            CPUMIPSState *env)              \
+{                                                           \
+    int##bit_size##_t rs_t, rt_t;                           \
+    uint32_t flag = 0;                                      \
+    int##bit_size##_t cc;                                   \
+    int i;                                                  \
+    /* flag is unsigned so that lane 7 cannot turn it */    \
+    for (i = 0; i < split_num; i++) {                       \
+        rs_t = (rs >> (bit_size * i)) & filter;             \
+        rt_t = (rt >> (bit_size * i)) & filter;             \
+        /* negative when only 8 bits wide (8-lane case). */ \
+        cc = mipsdsp_##func((int32_t)rs_t, (int32_t)rt_t);  \
+        flag |= cc << i;                                    \
+    }                                                       \
+    /* Lane i occupies bit i of the mask handed over. */    \
+    set_DSPControl_24(flag, split_num, env);                \
+}
+
+CMP_NO_RET(cmpu_eq_qb, cmpu_eq, 4, MIPSDSP_Q0, 8);
+CMP_NO_RET(cmpu_lt_qb, cmpu_lt, 4, MIPSDSP_Q0, 8);
+CMP_NO_RET(cmpu_le_qb, cmpu_le, 4, MIPSDSP_Q0, 8);
+
+CMP_NO_RET(cmp_eq_ph, cmp_eq, 2, MIPSDSP_LO, 16);
+CMP_NO_RET(cmp_lt_ph, cmp_lt, 2, MIPSDSP_LO, 16);
+CMP_NO_RET(cmp_le_ph, cmp_le, 2, MIPSDSP_LO, 16);
+
+#ifdef TARGET_MIPS64
+CMP_NO_RET(cmpu_eq_ob, cmpu_eq, 8, MIPSDSP_Q0, 8);
+CMP_NO_RET(cmpu_lt_ob, cmpu_lt, 8, MIPSDSP_Q0, 8);
+CMP_NO_RET(cmpu_le_ob, cmpu_le, 8, MIPSDSP_Q0, 8);
+
+CMP_NO_RET(cmp_eq_qh, cmp_eq, 4, MIPSDSP_LO, 16);
+CMP_NO_RET(cmp_lt_qh, cmp_lt, 4, MIPSDSP_LO, 16);
+CMP_NO_RET(cmp_le_qh, cmp_le, 4, MIPSDSP_LO, 16);
+
+CMP_NO_RET(cmp_eq_pw, cmp_eq, 2, MIPSDSP_LLO, 32);
+CMP_NO_RET(cmp_lt_pw, cmp_lt, 2, MIPSDSP_LLO, 32);
+CMP_NO_RET(cmp_le_pw, cmp_le, 2, MIPSDSP_LLO, 32);
+#endif
+#undef CMP_NO_RET
+
+#if defined(TARGET_MIPS64)
+/* CMPGDU_OB: mipsdsp_cmpu_<name>() on each of the eight byte lanes of rs, rt.
+ * The per-lane mask is passed to set_DSPControl_24() and also returned. */
+#define CMPGDU_OB(name) \
+target_ulong helper_cmpgdu_##name##_ob(target_ulong rs, target_ulong rt, \
+                                       CPUMIPSState *env)  \
+{                                                     \
+    uint32_t mask = 0;                                \
+    uint8_t lhs, rhs;                                 \
+    int lane;                                         \
+                                                      \
+    /* One result bit per byte, lane 0 in bit 0. */   \
+    for (lane = 0; lane < 8; lane++) {                \
+        lhs = (rs >> (8 * lane)) & MIPSDSP_Q0;        \
+        rhs = (rt >> (8 * lane)) & MIPSDSP_Q0;        \
+                                                      \
+        mask |= mipsdsp_cmpu_##name(lhs, rhs)         \
+                ? (0x01 << lane) : 0;                 \
+    }                                                 \
+                                                      \
+    /* Pass the mask on, then return it as well. */   \
+    set_DSPControl_24(mask, 8, env);                  \
+                                                      \
+    return (uint64_t)mask;                            \
+}
+
+CMPGDU_OB(eq)
+CMPGDU_OB(lt)
+CMPGDU_OB(le)
+#undef CMPGDU_OB
+#endif
+/* PICK_INSN: lane n comes from rs if DSPControl bit 24 + n is set, else rt. */
+#define PICK_INSN(name, split_num, filter, bit_size, ret32bit) \
+target_ulong helper_##name(target_ulong rs, target_ulong rt,   \
+                            CPUMIPSState *env)                 \
+{                                                              \
+    target_ulong ctl = env->active_tc.DSPControl;              \
+    target_ulong merged = 0;                                   \
+    uint32_t from_rs, from_rt, chosen;                         \
+    int lane;                                                  \
+                                                               \
+    for (lane = 0; lane < split_num; lane++) {                 \
+        from_rs = (rs >> (bit_size * lane)) & filter;          \
+        from_rt = (rt >> (bit_size * lane)) & filter;          \
+        if ((ctl >> (24 + lane)) & 0x01) {                     \
+            chosen = from_rs;                                  \
+        } else {                                               \
+            chosen = from_rt;                                  \
+        }                                                      \
+        merged |= (target_ulong)chosen << (bit_size * lane);   \
+    }                                                          \
+                                                               \
+    /* 32-bit variants return the low word sign-extended. */   \
+    if (ret32bit) {                                            \
+        merged = (target_long)(int32_t)(merged & MIPSDSP_LLO); \
+    }                                                          \
+    return merged;                                             \
+}
+
+PICK_INSN(pick_qb, 4, MIPSDSP_Q0, 8, 1);
+PICK_INSN(pick_ph, 2, MIPSDSP_LO, 16, 1);
+
+#ifdef TARGET_MIPS64
+PICK_INSN(pick_ob, 8, MIPSDSP_Q0, 8, 0);
+PICK_INSN(pick_qh, 4, MIPSDSP_LO, 16, 0);
+PICK_INSN(pick_pw, 2, MIPSDSP_LLO, 32, 0);
+#endif
+#undef PICK_INSN
+
+target_ulong helper_packrl_ph(target_ulong rs, target_ulong rt)
+{
+    /* The MIPSDSP_LO part of rs goes on top, the MIPSDSP_HI part of rt below. */
+    uint32_t upper = rs & MIPSDSP_LO;
+    uint32_t lower = (rt & MIPSDSP_HI) >> 16;
+    uint32_t packed = (upper << 16) | lower;
+
+    return (target_long)(int32_t)packed;
+}
+
+#if defined(TARGET_MIPS64)
+target_ulong helper_packrl_pw(target_ulong rs, target_ulong rt)
+{
+    /* The MIPSDSP_LLO-masked low word of rs becomes the upper word of the
+     * result; the word taken from rt >> 32 becomes the lower word. */
+    uint64_t upper = (uint32_t)(rs & MIPSDSP_LLO);
+    uint64_t lower = (uint32_t)((rt >> 32) & MIPSDSP_LLO);
+
+    return (upper << 32) | lower;
+}
+#endif
+
+/** DSP Accumulator and DSPControl Access Sub-class insns **/
+target_ulong helper_extr_w(target_ulong ac, target_ulong shift,
+                           CPUMIPSState *env)
+{
+    int32_t tempI;
+    int64_t tempDL[2];
+    /* Only the low five bits of the shift operand are used. */
+    shift = shift & 0x1F;
+    /* tempDL[] is filled by the helper; tempDL[1] is the upper limb. */
+    mipsdsp_rndrashift_short_acc(tempDL, ac, shift, env);
+    if ((tempDL[1] != 0 || (tempDL[0] & MIPSDSP_LHI) != 0) &&
+        (tempDL[1] != 1 || (tempDL[0] & MIPSDSP_LHI) != MIPSDSP_LHI)) {
+        set_DSPControl_overflow_flag(1, 23, env);
+    }
+    /* Result is captured here, before the +1 below: no rounding applied. */
+    tempI = (tempDL[0] >> 1) & MIPSDSP_LLO;
+    /* Add 1 with carry into tempDL[1]; used only for the second check. */
+    tempDL[0] += 1;
+    if (tempDL[0] == 0) {
+        tempDL[1] += 1;
+    }
+
+    if (((tempDL[1] & 0x01) != 0 || (tempDL[0] & MIPSDSP_LHI) != 0) &&
+        ((tempDL[1] & 0x01) != 1 || (tempDL[0] & MIPSDSP_LHI) != MIPSDSP_LHI)) {
+        set_DSPControl_overflow_flag(1, 23, env);
+    }
+
+    return (target_long)tempI;
+}
+
+target_ulong helper_extr_r_w(target_ulong ac, target_ulong shift,
+                             CPUMIPSState *env)
+{
+    int64_t tempDL[2];
+    /* Only the low five bits of the shift operand are used. */
+    shift = shift & 0x1F;
+
+    mipsdsp_rndrashift_short_acc(tempDL, ac, shift, env);
+    if ((tempDL[1] != 0 || (tempDL[0] & MIPSDSP_LHI) != 0) &&
+        (tempDL[1] != 1 || (tempDL[0] & MIPSDSP_LHI) != MIPSDSP_LHI)) {
+        set_DSPControl_overflow_flag(1, 23, env);
+    }
+    /* Round: add 1 at bit 0 (carry into tempDL[1]); bit 0 is dropped below. */
+    tempDL[0] += 1;
+    if (tempDL[0] == 0) {
+        tempDL[1] += 1;
+    }
+
+    if (((tempDL[1] & 0x01) != 0 || (tempDL[0] & MIPSDSP_LHI) != 0) &&
+        ((tempDL[1] & 0x01) != 1 || (tempDL[0] & MIPSDSP_LHI) != MIPSDSP_LHI)) {
+        set_DSPControl_overflow_flag(1, 23, env);
+    }
+    /* Unlike helper_extr_w, the returned word comes from the rounded sum. */
+    return (target_long)(int32_t)(tempDL[0] >> 1);
+}
+
+target_ulong helper_extr_rs_w(target_ulong ac, target_ulong shift,
+                              CPUMIPSState *env)
+{
+    int32_t tempI, temp64;
+    int64_t tempDL[2];
+    /* Only the low five bits of the shift operand are used. */
+    shift = shift & 0x1F;
+
+    mipsdsp_rndrashift_short_acc(tempDL, ac, shift, env);
+    if ((tempDL[1] != 0 || (tempDL[0] & MIPSDSP_LHI) != 0) &&
+        (tempDL[1] != 1 || (tempDL[0] & MIPSDSP_LHI) != MIPSDSP_LHI)) {
+        set_DSPControl_overflow_flag(1, 23, env);
+    }
+    tempDL[0] += 1;
+    if (tempDL[0] == 0) {
+        tempDL[1] += 1;
+    }
+    tempI = tempDL[0] >> 1;
+    /* If the rounded sum overflowed, saturate by bit 0 of tempDL[1]. */
+    if (((tempDL[1] & 0x01) != 0 || (tempDL[0] & MIPSDSP_LHI) != 0) &&
+        ((tempDL[1] & 0x01) != 1 || (tempDL[0] & MIPSDSP_LHI) != MIPSDSP_LHI)) {
+        temp64 = tempDL[1] & 0x01;
+        if (temp64 == 0) {
+            tempI = 0x7FFFFFFF;
+        } else {
+            tempI = 0x80000000;
+        }
+        set_DSPControl_overflow_flag(1, 23, env);
+    }
+
+    return (target_long)tempI;
+}
+
+#if defined(TARGET_MIPS64)
+target_ulong helper_dextr_w(target_ulong ac, target_ulong shift,
+                            CPUMIPSState *env)
+{
+    uint64_t limb[3];
+
+    /* Six-bit shift amount; this function itself sets no overflow flag. */
+    mipsdsp_rndrashift_acc(limb, ac, shift & 0x3F, env);
+
+    /* Drop bit 0 of the low limb, keep 32 bits, sign-extend. */
+    return (int64_t)(int32_t)(limb[0] >> 1);
+}
+
+target_ulong helper_dextr_r_w(target_ulong ac, target_ulong shift,
+                              CPUMIPSState *env)
+{
+    uint64_t temp[3];
+    uint32_t temp128;
+    /* Only the low six bits of the shift operand are used. */
+    shift = shift & 0x3F;
+    mipsdsp_rndrashift_acc(temp, ac, shift, env);
+    /* Round: add 1 to the three-limb value, rippling the carry upward. */
+    temp[0] += 1;
+    if (temp[0] == 0) {
+        temp[1] += 1;
+        if (temp[1] == 0) {
+            temp[2] += 1;
+        }
+    }
+
+    temp128 = temp[2] & 0x01;
+    /* Flag unless temp[1] and bit 0 of temp[2] are all zeros or all ones. */
+    if ((temp128 != 0 || temp[1] != 0) &&
+       (temp128 != 1 || temp[1] != ~0ull)) {
+        set_DSPControl_overflow_flag(1, 23, env);
+    }
+
+    return (int64_t)(int32_t)(temp[0] >> 1);
+}
+
+target_ulong helper_dextr_rs_w(target_ulong ac, target_ulong shift,
+                               CPUMIPSState *env)
+{
+    uint64_t temp[3];
+    uint32_t temp128;
+    /* Only the low six bits of the shift operand are used. */
+    shift = shift & 0x3F;
+    mipsdsp_rndrashift_acc(temp, ac, shift, env);
+    /* Round: add 1 to the three-limb value, rippling the carry upward. */
+    temp[0] += 1;
+    if (temp[0] == 0) {
+        temp[1] += 1;
+        if (temp[1] == 0) {
+            temp[2] += 1;
+        }
+    }
+
+    temp128 = temp[2] & 0x01;
+    /* On overflow, load temp[0] so the >> 1 below yields 0x7FFFFFFF/0x80000000. */
+    if ((temp128 != 0 || temp[1] != 0) &&
+       (temp128 != 1 || temp[1] != ~0ull)) {
+        if (temp128 == 0) {
+            temp[0] = 0x0FFFFFFFF;
+        } else {
+            temp[0] = 0x0100000000ULL;
+        }
+        set_DSPControl_overflow_flag(1, 23, env);
+    }
+
+    return (int64_t)(int32_t)(temp[0] >> 1);
+}
+
+target_ulong helper_dextr_l(target_ulong ac, target_ulong shift,
+                            CPUMIPSState *env)
+{
+    uint64_t limb[3];
+
+    /* Only the low six bits of the shift operand are used. */
+    mipsdsp_rndrashift_acc(limb, ac, shift & 0x3F, env);
+
+    /*
+     * Discard bit 0 of limb[0] and pull bit 0 of limb[1] in at bit 63.
+     */
+    return (limb[1] << 63) | (limb[0] >> 1);
+}
+
+target_ulong helper_dextr_r_l(target_ulong ac, target_ulong shift,
+                              CPUMIPSState *env)
+{
+    uint64_t temp[3];
+    uint32_t temp128;
+    target_ulong result;
+    /* Only the low six bits of the shift operand are used. */
+    shift = shift & 0x3F;
+    mipsdsp_rndrashift_acc(temp, ac, shift, env);
+    /* Round: add 1 to the three-limb value, rippling the carry upward. */
+    temp[0] += 1;
+    if (temp[0] == 0) {
+        temp[1] += 1;
+        if (temp[1] == 0) {
+            temp[2] += 1;
+        }
+    }
+
+    temp128 = temp[2] & 0x01;
+    /* Flag unless temp[1] and bit 0 of temp[2] are all zeros or all ones. */
+    if ((temp128 != 0 || temp[1] != 0) &&
+       (temp128 != 1 || temp[1] != ~0ull)) {
+        set_DSPControl_overflow_flag(1, 23, env);
+    }
+    /* Drop bit 0 of temp[0]; bit 0 of temp[1] becomes bit 63. */
+    result = (temp[1] << 63) | (temp[0] >> 1);
+
+    return result;
+}
+
+target_ulong helper_dextr_rs_l(target_ulong ac, target_ulong shift,
+                               CPUMIPSState *env)
+{
+    uint64_t temp[3];
+    uint32_t temp128;
+    target_ulong result;
+    /* Only the low six bits of the shift operand are used. */
+    shift = shift & 0x3F;
+    mipsdsp_rndrashift_acc(temp, ac, shift, env);
+    /* Round: add 1 to the three-limb value, rippling the carry upward. */
+    temp[0] += 1;
+    if (temp[0] == 0) {
+        temp[1] += 1;
+        if (temp[1] == 0) {
+            temp[2] += 1;
+        }
+    }
+
+    temp128 = temp[2] & 0x01;
+    /* On overflow, rig the limbs so result is 0x7FFF...F or 0x8000...0. */
+    if ((temp128 != 0 || temp[1] != 0) &&
+       (temp128 != 1 || temp[1] != ~0ull)) {
+        if (temp128 == 0) {
+            temp[1] &= ~0x00ull - 1;
+            temp[0] |= ~0x00ull - 1;
+        } else {
+            temp[1] |= 0x01;
+            temp[0] &= 0x01;
+        }
+        set_DSPControl_overflow_flag(1, 23, env);
+    }
+    result = (temp[1] << 63) | (temp[0] >> 1);
+
+    return result;
+}
+#endif
+
+target_ulong helper_extr_s_h(target_ulong ac, target_ulong shift,
+                             CPUMIPSState *env)
+{
+    int64_t temp, acc;
+    /* Only the low five bits of the shift operand are used. */
+    shift = shift & 0x1F;
+    /* 64-bit accumulator: HI[ac] in the upper half, low 32 bits of LO[ac]. */
+    acc = ((int64_t)env->active_tc.HI[ac] << 32) |
+          ((int64_t)env->active_tc.LO[ac] & 0xFFFFFFFF);
+
+    temp = acc >> shift;
+    /* Clamp to the signed 16-bit range, flagging overflow when clamped. */
+    if (temp > (int64_t)0x7FFF) {
+        temp = 0x00007FFF;
+        set_DSPControl_overflow_flag(1, 23, env);
+    } else if (temp < (int64_t)0xFFFFFFFFFFFF8000ULL) {
+        temp = 0xFFFF8000;
+        set_DSPControl_overflow_flag(1, 23, env);
+    }
+
+    return (target_long)(int32_t)(temp & 0xFFFFFFFF);
+}
+
+
+#if defined(TARGET_MIPS64)
+target_ulong helper_dextr_s_h(target_ulong ac, target_ulong shift,
+                              CPUMIPSState *env)
+{
+    int64_t temp[2];
+    uint32_t temp127;
+    /* Extract a halfword from accumulator ac, saturated to 16-bit range. */
+    shift = shift & 0x1F;
+    /* mipsdsp_rashift_acc() fills temp[]; temp[1] is used as the high limb. */
+    mipsdsp_rashift_acc((uint64_t *)temp, ac, shift, env);
+
+    temp127 = (temp[1] >> 63) & 0x01;
+    /* The low limb carries no sign of its own, so compare it unsigned. */
+    if ((temp127 == 0) && (temp[1] > 0 || (uint64_t)temp[0] > 32767)) {
+        temp[0] &= 0xFFFF0000;
+        temp[0] |= 0x00007FFF;
+        set_DSPControl_overflow_flag(1, 23, env);
+    } else if ((temp127 == 1) &&
+            (temp[1] != -1
+             || (uint64_t)temp[0] < 0xFFFFFFFFFFFF8000ull)) {
+        temp[0] &= 0xFFFF0000;
+        temp[0] |= 0x00008000;
+        set_DSPControl_overflow_flag(1, 23, env);
+    }
+
+    return (int64_t)(int16_t)(temp[0] & MIPSDSP_LO);
+}
+
+#endif
+
+target_ulong helper_extp(target_ulong ac, target_ulong size, CPUMIPSState *env)
+{
+    int32_t start_pos;
+    int sub;
+    uint32_t temp;
+    uint64_t acc;
+    /* Extract size + 1 bits of HI:LO whose top bit is at DSPControl pos. */
+    size = size & 0x1F;
+    /* If the field would run below bit 0, return 0 and set EFI instead. */
+    temp = 0;
+    start_pos = get_DSPControl_pos(env);
+    sub = start_pos - (size + 1);
+    if (sub >= -1) {
+        acc = ((uint64_t)env->active_tc.HI[ac] << 32) |
+              ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
+        temp = (acc >> (start_pos - size)) & (~0U >> (31 - size));
+        set_DSPControl_efi(0, env);
+    } else {
+        set_DSPControl_efi(1, env);
+    }
+
+    return (target_ulong)temp;
+}
+
+target_ulong helper_extpdp(target_ulong ac, target_ulong size,
+                           CPUMIPSState *env)
+{
+    int32_t start_pos;
+    int sub;
+    uint32_t temp;
+    uint64_t acc;
+    /* Like helper_extp, but pos is also moved below the field on success. */
+    size = size & 0x1F;
+    temp = 0;
+    start_pos = get_DSPControl_pos(env);
+    sub = start_pos - (size + 1);
+    if (sub >= -1) {
+        acc  = ((uint64_t)env->active_tc.HI[ac] << 32) |
+               ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
+        temp = extract64(acc, start_pos - size, size + 1);
+        /* New pos is start_pos - size - 1: just below the extracted field. */
+        set_DSPControl_pos(sub, env);
+        set_DSPControl_efi(0, env);
+    } else {
+        set_DSPControl_efi(1, env);
+    }
+
+    return (target_ulong)temp;
+}
+
+
+#if defined(TARGET_MIPS64)
+target_ulong helper_dextp(target_ulong ac, target_ulong size, CPUMIPSState *env)
+{
+    int start_pos;
+    int len;
+    int sub;
+    uint64_t tempB, tempA;
+    uint64_t temp;
+    /* Extract size + 1 bits of HI[ac]:LO[ac] whose top bit is at pos. */
+    temp = 0;
+    size = size & 0x3F;
+    start_pos = get_DSPControl_pos(env);
+    len = start_pos - size;            /* lowest bit of the field */
+    tempB = env->active_tc.HI[ac];
+    tempA = env->active_tc.LO[ac];
+
+    sub = start_pos - (size + 1);
+    /* sub < -1: the field would run below bit 0; set EFI and return 0. */
+    if (sub >= -1) {
+        temp = len >= 64 ? tempB >> (len - 64) :
+               len ? (tempB << (64 - len)) | (tempA >> len) : tempA;
+        temp = temp & (~0ull >> (63 - size)); /* keep the low size+1 bits */
+        set_DSPControl_efi(0, env);
+    } else {
+        set_DSPControl_efi(1, env);
+    }
+
+    return temp;
+}
+
+target_ulong helper_dextpdp(target_ulong ac, target_ulong size,
+                            CPUMIPSState *env)
+{
+    int start_pos;
+    int len;
+    int sub;
+    uint64_t tempB, tempA;
+    uint64_t temp;
+    /* As helper_dextp, but pos is also moved below the field on success. */
+    temp = 0;
+    size = size & 0x3F;
+    start_pos = get_DSPControl_pos(env);
+    len = start_pos - size;            /* lowest bit of the field */
+    tempB = env->active_tc.HI[ac];
+    tempA = env->active_tc.LO[ac];
+    sub = start_pos - (size + 1);
+    /* sub < -1: the field would run below bit 0; set EFI and return 0. */
+    if (sub >= -1) {
+        temp = len >= 64 ? tempB >> (len - 64) :
+               len ? (tempB << (64 - len)) | (tempA >> len) : tempA;
+        temp = temp & (~0ull >> (63 - size)); /* keep the low size+1 bits */
+        set_DSPControl_pos(sub, env);
+        set_DSPControl_efi(0, env);
+    } else {
+        set_DSPControl_efi(1, env);
+    }
+
+    return temp;
+}
+
+#endif
+
+void helper_shilo(target_ulong ac, target_ulong rs, CPUMIPSState *env)
+{
+    int8_t  rs5_0;
+    uint64_t temp, acc;
+    /* Sign-extend the six-bit shift count in rs: range -32 .. 31. */
+    rs5_0 = rs & 0x3F;
+    rs5_0 = (int8_t)(rs5_0 << 2) >> 2;
+
+    if (unlikely(rs5_0 == 0)) {
+        return;
+    }
+
+    acc   = (((uint64_t)env->active_tc.HI[ac] << 32) & MIPSDSP_LHI) |
+            ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
+    /* Positive counts shift right (zero fill), negative counts shift left. */
+    if (rs5_0 > 0) {
+        temp = acc >> rs5_0;
+    } else {
+        temp = acc << -rs5_0;
+    }
+    /* Write both halves back, each sign-extended from 32 bits. */
+    env->active_tc.HI[ac] = (target_ulong)(int32_t)((temp & MIPSDSP_LHI) >> 32);
+    env->active_tc.LO[ac] = (target_ulong)(int32_t)(temp & MIPSDSP_LLO);
+}
+
+#if defined(TARGET_MIPS64)
+void helper_dshilo(target_ulong shift, target_ulong ac, CPUMIPSState *env)
+{
+    int8_t shift_t;
+    uint64_t tempB, tempA;
+    /* 7-bit signed count: > 0 shifts HI:LO right, < 0 shifts it left. */
+    shift_t = (int8_t)(shift << 1) >> 1;
+    tempB = env->active_tc.HI[ac];
+    tempA = env->active_tc.LO[ac];
+
+    if (shift_t != 0) {
+        if (shift_t >= 0) {
+            tempA = (tempB << (64 - shift_t)) | (tempA >> shift_t);
+            tempB = tempB >> shift_t;
+        } else {
+            shift_t = -shift_t;
+            /* A left shift by 64 is undefined in C; treat it separately. */
+            tempB = shift_t == 64 ? tempA
+                  : (tempB << shift_t) | (tempA >> (64 - shift_t));
+            tempA = shift_t == 64 ? 0 : tempA << shift_t;
+        }
+    }
+    env->active_tc.HI[ac] = tempB;
+    env->active_tc.LO[ac] = tempA;
+}
+
+#endif
+void helper_mthlip(target_ulong ac, target_ulong rs, CPUMIPSState *env)
+{
+    int32_t new_lo = rs;
+    int32_t old_lo = env->active_tc.LO[ac];
+    int32_t cur_pos;
+
+    /* Old LO moves up into HI, rs becomes the new LO (both via int32_t). */
+    env->active_tc.HI[ac] = (target_long)old_lo;
+    env->active_tc.LO[ac] = (target_long)new_lo;
+
+    /* pos advances by 32, unless it is already above 32. */
+    cur_pos = get_DSPControl_pos(env);
+    if (cur_pos <= 32) {
+        set_DSPControl_pos(cur_pos + 32, env);
+    }
+}
+
+#if defined(TARGET_MIPS64)
+void helper_dmthlip(target_ulong rs, target_ulong ac, CPUMIPSState *env)
+{
+    /* Only the low two bits of ac select the accumulator. */
+    const uint8_t idx = ac & 0x3;
+    const uint64_t old_lo = env->active_tc.LO[idx];
+    uint8_t cur_pos;
+
+    /* Old LO moves up into HI; rs becomes the new LO. */
+    env->active_tc.HI[idx] = old_lo;
+    env->active_tc.LO[idx] = rs;
+
+    /*
+     * pos advances by 64, unless it is already above 64.  The value
+     * is held in a uint8_t throughout.
+     */
+    cur_pos = get_DSPControl_pos(env);
+    if (cur_pos <= 64) {
+        cur_pos = cur_pos + 64;
+        set_DSPControl_pos(cur_pos, env);
+    }
+}
+#endif
+
+void cpu_wrdsp(uint32_t rs, uint32_t mask_num, CPUMIPSState *env)
+{
+    uint8_t  mask[6];
+    uint8_t  i;
+    uint32_t newbits, overwrite;
+    target_ulong dsp;
+    /* Copy the fields of rs selected by mask_num bits 0..5 into DSPControl. */
+    newbits   = 0x00;
+    overwrite = 0xFFFFFFFF;
+    dsp = env->active_tc.DSPControl;
+    /* Unpack the six selector bits. */
+    for (i = 0; i < 6; i++) {
+        mask[i] = (mask_num >> i) & 0x01;
+    }
+    /* Selector 0: bits 6..0 on MIPS64, bits 5..0 otherwise. */
+    if (mask[0] == 1) {
+#if defined(TARGET_MIPS64)
+        overwrite &= 0xFFFFFF80;
+        newbits   &= 0xFFFFFF80;
+        newbits   |= 0x0000007F & rs;
+#else
+        overwrite &= 0xFFFFFFC0;
+        newbits   &= 0xFFFFFFC0;
+        newbits   |= 0x0000003F & rs;
+#endif
+    }
+    /* Selector 1: bits 12..7. */
+    if (mask[1] == 1) {
+        overwrite &= 0xFFFFE07F;
+        newbits   &= 0xFFFFE07F;
+        newbits   |= 0x00001F80 & rs;
+    }
+    /* Selector 2: bit 13. */
+    if (mask[2] == 1) {
+        overwrite &= 0xFFFFDFFF;
+        newbits   &= 0xFFFFDFFF;
+        newbits   |= 0x00002000 & rs;
+    }
+    /* Selector 3: bits 23..16. */
+    if (mask[3] == 1) {
+        overwrite &= 0xFF00FFFF;
+        newbits   &= 0xFF00FFFF;
+        newbits   |= 0x00FF0000 & rs;
+    }
+    /* Selector 4: clears bits 31..24; MIPS32 then refills only bits 27..24. */
+    if (mask[4] == 1) {
+        overwrite &= 0x00FFFFFF;
+        newbits   &= 0x00FFFFFF;
+#if defined(TARGET_MIPS64)
+        newbits   |= 0xFF000000 & rs;
+#else
+        newbits   |= 0x0F000000 & rs;
+#endif
+    }
+    /* Selector 5: bit 14. */
+    if (mask[5] == 1) {
+        overwrite &= 0xFFFFBFFF;
+        newbits   &= 0xFFFFBFFF;
+        newbits   |= 0x00004000 & rs;
+    }
+    /* Clear the selected fields, then OR in their new values. */
+    dsp = dsp & overwrite;
+    dsp = dsp | newbits;
+    env->active_tc.DSPControl = dsp;
+}
+
+void helper_wrdsp(target_ulong rs, target_ulong mask_num, CPUMIPSState *env)
+{
+    cpu_wrdsp(rs, mask_num, env); /* TCG wrapper; cpu_wrdsp() does the work */
+}
+
+uint32_t cpu_rddsp(uint32_t mask_num, CPUMIPSState *env)
+{
+    uint8_t  mask[6];
+    uint32_t ruler, i;
+    target_ulong temp;
+    target_ulong dsp;
+    /* Return the DSPControl fields selected by mask_num bits 0..5. */
+    ruler = 0x01;
+    for (i = 0; i < 6; i++) {
+        mask[i] = (mask_num & ruler) >> i ;
+        ruler = ruler << 1;
+    }
+    /* temp collects the selected fields; unselected bits stay 0. */
+    temp  = 0x00;
+    dsp = env->active_tc.DSPControl;
+    /* Selector 0: bits 6..0 on MIPS64, bits 5..0 otherwise. */
+    if (mask[0] == 1) {
+#if defined(TARGET_MIPS64)
+        temp |= dsp & 0x7F;
+#else
+        temp |= dsp & 0x3F;
+#endif
+    }
+    /* Selector 1: bits 12..7. */
+    if (mask[1] == 1) {
+        temp |= dsp & 0x1F80;
+    }
+    /* Selector 2: bit 13. */
+    if (mask[2] == 1) {
+        temp |= dsp & 0x2000;
+    }
+    /* Selector 3: bits 23..16. */
+    if (mask[3] == 1) {
+        temp |= dsp & 0x00FF0000;
+    }
+    /* Selector 4: bits 31..24 on MIPS64, bits 27..24 otherwise. */
+    if (mask[4] == 1) {
+#if defined(TARGET_MIPS64)
+        temp |= dsp & 0xFF000000;
+#else
+        temp |= dsp & 0x0F000000;
+#endif
+    }
+    /* Selector 5: bit 14. */
+    if (mask[5] == 1) {
+        temp |= dsp & 0x4000;
+    }
+
+    return temp;
+}
+
+target_ulong helper_rddsp(target_ulong mask_num, CPUMIPSState *env)
+{
+    return cpu_rddsp(mask_num, env); /* TCG wrapper around cpu_rddsp() */
+}
+
+
+#undef MIPSDSP_LHI
+#undef MIPSDSP_LLO
+#undef MIPSDSP_HI
+#undef MIPSDSP_LO
+#undef MIPSDSP_Q3
+#undef MIPSDSP_Q2
+#undef MIPSDSP_Q1
+#undef MIPSDSP_Q0
+
+#undef MIPSDSP_SPLIT32_8
+#undef MIPSDSP_SPLIT32_16
+
+#undef MIPSDSP_RETURN32_8
+#undef MIPSDSP_RETURN32_16
+
+#ifdef TARGET_MIPS64
+#undef MIPSDSP_SPLIT64_16
+#undef MIPSDSP_SPLIT64_32
+#undef MIPSDSP_RETURN64_16
+#undef MIPSDSP_RETURN64_32
+#endif
diff --git a/target-mips/helper.h b/target-mips/helper.h
index 59ede15..b49f7da 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -25,8 +25,6 @@
 #ifdef TARGET_MIPS64
 DEF_HELPER_FLAGS_1(dclo, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_1(dclz, TCG_CALL_NO_RWG_SE, tl, tl)
-DEF_HELPER_3(dmult, void, env, tl, tl)
-DEF_HELPER_3(dmultu, void, env, tl, tl)
 #endif
 
 DEF_HELPER_3(muls, tl, env, tl, tl)
@@ -49,6 +47,8 @@
 DEF_HELPER_1(mfc0_mvpcontrol, tl, env)
 DEF_HELPER_1(mfc0_mvpconf0, tl, env)
 DEF_HELPER_1(mfc0_mvpconf1, tl, env)
+DEF_HELPER_1(mftc0_vpecontrol, tl, env)
+DEF_HELPER_1(mftc0_vpeconf0, tl, env)
 DEF_HELPER_1(mfc0_random, tl, env)
 DEF_HELPER_1(mfc0_tcstatus, tl, env)
 DEF_HELPER_1(mftc0_tcstatus, tl, env)
@@ -67,6 +67,10 @@
 DEF_HELPER_1(mfc0_count, tl, env)
 DEF_HELPER_1(mftc0_entryhi, tl, env)
 DEF_HELPER_1(mftc0_status, tl, env)
+DEF_HELPER_1(mftc0_cause, tl, env)
+DEF_HELPER_1(mftc0_epc, tl, env)
+DEF_HELPER_1(mftc0_ebase, tl, env)
+DEF_HELPER_2(mftc0_configx, tl, env, tl)
 DEF_HELPER_1(mfc0_lladdr, tl, env)
 DEF_HELPER_2(mfc0_watchlo, tl, env, i32)
 DEF_HELPER_2(mfc0_watchhi, tl, env, i32)
@@ -85,7 +89,9 @@
 DEF_HELPER_2(mtc0_index, void, env, tl)
 DEF_HELPER_2(mtc0_mvpcontrol, void, env, tl)
 DEF_HELPER_2(mtc0_vpecontrol, void, env, tl)
+DEF_HELPER_2(mttc0_vpecontrol, void, env, tl)
 DEF_HELPER_2(mtc0_vpeconf0, void, env, tl)
+DEF_HELPER_2(mttc0_vpeconf0, void, env, tl)
 DEF_HELPER_2(mtc0_vpeconf1, void, env, tl)
 DEF_HELPER_2(mtc0_yqmask, void, env, tl)
 DEF_HELPER_2(mtc0_vpeopt, void, env, tl)
@@ -124,9 +130,13 @@
 DEF_HELPER_2(mtc0_intctl, void, env, tl)
 DEF_HELPER_2(mtc0_srsctl, void, env, tl)
 DEF_HELPER_2(mtc0_cause, void, env, tl)
+DEF_HELPER_2(mttc0_cause, void, env, tl)
 DEF_HELPER_2(mtc0_ebase, void, env, tl)
+DEF_HELPER_2(mttc0_ebase, void, env, tl)
 DEF_HELPER_2(mtc0_config0, void, env, tl)
 DEF_HELPER_2(mtc0_config2, void, env, tl)
+DEF_HELPER_2(mtc0_config4, void, env, tl)
+DEF_HELPER_2(mtc0_config5, void, env, tl)
 DEF_HELPER_2(mtc0_lladdr, void, env, tl)
 DEF_HELPER_3(mtc0_watchlo, void, env, tl, i32)
 DEF_HELPER_3(mtc0_watchhi, void, env, tl, i32)
@@ -156,6 +166,15 @@
 DEF_HELPER_1(dvpe, tl, env)
 DEF_HELPER_1(evpe, tl, env)
 #endif /* !CONFIG_USER_ONLY */
+
+/* microMIPS functions */
+DEF_HELPER_4(lwm, void, env, tl, tl, i32)
+DEF_HELPER_4(swm, void, env, tl, tl, i32)
+#ifdef TARGET_MIPS64
+DEF_HELPER_4(ldm, void, env, tl, tl, i32)
+DEF_HELPER_4(sdm, void, env, tl, tl, i32)
+#endif
+
 DEF_HELPER_2(fork, void, tl, tl)
 DEF_HELPER_2(yield, tl, env, tl)
 
@@ -281,4 +300,399 @@
 DEF_HELPER_2(pmon, void, env, int)
 DEF_HELPER_1(wait, void, env)
 
+/* Loongson multimedia functions.  */
+DEF_HELPER_FLAGS_2(paddsh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(paddush, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(paddh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(paddw, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(paddsb, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(paddusb, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(paddb, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_2(psubsh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psubush, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psubh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psubw, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psubsb, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psubusb, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psubb, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_2(pshufh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(packsswh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(packsshb, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(packushb, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_2(punpcklhw, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(punpckhhw, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(punpcklbh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(punpckhbh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(punpcklwd, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(punpckhwd, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_2(pavgh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pavgb, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pmaxsh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pminsh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pmaxub, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pminub, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_2(pcmpeqw, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pcmpgtw, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pcmpeqh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pcmpgth, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pcmpeqb, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pcmpgtb, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_2(psllw, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psllh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psrlw, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psrlh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psraw, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psrah, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_2(pmullh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pmulhh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pmulhuh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pmaddhw, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_2(pasubub, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_1(biadd, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_FLAGS_1(pmovmskb, TCG_CALL_NO_RWG_SE, i64, i64)
+
+/*** MIPS DSP ***/
+/* DSP Arithmetic Sub-class insns */
+DEF_HELPER_FLAGS_3(addq_ph, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(addq_s_ph, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(addq_qh, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(addq_s_qh, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(addq_s_w, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(addq_pw, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(addq_s_pw, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(addu_qb, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(addu_s_qb, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_2(adduh_qb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(adduh_r_qb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_3(addu_ph, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(addu_s_ph, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_2(addqh_ph, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(addqh_r_ph, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(addqh_w, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(addqh_r_w, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(addu_ob, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(addu_s_ob, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_2(adduh_ob, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(adduh_r_ob, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_3(addu_qh, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(addu_s_qh, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(subq_ph, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(subq_s_ph, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(subq_qh, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(subq_s_qh, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(subq_s_w, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(subq_pw, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(subq_s_pw, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(subu_qb, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(subu_s_qb, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_2(subuh_qb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(subuh_r_qb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_3(subu_ph, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(subu_s_ph, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_2(subqh_ph, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(subqh_r_ph, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(subqh_w, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(subqh_r_w, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(subu_ob, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(subu_s_ob, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_2(subuh_ob, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(subuh_r_ob, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_3(subu_qh, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(subu_s_qh, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(addsc, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(addwc, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_2(modsub, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_1(raddu_w_qb, TCG_CALL_NO_RWG_SE, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_1(raddu_l_ob, TCG_CALL_NO_RWG_SE, tl, tl)
+#endif
+DEF_HELPER_FLAGS_2(absq_s_qb, 0, tl, tl, env)
+DEF_HELPER_FLAGS_2(absq_s_ph, 0, tl, tl, env)
+DEF_HELPER_FLAGS_2(absq_s_w, 0, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_2(absq_s_ob, 0, tl, tl, env)
+DEF_HELPER_FLAGS_2(absq_s_qh, 0, tl, tl, env)
+DEF_HELPER_FLAGS_2(absq_s_pw, 0, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_2(precr_qb_ph, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(precrq_qb_ph, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_3(precr_sra_ph_w, TCG_CALL_NO_RWG_SE,
+                   tl, i32, tl, tl)
+DEF_HELPER_FLAGS_3(precr_sra_r_ph_w, TCG_CALL_NO_RWG_SE,
+                   tl, i32, tl, tl)
+DEF_HELPER_FLAGS_2(precrq_ph_w, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_3(precrq_rs_ph_w, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_2(precr_ob_qh, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_3(precr_sra_qh_pw,
+                   TCG_CALL_NO_RWG_SE, tl, tl, tl, i32)
+DEF_HELPER_FLAGS_3(precr_sra_r_qh_pw,
+                   TCG_CALL_NO_RWG_SE, tl, tl, tl, i32)
+DEF_HELPER_FLAGS_2(precrq_ob_qh, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(precrq_qh_pw, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_3(precrq_rs_qh_pw,
+                   0, tl, tl, tl, env) /* takes env: flags 0 like *_ph_w */
+DEF_HELPER_FLAGS_2(precrq_pw_l, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+#endif
+DEF_HELPER_FLAGS_3(precrqu_s_qb_ph, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(precrqu_s_ob_qh,
+                   0, tl, tl, tl, env) /* takes env: flags 0 like *_qb_ph */
+
+DEF_HELPER_FLAGS_1(preceq_pw_qhl, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(preceq_pw_qhr, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(preceq_pw_qhla, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(preceq_pw_qhra, TCG_CALL_NO_RWG_SE, tl, tl)
+#endif
+DEF_HELPER_FLAGS_1(precequ_ph_qbl, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(precequ_ph_qbr, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(precequ_ph_qbla, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(precequ_ph_qbra, TCG_CALL_NO_RWG_SE, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_1(precequ_qh_obl, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(precequ_qh_obr, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(precequ_qh_obla, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(precequ_qh_obra, TCG_CALL_NO_RWG_SE, tl, tl)
+#endif
+DEF_HELPER_FLAGS_1(preceu_ph_qbl, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(preceu_ph_qbr, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(preceu_ph_qbla, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(preceu_ph_qbra, TCG_CALL_NO_RWG_SE, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_1(preceu_qh_obl, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(preceu_qh_obr, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(preceu_qh_obla, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(preceu_qh_obra, TCG_CALL_NO_RWG_SE, tl, tl)
+#endif
+
+/* DSP GPR-Based Shift Sub-class insns */
+DEF_HELPER_FLAGS_3(shll_qb, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(shll_ob, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(shll_ph, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(shll_s_ph, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(shll_qh, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(shll_s_qh, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(shll_s_w, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(shll_pw, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(shll_s_pw, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_2(shrl_qb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(shrl_ph, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_2(shrl_ob, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(shrl_qh, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+#endif
+DEF_HELPER_FLAGS_2(shra_qb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(shra_r_qb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_2(shra_ob, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(shra_r_ob, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+#endif
+DEF_HELPER_FLAGS_2(shra_ph, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(shra_r_ph, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(shra_r_w, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_2(shra_qh, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(shra_r_qh, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(shra_pw, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(shra_r_pw, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+#endif
+
+/* DSP Multiply Sub-class insns */
+DEF_HELPER_FLAGS_3(muleu_s_ph_qbl, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(muleu_s_ph_qbr, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(muleu_s_qh_obl, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(muleu_s_qh_obr, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(mulq_rs_ph, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(mulq_rs_qh, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(muleq_s_w_phl, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(muleq_s_w_phr, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(muleq_s_pw_qhl, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(muleq_s_pw_qhr, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_4(dpau_h_qbl, 0, void, i32, tl, tl, env)
+DEF_HELPER_FLAGS_4(dpau_h_qbr, 0, void, i32, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(dpau_h_obl, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(dpau_h_obr, 0, void, tl, tl, i32, env)
+#endif
+DEF_HELPER_FLAGS_4(dpsu_h_qbl, 0, void, i32, tl, tl, env)
+DEF_HELPER_FLAGS_4(dpsu_h_qbr, 0, void, i32, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(dpsu_h_obl, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(dpsu_h_obr, 0, void, tl, tl, i32, env)
+#endif
+DEF_HELPER_FLAGS_4(dpa_w_ph, 0, void, i32, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(dpa_w_qh, 0, void, tl, tl, i32, env)
+#endif
+DEF_HELPER_FLAGS_4(dpax_w_ph, 0, void, i32, tl, tl, env)
+DEF_HELPER_FLAGS_4(dpaq_s_w_ph, 0, void, i32, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(dpaq_s_w_qh, 0, void, tl, tl, i32, env)
+#endif
+DEF_HELPER_FLAGS_4(dpaqx_s_w_ph, 0, void, i32, tl, tl, env)
+DEF_HELPER_FLAGS_4(dpaqx_sa_w_ph, 0, void, i32, tl, tl, env)
+DEF_HELPER_FLAGS_4(dps_w_ph, 0, void, i32, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(dps_w_qh, 0, void, tl, tl, i32, env)
+#endif
+DEF_HELPER_FLAGS_4(dpsx_w_ph, 0, void, i32, tl, tl, env)
+DEF_HELPER_FLAGS_4(dpsq_s_w_ph, 0, void, i32, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(dpsq_s_w_qh, 0, void, tl, tl, i32, env)
+#endif
+DEF_HELPER_FLAGS_4(dpsqx_s_w_ph, 0, void, i32, tl, tl, env)
+DEF_HELPER_FLAGS_4(dpsqx_sa_w_ph, 0, void, i32, tl, tl, env)
+DEF_HELPER_FLAGS_4(mulsaq_s_w_ph, 0, void, i32, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(mulsaq_s_w_qh, 0, void, tl, tl, i32, env)
+#endif
+DEF_HELPER_FLAGS_4(dpaq_sa_l_w, 0, void, i32, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(dpaq_sa_l_pw, 0, void, tl, tl, i32, env)
+#endif
+DEF_HELPER_FLAGS_4(dpsq_sa_l_w, 0, void, i32, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(dpsq_sa_l_pw, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(mulsaq_s_l_pw, 0, void, tl, tl, i32, env)
+#endif
+DEF_HELPER_FLAGS_4(maq_s_w_phl, 0, void, i32, tl, tl, env)
+DEF_HELPER_FLAGS_4(maq_s_w_phr, 0, void, i32, tl, tl, env)
+DEF_HELPER_FLAGS_4(maq_sa_w_phl, 0, void, i32, tl, tl, env)
+DEF_HELPER_FLAGS_4(maq_sa_w_phr, 0, void, i32, tl, tl, env)
+DEF_HELPER_FLAGS_3(mul_ph, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(mul_s_ph, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(mulq_s_ph, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(mulq_s_w, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(mulq_rs_w, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_4(mulsa_w_ph, 0, void, i32, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(maq_s_w_qhll, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(maq_s_w_qhlr, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(maq_s_w_qhrl, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(maq_s_w_qhrr, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(maq_sa_w_qhll, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(maq_sa_w_qhlr, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(maq_sa_w_qhrl, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(maq_sa_w_qhrr, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(maq_s_l_pwl, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(maq_s_l_pwr, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(dmadd, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(dmaddu, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(dmsub, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(dmsubu, 0, void, tl, tl, i32, env)
+#endif
+
+/* DSP Bit/Manipulation Sub-class insns */
+DEF_HELPER_FLAGS_1(bitrev, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_3(insv, 0, tl, env, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(dinsv, 0, tl, env, tl, tl)
+#endif
+
+/* DSP Compare-Pick Sub-class insns */
+DEF_HELPER_FLAGS_3(cmpu_eq_qb, 0, void, tl, tl, env)
+DEF_HELPER_FLAGS_3(cmpu_lt_qb, 0, void, tl, tl, env)
+DEF_HELPER_FLAGS_3(cmpu_le_qb, 0, void, tl, tl, env)
+DEF_HELPER_FLAGS_2(cmpgu_eq_qb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(cmpgu_lt_qb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(cmpgu_le_qb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_3(cmp_eq_ph, 0, void, tl, tl, env)
+DEF_HELPER_FLAGS_3(cmp_lt_ph, 0, void, tl, tl, env)
+DEF_HELPER_FLAGS_3(cmp_le_ph, 0, void, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(cmpu_eq_ob, 0, void, tl, tl, env)
+DEF_HELPER_FLAGS_3(cmpu_lt_ob, 0, void, tl, tl, env)
+DEF_HELPER_FLAGS_3(cmpu_le_ob, 0, void, tl, tl, env)
+DEF_HELPER_FLAGS_3(cmpgdu_eq_ob, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(cmpgdu_lt_ob, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(cmpgdu_le_ob, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_2(cmpgu_eq_ob, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(cmpgu_lt_ob, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(cmpgu_le_ob, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_3(cmp_eq_qh, 0, void, tl, tl, env)
+DEF_HELPER_FLAGS_3(cmp_lt_qh, 0, void, tl, tl, env)
+DEF_HELPER_FLAGS_3(cmp_le_qh, 0, void, tl, tl, env)
+DEF_HELPER_FLAGS_3(cmp_eq_pw, 0, void, tl, tl, env)
+DEF_HELPER_FLAGS_3(cmp_lt_pw, 0, void, tl, tl, env)
+DEF_HELPER_FLAGS_3(cmp_le_pw, 0, void, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(pick_qb, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(pick_ph, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(pick_ob, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(pick_qh, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(pick_pw, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_2(packrl_ph, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_2(packrl_pw, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+#endif
+
+/* DSP Accumulator and DSPControl Access Sub-class insns */
+DEF_HELPER_FLAGS_3(extr_w, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(extr_r_w, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(extr_rs_w, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(dextr_w, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(dextr_r_w, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(dextr_rs_w, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(dextr_l, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(dextr_r_l, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(dextr_rs_l, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(extr_s_h, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(dextr_s_h, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(extp, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(extpdp, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(dextp, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(dextpdp, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(shilo, 0, void, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(dshilo, 0, void, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(mthlip, 0, void, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(dmthlip, 0, void, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(wrdsp, 0, void, tl, tl, env)
+DEF_HELPER_FLAGS_2(rddsp, 0, tl, tl, env)
+
+
+
 #include "exec/def-helper.h"
diff --git a/target-mips/lmi_helper.c b/target-mips/lmi_helper.c
new file mode 100644
index 0000000..1b24353
--- /dev/null
+++ b/target-mips/lmi_helper.c
@@ -0,0 +1,744 @@
+/*
+ *  Loongson Multimedia Instruction emulation helpers for QEMU.
+ *
+ *  Copyright (c) 2011  Richard Henderson <rth@twiddle.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "cpu.h"
+#include "helper.h"
+
+/* If the byte ordering doesn't matter, i.e. all columns are treated
+   identically, then this union can be used directly.  If byte ordering
+   does matter, we generally ignore dumping to memory.  */
+typedef union {
+    uint8_t  ub[8];   /* eight unsigned byte lanes */
+    int8_t   sb[8];   /* the same bytes, signed */
+    uint16_t uh[4];   /* four unsigned halfword lanes */
+    int16_t  sh[4];   /* the same halfwords, signed */
+    uint32_t uw[2];   /* two unsigned word lanes */
+    int32_t  sw[2];   /* the same words, signed */
+    uint64_t d;       /* the whole 64-bit value */
+} LMIValue;
+
+/* Some byte ordering issues can be mitigated by XORing in the following.  */
+#ifdef HOST_WORDS_BIGENDIAN
+# define BYTE_ORDER_XOR(N) N
+#else
+# define BYTE_ORDER_XOR(N) 0
+#endif
+
+/* Saturating clamps.  Each argument is evaluated more than once.  */
+#define SATSB(x)  ((x) < -0x80 ? -0x80 : (x) > 0x7f ? 0x7f : (x))
+#define SATUB(x)  ((x) > 0xff ? 0xff : (x))
+#define SATSH(x)  ((x) < -0x8000 ? -0x8000 : (x) > 0x7fff ? 0x7fff : (x))
+#define SATUH(x)  ((x) > 0xffff ? 0xffff : (x))
+
+#define SATSW(x)  ((x) < -0x80000000ll ? -0x80000000ll \
+                   : (x) > 0x7fffffff ? 0x7fffffff : (x))
+#define SATUW(x)  ((x) > 0xffffffffull ? 0xffffffffull : (x))
+
+/* Signed saturating add on each of the eight byte lanes of fs and ft.  */
+uint64_t helper_paddsb(uint64_t fs, uint64_t ft)
+{
+    LMIValue a = { .d = fs };
+    LMIValue b = { .d = ft };
+    unsigned int lane;
+
+    for (lane = 0; lane < 8; lane++) {
+        int sum = a.sb[lane] + b.sb[lane];
+        a.sb[lane] = SATSB(sum);
+    }
+    return a.d;
+}
+
+/* Unsigned saturating add on each of the eight byte lanes of fs and ft.  */
+uint64_t helper_paddusb(uint64_t fs, uint64_t ft)
+{
+    LMIValue a = { .d = fs };
+    LMIValue b = { .d = ft };
+    unsigned int lane;
+
+    for (lane = 0; lane < 8; lane++) {
+        int sum = a.ub[lane] + b.ub[lane];
+        a.ub[lane] = SATUB(sum);
+    }
+    return a.d;
+}
+
+/* Signed saturating add on each of the four halfword lanes of fs and ft.  */
+uint64_t helper_paddsh(uint64_t fs, uint64_t ft)
+{
+    LMIValue a = { .d = fs };
+    LMIValue b = { .d = ft };
+    unsigned int lane;
+
+    for (lane = 0; lane < 4; lane++) {
+        int sum = a.sh[lane] + b.sh[lane];
+        a.sh[lane] = SATSH(sum);
+    }
+    return a.d;
+}
+
+/* Unsigned saturating add on each of the four halfword lanes.  */
+uint64_t helper_paddush(uint64_t fs, uint64_t ft)
+{
+    LMIValue a = { .d = fs };
+    LMIValue b = { .d = ft };
+    unsigned int lane;
+
+    for (lane = 0; lane < 4; lane++) {
+        int sum = a.uh[lane] + b.uh[lane];
+        a.uh[lane] = SATUH(sum);
+    }
+    return a.d;
+}
+
+/* Wrapping add on each of the eight byte lanes of fs and ft.  */
+uint64_t helper_paddb(uint64_t fs, uint64_t ft)
+{
+    LMIValue a = { .d = fs };
+    LMIValue b = { .d = ft };
+    unsigned int lane;
+
+    for (lane = 0; lane < 8; lane++) {
+        a.ub[lane] = a.ub[lane] + b.ub[lane];
+    }
+    return a.d;
+}
+
+/* Wrapping add on each of the four halfword lanes of fs and ft.  */
+uint64_t helper_paddh(uint64_t fs, uint64_t ft)
+{
+    LMIValue a = { .d = fs };
+    LMIValue b = { .d = ft };
+    unsigned int lane;
+
+    for (lane = 0; lane < 4; lane++) {
+        a.uh[lane] = a.uh[lane] + b.uh[lane];
+    }
+    return a.d;
+}
+
+/* Wrapping add on each of the two word lanes of fs and ft.  */
+uint64_t helper_paddw(uint64_t fs, uint64_t ft)
+{
+    LMIValue a = { .d = fs };
+    LMIValue b = { .d = ft };
+    unsigned int lane;
+
+    for (lane = 0; lane < 2; lane++) {
+        a.uw[lane] = a.uw[lane] + b.uw[lane];
+    }
+    return a.d;
+}
+
+/* Signed saturating subtract (fs - ft) on each of the eight byte lanes.  */
+uint64_t helper_psubsb(uint64_t fs, uint64_t ft)
+{
+    LMIValue a = { .d = fs };
+    LMIValue b = { .d = ft };
+    unsigned int lane;
+
+    for (lane = 0; lane < 8; lane++) {
+        int diff = a.sb[lane] - b.sb[lane];
+        a.sb[lane] = SATSB(diff);
+    }
+    return a.d;
+}
+
+/* Unsigned saturating byte subtract: lanes where ft > fs clamp to 0.  */
+uint64_t helper_psubusb(uint64_t fs, uint64_t ft)
+{
+    uint64_t fd = 0;
+    unsigned int shift;
+
+    for (shift = 0; shift < 64; shift += 8) {
+        uint64_t a = (fs >> shift) & 0xff;
+        uint64_t b = (ft >> shift) & 0xff;
+        fd |= (a > b ? a - b : 0) << shift;
+    }
+    return fd;
+}
+
+/* Signed saturating subtract (fs - ft) on each of the four halfwords.  */
+uint64_t helper_psubsh(uint64_t fs, uint64_t ft)
+{
+    LMIValue a = { .d = fs };
+    LMIValue b = { .d = ft };
+    unsigned int lane;
+
+    for (lane = 0; lane < 4; lane++) {
+        int diff = a.sh[lane] - b.sh[lane];
+        a.sh[lane] = SATSH(diff);
+    }
+    return a.d;
+}
+
+/* Unsigned saturating halfword subtract: lanes where ft > fs clamp to 0.  */
+uint64_t helper_psubush(uint64_t fs, uint64_t ft)
+{
+    uint64_t fd = 0;
+    unsigned int shift;
+
+    for (shift = 0; shift < 64; shift += 16) {
+        uint64_t a = (fs >> shift) & 0xffff;
+        uint64_t b = (ft >> shift) & 0xffff;
+        fd |= (a > b ? a - b : 0) << shift;
+    }
+    return fd;
+}
+
+/* Wrapping subtract (fs - ft) on each of the eight byte lanes.  */
+uint64_t helper_psubb(uint64_t fs, uint64_t ft)
+{
+    LMIValue a = { .d = fs };
+    LMIValue b = { .d = ft };
+    unsigned int lane;
+
+    for (lane = 0; lane < 8; lane++) {
+        a.ub[lane] = a.ub[lane] - b.ub[lane];
+    }
+    return a.d;
+}
+
+/* Wrapping subtract (fs - ft) on each of the four halfword lanes.  */
+uint64_t helper_psubh(uint64_t fs, uint64_t ft)
+{
+    LMIValue a = { .d = fs };
+    LMIValue b = { .d = ft };
+    unsigned int lane;
+
+    for (lane = 0; lane < 4; lane++) {
+        a.uh[lane] = a.uh[lane] - b.uh[lane];
+    }
+    return a.d;
+}
+
+/* Wrapping subtract (fs - ft) on each of the two word lanes.  */
+uint64_t helper_psubw(uint64_t fs, uint64_t ft)
+{
+    LMIValue a = { .d = fs };
+    LMIValue b = { .d = ft };
+    unsigned int lane;
+
+    for (lane = 0; lane < 2; lane++) {
+        a.uw[lane] = a.uw[lane] - b.uw[lane];
+    }
+    return a.d;
+}
+
+/* Halfword shuffle: result lane i is the fs lane picked by ft[2i+1:2i].  */
+uint64_t helper_pshufh(uint64_t fs, uint64_t ft)
+{
+    const unsigned flip = BYTE_ORDER_XOR(3);
+    LMIValue src = { .d = fs }, dst = { .d = 0 };
+    unsigned lane;
+
+    for (lane = 0; lane < 4; lane++) {
+        unsigned sel = (ft >> (2 * lane)) & 3;
+        dst.uh[lane ^ flip] = src.uh[sel ^ flip];
+    }
+    return dst.d;
+}
+
+/* Pack the two signed words of fs (result halfwords 0-1) and of ft
+   (result halfwords 2-3), saturating each to [-0x8000, 0x7fff].  The
+   saturated value is converted to uint64_t before being shifted into
+   place, so a set bit 15 is never left-shifted into the sign bit of a
+   signed 64-bit operand.  */
+uint64_t helper_packsswh(uint64_t fs, uint64_t ft)
+{
+    uint64_t fd = 0;
+    unsigned int i;
+
+    for (i = 0; i < 4; ++i) {
+        int64_t tmp = (int32_t)((i < 2 ? fs : ft) >> ((i & 1) * 32));
+
+        if (tmp < -0x8000) {
+            tmp = -0x8000;
+        } else if (tmp > 0x7fff) {
+            tmp = 0x7fff;
+        }
+        fd |= ((uint64_t)tmp & 0xffff) << (i * 16);
+    }
+
+    return fd;
+}
+
+/* Pack the four signed halfwords of fs (result bytes 0-3) and of ft
+   (result bytes 4-7) into bytes, saturating each to [-0x80, 0x7f].  */
+uint64_t helper_packsshb(uint64_t fs, uint64_t ft)
+{
+    uint64_t fd = 0;
+    unsigned int i;
+
+    for (i = 0; i < 8; ++i) {
+        int16_t tmp = (i < 4 ? fs : ft) >> ((i & 3) * 16);
+        if (tmp < -0x80) {
+            tmp = -0x80;
+        } else if (tmp > 0x7f) {
+            tmp = 0x7f;
+        }
+        fd |= (uint64_t)(tmp & 0xff) << (i * 8);
+    }
+    return fd;
+}
+
+/* Pack the four signed halfwords of fs (result bytes 0-3) and of ft
+   (result bytes 4-7) into unsigned bytes with unsigned saturation:
+   negative inputs become 0 and inputs above 0xff become 0xff, instead
+   of a negative input keeping its low eight bits.  */
+uint64_t helper_packushb(uint64_t fs, uint64_t ft)
+{
+    uint64_t fd = 0;
+    unsigned int i;
+
+    for (i = 0; i < 8; ++i) {
+        int16_t tmp = (i < 4 ? fs : ft) >> ((i & 3) * 16);
+        uint64_t byte = tmp < 0 ? 0 : tmp > 0xff ? 0xff : tmp;
+
+        fd |= byte << (i * 8);
+    }
+
+    return fd;
+}
+
+uint64_t helper_punpcklwd(uint64_t fs, uint64_t ft)
+{
+    return (ft << 32) | (uint32_t)fs; /* low words: ft -> 63..32, fs -> 31..0 */
+}
+
+uint64_t helper_punpckhwd(uint64_t fs, uint64_t ft)
+{
+    return ((ft >> 32) << 32) | (fs >> 32); /* high words: ft high, fs low */
+}
+
+/* Interleave the two low halfwords of fs and ft: fs0, ft0, fs1, ft1.  */
+uint64_t helper_punpcklhw(uint64_t fs, uint64_t ft)
+{
+    const unsigned flip = BYTE_ORDER_XOR(3);
+    LMIValue a = { .d = fs }, b = { .d = ft }, out;
+    unsigned k;
+
+    for (k = 0; k < 2; k++) {
+        out.uh[(2 * k) ^ flip] = a.uh[k ^ flip];
+        out.uh[(2 * k + 1) ^ flip] = b.uh[k ^ flip];
+    }
+
+    return out.d;
+}
+
+/* Interleave the two high halfwords of fs and ft: fs2, ft2, fs3, ft3.  */
+uint64_t helper_punpckhhw(uint64_t fs, uint64_t ft)
+{
+    const unsigned flip = BYTE_ORDER_XOR(3);
+    LMIValue a = { .d = fs }, b = { .d = ft }, out;
+    unsigned k;
+
+    for (k = 0; k < 2; k++) {
+        out.uh[(2 * k) ^ flip] = a.uh[(k + 2) ^ flip];
+        out.uh[(2 * k + 1) ^ flip] = b.uh[(k + 2) ^ flip];
+    }
+
+    return out.d;
+}
+
+/* Interleave the four low bytes of fs and ft: result byte 2k comes from
+   byte k of fs and result byte 2k+1 from byte k of ft.  Lane indices are
+   XORed with BYTE_ORDER_XOR(7), which is non-zero on big-endian hosts.  */
+uint64_t helper_punpcklbh(uint64_t fs, uint64_t ft)
+{
+    const unsigned flip = BYTE_ORDER_XOR(7);
+    LMIValue a = { .d = fs };
+    LMIValue b = { .d = ft };
+    LMIValue out;
+    unsigned k;
+
+    for (k = 0; k < 4; k++) {
+        out.ub[(2 * k) ^ flip] = a.ub[k ^ flip];
+        out.ub[(2 * k + 1) ^ flip] = b.ub[k ^ flip];
+    }
+
+    return out.d;
+}
+
+/* Interleave the four high bytes of fs and ft: result byte 2k comes from
+   byte k+4 of fs and result byte 2k+1 from byte k+4 of ft.  Lane indices
+   are XORed with BYTE_ORDER_XOR(7), non-zero on big-endian hosts.  */
+uint64_t helper_punpckhbh(uint64_t fs, uint64_t ft)
+{
+    const unsigned flip = BYTE_ORDER_XOR(7);
+    LMIValue a = { .d = fs };
+    LMIValue b = { .d = ft };
+    LMIValue out;
+    unsigned k;
+
+    for (k = 0; k < 4; k++) {
+        out.ub[(2 * k) ^ flip] = a.ub[(k + 4) ^ flip];
+        out.ub[(2 * k + 1) ^ flip] = b.ub[(k + 4) ^ flip];
+    }
+
+    return out.d;
+}
+
+/* Average of each unsigned halfword lane pair, rounding halves up.  */
+uint64_t helper_pavgh(uint64_t fs, uint64_t ft)
+{
+    LMIValue a = { .d = fs };
+    LMIValue b = { .d = ft };
+    unsigned lane;
+
+    for (lane = 0; lane < 4; lane++) {
+        a.uh[lane] = (1 + a.uh[lane] + b.uh[lane]) / 2;
+    }
+    return a.d;
+}
+
+/* Average of each unsigned byte lane pair, rounding halves up.  */
+uint64_t helper_pavgb(uint64_t fs, uint64_t ft)
+{
+    LMIValue a = { .d = fs };
+    LMIValue b = { .d = ft };
+    unsigned lane;
+
+    for (lane = 0; lane < 8; lane++) {
+        a.ub[lane] = (1 + a.ub[lane] + b.ub[lane]) / 2;
+    }
+    return a.d;
+}
+
+/* Signed maximum of each of the four halfword lanes of fs and ft.  */
+uint64_t helper_pmaxsh(uint64_t fs, uint64_t ft)
+{
+    LMIValue a = { .d = fs };
+    LMIValue b = { .d = ft };
+    unsigned lane;
+
+    for (lane = 0; lane < 4; lane++) {
+        a.sh[lane] = (b.sh[lane] > a.sh[lane] ? b.sh[lane] : a.sh[lane]);
+    }
+    return a.d;
+}
+
+/* Signed minimum of each of the four halfword lanes of fs and ft.  */
+uint64_t helper_pminsh(uint64_t fs, uint64_t ft)
+{
+    LMIValue a = { .d = fs };
+    LMIValue b = { .d = ft };
+    unsigned lane;
+
+    for (lane = 0; lane < 4; lane++) {
+        a.sh[lane] = (b.sh[lane] < a.sh[lane] ? b.sh[lane] : a.sh[lane]);
+    }
+    return a.d;
+}
+
+/* Unsigned maximum of each byte lane, over all eight lanes of fs and ft.  */
+uint64_t helper_pmaxub(uint64_t fs, uint64_t ft)
+{
+    uint64_t fd = 0;
+    unsigned int shift;
+
+    for (shift = 0; shift < 64; shift += 8) {
+        uint64_t a = (fs >> shift) & 0xff, b = (ft >> shift) & 0xff;
+        fd |= (a >= b ? a : b) << shift;
+    }
+    return fd;
+}
+
+/* Unsigned minimum of each byte lane, over all eight lanes of fs and ft.  */
+uint64_t helper_pminub(uint64_t fs, uint64_t ft)
+{
+    uint64_t fd = 0;
+    unsigned int shift;
+
+    for (shift = 0; shift < 64; shift += 8) {
+        uint64_t a = (fs >> shift) & 0xff, b = (ft >> shift) & 0xff;
+        fd |= (a <= b ? a : b) << shift;
+    }
+    return fd;
+}
+
+/* Per-word equality mask: all-ones where the lanes match, else zero.  */
+uint64_t helper_pcmpeqw(uint64_t fs, uint64_t ft)
+{
+    LMIValue a = { .d = fs };
+    LMIValue b = { .d = ft };
+    unsigned lane;
+
+    for (lane = 0; lane < 2; lane++) {
+        a.uw[lane] = (a.uw[lane] == b.uw[lane]) ? 0xffffffffu : 0;
+    }
+    return a.d;
+}
+
+/* Per-word mask of fs > ft, with the lanes compared as unsigned values.
+   NOTE(review): confirm against the ISA whether signed is intended.  */
+uint64_t helper_pcmpgtw(uint64_t fs, uint64_t ft)
+{
+    LMIValue a = { .d = fs }, b = { .d = ft };
+    unsigned lane;
+
+    for (lane = 0; lane < 2; lane++) {
+        a.uw[lane] = (a.uw[lane] > b.uw[lane]) ? 0xffffffffu : 0;
+    }
+    return a.d;
+}
+
+/* Per-halfword equality mask: 0xffff where the lanes match, else zero.  */
+uint64_t helper_pcmpeqh(uint64_t fs, uint64_t ft)
+{
+    LMIValue a = { .d = fs };
+    LMIValue b = { .d = ft };
+    unsigned lane;
+
+    for (lane = 0; lane < 4; lane++) {
+        a.uh[lane] = (a.uh[lane] == b.uh[lane]) ? 0xffff : 0;
+    }
+    return a.d;
+}
+
+/* Per-halfword mask of fs > ft, with the lanes compared as unsigned.
+   NOTE(review): confirm against the ISA whether signed is intended.  */
+uint64_t helper_pcmpgth(uint64_t fs, uint64_t ft)
+{
+    LMIValue a = { .d = fs }, b = { .d = ft };
+    unsigned lane;
+
+    for (lane = 0; lane < 4; lane++) {
+        a.uh[lane] = (a.uh[lane] > b.uh[lane]) ? 0xffff : 0;
+    }
+    return a.d;
+}
+
+/* Per-byte equality mask: 0xff where the lanes match, else zero.  */
+uint64_t helper_pcmpeqb(uint64_t fs, uint64_t ft)
+{
+    LMIValue a = { .d = fs };
+    LMIValue b = { .d = ft };
+    unsigned lane;
+
+    for (lane = 0; lane < 8; lane++) {
+        a.ub[lane] = (a.ub[lane] == b.ub[lane]) ? 0xff : 0;
+    }
+    return a.d;
+}
+
+/* Per-byte mask of fs > ft, with the lanes compared as unsigned values.
+   NOTE(review): confirm against the ISA whether signed is intended.  */
+uint64_t helper_pcmpgtb(uint64_t fs, uint64_t ft)
+{
+    LMIValue a = { .d = fs }, b = { .d = ft };
+    unsigned lane;
+
+    for (lane = 0; lane < 8; lane++) {
+        a.ub[lane] = (a.ub[lane] > b.ub[lane]) ? 0xff : 0;
+    }
+    return a.d;
+}
+
+/* Shift both 32-bit lanes of fs left by ft[6:0]; counts above 31 give 0.  */
+uint64_t helper_psllw(uint64_t fs, uint64_t ft)
+{
+    LMIValue v = { .d = fs };
+    unsigned int count = ft & 0x7f;
+    unsigned lane;
+
+    if (count > 31) {
+        return 0;
+    }
+    for (lane = 0; lane < 2; lane++) {
+        v.uw[lane] <<= count;
+    }
+    return v.d;
+}
+
+/* Shift both unsigned 32-bit lanes right by ft[6:0]; above 31 gives 0.  */
+uint64_t helper_psrlw(uint64_t fs, uint64_t ft)
+{
+    LMIValue v = { .d = fs };
+    unsigned int count = ft & 0x7f;
+    unsigned lane;
+
+    if (count > 31) {
+        return 0;
+    }
+    for (lane = 0; lane < 2; lane++) {
+        v.uw[lane] >>= count;
+    }
+    return v.d;
+}
+
+/* Right-shift both signed 32-bit lanes by ft[6:0], capping the count at 31.  */
+uint64_t helper_psraw(uint64_t fs, uint64_t ft)
+{
+    LMIValue v = { .d = fs };
+    unsigned int count = ft & 0x7f;
+    unsigned lane;
+
+    if (count > 31) {
+        count = 31;
+    }
+    for (lane = 0; lane < 2; lane++) {
+        v.sw[lane] >>= count;
+    }
+    return v.d;
+}
+
+/* Shift all four 16-bit lanes left by ft[6:0]; counts above 15 give 0.  */
+uint64_t helper_psllh(uint64_t fs, uint64_t ft)
+{
+    LMIValue v = { .d = fs };
+    unsigned int count = ft & 0x7f;
+    unsigned lane;
+
+    if (count > 15) {
+        return 0;
+    }
+    for (lane = 0; lane < 4; lane++) {
+        v.uh[lane] <<= count;
+    }
+    return v.d;
+}
+
+/* Shift all four unsigned 16-bit lanes right by ft[6:0]; above 15 gives 0.  */
+uint64_t helper_psrlh(uint64_t fs, uint64_t ft)
+{
+    LMIValue v = { .d = fs };
+    unsigned int count = ft & 0x7f;
+    unsigned lane;
+
+    if (count > 15) {
+        return 0;
+    }
+    for (lane = 0; lane < 4; lane++) {
+        v.uh[lane] >>= count;
+    }
+    return v.d;
+}
+
+/* Right-shift all four signed 16-bit lanes by ft[6:0], capped at 15.  */
+uint64_t helper_psrah(uint64_t fs, uint64_t ft)
+{
+    LMIValue v = { .d = fs };
+    unsigned int count = ft & 0x7f;
+    unsigned lane;
+
+    if (count > 15) {
+        count = 15;
+    }
+    for (lane = 0; lane < 4; lane++) {
+        v.sh[lane] >>= count;
+    }
+    return v.d;
+}
+
+/* Signed halfword multiply; each product is stored back into its lane.  */
+uint64_t helper_pmullh(uint64_t fs, uint64_t ft)
+{
+    LMIValue a = { .d = fs };
+    LMIValue b = { .d = ft };
+    unsigned lane;
+
+    for (lane = 0; lane < 4; lane++) {
+        a.sh[lane] = a.sh[lane] * b.sh[lane];
+    }
+    return a.d;
+}
+
+/* Signed halfword multiply keeping bits 31..16 of each 32-bit product.  */
+uint64_t helper_pmulhh(uint64_t fs, uint64_t ft)
+{
+    LMIValue a = { .d = fs };
+    LMIValue b = { .d = ft };
+    unsigned lane;
+
+    for (lane = 0; lane < 4; lane++) {
+        int32_t prod = a.sh[lane] * b.sh[lane];
+        a.sh[lane] = prod >> 16;
+    }
+    return a.d;
+}
+
+/* High 16 bits of the unsigned product of each halfword lane.  Operands are
+   held in uint32_t so 0xffff * 0xffff is not computed in (signed) int.  */
+uint64_t helper_pmulhuh(uint64_t fs, uint64_t ft)
+{
+    uint64_t fd = 0;
+    unsigned int shift;
+
+    for (shift = 0; shift < 64; shift += 16) {
+        uint32_t a = (fs >> shift) & 0xffff, b = (ft >> shift) & 0xffff;
+        fd |= (uint64_t)((a * b) >> 16) << shift;
+    }
+    return fd;
+}
+
+/* Multiply matching signed halfword lanes and add adjacent products:
+   lanes 0 and 1 form the low result word, lanes 2 and 3 the high one.  */
+uint64_t helper_pmaddhw(uint64_t fs, uint64_t ft)
+{
+    const unsigned flip = BYTE_ORDER_XOR(3);
+    LMIValue a = { .d = fs }, b = { .d = ft };
+    uint32_t word[2];
+    unsigned k;
+
+    for (k = 0; k < 2; k++) {
+        word[k]  = a.sh[(2 * k) ^ flip] * b.sh[(2 * k) ^ flip];
+        word[k] += a.sh[(2 * k + 1) ^ flip] * b.sh[(2 * k + 1) ^ flip];
+    }
+    return ((uint64_t)word[1] << 32) | word[0];
+}
+
+/* Absolute difference of each of the eight unsigned byte lanes.  */
+uint64_t helper_pasubub(uint64_t fs, uint64_t ft)
+{
+    LMIValue a = { .d = fs };
+    LMIValue b = { .d = ft };
+    unsigned lane;
+
+    for (lane = 0; lane < 8; lane++) {
+        int diff = a.ub[lane] - b.ub[lane];
+        a.ub[lane] = (diff >= 0 ? diff : -diff);
+    }
+    return a.d;
+}
+
+/* Sum of the eight unsigned bytes of fs, returned in the low 16 bits.  */
+uint64_t helper_biadd(uint64_t fs)
+{
+    unsigned total = 0;
+    for (; fs != 0; fs >>= 8) {
+        total += fs & 0xff;
+    }
+    return total & 0xffff;
+}
+
+/* Collect the most significant bit of each byte of fs: bit i of the result
+   is bit 7 of byte i.  */
+uint64_t helper_pmovmskb(uint64_t fs)
+{
+    unsigned mask = 0;
+    unsigned byte;
+
+    for (byte = 0; byte < 8; byte++) {
+        unsigned msb = (fs >> (8 * byte + 7)) & 1;
+
+        mask |= msb << byte;
+    }
+
+    return mask & 0xff;
+}
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index 49f4dac..29c3c6e 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -89,7 +89,7 @@
 static inline type do_##name(CPUMIPSState *env, target_ulong addr,      \
                              int mem_idx)                               \
 {                                                                       \
-    return (type) cpu_##insn##_raw(env, addr);                                     \
+    return (type) insn##_raw(addr);                                     \
 }
 #else
 #define HELPER_LD(name, insn, type)                                     \
@@ -117,7 +117,7 @@
 static inline void do_##name(CPUMIPSState *env, target_ulong addr,      \
                              type val, int mem_idx)                     \
 {                                                                       \
-    cpu_##insn##_raw(env, addr, val);                                              \
+    insn##_raw(addr, val);                                              \
 }
 #else
 #define HELPER_ST(name, insn, type)                                     \
@@ -168,135 +168,117 @@
     return ((uint64_t)(env->active_tc.HI[0]) << 32) | (uint32_t)env->active_tc.LO[0];
 }
 
-static inline void set_HILO (CPUMIPSState *env, uint64_t HILO)
+static inline target_ulong set_HIT0_LO(CPUMIPSState *env, uint64_t HILO)
 {
-    env->active_tc.LO[0] = (int32_t)HILO;
-    env->active_tc.HI[0] = (int32_t)(HILO >> 32);
-}
-
-static inline void set_HIT0_LO (CPUMIPSState *env, target_ulong arg1, uint64_t HILO)
-{
+    target_ulong tmp;
     env->active_tc.LO[0] = (int32_t)(HILO & 0xFFFFFFFF);
-    arg1 = env->active_tc.HI[0] = (int32_t)(HILO >> 32);
+    tmp = env->active_tc.HI[0] = (int32_t)(HILO >> 32);
+    return tmp;
 }
 
-static inline void set_HI_LOT0 (CPUMIPSState *env, target_ulong arg1, uint64_t HILO)
+static inline target_ulong set_HI_LOT0(CPUMIPSState *env, uint64_t HILO)
 {
-    arg1 = env->active_tc.LO[0] = (int32_t)(HILO & 0xFFFFFFFF);
+    target_ulong tmp = env->active_tc.LO[0] = (int32_t)(HILO & 0xFFFFFFFF);
     env->active_tc.HI[0] = (int32_t)(HILO >> 32);
+    return tmp;
 }
 
 /* Multiplication variants of the vr54xx. */
 target_ulong helper_muls(CPUMIPSState *env, target_ulong arg1,
                          target_ulong arg2)
 {
-    set_HI_LOT0(env, arg1, 0 - ((int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2));
-
-    return arg1;
+    return set_HI_LOT0(env, 0 - ((int64_t)(int32_t)arg1 *
+                                 (int64_t)(int32_t)arg2));
 }
 
 target_ulong helper_mulsu(CPUMIPSState *env, target_ulong arg1,
                           target_ulong arg2)
 {
-    set_HI_LOT0(env, arg1, 0 - ((uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2));
-
-    return arg1;
+    return set_HI_LOT0(env, 0 - (uint64_t)(uint32_t)arg1 *
+                       (uint64_t)(uint32_t)arg2);
 }
 
 target_ulong helper_macc(CPUMIPSState *env, target_ulong arg1,
                          target_ulong arg2)
 {
-    set_HI_LOT0(env, arg1, ((int64_t)get_HILO(env)) + ((int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2));
-
-    return arg1;
+    return set_HI_LOT0(env, (int64_t)get_HILO(env) + (int64_t)(int32_t)arg1 *
+                       (int64_t)(int32_t)arg2);
 }
 
 target_ulong helper_macchi(CPUMIPSState *env, target_ulong arg1,
                            target_ulong arg2)
 {
-    set_HIT0_LO(env, arg1, ((int64_t)get_HILO(env)) + ((int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2));
-
-    return arg1;
+    return set_HIT0_LO(env, (int64_t)get_HILO(env) + (int64_t)(int32_t)arg1 *
+                       (int64_t)(int32_t)arg2);
 }
 
 target_ulong helper_maccu(CPUMIPSState *env, target_ulong arg1,
                           target_ulong arg2)
 {
-    set_HI_LOT0(env, arg1, ((uint64_t)get_HILO(env)) + ((uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2));
-
-    return arg1;
+    return set_HI_LOT0(env, (uint64_t)get_HILO(env) +
+                       (uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2);
 }
 
 target_ulong helper_macchiu(CPUMIPSState *env, target_ulong arg1,
                             target_ulong arg2)
 {
-    set_HIT0_LO(env, arg1, ((uint64_t)get_HILO(env)) + ((uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2));
-
-    return arg1;
+    return set_HIT0_LO(env, (uint64_t)get_HILO(env) +
+                       (uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2);
 }
 
 target_ulong helper_msac(CPUMIPSState *env, target_ulong arg1,
                          target_ulong arg2)
 {
-    set_HI_LOT0(env, arg1, ((int64_t)get_HILO(env)) - ((int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2));
-
-    return arg1;
+    return set_HI_LOT0(env, (int64_t)get_HILO(env) - (int64_t)(int32_t)arg1 *
+                       (int64_t)(int32_t)arg2);
 }
 
 target_ulong helper_msachi(CPUMIPSState *env, target_ulong arg1,
                            target_ulong arg2)
 {
-    set_HIT0_LO(env, arg1, ((int64_t)get_HILO(env)) - ((int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2));
-
-    return arg1;
+    return set_HIT0_LO(env, (int64_t)get_HILO(env) - (int64_t)(int32_t)arg1 *
+                       (int64_t)(int32_t)arg2);
 }
 
 target_ulong helper_msacu(CPUMIPSState *env, target_ulong arg1,
                           target_ulong arg2)
 {
-    set_HI_LOT0(env, arg1, ((uint64_t)get_HILO(env)) - ((uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2));
-
-    return arg1;
+    return set_HI_LOT0(env, (uint64_t)get_HILO(env) -
+                       (uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2);
 }
 
 target_ulong helper_msachiu(CPUMIPSState *env, target_ulong arg1,
                             target_ulong arg2)
 {
-    set_HIT0_LO(env, arg1, ((uint64_t)get_HILO(env)) - ((uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2));
-
-    return arg1;
+    return set_HIT0_LO(env, (uint64_t)get_HILO(env) -
+                       (uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2);
 }
 
 target_ulong helper_mulhi(CPUMIPSState *env, target_ulong arg1,
                           target_ulong arg2)
 {
-    set_HIT0_LO(env, arg1, (int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2);
-
-    return arg1;
+    return set_HIT0_LO(env, (int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2);
 }
 
 target_ulong helper_mulhiu(CPUMIPSState *env, target_ulong arg1,
                            target_ulong arg2)
 {
-    set_HIT0_LO(env, arg1, (uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2);
-
-    return arg1;
+    return set_HIT0_LO(env, (uint64_t)(uint32_t)arg1 *
+                       (uint64_t)(uint32_t)arg2);
 }
 
 target_ulong helper_mulshi(CPUMIPSState *env, target_ulong arg1,
                            target_ulong arg2)
 {
-    set_HIT0_LO(env, arg1, 0 - ((int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2));
-
-    return arg1;
+    return set_HIT0_LO(env, 0 - (int64_t)(int32_t)arg1 *
+                       (int64_t)(int32_t)arg2);
 }
 
 target_ulong helper_mulshiu(CPUMIPSState *env, target_ulong arg1,
                             target_ulong arg2)
 {
-    set_HIT0_LO(env, arg1, 0 - ((uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2));
-
-    return arg1;
+    return set_HIT0_LO(env, 0 - (uint64_t)(uint32_t)arg1 *
+                       (uint64_t)(uint32_t)arg2);
 }
 
 #ifdef TARGET_MIPS64
@@ -314,8 +296,8 @@
 #ifndef CONFIG_USER_ONLY
 
 static inline hwaddr do_translate_address(CPUMIPSState *env,
-                                          target_ulong address,
-                                          int rw)
+                                                      target_ulong address,
+                                                      int rw)
 {
     hwaddr lladdr;
 
@@ -471,17 +453,157 @@
 }
 #endif /* TARGET_MIPS64 */
 
-#ifndef CONFIG_USER_ONLY
-/* tc should point to an int with the value of the global TC index.
-   This function will transform it into a local index within the
-   returned CPUState.
+static const int multiple_regs[] = { 16, 17, 18, 19, 20, 21, 22, 23, 30 };
 
-   FIXME: This code assumes that all VPEs have the same number of TCs,
+/* Load with do_lw from addr, addr + 4, ...: first into the leading
+   (reglist & 0xf) entries of multiple_regs[] (none when that count is 0
+   or exceeds the table), then into gpr[31] when reglist bit 4 is set.  */
+void helper_lwm(CPUMIPSState *env, target_ulong addr, target_ulong reglist,
+                uint32_t mem_idx)
+{
+    target_ulong count = reglist & 0xf;
+    target_ulong n;
+
+    if (count > ARRAY_SIZE(multiple_regs)) {
+        count = 0;
+    }
+    for (n = 0; n < count; n++, addr += 4) {
+        env->active_tc.gpr[multiple_regs[n]] =
+            (target_long)do_lw(env, addr, mem_idx);
+    }
+    if (reglist & 0x10) {
+        env->active_tc.gpr[31] = (target_long)do_lw(env, addr, mem_idx);
+    }
+}
+
+/* Store with do_sw at addr, addr + 4, ...: first the leading
+   (reglist & 0xf) entries of multiple_regs[] (none when that count is 0
+   or exceeds the table), then gpr[31] when reglist bit 4 is set.  */
+void helper_swm(CPUMIPSState *env, target_ulong addr, target_ulong reglist,
+                uint32_t mem_idx)
+{
+    target_ulong count = reglist & 0xf;
+    target_ulong n;
+
+    if (count > ARRAY_SIZE(multiple_regs)) {
+        count = 0;
+    }
+    for (n = 0; n < count; n++, addr += 4) {
+        do_sw(env, addr, env->active_tc.gpr[multiple_regs[n]], mem_idx);
+    }
+    if (reglist & 0x10) {
+        do_sw(env, addr, env->active_tc.gpr[31], mem_idx);
+    }
+}
+
+#if defined(TARGET_MIPS64)
+/* Load with do_ld from addr, addr + 8, ...: first into the leading
+   (reglist & 0xf) entries of multiple_regs[] (none when that count is 0
+   or exceeds the table), then into gpr[31] when reglist bit 4 is set.  */
+void helper_ldm(CPUMIPSState *env, target_ulong addr, target_ulong reglist,
+                uint32_t mem_idx)
+{
+    target_ulong count = reglist & 0xf;
+    target_ulong n;
+
+    if (count > ARRAY_SIZE(multiple_regs)) {
+        count = 0;
+    }
+    for (n = 0; n < count; n++, addr += 8) {
+        env->active_tc.gpr[multiple_regs[n]] = do_ld(env, addr, mem_idx);
+    }
+    if (reglist & 0x10) {
+        env->active_tc.gpr[31] = do_ld(env, addr, mem_idx);
+    }
+}
+
+/* Store with do_sd at addr, addr + 8, ...: first the leading
+   (reglist & 0xf) entries of multiple_regs[] (none when that count is 0
+   or exceeds the table), then gpr[31] when reglist bit 4 is set.  */
+void helper_sdm(CPUMIPSState *env, target_ulong addr, target_ulong reglist,
+                uint32_t mem_idx)
+{
+    target_ulong count = reglist & 0xf;
+    target_ulong n;
+
+    if (count > ARRAY_SIZE(multiple_regs)) {
+        count = 0;
+    }
+    for (n = 0; n < count; n++, addr += 8) {
+        do_sd(env, addr, env->active_tc.gpr[multiple_regs[n]], mem_idx);
+    }
+    if (reglist & 0x10) {
+        do_sd(env, addr, env->active_tc.gpr[31], mem_idx);
+    }
+}
+#endif
+
+#ifndef CONFIG_USER_ONLY
+/* SMP helpers.  */
+/* A VPE that is halted yet still active is waiting for an interrupt.  */
+static bool mips_vpe_is_wfi(MIPSCPU *c)
+{
+    CPUState *cs = CPU(c);
+    if (cs->halted) {
+        return mips_vpe_active(&c->env);
+    }
+    return false;
+}
+
+static inline void mips_vpe_wake(MIPSCPU *c)
+{
+    /* Don't set ->halted = 0 directly; let it be done via cpu_has_work,
+       because there might be other conditions that state that c should
+       be sleeping.  */
+    cpu_interrupt(CPU(c), CPU_INTERRUPT_WAKE);
+}
+
+static inline void mips_vpe_sleep(MIPSCPU *cpu)
+{
+    CPUState *cs = CPU(cpu);
+
+    /* The VPE was shut off, so really go to sleep: mark the CPU halted
+       and clear any pending CPU_INTERRUPT_WAKE request.  */
+    cs->halted = 1;
+    cpu_reset_interrupt(cs, CPU_INTERRUPT_WAKE);
+}
+
+/* Wake the VPE when it is active and mips_vpe_is_wfi() reports false.
+   The tc argument is currently unused.  */
+static inline void mips_tc_wake(MIPSCPU *cpu, int tc)
+{
+    /* FIXME: TC reschedule.  */
+    if (mips_vpe_active(&cpu->env) && !mips_vpe_is_wfi(cpu)) {
+        mips_vpe_wake(cpu);
+    }
+}
+
+/* Put the VPE to sleep once it is no longer active.  The tc argument is
+   currently unused.  */
+static inline void mips_tc_sleep(MIPSCPU *cpu, int tc)
+{
+    /* FIXME: TC reschedule.  */
+    if (!mips_vpe_active(&cpu->env)) {
+        mips_vpe_sleep(cpu);
+    }
+}
+
+/**
+ * mips_cpu_map_tc:
+ * @env: CPU from which mapping is performed.
+ * @tc: Should point to an int with the value of the global TC index.
+ *
+ * This function will transform @tc into a local index within the
+ * returned #CPUMIPSState.
+ */
+/* FIXME: This code assumes that all VPEs have the same number of TCs,
           which depends on runtime setup. Can probably be fixed by
           walking the list of CPUMIPSStates.  */
 static CPUMIPSState *mips_cpu_map_tc(CPUMIPSState *env, int *tc)
 {
-    int vpe_idx, nr_threads = ENV_GET_CPU(env)->nr_threads;
+    CPUState *cs;
+    CPUState *other_cs;
+    int vpe_idx;
     int tc_idx = *tc;
 
     if (!(env->CP0_VPEConf0 & (1 << CP0VPEC0_MVP))) {
@@ -490,10 +612,14 @@
         return env;
     }
 
-    vpe_idx = tc_idx / nr_threads;
-    *tc = tc_idx % nr_threads;
-    CPUState *other = qemu_get_cpu(vpe_idx);
-    return other ? other->env_ptr : env;
+    cs = CPU(mips_env_get_cpu(env));
+    vpe_idx = tc_idx / cs->nr_threads;
+    *tc = tc_idx % cs->nr_threads;
+    other_cs = qemu_get_cpu(vpe_idx);
+    if (other_cs == NULL) {
+        return env;
+    }
+    return other_cs->env_ptr;
 }
 
 /* The per VPE CP0_Status register shares some fields with the per TC
@@ -735,6 +861,21 @@
     return other->CP0_EntryHi;
 }
 
+target_ulong helper_mftc0_cause(CPUMIPSState *env)
+{
+    int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    int32_t tccause;
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
+
+    /* Cause is read from the state mips_cpu_map_tc() returns, not from a
+       per-TC slot, so the result does not depend on whether the target
+       TC is that state's current_tc.  The int32_t temporary narrows the
+       value to 32 bits before it is returned as target_ulong.  */
+    tccause = other->CP0_Cause;
+
+    return tccause;
+}
+
 target_ulong helper_mftc0_status(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
@@ -866,6 +1007,38 @@
     env->CP0_VPEControl = newval;
 }
 
+void helper_mttc0_vpecontrol(CPUMIPSState *env, target_ulong arg1)
+{
+    int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
+    /* Only YSI, GSI, TE and TargTC are taken from the written value.  */
+    const uint32_t writable = (1 << CP0VPECo_YSI) | (1 << CP0VPECo_GSI) |
+                              (1 << CP0VPECo_TE) |
+                              (0xff << CP0VPECo_TargTC);
+    uint32_t kept = other->CP0_VPEControl & ~writable;
+    uint32_t updated = arg1 & writable;
+
+    /* TODO: Enable/disable TCs.  */
+
+    other->CP0_VPEControl = kept | updated;
+}
+
+target_ulong helper_mftc0_vpecontrol(CPUMIPSState *env)
+{
+    int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
+    /* FIXME: Mask away the bits that must read back as zero.  */
+    return other->CP0_VPEControl;
+}
+
+target_ulong helper_mftc0_vpeconf0(CPUMIPSState *env)
+{
+    int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
+    /* Return VPEConf0 from the state mips_cpu_map_tc() picked for TargTC.  */
+    return other->CP0_VPEConf0;
+}
+
 void helper_mtc0_vpeconf0(CPUMIPSState *env, target_ulong arg1)
 {
     uint32_t mask = 0;
@@ -883,6 +1056,20 @@
     env->CP0_VPEConf0 = newval;
 }
 
+void helper_mttc0_vpeconf0(CPUMIPSState *env, target_ulong arg1)
+{
+    int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
+    /* Only the MVP and VPA bits are taken from the written value.  */
+    const uint32_t writable = (1 << CP0VPEC0_MVP) | (1 << CP0VPEC0_VPA);
+    uint32_t kept = other->CP0_VPEConf0 & ~writable;
+    uint32_t updated = arg1 & writable;
+
+    /* TODO: TC exclusive handling due to ERL/EXL.  */
+
+    other->CP0_VPEConf0 = kept | updated;
+}
+
 void helper_mtc0_vpeconf1(CPUMIPSState *env, target_ulong arg1)
 {
     uint32_t mask = 0;
@@ -999,15 +1186,23 @@
 
 void helper_mtc0_tchalt(CPUMIPSState *env, target_ulong arg1)
 {
+    MIPSCPU *cpu = mips_env_get_cpu(env);
+
     env->active_tc.CP0_TCHalt = arg1 & 0x1;
 
     // TODO: Halt TC / Restart (if allocated+active) TC.
+    if (env->active_tc.CP0_TCHalt & 1) {
+        mips_tc_sleep(cpu, env->current_tc);
+    } else {
+        mips_tc_wake(cpu, env->current_tc);
+    }
 }
 
 void helper_mttc0_tchalt(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
     CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
+    MIPSCPU *other_cpu = mips_env_get_cpu(other);
 
     // TODO: Halt TC / Restart (if allocated+active) TC.
 
@@ -1015,6 +1210,12 @@
         other->active_tc.CP0_TCHalt = arg1;
     else
         other->tcs[other_tc].CP0_TCHalt = arg1;
+
+    if (arg1 & 1) {
+        mips_tc_sleep(other_cpu, other_tc);
+    } else {
+        mips_tc_wake(other_cpu, other_tc);
+    }
 }
 
 void helper_mtc0_tccontext(CPUMIPSState *env, target_ulong arg1)
@@ -1217,35 +1418,93 @@
     env->CP0_SRSCtl = (env->CP0_SRSCtl & ~mask) | (arg1 & mask);
 }
 
-void helper_mtc0_cause(CPUMIPSState *env, target_ulong arg1)
+static void mtc0_cause(CPUMIPSState *cpu, target_ulong arg1)
 {
     uint32_t mask = 0x00C00300;
-    uint32_t old = env->CP0_Cause;
+    uint32_t old = cpu->CP0_Cause;
+    int i;
 
-    if (env->insn_flags & ISA_MIPS32R2)
+    if (cpu->insn_flags & ISA_MIPS32R2) {
         mask |= 1 << CP0Ca_DC;
-
-    env->CP0_Cause = (env->CP0_Cause & ~mask) | (arg1 & mask);
-
-    if ((old ^ env->CP0_Cause) & (1 << CP0Ca_DC)) {
-        if (env->CP0_Cause & (1 << CP0Ca_DC))
-            cpu_mips_stop_count(env);
-        else
-            cpu_mips_start_count(env);
     }
 
-    /* Handle the software interrupt as an hardware one, as they
-       are very similar */
-    if (arg1 & CP0Ca_IP_mask) {
-        cpu_mips_update_irq(env);
+    cpu->CP0_Cause = (cpu->CP0_Cause & ~mask) | (arg1 & mask);
+
+    if ((old ^ cpu->CP0_Cause) & (1 << CP0Ca_DC)) {
+        if (cpu->CP0_Cause & (1 << CP0Ca_DC)) {
+            cpu_mips_stop_count(cpu);
+        } else {
+            cpu_mips_start_count(cpu);
+        }
     }
+
+    /* Set/reset software interrupts */
+    for (i = 0 ; i < 2 ; i++) {
+        if ((old ^ cpu->CP0_Cause) & (1 << (CP0Ca_IP + i))) {
+            cpu_mips_soft_irq(cpu, i, cpu->CP0_Cause & (1 << (CP0Ca_IP + i)));
+        }
+    }
+}
+
+void helper_mtc0_cause(CPUMIPSState *env, target_ulong arg1)
+{
+    mtc0_cause(env, arg1);
+}
+
+void helper_mttc0_cause(CPUMIPSState *env, target_ulong arg1)
+{
+    int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
+    /* Shared Cause write path, applied to the state picked for TargTC.  */
+    mtc0_cause(other, arg1);
+}
+
+target_ulong helper_mftc0_epc(CPUMIPSState *env)
+{
+    int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
+    /* Return EPC from the state mips_cpu_map_tc() picked for TargTC.  */
+    return other->CP0_EPC;
+}
+
+target_ulong helper_mftc0_ebase(CPUMIPSState *env)
+{
+    int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
+
+    return other->CP0_EBase;
 }
 
 void helper_mtc0_ebase(CPUMIPSState *env, target_ulong arg1)
 {
     /* vectored interrupts not implemented */
-    /* Multi-CPU not implemented */
-    env->CP0_EBase = 0x80000000 | (arg1 & 0x3FFFF000);
+    env->CP0_EBase = (env->CP0_EBase & ~0x3FFFF000) | (arg1 & 0x3FFFF000);
+}
+
+void helper_mttc0_ebase(CPUMIPSState *env, target_ulong arg1)
+{
+    int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
+    other->CP0_EBase = (other->CP0_EBase & ~0x3FFFF000) | (arg1 & 0x3FFFF000);
+}
+
+target_ulong helper_mftc0_configx(CPUMIPSState *env, target_ulong idx)
+{
+    int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
+
+    switch (idx) {
+    case 0: return other->CP0_Config0;
+    case 1: return other->CP0_Config1;
+    case 2: return other->CP0_Config2;
+    case 3: return other->CP0_Config3;
+    /* 4 and 5 are reserved.  */
+    case 6: return other->CP0_Config6;
+    case 7: return other->CP0_Config7;
+    default:
+        break;
+    }
+    return 0;
 }
 
 void helper_mtc0_config0(CPUMIPSState *env, target_ulong arg1)
@@ -1259,6 +1518,18 @@
     env->CP0_Config2 = (env->CP0_Config2 & 0x8FFF0FFF);
 }
 
+void helper_mtc0_config4(CPUMIPSState *env, target_ulong arg1)
+{
+    uint32_t rw = env->CP0_Config4_rw_bitmask; /* bits a write may change */
+    env->CP0_Config4 = (env->CP0_Config4 & ~rw) | (arg1 & rw);
+}
+
+void helper_mtc0_config5(CPUMIPSState *env, target_ulong arg1)
+{
+    uint32_t rw = env->CP0_Config5_rw_bitmask; /* bits a write may change */
+    env->CP0_Config5 = (env->CP0_Config5 & ~rw) | (arg1 & rw);
+}
+
 void helper_mtc0_lladdr(CPUMIPSState *env, target_ulong arg1)
 {
     target_long mask = env->CP0_LLAddr_rw_bitmask;
@@ -1466,14 +1737,37 @@
 
 target_ulong helper_dvpe(CPUMIPSState *env)
 {
-    // TODO
-    return 0;
+    CPUState *other_cs = first_cpu;
+    target_ulong prev = env->mvp->CP0_MVPControl;
+
+    CPU_FOREACH(other_cs) {
+        MIPSCPU *other_cpu = mips_env_get_cpu(other_cs->env_ptr);
+        /* Turn off all VPEs except the one executing the dvpe.  */
+        if (&other_cpu->env != env) {
+            other_cpu->env.mvp->CP0_MVPControl &= ~(1 << CP0MVPCo_EVP);
+            mips_vpe_sleep(other_cpu);
+        }
+    }
+    return prev;
 }
 
 target_ulong helper_evpe(CPUMIPSState *env)
 {
-    // TODO
-    return 0;
+    CPUState *other_cs = first_cpu;
+    target_ulong prev = env->mvp->CP0_MVPControl;
+
+    CPU_FOREACH(other_cs) {
+        MIPSCPU *other_cpu = mips_env_get_cpu(other_cs->env_ptr);
+
+        if (&other_cpu->env != env
+            /* If the VPE is WFI, don't disturb its sleep.  */
+            && !mips_vpe_is_wfi(other_cpu)) {
+            /* Enable the VPE.  */
+            other_cpu->env.mvp->CP0_MVPControl |= (1 << CP0MVPCo_EVP);
+            mips_vpe_wake(other_cpu); /* And wake it up.  */
+        }
+    }
+    return prev;
 }
 #endif /* !CONFIG_USER_ONLY */
 
@@ -1676,8 +1970,11 @@
         mask = tlb->PageMask | ~(TARGET_PAGE_MASK << 1);
         tag = env->CP0_EntryHi & ~mask;
         VPN = tlb->VPN & ~mask;
+#if defined(TARGET_MIPS64)
+        tag &= env->SEGMask;
+#endif
         /* Check ASID, virtual page number & size */
-        if (unlikely((tlb->G == 1 || tlb->ASID == ASID) && VPN == tag)) {
+        if ((tlb->G == 1 || tlb->ASID == ASID) && VPN == tag) {
             /* TLB match */
             env->CP0_Index = i;
             break;
@@ -2031,16 +2328,10 @@
 
 /* Complex FPU operations which may need stack space. */
 
-#define FLOAT_ONE32 make_float32(0x3f8 << 20)
-#define FLOAT_ONE64 make_float64(0x3ffULL << 52)
 #define FLOAT_TWO32 make_float32(1 << 30)
 #define FLOAT_TWO64 make_float64(1ULL << 62)
 #define FP_TO_INT32_OVERFLOW 0x7fffffff
 #define FP_TO_INT64_OVERFLOW 0x7fffffffffffffffULL
-#define FLOAT_QNAN32 0x7fbfffff
-#define FLOAT_QNAN64 0x7ff7ffffffffffffULL
-#define FLOAT_SNAN32 0x7fffffff
-#define FLOAT_SNAN64 0x7fffffffffffffffULL
 
 /* convert MIPS rounding mode in FCR31 to IEEE library */
 static unsigned int ieee_rm[] = {
@@ -2657,7 +2948,6 @@
 {
     uint64_t fdt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = float64_sqrt(fdt0, &env->active_fpu.fp_status);
     fdt2 = float64_div(float64_one, fdt2, &env->active_fpu.fp_status);
     update_fcr31(env, GETPC());
@@ -2668,7 +2958,6 @@
 {
     uint32_t fst2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_sqrt(fst0, &env->active_fpu.fp_status);
     fst2 = float32_div(float32_one, fst2, &env->active_fpu.fp_status);
     update_fcr31(env, GETPC());
@@ -2708,7 +2997,6 @@
 {
     uint64_t fdt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = float64_sqrt(fdt0, &env->active_fpu.fp_status);
     fdt2 = float64_div(float64_one, fdt2, &env->active_fpu.fp_status);
     update_fcr31(env, GETPC());
@@ -2719,7 +3007,6 @@
 {
     uint32_t fst2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_sqrt(fst0, &env->active_fpu.fp_status);
     fst2 = float32_div(float32_one, fst2, &env->active_fpu.fp_status);
     update_fcr31(env, GETPC());
@@ -2731,7 +3018,6 @@
     uint32_t fst2;
     uint32_t fsth2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_sqrt(fdt0 & 0XFFFFFFFF, &env->active_fpu.fp_status);
     fsth2 = float32_sqrt(fdt0 >> 32, &env->active_fpu.fp_status);
     fst2 = float32_div(float32_one, fst2, &env->active_fpu.fp_status);
@@ -2749,7 +3035,6 @@
 {                                                                  \
     uint64_t dt2;                                                  \
                                                                    \
-    set_float_exception_flags(0, &env->active_fpu.fp_status);            \
     dt2 = float64_ ## name (fdt0, fdt1, &env->active_fpu.fp_status);     \
     update_fcr31(env, GETPC());                                    \
     return dt2;                                                    \
@@ -2760,7 +3045,6 @@
 {                                                                  \
     uint32_t wt2;                                                  \
                                                                    \
-    set_float_exception_flags(0, &env->active_fpu.fp_status);            \
     wt2 = float32_ ## name (fst0, fst1, &env->active_fpu.fp_status);     \
     update_fcr31(env, GETPC());                                    \
     return wt2;                                                    \
@@ -2777,7 +3061,6 @@
     uint32_t wt2;                                                  \
     uint32_t wth2;                                                 \
                                                                    \
-    set_float_exception_flags(0, &env->active_fpu.fp_status);            \
     wt2 = float32_ ## name (fst0, fst1, &env->active_fpu.fp_status);     \
     wth2 = float32_ ## name (fsth0, fsth1, &env->active_fpu.fp_status);  \
     update_fcr31(env, GETPC());                                    \
@@ -2790,24 +3073,6 @@
 FLOAT_BINOP(div)
 #undef FLOAT_BINOP
 
-/* ternary operations */
-#define FLOAT_TERNOP(name1, name2)                                        \
-uint64_t helper_float_ ## name1 ## name2 ## _d(CPUMIPSState *env,         \
-                                           uint64_t fdt0, uint64_t fdt1,  \
-                                           uint64_t fdt2)                 \
-{                                                                         \
-    fdt0 = float64_ ## name1 (fdt0, fdt1, &env->active_fpu.fp_status);          \
-    return float64_ ## name2 (fdt0, fdt2, &env->active_fpu.fp_status);          \
-}                                                                         \
-                                                                          \
-uint32_t helper_float_ ## name1 ## name2 ## _s(CPUMIPSState *env,         \
-                                           uint32_t fst0, uint32_t fst1,  \
-                                           uint32_t fst2)                 \
-{                                                                         \
-    fst0 = float32_ ## name1 (fst0, fst1, &env->active_fpu.fp_status);          \
-    return float32_ ## name2 (fst0, fst2, &env->active_fpu.fp_status);          \
-}                                                                         \
-
 #define UNFUSED_FMA(prefix, a, b, c, flags)                          \
 {                                                                    \
     a = prefix##_mul(a, b, &env->active_fpu.fp_status);              \
diff --git a/target-mips/translate.c b/target-mips/translate.c
index 4a4d280..2c2e1fa 100755
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -1022,6 +1022,7 @@
 static TCGv_i64 fpu_f64[32];
 
 static uint32_t gen_opc_hflags[OPC_BUF_SIZE];
+static target_ulong gen_opc_btarget[OPC_BUF_SIZE];
 
 #include "exec/gen-icount.h"
 
@@ -1303,97 +1304,7 @@
         return 23;
 }
 
-#define FOP_CONDS(type, fmt, bits)                                            \
-static inline void gen_cmp ## type ## _ ## fmt(int n, TCGv_i##bits a,         \
-                                               TCGv_i##bits b, int cc)        \
-{                                                                             \
-    switch (n) {                                                              \
-    case  0: gen_helper_3i(cmp ## type ## _ ## fmt ## _f, cpu_env, a, b, cc);    break;\
-    case  1: gen_helper_3i(cmp ## type ## _ ## fmt ## _un, cpu_env, a, b, cc);   break;\
-    case  2: gen_helper_3i(cmp ## type ## _ ## fmt ## _eq, cpu_env, a, b, cc);   break;\
-    case  3: gen_helper_3i(cmp ## type ## _ ## fmt ## _ueq, cpu_env, a, b, cc);  break;\
-    case  4: gen_helper_3i(cmp ## type ## _ ## fmt ## _olt, cpu_env, a, b, cc);  break;\
-    case  5: gen_helper_3i(cmp ## type ## _ ## fmt ## _ult, cpu_env, a, b, cc);  break;\
-    case  6: gen_helper_3i(cmp ## type ## _ ## fmt ## _ole, cpu_env, a, b, cc);  break;\
-    case  7: gen_helper_3i(cmp ## type ## _ ## fmt ## _ule, cpu_env, a, b, cc);  break;\
-    case  8: gen_helper_3i(cmp ## type ## _ ## fmt ## _sf, cpu_env, a, b, cc);   break;\
-    case  9: gen_helper_3i(cmp ## type ## _ ## fmt ## _ngle, cpu_env, a, b, cc); break;\
-    case 10: gen_helper_3i(cmp ## type ## _ ## fmt ## _seq, cpu_env, a, b, cc);  break;\
-    case 11: gen_helper_3i(cmp ## type ## _ ## fmt ## _ngl, cpu_env, a, b, cc);  break;\
-    case 12: gen_helper_3i(cmp ## type ## _ ## fmt ## _lt, cpu_env, a, b, cc);   break;\
-    case 13: gen_helper_3i(cmp ## type ## _ ## fmt ## _nge, cpu_env, a, b, cc);  break;\
-    case 14: gen_helper_3i(cmp ## type ## _ ## fmt ## _le, cpu_env, a, b, cc);   break;\
-    case 15: gen_helper_3i(cmp ## type ## _ ## fmt ## _ngt, cpu_env, a, b, cc);  break;\
-    default: abort();                                                         \
-    }                                                                         \
-}
-
-FOP_CONDS(, d, 64)
-FOP_CONDS(abs, d, 64)
-FOP_CONDS(, s, 32)
-FOP_CONDS(abs, s, 32)
-FOP_CONDS(, ps, 64)
-FOP_CONDS(abs, ps, 64)
-#undef FOP_CONDS
-
 /* Tests */
-#define OP_COND(name, cond)                                         \
-static inline void glue(gen_op_, name) (TCGv ret, TCGv t0, TCGv t1) \
-{                                                                   \
-    int l1 = gen_new_label();                                       \
-    int l2 = gen_new_label();                                       \
-                                                                    \
-    tcg_gen_brcond_tl(cond, t0, t1, l1);                            \
-    tcg_gen_movi_tl(ret, 0);                                        \
-    tcg_gen_br(l2);                                                 \
-    gen_set_label(l1);                                              \
-    tcg_gen_movi_tl(ret, 1);                                        \
-    gen_set_label(l2);                                              \
-}
-OP_COND(eq, TCG_COND_EQ);
-OP_COND(ne, TCG_COND_NE);
-OP_COND(ge, TCG_COND_GE);
-OP_COND(geu, TCG_COND_GEU);
-OP_COND(lt, TCG_COND_LT);
-OP_COND(ltu, TCG_COND_LTU);
-#undef OP_COND
-
-#define OP_CONDI(name, cond)                                                 \
-static inline void glue(gen_op_, name) (TCGv ret, TCGv t0, target_ulong val) \
-{                                                                            \
-    int l1 = gen_new_label();                                                \
-    int l2 = gen_new_label();                                                \
-                                                                             \
-    tcg_gen_brcondi_tl(cond, t0, val, l1);                                   \
-    tcg_gen_movi_tl(ret, 0);                                                 \
-    tcg_gen_br(l2);                                                          \
-    gen_set_label(l1);                                                       \
-    tcg_gen_movi_tl(ret, 1);                                                 \
-    gen_set_label(l2);                                                       \
-}
-OP_CONDI(lti, TCG_COND_LT);
-OP_CONDI(ltiu, TCG_COND_LTU);
-#undef OP_CONDI
-
-#define OP_CONDZ(name, cond)                                  \
-static inline void glue(gen_op_, name) (TCGv ret, TCGv t0)    \
-{                                                             \
-    int l1 = gen_new_label();                                 \
-    int l2 = gen_new_label();                                 \
-                                                              \
-    tcg_gen_brcondi_tl(cond, t0, 0, l1);                      \
-    tcg_gen_movi_tl(ret, 0);                                  \
-    tcg_gen_br(l2);                                           \
-    gen_set_label(l1);                                        \
-    tcg_gen_movi_tl(ret, 1);                                  \
-    gen_set_label(l2);                                        \
-}
-OP_CONDZ(gez, TCG_COND_GE);
-OP_CONDZ(gtz, TCG_COND_GT);
-OP_CONDZ(lez, TCG_COND_LE);
-OP_CONDZ(ltz, TCG_COND_LT);
-#undef OP_CONDZ
-
 static inline void gen_save_pc(target_ulong pc)
 {
     tcg_gen_movi_tl(cpu_PC, pc);
@@ -1409,7 +1320,7 @@
     if (ctx->hflags != ctx->saved_hflags) {
         tcg_gen_movi_i32(hflags, ctx->hflags);
         ctx->saved_hflags = ctx->hflags;
-        switch (ctx->hflags & MIPS_HFLAG_BMASK) {
+        switch (ctx->hflags & MIPS_HFLAG_BMASK_BASE) {
         case MIPS_HFLAG_BR:
             break;
         case MIPS_HFLAG_BC:
@@ -1424,7 +1335,7 @@
 static inline void restore_cpu_state (CPUMIPSState *env, DisasContext *ctx)
 {
     ctx->saved_hflags = ctx->hflags;
-    switch (ctx->hflags & MIPS_HFLAG_BMASK) {
+    switch (ctx->hflags & MIPS_HFLAG_BMASK_BASE) {
     case MIPS_HFLAG_BR:
         break;
     case MIPS_HFLAG_BC:
@@ -1517,12 +1428,39 @@
         generate_exception(ctx, EXCP_RI);
 }
 
+/* Verify that the processor is running with DSP instructions enabled.
+   This is enabled by CP0 Status register MX(24) bit.
+ */
+
+static inline void check_dsp(DisasContext *ctx)
+{
+    if (unlikely(!(ctx->hflags & MIPS_HFLAG_DSP))) {
+        /* MIPS_HFLAG_DSP is clear: raise EXCP_DSPDIS when the CPU has
+           the DSP ASE, EXCP_RI when it does not.  */
+        int excp = (ctx->insn_flags & ASE_DSP) ? EXCP_DSPDIS : EXCP_RI;
+
+        generate_exception(ctx, excp);
+    }
+}
+
+static inline void check_dspr2(DisasContext *ctx)
+{
+    if (unlikely(!(ctx->hflags & MIPS_HFLAG_DSPR2))) {
+        /* MIPS_HFLAG_DSPR2 is clear: raise EXCP_DSPDIS when the CPU has
+           the DSP ASE, EXCP_RI when it does not.  */
+        int excp = (ctx->insn_flags & ASE_DSP) ? EXCP_DSPDIS : EXCP_RI;
+
+        generate_exception(ctx, excp);
+    }
+}
+
 /* This code generates a "reserved instruction" exception if the
    CPU does not support the instruction set corresponding to flags. */
-static inline void check_insn(CPUMIPSState *env, DisasContext *ctx, int flags)
+static inline void check_insn(DisasContext *ctx, int flags)
 {
-    if (unlikely(!(env->insn_flags & flags)))
+    if (unlikely(!(ctx->insn_flags & flags))) {
         generate_exception(ctx, EXCP_RI);
+    }
 }
 
 /* This code generates a "reserved instruction" exception if 64-bit
@@ -1563,9 +1501,73 @@
 #endif
 #undef OP_ST
 
+/* Define small wrappers for gen_load_fpr* so that we have a uniform
+   calling interface for 32 and 64-bit FPRs.  No sense in changing
+   all callers for gen_load_fpr32 when we need the CTX parameter for
+   this one use.  */
+#define gen_ldcmp_fpr32(ctx, x, y) gen_load_fpr32(x, y)
+#define gen_ldcmp_fpr64(ctx, x, y) gen_load_fpr64(ctx, x, y)
+#define FOP_CONDS(type, abs, fmt, ifmt, bits)                                 \
+static inline void gen_cmp ## type ## _ ## fmt(DisasContext *ctx, int n,      \
+                                               int ft, int fs, int cc)        \
+{                                                                             \
+    TCGv_i##bits fp0 = tcg_temp_new_i##bits ();                               \
+    TCGv_i##bits fp1 = tcg_temp_new_i##bits ();                               \
+    switch (ifmt) {                                                           \
+    case FMT_PS:                                                              \
+        check_cp1_64bitmode(ctx);                                             \
+        break;                                                                \
+    case FMT_D:                                                               \
+        if (abs) {                                                            \
+            check_cop1x(ctx);                                                 \
+        }                                                                     \
+        check_cp1_registers(ctx, fs | ft);                                    \
+        break;                                                                \
+    case FMT_S:                                                               \
+        if (abs) {                                                            \
+            check_cop1x(ctx);                                                 \
+        }                                                                     \
+        break;                                                                \
+    }                                                                         \
+    gen_ldcmp_fpr##bits (ctx, fp0, fs);                                       \
+    gen_ldcmp_fpr##bits (ctx, fp1, ft);                                       \
+    switch (n) {                                                              \
+    case  0: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _f, fp0, fp1, cc);    break;\
+    case  1: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _un, fp0, fp1, cc);   break;\
+    case  2: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _eq, fp0, fp1, cc);   break;\
+    case  3: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _ueq, fp0, fp1, cc);  break;\
+    case  4: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _olt, fp0, fp1, cc);  break;\
+    case  5: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _ult, fp0, fp1, cc);  break;\
+    case  6: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _ole, fp0, fp1, cc);  break;\
+    case  7: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _ule, fp0, fp1, cc);  break;\
+    case  8: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _sf, fp0, fp1, cc);   break;\
+    case  9: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _ngle, fp0, fp1, cc); break;\
+    case 10: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _seq, fp0, fp1, cc);  break;\
+    case 11: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _ngl, fp0, fp1, cc);  break;\
+    case 12: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _lt, fp0, fp1, cc);   break;\
+    case 13: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _nge, fp0, fp1, cc);  break;\
+    case 14: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _le, fp0, fp1, cc);   break;\
+    case 15: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _ngt, fp0, fp1, cc);  break;\
+    default: abort();                                                         \
+    }                                                                         \
+    tcg_temp_free_i##bits (fp0);                                              \
+    tcg_temp_free_i##bits (fp1);                                              \
+}
+
+FOP_CONDS(, 0, d, FMT_D, 64)
+FOP_CONDS(abs, 1, d, FMT_D, 64)
+FOP_CONDS(, 0, s, FMT_S, 32)
+FOP_CONDS(abs, 1, s, FMT_S, 32)
+FOP_CONDS(, 0, ps, FMT_PS, 64)
+FOP_CONDS(abs, 1, ps, FMT_PS, 64)
+#undef FOP_CONDS
+#undef gen_ldcmp_fpr32
+#undef gen_ldcmp_fpr64
+
+/* load/store instructions. */
 #ifdef CONFIG_USER_ONLY
 #define OP_LD_ATOMIC(insn,fname)                                           \
-static inline void op_ldst_##insn(TCGv ret, TCGv arg1, DisasContext *ctx)  \
+static inline void op_ld_##insn(TCGv ret, TCGv arg1, DisasContext *ctx)    \
 {                                                                          \
     TCGv t0 = tcg_temp_new();                                              \
     tcg_gen_mov_tl(t0, arg1);                                              \
@@ -1576,9 +1578,9 @@
 }
 #else
 #define OP_LD_ATOMIC(insn,fname)                                           \
-static inline void op_ldst_##insn(TCGv ret, TCGv arg1, DisasContext *ctx)  \
+static inline void op_ld_##insn(TCGv ret, TCGv arg1, DisasContext *ctx)    \
 {                                                                          \
-    gen_helper_3i(insn, ret, cpu_env, arg1, ctx->mem_idx);                 \
+    gen_helper_1e1i(insn, ret, arg1, ctx->mem_idx);                        \
 }
 #endif
 OP_LD_ATOMIC(ll,ld32s);
@@ -1589,7 +1591,7 @@
 
 #ifdef CONFIG_USER_ONLY
 #define OP_ST_ATOMIC(insn,fname,ldname,almask)                               \
-static inline void op_ldst_##insn(TCGv arg1, TCGv arg2, int rt, DisasContext *ctx) \
+static inline void op_st_##insn(TCGv arg1, TCGv arg2, int rt, DisasContext *ctx) \
 {                                                                            \
     TCGv t0 = tcg_temp_new();                                                \
     int l1 = gen_new_label();                                                \
@@ -1605,7 +1607,7 @@
     tcg_gen_movi_tl(t0, rt | ((almask << 3) & 0x20));                        \
     tcg_gen_st_tl(t0, cpu_env, offsetof(CPUMIPSState, llreg));                   \
     tcg_gen_st_tl(arg1, cpu_env, offsetof(CPUMIPSState, llnewval));              \
-    gen_helper_1i(raise_exception, cpu_env, EXCP_SC);                            \
+    gen_helper_0e0i(raise_exception, EXCP_SC);                               \
     gen_set_label(l2);                                                       \
     tcg_gen_movi_tl(t0, 0);                                                  \
     gen_store_gpr(t0, rt);                                                   \
@@ -1613,10 +1615,10 @@
 }
 #else
 #define OP_ST_ATOMIC(insn,fname,ldname,almask)                               \
-static inline void op_ldst_##insn(TCGv arg1, TCGv arg2, int rt, DisasContext *ctx) \
+static inline void op_st_##insn(TCGv arg1, TCGv arg2, int rt, DisasContext *ctx) \
 {                                                                            \
     TCGv t0 = tcg_temp_new();                                                \
-    gen_helper_4i(insn, t0, cpu_env, arg1, arg2, ctx->mem_idx);              \
+    gen_helper_1e2i(insn, t0, arg1, arg2, ctx->mem_idx);                     \
     gen_store_gpr(t0, rt);                                                   \
     tcg_temp_free(t0);                                                       \
 }
@@ -1627,60 +1629,78 @@
 #endif
 #undef OP_ST_ATOMIC
 
-/* Load and store */
-static void gen_ldst (DisasContext *ctx, uint32_t opc, int rt,
-                      int base, int16_t offset)
+static void gen_base_offset_addr (DisasContext *ctx, TCGv addr,
+                                  int base, int16_t offset)
 {
-    const char *opn = "ldst";
+    if (base == 0) {
+        tcg_gen_movi_tl(addr, offset);
+    } else if (offset == 0) {
+        gen_load_gpr(addr, base);
+    } else {
+        tcg_gen_movi_tl(addr, offset);
+        gen_op_addr_add(ctx, addr, cpu_gpr[base], addr);
+    }
+}
+
+static target_ulong pc_relative_pc (DisasContext *ctx)
+{
+    target_ulong base = ctx->pc;
+
+    /* With any MIPS_HFLAG_BMASK bit set, back the base up by 2 bytes
+       when MIPS_HFLAG_BDS16 is set and by 4 bytes otherwise.  */
+    if (ctx->hflags & MIPS_HFLAG_BMASK) {
+        base -= (ctx->hflags & MIPS_HFLAG_BDS16) ? 2 : 4;
+    }
+
+    /* Clear the two low bits of the result.  */
+    return base & ~(target_ulong)3;
+}
+
+/* Load */
+static void gen_ld(DisasContext *ctx, uint32_t opc,
+                   int rt, int base, int16_t offset)
+{
+    const char *opn = "ld";
     TCGv t0, t1, t2;
 
-    t0 = tcg_temp_new();
-    t1 = tcg_temp_new();
-
-    if (base == 0) {
-        tcg_gen_movi_tl(t0, offset);
-    } else if (offset == 0) {
-        gen_load_gpr(t0, base);
-    } else {
-        tcg_gen_movi_tl(t0, offset);
-        gen_op_addr_add(ctx, t0, cpu_gpr[base], t0);
+    if (rt == 0 && ctx->insn_flags & (INSN_LOONGSON2E | INSN_LOONGSON2F)) {
+        /* Loongson CPU uses a load to zero register for prefetch.
+           We emulate it as a NOP. On other CPU we must perform the
+           actual memory access. */
+        MIPS_DEBUG("NOP");
+        return;
     }
-    /* Don't do NOP if destination is zero: we must perform the actual
-       memory access. */
+
+    t0 = tcg_temp_new();
+    gen_base_offset_addr(ctx, t0, base, offset);
+
     switch (opc) {
 #if defined(TARGET_MIPS64)
     case OPC_LWU:
-        save_cpu_state(ctx, 0);
-        op_ldst_lwu(t0, t0, ctx);
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEUL);
         gen_store_gpr(t0, rt);
         opn = "lwu";
         break;
     case OPC_LD:
-        save_cpu_state(ctx, 0);
-        op_ldst_ld(t0, t0, ctx);
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ);
         gen_store_gpr(t0, rt);
         opn = "ld";
         break;
     case OPC_LLD:
-        save_cpu_state(ctx, 0);
-        op_ldst_lld(t0, t0, ctx);
+        save_cpu_state(ctx, 1);
+        op_ld_lld(t0, t0, ctx);
         gen_store_gpr(t0, rt);
         opn = "lld";
         break;
-    case OPC_SD:
-        save_cpu_state(ctx, 0);
-        gen_load_gpr(t1, rt);
-        op_ldst_sd(t1, t0, ctx);
-        opn = "sd";
-        break;
     case OPC_LDL:
+        t1 = tcg_temp_new();
         tcg_gen_andi_tl(t1, t0, 7);
 #ifndef TARGET_WORDS_BIGENDIAN
         tcg_gen_xori_tl(t1, t1, 7);
 #endif
         tcg_gen_shli_tl(t1, t1, 3);
         tcg_gen_andi_tl(t0, t0, ~7);
-        tcg_gen_qemu_ld64(t0, t0, ctx->mem_idx);
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ);
         tcg_gen_shl_tl(t0, t0, t1);
         tcg_gen_xori_tl(t1, t1, 63);
         t2 = tcg_const_tl(0x7fffffffffffffffull);
@@ -1689,23 +1709,19 @@
         tcg_gen_and_tl(t1, t1, t2);
         tcg_temp_free(t2);
         tcg_gen_or_tl(t0, t0, t1);
+        tcg_temp_free(t1);
         gen_store_gpr(t0, rt);
         opn = "ldl";
         break;
-    case OPC_SDL:
-        save_cpu_state(ctx, 1);
-        gen_load_gpr(t1, rt);
-        gen_helper_3i(sdl, cpu_env, t1, t0, ctx->mem_idx);
-        opn = "sdl";
-        break;
     case OPC_LDR:
+        t1 = tcg_temp_new();
         tcg_gen_andi_tl(t1, t0, 7);
 #ifdef TARGET_WORDS_BIGENDIAN
         tcg_gen_xori_tl(t1, t1, 7);
 #endif
         tcg_gen_shli_tl(t1, t1, 3);
         tcg_gen_andi_tl(t0, t0, ~7);
-        tcg_gen_qemu_ld64(t0, t0, ctx->mem_idx);
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ);
         tcg_gen_shr_tl(t0, t0, t1);
         tcg_gen_xori_tl(t1, t1, 63);
         t2 = tcg_const_tl(0xfffffffffffffffeull);
@@ -1714,72 +1730,61 @@
         tcg_gen_and_tl(t1, t1, t2);
         tcg_temp_free(t2);
         tcg_gen_or_tl(t0, t0, t1);
+        tcg_temp_free(t1);
         gen_store_gpr(t0, rt);
         opn = "ldr";
         break;
-    case OPC_SDR:
-        save_cpu_state(ctx, 1);
-        gen_load_gpr(t1, rt);
-        gen_helper_3i(sdr, cpu_env, t1, t0, ctx->mem_idx);
-        opn = "sdr";
+    case OPC_LDPC:
+        t1 = tcg_const_tl(pc_relative_pc(ctx));
+        gen_op_addr_add(ctx, t0, t0, t1);
+        tcg_temp_free(t1);
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ);
+        gen_store_gpr(t0, rt);
+        opn = "ldpc";
         break;
 #endif
+    case OPC_LWPC:
+        t1 = tcg_const_tl(pc_relative_pc(ctx));
+        gen_op_addr_add(ctx, t0, t0, t1);
+        tcg_temp_free(t1);
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TESL);
+        gen_store_gpr(t0, rt);
+        opn = "lwpc";
+        break;
     case OPC_LW:
-        save_cpu_state(ctx, 0);
-        op_ldst_lw(t0, t0, ctx);
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TESL);
         gen_store_gpr(t0, rt);
         opn = "lw";
         break;
-    case OPC_SW:
-        save_cpu_state(ctx, 0);
-        gen_load_gpr(t1, rt);
-        op_ldst_sw(t1, t0, ctx);
-        opn = "sw";
-        break;
     case OPC_LH:
-        save_cpu_state(ctx, 0);
-        op_ldst_lh(t0, t0, ctx);
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TESW);
         gen_store_gpr(t0, rt);
         opn = "lh";
         break;
-    case OPC_SH:
-        save_cpu_state(ctx, 0);
-        gen_load_gpr(t1, rt);
-        op_ldst_sh(t1, t0, ctx);
-        opn = "sh";
-        break;
     case OPC_LHU:
-        save_cpu_state(ctx, 0);
-        op_ldst_lhu(t0, t0, ctx);
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEUW);
         gen_store_gpr(t0, rt);
         opn = "lhu";
         break;
     case OPC_LB:
-        save_cpu_state(ctx, 0);
-        op_ldst_lb(t0, t0, ctx);
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SB);
         gen_store_gpr(t0, rt);
         opn = "lb";
         break;
-    case OPC_SB:
-        save_cpu_state(ctx, 0);
-        gen_load_gpr(t1, rt);
-        op_ldst_sb(t1, t0, ctx);
-        opn = "sb";
-        break;
     case OPC_LBU:
-        save_cpu_state(ctx, 0);
-        op_ldst_lbu(t0, t0, ctx);
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_UB);
         gen_store_gpr(t0, rt);
         opn = "lbu";
         break;
     case OPC_LWL:
+        t1 = tcg_temp_new();
         tcg_gen_andi_tl(t1, t0, 3);
 #ifndef TARGET_WORDS_BIGENDIAN
         tcg_gen_xori_tl(t1, t1, 3);
 #endif
         tcg_gen_shli_tl(t1, t1, 3);
         tcg_gen_andi_tl(t0, t0, ~3);
-        tcg_gen_qemu_ld32u(t0, t0, ctx->mem_idx);
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEUL);
         tcg_gen_shl_tl(t0, t0, t1);
         tcg_gen_xori_tl(t1, t1, 31);
         t2 = tcg_const_tl(0x7fffffffull);
@@ -1788,24 +1793,20 @@
         tcg_gen_and_tl(t1, t1, t2);
         tcg_temp_free(t2);
         tcg_gen_or_tl(t0, t0, t1);
+        tcg_temp_free(t1);
         tcg_gen_ext32s_tl(t0, t0);
         gen_store_gpr(t0, rt);
         opn = "lwl";
         break;
-    case OPC_SWL:
-        save_cpu_state(ctx, 1);
-        gen_load_gpr(t1, rt);
-        gen_helper_3i(swl, cpu_env, t1, t0, ctx->mem_idx);
-        opn = "swl";
-        break;
     case OPC_LWR:
+        t1 = tcg_temp_new();
         tcg_gen_andi_tl(t1, t0, 3);
 #ifdef TARGET_WORDS_BIGENDIAN
         tcg_gen_xori_tl(t1, t1, 3);
 #endif
         tcg_gen_shli_tl(t1, t1, 3);
         tcg_gen_andi_tl(t0, t0, ~3);
-        tcg_gen_qemu_ld32u(t0, t0, ctx->mem_idx);
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEUL);
         tcg_gen_shr_tl(t0, t0, t1);
         tcg_gen_xori_tl(t1, t1, 31);
         t2 = tcg_const_tl(0xfffffffeull);
@@ -1814,18 +1815,14 @@
         tcg_gen_and_tl(t1, t1, t2);
         tcg_temp_free(t2);
         tcg_gen_or_tl(t0, t0, t1);
+        tcg_temp_free(t1);
+        tcg_gen_ext32s_tl(t0, t0);
         gen_store_gpr(t0, rt);
         opn = "lwr";
         break;
-    case OPC_SWR:
-        save_cpu_state(ctx, 1);
-        gen_load_gpr(t1, rt);
-        gen_helper_3i(swr, cpu_env, t1, t0, ctx->mem_idx);
-        opn = "swr";
-        break;
     case OPC_LL:
         save_cpu_state(ctx, 1);
-        op_ldst_ll(t0, t0, ctx);
+        op_ld_ll(t0, t0, ctx);
         gen_store_gpr(t0, rt);
         opn = "ll";
         break;
@@ -1833,6 +1830,61 @@
     (void)opn; /* avoid a compiler warning */
     MIPS_DEBUG("%s %s, %d(%s)", opn, regnames[rt], offset, regnames[base]);
     tcg_temp_free(t0);
+}
+
+/* Store: emit TCG ops that store GPR rt to the address formed from base and offset; opc picks the variant. */
+static void gen_st (DisasContext *ctx, uint32_t opc, int rt,
+                    int base, int16_t offset)
+{
+    const char *opn = "st";
+    TCGv t0 = tcg_temp_new(); /* effective address, filled by gen_base_offset_addr() */
+    TCGv t1 = tcg_temp_new(); /* value to store, loaded from GPR rt */
+
+    gen_base_offset_addr(ctx, t0, base, offset);
+    gen_load_gpr(t1, rt);
+    switch (opc) {
+#if defined(TARGET_MIPS64)
+    case OPC_SD:
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ);
+        opn = "sd";
+        break;
+    case OPC_SDL:
+        save_cpu_state(ctx, 1); /* state is saved before each helper-based store; presumably so the helper can fault -- confirm */
+        gen_helper_0e2i(sdl, t1, t0, ctx->mem_idx);
+        opn = "sdl";
+        break;
+    case OPC_SDR:
+        save_cpu_state(ctx, 1);
+        gen_helper_0e2i(sdr, t1, t0, ctx->mem_idx);
+        opn = "sdr";
+        break;
+#endif
+    case OPC_SW:
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
+        opn = "sw";
+        break;
+    case OPC_SH:
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUW);
+        opn = "sh";
+        break;
+    case OPC_SB:
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_8);
+        opn = "sb";
+        break;
+    case OPC_SWL:
+        save_cpu_state(ctx, 1);
+        gen_helper_0e2i(swl, t1, t0, ctx->mem_idx);
+        opn = "swl";
+        break;
+    case OPC_SWR:
+        save_cpu_state(ctx, 1);
+        gen_helper_0e2i(swr, t1, t0, ctx->mem_idx);
+        opn = "swr";
+        break;
+    } /* no default case: any other opc emits no store op */
+    (void)opn; /* avoid a compiler warning */
+    MIPS_DEBUG("%s %s, %d(%s)", opn, regnames[rt], offset, regnames[base]);
+    tcg_temp_free(t0);
     tcg_temp_free(t1);
 }
 
@@ -1844,32 +1896,26 @@
     const char *opn = "st_cond";
     TCGv t0, t1;
 
+#ifdef CONFIG_USER_ONLY
     t0 = tcg_temp_local_new();
-
-    if (base == 0) {
-        tcg_gen_movi_tl(t0, offset);
-    } else if (offset == 0) {
-        gen_load_gpr(t0, base);
-    } else {
-        tcg_gen_movi_tl(t0, offset);
-        gen_op_addr_add(ctx, t0, cpu_gpr[base], t0);
-    }
-    /* Don't do NOP if destination is zero: we must perform the actual
-       memory access. */
-
     t1 = tcg_temp_local_new();
+#else
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+#endif
+    gen_base_offset_addr(ctx, t0, base, offset);
     gen_load_gpr(t1, rt);
     switch (opc) {
 #if defined(TARGET_MIPS64)
     case OPC_SCD:
-        save_cpu_state(ctx, 0);
-        op_ldst_scd(t1, t0, rt, ctx);
+        save_cpu_state(ctx, 1);
+        op_st_scd(t1, t0, rt, ctx);
         opn = "scd";
         break;
 #endif
     case OPC_SC:
         save_cpu_state(ctx, 1);
-        op_ldst_sc(t1, t0, rt, ctx);
+        op_st_sc(t1, t0, rt, ctx);
         opn = "sc";
         break;
     }
@@ -1886,23 +1932,14 @@
     const char *opn = "flt_ldst";
     TCGv t0 = tcg_temp_new();
 
-    if (base == 0) {
-        tcg_gen_movi_tl(t0, offset);
-    } else if (offset == 0) {
-        gen_load_gpr(t0, base);
-    } else {
-        tcg_gen_movi_tl(t0, offset);
-        gen_op_addr_add(ctx, t0, cpu_gpr[base], t0);
-    }
+    gen_base_offset_addr(ctx, t0, base, offset);
     /* Don't do NOP if destination is zero: we must perform the actual
        memory access. */
     switch (opc) {
     case OPC_LWC1:
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
-
-            tcg_gen_qemu_ld32s(t0, t0, ctx->mem_idx);
-            tcg_gen_trunc_tl_i32(fp0, t0);
+            tcg_gen_qemu_ld_i32(fp0, t0, ctx->mem_idx, MO_TESL);
             gen_store_fpr32(fp0, ft);
             tcg_temp_free_i32(fp0);
         }
@@ -1911,12 +1948,8 @@
     case OPC_SWC1:
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
-            TCGv t1 = tcg_temp_new();
-
             gen_load_fpr32(fp0, ft);
-            tcg_gen_extu_i32_tl(t1, fp0);
-            tcg_gen_qemu_st32(t1, t0, ctx->mem_idx);
-            tcg_temp_free(t1);
+            tcg_gen_qemu_st_i32(fp0, t0, ctx->mem_idx, MO_TEUL);
             tcg_temp_free_i32(fp0);
         }
         opn = "swc1";
@@ -1924,8 +1957,7 @@
     case OPC_LDC1:
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
-
-            tcg_gen_qemu_ld64(fp0, t0, ctx->mem_idx);
+            tcg_gen_qemu_ld_i64(fp0, t0, ctx->mem_idx, MO_TEQ);
             gen_store_fpr64(ctx, fp0, ft);
             tcg_temp_free_i64(fp0);
         }
@@ -1934,9 +1966,8 @@
     case OPC_SDC1:
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
-
             gen_load_fpr64(ctx, fp0, ft);
-            tcg_gen_qemu_st64(fp0, t0, ctx->mem_idx);
+            tcg_gen_qemu_st_i64(fp0, t0, ctx->mem_idx, MO_TEQ);
             tcg_temp_free_i64(fp0);
         }
         opn = "sdc1";
@@ -1952,9 +1983,20 @@
     tcg_temp_free(t0);
 }
 
+static void gen_cop1_ldst(CPUMIPSState *env, DisasContext *ctx,
+                          uint32_t op, int rt, int rs, int16_t imm)
+{ /* Gate a COP1 load/store on the CP0C1_FP bit of env->CP0_Config1. */
+    if (env->CP0_Config1 & (1 << CP0C1_FP)) { /* bit set: presumably an FPU is implemented -- confirm */
+        check_cp1_enabled(ctx);
+        gen_flt_ldst(ctx, op, rt, rs, imm);
+    } else { /* bit clear: no load/store is emitted, only the exception below */
+        generate_exception_err(ctx, EXCP_CpU, 1); /* 1 is presumably the coprocessor number -- confirm */
+    }
+}
+
 /* Arithmetic with immediate operand */
-static void gen_arith_imm (CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
-                           int rt, int rs, int16_t imm)
+static void gen_arith_imm(DisasContext *ctx, uint32_t opc,
+                          int rt, int rs, int16_t imm)
 {
     target_ulong uimm = (target_long)imm; /* Sign extend to 32/64 bits */
     const char *opn = "imm arith";
@@ -2041,7 +2083,8 @@
 }
 
 /* Logic with immediate operand */
-static void gen_logic_imm (CPUMIPSState *env, uint32_t opc, int rt, int rs, int16_t imm)
+static void gen_logic_imm(DisasContext *ctx, uint32_t opc,
+                          int rt, int rs, int16_t imm)
 {
     target_ulong uimm;
 
@@ -2088,7 +2131,8 @@
 }
 
 /* Set on less than with immediate operand */
-static void gen_slt_imm (CPUMIPSState *env, uint32_t opc, int rt, int rs, int16_t imm)
+static void gen_slt_imm(DisasContext *ctx, uint32_t opc,
+                        int rt, int rs, int16_t imm)
 {
     target_ulong uimm = (target_long)imm; /* Sign extend to 32/64 bits */
     const char *opn = "imm arith";
@@ -2103,11 +2147,11 @@
     gen_load_gpr(t0, rs);
     switch (opc) {
     case OPC_SLTI:
-        gen_op_lti(cpu_gpr[rt], t0, uimm);
+        tcg_gen_setcondi_tl(TCG_COND_LT, cpu_gpr[rt], t0, uimm);
         opn = "slti";
         break;
     case OPC_SLTIU:
-        gen_op_ltiu(cpu_gpr[rt], t0, uimm);
+        tcg_gen_setcondi_tl(TCG_COND_LTU, cpu_gpr[rt], t0, uimm);
         opn = "sltiu";
         break;
     }
@@ -2117,7 +2161,7 @@
 }
 
 /* Shifts with immediate operand */
-static void gen_shift_imm(CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
+static void gen_shift_imm(DisasContext *ctx, uint32_t opc,
                           int rt, int rs, int16_t imm)
 {
     target_ulong uimm = ((uint16_t)imm) & 0x1f;
@@ -2139,49 +2183,28 @@
         opn = "sll";
         break;
     case OPC_SRA:
-        tcg_gen_ext32s_tl(t0, t0);
         tcg_gen_sari_tl(cpu_gpr[rt], t0, uimm);
         opn = "sra";
         break;
     case OPC_SRL:
-        switch ((ctx->opcode >> 21) & 0x1f) {
-        case 0:
-            if (uimm != 0) {
-                tcg_gen_ext32u_tl(t0, t0);
-                tcg_gen_shri_tl(cpu_gpr[rt], t0, uimm);
-            } else {
-                tcg_gen_ext32s_tl(cpu_gpr[rt], t0);
-            }
-            opn = "srl";
-            break;
-        case 1:
-            /* rotr is decoded as srl on non-R2 CPUs */
-            if (env->insn_flags & ISA_MIPS32R2) {
-                if (uimm != 0) {
-                    TCGv_i32 t1 = tcg_temp_new_i32();
+        if (uimm != 0) {
+            tcg_gen_ext32u_tl(t0, t0);
+            tcg_gen_shri_tl(cpu_gpr[rt], t0, uimm);
+        } else {
+            tcg_gen_ext32s_tl(cpu_gpr[rt], t0);
+        }
+        opn = "srl";
+        break;
+    case OPC_ROTR:
+        if (uimm != 0) {
+            TCGv_i32 t1 = tcg_temp_new_i32();
 
-                    tcg_gen_trunc_tl_i32(t1, t0);
-                    tcg_gen_rotri_i32(t1, t1, uimm);
-                    tcg_gen_ext_i32_tl(cpu_gpr[rt], t1);
-                    tcg_temp_free_i32(t1);
-                } else {
-                    tcg_gen_ext32s_tl(cpu_gpr[rt], t0);
-                }
-                opn = "rotr";
-            } else {
-                if (uimm != 0) {
-                    tcg_gen_ext32u_tl(t0, t0);
-                    tcg_gen_shri_tl(cpu_gpr[rt], t0, uimm);
-                } else {
-                    tcg_gen_ext32s_tl(cpu_gpr[rt], t0);
-                }
-                opn = "srl";
-            }
-            break;
-        default:
-            MIPS_INVAL("invalid srl flag");
-            generate_exception(ctx, EXCP_RI);
-            break;
+            tcg_gen_trunc_tl_i32(t1, t0);
+            tcg_gen_rotri_i32(t1, t1, uimm);
+            tcg_gen_ext_i32_tl(cpu_gpr[rt], t1);
+            tcg_temp_free_i32(t1);
+        } else {
+            tcg_gen_ext32s_tl(cpu_gpr[rt], t0);
         }
         opn = "rotr";
         break;
@@ -2195,29 +2218,14 @@
         opn = "dsra";
         break;
     case OPC_DSRL:
-        switch ((ctx->opcode >> 21) & 0x1f) {
-        case 0:
-            tcg_gen_shri_tl(cpu_gpr[rt], t0, uimm);
-            opn = "dsrl";
-            break;
-        case 1:
-            /* drotr is decoded as dsrl on non-R2 CPUs */
-            if (env->insn_flags & ISA_MIPS32R2) {
-                if (uimm != 0) {
-                    tcg_gen_rotri_tl(cpu_gpr[rt], t0, uimm);
-                } else {
-                    tcg_gen_mov_tl(cpu_gpr[rt], t0);
-                }
-                opn = "drotr";
-            } else {
-                tcg_gen_shri_tl(cpu_gpr[rt], t0, uimm);
-                opn = "dsrl";
-            }
-            break;
-        default:
-            MIPS_INVAL("invalid dsrl flag");
-            generate_exception(ctx, EXCP_RI);
-            break;
+        tcg_gen_shri_tl(cpu_gpr[rt], t0, uimm);
+        opn = "dsrl";
+        break;
+    case OPC_DROTR:
+        if (uimm != 0) {
+            tcg_gen_rotri_tl(cpu_gpr[rt], t0, uimm);
+        } else {
+            tcg_gen_mov_tl(cpu_gpr[rt], t0);
         }
         opn = "drotr";
         break;
@@ -2230,26 +2238,12 @@
         opn = "dsra32";
         break;
     case OPC_DSRL32:
-        switch ((ctx->opcode >> 21) & 0x1f) {
-        case 0:
-            tcg_gen_shri_tl(cpu_gpr[rt], t0, uimm + 32);
-            opn = "dsrl32";
-            break;
-        case 1:
-            /* drotr32 is decoded as dsrl32 on non-R2 CPUs */
-            if (env->insn_flags & ISA_MIPS32R2) {
-                tcg_gen_rotri_tl(cpu_gpr[rt], t0, uimm + 32);
-                opn = "drotr32";
-            } else {
-                tcg_gen_shri_tl(cpu_gpr[rt], t0, uimm + 32);
-                opn = "dsrl32";
-            }
-            break;
-        default:
-            MIPS_INVAL("invalid dsrl32 flag");
-            generate_exception(ctx, EXCP_RI);
-            break;
-        }
+        tcg_gen_shri_tl(cpu_gpr[rt], t0, uimm + 32);
+        opn = "dsrl32";
+        break;
+    case OPC_DROTR32:
+        tcg_gen_rotri_tl(cpu_gpr[rt], t0, uimm + 32);
+        opn = "drotr32";
         break;
 #endif
     }
@@ -2259,8 +2253,8 @@
 }
 
 /* Arithmetic */
-static void gen_arith (CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
-                       int rd, int rs, int rt)
+static void gen_arith(DisasContext *ctx, uint32_t opc,
+                      int rd, int rs, int rt)
 {
     const char *opn = "arith";
 
@@ -2285,9 +2279,8 @@
             tcg_gen_add_tl(t0, t1, t2);
             tcg_gen_ext32s_tl(t0, t0);
             tcg_gen_xor_tl(t1, t1, t2);
-            tcg_gen_not_tl(t1, t1);
             tcg_gen_xor_tl(t2, t0, t2);
-            tcg_gen_and_tl(t1, t1, t2);
+            tcg_gen_andc_tl(t1, t2, t1);
             tcg_temp_free(t2);
             tcg_gen_brcondi_tl(TCG_COND_GE, t1, 0, l1);
             tcg_temp_free(t1);
@@ -2363,9 +2356,8 @@
             gen_load_gpr(t2, rt);
             tcg_gen_add_tl(t0, t1, t2);
             tcg_gen_xor_tl(t1, t1, t2);
-            tcg_gen_not_tl(t1, t1);
             tcg_gen_xor_tl(t2, t0, t2);
-            tcg_gen_and_tl(t1, t1, t2);
+            tcg_gen_andc_tl(t1, t2, t1);
             tcg_temp_free(t2);
             tcg_gen_brcondi_tl(TCG_COND_GE, t1, 0, l1);
             tcg_temp_free(t1);
@@ -2441,45 +2433,44 @@
 }
 
 /* Conditional move */
-static void gen_cond_move (CPUMIPSState *env, uint32_t opc, int rd, int rs, int rt)
+static void gen_cond_move(DisasContext *ctx, uint32_t opc,
+                          int rd, int rs, int rt)
 {
     const char *opn = "cond move";
-    int l1;
+    TCGv t0, t1, t2; /* t0 = GPR rt (tested value), t1 = constant 0, t2 = GPR rs (value to move) */
 
     if (rd == 0) {
-        /* If no destination, treat it as a NOP.
-           For add & sub, we must generate the overflow exception when needed. */
+        /* If no destination, treat it as a NOP. */
         MIPS_DEBUG("NOP");
         return;
     }
 
-    l1 = gen_new_label();
+    t0 = tcg_temp_new();
+    gen_load_gpr(t0, rt);
+    t1 = tcg_const_tl(0);
+    t2 = tcg_temp_new();
+    gen_load_gpr(t2, rs);
     switch (opc) {
     case OPC_MOVN:
-        if (likely(rt != 0))
-            tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[rt], 0, l1);
-        else
-            tcg_gen_br(l1);
+        tcg_gen_movcond_tl(TCG_COND_NE, cpu_gpr[rd], t0, t1, t2, cpu_gpr[rd]); /* rd = rs when rt != 0, else rd is kept */
         opn = "movn";
         break;
     case OPC_MOVZ:
-        if (likely(rt != 0))
-            tcg_gen_brcondi_tl(TCG_COND_NE, cpu_gpr[rt], 0, l1);
+        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_gpr[rd], t0, t1, t2, cpu_gpr[rd]); /* rd = rs when rt == 0, else rd is kept */
         opn = "movz";
         break;
     }
-    if (rs != 0)
-        tcg_gen_mov_tl(cpu_gpr[rd], cpu_gpr[rs]);
-    else
-        tcg_gen_movi_tl(cpu_gpr[rd], 0);
-    gen_set_label(l1);
+    tcg_temp_free(t2); /* temporaries are released in reverse order of creation */
+    tcg_temp_free(t1);
+    tcg_temp_free(t0);
 
     (void)opn; /* avoid a compiler warning */
     MIPS_DEBUG("%s %s, %s, %s", opn, regnames[rd], regnames[rs], regnames[rt]);
 }
 
 /* Logic */
-static void gen_logic (CPUMIPSState *env, uint32_t opc, int rd, int rs, int rt)
+static void gen_logic(DisasContext *ctx, uint32_t opc,
+                      int rd, int rs, int rt)
 {
     const char *opn = "logic";
 
@@ -2540,7 +2531,8 @@
 }
 
 /* Set on lower than */
-static void gen_slt (CPUMIPSState *env, uint32_t opc, int rd, int rs, int rt)
+static void gen_slt(DisasContext *ctx, uint32_t opc,
+                    int rd, int rs, int rt)
 {
     const char *opn = "slt";
     TCGv t0, t1;
@@ -2557,11 +2549,11 @@
     gen_load_gpr(t1, rt);
     switch (opc) {
     case OPC_SLT:
-        gen_op_lt(cpu_gpr[rd], t0, t1);
+        tcg_gen_setcond_tl(TCG_COND_LT, cpu_gpr[rd], t0, t1);
         opn = "slt";
         break;
     case OPC_SLTU:
-        gen_op_ltu(cpu_gpr[rd], t0, t1);
+        tcg_gen_setcond_tl(TCG_COND_LTU, cpu_gpr[rd], t0, t1);
         opn = "sltu";
         break;
     }
@@ -2572,8 +2564,8 @@
 }
 
 /* Shifts */
-static void gen_shift (CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
-                       int rd, int rs, int rt)
+static void gen_shift(DisasContext *ctx, uint32_t opc,
+                      int rd, int rs, int rt)
 {
     const char *opn = "shifts";
     TCGv t0, t1;
@@ -2597,46 +2589,30 @@
         opn = "sllv";
         break;
     case OPC_SRAV:
-        tcg_gen_ext32s_tl(t1, t1);
         tcg_gen_andi_tl(t0, t0, 0x1f);
         tcg_gen_sar_tl(cpu_gpr[rd], t1, t0);
         opn = "srav";
         break;
     case OPC_SRLV:
-        switch ((ctx->opcode >> 6) & 0x1f) {
-        case 0:
-            tcg_gen_ext32u_tl(t1, t1);
-            tcg_gen_andi_tl(t0, t0, 0x1f);
-            tcg_gen_shr_tl(t0, t1, t0);
-            tcg_gen_ext32s_tl(cpu_gpr[rd], t0);
-            opn = "srlv";
-            break;
-        case 1:
-            /* rotrv is decoded as srlv on non-R2 CPUs */
-            if (env->insn_flags & ISA_MIPS32R2) {
-                TCGv_i32 t2 = tcg_temp_new_i32();
-                TCGv_i32 t3 = tcg_temp_new_i32();
+        tcg_gen_ext32u_tl(t1, t1);
+        tcg_gen_andi_tl(t0, t0, 0x1f);
+        tcg_gen_shr_tl(t0, t1, t0);
+        tcg_gen_ext32s_tl(cpu_gpr[rd], t0);
+        opn = "srlv";
+        break;
+    case OPC_ROTRV:
+        {
+            TCGv_i32 t2 = tcg_temp_new_i32();
+            TCGv_i32 t3 = tcg_temp_new_i32();
 
-                tcg_gen_trunc_tl_i32(t2, t0);
-                tcg_gen_trunc_tl_i32(t3, t1);
-                tcg_gen_andi_i32(t2, t2, 0x1f);
-                tcg_gen_rotr_i32(t2, t3, t2);
-                tcg_gen_ext_i32_tl(cpu_gpr[rd], t2);
-                tcg_temp_free_i32(t2);
-                tcg_temp_free_i32(t3);
-                opn = "rotrv";
-            } else {
-                tcg_gen_ext32u_tl(t1, t1);
-                tcg_gen_andi_tl(t0, t0, 0x1f);
-                tcg_gen_shr_tl(t0, t1, t0);
-                tcg_gen_ext32s_tl(cpu_gpr[rd], t0);
-                opn = "srlv";
-            }
-            break;
-        default:
-            MIPS_INVAL("invalid srlv flag");
-            generate_exception(ctx, EXCP_RI);
-            break;
+            tcg_gen_trunc_tl_i32(t2, t0);
+            tcg_gen_trunc_tl_i32(t3, t1);
+            tcg_gen_andi_i32(t2, t2, 0x1f);
+            tcg_gen_rotr_i32(t2, t3, t2);
+            tcg_gen_ext_i32_tl(cpu_gpr[rd], t2);
+            tcg_temp_free_i32(t2);
+            tcg_temp_free_i32(t3);
+            opn = "rotrv";
         }
         break;
 #if defined(TARGET_MIPS64)
@@ -2651,29 +2627,14 @@
         opn = "dsrav";
         break;
     case OPC_DSRLV:
-        switch ((ctx->opcode >> 6) & 0x1f) {
-        case 0:
-            tcg_gen_andi_tl(t0, t0, 0x3f);
-            tcg_gen_shr_tl(cpu_gpr[rd], t1, t0);
-            opn = "dsrlv";
-            break;
-        case 1:
-            /* drotrv is decoded as dsrlv on non-R2 CPUs */
-            if (env->insn_flags & ISA_MIPS32R2) {
-                tcg_gen_andi_tl(t0, t0, 0x3f);
-                tcg_gen_rotr_tl(cpu_gpr[rd], t1, t0);
-                opn = "drotrv";
-            } else {
-                tcg_gen_andi_tl(t0, t0, 0x3f);
-                tcg_gen_shr_tl(t0, t1, t0);
-                opn = "dsrlv";
-            }
-            break;
-        default:
-            MIPS_INVAL("invalid dsrlv flag");
-            generate_exception(ctx, EXCP_RI);
-            break;
-        }
+        tcg_gen_andi_tl(t0, t0, 0x3f);
+        tcg_gen_shr_tl(cpu_gpr[rd], t1, t0);
+        opn = "dsrlv";
+        break;
+    case OPC_DROTRV:
+        tcg_gen_andi_tl(t0, t0, 0x3f);
+        tcg_gen_rotr_tl(cpu_gpr[rd], t1, t0);
+        opn = "drotrv";
         break;
 #endif
     }
@@ -2684,7 +2645,7 @@
 }
 
 /* Arithmetic on HI/LO registers */
-static void gen_HILO (DisasContext *ctx, uint32_t opc, int reg)
+static void gen_HILO(DisasContext *ctx, uint32_t opc, int acc, int reg)
 {
     const char *opn = "hilo";
 
@@ -2693,27 +2654,62 @@
         MIPS_DEBUG("NOP");
         return;
     }
+
+    if (acc != 0) {
+        check_dsp(ctx);
+    }
+
     switch (opc) {
     case OPC_MFHI:
-        tcg_gen_mov_tl(cpu_gpr[reg], cpu_HI[0]);
+#if defined(TARGET_MIPS64)
+        if (acc != 0) {
+            tcg_gen_ext32s_tl(cpu_gpr[reg], cpu_HI[acc]);
+        } else
+#endif
+        {
+            tcg_gen_mov_tl(cpu_gpr[reg], cpu_HI[acc]);
+        }
         opn = "mfhi";
         break;
     case OPC_MFLO:
-        tcg_gen_mov_tl(cpu_gpr[reg], cpu_LO[0]);
+#if defined(TARGET_MIPS64)
+        if (acc != 0) {
+            tcg_gen_ext32s_tl(cpu_gpr[reg], cpu_LO[acc]);
+        } else
+#endif
+        {
+            tcg_gen_mov_tl(cpu_gpr[reg], cpu_LO[acc]);
+        }
         opn = "mflo";
         break;
     case OPC_MTHI:
-        if (reg != 0)
-            tcg_gen_mov_tl(cpu_HI[0], cpu_gpr[reg]);
-        else
-            tcg_gen_movi_tl(cpu_HI[0], 0);
+        if (reg != 0) {
+#if defined(TARGET_MIPS64)
+            if (acc != 0) {
+                tcg_gen_ext32s_tl(cpu_HI[acc], cpu_gpr[reg]);
+            } else
+#endif
+            {
+                tcg_gen_mov_tl(cpu_HI[acc], cpu_gpr[reg]);
+            }
+        } else {
+            tcg_gen_movi_tl(cpu_HI[acc], 0);
+        }
         opn = "mthi";
         break;
     case OPC_MTLO:
-        if (reg != 0)
-            tcg_gen_mov_tl(cpu_LO[0], cpu_gpr[reg]);
-        else
-            tcg_gen_movi_tl(cpu_LO[0], 0);
+        if (reg != 0) {
+#if defined(TARGET_MIPS64)
+            if (acc != 0) {
+                tcg_gen_ext32s_tl(cpu_LO[acc], cpu_gpr[reg]);
+            } else
+#endif
+            {
+                tcg_gen_mov_tl(cpu_LO[acc], cpu_gpr[reg]);
+            }
+        } else {
+            tcg_gen_movi_tl(cpu_LO[acc], 0);
+        }
         opn = "mtlo";
         break;
     }
@@ -2721,143 +2717,126 @@
     MIPS_DEBUG("%s %s", opn, regnames[reg]);
 }
 
-static void gen_muldiv (DisasContext *ctx, uint32_t opc,
-                        int rs, int rt)
+static void gen_muldiv(DisasContext *ctx, uint32_t opc,
+                       int acc, int rs, int rt)
 {
     const char *opn = "mul/div";
     TCGv t0, t1;
 
-    switch (opc) {
-    case OPC_DIV:
-    case OPC_DIVU:
-#if defined(TARGET_MIPS64)
-    case OPC_DDIV:
-    case OPC_DDIVU:
-#endif
-        t0 = tcg_temp_local_new();
-        t1 = tcg_temp_local_new();
-        break;
-    default:
-        t0 = tcg_temp_new();
-        t1 = tcg_temp_new();
-        break;
-    }
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
 
     gen_load_gpr(t0, rs);
     gen_load_gpr(t1, rt);
+
+    if (acc != 0) {
+        check_dsp(ctx);
+    }
+
     switch (opc) {
     case OPC_DIV:
         {
-            int l1 = gen_new_label();
-            int l2 = gen_new_label();
-
+            TCGv t2 = tcg_temp_new();
+            TCGv t3 = tcg_temp_new();
             tcg_gen_ext32s_tl(t0, t0);
             tcg_gen_ext32s_tl(t1, t1);
-            tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
-            tcg_gen_brcondi_tl(TCG_COND_NE, t0, INT_MIN, l2);
-            tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1, l2);
-
-            tcg_gen_mov_tl(cpu_LO[0], t0);
-            tcg_gen_movi_tl(cpu_HI[0], 0);
-            tcg_gen_br(l1);
-            gen_set_label(l2);
-            tcg_gen_div_tl(cpu_LO[0], t0, t1);
-            tcg_gen_rem_tl(cpu_HI[0], t0, t1);
-            tcg_gen_ext32s_tl(cpu_LO[0], cpu_LO[0]);
-            tcg_gen_ext32s_tl(cpu_HI[0], cpu_HI[0]);
-            gen_set_label(l1);
+            tcg_gen_setcondi_tl(TCG_COND_EQ, t2, t0, INT_MIN);
+            tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, -1);
+            tcg_gen_and_tl(t2, t2, t3);
+            tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, 0);
+            tcg_gen_or_tl(t2, t2, t3);
+            tcg_gen_movi_tl(t3, 0);
+            tcg_gen_movcond_tl(TCG_COND_NE, t1, t2, t3, t2, t1);
+            tcg_gen_div_tl(cpu_LO[acc], t0, t1);
+            tcg_gen_rem_tl(cpu_HI[acc], t0, t1);
+            tcg_gen_ext32s_tl(cpu_LO[acc], cpu_LO[acc]);
+            tcg_gen_ext32s_tl(cpu_HI[acc], cpu_HI[acc]);
+            tcg_temp_free(t3);
+            tcg_temp_free(t2);
         }
         opn = "div";
         break;
     case OPC_DIVU:
         {
-            int l1 = gen_new_label();
-
+            TCGv t2 = tcg_const_tl(0);
+            TCGv t3 = tcg_const_tl(1);
             tcg_gen_ext32u_tl(t0, t0);
             tcg_gen_ext32u_tl(t1, t1);
-            tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
-            tcg_gen_divu_tl(cpu_LO[0], t0, t1);
-            tcg_gen_remu_tl(cpu_HI[0], t0, t1);
-            tcg_gen_ext32s_tl(cpu_LO[0], cpu_LO[0]);
-            tcg_gen_ext32s_tl(cpu_HI[0], cpu_HI[0]);
-            gen_set_label(l1);
+            tcg_gen_movcond_tl(TCG_COND_EQ, t1, t1, t2, t3, t1);
+            tcg_gen_divu_tl(cpu_LO[acc], t0, t1);
+            tcg_gen_remu_tl(cpu_HI[acc], t0, t1);
+            tcg_gen_ext32s_tl(cpu_LO[acc], cpu_LO[acc]);
+            tcg_gen_ext32s_tl(cpu_HI[acc], cpu_HI[acc]);
+            tcg_temp_free(t3);
+            tcg_temp_free(t2);
         }
         opn = "divu";
         break;
     case OPC_MULT:
         {
-            TCGv_i64 t2 = tcg_temp_new_i64();
-            TCGv_i64 t3 = tcg_temp_new_i64();
-
-            tcg_gen_ext_tl_i64(t2, t0);
-            tcg_gen_ext_tl_i64(t3, t1);
-            tcg_gen_mul_i64(t2, t2, t3);
-            tcg_temp_free_i64(t3);
-            tcg_gen_trunc_i64_tl(t0, t2);
-            tcg_gen_shri_i64(t2, t2, 32);
-            tcg_gen_trunc_i64_tl(t1, t2);
-            tcg_temp_free_i64(t2);
-            tcg_gen_ext32s_tl(cpu_LO[0], t0);
-            tcg_gen_ext32s_tl(cpu_HI[0], t1);
+            TCGv_i32 t2 = tcg_temp_new_i32();
+            TCGv_i32 t3 = tcg_temp_new_i32();
+            tcg_gen_trunc_tl_i32(t2, t0);
+            tcg_gen_trunc_tl_i32(t3, t1);
+            tcg_gen_muls2_i32(t2, t3, t2, t3);
+            tcg_gen_ext_i32_tl(cpu_LO[acc], t2);
+            tcg_gen_ext_i32_tl(cpu_HI[acc], t3);
+            tcg_temp_free_i32(t2);
+            tcg_temp_free_i32(t3);
         }
         opn = "mult";
         break;
     case OPC_MULTU:
         {
-            TCGv_i64 t2 = tcg_temp_new_i64();
-            TCGv_i64 t3 = tcg_temp_new_i64();
-
-            tcg_gen_ext32u_tl(t0, t0);
-            tcg_gen_ext32u_tl(t1, t1);
-            tcg_gen_extu_tl_i64(t2, t0);
-            tcg_gen_extu_tl_i64(t3, t1);
-            tcg_gen_mul_i64(t2, t2, t3);
-            tcg_temp_free_i64(t3);
-            tcg_gen_trunc_i64_tl(t0, t2);
-            tcg_gen_shri_i64(t2, t2, 32);
-            tcg_gen_trunc_i64_tl(t1, t2);
-            tcg_temp_free_i64(t2);
-            tcg_gen_ext32s_tl(cpu_LO[0], t0);
-            tcg_gen_ext32s_tl(cpu_HI[0], t1);
+            TCGv_i32 t2 = tcg_temp_new_i32();
+            TCGv_i32 t3 = tcg_temp_new_i32();
+            tcg_gen_trunc_tl_i32(t2, t0);
+            tcg_gen_trunc_tl_i32(t3, t1);
+            tcg_gen_mulu2_i32(t2, t3, t2, t3);
+            tcg_gen_ext_i32_tl(cpu_LO[acc], t2);
+            tcg_gen_ext_i32_tl(cpu_HI[acc], t3);
+            tcg_temp_free_i32(t2);
+            tcg_temp_free_i32(t3);
         }
         opn = "multu";
         break;
 #if defined(TARGET_MIPS64)
     case OPC_DDIV:
         {
-            int l1 = gen_new_label();
-            int l2 = gen_new_label();
-
-            tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
-            tcg_gen_brcondi_tl(TCG_COND_NE, t0, -1LL << 63, l2);
-            tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1LL, l2);
-            tcg_gen_mov_tl(cpu_LO[0], t0);
-            tcg_gen_movi_tl(cpu_HI[0], 0);
-            tcg_gen_br(l1);
-            gen_set_label(l2);
-            tcg_gen_div_i64(cpu_LO[0], t0, t1);
-            tcg_gen_rem_i64(cpu_HI[0], t0, t1);
-            gen_set_label(l1);
+            TCGv t2 = tcg_temp_new();
+            TCGv t3 = tcg_temp_new();
+            tcg_gen_setcondi_tl(TCG_COND_EQ, t2, t0, -1LL << 63);
+            tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, -1LL);
+            tcg_gen_and_tl(t2, t2, t3);
+            tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, 0);
+            tcg_gen_or_tl(t2, t2, t3);
+            tcg_gen_movi_tl(t3, 0);
+            tcg_gen_movcond_tl(TCG_COND_NE, t1, t2, t3, t2, t1);
+            tcg_gen_div_tl(cpu_LO[acc], t0, t1);
+            tcg_gen_rem_tl(cpu_HI[acc], t0, t1);
+            tcg_temp_free(t3);
+            tcg_temp_free(t2);
         }
         opn = "ddiv";
         break;
     case OPC_DDIVU:
         {
-            int l1 = gen_new_label();
-
-            tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
-            tcg_gen_divu_i64(cpu_LO[0], t0, t1);
-            tcg_gen_remu_i64(cpu_HI[0], t0, t1);
-            gen_set_label(l1);
+            TCGv t2 = tcg_const_tl(0);
+            TCGv t3 = tcg_const_tl(1);
+            tcg_gen_movcond_tl(TCG_COND_EQ, t1, t1, t2, t3, t1);
+            tcg_gen_divu_i64(cpu_LO[acc], t0, t1);
+            tcg_gen_remu_i64(cpu_HI[acc], t0, t1);
+            tcg_temp_free(t3);
+            tcg_temp_free(t2);
         }
         opn = "ddivu";
         break;
     case OPC_DMULT:
-        gen_helper_dmult(cpu_env, t0, t1);
+        tcg_gen_muls2_i64(cpu_LO[acc], cpu_HI[acc], t0, t1);
         opn = "dmult";
         break;
     case OPC_DMULTU:
-        gen_helper_dmultu(cpu_env, t0, t1);
+        tcg_gen_mulu2_i64(cpu_LO[acc], cpu_HI[acc], t0, t1);
         opn = "dmultu";
         break;
 #endif
@@ -2869,20 +2848,20 @@
             tcg_gen_ext_tl_i64(t2, t0);
             tcg_gen_ext_tl_i64(t3, t1);
             tcg_gen_mul_i64(t2, t2, t3);
-            tcg_gen_concat_tl_i64(t3, cpu_LO[0], cpu_HI[0]);
+            tcg_gen_concat_tl_i64(t3, cpu_LO[acc], cpu_HI[acc]);
             tcg_gen_add_i64(t2, t2, t3);
             tcg_temp_free_i64(t3);
             tcg_gen_trunc_i64_tl(t0, t2);
             tcg_gen_shri_i64(t2, t2, 32);
             tcg_gen_trunc_i64_tl(t1, t2);
             tcg_temp_free_i64(t2);
-            tcg_gen_ext32s_tl(cpu_LO[0], t0);
-            tcg_gen_ext32s_tl(cpu_HI[0], t1);
+            tcg_gen_ext32s_tl(cpu_LO[acc], t0);
+            tcg_gen_ext32s_tl(cpu_HI[acc], t1);
         }
         opn = "madd";
         break;
     case OPC_MADDU:
-       {
+        {
             TCGv_i64 t2 = tcg_temp_new_i64();
             TCGv_i64 t3 = tcg_temp_new_i64();
 
@@ -2891,15 +2870,15 @@
             tcg_gen_extu_tl_i64(t2, t0);
             tcg_gen_extu_tl_i64(t3, t1);
             tcg_gen_mul_i64(t2, t2, t3);
-            tcg_gen_concat_tl_i64(t3, cpu_LO[0], cpu_HI[0]);
+            tcg_gen_concat_tl_i64(t3, cpu_LO[acc], cpu_HI[acc]);
             tcg_gen_add_i64(t2, t2, t3);
             tcg_temp_free_i64(t3);
             tcg_gen_trunc_i64_tl(t0, t2);
             tcg_gen_shri_i64(t2, t2, 32);
             tcg_gen_trunc_i64_tl(t1, t2);
             tcg_temp_free_i64(t2);
-            tcg_gen_ext32s_tl(cpu_LO[0], t0);
-            tcg_gen_ext32s_tl(cpu_HI[0], t1);
+            tcg_gen_ext32s_tl(cpu_LO[acc], t0);
+            tcg_gen_ext32s_tl(cpu_HI[acc], t1);
         }
         opn = "maddu";
         break;
@@ -2911,15 +2890,15 @@
             tcg_gen_ext_tl_i64(t2, t0);
             tcg_gen_ext_tl_i64(t3, t1);
             tcg_gen_mul_i64(t2, t2, t3);
-            tcg_gen_concat_tl_i64(t3, cpu_LO[0], cpu_HI[0]);
+            tcg_gen_concat_tl_i64(t3, cpu_LO[acc], cpu_HI[acc]);
             tcg_gen_sub_i64(t2, t3, t2);
             tcg_temp_free_i64(t3);
             tcg_gen_trunc_i64_tl(t0, t2);
             tcg_gen_shri_i64(t2, t2, 32);
             tcg_gen_trunc_i64_tl(t1, t2);
             tcg_temp_free_i64(t2);
-            tcg_gen_ext32s_tl(cpu_LO[0], t0);
-            tcg_gen_ext32s_tl(cpu_HI[0], t1);
+            tcg_gen_ext32s_tl(cpu_LO[acc], t0);
+            tcg_gen_ext32s_tl(cpu_HI[acc], t1);
         }
         opn = "msub";
         break;
@@ -2933,15 +2912,15 @@
             tcg_gen_extu_tl_i64(t2, t0);
             tcg_gen_extu_tl_i64(t3, t1);
             tcg_gen_mul_i64(t2, t2, t3);
-            tcg_gen_concat_tl_i64(t3, cpu_LO[0], cpu_HI[0]);
+            tcg_gen_concat_tl_i64(t3, cpu_LO[acc], cpu_HI[acc]);
             tcg_gen_sub_i64(t2, t3, t2);
             tcg_temp_free_i64(t3);
             tcg_gen_trunc_i64_tl(t0, t2);
             tcg_gen_shri_i64(t2, t2, 32);
             tcg_gen_trunc_i64_tl(t1, t2);
             tcg_temp_free_i64(t2);
-            tcg_gen_ext32s_tl(cpu_LO[0], t0);
-            tcg_gen_ext32s_tl(cpu_HI[0], t1);
+            tcg_gen_ext32s_tl(cpu_LO[acc], t0);
+            tcg_gen_ext32s_tl(cpu_HI[acc], t1);
         }
         opn = "msubu";
         break;
@@ -3076,6 +3055,493 @@
     tcg_temp_free(t0);
 }
 
+/* Godson/Loongson 2E/2F integer mul/div/mod; the result goes to GPR rd. */
+static void gen_loongson_integer(DisasContext *ctx, uint32_t opc,
+                                 int rd, int rs, int rt)
+{
+    const char *opn = "loongson";
+    TCGv t0, t1;
+
+    if (rd == 0) {
+        /* Destination is GPR 0: nothing to write, treat as NOP. */
+        MIPS_DEBUG("NOP");
+        return;
+    }
+
+    switch (opc) {
+    case OPC_MULT_G_2E:
+    case OPC_MULT_G_2F:
+    case OPC_MULTU_G_2E:
+    case OPC_MULTU_G_2F:
+#if defined(TARGET_MIPS64)
+    case OPC_DMULT_G_2E:
+    case OPC_DMULT_G_2F:
+    case OPC_DMULTU_G_2E:
+    case OPC_DMULTU_G_2F:
+#endif
+        t0 = tcg_temp_new();        /* multiplies emit no branches */
+        t1 = tcg_temp_new();
+        break;
+    default:
+        t0 = tcg_temp_local_new();  /* div/mod emit branches: operands */
+        t1 = tcg_temp_local_new();  /* must stay live across the labels */
+        break;
+    }
+
+    gen_load_gpr(t0, rs);
+    gen_load_gpr(t1, rt);
+
+    switch (opc) {
+    case OPC_MULT_G_2E:
+    case OPC_MULT_G_2F:
+        tcg_gen_mul_tl(cpu_gpr[rd], t0, t1);
+        tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
+        opn = "mult.g";
+        break;
+    case OPC_MULTU_G_2E:
+    case OPC_MULTU_G_2F:
+        tcg_gen_ext32u_tl(t0, t0);
+        tcg_gen_ext32u_tl(t1, t1);
+        tcg_gen_mul_tl(cpu_gpr[rd], t0, t1);
+        tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
+        opn = "multu.g";
+        break;
+    case OPC_DIV_G_2E:
+    case OPC_DIV_G_2F:
+        {
+            int l1 = gen_new_label();
+            int l2 = gen_new_label();
+            int l3 = gen_new_label();
+            tcg_gen_ext32s_tl(t0, t0);
+            tcg_gen_ext32s_tl(t1, t1);
+            tcg_gen_brcondi_tl(TCG_COND_NE, t1, 0, l1);
+            tcg_gen_movi_tl(cpu_gpr[rd], 0);    /* x / 0 -> 0 */
+            tcg_gen_br(l3);
+            gen_set_label(l1);
+            tcg_gen_brcondi_tl(TCG_COND_NE, t0, INT_MIN, l2);
+            tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1, l2);
+            tcg_gen_mov_tl(cpu_gpr[rd], t0);    /* INT_MIN / -1 -> INT_MIN */
+            tcg_gen_br(l3);
+            gen_set_label(l2);
+            tcg_gen_div_tl(cpu_gpr[rd], t0, t1);
+            tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
+            gen_set_label(l3);
+        }
+        opn = "div.g";
+        break;
+    case OPC_DIVU_G_2E:
+    case OPC_DIVU_G_2F:
+        {
+            int l1 = gen_new_label();
+            int l2 = gen_new_label();
+            tcg_gen_ext32u_tl(t0, t0);
+            tcg_gen_ext32u_tl(t1, t1);
+            tcg_gen_brcondi_tl(TCG_COND_NE, t1, 0, l1);
+            tcg_gen_movi_tl(cpu_gpr[rd], 0);    /* x / 0 -> 0 */
+            tcg_gen_br(l2);
+            gen_set_label(l1);
+            tcg_gen_divu_tl(cpu_gpr[rd], t0, t1);
+            tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
+            gen_set_label(l2);
+        }
+        opn = "divu.g";
+        break;
+    case OPC_MOD_G_2E:
+    case OPC_MOD_G_2F:
+        {
+            int l1 = gen_new_label();
+            int l2 = gen_new_label();
+            int l3 = gen_new_label();
+            tcg_gen_ext32s_tl(t0, t0);  /* signed operands, as in div.g; */
+            tcg_gen_ext32s_tl(t1, t1);  /* INT_MIN/-1 tests need sign-ext */
+            tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
+            tcg_gen_brcondi_tl(TCG_COND_NE, t0, INT_MIN, l2);
+            tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1, l2);
+            gen_set_label(l1);
+            tcg_gen_movi_tl(cpu_gpr[rd], 0);    /* x % 0, INT_MIN % -1 -> 0 */
+            tcg_gen_br(l3);
+            gen_set_label(l2);
+            tcg_gen_rem_tl(cpu_gpr[rd], t0, t1);
+            tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
+            gen_set_label(l3);
+        }
+        opn = "mod.g";
+        break;
+    case OPC_MODU_G_2E:
+    case OPC_MODU_G_2F:
+        {
+            int l1 = gen_new_label();
+            int l2 = gen_new_label();
+            tcg_gen_ext32u_tl(t0, t0);
+            tcg_gen_ext32u_tl(t1, t1);
+            tcg_gen_brcondi_tl(TCG_COND_NE, t1, 0, l1);
+            tcg_gen_movi_tl(cpu_gpr[rd], 0);    /* x % 0 -> 0 */
+            tcg_gen_br(l2);
+            gen_set_label(l1);
+            tcg_gen_remu_tl(cpu_gpr[rd], t0, t1);
+            tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
+            gen_set_label(l2);
+        }
+        opn = "modu.g";
+        break;
+#if defined(TARGET_MIPS64)
+    case OPC_DMULT_G_2E:
+    case OPC_DMULT_G_2F:
+        tcg_gen_mul_tl(cpu_gpr[rd], t0, t1);
+        opn = "dmult.g";
+        break;
+    case OPC_DMULTU_G_2E:
+    case OPC_DMULTU_G_2F:
+        tcg_gen_mul_tl(cpu_gpr[rd], t0, t1);
+        opn = "dmultu.g";
+        break;
+    case OPC_DDIV_G_2E:
+    case OPC_DDIV_G_2F:
+        {
+            int l1 = gen_new_label();
+            int l2 = gen_new_label();
+            int l3 = gen_new_label();
+            tcg_gen_brcondi_tl(TCG_COND_NE, t1, 0, l1);
+            tcg_gen_movi_tl(cpu_gpr[rd], 0);    /* x / 0 -> 0 */
+            tcg_gen_br(l3);
+            gen_set_label(l1);
+            tcg_gen_brcondi_tl(TCG_COND_NE, t0, -1LL << 63, l2);
+            tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1LL, l2);
+            tcg_gen_mov_tl(cpu_gpr[rd], t0);    /* min / -1 -> min */
+            tcg_gen_br(l3);
+            gen_set_label(l2);
+            tcg_gen_div_tl(cpu_gpr[rd], t0, t1);
+            gen_set_label(l3);
+        }
+        opn = "ddiv.g";
+        break;
+    case OPC_DDIVU_G_2E:
+    case OPC_DDIVU_G_2F:
+        {
+            int l1 = gen_new_label();
+            int l2 = gen_new_label();
+            tcg_gen_brcondi_tl(TCG_COND_NE, t1, 0, l1);
+            tcg_gen_movi_tl(cpu_gpr[rd], 0);    /* x / 0 -> 0 */
+            tcg_gen_br(l2);
+            gen_set_label(l1);
+            tcg_gen_divu_tl(cpu_gpr[rd], t0, t1);
+            gen_set_label(l2);
+        }
+        opn = "ddivu.g";
+        break;
+    case OPC_DMOD_G_2E:
+    case OPC_DMOD_G_2F:
+        {
+            int l1 = gen_new_label();
+            int l2 = gen_new_label();
+            int l3 = gen_new_label();
+            tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
+            tcg_gen_brcondi_tl(TCG_COND_NE, t0, -1LL << 63, l2);
+            tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1LL, l2);
+            gen_set_label(l1);
+            tcg_gen_movi_tl(cpu_gpr[rd], 0);    /* x % 0, min % -1 -> 0 */
+            tcg_gen_br(l3);
+            gen_set_label(l2);
+            tcg_gen_rem_tl(cpu_gpr[rd], t0, t1);
+            gen_set_label(l3);
+        }
+        opn = "dmod.g";
+        break;
+    case OPC_DMODU_G_2E:
+    case OPC_DMODU_G_2F:
+        {
+            int l1 = gen_new_label();
+            int l2 = gen_new_label();
+            tcg_gen_brcondi_tl(TCG_COND_NE, t1, 0, l1);
+            tcg_gen_movi_tl(cpu_gpr[rd], 0);    /* x % 0 -> 0 */
+            tcg_gen_br(l2);
+            gen_set_label(l1);
+            tcg_gen_remu_tl(cpu_gpr[rd], t0, t1);
+            gen_set_label(l2);
+        }
+        opn = "dmodu.g";
+        break;
+#endif
+    }
+
+    (void)opn; /* avoid a compiler warning */
+    MIPS_DEBUG("%s %s, %s, %s", opn, regnames[rd], regnames[rs], regnames[rt]);
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+}
+
+/* Loongson multimedia (LMI) insns: FPR rd = op(FPR rs, FPR rt), 64-bit. */
+static void gen_loongson_multimedia(DisasContext *ctx, int rd, int rs, int rt)
+{
+    const char *opn = "loongson_cp2";
+    uint32_t opc, shift_max;
+    TCGv_i64 t0, t1;
+
+    opc = MASK_LMI(ctx->opcode);
+    switch (opc) {
+    case OPC_ADD_CP2:
+    case OPC_SUB_CP2:
+    case OPC_DADD_CP2:
+    case OPC_DSUB_CP2:
+        t0 = tcg_temp_local_new_i64();  /* trapping add/sub emit a branch: */
+        t1 = tcg_temp_local_new_i64();  /* values must survive the label   */
+        break;
+    default:
+        t0 = tcg_temp_new_i64();
+        t1 = tcg_temp_new_i64();
+        break;
+    }
+
+    gen_load_fpr64(ctx, t0, rs);
+    gen_load_fpr64(ctx, t1, rt);
+
+#define LMI_HELPER(UP, LO) \
+    case OPC_##UP: gen_helper_##LO(t0, t0, t1); opn = #LO; break
+#define LMI_HELPER_1(UP, LO) \
+    case OPC_##UP: gen_helper_##LO(t0, t0); opn = #LO; break
+#define LMI_DIRECT(UP, LO, OP) \
+    case OPC_##UP: tcg_gen_##OP##_i64(t0, t0, t1); opn = #LO; break
+
+    switch (opc) {
+    LMI_HELPER(PADDSH, paddsh);
+    LMI_HELPER(PADDUSH, paddush);
+    LMI_HELPER(PADDH, paddh);
+    LMI_HELPER(PADDW, paddw);
+    LMI_HELPER(PADDSB, paddsb);
+    LMI_HELPER(PADDUSB, paddusb);
+    LMI_HELPER(PADDB, paddb);
+
+    LMI_HELPER(PSUBSH, psubsh);
+    LMI_HELPER(PSUBUSH, psubush);
+    LMI_HELPER(PSUBH, psubh);
+    LMI_HELPER(PSUBW, psubw);
+    LMI_HELPER(PSUBSB, psubsb);
+    LMI_HELPER(PSUBUSB, psubusb);
+    LMI_HELPER(PSUBB, psubb);
+
+    LMI_HELPER(PSHUFH, pshufh);
+    LMI_HELPER(PACKSSWH, packsswh);
+    LMI_HELPER(PACKSSHB, packsshb);
+    LMI_HELPER(PACKUSHB, packushb);
+
+    LMI_HELPER(PUNPCKLHW, punpcklhw);
+    LMI_HELPER(PUNPCKHHW, punpckhhw);
+    LMI_HELPER(PUNPCKLBH, punpcklbh);
+    LMI_HELPER(PUNPCKHBH, punpckhbh);
+    LMI_HELPER(PUNPCKLWD, punpcklwd);
+    LMI_HELPER(PUNPCKHWD, punpckhwd);
+
+    LMI_HELPER(PAVGH, pavgh);
+    LMI_HELPER(PAVGB, pavgb);
+    LMI_HELPER(PMAXSH, pmaxsh);
+    LMI_HELPER(PMINSH, pminsh);
+    LMI_HELPER(PMAXUB, pmaxub);
+    LMI_HELPER(PMINUB, pminub);
+
+    LMI_HELPER(PCMPEQW, pcmpeqw);
+    LMI_HELPER(PCMPGTW, pcmpgtw);
+    LMI_HELPER(PCMPEQH, pcmpeqh);
+    LMI_HELPER(PCMPGTH, pcmpgth);
+    LMI_HELPER(PCMPEQB, pcmpeqb);
+    LMI_HELPER(PCMPGTB, pcmpgtb);
+
+    LMI_HELPER(PSLLW, psllw);
+    LMI_HELPER(PSLLH, psllh);
+    LMI_HELPER(PSRLW, psrlw);
+    LMI_HELPER(PSRLH, psrlh);
+    LMI_HELPER(PSRAW, psraw);
+    LMI_HELPER(PSRAH, psrah);
+
+    LMI_HELPER(PMULLH, pmullh);
+    LMI_HELPER(PMULHH, pmulhh);
+    LMI_HELPER(PMULHUH, pmulhuh);
+    LMI_HELPER(PMADDHW, pmaddhw);
+
+    LMI_HELPER(PASUBUB, pasubub);
+    LMI_HELPER_1(BIADD, biadd);
+    LMI_HELPER_1(PMOVMSKB, pmovmskb);
+
+    LMI_DIRECT(PADDD, paddd, add);
+    LMI_DIRECT(PSUBD, psubd, sub);
+    LMI_DIRECT(XOR_CP2, xor, xor);
+    LMI_DIRECT(NOR_CP2, nor, nor);
+    LMI_DIRECT(AND_CP2, and, and);
+    LMI_DIRECT(PANDN, pandn, andc);
+    LMI_DIRECT(OR_CP2, or, or);     /* the CP2 "or", not SPECIAL OPC_OR */
+
+    case OPC_PINSRH_0:
+        tcg_gen_deposit_i64(t0, t0, t1, 0, 16);
+        opn = "pinsrh_0";
+        break;
+    case OPC_PINSRH_1:
+        tcg_gen_deposit_i64(t0, t0, t1, 16, 16);
+        opn = "pinsrh_1";
+        break;
+    case OPC_PINSRH_2:
+        tcg_gen_deposit_i64(t0, t0, t1, 32, 16);
+        opn = "pinsrh_2";
+        break;
+    case OPC_PINSRH_3:
+        tcg_gen_deposit_i64(t0, t0, t1, 48, 16);
+        opn = "pinsrh_3";
+        break;
+
+    case OPC_PEXTRH:
+        tcg_gen_andi_i64(t1, t1, 3);    /* halfword index 0..3 */
+        tcg_gen_shli_i64(t1, t1, 4);    /* index * 16 = bit offset */
+        tcg_gen_shr_i64(t0, t0, t1);
+        tcg_gen_ext16u_i64(t0, t0);
+        opn = "pextrh";
+        break;
+
+    case OPC_ADDU_CP2:
+        tcg_gen_add_i64(t0, t0, t1);
+        tcg_gen_ext32s_i64(t0, t0);
+        opn = "addu";
+        break;
+    case OPC_SUBU_CP2:
+        tcg_gen_sub_i64(t0, t0, t1);
+        tcg_gen_ext32s_i64(t0, t0);
+        opn = "subu";
+        break;
+
+    case OPC_SLL_CP2:
+        opn = "sll";
+        shift_max = 32;
+        goto do_shift;
+    case OPC_SRL_CP2:
+        opn = "srl";
+        shift_max = 32;
+        goto do_shift;
+    case OPC_SRA_CP2:
+        opn = "sra";
+        shift_max = 32;
+        goto do_shift;
+    case OPC_DSLL_CP2:
+        opn = "dsll";
+        shift_max = 64;
+        goto do_shift;
+    case OPC_DSRL_CP2:
+        opn = "dsrl";
+        shift_max = 64;
+        goto do_shift;
+    case OPC_DSRA_CP2:
+        opn = "dsra";
+        shift_max = 64;
+        goto do_shift;
+    do_shift:
+        /* Make sure shift count isn't TCG undefined behaviour.  */
+        tcg_gen_andi_i64(t1, t1, shift_max - 1);
+
+        switch (opc) {
+        case OPC_SLL_CP2:
+        case OPC_DSLL_CP2:
+            tcg_gen_shl_i64(t0, t0, t1);
+            break;
+        case OPC_SRA_CP2:
+        case OPC_DSRA_CP2:
+            /* Since SRA is UndefinedResult without sign-extended inputs,
+               we can treat SRA and DSRA the same.  */
+            tcg_gen_sar_i64(t0, t0, t1);
+            break;
+        case OPC_SRL_CP2:
+            /* We want to shift in zeros for SRL; zero-extend first.  */
+            tcg_gen_ext32u_i64(t0, t0);
+            /* FALLTHRU */
+        case OPC_DSRL_CP2:
+            tcg_gen_shr_i64(t0, t0, t1);
+            break;
+        }
+
+        if (shift_max == 32) {
+            tcg_gen_ext32s_i64(t0, t0);
+        }
+
+        /* Meant to zero shifts >= MAX; NOTE(review): t1 was masked above. */
+        tcg_gen_setcondi_i64(TCG_COND_LTU, t1, t1, shift_max);
+        tcg_gen_neg_i64(t1, t1);
+        tcg_gen_and_i64(t0, t0, t1);
+        break;
+
+    case OPC_ADD_CP2:
+    case OPC_DADD_CP2:
+        {
+            TCGv_i64 t2 = tcg_temp_new_i64();
+            int lab = gen_new_label();
+
+            tcg_gen_mov_i64(t2, t0);
+            tcg_gen_add_i64(t0, t1, t2);
+            if (opc == OPC_ADD_CP2) {
+                tcg_gen_ext32s_i64(t0, t0);
+            }
+            tcg_gen_xor_i64(t1, t1, t2);
+            tcg_gen_xor_i64(t2, t2, t0);
+            tcg_gen_andc_i64(t1, t2, t1);   /* sign set: same-sign overflow */
+            tcg_temp_free_i64(t2);
+            tcg_gen_brcondi_i64(TCG_COND_GE, t1, 0, lab);
+            generate_exception(ctx, EXCP_OVERFLOW);
+            gen_set_label(lab);
+
+            opn = (opc == OPC_ADD_CP2 ? "add" : "dadd");
+            break;
+        }
+
+    case OPC_SUB_CP2:
+    case OPC_DSUB_CP2:
+        {
+            TCGv_i64 t2 = tcg_temp_new_i64();
+            int lab = gen_new_label();
+
+            tcg_gen_mov_i64(t2, t0);
+            tcg_gen_sub_i64(t0, t2, t1);    /* rs - rt, like subu/psubd */
+            if (opc == OPC_SUB_CP2) {
+                tcg_gen_ext32s_i64(t0, t0);
+            }
+            tcg_gen_xor_i64(t1, t1, t2);    /* operands differ in sign... */
+            tcg_gen_xor_i64(t2, t2, t0);    /* ...and result differs from rs */
+            tcg_gen_and_i64(t1, t1, t2);
+            tcg_temp_free_i64(t2);
+            tcg_gen_brcondi_i64(TCG_COND_GE, t1, 0, lab);
+            generate_exception(ctx, EXCP_OVERFLOW);
+            gen_set_label(lab);
+
+            opn = (opc == OPC_SUB_CP2 ? "sub" : "dsub");
+            break;
+        }
+
+    case OPC_PMULUW:
+        tcg_gen_ext32u_i64(t0, t0);
+        tcg_gen_ext32u_i64(t1, t1);
+        tcg_gen_mul_i64(t0, t0, t1);
+        opn = "pmuluw";
+        break;
+
+    case OPC_SEQU_CP2:
+    case OPC_SEQ_CP2:
+    case OPC_SLTU_CP2:
+    case OPC_SLT_CP2:
+    case OPC_SLEU_CP2:
+    case OPC_SLE_CP2:
+        /* ??? Document is unclear: Set FCC[CC].  Does that mean the
+           FD field is the CC field?  */
+    default:
+        MIPS_INVAL(opn);
+        generate_exception(ctx, EXCP_RI);
+        goto out;   /* nothing to store, but the temps must still be freed */
+    }
+
+#undef LMI_HELPER
+#undef LMI_HELPER_1
+#undef LMI_DIRECT
+    gen_store_fpr64(ctx, t0, rd);
+    (void)opn; /* avoid a compiler warning */
+    MIPS_DEBUG("%s %s, %s, %s", opn,
+               fregnames[rd], fregnames[rs], fregnames[rt]);
+ out:
+    tcg_temp_free_i64(t0);
+    tcg_temp_free_i64(t1);
+}
+
 /* Traps */
 static void gen_trap (DisasContext *ctx, uint32_t opc,
                       int rs, int rt, int16_t imm)
@@ -3183,7 +3649,7 @@
         gen_save_pc(dest);
         if (ctx->singlestep_enabled) {
             save_cpu_state(ctx, 0);
-            gen_helper_1i(raise_exception, cpu_env, EXCP_DEBUG);
+            gen_helper_0e0i(raise_exception, EXCP_DEBUG);
         }
         tcg_gen_exit_tb(0);
     }
@@ -3191,6 +3657,7 @@
 
 /* Branches (before delay slot) */
 static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
+                                int insn_bytes,
                                 int rs, int rt, int32_t offset)
 {
     target_ulong btgt = -1;
@@ -3219,10 +3686,11 @@
             gen_load_gpr(t1, rt);
             bcond_compute = 1;
         }
-        btgt = ctx->pc + 4 + offset;
+        btgt = ctx->pc + insn_bytes + offset;
         break;
     case OPC_BGEZ:
     case OPC_BGEZAL:
+    case OPC_BGEZALS:
     case OPC_BGEZALL:
     case OPC_BGEZL:
     case OPC_BGTZ:
@@ -3231,6 +3699,7 @@
     case OPC_BLEZL:
     case OPC_BLTZ:
     case OPC_BLTZAL:
+    case OPC_BLTZALS:
     case OPC_BLTZALL:
     case OPC_BLTZL:
         /* Compare to zero */
@@ -3238,15 +3707,30 @@
             gen_load_gpr(t0, rs);
             bcond_compute = 1;
         }
-        btgt = ctx->pc + 4 + offset;
+        btgt = ctx->pc + insn_bytes + offset;
+        break;
+    case OPC_BPOSGE32:
+#if defined(TARGET_MIPS64)
+    case OPC_BPOSGE64:
+        tcg_gen_andi_tl(t0, cpu_dspctrl, 0x7F);
+#else
+        tcg_gen_andi_tl(t0, cpu_dspctrl, 0x3F);
+#endif
+        bcond_compute = 1;
+        btgt = ctx->pc + insn_bytes + offset;
         break;
     case OPC_J:
     case OPC_JAL:
+    case OPC_JALX:
+    case OPC_JALS:
+    case OPC_JALXS:
         /* Jump to immediate */
-        btgt = ((ctx->pc + 4) & (int32_t)0xF0000000) | (uint32_t)offset;
+        btgt = ((ctx->pc + insn_bytes) & (int32_t)0xF0000000) | (uint32_t)offset;
         break;
     case OPC_JR:
     case OPC_JALR:
+    case OPC_JALRC:
+    case OPC_JALRS:
         /* Jump to register */
         if (offset != 0 && offset != 16) {
             /* Hint = 0 is JR/JALR, hint 16 is JR.HB/JALR.HB, the
@@ -3275,8 +3759,12 @@
             ctx->hflags |= MIPS_HFLAG_B;
             MIPS_DEBUG("balways");
             break;
+        case OPC_BGEZALS:
         case OPC_BGEZAL:  /* 0 >= 0          */
         case OPC_BGEZALL: /* 0 >= 0 likely   */
+            ctx->hflags |= (opc == OPC_BGEZALS
+                            ? MIPS_HFLAG_BDS16
+                            : MIPS_HFLAG_BDS32);
             /* Always take and link */
             blink = 31;
             ctx->hflags |= MIPS_HFLAG_B;
@@ -3288,10 +3776,18 @@
             /* Treat as NOP. */
             MIPS_DEBUG("bnever (NOP)");
             goto out;
+        case OPC_BLTZALS:
         case OPC_BLTZAL:  /* 0 < 0           */
-            tcg_gen_movi_tl(cpu_gpr[31], ctx->pc + 8);
+            ctx->hflags |= (opc == OPC_BLTZALS
+                            ? MIPS_HFLAG_BDS16
+                            : MIPS_HFLAG_BDS32);
+            /* Handle as an unconditional branch to get correct delay
+               slot checking.  */
+            blink = 31;
+            btgt = ctx->pc + (opc == OPC_BLTZALS ? 6 : 8);
+            ctx->hflags |= MIPS_HFLAG_B;
             MIPS_DEBUG("bnever and link");
-            goto out;
+            break;
         case OPC_BLTZALL: /* 0 < 0 likely */
             tcg_gen_movi_tl(cpu_gpr[31], ctx->pc + 8);
             /* Skip the instruction in the delay slot */
@@ -3309,18 +3805,33 @@
             ctx->hflags |= MIPS_HFLAG_B;
             MIPS_DEBUG("j " TARGET_FMT_lx, btgt);
             break;
+        case OPC_JALXS:
+        case OPC_JALX:
+            ctx->hflags |= MIPS_HFLAG_BX;
+            /* Fallthrough */
+        case OPC_JALS:
         case OPC_JAL:
             blink = 31;
             ctx->hflags |= MIPS_HFLAG_B;
+            ctx->hflags |= ((opc == OPC_JALS || opc == OPC_JALXS)
+                            ? MIPS_HFLAG_BDS16
+                            : MIPS_HFLAG_BDS32);
             MIPS_DEBUG("jal " TARGET_FMT_lx, btgt);
             break;
         case OPC_JR:
             ctx->hflags |= MIPS_HFLAG_BR;
+            if (insn_bytes == 4)
+                ctx->hflags |= MIPS_HFLAG_BDS32;
             MIPS_DEBUG("jr %s", regnames[rs]);
             break;
+        case OPC_JALRS:
         case OPC_JALR:
+        case OPC_JALRC:
             blink = rt;
             ctx->hflags |= MIPS_HFLAG_BR;
+            ctx->hflags |= (opc == OPC_JALRS
+                            ? MIPS_HFLAG_BDS16
+                            : MIPS_HFLAG_BDS32);
             MIPS_DEBUG("jalr %s, %s", regnames[rt], regnames[rs]);
             break;
         default:
@@ -3331,76 +3842,94 @@
     } else {
         switch (opc) {
         case OPC_BEQ:
-            gen_op_eq(bcond, t0, t1);
+            tcg_gen_setcond_tl(TCG_COND_EQ, bcond, t0, t1);
             MIPS_DEBUG("beq %s, %s, " TARGET_FMT_lx,
                        regnames[rs], regnames[rt], btgt);
             goto not_likely;
         case OPC_BEQL:
-            gen_op_eq(bcond, t0, t1);
+            tcg_gen_setcond_tl(TCG_COND_EQ, bcond, t0, t1);
             MIPS_DEBUG("beql %s, %s, " TARGET_FMT_lx,
                        regnames[rs], regnames[rt], btgt);
             goto likely;
         case OPC_BNE:
-            gen_op_ne(bcond, t0, t1);
+            tcg_gen_setcond_tl(TCG_COND_NE, bcond, t0, t1);
             MIPS_DEBUG("bne %s, %s, " TARGET_FMT_lx,
                        regnames[rs], regnames[rt], btgt);
             goto not_likely;
         case OPC_BNEL:
-            gen_op_ne(bcond, t0, t1);
+            tcg_gen_setcond_tl(TCG_COND_NE, bcond, t0, t1);
             MIPS_DEBUG("bnel %s, %s, " TARGET_FMT_lx,
                        regnames[rs], regnames[rt], btgt);
             goto likely;
         case OPC_BGEZ:
-            gen_op_gez(bcond, t0);
+            tcg_gen_setcondi_tl(TCG_COND_GE, bcond, t0, 0);
             MIPS_DEBUG("bgez %s, " TARGET_FMT_lx, regnames[rs], btgt);
             goto not_likely;
         case OPC_BGEZL:
-            gen_op_gez(bcond, t0);
+            tcg_gen_setcondi_tl(TCG_COND_GE, bcond, t0, 0);
             MIPS_DEBUG("bgezl %s, " TARGET_FMT_lx, regnames[rs], btgt);
             goto likely;
+        case OPC_BGEZALS:
         case OPC_BGEZAL:
-            gen_op_gez(bcond, t0);
+            ctx->hflags |= (opc == OPC_BGEZALS
+                            ? MIPS_HFLAG_BDS16
+                            : MIPS_HFLAG_BDS32);
+            tcg_gen_setcondi_tl(TCG_COND_GE, bcond, t0, 0);
             MIPS_DEBUG("bgezal %s, " TARGET_FMT_lx, regnames[rs], btgt);
             blink = 31;
             goto not_likely;
         case OPC_BGEZALL:
-            gen_op_gez(bcond, t0);
+            tcg_gen_setcondi_tl(TCG_COND_GE, bcond, t0, 0);
             blink = 31;
             MIPS_DEBUG("bgezall %s, " TARGET_FMT_lx, regnames[rs], btgt);
             goto likely;
         case OPC_BGTZ:
-            gen_op_gtz(bcond, t0);
+            tcg_gen_setcondi_tl(TCG_COND_GT, bcond, t0, 0);
             MIPS_DEBUG("bgtz %s, " TARGET_FMT_lx, regnames[rs], btgt);
             goto not_likely;
         case OPC_BGTZL:
-            gen_op_gtz(bcond, t0);
+            tcg_gen_setcondi_tl(TCG_COND_GT, bcond, t0, 0);
             MIPS_DEBUG("bgtzl %s, " TARGET_FMT_lx, regnames[rs], btgt);
             goto likely;
         case OPC_BLEZ:
-            gen_op_lez(bcond, t0);
+            tcg_gen_setcondi_tl(TCG_COND_LE, bcond, t0, 0);
             MIPS_DEBUG("blez %s, " TARGET_FMT_lx, regnames[rs], btgt);
             goto not_likely;
         case OPC_BLEZL:
-            gen_op_lez(bcond, t0);
+            tcg_gen_setcondi_tl(TCG_COND_LE, bcond, t0, 0);
             MIPS_DEBUG("blezl %s, " TARGET_FMT_lx, regnames[rs], btgt);
             goto likely;
         case OPC_BLTZ:
-            gen_op_ltz(bcond, t0);
+            tcg_gen_setcondi_tl(TCG_COND_LT, bcond, t0, 0);
             MIPS_DEBUG("bltz %s, " TARGET_FMT_lx, regnames[rs], btgt);
             goto not_likely;
         case OPC_BLTZL:
-            gen_op_ltz(bcond, t0);
+            tcg_gen_setcondi_tl(TCG_COND_LT, bcond, t0, 0);
             MIPS_DEBUG("bltzl %s, " TARGET_FMT_lx, regnames[rs], btgt);
             goto likely;
+        case OPC_BPOSGE32:
+            tcg_gen_setcondi_tl(TCG_COND_GE, bcond, t0, 32);
+            MIPS_DEBUG("bposge32 " TARGET_FMT_lx, btgt);
+            goto not_likely;
+#if defined(TARGET_MIPS64)
+        case OPC_BPOSGE64:
+            tcg_gen_setcondi_tl(TCG_COND_GE, bcond, t0, 64);
+            MIPS_DEBUG("bposge64 " TARGET_FMT_lx, btgt);
+            goto not_likely;
+#endif
+        case OPC_BLTZALS:
         case OPC_BLTZAL:
-            gen_op_ltz(bcond, t0);
+            ctx->hflags |= (opc == OPC_BLTZALS
+                            ? MIPS_HFLAG_BDS16
+                            : MIPS_HFLAG_BDS32);
+            tcg_gen_setcondi_tl(TCG_COND_LT, bcond, t0, 0);
             blink = 31;
             MIPS_DEBUG("bltzal %s, " TARGET_FMT_lx, regnames[rs], btgt);
         not_likely:
             ctx->hflags |= MIPS_HFLAG_BC;
             break;
         case OPC_BLTZALL:
-            gen_op_ltz(bcond, t0);
+            tcg_gen_setcondi_tl(TCG_COND_LT, bcond, t0, 0);
             blink = 31;
             MIPS_DEBUG("bltzall %s, " TARGET_FMT_lx, regnames[rs], btgt);
         likely:
@@ -3417,10 +3946,18 @@
 
     ctx->btarget = btgt;
     if (blink > 0) {
-        tcg_gen_movi_tl(cpu_gpr[blink], ctx->pc + 8);
+        int post_delay = insn_bytes;
+        int lowbit = !!(ctx->hflags & MIPS_HFLAG_M16);
+
+        if (opc != OPC_JALRC)
+            post_delay += ((ctx->hflags & MIPS_HFLAG_BDS16) ? 2 : 4);
+
+        tcg_gen_movi_tl(cpu_gpr[blink], ctx->pc + post_delay + lowbit);
     }
 
  out:
+    if (insn_bytes == 2)
+        ctx->hflags |= MIPS_HFLAG_B16;
     tcg_temp_free(t0);
     tcg_temp_free(t1);
 }
@@ -3431,7 +3968,6 @@
 {
     TCGv t0 = tcg_temp_new();
     TCGv t1 = tcg_temp_new();
-    target_ulong mask;
 
     gen_load_gpr(t1, rs);
     switch (opc) {
@@ -3464,45 +4000,22 @@
     case OPC_INS:
         if (lsb > msb)
             goto fail;
-        mask = ((msb - lsb + 1 < 32) ? ((1 << (msb - lsb + 1)) - 1) : ~0) << lsb;
         gen_load_gpr(t0, rt);
-        tcg_gen_andi_tl(t0, t0, ~mask);
-        tcg_gen_shli_tl(t1, t1, lsb);
-        tcg_gen_andi_tl(t1, t1, mask);
-        tcg_gen_or_tl(t0, t0, t1);
+        tcg_gen_deposit_tl(t0, t0, t1, lsb, msb - lsb + 1);
         tcg_gen_ext32s_tl(t0, t0);
         break;
 #if defined(TARGET_MIPS64)
     case OPC_DINSM:
-        if (lsb > msb)
-            goto fail;
-        mask = ((msb - lsb + 1 + 32 < 64) ? ((1ULL << (msb - lsb + 1 + 32)) - 1) : ~0ULL) << lsb;
         gen_load_gpr(t0, rt);
-        tcg_gen_andi_tl(t0, t0, ~mask);
-        tcg_gen_shli_tl(t1, t1, lsb);
-        tcg_gen_andi_tl(t1, t1, mask);
-        tcg_gen_or_tl(t0, t0, t1);
+        tcg_gen_deposit_tl(t0, t0, t1, lsb, msb + 32 - lsb + 1);
         break;
     case OPC_DINSU:
-        if (lsb > msb)
-            goto fail;
-        mask = ((1ULL << (msb - lsb + 1)) - 1) << (lsb + 32);
         gen_load_gpr(t0, rt);
-        tcg_gen_andi_tl(t0, t0, ~mask);
-        tcg_gen_shli_tl(t1, t1, lsb + 32);
-        tcg_gen_andi_tl(t1, t1, mask);
-        tcg_gen_or_tl(t0, t0, t1);
+        tcg_gen_deposit_tl(t0, t0, t1, lsb + 32, msb - lsb + 1);
         break;
     case OPC_DINS:
-        if (lsb > msb)
-            goto fail;
         gen_load_gpr(t0, rt);
-        mask = ((1ULL << (msb - lsb + 1)) - 1) << lsb;
-        gen_load_gpr(t0, rt);
-        tcg_gen_andi_tl(t0, t0, ~mask);
-        tcg_gen_shli_tl(t1, t1, lsb);
-        tcg_gen_andi_tl(t1, t1, mask);
-        tcg_gen_or_tl(t0, t0, t1);
+        tcg_gen_deposit_tl(t0, t0, t1, lsb, msb - lsb + 1);
         break;
 #endif
     default:
@@ -3620,12 +4133,12 @@
     tcg_gen_st_tl(arg, cpu_env, off);
 }
 
-static void gen_mfc0 (CPUMIPSState *env, DisasContext *ctx, TCGv arg, int reg, int sel)
+static void gen_mfc0(DisasContext *ctx, TCGv arg, int reg, int sel)
 {
-    const char * __attribute__((unused)) rn = "invalid";
+    const char *rn = "invalid";
 
     if (sel != 0)
-        check_insn(env, ctx, ISA_MIPS32);
+        check_insn(ctx, ISA_MIPS32);
 
     switch (reg) {
     case 0:
@@ -3635,17 +4148,17 @@
             rn = "Index";
             break;
         case 1:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mfc0_mvpcontrol(arg, cpu_env);
             rn = "MVPControl";
             break;
         case 2:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mfc0_mvpconf0(arg, cpu_env);
             rn = "MVPConf0";
             break;
         case 3:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mfc0_mvpconf1(arg, cpu_env);
             rn = "MVPConf1";
             break;
@@ -3660,37 +4173,37 @@
             rn = "Random";
             break;
         case 1:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_VPEControl));
             rn = "VPEControl";
             break;
         case 2:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_VPEConf0));
             rn = "VPEConf0";
             break;
         case 3:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_VPEConf1));
             rn = "VPEConf1";
             break;
         case 4:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_mfc0_load64(arg, offsetof(CPUMIPSState, CP0_YQMask));
             rn = "YQMask";
             break;
         case 5:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_mfc0_load64(arg, offsetof(CPUMIPSState, CP0_VPESchedule));
             rn = "VPESchedule";
             break;
         case 6:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_mfc0_load64(arg, offsetof(CPUMIPSState, CP0_VPEScheFBack));
             rn = "VPEScheFBack";
             break;
         case 7:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_VPEOpt));
             rn = "VPEOpt";
             break;
@@ -3706,37 +4219,37 @@
             rn = "EntryLo0";
             break;
         case 1:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mfc0_tcstatus(arg, cpu_env);
             rn = "TCStatus";
             break;
         case 2:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mfc0_tcbind(arg, cpu_env);
             rn = "TCBind";
             break;
         case 3:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mfc0_tcrestart(arg, cpu_env);
             rn = "TCRestart";
             break;
         case 4:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mfc0_tchalt(arg, cpu_env);
             rn = "TCHalt";
             break;
         case 5:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mfc0_tccontext(arg, cpu_env);
             rn = "TCContext";
             break;
         case 6:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mfc0_tcschedule(arg, cpu_env);
             rn = "TCSchedule";
             break;
         case 7:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mfc0_tcschefback(arg, cpu_env);
             rn = "TCScheFBack";
             break;
@@ -3777,7 +4290,7 @@
             rn = "PageMask";
             break;
         case 1:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_PageGrain));
             rn = "PageGrain";
             break;
@@ -3792,27 +4305,27 @@
             rn = "Wired";
             break;
         case 1:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_SRSConf0));
             rn = "SRSConf0";
             break;
         case 2:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_SRSConf1));
             rn = "SRSConf1";
             break;
         case 3:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_SRSConf2));
             rn = "SRSConf2";
             break;
         case 4:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_SRSConf3));
             rn = "SRSConf3";
             break;
         case 5:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_SRSConf4));
             rn = "SRSConf4";
             break;
@@ -3823,7 +4336,7 @@
     case 7:
         switch (sel) {
         case 0:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_HWREna));
             rn = "HWREna";
             break;
@@ -3851,8 +4364,10 @@
             gen_helper_mfc0_count(arg, cpu_env);
             if (use_icount) {
                 gen_io_end();
-                ctx->bstate = BS_STOP;
             }
+            /* Break the TB to be able to take timer interrupts immediately
+               after reading count.  */
+            ctx->bstate = BS_STOP;
             rn = "Count";
             break;
         /* 6,7 are implementation dependent */
@@ -3889,17 +4404,17 @@
             rn = "Status";
             break;
         case 1:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_IntCtl));
             rn = "IntCtl";
             break;
         case 2:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_SRSCtl));
             rn = "SRSCtl";
             break;
         case 3:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_SRSMap));
             rn = "SRSMap";
             break;
@@ -3935,7 +4450,7 @@
             rn = "PRid";
             break;
         case 1:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_EBase));
             rn = "EBase";
             break;
@@ -3961,7 +4476,14 @@
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_Config3));
             rn = "Config3";
             break;
-        /* 4,5 are reserved */
+        case 4:
+            gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_Config4));
+            rn = "Config4";
+            break;
+        case 5:
+            gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_Config5));
+            rn = "Config5";
+            break;
         /* 6,7 are implementation dependent */
         case 6:
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_Config6));
@@ -3988,7 +4510,7 @@
     case 18:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_2i(mfc0_watchlo, arg, cpu_env, sel);
+            gen_helper_1e0i(mfc0_watchlo, arg, sel);
             rn = "WatchLo";
             break;
         default:
@@ -3998,7 +4520,7 @@
     case 19:
         switch (sel) {
         case 0 ...7:
-            gen_helper_2i(mfc0_watchhi, arg, cpu_env, sel);
+            gen_helper_1e0i(mfc0_watchhi, arg, sel);
             rn = "WatchHi";
             break;
         default:
@@ -4009,7 +4531,7 @@
         switch (sel) {
         case 0:
 #if defined(TARGET_MIPS64)
-            check_insn(env, ctx, ISA_MIPS3);
+            check_insn(ctx, ISA_MIPS3);
             tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUMIPSState, CP0_XContext));
             tcg_gen_ext32s_tl(arg, arg);
             rn = "XContext";
@@ -4189,6 +4711,7 @@
     default:
        goto die;
     }
+    (void)rn; /* avoid a compiler warning */
     LOG_DISAS("mfc0 %s (reg %d sel %d)\n", rn, reg, sel);
     return;
 
@@ -4197,12 +4720,12 @@
     generate_exception(ctx, EXCP_RI);
 }
 
-static void gen_mtc0 (CPUMIPSState *env, DisasContext *ctx, TCGv arg, int reg, int sel)
+static void gen_mtc0(DisasContext *ctx, TCGv arg, int reg, int sel)
 {
     const char *rn = "invalid";
 
     if (sel != 0)
-        check_insn(env, ctx, ISA_MIPS32);
+        check_insn(ctx, ISA_MIPS32);
 
     if (use_icount)
         gen_io_start();
@@ -4215,17 +4738,17 @@
             rn = "Index";
             break;
         case 1:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mtc0_mvpcontrol(cpu_env, arg);
             rn = "MVPControl";
             break;
         case 2:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             /* ignored */
             rn = "MVPConf0";
             break;
         case 3:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             /* ignored */
             rn = "MVPConf1";
             break;
@@ -4240,37 +4763,37 @@
             rn = "Random";
             break;
         case 1:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mtc0_vpecontrol(cpu_env, arg);
             rn = "VPEControl";
             break;
         case 2:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mtc0_vpeconf0(cpu_env, arg);
             rn = "VPEConf0";
             break;
         case 3:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mtc0_vpeconf1(cpu_env, arg);
             rn = "VPEConf1";
             break;
         case 4:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mtc0_yqmask(cpu_env, arg);
             rn = "YQMask";
             break;
         case 5:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_mtc0_store64(arg, offsetof(CPUMIPSState, CP0_VPESchedule));
             rn = "VPESchedule";
             break;
         case 6:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_mtc0_store64(arg, offsetof(CPUMIPSState, CP0_VPEScheFBack));
             rn = "VPEScheFBack";
             break;
         case 7:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mtc0_vpeopt(cpu_env, arg);
             rn = "VPEOpt";
             break;
@@ -4285,37 +4808,37 @@
             rn = "EntryLo0";
             break;
         case 1:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mtc0_tcstatus(cpu_env, arg);
             rn = "TCStatus";
             break;
         case 2:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mtc0_tcbind(cpu_env, arg);
             rn = "TCBind";
             break;
         case 3:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mtc0_tcrestart(cpu_env, arg);
             rn = "TCRestart";
             break;
         case 4:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mtc0_tchalt(cpu_env, arg);
             rn = "TCHalt";
             break;
         case 5:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mtc0_tccontext(cpu_env, arg);
             rn = "TCContext";
             break;
         case 6:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mtc0_tcschedule(cpu_env, arg);
             rn = "TCSchedule";
             break;
         case 7:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mtc0_tcschefback(cpu_env, arg);
             rn = "TCScheFBack";
             break;
@@ -4340,7 +4863,7 @@
             rn = "Context";
             break;
         case 1:
-//            gen_helper_mtc0_contextconfig(arg); /* SmartMIPS ASE */
+//            gen_helper_mtc0_contextconfig(cpu_env, arg); /* SmartMIPS ASE */
             rn = "ContextConfig";
 //            break;
         default:
@@ -4354,7 +4877,7 @@
             rn = "PageMask";
             break;
         case 1:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_helper_mtc0_pagegrain(cpu_env, arg);
             rn = "PageGrain";
             break;
@@ -4369,27 +4892,27 @@
             rn = "Wired";
             break;
         case 1:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_helper_mtc0_srsconf0(cpu_env, arg);
             rn = "SRSConf0";
             break;
         case 2:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_helper_mtc0_srsconf1(cpu_env, arg);
             rn = "SRSConf1";
             break;
         case 3:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_helper_mtc0_srsconf2(cpu_env, arg);
             rn = "SRSConf2";
             break;
         case 4:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_helper_mtc0_srsconf3(cpu_env, arg);
             rn = "SRSConf3";
             break;
         case 5:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_helper_mtc0_srsconf4(cpu_env, arg);
             rn = "SRSConf4";
             break;
@@ -4400,7 +4923,7 @@
     case 7:
         switch (sel) {
         case 0:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_helper_mtc0_hwrena(cpu_env, arg);
             rn = "HWREna";
             break;
@@ -4455,21 +4978,21 @@
             rn = "Status";
             break;
         case 1:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_helper_mtc0_intctl(cpu_env, arg);
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "IntCtl";
             break;
         case 2:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_helper_mtc0_srsctl(cpu_env, arg);
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "SRSCtl";
             break;
         case 3:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_mtc0_store32(arg, offsetof(CPUMIPSState, CP0_SRSMap));
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
@@ -4507,7 +5030,7 @@
             rn = "PRid";
             break;
         case 1:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_helper_mtc0_ebase(cpu_env, arg);
             rn = "EBase";
             break;
@@ -4537,7 +5060,17 @@
             /* ignored, read only */
             rn = "Config3";
             break;
-        /* 4,5 are reserved */
+        case 4:
+            gen_helper_mtc0_config4(cpu_env, arg);
+            rn = "Config4";
+            ctx->bstate = BS_STOP;
+            break;
+        case 5:
+            gen_helper_mtc0_config5(cpu_env, arg);
+            rn = "Config5";
+            /* Stop translation as we may have switched the execution mode */
+            ctx->bstate = BS_STOP;
+            break;
         /* 6,7 are implementation dependent */
         case 6:
             /* ignored */
@@ -4565,7 +5098,7 @@
     case 18:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_2i(mtc0_watchlo, cpu_env, arg, sel);
+            gen_helper_0e1i(mtc0_watchlo, arg, sel);
             rn = "WatchLo";
             break;
         default:
@@ -4575,7 +5108,7 @@
     case 19:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_2i(mtc0_watchhi, cpu_env, arg, sel);
+            gen_helper_0e1i(mtc0_watchhi, arg, sel);
             rn = "WatchHi";
             break;
         default:
@@ -4586,7 +5119,7 @@
         switch (sel) {
         case 0:
 #if defined(TARGET_MIPS64)
-            check_insn(env, ctx, ISA_MIPS3);
+            check_insn(ctx, ISA_MIPS3);
             gen_helper_mtc0_xcontext(cpu_env, arg);
             rn = "XContext";
             break;
@@ -4620,13 +5153,13 @@
             rn = "Debug";
             break;
         case 1:
-//            gen_helper_mtc0_tracecontrol(arg); /* PDtrace support */
+//            gen_helper_mtc0_tracecontrol(cpu_env, arg); /* PDtrace support */
             rn = "TraceControl";
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
 //            break;
         case 2:
-//            gen_helper_mtc0_tracecontrol2(arg); /* PDtrace support */
+//            gen_helper_mtc0_tracecontrol2(cpu_env, arg); /* PDtrace support */
             rn = "TraceControl2";
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
@@ -4634,13 +5167,13 @@
         case 3:
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
-//            gen_helper_mtc0_usertracedata(arg); /* PDtrace support */
+//            gen_helper_mtc0_usertracedata(cpu_env, arg); /* PDtrace support */
             rn = "UserTraceData";
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
 //            break;
         case 4:
-//            gen_helper_mtc0_tracebpc(arg); /* PDtrace support */
+//            gen_helper_mtc0_tracebpc(cpu_env, arg); /* PDtrace support */
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "TraceBPC";
@@ -4667,31 +5200,31 @@
             rn = "Performance0";
             break;
         case 1:
-//            gen_helper_mtc0_performance1(cpu_env, arg);
+//            gen_helper_mtc0_performance1(arg);
             rn = "Performance1";
 //            break;
         case 2:
-//            gen_helper_mtc0_performance2(cpu_env, arg);
+//            gen_helper_mtc0_performance2(arg);
             rn = "Performance2";
 //            break;
         case 3:
-//            gen_helper_mtc0_performance3(cpu_env, arg);
+//            gen_helper_mtc0_performance3(arg);
             rn = "Performance3";
 //            break;
         case 4:
-//            gen_helper_mtc0_performance4(cpu_env, arg);
+//            gen_helper_mtc0_performance4(arg);
             rn = "Performance4";
 //            break;
         case 5:
-//            gen_helper_mtc0_performance5(cpu_env, arg);
+//            gen_helper_mtc0_performance5(arg);
             rn = "Performance5";
 //            break;
         case 6:
-//            gen_helper_mtc0_performance6(cpu_env, arg);
+//            gen_helper_mtc0_performance6(arg);
             rn = "Performance6";
 //            break;
         case 7:
-//            gen_helper_mtc0_performance7(cpu_env, arg);
+//            gen_helper_mtc0_performance7(arg);
             rn = "Performance7";
 //            break;
         default:
@@ -4794,12 +5327,12 @@
 }
 
 #if defined(TARGET_MIPS64)
-static void gen_dmfc0 (CPUMIPSState *env, DisasContext *ctx, TCGv arg, int reg, int sel)
+static void gen_dmfc0(DisasContext *ctx, TCGv arg, int reg, int sel)
 {
     const char *rn = "invalid";
 
     if (sel != 0)
-        check_insn(env, ctx, ISA_MIPS64);
+        check_insn(ctx, ISA_MIPS64);
 
     switch (reg) {
     case 0:
@@ -4809,17 +5342,17 @@
             rn = "Index";
             break;
         case 1:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mfc0_mvpcontrol(arg, cpu_env);
             rn = "MVPControl";
             break;
         case 2:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mfc0_mvpconf0(arg, cpu_env);
             rn = "MVPConf0";
             break;
         case 3:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mfc0_mvpconf1(arg, cpu_env);
             rn = "MVPConf1";
             break;
@@ -4834,37 +5367,37 @@
             rn = "Random";
             break;
         case 1:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_VPEControl));
             rn = "VPEControl";
             break;
         case 2:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_VPEConf0));
             rn = "VPEConf0";
             break;
         case 3:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_VPEConf1));
             rn = "VPEConf1";
             break;
         case 4:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUMIPSState, CP0_YQMask));
             rn = "YQMask";
             break;
         case 5:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUMIPSState, CP0_VPESchedule));
             rn = "VPESchedule";
             break;
         case 6:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUMIPSState, CP0_VPEScheFBack));
             rn = "VPEScheFBack";
             break;
         case 7:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_VPEOpt));
             rn = "VPEOpt";
             break;
@@ -4879,37 +5412,37 @@
             rn = "EntryLo0";
             break;
         case 1:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mfc0_tcstatus(arg, cpu_env);
             rn = "TCStatus";
             break;
         case 2:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mfc0_tcbind(arg, cpu_env);
             rn = "TCBind";
             break;
         case 3:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_dmfc0_tcrestart(arg, cpu_env);
             rn = "TCRestart";
             break;
         case 4:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_dmfc0_tchalt(arg, cpu_env);
             rn = "TCHalt";
             break;
         case 5:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_dmfc0_tccontext(arg, cpu_env);
             rn = "TCContext";
             break;
         case 6:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_dmfc0_tcschedule(arg, cpu_env);
             rn = "TCSchedule";
             break;
         case 7:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_dmfc0_tcschefback(arg, cpu_env);
             rn = "TCScheFBack";
             break;
@@ -4948,7 +5481,7 @@
             rn = "PageMask";
             break;
         case 1:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_PageGrain));
             rn = "PageGrain";
             break;
@@ -4963,27 +5496,27 @@
             rn = "Wired";
             break;
         case 1:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_SRSConf0));
             rn = "SRSConf0";
             break;
         case 2:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_SRSConf1));
             rn = "SRSConf1";
             break;
         case 3:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_SRSConf2));
             rn = "SRSConf2";
             break;
         case 4:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_SRSConf3));
             rn = "SRSConf3";
             break;
         case 5:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_SRSConf4));
             rn = "SRSConf4";
             break;
@@ -4994,7 +5527,7 @@
     case 7:
         switch (sel) {
         case 0:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_HWREna));
             rn = "HWREna";
             break;
@@ -5021,8 +5554,10 @@
             gen_helper_mfc0_count(arg, cpu_env);
             if (use_icount) {
                 gen_io_end();
-                ctx->bstate = BS_STOP;
             }
+            /* Break the TB to be able to take timer interrupts immediately
+               after reading count.  */
+            ctx->bstate = BS_STOP;
             rn = "Count";
             break;
         /* 6,7 are implementation dependent */
@@ -5058,17 +5593,17 @@
             rn = "Status";
             break;
         case 1:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_IntCtl));
             rn = "IntCtl";
             break;
         case 2:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_SRSCtl));
             rn = "SRSCtl";
             break;
         case 3:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_SRSMap));
             rn = "SRSMap";
             break;
@@ -5103,7 +5638,7 @@
             rn = "PRid";
             break;
         case 1:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_EBase));
             rn = "EBase";
             break;
@@ -5155,7 +5690,7 @@
     case 18:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_2i(dmfc0_watchlo, arg, cpu_env, sel);
+            gen_helper_1e0i(dmfc0_watchlo, arg, sel);
             rn = "WatchLo";
             break;
         default:
@@ -5165,7 +5700,7 @@
     case 19:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_2i(mfc0_watchhi, arg, cpu_env, sel);
+            gen_helper_1e0i(mfc0_watchhi, arg, sel);
             rn = "WatchHi";
             break;
         default:
@@ -5175,7 +5710,7 @@
     case 20:
         switch (sel) {
         case 0:
-            check_insn(env, ctx, ISA_MIPS3);
+            check_insn(ctx, ISA_MIPS3);
             tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUMIPSState, CP0_XContext));
             rn = "XContext";
             break;
@@ -5205,19 +5740,19 @@
             rn = "Debug";
             break;
         case 1:
-//            gen_helper_dmfc0_tracecontrol(arg); /* PDtrace support */
+//            gen_helper_dmfc0_tracecontrol(arg, cpu_env); /* PDtrace support */
             rn = "TraceControl";
 //            break;
         case 2:
-//            gen_helper_dmfc0_tracecontrol2(arg); /* PDtrace support */
+//            gen_helper_dmfc0_tracecontrol2(arg, cpu_env); /* PDtrace support */
             rn = "TraceControl2";
 //            break;
         case 3:
-//            gen_helper_dmfc0_usertracedata(arg); /* PDtrace support */
+//            gen_helper_dmfc0_usertracedata(arg, cpu_env); /* PDtrace support */
             rn = "UserTraceData";
 //            break;
         case 4:
-//            gen_helper_dmfc0_tracebpc(arg); /* PDtrace support */
+//            gen_helper_dmfc0_tracebpc(arg, cpu_env); /* PDtrace support */
             rn = "TraceBPC";
 //            break;
         default:
@@ -5361,12 +5896,12 @@
     generate_exception(ctx, EXCP_RI);
 }
 
-static void gen_dmtc0 (CPUMIPSState *env, DisasContext *ctx, TCGv arg, int reg, int sel)
+static void gen_dmtc0(DisasContext *ctx, TCGv arg, int reg, int sel)
 {
     const char *rn = "invalid";
 
     if (sel != 0)
-        check_insn(env, ctx, ISA_MIPS64);
+        check_insn(ctx, ISA_MIPS64);
 
     if (use_icount)
         gen_io_start();
@@ -5379,17 +5914,17 @@
             rn = "Index";
             break;
         case 1:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mtc0_mvpcontrol(cpu_env, arg);
             rn = "MVPControl";
             break;
         case 2:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             /* ignored */
             rn = "MVPConf0";
             break;
         case 3:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             /* ignored */
             rn = "MVPConf1";
             break;
@@ -5404,37 +5939,37 @@
             rn = "Random";
             break;
         case 1:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mtc0_vpecontrol(cpu_env, arg);
             rn = "VPEControl";
             break;
         case 2:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mtc0_vpeconf0(cpu_env, arg);
             rn = "VPEConf0";
             break;
         case 3:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mtc0_vpeconf1(cpu_env, arg);
             rn = "VPEConf1";
             break;
         case 4:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mtc0_yqmask(cpu_env, arg);
             rn = "YQMask";
             break;
         case 5:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             tcg_gen_st_tl(arg, cpu_env, offsetof(CPUMIPSState, CP0_VPESchedule));
             rn = "VPESchedule";
             break;
         case 6:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             tcg_gen_st_tl(arg, cpu_env, offsetof(CPUMIPSState, CP0_VPEScheFBack));
             rn = "VPEScheFBack";
             break;
         case 7:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mtc0_vpeopt(cpu_env, arg);
             rn = "VPEOpt";
             break;
@@ -5449,37 +5984,37 @@
             rn = "EntryLo0";
             break;
         case 1:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mtc0_tcstatus(cpu_env, arg);
             rn = "TCStatus";
             break;
         case 2:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mtc0_tcbind(cpu_env, arg);
             rn = "TCBind";
             break;
         case 3:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mtc0_tcrestart(cpu_env, arg);
             rn = "TCRestart";
             break;
         case 4:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mtc0_tchalt(cpu_env, arg);
             rn = "TCHalt";
             break;
         case 5:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mtc0_tccontext(cpu_env, arg);
             rn = "TCContext";
             break;
         case 6:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mtc0_tcschedule(cpu_env, arg);
             rn = "TCSchedule";
             break;
         case 7:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             gen_helper_mtc0_tcschefback(cpu_env, arg);
             rn = "TCScheFBack";
             break;
@@ -5504,7 +6039,7 @@
             rn = "Context";
             break;
         case 1:
-//           gen_helper_mtc0_contextconfig(arg); /* SmartMIPS ASE */
+//           gen_helper_mtc0_contextconfig(cpu_env, arg); /* SmartMIPS ASE */
             rn = "ContextConfig";
 //           break;
         default:
@@ -5518,7 +6053,7 @@
             rn = "PageMask";
             break;
         case 1:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_helper_mtc0_pagegrain(cpu_env, arg);
             rn = "PageGrain";
             break;
@@ -5533,27 +6068,27 @@
             rn = "Wired";
             break;
         case 1:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_helper_mtc0_srsconf0(cpu_env, arg);
             rn = "SRSConf0";
             break;
         case 2:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_helper_mtc0_srsconf1(cpu_env, arg);
             rn = "SRSConf1";
             break;
         case 3:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_helper_mtc0_srsconf2(cpu_env, arg);
             rn = "SRSConf2";
             break;
         case 4:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_helper_mtc0_srsconf3(cpu_env, arg);
             rn = "SRSConf3";
             break;
         case 5:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_helper_mtc0_srsconf4(cpu_env, arg);
             rn = "SRSConf4";
             break;
@@ -5564,7 +6099,7 @@
     case 7:
         switch (sel) {
         case 0:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_helper_mtc0_hwrena(cpu_env, arg);
             rn = "HWREna";
             break;
@@ -5623,21 +6158,21 @@
             rn = "Status";
             break;
         case 1:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_helper_mtc0_intctl(cpu_env, arg);
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "IntCtl";
             break;
         case 2:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_helper_mtc0_srsctl(cpu_env, arg);
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "SRSCtl";
             break;
         case 3:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_mtc0_store32(arg, offsetof(CPUMIPSState, CP0_SRSMap));
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
@@ -5652,6 +6187,8 @@
         case 0:
             save_cpu_state(ctx, 1);
             gen_helper_mtc0_cause(cpu_env, arg);
+            /* Stop translation as we may have triggered an interrupt */
+            ctx->bstate = BS_STOP;
             rn = "Cause";
             break;
         default:
@@ -5675,7 +6212,7 @@
             rn = "PRid";
             break;
         case 1:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_helper_mtc0_ebase(cpu_env, arg);
             rn = "EBase";
             break;
@@ -5724,7 +6261,7 @@
     case 18:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_2i(mtc0_watchlo, cpu_env, arg, sel);
+            gen_helper_0e1i(mtc0_watchlo, arg, sel);
             rn = "WatchLo";
             break;
         default:
@@ -5734,7 +6271,7 @@
     case 19:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_2i(mtc0_watchhi, cpu_env, arg, sel);
+            gen_helper_0e1i(mtc0_watchhi, arg, sel);
             rn = "WatchHi";
             break;
         default:
@@ -5744,7 +6281,7 @@
     case 20:
         switch (sel) {
         case 0:
-            check_insn(env, ctx, ISA_MIPS3);
+            check_insn(ctx, ISA_MIPS3);
             gen_helper_mtc0_xcontext(cpu_env, arg);
             rn = "XContext";
             break;
@@ -5777,25 +6314,25 @@
             rn = "Debug";
             break;
         case 1:
-//            gen_helper_mtc0_tracecontrol(arg); /* PDtrace support */
+//            gen_helper_mtc0_tracecontrol(cpu_env, arg); /* PDtrace support */
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "TraceControl";
 //            break;
         case 2:
-//            gen_helper_mtc0_tracecontrol2(arg); /* PDtrace support */
+//            gen_helper_mtc0_tracecontrol2(cpu_env, arg); /* PDtrace support */
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "TraceControl2";
 //            break;
         case 3:
-//            gen_helper_mtc0_usertracedata(arg); /* PDtrace support */
+//            gen_helper_mtc0_usertracedata(cpu_env, arg); /* PDtrace support */
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "UserTraceData";
 //            break;
         case 4:
-//            gen_helper_mtc0_tracebpc(arg); /* PDtrace support */
+//            gen_helper_mtc0_tracebpc(cpu_env, arg); /* PDtrace support */
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "TraceBPC";
@@ -5964,6 +6501,19 @@
         tcg_gen_movi_tl(t0, -1);
     else if (u == 0) {
         switch (rt) {
+        case 1:
+            switch (sel) {
+            case 1:
+                gen_helper_mftc0_vpecontrol(t0, cpu_env);
+                break;
+            case 2:
+                gen_helper_mftc0_vpeconf0(t0, cpu_env);
+                break;
+            default:
+                goto die;
+                break;
+            }
+            break;
         case 2:
             switch (sel) {
             case 1:
@@ -5988,7 +6538,7 @@
                 gen_helper_mftc0_tcschefback(t0, cpu_env);
                 break;
             default:
-                gen_mfc0(env, ctx, t0, rt, sel);
+                gen_mfc0(ctx, t0, rt, sel);
                 break;
             }
             break;
@@ -5998,7 +6548,7 @@
                 gen_helper_mftc0_entryhi(t0, cpu_env);
                 break;
             default:
-                gen_mfc0(env, ctx, t0, rt, sel);
+                gen_mfc0(ctx, t0, rt, sel);
                 break;
             }
         case 12:
@@ -6007,65 +6557,105 @@
                 gen_helper_mftc0_status(t0, cpu_env);
                 break;
             default:
-                gen_mfc0(env, ctx, t0, rt, sel);
+                gen_mfc0(ctx, t0, rt, sel);
                 break;
             }
+        case 13:
+            switch (sel) {
+            case 0:
+                gen_helper_mftc0_cause(t0, cpu_env);
+                break;
+            default:
+                goto die;
+                break;
+            }
+            break;
+        case 14:
+            switch (sel) {
+            case 0:
+                gen_helper_mftc0_epc(t0, cpu_env);
+                break;
+            default:
+                goto die;
+                break;
+            }
+            break;
+        case 15:
+            switch (sel) {
+            case 1:
+                gen_helper_mftc0_ebase(t0, cpu_env);
+                break;
+            default:
+                goto die;
+                break;
+            }
+            break;
+        case 16:
+            switch (sel) {
+            case 0 ... 7:
+                gen_helper_mftc0_configx(t0, cpu_env, tcg_const_tl(sel));
+                break;
+            default:
+                goto die;
+                break;
+            }
+            break;
         case 23:
             switch (sel) {
             case 0:
                 gen_helper_mftc0_debug(t0, cpu_env);
                 break;
             default:
-                gen_mfc0(env, ctx, t0, rt, sel);
+                gen_mfc0(ctx, t0, rt, sel);
                 break;
             }
             break;
         default:
-            gen_mfc0(env, ctx, t0, rt, sel);
+            gen_mfc0(ctx, t0, rt, sel);
         }
     } else switch (sel) {
     /* GPR registers. */
     case 0:
-        gen_helper_2i(mftgpr, t0, cpu_env, rt);
+        gen_helper_1e0i(mftgpr, t0, rt);
         break;
     /* Auxiliary CPU registers */
     case 1:
         switch (rt) {
         case 0:
-            gen_helper_2i(mftlo, t0, cpu_env, 0);
+            gen_helper_1e0i(mftlo, t0, 0);
             break;
         case 1:
-            gen_helper_2i(mfthi, t0, cpu_env, 0);
+            gen_helper_1e0i(mfthi, t0, 0);
             break;
         case 2:
-            gen_helper_2i(mftacx, t0, cpu_env, 0);
+            gen_helper_1e0i(mftacx, t0, 0);
             break;
         case 4:
-            gen_helper_2i(mftlo, t0, cpu_env, 1);
+            gen_helper_1e0i(mftlo, t0, 1);
             break;
         case 5:
-            gen_helper_2i(mfthi, t0, cpu_env, 1);
+            gen_helper_1e0i(mfthi, t0, 1);
             break;
         case 6:
-            gen_helper_2i(mftacx, t0, cpu_env, 1);
+            gen_helper_1e0i(mftacx, t0, 1);
             break;
         case 8:
-            gen_helper_2i(mftlo, t0, cpu_env, 2);
+            gen_helper_1e0i(mftlo, t0, 2);
             break;
         case 9:
-            gen_helper_2i(mfthi, t0, cpu_env, 2);
+            gen_helper_1e0i(mfthi, t0, 2);
             break;
         case 10:
-            gen_helper_2i(mftacx, t0, cpu_env, 2);
+            gen_helper_1e0i(mftacx, t0, 2);
             break;
         case 12:
-            gen_helper_2i(mftlo, t0, cpu_env, 3);
+            gen_helper_1e0i(mftlo, t0, 3);
             break;
         case 13:
-            gen_helper_2i(mfthi, t0, cpu_env, 3);
+            gen_helper_1e0i(mfthi, t0, 3);
             break;
         case 14:
-            gen_helper_2i(mftacx, t0, cpu_env, 3);
+            gen_helper_1e0i(mftacx, t0, 3);
             break;
         case 16:
             gen_helper_mftdsp(t0, cpu_env);
@@ -6093,7 +6683,7 @@
         break;
     case 3:
         /* XXX: For now we support only a single FPU context. */
-        gen_helper_2i(cfc1, t0, cpu_env, rt);
+        gen_helper_1e0i(cfc1, t0, rt);
         break;
     /* COP2: Not implemented. */
     case 4:
@@ -6129,6 +6719,19 @@
         /* NOP */ ;
     else if (u == 0) {
         switch (rd) {
+        case 1:
+            switch (sel) {
+            case 1:
+                gen_helper_mttc0_vpecontrol(cpu_env, t0);
+                break;
+            case 2:
+                gen_helper_mttc0_vpeconf0(cpu_env, t0);
+                break;
+            default:
+                goto die;
+                break;
+            }
+            break;
         case 2:
             switch (sel) {
             case 1:
@@ -6153,7 +6756,7 @@
                 gen_helper_mttc0_tcschefback(cpu_env, t0);
                 break;
             default:
-                gen_mtc0(env, ctx, t0, rd, sel);
+                gen_mtc0(ctx, t0, rd, sel);
                 break;
             }
             break;
@@ -6163,7 +6766,7 @@
                 gen_helper_mttc0_entryhi(cpu_env, t0);
                 break;
             default:
-                gen_mtc0(env, ctx, t0, rd, sel);
+                gen_mtc0(ctx, t0, rd, sel);
                 break;
             }
         case 12:
@@ -6172,65 +6775,85 @@
                 gen_helper_mttc0_status(cpu_env, t0);
                 break;
             default:
-                gen_mtc0(env, ctx, t0, rd, sel);
+                gen_mtc0(ctx, t0, rd, sel);
                 break;
             }
+        case 13:
+            switch (sel) {
+            case 0:
+                gen_helper_mttc0_cause(cpu_env, t0);
+                break;
+            default:
+                goto die;
+                break;
+            }
+            break;
+        case 15:
+            switch (sel) {
+            case 1:
+                gen_helper_mttc0_ebase(cpu_env, t0);
+                break;
+            default:
+                goto die;
+                break;
+            }
+            break;
         case 23:
             switch (sel) {
             case 0:
                 gen_helper_mttc0_debug(cpu_env, t0);
                 break;
             default:
-                gen_mtc0(env, ctx, t0, rd, sel);
+                gen_mtc0(ctx, t0, rd, sel);
                 break;
             }
             break;
         default:
-            gen_mtc0(env, ctx, t0, rd, sel);
+            gen_mtc0(ctx, t0, rd, sel);
         }
     } else switch (sel) {
     /* GPR registers. */
     case 0:
-        gen_helper_2i(mttgpr, cpu_env, t0, rd);
+        gen_helper_0e1i(mttgpr, t0, rd);
         break;
     /* Auxiliary CPU registers */
     case 1:
         switch (rd) {
         case 0:
-            gen_helper_2i(mttlo, cpu_env, t0, 0);
+            gen_helper_0e1i(mttlo, t0, 0);
             break;
         case 1:
-            gen_helper_2i(mtthi, cpu_env, t0, 0);
+            gen_helper_0e1i(mtthi, t0, 0);
             break;
         case 2:
-            gen_helper_2i(mttacx, cpu_env, t0, 0);
+            gen_helper_0e1i(mttacx, t0, 0);
             break;
         case 4:
-            gen_helper_2i(mttlo, cpu_env, t0, 1);
+            gen_helper_0e1i(mttlo, t0, 1);
             break;
         case 5:
-            gen_helper_2i(mtthi, cpu_env, t0, 1);
+            gen_helper_0e1i(mtthi, t0, 1);
             break;
         case 6:
-            gen_helper_2i(mttacx, cpu_env, t0, 1);
+            gen_helper_0e1i(mttacx, t0, 1);
             break;
         case 8:
-            gen_helper_2i(mttlo, cpu_env, t0, 2);
+            gen_helper_0e1i(mttlo, t0, 2);
             break;
         case 9:
-            gen_helper_2i(mtthi, cpu_env, t0, 2);
+            gen_helper_0e1i(mtthi, t0, 2);
             break;
         case 10:
-            gen_helper_2i(mttacx, cpu_env, t0, 2);
+            gen_helper_0e1i(mttacx, t0, 2);
             break;
         case 12:
-            gen_helper_2i(mttlo, cpu_env, t0, 3);
+            gen_helper_0e1i(mttlo, t0, 3);
             break;
         case 13:
-            gen_helper_2i(mtthi, cpu_env, t0, 3);
+            gen_helper_0e1i(mtthi, t0, 3);
             break;
         case 14:
-            gen_helper_2i(mttacx, cpu_env, t0, 3);
+            gen_helper_0e1i(mttacx, t0, 3);
             break;
         case 16:
             gen_helper_mttdsp(cpu_env, t0);
@@ -6286,13 +6909,14 @@
 {
     const char *opn = "ldst";
 
+    check_cp0_enabled(ctx);
     switch (opc) {
     case OPC_MFC0:
         if (rt == 0) {
             /* Treat as NOP. */
             return;
         }
-        gen_mfc0(env, ctx, cpu_gpr[rt], rd, ctx->opcode & 0x7);
+        gen_mfc0(ctx, cpu_gpr[rt], rd, ctx->opcode & 0x7);
         opn = "mfc0";
         break;
     case OPC_MTC0:
@@ -6300,35 +6924,35 @@
             TCGv t0 = tcg_temp_new();
 
             gen_load_gpr(t0, rt);
-            gen_mtc0(env, ctx, t0, rd, ctx->opcode & 0x7);
+            gen_mtc0(ctx, t0, rd, ctx->opcode & 0x7);
             tcg_temp_free(t0);
         }
         opn = "mtc0";
         break;
 #if defined(TARGET_MIPS64)
     case OPC_DMFC0:
-        check_insn(env, ctx, ISA_MIPS3);
+        check_insn(ctx, ISA_MIPS3);
         if (rt == 0) {
             /* Treat as NOP. */
             return;
         }
-        gen_dmfc0(env, ctx, cpu_gpr[rt], rd, ctx->opcode & 0x7);
+        gen_dmfc0(ctx, cpu_gpr[rt], rd, ctx->opcode & 0x7);
         opn = "dmfc0";
         break;
     case OPC_DMTC0:
-        check_insn(env, ctx, ISA_MIPS3);
+        check_insn(ctx, ISA_MIPS3);
         {
             TCGv t0 = tcg_temp_new();
 
             gen_load_gpr(t0, rt);
-            gen_dmtc0(env, ctx, t0, rd, ctx->opcode & 0x7);
+            gen_dmtc0(ctx, t0, rd, ctx->opcode & 0x7);
             tcg_temp_free(t0);
         }
         opn = "dmtc0";
         break;
 #endif
     case OPC_MFTR:
-        check_insn(env, ctx, ASE_MT);
+        check_insn(ctx, ASE_MT);
         if (rd == 0) {
             /* Treat as NOP. */
             return;
@@ -6338,7 +6962,7 @@
         opn = "mftr";
         break;
     case OPC_MTTR:
-        check_insn(env, ctx, ASE_MT);
+        check_insn(ctx, ASE_MT);
         gen_mttr(env, ctx, rd, rt, (ctx->opcode >> 5) & 1,
                  ctx->opcode & 0x7, (ctx->opcode >> 4) & 1);
         opn = "mttr";
@@ -6369,13 +6993,13 @@
         break;
     case OPC_ERET:
         opn = "eret";
-        check_insn(env, ctx, ISA_MIPS2);
+        check_insn(ctx, ISA_MIPS2);
         gen_helper_eret(cpu_env);
         ctx->bstate = BS_EXCP;
         break;
     case OPC_DERET:
         opn = "deret";
-        check_insn(env, ctx, ISA_MIPS32);
+        check_insn(ctx, ISA_MIPS32);
         if (!(ctx->hflags & MIPS_HFLAG_DM)) {
             MIPS_INVAL(opn);
             generate_exception(ctx, EXCP_RI);
@@ -6386,7 +7010,7 @@
         break;
     case OPC_WAIT:
         opn = "wait";
-        check_insn(env, ctx, ISA_MIPS3 | ISA_MIPS32);
+        check_insn(ctx, ISA_MIPS3 | ISA_MIPS32);
         /* If we get an exception, we want to restart at next instruction */
         ctx->pc += 4;
         save_cpu_state(ctx, 1);
@@ -6406,15 +7030,15 @@
 #endif /* !CONFIG_USER_ONLY */
 
 /* CP1 Branches (before delay slot) */
-static void gen_compute_branch1 (CPUMIPSState *env, DisasContext *ctx, uint32_t op,
-                                 int32_t cc, int32_t offset)
+static void gen_compute_branch1(DisasContext *ctx, uint32_t op,
+                                int32_t cc, int32_t offset)
 {
     target_ulong btarget;
     const char *opn = "cp1 cond branch";
     TCGv_i32 t0 = tcg_temp_new_i32();
 
     if (cc != 0)
-        check_insn(env, ctx, ISA_MIPS4 | ISA_MIPS32);
+        check_insn(ctx, ISA_MIPS4 | ISA_MIPS32);
 
     btarget = ctx->pc + 4 + offset;
 
@@ -6452,9 +7076,8 @@
             TCGv_i32 t1 = tcg_temp_new_i32();
             tcg_gen_shri_i32(t0, fpu_fcr31, get_fp_bit(cc));
             tcg_gen_shri_i32(t1, fpu_fcr31, get_fp_bit(cc+1));
-            tcg_gen_or_i32(t0, t0, t1);
+            tcg_gen_nand_i32(t0, t0, t1);
             tcg_temp_free_i32(t1);
-            tcg_gen_not_i32(t0, t0);
             tcg_gen_andi_i32(t0, t0, 1);
             tcg_gen_extu_i32_tl(bcond, t0);
         }
@@ -6477,13 +7100,12 @@
             TCGv_i32 t1 = tcg_temp_new_i32();
             tcg_gen_shri_i32(t0, fpu_fcr31, get_fp_bit(cc));
             tcg_gen_shri_i32(t1, fpu_fcr31, get_fp_bit(cc+1));
-            tcg_gen_or_i32(t0, t0, t1);
+            tcg_gen_and_i32(t0, t0, t1);
             tcg_gen_shri_i32(t1, fpu_fcr31, get_fp_bit(cc+2));
-            tcg_gen_or_i32(t0, t0, t1);
+            tcg_gen_and_i32(t0, t0, t1);
             tcg_gen_shri_i32(t1, fpu_fcr31, get_fp_bit(cc+3));
-            tcg_gen_or_i32(t0, t0, t1);
+            tcg_gen_nand_i32(t0, t0, t1);
             tcg_temp_free_i32(t1);
-            tcg_gen_not_i32(t0, t0);
             tcg_gen_andi_i32(t0, t0, 1);
             tcg_gen_extu_i32_tl(bcond, t0);
         }
@@ -6525,6 +7147,146 @@
 
 #define FOP(func, fmt) (((fmt) << 21) | (func))
 
+enum fopcode { /* FOP(func, fmt) encodings; gen_farith() switches on these values */
+    OPC_ADD_S = FOP(0, FMT_S),
+    OPC_SUB_S = FOP(1, FMT_S),
+    OPC_MUL_S = FOP(2, FMT_S),
+    OPC_DIV_S = FOP(3, FMT_S),
+    OPC_SQRT_S = FOP(4, FMT_S),
+    OPC_ABS_S = FOP(5, FMT_S),
+    OPC_MOV_S = FOP(6, FMT_S),
+    OPC_NEG_S = FOP(7, FMT_S),
+    OPC_ROUND_L_S = FOP(8, FMT_S),
+    OPC_TRUNC_L_S = FOP(9, FMT_S),
+    OPC_CEIL_L_S = FOP(10, FMT_S),
+    OPC_FLOOR_L_S = FOP(11, FMT_S),
+    OPC_ROUND_W_S = FOP(12, FMT_S),
+    OPC_TRUNC_W_S = FOP(13, FMT_S),
+    OPC_CEIL_W_S = FOP(14, FMT_S),
+    OPC_FLOOR_W_S = FOP(15, FMT_S),
+    OPC_MOVCF_S = FOP(17, FMT_S),
+    OPC_MOVZ_S = FOP(18, FMT_S),
+    OPC_MOVN_S = FOP(19, FMT_S),
+    OPC_RECIP_S = FOP(21, FMT_S),
+    OPC_RSQRT_S = FOP(22, FMT_S),
+    OPC_RECIP2_S = FOP(28, FMT_S),
+    OPC_RECIP1_S = FOP(29, FMT_S),
+    OPC_RSQRT1_S = FOP(30, FMT_S),
+    OPC_RSQRT2_S = FOP(31, FMT_S),
+    OPC_CVT_D_S = FOP(33, FMT_S),
+    OPC_CVT_W_S = FOP(36, FMT_S),
+    OPC_CVT_L_S = FOP(37, FMT_S),
+    OPC_CVT_PS_S = FOP(38, FMT_S),
+    OPC_CMP_F_S = FOP (48, FMT_S), /* func 48..63: compares; gen_farith() uses func-48 as the condition index */
+    OPC_CMP_UN_S = FOP (49, FMT_S),
+    OPC_CMP_EQ_S = FOP (50, FMT_S),
+    OPC_CMP_UEQ_S = FOP (51, FMT_S),
+    OPC_CMP_OLT_S = FOP (52, FMT_S),
+    OPC_CMP_ULT_S = FOP (53, FMT_S),
+    OPC_CMP_OLE_S = FOP (54, FMT_S),
+    OPC_CMP_ULE_S = FOP (55, FMT_S),
+    OPC_CMP_SF_S = FOP (56, FMT_S),
+    OPC_CMP_NGLE_S = FOP (57, FMT_S),
+    OPC_CMP_SEQ_S = FOP (58, FMT_S),
+    OPC_CMP_NGL_S = FOP (59, FMT_S),
+    OPC_CMP_LT_S = FOP (60, FMT_S),
+    OPC_CMP_NGE_S = FOP (61, FMT_S),
+    OPC_CMP_LE_S = FOP (62, FMT_S),
+    OPC_CMP_NGT_S = FOP (63, FMT_S),
+    /* FMT_D format encodings. */
+    OPC_ADD_D = FOP(0, FMT_D),
+    OPC_SUB_D = FOP(1, FMT_D),
+    OPC_MUL_D = FOP(2, FMT_D),
+    OPC_DIV_D = FOP(3, FMT_D),
+    OPC_SQRT_D = FOP(4, FMT_D),
+    OPC_ABS_D = FOP(5, FMT_D),
+    OPC_MOV_D = FOP(6, FMT_D),
+    OPC_NEG_D = FOP(7, FMT_D),
+    OPC_ROUND_L_D = FOP(8, FMT_D),
+    OPC_TRUNC_L_D = FOP(9, FMT_D),
+    OPC_CEIL_L_D = FOP(10, FMT_D),
+    OPC_FLOOR_L_D = FOP(11, FMT_D),
+    OPC_ROUND_W_D = FOP(12, FMT_D),
+    OPC_TRUNC_W_D = FOP(13, FMT_D),
+    OPC_CEIL_W_D = FOP(14, FMT_D),
+    OPC_FLOOR_W_D = FOP(15, FMT_D),
+    OPC_MOVCF_D = FOP(17, FMT_D),
+    OPC_MOVZ_D = FOP(18, FMT_D),
+    OPC_MOVN_D = FOP(19, FMT_D),
+    OPC_RECIP_D = FOP(21, FMT_D),
+    OPC_RSQRT_D = FOP(22, FMT_D),
+    OPC_RECIP2_D = FOP(28, FMT_D),
+    OPC_RECIP1_D = FOP(29, FMT_D),
+    OPC_RSQRT1_D = FOP(30, FMT_D),
+    OPC_RSQRT2_D = FOP(31, FMT_D),
+    OPC_CVT_S_D = FOP(32, FMT_D),
+    OPC_CVT_W_D = FOP(36, FMT_D),
+    OPC_CVT_L_D = FOP(37, FMT_D),
+    OPC_CMP_F_D = FOP (48, FMT_D),
+    OPC_CMP_UN_D = FOP (49, FMT_D),
+    OPC_CMP_EQ_D = FOP (50, FMT_D),
+    OPC_CMP_UEQ_D = FOP (51, FMT_D),
+    OPC_CMP_OLT_D = FOP (52, FMT_D),
+    OPC_CMP_ULT_D = FOP (53, FMT_D),
+    OPC_CMP_OLE_D = FOP (54, FMT_D),
+    OPC_CMP_ULE_D = FOP (55, FMT_D),
+    OPC_CMP_SF_D = FOP (56, FMT_D),
+    OPC_CMP_NGLE_D = FOP (57, FMT_D),
+    OPC_CMP_SEQ_D = FOP (58, FMT_D),
+    OPC_CMP_NGL_D = FOP (59, FMT_D),
+    OPC_CMP_LT_D = FOP (60, FMT_D),
+    OPC_CMP_NGE_D = FOP (61, FMT_D),
+    OPC_CMP_LE_D = FOP (62, FMT_D),
+    OPC_CMP_NGT_D = FOP (63, FMT_D),
+    /* FMT_W and FMT_L format encodings. */
+    OPC_CVT_S_W = FOP(32, FMT_W),
+    OPC_CVT_D_W = FOP(33, FMT_W),
+    OPC_CVT_S_L = FOP(32, FMT_L),
+    OPC_CVT_D_L = FOP(33, FMT_L),
+    OPC_CVT_PS_PW = FOP(38, FMT_W),
+    /* FMT_PS format encodings. */
+    OPC_ADD_PS = FOP(0, FMT_PS),
+    OPC_SUB_PS = FOP(1, FMT_PS),
+    OPC_MUL_PS = FOP(2, FMT_PS),
+    OPC_DIV_PS = FOP(3, FMT_PS),
+    OPC_ABS_PS = FOP(5, FMT_PS),
+    OPC_MOV_PS = FOP(6, FMT_PS),
+    OPC_NEG_PS = FOP(7, FMT_PS),
+    OPC_MOVCF_PS = FOP(17, FMT_PS),
+    OPC_MOVZ_PS = FOP(18, FMT_PS),
+    OPC_MOVN_PS = FOP(19, FMT_PS),
+    OPC_ADDR_PS = FOP(24, FMT_PS),
+    OPC_MULR_PS = FOP(26, FMT_PS),
+    OPC_RECIP2_PS = FOP(28, FMT_PS),
+    OPC_RECIP1_PS = FOP(29, FMT_PS),
+    OPC_RSQRT1_PS = FOP(30, FMT_PS),
+    OPC_RSQRT2_PS = FOP(31, FMT_PS),
+
+    OPC_CVT_S_PU = FOP(32, FMT_PS),
+    OPC_CVT_PW_PS = FOP(36, FMT_PS),
+    OPC_CVT_S_PL = FOP(40, FMT_PS),
+    OPC_PLL_PS = FOP(44, FMT_PS),
+    OPC_PLU_PS = FOP(45, FMT_PS),
+    OPC_PUL_PS = FOP(46, FMT_PS),
+    OPC_PUU_PS = FOP(47, FMT_PS),
+    OPC_CMP_F_PS = FOP (48, FMT_PS),
+    OPC_CMP_UN_PS = FOP (49, FMT_PS),
+    OPC_CMP_EQ_PS = FOP (50, FMT_PS),
+    OPC_CMP_UEQ_PS = FOP (51, FMT_PS),
+    OPC_CMP_OLT_PS = FOP (52, FMT_PS),
+    OPC_CMP_ULT_PS = FOP (53, FMT_PS),
+    OPC_CMP_OLE_PS = FOP (54, FMT_PS),
+    OPC_CMP_ULE_PS = FOP (55, FMT_PS),
+    OPC_CMP_SF_PS = FOP (56, FMT_PS),
+    OPC_CMP_NGLE_PS = FOP (57, FMT_PS),
+    OPC_CMP_SEQ_PS = FOP (58, FMT_PS),
+    OPC_CMP_NGL_PS = FOP (59, FMT_PS),
+    OPC_CMP_LT_PS = FOP (60, FMT_PS),
+    OPC_CMP_NGE_PS = FOP (61, FMT_PS),
+    OPC_CMP_LE_PS = FOP (62, FMT_PS),
+    OPC_CMP_NGT_PS = FOP (63, FMT_PS),
+};
+
 static void gen_cp1 (DisasContext *ctx, uint32_t opc, int rt, int fs)
 {
     const char *opn = "cp1 move";
@@ -6712,7 +7474,7 @@
 }
 
 
-static void gen_farith (DisasContext *ctx, uint32_t op1,
+static void gen_farith (DisasContext *ctx, enum fopcode op1,
                         int ft, int fs, int fd, int cc)
 {
     const char *opn = "farith";
@@ -6755,8 +7517,8 @@
     enum { BINOP, CMPOP, OTHEROP } optype = OTHEROP;
     uint32_t func = ctx->opcode & 0x3f;
 
-    switch (ctx->opcode & FOP(0x3f, 0x1f)) {
-    case FOP(0, 16):
+    switch (op1) {
+    case OPC_ADD_S:
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
@@ -6771,7 +7533,7 @@
         opn = "add.s";
         optype = BINOP;
         break;
-    case FOP(1, 16):
+    case OPC_SUB_S:
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
@@ -6786,7 +7548,7 @@
         opn = "sub.s";
         optype = BINOP;
         break;
-    case FOP(2, 16):
+    case OPC_MUL_S:
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
@@ -6801,7 +7563,7 @@
         opn = "mul.s";
         optype = BINOP;
         break;
-    case FOP(3, 16):
+    case OPC_DIV_S:
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
@@ -6816,7 +7578,7 @@
         opn = "div.s";
         optype = BINOP;
         break;
-    case FOP(4, 16):
+    case OPC_SQRT_S:
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
@@ -6827,7 +7589,7 @@
         }
         opn = "sqrt.s";
         break;
-    case FOP(5, 16):
+    case OPC_ABS_S:
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
@@ -6838,7 +7600,7 @@
         }
         opn = "abs.s";
         break;
-    case FOP(6, 16):
+    case OPC_MOV_S:
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
@@ -6848,7 +7610,7 @@
         }
         opn = "mov.s";
         break;
-    case FOP(7, 16):
+    case OPC_NEG_S:
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
@@ -6859,7 +7621,7 @@
         }
         opn = "neg.s";
         break;
-    case FOP(8, 16):
+    case OPC_ROUND_L_S:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i32 fp32 = tcg_temp_new_i32();
@@ -6873,7 +7635,7 @@
         }
         opn = "round.l.s";
         break;
-    case FOP(9, 16):
+    case OPC_TRUNC_L_S:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i32 fp32 = tcg_temp_new_i32();
@@ -6887,7 +7649,7 @@
         }
         opn = "trunc.l.s";
         break;
-    case FOP(10, 16):
+    case OPC_CEIL_L_S:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i32 fp32 = tcg_temp_new_i32();
@@ -6901,7 +7663,7 @@
         }
         opn = "ceil.l.s";
         break;
-    case FOP(11, 16):
+    case OPC_FLOOR_L_S:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i32 fp32 = tcg_temp_new_i32();
@@ -6915,7 +7677,7 @@
         }
         opn = "floor.l.s";
         break;
-    case FOP(12, 16):
+    case OPC_ROUND_W_S:
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
@@ -6926,7 +7688,7 @@
         }
         opn = "round.w.s";
         break;
-    case FOP(13, 16):
+    case OPC_TRUNC_W_S:
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
@@ -6937,7 +7699,7 @@
         }
         opn = "trunc.w.s";
         break;
-    case FOP(14, 16):
+    case OPC_CEIL_W_S:
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
@@ -6948,7 +7710,7 @@
         }
         opn = "ceil.w.s";
         break;
-    case FOP(15, 16):
+    case OPC_FLOOR_W_S:
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
@@ -6959,11 +7721,11 @@
         }
         opn = "floor.w.s";
         break;
-    case FOP(17, 16):
+    case OPC_MOVCF_S:
         gen_movcf_s(fs, fd, (ft >> 2) & 0x7, ft & 0x1);
         opn = "movcf.s";
         break;
-    case FOP(18, 16):
+    case OPC_MOVZ_S:
         {
             int l1 = gen_new_label();
             TCGv_i32 fp0;
@@ -6979,7 +7741,7 @@
         }
         opn = "movz.s";
         break;
-    case FOP(19, 16):
+    case OPC_MOVN_S:
         {
             int l1 = gen_new_label();
             TCGv_i32 fp0;
@@ -6995,7 +7757,7 @@
         }
         opn = "movn.s";
         break;
-    case FOP(21, 16):
+    case OPC_RECIP_S:
         check_cop1x(ctx);
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
@@ -7007,7 +7769,7 @@
         }
         opn = "recip.s";
         break;
-    case FOP(22, 16):
+    case OPC_RSQRT_S:
         check_cop1x(ctx);
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
@@ -7019,14 +7781,14 @@
         }
         opn = "rsqrt.s";
         break;
-    case FOP(28, 16):
+    case OPC_RECIP2_S:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_load_fpr32(fp1, fd);
+            gen_load_fpr32(fp1, ft);
             gen_helper_float_recip2_s(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp0, fd);
@@ -7034,7 +7796,7 @@
         }
         opn = "recip2.s";
         break;
-    case FOP(29, 16):
+    case OPC_RECIP1_S:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
@@ -7046,7 +7808,7 @@
         }
         opn = "recip1.s";
         break;
-    case FOP(30, 16):
+    case OPC_RSQRT1_S:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
@@ -7058,7 +7820,7 @@
         }
         opn = "rsqrt1.s";
         break;
-    case FOP(31, 16):
+    case OPC_RSQRT2_S:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
@@ -7073,7 +7835,7 @@
         }
         opn = "rsqrt2.s";
         break;
-    case FOP(33, 16):
+    case OPC_CVT_D_S:
         check_cp1_registers(ctx, fd);
         {
             TCGv_i32 fp32 = tcg_temp_new_i32();
@@ -7087,7 +7849,7 @@
         }
         opn = "cvt.d.s";
         break;
-    case FOP(36, 16):
+    case OPC_CVT_W_S:
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
@@ -7098,7 +7860,7 @@
         }
         opn = "cvt.w.s";
         break;
-    case FOP(37, 16):
+    case OPC_CVT_L_S:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i32 fp32 = tcg_temp_new_i32();
@@ -7112,7 +7874,7 @@
         }
         opn = "cvt.l.s";
         break;
-    case FOP(38, 16):
+    case OPC_CVT_PS_S:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i64 fp64 = tcg_temp_new_i64();
@@ -7121,7 +7883,7 @@
 
             gen_load_fpr32(fp32_0, fs);
             gen_load_fpr32(fp32_1, ft);
-            tcg_gen_concat_i32_i64(fp64, fp32_0, fp32_1);
+            tcg_gen_concat_i32_i64(fp64, fp32_1, fp32_0);
             tcg_temp_free_i32(fp32_1);
             tcg_temp_free_i32(fp32_0);
             gen_store_fpr64(ctx, fp64, fd);
@@ -7129,41 +7891,31 @@
         }
         opn = "cvt.ps.s";
         break;
-    case FOP(48, 16):
-    case FOP(49, 16):
-    case FOP(50, 16):
-    case FOP(51, 16):
-    case FOP(52, 16):
-    case FOP(53, 16):
-    case FOP(54, 16):
-    case FOP(55, 16):
-    case FOP(56, 16):
-    case FOP(57, 16):
-    case FOP(58, 16):
-    case FOP(59, 16):
-    case FOP(60, 16):
-    case FOP(61, 16):
-    case FOP(62, 16):
-    case FOP(63, 16):
-        {
-            TCGv_i32 fp0 = tcg_temp_new_i32();
-            TCGv_i32 fp1 = tcg_temp_new_i32();
-
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32(fp1, ft);
-            if (ctx->opcode & (1 << 6)) {
-                check_cop1x(ctx);
-                gen_cmpabs_s(func-48, fp0, fp1, cc);
-                opn = condnames_abs[func-48];
-            } else {
-                gen_cmp_s(func-48, fp0, fp1, cc);
-                opn = condnames[func-48];
-            }
-            tcg_temp_free_i32(fp0);
-            tcg_temp_free_i32(fp1);
+    case OPC_CMP_F_S:
+    case OPC_CMP_UN_S:
+    case OPC_CMP_EQ_S:
+    case OPC_CMP_UEQ_S:
+    case OPC_CMP_OLT_S:
+    case OPC_CMP_ULT_S:
+    case OPC_CMP_OLE_S:
+    case OPC_CMP_ULE_S:
+    case OPC_CMP_SF_S:
+    case OPC_CMP_NGLE_S:
+    case OPC_CMP_SEQ_S:
+    case OPC_CMP_NGL_S:
+    case OPC_CMP_LT_S:
+    case OPC_CMP_NGE_S:
+    case OPC_CMP_LE_S:
+    case OPC_CMP_NGT_S:
+        if (ctx->opcode & (1 << 6)) {
+            gen_cmpabs_s(ctx, func-48, ft, fs, cc);
+            opn = condnames_abs[func-48];
+        } else {
+            gen_cmp_s(ctx, func-48, ft, fs, cc);
+            opn = condnames[func-48];
         }
         break;
-    case FOP(0, 17):
+    case OPC_ADD_D:
         check_cp1_registers(ctx, fs | ft | fd);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7179,7 +7931,7 @@
         opn = "add.d";
         optype = BINOP;
         break;
-    case FOP(1, 17):
+    case OPC_SUB_D:
         check_cp1_registers(ctx, fs | ft | fd);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7195,7 +7947,7 @@
         opn = "sub.d";
         optype = BINOP;
         break;
-    case FOP(2, 17):
+    case OPC_MUL_D:
         check_cp1_registers(ctx, fs | ft | fd);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7211,7 +7963,7 @@
         opn = "mul.d";
         optype = BINOP;
         break;
-    case FOP(3, 17):
+    case OPC_DIV_D:
         check_cp1_registers(ctx, fs | ft | fd);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7227,7 +7979,7 @@
         opn = "div.d";
         optype = BINOP;
         break;
-    case FOP(4, 17):
+    case OPC_SQRT_D:
         check_cp1_registers(ctx, fs | fd);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7239,7 +7991,7 @@
         }
         opn = "sqrt.d";
         break;
-    case FOP(5, 17):
+    case OPC_ABS_D:
         check_cp1_registers(ctx, fs | fd);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7251,7 +8003,7 @@
         }
         opn = "abs.d";
         break;
-    case FOP(6, 17):
+    case OPC_MOV_D:
         check_cp1_registers(ctx, fs | fd);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7262,7 +8014,7 @@
         }
         opn = "mov.d";
         break;
-    case FOP(7, 17):
+    case OPC_NEG_D:
         check_cp1_registers(ctx, fs | fd);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7274,7 +8026,7 @@
         }
         opn = "neg.d";
         break;
-    case FOP(8, 17):
+    case OPC_ROUND_L_D:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7286,7 +8038,7 @@
         }
         opn = "round.l.d";
         break;
-    case FOP(9, 17):
+    case OPC_TRUNC_L_D:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7298,7 +8050,7 @@
         }
         opn = "trunc.l.d";
         break;
-    case FOP(10, 17):
+    case OPC_CEIL_L_D:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7310,7 +8062,7 @@
         }
         opn = "ceil.l.d";
         break;
-    case FOP(11, 17):
+    case OPC_FLOOR_L_D:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7322,7 +8074,7 @@
         }
         opn = "floor.l.d";
         break;
-    case FOP(12, 17):
+    case OPC_ROUND_W_D:
         check_cp1_registers(ctx, fs);
         {
             TCGv_i32 fp32 = tcg_temp_new_i32();
@@ -7336,7 +8088,7 @@
         }
         opn = "round.w.d";
         break;
-    case FOP(13, 17):
+    case OPC_TRUNC_W_D:
         check_cp1_registers(ctx, fs);
         {
             TCGv_i32 fp32 = tcg_temp_new_i32();
@@ -7350,7 +8102,7 @@
         }
         opn = "trunc.w.d";
         break;
-    case FOP(14, 17):
+    case OPC_CEIL_W_D:
         check_cp1_registers(ctx, fs);
         {
             TCGv_i32 fp32 = tcg_temp_new_i32();
@@ -7364,7 +8116,7 @@
         }
         opn = "ceil.w.d";
         break;
-    case FOP(15, 17):
+    case OPC_FLOOR_W_D:
         check_cp1_registers(ctx, fs);
         {
             TCGv_i32 fp32 = tcg_temp_new_i32();
@@ -7378,11 +8130,11 @@
         }
         opn = "floor.w.d";
         break;
-    case FOP(17, 17):
+    case OPC_MOVCF_D:
         gen_movcf_d(ctx, fs, fd, (ft >> 2) & 0x7, ft & 0x1);
         opn = "movcf.d";
         break;
-    case FOP(18, 17):
+    case OPC_MOVZ_D:
         {
             int l1 = gen_new_label();
             TCGv_i64 fp0;
@@ -7398,7 +8150,7 @@
         }
         opn = "movz.d";
         break;
-    case FOP(19, 17):
+    case OPC_MOVN_D:
         {
             int l1 = gen_new_label();
             TCGv_i64 fp0;
@@ -7414,7 +8166,7 @@
         }
         opn = "movn.d";
         break;
-    case FOP(21, 17):
+    case OPC_RECIP_D:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7426,7 +8178,7 @@
         }
         opn = "recip.d";
         break;
-    case FOP(22, 17):
+    case OPC_RSQRT_D:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7438,7 +8190,7 @@
         }
         opn = "rsqrt.d";
         break;
-    case FOP(28, 17):
+    case OPC_RECIP2_D:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7453,7 +8205,7 @@
         }
         opn = "recip2.d";
         break;
-    case FOP(29, 17):
+    case OPC_RECIP1_D:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7465,7 +8217,7 @@
         }
         opn = "recip1.d";
         break;
-    case FOP(30, 17):
+    case OPC_RSQRT1_D:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7477,7 +8229,7 @@
         }
         opn = "rsqrt1.d";
         break;
-    case FOP(31, 17):
+    case OPC_RSQRT2_D:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7492,43 +8244,31 @@
         }
         opn = "rsqrt2.d";
         break;
-    case FOP(48, 17):
-    case FOP(49, 17):
-    case FOP(50, 17):
-    case FOP(51, 17):
-    case FOP(52, 17):
-    case FOP(53, 17):
-    case FOP(54, 17):
-    case FOP(55, 17):
-    case FOP(56, 17):
-    case FOP(57, 17):
-    case FOP(58, 17):
-    case FOP(59, 17):
-    case FOP(60, 17):
-    case FOP(61, 17):
-    case FOP(62, 17):
-    case FOP(63, 17):
-        {
-            TCGv_i64 fp0 = tcg_temp_new_i64();
-            TCGv_i64 fp1 = tcg_temp_new_i64();
-
-            gen_load_fpr64(ctx, fp0, fs);
-            gen_load_fpr64(ctx, fp1, ft);
-            if (ctx->opcode & (1 << 6)) {
-                check_cop1x(ctx);
-                check_cp1_registers(ctx, fs | ft);
-                gen_cmpabs_d(func-48, fp0, fp1, cc);
-                opn = condnames_abs[func-48];
-            } else {
-                check_cp1_registers(ctx, fs | ft);
-                gen_cmp_d(func-48, fp0, fp1, cc);
-                opn = condnames[func-48];
-            }
-            tcg_temp_free_i64(fp0);
-            tcg_temp_free_i64(fp1);
+    case OPC_CMP_F_D:
+    case OPC_CMP_UN_D:
+    case OPC_CMP_EQ_D:
+    case OPC_CMP_UEQ_D:
+    case OPC_CMP_OLT_D:
+    case OPC_CMP_ULT_D:
+    case OPC_CMP_OLE_D:
+    case OPC_CMP_ULE_D:
+    case OPC_CMP_SF_D:
+    case OPC_CMP_NGLE_D:
+    case OPC_CMP_SEQ_D:
+    case OPC_CMP_NGL_D:
+    case OPC_CMP_LT_D:
+    case OPC_CMP_NGE_D:
+    case OPC_CMP_LE_D:
+    case OPC_CMP_NGT_D:
+        if (ctx->opcode & (1 << 6)) {
+            gen_cmpabs_d(ctx, func-48, ft, fs, cc);
+            opn = condnames_abs[func-48];
+        } else {
+            gen_cmp_d(ctx, func-48, ft, fs, cc);
+            opn = condnames[func-48];
         }
         break;
-    case FOP(32, 17):
+    case OPC_CVT_S_D:
         check_cp1_registers(ctx, fs);
         {
             TCGv_i32 fp32 = tcg_temp_new_i32();
@@ -7542,7 +8282,7 @@
         }
         opn = "cvt.s.d";
         break;
-    case FOP(36, 17):
+    case OPC_CVT_W_D:
         check_cp1_registers(ctx, fs);
         {
             TCGv_i32 fp32 = tcg_temp_new_i32();
@@ -7556,7 +8296,7 @@
         }
         opn = "cvt.w.d";
         break;
-    case FOP(37, 17):
+    case OPC_CVT_L_D:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7568,7 +8308,7 @@
         }
         opn = "cvt.l.d";
         break;
-    case FOP(32, 20):
+    case OPC_CVT_S_W:
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
@@ -7579,7 +8319,7 @@
         }
         opn = "cvt.s.w";
         break;
-    case FOP(33, 20):
+    case OPC_CVT_D_W:
         check_cp1_registers(ctx, fd);
         {
             TCGv_i32 fp32 = tcg_temp_new_i32();
@@ -7593,7 +8333,7 @@
         }
         opn = "cvt.d.w";
         break;
-    case FOP(32, 21):
+    case OPC_CVT_S_L:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i32 fp32 = tcg_temp_new_i32();
@@ -7607,7 +8347,7 @@
         }
         opn = "cvt.s.l";
         break;
-    case FOP(33, 21):
+    case OPC_CVT_D_L:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7619,7 +8359,7 @@
         }
         opn = "cvt.d.l";
         break;
-    case FOP(38, 20):
+    case OPC_CVT_PS_PW:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7631,7 +8371,7 @@
         }
         opn = "cvt.ps.pw";
         break;
-    case FOP(0, 22):
+    case OPC_ADD_PS:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7646,7 +8386,7 @@
         }
         opn = "add.ps";
         break;
-    case FOP(1, 22):
+    case OPC_SUB_PS:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7661,7 +8401,7 @@
         }
         opn = "sub.ps";
         break;
-    case FOP(2, 22):
+    case OPC_MUL_PS:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7676,7 +8416,7 @@
         }
         opn = "mul.ps";
         break;
-    case FOP(5, 22):
+    case OPC_ABS_PS:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7688,7 +8428,7 @@
         }
         opn = "abs.ps";
         break;
-    case FOP(6, 22):
+    case OPC_MOV_PS:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7699,7 +8439,7 @@
         }
         opn = "mov.ps";
         break;
-    case FOP(7, 22):
+    case OPC_NEG_PS:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7711,12 +8451,12 @@
         }
         opn = "neg.ps";
         break;
-    case FOP(17, 22):
+    case OPC_MOVCF_PS:
         check_cp1_64bitmode(ctx);
         gen_movcf_ps(ctx, fs, fd, (ft >> 2) & 0x7, ft & 0x1);
         opn = "movcf.ps";
         break;
-    case FOP(18, 22):
+    case OPC_MOVZ_PS:
         check_cp1_64bitmode(ctx);
         {
             int l1 = gen_new_label();
@@ -7732,7 +8472,7 @@
         }
         opn = "movz.ps";
         break;
-    case FOP(19, 22):
+    case OPC_MOVN_PS:
         check_cp1_64bitmode(ctx);
         {
             int l1 = gen_new_label();
@@ -7749,7 +8489,7 @@
         }
         opn = "movn.ps";
         break;
-    case FOP(24, 22):
+    case OPC_ADDR_PS:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7764,7 +8504,7 @@
         }
         opn = "addr.ps";
         break;
-    case FOP(26, 22):
+    case OPC_MULR_PS:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7779,14 +8519,14 @@
         }
         opn = "mulr.ps";
         break;
-    case FOP(28, 22):
+    case OPC_RECIP2_PS:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
             TCGv_i64 fp1 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_load_fpr64(ctx, fp1, fd);
+            gen_load_fpr64(ctx, fp1, ft);
             gen_helper_float_recip2_ps(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
@@ -7794,7 +8534,7 @@
         }
         opn = "recip2.ps";
         break;
-    case FOP(29, 22):
+    case OPC_RECIP1_PS:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7806,7 +8546,7 @@
         }
         opn = "recip1.ps";
         break;
-    case FOP(30, 22):
+    case OPC_RSQRT1_PS:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7818,7 +8558,7 @@
         }
         opn = "rsqrt1.ps";
         break;
-    case FOP(31, 22):
+    case OPC_RSQRT2_PS:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7833,7 +8573,7 @@
         }
         opn = "rsqrt2.ps";
         break;
-    case FOP(32, 22):
+    case OPC_CVT_S_PU:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
@@ -7845,7 +8585,7 @@
         }
         opn = "cvt.s.pu";
         break;
-    case FOP(36, 22):
+    case OPC_CVT_PW_PS:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
@@ -7857,7 +8597,7 @@
         }
         opn = "cvt.pw.ps";
         break;
-    case FOP(40, 22):
+    case OPC_CVT_S_PL:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
@@ -7869,7 +8609,7 @@
         }
         opn = "cvt.s.pl";
         break;
-    case FOP(44, 22):
+    case OPC_PLL_PS:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
@@ -7884,7 +8624,7 @@
         }
         opn = "pll.ps";
         break;
-    case FOP(45, 22):
+    case OPC_PLU_PS:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
@@ -7899,7 +8639,7 @@
         }
         opn = "plu.ps";
         break;
-    case FOP(46, 22):
+    case OPC_PUL_PS:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
@@ -7914,7 +8654,7 @@
         }
         opn = "pul.ps";
         break;
-    case FOP(47, 22):
+    case OPC_PUU_PS:
         check_cp1_64bitmode(ctx);
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
@@ -7929,38 +8669,28 @@
         }
         opn = "puu.ps";
         break;
-    case FOP(48, 22):
-    case FOP(49, 22):
-    case FOP(50, 22):
-    case FOP(51, 22):
-    case FOP(52, 22):
-    case FOP(53, 22):
-    case FOP(54, 22):
-    case FOP(55, 22):
-    case FOP(56, 22):
-    case FOP(57, 22):
-    case FOP(58, 22):
-    case FOP(59, 22):
-    case FOP(60, 22):
-    case FOP(61, 22):
-    case FOP(62, 22):
-    case FOP(63, 22):
-        check_cp1_64bitmode(ctx);
-        {
-            TCGv_i64 fp0 = tcg_temp_new_i64();
-            TCGv_i64 fp1 = tcg_temp_new_i64();
-
-            gen_load_fpr64(ctx, fp0, fs);
-            gen_load_fpr64(ctx, fp1, ft);
-            if (ctx->opcode & (1 << 6)) {
-                gen_cmpabs_ps(func-48, fp0, fp1, cc);
-                opn = condnames_abs[func-48];
-            } else {
-                gen_cmp_ps(func-48, fp0, fp1, cc);
-                opn = condnames[func-48];
-            }
-            tcg_temp_free_i64(fp0);
-            tcg_temp_free_i64(fp1);
+    case OPC_CMP_F_PS:
+    case OPC_CMP_UN_PS:
+    case OPC_CMP_EQ_PS:
+    case OPC_CMP_UEQ_PS:
+    case OPC_CMP_OLT_PS:
+    case OPC_CMP_ULT_PS:
+    case OPC_CMP_OLE_PS:
+    case OPC_CMP_ULE_PS:
+    case OPC_CMP_SF_PS:
+    case OPC_CMP_NGLE_PS:
+    case OPC_CMP_SEQ_PS:
+    case OPC_CMP_NGL_PS:
+    case OPC_CMP_LT_PS:
+    case OPC_CMP_NGE_PS:
+    case OPC_CMP_LE_PS:
+    case OPC_CMP_NGT_PS:
+        if (ctx->opcode & (1 << 6)) {
+            gen_cmpabs_ps(ctx, func-48, ft, fs, cc);
+            opn = condnames_abs[func-48];
+        } else {
+            gen_cmp_ps(ctx, func-48, ft, fs, cc);
+            opn = condnames[func-48];
         }
         break;
     default:
@@ -7995,19 +8725,17 @@
     } else if (index == 0) {
         gen_load_gpr(t0, base);
     } else {
-        gen_load_gpr(t0, index);
-        gen_op_addr_add(ctx, t0, cpu_gpr[base], t0);
+        gen_op_addr_add(ctx, t0, cpu_gpr[base], cpu_gpr[index]);
     }
     /* Don't do NOP if destination is zero: we must perform the actual
        memory access. */
-    save_cpu_state(ctx, 0);
     switch (opc) {
     case OPC_LWXC1:
         check_cop1x(ctx);
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            tcg_gen_qemu_ld32s(t0, t0, ctx->mem_idx);
+            tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TESL);
             tcg_gen_trunc_tl_i32(fp0, t0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
@@ -8019,8 +8747,7 @@
         check_cp1_registers(ctx, fd);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
-
-            tcg_gen_qemu_ld64(fp0, t0, ctx->mem_idx);
+            tcg_gen_qemu_ld_i64(fp0, t0, ctx->mem_idx, MO_TEQ);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -8032,7 +8759,7 @@
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
-            tcg_gen_qemu_ld64(fp0, t0, ctx->mem_idx);
+            tcg_gen_qemu_ld_i64(fp0, t0, ctx->mem_idx, MO_TEQ);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -8042,13 +8769,9 @@
         check_cop1x(ctx);
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
-            TCGv t1 = tcg_temp_new();
-
             gen_load_fpr32(fp0, fs);
-            tcg_gen_extu_i32_tl(t1, fp0);
-            tcg_gen_qemu_st32(t1, t0, ctx->mem_idx);
+            tcg_gen_qemu_st_i32(fp0, t0, ctx->mem_idx, MO_TEUL);
             tcg_temp_free_i32(fp0);
-            tcg_temp_free(t1);
         }
         opn = "swxc1";
         store = 1;
@@ -8058,9 +8781,8 @@
         check_cp1_registers(ctx, fs);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
-
             gen_load_fpr64(ctx, fp0, fs);
-            tcg_gen_qemu_st64(fp0, t0, ctx->mem_idx);
+            tcg_gen_qemu_st_i64(fp0, t0, ctx->mem_idx, MO_TEQ);
             tcg_temp_free_i64(fp0);
         }
         opn = "sdxc1";
@@ -8071,9 +8793,8 @@
         tcg_gen_andi_tl(t0, t0, ~0x7);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
-
             gen_load_fpr64(ctx, fp0, fs);
-            tcg_gen_qemu_st64(fp0, t0, ctx->mem_idx);
+            tcg_gen_qemu_st_i64(fp0, t0, ctx->mem_idx, MO_TEQ);
             tcg_temp_free_i64(fp0);
         }
         opn = "suxc1";
@@ -8360,8 +9081,3645 @@
                fregnames[fs], fregnames[ft]);
 }
 
+static void gen_rdhwr(DisasContext *ctx, int rt, int rd)
+{
+    TCGv t0;
+    /* Emit code for "rdhwr": the value selected by rd is stored to GPR rt. */
+#if !defined(CONFIG_USER_ONLY)
+    /* The Linux kernel will emulate rdhwr if it's not supported natively.
+       Therefore only check the ISA in system mode.  */
+    check_insn(ctx, ISA_MIPS32R2);
+#endif
+    t0 = tcg_temp_new();
+
+    switch (rd) {
+    case 0:             /* cases 0-3: save CPU state, then call a helper */
+        save_cpu_state(ctx, 1);
+        gen_helper_rdhwr_cpunum(t0, cpu_env);
+        gen_store_gpr(t0, rt);
+        break;
+    case 1:
+        save_cpu_state(ctx, 1);
+        gen_helper_rdhwr_synci_step(t0, cpu_env);
+        gen_store_gpr(t0, rt);
+        break;
+    case 2:
+        save_cpu_state(ctx, 1);
+        gen_helper_rdhwr_cc(t0, cpu_env);
+        gen_store_gpr(t0, rt);
+        break;
+    case 3:
+        save_cpu_state(ctx, 1);
+        gen_helper_rdhwr_ccres(t0, cpu_env);
+        gen_store_gpr(t0, rt);
+        break;
+    case 29:            /* user-only builds: read CPUMIPSState.tls_value */
+#if defined(CONFIG_USER_ONLY)
+        tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUMIPSState, tls_value));
+        gen_store_gpr(t0, rt);
+        break;
+#else
+        /* XXX: Some CPUs implement this in hardware.
+           Not supported yet: system mode falls through to "Invalid". */
+#endif
+    default:            /* Invalid */
+        MIPS_INVAL("rdhwr");
+        generate_exception(ctx, EXCP_RI);
+        break;
+    }
+    tcg_temp_free(t0);  /* reached on every path, including the RI one */
+}
+
+static void handle_delay_slot(DisasContext *ctx, int insn_bytes)
+{
+    if (ctx->hflags & MIPS_HFLAG_BMASK) {  /* else: nothing is emitted */
+        int proc_hflags = ctx->hflags & MIPS_HFLAG_BMASK;
+        /* Branches completion: the flags were copied above, now clear them */
+        ctx->hflags &= ~MIPS_HFLAG_BMASK;
+        ctx->bstate = BS_BRANCH;
+        save_cpu_state(ctx, 0);
+        /* FIXME: Need to clear can_do_io.  */
+        switch (proc_hflags & MIPS_HFLAG_BMASK_BASE) {
+        case MIPS_HFLAG_B:
+            /* unconditional branch */
+            MIPS_DEBUG("unconditional branch");
+            if (proc_hflags & MIPS_HFLAG_BX) {  /* flip the M16 flag */
+                tcg_gen_xori_i32(hflags, hflags, MIPS_HFLAG_M16);
+            }
+            gen_goto_tb(ctx, 0, ctx->btarget);
+            break;
+        case MIPS_HFLAG_BL:
+            /* blikely taken case */
+            MIPS_DEBUG("blikely branch taken");
+            gen_goto_tb(ctx, 0, ctx->btarget);
+            break;
+        case MIPS_HFLAG_BC:
+            /* Conditional branch */
+            MIPS_DEBUG("conditional branch");
+            {
+                int l1 = gen_new_label();
+                /* bcond != 0 jumps to l1 (btarget); otherwise next insn. */
+                tcg_gen_brcondi_tl(TCG_COND_NE, bcond, 0, l1);
+                gen_goto_tb(ctx, 1, ctx->pc + insn_bytes);
+                gen_set_label(l1);
+                gen_goto_tb(ctx, 0, ctx->btarget);
+            }
+            break;
+        case MIPS_HFLAG_BR:
+            /* unconditional branch to register */
+            MIPS_DEBUG("branch to register");
+            if (ctx->insn_flags & (ASE_MIPS16 | ASE_MICROMIPS)) {
+                TCGv t0 = tcg_temp_new();
+                TCGv_i32 t1 = tcg_temp_new_i32();
+
+                tcg_gen_andi_tl(t0, btarget, 0x1);
+                tcg_gen_trunc_tl_i32(t1, t0);
+                tcg_temp_free(t0);
+                tcg_gen_andi_i32(hflags, hflags, ~(uint32_t)MIPS_HFLAG_M16);
+                tcg_gen_shli_i32(t1, t1, MIPS_HFLAG_M16_SHIFT);
+                tcg_gen_or_i32(hflags, hflags, t1);
+                tcg_temp_free_i32(t1);
+                /* Bit 0 of btarget went into the M16 flag; PC gets it cleared. */
+                tcg_gen_andi_tl(cpu_PC, btarget, ~(target_ulong)0x1);
+            } else {
+                tcg_gen_mov_tl(cpu_PC, btarget);
+            }
+            if (ctx->singlestep_enabled) {
+                save_cpu_state(ctx, 0);
+                gen_helper_0e0i(raise_exception, EXCP_DEBUG);
+            }
+            tcg_gen_exit_tb(0);
+            break;
+        default:
+            MIPS_DEBUG("unknown branch");  /* only logged; no code emitted */
+            break;
+        }
+    }
+}
+
 /* ISA extensions (ASEs) */
 /* MIPS16 extension to MIPS32 */
+
+/* MIPS16 major opcodes (5 bits; extended decode: (opcode >> 11) & 0x1f) */
+enum {
+  M16_OPC_ADDIUSP = 0x00,
+  M16_OPC_ADDIUPC = 0x01,
+  M16_OPC_B = 0x02,
+  M16_OPC_JAL = 0x03,
+  M16_OPC_BEQZ = 0x04,
+  M16_OPC_BNEQZ = 0x05,
+  M16_OPC_SHIFT = 0x06,     /* opcode & 0x3 picks SLL/DSLL/SRL/SRA */
+  M16_OPC_LD = 0x07,        /* extended decode: TARGET_MIPS64 only */
+  M16_OPC_RRIA = 0x08,
+  M16_OPC_ADDIU8 = 0x09,
+  M16_OPC_SLTI = 0x0a,
+  M16_OPC_SLTIU = 0x0b,
+  M16_OPC_I8 = 0x0c,
+  M16_OPC_LI = 0x0d,
+  M16_OPC_CMPI = 0x0e,
+  M16_OPC_SD = 0x0f,
+  M16_OPC_LB = 0x10,
+  M16_OPC_LH = 0x11,
+  M16_OPC_LWSP = 0x12,
+  M16_OPC_LW = 0x13,
+  M16_OPC_LBU = 0x14,
+  M16_OPC_LHU = 0x15,
+  M16_OPC_LWPC = 0x16,
+  M16_OPC_LWU = 0x17,
+  M16_OPC_SB = 0x18,
+  M16_OPC_SH = 0x19,
+  M16_OPC_SWSP = 0x1a,
+  M16_OPC_SW = 0x1b,
+  M16_OPC_RRR = 0x1c,
+  M16_OPC_RR = 0x1d,
+  M16_OPC_EXTEND = 0x1e,
+  M16_OPC_I64 = 0x1f
+};
+
+/* I8 funct field (3-bit values; 0x6 has no name here) */
+enum {
+  I8_BTEQZ = 0x0,
+  I8_BTNEZ = 0x1,
+  I8_SWRASP = 0x2,
+  I8_ADJSP = 0x3,
+  I8_SVRS = 0x4,
+  I8_MOV32R = 0x5,
+  I8_MOVR32 = 0x7
+};
+
+/* RRR f field (values 0x0-0x3, all four assigned) */
+enum {
+  RRR_DADDU = 0x0,
+  RRR_ADDU = 0x1,
+  RRR_DSUBU = 0x2,
+  RRR_SUBU = 0x3
+};
+
+/* RR funct field (values up to 0x1f; 0x09 and 0x15 have no name here) */
+enum {
+  RR_JR = 0x00,
+  RR_SDBBP = 0x01,
+  RR_SLT = 0x02,
+  RR_SLTU = 0x03,
+  RR_SLLV = 0x04,
+  RR_BREAK = 0x05,
+  RR_SRLV = 0x06,
+  RR_SRAV = 0x07,
+  RR_DSRL = 0x08,
+  RR_CMP = 0x0a,
+  RR_NEG = 0x0b,
+  RR_AND = 0x0c,
+  RR_OR = 0x0d,
+  RR_XOR = 0x0e,
+  RR_NOT = 0x0f,
+  RR_MFHI = 0x10,
+  RR_CNVT = 0x11,           /* variant named by the RR_RY_CNVT_* values */
+  RR_MFLO = 0x12,
+  RR_DSRA = 0x13,
+  RR_DSLLV = 0x14,
+  RR_DSRLV = 0x16,
+  RR_DSRAV = 0x17,
+  RR_MULT = 0x18,
+  RR_MULTU = 0x19,
+  RR_DIV = 0x1a,
+  RR_DIVU = 0x1b,
+  RR_DMULT = 0x1c,
+  RR_DMULTU = 0x1d,
+  RR_DDIV = 0x1e,
+  RR_DDIVU = 0x1f
+};
+
+/* I64 funct field: all eight 3-bit values; see decode_i64_mips16() */
+enum {
+  I64_LDSP = 0x0,
+  I64_SDSP = 0x1,
+  I64_SDRASP = 0x2,
+  I64_DADJSP = 0x3,
+  I64_LDPC = 0x4,
+  I64_DADDIU5 = 0x5,
+  I64_DADDIUPC = 0x6,
+  I64_DADDIUSP = 0x7
+};
+
+/* RR ry field for CNVT (0x3 and 0x7 have no name here) */
+enum {
+  RR_RY_CNVT_ZEB = 0x0,
+  RR_RY_CNVT_ZEH = 0x1,
+  RR_RY_CNVT_ZEW = 0x2,
+  RR_RY_CNVT_SEB = 0x4,
+  RR_RY_CNVT_SEH = 0x5,
+  RR_RY_CNVT_SEW = 0x6,
+};
+
+static int xlat (int r)
+{
+  static const int map[8] = { 16, 17, 2, 3, 4, 5, 6, 7 };
+  /* 3-bit MIPS16 register field -> GPR number; mask keeps r inside map[]. */
+  return map[r & 0x7];
+}
+
+static void gen_mips16_save (DisasContext *ctx,
+                             int xsregs, int aregs,
+                             int do_ra, int do_s0, int do_s1,
+                             int framesize)
+{
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    int args, astatic;
+    /* MIPS16 SAVE: args counts GPRs 4..7 stored at 0..12 above GPR 29. */
+    switch (aregs) {
+    case 0:
+    case 1:
+    case 2:
+    case 3:
+    case 11:
+        args = 0;
+        break;
+    case 4:
+    case 5:
+    case 6:
+    case 7:
+        args = 1;
+        break;
+    case 8:
+    case 9:
+    case 10:
+        args = 2;
+        break;
+    case 12:
+    case 13:
+        args = 3;
+        break;
+    case 14:
+        args = 4;
+        break;
+    default:
+        generate_exception(ctx, EXCP_RI);
+        goto out;       /* leave through the temp frees, not a bare return */
+    }
+
+    switch (args) {
+    case 4:
+        gen_base_offset_addr(ctx, t0, 29, 12);
+        gen_load_gpr(t1, 7);
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
+        /* Fall through */
+    case 3:
+        gen_base_offset_addr(ctx, t0, 29, 8);
+        gen_load_gpr(t1, 6);
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
+        /* Fall through */
+    case 2:
+        gen_base_offset_addr(ctx, t0, 29, 4);
+        gen_load_gpr(t1, 5);
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
+        /* Fall through */
+    case 1:
+        gen_base_offset_addr(ctx, t0, 29, 0);
+        gen_load_gpr(t1, 4);
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
+    }
+
+    gen_load_gpr(t0, 29);   /* t0 walks downward from the incoming GPR 29 */
+#define DECR_AND_STORE(reg) do {                                 \
+        tcg_gen_subi_tl(t0, t0, 4);                              \
+        gen_load_gpr(t1, reg);                                   \
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL); \
+    } while (0)
+
+    if (do_ra) {
+        DECR_AND_STORE(31);
+    }
+
+    switch (xsregs) {
+    case 7:
+        DECR_AND_STORE(30);
+        /* Fall through */
+    case 6:
+        DECR_AND_STORE(23);
+        /* Fall through */
+    case 5:
+        DECR_AND_STORE(22);
+        /* Fall through */
+    case 4:
+        DECR_AND_STORE(21);
+        /* Fall through */
+    case 3:
+        DECR_AND_STORE(20);
+        /* Fall through */
+    case 2:
+        DECR_AND_STORE(19);
+        /* Fall through */
+    case 1:
+        DECR_AND_STORE(18);
+    }
+
+    if (do_s1) {
+        DECR_AND_STORE(17);
+    }
+    if (do_s0) {
+        DECR_AND_STORE(16);
+    }
+
+    switch (aregs) {    /* astatic: how many of GPRs 7..4 are stored too */
+    case 0:
+    case 4:
+    case 8:
+    case 12:
+    case 14:
+        astatic = 0;
+        break;
+    case 1:
+    case 5:
+    case 9:
+    case 13:
+        astatic = 1;
+        break;
+    case 2:
+    case 6:
+    case 10:
+        astatic = 2;
+        break;
+    case 3:
+    case 7:
+        astatic = 3;
+        break;
+    case 11:
+        astatic = 4;
+        break;
+    default:
+        generate_exception(ctx, EXCP_RI);
+        goto out;       /* same cleanup as the first aregs check */
+    }
+
+    if (astatic > 0) {
+        DECR_AND_STORE(7);
+        if (astatic > 1) {
+            DECR_AND_STORE(6);
+            if (astatic > 2) {
+                DECR_AND_STORE(5);
+                if (astatic > 3) {
+                    DECR_AND_STORE(4);
+                }
+            }
+        }
+    }
+#undef DECR_AND_STORE
+
+    tcg_gen_subi_tl(cpu_gpr[29], cpu_gpr[29], framesize);
+out:
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+}
+
+static void gen_mips16_restore (DisasContext *ctx,
+                                int xsregs, int aregs,
+                                int do_ra, int do_s0, int do_s1,
+                                int framesize)
+{
+    int astatic;
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    /* MIPS16 RESTORE: t0 starts at GPR 29 + framesize and walks downward. */
+    tcg_gen_addi_tl(t0, cpu_gpr[29], framesize);
+#define DECR_AND_LOAD(reg) do {                            \
+        tcg_gen_subi_tl(t0, t0, 4);                        \
+        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL); \
+        gen_store_gpr(t1, reg);                            \
+    } while (0)
+
+    if (do_ra) {
+        DECR_AND_LOAD(31);
+    }
+
+    switch (xsregs) {
+    case 7:
+        DECR_AND_LOAD(30);
+        /* Fall through */
+    case 6:
+        DECR_AND_LOAD(23);
+        /* Fall through */
+    case 5:
+        DECR_AND_LOAD(22);
+        /* Fall through */
+    case 4:
+        DECR_AND_LOAD(21);
+        /* Fall through */
+    case 3:
+        DECR_AND_LOAD(20);
+        /* Fall through */
+    case 2:
+        DECR_AND_LOAD(19);
+        /* Fall through */
+    case 1:
+        DECR_AND_LOAD(18);
+    }
+
+    if (do_s1) {
+        DECR_AND_LOAD(17);
+    }
+    if (do_s0) {
+        DECR_AND_LOAD(16);
+    }
+
+    switch (aregs) {    /* astatic: how many of GPRs 7..4 are reloaded too */
+    case 0:
+    case 4:
+    case 8:
+    case 12:
+    case 14:
+        astatic = 0;
+        break;
+    case 1:
+    case 5:
+    case 9:
+    case 13:
+        astatic = 1;
+        break;
+    case 2:
+    case 6:
+    case 10:
+        astatic = 2;
+        break;
+    case 3:
+    case 7:
+        astatic = 3;
+        break;
+    case 11:
+        astatic = 4;
+        break;
+    default:
+        generate_exception(ctx, EXCP_RI);
+        goto out;       /* leave through the temp frees, not a bare return */
+    }
+
+    if (astatic > 0) {
+        DECR_AND_LOAD(7);
+        if (astatic > 1) {
+            DECR_AND_LOAD(6);
+            if (astatic > 2) {
+                DECR_AND_LOAD(5);
+                if (astatic > 3) {
+                    DECR_AND_LOAD(4);
+                }
+            }
+        }
+    }
+#undef DECR_AND_LOAD
+
+    tcg_gen_addi_tl(cpu_gpr[29], cpu_gpr[29], framesize);
+out:
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+}
+
+static void gen_addiupc (DisasContext *ctx, int rx, int imm,
+                         int is_64_bit, int extended)
+{
+    const int in_branch = (ctx->hflags & MIPS_HFLAG_BMASK) != 0;
+    TCGv pc_val;
+
+    if (extended && in_branch) {    /* extended form refused in this state */
+        generate_exception(ctx, EXCP_RI);
+        return;
+    }
+
+    /* GPR rx = pc_relative_pc(ctx) + imm, built through a scratch temp. */
+    pc_val = tcg_temp_new();
+    tcg_gen_movi_tl(pc_val, pc_relative_pc(ctx));
+    tcg_gen_addi_tl(cpu_gpr[rx], pc_val, imm);
+    if (!is_64_bit) {               /* 32-bit form: sign-extend the result */
+        tcg_gen_ext32s_tl(cpu_gpr[rx], cpu_gpr[rx]);
+    }
+    tcg_temp_free(pc_val);
+}
+
+#if defined(TARGET_MIPS64)
+static void decode_i64_mips16 (DisasContext *ctx,
+                               int ry, int funct, int16_t offset,
+                               int extended)
+{ /* Emits the I64-group op picked by funct; short forms scale offset here. */
+    switch (funct) {    /* one case per I64_* value */
+    case I64_LDSP:
+        check_mips_64(ctx);
+        offset = extended ? offset : offset << 3;   /* short form: x8 */
+        gen_ld(ctx, OPC_LD, ry, 29, offset);
+        break;
+    case I64_SDSP:
+        check_mips_64(ctx);
+        offset = extended ? offset : offset << 3;   /* short form: x8 */
+        gen_st(ctx, OPC_SD, ry, 29, offset);
+        break;
+    case I64_SDRASP:
+        check_mips_64(ctx);
+        offset = extended ? offset : (ctx->opcode & 0xff) << 3; /* u8 x8 */
+        gen_st(ctx, OPC_SD, 31, 29, offset);
+        break;
+    case I64_DADJSP:
+        check_mips_64(ctx);
+        offset = extended ? offset : ((int8_t)ctx->opcode) << 3; /* s8 x8 */
+        gen_arith_imm(ctx, OPC_DADDIU, 29, 29, offset);
+        break;
+    case I64_LDPC:  /* NOTE(review): no check_mips_64() here, unlike siblings */
+        if (extended && (ctx->hflags & MIPS_HFLAG_BMASK)) {
+            generate_exception(ctx, EXCP_RI);   /* extended + branch flags */
+        } else {
+            offset = extended ? offset : offset << 3;   /* short form: x8 */
+            gen_ld(ctx, OPC_LDPC, ry, 0, offset);
+        }
+        break;
+    case I64_DADDIU5:
+        check_mips_64(ctx);
+        /* short form keeps the low 5 bits, sign-extended (arithmetic >>) */
+        offset = extended ? offset : ((int8_t)(offset << 3)) >> 3;
+        gen_arith_imm(ctx, OPC_DADDIU, ry, ry, offset);
+        break;
+    case I64_DADDIUPC:
+        check_mips_64(ctx);
+        offset = extended ? offset : offset << 2;   /* short form: x4 */
+        gen_addiupc(ctx, ry, offset, 1, extended);
+        break;
+    case I64_DADDIUSP:
+        check_mips_64(ctx);
+        offset = extended ? offset : offset << 2;   /* short form: x4 */
+        gen_arith_imm(ctx, OPC_DADDIU, ry, 29, offset);
+        break;
+    }
+}
+#endif /* TARGET_MIPS64 */
+
+static int decode_extended_mips16_opc (CPUMIPSState *env, DisasContext *ctx)
+{   /* Decode an EXTEND-prefixed MIPS16 instruction; always returns 4. */
+    int extend = cpu_lduw_code(env, ctx->pc + 2);   /* halfword at pc + 2 */
+    int op, rx, ry, funct, sa;
+    int16_t imm, offset;
+
+    ctx->opcode = (ctx->opcode << 16) | extend;   /* now a 32-bit opcode */
+    op = (ctx->opcode >> 11) & 0x1f;
+    sa = (ctx->opcode >> 22) & 0x1f;
+    funct = (ctx->opcode >> 8) & 0x7;
+    rx = xlat((ctx->opcode >> 8) & 0x7);
+    ry = xlat((ctx->opcode >> 5) & 0x7);
+    offset = imm = (int16_t) (((ctx->opcode >> 16) & 0x1f) << 11
+                              | ((ctx->opcode >> 21) & 0x3f) << 5
+                              | (ctx->opcode & 0x1f));
+
+    /* The extended opcodes cleverly reuse the opcodes from their 16-bit
+       counterparts.  */
+    switch (op) {
+    case M16_OPC_ADDIUSP:
+        gen_arith_imm(ctx, OPC_ADDIU, rx, 29, imm);
+        break;
+    case M16_OPC_ADDIUPC:
+        gen_addiupc(ctx, rx, imm, 0, 1);
+        break;
+    case M16_OPC_B:
+        gen_compute_branch(ctx, OPC_BEQ, 4, 0, 0, offset << 1);
+        /* No delay slot, so just process as a normal instruction */
+        break;
+    case M16_OPC_BEQZ:
+        gen_compute_branch(ctx, OPC_BEQ, 4, rx, 0, offset << 1);
+        /* No delay slot, so just process as a normal instruction */
+        break;
+    case M16_OPC_BNEQZ:
+        gen_compute_branch(ctx, OPC_BNE, 4, rx, 0, offset << 1);
+        /* No delay slot, so just process as a normal instruction */
+        break;
+    case M16_OPC_SHIFT:
+        switch (ctx->opcode & 0x3) {
+        case 0x0:
+            gen_shift_imm(ctx, OPC_SLL, rx, ry, sa);
+            break;
+        case 0x1:
+#if defined(TARGET_MIPS64)
+            check_mips_64(ctx);
+            gen_shift_imm(ctx, OPC_DSLL, rx, ry, sa);
+#else
+            generate_exception(ctx, EXCP_RI);
+#endif
+            break;
+        case 0x2:
+            gen_shift_imm(ctx, OPC_SRL, rx, ry, sa);
+            break;
+        case 0x3:
+            gen_shift_imm(ctx, OPC_SRA, rx, ry, sa);
+            break;
+        }
+        break;
+#if defined(TARGET_MIPS64)
+    case M16_OPC_LD:
+        check_mips_64(ctx);
+        gen_ld(ctx, OPC_LD, ry, rx, offset);
+        break;
+#endif
+    case M16_OPC_RRIA:
+        /* Reassemble the 15-bit immediate, then sign-extend it. */
+        imm = ctx->opcode & 0xf;
+        imm = imm | ((ctx->opcode >> 20) & 0x7f) << 4;
+        imm = imm | ((ctx->opcode >> 16) & 0xf) << 11;
+        imm = (int16_t) (imm << 1) >> 1;
+        if ((ctx->opcode >> 4) & 0x1) {
+#if defined(TARGET_MIPS64)
+            check_mips_64(ctx);
+            gen_arith_imm(ctx, OPC_DADDIU, ry, rx, imm);
+#else
+            generate_exception(ctx, EXCP_RI);
+#endif
+        } else {
+            gen_arith_imm(ctx, OPC_ADDIU, ry, rx, imm);
+        }
+        break;
+    case M16_OPC_ADDIU8:
+        gen_arith_imm(ctx, OPC_ADDIU, rx, rx, imm);
+        break;
+    case M16_OPC_SLTI:
+        gen_slt_imm(ctx, OPC_SLTI, 24, rx, imm);
+        break;
+    case M16_OPC_SLTIU:
+        gen_slt_imm(ctx, OPC_SLTIU, 24, rx, imm);
+        break;
+    case M16_OPC_I8:
+        switch (funct) {
+        case I8_BTEQZ:
+            gen_compute_branch(ctx, OPC_BEQ, 4, 24, 0, offset << 1);
+            break;
+        case I8_BTNEZ:
+            gen_compute_branch(ctx, OPC_BNE, 4, 24, 0, offset << 1);
+            break;
+        case I8_SWRASP:
+            gen_st(ctx, OPC_SW, 31, 29, imm);
+            break;
+        case I8_ADJSP:
+            gen_arith_imm(ctx, OPC_ADDIU, 29, 29, imm);
+            break;
+        case I8_SVRS:
+            {
+                int xsregs = (ctx->opcode >> 24) & 0x7;
+                int aregs = (ctx->opcode >> 16) & 0xf;
+                int do_ra = (ctx->opcode >> 6) & 0x1;
+                int do_s0 = (ctx->opcode >> 5) & 0x1;
+                int do_s1 = (ctx->opcode >> 4) & 0x1;
+                int framesize = (((ctx->opcode >> 20) & 0xf) << 4
+                                 | (ctx->opcode & 0xf)) << 3;
+
+                if (ctx->opcode & (1 << 7)) {   /* bit 7 set: save */
+                    gen_mips16_save(ctx, xsregs, aregs,
+                                    do_ra, do_s0, do_s1, framesize);
+                } else {
+                    gen_mips16_restore(ctx, xsregs, aregs,
+                                       do_ra, do_s0, do_s1, framesize);
+                }
+            }
+            break;
+        default:
+            generate_exception(ctx, EXCP_RI);
+            break;
+        }
+        break;
+    case M16_OPC_LI:
+        tcg_gen_movi_tl(cpu_gpr[rx], (uint16_t) imm);
+        break;
+    case M16_OPC_CMPI:
+        tcg_gen_xori_tl(cpu_gpr[24], cpu_gpr[rx], (uint16_t) imm);
+        break;
+#if defined(TARGET_MIPS64)
+    case M16_OPC_SD:
+        check_mips_64(ctx);   /* as the non-extended M16_OPC_SD decoder does */
+        gen_st(ctx, OPC_SD, ry, rx, offset);
+        break;
+#endif
+    case M16_OPC_LB:
+        gen_ld(ctx, OPC_LB, ry, rx, offset);
+        break;
+    case M16_OPC_LH:
+        gen_ld(ctx, OPC_LH, ry, rx, offset);
+        break;
+    case M16_OPC_LWSP:
+        gen_ld(ctx, OPC_LW, rx, 29, offset);
+        break;
+    case M16_OPC_LW:
+        gen_ld(ctx, OPC_LW, ry, rx, offset);
+        break;
+    case M16_OPC_LBU:
+        gen_ld(ctx, OPC_LBU, ry, rx, offset);
+        break;
+    case M16_OPC_LHU:
+        gen_ld(ctx, OPC_LHU, ry, rx, offset);
+        break;
+    case M16_OPC_LWPC:
+        gen_ld(ctx, OPC_LWPC, rx, 0, offset);
+        break;
+#if defined(TARGET_MIPS64)
+    case M16_OPC_LWU:
+        check_mips_64(ctx);   /* as the non-extended M16_OPC_LWU decoder does */
+        gen_ld(ctx, OPC_LWU, ry, rx, offset);
+        break;
+#endif
+    case M16_OPC_SB:
+        gen_st(ctx, OPC_SB, ry, rx, offset);
+        break;
+    case M16_OPC_SH:
+        gen_st(ctx, OPC_SH, ry, rx, offset);
+        break;
+    case M16_OPC_SWSP:
+        gen_st(ctx, OPC_SW, rx, 29, offset);
+        break;
+    case M16_OPC_SW:
+        gen_st(ctx, OPC_SW, ry, rx, offset);
+        break;
+#if defined(TARGET_MIPS64)
+    case M16_OPC_I64:
+        decode_i64_mips16(ctx, ry, funct, offset, 1);
+        break;
+#endif
+    default:
+        generate_exception(ctx, EXCP_RI);
+        break;
+    }
+
+    return 4;
+}
+
+static int decode_mips16_opc (CPUMIPSState *env, DisasContext *ctx)
+{   /* Decode one MIPS16 instruction; returns its size in bytes (2 or 4). */
+    int rx, ry;
+    int sa;
+    int op, cnvt_op, op1, offset;
+    int funct;
+    int n_bytes;
+
+    op = (ctx->opcode >> 11) & 0x1f;
+    sa = (ctx->opcode >> 2) & 0x7;
+    sa = sa == 0 ? 8 : sa;   /* a zero field encodes a shift amount of 8 */
+    rx = xlat((ctx->opcode >> 8) & 0x7);
+    cnvt_op = (ctx->opcode >> 5) & 0x7;   /* same bits as the ry field */
+    ry = xlat((ctx->opcode >> 5) & 0x7);
+    op1 = offset = ctx->opcode & 0x1f;
+
+    n_bytes = 2;   /* default; JAL and EXTEND set it to 4 below */
+
+    switch (op) {
+    case M16_OPC_ADDIUSP:
+        {
+            int16_t imm = ((uint8_t) ctx->opcode) << 2;
+
+            gen_arith_imm(ctx, OPC_ADDIU, rx, 29, imm);
+        }
+        break;
+    case M16_OPC_ADDIUPC:
+        gen_addiupc(ctx, rx, ((uint8_t) ctx->opcode) << 2, 0, 0);
+        break;
+    case M16_OPC_B:
+        offset = (ctx->opcode & 0x7ff) << 1;
+        offset = (int16_t)(offset << 4) >> 4;   /* sign-extend from 12 bits */
+        gen_compute_branch(ctx, OPC_BEQ, 2, 0, 0, offset);
+        /* No delay slot, so just process as a normal instruction */
+        break;
+    case M16_OPC_JAL:
+        offset = cpu_lduw_code(env, ctx->pc + 2);   /* halfword at pc + 2 */
+        offset = (((ctx->opcode & 0x1f) << 21)
+                  | ((ctx->opcode >> 5) & 0x1f) << 16
+                  | offset) << 2;
+        op = ((ctx->opcode >> 10) & 0x1) ? OPC_JALXS : OPC_JALS;
+        gen_compute_branch(ctx, op, 4, rx, ry, offset);
+        n_bytes = 4;
+        break;
+    case M16_OPC_BEQZ:
+        gen_compute_branch(ctx, OPC_BEQ, 2, rx, 0, ((int8_t)ctx->opcode) << 1);
+        /* No delay slot, so just process as a normal instruction */
+        break;
+    case M16_OPC_BNEQZ:
+        gen_compute_branch(ctx, OPC_BNE, 2, rx, 0, ((int8_t)ctx->opcode) << 1);
+        /* No delay slot, so just process as a normal instruction */
+        break;
+    case M16_OPC_SHIFT:
+        switch (ctx->opcode & 0x3) {
+        case 0x0:
+            gen_shift_imm(ctx, OPC_SLL, rx, ry, sa);
+            break;
+        case 0x1:
+#if defined(TARGET_MIPS64)
+            check_mips_64(ctx);
+            gen_shift_imm(ctx, OPC_DSLL, rx, ry, sa);
+#else
+            generate_exception(ctx, EXCP_RI);
+#endif
+            break;
+        case 0x2:
+            gen_shift_imm(ctx, OPC_SRL, rx, ry, sa);
+            break;
+        case 0x3:
+            gen_shift_imm(ctx, OPC_SRA, rx, ry, sa);
+            break;
+        }
+        break;
+#if defined(TARGET_MIPS64)
+    case M16_OPC_LD:
+        check_mips_64(ctx);
+        gen_ld(ctx, OPC_LD, ry, rx, offset << 3);
+        break;
+#endif
+    case M16_OPC_RRIA:
+        {
+            int16_t imm = (int8_t)((ctx->opcode & 0xf) << 4) >> 4;
+
+            if ((ctx->opcode >> 4) & 1) {
+#if defined(TARGET_MIPS64)
+                check_mips_64(ctx);
+                gen_arith_imm(ctx, OPC_DADDIU, ry, rx, imm);
+#else
+                generate_exception(ctx, EXCP_RI);
+#endif
+            } else {
+                gen_arith_imm(ctx, OPC_ADDIU, ry, rx, imm);
+            }
+        }
+        break;
+    case M16_OPC_ADDIU8:
+        {
+            int16_t imm = (int8_t) ctx->opcode;
+
+            gen_arith_imm(ctx, OPC_ADDIU, rx, rx, imm);
+        }
+        break;
+    case M16_OPC_SLTI:
+        {
+            int16_t imm = (uint8_t) ctx->opcode;
+            gen_slt_imm(ctx, OPC_SLTI, 24, rx, imm);
+        }
+        break;
+    case M16_OPC_SLTIU:
+        {
+            int16_t imm = (uint8_t) ctx->opcode;
+            gen_slt_imm(ctx, OPC_SLTIU, 24, rx, imm);
+        }
+        break;
+    case M16_OPC_I8:
+        {
+            int reg32;
+
+            funct = (ctx->opcode >> 8) & 0x7;
+            switch (funct) {
+            case I8_BTEQZ:
+                gen_compute_branch(ctx, OPC_BEQ, 2, 24, 0,
+                                   ((int8_t)ctx->opcode) << 1);
+                break;
+            case I8_BTNEZ:
+                gen_compute_branch(ctx, OPC_BNE, 2, 24, 0,
+                                   ((int8_t)ctx->opcode) << 1);
+                break;
+            case I8_SWRASP:
+                gen_st(ctx, OPC_SW, 31, 29, (ctx->opcode & 0xff) << 2);
+                break;
+            case I8_ADJSP:
+                gen_arith_imm(ctx, OPC_ADDIU, 29, 29,
+                              ((int8_t)ctx->opcode) << 3);
+                break;
+            case I8_SVRS:
+                {
+                    int do_ra = ctx->opcode & (1 << 6);
+                    int do_s0 = ctx->opcode & (1 << 5);
+                    int do_s1 = ctx->opcode & (1 << 4);
+                    int framesize = ctx->opcode & 0xf;
+
+                    if (framesize == 0) {
+                        framesize = 128;   /* a zero field encodes 128 */
+                    } else {
+                        framesize = framesize << 3;
+                    }
+
+                    if (ctx->opcode & (1 << 7)) {   /* bit 7 set: save */
+                        gen_mips16_save(ctx, 0, 0,
+                                        do_ra, do_s0, do_s1, framesize);
+                    } else {
+                        gen_mips16_restore(ctx, 0, 0,
+                                           do_ra, do_s0, do_s1, framesize);
+                    }
+                }
+                break;
+            case I8_MOV32R:
+                {
+                    int rz = xlat(ctx->opcode & 0x7);
+
+                    reg32 = (((ctx->opcode >> 3) & 0x3) << 3) |
+                        ((ctx->opcode >> 5) & 0x7);
+                    gen_arith(ctx, OPC_ADDU, reg32, rz, 0);
+                }
+                break;
+            case I8_MOVR32:
+                reg32 = ctx->opcode & 0x1f;
+                gen_arith(ctx, OPC_ADDU, ry, reg32, 0);
+                break;
+            default:
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+        }
+        break;
+    case M16_OPC_LI:
+        {
+            int16_t imm = (uint8_t) ctx->opcode;
+
+            gen_arith_imm(ctx, OPC_ADDIU, rx, 0, imm);
+        }
+        break;
+    case M16_OPC_CMPI:
+        {
+            int16_t imm = (uint8_t) ctx->opcode;
+            gen_logic_imm(ctx, OPC_XORI, 24, rx, imm);
+        }
+        break;
+#if defined(TARGET_MIPS64)
+    case M16_OPC_SD:
+        check_mips_64(ctx);
+        gen_st(ctx, OPC_SD, ry, rx, offset << 3);
+        break;
+#endif
+    case M16_OPC_LB:
+        gen_ld(ctx, OPC_LB, ry, rx, offset);
+        break;
+    case M16_OPC_LH:
+        gen_ld(ctx, OPC_LH, ry, rx, offset << 1);
+        break;
+    case M16_OPC_LWSP:
+        gen_ld(ctx, OPC_LW, rx, 29, ((uint8_t)ctx->opcode) << 2);
+        break;
+    case M16_OPC_LW:
+        gen_ld(ctx, OPC_LW, ry, rx, offset << 2);
+        break;
+    case M16_OPC_LBU:
+        gen_ld(ctx, OPC_LBU, ry, rx, offset);
+        break;
+    case M16_OPC_LHU:
+        gen_ld(ctx, OPC_LHU, ry, rx, offset << 1);
+        break;
+    case M16_OPC_LWPC:
+        gen_ld(ctx, OPC_LWPC, rx, 0, ((uint8_t)ctx->opcode) << 2);
+        break;
+#if defined (TARGET_MIPS64)
+    case M16_OPC_LWU:
+        check_mips_64(ctx);
+        gen_ld(ctx, OPC_LWU, ry, rx, offset << 2);
+        break;
+#endif
+    case M16_OPC_SB:
+        gen_st(ctx, OPC_SB, ry, rx, offset);
+        break;
+    case M16_OPC_SH:
+        gen_st(ctx, OPC_SH, ry, rx, offset << 1);
+        break;
+    case M16_OPC_SWSP:
+        gen_st(ctx, OPC_SW, rx, 29, ((uint8_t)ctx->opcode) << 2);
+        break;
+    case M16_OPC_SW:
+        gen_st(ctx, OPC_SW, ry, rx, offset << 2);
+        break;
+    case M16_OPC_RRR:
+        {
+            int rz = xlat((ctx->opcode >> 2) & 0x7);
+            int mips32_op;
+
+            switch (ctx->opcode & 0x3) {
+            case RRR_ADDU:
+                mips32_op = OPC_ADDU;
+                break;
+            case RRR_SUBU:
+                mips32_op = OPC_SUBU;
+                break;
+#if defined(TARGET_MIPS64)
+            case RRR_DADDU:
+                mips32_op = OPC_DADDU;
+                check_mips_64(ctx);
+                break;
+            case RRR_DSUBU:
+                mips32_op = OPC_DSUBU;
+                check_mips_64(ctx);
+                break;
+#endif
+            default:
+                generate_exception(ctx, EXCP_RI);
+                goto done;   /* skip gen_arith: mips32_op was never set */
+            }
+
+            gen_arith(ctx, mips32_op, rz, rx, ry);
+        done:
+            ;
+        }
+        break;
+    case M16_OPC_RR:
+        switch (op1) {
+        case RR_JR:
+            {
+                int nd = (ctx->opcode >> 7) & 0x1;
+                int link = (ctx->opcode >> 6) & 0x1;
+                int ra = (ctx->opcode >> 5) & 0x1;
+
+                if (link) {
+                    op = nd ? OPC_JALRC : OPC_JALRS;
+                } else {
+                    op = OPC_JR;   /* NOTE(review): nd is ignored here */
+                }
+
+                gen_compute_branch(ctx, op, 2, ra ? 31 : rx, 31, 0);
+            }
+            break;
+        case RR_SDBBP:
+            /* XXX: not clear which exception should be raised
+             *      when in debug mode...
+             */
+            check_insn(ctx, ISA_MIPS32);
+            if (!(ctx->hflags & MIPS_HFLAG_DM)) {
+                generate_exception(ctx, EXCP_DBp);
+            } else {   /* NOTE(review): both arms raise EXCP_DBp */
+                generate_exception(ctx, EXCP_DBp);
+            }
+            break;
+        case RR_SLT:
+            gen_slt(ctx, OPC_SLT, 24, rx, ry);
+            break;
+        case RR_SLTU:
+            gen_slt(ctx, OPC_SLTU, 24, rx, ry);
+            break;
+        case RR_BREAK:
+            generate_exception(ctx, EXCP_BREAK);
+            break;
+        case RR_SLLV:
+            gen_shift(ctx, OPC_SLLV, ry, rx, ry);
+            break;
+        case RR_SRLV:
+            gen_shift(ctx, OPC_SRLV, ry, rx, ry);
+            break;
+        case RR_SRAV:
+            gen_shift(ctx, OPC_SRAV, ry, rx, ry);
+            break;
+#if defined (TARGET_MIPS64)
+        case RR_DSRL:
+            check_mips_64(ctx);
+            gen_shift_imm(ctx, OPC_DSRL, ry, ry, sa);
+            break;
+#endif
+        case RR_CMP:
+            gen_logic(ctx, OPC_XOR, 24, rx, ry);
+            break;
+        case RR_NEG:
+            gen_arith(ctx, OPC_SUBU, rx, 0, ry);
+            break;
+        case RR_AND:
+            gen_logic(ctx, OPC_AND, rx, rx, ry);
+            break;
+        case RR_OR:
+            gen_logic(ctx, OPC_OR, rx, rx, ry);
+            break;
+        case RR_XOR:
+            gen_logic(ctx, OPC_XOR, rx, rx, ry);
+            break;
+        case RR_NOT:
+            gen_logic(ctx, OPC_NOR, rx, ry, 0);
+            break;
+        case RR_MFHI:
+            gen_HILO(ctx, OPC_MFHI, 0, rx);
+            break;
+        case RR_CNVT:
+            switch (cnvt_op) {
+            case RR_RY_CNVT_ZEB:
+                tcg_gen_ext8u_tl(cpu_gpr[rx], cpu_gpr[rx]);
+                break;
+            case RR_RY_CNVT_ZEH:
+                tcg_gen_ext16u_tl(cpu_gpr[rx], cpu_gpr[rx]);
+                break;
+            case RR_RY_CNVT_SEB:
+                tcg_gen_ext8s_tl(cpu_gpr[rx], cpu_gpr[rx]);
+                break;
+            case RR_RY_CNVT_SEH:
+                tcg_gen_ext16s_tl(cpu_gpr[rx], cpu_gpr[rx]);
+                break;
+#if defined (TARGET_MIPS64)
+            case RR_RY_CNVT_ZEW:
+                check_mips_64(ctx);
+                tcg_gen_ext32u_tl(cpu_gpr[rx], cpu_gpr[rx]);
+                break;
+            case RR_RY_CNVT_SEW:
+                check_mips_64(ctx);
+                tcg_gen_ext32s_tl(cpu_gpr[rx], cpu_gpr[rx]);
+                break;
+#endif
+            default:
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+            break;
+        case RR_MFLO:
+            gen_HILO(ctx, OPC_MFLO, 0, rx);
+            break;
+#if defined (TARGET_MIPS64)
+        case RR_DSRA:
+            check_mips_64(ctx);
+            gen_shift_imm(ctx, OPC_DSRA, ry, ry, sa);
+            break;
+        case RR_DSLLV:
+            check_mips_64(ctx);
+            gen_shift(ctx, OPC_DSLLV, ry, rx, ry);
+            break;
+        case RR_DSRLV:
+            check_mips_64(ctx);
+            gen_shift(ctx, OPC_DSRLV, ry, rx, ry);
+            break;
+        case RR_DSRAV:
+            check_mips_64(ctx);
+            gen_shift(ctx, OPC_DSRAV, ry, rx, ry);
+            break;
+#endif
+        case RR_MULT:
+            gen_muldiv(ctx, OPC_MULT, 0, rx, ry);
+            break;
+        case RR_MULTU:
+            gen_muldiv(ctx, OPC_MULTU, 0, rx, ry);
+            break;
+        case RR_DIV:
+            gen_muldiv(ctx, OPC_DIV, 0, rx, ry);
+            break;
+        case RR_DIVU:
+            gen_muldiv(ctx, OPC_DIVU, 0, rx, ry);
+            break;
+#if defined (TARGET_MIPS64)
+        case RR_DMULT:
+            check_mips_64(ctx);
+            gen_muldiv(ctx, OPC_DMULT, 0, rx, ry);
+            break;
+        case RR_DMULTU:
+            check_mips_64(ctx);
+            gen_muldiv(ctx, OPC_DMULTU, 0, rx, ry);
+            break;
+        case RR_DDIV:
+            check_mips_64(ctx);
+            gen_muldiv(ctx, OPC_DDIV, 0, rx, ry);
+            break;
+        case RR_DDIVU:
+            check_mips_64(ctx);
+            gen_muldiv(ctx, OPC_DDIVU, 0, rx, ry);
+            break;
+#endif
+        default:
+            generate_exception(ctx, EXCP_RI);
+            break;
+        }
+        break;
+    case M16_OPC_EXTEND:
+        decode_extended_mips16_opc(env, ctx);   /* its return value is unused */
+        n_bytes = 4;
+        break;
+#if defined(TARGET_MIPS64)
+    case M16_OPC_I64:
+        funct = (ctx->opcode >> 8) & 0x7;
+        decode_i64_mips16(ctx, ry, funct, offset, 0);
+        break;
+#endif
+    default:
+        generate_exception(ctx, EXCP_RI);
+        break;
+    }
+
+    return n_bytes;
+}
+
+/* microMIPS extension to MIPS32/MIPS64 */
+
+/*
+ * microMIPS32/microMIPS64 major opcodes
+ *
+ * 1. MIPS Architecture for Programmers Volume II-B:
+ *      The microMIPS32 Instruction Set (Revision 3.05)
+ *
+ *    Table 6.2 microMIPS32 Encoding of Major Opcode Field
+ *
+ * 2. MIPS Architecture For Programmers Volume II-A:
+ *      The MIPS64 Instruction Set (Revision 3.51)
+ */
+
+enum {   /* major opcode values 0x00..0x3f; 0x1f has no enumerator */
+    POOL32A = 0x00,
+    POOL16A = 0x01,
+    LBU16 = 0x02,
+    MOVE16 = 0x03,
+    ADDI32 = 0x04,
+    LBU32 = 0x05,
+    SB32 = 0x06,
+    LB32 = 0x07,
+
+    POOL32B = 0x08,
+    POOL16B = 0x09,
+    LHU16 = 0x0a,
+    ANDI16 = 0x0b,
+    ADDIU32 = 0x0c,
+    LHU32 = 0x0d,
+    SH32 = 0x0e,
+    LH32 = 0x0f,
+
+    POOL32I = 0x10,
+    POOL16C = 0x11,
+    LWSP16 = 0x12,
+    POOL16D = 0x13,
+    ORI32 = 0x14,
+    POOL32F = 0x15,
+    POOL32S = 0x16,  /* MIPS64 */
+    DADDIU32 = 0x17, /* MIPS64 */
+
+    /* 0x1f is reserved (last slot of the 0x18..0x1f group below) */
+    POOL32C = 0x18,
+    LWGP16 = 0x19,
+    LW16 = 0x1a,
+    POOL16E = 0x1b,
+    XORI32 = 0x1c,
+    JALS32 = 0x1d,
+    ADDIUPC = 0x1e,
+
+    /* 0x20 is reserved */
+    RES_20 = 0x20,
+    POOL16F = 0x21,
+    SB16 = 0x22,
+    BEQZ16 = 0x23,
+    SLTI32 = 0x24,
+    BEQ32 = 0x25,
+    SWC132 = 0x26,
+    LWC132 = 0x27,
+
+    /* 0x28 and 0x29 are reserved */
+    RES_28 = 0x28,
+    RES_29 = 0x29,
+    SH16 = 0x2a,
+    BNEZ16 = 0x2b,
+    SLTIU32 = 0x2c,
+    BNE32 = 0x2d,
+    SDC132 = 0x2e,
+    LDC132 = 0x2f,
+
+    /* 0x30 and 0x31 are reserved */
+    RES_30 = 0x30,
+    RES_31 = 0x31,
+    SWSP16 = 0x32,
+    B16 = 0x33,
+    ANDI32 = 0x34,
+    J32 = 0x35,
+    SD32 = 0x36, /* MIPS64 */
+    LD32 = 0x37, /* MIPS64 */
+
+    /* 0x38 and 0x39 are reserved */
+    RES_38 = 0x38,
+    RES_39 = 0x39,
+    SW16 = 0x3a,
+    LI16 = 0x3b,
+    JALX32 = 0x3c,
+    JAL32 = 0x3d,
+    SW32 = 0x3e,
+    LW32 = 0x3f
+};
+
+/* POOL32A encoding of minor opcode field */
+
+enum {   /* values repeat across groups (SLL32, SLLV and MOVN are all 0x0) */
+    /* These opcodes are distinguished only by bits 9..6; those bits are
+     * what are recorded below. */
+    SLL32 = 0x0,
+    SRL32 = 0x1,
+    SRA = 0x2,
+    ROTR = 0x3,
+
+    SLLV = 0x0,
+    SRLV = 0x1,
+    SRAV = 0x2,
+    ROTRV = 0x3,
+    ADD = 0x4,
+    ADDU32 = 0x5,
+    SUB = 0x6,
+    SUBU32 = 0x7,
+    MUL = 0x8,
+    AND = 0x9,
+    OR32 = 0xa,
+    NOR = 0xb,
+    XOR32 = 0xc,
+    SLT = 0xd,
+    SLTU = 0xe,
+
+    MOVN = 0x0,
+    MOVZ = 0x1,
+    LWXS = 0x4,
+
+    /* The following can be distinguished by their lower 6 bits. */
+    INS = 0x0c,
+    EXT = 0x2c,
+    POOL32AXF = 0x3c
+};
+
+/* POOL32AXF encoding of minor opcode field extension */
+
+/*
+ * 1. MIPS Architecture for Programmers Volume II-B:
+ *      The microMIPS32 Instruction Set (Revision 3.05)
+ *
+ *    Table 6.5 POOL32Axf Encoding of Minor Opcode Extension Field
+ *
+ * 2. MIPS Architecture for Programmers Volume IV-e:
+ *      The MIPS DSP Application-Specific Extension
+ *        to the microMIPS32 Architecture (Revision 2.34)
+ *
+ *    Table 5.5 POOL32Axf Encoding of Minor Opcode Extension Field
+ */
+
+enum {   /* each group comment names the bit range its values occupy */
+    /* bits 11..6 */
+    TEQ = 0x00,
+    TGE = 0x08,
+    TGEU = 0x10,
+    TLT = 0x20,
+    TLTU = 0x28,
+    TNE = 0x30,
+
+    MFC0 = 0x03,
+    MTC0 = 0x0b,
+
+    /* begin of microMIPS32 DSP */
+
+    /* bits 13..12 for 0x01 */
+    MFHI_ACC = 0x0,
+    MFLO_ACC = 0x1,
+    MTHI_ACC = 0x2,
+    MTLO_ACC = 0x3,
+
+    /* bits 13..12 for 0x2a */
+    MADD_ACC = 0x0,
+    MADDU_ACC = 0x1,
+    MSUB_ACC = 0x2,
+    MSUBU_ACC = 0x3,
+
+    /* bits 13..12 for 0x32 */
+    MULT_ACC = 0x0,
+    MULTU_ACC = 0x1,
+
+    /* end of microMIPS32 DSP */
+
+    /* bits 15..12 for 0x2c */
+    SEB = 0x2,
+    SEH = 0x3,
+    CLO = 0x4,
+    CLZ = 0x5,
+    RDHWR = 0x6,
+    WSBH = 0x7,
+    MULT = 0x8,
+    MULTU = 0x9,
+    DIV = 0xa,
+    DIVU = 0xb,
+    MADD = 0xc,
+    MADDU = 0xd,
+    MSUB = 0xe,
+    MSUBU = 0xf,
+
+    /* bits 15..12 for 0x34 */
+    MFC2 = 0x4,
+    MTC2 = 0x5,
+    MFHC2 = 0x8,
+    MTHC2 = 0x9,
+    CFC2 = 0xc,
+    CTC2 = 0xd,
+
+    /* bits 15..12 for 0x3c */
+    JALR = 0x0,
+    JR = 0x0,                   /* alias */
+    JALR_HB = 0x1,
+    JALRS = 0x4,
+    JALRS_HB = 0x5,
+
+    /* bits 15..12 for 0x05 */
+    RDPGPR = 0xe,
+    WRPGPR = 0xf,
+
+    /* bits 15..12 for 0x0d */
+    TLBP = 0x0,
+    TLBR = 0x1,
+    TLBWI = 0x2,
+    TLBWR = 0x3,
+    WAIT = 0x9,
+    IRET = 0xd,
+    DERET = 0xe,
+    ERET = 0xf,
+
+    /* bits 15..12 for 0x15 */
+    DMT = 0x0,
+    DVPE = 0x1,
+    EMT = 0x2,
+    EVPE = 0x3,
+
+    /* bits 15..12 for 0x1d */
+    DI = 0x4,
+    EI = 0x5,
+
+    /* bits 15..12 for 0x2d */
+    SYNC = 0x6,
+    SYSCALL = 0x8,
+    SDBBP = 0xd,
+
+    /* bits 15..12 for 0x35 */
+    MFHI32 = 0x0,
+    MFLO32 = 0x1,
+    MTHI32 = 0x2,
+    MTLO32 = 0x3,
+};
+
+/* POOL32B encoding of minor opcode field (bits 15..12) */
+
+enum {   /* LWM32/SWM32/LDM/SDM are switched on by gen_ldst_multiple() */
+    LWC2 = 0x0,
+    LWP = 0x1,
+    LDP = 0x4,
+    LWM32 = 0x5,
+    CACHE = 0x6,
+    LDM = 0x7,
+    SWC2 = 0x8,
+    SWP = 0x9,
+    SDP = 0xc,
+    SWM32 = 0xd,
+    SDM = 0xf
+};
+
+/* POOL32C encoding of minor opcode field (bits 15..12) */
+
+enum {   /* listed in pairs, so the values are not in ascending order */
+    LWL = 0x0,
+    SWL = 0x8,
+    LWR = 0x1,
+    SWR = 0x9,
+    PREF = 0x2,
+    /* 0xa is reserved */
+    LL = 0x3,
+    SC = 0xb,
+    LDL = 0x4,
+    SDL = 0xc,
+    LDR = 0x5,
+    SDR = 0xd,
+    /* 0x6 is reserved */
+    LWU = 0xe,
+    LLD = 0x7,
+    SCD = 0xf
+};
+
+/* POOL32F encoding of minor opcode field (bits 5..0) */
+
+enum {   /* many names share a value; see the bit-range comment of each group */
+    /* These are the bit 7..6 values */
+    ADD_FMT = 0x0,
+    MOVN_FMT = 0x0,
+
+    SUB_FMT = 0x1,
+    MOVZ_FMT = 0x1,
+
+    MUL_FMT = 0x2,
+
+    DIV_FMT = 0x3,
+
+    /* These are the bit 8..6 values */
+    RSQRT2_FMT = 0x0,
+    MOVF_FMT = 0x0,
+
+    LWXC1 = 0x1,
+    MOVT_FMT = 0x1,
+
+    PLL_PS = 0x2,
+    SWXC1 = 0x2,
+
+    PLU_PS = 0x3,
+    LDXC1 = 0x3,
+
+    PUL_PS = 0x4,
+    SDXC1 = 0x4,
+    RECIP2_FMT = 0x4,
+
+    PUU_PS = 0x5,
+    LUXC1 = 0x5,
+
+    CVT_PS_S = 0x6,
+    SUXC1 = 0x6,
+    ADDR_PS = 0x6,
+    PREFX = 0x6,
+
+    MULR_PS = 0x7,
+
+    MADD_S = 0x01,
+    MADD_D = 0x09,
+    MADD_PS = 0x11,
+    ALNV_PS = 0x19,
+    MSUB_S = 0x21,
+    MSUB_D = 0x29,
+    MSUB_PS = 0x31,
+
+    NMADD_S = 0x02,
+    NMADD_D = 0x0a,
+    NMADD_PS = 0x12,
+    NMSUB_S = 0x22,
+    NMSUB_D = 0x2a,
+    NMSUB_PS = 0x32,
+
+    POOL32FXF = 0x3b,
+
+    CABS_COND_FMT = 0x1c,              /* MIPS3D */
+    C_COND_FMT = 0x3c
+};
+
+/* POOL32Fxf encoding of minor opcode extension field */
+
+enum {   /* two groups, each listed in ascending value order */
+    CVT_L = 0x04,
+    RSQRT_FMT = 0x08,
+    FLOOR_L = 0x0c,
+    CVT_PW_PS = 0x1c,
+    CVT_W = 0x24,
+    SQRT_FMT = 0x28,
+    FLOOR_W = 0x2c,
+    CVT_PS_PW = 0x3c,
+    CFC1 = 0x40,
+    RECIP_FMT = 0x48,
+    CEIL_L = 0x4c,
+    CTC1 = 0x60,
+    CEIL_W = 0x6c,
+    MFC1 = 0x80,
+    CVT_S_PL = 0x84,
+    TRUNC_L = 0x8c,
+    MTC1 = 0xa0,
+    CVT_S_PU = 0xa4,
+    TRUNC_W = 0xac,
+    MFHC1 = 0xc0,
+    ROUND_L = 0xcc,
+    MTHC1 = 0xe0,
+    ROUND_W = 0xec,
+
+    MOV_FMT = 0x01,
+    MOVF = 0x05,
+    ABS_FMT = 0x0d,
+    RSQRT1_FMT = 0x1d,
+    MOVT = 0x25,
+    NEG_FMT = 0x2d,
+    CVT_D = 0x4d,
+    RECIP1_FMT = 0x5d,
+    CVT_S = 0x6d
+};
+
+/* POOL32I encoding of minor opcode field (bits 25..21) */
+
+enum {   /* 0x0f, 0x12 and 0x16..0x19 have no enumerator here */
+    BLTZ = 0x00,
+    BLTZAL = 0x01,
+    BGEZ = 0x02,
+    BGEZAL = 0x03,
+    BLEZ = 0x04,
+    BNEZC = 0x05,
+    BGTZ = 0x06,
+    BEQZC = 0x07,
+    TLTI = 0x08,
+    TGEI = 0x09,
+    TLTIU = 0x0a,
+    TGEIU = 0x0b,
+    TNEI = 0x0c,
+    LUI = 0x0d,
+    TEQI = 0x0e,
+    SYNCI = 0x10,
+    BLTZALS = 0x11,
+    BGEZALS = 0x13,
+    BC2F = 0x14,
+    BC2T = 0x15,
+    BPOSGE64 = 0x1a,
+    BPOSGE32 = 0x1b,
+    /* These overlap and are distinguished by bit16 of the instruction */
+    BC1F = 0x1c,
+    BC1T = 0x1d,
+    BC1ANY2F = 0x1c,   /* same value as BC1F */
+    BC1ANY2T = 0x1d,   /* same value as BC1T */
+    BC1ANY4F = 0x1e,
+    BC1ANY4T = 0x1f
+};
+
+/* POOL16A encoding of minor opcode field */
+
+enum {   /* two-way selector: add or subtract */
+    ADDU16 = 0x0,
+    SUBU16 = 0x1
+};
+
+/* POOL16B encoding of minor opcode field */
+
+enum {   /* two-way selector: shift left or shift right */
+    SLL16 = 0x0,
+    SRL16 = 0x1
+};
+
+/* POOL16C encoding of minor opcode field */
+
+enum {   /* matched against (opcode >> 4) & 0x3f in gen_pool16c_insn() */
+    NOT16 = 0x00,
+    XOR16 = 0x04,
+    AND16 = 0x08,
+    OR16 = 0x0c,
+    LWM16 = 0x10,
+    SWM16 = 0x14,
+    JR16 = 0x18,
+    JRC16 = 0x1a,
+    JALR16 = 0x1c,
+    JALR16S = 0x1e,
+    MFHI16 = 0x20,
+    MFLO16 = 0x24,
+    BREAK16 = 0x28,
+    SDBBP16 = 0x2c,
+    JRADDIUSP = 0x30
+};
+
+/* POOL16D encoding of minor opcode field */
+
+enum {   /* presumably selects gen_addius5() vs gen_addiusp() — confirm */
+    ADDIUS5 = 0x0,
+    ADDIUSP = 0x1
+};
+
+/* POOL16E encoding of minor opcode field */
+
+enum {   /* presumably selects gen_addiur2() vs gen_addiur1sp() — confirm */
+    ADDIUR2 = 0x0,
+    ADDIUR1SP = 0x1
+};
+
+static int mmreg (int r)
+{   /* Translate a 3-bit register field (0..7) to a GPR number. */
+    static const int gpr_for_field[] = { 16, 17, 2, 3, 4, 5, 6, 7 };
+
+    return gpr_for_field[r];
+}
+
+/* Register mapping for 16-bit stores: like mmreg, but field 0 yields GPR 0. */
+static int mmreg2 (int r)
+{
+    static const int gpr_for_field[] = { 0, 17, 2, 3, 4, 5, 6, 7 };
+
+    return gpr_for_field[r];
+}
+
+#define uMIPS_RD(op) (((op) >> 7) & 0x7)
+#define uMIPS_RS(op) (((op) >> 4) & 0x7)
+#define uMIPS_RS2(op) uMIPS_RS(op)
+#define uMIPS_RS1(op) (((op) >> 1) & 0x7)
+#define uMIPS_RD5(op) (((op) >> 5) & 0x1f)
+#define uMIPS_RS5(op) ((op) & 0x1f)
+
+/* Signed immediate: the width-bit field of op at bit start, sign-extended */
+#define SIMM(op, start, width)                                          \
+    ((int32_t)((((op) >> (start)) & ((~0U) >> (32-(width))))            \
+               << (32-(width)))                                         \
+     >> (32-(width)))
+/* Zero-extended immediate: the same field, not sign-extended */
+#define ZIMM(op, start, width) (((op) >> (start)) & ((~0U) >> (32-(width))))
+
+static void gen_addiur1sp(DisasContext *ctx)
+{   /* Emit ADDIU rd, $29, imm with imm = opcode bits 6..1 times 4. */
+    int imm = ZIMM(ctx->opcode, 1, 6) << 2;
+
+    gen_arith_imm(ctx, OPC_ADDIU, mmreg(uMIPS_RD(ctx->opcode)), 29, imm);
+}
+
+static void gen_addiur2(DisasContext *ctx)
+{   /* Emit ADDIU rd, rs, imm with imm looked up from a 3-bit field. */
+    static const int imm_table[] = { 1, 4, 8, 12, 16, 20, 24, -1 };
+    int imm = imm_table[ZIMM(ctx->opcode, 1, 3)];
+    int dst = mmreg(uMIPS_RD(ctx->opcode));
+
+    gen_arith_imm(ctx, OPC_ADDIU, dst, mmreg(uMIPS_RS(ctx->opcode)), imm);
+}
+
+static void gen_addiusp(DisasContext *ctx)
+{   /* Emit ADDIU $29, $29, imm; imm is decoded from a 9-bit field. */
+    int field = ZIMM(ctx->opcode, 1, 9);
+    int words;   /* decoded immediate before the final multiply by 4 */
+
+    if (field <= 1) {
+        words = 256 + field;        /* 0, 1       -> 256, 257   */
+    } else if (field <= 255) {
+        words = field;              /* 2..255     -> unchanged  */
+    } else if (field <= 509) {
+        words = field - 512;        /* 256..509   -> -256..-3   */
+    } else {
+        words = field - 768;        /* 510, 511   -> -258, -257 */
+    }
+
+    gen_arith_imm(ctx, OPC_ADDIU, 29, 29, words * 4);
+}
+
+static void gen_addius5(DisasContext *ctx)
+{   /* Emit ADDIU rd, rd, imm: signed 4-bit imm, 5-bit register field. */
+    int reg = uMIPS_RD5(ctx->opcode);
+    int delta = SIMM(ctx->opcode, 1, 4);
+
+    gen_arith_imm(ctx, OPC_ADDIU, reg, reg, delta);
+}
+
+static void gen_andi16(DisasContext *ctx)
+{   /* Emit ANDI rd, rs, mask; the mask is picked by opcode bits 3..0. */
+    static const int mask_table[] = { 128, 1, 2, 3, 4, 7, 8, 15, 16,
+                                      31, 32, 63, 64, 255, 32768, 65535 };
+    int sel = ZIMM(ctx->opcode, 0, 4);
+    int dst = mmreg(uMIPS_RD(ctx->opcode));
+    int src = mmreg(uMIPS_RS(ctx->opcode));
+
+    gen_logic_imm(ctx, OPC_ANDI, dst, src, mask_table[sel]);
+}
+
+static void gen_ldst_multiple (DisasContext *ctx, uint32_t opc, int reglist,
+                               int base, int16_t offset)
+{   /* Emit a call to the load/store-multiple helper selected by opc. */
+    const char *opn = "ldst_multiple";
+    TCGv addr, regs;
+    TCGv_i32 mem_idx;
+
+    if (ctx->hflags & MIPS_HFLAG_BMASK) {   /* any BMASK bit set: raise RI */
+        generate_exception(ctx, EXCP_RI);
+        return;
+    }
+
+    addr = tcg_temp_new();
+
+    gen_base_offset_addr(ctx, addr, base, offset);
+
+    regs = tcg_const_tl(reglist);
+    mem_idx = tcg_const_i32(ctx->mem_idx);
+
+    save_cpu_state(ctx, 1);
+    switch (opc) {   /* an opc not listed here emits no helper call */
+    case LWM32:
+        gen_helper_lwm(cpu_env, addr, regs, mem_idx);
+        opn = "lwm";
+        break;
+    case SWM32:
+        gen_helper_swm(cpu_env, addr, regs, mem_idx);
+        opn = "swm";
+        break;
+#ifdef TARGET_MIPS64
+    case LDM:
+        gen_helper_ldm(cpu_env, addr, regs, mem_idx);
+        opn = "ldm";
+        break;
+    case SDM:
+        gen_helper_sdm(cpu_env, addr, regs, mem_idx);
+        opn = "sdm";
+        break;
+#endif
+    }
+    (void)opn;   /* opn is otherwise only passed to MIPS_DEBUG */
+    MIPS_DEBUG("%s, %x, %d(%s)", opn, reglist, offset, regnames[base]);
+    tcg_temp_free(addr);
+    tcg_temp_free(regs);
+    tcg_temp_free_i32(mem_idx);
+}
+
+
+static void gen_pool16c_insn(DisasContext *ctx)
+{
+    int dst = mmreg((ctx->opcode >> 3) & 0x7);
+    int src = mmreg(ctx->opcode & 0x7);
+    int link_op;
+    /* Opcode bits 9..4; the "+ n" labels span bits shared with operands. */
+    switch (((ctx->opcode) >> 4) & 0x3f) {
+    case NOT16 + 0:
+    case NOT16 + 1:
+    case NOT16 + 2:
+    case NOT16 + 3:
+        gen_logic(ctx, OPC_NOR, dst, src, 0);
+        break;
+    case XOR16 + 0:
+    case XOR16 + 1:
+    case XOR16 + 2:
+    case XOR16 + 3:
+        gen_logic(ctx, OPC_XOR, dst, dst, src);
+        break;
+    case AND16 + 0:
+    case AND16 + 1:
+    case AND16 + 2:
+    case AND16 + 3:
+        gen_logic(ctx, OPC_AND, dst, dst, src);
+        break;
+    case OR16 + 0:
+    case OR16 + 1:
+    case OR16 + 2:
+    case OR16 + 3:
+        gen_logic(ctx, OPC_OR, dst, dst, src);
+        break;
+    case LWM16 + 0:
+    case LWM16 + 1:
+    case LWM16 + 2:
+    case LWM16 + 3:
+        {
+            static const int reglist_for[] = { 0x11, 0x12, 0x13, 0x14 };
+            int sel = (ctx->opcode >> 4) & 0x3;
+            int words = ZIMM(ctx->opcode, 0, 4);
+            /* Reuse the LWM32 path: base is GPR 29, offset is words * 4. */
+            gen_ldst_multiple(ctx, LWM32, reglist_for[sel], 29, words << 2);
+        }
+        break;
+    case SWM16 + 0:
+    case SWM16 + 1:
+    case SWM16 + 2:
+    case SWM16 + 3:
+        {
+            static const int reglist_for[] = { 0x11, 0x12, 0x13, 0x14 };
+            int sel = (ctx->opcode >> 4) & 0x3;
+            int words = ZIMM(ctx->opcode, 0, 4);
+            /* Reuse the SWM32 path: base is GPR 29, offset is words * 4. */
+            gen_ldst_multiple(ctx, SWM32, reglist_for[sel], 29, words << 2);
+        }
+        break;
+    case JR16 + 0:
+    case JR16 + 1:
+        {
+            int target = ctx->opcode & 0x1f;
+            /* Jump register number comes from the low five opcode bits. */
+            gen_compute_branch(ctx, OPC_JR, 2, target, 0, 0);
+        }
+        break;
+    case JRC16 + 0:
+    case JRC16 + 1:
+        {
+            int target = ctx->opcode & 0x1f;
+            /* Generated with the same call as JR16. */
+            gen_compute_branch(ctx, OPC_JR, 2, target, 0, 0);
+            /* Let normal delay slot handling in our caller take us
+               to the branch target.  */
+        }
+        break;
+    case JALR16 + 0:
+    case JALR16 + 1:
+        link_op = OPC_JALR;
+        goto do_jalr;
+    case JALR16S + 0:
+    case JALR16S + 1:
+        link_op = OPC_JALRS;
+    do_jalr:
+        {
+            int target = ctx->opcode & 0x1f;
+            /* Both linking forms pass 31 as the fifth argument. */
+            gen_compute_branch(ctx, link_op, 2, target, 31, 0);
+        }
+        break;
+    case MFHI16 + 0:
+    case MFHI16 + 1:
+        gen_HILO(ctx, OPC_MFHI, 0, uMIPS_RS5(ctx->opcode));
+        break;
+    case MFLO16 + 0:
+    case MFLO16 + 1:
+        gen_HILO(ctx, OPC_MFLO, 0, uMIPS_RS5(ctx->opcode));
+        break;
+    case BREAK16:
+        generate_exception(ctx, EXCP_BREAK);
+        break;
+    case SDBBP16:
+        check_insn(ctx, ISA_MIPS32);
+        /* XXX: not clear which exception should be raised
+         *      when in debug mode...
+         *
+         * Until that is settled MIPS_HFLAG_DM is not consulted:
+         * EXCP_DBp is raised whether or not the flag is set, which
+         * is what a two-armed test with identical arms would do.
+         */
+        generate_exception(ctx, EXCP_DBp);
+        break;
+    case JRADDIUSP + 0:
+    case JRADDIUSP + 1:
+        {
+            int words = ZIMM(ctx->opcode, 0, 5);
+            /* Jump through GPR 31, then add words * 4 to GPR 29. */
+            gen_compute_branch(ctx, OPC_JR, 2, 31, 0, 0);
+            gen_arith_imm(ctx, OPC_ADDIU, 29, 29, words << 2);
+            /* Let normal delay slot handling in our caller take us
+               to the branch target.  */
+        }
+        break;
+    default:
+        generate_exception(ctx, EXCP_RI);
+        break;
+    }
+}
+
+static void gen_ldxs (DisasContext *ctx, int base, int index, int rd)
+{
+    TCGv addr = tcg_temp_new();
+    TCGv val = tcg_temp_new();
+    /* addr starts out as the contents of GPR[base]. */
+    gen_load_gpr(addr, base);
+    /* Register number 0 as index skips the scaled add entirely. */
+    if (index != 0) {
+        gen_load_gpr(val, index);        /* val doubles as scratch here */
+        tcg_gen_shli_tl(val, val, 2);    /* index * 4 */
+        gen_op_addr_add(ctx, addr, val, addr);
+    }
+    /* MO_TESL load through ctx->mem_idx; the result goes to GPR[rd]. */
+    tcg_gen_qemu_ld_tl(val, addr, ctx->mem_idx, MO_TESL);
+    gen_store_gpr(val, rd);
+    /* Release both temporaries. */
+    tcg_temp_free(addr);
+    tcg_temp_free(val);
+}
+
+static void gen_ldst_pair (DisasContext *ctx, uint32_t opc, int rd,
+                           int base, int16_t offset)
+{
+    const char *opn = "ldst_pair";
+    TCGv t0, t1;
+    /* Emit a two-register load/store (LWP/SWP; LDP/SDP with TARGET_MIPS64). */
+    if (ctx->hflags & MIPS_HFLAG_BMASK || rd == 31) { /* rd + 1 is used too */
+        generate_exception(ctx, EXCP_RI);
+        return;
+    }
+    /* Temporaries live from here on: later exits must pass through "done". */
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+    /* t0 carries the address, t1 the data (and the address increment). */
+    gen_base_offset_addr(ctx, t0, base, offset);
+    switch (opc) {
+    case LWP:
+        if (rd == base) {
+            /* Loading into the base register is rejected; free the temps. */
+            generate_exception(ctx, EXCP_RI);
+            goto done;
+        }
+        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL);
+        gen_store_gpr(t1, rd);
+        tcg_gen_movi_tl(t1, 4);                 /* step to the second word */
+        gen_op_addr_add(ctx, t0, t0, t1);
+        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL);
+        gen_store_gpr(t1, rd+1);
+        opn = "lwp";
+        break;
+    case SWP:
+        gen_load_gpr(t1, rd);
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
+        tcg_gen_movi_tl(t1, 4);                 /* step to the second word */
+        gen_op_addr_add(ctx, t0, t0, t1);
+        gen_load_gpr(t1, rd+1);
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
+        opn = "swp";
+        break;
+#ifdef TARGET_MIPS64
+    case LDP:
+        if (rd == base) {
+            /* Loading into the base register is rejected; free the temps. */
+            generate_exception(ctx, EXCP_RI);
+            goto done;
+        }
+        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEQ);
+        gen_store_gpr(t1, rd);
+        tcg_gen_movi_tl(t1, 8);                 /* step to the second dword */
+        gen_op_addr_add(ctx, t0, t0, t1);
+        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEQ);
+        gen_store_gpr(t1, rd+1);
+        opn = "ldp";
+        break;
+    case SDP:
+        gen_load_gpr(t1, rd);
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ);
+        tcg_gen_movi_tl(t1, 8);                 /* step to the second dword */
+        gen_op_addr_add(ctx, t0, t0, t1);
+        gen_load_gpr(t1, rd+1);
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ);
+        opn = "sdp";
+        break;
+#endif
+    }
+    (void)opn; /* avoid a compiler warning */
+    MIPS_DEBUG("%s, %s, %d(%s)", opn, regnames[rd], offset, regnames[base]);
+done:
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+}
+
+static void gen_pool32axf (CPUMIPSState *env, DisasContext *ctx, int rt, int rs)
+{
+    int extension = (ctx->opcode >> 6) & 0x3f;  /* opcode bits 11..6 */
+    int minor = (ctx->opcode >> 12) & 0xf;      /* opcode bits 15..12 */
+    uint32_t mips32_op;
+    /* Top level selects on extension; most arms then sub-select on minor. */
+    switch (extension) {
+    case TEQ:
+        mips32_op = OPC_TEQ;
+        goto do_trap;
+    case TGE:
+        mips32_op = OPC_TGE;
+        goto do_trap;
+    case TGEU:
+        mips32_op = OPC_TGEU;
+        goto do_trap;
+    case TLT:
+        mips32_op = OPC_TLT;
+        goto do_trap;
+    case TLTU:
+        mips32_op = OPC_TLTU;
+        goto do_trap;
+    case TNE:
+        mips32_op = OPC_TNE;
+    do_trap: /* shared tail for all six trap cases */
+        gen_trap(ctx, mips32_op, rs, rt, -1);
+        break;
+#ifndef CONFIG_USER_ONLY
+    case MFC0: /* rs and opcode bits 13..11 are handed to gen_mfc0 */
+    case MFC0 + 32:
+        check_cp0_enabled(ctx);
+        if (rt == 0) {
+            /* Treat as NOP. */
+            break;
+        }
+        gen_mfc0(ctx, cpu_gpr[rt], rs, (ctx->opcode >> 11) & 0x7);
+        break;
+    case MTC0: /* rs and opcode bits 13..11 are handed to gen_mtc0 */
+    case MTC0 + 32:
+        check_cp0_enabled(ctx);
+        {
+            TCGv t0 = tcg_temp_new();
+
+            gen_load_gpr(t0, rt);
+            gen_mtc0(ctx, t0, rs, (ctx->opcode >> 11) & 0x7);
+            tcg_temp_free(t0);
+        }
+        break;
+#endif
+    case 0x2a: /* opcode bits 15..14 become gen_muldiv's third argument */
+        switch (minor & 3) {
+        case MADD_ACC:
+            gen_muldiv(ctx, OPC_MADD, (ctx->opcode >> 14) & 3, rs, rt);
+            break;
+        case MADDU_ACC:
+            gen_muldiv(ctx, OPC_MADDU, (ctx->opcode >> 14) & 3, rs, rt);
+            break;
+        case MSUB_ACC:
+            gen_muldiv(ctx, OPC_MSUB, (ctx->opcode >> 14) & 3, rs, rt);
+            break;
+        case MSUBU_ACC:
+            gen_muldiv(ctx, OPC_MSUBU, (ctx->opcode >> 14) & 3, rs, rt);
+            break;
+        default:
+            goto pool32axf_invalid;
+        }
+        break;
+    case 0x32: /* opcode bits 15..14 become gen_muldiv's third argument */
+        switch (minor & 3) {
+        case MULT_ACC:
+            gen_muldiv(ctx, OPC_MULT, (ctx->opcode >> 14) & 3, rs, rt);
+            break;
+        case MULTU_ACC:
+            gen_muldiv(ctx, OPC_MULTU, (ctx->opcode >> 14) & 3, rs, rt);
+            break;
+        default:
+            goto pool32axf_invalid;
+        }
+        break;
+    case 0x2c:
+        switch (minor) {
+        case SEB:
+            gen_bshfl(ctx, OPC_SEB, rs, rt);
+            break;
+        case SEH:
+            gen_bshfl(ctx, OPC_SEH, rs, rt);
+            break;
+        case CLO:
+            mips32_op = OPC_CLO;
+            goto do_cl;
+        case CLZ:
+            mips32_op = OPC_CLZ;
+        do_cl: /* shared tail for CLO and CLZ */
+            check_insn(ctx, ISA_MIPS32);
+            gen_cl(ctx, mips32_op, rt, rs);
+            break;
+        case RDHWR:
+            gen_rdhwr(ctx, rt, rs);
+            break;
+        case WSBH:
+            gen_bshfl(ctx, OPC_WSBH, rs, rt);
+            break;
+        case MULT:
+            mips32_op = OPC_MULT;
+            goto do_mul;
+        case MULTU:
+            mips32_op = OPC_MULTU;
+            goto do_mul;
+        case DIV:
+            mips32_op = OPC_DIV;
+            goto do_div;
+        case DIVU:
+            mips32_op = OPC_DIVU;
+            goto do_div;
+        do_div: /* same two calls as do_mul below, with 0 as third argument */
+            check_insn(ctx, ISA_MIPS32);
+            gen_muldiv(ctx, mips32_op, 0, rs, rt);
+            break;
+        case MADD:
+            mips32_op = OPC_MADD;
+            goto do_mul;
+        case MADDU:
+            mips32_op = OPC_MADDU;
+            goto do_mul;
+        case MSUB:
+            mips32_op = OPC_MSUB;
+            goto do_mul;
+        case MSUBU:
+            mips32_op = OPC_MSUBU;
+        do_mul: /* shared tail for MULT/MULTU/MADD/MADDU/MSUB/MSUBU */
+            check_insn(ctx, ISA_MIPS32);
+            gen_muldiv(ctx, mips32_op, 0, rs, rt);
+            break;
+        default:
+            goto pool32axf_invalid;
+        }
+        break;
+    case 0x34: /* every listed COP2 op raises EXCP_CpU with error code 2 */
+        switch (minor) {
+        case MFC2:
+        case MTC2:
+        case MFHC2:
+        case MTHC2:
+        case CFC2:
+        case CTC2:
+            generate_exception_err(ctx, EXCP_CpU, 2);
+            break;
+        default:
+            goto pool32axf_invalid;
+        }
+        break;
+    case 0x3c: /* the _HB variants are generated like their plain forms */
+        switch (minor) {
+        case JALR:
+        case JALR_HB:
+            gen_compute_branch (ctx, OPC_JALR, 4, rs, rt, 0);
+            break;
+        case JALRS:
+        case JALRS_HB:
+            gen_compute_branch (ctx, OPC_JALRS, 4, rs, rt, 0);
+            break;
+        default:
+            goto pool32axf_invalid;
+        }
+        break;
+    case 0x05: /* both ops check CP0 access and ISA_MIPS32R2 first */
+        switch (minor) {
+        case RDPGPR:
+            check_cp0_enabled(ctx);
+            check_insn(ctx, ISA_MIPS32R2);
+            gen_load_srsgpr(rt, rs);
+            break;
+        case WRPGPR:
+            check_cp0_enabled(ctx);
+            check_insn(ctx, ISA_MIPS32R2);
+            gen_store_srsgpr(rt, rs);
+            break;
+        default:
+            goto pool32axf_invalid;
+        }
+        break;
+#ifndef CONFIG_USER_ONLY
+    case 0x0d: /* each minor maps to a standard opcode passed to gen_cp0 */
+        switch (minor) {
+        case TLBP:
+            mips32_op = OPC_TLBP;
+            goto do_cp0;
+        case TLBR:
+            mips32_op = OPC_TLBR;
+            goto do_cp0;
+        case TLBWI:
+            mips32_op = OPC_TLBWI;
+            goto do_cp0;
+        case TLBWR:
+            mips32_op = OPC_TLBWR;
+            goto do_cp0;
+        case WAIT:
+            mips32_op = OPC_WAIT;
+            goto do_cp0;
+        case DERET:
+            mips32_op = OPC_DERET;
+            goto do_cp0;
+        case ERET:
+            mips32_op = OPC_ERET;
+        do_cp0: /* shared tail for the seven cases above */
+            gen_cp0(env, ctx, mips32_op, rt, rs);
+            break;
+        default:
+            goto pool32axf_invalid;
+        }
+        break;
+    case 0x1d: /* DI/EI: helper result is written to GPR[rs] */
+        switch (minor) {
+        case DI:
+            check_cp0_enabled(ctx);
+            {
+                TCGv t0 = tcg_temp_new();
+
+                save_cpu_state(ctx, 1);
+                gen_helper_di(t0, cpu_env);
+                gen_store_gpr(t0, rs);
+                /* Stop translation as we may have switched the execution mode */
+                ctx->bstate = BS_STOP;
+                tcg_temp_free(t0);
+            }
+            break;
+        case EI:
+            check_cp0_enabled(ctx);
+            {
+                TCGv t0 = tcg_temp_new();
+
+                save_cpu_state(ctx, 1);
+                gen_helper_ei(t0, cpu_env);
+                gen_store_gpr(t0, rs);
+                /* Stop translation as we may have switched the execution mode */
+                ctx->bstate = BS_STOP;
+                tcg_temp_free(t0);
+            }
+            break;
+        default:
+            goto pool32axf_invalid;
+        }
+        break;
+#endif
+    case 0x2d:
+        switch (minor) {
+        case SYNC:
+            /* NOP */
+            break;
+        case SYSCALL:
+            generate_exception(ctx, EXCP_SYSCALL);
+            ctx->bstate = BS_STOP;
+            break;
+        case SDBBP:
+            check_insn(ctx, ISA_MIPS32);
+            if (!(ctx->hflags & MIPS_HFLAG_DM)) { /* both arms raise EXCP_DBp */
+                generate_exception(ctx, EXCP_DBp);
+            } else {
+                generate_exception(ctx, EXCP_DBp);
+            }
+            break;
+        default:
+            goto pool32axf_invalid;
+        }
+        break;
+    case 0x01: /* minor >> 2 becomes gen_HILO's third argument */
+        switch (minor & 3) {
+        case MFHI_ACC:
+            gen_HILO(ctx, OPC_MFHI, minor >> 2, rs);
+            break;
+        case MFLO_ACC:
+            gen_HILO(ctx, OPC_MFLO, minor >> 2, rs);
+            break;
+        case MTHI_ACC:
+            gen_HILO(ctx, OPC_MTHI, minor >> 2, rs);
+            break;
+        case MTLO_ACC:
+            gen_HILO(ctx, OPC_MTLO, minor >> 2, rs);
+            break;
+        default:
+            goto pool32axf_invalid;
+        }
+        break;
+    case 0x35: /* like case 0x01 but with a fixed 0 as third argument */
+        switch (minor) {
+        case MFHI32:
+            gen_HILO(ctx, OPC_MFHI, 0, rs);
+            break;
+        case MFLO32:
+            gen_HILO(ctx, OPC_MFLO, 0, rs);
+            break;
+        case MTHI32:
+            gen_HILO(ctx, OPC_MTHI, 0, rs);
+            break;
+        case MTLO32:
+            gen_HILO(ctx, OPC_MTLO, 0, rs);
+            break;
+        default:
+            goto pool32axf_invalid;
+        }
+        break;
+    default:
+    pool32axf_invalid: /* also the target of every inner "default" above */
+        MIPS_INVAL("pool32axf");
+        generate_exception(ctx, EXCP_RI);
+        break;
+    }
+}
+
+/* Values for microMIPS fmt field.  Variable-width, depending on which
+   formats the instruction supports.  Each group restarts at 0, so a
+   constant is only meaningful for instructions decoded with its group.  */
+enum {
+    FMT_SD_S = 0,       /* selects the _S opcode (e.g. OPC_SQRT_S) */
+    FMT_SD_D = 1,       /* selects the _D opcode (e.g. OPC_SQRT_D) */
+
+    FMT_SDPS_S = 0,     /* selects the _S opcode (e.g. OPC_MOV_S) */
+    FMT_SDPS_D = 1,     /* selects the _D opcode (e.g. OPC_MOV_D) */
+    FMT_SDPS_PS = 2,    /* selects the _PS opcode (e.g. OPC_MOV_PS) */
+
+    FMT_SWL_S = 0,      /* used with CVT_D: selects OPC_CVT_D_S */
+    FMT_SWL_W = 1,      /* used with CVT_D: selects OPC_CVT_D_W */
+    FMT_SWL_L = 2,      /* used with CVT_D: selects OPC_CVT_D_L */
+
+    FMT_DWL_D = 0,      /* used with CVT_S: selects OPC_CVT_S_D */
+    FMT_DWL_W = 1,      /* used with CVT_S: selects OPC_CVT_S_W */
+    FMT_DWL_L = 2       /* used with CVT_S: selects OPC_CVT_S_L */
+};
+
+static void gen_pool32fxf(DisasContext *ctx, int rt, int rs)
+{
+    int extension = (ctx->opcode >> 6) & 0x3ff;   /* opcode bits 15..6 */
+    uint32_t mips32_op;
+    /* Map a POOL32FXF extension value onto the matching standard FP opcode. */
+#define FLOAT_1BIT_FMT(opc, fmt) (((fmt) << 8) | (opc))
+#define FLOAT_2BIT_FMT(opc, fmt) (((fmt) << 7) | (opc))
+#define COND_FLOAT_MOV(opc, cond) (((cond) << 7) | (opc))
+    /* Arguments and expansions are parenthesized so any expression is safe. */
+    switch (extension) {
+    case FLOAT_1BIT_FMT(CFC1, 0):
+        mips32_op = OPC_CFC1;
+        goto do_cp1;
+    case FLOAT_1BIT_FMT(CTC1, 0):
+        mips32_op = OPC_CTC1;
+        goto do_cp1;
+    case FLOAT_1BIT_FMT(MFC1, 0):
+        mips32_op = OPC_MFC1;
+        goto do_cp1;
+    case FLOAT_1BIT_FMT(MTC1, 0):
+        mips32_op = OPC_MTC1;
+        goto do_cp1;
+    case FLOAT_1BIT_FMT(MFHC1, 0):
+        mips32_op = OPC_MFHC1;
+        goto do_cp1;
+    case FLOAT_1BIT_FMT(MTHC1, 0):
+        mips32_op = OPC_MTHC1;
+    do_cp1: /* shared tail for the six CP1 register moves above */
+        gen_cp1(ctx, mips32_op, rt, rs);
+        break;
+
+        /* Reciprocal square root */
+    case FLOAT_1BIT_FMT(RSQRT_FMT, FMT_SD_S):
+        mips32_op = OPC_RSQRT_S;
+        goto do_unaryfp;
+    case FLOAT_1BIT_FMT(RSQRT_FMT, FMT_SD_D):
+        mips32_op = OPC_RSQRT_D;
+        goto do_unaryfp;
+
+        /* Square root */
+    case FLOAT_1BIT_FMT(SQRT_FMT, FMT_SD_S):
+        mips32_op = OPC_SQRT_S;
+        goto do_unaryfp;
+    case FLOAT_1BIT_FMT(SQRT_FMT, FMT_SD_D):
+        mips32_op = OPC_SQRT_D;
+        goto do_unaryfp;
+
+        /* Reciprocal */
+    case FLOAT_1BIT_FMT(RECIP_FMT, FMT_SD_S):
+        mips32_op = OPC_RECIP_S;
+        goto do_unaryfp;
+    case FLOAT_1BIT_FMT(RECIP_FMT, FMT_SD_D):
+        mips32_op = OPC_RECIP_D;
+        goto do_unaryfp;
+
+        /* Floor */
+    case FLOAT_1BIT_FMT(FLOOR_L, FMT_SD_S):
+        mips32_op = OPC_FLOOR_L_S;
+        goto do_unaryfp;
+    case FLOAT_1BIT_FMT(FLOOR_L, FMT_SD_D):
+        mips32_op = OPC_FLOOR_L_D;
+        goto do_unaryfp;
+    case FLOAT_1BIT_FMT(FLOOR_W, FMT_SD_S):
+        mips32_op = OPC_FLOOR_W_S;
+        goto do_unaryfp;
+    case FLOAT_1BIT_FMT(FLOOR_W, FMT_SD_D):
+        mips32_op = OPC_FLOOR_W_D;
+        goto do_unaryfp;
+
+        /* Ceiling */
+    case FLOAT_1BIT_FMT(CEIL_L, FMT_SD_S):
+        mips32_op = OPC_CEIL_L_S;
+        goto do_unaryfp;
+    case FLOAT_1BIT_FMT(CEIL_L, FMT_SD_D):
+        mips32_op = OPC_CEIL_L_D;
+        goto do_unaryfp;
+    case FLOAT_1BIT_FMT(CEIL_W, FMT_SD_S):
+        mips32_op = OPC_CEIL_W_S;
+        goto do_unaryfp;
+    case FLOAT_1BIT_FMT(CEIL_W, FMT_SD_D):
+        mips32_op = OPC_CEIL_W_D;
+        goto do_unaryfp;
+
+        /* Truncation */
+    case FLOAT_1BIT_FMT(TRUNC_L, FMT_SD_S):
+        mips32_op = OPC_TRUNC_L_S;
+        goto do_unaryfp;
+    case FLOAT_1BIT_FMT(TRUNC_L, FMT_SD_D):
+        mips32_op = OPC_TRUNC_L_D;
+        goto do_unaryfp;
+    case FLOAT_1BIT_FMT(TRUNC_W, FMT_SD_S):
+        mips32_op = OPC_TRUNC_W_S;
+        goto do_unaryfp;
+    case FLOAT_1BIT_FMT(TRUNC_W, FMT_SD_D):
+        mips32_op = OPC_TRUNC_W_D;
+        goto do_unaryfp;
+
+        /* Round */
+    case FLOAT_1BIT_FMT(ROUND_L, FMT_SD_S):
+        mips32_op = OPC_ROUND_L_S;
+        goto do_unaryfp;
+    case FLOAT_1BIT_FMT(ROUND_L, FMT_SD_D):
+        mips32_op = OPC_ROUND_L_D;
+        goto do_unaryfp;
+    case FLOAT_1BIT_FMT(ROUND_W, FMT_SD_S):
+        mips32_op = OPC_ROUND_W_S;
+        goto do_unaryfp;
+    case FLOAT_1BIT_FMT(ROUND_W, FMT_SD_D):
+        mips32_op = OPC_ROUND_W_D;
+        goto do_unaryfp;
+
+        /* Floating-point to integer (long/word) conversion */
+    case FLOAT_1BIT_FMT(CVT_L, FMT_SD_S):
+        mips32_op = OPC_CVT_L_S;
+        goto do_unaryfp;
+    case FLOAT_1BIT_FMT(CVT_L, FMT_SD_D):
+        mips32_op = OPC_CVT_L_D;
+        goto do_unaryfp;
+    case FLOAT_1BIT_FMT(CVT_W, FMT_SD_S):
+        mips32_op = OPC_CVT_W_S;
+        goto do_unaryfp;
+    case FLOAT_1BIT_FMT(CVT_W, FMT_SD_D):
+        mips32_op = OPC_CVT_W_D;
+        goto do_unaryfp;
+
+        /* Paired-single conversions */
+    case FLOAT_1BIT_FMT(CVT_S_PL, 0):
+        mips32_op = OPC_CVT_S_PL;
+        goto do_unaryfp;
+    case FLOAT_1BIT_FMT(CVT_S_PU, 0):
+        mips32_op = OPC_CVT_S_PU;
+        goto do_unaryfp;
+    case FLOAT_1BIT_FMT(CVT_PW_PS, 0):
+        mips32_op = OPC_CVT_PW_PS;
+        goto do_unaryfp;
+    case FLOAT_1BIT_FMT(CVT_PS_PW, 0):
+        mips32_op = OPC_CVT_PS_PW;
+        goto do_unaryfp;
+
+        /* Floating-point moves */
+    case FLOAT_2BIT_FMT(MOV_FMT, FMT_SDPS_S):
+        mips32_op = OPC_MOV_S;
+        goto do_unaryfp;
+    case FLOAT_2BIT_FMT(MOV_FMT, FMT_SDPS_D):
+        mips32_op = OPC_MOV_D;
+        goto do_unaryfp;
+    case FLOAT_2BIT_FMT(MOV_FMT, FMT_SDPS_PS):
+        mips32_op = OPC_MOV_PS;
+        goto do_unaryfp;
+
+        /* Absolute value */
+    case FLOAT_2BIT_FMT(ABS_FMT, FMT_SDPS_S):
+        mips32_op = OPC_ABS_S;
+        goto do_unaryfp;
+    case FLOAT_2BIT_FMT(ABS_FMT, FMT_SDPS_D):
+        mips32_op = OPC_ABS_D;
+        goto do_unaryfp;
+    case FLOAT_2BIT_FMT(ABS_FMT, FMT_SDPS_PS):
+        mips32_op = OPC_ABS_PS;
+        goto do_unaryfp;
+
+        /* Negation */
+    case FLOAT_2BIT_FMT(NEG_FMT, FMT_SDPS_S):
+        mips32_op = OPC_NEG_S;
+        goto do_unaryfp;
+    case FLOAT_2BIT_FMT(NEG_FMT, FMT_SDPS_D):
+        mips32_op = OPC_NEG_D;
+        goto do_unaryfp;
+    case FLOAT_2BIT_FMT(NEG_FMT, FMT_SDPS_PS):
+        mips32_op = OPC_NEG_PS;
+        goto do_unaryfp;
+
+        /* Reciprocal square root step */
+    case FLOAT_2BIT_FMT(RSQRT1_FMT, FMT_SDPS_S):
+        mips32_op = OPC_RSQRT1_S;
+        goto do_unaryfp;
+    case FLOAT_2BIT_FMT(RSQRT1_FMT, FMT_SDPS_D):
+        mips32_op = OPC_RSQRT1_D;
+        goto do_unaryfp;
+    case FLOAT_2BIT_FMT(RSQRT1_FMT, FMT_SDPS_PS):
+        mips32_op = OPC_RSQRT1_PS;
+        goto do_unaryfp;
+
+        /* Reciprocal step */
+    case FLOAT_2BIT_FMT(RECIP1_FMT, FMT_SDPS_S):
+        mips32_op = OPC_RECIP1_S;
+        goto do_unaryfp;
+    case FLOAT_2BIT_FMT(RECIP1_FMT, FMT_SDPS_D):
+        mips32_op = OPC_RECIP1_D;   /* double format needs the _D opcode */
+        goto do_unaryfp;
+    case FLOAT_2BIT_FMT(RECIP1_FMT, FMT_SDPS_PS):
+        mips32_op = OPC_RECIP1_PS;
+        goto do_unaryfp;
+
+        /* Conversions to double */
+    case FLOAT_2BIT_FMT(CVT_D, FMT_SWL_S):
+        mips32_op = OPC_CVT_D_S;
+        goto do_unaryfp;
+    case FLOAT_2BIT_FMT(CVT_D, FMT_SWL_W):
+        mips32_op = OPC_CVT_D_W;
+        goto do_unaryfp;
+    case FLOAT_2BIT_FMT(CVT_D, FMT_SWL_L):
+        mips32_op = OPC_CVT_D_L;
+        goto do_unaryfp;
+
+        /* Conversions to single */
+    case FLOAT_2BIT_FMT(CVT_S, FMT_DWL_D):
+        mips32_op = OPC_CVT_S_D;
+        goto do_unaryfp;
+    case FLOAT_2BIT_FMT(CVT_S, FMT_DWL_W):
+        mips32_op = OPC_CVT_S_W;
+        goto do_unaryfp;
+    case FLOAT_2BIT_FMT(CVT_S, FMT_DWL_L):
+        mips32_op = OPC_CVT_S_L;
+    do_unaryfp: /* shared tail for every one-operand FP case above */
+        gen_farith(ctx, mips32_op, -1, rs, rt, 0);
+        break;
+
+        /* Conditional moves on floating-point codes; the selector that
+           COND_FLOAT_MOV puts at extension bit 7 is opcode bits 15..13.  */
+    case COND_FLOAT_MOV(MOVT, 0):
+    case COND_FLOAT_MOV(MOVT, 1):
+    case COND_FLOAT_MOV(MOVT, 2):
+    case COND_FLOAT_MOV(MOVT, 3):
+    case COND_FLOAT_MOV(MOVT, 4):
+    case COND_FLOAT_MOV(MOVT, 5):
+    case COND_FLOAT_MOV(MOVT, 6):
+        gen_movci(ctx, rt, rs, (ctx->opcode >> 13) & 0x7, 1);
+        break;
+    case COND_FLOAT_MOV(MOVF, 0):
+    case COND_FLOAT_MOV(MOVF, 1):
+    case COND_FLOAT_MOV(MOVF, 2):
+    case COND_FLOAT_MOV(MOVF, 3):
+    case COND_FLOAT_MOV(MOVF, 4):
+    case COND_FLOAT_MOV(MOVF, 5):
+    case COND_FLOAT_MOV(MOVF, 6):
+    case COND_FLOAT_MOV(MOVF, 7):
+        gen_movci(ctx, rt, rs, (ctx->opcode >> 13) & 0x7, 0);
+        break;
+    default:
+        MIPS_INVAL("pool32fxf");
+        generate_exception(ctx, EXCP_RI);
+        break;
+    }
+}
+
+static void decode_micromips32_opc (CPUMIPSState *env, DisasContext *ctx,
+                                    uint16_t insn_hw1)
+{
+    int32_t offset;
+    uint16_t insn;
+    int rt, rs, rd, rr;
+    int16_t imm;
+    uint32_t op, minor, mips32_op;
+    uint32_t cond, fmt, cc;
+
+    insn = cpu_lduw_code(env, ctx->pc + 2);
+    ctx->opcode = (ctx->opcode << 16) | insn;
+
+    rt = (ctx->opcode >> 21) & 0x1f;
+    rs = (ctx->opcode >> 16) & 0x1f;
+    rd = (ctx->opcode >> 11) & 0x1f;
+    rr = (ctx->opcode >> 6) & 0x1f;
+    imm = (int16_t) ctx->opcode;
+
+    op = (ctx->opcode >> 26) & 0x3f;
+    switch (op) {
+    case POOL32A:
+        minor = ctx->opcode & 0x3f;
+        switch (minor) {
+        case 0x00:
+            minor = (ctx->opcode >> 6) & 0xf;
+            switch (minor) {
+            case SLL32:
+                mips32_op = OPC_SLL;
+                goto do_shifti;
+            case SRA:
+                mips32_op = OPC_SRA;
+                goto do_shifti;
+            case SRL32:
+                mips32_op = OPC_SRL;
+                goto do_shifti;
+            case ROTR:
+                mips32_op = OPC_ROTR;
+            do_shifti:
+                gen_shift_imm(ctx, mips32_op, rt, rs, rd);
+                break;
+            default:
+                goto pool32a_invalid;
+            }
+            break;
+        case 0x10:
+            minor = (ctx->opcode >> 6) & 0xf;
+            switch (minor) {
+                /* Arithmetic */
+            case ADD:
+                mips32_op = OPC_ADD;
+                goto do_arith;
+            case ADDU32:
+                mips32_op = OPC_ADDU;
+                goto do_arith;
+            case SUB:
+                mips32_op = OPC_SUB;
+                goto do_arith;
+            case SUBU32:
+                mips32_op = OPC_SUBU;
+                goto do_arith;
+            case MUL:
+                mips32_op = OPC_MUL;
+            do_arith:
+                gen_arith(ctx, mips32_op, rd, rs, rt);
+                break;
+                /* Shifts */
+            case SLLV:
+                mips32_op = OPC_SLLV;
+                goto do_shift;
+            case SRLV:
+                mips32_op = OPC_SRLV;
+                goto do_shift;
+            case SRAV:
+                mips32_op = OPC_SRAV;
+                goto do_shift;
+            case ROTRV:
+                mips32_op = OPC_ROTRV;
+            do_shift:
+                gen_shift(ctx, mips32_op, rd, rs, rt);
+                break;
+                /* Logical operations */
+            case AND:
+                mips32_op = OPC_AND;
+                goto do_logic;
+            case OR32:
+                mips32_op = OPC_OR;
+                goto do_logic;
+            case NOR:
+                mips32_op = OPC_NOR;
+                goto do_logic;
+            case XOR32:
+                mips32_op = OPC_XOR;
+            do_logic:
+                gen_logic(ctx, mips32_op, rd, rs, rt);
+                break;
+                /* Set less than */
+            case SLT:
+                mips32_op = OPC_SLT;
+                goto do_slt;
+            case SLTU:
+                mips32_op = OPC_SLTU;
+            do_slt:
+                gen_slt(ctx, mips32_op, rd, rs, rt);
+                break;
+            default:
+                goto pool32a_invalid;
+            }
+            break;
+        case 0x18:
+            minor = (ctx->opcode >> 6) & 0xf;
+            switch (minor) {
+                /* Conditional moves */
+            case MOVN:
+                mips32_op = OPC_MOVN;
+                goto do_cmov;
+            case MOVZ:
+                mips32_op = OPC_MOVZ;
+            do_cmov:
+                gen_cond_move(ctx, mips32_op, rd, rs, rt);
+                break;
+            case LWXS:
+                gen_ldxs(ctx, rs, rt, rd);
+                break;
+            default:
+                goto pool32a_invalid;
+            }
+            break;
+        case INS:
+            gen_bitops(ctx, OPC_INS, rt, rs, rr, rd);
+            return;
+        case EXT:
+            gen_bitops(ctx, OPC_EXT, rt, rs, rr, rd);
+            return;
+        case POOL32AXF:
+            gen_pool32axf(env, ctx, rt, rs);
+            break;
+        case 0x07:
+            generate_exception(ctx, EXCP_BREAK);
+            break;
+        default:
+        pool32a_invalid:
+                MIPS_INVAL("pool32a");
+                generate_exception(ctx, EXCP_RI);
+                break;
+        }
+        break;
+    case POOL32B:
+        minor = (ctx->opcode >> 12) & 0xf;
+        switch (minor) {
+        case CACHE:
+            check_cp0_enabled(ctx);
+            /* Treat as no-op. */
+            break;
+        case LWC2:
+        case SWC2:
+            /* COP2: Not implemented. */
+            generate_exception_err(ctx, EXCP_CpU, 2);
+            break;
+        case LWP:
+        case SWP:
+#ifdef TARGET_MIPS64
+        case LDP:
+        case SDP:
+#endif
+            gen_ldst_pair(ctx, minor, rt, rs, SIMM(ctx->opcode, 0, 12));
+            break;
+        case LWM32:
+        case SWM32:
+#ifdef TARGET_MIPS64
+        case LDM:
+        case SDM:
+#endif
+            gen_ldst_multiple(ctx, minor, rt, rs, SIMM(ctx->opcode, 0, 12));
+            break;
+        default:
+            MIPS_INVAL("pool32b");
+            generate_exception(ctx, EXCP_RI);
+            break;
+        }
+        break;
+    case POOL32F:
+        if (env->CP0_Config1 & (1 << CP0C1_FP)) {
+            minor = ctx->opcode & 0x3f;
+            check_cp1_enabled(ctx);
+            switch (minor) {
+            case ALNV_PS:
+                mips32_op = OPC_ALNV_PS;
+                goto do_madd;
+            case MADD_S:
+                mips32_op = OPC_MADD_S;
+                goto do_madd;
+            case MADD_D:
+                mips32_op = OPC_MADD_D;
+                goto do_madd;
+            case MADD_PS:
+                mips32_op = OPC_MADD_PS;
+                goto do_madd;
+            case MSUB_S:
+                mips32_op = OPC_MSUB_S;
+                goto do_madd;
+            case MSUB_D:
+                mips32_op = OPC_MSUB_D;
+                goto do_madd;
+            case MSUB_PS:
+                mips32_op = OPC_MSUB_PS;
+                goto do_madd;
+            case NMADD_S:
+                mips32_op = OPC_NMADD_S;
+                goto do_madd;
+            case NMADD_D:
+                mips32_op = OPC_NMADD_D;
+                goto do_madd;
+            case NMADD_PS:
+                mips32_op = OPC_NMADD_PS;
+                goto do_madd;
+            case NMSUB_S:
+                mips32_op = OPC_NMSUB_S;
+                goto do_madd;
+            case NMSUB_D:
+                mips32_op = OPC_NMSUB_D;
+                goto do_madd;
+            case NMSUB_PS:
+                mips32_op = OPC_NMSUB_PS;
+            do_madd:
+                gen_flt3_arith(ctx, mips32_op, rd, rr, rs, rt);
+                break;
+            case CABS_COND_FMT:
+                cond = (ctx->opcode >> 6) & 0xf;
+                cc = (ctx->opcode >> 13) & 0x7;
+                fmt = (ctx->opcode >> 10) & 0x3;
+                switch (fmt) {
+                case 0x0:
+                    gen_cmpabs_s(ctx, cond, rt, rs, cc);
+                    break;
+                case 0x1:
+                    gen_cmpabs_d(ctx, cond, rt, rs, cc);
+                    break;
+                case 0x2:
+                    gen_cmpabs_ps(ctx, cond, rt, rs, cc);
+                    break;
+                default:
+                    goto pool32f_invalid;
+                }
+                break;
+            case C_COND_FMT:
+                cond = (ctx->opcode >> 6) & 0xf;
+                cc = (ctx->opcode >> 13) & 0x7;
+                fmt = (ctx->opcode >> 10) & 0x3;
+                switch (fmt) {
+                case 0x0:
+                    gen_cmp_s(ctx, cond, rt, rs, cc);
+                    break;
+                case 0x1:
+                    gen_cmp_d(ctx, cond, rt, rs, cc);
+                    break;
+                case 0x2:
+                    gen_cmp_ps(ctx, cond, rt, rs, cc);
+                    break;
+                default:
+                    goto pool32f_invalid;
+                }
+                break;
+            case POOL32FXF:
+                gen_pool32fxf(ctx, rt, rs);
+                break;
+            case 0x00:
+                /* PLL foo */
+                switch ((ctx->opcode >> 6) & 0x7) {
+                case PLL_PS:
+                    mips32_op = OPC_PLL_PS;
+                    goto do_ps;
+                case PLU_PS:
+                    mips32_op = OPC_PLU_PS;
+                    goto do_ps;
+                case PUL_PS:
+                    mips32_op = OPC_PUL_PS;
+                    goto do_ps;
+                case PUU_PS:
+                    mips32_op = OPC_PUU_PS;
+                    goto do_ps;
+                case CVT_PS_S:
+                    mips32_op = OPC_CVT_PS_S;
+                do_ps:
+                    gen_farith(ctx, mips32_op, rt, rs, rd, 0);
+                    break;
+                default:
+                    goto pool32f_invalid;
+                }
+                break;
+            case 0x08:
+                /* [LS][WDU]XC1 */
+                switch ((ctx->opcode >> 6) & 0x7) {
+                case LWXC1:
+                    mips32_op = OPC_LWXC1;
+                    goto do_ldst_cp1;
+                case SWXC1:
+                    mips32_op = OPC_SWXC1;
+                    goto do_ldst_cp1;
+                case LDXC1:
+                    mips32_op = OPC_LDXC1;
+                    goto do_ldst_cp1;
+                case SDXC1:
+                    mips32_op = OPC_SDXC1;
+                    goto do_ldst_cp1;
+                case LUXC1:
+                    mips32_op = OPC_LUXC1;
+                    goto do_ldst_cp1;
+                case SUXC1:
+                    mips32_op = OPC_SUXC1;
+                do_ldst_cp1:
+                    gen_flt3_ldst(ctx, mips32_op, rd, rd, rt, rs);
+                    break;
+                default:
+                    goto pool32f_invalid;
+                }
+                break;
+            case 0x18:
+                /* 3D insns */
+                fmt = (ctx->opcode >> 9) & 0x3;
+                switch ((ctx->opcode >> 6) & 0x7) {
+                case RSQRT2_FMT:
+                    switch (fmt) {
+                    case FMT_SDPS_S:
+                        mips32_op = OPC_RSQRT2_S;
+                        goto do_3d;
+                    case FMT_SDPS_D:
+                        mips32_op = OPC_RSQRT2_D;
+                        goto do_3d;
+                    case FMT_SDPS_PS:
+                        mips32_op = OPC_RSQRT2_PS;
+                        goto do_3d;
+                    default:
+                        goto pool32f_invalid;
+                    }
+                    break;
+                case RECIP2_FMT:
+                    switch (fmt) {
+                    case FMT_SDPS_S:
+                        mips32_op = OPC_RECIP2_S;
+                        goto do_3d;
+                    case FMT_SDPS_D:
+                        mips32_op = OPC_RECIP2_D;
+                        goto do_3d;
+                    case FMT_SDPS_PS:
+                        mips32_op = OPC_RECIP2_PS;
+                        goto do_3d;
+                    default:
+                        goto pool32f_invalid;
+                    }
+                    break;
+                case ADDR_PS:
+                    mips32_op = OPC_ADDR_PS;
+                    goto do_3d;
+                case MULR_PS:
+                    mips32_op = OPC_MULR_PS;
+                do_3d:
+                    gen_farith(ctx, mips32_op, rt, rs, rd, 0);
+                    break;
+                default:
+                    goto pool32f_invalid;
+                }
+                break;
+            case 0x20:
+                /* MOV[FT].fmt and PREFX */
+                cc = (ctx->opcode >> 13) & 0x7;
+                fmt = (ctx->opcode >> 9) & 0x3;
+                switch ((ctx->opcode >> 6) & 0x7) {
+                case MOVF_FMT:
+                    switch (fmt) {
+                    case FMT_SDPS_S:
+                        gen_movcf_s(rs, rt, cc, 0);
+                        break;
+                    case FMT_SDPS_D:
+                        gen_movcf_d(ctx, rs, rt, cc, 0);
+                        break;
+                    case FMT_SDPS_PS:
+                        gen_movcf_ps(ctx, rs, rt, cc, 0);
+                        break;
+                    default:
+                        goto pool32f_invalid;
+                    }
+                    break;
+                case MOVT_FMT:
+                    switch (fmt) {
+                    case FMT_SDPS_S:
+                        gen_movcf_s(rs, rt, cc, 1);
+                        break;
+                    case FMT_SDPS_D:
+                        gen_movcf_d(ctx, rs, rt, cc, 1);
+                        break;
+                    case FMT_SDPS_PS:
+                        gen_movcf_ps(ctx, rs, rt, cc, 1);
+                        break;
+                    default:
+                        goto pool32f_invalid;
+                    }
+                    break;
+                case PREFX:
+                    break;
+                default:
+                    goto pool32f_invalid;
+                }
+                break;
+#define FINSN_3ARG_SDPS(prfx)                           \
+                switch ((ctx->opcode >> 8) & 0x3) {     \
+                case FMT_SDPS_S:                        \
+                    mips32_op = OPC_##prfx##_S;         \
+                    goto do_fpop;                       \
+                case FMT_SDPS_D:                        \
+                    mips32_op = OPC_##prfx##_D;         \
+                    goto do_fpop;                       \
+                case FMT_SDPS_PS:                       \
+                    mips32_op = OPC_##prfx##_PS;        \
+                    goto do_fpop;                       \
+                default:                                \
+                    goto pool32f_invalid;               \
+                }
+            case 0x30:
+                /* regular FP ops */
+                switch ((ctx->opcode >> 6) & 0x3) {
+                case ADD_FMT:
+                    FINSN_3ARG_SDPS(ADD);
+                    break;
+                case SUB_FMT:
+                    FINSN_3ARG_SDPS(SUB);
+                    break;
+                case MUL_FMT:
+                    FINSN_3ARG_SDPS(MUL);
+                    break;
+                case DIV_FMT:
+                    fmt = (ctx->opcode >> 8) & 0x3;
+                    if (fmt == 1) {
+                        mips32_op = OPC_DIV_D;
+                    } else if (fmt == 0) {
+                        mips32_op = OPC_DIV_S;
+                    } else {
+                        goto pool32f_invalid;
+                    }
+                    goto do_fpop;
+                default:
+                    goto pool32f_invalid;
+                }
+                break;
+            case 0x38:
+                /* cmovs */
+                switch ((ctx->opcode >> 6) & 0x3) {
+                case MOVN_FMT:
+                    FINSN_3ARG_SDPS(MOVN);
+                    break;
+                case MOVZ_FMT:
+                    FINSN_3ARG_SDPS(MOVZ);
+                    break;
+                default:
+                    goto pool32f_invalid;
+                }
+                break;
+            do_fpop:
+                gen_farith(ctx, mips32_op, rt, rs, rd, 0);
+                break;
+            default:
+            pool32f_invalid:
+                MIPS_INVAL("pool32f");
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+        } else {
+            generate_exception_err(ctx, EXCP_CpU, 1);
+        }
+        break;
+    case POOL32I:
+        minor = (ctx->opcode >> 21) & 0x1f;
+        switch (minor) {
+        case BLTZ:
+            mips32_op = OPC_BLTZ;
+            goto do_branch;
+        case BLTZAL:
+            mips32_op = OPC_BLTZAL;
+            goto do_branch;
+        case BLTZALS:
+            mips32_op = OPC_BLTZALS;
+            goto do_branch;
+        case BGEZ:
+            mips32_op = OPC_BGEZ;
+            goto do_branch;
+        case BGEZAL:
+            mips32_op = OPC_BGEZAL;
+            goto do_branch;
+        case BGEZALS:
+            mips32_op = OPC_BGEZALS;
+            goto do_branch;
+        case BLEZ:
+            mips32_op = OPC_BLEZ;
+            goto do_branch;
+        case BGTZ:
+            mips32_op = OPC_BGTZ;
+        do_branch:
+            gen_compute_branch(ctx, mips32_op, 4, rs, -1, imm << 1);
+            break;
+
+            /* Traps */
+        case TLTI:
+            mips32_op = OPC_TLTI;
+            goto do_trapi;
+        case TGEI:
+            mips32_op = OPC_TGEI;
+            goto do_trapi;
+        case TLTIU:
+            mips32_op = OPC_TLTIU;
+            goto do_trapi;
+        case TGEIU:
+            mips32_op = OPC_TGEIU;
+            goto do_trapi;
+        case TNEI:
+            mips32_op = OPC_TNEI;
+            goto do_trapi;
+        case TEQI:
+            mips32_op = OPC_TEQI;
+        do_trapi:
+            gen_trap(ctx, mips32_op, rs, -1, imm);
+            break;
+
+        case BNEZC:
+        case BEQZC:
+            gen_compute_branch(ctx, minor == BNEZC ? OPC_BNE : OPC_BEQ,
+                               4, rs, 0, imm << 1);
+            /* Compact branches don't have a delay slot, so just let
+               the normal delay slot handling take us to the branch
+               target. */
+            break;
+        case LUI:
+            gen_logic_imm(ctx, OPC_LUI, rs, -1, imm);
+            break;
+        case SYNCI:
+            break;
+        case BC2F:
+        case BC2T:
+            /* COP2: Not implemented. */
+            generate_exception_err(ctx, EXCP_CpU, 2);
+            break;
+        case BC1F:
+            mips32_op = (ctx->opcode & (1 << 16)) ? OPC_BC1FANY2 : OPC_BC1F;
+            goto do_cp1branch;
+        case BC1T:
+            mips32_op = (ctx->opcode & (1 << 16)) ? OPC_BC1TANY2 : OPC_BC1T;
+            goto do_cp1branch;
+        case BC1ANY4F:
+            mips32_op = OPC_BC1FANY4;
+            goto do_cp1mips3d;
+        case BC1ANY4T:
+            mips32_op = OPC_BC1TANY4;
+        do_cp1mips3d:
+            check_cop1x(ctx);
+            check_insn(ctx, ASE_MIPS3D);
+            /* Fall through */
+        do_cp1branch:
+            gen_compute_branch1(ctx, mips32_op,
+                                (ctx->opcode >> 18) & 0x7, imm << 1);
+            break;
+        case BPOSGE64:
+        case BPOSGE32:
+            /* MIPS DSP: not implemented */
+            /* Fall through */
+        default:
+            MIPS_INVAL("pool32i");
+            generate_exception(ctx, EXCP_RI);
+            break;
+        }
+        break;
+    case POOL32C:
+        minor = (ctx->opcode >> 12) & 0xf;
+        switch (minor) {
+        case LWL:
+            mips32_op = OPC_LWL;
+            goto do_ld_lr;
+        case SWL:
+            mips32_op = OPC_SWL;
+            goto do_st_lr;
+        case LWR:
+            mips32_op = OPC_LWR;
+            goto do_ld_lr;
+        case SWR:
+            mips32_op = OPC_SWR;
+            goto do_st_lr;
+#if defined(TARGET_MIPS64)
+        case LDL:
+            mips32_op = OPC_LDL;
+            goto do_ld_lr;
+        case SDL:
+            mips32_op = OPC_SDL;
+            goto do_st_lr;
+        case LDR:
+            mips32_op = OPC_LDR;
+            goto do_ld_lr;
+        case SDR:
+            mips32_op = OPC_SDR;
+            goto do_st_lr;
+        case LWU:
+            mips32_op = OPC_LWU;
+            goto do_ld_lr;
+        case LLD:
+            mips32_op = OPC_LLD;
+            goto do_ld_lr;
+#endif
+        case LL:
+            mips32_op = OPC_LL;
+            goto do_ld_lr;
+        do_ld_lr:
+            gen_ld(ctx, mips32_op, rt, rs, SIMM(ctx->opcode, 0, 12));
+            break;
+        do_st_lr:
+            gen_st(ctx, mips32_op, rt, rs, SIMM(ctx->opcode, 0, 12));
+            break;
+        case SC:
+            gen_st_cond(ctx, OPC_SC, rt, rs, SIMM(ctx->opcode, 0, 12));
+            break;
+#if defined(TARGET_MIPS64)
+        case SCD:
+            gen_st_cond(ctx, OPC_SCD, rt, rs, SIMM(ctx->opcode, 0, 12));
+            break;
+#endif
+        case PREF:
+            /* Treat as no-op */
+            break;
+        default:
+            MIPS_INVAL("pool32c");
+            generate_exception(ctx, EXCP_RI);
+            break;
+        }
+        break;
+    case ADDI32:
+        mips32_op = OPC_ADDI;
+        goto do_addi;
+    case ADDIU32:
+        mips32_op = OPC_ADDIU;
+    do_addi:
+        gen_arith_imm(ctx, mips32_op, rt, rs, imm);
+        break;
+
+        /* Logical operations */
+    case ORI32:
+        mips32_op = OPC_ORI;
+        goto do_logici;
+    case XORI32:
+        mips32_op = OPC_XORI;
+        goto do_logici;
+    case ANDI32:
+        mips32_op = OPC_ANDI;
+    do_logici:
+        gen_logic_imm(ctx, mips32_op, rt, rs, imm);
+        break;
+
+        /* Set less than immediate */
+    case SLTI32:
+        mips32_op = OPC_SLTI;
+        goto do_slti;
+    case SLTIU32:
+        mips32_op = OPC_SLTIU;
+    do_slti:
+        gen_slt_imm(ctx, mips32_op, rt, rs, imm);
+        break;
+    case JALX32:
+        offset = (int32_t)(ctx->opcode & 0x3FFFFFF) << 2;
+        gen_compute_branch(ctx, OPC_JALX, 4, rt, rs, offset);
+        break;
+    case JALS32:
+        offset = (int32_t)(ctx->opcode & 0x3FFFFFF) << 1;
+        gen_compute_branch(ctx, OPC_JALS, 4, rt, rs, offset);
+        break;
+    case BEQ32:
+        gen_compute_branch(ctx, OPC_BEQ, 4, rt, rs, imm << 1);
+        break;
+    case BNE32:
+        gen_compute_branch(ctx, OPC_BNE, 4, rt, rs, imm << 1);
+        break;
+    case J32:
+        gen_compute_branch(ctx, OPC_J, 4, rt, rs,
+                           (int32_t)(ctx->opcode & 0x3FFFFFF) << 1);
+        break;
+    case JAL32:
+        gen_compute_branch(ctx, OPC_JAL, 4, rt, rs,
+                           (int32_t)(ctx->opcode & 0x3FFFFFF) << 1);
+        break;
+        /* Floating point (COP1) */
+    case LWC132:
+        mips32_op = OPC_LWC1;
+        goto do_cop1;
+    case LDC132:
+        mips32_op = OPC_LDC1;
+        goto do_cop1;
+    case SWC132:
+        mips32_op = OPC_SWC1;
+        goto do_cop1;
+    case SDC132:
+        mips32_op = OPC_SDC1;
+    do_cop1:
+        gen_cop1_ldst(env, ctx, mips32_op, rt, rs, imm);
+        break;
+    case ADDIUPC:
+        {
+            int reg = mmreg(ZIMM(ctx->opcode, 23, 3));
+            int offset = SIMM(ctx->opcode, 0, 23) << 2;
+
+            gen_addiupc(ctx, reg, offset, 0, 0);
+        }
+        break;
+        /* Loads and stores */
+    case LB32:
+        mips32_op = OPC_LB;
+        goto do_ld;
+    case LBU32:
+        mips32_op = OPC_LBU;
+        goto do_ld;
+    case LH32:
+        mips32_op = OPC_LH;
+        goto do_ld;
+    case LHU32:
+        mips32_op = OPC_LHU;
+        goto do_ld;
+    case LW32:
+        mips32_op = OPC_LW;
+        goto do_ld;
+#ifdef TARGET_MIPS64
+    case LD32:
+        mips32_op = OPC_LD;
+        goto do_ld;
+    case SD32:
+        mips32_op = OPC_SD;
+        goto do_st;
+#endif
+    case SB32:
+        mips32_op = OPC_SB;
+        goto do_st;
+    case SH32:
+        mips32_op = OPC_SH;
+        goto do_st;
+    case SW32:
+        mips32_op = OPC_SW;
+        goto do_st;
+    do_ld:
+        gen_ld(ctx, mips32_op, rt, rs, imm);
+        break;
+    do_st:
+        gen_st(ctx, mips32_op, rt, rs, imm);
+        break;
+    default:
+        generate_exception(ctx, EXCP_RI);
+        break;
+    }
+}
+/* Decode one microMIPS insn; returns 2, or 4 if the 32-bit decoder ran. */
+static int decode_micromips_opc (CPUMIPSState *env, DisasContext *ctx)
+{
+    uint32_t op;
+
+    /* Instructions must sit on a halfword boundary: an odd pc raises AdEL. */
+    if (ctx->pc & 0x1) {
+        env->CP0_BadVAddr = ctx->pc;    /* keep the offending pc */
+        generate_exception(ctx, EXCP_AdEL);
+        ctx->bstate = BS_STOP;
+        return 2;
+    }
+
+    op = (ctx->opcode >> 10) & 0x3f;    /* 6-bit field, bits 15..10 */
+    /* Enforce properly-sized instructions in a delay slot */
+    if (ctx->hflags & MIPS_HFLAG_BMASK) {
+        int bits = ctx->hflags & MIPS_HFLAG_BMASK_EXT;
+
+        switch (op) {
+        case POOL32A:
+        case POOL32B:
+        case POOL32I:
+        case POOL32C:
+        case ADDI32:
+        case ADDIU32:
+        case ORI32:
+        case XORI32:
+        case SLTI32:
+        case SLTIU32:
+        case ANDI32:
+        case JALX32:
+        case LBU32:
+        case LHU32:
+        case POOL32F:
+        case JALS32:
+        case BEQ32:
+        case BNE32:
+        case J32:
+        case JAL32:
+        case SB32:
+        case SH32:
+        case POOL32S:
+        case ADDIUPC:
+        case SWC132:
+        case SDC132:
+        case SD32:
+        case SW32:
+        case LB32:
+        case LH32:
+        case DADDIU32:
+        case LWC132:
+        case LDC132:
+        case LD32:
+        case LW32:
+            if (bits & MIPS_HFLAG_BDS16) {  /* 32-bit form, BDS16 is set */
+                generate_exception(ctx, EXCP_RI);
+                /* Just stop translation; the user is confused.  */
+                ctx->bstate = BS_STOP;
+                return 2;
+            }
+            break;
+        case POOL16A:
+        case POOL16B:
+        case POOL16C:
+        case LWGP16:
+        case POOL16F:
+        case LBU16:
+        case LHU16:
+        case LWSP16:
+        case LW16:
+        case SB16:
+        case SH16:
+        case SWSP16:
+        case SW16:
+        case MOVE16:
+        case ANDI16:
+        case POOL16D:
+        case POOL16E:
+        case BEQZ16:
+        case BNEZ16:
+        case B16:
+        case LI16:
+            if (bits & MIPS_HFLAG_BDS32) {  /* 16-bit form, BDS32 is set */
+                generate_exception(ctx, EXCP_RI);
+                /* Just stop translation; the user is confused.  */
+                ctx->bstate = BS_STOP;
+                return 2;
+            }
+            break;
+        default:
+            break;
+        }
+    }
+    switch (op) {   /* the *16 forms are decoded right here */
+    case POOL16A:
+        {
+            int rd = mmreg(uMIPS_RD(ctx->opcode));
+            int rs1 = mmreg(uMIPS_RS1(ctx->opcode));
+            int rs2 = mmreg(uMIPS_RS2(ctx->opcode));
+            uint32_t opc = 0;
+
+            switch (ctx->opcode & 0x1) {    /* bit 0: ADDU16 or SUBU16 */
+            case ADDU16:
+                opc = OPC_ADDU;
+                break;
+            case SUBU16:
+                opc = OPC_SUBU;
+                break;
+            }
+
+            gen_arith(ctx, opc, rd, rs1, rs2);
+        }
+        break;
+    case POOL16B:
+        {
+            int rd = mmreg(uMIPS_RD(ctx->opcode));
+            int rs = mmreg(uMIPS_RS(ctx->opcode));
+            int amount = (ctx->opcode >> 1) & 0x7;
+            uint32_t opc = 0;
+            amount = amount == 0 ? 8 : amount;  /* a field of 0 means 8 */
+
+            switch (ctx->opcode & 0x1) {    /* bit 0: SLL16 or SRL16 */
+            case SLL16:
+                opc = OPC_SLL;
+                break;
+            case SRL16:
+                opc = OPC_SRL;
+                break;
+            }
+
+            gen_shift_imm(ctx, opc, rd, rs, amount);
+        }
+        break;
+    case POOL16C:
+        gen_pool16c_insn(ctx);
+        break;
+    case LWGP16:
+        {
+            int rd = mmreg(uMIPS_RD(ctx->opcode));
+            int rb = 28;            /* GP */
+            int16_t offset = SIMM(ctx->opcode, 0, 7) << 2;  /* field * 4 */
+
+            gen_ld(ctx, OPC_LW, rd, rb, offset);
+        }
+        break;
+    case POOL16F:
+        if (ctx->opcode & 1) {      /* bit 0 set: raise RI */
+            generate_exception(ctx, EXCP_RI);
+        } else {
+            /* MOVEP */
+            int enc_dest = uMIPS_RD(ctx->opcode);
+            int enc_rt = uMIPS_RS2(ctx->opcode);
+            int enc_rs = uMIPS_RS1(ctx->opcode);
+            int rd, rs, re, rt;
+            static const int rd_enc[] = { 5, 5, 6, 4, 4, 4, 4, 4 };
+            static const int re_enc[] = { 6, 7, 7, 21, 22, 5, 6, 7 };
+            static const int rs_rt_enc[] = { 0, 17, 2, 3, 16, 18, 19, 20 };
+            /* enc_dest picks the (rd, re) pair; enc_rs/enc_rt pick rs, rt. */
+            rd = rd_enc[enc_dest];
+            re = re_enc[enc_dest];
+            rs = rs_rt_enc[enc_rs];
+            rt = rs_rt_enc[enc_rt];
+
+            gen_arith_imm(ctx, OPC_ADDIU, rd, rs, 0);   /* rd = rs + 0 */
+            gen_arith_imm(ctx, OPC_ADDIU, re, rt, 0);   /* re = rt + 0 */
+        }
+        break;
+    case LBU16:
+        {
+            int rd = mmreg(uMIPS_RD(ctx->opcode));
+            int rb = mmreg(uMIPS_RS(ctx->opcode));
+            int16_t offset = ZIMM(ctx->opcode, 0, 4);
+            offset = (offset == 0xf ? -1 : offset); /* 0xf stands for -1 */
+
+            gen_ld(ctx, OPC_LBU, rd, rb, offset);
+        }
+        break;
+    case LHU16:
+        {
+            int rd = mmreg(uMIPS_RD(ctx->opcode));
+            int rb = mmreg(uMIPS_RS(ctx->opcode));
+            int16_t offset = ZIMM(ctx->opcode, 0, 4) << 1;  /* field * 2 */
+
+            gen_ld(ctx, OPC_LHU, rd, rb, offset);
+        }
+        break;
+    case LWSP16:
+        {
+            int rd = (ctx->opcode >> 5) & 0x1f; /* 5-bit number, no mmreg() */
+            int rb = 29;            /* SP */
+            int16_t offset = ZIMM(ctx->opcode, 0, 5) << 2;  /* field * 4 */
+
+            gen_ld(ctx, OPC_LW, rd, rb, offset);
+        }
+        break;
+    case LW16:
+        {
+            int rd = mmreg(uMIPS_RD(ctx->opcode));
+            int rb = mmreg(uMIPS_RS(ctx->opcode));
+            int16_t offset = ZIMM(ctx->opcode, 0, 4) << 2;  /* field * 4 */
+
+            gen_ld(ctx, OPC_LW, rd, rb, offset);
+        }
+        break;
+    case SB16:
+        {
+            int rd = mmreg2(uMIPS_RD(ctx->opcode)); /* mmreg2(), not mmreg() */
+            int rb = mmreg(uMIPS_RS(ctx->opcode));
+            int16_t offset = ZIMM(ctx->opcode, 0, 4);
+
+            gen_st(ctx, OPC_SB, rd, rb, offset);
+        }
+        break;
+    case SH16:
+        {
+            int rd = mmreg2(uMIPS_RD(ctx->opcode)); /* mmreg2(), not mmreg() */
+            int rb = mmreg(uMIPS_RS(ctx->opcode));
+            int16_t offset = ZIMM(ctx->opcode, 0, 4) << 1;  /* field * 2 */
+
+            gen_st(ctx, OPC_SH, rd, rb, offset);
+        }
+        break;
+    case SWSP16:
+        {
+            int rd = (ctx->opcode >> 5) & 0x1f; /* 5-bit number, no mmreg() */
+            int rb = 29;            /* SP */
+            int16_t offset = ZIMM(ctx->opcode, 0, 5) << 2;  /* field * 4 */
+
+            gen_st(ctx, OPC_SW, rd, rb, offset);
+        }
+        break;
+    case SW16:
+        {
+            int rd = mmreg2(uMIPS_RD(ctx->opcode)); /* mmreg2(), not mmreg() */
+            int rb = mmreg(uMIPS_RS(ctx->opcode));
+            int16_t offset = ZIMM(ctx->opcode, 0, 4) << 2;  /* field * 4 */
+
+            gen_st(ctx, OPC_SW, rd, rb, offset);
+        }
+        break;
+    case MOVE16:
+        {
+            int rd = uMIPS_RD5(ctx->opcode);
+            int rs = uMIPS_RS5(ctx->opcode);
+
+            gen_arith_imm(ctx, OPC_ADDIU, rd, rs, 0);   /* rd = rs + 0 */
+        }
+        break;
+    case ANDI16:
+        gen_andi16(ctx);
+        break;
+    case POOL16D:
+        switch (ctx->opcode & 0x1) {    /* bit 0: ADDIUS5 or ADDIUSP */
+        case ADDIUS5:
+            gen_addius5(ctx);
+            break;
+        case ADDIUSP:
+            gen_addiusp(ctx);
+            break;
+        }
+        break;
+    case POOL16E:
+        switch (ctx->opcode & 0x1) {    /* bit 0: ADDIUR2 or ADDIUR1SP */
+        case ADDIUR2:
+            gen_addiur2(ctx);
+            break;
+        case ADDIUR1SP:
+            gen_addiur1sp(ctx);
+            break;
+        }
+        break;
+    case B16:
+        gen_compute_branch(ctx, OPC_BEQ, 2, 0, 0,   /* BEQ on regs 0, 0 */
+                           SIMM(ctx->opcode, 0, 10) << 1);
+        break;
+    case BNEZ16:
+    case BEQZ16:
+        gen_compute_branch(ctx, op == BNEZ16 ? OPC_BNE : OPC_BEQ, 2,
+                           mmreg(uMIPS_RD(ctx->opcode)),
+                           0, SIMM(ctx->opcode, 0, 7) << 1);
+        break;
+    case LI16:
+        {
+            int reg = mmreg(uMIPS_RD(ctx->opcode));
+            int imm = ZIMM(ctx->opcode, 0, 7);
+
+            imm = (imm == 0x7f ? -1 : imm);     /* 0x7f stands for -1 */
+            tcg_gen_movi_tl(cpu_gpr[reg], imm);
+        }
+        break;
+    case RES_20:
+    case RES_28:
+    case RES_29:
+    case RES_30:
+    case RES_31:
+    case RES_38:
+    case RES_39:
+        generate_exception(ctx, EXCP_RI);   /* every RES_* value raises RI */
+        break;
+    default:
+        decode_micromips32_opc (env, ctx, op);  /* all remaining op values */
+        return 4;
+    }
+
+    return 2;
+}
+
 /* SmartMIPS extension to MIPS32 */
 
 #if defined(TARGET_MIPS64)
@@ -8370,6 +12728,1727 @@
 
 #endif
 
+/* MIPSDSP functions. */
+/* Emit a DSP indexed load (LBUX/LHX/LWX, or LDX on MIPS64) into GPR rd. */
+static void gen_mipsdsp_ld(DisasContext *ctx, uint32_t opc,
+                           int rd, int base, int offset)
+{
+    const char *mnemonic = "ldx";
+    TCGv val;
+
+    check_dsp(ctx);
+    val = tcg_temp_new();
+
+    /* If either register number is 0 the other GPR alone is the address. */
+    if (base != 0 && offset != 0) {
+        gen_op_addr_add(ctx, val, cpu_gpr[base], cpu_gpr[offset]);
+    } else {
+        gen_load_gpr(val, base == 0 ? offset : base);
+    }
+
+    switch (opc) {
+    case OPC_LBUX:
+        mnemonic = "lbux";
+        tcg_gen_qemu_ld_tl(val, val, ctx->mem_idx, MO_UB);
+        break;
+    case OPC_LHX:
+        mnemonic = "lhx";
+        tcg_gen_qemu_ld_tl(val, val, ctx->mem_idx, MO_TESW);
+        break;
+    case OPC_LWX:
+        mnemonic = "lwx";
+        tcg_gen_qemu_ld_tl(val, val, ctx->mem_idx, MO_TESL);
+        break;
+#if defined(TARGET_MIPS64)
+    case OPC_LDX:
+        mnemonic = "ldx";
+        tcg_gen_qemu_ld_tl(val, val, ctx->mem_idx, MO_TEQ);
+        break;
+#endif
+    default:
+        goto done;      /* unrecognised opc: emit neither load nor store */
+    }
+    gen_store_gpr(val, rd);
+done:
+    (void)mnemonic; /* silences the unused warning; only MIPS_DEBUG reads it */
+    MIPS_DEBUG("%s %s, %s(%s)", mnemonic,
+               regnames[rd], regnames[offset], regnames[base]);
+    tcg_temp_free(val);
+}
+
+static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2,
+                              int ret, int v1, int v2)
+{
+    const char *opn = "mipsdsp arith";
+    TCGv v1_t;
+    TCGv v2_t;
+
+    if (ret == 0) {
+        /* Treat as NOP. */
+        MIPS_DEBUG("NOP");
+        return;
+    }
+
+    v1_t = tcg_temp_new();
+    v2_t = tcg_temp_new();
+
+    gen_load_gpr(v1_t, v1);
+    gen_load_gpr(v2_t, v2);
+
+    switch (op1) {
+    /* OPC_MULT_G_2E is equal OPC_ADDUH_QB_DSP */
+    case OPC_MULT_G_2E:
+        check_dspr2(ctx);
+        switch (op2) {
+        case OPC_ADDUH_QB:
+            gen_helper_adduh_qb(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_ADDUH_R_QB:
+            gen_helper_adduh_r_qb(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_ADDQH_PH:
+            gen_helper_addqh_ph(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_ADDQH_R_PH:
+            gen_helper_addqh_r_ph(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_ADDQH_W:
+            gen_helper_addqh_w(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_ADDQH_R_W:
+            gen_helper_addqh_r_w(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_SUBUH_QB:
+            gen_helper_subuh_qb(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_SUBUH_R_QB:
+            gen_helper_subuh_r_qb(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_SUBQH_PH:
+            gen_helper_subqh_ph(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_SUBQH_R_PH:
+            gen_helper_subqh_r_ph(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_SUBQH_W:
+            gen_helper_subqh_w(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_SUBQH_R_W:
+            gen_helper_subqh_r_w(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        }
+        break;
+    case OPC_ABSQ_S_PH_DSP:
+        switch (op2) {
+        case OPC_ABSQ_S_QB:
+            check_dspr2(ctx);
+            gen_helper_absq_s_qb(cpu_gpr[ret], v2_t, cpu_env);
+            break;
+        case OPC_ABSQ_S_PH:
+            check_dsp(ctx);
+            gen_helper_absq_s_ph(cpu_gpr[ret], v2_t, cpu_env);
+            break;
+        case OPC_ABSQ_S_W:
+            check_dsp(ctx);
+            gen_helper_absq_s_w(cpu_gpr[ret], v2_t, cpu_env);
+            break;
+        case OPC_PRECEQ_W_PHL:
+            check_dsp(ctx);
+            tcg_gen_andi_tl(cpu_gpr[ret], v2_t, 0xFFFF0000);
+            tcg_gen_ext32s_tl(cpu_gpr[ret], cpu_gpr[ret]);
+            break;
+        case OPC_PRECEQ_W_PHR:
+            check_dsp(ctx);
+            tcg_gen_andi_tl(cpu_gpr[ret], v2_t, 0x0000FFFF);
+            tcg_gen_shli_tl(cpu_gpr[ret], cpu_gpr[ret], 16);
+            tcg_gen_ext32s_tl(cpu_gpr[ret], cpu_gpr[ret]);
+            break;
+        case OPC_PRECEQU_PH_QBL:
+            check_dsp(ctx);
+            gen_helper_precequ_ph_qbl(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEQU_PH_QBR:
+            check_dsp(ctx);
+            gen_helper_precequ_ph_qbr(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEQU_PH_QBLA:
+            check_dsp(ctx);
+            gen_helper_precequ_ph_qbla(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEQU_PH_QBRA:
+            check_dsp(ctx);
+            gen_helper_precequ_ph_qbra(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEU_PH_QBL:
+            check_dsp(ctx);
+            gen_helper_preceu_ph_qbl(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEU_PH_QBR:
+            check_dsp(ctx);
+            gen_helper_preceu_ph_qbr(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEU_PH_QBLA:
+            check_dsp(ctx);
+            gen_helper_preceu_ph_qbla(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEU_PH_QBRA:
+            check_dsp(ctx);
+            gen_helper_preceu_ph_qbra(cpu_gpr[ret], v2_t);
+            break;
+        }
+        break;
+    case OPC_ADDU_QB_DSP:
+        switch (op2) {
+        case OPC_ADDQ_PH:
+            check_dsp(ctx);
+            gen_helper_addq_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDQ_S_PH:
+            check_dsp(ctx);
+            gen_helper_addq_s_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDQ_S_W:
+            check_dsp(ctx);
+            gen_helper_addq_s_w(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDU_QB:
+            check_dsp(ctx);
+            gen_helper_addu_qb(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDU_S_QB:
+            check_dsp(ctx);
+            gen_helper_addu_s_qb(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDU_PH:
+            check_dspr2(ctx);
+            gen_helper_addu_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDU_S_PH:
+            check_dspr2(ctx);
+            gen_helper_addu_s_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBQ_PH:
+            check_dsp(ctx);
+            gen_helper_subq_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBQ_S_PH:
+            check_dsp(ctx);
+            gen_helper_subq_s_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBQ_S_W:
+            check_dsp(ctx);
+            gen_helper_subq_s_w(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBU_QB:
+            check_dsp(ctx);
+            gen_helper_subu_qb(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBU_S_QB:
+            check_dsp(ctx);
+            gen_helper_subu_s_qb(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBU_PH:
+            check_dspr2(ctx);
+            gen_helper_subu_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBU_S_PH:
+            check_dspr2(ctx);
+            gen_helper_subu_s_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDSC:
+            check_dsp(ctx);
+            gen_helper_addsc(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDWC:
+            check_dsp(ctx);
+            gen_helper_addwc(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MODSUB:
+            check_dsp(ctx);
+            gen_helper_modsub(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_RADDU_W_QB:
+            check_dsp(ctx);
+            gen_helper_raddu_w_qb(cpu_gpr[ret], v1_t);
+            break;
+        }
+        break;
+    case OPC_CMPU_EQ_QB_DSP:
+        switch (op2) {
+        case OPC_PRECR_QB_PH:
+            check_dspr2(ctx);
+            gen_helper_precr_qb_ph(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_PRECRQ_QB_PH:
+            check_dsp(ctx);
+            gen_helper_precrq_qb_ph(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_PRECR_SRA_PH_W:
+            check_dspr2(ctx);
+            {
+                TCGv_i32 sa_t = tcg_const_i32(v2);
+                gen_helper_precr_sra_ph_w(cpu_gpr[ret], sa_t, v1_t,
+                                          cpu_gpr[ret]);
+                tcg_temp_free_i32(sa_t);
+                break;
+            }
+        case OPC_PRECR_SRA_R_PH_W:
+            check_dspr2(ctx);
+            {
+                TCGv_i32 sa_t = tcg_const_i32(v2);
+                gen_helper_precr_sra_r_ph_w(cpu_gpr[ret], sa_t, v1_t,
+                                            cpu_gpr[ret]);
+                tcg_temp_free_i32(sa_t);
+                break;
+            }
+        case OPC_PRECRQ_PH_W:
+            check_dsp(ctx);
+            gen_helper_precrq_ph_w(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_PRECRQ_RS_PH_W:
+            check_dsp(ctx);
+            gen_helper_precrq_rs_ph_w(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_PRECRQU_S_QB_PH:
+            check_dsp(ctx);
+            gen_helper_precrqu_s_qb_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        }
+        break;
+#ifdef TARGET_MIPS64
+    case OPC_ABSQ_S_QH_DSP:
+        switch (op2) {
+        case OPC_PRECEQ_L_PWL:
+            check_dsp(ctx);
+            tcg_gen_andi_tl(cpu_gpr[ret], v2_t, 0xFFFFFFFF00000000ull);
+            break;
+        case OPC_PRECEQ_L_PWR:
+            check_dsp(ctx);
+            tcg_gen_shli_tl(cpu_gpr[ret], v2_t, 32);
+            break;
+        case OPC_PRECEQ_PW_QHL:
+            check_dsp(ctx);
+            gen_helper_preceq_pw_qhl(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEQ_PW_QHR:
+            check_dsp(ctx);
+            gen_helper_preceq_pw_qhr(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEQ_PW_QHLA:
+            check_dsp(ctx);
+            gen_helper_preceq_pw_qhla(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEQ_PW_QHRA:
+            check_dsp(ctx);
+            gen_helper_preceq_pw_qhra(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEQU_QH_OBL:
+            check_dsp(ctx);
+            gen_helper_precequ_qh_obl(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEQU_QH_OBR:
+            check_dsp(ctx);
+            gen_helper_precequ_qh_obr(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEQU_QH_OBLA:
+            check_dsp(ctx);
+            gen_helper_precequ_qh_obla(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEQU_QH_OBRA:
+            check_dsp(ctx);
+            gen_helper_precequ_qh_obra(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEU_QH_OBL:
+            check_dsp(ctx);
+            gen_helper_preceu_qh_obl(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEU_QH_OBR:
+            check_dsp(ctx);
+            gen_helper_preceu_qh_obr(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEU_QH_OBLA:
+            check_dsp(ctx);
+            gen_helper_preceu_qh_obla(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEU_QH_OBRA:
+            check_dsp(ctx);
+            gen_helper_preceu_qh_obra(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_ABSQ_S_OB:
+            check_dspr2(ctx);
+            gen_helper_absq_s_ob(cpu_gpr[ret], v2_t, cpu_env);
+            break;
+        case OPC_ABSQ_S_PW:
+            check_dsp(ctx);
+            gen_helper_absq_s_pw(cpu_gpr[ret], v2_t, cpu_env);
+            break;
+        case OPC_ABSQ_S_QH:
+            check_dsp(ctx);
+            gen_helper_absq_s_qh(cpu_gpr[ret], v2_t, cpu_env);
+            break;
+        }
+        break;
+    case OPC_ADDU_OB_DSP:
+        switch (op2) {
+        case OPC_RADDU_L_OB:
+            check_dsp(ctx);
+            gen_helper_raddu_l_ob(cpu_gpr[ret], v1_t);
+            break;
+        case OPC_SUBQ_PW:
+            check_dsp(ctx);
+            gen_helper_subq_pw(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBQ_S_PW:
+            check_dsp(ctx);
+            gen_helper_subq_s_pw(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBQ_QH:
+            check_dsp(ctx);
+            gen_helper_subq_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBQ_S_QH:
+            check_dsp(ctx);
+            gen_helper_subq_s_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBU_OB:
+            check_dsp(ctx);
+            gen_helper_subu_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBU_S_OB:
+            check_dsp(ctx);
+            gen_helper_subu_s_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBU_QH:
+            check_dspr2(ctx);
+            gen_helper_subu_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBU_S_QH:
+            check_dspr2(ctx);
+            gen_helper_subu_s_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBUH_OB:
+            check_dspr2(ctx);
+            gen_helper_subuh_ob(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_SUBUH_R_OB:
+            check_dspr2(ctx);
+            gen_helper_subuh_r_ob(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_ADDQ_PW:
+            check_dsp(ctx);
+            gen_helper_addq_pw(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDQ_S_PW:
+            check_dsp(ctx);
+            gen_helper_addq_s_pw(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDQ_QH:
+            check_dsp(ctx);
+            gen_helper_addq_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDQ_S_QH:
+            check_dsp(ctx);
+            gen_helper_addq_s_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDU_OB:
+            check_dsp(ctx);
+            gen_helper_addu_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDU_S_OB:
+            check_dsp(ctx);
+            gen_helper_addu_s_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDU_QH:
+            check_dspr2(ctx);
+            gen_helper_addu_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDU_S_QH:
+            check_dspr2(ctx);
+            gen_helper_addu_s_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDUH_OB:
+            check_dspr2(ctx);
+            gen_helper_adduh_ob(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_ADDUH_R_OB:
+            check_dspr2(ctx);
+            gen_helper_adduh_r_ob(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        }
+        break;
+    case OPC_CMPU_EQ_OB_DSP:
+        switch (op2) {
+        case OPC_PRECR_OB_QH:
+            check_dspr2(ctx);
+            gen_helper_precr_ob_qh(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_PRECR_SRA_QH_PW:
+            check_dspr2(ctx);
+            {
+                TCGv_i32 ret_t = tcg_const_i32(ret);
+                gen_helper_precr_sra_qh_pw(v2_t, v1_t, v2_t, ret_t);
+                tcg_temp_free_i32(ret_t);
+                break;
+            }
+        case OPC_PRECR_SRA_R_QH_PW:
+            check_dspr2(ctx);
+            {
+                TCGv_i32 sa_v = tcg_const_i32(ret);
+                gen_helper_precr_sra_r_qh_pw(v2_t, v1_t, v2_t, sa_v);
+                tcg_temp_free_i32(sa_v);
+                break;
+            }
+        case OPC_PRECRQ_OB_QH:
+            check_dsp(ctx);
+            gen_helper_precrq_ob_qh(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_PRECRQ_PW_L:
+            check_dsp(ctx);
+            gen_helper_precrq_pw_l(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_PRECRQ_QH_PW:
+            check_dsp(ctx);
+            gen_helper_precrq_qh_pw(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_PRECRQ_RS_QH_PW:
+            check_dsp(ctx);
+            gen_helper_precrq_rs_qh_pw(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_PRECRQU_S_OB_QH:
+            check_dsp(ctx);
+            gen_helper_precrqu_s_ob_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        }
+        break;
+#endif
+    }
+
+    tcg_temp_free(v1_t);
+    tcg_temp_free(v2_t);
+
+    (void)opn; /* avoid a compiler warning */
+    MIPS_DEBUG("%s", opn);
+}
+
+static void gen_mipsdsp_shift(DisasContext *ctx, uint32_t opc,
+                              int ret, int v1, int v2)
+{ /* Emit the helper call for one DSP shift insn; results go to cpu_gpr[ret]. */
+    uint32_t op2;                 /* sub-opcode, masked out of ctx->opcode */
+    const char *opn = "mipsdsp shift";
+    TCGv t0;                      /* v1 itself, loaded as a constant */
+    TCGv v1_t;                    /* contents of GPR v1 */
+    TCGv v2_t;                    /* contents of GPR v2 */
+
+    if (ret == 0) {
+        /* Treat as NOP: emit nothing rather than write cpu_gpr[0]. */
+        MIPS_DEBUG("NOP");
+        return;
+    }
+
+    t0 = tcg_temp_new();
+    v1_t = tcg_temp_new();
+    v2_t = tcg_temp_new();
+
+    tcg_gen_movi_tl(t0, v1);      /* consumed by the non-"V" cases below */
+    gen_load_gpr(v1_t, v1);       /* consumed by the "V" cases below */
+    gen_load_gpr(v2_t, v2);
+
+    switch (opc) { /* no default: any other opc emits no code */
+    case OPC_SHLL_QB_DSP:
+        {
+            op2 = MASK_SHLL_QB(ctx->opcode);
+            switch (op2) { /* helpers in this group take (t0 or v1_t, v2_t) */
+            case OPC_SHLL_QB:
+                check_dsp(ctx); /* NOTE(review): presumably gates on DSP support; confirm */
+                gen_helper_shll_qb(cpu_gpr[ret], t0, v2_t, cpu_env);
+                break;
+            case OPC_SHLLV_QB:
+                check_dsp(ctx);
+                gen_helper_shll_qb(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+                break;
+            case OPC_SHLL_PH:
+                check_dsp(ctx);
+                gen_helper_shll_ph(cpu_gpr[ret], t0, v2_t, cpu_env);
+                break;
+            case OPC_SHLLV_PH:
+                check_dsp(ctx);
+                gen_helper_shll_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+                break;
+            case OPC_SHLL_S_PH:
+                check_dsp(ctx);
+                gen_helper_shll_s_ph(cpu_gpr[ret], t0, v2_t, cpu_env);
+                break;
+            case OPC_SHLLV_S_PH:
+                check_dsp(ctx);
+                gen_helper_shll_s_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+                break;
+            case OPC_SHLL_S_W:
+                check_dsp(ctx);
+                gen_helper_shll_s_w(cpu_gpr[ret], t0, v2_t, cpu_env);
+                break;
+            case OPC_SHLLV_S_W:
+                check_dsp(ctx);
+                gen_helper_shll_s_w(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+                break;
+            case OPC_SHRL_QB:
+                check_dsp(ctx);
+                gen_helper_shrl_qb(cpu_gpr[ret], t0, v2_t);
+                break;
+            case OPC_SHRLV_QB:
+                check_dsp(ctx);
+                gen_helper_shrl_qb(cpu_gpr[ret], v1_t, v2_t);
+                break;
+            case OPC_SHRL_PH:
+                check_dspr2(ctx); /* NOTE(review): presumably requires DSP R2; confirm */
+                gen_helper_shrl_ph(cpu_gpr[ret], t0, v2_t);
+                break;
+            case OPC_SHRLV_PH:
+                check_dspr2(ctx);
+                gen_helper_shrl_ph(cpu_gpr[ret], v1_t, v2_t);
+                break;
+            case OPC_SHRA_QB:
+                check_dspr2(ctx);
+                gen_helper_shra_qb(cpu_gpr[ret], t0, v2_t);
+                break;
+            case OPC_SHRA_R_QB:
+                check_dspr2(ctx);
+                gen_helper_shra_r_qb(cpu_gpr[ret], t0, v2_t);
+                break;
+            case OPC_SHRAV_QB:
+                check_dspr2(ctx);
+                gen_helper_shra_qb(cpu_gpr[ret], v1_t, v2_t);
+                break;
+            case OPC_SHRAV_R_QB:
+                check_dspr2(ctx);
+                gen_helper_shra_r_qb(cpu_gpr[ret], v1_t, v2_t);
+                break;
+            case OPC_SHRA_PH:
+                check_dsp(ctx);
+                gen_helper_shra_ph(cpu_gpr[ret], t0, v2_t);
+                break;
+            case OPC_SHRA_R_PH:
+                check_dsp(ctx);
+                gen_helper_shra_r_ph(cpu_gpr[ret], t0, v2_t);
+                break;
+            case OPC_SHRAV_PH:
+                check_dsp(ctx);
+                gen_helper_shra_ph(cpu_gpr[ret], v1_t, v2_t);
+                break;
+            case OPC_SHRAV_R_PH:
+                check_dsp(ctx);
+                gen_helper_shra_r_ph(cpu_gpr[ret], v1_t, v2_t);
+                break;
+            case OPC_SHRA_R_W:
+                check_dsp(ctx);
+                gen_helper_shra_r_w(cpu_gpr[ret], t0, v2_t);
+                break;
+            case OPC_SHRAV_R_W:
+                check_dsp(ctx);
+                gen_helper_shra_r_w(cpu_gpr[ret], v1_t, v2_t);
+                break;
+            default:            /* Invalid op2 for this group */
+                MIPS_INVAL("MASK SHLL.QB");
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+            break;
+        }
+#ifdef TARGET_MIPS64
+    case OPC_SHLL_OB_DSP:
+        op2 = MASK_SHLL_OB(ctx->opcode);
+        switch (op2) { /* operand order is reversed here: (v2_t, t0 or v1_t) */
+        case OPC_SHLL_PW:
+            check_dsp(ctx);
+            gen_helper_shll_pw(cpu_gpr[ret], v2_t, t0, cpu_env);
+            break;
+        case OPC_SHLLV_PW:
+            check_dsp(ctx);
+            gen_helper_shll_pw(cpu_gpr[ret], v2_t, v1_t, cpu_env);
+            break;
+        case OPC_SHLL_S_PW:
+            check_dsp(ctx);
+            gen_helper_shll_s_pw(cpu_gpr[ret], v2_t, t0, cpu_env);
+            break;
+        case OPC_SHLLV_S_PW:
+            check_dsp(ctx);
+            gen_helper_shll_s_pw(cpu_gpr[ret], v2_t, v1_t, cpu_env);
+            break;
+        case OPC_SHLL_OB:
+            check_dsp(ctx);
+            gen_helper_shll_ob(cpu_gpr[ret], v2_t, t0, cpu_env);
+            break;
+        case OPC_SHLLV_OB:
+            check_dsp(ctx);
+            gen_helper_shll_ob(cpu_gpr[ret], v2_t, v1_t, cpu_env);
+            break;
+        case OPC_SHLL_QH:
+            check_dsp(ctx);
+            gen_helper_shll_qh(cpu_gpr[ret], v2_t, t0, cpu_env);
+            break;
+        case OPC_SHLLV_QH:
+            check_dsp(ctx);
+            gen_helper_shll_qh(cpu_gpr[ret], v2_t, v1_t, cpu_env);
+            break;
+        case OPC_SHLL_S_QH:
+            check_dsp(ctx);
+            gen_helper_shll_s_qh(cpu_gpr[ret], v2_t, t0, cpu_env);
+            break;
+        case OPC_SHLLV_S_QH:
+            check_dsp(ctx);
+            gen_helper_shll_s_qh(cpu_gpr[ret], v2_t, v1_t, cpu_env);
+            break;
+        case OPC_SHRA_OB:
+            check_dspr2(ctx);
+            gen_helper_shra_ob(cpu_gpr[ret], v2_t, t0);
+            break;
+        case OPC_SHRAV_OB:
+            check_dspr2(ctx);
+            gen_helper_shra_ob(cpu_gpr[ret], v2_t, v1_t);
+            break;
+        case OPC_SHRA_R_OB:
+            check_dspr2(ctx);
+            gen_helper_shra_r_ob(cpu_gpr[ret], v2_t, t0);
+            break;
+        case OPC_SHRAV_R_OB:
+            check_dspr2(ctx);
+            gen_helper_shra_r_ob(cpu_gpr[ret], v2_t, v1_t);
+            break;
+        case OPC_SHRA_PW:
+            check_dsp(ctx);
+            gen_helper_shra_pw(cpu_gpr[ret], v2_t, t0);
+            break;
+        case OPC_SHRAV_PW:
+            check_dsp(ctx);
+            gen_helper_shra_pw(cpu_gpr[ret], v2_t, v1_t);
+            break;
+        case OPC_SHRA_R_PW:
+            check_dsp(ctx);
+            gen_helper_shra_r_pw(cpu_gpr[ret], v2_t, t0);
+            break;
+        case OPC_SHRAV_R_PW:
+            check_dsp(ctx);
+            gen_helper_shra_r_pw(cpu_gpr[ret], v2_t, v1_t);
+            break;
+        case OPC_SHRA_QH:
+            check_dsp(ctx);
+            gen_helper_shra_qh(cpu_gpr[ret], v2_t, t0);
+            break;
+        case OPC_SHRAV_QH:
+            check_dsp(ctx);
+            gen_helper_shra_qh(cpu_gpr[ret], v2_t, v1_t);
+            break;
+        case OPC_SHRA_R_QH:
+            check_dsp(ctx);
+            gen_helper_shra_r_qh(cpu_gpr[ret], v2_t, t0);
+            break;
+        case OPC_SHRAV_R_QH:
+            check_dsp(ctx);
+            gen_helper_shra_r_qh(cpu_gpr[ret], v2_t, v1_t);
+            break;
+        case OPC_SHRL_OB:
+            check_dsp(ctx);
+            gen_helper_shrl_ob(cpu_gpr[ret], v2_t, t0);
+            break;
+        case OPC_SHRLV_OB:
+            check_dsp(ctx);
+            gen_helper_shrl_ob(cpu_gpr[ret], v2_t, v1_t);
+            break;
+        case OPC_SHRL_QH:
+            check_dspr2(ctx);
+            gen_helper_shrl_qh(cpu_gpr[ret], v2_t, t0);
+            break;
+        case OPC_SHRLV_QH:
+            check_dspr2(ctx);
+            gen_helper_shrl_qh(cpu_gpr[ret], v2_t, v1_t);
+            break;
+        default:            /* Invalid op2 for this group */
+            MIPS_INVAL("MASK SHLL.OB");
+            generate_exception(ctx, EXCP_RI);
+            break;
+        }
+        break;
+#endif /* TARGET_MIPS64 */
+    }
+
+    tcg_temp_free(t0);
+    tcg_temp_free(v1_t);
+    tcg_temp_free(v2_t);
+    (void)opn; /* avoid a compiler warning: opn is otherwise used only by MIPS_DEBUG */
+    MIPS_DEBUG("%s", opn);
+}
+
+static void gen_mipsdsp_multiply(DisasContext *ctx, uint32_t op1, uint32_t op2,
+                                 int ret, int v1, int v2, int check_ret)
+{ /* Emit the helper call for one DSP multiply insn, selected by (op1, op2). */
+    const char *opn = "mipsdsp multiply";
+    TCGv_i32 t0;                  /* ret as a constant, for helpers that take it as an argument */
+    TCGv v1_t;                    /* contents of GPR v1 */
+    TCGv v2_t;                    /* contents of GPR v2 */
+
+    if ((ret == 0) && (check_ret == 1)) {
+        /* Treat as NOP; only done when the caller passes check_ret == 1. */
+        MIPS_DEBUG("NOP");
+        return;
+    }
+
+    t0 = tcg_temp_new_i32();
+    v1_t = tcg_temp_new();
+    v2_t = tcg_temp_new();
+
+    tcg_gen_movi_i32(t0, ret);
+    gen_load_gpr(v1_t, v1);
+    gen_load_gpr(v2_t, v2);
+
+    switch (op1) { /* no default: any other op1 emits no code */
+    /* OPC_MULT_G_2E, OPC_ADDUH_QB_DSP, OPC_MUL_PH_DSP have
+     * the same mask and op1. */
+    case OPC_MULT_G_2E:
+        check_dspr2(ctx); /* checked once for every op2 in this group */
+        switch (op2) { /* these helpers write cpu_gpr[ret] */
+        case  OPC_MUL_PH:
+            gen_helper_mul_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case  OPC_MUL_S_PH:
+            gen_helper_mul_s_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MULQ_S_W:
+            gen_helper_mulq_s_w(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MULQ_RS_W:
+            gen_helper_mulq_rs_w(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        }
+        break;
+    case OPC_DPA_W_PH_DSP:
+        switch (op2) { /* no GPR destination: helpers take (t0, v1_t, v2_t, cpu_env) */
+        case OPC_DPAU_H_QBL:
+            check_dsp(ctx);
+            gen_helper_dpau_h_qbl(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPAU_H_QBR:
+            check_dsp(ctx);
+            gen_helper_dpau_h_qbr(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPSU_H_QBL:
+            check_dsp(ctx);
+            gen_helper_dpsu_h_qbl(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPSU_H_QBR:
+            check_dsp(ctx);
+            gen_helper_dpsu_h_qbr(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPA_W_PH:
+            check_dspr2(ctx);
+            gen_helper_dpa_w_ph(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPAX_W_PH:
+            check_dspr2(ctx);
+            gen_helper_dpax_w_ph(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPAQ_S_W_PH:
+            check_dsp(ctx);
+            gen_helper_dpaq_s_w_ph(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPAQX_S_W_PH:
+            check_dspr2(ctx);
+            gen_helper_dpaqx_s_w_ph(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPAQX_SA_W_PH:
+            check_dspr2(ctx);
+            gen_helper_dpaqx_sa_w_ph(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPS_W_PH:
+            check_dspr2(ctx);
+            gen_helper_dps_w_ph(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPSX_W_PH:
+            check_dspr2(ctx);
+            gen_helper_dpsx_w_ph(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPSQ_S_W_PH:
+            check_dsp(ctx);
+            gen_helper_dpsq_s_w_ph(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPSQX_S_W_PH:
+            check_dspr2(ctx);
+            gen_helper_dpsqx_s_w_ph(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPSQX_SA_W_PH:
+            check_dspr2(ctx);
+            gen_helper_dpsqx_sa_w_ph(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MULSAQ_S_W_PH:
+            check_dsp(ctx);
+            gen_helper_mulsaq_s_w_ph(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPAQ_SA_L_W:
+            check_dsp(ctx);
+            gen_helper_dpaq_sa_l_w(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPSQ_SA_L_W:
+            check_dsp(ctx);
+            gen_helper_dpsq_sa_l_w(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MAQ_S_W_PHL:
+            check_dsp(ctx);
+            gen_helper_maq_s_w_phl(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MAQ_S_W_PHR:
+            check_dsp(ctx);
+            gen_helper_maq_s_w_phr(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MAQ_SA_W_PHL:
+            check_dsp(ctx);
+            gen_helper_maq_sa_w_phl(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MAQ_SA_W_PHR:
+            check_dsp(ctx);
+            gen_helper_maq_sa_w_phr(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MULSA_W_PH:
+            check_dspr2(ctx);
+            gen_helper_mulsa_w_ph(t0, v1_t, v2_t, cpu_env);
+            break;
+        }
+        break;
+#ifdef TARGET_MIPS64
+    case OPC_DPAQ_W_QH_DSP:
+        {
+            int ac = ret & 0x03; /* NOTE(review): presumably the accumulator index; confirm */
+            tcg_gen_movi_i32(t0, ac); /* replaces the full ret value loaded above */
+
+            switch (op2) { /* helpers here take (v1_t, v2_t, t0, cpu_env) */
+            case OPC_DMADD:
+                check_dsp(ctx);
+                gen_helper_dmadd(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_DMADDU:
+                check_dsp(ctx);
+                gen_helper_dmaddu(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_DMSUB:
+                check_dsp(ctx);
+                gen_helper_dmsub(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_DMSUBU:
+                check_dsp(ctx);
+                gen_helper_dmsubu(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_DPA_W_QH:
+                check_dspr2(ctx);
+                gen_helper_dpa_w_qh(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_DPAQ_S_W_QH:
+                check_dsp(ctx);
+                gen_helper_dpaq_s_w_qh(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_DPAQ_SA_L_PW:
+                check_dsp(ctx);
+                gen_helper_dpaq_sa_l_pw(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_DPAU_H_OBL:
+                check_dsp(ctx);
+                gen_helper_dpau_h_obl(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_DPAU_H_OBR:
+                check_dsp(ctx);
+                gen_helper_dpau_h_obr(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_DPS_W_QH:
+                check_dspr2(ctx);
+                gen_helper_dps_w_qh(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_DPSQ_S_W_QH:
+                check_dsp(ctx);
+                gen_helper_dpsq_s_w_qh(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_DPSQ_SA_L_PW:
+                check_dsp(ctx);
+                gen_helper_dpsq_sa_l_pw(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_DPSU_H_OBL:
+                check_dsp(ctx);
+                gen_helper_dpsu_h_obl(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_DPSU_H_OBR:
+                check_dsp(ctx);
+                gen_helper_dpsu_h_obr(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_MAQ_S_L_PWL:
+                check_dsp(ctx);
+                gen_helper_maq_s_l_pwl(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_MAQ_S_L_PWR:
+                check_dsp(ctx);
+                gen_helper_maq_s_l_pwr(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_MAQ_S_W_QHLL:
+                check_dsp(ctx);
+                gen_helper_maq_s_w_qhll(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_MAQ_SA_W_QHLL:
+                check_dsp(ctx);
+                gen_helper_maq_sa_w_qhll(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_MAQ_S_W_QHLR:
+                check_dsp(ctx);
+                gen_helper_maq_s_w_qhlr(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_MAQ_SA_W_QHLR:
+                check_dsp(ctx);
+                gen_helper_maq_sa_w_qhlr(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_MAQ_S_W_QHRL:
+                check_dsp(ctx);
+                gen_helper_maq_s_w_qhrl(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_MAQ_SA_W_QHRL:
+                check_dsp(ctx);
+                gen_helper_maq_sa_w_qhrl(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_MAQ_S_W_QHRR:
+                check_dsp(ctx);
+                gen_helper_maq_s_w_qhrr(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_MAQ_SA_W_QHRR:
+                check_dsp(ctx);
+                gen_helper_maq_sa_w_qhrr(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_MULSAQ_S_L_PW:
+                check_dsp(ctx);
+                gen_helper_mulsaq_s_l_pw(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_MULSAQ_S_W_QH:
+                check_dsp(ctx);
+                gen_helper_mulsaq_s_w_qh(v1_t, v2_t, t0, cpu_env);
+                break;
+            }
+        }
+        break;
+#endif /* TARGET_MIPS64 */
+    case OPC_ADDU_QB_DSP:
+        switch (op2) { /* these helpers write cpu_gpr[ret] */
+        case OPC_MULEU_S_PH_QBL:
+            check_dsp(ctx);
+            gen_helper_muleu_s_ph_qbl(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MULEU_S_PH_QBR:
+            check_dsp(ctx);
+            gen_helper_muleu_s_ph_qbr(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MULQ_RS_PH:
+            check_dsp(ctx);
+            gen_helper_mulq_rs_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MULEQ_S_W_PHL:
+            check_dsp(ctx);
+            gen_helper_muleq_s_w_phl(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MULEQ_S_W_PHR:
+            check_dsp(ctx);
+            gen_helper_muleq_s_w_phr(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MULQ_S_PH:
+            check_dspr2(ctx);
+            gen_helper_mulq_s_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        }
+        break;
+#ifdef TARGET_MIPS64
+    case OPC_ADDU_OB_DSP:
+        switch (op2) { /* these helpers write cpu_gpr[ret] */
+        case OPC_MULEQ_S_PW_QHL:
+            check_dsp(ctx);
+            gen_helper_muleq_s_pw_qhl(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MULEQ_S_PW_QHR:
+            check_dsp(ctx);
+            gen_helper_muleq_s_pw_qhr(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MULEU_S_QH_OBL:
+            check_dsp(ctx);
+            gen_helper_muleu_s_qh_obl(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MULEU_S_QH_OBR:
+            check_dsp(ctx);
+            gen_helper_muleu_s_qh_obr(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MULQ_RS_QH:
+            check_dsp(ctx);
+            gen_helper_mulq_rs_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        }
+        break;
+#endif /* TARGET_MIPS64 */
+    }
+
+    tcg_temp_free_i32(t0);
+    tcg_temp_free(v1_t);
+    tcg_temp_free(v2_t);
+
+    (void)opn; /* avoid a compiler warning: opn is otherwise used only by MIPS_DEBUG */
+    MIPS_DEBUG("%s", opn);
+
+}
+
+static void gen_mipsdsp_bitinsn(DisasContext *ctx, uint32_t op1, uint32_t op2,
+                                int ret, int val)
+{ /* Emit code for BITREV and the REPL/REPLV replicate insns; result in cpu_gpr[ret]. */
+    const char *opn = "mipsdsp Bit/ Manipulation";
+    int16_t imm;                  /* immediate taken from ctx->opcode, bits 16 and up */
+    TCGv t0;                      /* scratch for the shift/or replication steps */
+    TCGv val_t;                   /* contents of GPR val */
+
+    if (ret == 0) {
+        /* Treat as NOP: emit nothing rather than write cpu_gpr[0]. */
+        MIPS_DEBUG("NOP");
+        return;
+    }
+
+    t0 = tcg_temp_new();
+    val_t = tcg_temp_new();
+    gen_load_gpr(val_t, val);
+
+    switch (op1) { /* no default: any other op1 emits no code */
+    case OPC_ABSQ_S_PH_DSP:
+        switch (op2) {
+        case OPC_BITREV:
+            check_dsp(ctx);
+            gen_helper_bitrev(cpu_gpr[ret], val_t);
+            break;
+        case OPC_REPL_QB:
+            check_dsp(ctx);
+            {
+                target_long result;
+                imm = (ctx->opcode >> 16) & 0xFF; /* 8-bit immediate */
+                result = (uint32_t)imm << 24 | /* same byte in all four byte lanes */
+                         (uint32_t)imm << 16 |
+                         (uint32_t)imm << 8  |
+                         (uint32_t)imm;
+                result = (int32_t)result; /* sign-extend from 32 bits */
+                tcg_gen_movi_tl(cpu_gpr[ret], result);
+            }
+            break;
+        case OPC_REPLV_QB:
+            check_dsp(ctx);
+            tcg_gen_ext8u_tl(cpu_gpr[ret], val_t); /* start from the low byte of val_t */
+            tcg_gen_shli_tl(t0, cpu_gpr[ret], 8);
+            tcg_gen_or_tl(cpu_gpr[ret], cpu_gpr[ret], t0); /* now 2 copies */
+            tcg_gen_shli_tl(t0, cpu_gpr[ret], 16);
+            tcg_gen_or_tl(cpu_gpr[ret], cpu_gpr[ret], t0); /* now 4 copies */
+            tcg_gen_ext32s_tl(cpu_gpr[ret], cpu_gpr[ret]);
+            break;
+        case OPC_REPL_PH:
+            check_dsp(ctx);
+            {
+                imm = (ctx->opcode >> 16) & 0x03FF; /* 10-bit immediate */
+                imm = (int16_t)(imm << 6) >> 6; /* sign-extend bit 9 into bits 10..15 */
+                tcg_gen_movi_tl(cpu_gpr[ret], \
+                                (target_long)((int32_t)imm << 16 | \
+                                (uint16_t)imm)); /* same halfword in both 16-bit lanes */
+            }
+            break;
+        case OPC_REPLV_PH:
+            check_dsp(ctx);
+            tcg_gen_ext16u_tl(cpu_gpr[ret], val_t); /* start from the low halfword of val_t */
+            tcg_gen_shli_tl(t0, cpu_gpr[ret], 16);
+            tcg_gen_or_tl(cpu_gpr[ret], cpu_gpr[ret], t0); /* now 2 copies */
+            tcg_gen_ext32s_tl(cpu_gpr[ret], cpu_gpr[ret]);
+            break;
+        }
+        break;
+#ifdef TARGET_MIPS64
+    case OPC_ABSQ_S_QH_DSP:
+        switch (op2) {
+        case OPC_REPL_OB:
+            check_dsp(ctx);
+            {
+                target_long temp;
+
+                imm = (ctx->opcode >> 16) & 0xFF; /* 8-bit immediate */
+                temp = ((uint64_t)imm << 8) | (uint64_t)imm; /* 2 copies */
+                temp = (temp << 16) | temp; /* 4 copies */
+                temp = (temp << 32) | temp; /* 8 copies */
+                tcg_gen_movi_tl(cpu_gpr[ret], temp);
+                break;
+            }
+        case OPC_REPL_PW:
+            check_dsp(ctx);
+            {
+                target_long temp;
+
+                imm = (ctx->opcode >> 16) & 0x03FF; /* 10-bit immediate */
+                imm = (int16_t)(imm << 6) >> 6; /* sign-extend bit 9 into bits 10..15 */
+                temp = ((target_long)imm << 32) \
+                       | ((target_long)imm & 0xFFFFFFFF); /* same value in both 32-bit words */
+                tcg_gen_movi_tl(cpu_gpr[ret], temp);
+                break;
+            }
+        case OPC_REPL_QH:
+            check_dsp(ctx);
+            {
+                target_long temp;
+
+                imm = (ctx->opcode >> 16) & 0x03FF; /* 10-bit immediate */
+                imm = (int16_t)(imm << 6) >> 6; /* sign-extend bit 9 into bits 10..15 */
+
+                temp = ((uint64_t)(uint16_t)imm << 48) | /* same halfword in all four lanes */
+                       ((uint64_t)(uint16_t)imm << 32) |
+                       ((uint64_t)(uint16_t)imm << 16) |
+                       (uint64_t)(uint16_t)imm;
+                tcg_gen_movi_tl(cpu_gpr[ret], temp);
+                break;
+            }
+        case OPC_REPLV_OB:
+            check_dsp(ctx);
+            tcg_gen_ext8u_tl(cpu_gpr[ret], val_t); /* start from the low byte of val_t */
+            tcg_gen_shli_tl(t0, cpu_gpr[ret], 8);
+            tcg_gen_or_tl(cpu_gpr[ret], cpu_gpr[ret], t0); /* now 2 copies */
+            tcg_gen_shli_tl(t0, cpu_gpr[ret], 16);
+            tcg_gen_or_tl(cpu_gpr[ret], cpu_gpr[ret], t0); /* now 4 copies */
+            tcg_gen_shli_tl(t0, cpu_gpr[ret], 32);
+            tcg_gen_or_tl(cpu_gpr[ret], cpu_gpr[ret], t0); /* now 8 copies */
+            break;
+        case OPC_REPLV_PW:
+            check_dsp(ctx);
+            tcg_gen_ext32u_i64(cpu_gpr[ret], val_t); /* start from the low 32 bits of val_t */
+            tcg_gen_shli_tl(t0, cpu_gpr[ret], 32);
+            tcg_gen_or_tl(cpu_gpr[ret], cpu_gpr[ret], t0); /* now 2 copies */
+            break;
+        case OPC_REPLV_QH:
+            check_dsp(ctx);
+            tcg_gen_ext16u_tl(cpu_gpr[ret], val_t); /* start from the low halfword of val_t */
+            tcg_gen_shli_tl(t0, cpu_gpr[ret], 16);
+            tcg_gen_or_tl(cpu_gpr[ret], cpu_gpr[ret], t0); /* now 2 copies */
+            tcg_gen_shli_tl(t0, cpu_gpr[ret], 32);
+            tcg_gen_or_tl(cpu_gpr[ret], cpu_gpr[ret], t0); /* now 4 copies */
+            break;
+        }
+        break;
+#endif /* TARGET_MIPS64 */
+    }
+    tcg_temp_free(t0);
+    tcg_temp_free(val_t);
+
+    (void)opn; /* avoid a compiler warning: opn is otherwise used only by MIPS_DEBUG */
+    MIPS_DEBUG("%s", opn);
+}
+
+static void gen_mipsdsp_add_cmp_pick(DisasContext *ctx,
+                                     uint32_t op1, uint32_t op2,
+                                     int ret, int v1, int v2, int check_ret)
+{
+    const char *opn = "mipsdsp add compare pick";
+    TCGv t1;                    /* holds the CMPGDU.*.QB helper result */
+    TCGv v1_t;                  /* loaded from GPR index v1 */
+    TCGv v2_t;                  /* loaded from GPR index v2 */
+
+    if ((ret == 0) && (check_ret == 1)) {
+        /* Caller asked for the check and the target is GPR 0: emit no ops. */
+        MIPS_DEBUG("NOP");
+        return;
+    }
+
+    t1 = tcg_temp_new();
+    v1_t = tcg_temp_new();
+    v2_t = tcg_temp_new();
+
+    gen_load_gpr(v1_t, v1);
+    gen_load_gpr(v2_t, v2);
+
+    switch (op1) {
+    case OPC_CMPU_EQ_QB_DSP:
+        switch (op2) {
+        case OPC_CMPU_EQ_QB:       /* CMPU.*: helper gets no GPR target */
+            check_dsp(ctx);
+            gen_helper_cmpu_eq_qb(v1_t, v2_t, cpu_env);
+            break;
+        case OPC_CMPU_LT_QB:
+            check_dsp(ctx);
+            gen_helper_cmpu_lt_qb(v1_t, v2_t, cpu_env);
+            break;
+        case OPC_CMPU_LE_QB:
+            check_dsp(ctx);
+            gen_helper_cmpu_le_qb(v1_t, v2_t, cpu_env);
+            break;
+        case OPC_CMPGU_EQ_QB:      /* CMPGU.*: result goes to GPR ret */
+            check_dsp(ctx);
+            gen_helper_cmpgu_eq_qb(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_CMPGU_LT_QB:
+            check_dsp(ctx);
+            gen_helper_cmpgu_lt_qb(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_CMPGU_LE_QB:
+            check_dsp(ctx);
+            gen_helper_cmpgu_le_qb(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_CMPGDU_EQ_QB:     /* CMPGU result to GPR ret and dspctrl */
+            check_dspr2(ctx);
+            gen_helper_cmpgu_eq_qb(t1, v1_t, v2_t);
+            tcg_gen_mov_tl(cpu_gpr[ret], t1);
+            tcg_gen_andi_tl(cpu_dspctrl, cpu_dspctrl, 0xF0FFFFFF);
+            tcg_gen_shli_tl(t1, t1, 24);  /* line up with cleared bits 27..24 */
+            tcg_gen_or_tl(cpu_dspctrl, cpu_dspctrl, t1);
+            break;
+        case OPC_CMPGDU_LT_QB:
+            check_dspr2(ctx);
+            gen_helper_cmpgu_lt_qb(t1, v1_t, v2_t);
+            tcg_gen_mov_tl(cpu_gpr[ret], t1);
+            tcg_gen_andi_tl(cpu_dspctrl, cpu_dspctrl, 0xF0FFFFFF);
+            tcg_gen_shli_tl(t1, t1, 24);  /* line up with cleared bits 27..24 */
+            tcg_gen_or_tl(cpu_dspctrl, cpu_dspctrl, t1);
+            break;
+        case OPC_CMPGDU_LE_QB:
+            check_dspr2(ctx);
+            gen_helper_cmpgu_le_qb(t1, v1_t, v2_t);
+            tcg_gen_mov_tl(cpu_gpr[ret], t1);
+            tcg_gen_andi_tl(cpu_dspctrl, cpu_dspctrl, 0xF0FFFFFF);
+            tcg_gen_shli_tl(t1, t1, 24);  /* line up with cleared bits 27..24 */
+            tcg_gen_or_tl(cpu_dspctrl, cpu_dspctrl, t1);
+            break;
+        case OPC_CMP_EQ_PH:        /* CMP.*: helper gets no GPR target */
+            check_dsp(ctx);
+            gen_helper_cmp_eq_ph(v1_t, v2_t, cpu_env);
+            break;
+        case OPC_CMP_LT_PH:
+            check_dsp(ctx);
+            gen_helper_cmp_lt_ph(v1_t, v2_t, cpu_env);
+            break;
+        case OPC_CMP_LE_PH:
+            check_dsp(ctx);
+            gen_helper_cmp_le_ph(v1_t, v2_t, cpu_env);
+            break;
+        case OPC_PICK_QB:          /* PICK/PACKRL: result goes to GPR ret */
+            check_dsp(ctx);
+            gen_helper_pick_qb(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_PICK_PH:
+            check_dsp(ctx);
+            gen_helper_pick_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_PACKRL_PH:
+            check_dsp(ctx);
+            gen_helper_packrl_ph(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        }
+        break;
+#ifdef TARGET_MIPS64
+    case OPC_CMPU_EQ_OB_DSP:
+        switch (op2) {
+        case OPC_CMP_EQ_PW:
+            check_dsp(ctx);
+            gen_helper_cmp_eq_pw(v1_t, v2_t, cpu_env);
+            break;
+        case OPC_CMP_LT_PW:
+            check_dsp(ctx);
+            gen_helper_cmp_lt_pw(v1_t, v2_t, cpu_env);
+            break;
+        case OPC_CMP_LE_PW:
+            check_dsp(ctx);
+            gen_helper_cmp_le_pw(v1_t, v2_t, cpu_env);
+            break;
+        case OPC_CMP_EQ_QH:
+            check_dsp(ctx);
+            gen_helper_cmp_eq_qh(v1_t, v2_t, cpu_env);
+            break;
+        case OPC_CMP_LT_QH:
+            check_dsp(ctx);
+            gen_helper_cmp_lt_qh(v1_t, v2_t, cpu_env);
+            break;
+        case OPC_CMP_LE_QH:
+            check_dsp(ctx);
+            gen_helper_cmp_le_qh(v1_t, v2_t, cpu_env);
+            break;
+        case OPC_CMPGDU_EQ_OB:     /* here one helper call does all the work */
+            check_dspr2(ctx);
+            gen_helper_cmpgdu_eq_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_CMPGDU_LT_OB:
+            check_dspr2(ctx);
+            gen_helper_cmpgdu_lt_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_CMPGDU_LE_OB:
+            check_dspr2(ctx);
+            gen_helper_cmpgdu_le_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_CMPGU_EQ_OB:
+            check_dsp(ctx);
+            gen_helper_cmpgu_eq_ob(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_CMPGU_LT_OB:
+            check_dsp(ctx);
+            gen_helper_cmpgu_lt_ob(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_CMPGU_LE_OB:
+            check_dsp(ctx);
+            gen_helper_cmpgu_le_ob(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_CMPU_EQ_OB:
+            check_dsp(ctx);
+            gen_helper_cmpu_eq_ob(v1_t, v2_t, cpu_env);
+            break;
+        case OPC_CMPU_LT_OB:
+            check_dsp(ctx);
+            gen_helper_cmpu_lt_ob(v1_t, v2_t, cpu_env);
+            break;
+        case OPC_CMPU_LE_OB:
+            check_dsp(ctx);
+            gen_helper_cmpu_le_ob(v1_t, v2_t, cpu_env);
+            break;
+        case OPC_PACKRL_PW:
+            check_dsp(ctx);
+            gen_helper_packrl_pw(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_PICK_OB:
+            check_dsp(ctx);
+            gen_helper_pick_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_PICK_PW:
+            check_dsp(ctx);
+            gen_helper_pick_pw(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_PICK_QH:
+            check_dsp(ctx);
+            gen_helper_pick_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        }
+        break;
+#endif
+    }
+
+    tcg_temp_free(t1);
+    tcg_temp_free(v1_t);
+    tcg_temp_free(v2_t);
+
+    (void)opn; /* avoid a compiler warning */
+    MIPS_DEBUG("%s", opn);
+}
+
+static void gen_mipsdsp_append(CPUMIPSState *env, DisasContext *ctx,
+                               uint32_t op1, int rt, int rs, int sa)
+{
+    const char *opn = "mipsdsp append/dappend";
+    TCGv t0;                    /* working copy loaded from GPR index rs */
+
+    check_dspr2(ctx);
+
+    if (rt == 0) {
+        /* Every form below only writes GPR rt; for rt == 0 emit no ops. */
+        MIPS_DEBUG("NOP");
+        return;
+    }
+
+    t0 = tcg_temp_new();
+    gen_load_gpr(t0, rs);
+
+    switch (op1) {
+    case OPC_APPEND_DSP:
+        switch (MASK_APPEND(ctx->opcode)) {
+        case OPC_APPEND:        /* rt = (rt << sa) | rs[sa-1:0], 32 bit */
+            if (sa != 0) {
+                tcg_gen_deposit_tl(cpu_gpr[rt], t0, cpu_gpr[rt], sa, 32 - sa);
+            }
+            tcg_gen_ext32s_tl(cpu_gpr[rt], cpu_gpr[rt]);
+            break;
+        case OPC_PREPEND:       /* rt = (rt32 >> sa) | (rs << (32 - sa)) */
+            if (sa != 0) {
+                tcg_gen_ext32u_tl(cpu_gpr[rt], cpu_gpr[rt]);
+                tcg_gen_shri_tl(cpu_gpr[rt], cpu_gpr[rt], sa);
+                tcg_gen_shli_tl(t0, t0, 32 - sa);
+                tcg_gen_or_tl(cpu_gpr[rt], cpu_gpr[rt], t0);
+            }
+            tcg_gen_ext32s_tl(cpu_gpr[rt], cpu_gpr[rt]);
+            break;
+        case OPC_BALIGN:        /* byte counts 0 and 2 leave rt's low word */
+            sa &= 3;
+            if (sa != 0 && sa != 2) {
+                tcg_gen_shli_tl(cpu_gpr[rt], cpu_gpr[rt], 8 * sa);
+                tcg_gen_ext32u_tl(t0, t0);
+                tcg_gen_shri_tl(t0, t0, 8 * (4 - sa));
+                tcg_gen_or_tl(cpu_gpr[rt], cpu_gpr[rt], t0);
+            }
+            tcg_gen_ext32s_tl(cpu_gpr[rt], cpu_gpr[rt]);
+            break;
+        default:            /* Invalid */
+            MIPS_INVAL("MASK APPEND");
+            generate_exception(ctx, EXCP_RI);
+            break;
+        }
+        break;
+#ifdef TARGET_MIPS64
+    case OPC_DAPPEND_DSP:
+        switch (MASK_DAPPEND(ctx->opcode)) {
+        case OPC_DAPPEND:       /* rt = (rt << sa) | rs[sa-1:0], 64 bit */
+            if (sa != 0) {
+                tcg_gen_deposit_tl(cpu_gpr[rt], t0, cpu_gpr[rt], sa, 64 - sa);
+            }
+            break;
+        case OPC_PREPENDD:      /* shift is 32 + sa; keep shifted rt too */
+            tcg_gen_shri_tl(cpu_gpr[rt], cpu_gpr[rt], 0x20 | sa);
+            tcg_gen_shli_tl(t0, t0, 64 - (0x20 | sa));
+            tcg_gen_or_tl(cpu_gpr[rt], cpu_gpr[rt], t0);
+            break;
+        case OPC_PREPENDW:      /* rt = (rt >> sa) | (rs << (64 - sa)) */
+            if (sa != 0) {
+                tcg_gen_shri_tl(cpu_gpr[rt], cpu_gpr[rt], sa);
+                tcg_gen_shli_tl(t0, t0, 64 - sa);
+                tcg_gen_or_tl(cpu_gpr[rt], cpu_gpr[rt], t0);
+            }
+            break;
+        case OPC_DBALIGN:       /* byte counts 0, 2 and 4 leave rt as is */
+            sa &= 7;
+            if (sa != 0 && sa != 2 && sa != 4) {
+                tcg_gen_shli_tl(cpu_gpr[rt], cpu_gpr[rt], 8 * sa);
+                tcg_gen_shri_tl(t0, t0, 8 * (8 - sa));
+                tcg_gen_or_tl(cpu_gpr[rt], cpu_gpr[rt], t0);
+            }
+            break;
+        default:            /* Invalid */
+            MIPS_INVAL("MASK DAPPEND");
+            generate_exception(ctx, EXCP_RI);
+            break;
+        }
+        break;
+#endif
+    }
+    tcg_temp_free(t0);
+    (void)opn; /* avoid a compiler warning */
+    MIPS_DEBUG("%s", opn);
+}
+
+static void gen_mipsdsp_accinsn(DisasContext *ctx, uint32_t op1, uint32_t op2,
+                                int ret, int v1, int v2, int check_ret)
+
+{
+    const char *opn = "mipsdsp accumulator";
+    TCGv t0;                    /* immediate operand handed to a helper */
+    TCGv t1;                    /* second immediate (non-V forms) */
+    TCGv v1_t;                  /* loaded from GPR index v1 (V forms) */
+    TCGv v2_t;
+    int16_t imm;
+
+    if ((ret == 0) && (check_ret == 1)) {
+        /* Caller asked for the check and the target is GPR 0: emit no ops. */
+        MIPS_DEBUG("NOP");
+        return;
+    }
+
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+    v1_t = tcg_temp_new();
+    v2_t = tcg_temp_new();
+
+    gen_load_gpr(v1_t, v1);
+    gen_load_gpr(v2_t, v2);
+
+    switch (op1) {
+    case OPC_EXTR_W_DSP:
+        check_dsp(ctx);
+        switch (op2) {
+        case OPC_EXTR_W:        /* non-V forms pass the v1 field itself */
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+            gen_helper_extr_w(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        case OPC_EXTR_R_W:
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+            gen_helper_extr_r_w(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        case OPC_EXTR_RS_W:
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+            gen_helper_extr_rs_w(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        case OPC_EXTR_S_H:
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+            gen_helper_extr_s_h(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        case OPC_EXTRV_S_H:     /* V forms pass the value of GPR v1 */
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_extr_s_h(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        case OPC_EXTRV_W:
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_extr_w(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        case OPC_EXTRV_R_W:
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_extr_r_w(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        case OPC_EXTRV_RS_W:
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_extr_rs_w(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        case OPC_EXTP:
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+            gen_helper_extp(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        case OPC_EXTPV:
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_extp(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        case OPC_EXTPDP:
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+            gen_helper_extpdp(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        case OPC_EXTPDPV:
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_extpdp(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        case OPC_SHILO:         /* ret is passed as a number, not written */
+            imm = (ctx->opcode >> 20) & 0x3F;
+            tcg_gen_movi_tl(t0, ret);
+            tcg_gen_movi_tl(t1, imm);
+            gen_helper_shilo(t0, t1, cpu_env);
+            break;
+        case OPC_SHILOV:
+            tcg_gen_movi_tl(t0, ret);
+            gen_helper_shilo(t0, v1_t, cpu_env);
+            break;
+        case OPC_MTHLIP:
+            tcg_gen_movi_tl(t0, ret);
+            gen_helper_mthlip(t0, v1_t, cpu_env);
+            break;
+        case OPC_WRDSP:         /* 10-bit field taken from opcode bit 11 up */
+            imm = (ctx->opcode >> 11) & 0x3FF;
+            tcg_gen_movi_tl(t0, imm);
+            gen_helper_wrdsp(v1_t, t0, cpu_env);
+            break;
+        case OPC_RDDSP:         /* 10-bit field taken from opcode bit 16 up */
+            imm = (ctx->opcode >> 16) & 0x03FF;
+            tcg_gen_movi_tl(t0, imm);
+            gen_helper_rddsp(cpu_gpr[ret], t0, cpu_env);
+            break;
+        }
+        break;
+#ifdef TARGET_MIPS64
+    case OPC_DEXTR_W_DSP:
+        check_dsp(ctx);
+        switch (op2) {
+        case OPC_DMTHLIP:
+            tcg_gen_movi_tl(t0, ret);
+            gen_helper_dmthlip(v1_t, t0, cpu_env);
+            break;
+        case OPC_DSHILO:
+            {
+                int shift = (ctx->opcode >> 19) & 0x7F;
+                int ac = (ctx->opcode >> 11) & 0x03;
+                tcg_gen_movi_tl(t0, shift);
+                tcg_gen_movi_tl(t1, ac);
+                gen_helper_dshilo(t0, t1, cpu_env);
+                break;
+            }
+        case OPC_DSHILOV:
+            {
+                int ac = (ctx->opcode >> 11) & 0x03;
+                tcg_gen_movi_tl(t0, ac);
+                gen_helper_dshilo(v1_t, t0, cpu_env);
+                break;
+            }
+        case OPC_DEXTP:
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+
+            gen_helper_dextp(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        case OPC_DEXTPV:
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_dextp(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        case OPC_DEXTPDP:
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+            gen_helper_dextpdp(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        case OPC_DEXTPDPV:
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_dextpdp(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        case OPC_DEXTR_L:
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+            gen_helper_dextr_l(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        case OPC_DEXTR_R_L:
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+            gen_helper_dextr_r_l(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        case OPC_DEXTR_RS_L:
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+            gen_helper_dextr_rs_l(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        case OPC_DEXTR_W:
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+            gen_helper_dextr_w(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        case OPC_DEXTR_R_W:
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+            gen_helper_dextr_r_w(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        case OPC_DEXTR_RS_W:
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+            gen_helper_dextr_rs_w(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        case OPC_DEXTR_S_H:
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+            gen_helper_dextr_s_h(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        case OPC_DEXTRV_S_H:
+            /* V form: use the value of GPR v1, like the other DEXTRV_*. */
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_dextr_s_h(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        case OPC_DEXTRV_L:
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_dextr_l(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        case OPC_DEXTRV_R_L:
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_dextr_r_l(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        case OPC_DEXTRV_RS_L:
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_dextr_rs_l(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        case OPC_DEXTRV_W:
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_dextr_w(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        case OPC_DEXTRV_R_W:
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_dextr_r_w(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        case OPC_DEXTRV_RS_W:
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_dextr_rs_w(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        }
+        break;
+#endif
+    }
+
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+    tcg_temp_free(v1_t);
+    tcg_temp_free(v2_t);
+
+    (void)opn; /* avoid a compiler warning */
+    MIPS_DEBUG("%s", opn);
+}
+
+/* End MIPSDSP functions. */
+
 static void decode_opc (CPUMIPSState *env, DisasContext *ctx)
 {
     int32_t offset;
@@ -8385,7 +14464,7 @@
     }
 
     /* Handle blikely not taken case */
-    if ((ctx->hflags & MIPS_HFLAG_BMASK) == MIPS_HFLAG_BL) {
+    if ((ctx->hflags & MIPS_HFLAG_BMASK_BASE) == MIPS_HFLAG_BL) {
         int l1 = gen_new_label();
 
         MIPS_DEBUG("blikely condition (" TARGET_FMT_lx ")", ctx->pc + 4);
@@ -8395,8 +14474,9 @@
         gen_set_label(l1);
     }
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
         tcg_gen_debug_insn_start(ctx->pc);
+    }
 
     op = MASK_OP_MAJOR(ctx->opcode);
     rs = (ctx->opcode >> 21) & 0x1f;
@@ -8410,61 +14490,98 @@
         switch (op1) {
         case OPC_SLL:          /* Shift with immediate */
         case OPC_SRA:
+            gen_shift_imm(ctx, op1, rd, rt, sa);
+            break;
         case OPC_SRL:
-            gen_shift_imm(env, ctx, op1, rd, rt, sa);
+            switch ((ctx->opcode >> 21) & 0x1f) {
+            case 1:
+                /* rotr is decoded as srl on non-R2 CPUs */
+                if (ctx->insn_flags & ISA_MIPS32R2) {
+                    op1 = OPC_ROTR;
+                }
+                /* Fallthrough */
+            case 0:
+                gen_shift_imm(ctx, op1, rd, rt, sa);
+                break;
+            default:
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
             break;
         case OPC_MOVN:         /* Conditional move */
         case OPC_MOVZ:
-            check_insn(env, ctx, ISA_MIPS4 | ISA_MIPS32);
-            gen_cond_move(env, op1, rd, rs, rt);
+            check_insn(ctx, ISA_MIPS4 | ISA_MIPS32 |
+                                 INSN_LOONGSON2E | INSN_LOONGSON2F);
+            gen_cond_move(ctx, op1, rd, rs, rt);
             break;
         case OPC_ADD ... OPC_SUBU:
-            gen_arith(env, ctx, op1, rd, rs, rt);
+            gen_arith(ctx, op1, rd, rs, rt);
             break;
         case OPC_SLLV:         /* Shifts */
-        case OPC_SRLV:
         case OPC_SRAV:
-            gen_shift(env, ctx, op1, rd, rs, rt);
+            gen_shift(ctx, op1, rd, rs, rt);
+            break;
+        case OPC_SRLV:
+            switch ((ctx->opcode >> 6) & 0x1f) {
+            case 1:
+                /* rotrv is decoded as srlv on non-R2 CPUs */
+                if (ctx->insn_flags & ISA_MIPS32R2) {
+                    op1 = OPC_ROTRV;
+                }
+                /* Fallthrough */
+            case 0:
+                gen_shift(ctx, op1, rd, rs, rt);
+                break;
+            default:
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
             break;
         case OPC_SLT:          /* Set on less than */
         case OPC_SLTU:
-            gen_slt(env, op1, rd, rs, rt);
+            gen_slt(ctx, op1, rd, rs, rt);
             break;
         case OPC_AND:          /* Logic*/
         case OPC_OR:
         case OPC_NOR:
         case OPC_XOR:
-            gen_logic(env, op1, rd, rs, rt);
+            gen_logic(ctx, op1, rd, rs, rt);
             break;
-        case OPC_MULT ... OPC_DIVU:
+        case OPC_MULT:
+        case OPC_MULTU:
             if (sa) {
-                check_insn(env, ctx, INSN_VR54XX);
+                check_insn(ctx, INSN_VR54XX);
                 op1 = MASK_MUL_VR54XX(ctx->opcode);
                 gen_mul_vr54xx(ctx, op1, rd, rs, rt);
-            } else
-                gen_muldiv(ctx, op1, rs, rt);
+            } else {
+                gen_muldiv(ctx, op1, rd & 3, rs, rt);
+            }
+            break;
+        case OPC_DIV:
+        case OPC_DIVU:
+            gen_muldiv(ctx, op1, 0, rs, rt);
             break;
         case OPC_JR ... OPC_JALR:
-            gen_compute_branch(ctx, op1, rs, rd, sa);
-            return;
+            gen_compute_branch(ctx, op1, 4, rs, rd, sa);
+            break;
         case OPC_TGE ... OPC_TEQ: /* Traps */
         case OPC_TNE:
             gen_trap(ctx, op1, rs, rt, -1);
             break;
         case OPC_MFHI:          /* Move from HI/LO */
         case OPC_MFLO:
-            gen_HILO(ctx, op1, rd);
+            gen_HILO(ctx, op1, rs & 3, rd);
             break;
         case OPC_MTHI:
         case OPC_MTLO:          /* Move to HI/LO */
-            gen_HILO(ctx, op1, rs);
+            gen_HILO(ctx, op1, rd & 3, rs);
             break;
         case OPC_PMON:          /* Pmon entry point, also R4010 selsl */
 #ifdef MIPS_STRICT_STANDARD
             MIPS_INVAL("PMON / selsl");
             generate_exception(ctx, EXCP_RI);
 #else
-            gen_helper_1i(pmon, cpu_env, sa);
+            gen_helper_0e0i(pmon, sa);
 #endif
             break;
         case OPC_SYSCALL:
@@ -8489,7 +14606,7 @@
             break;
 
         case OPC_MOVCI:
-            check_insn(env, ctx, ISA_MIPS4 | ISA_MIPS32);
+            check_insn(ctx, ISA_MIPS4 | ISA_MIPS32);
             if (env->CP0_Config1 & (1 << CP0C1_FP)) {
                 check_cp1_enabled(ctx);
                 gen_movci(ctx, rd, rs, (ctx->opcode >> 18) & 0x7,
@@ -8503,30 +14620,81 @@
        /* MIPS64 specific opcodes */
         case OPC_DSLL:
         case OPC_DSRA:
-        case OPC_DSRL:
         case OPC_DSLL32:
         case OPC_DSRA32:
-        case OPC_DSRL32:
-            check_insn(env, ctx, ISA_MIPS3);
+            check_insn(ctx, ISA_MIPS3);
             check_mips_64(ctx);
-            gen_shift_imm(env, ctx, op1, rd, rt, sa);
+            gen_shift_imm(ctx, op1, rd, rt, sa);
+            break;
+        case OPC_DSRL:
+            switch ((ctx->opcode >> 21) & 0x1f) {
+            case 1:
+                /* drotr is decoded as dsrl on non-R2 CPUs */
+                if (ctx->insn_flags & ISA_MIPS32R2) {
+                    op1 = OPC_DROTR;
+                }
+                /* Fallthrough */
+            case 0:
+                check_insn(ctx, ISA_MIPS3);
+                check_mips_64(ctx);
+                gen_shift_imm(ctx, op1, rd, rt, sa);
+                break;
+            default:
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+            break;
+        case OPC_DSRL32:
+            switch ((ctx->opcode >> 21) & 0x1f) {
+            case 1:
+                /* drotr32 is decoded as dsrl32 on non-R2 CPUs */
+                if (ctx->insn_flags & ISA_MIPS32R2) {
+                    op1 = OPC_DROTR32;
+                }
+                /* Fallthrough */
+            case 0:
+                check_insn(ctx, ISA_MIPS3);
+                check_mips_64(ctx);
+                gen_shift_imm(ctx, op1, rd, rt, sa);
+                break;
+            default:
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
             break;
         case OPC_DADD ... OPC_DSUBU:
-            check_insn(env, ctx, ISA_MIPS3);
+            check_insn(ctx, ISA_MIPS3);
             check_mips_64(ctx);
-            gen_arith(env, ctx, op1, rd, rs, rt);
+            gen_arith(ctx, op1, rd, rs, rt);
             break;
         case OPC_DSLLV:
         case OPC_DSRAV:
-        case OPC_DSRLV:
-            check_insn(env, ctx, ISA_MIPS3);
+            check_insn(ctx, ISA_MIPS3);
             check_mips_64(ctx);
-            gen_shift(env, ctx, op1, rd, rs, rt);
+            gen_shift(ctx, op1, rd, rs, rt);
+            break;
+        case OPC_DSRLV:
+            switch ((ctx->opcode >> 6) & 0x1f) {
+            case 1:
+                /* drotrv is decoded as dsrlv on non-R2 CPUs */
+                if (ctx->insn_flags & ISA_MIPS32R2) {
+                    op1 = OPC_DROTRV;
+                }
+                /* Fallthrough */
+            case 0:
+                check_insn(ctx, ISA_MIPS3);
+                check_mips_64(ctx);
+                gen_shift(ctx, op1, rd, rs, rt);
+                break;
+            default:
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
             break;
         case OPC_DMULT ... OPC_DDIVU:
-            check_insn(env, ctx, ISA_MIPS3);
+            check_insn(ctx, ISA_MIPS3);
             check_mips_64(ctx);
-            gen_muldiv(ctx, op1, rs, rt);
+            gen_muldiv(ctx, op1, 0, rs, rt);
             break;
 #endif
         default:            /* Invalid */
@@ -8540,22 +14708,22 @@
         switch (op1) {
         case OPC_MADD ... OPC_MADDU: /* Multiply and add/sub */
         case OPC_MSUB ... OPC_MSUBU:
-            check_insn(env, ctx, ISA_MIPS32);
-            gen_muldiv(ctx, op1, rs, rt);
+            check_insn(ctx, ISA_MIPS32);
+            gen_muldiv(ctx, op1, rd & 3, rs, rt);
             break;
         case OPC_MUL:
-            gen_arith(env, ctx, op1, rd, rs, rt);
+            gen_arith(ctx, op1, rd, rs, rt);
             break;
         case OPC_CLO:
         case OPC_CLZ:
-            check_insn(env, ctx, ISA_MIPS32);
+            check_insn(ctx, ISA_MIPS32);
             gen_cl(ctx, op1, rd, rs);
             break;
         case OPC_SDBBP:
             /* XXX: not clear which exception should be raised
              *      when in debug mode...
              */
-            check_insn(env, ctx, ISA_MIPS32);
+            check_insn(ctx, ISA_MIPS32);
             if (!(ctx->hflags & MIPS_HFLAG_DM)) {
                 generate_exception(ctx, EXCP_DBp);
             } else {
@@ -8563,13 +14731,31 @@
             }
             /* Treat as NOP. */
             break;
+        case OPC_DIV_G_2F:
+        case OPC_DIVU_G_2F:
+        case OPC_MULT_G_2F:
+        case OPC_MULTU_G_2F:
+        case OPC_MOD_G_2F:
+        case OPC_MODU_G_2F:
+            check_insn(ctx, INSN_LOONGSON2F);
+            gen_loongson_integer(ctx, op1, rd, rs, rt);
+            break;
 #if defined(TARGET_MIPS64)
         case OPC_DCLO:
         case OPC_DCLZ:
-            check_insn(env, ctx, ISA_MIPS64);
+            check_insn(ctx, ISA_MIPS64);
             check_mips_64(ctx);
             gen_cl(ctx, op1, rd, rs);
             break;
+        case OPC_DMULT_G_2F:
+        case OPC_DMULTU_G_2F:
+        case OPC_DDIV_G_2F:
+        case OPC_DDIVU_G_2F:
+        case OPC_DMOD_G_2F:
+        case OPC_DMODU_G_2F:
+            check_insn(ctx, INSN_LOONGSON2F);
+            gen_loongson_integer(ctx, op1, rd, rs, rt);
+            break;
 #endif
         default:            /* Invalid */
             MIPS_INVAL("special2");
@@ -8582,59 +14768,19 @@
         switch (op1) {
         case OPC_EXT:
         case OPC_INS:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_bitops(ctx, op1, rt, rs, sa, rd);
             break;
         case OPC_BSHFL:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             op2 = MASK_BSHFL(ctx->opcode);
             gen_bshfl(ctx, op2, rt, rd);
             break;
         case OPC_RDHWR:
-            check_insn(env, ctx, ISA_MIPS32R2);
-            {
-                TCGv t0 = tcg_temp_new();
-
-                switch (rd) {
-                case 0:
-                    save_cpu_state(ctx, 1);
-                    gen_helper_rdhwr_cpunum(t0, cpu_env);
-                    gen_store_gpr(t0, rt);
-                    break;
-                case 1:
-                    save_cpu_state(ctx, 1);
-                    gen_helper_rdhwr_synci_step(t0, cpu_env);
-                    gen_store_gpr(t0, rt);
-                    break;
-                case 2:
-                    save_cpu_state(ctx, 1);
-                    gen_helper_rdhwr_cc(t0, cpu_env);
-                    gen_store_gpr(t0, rt);
-                    break;
-                case 3:
-                    save_cpu_state(ctx, 1);
-                    gen_helper_rdhwr_ccres(t0, cpu_env);
-                    gen_store_gpr(t0, rt);
-                    break;
-                case 29:
-#if defined(CONFIG_USER_ONLY)
-                    tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUMIPSState, tls_value));
-                    gen_store_gpr(t0, rt);
-                    break;
-#else
-                    /* XXX: Some CPUs implement this in hardware.
-                       Not supported yet. */
-#endif
-                default:            /* Invalid */
-                    MIPS_INVAL("rdhwr");
-                    generate_exception(ctx, EXCP_RI);
-                    break;
-                }
-                tcg_temp_free(t0);
-            }
+            gen_rdhwr(ctx, rt, rd);
             break;
         case OPC_FORK:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             {
                 TCGv t0 = tcg_temp_new();
                 TCGv t1 = tcg_temp_new();
@@ -8647,7 +14793,7 @@
             }
             break;
         case OPC_YIELD:
-            check_insn(env, ctx, ASE_MT);
+            check_insn(ctx, ASE_MT);
             {
                 TCGv t0 = tcg_temp_new();
 
@@ -8658,19 +14804,518 @@
                 tcg_temp_free(t0);
             }
             break;
+        case OPC_DIV_G_2E ... OPC_DIVU_G_2E:
+        case OPC_MOD_G_2E ... OPC_MODU_G_2E:
+        case OPC_MULT_G_2E ... OPC_MULTU_G_2E:
+        /* OPC_MULT_G_2E, OPC_ADDUH_QB_DSP, OPC_MUL_PH_DSP have
+         * the same mask and op1. */
+            if ((ctx->insn_flags & ASE_DSPR2) && (op1 == OPC_MULT_G_2E)) {
+                op2 = MASK_ADDUH_QB(ctx->opcode);
+                switch (op2) {
+                case OPC_ADDUH_QB:
+                case OPC_ADDUH_R_QB:
+                case OPC_ADDQH_PH:
+                case OPC_ADDQH_R_PH:
+                case OPC_ADDQH_W:
+                case OPC_ADDQH_R_W:
+                case OPC_SUBUH_QB:
+                case OPC_SUBUH_R_QB:
+                case OPC_SUBQH_PH:
+                case OPC_SUBQH_R_PH:
+                case OPC_SUBQH_W:
+                case OPC_SUBQH_R_W:
+                    gen_mipsdsp_arith(ctx, op1, op2, rd, rs, rt);
+                    break;
+                case OPC_MUL_PH:
+                case OPC_MUL_S_PH:
+                case OPC_MULQ_S_W:
+                case OPC_MULQ_RS_W:
+                    gen_mipsdsp_multiply(ctx, op1, op2, rd, rs, rt, 1);
+                    break;
+                default:
+                    MIPS_INVAL("MASK ADDUH.QB");
+                    generate_exception(ctx, EXCP_RI);
+                    break;
+                }
+            } else if (ctx->insn_flags & INSN_LOONGSON2E) {
+                gen_loongson_integer(ctx, op1, rd, rs, rt);
+            } else {
+                generate_exception(ctx, EXCP_RI);
+            }
+            break;
+        case OPC_LX_DSP:
+            op2 = MASK_LX(ctx->opcode);
+            switch (op2) {
+#if defined(TARGET_MIPS64)
+            case OPC_LDX:
+#endif
+            case OPC_LBUX:
+            case OPC_LHX:
+            case OPC_LWX:
+                gen_mipsdsp_ld(ctx, op2, rd, rs, rt);
+                break;
+            default:            /* Invalid */
+                MIPS_INVAL("MASK LX");
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+            break;
+        case OPC_ABSQ_S_PH_DSP:
+            op2 = MASK_ABSQ_S_PH(ctx->opcode);
+            switch (op2) {
+            case OPC_ABSQ_S_QB:
+            case OPC_ABSQ_S_PH:
+            case OPC_ABSQ_S_W:
+            case OPC_PRECEQ_W_PHL:
+            case OPC_PRECEQ_W_PHR:
+            case OPC_PRECEQU_PH_QBL:
+            case OPC_PRECEQU_PH_QBR:
+            case OPC_PRECEQU_PH_QBLA:
+            case OPC_PRECEQU_PH_QBRA:
+            case OPC_PRECEU_PH_QBL:
+            case OPC_PRECEU_PH_QBR:
+            case OPC_PRECEU_PH_QBLA:
+            case OPC_PRECEU_PH_QBRA:
+                gen_mipsdsp_arith(ctx, op1, op2, rd, rs, rt);
+                break;
+            case OPC_BITREV:
+            case OPC_REPL_QB:
+            case OPC_REPLV_QB:
+            case OPC_REPL_PH:
+            case OPC_REPLV_PH:
+                gen_mipsdsp_bitinsn(ctx, op1, op2, rd, rt);
+                break;
+            default:
+                MIPS_INVAL("MASK ABSQ_S.PH");
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+            break;
+        case OPC_ADDU_QB_DSP:
+            op2 = MASK_ADDU_QB(ctx->opcode);
+            switch (op2) {
+            case OPC_ADDQ_PH:
+            case OPC_ADDQ_S_PH:
+            case OPC_ADDQ_S_W:
+            case OPC_ADDU_QB:
+            case OPC_ADDU_S_QB:
+            case OPC_ADDU_PH:
+            case OPC_ADDU_S_PH:
+            case OPC_SUBQ_PH:
+            case OPC_SUBQ_S_PH:
+            case OPC_SUBQ_S_W:
+            case OPC_SUBU_QB:
+            case OPC_SUBU_S_QB:
+            case OPC_SUBU_PH:
+            case OPC_SUBU_S_PH:
+            case OPC_ADDSC:
+            case OPC_ADDWC:
+            case OPC_MODSUB:
+            case OPC_RADDU_W_QB:
+                gen_mipsdsp_arith(ctx, op1, op2, rd, rs, rt);
+                break;
+            case OPC_MULEU_S_PH_QBL:
+            case OPC_MULEU_S_PH_QBR:
+            case OPC_MULQ_RS_PH:
+            case OPC_MULEQ_S_W_PHL:
+            case OPC_MULEQ_S_W_PHR:
+            case OPC_MULQ_S_PH:
+                gen_mipsdsp_multiply(ctx, op1, op2, rd, rs, rt, 1);
+                break;
+            default:            /* Invalid */
+                MIPS_INVAL("MASK ADDU.QB");
+                generate_exception(ctx, EXCP_RI);
+                break;
+
+            }
+            break;
+        case OPC_CMPU_EQ_QB_DSP:
+            op2 = MASK_CMPU_EQ_QB(ctx->opcode);
+            switch (op2) {
+            case OPC_PRECR_SRA_PH_W:
+            case OPC_PRECR_SRA_R_PH_W:
+                gen_mipsdsp_arith(ctx, op1, op2, rt, rs, rd);
+                break;
+            case OPC_PRECR_QB_PH:
+            case OPC_PRECRQ_QB_PH:
+            case OPC_PRECRQ_PH_W:
+            case OPC_PRECRQ_RS_PH_W:
+            case OPC_PRECRQU_S_QB_PH:
+                gen_mipsdsp_arith(ctx, op1, op2, rd, rs, rt);
+                break;
+            case OPC_CMPU_EQ_QB:
+            case OPC_CMPU_LT_QB:
+            case OPC_CMPU_LE_QB:
+            case OPC_CMP_EQ_PH:
+            case OPC_CMP_LT_PH:
+            case OPC_CMP_LE_PH:
+                gen_mipsdsp_add_cmp_pick(ctx, op1, op2, rd, rs, rt, 0);
+                break;
+            case OPC_CMPGU_EQ_QB:
+            case OPC_CMPGU_LT_QB:
+            case OPC_CMPGU_LE_QB:
+            case OPC_CMPGDU_EQ_QB:
+            case OPC_CMPGDU_LT_QB:
+            case OPC_CMPGDU_LE_QB:
+            case OPC_PICK_QB:
+            case OPC_PICK_PH:
+            case OPC_PACKRL_PH:
+                gen_mipsdsp_add_cmp_pick(ctx, op1, op2, rd, rs, rt, 1);
+                break;
+            default:            /* Invalid */
+                MIPS_INVAL("MASK CMPU.EQ.QB");
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+            break;
+        case OPC_SHLL_QB_DSP:
+            gen_mipsdsp_shift(ctx, op1, rd, rs, rt);
+            break;
+        case OPC_DPA_W_PH_DSP:
+            op2 = MASK_DPA_W_PH(ctx->opcode);
+            switch (op2) {
+            case OPC_DPAU_H_QBL:
+            case OPC_DPAU_H_QBR:
+            case OPC_DPSU_H_QBL:
+            case OPC_DPSU_H_QBR:
+            case OPC_DPA_W_PH:
+            case OPC_DPAX_W_PH:
+            case OPC_DPAQ_S_W_PH:
+            case OPC_DPAQX_S_W_PH:
+            case OPC_DPAQX_SA_W_PH:
+            case OPC_DPS_W_PH:
+            case OPC_DPSX_W_PH:
+            case OPC_DPSQ_S_W_PH:
+            case OPC_DPSQX_S_W_PH:
+            case OPC_DPSQX_SA_W_PH:
+            case OPC_MULSAQ_S_W_PH:
+            case OPC_DPAQ_SA_L_W:
+            case OPC_DPSQ_SA_L_W:
+            case OPC_MAQ_S_W_PHL:
+            case OPC_MAQ_S_W_PHR:
+            case OPC_MAQ_SA_W_PHL:
+            case OPC_MAQ_SA_W_PHR:
+            case OPC_MULSA_W_PH:
+                gen_mipsdsp_multiply(ctx, op1, op2, rd, rs, rt, 0);
+                break;
+            default:            /* Invalid */
+                MIPS_INVAL("MASK DPAW.PH");
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+            break;
+        case OPC_INSV_DSP:
+            op2 = MASK_INSV(ctx->opcode);
+            switch (op2) {
+            case OPC_INSV:
+                check_dsp(ctx);
+                {
+                    TCGv t0, t1;
+
+                    if (rt == 0) {
+                        MIPS_DEBUG("NOP");
+                        break;
+                    }
+
+                    t0 = tcg_temp_new();
+                    t1 = tcg_temp_new();
+
+                    gen_load_gpr(t0, rt);
+                    gen_load_gpr(t1, rs);
+
+                    gen_helper_insv(cpu_gpr[rt], cpu_env, t1, t0);
+
+                    tcg_temp_free(t0);
+                    tcg_temp_free(t1);
+                    break;
+                }
+            default:            /* Invalid */
+                MIPS_INVAL("MASK INSV");
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+            break;
+        case OPC_APPEND_DSP:
+            gen_mipsdsp_append(env, ctx, op1, rt, rs, rd);
+            break;
+        case OPC_EXTR_W_DSP:
+            op2 = MASK_EXTR_W(ctx->opcode);
+            switch (op2) {
+            case OPC_EXTR_W:
+            case OPC_EXTR_R_W:
+            case OPC_EXTR_RS_W:
+            case OPC_EXTR_S_H:
+            case OPC_EXTRV_S_H:
+            case OPC_EXTRV_W:
+            case OPC_EXTRV_R_W:
+            case OPC_EXTRV_RS_W:
+            case OPC_EXTP:
+            case OPC_EXTPV:
+            case OPC_EXTPDP:
+            case OPC_EXTPDPV:
+                gen_mipsdsp_accinsn(ctx, op1, op2, rt, rs, rd, 1);
+                break;
+            case OPC_RDDSP:
+                gen_mipsdsp_accinsn(ctx, op1, op2, rd, rs, rt, 1);
+                break;
+            case OPC_SHILO:
+            case OPC_SHILOV:
+            case OPC_MTHLIP:
+            case OPC_WRDSP:
+                gen_mipsdsp_accinsn(ctx, op1, op2, rd, rs, rt, 0);
+                break;
+            default:            /* Invalid */
+                MIPS_INVAL("MASK EXTR.W");
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+            break;
 #if defined(TARGET_MIPS64)
         case OPC_DEXTM ... OPC_DEXT:
         case OPC_DINSM ... OPC_DINS:
-            check_insn(env, ctx, ISA_MIPS64R2);
+            check_insn(ctx, ISA_MIPS64R2);
             check_mips_64(ctx);
             gen_bitops(ctx, op1, rt, rs, sa, rd);
             break;
         case OPC_DBSHFL:
-            check_insn(env, ctx, ISA_MIPS64R2);
+            check_insn(ctx, ISA_MIPS64R2);
             check_mips_64(ctx);
             op2 = MASK_DBSHFL(ctx->opcode);
             gen_bshfl(ctx, op2, rt, rd);
             break;
+        case OPC_DDIV_G_2E ... OPC_DDIVU_G_2E:
+        case OPC_DMULT_G_2E ... OPC_DMULTU_G_2E:
+        case OPC_DMOD_G_2E ... OPC_DMODU_G_2E:
+            check_insn(ctx, INSN_LOONGSON2E);
+            gen_loongson_integer(ctx, op1, rd, rs, rt);
+            break;
+        case OPC_ABSQ_S_QH_DSP:
+            op2 = MASK_ABSQ_S_QH(ctx->opcode);
+            switch (op2) {
+            case OPC_PRECEQ_L_PWL:
+            case OPC_PRECEQ_L_PWR:
+            case OPC_PRECEQ_PW_QHL:
+            case OPC_PRECEQ_PW_QHR:
+            case OPC_PRECEQ_PW_QHLA:
+            case OPC_PRECEQ_PW_QHRA:
+            case OPC_PRECEQU_QH_OBL:
+            case OPC_PRECEQU_QH_OBR:
+            case OPC_PRECEQU_QH_OBLA:
+            case OPC_PRECEQU_QH_OBRA:
+            case OPC_PRECEU_QH_OBL:
+            case OPC_PRECEU_QH_OBR:
+            case OPC_PRECEU_QH_OBLA:
+            case OPC_PRECEU_QH_OBRA:
+            case OPC_ABSQ_S_OB:
+            case OPC_ABSQ_S_PW:
+            case OPC_ABSQ_S_QH:
+                gen_mipsdsp_arith(ctx, op1, op2, rd, rs, rt);
+                break;
+            case OPC_REPL_OB:
+            case OPC_REPL_PW:
+            case OPC_REPL_QH:
+            case OPC_REPLV_OB:
+            case OPC_REPLV_PW:
+            case OPC_REPLV_QH:
+                gen_mipsdsp_bitinsn(ctx, op1, op2, rd, rt);
+                break;
+            default:            /* Invalid */
+                MIPS_INVAL("MASK ABSQ_S.QH");
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+            break;
+        case OPC_ADDU_OB_DSP:
+            op2 = MASK_ADDU_OB(ctx->opcode);
+            switch (op2) {
+            case OPC_RADDU_L_OB:
+            case OPC_SUBQ_PW:
+            case OPC_SUBQ_S_PW:
+            case OPC_SUBQ_QH:
+            case OPC_SUBQ_S_QH:
+            case OPC_SUBU_OB:
+            case OPC_SUBU_S_OB:
+            case OPC_SUBU_QH:
+            case OPC_SUBU_S_QH:
+            case OPC_SUBUH_OB:
+            case OPC_SUBUH_R_OB:
+            case OPC_ADDQ_PW:
+            case OPC_ADDQ_S_PW:
+            case OPC_ADDQ_QH:
+            case OPC_ADDQ_S_QH:
+            case OPC_ADDU_OB:
+            case OPC_ADDU_S_OB:
+            case OPC_ADDU_QH:
+            case OPC_ADDU_S_QH:
+            case OPC_ADDUH_OB:
+            case OPC_ADDUH_R_OB:
+                gen_mipsdsp_arith(ctx, op1, op2, rd, rs, rt);
+                break;
+            case OPC_MULEQ_S_PW_QHL:
+            case OPC_MULEQ_S_PW_QHR:
+            case OPC_MULEU_S_QH_OBL:
+            case OPC_MULEU_S_QH_OBR:
+            case OPC_MULQ_RS_QH:
+                gen_mipsdsp_multiply(ctx, op1, op2, rd, rs, rt, 1);
+                break;
+            default:            /* Invalid */
+                MIPS_INVAL("MASK ADDU.OB");
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+            break;
+        case OPC_CMPU_EQ_OB_DSP:
+            op2 = MASK_CMPU_EQ_OB(ctx->opcode);
+            switch (op2) {
+            case OPC_PRECR_SRA_QH_PW:
+            case OPC_PRECR_SRA_R_QH_PW:
+                /* Return value is rt. */
+                gen_mipsdsp_arith(ctx, op1, op2, rt, rs, rd);
+                break;
+            case OPC_PRECR_OB_QH:
+            case OPC_PRECRQ_OB_QH:
+            case OPC_PRECRQ_PW_L:
+            case OPC_PRECRQ_QH_PW:
+            case OPC_PRECRQ_RS_QH_PW:
+            case OPC_PRECRQU_S_OB_QH:
+                gen_mipsdsp_arith(ctx, op1, op2, rd, rs, rt);
+                break;
+            case OPC_CMPU_EQ_OB:
+            case OPC_CMPU_LT_OB:
+            case OPC_CMPU_LE_OB:
+            case OPC_CMP_EQ_QH:
+            case OPC_CMP_LT_QH:
+            case OPC_CMP_LE_QH:
+            case OPC_CMP_EQ_PW:
+            case OPC_CMP_LT_PW:
+            case OPC_CMP_LE_PW:
+                gen_mipsdsp_add_cmp_pick(ctx, op1, op2, rd, rs, rt, 0);
+                break;
+            case OPC_CMPGDU_EQ_OB:
+            case OPC_CMPGDU_LT_OB:
+            case OPC_CMPGDU_LE_OB:
+            case OPC_CMPGU_EQ_OB:
+            case OPC_CMPGU_LT_OB:
+            case OPC_CMPGU_LE_OB:
+            case OPC_PACKRL_PW:
+            case OPC_PICK_OB:
+            case OPC_PICK_PW:
+            case OPC_PICK_QH:
+                gen_mipsdsp_add_cmp_pick(ctx, op1, op2, rd, rs, rt, 1);
+                break;
+            default:            /* Invalid */
+                MIPS_INVAL("MASK CMPU_EQ.OB");
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+            break;
+        case OPC_DAPPEND_DSP:
+            gen_mipsdsp_append(env, ctx, op1, rt, rs, rd);
+            break;
+        case OPC_DEXTR_W_DSP:
+            op2 = MASK_DEXTR_W(ctx->opcode);
+            switch (op2) {
+            case OPC_DEXTP:
+            case OPC_DEXTPDP:
+            case OPC_DEXTPDPV:
+            case OPC_DEXTPV:
+            case OPC_DEXTR_L:
+            case OPC_DEXTR_R_L:
+            case OPC_DEXTR_RS_L:
+            case OPC_DEXTR_W:
+            case OPC_DEXTR_R_W:
+            case OPC_DEXTR_RS_W:
+            case OPC_DEXTR_S_H:
+            case OPC_DEXTRV_L:
+            case OPC_DEXTRV_R_L:
+            case OPC_DEXTRV_RS_L:
+            case OPC_DEXTRV_S_H:
+            case OPC_DEXTRV_W:
+            case OPC_DEXTRV_R_W:
+            case OPC_DEXTRV_RS_W:
+                gen_mipsdsp_accinsn(ctx, op1, op2, rt, rs, rd, 1);
+                break;
+            case OPC_DMTHLIP:
+            case OPC_DSHILO:
+            case OPC_DSHILOV:
+                gen_mipsdsp_accinsn(ctx, op1, op2, rd, rs, rt, 0);
+                break;
+            default:            /* Invalid (64-bit DEXTR.W group) */
+                MIPS_INVAL("MASK DEXTR.W");
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+            break;
+        case OPC_DPAQ_W_QH_DSP:
+            op2 = MASK_DPAQ_W_QH(ctx->opcode);
+            switch (op2) {
+            case OPC_DPAU_H_OBL:
+            case OPC_DPAU_H_OBR:
+            case OPC_DPSU_H_OBL:
+            case OPC_DPSU_H_OBR:
+            case OPC_DPA_W_QH:
+            case OPC_DPAQ_S_W_QH:
+            case OPC_DPS_W_QH:
+            case OPC_DPSQ_S_W_QH:
+            case OPC_MULSAQ_S_W_QH:
+            case OPC_DPAQ_SA_L_PW:
+            case OPC_DPSQ_SA_L_PW:
+            case OPC_MULSAQ_S_L_PW:
+                gen_mipsdsp_multiply(ctx, op1, op2, rd, rs, rt, 0);
+                break;
+            case OPC_MAQ_S_W_QHLL:
+            case OPC_MAQ_S_W_QHLR:
+            case OPC_MAQ_S_W_QHRL:
+            case OPC_MAQ_S_W_QHRR:
+            case OPC_MAQ_SA_W_QHLL:
+            case OPC_MAQ_SA_W_QHLR:
+            case OPC_MAQ_SA_W_QHRL:
+            case OPC_MAQ_SA_W_QHRR:
+            case OPC_MAQ_S_L_PWL:
+            case OPC_MAQ_S_L_PWR:
+            case OPC_DMADD:
+            case OPC_DMADDU:
+            case OPC_DMSUB:
+            case OPC_DMSUBU:
+                gen_mipsdsp_multiply(ctx, op1, op2, rd, rs, rt, 0);
+                break;
+            default:            /* Invalid */
+                MIPS_INVAL("MASK DPAQ.W.QH");
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+            break;
+        case OPC_DINSV_DSP:
+            op2 = MASK_INSV(ctx->opcode);
+            switch (op2) {
+            case OPC_DINSV:
+                {
+                    TCGv t0, t1;
+
+                    /* Check DSP first, as OPC_INSV does; free temps after. */
+                    check_dsp(ctx);
+                    if (rt == 0) {
+                        MIPS_DEBUG("NOP");
+                        break;
+                    }
+                    t0 = tcg_temp_new();
+                    t1 = tcg_temp_new();
+                    gen_load_gpr(t0, rt);
+                    gen_load_gpr(t1, rs);
+                    gen_helper_dinsv(cpu_gpr[rt], cpu_env, t1, t0);
+                    tcg_temp_free(t0);
+                    tcg_temp_free(t1);
+                    break;
+                }
+            default:            /* Invalid */
+                MIPS_INVAL("MASK DINSV");
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+            break;
+        case OPC_SHLL_OB_DSP:
+            gen_mipsdsp_shift(ctx, op1, rd, rs, rt);
+            break;
 #endif
         default:            /* Invalid */
             MIPS_INVAL("special3");
@@ -8683,16 +15328,23 @@
         switch (op1) {
         case OPC_BLTZ ... OPC_BGEZL: /* REGIMM branches */
         case OPC_BLTZAL ... OPC_BGEZALL:
-            gen_compute_branch(ctx, op1, rs, -1, imm << 2);
-            return;
+            gen_compute_branch(ctx, op1, 4, rs, -1, imm << 2);
+            break;
         case OPC_TGEI ... OPC_TEQI: /* REGIMM traps */
         case OPC_TNEI:
             gen_trap(ctx, op1, rs, -1, imm);
             break;
         case OPC_SYNCI:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             /* Treat as NOP. */
             break;
+        case OPC_BPOSGE32:    /* MIPS DSP branch */
+#if defined(TARGET_MIPS64)
+        case OPC_BPOSGE64:
+#endif
+            check_dsp(ctx);
+            gen_compute_branch(ctx, op1, 4, -1, -2, (int32_t)imm << 2);
+            break;
         default:            /* Invalid */
             MIPS_INVAL("regimm");
             generate_exception(ctx, EXCP_RI);
@@ -8728,27 +15380,27 @@
                 op2 = MASK_MFMC0(ctx->opcode);
                 switch (op2) {
                 case OPC_DMT:
-                    check_insn(env, ctx, ASE_MT);
+                    check_insn(ctx, ASE_MT);
                     gen_helper_dmt(t0);
                     gen_store_gpr(t0, rt);
                     break;
                 case OPC_EMT:
-                    check_insn(env, ctx, ASE_MT);
+                    check_insn(ctx, ASE_MT);
                     gen_helper_emt(t0);
                     gen_store_gpr(t0, rt);
                     break;
                 case OPC_DVPE:
-                    check_insn(env, ctx, ASE_MT);
+                    check_insn(ctx, ASE_MT);
                     gen_helper_dvpe(t0, cpu_env);
                     gen_store_gpr(t0, rt);
                     break;
                 case OPC_EVPE:
-                    check_insn(env, ctx, ASE_MT);
+                    check_insn(ctx, ASE_MT);
                     gen_helper_evpe(t0, cpu_env);
                     gen_store_gpr(t0, rt);
                     break;
                 case OPC_DI:
-                    check_insn(env, ctx, ISA_MIPS32R2);
+                    check_insn(ctx, ISA_MIPS32R2);
                     save_cpu_state(ctx, 1);
                     gen_helper_di(t0, cpu_env);
                     gen_store_gpr(t0, rt);
@@ -8756,7 +15408,7 @@
                     ctx->bstate = BS_STOP;
                     break;
                 case OPC_EI:
-                    check_insn(env, ctx, ISA_MIPS32R2);
+                    check_insn(ctx, ISA_MIPS32R2);
                     save_cpu_state(ctx, 1);
                     gen_helper_ei(t0, cpu_env);
                     gen_store_gpr(t0, rt);
@@ -8773,11 +15425,11 @@
 #endif /* !CONFIG_USER_ONLY */
             break;
         case OPC_RDPGPR:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_load_srsgpr(rt, rd);
             break;
         case OPC_WRPGPR:
-            check_insn(env, ctx, ISA_MIPS32R2);
+            check_insn(ctx, ISA_MIPS32R2);
             gen_store_srsgpr(rt, rd);
             break;
         default:
@@ -8788,41 +15440,44 @@
         break;
     case OPC_ADDI: /* Arithmetic with immediate opcode */
     case OPC_ADDIU:
-         gen_arith_imm(env, ctx, op, rt, rs, imm);
+         gen_arith_imm(ctx, op, rt, rs, imm);
          break;
     case OPC_SLTI: /* Set on less than with immediate opcode */
     case OPC_SLTIU:
-         gen_slt_imm(env, op, rt, rs, imm);
+         gen_slt_imm(ctx, op, rt, rs, imm);
          break;
     case OPC_ANDI: /* Arithmetic with immediate opcode */
     case OPC_LUI:
     case OPC_ORI:
     case OPC_XORI:
-         gen_logic_imm(env, op, rt, rs, imm);
+         gen_logic_imm(ctx, op, rt, rs, imm);
          break;
     case OPC_J ... OPC_JAL: /* Jump */
          offset = (int32_t)(ctx->opcode & 0x3FFFFFF) << 2;
-         gen_compute_branch(ctx, op, rs, rt, offset);
-         return;
+         gen_compute_branch(ctx, op, 4, rs, rt, offset);
+         break;
     case OPC_BEQ ... OPC_BGTZ: /* Branch */
     case OPC_BEQL ... OPC_BGTZL:
-         gen_compute_branch(ctx, op, rs, rt, imm << 2);
-         return;
+         gen_compute_branch(ctx, op, 4, rs, rt, imm << 2);
+         break;
     case OPC_LB ... OPC_LWR: /* Load and stores */
+    case OPC_LL:
+         gen_ld(ctx, op, rt, rs, imm);
+         break;
     case OPC_SB ... OPC_SW:
     case OPC_SWR:
-    case OPC_LL:
-         gen_ldst(ctx, op, rt, rs, imm);
+         gen_st(ctx, op, rt, rs, imm);
          break;
     case OPC_SC:
          gen_st_cond(ctx, op, rt, rs, imm);
          break;
     case OPC_CACHE:
-        check_insn(env, ctx, ISA_MIPS3 | ISA_MIPS32);
+        check_cp0_enabled(ctx);
+        check_insn(ctx, ISA_MIPS3 | ISA_MIPS32);
         /* Treat as NOP. */
         break;
     case OPC_PREF:
-        check_insn(env, ctx, ISA_MIPS4 | ISA_MIPS32);
+        check_insn(ctx, ISA_MIPS4 | ISA_MIPS32);
         /* Treat as NOP. */
         break;
 
@@ -8831,12 +15486,7 @@
     case OPC_LDC1:
     case OPC_SWC1:
     case OPC_SDC1:
-        if (env->CP0_Config1 & (1 << CP0C1_FP)) {
-            check_cp1_enabled(ctx);
-            gen_flt_ldst(ctx, op, rt, rs, imm);
-        } else {
-            generate_exception_err(ctx, EXCP_CpU, 1);
-        }
+        gen_cop1_ldst(env, ctx, op, rt, rs, imm);
         break;
 
     case OPC_CP1:
@@ -8846,7 +15496,7 @@
             switch (op1) {
             case OPC_MFHC1:
             case OPC_MTHC1:
-                check_insn(env, ctx, ISA_MIPS32R2);
+                check_insn(ctx, ISA_MIPS32R2);
             case OPC_MFC1:
             case OPC_CFC1:
             case OPC_MTC1:
@@ -8856,25 +15506,25 @@
 #if defined(TARGET_MIPS64)
             case OPC_DMFC1:
             case OPC_DMTC1:
-                check_insn(env, ctx, ISA_MIPS3);
+                check_insn(ctx, ISA_MIPS3);
                 gen_cp1(ctx, op1, rt, rd);
                 break;
 #endif
             case OPC_BC1ANY2:
             case OPC_BC1ANY4:
                 check_cop1x(ctx);
-                check_insn(env, ctx, ASE_MIPS3D);
+                check_insn(ctx, ASE_MIPS3D);
                 /* fall through */
             case OPC_BC1:
-                gen_compute_branch1(env, ctx, MASK_BC1(ctx->opcode),
+                gen_compute_branch1(ctx, MASK_BC1(ctx->opcode),
                                     (rt >> 2) & 0x7, imm << 2);
-                return;
+                break;
             case OPC_S_FMT:
             case OPC_D_FMT:
             case OPC_W_FMT:
             case OPC_L_FMT:
             case OPC_PS_FMT:
-                gen_farith(ctx, MASK_CP1_FUNC(ctx->opcode), rt, rd, sa,
+                gen_farith(ctx, ctx->opcode & FOP(0x3f, 0x1f), rt, rd, sa,
                            (imm >> 8) & 0x7);
                 break;
             default:
@@ -8892,10 +15542,14 @@
     case OPC_LDC2:
     case OPC_SWC2:
     case OPC_SDC2:
-    case OPC_CP2:
         /* COP2: Not implemented. */
         generate_exception_err(ctx, EXCP_CpU, 2);
         break;
+    case OPC_CP2:
+        check_insn(ctx, INSN_LOONGSON2F);
+        /* Note that these instructions use different fields.  */
+        gen_loongson_multimedia(ctx, sa, rd, rt);
+        break;
 
     case OPC_CP3:
         if (env->CP0_Config1 & (1 << CP0C1_FP)) {
@@ -8942,82 +15596,43 @@
     /* MIPS64 opcodes */
     case OPC_LWU:
     case OPC_LDL ... OPC_LDR:
-    case OPC_SDL ... OPC_SDR:
     case OPC_LLD:
     case OPC_LD:
-    case OPC_SD:
-        check_insn(env, ctx, ISA_MIPS3);
+        check_insn(ctx, ISA_MIPS3);
         check_mips_64(ctx);
-        gen_ldst(ctx, op, rt, rs, imm);
+        gen_ld(ctx, op, rt, rs, imm);
+        break;
+    case OPC_SDL ... OPC_SDR:
+    case OPC_SD:
+        check_insn(ctx, ISA_MIPS3);
+        check_mips_64(ctx);
+        gen_st(ctx, op, rt, rs, imm);
         break;
     case OPC_SCD:
-        check_insn(env, ctx, ISA_MIPS3);
+        check_insn(ctx, ISA_MIPS3);
         check_mips_64(ctx);
         gen_st_cond(ctx, op, rt, rs, imm);
         break;
     case OPC_DADDI:
     case OPC_DADDIU:
-        check_insn(env, ctx, ISA_MIPS3);
+        check_insn(ctx, ISA_MIPS3);
         check_mips_64(ctx);
-        gen_arith_imm(env, ctx, op, rt, rs, imm);
+        gen_arith_imm(ctx, op, rt, rs, imm);
         break;
 #endif
     case OPC_JALX:
-        check_insn(env, ctx, ASE_MIPS16);
-        /* MIPS16: Not implemented. */
+        check_insn(ctx, ASE_MIPS16 | ASE_MICROMIPS);
+        offset = (int32_t)(ctx->opcode & 0x3FFFFFF) << 2;
+        gen_compute_branch(ctx, op, 4, rs, rt, offset);
+        break;
     case OPC_MDMX:
-        check_insn(env, ctx, ASE_MDMX);
+        check_insn(ctx, ASE_MDMX);
         /* MDMX: Not implemented. */
     default:            /* Invalid */
         MIPS_INVAL("major opcode");
         generate_exception(ctx, EXCP_RI);
         break;
     }
-    if (ctx->hflags & MIPS_HFLAG_BMASK) {
-        int hflags = ctx->hflags & MIPS_HFLAG_BMASK;
-        /* Branches completion */
-        ctx->hflags &= ~MIPS_HFLAG_BMASK;
-        ctx->bstate = BS_BRANCH;
-        save_cpu_state(ctx, 0);
-        /* FIXME: Need to clear can_do_io.  */
-        switch (hflags) {
-        case MIPS_HFLAG_B:
-            /* unconditional branch */
-            MIPS_DEBUG("unconditional branch");
-            gen_goto_tb(ctx, 0, ctx->btarget);
-            break;
-        case MIPS_HFLAG_BL:
-            /* blikely taken case */
-            MIPS_DEBUG("blikely branch taken");
-            gen_goto_tb(ctx, 0, ctx->btarget);
-            break;
-        case MIPS_HFLAG_BC:
-            /* Conditional branch */
-            MIPS_DEBUG("conditional branch");
-            {
-                int l1 = gen_new_label();
-
-                tcg_gen_brcondi_tl(TCG_COND_NE, bcond, 0, l1);
-                gen_goto_tb(ctx, 1, ctx->pc + 4);
-                gen_set_label(l1);
-                gen_goto_tb(ctx, 0, ctx->btarget);
-            }
-            break;
-        case MIPS_HFLAG_BR:
-            /* unconditional branch to register */
-            MIPS_DEBUG("branch to register");
-            tcg_gen_mov_tl(cpu_PC, btarget);
-            if (ctx->singlestep_enabled) {
-                save_cpu_state(ctx, 0);
-                gen_helper_1i(raise_exception, cpu_env, EXCP_DEBUG);
-            }
-            tcg_gen_exit_tb(0);
-            break;
-        default:
-            MIPS_DEBUG("unknown branch");
-            break;
-        }
-    }
 }
 
 static inline void
@@ -9033,6 +15648,8 @@
     int j, lj = -1;
     int num_insns;
     int max_insns;
+    int insn_bytes;
+    int is_delay;
 
     if (search_pc)
         qemu_log("search pc %d\n", search_pc);
@@ -9042,6 +15659,7 @@
     ctx.pc = pc_start;
     ctx.saved_pc = -1;
     ctx.singlestep_enabled = cs->singlestep_enabled;
+    ctx.insn_flags = env->insn_flags;
     ctx.tb = tb;
     ctx.bstate = BS_NONE;
     /* Restore delay slot state from the tb context.  */
@@ -9056,11 +15674,6 @@
     max_insns = tb->cflags & CF_COUNT_MASK;
     if (max_insns == 0)
         max_insns = CF_COUNT_MASK;
-#ifdef DEBUG_DISAS
-    qemu_log_mask(CPU_LOG_TB_CPU, "------------------------------------------------\n");
-    /* FIXME: This may print out stale hflags from env... */
-    log_cpu_state_mask(CPU_LOG_TB_CPU, ENV_GET_CPU(env), 0);
-#endif
     LOG_DISAS("\ntb %p idx %d hflags %04x\n", tb, ctx.mem_idx, ctx.hflags);
     gen_icount_start();
     while (ctx.bstate == BS_NONE) {
@@ -9069,7 +15682,7 @@
                 if (bp->pc == ctx.pc) {
                     save_cpu_state(&ctx, 1);
                     ctx.bstate = BS_BRANCH;
-                    gen_helper_1i(raise_exception, cpu_env, EXCP_DEBUG);
+                    gen_helper_0e0i(raise_exception, EXCP_DEBUG);
                     /* Include the breakpoint location or the tb won't
                      * be flushed when it must be.  */
                     ctx.pc += 4;
@@ -9087,14 +15700,34 @@
             }
             tcg_ctx.gen_opc_pc[lj] = ctx.pc;
             gen_opc_hflags[lj] = ctx.hflags & MIPS_HFLAG_BMASK;
+            gen_opc_btarget[lj] = ctx.btarget;
             tcg_ctx.gen_opc_instr_start[lj] = 1;
             tcg_ctx.gen_opc_icount[lj] = num_insns;
         }
         if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
             gen_io_start();
-        ctx.opcode = cpu_ldl_code(env, ctx.pc);
-        decode_opc(env, &ctx);
-        ctx.pc += 4;
+
+        is_delay = ctx.hflags & MIPS_HFLAG_BMASK;
+        if (!(ctx.hflags & MIPS_HFLAG_M16)) {
+            ctx.opcode = cpu_ldl_code(env, ctx.pc);
+            insn_bytes = 4;
+            decode_opc(env, &ctx);
+        } else if (ctx.insn_flags & ASE_MICROMIPS) {
+            ctx.opcode = cpu_lduw_code(env, ctx.pc);
+            insn_bytes = decode_micromips_opc(env, &ctx);
+        } else if (ctx.insn_flags & ASE_MIPS16) {
+            ctx.opcode = cpu_lduw_code(env, ctx.pc);
+            insn_bytes = decode_mips16_opc(env, &ctx);
+        } else {
+            generate_exception(&ctx, EXCP_RI);
+            ctx.bstate = BS_STOP;
+            break;
+        }
+        if (is_delay) {
+            handle_delay_slot(&ctx, insn_bytes);
+        }
+        ctx.pc += insn_bytes;
+
         num_insns++;
 
         /* Execute a branch and its delay slot as a single instruction.
@@ -9105,10 +15738,7 @@
             break;
         }
 
-        /* Do not split a branch instruction and its delay slot into two
-           TB's when a page boundary is crossed. This causes TB's to be
-           invalidated incorrectly if branch target is patched.  */
-        if ((ctx.pc & (TARGET_PAGE_SIZE - 1)) == 0 && (ctx.hflags & MIPS_HFLAG_BMASK) == 0)
+        if ((ctx.pc & (TARGET_PAGE_SIZE - 1)) == 0)
             break;
 
         if (tcg_ctx.gen_opc_ptr >= gen_opc_end) {
@@ -9126,11 +15756,10 @@
     }
     if (cs->singlestep_enabled && ctx.bstate != BS_BRANCH) {
         save_cpu_state(&ctx, ctx.bstate == BS_NONE);
-        gen_helper_1i(raise_exception, cpu_env, EXCP_DEBUG);
+        gen_helper_0e0i(raise_exception, EXCP_DEBUG);
     } else {
         switch (ctx.bstate) {
         case BS_STOP:
-            gen_helper_interrupt_restart(cpu_env);
             gen_goto_tb(&ctx, 0, ctx.pc);
             break;
         case BS_NONE:
@@ -9138,7 +15767,6 @@
             gen_goto_tb(&ctx, 0, ctx.pc);
             break;
         case BS_EXCP:
-            gen_helper_interrupt_restart(cpu_env);
             tcg_gen_exit_tb(0);
             break;
         case BS_BRANCH:
@@ -9165,7 +15793,6 @@
         log_target_disas(env, pc_start, ctx.pc - pc_start, 0);
         qemu_log("\n");
     }
-    qemu_log_mask(CPU_LOG_TB_CPU, "---------------- %d %08x\n", ctx.bstate, ctx.hflags);
 #endif
 }
 
@@ -9179,32 +15806,37 @@
     gen_intermediate_code_internal(mips_env_get_cpu(env), tb, true);
 }
 
-static void fpu_dump_state(CPUMIPSState *env, FILE *f,
-                           int (*fpu_fprintf)(FILE *f, const char *fmt, ...),
+static void fpu_dump_state(CPUMIPSState *env, FILE *f, fprintf_function fpu_fprintf,
                            int flags)
 {
     int i;
     int is_fpu64 = !!(env->hflags & MIPS_HFLAG_F64);
 
-#define printfpr(fp)                                                        \
-    do {                                                                    \
-        if (is_fpu64)                                                       \
-            fpu_fprintf(f, "w:%08x d:%016lx fd:%13g fs:%13g psu: %13g\n",   \
-                        (fp)->w[FP_ENDIAN_IDX], (fp)->d, (fp)->fd,          \
-                        (fp)->fs[FP_ENDIAN_IDX], (fp)->fs[!FP_ENDIAN_IDX]); \
-        else {                                                              \
-            fpr_t tmp;                                                      \
-            tmp.w[FP_ENDIAN_IDX] = (fp)->w[FP_ENDIAN_IDX];                  \
-            tmp.w[!FP_ENDIAN_IDX] = ((fp) + 1)->w[FP_ENDIAN_IDX];           \
-            fpu_fprintf(f, "w:%08x d:%016lx fd:%13g fs:%13g psu:%13g\n",    \
-                        tmp.w[FP_ENDIAN_IDX], tmp.d, tmp.fd,                \
-                        tmp.fs[FP_ENDIAN_IDX], tmp.fs[!FP_ENDIAN_IDX]);     \
-        }                                                                   \
+#define printfpr(fp)                                                    \
+    do {                                                                \
+        if (is_fpu64)                                                   \
+            fpu_fprintf(f, "w:%08x d:%016" PRIx64                       \
+                        " fd:%13g fs:%13g psu: %13g\n",                 \
+                        (fp)->w[FP_ENDIAN_IDX], (fp)->d,                \
+                        (double)(fp)->fd,                               \
+                        (double)(fp)->fs[FP_ENDIAN_IDX],                \
+                        (double)(fp)->fs[!FP_ENDIAN_IDX]);              \
+        else {                                                          \
+            fpr_t tmp;                                                  \
+            tmp.w[FP_ENDIAN_IDX] = (fp)->w[FP_ENDIAN_IDX];              \
+            tmp.w[!FP_ENDIAN_IDX] = ((fp) + 1)->w[FP_ENDIAN_IDX];       \
+            fpu_fprintf(f, "w:%08x d:%016" PRIx64                       \
+                        " fd:%13g fs:%13g psu:%13g\n",                  \
+                        tmp.w[FP_ENDIAN_IDX], tmp.d,                    \
+                        (double)tmp.fd,                                 \
+                        (double)tmp.fs[FP_ENDIAN_IDX],                  \
+                        (double)tmp.fs[!FP_ENDIAN_IDX]);                \
+        }                                                               \
     } while(0)
 
 
-    fpu_fprintf(f, "CP1 FCR0 0x%08x  FCR31 0x%08x  SR.FR %d  fp_status 0x%08x(0x%02x)\n",
-                env->active_fpu.fcr0, env->active_fpu.fcr31, is_fpu64, env->active_fpu.fp_status,
+    fpu_fprintf(f, "CP1 FCR0 0x%08x  FCR31 0x%08x  SR.FR %d  fp_status 0x%02x\n",
+                env->active_fpu.fcr0, env->active_fpu.fcr31, is_fpu64,
                 get_float_exception_flags(&env->active_fpu.fp_status));
     for (i = 0; i < 32; (is_fpu64) ? i++ : (i += 2)) {
         fpu_fprintf(f, "%3s: ", fregnames[i]);
@@ -9222,7 +15854,7 @@
 
 static void
 cpu_mips_check_sign_extensions (CPUMIPSState *env, FILE *f,
-                                int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
+                                fprintf_function cpu_fprintf,
                                 int flags)
 {
     int i;
@@ -9248,8 +15880,7 @@
 }
 #endif
 
-void cpu_dump_state (CPUState *cpu, FILE *f,
-                     int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
+void cpu_dump_state (CPUState *cpu, FILE *f, fprintf_function cpu_fprintf,
                      int flags)
 {
     CPUMIPSState *env = cpu->env_ptr;
@@ -9357,6 +15988,7 @@
 #ifndef CONFIG_USER_ONLY
     mmu_init(env, def);
 #endif
+    fpu_init(env, def);
     mvp_init(env, def);
     mips_tcg_init();
     cpu_reset(cpu);
@@ -9364,17 +15996,17 @@
     return env;
 }
 
-void cpu_reset(CPUState *cpu)
+void cpu_reset(CPUState *cs)
 {
-    CPUMIPSState *env = cpu->env_ptr;
+    CPUMIPSState *env = cs->env_ptr;
 
     if (qemu_loglevel_mask(CPU_LOG_RESET)) {
-        qemu_log("CPU Reset (CPU %d)\n", cpu->cpu_index);
-        log_cpu_state(cpu, 0);
+        qemu_log("CPU Reset (CPU %d)\n", cs->cpu_index);
+        log_cpu_state(cs, 0);
     }
 
     memset(env, 0, offsetof(CPUMIPSState, mvp));
-    tlb_flush(cpu, 1);
+    tlb_flush(cs, 1);
 
     /* Reset registers to their default values */
     env->CP0_PRid = env->cpu_model->CP0_PRid;
@@ -9385,6 +16017,10 @@
     env->CP0_Config1 = env->cpu_model->CP0_Config1;
     env->CP0_Config2 = env->cpu_model->CP0_Config2;
     env->CP0_Config3 = env->cpu_model->CP0_Config3;
+    env->CP0_Config4 = env->cpu_model->CP0_Config4;
+    env->CP0_Config4_rw_bitmask = env->cpu_model->CP0_Config4_rw_bitmask;
+    env->CP0_Config5 = env->cpu_model->CP0_Config5;
+    env->CP0_Config5_rw_bitmask = env->cpu_model->CP0_Config5_rw_bitmask;
     env->CP0_Config6 = env->cpu_model->CP0_Config6;
     env->CP0_Config7 = env->cpu_model->CP0_Config7;
     env->CP0_LLAddr_rw_bitmask = env->cpu_model->CP0_LLAddr_rw_bitmask
@@ -9415,22 +16051,35 @@
     env->CP0_SRSConf3 = env->cpu_model->CP0_SRSConf3;
     env->CP0_SRSConf4_rw_bitmask = env->cpu_model->CP0_SRSConf4_rw_bitmask;
     env->CP0_SRSConf4 = env->cpu_model->CP0_SRSConf4;
+    env->active_fpu.fcr0 = env->cpu_model->CP1_fcr0;
     env->insn_flags = env->cpu_model->insn_flags;
 
-    fpu_init(env, env->cpu_model);
-
 #if defined(CONFIG_USER_ONLY)
-    env->hflags = MIPS_HFLAG_UM;
-    /* Enable access to the SYNCI_Step register.  */
-    env->CP0_HWREna |= (1 << 1);
+    env->CP0_Status = (MIPS_HFLAG_UM << CP0St_KSU);
+# ifdef TARGET_MIPS64
+    /* Enable 64-bit register mode.  */
+    env->CP0_Status |= (1 << CP0St_PX);
+# endif
+# ifdef TARGET_ABI_MIPSN64
+    /* Enable 64-bit address mode.  */
+    env->CP0_Status |= (1 << CP0St_UX);
+# endif
+    /* Enable access to the CPUNum, SYNCI_Step, CC, and CCRes RDHWR
+       hardware registers.  */
+    env->CP0_HWREna |= 0x0000000F;
     if (env->CP0_Config1 & (1 << CP0C1_FP)) {
-        env->hflags |= MIPS_HFLAG_FPU;
+        env->CP0_Status |= (1 << CP0St_CU1);
     }
-#ifdef TARGET_MIPS64
-    if (env->active_fpu.fcr0 & (1 << FCR0_F64)) {
-        env->hflags |= MIPS_HFLAG_F64;
+    if (env->CP0_Config3 & (1 << CP0C3_DSPP)) {
+        env->CP0_Status |= (1 << CP0St_MX);
     }
-#endif
+# if defined(TARGET_MIPS64)
+    /* For MIPS64, set the FR bit if an FPU is present and FR is writable. */
+    if ((env->CP0_Config1 & (1 << CP0C1_FP)) &&
+        (env->CP0_Status_rw_bitmask & (1 << CP0St_FR))) {
+        env->CP0_Status |= (1 << CP0St_FR);
+    }
+# endif
 #else
     if (env->hflags & MIPS_HFLAG_BMASK) {
         /* If the exception was raised from a delay slot,
@@ -9441,9 +16090,9 @@
     }
     env->active_tc.PC = (int32_t)0xBFC00000;
     env->CP0_Random = env->tlb->nb_tlb - 1;
+    env->tlb->tlb_in_use = env->tlb->nb_tlb;
     env->CP0_Wired = 0;
-    /* SMP not implemented */
-    env->CP0_EBase = 0x80000000;
+    env->CP0_EBase = 0x80000000 | (cs->cpu_index & 0x3FF);
     env->CP0_Status = (1 << CP0St_BEV) | (1 << CP0St_ERL);
     /* vectored interrupts not implemented, timer on int 7,
        no performance counters. */
@@ -9460,14 +16109,35 @@
     }
     /* Count register increments in debug mode, EJTAG version 1 */
     env->CP0_Debug = (1 << CP0DB_CNT) | (0x1 << CP0DB_VER);
-    env->hflags = MIPS_HFLAG_CP0;
-#endif
-#if defined(TARGET_MIPS64)
-    if (env->cpu_model->insn_flags & ISA_MIPS3) {
-        env->hflags |= MIPS_HFLAG_64;
+
+    if (env->CP0_Config3 & (1 << CP0C3_MT)) {
+        int i;
+
+        /* Only TC0 on VPE 0 starts as active.  */
+        for (i = 0; i < ARRAY_SIZE(env->tcs); i++) {
+            env->tcs[i].CP0_TCBind = cs->cpu_index << CP0TCBd_CurVPE;
+            env->tcs[i].CP0_TCHalt = 1;
+        }
+        env->active_tc.CP0_TCHalt = 1;
+        cs->halted = 1;
+
+        if (cs->cpu_index == 0) {
+            /* VPE0 starts up enabled.  */
+            env->mvp->CP0_MVPControl |= (1 << CP0MVPCo_EVP);
+            env->CP0_VPEConf0 |= (1 << CP0VPEC0_MVP) | (1 << CP0VPEC0_VPA);
+
+            /* TC0 starts up unhalted.  */
+            cs->halted = 0;
+            env->active_tc.CP0_TCHalt = 0;
+            env->tcs[0].CP0_TCHalt = 0;
+            /* With thread 0 active.  */
+            env->active_tc.CP0_TCStatus = (1 << CP0TCSt_A);
+            env->tcs[0].CP0_TCStatus = (1 << CP0TCSt_A);
+        }
     }
 #endif
-    cpu->exception_index = EXCP_NONE;
+    compute_hflags(env);
+    cs->exception_index = EXCP_NONE;
 }
 
 void restore_state_to_opc(CPUMIPSState *env, TranslationBlock *tb, int pc_pos)
@@ -9475,4 +16145,13 @@
     env->active_tc.PC = tcg_ctx.gen_opc_pc[pc_pos];
     env->hflags &= ~MIPS_HFLAG_BMASK;
     env->hflags |= gen_opc_hflags[pc_pos];
+    switch (env->hflags & MIPS_HFLAG_BMASK_BASE) {
+    case MIPS_HFLAG_BR:
+        break;
+    case MIPS_HFLAG_BC:
+    case MIPS_HFLAG_BL:
+    case MIPS_HFLAG_B:
+        env->btarget = gen_opc_btarget[pc_pos];
+        break;
+    }
 }
diff --git a/target-mips/translate_init.c b/target-mips/translate_init.c
index c8a50d0..8ace034 100644
--- a/target-mips/translate_init.c
+++ b/target-mips/translate_init.c
@@ -74,6 +74,10 @@
     int32_t CP0_Config1;
     int32_t CP0_Config2;
     int32_t CP0_Config3;
+    int32_t CP0_Config4;
+    int32_t CP0_Config4_rw_bitmask;
+    int32_t CP0_Config5;
+    int32_t CP0_Config5_rw_bitmask;
     int32_t CP0_Config6;
     int32_t CP0_Config7;
     target_ulong CP0_LLAddr_rw_bitmask;
@@ -110,7 +114,8 @@
         .CP0_Config0 = MIPS_CONFIG0 | (MMU_TYPE_R4000 << CP0C0_MT),
         .CP0_Config1 = MIPS_CONFIG1 | (15 << CP0C1_MMU) |
                        (0 << CP0C1_IS) | (3 << CP0C1_IL) | (1 << CP0C1_IA) |
-                       (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA),
+                       (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA) |
+                       (0 << CP0C1_CA),
         .CP0_Config2 = MIPS_CONFIG2,
         .CP0_Config3 = MIPS_CONFIG3,
         .CP0_LLAddr_rw_bitmask = 0,
@@ -120,7 +125,7 @@
         .CP0_Status_rw_bitmask = 0x1278FF17,
         .SEGBITS = 32,
         .PABITS = 32,
-        .insn_flags = CPU_MIPS32 | ASE_MIPS16,
+        .insn_flags = CPU_MIPS32,
         .mmu_type = MMU_TYPE_R4000,
     },
     {
@@ -131,7 +136,8 @@
         .CP0_Config0 = MIPS_CONFIG0 | (MMU_TYPE_FMT << CP0C0_MT),
         .CP0_Config1 = MIPS_CONFIG1 |
                        (0 << CP0C1_IS) | (3 << CP0C1_IL) | (1 << CP0C1_IA) |
-                       (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA),
+                       (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA) |
+                       (1 << CP0C1_CA),
         .CP0_Config2 = MIPS_CONFIG2,
         .CP0_Config3 = MIPS_CONFIG3,
         .CP0_LLAddr_rw_bitmask = 0,
@@ -150,7 +156,8 @@
         .CP0_Config0 = MIPS_CONFIG0 | (MMU_TYPE_R4000 << CP0C0_MT),
         .CP0_Config1 = MIPS_CONFIG1 | (15 << CP0C1_MMU) |
                        (0 << CP0C1_IS) | (3 << CP0C1_IL) | (1 << CP0C1_IA) |
-                       (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA),
+                       (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA) |
+                       (0 << CP0C1_CA),
         .CP0_Config2 = MIPS_CONFIG2,
         .CP0_Config3 = MIPS_CONFIG3,
         .CP0_LLAddr_rw_bitmask = 0,
@@ -160,7 +167,7 @@
         .CP0_Status_rw_bitmask = 0x1278FF17,
         .SEGBITS = 32,
         .PABITS = 32,
-        .insn_flags = CPU_MIPS32 | ASE_MIPS16,
+        .insn_flags = CPU_MIPS32,
         .mmu_type = MMU_TYPE_R4000,
     },
     {
@@ -169,7 +176,8 @@
         .CP0_Config0 = MIPS_CONFIG0 | (MMU_TYPE_FMT << CP0C0_MT),
         .CP0_Config1 = MIPS_CONFIG1 |
                        (0 << CP0C1_IS) | (3 << CP0C1_IL) | (1 << CP0C1_IA) |
-                       (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA),
+                       (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA) |
+                       (1 << CP0C1_CA),
         .CP0_Config2 = MIPS_CONFIG2,
         .CP0_Config3 = MIPS_CONFIG3,
         .CP0_LLAddr_rw_bitmask = 0,
@@ -189,7 +197,8 @@
                     (MMU_TYPE_R4000 << CP0C0_MT),
         .CP0_Config1 = MIPS_CONFIG1 | (15 << CP0C1_MMU) |
                        (0 << CP0C1_IS) | (3 << CP0C1_IL) | (1 << CP0C1_IA) |
-                       (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA),
+                       (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA) |
+                       (0 << CP0C1_CA),
         .CP0_Config2 = MIPS_CONFIG2,
         .CP0_Config3 = MIPS_CONFIG3 | (0 << CP0C3_VInt),
         .CP0_LLAddr_rw_bitmask = 0,
@@ -199,7 +208,7 @@
         .CP0_Status_rw_bitmask = 0x1278FF17,
         .SEGBITS = 32,
         .PABITS = 32,
-        .insn_flags = CPU_MIPS32R2 | ASE_MIPS16,
+        .insn_flags = CPU_MIPS32R2,
         .mmu_type = MMU_TYPE_R4000,
     },
     {
@@ -209,7 +218,8 @@
                        (MMU_TYPE_FMT << CP0C0_MT),
         .CP0_Config1 = MIPS_CONFIG1 |
                        (0 << CP0C1_IS) | (3 << CP0C1_IL) | (1 << CP0C1_IA) |
-                       (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA),
+                       (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA) |
+                       (1 << CP0C1_CA),
         .CP0_Config2 = MIPS_CONFIG2,
         .CP0_Config3 = MIPS_CONFIG3,
         .CP0_LLAddr_rw_bitmask = 0,
@@ -229,7 +239,8 @@
                        (MMU_TYPE_R4000 << CP0C0_MT),
         .CP0_Config1 = MIPS_CONFIG1 | (15 << CP0C1_MMU) |
                        (0 << CP0C1_IS) | (3 << CP0C1_IL) | (1 << CP0C1_IA) |
-                       (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA),
+                       (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA) |
+                       (1 << CP0C1_CA),
         .CP0_Config2 = MIPS_CONFIG2,
         .CP0_Config3 = MIPS_CONFIG3 | (0 << CP0C3_VInt),
         .CP0_LLAddr_rw_bitmask = 0,
@@ -250,7 +261,8 @@
                     (MMU_TYPE_R4000 << CP0C0_MT),
         .CP0_Config1 = MIPS_CONFIG1 | (1 << CP0C1_FP) | (15 << CP0C1_MMU) |
                        (0 << CP0C1_IS) | (3 << CP0C1_IL) | (1 << CP0C1_IA) |
-                       (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA),
+                       (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA) |
+                       (1 << CP0C1_CA),
         .CP0_Config2 = MIPS_CONFIG2,
         .CP0_Config3 = MIPS_CONFIG3 | (0 << CP0C3_VInt),
         .CP0_LLAddr_rw_bitmask = 0,
@@ -273,16 +285,16 @@
                        (MMU_TYPE_R4000 << CP0C0_MT),
         .CP0_Config1 = MIPS_CONFIG1 | (1 << CP0C1_FP) | (15 << CP0C1_MMU) |
                        (0 << CP0C1_IS) | (3 << CP0C1_IL) | (1 << CP0C1_IA) |
-                       (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA),
+                       (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA) |
+                       (1 << CP0C1_CA),
         .CP0_Config2 = MIPS_CONFIG2,
-        .CP0_Config3 = MIPS_CONFIG3 | (0 << CP0C3_VInt) | (1 << CP0C3_MT),
+        .CP0_Config3 = MIPS_CONFIG3 | (1 << CP0C3_VInt) | (1 << CP0C3_MT) |
+                       (1 << CP0C3_DSPP),
         .CP0_LLAddr_rw_bitmask = 0,
         .CP0_LLAddr_shift = 0,
         .SYNCI_Step = 32,
         .CCRes = 2,
-        /* No DSP implemented. */
-        .CP0_Status_rw_bitmask = 0x3678FF1F,
-        /* No DSP implemented. */
+        .CP0_Status_rw_bitmask = 0x3778FF1F,
         .CP0_TCStatus_rw_bitmask = (0 << CP0TCSt_TCU3) | (0 << CP0TCSt_TCU2) |
                     (1 << CP0TCSt_TCU1) | (1 << CP0TCSt_TCU0) |
                     (0 << CP0TCSt_TMX) | (1 << CP0TCSt_DT) |
@@ -293,16 +305,16 @@
                     (1 << FCR0_D) | (1 << FCR0_S) | (0x95 << FCR0_PRID),
         .CP0_SRSCtl = (0xf << CP0SRSCtl_HSS),
         .CP0_SRSConf0_rw_bitmask = 0x3fffffff,
-        .CP0_SRSConf0 = (1 << CP0SRSC0_M) | (0x3fe << CP0SRSC0_SRS3) |
+        .CP0_SRSConf0 = (1U << CP0SRSC0_M) | (0x3fe << CP0SRSC0_SRS3) |
                     (0x3fe << CP0SRSC0_SRS2) | (0x3fe << CP0SRSC0_SRS1),
         .CP0_SRSConf1_rw_bitmask = 0x3fffffff,
-        .CP0_SRSConf1 = (1 << CP0SRSC1_M) | (0x3fe << CP0SRSC1_SRS6) |
+        .CP0_SRSConf1 = (1U << CP0SRSC1_M) | (0x3fe << CP0SRSC1_SRS6) |
                     (0x3fe << CP0SRSC1_SRS5) | (0x3fe << CP0SRSC1_SRS4),
         .CP0_SRSConf2_rw_bitmask = 0x3fffffff,
-        .CP0_SRSConf2 = (1 << CP0SRSC2_M) | (0x3fe << CP0SRSC2_SRS9) |
+        .CP0_SRSConf2 = (1U << CP0SRSC2_M) | (0x3fe << CP0SRSC2_SRS9) |
                     (0x3fe << CP0SRSC2_SRS8) | (0x3fe << CP0SRSC2_SRS7),
         .CP0_SRSConf3_rw_bitmask = 0x3fffffff,
-        .CP0_SRSConf3 = (1 << CP0SRSC3_M) | (0x3fe << CP0SRSC3_SRS12) |
+        .CP0_SRSConf3 = (1U << CP0SRSC3_M) | (0x3fe << CP0SRSC3_SRS12) |
                     (0x3fe << CP0SRSC3_SRS11) | (0x3fe << CP0SRSC3_SRS10),
         .CP0_SRSConf4_rw_bitmask = 0x3fffffff,
         .CP0_SRSConf4 = (0x3fe << CP0SRSC4_SRS15) |
@@ -312,6 +324,62 @@
         .insn_flags = CPU_MIPS32R2 | ASE_MIPS16 | ASE_DSP | ASE_MT,
         .mmu_type = MMU_TYPE_R4000,
     },
+    {
+        .name = "74Kf",
+        .CP0_PRid = 0x00019700,
+        .CP0_Config0 = MIPS_CONFIG0 | (0x1 << CP0C0_AR) |
+                    (MMU_TYPE_R4000 << CP0C0_MT),
+        .CP0_Config1 = MIPS_CONFIG1 | (1 << CP0C1_FP) | (15 << CP0C1_MMU) |
+                       (0 << CP0C1_IS) | (3 << CP0C1_IL) | (1 << CP0C1_IA) |
+                       (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA) |
+                       (1 << CP0C1_CA),
+        .CP0_Config2 = MIPS_CONFIG2,
+        .CP0_Config3 = MIPS_CONFIG3 | (0 << CP0C3_VInt) | (1 << CP0C3_DSPP),
+        .CP0_LLAddr_rw_bitmask = 0,
+        .CP0_LLAddr_shift = 4,
+        .SYNCI_Step = 32,
+        .CCRes = 2,
+        .CP0_Status_rw_bitmask = 0x3778FF1F,
+        .CP1_fcr0 = (1 << FCR0_F64) | (1 << FCR0_L) | (1 << FCR0_W) |
+                    (1 << FCR0_D) | (1 << FCR0_S) | (0x93 << FCR0_PRID),
+        .SEGBITS = 32,
+        .PABITS = 32,
+        .insn_flags = CPU_MIPS32R2 | ASE_MIPS16 | ASE_DSP | ASE_DSPR2,
+        .mmu_type = MMU_TYPE_R4000,
+    },
+    {
+        /* A generic CPU providing MIPS32 Release 5 features.
+           FIXME: Eventually this should be replaced by a real CPU model. */
+        .name = "mips32r5-generic",
+        .CP0_PRid = 0x00019700,
+        .CP0_Config0 = MIPS_CONFIG0 | (0x1 << CP0C0_AR) |
+                    (MMU_TYPE_R4000 << CP0C0_MT),
+        .CP0_Config1 = MIPS_CONFIG1 | (1 << CP0C1_FP) | (15 << CP0C1_MMU) |
+                       (0 << CP0C1_IS) | (3 << CP0C1_IL) | (1 << CP0C1_IA) |
+                       (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA) |
+                       (1 << CP0C1_CA),
+        .CP0_Config2 = MIPS_CONFIG2,
+        .CP0_Config3 = MIPS_CONFIG3 | (1U << CP0C3_M),
+        .CP0_Config4 = MIPS_CONFIG4 | (1U << CP0C4_M),
+        .CP0_Config4_rw_bitmask = 0,
+        .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_UFR),
+        .CP0_Config5_rw_bitmask = (0 << CP0C5_M) | (1 << CP0C5_K) |
+                                  (1 << CP0C5_CV) | (0 << CP0C5_EVA) |
+                                  (1 << CP0C5_MSAEn) | (1 << CP0C5_UFR) |
+                                  (0 << CP0C5_NFExists),
+        .CP0_LLAddr_rw_bitmask = 0,
+        .CP0_LLAddr_shift = 4,
+        .SYNCI_Step = 32,
+        .CCRes = 2,
+        .CP0_Status_rw_bitmask = 0x3778FF1F,
+        .CP1_fcr0 = (1 << FCR0_UFRP) | (1 << FCR0_F64) | (1 << FCR0_L) |
+                    (1 << FCR0_W) | (1 << FCR0_D) | (1 << FCR0_S) |
+                    (0x93 << FCR0_PRID),
+        .SEGBITS = 32,
+        .PABITS = 32,
+        .insn_flags = CPU_MIPS32R5 | ASE_MIPS16 | ASE_DSP | ASE_DSPR2,
+        .mmu_type = MMU_TYPE_R4000,
+    },
 #if defined(TARGET_MIPS64)
     {
         .name = "R4000",
@@ -451,6 +519,70 @@
         .insn_flags = CPU_MIPS64R2 | ASE_MIPS3D,
         .mmu_type = MMU_TYPE_R4000,
     },
+    {
+        .name = "Loongson-2E",
+        .CP0_PRid = 0x6302,
+        /* 64KB I-cache and D-cache, 4-way, with 32-byte cache lines. */
+        .CP0_Config0 = (0x1<<17) | (0x1<<16) | (0x1<<11) | (0x1<<8) | (0x1<<5) |
+                       (0x1<<4) | (0x1<<1),
+        /* Note: Config1 is only used internally, Loongson-2E has only Config0. */
+        .CP0_Config1 = (1 << CP0C1_FP) | (47 << CP0C1_MMU),
+        .SYNCI_Step = 16,
+        .CCRes = 2,
+        .CP0_Status_rw_bitmask = 0x35D0FFFF,
+        .CP1_fcr0 = (0x5 << FCR0_PRID) | (0x1 << FCR0_REV),
+        .SEGBITS = 40,
+        .PABITS = 40,
+        .insn_flags = CPU_LOONGSON2E,
+        .mmu_type = MMU_TYPE_R4000,
+    },
+    {
+      .name = "Loongson-2F",
+      .CP0_PRid = 0x6303,
+      /* 64KB I-cache and D-cache, 4-way, with 32-byte cache lines. */
+      .CP0_Config0 = (0x1<<17) | (0x1<<16) | (0x1<<11) | (0x1<<8) | (0x1<<5) |
+                     (0x1<<4) | (0x1<<1),
+      /* Note: Config1 is only used internally, Loongson-2F has only Config0. */
+      .CP0_Config1 = (1 << CP0C1_FP) | (47 << CP0C1_MMU),
+      .SYNCI_Step = 16,
+      .CCRes = 2,
+      .CP0_Status_rw_bitmask = 0xF5D0FF1F,   /*bit5:7 not writable*/
+      .CP1_fcr0 = (0x5 << FCR0_PRID) | (0x1 << FCR0_REV),
+      .SEGBITS = 40,
+      .PABITS = 40,
+      .insn_flags = CPU_LOONGSON2F,
+      .mmu_type = MMU_TYPE_R4000,
+    },
+    {
+        /* A generic CPU providing the MIPS64 DSP ASE Release 2 features.
+           FIXME: Eventually this should be replaced by a real CPU model. */
+        .name = "mips64dspr2",
+        .CP0_PRid = 0x00010000,
+        .CP0_Config0 = MIPS_CONFIG0 | (0x1 << CP0C0_AR) | (0x2 << CP0C0_AT) |
+                       (MMU_TYPE_R4000 << CP0C0_MT),
+        .CP0_Config1 = MIPS_CONFIG1 | (1 << CP0C1_FP) | (63 << CP0C1_MMU) |
+                       (2 << CP0C1_IS) | (4 << CP0C1_IL) | (3 << CP0C1_IA) |
+                       (2 << CP0C1_DS) | (4 << CP0C1_DL) | (3 << CP0C1_DA) |
+                       (1 << CP0C1_PC) | (1 << CP0C1_WR) | (1 << CP0C1_EP),
+        .CP0_Config2 = MIPS_CONFIG2,
+        .CP0_Config3 = MIPS_CONFIG3 | (1 << CP0C3_LPA),
+        .CP0_LLAddr_rw_bitmask = 0,
+        .CP0_LLAddr_shift = 0,
+        .SYNCI_Step = 32,
+        .CCRes = 2,
+        .CP0_Status_rw_bitmask = 0x37FBFFFF,
+        .CP1_fcr0 = (1 << FCR0_F64) | (1 << FCR0_3D) | (1 << FCR0_PS) |
+                    (1 << FCR0_L) | (1 << FCR0_W) | (1 << FCR0_D) |
+                    (1 << FCR0_S) | (0x00 << FCR0_PRID) | (0x0 << FCR0_REV),
+        .SEGBITS = 42,
+        /* The architectural limit is 59, but we have hardcoded 36 bit
+           in some places...
+        .PABITS = 59, */ /* the architectural limit */
+        .PABITS = 36,
+        .insn_flags = CPU_MIPS64R2 | ASE_DSP | ASE_DSPR2,
+        .mmu_type = MMU_TYPE_R4000,
+    },
+
 #endif
 };
 
@@ -466,7 +598,7 @@
     return NULL;
 }
 
-void mips_cpu_list (FILE *f, int (*cpu_fprintf)(FILE *f, const char *fmt, ...))
+void mips_cpu_list (FILE *f, fprintf_function cpu_fprintf)
 {
     int i;