Merge remote branch 'qmp/for-anthony' into staging
diff --git a/.gitignore b/.gitignore
index ce66ed5..a32b7c4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,6 +28,7 @@
 qemu-img-cmds.h
 qemu-io
 qemu-monitor.texi
+QMP/qmp-commands.txt
 .gdbinit
 *.a
 *.aux
diff --git a/Makefile b/Makefile
index 221fbd8..560eac6 100644
--- a/Makefile
+++ b/Makefile
@@ -168,6 +168,7 @@
 
 distclean: clean
 	rm -f config-host.mak config-host.h* config-host.ld $(DOCS) qemu-options.texi qemu-img-cmds.texi qemu-monitor.texi
+	rm -f qemu-options.def
 	rm -f config-all-devices.mak
 	rm -f roms/seabios/config.mak roms/vgabios/config.mak
 	rm -f qemu-{doc,tech}.{info,aux,cp,dvi,fn,info,ky,log,pdf,pg,toc,tp,vr}
diff --git a/Makefile.objs b/Makefile.objs
index 67a0c23..55417c9 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -180,6 +180,9 @@
 # PowerPC E500 boards
 hw-obj-$(CONFIG_PPCE500_PCI) += ppce500_pci.o
 
+# MIPS devices
+hw-obj-$(CONFIG_PIIX4) += piix4.o
+
 # PCI watchdog devices
 hw-obj-y += wdt_i6300esb.o
 
diff --git a/Makefile.target b/Makefile.target
index d58b201..7489910 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -220,7 +220,7 @@
 obj-mips-y += vga.o i8259.o
 obj-mips-y += g364fb.o jazz_led.o
 obj-mips-y += gt64xxx.o mc146818rtc.o
-obj-mips-y += piix4.o cirrus_vga.o
+obj-mips-y += cirrus_vga.o
 obj-mips-$(CONFIG_FULONG) += bonito.o vt82c686.o mips_fulong2e.o
 
 obj-microblaze-y = petalogix_s3adsp1800_mmu.o
diff --git a/default-configs/mips-softmmu.mak b/default-configs/mips-softmmu.mak
index 29be52e..3d0af83 100644
--- a/default-configs/mips-softmmu.mak
+++ b/default-configs/mips-softmmu.mak
@@ -16,6 +16,7 @@
 CONFIG_ACPI=y
 CONFIG_APM=y
 CONFIG_DMA=y
+CONFIG_PIIX4=y
 CONFIG_IDE_CORE=y
 CONFIG_IDE_QDEV=y
 CONFIG_IDE_PCI=y
diff --git a/default-configs/mips64-softmmu.mak b/default-configs/mips64-softmmu.mak
index 9bae8a7..0030de4 100644
--- a/default-configs/mips64-softmmu.mak
+++ b/default-configs/mips64-softmmu.mak
@@ -16,6 +16,7 @@
 CONFIG_ACPI=y
 CONFIG_APM=y
 CONFIG_DMA=y
+CONFIG_PIIX4=y
 CONFIG_IDE_CORE=y
 CONFIG_IDE_QDEV=y
 CONFIG_IDE_PCI=y
diff --git a/default-configs/mips64el-softmmu.mak b/default-configs/mips64el-softmmu.mak
index 85b7838..fa2a3ff 100644
--- a/default-configs/mips64el-softmmu.mak
+++ b/default-configs/mips64el-softmmu.mak
@@ -16,6 +16,7 @@
 CONFIG_ACPI=y
 CONFIG_APM=y
 CONFIG_DMA=y
+CONFIG_PIIX4=y
 CONFIG_IDE_CORE=y
 CONFIG_IDE_QDEV=y
 CONFIG_IDE_PCI=y
diff --git a/default-configs/mipsel-softmmu.mak b/default-configs/mipsel-softmmu.mak
index 10ef483..238b73a 100644
--- a/default-configs/mipsel-softmmu.mak
+++ b/default-configs/mipsel-softmmu.mak
@@ -16,6 +16,7 @@
 CONFIG_ACPI=y
 CONFIG_APM=y
 CONFIG_DMA=y
+CONFIG_PIIX4=y
 CONFIG_IDE_CORE=y
 CONFIG_IDE_QDEV=y
 CONFIG_IDE_PCI=y
diff --git a/gdbstub.c b/gdbstub.c
index c1852c2..2b03ef2 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -37,6 +37,7 @@
 
 #define MAX_PACKET_LENGTH 4096
 
+#include "exec-all.h"
 #include "qemu_socket.h"
 #include "kvm.h"
 
diff --git a/hw/xen_domainbuild.c b/hw/xen_domainbuild.c
index 2f59856..7f1fd66 100644
--- a/hw/xen_domainbuild.c
+++ b/hw/xen_domainbuild.c
@@ -3,6 +3,7 @@
 #include "xen_domainbuild.h"
 #include "sysemu.h"
 #include "qemu-timer.h"
+#include "qemu-log.h"
 
 #include <xenguest.h>
 
diff --git a/kvm-stub.c b/kvm-stub.c
index 7be5f5d..3378bd3 100644
--- a/kvm-stub.c
+++ b/kvm-stub.c
@@ -13,6 +13,7 @@
 #include "qemu-common.h"
 #include "sysemu.h"
 #include "hw/hw.h"
+#include "exec-all.h"
 #include "gdbstub.h"
 #include "kvm.h"
 
diff --git a/monitor.c b/monitor.c
index f319fee..45fd482 100644
--- a/monitor.c
+++ b/monitor.c
@@ -55,6 +55,7 @@
 #include "json-streamer.h"
 #include "json-parser.h"
 #include "osdep.h"
+#include "exec-all.h"
 
 //#define DEBUG
 //#define DEBUG_COMPLETION
diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h
index 314d6ac..686fb4a 100644
--- a/target-alpha/cpu.h
+++ b/target-alpha/cpu.h
@@ -413,7 +413,6 @@
 }
 
 #include "cpu-all.h"
-#include "exec-all.h"
 
 enum {
     FEATURE_ASN    = 0x00000001,
@@ -512,11 +511,6 @@
 void call_pal (CPUState *env);
 #endif
 
-static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
-{
-    env->pc = tb->pc;
-}
-
 static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc,
                                         target_ulong *cs_base, int *flags)
 {
diff --git a/target-alpha/exec.h b/target-alpha/exec.h
index 66526e2..a8a38d2 100644
--- a/target-alpha/exec.h
+++ b/target-alpha/exec.h
@@ -28,8 +28,6 @@
 
 register struct CPUAlphaState *env asm(AREG0);
 
-#define PARAM(n) ((uint64_t)PARAM##n)
-#define SPARAM(n) ((int32_t)PARAM##n)
 #define FP_STATUS (env->fp_status)
 
 #include "cpu.h"
@@ -55,4 +53,9 @@
     return EXCP_HALTED;
 }
 
+static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
+{
+    env->pc = tb->pc;   /* take the CPU pc from the one recorded in tb */
+}
+
 #endif /* !defined (__ALPHA_EXEC_H__) */
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index f3d138d..39c4a0e 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -435,12 +435,6 @@
 #endif
 
 #include "cpu-all.h"
-#include "exec-all.h"
-
-static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
-{
-    env->regs[15] = tb->pc;
-}
 
 static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc,
                                         target_ulong *cs_base, int *flags)
diff --git a/target-arm/exec.h b/target-arm/exec.h
index 0225c3f..e4c35a3 100644
--- a/target-arm/exec.h
+++ b/target-arm/exec.h
@@ -50,3 +50,9 @@
 #endif
 
 void raise_exception(int);
+
+/* Load the PC (regs[15]) with the pc recorded in translation block tb.  */
+static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
+{
+    env->regs[15] = tb->pc;
+}
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 63e5dc7..2dd64d9 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -2047,7 +2047,7 @@
 
 static inline uint16_t sub16_usat(uint16_t a, uint16_t b)
 {
-    if (a < b)
+    if (a > b)
         return a - b;
     else
         return 0;
@@ -2064,7 +2064,7 @@
 
 static inline uint8_t sub8_usat(uint8_t a, uint8_t b)
 {
-    if (a < b)
+    if (a > b)
         return a - b;
     else
         return 0;
diff --git a/target-arm/op_addsub.h b/target-arm/op_addsub.h
index 29f77ba..c02c92a 100644
--- a/target-arm/op_addsub.h
+++ b/target-arm/op_addsub.h
@@ -73,8 +73,8 @@
     uint32_t res = 0;
     DECLARE_GE;
 
-    ADD16(a, b, 0);
-    SUB16(a >> 16, b >> 16, 1);
+    ADD16(a, b >> 16, 0);
+    SUB16(a >> 16, b, 1);
     SET_GE;
     return res;
 }
@@ -84,8 +84,8 @@
     uint32_t res = 0;
     DECLARE_GE;
 
-    SUB16(a, b, 0);
-    ADD16(a >> 16, b >> 16, 1);
+    SUB16(a, b >> 16, 0);
+    ADD16(a >> 16, b, 1);
     SET_GE;
     return res;
 }
diff --git a/target-arm/translate.c b/target-arm/translate.c
index a28e2ff..6fcdd7e 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -561,7 +561,7 @@
 
 /* For unknown reasons Arm and Thumb-2 use arbitrarily different encodings.  */
 #define PAS_OP(pfx) \
-    switch (op2) {  \
+    switch (op1) {  \
     case 0: gen_pas_helper(glue(pfx,add8)); break; \
     case 1: gen_pas_helper(glue(pfx,add16)); break; \
     case 2: gen_pas_helper(glue(pfx,addsubx)); break; \
@@ -573,7 +573,7 @@
 {
     TCGv_ptr tmp;
 
-    switch (op1) {
+    switch (op2) {
 #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
     case 0:
         tmp = tcg_temp_new_ptr();
diff --git a/target-cris/cpu.h b/target-cris/cpu.h
index a62d57c..fce0804 100644
--- a/target-cris/cpu.h
+++ b/target-cris/cpu.h
@@ -250,12 +250,6 @@
 #define SFR_RW_MM_TLB_HI   env->pregs[PR_SRS]][6
 
 #include "cpu-all.h"
-#include "exec-all.h"
-
-static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
-{
-    env->pc = tb->pc;
-}
 
 static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc,
                                         target_ulong *cs_base, int *flags)
diff --git a/target-cris/exec.h b/target-cris/exec.h
index 728aa80..93ce768 100644
--- a/target-cris/exec.h
+++ b/target-cris/exec.h
@@ -28,9 +28,6 @@
 #include "softmmu_exec.h"
 #endif
 
-void cpu_cris_flush_flags(CPUCRISState *env, int cc_op);
-void helper_movec(CPUCRISState *env, int reg, uint32_t val);
-
 static inline int cpu_has_work(CPUState *env)
 {
     return (env->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI));
@@ -48,3 +45,9 @@
 	}
 	return EXCP_HALTED;
 }
+
+/* Set env->pc to the pc recorded in translation block tb.  */
+static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
+{
+    env->pc = tb->pc;
+}
diff --git a/target-cris/helper.c b/target-cris/helper.c
index 240bda0..053ed4a 100644
--- a/target-cris/helper.c
+++ b/target-cris/helper.c
@@ -78,7 +78,7 @@
 
 	D(printf ("%s addr=%x pc=%x rw=%x\n", __func__, address, env->pc, rw));
 	miss = cris_mmu_translate(&res, env, address & TARGET_PAGE_MASK,
-				  rw, mmu_idx);
+				  rw, mmu_idx, 0);
 	if (miss)
 	{
 		if (env->exception_index == EXCP_BUSFAULT)
@@ -250,7 +250,13 @@
 	uint32_t phy = addr;
 	struct cris_mmu_result res;
 	int miss;
-	miss = cris_mmu_translate(&res, env, addr, 0, 0);
+
+	miss = cris_mmu_translate(&res, env, addr, 0, 0, 1);
+	/* If D TLB misses, try I TLB.  */
+	if (miss) {
+		miss = cris_mmu_translate(&res, env, addr, 2, 0, 1);
+	}
+
 	if (!miss)
 		phy = res.phy;
 	D(fprintf(stderr, "%s %x -> %x\n", __func__, addr, phy));
diff --git a/target-cris/mmu.c b/target-cris/mmu.c
index d09e921..773438e 100644
--- a/target-cris/mmu.c
+++ b/target-cris/mmu.c
@@ -55,6 +55,17 @@
 	return f;
 }
 
+static void cris_mmu_update_rand_lfsr(CPUState *env)
+{
+	unsigned int f;
+
+	/* Step the 16-bit lfsr: shift right, put compute_polynom() in bit 15.  */
+	f = compute_polynom(env->mmu_rand_lfsr);
+	env->mmu_rand_lfsr >>= 1;
+	env->mmu_rand_lfsr |= (f << 15);
+	env->mmu_rand_lfsr &= 0xffff;
+}
+
 static inline int cris_mmu_enabled(uint32_t rw_gc_cfg)
 {
 	return (rw_gc_cfg & 12) != 0;
@@ -124,7 +135,7 @@
 /* rw 0 = read, 1 = write, 2 = exec.  */
 static int cris_mmu_translate_page(struct cris_mmu_result *res,
 				   CPUState *env, uint32_t vaddr,
-				   int rw, int usermode)
+				   int rw, int usermode, int debug)
 {
 	unsigned int vpage;
 	unsigned int idx;
@@ -250,15 +261,9 @@
 		set = env->mmu_rand_lfsr & 3;
 	}
 
-	if (!match) {
-		unsigned int f;
+	if (!match && !debug) {
+		cris_mmu_update_rand_lfsr(env);
 
-		/* Update lfsr at every fault.  */
-		f = compute_polynom(env->mmu_rand_lfsr);
-		env->mmu_rand_lfsr >>= 1;
-		env->mmu_rand_lfsr |= (f << 15);
-		env->mmu_rand_lfsr &= 0xffff;
-		
 		/* Compute index.  */
 		idx = vpage & 15;
 
@@ -325,7 +330,7 @@
 
 int cris_mmu_translate(struct cris_mmu_result *res,
 		       CPUState *env, uint32_t vaddr,
-		       int rw, int mmu_idx)
+		       int rw, int mmu_idx, int debug)
 {
 	int seg;
 	int miss = 0;
@@ -352,9 +357,10 @@
 		base = cris_mmu_translate_seg(env, seg);
                 res->phy = base | (0x0fffffff & vaddr);
 		res->prot = PAGE_BITS;
+	} else {
+		miss = cris_mmu_translate_page(res, env, vaddr, rw,
+					       is_user, debug);
 	}
-	else
-		miss = cris_mmu_translate_page(res, env, vaddr, rw, is_user);
   done:
 	env->pregs[PR_SRS] = old_srs;
 	return miss;
diff --git a/target-cris/mmu.h b/target-cris/mmu.h
index d753b38..459d809 100644
--- a/target-cris/mmu.h
+++ b/target-cris/mmu.h
@@ -14,4 +14,4 @@
 void cris_mmu_flush_pid(CPUState *env, uint32_t pid);
 int cris_mmu_translate(struct cris_mmu_result *res,
 		       CPUState *env, uint32_t vaddr,
-		       int rw, int mmu_idx);
+		       int rw, int mmu_idx, int debug);
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 4de486e..1144d4e 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -933,19 +933,12 @@
 #endif
 
 #include "cpu-all.h"
-#include "exec-all.h"
-
 #include "svm.h"
 
 #if !defined(CONFIG_USER_ONLY)
 #include "hw/apic.h"
 #endif
 
-static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
-{
-    env->eip = tb->pc - tb->cs_base;
-}
-
 static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc,
                                         target_ulong *cs_base, int *flags)
 {
diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c
index 83057bd..dcfd81b 100644
--- a/target-i386/cpuid.c
+++ b/target-i386/cpuid.c
@@ -241,11 +241,11 @@
           /* missing:
           CPUID_VME, CPUID_DTS, CPUID_SS, CPUID_HT, CPUID_TM, CPUID_PBE */
 #define TCG_EXT_FEATURES (CPUID_EXT_SSE3 | CPUID_EXT_MONITOR | \
-          CPUID_EXT_CX16 | CPUID_EXT_POPCNT | CPUID_EXT_XSAVE | \
+          CPUID_EXT_CX16 | CPUID_EXT_POPCNT | \
           CPUID_EXT_HYPERVISOR)
           /* missing:
           CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_EST,
-          CPUID_EXT_TM2, CPUID_EXT_XTPR, CPUID_EXT_PDCM */
+          CPUID_EXT_TM2, CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_XSAVE */
 #define TCG_EXT2_FEATURES ((TCG_FEATURES & EXT2_FEATURE_MASK) | \
           CPUID_EXT2_NX | CPUID_EXT2_MMXEXT | CPUID_EXT2_RDTSCP | \
           CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT)
diff --git a/target-i386/exec.h b/target-i386/exec.h
index 4ff3c57..fc8945b 100644
--- a/target-i386/exec.h
+++ b/target-i386/exec.h
@@ -327,3 +327,9 @@
     if (env->efer & MSR_EFER_SVME)
         env->hflags |= HF_SVME_MASK;
 }
+
+/* Set EIP from translation block tb: tb->pc minus tb->cs_base.  */
+static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
+{
+    env->eip = tb->pc - tb->cs_base;
+}
diff --git a/target-i386/op_helper.c b/target-i386/op_helper.c
index 71331c6..c50e818 100644
--- a/target-i386/op_helper.c
+++ b/target-i386/op_helper.c
@@ -16,7 +16,7 @@
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
-#define CPU_NO_GLOBAL_REGS
+
 #include "exec.h"
 #include "exec-all.h"
 #include "host-utils.h"
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 708b0a1..2fcc026 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -3472,6 +3472,9 @@
         case 0x171: /* shift xmm, im */
         case 0x172:
         case 0x173:
+            if (b1 >= 2) {
+                goto illegal_op;
+            }
             val = ldub_code(s->pc++);
             if (is_xmm) {
                 gen_op_movl_T0_im(val);
@@ -3699,6 +3702,9 @@
             rm = modrm & 7;
             reg = ((modrm >> 3) & 7) | rex_r;
             mod = (modrm >> 6) & 3;
+            if (b1 >= 2) {
+                goto illegal_op;
+            }
 
             sse_op2 = sse_op_table6[b].op[b1];
             if (!sse_op2)
@@ -3798,6 +3804,9 @@
             rm = modrm & 7;
             reg = ((modrm >> 3) & 7) | rex_r;
             mod = (modrm >> 6) & 3;
+            if (b1 >= 2) {
+                goto illegal_op;
+            }
 
             sse_op2 = sse_op_table7[b].op[b1];
             if (!sse_op2)
@@ -5293,6 +5302,7 @@
         break;
 
     case 0x91 ... 0x97: /* xchg R, EAX */
+    do_xchg_reg_eax:
         ot = dflag + OT_WORD;
         reg = (b & 7) | REX_B(s);
         rm = R_EAX;
@@ -6663,10 +6673,14 @@
         /************************/
         /* misc */
     case 0x90: /* nop */
-        /* XXX: xchg + rex handling */
         /* XXX: correct lock test for all insn */
-        if (prefixes & PREFIX_LOCK)
+        if (prefixes & PREFIX_LOCK) {
             goto illegal_op;
+        }
+        /* If REX_B is set, then this is xchg eax, r8d, not a nop.  */
+        if (REX_B(s)) {
+            goto do_xchg_reg_eax;
+        }
         if (prefixes & PREFIX_REPZ) {
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_PAUSE);
         }
diff --git a/target-m68k/cpu.h b/target-m68k/cpu.h
index b2f37ec..33c41b2 100644
--- a/target-m68k/cpu.h
+++ b/target-m68k/cpu.h
@@ -242,12 +242,6 @@
 #endif
 
 #include "cpu-all.h"
-#include "exec-all.h"
-
-static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
-{
-    env->pc = tb->pc;
-}
 
 static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc,
                                         target_ulong *cs_base, int *flags)
diff --git a/target-m68k/exec.h b/target-m68k/exec.h
index ece9aa0..f31e06e 100644
--- a/target-m68k/exec.h
+++ b/target-m68k/exec.h
@@ -42,3 +42,9 @@
     }
     return EXCP_HALTED;
 }
+
+/* Set env->pc to the pc recorded in translation block tb.  */
+static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
+{
+    env->pc = tb->pc;
+}
diff --git a/target-microblaze/cpu.h b/target-microblaze/cpu.h
index ff8c8c8..360ac0a 100644
--- a/target-microblaze/cpu.h
+++ b/target-microblaze/cpu.h
@@ -305,12 +305,6 @@
 }
 
 #include "cpu-all.h"
-#include "exec-all.h"
-
-static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
-{
-    env->sregs[SR_PC] = tb->pc;
-}
 
 static inline target_ulong cpu_get_pc(CPUState *env)
 {
diff --git a/target-microblaze/exec.h b/target-microblaze/exec.h
index 646701c..87b2494 100644
--- a/target-microblaze/exec.h
+++ b/target-microblaze/exec.h
@@ -27,8 +27,6 @@
 #include "softmmu_exec.h"
 #endif
 
-void cpu_mb_flush_flags(CPUMBState *env, int cc_op);
-
 static inline int cpu_has_work(CPUState *env)
 {
     return (env->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI));
@@ -46,3 +44,9 @@
 	}
 	return EXCP_HALTED;
 }
+
+/* Set sregs[SR_PC] to the pc recorded in translation block tb.  */
+static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
+{
+    env->sregs[SR_PC] = tb->pc;
+}
diff --git a/target-mips/cpu.h b/target-mips/cpu.h
index c21b8e4..81051aa 100644
--- a/target-mips/cpu.h
+++ b/target-mips/cpu.h
@@ -526,7 +526,6 @@
 }
 
 #include "cpu-all.h"
-#include "exec-all.h"
 
 /* Memory access type :
  * may be needed for precise access rights control and precise exceptions.
@@ -612,13 +611,6 @@
 		                               int rw);
 #endif
 
-static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
-{
-    env->active_tc.PC = tb->pc;
-    env->hflags &= ~MIPS_HFLAG_BMASK;
-    env->hflags |= tb->flags & MIPS_HFLAG_BMASK;
-}
-
 static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc,
                                         target_ulong *cs_base, int *flags)
 {
diff --git a/target-mips/exec.h b/target-mips/exec.h
index 01e9c4d..af61b54 100644
--- a/target-mips/exec.h
+++ b/target-mips/exec.h
@@ -17,14 +17,6 @@
 #include "softmmu_exec.h"
 #endif /* !defined(CONFIG_USER_ONLY) */
 
-void dump_fpu(CPUState *env);
-void fpu_dump_state(CPUState *env, FILE *f,
-                    int (*fpu_fprintf)(FILE *f, const char *fmt, ...),
-                    int flags);
-
-void cpu_mips_clock_init (CPUState *env);
-void cpu_mips_tlb_flush (CPUState *env, int flush_global);
-
 static inline int cpu_has_work(CPUState *env)
 {
     return (env->interrupt_request &
@@ -84,4 +76,11 @@
     }
 }
 
+static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
+{
+    env->active_tc.PC = tb->pc;                  /* pc recorded in tb */
+    env->hflags &= ~MIPS_HFLAG_BMASK;            /* clear the BMASK bits... */
+    env->hflags |= tb->flags & MIPS_HFLAG_BMASK; /* ...and copy tb's in */
+}
+
 #endif /* !defined(__QEMU_MIPS_EXEC_H__) */
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index d09d6ed54..8ae510a 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -22,6 +22,11 @@
 #include "host-utils.h"
 
 #include "helper.h"
+
+#ifndef CONFIG_USER_ONLY
+static void cpu_mips_tlb_flush (CPUState *env, int flush_global);
+#endif
+
 /*****************************************************************************/
 /* Exceptions processing helpers */
 
@@ -1635,7 +1640,7 @@
 
 #ifndef CONFIG_USER_ONLY
 /* TLB management */
-void cpu_mips_tlb_flush (CPUState *env, int flush_global)
+static void cpu_mips_tlb_flush (CPUState *env, int flush_global)
 {
     /* Flush qemu's TLB and discard all shadowed entries.  */
     tlb_flush (env, flush_global);
diff --git a/target-mips/translate.c b/target-mips/translate.c
index 804b6e4..0ab23d3 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -886,7 +886,7 @@
 
 /* load/store instructions. */
 #define OP_LD(insn,fname)                                                 \
-static inline void op_ldst_##insn(TCGv ret, TCGv arg1, DisasContext *ctx) \
+static inline void op_ld_##insn(TCGv ret, TCGv arg1, DisasContext *ctx)   \
 {                                                                         \
     tcg_gen_qemu_##fname(ret, arg1, ctx->mem_idx);                        \
 }
@@ -902,7 +902,7 @@
 #undef OP_LD
 
 #define OP_ST(insn,fname)                                                  \
-static inline void op_ldst_##insn(TCGv arg1, TCGv arg2, DisasContext *ctx) \
+static inline void op_st_##insn(TCGv arg1, TCGv arg2, DisasContext *ctx)   \
 {                                                                          \
     tcg_gen_qemu_##fname(arg1, arg2, ctx->mem_idx);                        \
 }
@@ -916,7 +916,7 @@
 
 #ifdef CONFIG_USER_ONLY
 #define OP_LD_ATOMIC(insn,fname)                                           \
-static inline void op_ldst_##insn(TCGv ret, TCGv arg1, DisasContext *ctx)  \
+static inline void op_ld_##insn(TCGv ret, TCGv arg1, DisasContext *ctx)    \
 {                                                                          \
     TCGv t0 = tcg_temp_new();                                              \
     tcg_gen_mov_tl(t0, arg1);                                              \
@@ -927,7 +927,7 @@
 }
 #else
 #define OP_LD_ATOMIC(insn,fname)                                           \
-static inline void op_ldst_##insn(TCGv ret, TCGv arg1, DisasContext *ctx)  \
+static inline void op_ld_##insn(TCGv ret, TCGv arg1, DisasContext *ctx)    \
 {                                                                          \
     gen_helper_2i(insn, ret, arg1, ctx->mem_idx);                          \
 }
@@ -940,7 +940,7 @@
 
 #ifdef CONFIG_USER_ONLY
 #define OP_ST_ATOMIC(insn,fname,ldname,almask)                               \
-static inline void op_ldst_##insn(TCGv arg1, TCGv arg2, int rt, DisasContext *ctx) \
+static inline void op_st_##insn(TCGv arg1, TCGv arg2, int rt, DisasContext *ctx) \
 {                                                                            \
     TCGv t0 = tcg_temp_new();                                                \
     int l1 = gen_new_label();                                                \
@@ -964,7 +964,7 @@
 }
 #else
 #define OP_ST_ATOMIC(insn,fname,ldname,almask)                               \
-static inline void op_ldst_##insn(TCGv arg1, TCGv arg2, int rt, DisasContext *ctx) \
+static inline void op_st_##insn(TCGv arg1, TCGv arg2, int rt, DisasContext *ctx) \
 {                                                                            \
     TCGv t0 = tcg_temp_new();                                                \
     gen_helper_3i(insn, t0, arg1, arg2, ctx->mem_idx);                       \
@@ -1005,43 +1005,45 @@
     return pc;
 }
 
-/* Load and store */
-static void gen_ldst (DisasContext *ctx, uint32_t opc, int rt,
-                      int base, int16_t offset)
+/* Load */
+static void gen_ld (CPUState *env, DisasContext *ctx, uint32_t opc,
+                    int rt, int base, int16_t offset)
 {
-    const char *opn = "ldst";
-    TCGv t0 = tcg_temp_new();
-    TCGv t1 = tcg_temp_new();
+    const char *opn = "ld";
+    TCGv t0, t1;
 
+    if (rt == 0 && env->insn_flags & (INSN_LOONGSON2E | INSN_LOONGSON2F)) {
+        /* Loongson CPU uses a load to zero register for prefetch.
+           We emulate it as a NOP. On other CPU we must perform the
+           actual memory access. */
+        MIPS_DEBUG("NOP");
+        return;
+    }
+
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
     gen_base_offset_addr(ctx, t0, base, offset);
-    /* Don't do NOP if destination is zero: we must perform the actual
-       memory access. */
+
     switch (opc) {
 #if defined(TARGET_MIPS64)
     case OPC_LWU:
         save_cpu_state(ctx, 0);
-        op_ldst_lwu(t0, t0, ctx);
+        op_ld_lwu(t0, t0, ctx);
         gen_store_gpr(t0, rt);
         opn = "lwu";
         break;
     case OPC_LD:
         save_cpu_state(ctx, 0);
-        op_ldst_ld(t0, t0, ctx);
+        op_ld_ld(t0, t0, ctx);
         gen_store_gpr(t0, rt);
         opn = "ld";
         break;
     case OPC_LLD:
         save_cpu_state(ctx, 0);
-        op_ldst_lld(t0, t0, ctx);
+        op_ld_lld(t0, t0, ctx);
         gen_store_gpr(t0, rt);
         opn = "lld";
         break;
-    case OPC_SD:
-        save_cpu_state(ctx, 0);
-        gen_load_gpr(t1, rt);
-        op_ldst_sd(t1, t0, ctx);
-        opn = "sd";
-        break;
     case OPC_LDL:
         save_cpu_state(ctx, 1);
         gen_load_gpr(t1, rt);
@@ -1049,12 +1051,6 @@
         gen_store_gpr(t1, rt);
         opn = "ldl";
         break;
-    case OPC_SDL:
-        save_cpu_state(ctx, 1);
-        gen_load_gpr(t1, rt);
-        gen_helper_2i(sdl, t1, t0, ctx->mem_idx);
-        opn = "sdl";
-        break;
     case OPC_LDR:
         save_cpu_state(ctx, 1);
         gen_load_gpr(t1, rt);
@@ -1062,72 +1058,50 @@
         gen_store_gpr(t1, rt);
         opn = "ldr";
         break;
-    case OPC_SDR:
-        save_cpu_state(ctx, 1);
-        gen_load_gpr(t1, rt);
-        gen_helper_2i(sdr, t1, t0, ctx->mem_idx);
-        opn = "sdr";
-        break;
     case OPC_LDPC:
         save_cpu_state(ctx, 1);
         tcg_gen_movi_tl(t1, pc_relative_pc(ctx));
         gen_op_addr_add(ctx, t0, t0, t1);
-        op_ldst_ld(t0, t0, ctx);
+        op_ld_ld(t0, t0, ctx);
         gen_store_gpr(t0, rt);
+        opn = "ldpc";
         break;
 #endif
     case OPC_LWPC:
         save_cpu_state(ctx, 1);
         tcg_gen_movi_tl(t1, pc_relative_pc(ctx));
         gen_op_addr_add(ctx, t0, t0, t1);
-        op_ldst_lw(t0, t0, ctx);
+        op_ld_lw(t0, t0, ctx);
         gen_store_gpr(t0, rt);
+        opn = "lwpc";
         break;
     case OPC_LW:
         save_cpu_state(ctx, 0);
-        op_ldst_lw(t0, t0, ctx);
+        op_ld_lw(t0, t0, ctx);
         gen_store_gpr(t0, rt);
         opn = "lw";
         break;
-    case OPC_SW:
-        save_cpu_state(ctx, 0);
-        gen_load_gpr(t1, rt);
-        op_ldst_sw(t1, t0, ctx);
-        opn = "sw";
-        break;
     case OPC_LH:
         save_cpu_state(ctx, 0);
-        op_ldst_lh(t0, t0, ctx);
+        op_ld_lh(t0, t0, ctx);
         gen_store_gpr(t0, rt);
         opn = "lh";
         break;
-    case OPC_SH:
-        save_cpu_state(ctx, 0);
-        gen_load_gpr(t1, rt);
-        op_ldst_sh(t1, t0, ctx);
-        opn = "sh";
-        break;
     case OPC_LHU:
         save_cpu_state(ctx, 0);
-        op_ldst_lhu(t0, t0, ctx);
+        op_ld_lhu(t0, t0, ctx);
         gen_store_gpr(t0, rt);
         opn = "lhu";
         break;
     case OPC_LB:
         save_cpu_state(ctx, 0);
-        op_ldst_lb(t0, t0, ctx);
+        op_ld_lb(t0, t0, ctx);
         gen_store_gpr(t0, rt);
         opn = "lb";
         break;
-    case OPC_SB:
-        save_cpu_state(ctx, 0);
-        gen_load_gpr(t1, rt);
-        op_ldst_sb(t1, t0, ctx);
-        opn = "sb";
-        break;
     case OPC_LBU:
         save_cpu_state(ctx, 0);
-        op_ldst_lbu(t0, t0, ctx);
+        op_ld_lbu(t0, t0, ctx);
         gen_store_gpr(t0, rt);
         opn = "lbu";
         break;
@@ -1138,12 +1112,6 @@
         gen_store_gpr(t1, rt);
         opn = "lwl";
         break;
-    case OPC_SWL:
-        save_cpu_state(ctx, 1);
-        gen_load_gpr(t1, rt);
-        gen_helper_2i(swl, t1, t0, ctx->mem_idx);
-        opn = "swr";
-        break;
     case OPC_LWR:
         save_cpu_state(ctx, 1);
         gen_load_gpr(t1, rt);
@@ -1151,15 +1119,9 @@
         gen_store_gpr(t1, rt);
         opn = "lwr";
         break;
-    case OPC_SWR:
-        save_cpu_state(ctx, 1);
-        gen_load_gpr(t1, rt);
-        gen_helper_2i(swr, t1, t0, ctx->mem_idx);
-        opn = "swr";
-        break;
     case OPC_LL:
         save_cpu_state(ctx, 1);
-        op_ldst_ll(t0, t0, ctx);
+        op_ld_ll(t0, t0, ctx);
         gen_store_gpr(t0, rt);
         opn = "ll";
         break;
@@ -1169,6 +1131,66 @@
     tcg_temp_free(t1);
 }
 
+/* Store: emit code for SB/SH/SW/SWL/SWR (plus SD/SDL/SDR on MIPS64) */
+static void gen_st (DisasContext *ctx, uint32_t opc, int rt,
+                    int base, int16_t offset)
+{
+    const char *opn = "st";
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+
+    gen_base_offset_addr(ctx, t0, base, offset); /* t0: store address */
+    gen_load_gpr(t1, rt);                        /* t1: GPR rt, the data */
+    switch (opc) {
+#if defined(TARGET_MIPS64)
+    case OPC_SD:
+        save_cpu_state(ctx, 0);
+        op_st_sd(t1, t0, ctx);
+        opn = "sd";
+        break;
+    case OPC_SDL:
+        save_cpu_state(ctx, 1);
+        gen_helper_2i(sdl, t1, t0, ctx->mem_idx);
+        opn = "sdl";
+        break;
+    case OPC_SDR:
+        save_cpu_state(ctx, 1);
+        gen_helper_2i(sdr, t1, t0, ctx->mem_idx);
+        opn = "sdr";
+        break;
+#endif
+    case OPC_SW:
+        save_cpu_state(ctx, 0);
+        op_st_sw(t1, t0, ctx);
+        opn = "sw";
+        break;
+    case OPC_SH:
+        save_cpu_state(ctx, 0);
+        op_st_sh(t1, t0, ctx);
+        opn = "sh";
+        break;
+    case OPC_SB:
+        save_cpu_state(ctx, 0);
+        op_st_sb(t1, t0, ctx);
+        opn = "sb";
+        break;
+    case OPC_SWL:
+        save_cpu_state(ctx, 1);
+        gen_helper_2i(swl, t1, t0, ctx->mem_idx);
+        opn = "swl";
+        break;
+    case OPC_SWR:
+        save_cpu_state(ctx, 1);
+        gen_helper_2i(swr, t1, t0, ctx->mem_idx);
+        opn = "swr";
+        break;
+    }
+    MIPS_DEBUG("%s %s, %d(%s)", opn, regnames[rt], offset, regnames[base]);
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+}
+
+
 /* Store conditional */
 static void gen_st_cond (DisasContext *ctx, uint32_t opc, int rt,
                          int base, int16_t offset)
@@ -1188,13 +1210,13 @@
 #if defined(TARGET_MIPS64)
     case OPC_SCD:
         save_cpu_state(ctx, 0);
-        op_ldst_scd(t1, t0, rt, ctx);
+        op_st_scd(t1, t0, rt, ctx);
         opn = "scd";
         break;
 #endif
     case OPC_SC:
         save_cpu_state(ctx, 1);
-        op_ldst_sc(t1, t0, rt, ctx);
+        op_st_sc(t1, t0, rt, ctx);
         opn = "sc";
         break;
     }
@@ -8003,22 +8025,22 @@
     case 4:
         gen_base_offset_addr(ctx, t0, 29, 12);
         gen_load_gpr(t1, 7);
-        op_ldst_sw(t1, t0, ctx);
+        op_st_sw(t1, t0, ctx);
         /* Fall through */
     case 3:
         gen_base_offset_addr(ctx, t0, 29, 8);
         gen_load_gpr(t1, 6);
-        op_ldst_sw(t1, t0, ctx);
+        op_st_sw(t1, t0, ctx);
         /* Fall through */
     case 2:
         gen_base_offset_addr(ctx, t0, 29, 4);
         gen_load_gpr(t1, 5);
-        op_ldst_sw(t1, t0, ctx);
+        op_st_sw(t1, t0, ctx);
         /* Fall through */
     case 1:
         gen_base_offset_addr(ctx, t0, 29, 0);
         gen_load_gpr(t1, 4);
-        op_ldst_sw(t1, t0, ctx);
+        op_st_sw(t1, t0, ctx);
     }
 
     gen_load_gpr(t0, 29);
@@ -8026,7 +8048,7 @@
 #define DECR_AND_STORE(reg) do {                \
         tcg_gen_subi_tl(t0, t0, 4);             \
         gen_load_gpr(t1, reg);                  \
-        op_ldst_sw(t1, t0, ctx);                \
+        op_st_sw(t1, t0, ctx);                  \
     } while (0)
 
     if (do_ra) {
@@ -8126,7 +8148,7 @@
 
 #define DECR_AND_LOAD(reg) do {                 \
         tcg_gen_subi_tl(t0, t0, 4);             \
-        op_ldst_lw(t1, t0, ctx);                \
+        op_ld_lw(t1, t0, ctx);                  \
         gen_store_gpr(t1, reg);                 \
     } while (0)
 
@@ -8244,17 +8266,17 @@
     case I64_LDSP:
         check_mips_64(ctx);
         offset = extended ? offset : offset << 3;
-        gen_ldst(ctx, OPC_LD, ry, 29, offset);
+        gen_ld(env, ctx, OPC_LD, ry, 29, offset);
         break;
     case I64_SDSP:
         check_mips_64(ctx);
         offset = extended ? offset : offset << 3;
-        gen_ldst(ctx, OPC_SD, ry, 29, offset);
+        gen_st(ctx, OPC_SD, ry, 29, offset);
         break;
     case I64_SDRASP:
         check_mips_64(ctx);
         offset = extended ? offset : (ctx->opcode & 0xff) << 3;
-        gen_ldst(ctx, OPC_SD, 31, 29, offset);
+        gen_st(ctx, OPC_SD, 31, 29, offset);
         break;
     case I64_DADJSP:
         check_mips_64(ctx);
@@ -8266,7 +8288,7 @@
             generate_exception(ctx, EXCP_RI);
         } else {
             offset = extended ? offset : offset << 3;
-            gen_ldst(ctx, OPC_LDPC, ry, 0, offset);
+            gen_ld(env, ctx, OPC_LDPC, ry, 0, offset);
         }
         break;
     case I64_DADDIU5:
@@ -8350,7 +8372,7 @@
 #if defined(TARGET_MIPS64)
     case M16_OPC_LD:
             check_mips_64(ctx);
-        gen_ldst(ctx, OPC_LD, ry, rx, offset);
+        gen_ld(env, ctx, OPC_LD, ry, rx, offset);
         break;
 #endif
     case M16_OPC_RRIA:
@@ -8387,7 +8409,7 @@
             gen_compute_branch(ctx, OPC_BNE, 4, 24, 0, offset << 1);
             break;
         case I8_SWRASP:
-            gen_ldst(ctx, OPC_SW, 31, 29, imm);
+            gen_st(ctx, OPC_SW, 31, 29, imm);
             break;
         case I8_ADJSP:
             gen_arith_imm(env, ctx, OPC_ADDIU, 29, 29, imm);
@@ -8426,46 +8448,46 @@
         break;
 #if defined(TARGET_MIPS64)
     case M16_OPC_SD:
-        gen_ldst(ctx, OPC_SD, ry, rx, offset);
+        gen_st(ctx, OPC_SD, ry, rx, offset);
         break;
 #endif
     case M16_OPC_LB:
-        gen_ldst(ctx, OPC_LB, ry, rx, offset);
+        gen_ld(env, ctx, OPC_LB, ry, rx, offset);
         break;
     case M16_OPC_LH:
-        gen_ldst(ctx, OPC_LH, ry, rx, offset);
+        gen_ld(env, ctx, OPC_LH, ry, rx, offset);
         break;
     case M16_OPC_LWSP:
-        gen_ldst(ctx, OPC_LW, rx, 29, offset);
+        gen_ld(env, ctx, OPC_LW, rx, 29, offset);
         break;
     case M16_OPC_LW:
-        gen_ldst(ctx, OPC_LW, ry, rx, offset);
+        gen_ld(env, ctx, OPC_LW, ry, rx, offset);
         break;
     case M16_OPC_LBU:
-        gen_ldst(ctx, OPC_LBU, ry, rx, offset);
+        gen_ld(env, ctx, OPC_LBU, ry, rx, offset);
         break;
     case M16_OPC_LHU:
-        gen_ldst(ctx, OPC_LHU, ry, rx, offset);
+        gen_ld(env, ctx, OPC_LHU, ry, rx, offset);
         break;
     case M16_OPC_LWPC:
-        gen_ldst(ctx, OPC_LWPC, rx, 0, offset);
+        gen_ld(env, ctx, OPC_LWPC, rx, 0, offset);
         break;
 #if defined(TARGET_MIPS64)
     case M16_OPC_LWU:
-        gen_ldst(ctx, OPC_LWU, ry, rx, offset);
+        gen_ld(env, ctx, OPC_LWU, ry, rx, offset);
         break;
 #endif
     case M16_OPC_SB:
-        gen_ldst(ctx, OPC_SB, ry, rx, offset);
+        gen_st(ctx, OPC_SB, ry, rx, offset);
         break;
     case M16_OPC_SH:
-        gen_ldst(ctx, OPC_SH, ry, rx, offset);
+        gen_st(ctx, OPC_SH, ry, rx, offset);
         break;
     case M16_OPC_SWSP:
-        gen_ldst(ctx, OPC_SW, rx, 29, offset);
+        gen_st(ctx, OPC_SW, rx, 29, offset);
         break;
     case M16_OPC_SW:
-        gen_ldst(ctx, OPC_SW, ry, rx, offset);
+        gen_st(ctx, OPC_SW, ry, rx, offset);
         break;
 #if defined(TARGET_MIPS64)
     case M16_OPC_I64:
@@ -8558,7 +8580,7 @@
 #if defined(TARGET_MIPS64)
     case M16_OPC_LD:
         check_mips_64(ctx);
-        gen_ldst(ctx, OPC_LD, ry, rx, offset << 3);
+        gen_ld(env, ctx, OPC_LD, ry, rx, offset << 3);
         break;
 #endif
     case M16_OPC_RRIA:
@@ -8613,7 +8635,7 @@
                                    ((int8_t)ctx->opcode) << 1);
                 break;
             case I8_SWRASP:
-                gen_ldst(ctx, OPC_SW, 31, 29, (ctx->opcode & 0xff) << 2);
+                gen_st(ctx, OPC_SW, 31, 29, (ctx->opcode & 0xff) << 2);
                 break;
             case I8_ADJSP:
                 gen_arith_imm(env, ctx, OPC_ADDIU, 29, 29,
@@ -8677,47 +8699,47 @@
 #if defined(TARGET_MIPS64)
     case M16_OPC_SD:
         check_mips_64(ctx);
-        gen_ldst(ctx, OPC_SD, ry, rx, offset << 3);
+        gen_st(ctx, OPC_SD, ry, rx, offset << 3);
         break;
 #endif
     case M16_OPC_LB:
-        gen_ldst(ctx, OPC_LB, ry, rx, offset);
+        gen_ld(env, ctx, OPC_LB, ry, rx, offset);
         break;
     case M16_OPC_LH:
-        gen_ldst(ctx, OPC_LH, ry, rx, offset << 1);
+        gen_ld(env, ctx, OPC_LH, ry, rx, offset << 1);
         break;
     case M16_OPC_LWSP:
-        gen_ldst(ctx, OPC_LW, rx, 29, ((uint8_t)ctx->opcode) << 2);
+        gen_ld(env, ctx, OPC_LW, rx, 29, ((uint8_t)ctx->opcode) << 2);
         break;
     case M16_OPC_LW:
-        gen_ldst(ctx, OPC_LW, ry, rx, offset << 2);
+        gen_ld(env, ctx, OPC_LW, ry, rx, offset << 2);
         break;
     case M16_OPC_LBU:
-        gen_ldst(ctx, OPC_LBU, ry, rx, offset);
+        gen_ld(env, ctx, OPC_LBU, ry, rx, offset);
         break;
     case M16_OPC_LHU:
-        gen_ldst(ctx, OPC_LHU, ry, rx, offset << 1);
+        gen_ld(env, ctx, OPC_LHU, ry, rx, offset << 1);
         break;
     case M16_OPC_LWPC:
-        gen_ldst(ctx, OPC_LWPC, rx, 0, ((uint8_t)ctx->opcode) << 2);
+        gen_ld(env, ctx, OPC_LWPC, rx, 0, ((uint8_t)ctx->opcode) << 2);
         break;
 #if defined (TARGET_MIPS64)
     case M16_OPC_LWU:
         check_mips_64(ctx);
-        gen_ldst(ctx, OPC_LWU, ry, rx, offset << 2);
+        gen_ld(env, ctx, OPC_LWU, ry, rx, offset << 2);
         break;
 #endif
     case M16_OPC_SB:
-        gen_ldst(ctx, OPC_SB, ry, rx, offset);
+        gen_st(ctx, OPC_SB, ry, rx, offset);
         break;
     case M16_OPC_SH:
-        gen_ldst(ctx, OPC_SH, ry, rx, offset << 1);
+        gen_st(ctx, OPC_SH, ry, rx, offset << 1);
         break;
     case M16_OPC_SWSP:
-        gen_ldst(ctx, OPC_SW, rx, 29, ((uint8_t)ctx->opcode) << 2);
+        gen_st(ctx, OPC_SW, rx, 29, ((uint8_t)ctx->opcode) << 2);
         break;
     case M16_OPC_SW:
-        gen_ldst(ctx, OPC_SW, ry, rx, offset << 2);
+        gen_st(ctx, OPC_SW, ry, rx, offset << 2);
         break;
     case M16_OPC_RRR:
         {
@@ -9631,7 +9653,7 @@
     }
 
     save_cpu_state(ctx, 0);
-    op_ldst_lw(t1, t0, ctx);
+    op_ld_lw(t1, t0, ctx);
     gen_store_gpr(t1, rd);
 
     tcg_temp_free(t0);
@@ -9657,43 +9679,43 @@
     switch (opc) {
     case LWP:
         save_cpu_state(ctx, 0);
-        op_ldst_lw(t1, t0, ctx);
+        op_ld_lw(t1, t0, ctx);
         gen_store_gpr(t1, rd);
         tcg_gen_movi_tl(t1, 4);
         gen_op_addr_add(ctx, t0, t0, t1);
-        op_ldst_lw(t1, t0, ctx);
+        op_ld_lw(t1, t0, ctx);
         gen_store_gpr(t1, rd+1);
         opn = "lwp";
         break;
     case SWP:
         save_cpu_state(ctx, 1);
         gen_load_gpr(t1, rd);
-        op_ldst_sw(t1, t0, ctx);
+        op_st_sw(t1, t0, ctx);
         tcg_gen_movi_tl(t1, 4);
         gen_op_addr_add(ctx, t0, t0, t1);
         gen_load_gpr(t1, rd+1);
-        op_ldst_sw(t1, t0, ctx);
+        op_st_sw(t1, t0, ctx);
         opn = "swp";
         break;
 #ifdef TARGET_MIPS64
     case LDP:
         save_cpu_state(ctx, 0);
-        op_ldst_ld(t1, t0, ctx);
+        op_ld_ld(t1, t0, ctx);
         gen_store_gpr(t1, rd);
         tcg_gen_movi_tl(t1, 8);
         gen_op_addr_add(ctx, t0, t0, t1);
-        op_ldst_ld(t1, t0, ctx);
+        op_ld_ld(t1, t0, ctx);
         gen_store_gpr(t1, rd+1);
         opn = "ldp";
         break;
     case SDP:
         save_cpu_state(ctx, 1);
         gen_load_gpr(t1, rd);
-        op_ldst_sd(t1, t0, ctx);
+        op_st_sd(t1, t0, ctx);
         tcg_gen_movi_tl(t1, 8);
         gen_op_addr_add(ctx, t0, t0, t1);
         gen_load_gpr(t1, rd+1);
-        op_ldst_sd(t1, t0, ctx);
+        op_st_sd(t1, t0, ctx);
         opn = "sdp";
         break;
 #endif
@@ -10798,40 +10820,44 @@
         switch (minor) {
         case LWL:
             mips32_op = OPC_LWL;
-            goto do_ldst_lr;
+            goto do_ld_lr;
         case SWL:
             mips32_op = OPC_SWL;
-            goto do_ldst_lr;
+            goto do_st_lr;
         case LWR:
             mips32_op = OPC_LWR;
-            goto do_ldst_lr;
+            goto do_ld_lr;
         case SWR:
             mips32_op = OPC_SWR;
-            goto do_ldst_lr;
+            goto do_st_lr;
 #if defined(TARGET_MIPS64)
         case LDL:
             mips32_op = OPC_LDL;
-            goto do_ldst_lr;
+            goto do_ld_lr;
         case SDL:
             mips32_op = OPC_SDL;
-            goto do_ldst_lr;
+            goto do_st_lr;
         case LDR:
             mips32_op = OPC_LDR;
-            goto do_ldst_lr;
+            goto do_ld_lr;
         case SDR:
             mips32_op = OPC_SDR;
-            goto do_ldst_lr;
+            goto do_st_lr;
         case LWU:
             mips32_op = OPC_LWU;
-            goto do_ldst_lr;
+            goto do_ld_lr;
         case LLD:
             mips32_op = OPC_LLD;
-            goto do_ldst_lr;
+            goto do_ld_lr;
 #endif
         case LL:
             mips32_op = OPC_LL;
-        do_ldst_lr:
-            gen_ldst(ctx, mips32_op, rt, rs, SIMM(ctx->opcode, 0, 12));
+            goto do_ld_lr;
+        do_ld_lr:
+            gen_ld(env, ctx, mips32_op, rt, rs, SIMM(ctx->opcode, 0, 12));
+            break;
+        do_st_lr:
+            gen_st(ctx, mips32_op, rt, rs, SIMM(ctx->opcode, 0, 12));
             break;
         case SC:
             gen_st_cond(ctx, OPC_SC, rt, rs, SIMM(ctx->opcode, 0, 12));
@@ -10935,37 +10961,41 @@
         /* Loads and stores */
     case LB32:
         mips32_op = OPC_LB;
-        goto do_ldst;
+        goto do_ld;
     case LBU32:
         mips32_op = OPC_LBU;
-        goto do_ldst;
+        goto do_ld;
     case LH32:
         mips32_op = OPC_LH;
-        goto do_ldst;
+        goto do_ld;
     case LHU32:
         mips32_op = OPC_LHU;
-        goto do_ldst;
+        goto do_ld;
     case LW32:
         mips32_op = OPC_LW;
-        goto do_ldst;
+        goto do_ld;
 #ifdef TARGET_MIPS64
     case LD32:
         mips32_op = OPC_LD;
-        goto do_ldst;
+        goto do_ld;
     case SD32:
         mips32_op = OPC_SD;
-        goto do_ldst;
+        goto do_st;
 #endif
     case SB32:
         mips32_op = OPC_SB;
-        goto do_ldst;
+        goto do_st;
     case SH32:
         mips32_op = OPC_SH;
-        goto do_ldst;
+        goto do_st;
     case SW32:
         mips32_op = OPC_SW;
-    do_ldst:
-        gen_ldst(ctx, mips32_op, rt, rs, imm);
+        goto do_st;
+    do_ld:
+        gen_ld(env, ctx, mips32_op, rt, rs, imm);
+        break;
+    do_st:
+        gen_st(ctx, mips32_op, rt, rs, imm);
         break;
     default:
         generate_exception(ctx, EXCP_RI);
@@ -11115,7 +11145,7 @@
             int rb = 28;            /* GP */
             int16_t offset = SIMM(ctx->opcode, 0, 7) << 2;
 
-            gen_ldst(ctx, OPC_LW, rd, rb, offset);
+            gen_ld(env, ctx, OPC_LW, rd, rb, offset);
         }
         break;
     case POOL16F:
@@ -11147,7 +11177,7 @@
             int16_t offset = ZIMM(ctx->opcode, 0, 4);
             offset = (offset == 0xf ? -1 : offset);
 
-            gen_ldst(ctx, OPC_LBU, rd, rb, offset);
+            gen_ld(env, ctx, OPC_LBU, rd, rb, offset);
         }
         break;
     case LHU16:
@@ -11156,7 +11186,7 @@
             int rb = mmreg(uMIPS_RS(ctx->opcode));
             int16_t offset = ZIMM(ctx->opcode, 0, 4) << 1;
 
-            gen_ldst(ctx, OPC_LHU, rd, rb, offset);
+            gen_ld(env, ctx, OPC_LHU, rd, rb, offset);
         }
         break;
     case LWSP16:
@@ -11165,7 +11195,7 @@
             int rb = 29;            /* SP */
             int16_t offset = ZIMM(ctx->opcode, 0, 5) << 2;
 
-            gen_ldst(ctx, OPC_LW, rd, rb, offset);
+            gen_ld(env, ctx, OPC_LW, rd, rb, offset);
         }
         break;
     case LW16:
@@ -11174,7 +11204,7 @@
             int rb = mmreg(uMIPS_RS(ctx->opcode));
             int16_t offset = ZIMM(ctx->opcode, 0, 4) << 2;
 
-            gen_ldst(ctx, OPC_LW, rd, rb, offset);
+            gen_ld(env, ctx, OPC_LW, rd, rb, offset);
         }
         break;
     case SB16:
@@ -11183,7 +11213,7 @@
             int rb = mmreg(uMIPS_RS(ctx->opcode));
             int16_t offset = ZIMM(ctx->opcode, 0, 4);
 
-            gen_ldst(ctx, OPC_SB, rd, rb, offset);
+            gen_st(ctx, OPC_SB, rd, rb, offset);
         }
         break;
     case SH16:
@@ -11192,7 +11222,7 @@
             int rb = mmreg(uMIPS_RS(ctx->opcode));
             int16_t offset = ZIMM(ctx->opcode, 0, 4) << 1;
 
-            gen_ldst(ctx, OPC_SH, rd, rb, offset);
+            gen_st(ctx, OPC_SH, rd, rb, offset);
         }
         break;
     case SWSP16:
@@ -11201,7 +11231,7 @@
             int rb = 29;            /* SP */
             int16_t offset = ZIMM(ctx->opcode, 0, 5) << 2;
 
-            gen_ldst(ctx, OPC_SW, rd, rb, offset);
+            gen_st(ctx, OPC_SW, rd, rb, offset);
         }
         break;
     case SW16:
@@ -11210,7 +11240,7 @@
             int rb = mmreg(uMIPS_RS(ctx->opcode));
             int16_t offset = ZIMM(ctx->opcode, 0, 4) << 2;
 
-            gen_ldst(ctx, OPC_SW, rd, rb, offset);
+            gen_st(ctx, OPC_SW, rd, rb, offset);
         }
         break;
     case MOVE16:
@@ -11775,10 +11805,12 @@
          *is_branch = 1;
          break;
     case OPC_LB ... OPC_LWR: /* Load and stores */
+    case OPC_LL:
+         gen_ld(env, ctx, op, rt, rs, imm);
+         break;
     case OPC_SB ... OPC_SW:
     case OPC_SWR:
-    case OPC_LL:
-         gen_ldst(ctx, op, rt, rs, imm);
+         gen_st(ctx, op, rt, rs, imm);
          break;
     case OPC_SC:
          gen_st_cond(ctx, op, rt, rs, imm);
@@ -11904,13 +11936,17 @@
     /* MIPS64 opcodes */
     case OPC_LWU:
     case OPC_LDL ... OPC_LDR:
-    case OPC_SDL ... OPC_SDR:
     case OPC_LLD:
     case OPC_LD:
+        check_insn(env, ctx, ISA_MIPS3);
+        check_mips_64(ctx);
+        gen_ld(env, ctx, op, rt, rs, imm);
+        break;
+    case OPC_SDL ... OPC_SDR:
     case OPC_SD:
         check_insn(env, ctx, ISA_MIPS3);
         check_mips_64(ctx);
-        gen_ldst(ctx, op, rt, rs, imm);
+        gen_st(ctx, op, rt, rs, imm);
         break;
     case OPC_SCD:
         check_insn(env, ctx, ISA_MIPS3);
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 2ad4486..9c8d774 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -849,7 +849,6 @@
 #endif
 
 #include "cpu-all.h"
-#include "exec-all.h"
 
 /*****************************************************************************/
 /* CRF definitions */
@@ -1601,11 +1600,6 @@
 
 /*****************************************************************************/
 
-static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
-{
-    env->nip = tb->pc;
-}
-
 static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc,
                                         target_ulong *cs_base, int *flags)
 {
diff --git a/target-ppc/exec.h b/target-ppc/exec.h
index 09f592c..44cc5e9 100644
--- a/target-ppc/exec.h
+++ b/target-ppc/exec.h
@@ -52,4 +52,9 @@
     return EXCP_HALTED;
 }
 
+static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
+{
+    env->nip = tb->pc; /* load nip with the pc recorded in tb */
+}
+
 #endif /* !defined (__PPC_H__) */
diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h
index dd407b2..8d73fad 100644
--- a/target-s390x/cpu.h
+++ b/target-s390x/cpu.h
@@ -116,18 +116,12 @@
 #define cpu_gen_code cpu_s390x_gen_code
 
 #include "cpu-all.h"
-#include "exec-all.h"
 
 #define EXCP_OPEX 1 /* operation exception (sigill) */
 #define EXCP_SVC 2 /* supervisor call (syscall) */
 #define EXCP_ADDR 5 /* addressing exception */
 #define EXCP_EXECUTE_SVC 0xff00000 /* supervisor call via execute insn */
 
-static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock* tb)
-{
-    env->psw.addr = tb->pc;
-}
-
 static inline void cpu_get_tb_cpu_state(CPUState* env, target_ulong *pc,
                                         target_ulong *cs_base, int *flags)
 {
diff --git a/target-s390x/exec.h b/target-s390x/exec.h
index 837f853..bf3f264 100644
--- a/target-s390x/exec.h
+++ b/target-s390x/exec.h
@@ -45,3 +45,9 @@
     }
     return EXCP_HALTED;
 }
+
+static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock* tb)
+{
+    env->psw.addr = tb->pc; /* load psw.addr with the pc recorded in tb */
+}
+
diff --git a/target-sh4/cpu.h b/target-sh4/cpu.h
index f8b1680..64a609b 100644
--- a/target-sh4/cpu.h
+++ b/target-sh4/cpu.h
@@ -211,7 +211,6 @@
 #endif
 
 #include "cpu-all.h"
-#include "exec-all.h"
 
 /* Memory access type */
 enum {
@@ -303,12 +302,6 @@
 #define PTEA_TC        (1 << 3)
 #define cpu_ptea_tc(ptea) (((ptea) & PTEA_TC) >> 3)
 
-static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
-{
-    env->pc = tb->pc;
-    env->flags = tb->flags;
-}
-
 #define TB_FLAG_PENDING_MOVCA  (1 << 4)
 
 static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc,
diff --git a/target-sh4/exec.h b/target-sh4/exec.h
index edd667d..2999c02 100644
--- a/target-sh4/exec.h
+++ b/target-sh4/exec.h
@@ -47,4 +47,10 @@
 #include "softmmu_exec.h"
 #endif
 
+static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
+{
+    env->pc = tb->pc;       /* pc is taken from the pc recorded in tb */
+    env->flags = tb->flags; /* flags are copied from tb as well */
+}
+
 #endif				/* _EXEC_SH4_H */
diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h
index 8f0484b..7e0d17c 100644
--- a/target-sparc/cpu.h
+++ b/target-sparc/cpu.h
@@ -615,7 +615,6 @@
 #endif
 
 #include "cpu-all.h"
-#include "exec-all.h"
 
 #ifdef TARGET_SPARC64
 /* sun4u.c */
@@ -625,12 +624,6 @@
 trap_state* cpu_tsptr(CPUState* env);
 #endif
 
-static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
-{
-    env->pc = tb->pc;
-    env->npc = tb->cs_base;
-}
-
 static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc,
                                         target_ulong *cs_base, int *flags)
 {
diff --git a/target-sparc/exec.h b/target-sparc/exec.h
index c84e055..f811571 100644
--- a/target-sparc/exec.h
+++ b/target-sparc/exec.h
@@ -32,4 +32,10 @@
     return EXCP_HALTED;
 }
 
+static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
+{
+    env->pc = tb->pc;       /* pc is taken from the pc recorded in tb */
+    env->npc = tb->cs_base; /* npc is read back from tb's cs_base field */
+}
+
 #endif