Merge "No 64 bit emu on windows." into idea133
automerge: c0ade02

* commit 'c0ade027b6f8ef42722224b9e2f6b73ef3d0aacd':
  No 64 bit emu on windows.
diff --git a/Makefile b/Makefile
index 0b6e600..d225409 100644
--- a/Makefile
+++ b/Makefile
@@ -42,7 +42,6 @@
 
 VPATH := $(OBJS_DIR)
 VPATH += :$(SRC_PATH)/android/config
-VPATH += :$(SRC_PATH):$(SRC_PATH)/target-$(TARGET_ARCH)
 
 .PHONY: all libraries executables clean clean-config clean-objs-dir \
         clean-executables clean-libraries
diff --git a/Makefile.android b/Makefile.android
index b5a6008..d25f357 100644
--- a/Makefile.android
+++ b/Makefile.android
@@ -47,7 +47,7 @@
   MY_CFLAGS += -D_WIN32
   # LARGEADDRESSAWARE gives more address space to 32-bit process
   MY_LDLIBS += -Xlinker --large-address-aware
-  ifneq ($(BUILD_HOST_64bit),)
+  ifneq ($(HOST_IS_64_BIT),)
     # Microsoft 64-bit compilers define both _WIN32 and _WIN64
     MY_CFLAGS += -D_WIN64
     # amd64-mingw32msvc- toolchain still name it vfw32.  May change it once amd64-mingw32msvc-
@@ -106,7 +106,7 @@
 # adequate values for HOST_CC
 #
 ifneq ($(BUILD_STANDALONE_EMULATOR),true)
-  # On Linux, use our custom 32-bit host toolchain (unless BUILD_HOST_64bit=1)
+  # On Linux, use our custom 32-bit host toolchain (unless HOST_IS_64_BIT=true)
   # which contains the relevant headers and 32-bit libraries for audio (The host 64-bit
   # Lucid doesn't provide these anymore, only their 64-bit versions).
   ifeq ($(HOST_OS),linux)
@@ -136,7 +136,7 @@
 
 
 ifneq ($(combo_target)$(TARGET_SIMULATOR),HOST_true)
-  ifneq ($(BUILD_HOST_64bit),)
+  ifneq ($(HOST_IS_64_BIT),)
     MY_CFLAGS += -m64
     MY_LDFLAGS += -m64
   else
@@ -151,6 +151,13 @@
   endif
 endif
 
+ifeq ($(HOST_OS)-$(BUILD_STANDALONE_EMULATOR),windows-true)
+  # Ensure that printf() et al use GNU printf format specifiers as required
+  # by QEMU. This is important when using the newer Mingw64 cross-toolchain.
+  # See http://sourceforge.net/apps/trac/mingw-w64/wiki/gnu%20printf
+  MY_CFLAGS += -D__USE_MINGW_ANSI_STDIO=1
+endif
+
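
To illustrate why the flag matters: without it, the Mingw64 toolchain routes printf() through Microsoft's msvcrt, which historically does not understand the C99/GNU length modifiers (such as %lld or %zu) that QEMU's format strings rely on. A minimal sketch, assuming a MinGW cross-compiler (not part of this patch):

    /* Build with: x86_64-w64-mingw32-gcc -D__USE_MINGW_ANSI_STDIO=1 check.c
     * With the flag, the GNU-compatible printf implementation is used and
     * the C99 specifiers below behave as on Linux; without it, older msvcrt
     * runtimes would need %I64d / %Iu instead. */
    #include <stdio.h>

    int main(void) {
        long long big = 1LL << 40;
        size_t size = sizeof(void*);
        printf("big=%lld size=%zu\n", big, size);
        return 0;
    }
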
 # Enable warnings, except those related to missing field initializers
 # (the QEMU coding style loves using these).
 #
@@ -218,7 +225,7 @@
 #
 QEMU_SYSTEM_LDLIBS := -lm
 ifeq ($(HOST_OS),windows)
-  QEMU_SYSTEM_LDLIBS += -mno-cygwin -mwindows -mconsole
+  QEMU_SYSTEM_LDLIBS += -mwindows -mconsole
 endif
 
 ifeq ($(HOST_OS),freebsd)
@@ -294,6 +301,7 @@
 EMULATOR_TARGET_ARCH := arm
 include $(LOCAL_PATH)/Makefile.target
 
+# Note: the same binary handles x86 and x86_64
 EMULATOR_TARGET_ARCH := x86
 include $(LOCAL_PATH)/Makefile.target
 
@@ -331,7 +339,9 @@
     $(error Cannot find GPU emulation sources directory: $(EMULATOR_EMUGL_SOURCES_DIR))
   endif
 
+  # TODO(digit): Remove the line below.
   BUILD_EMULATOR_OPENGL := true
+  BUILD_EMULATOR_HOST_OPENGL := true
   include $(EMULATOR_EMUGL_SOURCES_DIR)/Android.mk
 endif
 
diff --git a/Makefile.common b/Makefile.common
index ff3b5eb..786a7f1 100644
--- a/Makefile.common
+++ b/Makefile.common
@@ -127,6 +127,7 @@
 	android/base/StringView.cpp \
 	android/emulation/CpuAccelerator.cpp \
 	android/filesystems/ext4_utils.cpp \
+	android/kernel/kernel_utils.cpp \
 	android/utils/assert.c \
 	android/utils/bufprint.c \
 	android/utils/debug.c \
@@ -203,9 +204,9 @@
 BUILD_SDL_FROM_SOURCES := true
 
 # On linux-x86, using the prebuilts avoids installing all the X11
-# development packages on our build servers.
-#
-ifeq ($(QEMU_HOST_TAG),linux-x86)
+# development packages on our build servers. Note: When building 64-bit
+# host binaries, don't use 32-bit SDL prebuilts.
+ifeq ($(strip $(QEMU_HOST_TAG)$(HOST_IS_64_BIT)),linux-x86)
     BUILD_SDL_FROM_SOURCES := false
 endif
 
@@ -412,8 +413,6 @@
 CORE_MISC_SOURCES = \
     aio-android.c \
     async.c \
-    bt-host.c \
-    bt-vhci.c \
     iohandler.c \
     ioport.c \
     migration-dummy-android.c \
@@ -488,12 +487,9 @@
 endif
 
 ifeq ($(HOST_OS),linux)
-    CORE_MISC_SOURCES += hw/usb/usb-linux.c \
-                         util/compatfd.c \
+    CORE_MISC_SOURCES += util/compatfd.c \
                          util/qemu-thread-posix.c \
                          android/camera/camera-capture-linux.c
-else
-    CORE_MISC_SOURCES += hw/usb/usb-dummy-android.c
 endif
 
 ifeq ($(HOST_OS),windows)
@@ -640,7 +636,6 @@
 BLOCK_SOURCES += \
     block.c \
     blockdev.c \
-    block/qcow.c \
     block/qcow2.c \
     block/qcow2-refcount.c \
     block/qcow2-snapshot.c \
@@ -707,7 +702,7 @@
 common_LOCAL_CFLAGS =
 common_LOCAL_SRC_FILES =
 
-ELFF_CFLAGS := -I$(LOCAL_PATH)/elff
+ELFF_CFLAGS := -I$(LOCAL_PATH)/distrib/elff
 ELFF_LDLIBS := -lstdc++
 
 ELFF_SOURCES := \
@@ -719,7 +714,7 @@
     elf_mapped_section.cc \
     elff_api.cc \
 
-common_LOCAL_SRC_FILES += $(ELFF_SOURCES:%=elff/%)
+common_LOCAL_SRC_FILES += $(ELFF_SOURCES:%=distrib/elff/elff/%)
 
 common_LOCAL_CFLAGS += \
     -fno-exceptions \
@@ -778,4 +773,3 @@
 LOCAL_GENERATED_SOURCES += $$(QEMU_HEADER_H)
 LOCAL_C_INCLUDES += $$(intermediates)
 endef
-
diff --git a/Makefile.target b/Makefile.target
index 7c13cbb..cce444d 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -52,19 +52,17 @@
 common_LOCAL_CFLAGS += $(EMULATOR_TARGET_CFLAGS)
 
 common_LOCAL_SRC_FILES += \
+    tcg/optimize.c \
     tcg/tcg.c \
+    tcg-runtime.c \
+    util/bitops.c \
+    util/host-utils.c \
 
 ##############################################################################
 # Emulated hardware devices.
 #
 
 HW_SOURCES := \
-    bt/core.c \
-    bt/hci.c \
-    bt/hid.c \
-    bt/l2cap.c \
-    bt/sdp.c \
-    block/cdrom.c \
     core/irq.c \
     core/qdev.c \
     core/sysbus.c \
@@ -75,18 +73,11 @@
     android/goldfish/fb.c \
     android/goldfish/battery.c \
     android/goldfish/mmc.c   \
-    android/goldfish/memlog.c \
     android/goldfish/nand.c \
     android/goldfish/pipe.c \
     android/goldfish/tty.c \
     android/goldfish/vmem.c \
     pci/pci.c \
-    scsi/scsi-disk.c \
-    usb/dev-hid.c \
-    usb/dev-hub.c \
-    usb/dev-storage.c \
-    usb/hcd-ohci.c \
-    usb/core.c \
     watchdog/watchdog.c
 
 
@@ -96,7 +87,6 @@
     arm/pic.c \
     arm/boot.c \
     android/goldfish/interrupt.c \
-    android/goldfish/switch.c \
     android/goldfish/timer.c \
     android/goldfish/trace.c \
 
@@ -142,7 +132,6 @@
     android/android_mips.c \
     mips/mips_pic.c \
     android/goldfish/interrupt.c \
-    android/goldfish/switch.c \
     android/goldfish/timer.c \
     android/goldfish/trace.c \
     mips/cputimer.c \
@@ -170,10 +159,9 @@
     cputlb.c \
     exec.c \
     main-loop.c \
+    memory-android.c \
     monitor-android.c \
     translate-all.c \
-    android/varint.c \
-    softmmu_outside_jit.c
 
 ##############################################################################
 # CPU-specific emulation.
@@ -200,7 +188,15 @@
 
 ifeq ($(EMULATOR_TARGET_ARCH), x86)
 common_LOCAL_SRC_FILES += \
-    target-i386/op_helper.c \
+    target-i386/cc_helper.c \
+    target-i386/excp_helper.c \
+    target-i386/fpu_helper.c \
+    target-i386/int_helper.c \
+    target-i386/mem_helper.c \
+    target-i386/misc_helper.c \
+    target-i386/seg_helper.c \
+    target-i386/smm_helper.c \
+    target-i386/svm_helper.c \
     target-i386/helper.c \
     target-i386/translate.c \
     target-i386/machine.c \
@@ -247,9 +243,7 @@
 # memory is within allocated block. This information also allows detecting
 # memory leaks and attempts to free/realloc invalid pointers.
 #
-common_LOCAL_CFLAGS += \
-    -I$(LOCAL_PATH)/memcheck \
-    -I$(LOCAL_PATH)/elff
+common_LOCAL_CFLAGS += $(ELFF_CFLAGS)
 
 MCHK_SOURCES := \
     memcheck.c \
@@ -258,7 +252,7 @@
     memcheck_mmrange_map.c \
     memcheck_util.c
 
-common_LOCAL_SRC_FILES += $(MCHK_SOURCES:%=memcheck/%)
+common_LOCAL_SRC_FILES += $(MCHK_SOURCES:%=android/qemu/memcheck/%)
 
 common_LOCAL_SRC_FILES += \
     cpus.c \
diff --git a/Makefile.tests b/Makefile.tests
index 6de9bb0..611fbd9 100644
--- a/Makefile.tests
+++ b/Makefile.tests
@@ -2,6 +2,7 @@
 include $(LOCAL_PATH)/distrib/googletest/Android.mk
 
 EMULATOR_UNITTESTS_SOURCES := \
+  android/avd/util_unittest.cpp \
   android/utils/bufprint_unittest.cpp \
   android/utils/eintr_wrapper_unittest.cpp \
   android/utils/file_data_unittest.cpp \
@@ -21,6 +22,7 @@
   android/base/StringView_unittest.cpp \
   android/emulation/CpuAccelerator_unittest.cpp \
   android/filesystems/ext4_utils_unittest.cpp \
+  android/kernel/kernel_utils_unittest.cpp \
 
 ifeq (windows,$(HOST_OS))
 EMULATOR_UNITTESTS_SOURCES += \
diff --git a/android-configure.sh b/android-configure.sh
index fb8ac47..42fe51d 100755
--- a/android-configure.sh
+++ b/android-configure.sh
@@ -112,11 +112,19 @@
 fi
 
 # On Linux, try to use our prebuilt toolchain to generate binaries
-# that are compatible with Ubuntu 8.04
+# that are compatible with Ubuntu 10.04
 if [ -z "$CC" -a -z "$OPTION_CC" -a "$HOST_OS" = linux ] ; then
-    PROBE_HOST_CC=`dirname $0`/../../prebuilts/gcc/linux-x86/host/x86_64-linux-glibc2.11-4.6/bin/x86_64-linux-gcc
-    if [ ! -f "$PROBE_HOST_CC" ] ; then
-        PROBE_HOST_CC=`dirname $0`/../../prebuilts/tools/gcc-sdk/gcc
+    PREBUILTS_HOST_GCC=$(dirname $0)/../../prebuilts/gcc/linux-x86/host
+    # NOTE: GCC 4.8 is currently disabled because it breaks MIPS emulation
+    # for some odd reason. Remove the 'DISABLED_' prefix below to re-enable it,
+    # e.g. once the MIPS backend has been updated to a more recent version.
+    # This only affects Linux emulator binaries.
+    PROBE_HOST_CC=$PREBUILTS_HOST_GCC/DISABLED_x86_64-linux-glibc2.11-4.8/bin/x86_64-linux-gcc
+    if [ ! -f "$PROBE_HOST_CC" ]; then
+        PROBE_HOST_CC=$PREBUILTS_HOST_GCC/x86_64-linux-glibc2.11-4.6/bin/x86_64-linux-gcc
+        if [ ! -f "$PROBE_HOST_CC" ] ; then
+            PROBE_HOST_CC=$(dirname $0)/../../prebuilts/tools/gcc-sdk/gcc
+        fi
     fi
     if [ -f "$PROBE_HOST_CC" ] ; then
         echo "Using prebuilt toolchain: $PROBE_HOST_CC"
@@ -493,23 +501,22 @@
 feature_run_exec HOST_BIGENDIAN
 fi
 
-# check size of host long bits
-HOST_LONGBITS=32
-if [ "$TARGET_OS" = "$OS" ] ; then
-cat > $TMPC << EOF
-int main(void) {
-        return sizeof(void*)*8;
-}
-EOF
-feature_run_exec HOST_LONGBITS
-fi
-
 # check whether we have <byteswap.h>
 #
 feature_check_header HAVE_BYTESWAP_H      "<byteswap.h>"
 feature_check_header HAVE_MACHINE_BSWAP_H "<machine/bswap.h>"
 feature_check_header HAVE_FNMATCH_H       "<fnmatch.h>"
 
+# check for Mingw version.
+MINGW_VERSION=
+if [ "$TARGET_OS" = "windows" ]; then
+log "Mingw      : Probing for GCC version."
+GCC_VERSION=$($CC -v 2>&1 | awk '$1 == "gcc" && $2 == "version" { print $3; }')
+GCC_MAJOR=$(echo "$GCC_VERSION" | cut -f1 -d.)
+GCC_MINOR=$(echo "$GCC_VERSION" | cut -f2 -d.)
+log "Mingw      : Found GCC version $GCC_MAJOR.$GCC_MINOR [$GCC_VERSION]"
+MINGW_GCC_VERSION=$(( $GCC_MAJOR * 100 + $GCC_MINOR ))
+fi
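
For example, a Mingw GCC 4.8 cross-toolchain yields MINGW_GCC_VERSION = 4 * 100 + 8 = 408; storing the version in this encoded form presumably lets the makefiles compare toolchain versions with a single numeric test (see HOST_MINGW_VERSION below).
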
 # Build the config.make file
 #
 
@@ -606,6 +613,7 @@
     echo "" >> $config_mk
     echo "USE_MINGW := 1" >> $config_mk
     echo "HOST_OS   := windows" >> $config_mk
+    echo "HOST_MINGW_VERSION := $MINGW_GCC_VERSION" >> $config_mk
 fi
 
 if [ "$HOST_OS" = "darwin" ]; then
@@ -621,16 +629,6 @@
 
 #define CONFIG_QEMU_SHAREDIR   "/usr/local/share/qemu"
 
-#if defined(__x86_64__)
-#define HOST_X86_64    1
-#define HOST_LONG_BITS  64
-#elif defined(__i386__)
-#define HOST_I386    1
-#define HOST_LONG_BITS  32
-#else
-#error Unknown architecture for codegen
-#endif
-
 EOF
 
 if [ "$HAVE_BYTESWAP_H" = "yes" ] ; then
diff --git a/android/avd/hardware-properties.ini b/android/avd/hardware-properties.ini
index 7629133..327b75d 100644
--- a/android/avd/hardware-properties.ini
+++ b/android/avd/hardware-properties.ini
@@ -301,6 +301,13 @@
 default     =
 abstract    = kernel boot parameters string.
 
+name        = kernel.newDeviceNaming
+type        = string
+enum        = autodetect, yes, no
+default     = autodetect
+abstract    = Does the kernel require a new device naming scheme?
+description = Used to specify whether the kernel requires a new device naming scheme. Typically for Linux 3.10 and above.
+
 # Path to the ramdisk image.
 name        = disk.ramdisk.path
 type        = string
diff --git a/android/avd/hw-config.c b/android/avd/hw-config.c
index 2d50ef7..34f2ed4 100644
--- a/android/avd/hw-config.c
+++ b/android/avd/hw-config.c
@@ -143,3 +143,22 @@
 {
     return strcmp(config->hw_screen, "multi-touch") == 0;
 }
+
+int
+androidHwConfig_getKernelDeviceNaming( AndroidHwConfig* config )
+{
+    if (!strcmp(config->kernel_newDeviceNaming, "no"))
+        return 0;
+    if (!strcmp(config->kernel_newDeviceNaming, "yes"))
+        return 1;
+    return -1;
+}
+
+const char* androidHwConfig_getKernelSerialPrefix(AndroidHwConfig* config )
+{
+    if (androidHwConfig_getKernelDeviceNaming(config) >= 1) {
+        return "ttyGF";
+    } else {
+        return "ttyS";
+    }
+}
diff --git a/android/avd/hw-config.h b/android/avd/hw-config.h
index fd99e45..81444fb 100644
--- a/android/avd/hw-config.h
+++ b/android/avd/hw-config.h
@@ -64,4 +64,19 @@
 /* Checks if screen supports multi-touch. */
 int  androidHwConfig_isScreenMultiTouch( AndroidHwConfig* config );
 
+// Return an integer indicating if the kernel requires a new device
+// naming scheme. More specifically:
+//  -1 -> don't know, caller will need to auto-detect.
+//   0 -> legacy device naming
+//   1 -> new device naming.
+//
+// The new device naming was allegedly introduced in Linux 3.10 and
+// replaces /dev/ttyS<num> with /dev/ttyGF<num>. Also see related
+// declarations in android/kernel/kernel_utils.h
+int androidHwConfig_getKernelDeviceNaming( AndroidHwConfig* config );
+
+// Return the kernel device prefix for serial ports, depending on
+// kernel.newDeviceNaming.
+const char* androidHwConfig_getKernelSerialPrefix( AndroidHwConfig* config );
+
 #endif /* _ANDROID_AVD_HW_CONFIG_H */
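
As a usage sketch only (the helper below is hypothetical, but the -1/0/1 convention and the prefix values come from this patch), the two functions are meant to be combined roughly as follows, with android/main.c resolving the 'autodetect' case by probing the kernel image first:

    // Hypothetical caller; mirrors how android/main.c uses the API later
    // in this patch to build the androidboot.console kernel parameter.
    #include <stdio.h>
    #include "android/avd/hw-config.h"

    static void print_console_param(AndroidHwConfig* hw, int serial_port) {
        if (androidHwConfig_getKernelDeviceNaming(hw) < 0) {
            // -1 == 'autodetect': the real code probes the kernel image
            // (see android/kernel/kernel_utils.h) and rewrites
            // hw->kernel_newDeviceNaming to "yes" or "no" before this point.
        }
        printf("androidboot.console=%s%d\n",
               androidHwConfig_getKernelSerialPrefix(hw), serial_port);
    }
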
diff --git a/android/avd/info.c b/android/avd/info.c
index d723265..d67b46d 100644
--- a/android/avd/info.c
+++ b/android/avd/info.c
@@ -792,6 +792,8 @@
     _avdInfo_getPropertyFile(i, "build.prop", i->buildProperties);
     _avdInfo_getPropertyFile(i, "boot.prop", i->bootProperties);
 
+    _avdInfo_extractBuildProperties(i);
+
     /* don't need this anymore */
     iniFile_free(i->rootIni);
     i->rootIni = NULL;
@@ -977,10 +979,6 @@
             suffix = "-armv7";
         }
 
-        if (!strcmp(i->targetAbi, "x86_64")) {
-            suffix = "-x86_64";
-        }
-
         p = bufprint(temp, end, "%s/kernel", i->androidOut);
         if (p < end && path_exists(temp)) {
             kernelPath = ASTRDUP(temp);
@@ -1126,6 +1124,11 @@
 }
 
 char*
+avdInfo_getTargetCpuArch(AvdInfo* i) {
+    return ASTRDUP(i->targetArch);
+}
+
+char*
 avdInfo_getTargetAbi( AvdInfo* i )
 {
     /* For now, we can't get the ABI from SDK AVDs */
diff --git a/android/avd/util.c b/android/avd/util.c
index 24d5f85..f7ebc35 100644
--- a/android/avd/util.c
+++ b/android/avd/util.c
@@ -313,3 +313,28 @@
 
     return avdArch;
 }
+
+const char*
+emulator_getBackendSuffix(const char* targetArch)
+{
+    if (!targetArch)
+        return NULL;
+
+    static const struct {
+        const char* avd_arch;
+        const char* emulator_suffix;
+    } kPairs[] = {
+        { "arm", "arm" },
+        { "x86", "x86" },
+        { "x86_64", "x86" },
+        { "mips", "mips" },
+        // Add more if needed here.
+    };
+    size_t n;
+    for (n = 0; n < sizeof(kPairs)/sizeof(kPairs[0]); ++n) {
+        if (!strcmp(targetArch, kPairs[n].avd_arch)) {
+            return kPairs[n].emulator_suffix;
+        }
+    }
+    return NULL;
+}
diff --git a/android/avd/util.h b/android/avd/util.h
index 147b10a..630d237 100644
--- a/android/avd/util.h
+++ b/android/avd/util.h
@@ -92,6 +92,13 @@
  */
 char* path_getBuildTargetArch( const char* androidOut );
 
+/* Given an AVD's target architecture, return the suffix of the
+ * corresponding emulator backend program, e.g. 'x86' for x86 and x86_64
+ * CPUs, 'arm' for ARM ones (including ARM64 when support is complete),
+ * etc. Returned string must not be freed by the caller.
+ */
+const char* emulator_getBackendSuffix(const char* targetArch);
+
 ANDROID_END_HEADER
 
 #endif /* _ANDROID_AVD_UTIL_H */
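
A short sketch of the intended use (hypothetical helper; the emulator-/emulator64- name composition is what android/main-emulator.c does with this suffix later in the patch):

    // Hypothetical helper: derive backend program names from an AVD arch.
    #include <stdio.h>
    #include "android/avd/util.h"

    static void print_backend_names(const char* avdArch) {
        const char* suffix = emulator_getBackendSuffix(avdArch);
        if (!suffix) {
            fprintf(stderr, "Unsupported AVD CPU architecture: %s\n", avdArch);
            return;
        }
        // e.g. "x86_64" maps to emulator-x86 / emulator64-x86.
        printf("emulator-%s\n", suffix);
        printf("emulator64-%s\n", suffix);
    }
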
diff --git a/android/avd/util_unittest.cpp b/android/avd/util_unittest.cpp
new file mode 100644
index 0000000..c0aa213
--- /dev/null
+++ b/android/avd/util_unittest.cpp
@@ -0,0 +1,29 @@
+// Copyright 2014 The Android Open Source Project
+//
+// This software is licensed under the terms of the GNU General Public
+// License version 2, as published by the Free Software Foundation, and
+// may be copied, distributed, and modified under those terms.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+#include "android/avd/util.h"
+
+#include <gtest/gtest.h>
+
+TEST(AvdUtil, emulator_getBackendSuffix) {
+  EXPECT_STREQ("arm", emulator_getBackendSuffix("arm"));
+  EXPECT_STREQ("x86", emulator_getBackendSuffix("x86"));
+  EXPECT_STREQ("x86", emulator_getBackendSuffix("x86_64"));
+  EXPECT_STREQ("mips", emulator_getBackendSuffix("mips"));
+
+  // TODO(digit): Add support for these CPU architectures to the emulator,
+  // then change these to EXPECT_STREQ() calls.
+  EXPECT_FALSE(emulator_getBackendSuffix("arm64"));
+  EXPECT_FALSE(emulator_getBackendSuffix("mips64"));
+
+  EXPECT_FALSE(emulator_getBackendSuffix(NULL));
+  EXPECT_FALSE(emulator_getBackendSuffix("dummy"));
+}
diff --git a/android/build/common.sh b/android/build/common.sh
index 6a62380..965a462 100644
--- a/android/build/common.sh
+++ b/android/build/common.sh
@@ -204,19 +204,29 @@
         exit 1
     fi
     # Do we have the binaries installed
+    log "Mingw64    : Checking for mingw64 installation"
+    MINGW64_PREFIX=x86_64-w64-mingw32
+    find_program MINGW64_CC $MINGW64_PREFIX-gcc
+    if [ -n "$MINGW64_CC" ]; then
+        MINGW_CC=$MINGW64_CC
+        MINGW_PREFIX=$MINGW64_PREFIX
+    else
     log "Mingw      : Checking for mingw32 installation"
     MINGW32_PREFIX=i586-mingw32msvc
     find_program MINGW32_CC $MINGW32_PREFIX-gcc
     if [ -z "$MINGW32_CC" ] ; then
-        echo "ERROR: It looks like $MINGW32_PREFIX-gcc is not in your path"
-        echo "Please install the mingw32 package !"
+            echo "ERROR: It looks like neither $MINGW64_PREFIX-cc nor $MINGW32_PREFIX-gcc"
+            echo "are in your path. Please install the mingw32 package !"
         exit 1
+        fi
+        MINGW_CC=$MINGW32_CC
+        MINGW_PREFIX=$MINGW32_PREFIX
+        FORCE_32BIT=no
     fi
     log2 "Mingw      : Found $MINGW32_CC"
-    CC=$MINGW32_CC
-    LD=$MINGW32_CC
-    AR=$MINGW32_PREFIX-ar
-    FORCE_32BIT=no
+    CC=$MINGW_CC
+    LD=$MINGW_CC
+    AR=$MINGW_PREFIX-ar
 }
 
 # Cygwin is normally not supported, unless you call this function
diff --git a/android/cmdline-options.h b/android/cmdline-options.h
index f268f9f..4e02923 100644
--- a/android/cmdline-options.h
+++ b/android/cmdline-options.h
@@ -150,9 +150,9 @@
 OPT_PARAM( nand_limits, "<nlimits>", "enforce NAND/Flash read/write thresholds" )
 #endif
 
-#ifdef CONFIG_MEMCHECK
+#ifdef CONFIG_ANDROID_MEMCHECK
 OPT_PARAM( memcheck, "<flags>", "enable memory access checking" )
-#endif  // CONFIG_MEMCHECK
+#endif  // CONFIG_ANDROID_MEMCHECK
 
 #ifdef CONFIG_STANDALONE_UI
 OPT_PARAM( list_cores, "<host>", "list running core process" )
diff --git a/android/config/config.h b/android/config/config.h
index 8d34bb1..30b941d 100644
--- a/android/config/config.h
+++ b/android/config/config.h
@@ -6,7 +6,6 @@
  */
 #include "config-host.h"
 
-#define TARGET_PHYS_ADDR_BITS  32
 #define CONFIG_NAND 1
 #define CONFIG_SHAPER 1
 #define CONFIG_SOFTMMU 1
@@ -14,4 +13,4 @@
 #ifndef _WIN32
 #define CONFIG_NAND_LIMITS 1
 #endif
-#define CONFIG_MEMCHECK 1
+#define CONFIG_ANDROID_MEMCHECK 1
diff --git a/android/config/darwin-x86/config-host.h b/android/config/darwin-x86/config-host.h
index d84727e..1262cf0 100644
--- a/android/config/darwin-x86/config-host.h
+++ b/android/config/darwin-x86/config-host.h
@@ -1,14 +1,5 @@
 /* This file was autogenerated by 'android-configure.sh' */
 #define CONFIG_QEMU_SHAREDIR   "/usr/local/share/qemu"
-#if defined(__x86_64__)
-#define HOST_X86_64    1
-#define HOST_LONG_BITS  64
-#elif defined(__i386__)
-#define HOST_I386    1
-#define HOST_LONG_BITS  32
-#else
-#error Unknown architecture for codegen
-#endif
 
 #define CONFIG_FNMATCH  1
 #define CONFIG_GDBSTUB  1
diff --git a/android/config/darwin-x86_64/config-host.h b/android/config/darwin-x86_64/config-host.h
index 7e2dfa8..9135b70 100644
--- a/android/config/darwin-x86_64/config-host.h
+++ b/android/config/darwin-x86_64/config-host.h
@@ -2,16 +2,6 @@
 
 #define CONFIG_QEMU_SHAREDIR   "/usr/local/share/qemu"
 
-#if defined(__x86_64__)
-#define HOST_X86_64    1
-#define HOST_LONG_BITS  64
-#elif defined(__i386__)
-#define HOST_I386    1
-#define HOST_LONG_BITS  32
-#else
-#error Unknown architecture for codegen
-#endif
-
 #define CONFIG_FNMATCH  1
 #define CONFIG_GDBSTUB  1
 #define CONFIG_SLIRP    1
diff --git a/android/config/freebsd-x86/config-host.h b/android/config/freebsd-x86/config-host.h
index fd31de4..e74ff53 100644
--- a/android/config/freebsd-x86/config-host.h
+++ b/android/config/freebsd-x86/config-host.h
@@ -1,14 +1,5 @@
 /* Automatically generated by configure - do not modify */
 #define CONFIG_QEMU_SHAREDIR "/usr/local/share/qemu"
-#if defined(__x86_64__)
-#define HOST_X86_64    1
-#define HOST_LONG_BITS  64
-#elif defined(__i386__)
-#define HOST_I386    1
-#define HOST_LONG_BITS  32
-#else
-#error Unknown architecture for codegen
-#endif
 
 #define CONFIG_MACHINE_BSWAP_H 1
 #define CONFIG_FNMATCH 1
diff --git a/android/config/linux-ppc/config-host.h b/android/config/linux-ppc/config-host.h
index f87ef47..ea0123b 100644
--- a/android/config/linux-ppc/config-host.h
+++ b/android/config/linux-ppc/config-host.h
@@ -1,6 +1,5 @@
 /* This file was autogenerated by 'android-configure.sh' */
 #define CONFIG_QEMU_SHAREDIR   "/usr/local/share/qemu"
-#define HOST_LONG_BITS  32
 #define CONFIG_BYTESWAP_H 1
 #define CONFIG_FNMATCH  1
 #define CONFIG_GDBSTUB  1
diff --git a/android/config/linux-x86/config-host.h b/android/config/linux-x86/config-host.h
index 3936a72..d7ac22d 100644
--- a/android/config/linux-x86/config-host.h
+++ b/android/config/linux-x86/config-host.h
@@ -1,14 +1,5 @@
 /* This file was autogenerated by 'android-configure.sh' */
 #define CONFIG_QEMU_SHAREDIR   "/usr/local/share/qemu"
-#if defined(__x86_64__)
-#define HOST_X86_64    1
-#define HOST_LONG_BITS  64
-#elif defined(__i386__)
-#define HOST_I386    1
-#define HOST_LONG_BITS  32
-#else
-#error Unknown architecture for codegen
-#endif
 
 #define CONFIG_BYTESWAP_H 1
 #define CONFIG_FNMATCH  1
diff --git a/android/config/target-x86/config.h b/android/config/target-x86/config.h
index f9a3b81..155dabb 100644
--- a/android/config/target-x86/config.h
+++ b/android/config/target-x86/config.h
@@ -1,5 +1,6 @@
 /* x86-specific configuration */
 #include "android/config/config.h"
+#define TARGET_X86_64 1
 #define TARGET_I386 1
 /* For now, KVM is only supported on Linux hosts */
 #ifdef CONFIG_LINUX
diff --git a/android/config/windows/config-host.h b/android/config/windows/config-host.h
index 8174f94..45b1363 100644
--- a/android/config/windows/config-host.h
+++ b/android/config/windows/config-host.h
@@ -1,14 +1,5 @@
 /* This file was autogenerated by 'android-configure.sh' */
 #define CONFIG_QEMU_SHAREDIR   "/usr/local/share/qemu"
-#if defined(__x86_64__)
-#define HOST_X86_64    1
-#define HOST_LONG_BITS  64
-#elif defined(__i386__)
-#define HOST_I386    1
-#define HOST_LONG_BITS  32
-#else
-#error Unknown architecture for codegen
-#endif
 
 #define CONFIG_GDBSTUB  1
 #define CONFIG_SLIRP    1
diff --git a/android/help.c b/android/help.c
index b703ec5..91528b2 100644
--- a/android/help.c
+++ b/android/help.c
@@ -971,7 +971,7 @@
     );
 }
 
-#ifdef CONFIG_MEMCHECK
+#ifdef CONFIG_ANDROID_MEMCHECK
 static void
 help_memcheck(stralloc_t*  out)
 {
@@ -1008,7 +1008,7 @@
     "  to analyze memory allocations and memory access.\n\n"
     );
 }
-#endif  // CONFIG_MEMCHECK
+#endif  // CONFIG_ANDROID_MEMCHECK
 
 #ifdef CONFIG_STANDALONE_UI
 static void
diff --git a/android/hw-qemud.c b/android/hw-qemud.c
index 191ce01..79040f7 100644
--- a/android/hw-qemud.c
+++ b/android/hw-qemud.c
@@ -2254,8 +2254,13 @@
 
     qemud_multiplexer_init(_multiplexer, cs);
 
-    register_savevm( "qemud", 0, QEMUD_SAVE_VERSION,
-                      qemud_save, qemud_load, _multiplexer);
+    register_savevm(NULL,
+                    "qemud",
+                    0,
+                    QEMUD_SAVE_VERSION,
+                    qemud_save,
+                    qemud_load,
+                    _multiplexer);
 }
 
 extern void
diff --git a/android/kernel/kernel_utils.cpp b/android/kernel/kernel_utils.cpp
new file mode 100644
index 0000000..50b7094
--- /dev/null
+++ b/android/kernel/kernel_utils.cpp
@@ -0,0 +1,173 @@
+// Copyright 2014 The Android Open Source Project
+//
+// This software is licensed under the terms of the GNU General Public
+// License version 2, as published by the Free Software Foundation, and
+// may be copied, distributed, and modified under those terms.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+#include "android/kernel/kernel_utils.h"
+
+#include "android/base/Log.h"
+#include "android/base/files/ScopedStdioFile.h"
+#include "android/base/String.h"
+#include "android/kernel/kernel_utils_testing.h"
+#include "android/utils/path.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
+#define DEBUG_KERNEL  0
+
+#define KERNEL_LOG     LOG_IF(INFO, DEBUG_KERNEL)
+#define KERNEL_PLOG    PLOG_IF(INFO, DEBUG_KERNEL)
+#define KERNEL_ERROR   LOG_IF(ERROR, DEBUG_KERNEL)
+#define KERNEL_PERROR  PLOG_IF(ERROR, DEBUG_KERNEL)
+
+using android::base::String;
+
+namespace {
+
+#ifndef _WIN32
+// Helper class to perform launch a command through popen() and call
+// pclose() on destruction.
+class ScopedPopenFile {
+public:
+    ScopedPopenFile(const char* command) {
+        mFile = ::popen(command, "r");
+    }
+
+    FILE* get() const { return mFile; }
+
+    ~ScopedPopenFile() {
+        if (mFile) {
+            ::pclose(mFile);
+        }
+    }
+
+private:
+    FILE* mFile;
+};
+#endif  // !_WIN32
+
+bool getFileDescription(void* opaque, const char* filePath, String* text) {
+    if (!filePath) {
+        KERNEL_ERROR << "NULL path parameter";
+        return false;
+    }
+
+    if (!path_exists(filePath)) {
+        KERNEL_ERROR << "Kernel file doesn't exist: " << filePath;
+        return false;
+    }
+
+#ifdef _WIN32
+    // TODO(digit): Better/portable detection based on libmagic or something.
+    KERNEL_ERROR << "Can't detect kernel version on Windows!";
+    return false;
+#else
+    // NOTE: Use /usr/bin/file instead of 'file' because the latter can
+    // be broken in certain environments (e.g. some versions of MacPorts).
+    String command("/usr/bin/file ");
+    command += filePath;
+
+    ScopedPopenFile file(command.c_str());
+    if (!file.get()) {
+        KERNEL_PERROR << "Could not launch command: " << command.c_str();
+        return false;
+    }
+
+    String result;
+    const size_t kReserveSize = 256U;
+    result.resize(kReserveSize);
+
+    int ret = ::fread(&result[0], 1, kReserveSize, file.get());
+    if (ret < static_cast<int>(kReserveSize) && ferror(file.get())) {
+        KERNEL_ERROR << "Could not read file command output!?";
+        return false;
+    }
+    result.resize(ret);
+    text->assign(result);
+    return true;
+#endif
+}
+
+android::kernel::GetFileDescriptionFunction* sGetFileDescription =
+        getFileDescription;
+
+void* sGetFileDescriptionOpaque = NULL;
+
+}  // namespace
+
+namespace android {
+namespace kernel {
+
+void setFileDescriptionFunction(GetFileDescriptionFunction* file_func,
+                                void* file_opaque) {
+    sGetFileDescription = file_func ? file_func : &getFileDescription;
+    sGetFileDescriptionOpaque = file_func ? file_opaque : NULL;
+}
+
+}  // namespace kernel
+}  // namespace android
+
+bool android_pathProbeKernelType(const char* kernelPath, KernelType* ktype) {
+    String description;
+
+    if (!sGetFileDescription(sGetFileDescriptionOpaque,
+                             kernelPath,
+                             &description)) {
+        return false;
+    }
+    const char* bzImage = ::strstr(description.c_str(), "bzImage");
+    if (!bzImage) {
+        KERNEL_ERROR << "Not a compressed Linux kernel image!";
+        return false;
+    }
+    const char* version = ::strstr(bzImage, "version ");
+    if (!version) {
+        KERNEL_ERROR << "Could not determine version!";
+        return false;
+    }
+    version += ::strlen("version ");
+    KERNEL_LOG << "Found kernel version " << version;
+
+    char* end;
+    unsigned long major = ::strtoul(version, &end, 10);
+    if (end == version || *end != '.') {
+        KERNEL_ERROR << "Could not find kernel major version!";
+        return false;
+    }
+    KERNEL_LOG << "Kernel major version: " << major;
+    if (major > 3) {
+        *ktype = KERNEL_TYPE_3_10_OR_ABOVE;
+    } else if (major < 3) {
+        *ktype = KERNEL_TYPE_LEGACY;
+    } else /* major == 3 */ {
+        version = end + 1;
+        unsigned long minor = ::strtoul(version, &end, 10);
+        if (end == version) {
+            KERNEL_ERROR << "Could not find kernel minor version!";
+            return false;
+        }
+        KERNEL_LOG << "Kernel minor version: " << minor;
+
+        *ktype = (minor >= 10)
+                ? KERNEL_TYPE_3_10_OR_ABOVE : KERNEL_TYPE_LEGACY;
+    }
+    return true;
+}
+
+const char* android_kernelSerialDevicePrefix(KernelType ktype) {
+    switch (ktype) {
+        case KERNEL_TYPE_LEGACY: return "ttyS";
+        case KERNEL_TYPE_3_10_OR_ABOVE: return "ttyGF";
+        default: return "";
+    }
+}
diff --git a/android/kernel/kernel_utils.h b/android/kernel/kernel_utils.h
new file mode 100644
index 0000000..e10eb34
--- /dev/null
+++ b/android/kernel/kernel_utils.h
@@ -0,0 +1,44 @@
+// Copyright 2014 The Android Open Source Project
+//
+// This software is licensed under the terms of the GNU General Public
+// License version 2, as published by the Free Software Foundation, and
+// may be copied, distributed, and modified under those terms.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+#ifndef ANDROID_KERNEL_KERNEL_UTILS_H
+#define ANDROID_KERNEL_KERNEL_UTILS_H
+
+#include "android/utils/compiler.h"
+#include <stdint.h>
+
+ANDROID_BEGIN_HEADER
+
+// An enum used to list the types of Linux kernel images we need to
+// handle. Unfortunately, this affects how we setup the kernel command-line
+// when launching the system.
+//
+// KERNEL_TYPE_LEGACY is any Linux kernel image before 3.10
+// KERNEL_TYPE_3_10_OR_ABOVE is anything at 3.10 or above.
+typedef enum {
+    KERNEL_TYPE_LEGACY = 0,
+    KERNEL_TYPE_3_10_OR_ABOVE = 1,
+} KernelType;
+
+// Probe the kernel image at |kernelPath| to determine the corresponding
+// KernelType value. On success, returns true and sets |*ktype| appropriately.
+// On failure (e.g. if the file doesn't exist or cannot be probed), returns
+// false and doesn't touch |*ktype|.
+bool android_pathProbeKernelType(const char* kernelPath, KernelType *ktype);
+
+// Return the serial device name prefix matching a given kernel type |ktype|.
+// I.e. this is "ttyS" for KERNEL_TYPE_LEGACY, and "ttyGF" for
+// KERNEL_TYPE_3_10_OR_ABOVE (the prefix does not include "/dev/").
+const char* android_kernelSerialDevicePrefix(KernelType ktype);
+
+ANDROID_END_HEADER
+
+#endif  // ANDROID_KERNEL_KERNEL_UTILS_H
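
A hedged usage sketch (hypothetical helper; the fallback-to-legacy behaviour matches what android/main.c does with this API elsewhere in the patch):

    // Hypothetical helper: probe a kernel image and pick the serial
    // device prefix to put on the kernel command line.
    #include <stdbool.h>  // for the bool return type used by the header
    #include "android/kernel/kernel_utils.h"

    static const char* serial_prefix_for(const char* kernelPath) {
        KernelType ktype;
        if (!android_pathProbeKernelType(kernelPath, &ktype)) {
            // Probing can fail (e.g. on Windows, or an unreadable image);
            // fall back to the legacy naming, as android/main.c does.
            ktype = KERNEL_TYPE_LEGACY;
        }
        return android_kernelSerialDevicePrefix(ktype);  // "ttyS" or "ttyGF"
    }
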
diff --git a/android/kernel/kernel_utils_testing.h b/android/kernel/kernel_utils_testing.h
new file mode 100644
index 0000000..f2d64ec
--- /dev/null
+++ b/android/kernel/kernel_utils_testing.h
@@ -0,0 +1,47 @@
+// Copyright 2014 The Android Open Source Project
+//
+// This software is licensed under the terms of the GNU General Public
+// License version 2, as published by the Free Software Foundation, and
+// may be copied, distributed, and modified under those terms.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+#ifndef ANDROID_KERNEL_KERNEL_UTILS_TESTING_H
+#define ANDROID_KERNEL_KERNEL_UTILS_TESTING_H
+
+namespace android {
+
+namespace base {
+class String;
+}  // namespace base
+
+namespace kernel {
+
+// Type of a function used to retrieve the textual description of a given
+// file at |filePath|. On success, returns true and sets |*text| to the
+// description text, as if running through the 'file' command on Unix.
+// |opaque| is a client-provided value set by calling
+// setFileDescriptionFunction() below.
+typedef bool (GetFileDescriptionFunction)(void* opaque,
+                                          const char* filePath,
+                                          android::base::String* text);
+
+// Change the implementation of the function that extracts textual
+// descriptions from a given file. |file_func| is a pointer to the
+// new function, and |file_opaque| is the value that will be passed
+// as its first parameter. Note that if |file_func| is NULL, the
+// default implementation will be selected instead.
+//
+// Only use this during unit-testing to force different description
+// values on arbitrary file content.
+void setFileDescriptionFunction(GetFileDescriptionFunction* file_func,
+                                void* file_opaque);
+
+}  // namespace kernel
+
+}  // namespace android
+
+#endif  // ANDROID_KERNEL_KERNEL_UTILS_TESTING_H
diff --git a/android/kernel/kernel_utils_unittest.cpp b/android/kernel/kernel_utils_unittest.cpp
new file mode 100644
index 0000000..a5afa2a
--- /dev/null
+++ b/android/kernel/kernel_utils_unittest.cpp
@@ -0,0 +1,132 @@
+// Copyright 2014 The Android Open Source Project
+//
+// This software is licensed under the terms of the GNU General Public
+// License version 2, as published by the Free Software Foundation, and
+// may be copied, distributed, and modified under those terms.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+#include "android/kernel/kernel_utils.h"
+
+#include "android/base/String.h"
+#include "android/kernel/kernel_utils_testing.h"
+
+#include <gtest/gtest.h>
+
+using android::base::String;
+
+namespace android {
+namespace kernel {
+
+namespace {
+
+class ScopedDescriptionFunc {
+public:
+    explicit ScopedDescriptionFunc(GetFileDescriptionFunction* file_func) {
+        setFileDescriptionFunction(file_func, NULL);
+    }
+
+    ScopedDescriptionFunc(GetFileDescriptionFunction* file_func,
+                          void* file_opaque) {
+        setFileDescriptionFunction(file_func, file_opaque);
+    }
+
+    ~ScopedDescriptionFunc() {
+        setFileDescriptionFunction(NULL, NULL);
+    }
+};
+
+}  // namespace
+
+TEST(KernelUtils, GetKernelSerialDevicePrefix) {
+    EXPECT_STREQ("ttyS",
+            android_kernelSerialDevicePrefix(KERNEL_TYPE_LEGACY));
+    EXPECT_STREQ("ttyGF",
+            android_kernelSerialDevicePrefix(KERNEL_TYPE_3_10_OR_ABOVE));
+}
+
+static bool failFunc(void* opaque, const char* path, String* text) {
+    return false;
+}
+
+TEST(KernelUtils, ProbeKernelTypeWithNoKernelFile) {
+    ScopedDescriptionFunc func(&failFunc);
+
+    KernelType ktype;
+    EXPECT_FALSE(android_pathProbeKernelType("/tmp/kernel", &ktype));
+}
+
+struct Expectation {
+    const char* description;
+    bool result;
+    KernelType ktype;
+
+    static bool getDescriptionFunc(void* opaque,
+                                   const char* path,
+                                   String* text) {
+        const Expectation* expectation = static_cast<const Expectation*>(opaque);
+        if (!expectation->result)
+            return false;
+        text->assign(expectation->description);
+        return true;
+    }
+};
+
+#ifdef _WIN32
+#define DISABLED_ON_WIN32(x)  DISABLED_ ## x
+#else
+#define DISABLED_ON_WIN32(x)  x
+#endif
+
+TEST(KernelUtils, DISABLED_ON_WIN32(PathProbeKernelType)) {
+    static const Expectation kData[] = {
+        { NULL, false, KERNEL_TYPE_LEGACY },
+        // Missing bzImage.
+        { "Linux kernel x86 boot executable raw image, version 3.10.0+",
+                false, KERNEL_TYPE_LEGACY },
+        // Missing version
+        { "Linux kernel x86 boot executable bzImage, 3.10.0+",
+                false, KERNEL_TYPE_LEGACY },
+        // Legacy 2.6.29 kernel
+        { "Linux kernel x86 boot executable bzImage, version 2.6.29 (foo...)",
+                true,
+                KERNEL_TYPE_LEGACY },
+        // Legacy 3.4
+        { "Linux kernel x86 boot executable bzImage, version 3.4.1 (foo...)",
+                true,
+                KERNEL_TYPE_LEGACY },
+        // 3.10
+        { "Linux kernel x86 boot executable bzImage, version 3.10.0+",
+                true,
+                KERNEL_TYPE_3_10_OR_ABOVE },
+        // 3.40
+        { "Linux kernel x86 boot executable bzImage, version 3.40.0",
+                true,
+                KERNEL_TYPE_3_10_OR_ABOVE },
+        // 4.0
+        { "Linux kernel x86 boot executable bzImage, version 4.0.9",
+                true,
+                KERNEL_TYPE_3_10_OR_ABOVE },
+    };
+    const size_t kDataSize = sizeof(kData) / sizeof(kData[0]);
+    static const char kKernelPath[] = "/tmp/kernel";
+    for (size_t n = 0; n < kDataSize; ++n) {
+        KernelType kernelType;
+        const Expectation& expectation = kData[n];
+        ScopedDescriptionFunc func(&Expectation::getDescriptionFunc,
+                                   (void*)&expectation);
+        EXPECT_EQ(expectation.result,
+                  android_pathProbeKernelType(kKernelPath, &kernelType))
+                        << "For [" << expectation.description << "]";
+        if (expectation.result) {
+            EXPECT_EQ(expectation.ktype, kernelType) << "For ["
+                    << expectation.description << "]";
+        }
+    }
+}
+
+}  // namespace kernel
+}  // namespace android
diff --git a/android/main-emulator.c b/android/main-emulator.c
index 150fe1e..26d8ae6 100644
--- a/android/main-emulator.c
+++ b/android/main-emulator.c
@@ -24,6 +24,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <android/utils/compiler.h>
 #include <android/utils/panic.h>
 #include <android/utils/path.h>
 #include <android/utils/bufprint.h>
@@ -51,31 +52,23 @@
 #  define DLL_EXTENSION  ".so"
 #endif
 
-#if defined(__x86_64__)
-/* Normally emulator is compiled in 32-bit.  In standalone it can be compiled
-   in 64-bit (with ,/android-configure.sh --try-64).  In this case, emulator-$ARCH
-   are also compiled in 64-bit and will search for lib64*.so instead of lib*so */
-#define  GLES_EMULATION_LIB  "lib64OpenglRender" DLL_EXTENSION
-#elif defined(__i386__)
-#define  GLES_EMULATION_LIB  "libOpenglRender" DLL_EXTENSION
-#else
-#error Unknown architecture for codegen
-#endif
-
+// Name of GPU emulation main library for (32-bit and 64-bit versions)
+#define GLES_EMULATION_LIB    "libOpenglRender" DLL_EXTENSION
+#define GLES_EMULATION_LIB64  "lib64OpenglRender" DLL_EXTENSION
 
 /* Forward declarations */
 static char* getTargetEmulatorPath(const char* progName, const char* avdArch, const int force_32bit);
 static char* getSharedLibraryPath(const char* progName, const char* libName);
 static void  prependSharedLibraryPath(const char* prefix);
 
-/* The execv() definition in mingw is slightly bogus.
+/* The execv() definition in older mingw is slightly bogus.
  * It takes a second argument of type 'const char* const*'
  * while POSIX mandates char** instead.
  *
  * To avoid compiler warnings, define the safe_execv macro
  * to perform an explicit cast with mingw.
  */
-#ifdef _WIN32
+#if defined(_WIN32) && !ANDROID_GCC_PREREQ(4,4)
 #  define safe_execv(_filepath,_argv)  execv((_filepath),(const char* const*)(_argv))
 #else
 #  define safe_execv(_filepath,_argv)  execv((_filepath),(_argv))
@@ -100,6 +93,7 @@
     /* Parse command-line and look for
      * 1) an avd name either in the form or '-avd <name>' or '@<name>'
      * 2) '-force-32bit' which always use 32-bit emulator on 64-bit platforms
+     * 3) '-verbose', '-debug-all', or '-debug all' to enable verbose mode.
      */
     int  nn;
     for (nn = 1; nn < argc; nn++) {
@@ -108,6 +102,15 @@
         if (!strcmp(opt,"-qemu"))
             break;
 
+        if (!strcmp(opt,"-verbose") || !strcmp(opt,"-debug-all")) {
+            android_verbose = 1;
+        }
+
+        if (!strcmp(opt,"-debug") && nn + 1 < argc &&
+            !strcmp(argv[nn + 1], "all")) {
+            android_verbose = 1;
+        }
+
         if (!strcmp(opt,"-force-32bit")) {
             force_32bit = 1;
             continue;
@@ -160,6 +163,12 @@
     {
         char*  sharedLibPath = getSharedLibraryPath(emulatorPath, GLES_EMULATION_LIB);
 
+        if (!sharedLibPath) {
+            // Sometimes, only the 64-bit libraries are available, for example
+            // when storing binaries under $AOSP/prebuilts/android-emulator/<system>/
+            sharedLibPath = getSharedLibraryPath(emulatorPath, GLES_EMULATION_LIB64);
+        }
+
         if (sharedLibPath != NULL) {
             D("Found OpenGLES emulation libraries in %s\n", sharedLibPath);
             prependSharedLibraryPath(sharedLibPath);
@@ -238,12 +247,19 @@
     int search_for_64bit_emulator = !force_32bit && getHostOSBitness() == 64;
 #endif
 
+    const char* emulatorSuffix = emulator_getBackendSuffix(avdArch);
+    if (!emulatorSuffix) {
+        APANIC("This emulator cannot emulate %s CPUs!\n", avdArch);
+    }
+    D("Using emulator-%s to emulate '%s' CPUs\n", emulatorSuffix, avdArch);
+
     /* Get program's directory name in progDir */
     path_split(progName, &progDir, NULL);
 
     if (search_for_64bit_emulator) {
         /* Find 64-bit emulator first */
-        p = bufprint(path, pathEnd, "%s/%s%s%s", progDir, emulator64Prefix, avdArch, exeExt);
+        p = bufprint(path, pathEnd, "%s/%s%s%s", progDir,
+                     emulator64Prefix, emulatorSuffix, exeExt);
         if (p >= pathEnd) {
             APANIC("Path too long: %s\n", progName);
         }
@@ -254,7 +270,8 @@
     }
 
     /* Find 32-bit emulator */
-    p = bufprint(path, pathEnd, "%s/%s%s%s", progDir, emulatorPrefix, avdArch, exeExt);
+    p = bufprint(path, pathEnd, "%s/%s%s%s", progDir, emulatorPrefix,
+                 emulatorSuffix, exeExt);
     free(progDir);
     if (p >= pathEnd) {
         APANIC("Path too long: %s\n", progName);
@@ -273,7 +290,8 @@
     if (strchr(progName, '/') == NULL) {
 #endif
         if (search_for_64bit_emulator) {
-           p = bufprint(path, pathEnd, "%s%s%s", emulator64Prefix, avdArch, exeExt);
+           p = bufprint(path, pathEnd, "%s%s%s", emulator64Prefix,
+                        emulatorSuffix, exeExt);
            if (p < pathEnd) {
                char*  resolved = path_search_exec(path);
                if (resolved != NULL)
@@ -281,7 +299,8 @@
            }
         }
 
-        p = bufprint(path, pathEnd, "%s%s%s", emulatorPrefix, avdArch, exeExt);
+        p = bufprint(path, pathEnd, "%s%s%s", emulatorPrefix,
+                     emulatorSuffix, exeExt);
         if (p < pathEnd) {
             char*  resolved = path_search_exec(path);
             if (resolved != NULL)
diff --git a/android/main.c b/android/main.c
index c0ac767..c01fa92 100644
--- a/android/main.c
+++ b/android/main.c
@@ -38,6 +38,7 @@
 #include "android/config/config.h"
 #include "android/cpu_accelerator.h"
 
+#include "android/kernel/kernel_utils.h"
 #include "android/user-config.h"
 #include "android/utils/bufprint.h"
 #include "android/utils/filelock.h"
@@ -453,6 +454,24 @@
          }
     }
 
+    // Auto-detect kernel device naming scheme if needed.
+    if (androidHwConfig_getKernelDeviceNaming(hw) < 0) {
+        KernelType kernelType;
+        const char* newDeviceNaming = "no";
+        if (!android_pathProbeKernelType(hw->kernel_path, &kernelType)) {
+            D("WARNING: Could not determine kernel device naming scheme. Assuming legacy\n"
+              "If this AVD doesn't boot, and uses a recent kernel (3.10 or above) try setting\n"
+              "'kernel.newDeviceNaming' to 'yes' in its configuration.\n");
+        } else if (kernelType == KERNEL_TYPE_3_10_OR_ABOVE) {
+            D("Auto-detect: Kernel image requires new device naming scheme.");
+            newDeviceNaming = "yes";
+        } else {
+            D("Auto-detect: Kernel image requires legacy device naming scheme.");
+        }
+        AFREE(hw->kernel_newDeviceNaming);
+        hw->kernel_newDeviceNaming = ASTRDUP(newDeviceNaming);
+    }
+
     if (boot_prop_ip[0]) {
         args[n++] = "-boot-property";
         args[n++] = boot_prop_ip;
@@ -1028,7 +1047,9 @@
 #endif
 
         if (opts->shell || opts->logcat) {
-            p = bufprint(p, end, " androidboot.console=ttyS%d", shell_serial );
+            p = bufprint(p, end, " androidboot.console=%s%d",
+                         androidHwConfig_getKernelSerialPrefix(android_hw),
+                         shell_serial );
         }
 
         if (opts->trace) {
@@ -1328,6 +1349,22 @@
         D("Auto-config: -qemu -cpu %s", hw->hw_cpu_model);
     }
 
+    /* If the target architecture is 'x86', ensure that the 'qemu32'
+     * CPU model is used. Otherwise, the default (which is now 'qemu64')
+     * will result in a failure to boot with some kernels under
+     * un-accelerated emulation.
+     */
+    if (hw->hw_cpu_model[0] == '\0') {
+        char* arch = avdInfo_getTargetCpuArch(avd);
+        D("Target arch = '%s'", arch ? arch : "NULL");
+        if (arch != NULL && !strcmp(arch, "x86")) {
+            AFREE(hw->hw_cpu_model);
+            hw->hw_cpu_model = ASTRDUP("qemu32");
+            D("Auto-config: -qemu -cpu %s", hw->hw_cpu_model);
+        }
+        AFREE(arch);
+    }
+
     /* Generate a hardware-qemu.ini for this AVD. The real hardware
      * configuration is usually stored in several files, e.g. the AVD's
      * config.ini plus the skin-specific hardware.ini.
diff --git a/android/opengles.c b/android/opengles.c
index d4ff301..2570429 100644
--- a/android/opengles.c
+++ b/android/opengles.c
@@ -67,12 +67,12 @@
 #define DD(...) VERBOSE_PRINT(gles,__VA_ARGS__)
 
 /* Name of the GLES rendering library we're going to use */
-#if HOST_LONG_BITS == 32
+#if UINTPTR_MAX == UINT32_MAX
 #define RENDERER_LIB_NAME  "libOpenglRender"
-#elif HOST_LONG_BITS == 64
+#elif UINTPTR_MAX == UINT64_MAX
 #define RENDERER_LIB_NAME  "lib64OpenglRender"
 #else
-#error Unknown HOST_LONG_BITS
+#error Unknown UINTPTR_MAX
 #endif
 
 // Define the corresponding function pointers.
diff --git a/memcheck/memcheck.c b/android/qemu/memcheck/memcheck.c
similarity index 96%
rename from memcheck/memcheck.c
rename to android/qemu/memcheck/memcheck.c
index 5f0376a..e52bc31 100644
--- a/memcheck/memcheck.c
+++ b/android/qemu/memcheck/memcheck.c
@@ -16,11 +16,11 @@
 
 #include "qemu/queue.h"
 #include "migration/qemu-file.h"
-#include "elff_api.h"
-#include "memcheck.h"
-#include "memcheck_proc_management.h"
-#include "memcheck_util.h"
-#include "memcheck_logging.h"
+#include "elff/elff_api.h"
+#include "android/qemu/memcheck/memcheck.h"
+#include "android/qemu/memcheck/memcheck_proc_management.h"
+#include "android/qemu/memcheck/memcheck_util.h"
+#include "android/qemu/memcheck/memcheck_logging.h"
 
 // =============================================================================
 // Global data
@@ -64,11 +64,14 @@
 {
     if (trace_flags & TRACE_CHECK_INVALID_PTR_ENABLED) {
         printf("memcheck: Access violation is detected in process %s[pid=%u]:\n"
-          "  INVALID POINTER 0x%08X is used in '%s' operation.\n"
+          "  INVALID POINTER " TARGET_FMT_lx " is used in '%s' operation.\n"
           "  Allocation descriptor for this pointer has not been found in the\n"
           "  allocation map for the process. Most likely, this is an attempt\n"
           "  to %s a pointer that has been freed.\n",
-          proc->image_path, proc->pid, ptr, routine == 1 ? "free" : "realloc",
+          proc->image_path,
+          proc->pid,
+          ptr,
+          routine == 1 ? "free" : "realloc",
           routine == 1 ? "free" : "reallocate");
     }
 }
@@ -114,7 +117,7 @@
     const MMRangeDesc* rdesc = procdesc_get_range_desc(proc, vaddr);
     if (rdesc != NULL) {
         int elff_res;
-        printf("  In module %s at address 0x%08X\n", rdesc->path, vaddr);
+        printf("  In module %s at address " TARGET_FMT_lx "\n", rdesc->path, vaddr);
         elff_res =
           memcheck_get_address_info(vaddr, rdesc, &elff_info, &elff_handle);
         if (elff_res == 0) {
@@ -151,14 +154,15 @@
             printf("  Unable to obtain routine information. Symbols file is not found.\n");
         } else {
             printf("  Unable to obtain routine information.\n"
-                   "  Symbols file doesn't contain debugging information for address 0x%08X.\n",
+                   "  Symbols file doesn't contain debugging information for address "
+                   TARGET_FMT_lx ".\n",
                     mmrangedesc_get_module_offset(rdesc, vaddr));
         }
     } else {
-        printf("  In unknown module at address 0x%08X\n", vaddr);
+        printf("  In unknown module at address " TARGET_FMT_lx "\n", vaddr);
     }
 
-    printf("  Process attempts to %s %u bytes %s address 0x%08X\n",
+    printf("  Process attempts to %s %u bytes %s address " TARGET_FMT_lx "\n",
            is_read ? "read" : "write", data_size,
            is_read ? "from" : "to", addr);
     printf("  Accessed range belongs to the %s guarding area of allocated block.\n",
diff --git a/memcheck/memcheck.h b/android/qemu/memcheck/memcheck.h
similarity index 97%
rename from memcheck/memcheck.h
rename to android/qemu/memcheck/memcheck.h
index a7761a9..f983c7e 100644
--- a/memcheck/memcheck.h
+++ b/android/qemu/memcheck/memcheck.h
@@ -15,10 +15,10 @@
  * checking framework.
  */
 
-#ifndef QEMU_MEMCHECK_MEMCHECK_H
-#define QEMU_MEMCHECK_MEMCHECK_H
+#ifndef ANDROID_QEMU_MEMCHECK_MEMCHECK_H
+#define ANDROID_QEMU_MEMCHECK_MEMCHECK_H
 
-#include "memcheck_common.h"
+#include "android/qemu/memcheck/memcheck_common.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -185,4 +185,4 @@
 };  /* end of extern "C" */
 #endif
 
-#endif  // QEMU_MEMCHECK_MEMCHECK_H
+#endif  // ANDROID_QEMU_MEMCHECK_MEMCHECK_H
diff --git a/memcheck/memcheck_api.h b/android/qemu/memcheck/memcheck_api.h
similarity index 93%
rename from memcheck/memcheck_api.h
rename to android/qemu/memcheck/memcheck_api.h
index 9e76ecd..51d46e3 100644
--- a/memcheck/memcheck_api.h
+++ b/android/qemu/memcheck/memcheck_api.h
@@ -15,14 +15,14 @@
  * other qemu components.
  */
 
-#ifndef QEMU_MEMCHECK_MEMCHECK_API_H
-#define QEMU_MEMCHECK_MEMCHECK_API_H
+#ifndef ANDROID_QEMU_MEMCHECK_MEMCHECK_API_H
+#define ANDROID_QEMU_MEMCHECK_MEMCHECK_API_H
 
 /* This file should compile iff qemu is built with memory checking
  * configuration turned on. */
-#ifndef CONFIG_MEMCHECK
-#error CONFIG_MEMCHECK is not defined.
-#endif  // CONFIG_MEMCHECK
+#ifndef CONFIG_ANDROID_MEMCHECK
+#error CONFIG_ANDROID_MEMCHECK is not defined.
+#endif  // CONFIG_ANDROID_MEMCHECK
 
 /* Global flag, indicating whether or not memchecking has been enabled
  * for the current emulator session. 1 means that memchecking has been
@@ -104,4 +104,4 @@
  */
 void memcheck_on_ret(target_ulong pc);
 
-#endif  // QEMU_MEMCHECK_MEMCHECK_API_H
+#endif  // ANDROID_QEMU_MEMCHECK_MEMCHECK_API_H
diff --git a/memcheck/memcheck_common.h b/android/qemu/memcheck/memcheck_common.h
similarity index 98%
rename from memcheck/memcheck_common.h
rename to android/qemu/memcheck/memcheck_common.h
index 3c91744..6752076 100644
--- a/memcheck/memcheck_common.h
+++ b/android/qemu/memcheck/memcheck_common.h
@@ -15,8 +15,8 @@
  * in the memchecker framework.
  */
 
-#ifndef QEMU_MEMCHECK_MEMCHECK_COMMON_H
-#define QEMU_MEMCHECK_MEMCHECK_COMMON_H
+#ifndef ANDROID_QEMU_MEMCHECK_MEMCHECK_COMMON_H
+#define ANDROID_QEMU_MEMCHECK_MEMCHECK_COMMON_H
 
 #include "qemu-common.h"
 #include "cpu.h"
@@ -475,4 +475,4 @@
 };  /* end of extern "C" */
 #endif
 
-#endif  // QEMU_MEMCHECK_MEMCHECK_COMMON_H
+#endif  // ANDROID_QEMU_MEMCHECK_MEMCHECK_COMMON_H
diff --git a/memcheck/memcheck_logging.h b/android/qemu/memcheck/memcheck_logging.h
similarity index 95%
rename from memcheck/memcheck_logging.h
rename to android/qemu/memcheck/memcheck_logging.h
index 94a3c09..4c78f7c 100644
--- a/memcheck/memcheck_logging.h
+++ b/android/qemu/memcheck/memcheck_logging.h
@@ -14,8 +14,8 @@
  * Contains declarations of logging macros used in memchecker framework.
  */
 
-#ifndef QEMU_MEMCHECK_MEMCHECK_LOGGING_H
-#define QEMU_MEMCHECK_MEMCHECK_LOGGING_H
+#ifndef ANDROID_QEMU_MEMCHECK_MEMCHECK_LOGGING_H
+#define ANDROID_QEMU_MEMCHECK_MEMCHECK_LOGGING_H
 
 #include "qemu-common.h"
 #include "android/utils/debug.h"
@@ -85,4 +85,4 @@
 };  /* end of extern "C" */
 #endif
 
-#endif  // QEMU_MEMCHECK_MEMCHECK_LOGGING_H
+#endif  // ANDROID_QEMU_MEMCHECK_MEMCHECK_LOGGING_H
diff --git a/memcheck/memcheck_malloc_map.c b/android/qemu/memcheck/memcheck_malloc_map.c
similarity index 97%
rename from memcheck/memcheck_malloc_map.c
rename to android/qemu/memcheck/memcheck_malloc_map.c
index 42d935b..0fcd577 100644
--- a/memcheck/memcheck_malloc_map.c
+++ b/android/qemu/memcheck/memcheck_malloc_map.c
@@ -15,9 +15,9 @@
  * memory blocks allocated by the guest system.
  */
 
-#include "memcheck_malloc_map.h"
-#include "memcheck_util.h"
-#include "memcheck_logging.h"
+#include "android/qemu/memcheck/memcheck_malloc_map.h"
+#include "android/qemu/memcheck/memcheck_util.h"
+#include "android/qemu/memcheck/memcheck_logging.h"
 
 /* Global flag, indicating whether or not __ld/__stx_mmu should be instrumented
  * for checking for access violations. If read / write access violation check.
diff --git a/memcheck/memcheck_malloc_map.h b/android/qemu/memcheck/memcheck_malloc_map.h
similarity index 96%
rename from memcheck/memcheck_malloc_map.h
rename to android/qemu/memcheck/memcheck_malloc_map.h
index 1c25738..5c16b56 100644
--- a/memcheck/memcheck_malloc_map.h
+++ b/android/qemu/memcheck/memcheck_malloc_map.h
@@ -21,11 +21,11 @@
  * each process running on the guest system.
  */
 
-#ifndef QEMU_MEMCHECK_MEMCHECK_MALLOC_MAP_H
-#define QEMU_MEMCHECK_MEMCHECK_MALLOC_MAP_H
+#ifndef ANDROID_QEMU_MEMCHECK_MEMCHECK_MALLOC_MAP_H
+#define ANDROID_QEMU_MEMCHECK_MEMCHECK_MALLOC_MAP_H
 
 #include "sys-tree.h"
-#include "memcheck_common.h"
+#include "android/qemu/memcheck/memcheck_common.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -141,4 +141,4 @@
 };  /* end of extern "C" */
 #endif
 
-#endif  // QEMU_MEMCHECK_MEMCHECK_MALLOC_MAP_H
+#endif  // ANDROID_QEMU_MEMCHECK_MEMCHECK_MALLOC_MAP_H
diff --git a/memcheck/memcheck_mmrange_map.c b/android/qemu/memcheck/memcheck_mmrange_map.c
similarity index 98%
rename from memcheck/memcheck_mmrange_map.c
rename to android/qemu/memcheck/memcheck_mmrange_map.c
index 1dbfdfd..03f1fcf 100644
--- a/memcheck/memcheck_mmrange_map.c
+++ b/android/qemu/memcheck/memcheck_mmrange_map.c
@@ -15,8 +15,8 @@
  * memory mappings in the guest system.
  */
 
-#include "memcheck_mmrange_map.h"
-#include "memcheck_logging.h"
+#include "android/qemu/memcheck/memcheck_mmrange_map.h"
+#include "android/qemu/memcheck/memcheck_logging.h"
 
 /* Memory range descriptor stored in the map. */
 typedef struct MMRangeMapEntry {
diff --git a/memcheck/memcheck_mmrange_map.h b/android/qemu/memcheck/memcheck_mmrange_map.h
similarity index 94%
rename from memcheck/memcheck_mmrange_map.h
rename to android/qemu/memcheck/memcheck_mmrange_map.h
index f291c95..ceebc30 100644
--- a/memcheck/memcheck_mmrange_map.h
+++ b/android/qemu/memcheck/memcheck_mmrange_map.h
@@ -19,11 +19,11 @@
  * any part.
  */
 
-#ifndef QEMU_MEMCHECK_MEMCHECK_MMRANGE_MAP_H
-#define QEMU_MEMCHECK_MEMCHECK_MMRANGE_MAP_H
+#ifndef ANDROID_QEMU_MEMCHECK_MEMCHECK_MMRANGE_MAP_H
+#define ANDROID_QEMU_MEMCHECK_MEMCHECK_MMRANGE_MAP_H
 
 #include "sys-tree.h"
-#include "memcheck_common.h"
+#include "android/qemu/memcheck/memcheck_common.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -118,4 +118,4 @@
 };  /* end of extern "C" */
 #endif
 
-#endif  // QEMU_MEMCHECK_MEMCHECK_MMRANGE_MAP_H
+#endif  // ANDROID_QEMU_MEMCHECK_MEMCHECK_MMRANGE_MAP_H
diff --git a/memcheck/memcheck_proc_management.c b/android/qemu/memcheck/memcheck_proc_management.c
similarity index 96%
rename from memcheck/memcheck_proc_management.c
rename to android/qemu/memcheck/memcheck_proc_management.c
index d33ed97..514f709 100644
--- a/memcheck/memcheck_proc_management.c
+++ b/android/qemu/memcheck/memcheck_proc_management.c
@@ -16,10 +16,10 @@
  */
 
 #include "elff/elff_api.h"
-#include "memcheck.h"
-#include "memcheck_proc_management.h"
-#include "memcheck_logging.h"
-#include "memcheck_util.h"
+#include "android/qemu/memcheck/memcheck.h"
+#include "android/qemu/memcheck/memcheck_proc_management.h"
+#include "android/qemu/memcheck/memcheck_logging.h"
+#include "android/qemu/memcheck/memcheck_util.h"
 
 /* Current thread id.
  * This value is updated with each call to memcheck_switch, saving here
@@ -643,8 +643,8 @@
                             uint32_t rel =
                                 mmrangedesc_get_module_offset(rdesc,
                                                   leaked_alloc.call_stack[stk]);
-                            printf("         Frame %u: PC=0x%08X (relative 0x%08X) in module %s\n",
-                                   stk, leaked_alloc.call_stack[stk], rel,
+                            printf("         Frame %u: PC=" TARGET_FMT_lx " (relative %llx) in module %s\n",
+                                   stk, leaked_alloc.call_stack[stk], (long long)rel,
                                    rdesc->path);
                             if (memcheck_get_address_info(leaked_alloc.call_stack[stk],
                                                           rdesc, &elff_info,
@@ -659,7 +659,7 @@
                                 elff_close(elff_handle);
                             }
                         } else {
-                            printf("         Frame %u: PC=0x%08X in module <unknown>\n",
+                            printf("         Frame %u: PC=" TARGET_FMT_lx " in module <unknown>\n",
                                    stk, leaked_alloc.call_stack[stk]);
 
                         }
@@ -738,7 +738,7 @@
         g_free(replaced.path);
     }
 
-    T(PROC_MMAP, "memcheck: %s[pid=%u] %s is mapped: 0x%08X - 0x%08X + 0x%08X\n",
+    T(PROC_MMAP, "memcheck: %s[pid=%u] %s is mapped: " TARGET_FMT_lx " - " TARGET_FMT_lx " + " TARGET_FMT_lx "\n",
       proc->image_path, proc->pid, path, vstart, vend, exec_offset);
 }
 
@@ -759,7 +759,7 @@
 
     if (desc.map_start >= vstart && desc.map_end <= vend) {
         /* Entire mapping has been deleted. */
-        T(PROC_MMAP, "memcheck: %s[pid=%u] %s is unmapped: [0x%08X - 0x%08X + 0x%08X]\n",
+        T(PROC_MMAP, "memcheck: %s[pid=%u] %s is unmapped: [" TARGET_FMT_lx " - " TARGET_FMT_lx " + " TARGET_FMT_lx "]\n",
           proc->image_path, proc->pid, desc.path, vstart, vend, desc.exec_offset);
         g_free(desc.path);
         return;
@@ -768,7 +768,7 @@
     /* This can be the first stage of a "remap" request, when part of the existing
      * mapping has been unmapped. If so, let's cut the unmapped part from the
      * block that we just pulled, and add whatever's left back to the map. */
-    T(PROC_MMAP, "memcheck: REMAP(0x%08X, 0x%08X + 0x%08X) -> (0x%08X, 0x%08X)\n",
+    T(PROC_MMAP, "memcheck: REMAP(" TARGET_FMT_lx ", " TARGET_FMT_lx " + " TARGET_FMT_lx ") -> (" TARGET_FMT_lx ", " TARGET_FMT_lx ")\n",
        desc.map_start, desc.map_end, desc.exec_offset, vstart, vend);
     if (desc.map_start == vstart) {
         /* We cut part from the beginning. Add the tail back. */
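
Context for the format-string changes above: target_ulong is 32 or 64 bits wide depending on the emulated CPU, so hard-coded "0x%08X" conversions stop matching once 64-bit targets are in play. TARGET_FMT_lx is QEMU's width-aware printf format macro for target_ulong; the sketch below is illustrative only (the *_example names are hypothetical, and the real definitions live in QEMU's cpu-defs.h):

    #include <inttypes.h>
    #include <stdio.h>

    /* Assume a 32-bit guest for this sketch; QEMU selects this per target. */
    #ifndef EXAMPLE_TARGET_LONG_BITS
    #define EXAMPLE_TARGET_LONG_BITS 32
    #endif

    #if EXAMPLE_TARGET_LONG_BITS == 64
    typedef uint64_t target_ulong_example;
    #  define TARGET_FMT_lx_example "%016" PRIx64
    #else
    typedef uint32_t target_ulong_example;
    #  define TARGET_FMT_lx_example "%08" PRIx32
    #endif

    static void example_print_frame(unsigned stk, target_ulong_example pc)
    {
        /* Adjacent string literals concatenate, so the macro composes with
         * the surrounding format string exactly as in the hunk above. */
        printf("Frame %u: PC=" TARGET_FMT_lx_example "\n", stk, pc);
    }
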
diff --git a/memcheck/memcheck_proc_management.h b/android/qemu/memcheck/memcheck_proc_management.h
similarity index 96%
rename from memcheck/memcheck_proc_management.h
rename to android/qemu/memcheck/memcheck_proc_management.h
index 91dcf56..2192ae9 100644
--- a/memcheck/memcheck_proc_management.h
+++ b/android/qemu/memcheck/memcheck_proc_management.h
@@ -15,13 +15,13 @@
  * management in memchecker framework.
  */
 
-#ifndef QEMU_MEMCHECK_MEMCHECK_PROC_MANAGEMENT_H
-#define QEMU_MEMCHECK_MEMCHECK_PROC_MANAGEMENT_H
+#ifndef ANDROID_QEMU_MEMCHECK_MEMCHECK_PROC_MANAGEMENT_H
+#define ANDROID_QEMU_MEMCHECK_MEMCHECK_PROC_MANAGEMENT_H
 
 #include "qemu/queue.h"
-#include "memcheck_common.h"
-#include "memcheck_malloc_map.h"
-#include "memcheck_mmrange_map.h"
+#include "android/qemu/memcheck/memcheck_common.h"
+#include "android/qemu/memcheck/memcheck_malloc_map.h"
+#include "android/qemu/memcheck/memcheck_mmrange_map.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -318,4 +318,4 @@
 };  /* end of extern "C" */
 #endif
 
-#endif  // QEMU_MEMCHECK_MEMCHECK_PROC_MANAGEMENT_H
+#endif  // ANDROID_QEMU_MEMCHECK_MEMCHECK_PROC_MANAGEMENT_H
diff --git a/memcheck/memcheck_util.c b/android/qemu/memcheck/memcheck_util.c
similarity index 89%
rename from memcheck/memcheck_util.c
rename to android/qemu/memcheck/memcheck_util.c
index 5f7e73d..e7f8743 100644
--- a/memcheck/memcheck_util.c
+++ b/android/qemu/memcheck/memcheck_util.c
@@ -18,10 +18,13 @@
 #include "qemu-common.h"
 #include "android/utils/path.h"
 #include "cpu.h"
-#include "memcheck_util.h"
-#include "memcheck_proc_management.h"
-#include "memcheck_logging.h"
-//#include "softmmu_outside_jit.h"
+#include "exec/cpu-all.h"
+
+#include "android/qemu/memcheck/memcheck_util.h"
+#include "android/qemu/memcheck/memcheck_proc_management.h"
+#include "android/qemu/memcheck/memcheck_logging.h"
+
+#include "exec/softmmu_exec.h"
 
 /* Gets symbols file path for the given module.
  * Param:
@@ -95,7 +98,7 @@
      * buffer to 32 bits and use ld/stl_user instead of ld/stub_user to
      * read / write guest's memory. */
     while (buffer_size) {
-        *(uint8_t*)qemu_address = ldub_user(guest_address);
+        *(uint8_t*)qemu_address = cpu_ldub_user(cpu_single_env, guest_address);
         qemu_address = (uint8_t*)qemu_address + 1;
         guest_address++;
         buffer_size--;
@@ -108,7 +111,7 @@
                           size_t buffer_size)
 {
     while (buffer_size) {
-        stb_user(guest_address, *(uint8_t*)qemu_address);
+        cpu_stb_user(cpu_single_env, guest_address, *(uint8_t*)qemu_address);
         guest_address++;
         qemu_address = (uint8_t*)qemu_address + 1;
         buffer_size--;
@@ -124,7 +127,7 @@
 
     if (qemu_buffer_size > 1) {
         for (copied = 0; copied < qemu_buffer_size - 1; copied++) {
-            qemu_str[copied] = ldub_user(guest_str + copied);
+            qemu_str[copied] = cpu_ldub_user(cpu_single_env, guest_str + copied);
             if (qemu_str[copied] == '\0') {
                 return copied;
             }
@@ -143,7 +146,7 @@
 
     if (qemu_buffer_size > 1) {
         for (copied = 0; copied < qemu_buffer_size - 1; copied++) {
-            qemu_str[copied] = ldub_kernel(guest_str + copied);
+            qemu_str[copied] = cpu_ldub_kernel(cpu_single_env, guest_str + copied);
             if (qemu_str[copied] == '\0') {
                 return copied;
             }
@@ -160,19 +163,19 @@
 void
 memcheck_fail_alloc(target_ulong guest_address)
 {
-    stl_user(ALLOC_RES_ADDRESS(guest_address), 0);
+    cpu_stl_user(cpu_single_env, ALLOC_RES_ADDRESS(guest_address), 0);
 }
 
 void
 memcheck_fail_free(target_ulong guest_address)
 {
-    stl_user(FREE_RES_ADDRESS(guest_address), 0);
+    cpu_stl_user(cpu_single_env, FREE_RES_ADDRESS(guest_address), 0);
 }
 
 void
 memcheck_fail_query(target_ulong guest_address)
 {
-    stl_user(QUERY_RES_ADDRESS(guest_address), 0);
+    cpu_stl_user(cpu_single_env, QUERY_RES_ADDRESS(guest_address), 0);
 }
 
 // =============================================================================
@@ -208,9 +211,9 @@
            (uint32_t)mallocdesc_get_user_ptr(desc),
             (uint32_t)mallocdesc_get_user_ptr(desc) + desc->requested_bytes,
            desc->requested_bytes);
-    printf("            Prefix guarding area:   0x%08X - 0x%08X, %u bytes\n",
+    printf("            Prefix guarding area:   " TARGET_FMT_lx " - " TARGET_FMT_lx ", %u bytes\n",
            desc->ptr, desc->ptr + desc->prefix_size, desc->prefix_size);
-    printf("            Suffix guarding area:   0x%08X - 0x%08X, %u bytes\n",
+    printf("            Suffix guarding area:   " TARGET_FMT_lx " - " TARGET_FMT_lx ", %u bytes\n",
            mallocdesc_get_user_alloc_end(desc),
            mallocdesc_get_user_alloc_end(desc) + desc->suffix_size,
            desc->suffix_size);
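
The softmmu accessor change above swaps the legacy ldub_user()/stb_user() helpers for the cpu_*_user() variants, which take the CPU environment as an explicit first argument (cpu_single_env in these call sites). A minimal sketch of the pattern, assuming the same includes as memcheck_util.c and the cpu_single_env global used above:

    #include "cpu.h"
    #include "exec/cpu-all.h"
    #include "exec/softmmu_exec.h"

    /* Copy a NUL-terminated string out of guest user-space memory, one byte
     * at a time, stopping at dst_size - 1 characters. Sketch only. */
    static int example_copy_guest_string(char *dst, size_t dst_size,
                                         target_ulong guest_str)
    {
        size_t i;

        if (dst_size == 0) {
            return -1;
        }
        for (i = 0; i + 1 < dst_size; i++) {
            dst[i] = cpu_ldub_user(cpu_single_env, guest_str + i);
            if (dst[i] == '\0') {
                return (int)i;
            }
        }
        dst[i] = '\0';
        return (int)i;
    }
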
diff --git a/memcheck/memcheck_util.h b/android/qemu/memcheck/memcheck_util.h
similarity index 97%
rename from memcheck/memcheck_util.h
rename to android/qemu/memcheck/memcheck_util.h
index d15eedf..f69f427 100644
--- a/memcheck/memcheck_util.h
+++ b/android/qemu/memcheck/memcheck_util.h
@@ -14,12 +14,12 @@
  * Contains declarations of utility routines for memchecker framework.
  */
 
-#ifndef QEMU_MEMCHECK_MEMCHECK_UTIL_H
-#define QEMU_MEMCHECK_MEMCHECK_UTIL_H
+#ifndef ANDROID_QEMU_MEMCHECK_MEMCHECK_UTIL_H
+#define ANDROID_QEMU_MEMCHECK_MEMCHECK_UTIL_H
 
-#include "memcheck_common.h"
+#include "android/qemu/memcheck/memcheck_common.h"
 #include "elff/elff_api.h"
-#include "exec.h"
+#include "cpu.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -237,4 +237,4 @@
 };  /* end of extern "C" */
 #endif
 
-#endif  // QEMU_MEMCHECK_MEMCHECK_UTIL_H
+#endif  // ANDROID_QEMU_MEMCHECK_MEMCHECK_UTIL_H
diff --git a/android/sockets.c b/android/sockets.c
index 4b8796d..4334ce9 100644
--- a/android/sockets.c
+++ b/android/sockets.c
@@ -1210,11 +1210,11 @@
 int socket_init(void)
 {
     WSADATA Data;
-    int ret, err;
+    int ret;
 
     ret = WSAStartup(MAKEWORD(2,2), &Data);
     if (ret != 0) {
-        err = WSAGetLastError();
+        (void) WSAGetLastError();
         return -1;
     }
     atexit(socket_cleanup);
diff --git a/android/utils/compiler.h b/android/utils/compiler.h
index 4c5f7b1..8462cea 100644
--- a/android/utils/compiler.h
+++ b/android/utils/compiler.h
@@ -20,4 +20,13 @@
 #define ANDROID_END_HEADER  /* nothing */
 #endif
 
+// ANDROID_GCC_PREREQ(<major>,<minor>) will evaluate to true
+// iff the current version of GCC is <major>.<minor> or higher.
+#if defined(__GNUC__) && defined(__GNUC_MINOR__)
+# define ANDROID_GCC_PREREQ(maj, min) \
+         ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
+#else
+# define ANDROID_GCC_PREREQ(maj, min) 0
+#endif
+
 #endif  // ANDROID_UTILS_COMPILER_H
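
The comparison packs the version into a single integer, so ANDROID_GCC_PREREQ(4,4) under GCC 4.6 evaluates (4<<16)+6 >= (4<<16)+4, which is true, while GCC 4.3 gives (4<<16)+3 >= (4<<16)+4, which is false. A minimal usage sketch (this mirrors the android/utils/system.h hunk further below; the example_same_option helper is hypothetical):

    #include "android/utils/compiler.h"

    #ifdef _WIN32
    #  include <string.h>    /* stricmp() / strcasecmp() on Mingw */
    #else
    #  include <strings.h>   /* strcasecmp() on POSIX */
    #endif

    /* Presumably newer Mingw toolchains already declare strcasecmp(), so the
     * non-standard stricmp() fallback is only kept for pre-4.4 GCC. */
    #if defined(_WIN32) && !ANDROID_GCC_PREREQ(4,4)
    #  define strcasecmp stricmp
    #endif

    static int example_same_option(const char *a, const char *b)
    {
        return strcasecmp(a, b) == 0;
    }
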
diff --git a/android/utils/filelock.c b/android/utils/filelock.c
index 8baffe4..b009e25 100644
--- a/android/utils/filelock.c
+++ b/android/utils/filelock.c
@@ -91,13 +91,13 @@
 #ifdef _WIN32
     int  pidfile_fd = -1;
 
-    ret = _mkdir( lock->lock );
+    ret = mkdir( lock->lock );
     if (ret < 0) {
         if (errno == ENOENT) {
             D( "could not access directory '%s', check path elements", lock->lock );
             return -1;
         } else if (errno != EEXIST) {
-            D( "_mkdir(%s): %s", lock->lock, strerror(errno) );
+            D( "mkdir(%s): %s", lock->lock, strerror(errno) );
             return -1;
         }
 
diff --git a/android/utils/path.c b/android/utils/path.c
index f1a146f..c88b5cc 100644
--- a/android/utils/path.c
+++ b/android/utils/path.c
@@ -284,7 +284,7 @@
 {
 #ifdef _WIN32
     (void)mode;
-    return _mkdir(path);
+    return mkdir(path);
 #else
     return HANDLE_EINTR(mkdir(path, mode));
 #endif
diff --git a/android/utils/path.h b/android/utils/path.h
index 2926e5e..ac5fb09 100644
--- a/android/utils/path.h
+++ b/android/utils/path.h
@@ -12,9 +12,12 @@
 #ifndef _ANDROID_UTILS_PATH_H
 #define _ANDROID_UTILS_PATH_H
 
+#include <android/utils/compiler.h>
 #include <android/utils/system.h>
 #include <stdint.h>  /* for uint64_t */
 
+ANDROID_BEGIN_HEADER
+
 /** MISC FILE AND DIRECTORY HANDLING
  **/
 
@@ -165,5 +168,6 @@
 extern void*          path_load_file( const char*  path, size_t  *pSize );
 
 /* */
+ANDROID_END_HEADER
 
 #endif /* _ANDROID_UTILS_PATH_H */
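
For reference, ANDROID_BEGIN_HEADER / ANDROID_END_HEADER come from android/utils/compiler.h and, judging from the "/* nothing */" fallback visible in that hunk, wrap the declarations in an extern "C" block when the header is consumed by C++. A sketch of the idiom with hypothetical EXAMPLE_ names (the real macro bodies live in compiler.h and are not shown here):

    #ifdef __cplusplus
    #  define EXAMPLE_BEGIN_HEADER extern "C" {
    #  define EXAMPLE_END_HEADER   }
    #else
    #  define EXAMPLE_BEGIN_HEADER /* nothing */
    #  define EXAMPLE_END_HEADER   /* nothing */
    #endif

    EXAMPLE_BEGIN_HEADER

    /* C declarations usable from both C and C++ translation units. */
    int example_path_exists(const char *path);

    EXAMPLE_END_HEADER
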
diff --git a/android/utils/system.c b/android/utils/system.c
index 6a6a4e1..a0344fd 100644
--- a/android/utils/system.c
+++ b/android/utils/system.c
@@ -67,7 +67,8 @@
     if (block2 != NULL)
         return block2;
 
-    fprintf(stderr, "PANIC: not enough memory to reallocate %zu bytes\n", size);
+    fprintf(stderr, "PANIC: not enough memory to reallocate %u bytes\n",
+            (unsigned)size);
     exit(1);
     return NULL;
 }
diff --git a/android/utils/system.h b/android/utils/system.h
index 4f4677d..a6b84d1 100644
--- a/android/utils/system.h
+++ b/android/utils/system.h
@@ -103,9 +103,9 @@
 extern char*  win32_strsep(char**  pline, const char*  delim);
 #endif
 
-/** Handle strcasecmp on Windows
+/** Handle strcasecmp on Windows (and older Mingw32 toolchain)
  **/
-#ifdef _WIN32
+#if defined(_WIN32) && !ANDROID_GCC_PREREQ(4,4)
 #  define  strcasecmp  stricmp
 #endif
 
diff --git a/android/varint.c b/android/varint.c
deleted file mode 100644
index 41f6c67..0000000
--- a/android/varint.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/* Copyright (C) 2007-2008 The Android Open Source Project
-**
-** This software is licensed under the terms of the GNU General Public
-** License version 2, as published by the Free Software Foundation, and
-** may be copied, distributed, and modified under those terms.
-**
-** This program is distributed in the hope that it will be useful,
-** but WITHOUT ANY WARRANTY; without even the implied warranty of
-** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-** GNU General Public License for more details.
-*/
-#include <inttypes.h>
-#include "varint.h"
-
-// Define some constants for powers of two.
-static const int k2Exp6 = 64;
-static const uint32_t k2Exp7 = 128;
-static const int k2Exp13 = 8192;
-static const uint32_t k2Exp14 = 16384;
-static const int k2Exp20 = (1 * 1024 * 1024);
-static const uint32_t k2Exp21 = (2 * 1024 * 1024);
-static const int k2Exp27 = (128 * 1024 * 1024);
-static const uint32_t k2Exp28 = (256 * 1024 * 1024);
-static const uint64_t k2Exp35 = (32LL * 1024LL * 1024LL * 1024LL);
-static const uint64_t k2Exp42 = (4LL * 1024LL * 1024LL * 1024LL * 1024LL);
-
-// Encodes the 64-bit value "value" using the varint encoding.  The varint
-// encoding uses a prefix followed by some data bits.  The valid prefixes
-// and the number of data bits are given in the table below.
-//
-// Prefix     Bytes  Data bits
-// 0          1      7
-// 10         2      14
-// 110        3      21
-// 1110       4      28
-// 11110      5      35
-// 111110     6      42
-// 11111100   9      64
-// 11111101   reserved
-// 11111110   reserved
-// 11111111   reserved
-char *varint_encode(uint64_t value, char *buf) {
-  if (value < k2Exp7) {
-    *buf++ = value;
-  } else if (value < k2Exp14) {
-    *buf++ = (2 << 6) | (value >> 8);
-    *buf++ = value & 0xff;
-  } else if (value < k2Exp21) {
-    *buf++ = (6 << 5) | (value >> 16);
-    *buf++ = (value >> 8) & 0xff;
-    *buf++ = value & 0xff;
-  } else if (value < k2Exp28) {
-    *buf++ = (0xe << 4) | (value >> 24);
-    *buf++ = (value >> 16) & 0xff;
-    *buf++ = (value >> 8) & 0xff;
-    *buf++ = value & 0xff;
-  } else if (value < k2Exp35) {
-    *buf++ = (0x1e << 3) | (value >> 32);
-    *buf++ = (value >> 24) & 0xff;
-    *buf++ = (value >> 16) & 0xff;
-    *buf++ = (value >> 8) & 0xff;
-    *buf++ = value & 0xff;
-  } else if (value < k2Exp42) {
-    *buf++ = (0x3e << 2) | (value >> 40);
-    *buf++ = (value >> 32) & 0xff;
-    *buf++ = (value >> 24) & 0xff;
-    *buf++ = (value >> 16) & 0xff;
-    *buf++ = (value >> 8) & 0xff;
-    *buf++ = value & 0xff;
-  } else {
-    *buf++ = (0x7e << 1);
-    *buf++ = (value >> 56) & 0xff;
-    *buf++ = (value >> 48) & 0xff;
-    *buf++ = (value >> 40) & 0xff;
-    *buf++ = (value >> 32) & 0xff;
-    *buf++ = (value >> 24) & 0xff;
-    *buf++ = (value >> 16) & 0xff;
-    *buf++ = (value >> 8) & 0xff;
-    *buf++ = value & 0xff;
-  }
-  return buf;
-}
-
-// Encodes the 35-bit signed value "value" using the varint encoding.
-// The varint encoding uses a prefix followed by some data bits.  The
-// valid prefixes and the number of data bits are given in the table
-// below.
-//
-// Prefix     Bytes  Data bits
-// 0          1      7
-// 10         2      14
-// 110        3      21
-// 1110       4      28
-// 11110      5      35
-char *varint_encode_signed(int64_t value, char *buf) {
-  if (value < k2Exp6 && value >= -k2Exp6) {
-    *buf++ = value & 0x7f;
-  } else if (value < k2Exp13 && value >= -k2Exp13) {
-    *buf++ = (2 << 6) | ((value >> 8) & 0x3f);
-    *buf++ = value & 0xff;
-  } else if (value < k2Exp20 && value >= -k2Exp20) {
-    *buf++ = (6 << 5) | ((value >> 16) & 0x1f);
-    *buf++ = (value >> 8) & 0xff;
-    *buf++ = value & 0xff;
-  } else if (value < k2Exp27 && value >= -k2Exp27) {
-    *buf++ = (0xe << 4) | ((value >> 24) & 0xf);
-    *buf++ = (value >> 16) & 0xff;
-    *buf++ = (value >> 8) & 0xff;
-    *buf++ = value & 0xff;
-  } else {
-    *buf++ = (0x1e << 3);
-    *buf++ = (value >> 24) & 0xff;
-    *buf++ = (value >> 16) & 0xff;
-    *buf++ = (value >> 8) & 0xff;
-    *buf++ = value & 0xff;
-  }
-  return buf;
-}
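
As a worked example of the encoding table in the removed file: 5 fits in 7 data bits and encodes as the single byte 0x05, while 300 needs the two-byte "10" prefix form and encodes as 0x81 0x2C, since (2 << 6) | (300 >> 8) is 0x81 and 300 & 0xff is 0x2C. A small sketch of the byte count implied by that table (illustrative only; the actual encoder was varint_encode() above):

    #include <stdint.h>

    /* Number of bytes varint_encode() would emit for an unsigned value,
     * taken straight from the prefix table in the removed file. */
    static int example_varint_encoded_size(uint64_t value)
    {
        if (value < (1ULL << 7))  return 1;   /* prefix 0        */
        if (value < (1ULL << 14)) return 2;   /* prefix 10       */
        if (value < (1ULL << 21)) return 3;   /* prefix 110      */
        if (value < (1ULL << 28)) return 4;   /* prefix 1110     */
        if (value < (1ULL << 35)) return 5;   /* prefix 11110    */
        if (value < (1ULL << 42)) return 6;   /* prefix 111110   */
        return 9;                             /* prefix 11111100 */
    }
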
diff --git a/arch_init.c b/arch_init.c
index 359ef7c..9e659b0 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -120,7 +120,7 @@
     int bytes_sent = 0;
 
     if (!block)
-        block = QLIST_FIRST(&ram_list.blocks);
+        block = QTAILQ_FIRST(&ram_list.blocks);
 
     current_addr = block->offset + offset;
 
@@ -161,9 +161,9 @@
         offset += TARGET_PAGE_SIZE;
         if (offset >= block->length) {
             offset = 0;
-            block = QLIST_NEXT(block, next);
+            block = QTAILQ_NEXT(block, next);
             if (!block)
-                block = QLIST_FIRST(&ram_list.blocks);
+                block = QTAILQ_FIRST(&ram_list.blocks);
         }
 
         current_addr = block->offset + offset;
@@ -183,7 +183,7 @@
     RAMBlock *block;
     ram_addr_t count = 0;
 
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         ram_addr_t addr;
         for (addr = block->offset; addr < block->offset + block->length;
              addr += TARGET_PAGE_SIZE) {
@@ -211,7 +211,7 @@
     RAMBlock *block;
     uint64_t total = 0;
 
-    QLIST_FOREACH(block, &ram_list.blocks, next)
+    QTAILQ_FOREACH(block, &ram_list.blocks, next)
         total += block->length;
 
     return total;
@@ -234,18 +234,18 @@
     RAMBlock *block, *nblock, **blocks;
     int n;
     n = 0;
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         ++n;
     }
     blocks = g_malloc(n * sizeof *blocks);
     n = 0;
-    QLIST_FOREACH_SAFE(block, &ram_list.blocks, next, nblock) {
+    QTAILQ_FOREACH_SAFE(block, &ram_list.blocks, next, nblock) {
         blocks[n++] = block;
-        QLIST_REMOVE(block, next);
+        QTAILQ_REMOVE(&ram_list.blocks, block, next);
     }
     qsort(blocks, n, sizeof *blocks, block_compar);
     while (--n >= 0) {
-        QLIST_INSERT_HEAD(&ram_list.blocks, blocks[n], next);
+        QTAILQ_INSERT_HEAD(&ram_list.blocks, blocks[n], next);
     }
     g_free(blocks);
 }
@@ -275,7 +275,7 @@
         sort_ram_list();
 
         /* Make sure all dirty bits are set */
-        QLIST_FOREACH(block, &ram_list.blocks, next) {
+        QTAILQ_FOREACH(block, &ram_list.blocks, next) {
             for (addr = block->offset; addr < block->offset + block->length;
                  addr += TARGET_PAGE_SIZE) {
                 if (!cpu_physical_memory_get_dirty(addr,
@@ -290,7 +290,7 @@
 
         qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
 
-        QLIST_FOREACH(block, &ram_list.blocks, next) {
+        QTAILQ_FOREACH(block, &ram_list.blocks, next) {
             qemu_put_byte(f, strlen(block->idstr));
             qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
             qemu_put_be64(f, block->length);
@@ -358,7 +358,7 @@
     qemu_get_buffer(f, (uint8_t *)id, len);
     id[len] = 0;
 
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         if (!strncmp(id, block->idstr, sizeof(id)))
             return block->host + offset;
     }
@@ -402,7 +402,7 @@
                     id[len] = 0;
                     length = qemu_get_be64(f);
 
-                    QLIST_FOREACH(block, &ram_list.blocks, next) {
+                    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
                         if (!strncmp(id, block->idstr, sizeof(id))) {
                             if (block->length != length)
                                 return -EINVAL;
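
The arch_init.c hunks are a mechanical switch of ram_list.blocks from the QLIST_* to the QTAILQ_* macros in qemu/queue.h; the only signature difference visible above is that QTAILQ_REMOVE also takes the list head. A minimal sketch of the tail-queue pattern, using hypothetical Example* names rather than the real RAMBlock definitions:

    #include "qemu/queue.h"

    typedef struct ExampleBlock {
        int length;
        QTAILQ_ENTRY(ExampleBlock) next;   /* list linkage field */
    } ExampleBlock;

    static QTAILQ_HEAD(, ExampleBlock) example_blocks =
        QTAILQ_HEAD_INITIALIZER(example_blocks);

    static void example_drain(void)
    {
        ExampleBlock *blk, *tmp;

        QTAILQ_FOREACH_SAFE(blk, &example_blocks, next, tmp) {
            /* Unlike QLIST_REMOVE, QTAILQ_REMOVE needs the list head. */
            QTAILQ_REMOVE(&example_blocks, blk, next);
        }
    }
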
diff --git a/audio/audio.c b/audio/audio.c
index f3de998..ce0234f 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -232,7 +232,7 @@
         AUD_log (NULL, "Context:\n");
 
 #if defined AUDIO_BREAKPOINT_ON_BUG
-#  if defined HOST_I386
+#  if defined __i386__
 #    if defined __GNUC__
         __asm__ ("int3");
 #    elif defined _MSC_VER
@@ -2047,7 +2047,7 @@
     initialized = 1;
 
     QLIST_INIT (&s->card_head);
-    register_savevm ("audio", 0, 1, audio_save, audio_load, s);
+    register_savevm(NULL, "audio", 0, 1, audio_save, audio_load, s);
     audio_reset_timer();
 }
 
diff --git a/audio/mixeng.c b/audio/mixeng.c
index 3f876e7..b7c7bc7 100644
--- a/audio/mixeng.c
+++ b/audio/mixeng.c
@@ -301,7 +301,7 @@
     struct rate *rate = audio_calloc (AUDIO_FUNC, 1, sizeof (*rate));
 
     if (!rate) {
-        dolog ("Could not allocate resampler (%zu bytes)\n", sizeof (*rate));
+        dolog ("Could not allocate resampler (%u bytes)\n", (int)sizeof (*rate));
         return NULL;
     }
 
diff --git a/block/cow.c b/block/cow.c
deleted file mode 100644
index cce970e..0000000
--- a/block/cow.c
+++ /dev/null
@@ -1,324 +0,0 @@
-/*
- * Block driver for the COW format
- *
- * Copyright (c) 2004 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "qemu/module.h"
-
-/**************************************************************/
-/* COW block driver using file system holes */
-
-/* user mode linux compatible COW file */
-#define COW_MAGIC 0x4f4f4f4d  /* MOOO */
-#define COW_VERSION 2
-
-struct cow_header_v2 {
-    uint32_t magic;
-    uint32_t version;
-    char backing_file[1024];
-    int32_t mtime;
-    uint64_t size;
-    uint32_t sectorsize;
-};
-
-typedef struct BDRVCowState {
-    int64_t cow_sectors_offset;
-} BDRVCowState;
-
-static int cow_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
-    const struct cow_header_v2 *cow_header = (const void *)buf;
-
-    if (buf_size >= sizeof(struct cow_header_v2) &&
-        be32_to_cpu(cow_header->magic) == COW_MAGIC &&
-        be32_to_cpu(cow_header->version) == COW_VERSION)
-        return 100;
-    else
-        return 0;
-}
-
-static int cow_open(BlockDriverState *bs, int flags)
-{
-    BDRVCowState *s = bs->opaque;
-    struct cow_header_v2 cow_header;
-    int bitmap_size;
-    int64_t size;
-
-    /* see if it is a cow image */
-    if (bdrv_pread(bs->file, 0, &cow_header, sizeof(cow_header)) !=
-            sizeof(cow_header)) {
-        goto fail;
-    }
-
-    if (be32_to_cpu(cow_header.magic) != COW_MAGIC ||
-        be32_to_cpu(cow_header.version) != COW_VERSION) {
-        goto fail;
-    }
-
-    /* cow image found */
-    size = be64_to_cpu(cow_header.size);
-    bs->total_sectors = size / 512;
-
-    pstrcpy(bs->backing_file, sizeof(bs->backing_file),
-            cow_header.backing_file);
-
-    bitmap_size = ((bs->total_sectors + 7) >> 3) + sizeof(cow_header);
-    s->cow_sectors_offset = (bitmap_size + 511) & ~511;
-    return 0;
- fail:
-    return -1;
-}
-
-/*
- * XXX(hch): right now these functions are extremely inefficient.
- * We should just read the whole bitmap we'll need in one go instead.
- */
-static inline int cow_set_bit(BlockDriverState *bs, int64_t bitnum)
-{
-    uint64_t offset = sizeof(struct cow_header_v2) + bitnum / 8;
-    uint8_t bitmap;
-    int ret;
-
-    ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
-    if (ret < 0) {
-       return ret;
-    }
-
-    bitmap |= (1 << (bitnum % 8));
-
-    ret = bdrv_pwrite_sync(bs->file, offset, &bitmap, sizeof(bitmap));
-    if (ret < 0) {
-       return ret;
-    }
-    return 0;
-}
-
-static inline int is_bit_set(BlockDriverState *bs, int64_t bitnum)
-{
-    uint64_t offset = sizeof(struct cow_header_v2) + bitnum / 8;
-    uint8_t bitmap;
-    int ret;
-
-    ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
-    if (ret < 0) {
-       return ret;
-    }
-
-    return !!(bitmap & (1 << (bitnum % 8)));
-}
-
-/* Return true if first block has been changed (ie. current version is
- * in COW file).  Set the number of continuous blocks for which that
- * is true. */
-static int cow_is_allocated(BlockDriverState *bs, int64_t sector_num,
-        int nb_sectors, int *num_same)
-{
-    int changed;
-
-    if (nb_sectors == 0) {
-	*num_same = nb_sectors;
-	return 0;
-    }
-
-    changed = is_bit_set(bs, sector_num);
-    if (changed < 0) {
-        return 0; /* XXX: how to return I/O errors? */
-    }
-
-    for (*num_same = 1; *num_same < nb_sectors; (*num_same)++) {
-	if (is_bit_set(bs, sector_num + *num_same) != changed)
-	    break;
-    }
-
-    return changed;
-}
-
-static int cow_update_bitmap(BlockDriverState *bs, int64_t sector_num,
-        int nb_sectors)
-{
-    int error = 0;
-    int i;
-
-    for (i = 0; i < nb_sectors; i++) {
-        error = cow_set_bit(bs, sector_num + i);
-        if (error) {
-            break;
-        }
-    }
-
-    return error;
-}
-
-static int cow_read(BlockDriverState *bs, int64_t sector_num,
-                    uint8_t *buf, int nb_sectors)
-{
-    BDRVCowState *s = bs->opaque;
-    int ret, n;
-
-    while (nb_sectors > 0) {
-        if (cow_is_allocated(bs, sector_num, nb_sectors, &n)) {
-            ret = bdrv_pread(bs->file,
-                        s->cow_sectors_offset + sector_num * 512,
-                        buf, n * 512);
-            if (ret != n * 512)
-                return -1;
-        } else {
-            if (bs->backing_hd) {
-                /* read from the base image */
-                ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
-                if (ret < 0)
-                    return -1;
-            } else {
-            memset(buf, 0, n * 512);
-        }
-        }
-        nb_sectors -= n;
-        sector_num += n;
-        buf += n * 512;
-    }
-    return 0;
-}
-
-static int cow_write(BlockDriverState *bs, int64_t sector_num,
-                     const uint8_t *buf, int nb_sectors)
-{
-    BDRVCowState *s = bs->opaque;
-    int ret;
-
-    ret = bdrv_pwrite(bs->file, s->cow_sectors_offset + sector_num * 512,
-                      buf, nb_sectors * 512);
-    if (ret != nb_sectors * 512)
-        return -1;
-
-    return cow_update_bitmap(bs, sector_num, nb_sectors);
-}
-
-static void cow_close(BlockDriverState *bs)
-{
-}
-
-static int cow_create(const char *filename, QEMUOptionParameter *options)
-{
-    int fd, cow_fd;
-    struct cow_header_v2 cow_header;
-    struct stat st;
-    int64_t image_sectors = 0;
-    const char *image_filename = NULL;
-    int ret;
-
-    /* Read out options */
-    while (options && options->name) {
-        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
-            image_sectors = options->value.n / 512;
-        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
-            image_filename = options->value.s;
-        }
-        options++;
-    }
-
-    cow_fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
-              0644);
-    if (cow_fd < 0)
-        return -errno;
-    memset(&cow_header, 0, sizeof(cow_header));
-    cow_header.magic = cpu_to_be32(COW_MAGIC);
-    cow_header.version = cpu_to_be32(COW_VERSION);
-    if (image_filename) {
-        /* Note: if no file, we put a dummy mtime */
-        cow_header.mtime = cpu_to_be32(0);
-
-        fd = open(image_filename, O_RDONLY | O_BINARY);
-        if (fd < 0) {
-            close(cow_fd);
-            goto mtime_fail;
-        }
-        if (fstat(fd, &st) != 0) {
-            close(fd);
-            goto mtime_fail;
-        }
-        close(fd);
-        cow_header.mtime = cpu_to_be32(st.st_mtime);
-    mtime_fail:
-        pstrcpy(cow_header.backing_file, sizeof(cow_header.backing_file),
-                image_filename);
-    }
-    cow_header.sectorsize = cpu_to_be32(512);
-    cow_header.size = cpu_to_be64(image_sectors * 512);
-    ret = qemu_write_full(cow_fd, &cow_header, sizeof(cow_header));
-    if (ret != sizeof(cow_header)) {
-        ret = -errno;
-        goto exit;
-    }
-
-    /* resize to include at least all the bitmap */
-    ret = ftruncate(cow_fd, sizeof(cow_header) + ((image_sectors + 7) >> 3));
-    if (ret) {
-        ret = -errno;
-        goto exit;
-    }
-
-exit:
-    close(cow_fd);
-    return ret;
-}
-
-static void cow_flush(BlockDriverState *bs)
-{
-    bdrv_flush(bs->file);
-}
-
-static QEMUOptionParameter cow_create_options[] = {
-    {
-        .name = BLOCK_OPT_SIZE,
-        .type = OPT_SIZE,
-        .help = "Virtual disk size"
-    },
-    {
-        .name = BLOCK_OPT_BACKING_FILE,
-        .type = OPT_STRING,
-        .help = "File name of a base image"
-    },
-    { NULL }
-};
-
-static BlockDriver bdrv_cow = {
-    .format_name	= "cow",
-    .instance_size	= sizeof(BDRVCowState),
-    .bdrv_probe		= cow_probe,
-    .bdrv_open		= cow_open,
-    .bdrv_read		= cow_read,
-    .bdrv_write		= cow_write,
-    .bdrv_close		= cow_close,
-    .bdrv_create	= cow_create,
-    .bdrv_flush		= cow_flush,
-    .bdrv_is_allocated	= cow_is_allocated,
-
-    .create_options = cow_create_options,
-};
-
-static void bdrv_cow_init(void)
-{
-    bdrv_register(&bdrv_cow);
-}
-
-block_init(bdrv_cow_init);
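
For reference, the removed COW driver tracks which guest sectors have been written via a bitmap that sits immediately after the header, with the sector payload starting at the next 512-byte boundary (cow_sectors_offset above). A small sketch of that addressing, mirroring cow_set_bit(), is_bit_set() and the cow_open() computation:

    #include <stdint.h>

    /* The bit for guest sector N lives at byte header_size + N/8, bit N%8. */
    static uint64_t example_cow_bit_offset(uint64_t header_size, int64_t sector)
    {
        return header_size + (uint64_t)sector / 8;
    }

    static uint8_t example_cow_bit_mask(int64_t sector)
    {
        return (uint8_t)(1u << (sector % 8));
    }

    /* Sector data starts after the bitmap, rounded up to a 512-byte boundary,
     * exactly as cow_open() computed cow_sectors_offset. */
    static int64_t example_cow_sectors_offset(uint64_t header_size,
                                              int64_t total_sectors)
    {
        int64_t bitmap_size = ((total_sectors + 7) >> 3) + (int64_t)header_size;
        return (bitmap_size + 511) & ~511;
    }
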
diff --git a/block/qcow.c b/block/qcow.c
deleted file mode 100644
index 7b22d32..0000000
--- a/block/qcow.c
+++ /dev/null
@@ -1,977 +0,0 @@
-/*
- * Block driver for the QCOW format
- *
- * Copyright (c) 2004-2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "qemu/iov.h"
-#include "qemu/module.h"
-#include <zlib.h>
-#include "qemu/aes.h"
-
-/**************************************************************/
-/* QEMU COW block driver with compression and encryption support */
-
-#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
-#define QCOW_VERSION 1
-
-#define QCOW_CRYPT_NONE 0
-#define QCOW_CRYPT_AES  1
-
-#define QCOW_OFLAG_COMPRESSED (1LL << 63)
-
-typedef struct QCowHeader {
-    uint32_t magic;
-    uint32_t version;
-    uint64_t backing_file_offset;
-    uint32_t backing_file_size;
-    uint32_t mtime;
-    uint64_t size; /* in bytes */
-    uint8_t cluster_bits;
-    uint8_t l2_bits;
-    uint32_t crypt_method;
-    uint64_t l1_table_offset;
-} QCowHeader;
-
-#define L2_CACHE_SIZE 16
-
-typedef struct BDRVQcowState {
-    BlockDriverState *hd;
-    int cluster_bits;
-    int cluster_size;
-    int cluster_sectors;
-    int l2_bits;
-    int l2_size;
-    int l1_size;
-    uint64_t cluster_offset_mask;
-    uint64_t l1_table_offset;
-    uint64_t *l1_table;
-    uint64_t *l2_cache;
-    uint64_t l2_cache_offsets[L2_CACHE_SIZE];
-    uint32_t l2_cache_counts[L2_CACHE_SIZE];
-    uint8_t *cluster_cache;
-    uint8_t *cluster_data;
-    uint64_t cluster_cache_offset;
-    uint32_t crypt_method; /* current crypt method, 0 if no key yet */
-    uint32_t crypt_method_header;
-    AES_KEY aes_encrypt_key;
-    AES_KEY aes_decrypt_key;
-} BDRVQcowState;
-
-static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
-
-static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
-    const QCowHeader *cow_header = (const void *)buf;
-
-    if (buf_size >= sizeof(QCowHeader) &&
-        be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
-        be32_to_cpu(cow_header->version) == QCOW_VERSION)
-        return 100;
-    else
-        return 0;
-}
-
-static int qcow_open(BlockDriverState *bs, int flags)
-{
-    BDRVQcowState *s = bs->opaque;
-    int len, i, shift;
-    QCowHeader header;
-
-    if (bdrv_pread(bs->file, 0, &header, sizeof(header)) != sizeof(header))
-        goto fail;
-    be32_to_cpus(&header.magic);
-    be32_to_cpus(&header.version);
-    be64_to_cpus(&header.backing_file_offset);
-    be32_to_cpus(&header.backing_file_size);
-    be32_to_cpus(&header.mtime);
-    be64_to_cpus(&header.size);
-    be32_to_cpus(&header.crypt_method);
-    be64_to_cpus(&header.l1_table_offset);
-
-    if (header.magic != QCOW_MAGIC || header.version != QCOW_VERSION)
-        goto fail;
-    if (header.size <= 1 || header.cluster_bits < 9)
-        goto fail;
-    if (header.crypt_method > QCOW_CRYPT_AES)
-        goto fail;
-    s->crypt_method_header = header.crypt_method;
-    if (s->crypt_method_header)
-        bs->encrypted = 1;
-    s->cluster_bits = header.cluster_bits;
-    s->cluster_size = 1 << s->cluster_bits;
-    s->cluster_sectors = 1 << (s->cluster_bits - 9);
-    s->l2_bits = header.l2_bits;
-    s->l2_size = 1 << s->l2_bits;
-    bs->total_sectors = header.size / 512;
-    s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1;
-
-    /* read the level 1 table */
-    shift = s->cluster_bits + s->l2_bits;
-    s->l1_size = (header.size + (1LL << shift) - 1) >> shift;
-
-    s->l1_table_offset = header.l1_table_offset;
-    s->l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
-    if (!s->l1_table)
-        goto fail;
-    if (bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)) !=
-        s->l1_size * sizeof(uint64_t))
-        goto fail;
-    for(i = 0;i < s->l1_size; i++) {
-        be64_to_cpus(&s->l1_table[i]);
-    }
-    /* alloc L2 cache */
-    s->l2_cache = g_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
-    if (!s->l2_cache)
-        goto fail;
-    s->cluster_cache = g_malloc(s->cluster_size);
-    if (!s->cluster_cache)
-        goto fail;
-    s->cluster_data = g_malloc(s->cluster_size);
-    if (!s->cluster_data)
-        goto fail;
-    s->cluster_cache_offset = -1;
-
-    /* read the backing file name */
-    if (header.backing_file_offset != 0) {
-        len = header.backing_file_size;
-        if (len > 1023)
-            len = 1023;
-        if (bdrv_pread(bs->file, header.backing_file_offset, bs->backing_file, len) != len)
-            goto fail;
-        bs->backing_file[len] = '\0';
-    }
-    return 0;
-
- fail:
-    g_free(s->l1_table);
-    g_free(s->l2_cache);
-    g_free(s->cluster_cache);
-    g_free(s->cluster_data);
-    return -1;
-}
-
-static int qcow_set_key(BlockDriverState *bs, const char *key)
-{
-    BDRVQcowState *s = bs->opaque;
-    uint8_t keybuf[16];
-    int len, i;
-
-    memset(keybuf, 0, 16);
-    len = strlen(key);
-    if (len > 16)
-        len = 16;
-    /* XXX: we could compress the chars to 7 bits to increase
-       entropy */
-    for(i = 0;i < len;i++) {
-        keybuf[i] = key[i];
-    }
-    s->crypt_method = s->crypt_method_header;
-
-    if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
-        return -1;
-    if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
-        return -1;
-#if 0
-    /* test */
-    {
-        uint8_t in[16];
-        uint8_t out[16];
-        uint8_t tmp[16];
-        for(i=0;i<16;i++)
-            in[i] = i;
-        AES_encrypt(in, tmp, &s->aes_encrypt_key);
-        AES_decrypt(tmp, out, &s->aes_decrypt_key);
-        for(i = 0; i < 16; i++)
-            printf(" %02x", tmp[i]);
-        printf("\n");
-        for(i = 0; i < 16; i++)
-            printf(" %02x", out[i]);
-        printf("\n");
-    }
-#endif
-    return 0;
-}
-
-/* The crypt function is compatible with the linux cryptoloop
-   algorithm for < 4 GB images. NOTE: out_buf == in_buf is
-   supported */
-static void encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
-                            uint8_t *out_buf, const uint8_t *in_buf,
-                            int nb_sectors, int enc,
-                            const AES_KEY *key)
-{
-    union {
-        uint64_t ll[2];
-        uint8_t b[16];
-    } ivec;
-    int i;
-
-    for(i = 0; i < nb_sectors; i++) {
-        ivec.ll[0] = cpu_to_le64(sector_num);
-        ivec.ll[1] = 0;
-        AES_cbc_encrypt(in_buf, out_buf, 512, key,
-                        ivec.b, enc);
-        sector_num++;
-        in_buf += 512;
-        out_buf += 512;
-    }
-}
-
-/* 'allocate' is:
- *
- * 0 to not allocate.
- *
- * 1 to allocate a normal cluster (for sector indexes 'n_start' to
- * 'n_end')
- *
- * 2 to allocate a compressed cluster of size
- * 'compressed_size'. 'compressed_size' must be > 0 and <
- * cluster_size
- *
- * return 0 if not allocated.
- */
-static uint64_t get_cluster_offset(BlockDriverState *bs,
-                                   uint64_t offset, int allocate,
-                                   int compressed_size,
-                                   int n_start, int n_end)
-{
-    BDRVQcowState *s = bs->opaque;
-    int min_index, i, j, l1_index, l2_index;
-    uint64_t l2_offset, *l2_table, cluster_offset, tmp;
-    uint32_t min_count;
-    int new_l2_table;
-
-    l1_index = offset >> (s->l2_bits + s->cluster_bits);
-    l2_offset = s->l1_table[l1_index];
-    new_l2_table = 0;
-    if (!l2_offset) {
-        if (!allocate)
-            return 0;
-        /* allocate a new l2 entry */
-        l2_offset = bdrv_getlength(bs->file);
-        /* round to cluster size */
-        l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1);
-        /* update the L1 entry */
-        s->l1_table[l1_index] = l2_offset;
-        tmp = cpu_to_be64(l2_offset);
-        if (bdrv_pwrite_sync(bs->file,
-                s->l1_table_offset + l1_index * sizeof(tmp),
-                &tmp, sizeof(tmp)) < 0)
-            return 0;
-        new_l2_table = 1;
-    }
-    for(i = 0; i < L2_CACHE_SIZE; i++) {
-        if (l2_offset == s->l2_cache_offsets[i]) {
-            /* increment the hit count */
-            if (++s->l2_cache_counts[i] == 0xffffffff) {
-                for(j = 0; j < L2_CACHE_SIZE; j++) {
-                    s->l2_cache_counts[j] >>= 1;
-                }
-            }
-            l2_table = s->l2_cache + (i << s->l2_bits);
-            goto found;
-        }
-    }
-    /* not found: load a new entry in the least used one */
-    min_index = 0;
-    min_count = 0xffffffff;
-    for(i = 0; i < L2_CACHE_SIZE; i++) {
-        if (s->l2_cache_counts[i] < min_count) {
-            min_count = s->l2_cache_counts[i];
-            min_index = i;
-        }
-    }
-    l2_table = s->l2_cache + (min_index << s->l2_bits);
-    if (new_l2_table) {
-        memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
-        if (bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
-                s->l2_size * sizeof(uint64_t)) < 0)
-            return 0;
-    } else {
-        if (bdrv_pread(bs->file, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) !=
-            s->l2_size * sizeof(uint64_t))
-            return 0;
-    }
-    s->l2_cache_offsets[min_index] = l2_offset;
-    s->l2_cache_counts[min_index] = 1;
- found:
-    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
-    cluster_offset = be64_to_cpu(l2_table[l2_index]);
-    if (!cluster_offset ||
-        ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1)) {
-        if (!allocate)
-            return 0;
-        /* allocate a new cluster */
-        if ((cluster_offset & QCOW_OFLAG_COMPRESSED) &&
-            (n_end - n_start) < s->cluster_sectors) {
-            /* if the cluster is already compressed, we must
-               decompress it in the case it is not completely
-               overwritten */
-            if (decompress_cluster(bs, cluster_offset) < 0)
-                return 0;
-            cluster_offset = bdrv_getlength(bs->file);
-            cluster_offset = (cluster_offset + s->cluster_size - 1) &
-                ~(s->cluster_size - 1);
-            /* write the cluster content */
-            if (bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache, s->cluster_size) !=
-                s->cluster_size)
-                return -1;
-        } else {
-            cluster_offset = bdrv_getlength(bs->file);
-            if (allocate == 1) {
-                /* round to cluster size */
-                cluster_offset = (cluster_offset + s->cluster_size - 1) &
-                    ~(s->cluster_size - 1);
-                bdrv_truncate(bs->file, cluster_offset + s->cluster_size);
-                /* if encrypted, we must initialize the cluster
-                   content which won't be written */
-                if (s->crypt_method &&
-                    (n_end - n_start) < s->cluster_sectors) {
-                    uint64_t start_sect;
-                    start_sect = (offset & ~(s->cluster_size - 1)) >> 9;
-                    memset(s->cluster_data + 512, 0x00, 512);
-                    for(i = 0; i < s->cluster_sectors; i++) {
-                        if (i < n_start || i >= n_end) {
-                            encrypt_sectors(s, start_sect + i,
-                                            s->cluster_data,
-                                            s->cluster_data + 512, 1, 1,
-                                            &s->aes_encrypt_key);
-                            if (bdrv_pwrite(bs->file, cluster_offset + i * 512,
-                                            s->cluster_data, 512) != 512)
-                                return -1;
-                        }
-                    }
-                }
-            } else if (allocate == 2) {
-                cluster_offset |= QCOW_OFLAG_COMPRESSED |
-                    (uint64_t)compressed_size << (63 - s->cluster_bits);
-            }
-        }
-        /* update L2 table */
-        tmp = cpu_to_be64(cluster_offset);
-        l2_table[l2_index] = tmp;
-        if (bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp),
-                &tmp, sizeof(tmp)) < 0)
-            return 0;
-    }
-    return cluster_offset;
-}
-
-static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num,
-                             int nb_sectors, int *pnum)
-{
-    BDRVQcowState *s = bs->opaque;
-    int index_in_cluster, n;
-    uint64_t cluster_offset;
-
-    cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
-    index_in_cluster = sector_num & (s->cluster_sectors - 1);
-    n = s->cluster_sectors - index_in_cluster;
-    if (n > nb_sectors)
-        n = nb_sectors;
-    *pnum = n;
-    return (cluster_offset != 0);
-}
-
-static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
-                             const uint8_t *buf, int buf_size)
-{
-    z_stream strm1, *strm = &strm1;
-    int ret, out_len;
-
-    memset(strm, 0, sizeof(*strm));
-
-    strm->next_in = (uint8_t *)buf;
-    strm->avail_in = buf_size;
-    strm->next_out = out_buf;
-    strm->avail_out = out_buf_size;
-
-    ret = inflateInit2(strm, -12);
-    if (ret != Z_OK)
-        return -1;
-    ret = inflate(strm, Z_FINISH);
-    out_len = strm->next_out - out_buf;
-    if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
-        out_len != out_buf_size) {
-        inflateEnd(strm);
-        return -1;
-    }
-    inflateEnd(strm);
-    return 0;
-}
-
-static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
-{
-    BDRVQcowState *s = bs->opaque;
-    int ret, csize;
-    uint64_t coffset;
-
-    coffset = cluster_offset & s->cluster_offset_mask;
-    if (s->cluster_cache_offset != coffset) {
-        csize = cluster_offset >> (63 - s->cluster_bits);
-        csize &= (s->cluster_size - 1);
-        ret = bdrv_pread(bs->file, coffset, s->cluster_data, csize);
-        if (ret != csize)
-            return -1;
-        if (decompress_buffer(s->cluster_cache, s->cluster_size,
-                              s->cluster_data, csize) < 0) {
-            return -1;
-        }
-        s->cluster_cache_offset = coffset;
-    }
-    return 0;
-}
-
-#if 0
-
-static int qcow_read(BlockDriverState *bs, int64_t sector_num,
-                     uint8_t *buf, int nb_sectors)
-{
-    BDRVQcowState *s = bs->opaque;
-    int ret, index_in_cluster, n;
-    uint64_t cluster_offset;
-
-    while (nb_sectors > 0) {
-        cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
-        index_in_cluster = sector_num & (s->cluster_sectors - 1);
-        n = s->cluster_sectors - index_in_cluster;
-        if (n > nb_sectors)
-            n = nb_sectors;
-        if (!cluster_offset) {
-            if (bs->backing_hd) {
-                /* read from the base image */
-                ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
-                if (ret < 0)
-                    return -1;
-            } else {
-                memset(buf, 0, 512 * n);
-            }
-        } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
-            if (decompress_cluster(bs, cluster_offset) < 0)
-                return -1;
-            memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n);
-        } else {
-            ret = bdrv_pread(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512);
-            if (ret != n * 512)
-                return -1;
-            if (s->crypt_method) {
-                encrypt_sectors(s, sector_num, buf, buf, n, 0,
-                                &s->aes_decrypt_key);
-            }
-        }
-        nb_sectors -= n;
-        sector_num += n;
-        buf += n * 512;
-    }
-    return 0;
-}
-#endif
-
-typedef struct QCowAIOCB {
-    BlockDriverAIOCB common;
-    int64_t sector_num;
-    QEMUIOVector *qiov;
-    uint8_t *buf;
-    void *orig_buf;
-    int nb_sectors;
-    int n;
-    uint64_t cluster_offset;
-    uint8_t *cluster_data;
-    struct iovec hd_iov;
-    QEMUIOVector hd_qiov;
-    BlockDriverAIOCB *hd_aiocb;
-} QCowAIOCB;
-
-static void qcow_aio_cancel(BlockDriverAIOCB *blockacb)
-{
-    QCowAIOCB *acb = container_of(blockacb, QCowAIOCB, common);
-    if (acb->hd_aiocb)
-        bdrv_aio_cancel(acb->hd_aiocb);
-    qemu_aio_release(acb);
-}
-
-static AIOPool qcow_aio_pool = {
-    .aiocb_size         = sizeof(QCowAIOCB),
-    .cancel             = qcow_aio_cancel,
-};
-
-static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque, int is_write)
-{
-    QCowAIOCB *acb;
-
-    acb = qemu_aio_get(&qcow_aio_pool, bs, cb, opaque);
-    if (!acb)
-        return NULL;
-    acb->hd_aiocb = NULL;
-    acb->sector_num = sector_num;
-    acb->qiov = qiov;
-    if (qiov->niov > 1) {
-        acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size);
-        if (is_write)
-            qemu_iovec_to_buf(qiov, 0, acb->buf, qiov->size);
-    } else {
-        acb->buf = (uint8_t *)qiov->iov->iov_base;
-    }
-    acb->nb_sectors = nb_sectors;
-    acb->n = 0;
-    acb->cluster_offset = 0;
-    return acb;
-}
-
-static void qcow_aio_read_cb(void *opaque, int ret)
-{
-    QCowAIOCB *acb = opaque;
-    BlockDriverState *bs = acb->common.bs;
-    BDRVQcowState *s = bs->opaque;
-    int index_in_cluster;
-
-    acb->hd_aiocb = NULL;
-    if (ret < 0)
-        goto done;
-
- redo:
-    /* post process the read buffer */
-    if (!acb->cluster_offset) {
-        /* nothing to do */
-    } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
-        /* nothing to do */
-    } else {
-        if (s->crypt_method) {
-            encrypt_sectors(s, acb->sector_num, acb->buf, acb->buf,
-                            acb->n, 0,
-                            &s->aes_decrypt_key);
-        }
-    }
-
-    acb->nb_sectors -= acb->n;
-    acb->sector_num += acb->n;
-    acb->buf += acb->n * 512;
-
-    if (acb->nb_sectors == 0) {
-        /* request completed */
-        ret = 0;
-        goto done;
-    }
-
-    /* prepare next AIO request */
-    acb->cluster_offset = get_cluster_offset(bs, acb->sector_num << 9,
-                                             0, 0, 0, 0);
-    index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
-    acb->n = s->cluster_sectors - index_in_cluster;
-    if (acb->n > acb->nb_sectors)
-        acb->n = acb->nb_sectors;
-
-    if (!acb->cluster_offset) {
-        if (bs->backing_hd) {
-            /* read from the base image */
-            acb->hd_iov.iov_base = (void *)acb->buf;
-            acb->hd_iov.iov_len = acb->n * 512;
-            qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
-            acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num,
-                &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
-            if (acb->hd_aiocb == NULL)
-                goto done;
-        } else {
-            /* Note: in this case, no need to wait */
-            memset(acb->buf, 0, 512 * acb->n);
-            goto redo;
-        }
-    } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
-        /* add AIO support for compressed blocks ? */
-        if (decompress_cluster(bs, acb->cluster_offset) < 0)
-            goto done;
-        memcpy(acb->buf,
-               s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
-        goto redo;
-    } else {
-        if ((acb->cluster_offset & 511) != 0) {
-            ret = -EIO;
-            goto done;
-        }
-        acb->hd_iov.iov_base = (void *)acb->buf;
-        acb->hd_iov.iov_len = acb->n * 512;
-        qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
-        acb->hd_aiocb = bdrv_aio_readv(bs->file,
-                            (acb->cluster_offset >> 9) + index_in_cluster,
-                            &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
-        if (acb->hd_aiocb == NULL)
-            goto done;
-    }
-
-    return;
-
-done:
-    if (acb->qiov->niov > 1) {
-        qemu_iovec_from_buf(acb->qiov, 0, acb->orig_buf, acb->qiov->size);
-        qemu_vfree(acb->orig_buf);
-    }
-    acb->common.cb(acb->common.opaque, ret);
-    qemu_aio_release(acb);
-}
-
-static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
-{
-    QCowAIOCB *acb;
-
-    acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
-    if (!acb)
-        return NULL;
-
-    qcow_aio_read_cb(acb, 0);
-    return &acb->common;
-}
-
-static void qcow_aio_write_cb(void *opaque, int ret)
-{
-    QCowAIOCB *acb = opaque;
-    BlockDriverState *bs = acb->common.bs;
-    BDRVQcowState *s = bs->opaque;
-    int index_in_cluster;
-    uint64_t cluster_offset;
-    const uint8_t *src_buf;
-
-    acb->hd_aiocb = NULL;
-
-    if (ret < 0)
-        goto done;
-
-    acb->nb_sectors -= acb->n;
-    acb->sector_num += acb->n;
-    acb->buf += acb->n * 512;
-
-    if (acb->nb_sectors == 0) {
-        /* request completed */
-        ret = 0;
-        goto done;
-    }
-
-    index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
-    acb->n = s->cluster_sectors - index_in_cluster;
-    if (acb->n > acb->nb_sectors)
-        acb->n = acb->nb_sectors;
-    cluster_offset = get_cluster_offset(bs, acb->sector_num << 9, 1, 0,
-                                        index_in_cluster,
-                                        index_in_cluster + acb->n);
-    if (!cluster_offset || (cluster_offset & 511) != 0) {
-        ret = -EIO;
-        goto done;
-    }
-    if (s->crypt_method) {
-        if (!acb->cluster_data) {
-            acb->cluster_data = g_malloc0(s->cluster_size);
-            if (!acb->cluster_data) {
-                ret = -ENOMEM;
-                goto done;
-            }
-        }
-        encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf,
-                        acb->n, 1, &s->aes_encrypt_key);
-        src_buf = acb->cluster_data;
-    } else {
-        src_buf = acb->buf;
-    }
-
-    acb->hd_iov.iov_base = (void *)src_buf;
-    acb->hd_iov.iov_len = acb->n * 512;
-    qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
-    acb->hd_aiocb = bdrv_aio_writev(bs->file,
-                                    (cluster_offset >> 9) + index_in_cluster,
-                                    &acb->hd_qiov, acb->n,
-                                    qcow_aio_write_cb, acb);
-    if (acb->hd_aiocb == NULL)
-        goto done;
-    return;
-
-done:
-    if (acb->qiov->niov > 1)
-        qemu_vfree(acb->orig_buf);
-    acb->common.cb(acb->common.opaque, ret);
-    qemu_aio_release(acb);
-}
-
-static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
-{
-    BDRVQcowState *s = bs->opaque;
-    QCowAIOCB *acb;
-
-    s->cluster_cache_offset = -1; /* disable compressed cache */
-
-    acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
-    if (!acb)
-        return NULL;
-
-
-    qcow_aio_write_cb(acb, 0);
-    return &acb->common;
-}
-
-static void qcow_close(BlockDriverState *bs)
-{
-    BDRVQcowState *s = bs->opaque;
-    g_free(s->l1_table);
-    g_free(s->l2_cache);
-    g_free(s->cluster_cache);
-    g_free(s->cluster_data);
-}
-
-static int qcow_create(const char *filename, QEMUOptionParameter *options)
-{
-    int fd, header_size, backing_filename_len, l1_size, i, shift;
-    QCowHeader header;
-    uint64_t tmp;
-    int64_t total_size = 0;
-    const char *backing_file = NULL;
-    int flags = 0;
-    int ret;
-
-    /* Read out options */
-    while (options && options->name) {
-        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
-            total_size = options->value.n / 512;
-        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
-            backing_file = options->value.s;
-        } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) {
-            flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0;
-        }
-        options++;
-    }
-
-    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
-    if (fd < 0)
-        return -errno;
-    memset(&header, 0, sizeof(header));
-    header.magic = cpu_to_be32(QCOW_MAGIC);
-    header.version = cpu_to_be32(QCOW_VERSION);
-    header.size = cpu_to_be64(total_size * 512);
-    header_size = sizeof(header);
-    backing_filename_len = 0;
-    if (backing_file) {
-        if (strcmp(backing_file, "fat:")) {
-            header.backing_file_offset = cpu_to_be64(header_size);
-            backing_filename_len = strlen(backing_file);
-            header.backing_file_size = cpu_to_be32(backing_filename_len);
-            header_size += backing_filename_len;
-        } else {
-            /* special backing file for vvfat */
-            backing_file = NULL;
-        }
-        header.cluster_bits = 9; /* 512 byte cluster to avoid copying
-                                    unmodifyed sectors */
-        header.l2_bits = 12; /* 32 KB L2 tables */
-    } else {
-        header.cluster_bits = 12; /* 4 KB clusters */
-        header.l2_bits = 9; /* 4 KB L2 tables */
-    }
-    header_size = (header_size + 7) & ~7;
-    shift = header.cluster_bits + header.l2_bits;
-    l1_size = ((total_size * 512) + (1LL << shift) - 1) >> shift;
-
-    header.l1_table_offset = cpu_to_be64(header_size);
-    if (flags & BLOCK_FLAG_ENCRYPT) {
-        header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
-    } else {
-        header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
-    }
-
-    /* write all the data */
-    ret = qemu_write_full(fd, &header, sizeof(header));
-    if (ret != sizeof(header)) {
-        ret = -errno;
-        goto exit;
-    }
-
-    if (backing_file) {
-        ret = qemu_write_full(fd, backing_file, backing_filename_len);
-        if (ret != backing_filename_len) {
-            ret = -errno;
-            goto exit;
-        }
-
-    }
-    lseek(fd, header_size, SEEK_SET);
-    tmp = 0;
-    for(i = 0;i < l1_size; i++) {
-        ret = qemu_write_full(fd, &tmp, sizeof(tmp));
-        if (ret != sizeof(tmp)) {
-            ret = -errno;
-            goto exit;
-        }
-    }
-
-    ret = 0;
-exit:
-    close(fd);
-    return ret;
-}
-
-static int qcow_make_empty(BlockDriverState *bs)
-{
-    BDRVQcowState *s = bs->opaque;
-    uint32_t l1_length = s->l1_size * sizeof(uint64_t);
-    int ret;
-
-    memset(s->l1_table, 0, l1_length);
-    if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
-            l1_length) < 0)
-        return -1;
-    ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
-    if (ret < 0)
-        return ret;
-
-    memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
-    memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
-    memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t));
-
-    return 0;
-}
-
-/* XXX: put compressed sectors first, then all the cluster aligned
-   tables to avoid losing bytes in alignment */
-static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
-                                 const uint8_t *buf, int nb_sectors)
-{
-    BDRVQcowState *s = bs->opaque;
-    z_stream strm;
-    int ret, out_len;
-    uint8_t *out_buf;
-    uint64_t cluster_offset;
-
-    if (nb_sectors != s->cluster_sectors)
-        return -EINVAL;
-
-    out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
-    if (!out_buf)
-        return -1;
-
-    /* best compression, small window, no zlib header */
-    memset(&strm, 0, sizeof(strm));
-    ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
-                       Z_DEFLATED, -12,
-                       9, Z_DEFAULT_STRATEGY);
-    if (ret != 0) {
-        g_free(out_buf);
-        return -1;
-    }
-
-    strm.avail_in = s->cluster_size;
-    strm.next_in = (uint8_t *)buf;
-    strm.avail_out = s->cluster_size;
-    strm.next_out = out_buf;
-
-    ret = deflate(&strm, Z_FINISH);
-    if (ret != Z_STREAM_END && ret != Z_OK) {
-        g_free(out_buf);
-        deflateEnd(&strm);
-        return -1;
-    }
-    out_len = strm.next_out - out_buf;
-
-    deflateEnd(&strm);
-
-    if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
-        /* could not compress: write normal cluster */
-        bdrv_write(bs, sector_num, buf, s->cluster_sectors);
-    } else {
-        cluster_offset = get_cluster_offset(bs, sector_num << 9, 2,
-                                            out_len, 0, 0);
-        cluster_offset &= s->cluster_offset_mask;
-        if (bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len) != out_len) {
-            g_free(out_buf);
-            return -1;
-        }
-    }
-
-    g_free(out_buf);
-    return 0;
-}
-
-static void qcow_flush(BlockDriverState *bs)
-{
-    bdrv_flush(bs->file);
-}
-
-static BlockDriverAIOCB *qcow_aio_flush(BlockDriverState *bs,
-        BlockDriverCompletionFunc *cb, void *opaque)
-{
-    return bdrv_aio_flush(bs->file, cb, opaque);
-}
-
-static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
-{
-    BDRVQcowState *s = bs->opaque;
-    bdi->cluster_size = s->cluster_size;
-    return 0;
-}
-
-
-static QEMUOptionParameter qcow_create_options[] = {
-    {
-        .name = BLOCK_OPT_SIZE,
-        .type = OPT_SIZE,
-        .help = "Virtual disk size"
-    },
-    {
-        .name = BLOCK_OPT_BACKING_FILE,
-        .type = OPT_STRING,
-        .help = "File name of a base image"
-    },
-    {
-        .name = BLOCK_OPT_ENCRYPT,
-        .type = OPT_FLAG,
-        .help = "Encrypt the image"
-    },
-    { NULL }
-};
-
-static BlockDriver bdrv_qcow = {
-    .format_name	= "qcow",
-    .instance_size	= sizeof(BDRVQcowState),
-    .bdrv_probe		= qcow_probe,
-    .bdrv_open		= qcow_open,
-    .bdrv_close		= qcow_close,
-    .bdrv_create	= qcow_create,
-    .bdrv_flush		= qcow_flush,
-    .bdrv_is_allocated	= qcow_is_allocated,
-    .bdrv_set_key	= qcow_set_key,
-    .bdrv_make_empty	= qcow_make_empty,
-    .bdrv_aio_readv	= qcow_aio_readv,
-    .bdrv_aio_writev	= qcow_aio_writev,
-    .bdrv_aio_flush	= qcow_aio_flush,
-    .bdrv_write_compressed = qcow_write_compressed,
-    .bdrv_get_info	= qcow_get_info,
-
-    .create_options = qcow_create_options,
-};
-
-static void bdrv_qcow_init(void)
-{
-    bdrv_register(&bdrv_qcow);
-}
-
-block_init(bdrv_qcow_init);
diff --git a/bt-host.c b/bt-host.c
deleted file mode 100644
index 26cb22c..0000000
--- a/bt-host.c
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Wrap a host Bluetooth HCI socket in a struct HCIInfo.
- *
- * Copyright (C) 2008 Andrzej Zaborowski  <balrog@zabor.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 or
- * (at your option) version 3 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "qemu-common.h"
-#include "sysemu/char.h"
-#include "net/net.h"
-#include "sysemu/bt.h"
-
-#ifndef _WIN32
-# include <errno.h>
-# include <sys/ioctl.h>
-# include <sys/uio.h>
-# ifdef CONFIG_BLUEZ
-#  include <bluetooth/bluetooth.h>
-#  include <bluetooth/hci.h>
-#  include <bluetooth/hci_lib.h>
-# else
-#  include "hw/bt.h"
-#  define HCI_MAX_FRAME_SIZE	1028
-# endif
-
-struct bt_host_hci_s {
-    struct HCIInfo hci;
-    int fd;
-
-    uint8_t hdr[HCI_MAX_FRAME_SIZE];
-    int len;
-};
-
-static void bt_host_send(struct HCIInfo *hci,
-                int type, const uint8_t *data, int len)
-{
-    struct bt_host_hci_s *s = (struct bt_host_hci_s *) hci;
-    uint8_t pkt = type;
-    struct iovec iv[2];
-
-    iv[0].iov_base = (void *)&pkt;
-    iv[0].iov_len  = 1;
-    iv[1].iov_base = (void *) data;
-    iv[1].iov_len  = len;
-
-    while (writev(s->fd, iv, 2) < 0) {
-        if (errno != EAGAIN && errno != EINTR) {
-            fprintf(stderr, "qemu: error %i writing bluetooth packet.\n",
-                            errno);
-            return;
-        }
-    }
-}
-
-static void bt_host_cmd(struct HCIInfo *hci, const uint8_t *data, int len)
-{
-    bt_host_send(hci, HCI_COMMAND_PKT, data, len);
-}
-
-static void bt_host_acl(struct HCIInfo *hci, const uint8_t *data, int len)
-{
-    bt_host_send(hci, HCI_ACLDATA_PKT, data, len);
-}
-
-static void bt_host_sco(struct HCIInfo *hci, const uint8_t *data, int len)
-{
-    bt_host_send(hci, HCI_SCODATA_PKT, data, len);
-}
-
-static void bt_host_read(void *opaque)
-{
-    struct bt_host_hci_s *s = (struct bt_host_hci_s *) opaque;
-    uint8_t *pkt;
-    int pktlen;
-
-    /* Seems that we can't read only the header first and then the amount
-     * of data indicated in the header because Linux will discard everything
-     * that's not been read in one go.  */
-    s->len = read(s->fd, s->hdr, sizeof(s->hdr));
-
-    if (s->len < 0) {
-        fprintf(stderr, "qemu: error %i reading HCI frame\n", errno);
-        return;
-    }
-
-    pkt = s->hdr;
-    while (s->len --)
-        switch (*pkt ++) {
-        case HCI_EVENT_PKT:
-            if (s->len < 2)
-                goto bad_pkt;
-
-            pktlen = MIN(pkt[1] + 2, s->len);
-            s->hci.evt_recv(s->hci.opaque, pkt, pktlen);
-            s->len -= pktlen;
-            pkt += pktlen;
-
-            /* TODO: if this is an Inquiry Result event, it's also
-             * interpreted by Linux kernel before we received it, possibly
-             * we should clean the kernel Inquiry cache through
-             * ioctl(s->fd, HCI_INQUIRY, ...).  */
-            break;
-
-        case HCI_ACLDATA_PKT:
-            if (s->len < 4)
-                goto bad_pkt;
-
-            pktlen = MIN(((pkt[3] << 8) | pkt[2]) + 4, s->len);
-            s->hci.acl_recv(s->hci.opaque, pkt, pktlen);
-            s->len -= pktlen;
-            pkt += pktlen;
-            break;
-
-        case HCI_SCODATA_PKT:
-            if (s->len < 3)
-                goto bad_pkt;
-
-            pktlen = MIN(pkt[2] + 3, s->len);
-            s->len -= pktlen;
-            pkt += pktlen;
-
-        default:
-        bad_pkt:
-            fprintf(stderr, "qemu: bad HCI packet type %02x\n", pkt[-1]);
-        }
-}
-
-static int bt_host_bdaddr_set(struct HCIInfo *hci, const uint8_t *bd_addr)
-{
-    return -ENOTSUP;
-}
-
-struct HCIInfo *bt_host_hci(const char *id)
-{
-    struct bt_host_hci_s *s;
-    int fd = -1;
-# ifdef CONFIG_BLUEZ
-    int dev_id = hci_devid(id);
-    struct hci_filter flt;
-
-    if (dev_id < 0) {
-        fprintf(stderr, "qemu: `%s' not available\n", id);
-        return 0;
-    }
-
-    fd = hci_open_dev(dev_id);
-
-    /* XXX: can we ensure nobody else has the device opened?  */
-# endif
-
-    if (fd < 0) {
-        fprintf(stderr, "qemu: Can't open `%s': %s (%i)\n",
-                        id, strerror(errno), errno);
-        return NULL;
-    }
-
-# ifdef CONFIG_BLUEZ
-    hci_filter_clear(&flt);
-    hci_filter_all_ptypes(&flt);
-    hci_filter_all_events(&flt);
-
-    if (qemu_setsockopt(fd, SOL_HCI, HCI_FILTER, &flt, sizeof(flt)) < 0) {
-        fprintf(stderr, "qemu: Can't set HCI filter on socket (%i)\n", errno);
-        return 0;
-    }
-# endif
-
-    s = g_malloc0(sizeof(struct bt_host_hci_s));
-    s->fd = fd;
-    s->hci.cmd_send = bt_host_cmd;
-    s->hci.sco_send = bt_host_sco;
-    s->hci.acl_send = bt_host_acl;
-    s->hci.bdaddr_set = bt_host_bdaddr_set;
-
-    qemu_set_fd_handler(s->fd, bt_host_read, NULL, s);
-
-    return &s->hci;
-}
-#else
-struct HCIInfo *bt_host_hci(const char *id)
-{
-    fprintf(stderr, "qemu: bluetooth passthrough not supported (yet)\n");
-
-    return 0;
-}
-#endif
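
The deleted bt_host_send() above prepends a one-byte HCI packet-type indicator to the payload with writev(), so the payload itself is never copied. A minimal standalone sketch of that pattern follows; the function name is illustrative, not part of the tree, and (like the original) it does not handle short writes.

    #include <errno.h>
    #include <stddef.h>
    #include <sys/uio.h>

    /* Prepend a one-byte packet-type indicator without copying the payload,
     * retrying on EAGAIN/EINTR exactly as the deleted bt_host_send() did. */
    static int send_typed_packet(int fd, unsigned char type,
                                 const void *data, size_t len)
    {
        struct iovec iv[2];

        iv[0].iov_base = &type;
        iv[0].iov_len  = 1;
        iv[1].iov_base = (void *)data;
        iv[1].iov_len  = len;

        while (writev(fd, iv, 2) < 0) {
            if (errno != EAGAIN && errno != EINTR) {
                return -errno;   /* give up on a real error */
            }
        }
        return 0;
    }
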
diff --git a/bt-vhci.c b/bt-vhci.c
deleted file mode 100644
index c0e1d5c..0000000
--- a/bt-vhci.c
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * Support for host VHCIs inside qemu scatternets.
- *
- * Copyright (C) 2008 Andrzej Zaborowski  <balrog@zabor.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 or
- * (at your option) version 3 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "qemu-common.h"
-#include "sysemu/char.h"
-#include "net/net.h"
-#include "hw/bt.h"
-
-#define VHCI_DEV	"/dev/vhci"
-#define VHCI_UDEV	"/dev/hci_vhci"
-
-struct bt_vhci_s {
-    int fd;
-    struct HCIInfo *info;
-
-    uint8_t hdr[4096];
-    int len;
-};
-
-static void vhci_read(void *opaque)
-{
-    struct bt_vhci_s *s = (struct bt_vhci_s *) opaque;
-    uint8_t *pkt;
-    int pktlen;
-
-    /* Seems that we can't read only the header first and then the amount
-     * of data indicated in the header because Linux will discard everything
-     * that's not been read in one go.  */
-    s->len = read(s->fd, s->hdr, sizeof(s->hdr));
-
-    if (s->len < 0) {
-        fprintf(stderr, "qemu: error %i reading the PDU\n", errno);
-        return;
-    }
-
-    pkt = s->hdr;
-    while (s->len --)
-        switch (*pkt ++) {
-        case HCI_COMMAND_PKT:
-            if (s->len < 3)
-                goto bad_pkt;
-
-            pktlen = MIN(pkt[2] + 3, s->len);
-            s->info->cmd_send(s->info, pkt, pktlen);
-            s->len -= pktlen;
-            pkt += pktlen;
-            break;
-
-        case HCI_ACLDATA_PKT:
-            if (s->len < 4)
-                goto bad_pkt;
-
-            pktlen = MIN(((pkt[3] << 8) | pkt[2]) + 4, s->len);
-            s->info->acl_send(s->info, pkt, pktlen);
-            s->len -= pktlen;
-            pkt += pktlen;
-            break;
-
-        case HCI_SCODATA_PKT:
-            if (s->len < 3)
-                goto bad_pkt;
-
-            pktlen = MIN(pkt[2] + 3, s->len);
-            s->info->sco_send(s->info, pkt, pktlen);
-            s->len -= pktlen;
-            pkt += pktlen;
-            break;
-
-        default:
-        bad_pkt:
-            fprintf(stderr, "qemu: bad HCI packet type %02x\n", pkt[-1]);
-        }
-}
-
-static void vhci_host_send(void *opaque,
-                int type, const uint8_t *data, int len)
-{
-    struct bt_vhci_s *s = (struct bt_vhci_s *) opaque;
-#if 0
-    uint8_t pkt = type;
-    struct iovec iv[2];
-
-    iv[0].iov_base = &pkt;
-    iv[0].iov_len  = 1;
-    iv[1].iov_base = (void *) data;
-    iv[1].iov_len  = len;
-
-    while (writev(s->fd, iv, 2) < 0)
-        if (errno != EAGAIN && errno != EINTR) {
-            fprintf(stderr, "qemu: error %i writing bluetooth packet.\n",
-                            errno);
-            return;
-        }
-#else
-    /* Apparently VHCI wants us to write everything in one chunk :-(  */
-    static uint8_t buf[4096];
-
-    buf[0] = type;
-    memcpy(buf + 1, data, len);
-
-    while (write(s->fd, buf, len + 1) < 0)
-        if (errno != EAGAIN && errno != EINTR) {
-            fprintf(stderr, "qemu: error %i writing bluetooth packet.\n",
-                            errno);
-            return;
-        }
-#endif
-}
-
-static void vhci_out_hci_packet_event(void *opaque,
-                const uint8_t *data, int len)
-{
-    vhci_host_send(opaque, HCI_EVENT_PKT, data, len);
-}
-
-static void vhci_out_hci_packet_acl(void *opaque,
-                const uint8_t *data, int len)
-{
-    vhci_host_send(opaque, HCI_ACLDATA_PKT, data, len);
-}
-
-void bt_vhci_init(struct HCIInfo *info)
-{
-    struct bt_vhci_s *s;
-    int err[2];
-    int fd;
-
-    fd = open(VHCI_DEV, O_RDWR);
-    err[0] = errno;
-    if (fd < 0) {
-        fd = open(VHCI_UDEV, O_RDWR);
-        err[1] = errno;
-    }
-
-    if (fd < 0) {
-        fprintf(stderr, "qemu: Can't open `%s': %s (%i)\n",
-                        VHCI_DEV, strerror(err[0]), err[0]);
-        fprintf(stderr, "qemu: Can't open `%s': %s (%i)\n",
-                        VHCI_UDEV, strerror(err[1]), err[1]);
-        exit(-1);
-    }
-
-    s = g_malloc0(sizeof(struct bt_vhci_s));
-    s->fd = fd;
-    s->info = info ?: qemu_next_hci();
-    s->info->opaque = s;
-    s->info->evt_recv = vhci_out_hci_packet_event;
-    s->info->acl_recv = vhci_out_hci_packet_acl;
-
-    qemu_set_fd_handler(s->fd, vhci_read, NULL, s);
-}
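
Both deleted files (bt-host.c and bt-vhci.c) walk a buffer of concatenated HCI packets and derive each packet's length from its header: events use pkt[1] + 2, ACL data uses ((pkt[3] << 8) | pkt[2]) + 4, and SCO/command packets use pkt[2] + 3. A compact sketch of that length calculation, using the standard H4 packet-type values, is below; the helper name is illustrative only.

    #include <stddef.h>
    #include <stdint.h>

    /* Standard H4 HCI packet type indicators, as used by the deleted code. */
    enum { HCI_COMMAND_PKT = 0x01, HCI_ACLDATA_PKT = 0x02,
           HCI_SCODATA_PKT = 0x03, HCI_EVENT_PKT = 0x04 };

    /* Return the length of one HCI packet body (excluding the leading type
     * byte), or 0 for an unknown type. 'pkt' points just past the type byte,
     * exactly as in the parsing loops above. */
    static size_t hci_packet_len(uint8_t type, const uint8_t *pkt)
    {
        switch (type) {
        case HCI_COMMAND_PKT: return pkt[2] + 3;                 /* opcode(2) + plen(1) */
        case HCI_ACLDATA_PKT: return ((pkt[3] << 8) | pkt[2]) + 4; /* handle(2) + dlen(2) */
        case HCI_SCODATA_PKT: return pkt[2] + 3;                 /* handle(2) + dlen(1) */
        case HCI_EVENT_PKT:   return pkt[1] + 2;                 /* event(1) + plen(1) */
        default:              return 0;
        }
    }
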
diff --git a/cpu-exec.c b/cpu-exec.c
index a3a13e9..9bc29d3 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -17,7 +17,7 @@
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
 #include "config.h"
-#include "exec.h"
+#include "cpu.h"
 #include "disas/disas.h"
 #include "tcg.h"
 #include "sysemu/kvm.h"
@@ -40,32 +40,26 @@
 #endif
 #endif
 
-#if defined(__sparc__) && !defined(CONFIG_SOLARIS)
-// Work around ugly bugs in glibc that mangle global register contents
-#undef env
-#define env cpu_single_env
-#endif
-
 int tb_invalidated_flag;
 
 //#define CONFIG_DEBUG_EXEC
 //#define DEBUG_SIGNAL
 
-int qemu_cpu_has_work(CPUOldState *env)
+bool qemu_cpu_has_work(CPUState *cpu)
 {
-    return cpu_has_work(env);
+    return cpu_has_work(cpu);
 }
 
-void cpu_loop_exit(CPUArchState* env1)
+void cpu_loop_exit(CPUArchState* env)
 {
-    env1->current_tb = NULL;
-    longjmp(env1->jmp_env, 1);
+    env->current_tb = NULL;
+    longjmp(env->jmp_env, 1);
 }
 
 /* exit the current TB from a signal handler. The host registers are
    restored in a state compatible with the CPU emulator
  */
-void cpu_resume_from_signal(CPUArchState *env1, void *puc)
+void cpu_resume_from_signal(CPUArchState *env, void *puc)
 {
 #if !defined(CONFIG_SOFTMMU)
 #ifdef __linux__
@@ -75,8 +69,6 @@
 #endif
 #endif
 
-    env = env1;
-
     /* XXX: restore cpu registers saved in host registers */
 
 #if !defined(CONFIG_SOFTMMU)
@@ -99,9 +91,9 @@
 
 /* Execute the code without caching the generated code. An interpreter
    could be used if available. */
-static void cpu_exec_nocache(int max_cycles, TranslationBlock *orig_tb)
+static void cpu_exec_nocache(CPUArchState *env, int max_cycles, TranslationBlock *orig_tb)
 {
-    unsigned long next_tb;
+    tcg_target_ulong next_tb;
     TranslationBlock *tb;
 
     /* Should never happen.
@@ -113,7 +105,7 @@
                      max_cycles);
     env->current_tb = tb;
     /* execute the generated code */
-    next_tb = tcg_qemu_tb_exec(tb->tc_ptr);
+    next_tb = tcg_qemu_tb_exec(env, tb->tc_ptr);
     env->current_tb = NULL;
 
     if ((next_tb & 3) == 2) {
@@ -125,7 +117,8 @@
     tb_free(tb);
 }
 
-static TranslationBlock *tb_find_slow(target_ulong pc,
+static TranslationBlock *tb_find_slow(CPUArchState *env,
+                                      target_ulong pc,
                                       target_ulong cs_base,
                                       uint64_t flags)
 {
@@ -136,11 +129,11 @@
     tb_invalidated_flag = 0;
 
     /* find translated block using physical mappings */
-    phys_pc = get_phys_addr_code(env, pc);
+    phys_pc = get_page_addr_code(env, pc);
     phys_page1 = phys_pc & TARGET_PAGE_MASK;
     phys_page2 = -1;
     h = tb_phys_hash_func(phys_pc);
-    ptb1 = &tb_phys_hash[h];
+    ptb1 = &tcg_ctx.tb_ctx.tb_phys_hash[h];
     for(;;) {
         tb = *ptb1;
         if (!tb)
@@ -153,7 +146,7 @@
             if (tb->page_addr[1] != -1) {
                 virt_page2 = (pc & TARGET_PAGE_MASK) +
                     TARGET_PAGE_SIZE;
-                phys_page2 = get_phys_addr_code(env, virt_page2);
+                phys_page2 = get_page_addr_code(env, virt_page2);
                 if (tb->page_addr[1] == phys_page2)
                     goto found;
             } else {
@@ -170,15 +163,15 @@
     /* Move the last found TB to the head of the list */
     if (likely(*ptb1)) {
         *ptb1 = tb->phys_hash_next;
-        tb->phys_hash_next = tb_phys_hash[h];
-        tb_phys_hash[h] = tb;
+        tb->phys_hash_next = tcg_ctx.tb_ctx.tb_phys_hash[h];
+        tcg_ctx.tb_ctx.tb_phys_hash[h] = tb;
     }
     /* we add the TB in the virtual pc hash table */
     env->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
     return tb;
 }
 
-static inline TranslationBlock *tb_find_fast(void)
+static inline TranslationBlock *tb_find_fast(CPUArchState *env)
 {
     TranslationBlock *tb;
     target_ulong cs_base, pc;
@@ -191,19 +184,16 @@
     tb = env->tb_jmp_cache[tb_jmp_cache_hash_func(pc)];
     if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base ||
                  tb->flags != flags)) {
-        tb = tb_find_slow(pc, cs_base, flags);
+        tb = tb_find_slow(env, pc, cs_base, flags);
     }
     return tb;
 }
 
 static CPUDebugExcpHandler *debug_excp_handler;
 
-CPUDebugExcpHandler *cpu_set_debug_excp_handler(CPUDebugExcpHandler *handler)
+void cpu_set_debug_excp_handler(CPUDebugExcpHandler *handler)
 {
-    CPUDebugExcpHandler *old_handler = debug_excp_handler;
-
     debug_excp_handler = handler;
-    return old_handler;
 }
 
 static void cpu_handle_debug_exception(CPUOldState *env)
@@ -232,43 +222,37 @@
  */
 int need_handle_intr_request(CPUOldState *env)
 {
+    CPUState *cpu = ENV_GET_CPU(env);
 #ifdef CONFIG_HAX
-    if (!hax_enabled() || hax_vcpu_emulation_mode(env))
-        return env->interrupt_request;
+    if (!hax_enabled() || hax_vcpu_emulation_mode(cpu))
+        return cpu->interrupt_request;
     return 0;
 #else
-    return env->interrupt_request;
+    return cpu->interrupt_request;
 #endif
 }
 
-int cpu_exec(CPUOldState *env1)
+int cpu_exec(CPUOldState *env)
 {
-    volatile host_reg_t saved_env_reg;
     int ret, interrupt_request;
     TranslationBlock *tb;
     uint8_t *tc_ptr;
-    unsigned long next_tb;
+    tcg_target_ulong next_tb;
+    CPUState *cpu = ENV_GET_CPU(env);
 
-    if (env1->halted) {
-        if (!cpu_has_work(env1)) {
+    if (cpu->halted) {
+        if (!cpu_has_work(cpu)) {
         return EXCP_HALTED;
         }
 
-        env1->halted = 0;
+        cpu->halted = 0;
     }
 
-    cpu_single_env = env1;
-
-    /* the access to env below is actually saving the global register's
-       value, so that files not including target-xyz/exec.h are free to
-       use it.  */
-    QEMU_BUILD_BUG_ON (sizeof (saved_env_reg) != sizeof (env));
-    saved_env_reg = (host_reg_t) env;
-    barrier();
-    env = env1;
+    current_cpu = cpu;
+    //cpu_single_env = env;
 
     if (unlikely(exit_request)) {
-        env->exit_request = 1;
+        cpu->exit_request = 1;
     }
 
 #if defined(TARGET_I386)
@@ -303,11 +287,6 @@
     /* prepare setjmp context for exception handling */
     for(;;) {
         if (setjmp(env->jmp_env) == 0) {
-#if defined(__sparc__) && !defined(CONFIG_SOLARIS)
-#undef env
-                    env = cpu_single_env;
-#define env cpu_single_env
-#endif
             /* if an exception is pending, we execute it here */
             if (env->exception_index >= 0) {
                 if (env->exception_index >= EXCP_INTERRUPT) {
@@ -323,76 +302,37 @@
                        which will be handled outside the cpu execution
                        loop */
 #if defined(TARGET_I386)
-                    do_interrupt_user(env->exception_index,
-                                      env->exception_is_int,
-                                      env->error_code,
-                                      env->exception_next_eip);
-                    /* successfully delivered */
-                    env->old_exception = -1;
+                    do_interrupt(env);
 #endif
                     ret = env->exception_index;
                     break;
 #else
-#if defined(TARGET_I386)
-                    /* simulate a real cpu exception. On i386, it can
-                       trigger new exceptions, but we do not handle
-                       double or triple faults yet. */
-                    do_interrupt(env->exception_index,
-                                 env->exception_is_int,
-                                 env->error_code,
-                                 env->exception_next_eip, 0);
-                    /* successfully delivered */
-                    env->old_exception = -1;
-#elif defined(TARGET_PPC)
                     do_interrupt(env);
-#elif defined(TARGET_LM32)
-                    do_interrupt(env);
-#elif defined(TARGET_MICROBLAZE)
-                    do_interrupt(env);
-#elif defined(TARGET_MIPS)
-                    do_interrupt(env);
-#elif defined(TARGET_SPARC)
-                    do_interrupt(env);
-#elif defined(TARGET_ARM)
-                    do_interrupt(env);
-#elif defined(TARGET_UNICORE32)
-                    do_interrupt(env);
-#elif defined(TARGET_SH4)
-		    do_interrupt(env);
-#elif defined(TARGET_ALPHA)
-                    do_interrupt(env);
-#elif defined(TARGET_CRIS)
-                    do_interrupt(env);
-#elif defined(TARGET_M68K)
-                    do_interrupt(0);
-#elif defined(TARGET_S390X)
-                    do_interrupt(env);
-#endif
                     env->exception_index = -1;
 #endif
                 }
             }
 
 #ifdef CONFIG_HAX
-            if (hax_enabled() && !hax_vcpu_exec(env))
+            if (hax_enabled() && !hax_vcpu_exec(cpu))
                 longjmp(env->jmp_env, 1);
 #endif
 
             if (kvm_enabled()) {
-                kvm_cpu_exec(env);
+                kvm_cpu_exec(cpu);
                 longjmp(env->jmp_env, 1);
             }
 
             next_tb = 0; /* force lookup of first TB */
             for(;;) {
-                interrupt_request = env->interrupt_request;
+                interrupt_request = cpu->interrupt_request;
                 if (unlikely(need_handle_intr_request(env))) {
-                    if (unlikely(env->singlestep_enabled & SSTEP_NOIRQ)) {
+                    if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
                         /* Mask out external interrupts for this step. */
                         interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK;
                     }
                     if (interrupt_request & CPU_INTERRUPT_DEBUG) {
-                        env->interrupt_request &= ~CPU_INTERRUPT_DEBUG;
+                        cpu->interrupt_request &= ~CPU_INTERRUPT_DEBUG;
                         env->exception_index = EXCP_DEBUG;
                         cpu_loop_exit(env);
                     }
@@ -400,15 +340,15 @@
     defined(TARGET_PPC) || defined(TARGET_ALPHA) || defined(TARGET_CRIS) || \
     defined(TARGET_MICROBLAZE) || defined(TARGET_LM32) || defined(TARGET_UNICORE32)
                     if (interrupt_request & CPU_INTERRUPT_HALT) {
-                        env->interrupt_request &= ~CPU_INTERRUPT_HALT;
-                        env->halted = 1;
+                        cpu->interrupt_request &= ~CPU_INTERRUPT_HALT;
+                        cpu->halted = 1;
                         env->exception_index = EXCP_HLT;
                         cpu_loop_exit(env);
                     }
 #endif
 #if defined(TARGET_I386)
                     if (interrupt_request & CPU_INTERRUPT_INIT) {
-                            svm_check_intercept(SVM_EXIT_INIT);
+                            svm_check_intercept(env, SVM_EXIT_INIT);
                             do_cpu_init(env);
                             env->exception_index = EXCP_HALTED;
                             cpu_loop_exit(env);
@@ -417,19 +357,19 @@
                     } else if (env->hflags2 & HF2_GIF_MASK) {
                         if ((interrupt_request & CPU_INTERRUPT_SMI) &&
                             !(env->hflags & HF_SMM_MASK)) {
-                            svm_check_intercept(SVM_EXIT_SMI);
-                            env->interrupt_request &= ~CPU_INTERRUPT_SMI;
-                            do_smm_enter();
+                            svm_check_intercept(env, SVM_EXIT_SMI);
+                            cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
+                            do_smm_enter(env);
                             next_tb = 0;
                         } else if ((interrupt_request & CPU_INTERRUPT_NMI) &&
                                    !(env->hflags2 & HF2_NMI_MASK)) {
-                            env->interrupt_request &= ~CPU_INTERRUPT_NMI;
+                            cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
                             env->hflags2 |= HF2_NMI_MASK;
-                            do_interrupt(EXCP02_NMI, 0, 0, 0, 1);
+                            do_interrupt_x86_hardirq(env, EXCP02_NMI, 1);
                             next_tb = 0;
 			} else if (interrupt_request & CPU_INTERRUPT_MCE) {
-                            env->interrupt_request &= ~CPU_INTERRUPT_MCE;
-                            do_interrupt(EXCP12_MCHK, 0, 0, 0, 0);
+                            cpu->interrupt_request &= ~CPU_INTERRUPT_MCE;
+                            do_interrupt_x86_hardirq(env, EXCP12_MCHK, 0);
                             next_tb = 0;
                         } else if ((interrupt_request & CPU_INTERRUPT_HARD) &&
                                    (((env->hflags2 & HF2_VINTR_MASK) &&
@@ -438,16 +378,11 @@
                                      (env->eflags & IF_MASK &&
                                       !(env->hflags & HF_INHIBIT_IRQ_MASK))))) {
                             int intno;
-                            svm_check_intercept(SVM_EXIT_INTR);
-                            env->interrupt_request &= ~(CPU_INTERRUPT_HARD | CPU_INTERRUPT_VIRQ);
+                            svm_check_intercept(env, SVM_EXIT_INTR);
+                            cpu->interrupt_request &= ~(CPU_INTERRUPT_HARD | CPU_INTERRUPT_VIRQ);
                             intno = cpu_get_pic_interrupt(env);
                             qemu_log_mask(CPU_LOG_TB_IN_ASM, "Servicing hardware INT=0x%02x\n", intno);
-#if defined(__sparc__) && !defined(CONFIG_SOLARIS)
-#undef env
-                    env = cpu_single_env;
-#define env cpu_single_env
-#endif
-                            do_interrupt(intno, 0, 0, 0, 1);
+                            do_interrupt_x86_hardirq(env, intno, 1);
                             /* ensure that no TB jump will be modified as
                                the program flow was changed */
                             next_tb = 0;
@@ -457,11 +392,11 @@
                                    !(env->hflags & HF_INHIBIT_IRQ_MASK)) {
                             int intno;
                             /* FIXME: this should respect TPR */
-                            svm_check_intercept(SVM_EXIT_VINTR);
+                            svm_check_intercept(env, SVM_EXIT_VINTR);
                             intno = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_vector));
                             qemu_log_mask(CPU_LOG_TB_IN_ASM, "Servicing virtual hardware INT=0x%02x\n", intno);
-                            do_interrupt(intno, 0, 0, 0, 1);
-                            env->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
+                            do_interrupt_x86_hardirq(env, intno, 1);
+                            cpu->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
                             next_tb = 0;
 #endif
                         }
@@ -582,7 +517,7 @@
                            provide/save the vector when the interrupt is
                            first signalled.  */
                         env->exception_index = env->pending_vector;
-                        do_interrupt(1);
+                        do_interrupt_m68k_hardirq(env, 1);
                         next_tb = 0;
                     }
 #elif defined(TARGET_S390X) && !defined(CONFIG_USER_ONLY)
@@ -594,15 +529,15 @@
 #endif
                    /* Don't use the cached interrupt_request value,
                       do_interrupt may have updated the EXITTB flag. */
-                    if (env->interrupt_request & CPU_INTERRUPT_EXITTB) {
-                        env->interrupt_request &= ~CPU_INTERRUPT_EXITTB;
+                    if (cpu->interrupt_request & CPU_INTERRUPT_EXITTB) {
+                        cpu->interrupt_request &= ~CPU_INTERRUPT_EXITTB;
                         /* ensure that no TB jump will be modified as
                            the program flow was changed */
                         next_tb = 0;
                     }
                 }
-                if (unlikely(env->exit_request)) {
-                    env->exit_request = 0;
+                if (unlikely(cpu->exit_request)) {
+                    cpu->exit_request = 0;
                     env->exception_index = EXCP_INTERRUPT;
                     cpu_loop_exit(env);
                 }
@@ -610,22 +545,23 @@
                 if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
                     /* restore flags in standard format */
 #if defined(TARGET_I386)
-                    env->eflags = env->eflags | helper_cc_compute_all(CC_OP) | (DF & DF_MASK);
-                    log_cpu_state(env, X86_DUMP_CCOP);
+                    env->eflags = env->eflags | cpu_cc_compute_all(env, CC_OP)
+                        | (DF & DF_MASK);
+                    log_cpu_state(cpu, X86_DUMP_CCOP);
                     env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
 #elif defined(TARGET_M68K)
                     cpu_m68k_flush_flags(env, env->cc_op);
                     env->cc_op = CC_OP_FLAGS;
                     env->sr = (env->sr & 0xffe0)
                               | env->cc_dest | (env->cc_x << 4);
-                    log_cpu_state(env, 0);
+                    log_cpu_state(cpu, 0);
 #else
-                    log_cpu_state(env, 0);
+                    log_cpu_state(cpu, 0);
 #endif
                 }
 #endif /* DEBUG_DISAS || CONFIG_DEBUG_EXEC */
-                spin_lock(&tb_lock);
-                tb = tb_find_fast();
+                spin_lock(&tcg_ctx.tb_ctx.tb_lock);
+                tb = tb_find_fast(env);
                 /* Note: we do it here to avoid a gcc bug on Mac OS X when
                    doing it in tb_find_slow */
                 if (tb_invalidated_flag) {
@@ -646,7 +582,7 @@
                 if (next_tb != 0 && tb->page_addr[1] == -1) {
                     tb_add_jump((TranslationBlock *)(next_tb & ~3), next_tb & 3, tb);
                 }
-                spin_unlock(&tb_lock);
+                spin_unlock(&tcg_ctx.tb_ctx.tb_lock);
 
                 /* cpu_interrupt might be called while translating the
                    TB, but before it is linked into a potentially
@@ -654,15 +590,10 @@
                    starting execution if there is a pending interrupt. */
                 env->current_tb = tb;
                 barrier();
-                if (likely(!env->exit_request)) {
+                if (likely(!cpu->exit_request)) {
                     tc_ptr = tb->tc_ptr;
                 /* execute the generated code */
-#if defined(__sparc__) && !defined(CONFIG_SOLARIS)
-#undef env
-                    env = cpu_single_env;
-#define env cpu_single_env
-#endif
-                    next_tb = tcg_qemu_tb_exec(tc_ptr);
+                    next_tb = tcg_qemu_tb_exec(env, tc_ptr);
                     if ((next_tb & 3) == 2) {
                         /* Instruction counter expired.  */
                         int insns_left;
@@ -683,7 +614,7 @@
                         } else {
                             if (insns_left > 0) {
                                 /* Execute remaining instructions.  */
-                                cpu_exec_nocache(insns_left, tb);
+                                cpu_exec_nocache(env, insns_left, tb);
                             }
                             env->exception_index = EXCP_INTERRUPT;
                             next_tb = 0;
@@ -693,19 +624,24 @@
                 }
                 env->current_tb = NULL;
 #ifdef CONFIG_HAX
-                if (hax_enabled() && hax_stop_emulation(env))
+                if (hax_enabled() && hax_stop_emulation(cpu))
                     cpu_loop_exit(env);
 #endif
                 /* reset soft MMU for next block (it can currently
                    only be set by a memory fault) */
             } /* for(;;) */
+        } else {
+            /* Reload env after longjmp - the compiler may have smashed all
+             * local variables as longjmp is marked 'noreturn'. */
+            env = cpu_single_env;
         }
     } /* for(;;) */
 
 
 #if defined(TARGET_I386)
     /* restore flags in standard format */
-    env->eflags = env->eflags | helper_cc_compute_all(CC_OP) | (DF & DF_MASK);
+    env->eflags = env->eflags | cpu_cc_compute_all(env, CC_OP)
+        | (DF & DF_MASK);
 #elif defined(TARGET_ARM)
     /* XXX: Save/restore host fpu exception state?.  */
 #elif defined(TARGET_UNICORE32)
@@ -728,28 +664,11 @@
 #error unsupported target CPU
 #endif
 
-    /* restore global registers */
-    barrier();
-    env = (void *) saved_env_reg;
-
     /* fail safe : never use cpu_single_env outside cpu_exec() */
-    cpu_single_env = NULL;
+    current_cpu = NULL;
     return ret;
 }
 
-/* must only be called from the generated code as an exception can be
-   generated */
-void tb_invalidate_page_range(target_ulong start, target_ulong end)
-{
-    /* XXX: cannot enable it yet because it yields to MMU exception
-       where NIP != read address on PowerPC */
-#if 0
-    target_ulong phys_addr;
-    phys_addr = get_phys_addr_code(env, start);
-    tb_invalidate_phys_page_range(phys_addr, phys_addr + end - start, 0);
-#endif
-}
-
 #if defined(TARGET_I386) && defined(CONFIG_USER_ONLY)
 
 void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector)
@@ -835,7 +754,7 @@
     if (tb) {
         /* the PC is inside the translated code. It means that we have
            a virtual CPU fault */
-        cpu_restore_state(tb, env, pc);
+        cpu_restore_state(env, pc);
     }
 
     /* we restore the process signal mask as the sigreturn should
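
The new "Reload env after longjmp" branch in cpu_exec() above deals with a standard setjmp()/longjmp() rule: automatic variables that are modified between setjmp() and longjmp() and are not volatile-qualified have indeterminate values once the jump returns. A minimal, self-contained illustration (nothing here is QEMU code):

    #include <setjmp.h>
    #include <stdio.h>

    static jmp_buf jb;

    static void fail(void) { longjmp(jb, 1); }

    int main(void)
    {
        static int objects[2];
        /* 'p' is modified after setjmp() and read after the longjmp() return,
         * so it must be volatile (or reloaded from a global) -- the same
         * reason cpu_exec() reloads 'env' from cpu_single_env above. */
        int *volatile p = &objects[0];

        if (setjmp(jb) == 0) {
            p = &objects[1];   /* would be indeterminate here without volatile */
            fail();
        }
        printf("p points at object %d\n", (int)(p - objects));
        return 0;
    }
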
diff --git a/cpus.c b/cpus.c
index ab34ad0..f97720a 100644
--- a/cpus.c
+++ b/cpus.c
@@ -24,9 +24,9 @@
 #include "config-host.h"
 
 #include "cpu.h"
-#include "exec/exec-all.h"
 #include "monitor/monitor.h"
 #include "sysemu/sysemu.h"
+#include "cpu.h"
 #include "exec/exec-all.h"
 #include "exec/gdbstub.h"
 #include "sysemu/dma.h"
@@ -36,25 +36,25 @@
 
 #include "sysemu/cpus.h"
 
-static CPUOldState *cur_cpu;
-static CPUOldState *next_cpu;
+static CPUState *cur_cpu;
+static CPUState *next_cpu;
 
 /***********************************************************/
 void hw_error(const char *fmt, ...)
 {
     va_list ap;
-    CPUOldState *env;
+    CPUState *cpu;
 
     va_start(ap, fmt);
     fprintf(stderr, "qemu: hardware error: ");
     vfprintf(stderr, fmt, ap);
     fprintf(stderr, "\n");
-    for(env = first_cpu; env != NULL; env = env->next_cpu) {
-        fprintf(stderr, "CPU #%d:\n", env->cpu_index);
+    CPU_FOREACH(cpu) {
+        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
 #ifdef TARGET_I386
-        cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU);
+        cpu_dump_state(cpu->env_ptr, stderr, fprintf, X86_DUMP_FPU);
 #else
-        cpu_dump_state(env, stderr, fprintf, 0);
+        cpu_dump_state(cpu->env_ptr, stderr, fprintf, 0);
 #endif
     }
     va_end(ap);
@@ -71,54 +71,48 @@
     }
 }
 
-static int cpu_can_run(CPUOldState *env)
+static int cpu_can_run(CPUArchState *env)
 {
-    if (env->stop)
+    CPUState *cpu = ENV_GET_CPU(env);
+    if (cpu->stop)
         return 0;
-    if (env->stopped)
+    if (cpu->stopped)
         return 0;
     return 1;
 }
 
-static int cpu_has_work(CPUOldState *env)
-{
-    if (env->stop)
-        return 1;
-    if (env->stopped)
-        return 0;
-    if (!env->halted)
-        return 1;
-    if (qemu_cpu_has_work(env))
-        return 1;
-    return 0;
-}
-
 int tcg_has_work(void)
 {
-    CPUOldState *env;
+    CPUState *cpu;
 
-    for (env = first_cpu; env != NULL; env = env->next_cpu)
-        if (cpu_has_work(env))
+    CPU_FOREACH(cpu) {
+        if (cpu->stop)
             return 1;
+        if (cpu->stopped)
+            return 0;
+        if (!cpu->halted)
+            return 1;
+        if (cpu_has_work(cpu))
+            return 1;
+        return 0;
+    }
     return 0;
 }
 
-void qemu_init_vcpu(void *_env)
+void qemu_init_vcpu(CPUState *cpu)
 {
-    CPUOldState *env = _env;
-
     if (kvm_enabled())
-        kvm_init_vcpu(env);
+        kvm_init_vcpu(cpu);
 #ifdef CONFIG_HAX
     if (hax_enabled())
-        hax_init_vcpu(env);
+        hax_init_vcpu(cpu);
 #endif
     return;
 }
 
-int qemu_cpu_self(void *env)
+bool qemu_cpu_is_self(CPUState *cpu)
 {
-    return 1;
+    return true;
 }
 
 void resume_all_vcpus(void)
@@ -129,7 +123,7 @@
 {
 }
 
-void qemu_cpu_kick(void *env)
+void qemu_cpu_kick(CPUState *cpu)
 {
     return;
 }
@@ -141,10 +135,10 @@
 
 void qemu_notify_event(void)
 {
-    CPUOldState *env = cpu_single_env;
+    CPUState *cpu = current_cpu;
 
-    if (env) {
-        cpu_exit(env);
+    if (cpu) {
+        cpu_exit(cpu);
     /*
      * This is mainly for the Windows host, where the timer may be in
      * a different thread with vcpu. Thus the timer function needs to
@@ -156,7 +150,7 @@
      */
 #ifdef CONFIG_HAX
         if (hax_enabled())
-            hax_raise_event(env);
+            hax_raise_event(cpu);
      } else {
 #ifdef _WIN32
          if(hax_enabled())
@@ -227,9 +221,10 @@
     int ret = 0;
 
     if (next_cpu == NULL)
-        next_cpu = first_cpu;
-    for (; next_cpu != NULL; next_cpu = next_cpu->next_cpu) {
-        CPUOldState *env = cur_cpu = next_cpu;
+        next_cpu = QTAILQ_FIRST(&cpus);
+    for (; next_cpu != NULL; next_cpu = QTAILQ_NEXT(next_cpu, node)) {\
+        cur_cpu = next_cpu;
+        CPUOldState *env = cur_cpu->env_ptr;
 
         if (!vm_running)
             break;
@@ -239,7 +234,7 @@
         if (cpu_can_run(env))
             ret = qemu_cpu_exec(env);
         if (ret == EXCP_DEBUG) {
-            gdb_set_stop_cpu(env);
+            gdb_set_stop_cpu(cur_cpu);
             debug_requested = 1;
             break;
         }
@@ -290,7 +285,13 @@
 TimersState timers_state;
 
 void qemu_timer_register_savevm(void) {
-    register_savevm("timer", 0, 2, timer_save, timer_load, &timers_state);
+    register_savevm(NULL,
+                    "timer",
+                    0,
+                    2,
+                    timer_save,
+                    timer_load,
+                    &timers_state);
 }
 
 /* Return the virtual CPU time, based on the instruction counter.  */
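
The cpus.c changes above replace the hand-maintained first_cpu/next_cpu pointer chain with QTAILQ_*/CPU_FOREACH iteration over an intrusive list. As a rough standalone analogue only -- QEMU defines its own QTAILQ macros, so this is not the emulator's API -- the same idiom looks like this with the BSD-style <sys/queue.h> tail-queue macros:

    #include <stdio.h>
    #include <sys/queue.h>

    /* Each element carries its own link ("node"); iteration uses a FOREACH
     * macro instead of following a hand-rolled next pointer. */
    struct cpu_entry {
        int index;
        TAILQ_ENTRY(cpu_entry) node;
    };

    TAILQ_HEAD(cpu_list, cpu_entry);

    int main(void)
    {
        struct cpu_list cpus = TAILQ_HEAD_INITIALIZER(cpus);
        struct cpu_entry c0 = { 0 }, c1 = { 1 };
        struct cpu_entry *cpu;

        TAILQ_INSERT_TAIL(&cpus, &c0, node);
        TAILQ_INSERT_TAIL(&cpus, &c1, node);

        TAILQ_FOREACH(cpu, &cpus, node) {
            printf("CPU #%d\n", cpu->index);
        }
        return 0;
    }
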
diff --git a/cputlb.c b/cputlb.c
index f175fa3..b59f158 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -22,8 +22,8 @@
 #include "exec/exec-all.h"
 #include "exec/cputlb.h"
 
-#ifdef CONFIG_MEMCHECK
-#include "memcheck/memcheck_api.h"
+#ifdef CONFIG_ANDROID_MEMCHECK
+#include "android/qemu/memcheck/memcheck_api.h"
 #endif
 
 /* statistics */
@@ -36,8 +36,18 @@
     .addend     = -1,
 };
 
-/* NOTE: if flush_global is true, also flush global entries (not
-   implemented yet) */
+/* NOTE:
+ * If flush_global is true (the usual case), flush all tlb entries.
+ * If flush_global is false, flush (at least) all tlb entries not
+ * marked global.
+ *
+ * Since QEMU doesn't currently implement a global/not-global flag
+ * for tlb entries, at the moment tlb_flush() will also flush all
+ * tlb entries in the flush_global == false case. This is OK because
+ * CPU architectures generally permit an implementation to drop
+ * entries from the TLB at any time, so flushing more entries than
+ * required is only an efficiency issue, not a correctness issue.
+ */
 void tlb_flush(CPUArchState *env, int flush_global)
 {
     int i;
@@ -58,6 +68,9 @@
     }
 
     memset(env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
+
+    env->tlb_flush_addr = -1;
+    env->tlb_flush_mask = 0;
     tlb_flush_count++;
 }
 
@@ -81,6 +94,16 @@
 #if defined(DEBUG_TLB)
     printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
 #endif
+    /* Check if we need to flush due to large pages.  */
+    if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
+#if defined(DEBUG_TLB)
+        printf("tlb_flush_page: forced full flush ("
+               TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
+               env->tlb_flush_addr, env->tlb_flush_mask);
+#endif
+        tlb_flush(env, 1);
+        return;
+    }
     /* must reset current TB so that interrupts cannot modify the
        links while we are modifying them */
     env->current_tb = NULL;
@@ -111,22 +134,30 @@
     cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
 }
 
-void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
-                           uintptr_t start, uintptr_t length)
+static bool tlb_is_dirty_ram(CPUTLBEntry *tlbe)
+{
+    return (tlbe->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM;
+}
+
+void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
+                           uintptr_t length)
 {
     uintptr_t addr;
-    if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
+
+    if (tlb_is_dirty_ram(tlb_entry)) {
         addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
         if ((addr - start) < length) {
-            tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
+            tlb_entry->addr_write &= TARGET_PAGE_MASK;
+            tlb_entry->addr_write |= TLB_NOTDIRTY;
         }
     }
 }
 
 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
 {
-    if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
+    if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
         tlb_entry->addr_write = vaddr;
+    }
 }
 
 /* update the TLB corresponding to virtual page vaddr
@@ -138,17 +169,40 @@
 
     vaddr &= TARGET_PAGE_MASK;
     i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
-    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
+    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
         tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
+    }
 }
 
-/* add a new TLB entry. At most one entry for a given virtual address
-   is permitted. Return 0 if OK or 2 if the page could not be mapped
-   (can only happen in non SOFTMMU mode for I/O pages or pages
-   conflicting with the host address space). */
-int tlb_set_page_exec(CPUArchState *env, target_ulong vaddr,
-                      hwaddr paddr, int prot,
-                      int mmu_idx, int is_softmmu)
+/* Our TLB does not support large pages, so remember the area covered by
+   large pages and trigger a full TLB flush if these are invalidated.  */
+static void tlb_add_large_page(CPUArchState *env, target_ulong vaddr,
+                               target_ulong size)
+{
+    target_ulong mask = ~(size - 1);
+
+    if (env->tlb_flush_addr == (target_ulong)-1) {
+        env->tlb_flush_addr = vaddr & mask;
+        env->tlb_flush_mask = mask;
+        return;
+    }
+    /* Extend the existing region to include the new page.
+       This is a compromise between unnecessary flushes and the cost
+       of maintaining a full variable size TLB.  */
+    mask &= env->tlb_flush_mask;
+    while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
+        mask <<= 1;
+    }
+    env->tlb_flush_addr &= mask;
+    env->tlb_flush_mask = mask;
+}
+
+/* Add a new TLB entry. At most one entry for a given virtual address
+   is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
+   supplied size is only used by tlb_flush_page.  */
+void tlb_set_page(CPUArchState *env, target_ulong vaddr,
+                  hwaddr paddr, int prot,
+                  int mmu_idx, target_ulong size)
 {
     PhysPageDesc *p;
     unsigned long pd;
@@ -156,11 +210,14 @@
     target_ulong address;
     target_ulong code_address;
     ptrdiff_t addend;
-    int ret;
     CPUTLBEntry *te;
     CPUWatchpoint *wp;
     hwaddr iotlb;
 
+    assert(size >= TARGET_PAGE_SIZE);
+    if (size != TARGET_PAGE_SIZE) {
+        tlb_add_large_page(env, vaddr, size);
+    }
     p = phys_page_find(paddr >> TARGET_PAGE_BITS);
     if (!p) {
         pd = IO_MEM_UNASSIGNED;
@@ -168,11 +225,11 @@
         pd = p->phys_offset;
     }
 #if defined(DEBUG_TLB)
-    printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x%08x prot=%x idx=%d smmu=%d pd=0x%08lx\n",
-           vaddr, (int)paddr, prot, mmu_idx, is_softmmu, pd);
+    printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
+           " prot=%x idx=%d pd=0x%08lx\n",
+           vaddr, paddr, prot, mmu_idx, pd);
 #endif
 
-    ret = 0;
     address = vaddr;
     if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
         /* IO memory case (romd handled later) */
@@ -243,7 +300,7 @@
         te->addr_write = -1;
     }
 
-#ifdef CONFIG_MEMCHECK
+#ifdef CONFIG_ANDROID_MEMCHECK
     /*
      * If we have memchecker running, we need to make sure that page, cached
      * into TLB as the result of this operation will comply with our requirement
@@ -260,7 +317,8 @@
      *  - Cached page belongs to RAM, not I/O area.
      *  - Page is cached for read, or write access.
      */
-    if (memcheck_instrument_mmu && mmu_idx == 1 && !is_softmmu &&
+#if 0
+    if (memcheck_instrument_mmu && mmu_idx == 1 && 
         (pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM &&
         (prot & (PAGE_READ | PAGE_WRITE)) &&
         memcheck_is_checked(vaddr & TARGET_PAGE_MASK, TARGET_PAGE_SIZE)) {
@@ -271,23 +329,37 @@
             te->addr_write ^= TARGET_PAGE_MASK;
         }
     }
-#endif  // CONFIG_MEMCHECK
-
-    return ret;
+#endif
+#endif  // CONFIG_ANDROID_MEMCHECK
 }
 
-int tlb_set_page(CPUArchState *env1, target_ulong vaddr,
-                 hwaddr paddr, int prot,
-                 int mmu_idx, int is_softmmu)
+/* NOTE: this function can trigger an exception */
+/* NOTE2: the returned address is not exactly the physical address: it
+   is the offset relative to phys_ram_base */
+tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
 {
-    if (prot & PAGE_READ)
-        prot |= PAGE_EXEC;
-    return tlb_set_page_exec(env1, vaddr, paddr, prot, mmu_idx, is_softmmu);
+    int mmu_idx, page_index, pd;
+    void *p;
+
+    page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    mmu_idx = cpu_mmu_index(env1);
+    if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code !=
+                 (addr & TARGET_PAGE_MASK))) {
+        cpu_ldub_code(env1, addr);
+    }
+    pd = env1->tlb_table[mmu_idx][page_index].addr_code & ~TARGET_PAGE_MASK;
+    if (pd > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
+#if defined(TARGET_SPARC) || defined(TARGET_MIPS)
+        cpu_unassigned_access(env1, addr, 0, 1, 0, 4);
+#else
+        cpu_abort(env1, "Trying to execute code outside RAM or ROM at 0x" TARGET_FMT_lx "\n", addr);
+#endif
+    }
+    p = (void *)((uintptr_t)addr + env1->tlb_table[mmu_idx][page_index].addend);
+    return qemu_ram_addr_from_host_nofail(p);
 }
 
 #define MMUSUFFIX _cmmu
-#define GETPC() NULL
-#define env cpu_single_env
 #define SOFTMMU_CODE_ACCESS
 
 #define SHIFT 0
@@ -301,5 +373,3 @@
 
 #define SHIFT 3
 #include "exec/softmmu_template.h"
-
-#undef env
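
The tlb_add_large_page() hunk above tracks a single (tlb_flush_addr, tlb_flush_mask) pair covering every large page seen so far, widening the mask until one naturally aligned region spans both the old range and the new page. A small worked example of that widening loop (standalone, with a 32-bit target_ulong assumed purely for illustration):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t target_ulong;   /* 32-bit guest addresses, for illustration */

    /* Mirror of the widening step in tlb_add_large_page(): grow 'mask' (and
     * re-align 'addr') until (addr, mask) covers both the previously recorded
     * region and the new large page. */
    static void add_large_page(target_ulong *flush_addr, target_ulong *flush_mask,
                               target_ulong vaddr, target_ulong size)
    {
        target_ulong mask = ~(size - 1);

        if (*flush_addr == (target_ulong)-1) {    /* first large page seen */
            *flush_addr = vaddr & mask;
            *flush_mask = mask;
            return;
        }
        mask &= *flush_mask;
        while (((*flush_addr ^ vaddr) & mask) != 0) {
            mask <<= 1;
        }
        *flush_addr &= mask;
        *flush_mask = mask;
    }

    int main(void)
    {
        target_ulong addr = (target_ulong)-1, mask = 0;
        add_large_page(&addr, &mask, 0x00100000, 0x100000); /* 1 MB page at 1 MB */
        add_large_page(&addr, &mask, 0x00700000, 0x100000); /* 1 MB page at 7 MB */
        /* The tracked region now spans 0x00000000..0x007fffff. */
        printf("addr=0x%08x mask=0x%08x\n", (unsigned)addr, (unsigned)mask);
        return 0;
    }
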
diff --git a/disas.c b/disas.c
index eb87c39..7ce215c 100644
--- a/disas.c
+++ b/disas.c
@@ -39,7 +39,7 @@
 {
     CPUDebug *s = container_of(info, CPUDebug, info);
 
-    cpu_memory_rw_debug(s->env, memaddr, myaddr, length, 0);
+    cpu_memory_rw_debug(ENV_GET_CPU(s->env), memaddr, myaddr, length, 0);
     return 0;
 }
 
@@ -419,7 +419,7 @@
     if (monitor_disas_is_physical) {
         cpu_physical_memory_read(memaddr, myaddr, length);
     } else {
-        cpu_memory_rw_debug(s->env, memaddr,myaddr, length, 0);
+        cpu_memory_rw_debug(ENV_GET_CPU(s->env), memaddr,myaddr, length, 0);
     }
     return 0;
 }
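
The disas.c hunks above rely on the container_of() idiom: given a pointer to a member embedded in a larger structure, subtract the member's offset to recover the enclosing object (here, CPUDebug from its 'info' field). A minimal sketch of the idiom -- QEMU's real macro typically adds type checking on top of this pointer arithmetic:

    #include <stddef.h>
    #include <stdio.h>

    /* Recover the enclosing structure from a pointer to one of its members. */
    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct inner { int x; };
    struct outer {
        int tag;
        struct inner info;   /* embedded member, like CPUDebug::info */
    };

    int main(void)
    {
        struct outer o = { 42, { 7 } };
        struct inner *ip = &o.info;
        struct outer *op = container_of(ip, struct outer, info);
        printf("tag=%d x=%d\n", op->tag, op->info.x);
        return 0;
    }
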
diff --git a/distrib/build-kernel.sh b/distrib/build-kernel.sh
index 9b3bc9d..3d50e9a 100755
--- a/distrib/build-kernel.sh
+++ b/distrib/build-kernel.sh
@@ -158,6 +158,10 @@
             echo "WARNING: android-goldfish-$KERNEL_VERSION doesn't build --arch=$ARCH with GCC 4.7"
         fi
     fi
+    if [ "$ARCH" = "arm64" ]; then
+        # There is no GCC 4.7 toolchain to build AARCH64 binaries.
+        GCC_VERSION=4.8
+    fi
     echo "Autoconfig: --gcc-version=$GCC_VERSION"
 fi
 
@@ -186,6 +190,10 @@
         mips)
             CONFIG=goldfish
             ;;
+        arm64)
+            # TODO(digit): Provide better config.
+            CONFIG=defconfig
+            ;;
         *)
             echo "ERROR: Invalid arch '$ARCH', try one of $VALID_ARCHS"
             exit 1
@@ -220,6 +228,9 @@
         mips)
             CROSSPREFIX=mipsel-linux-android-
             ;;
+        arm64)
+            CROSSPREFIX=aarch64-linux-android-
+            ;;
         *)
             echo "ERROR: Unsupported architecture!"
             exit 1
@@ -235,6 +246,9 @@
     x86|x86_64)
         ZIMAGE=bzImage
         ;;
+    arm64)
+        ZIMAGE=Image.gz
+        ;;
     mips)
         ZIMAGE=
         ;;
@@ -260,6 +274,9 @@
             # x86_46 binaries are under prebuilts/gcc/<host>/x86 !!
             PREBUILT_ARCH=x86
             ;;
+        arm64)
+            PREBUILT_ARCH=aarch64
+            ;;
         *)
             PREBUILT_ARCH=$ARCH
             ;;
@@ -300,10 +317,19 @@
   MAKE_FLAGS="$MAKE_FLAGS V=1"
 fi
 
+case $CONFIG in
+    defconfig)
+        MAKE_DEFCONFIG=$CONFIG
+        ;;
+    *)
+        MAKE_DEFCONFIG=${CONFIG}_defconfig
+        ;;
+esac
+
 # Do the build
 #
 rm -f include/asm &&
-make ${CONFIG}_defconfig &&    # configure the kernel
+make $MAKE_DEFCONFIG &&    # configure the kernel
 make -j$JOBS $MAKE_FLAGS       # build it
 
 if [ $? != 0 ] ; then
diff --git a/distrib/elff/README.TXT b/distrib/elff/README.TXT
new file mode 100644
index 0000000..915787a
--- /dev/null
+++ b/distrib/elff/README.TXT
@@ -0,0 +1,3 @@
+This is a small library used to parse the symbol information inside of
+ELF binaries. It is used by the Android emulator "memcheck" feature to
+convert guest addresses into symbolic function names.
diff --git a/elff/dwarf.h b/distrib/elff/elff/dwarf.h
similarity index 100%
rename from elff/dwarf.h
rename to distrib/elff/elff/dwarf.h
diff --git a/elff/dwarf_cu.cc b/distrib/elff/elff/dwarf_cu.cc
similarity index 99%
rename from elff/dwarf_cu.cc
rename to distrib/elff/elff/dwarf_cu.cc
index 8e7da98..a3d4c52 100644
--- a/elff/dwarf_cu.cc
+++ b/distrib/elff/elff/dwarf_cu.cc
@@ -17,9 +17,9 @@
 
 #include "string.h"
 #include "stdio.h"
-#include "elf_file.h"
-#include "dwarf_cu.h"
-#include "dwarf_utils.h"
+#include "elff/elf_file.h"
+#include "elff/dwarf_cu.h"
+#include "elff/dwarf_utils.h"
 
 DwarfCU::DwarfCU(ElfFile* elf)
     : elf_file_(elf),
diff --git a/elff/dwarf_cu.h b/distrib/elff/elff/dwarf_cu.h
similarity index 99%
rename from elff/dwarf_cu.h
rename to distrib/elff/elff/dwarf_cu.h
index a8f0578..425ecd9 100644
--- a/elff/dwarf_cu.h
+++ b/distrib/elff/elff/dwarf_cu.h
@@ -18,8 +18,8 @@
 #ifndef ELFF_DWARF_CU_H_
 #define ELFF_DWARF_CU_H_
 
-#include "dwarf_defs.h"
-#include "dwarf_die.h"
+#include "elff/dwarf_defs.h"
+#include "elff/dwarf_die.h"
 
 /* Address information descriptor. */
 typedef struct Dwarf_AddressInfo {
diff --git a/elff/dwarf_defs.h b/distrib/elff/elff/dwarf_defs.h
similarity index 99%
rename from elff/dwarf_defs.h
rename to distrib/elff/elff/dwarf_defs.h
index 04573e2..7516e7b 100644
--- a/elff/dwarf_defs.h
+++ b/distrib/elff/elff/dwarf_defs.h
@@ -18,8 +18,8 @@
 #ifndef ELFF_DWARF_DEFS_H_
 #define ELFF_DWARF_DEFS_H_
 
-#include "dwarf.h"
-#include "elf_defs.h"
+#include "elff/dwarf.h"
+#include "elff/elf_defs.h"
 
 /* DWARF structures are packed to 1 byte. */
 #define ELFF_PACKED __attribute__ ((packed))
diff --git a/elff/dwarf_die.cc b/distrib/elff/elff/dwarf_die.cc
similarity index 97%
rename from elff/dwarf_die.cc
rename to distrib/elff/elff/dwarf_die.cc
index 9939952..9d59c23 100644
--- a/elff/dwarf_die.cc
+++ b/distrib/elff/elff/dwarf_die.cc
@@ -15,10 +15,10 @@
  */
 
 #include "stdio.h"
-#include "dwarf_die.h"
-#include "dwarf_cu.h"
-#include "dwarf_utils.h"
-#include "elf_file.h"
+#include "elff/dwarf_die.h"
+#include "elff/dwarf_cu.h"
+#include "elff/dwarf_utils.h"
+#include "elff/elf_file.h"
 
 DIEObject::~DIEObject() {
   /* Delete all children of this object. */
@@ -212,7 +212,7 @@
           Elf_Xword low, high;
           while (elf_file()->get_range<Elf_Xword>(off, &low, &high) &&
                  (low != 0 || high != 0)) {
-            printf("                                %08" FMT_I64 "X - %08" FMT_I64 "X\n",
+            printf("                                %08llX - %08llX\n",
                    (unsigned long long)low, (unsigned long long)high);
             off += 16;
           }
diff --git a/elff/dwarf_die.h b/distrib/elff/elff/dwarf_die.h
similarity index 99%
rename from elff/dwarf_die.h
rename to distrib/elff/elff/dwarf_die.h
index ca2eeeb..5f76bcc 100644
--- a/elff/dwarf_die.h
+++ b/distrib/elff/elff/dwarf_die.h
@@ -18,7 +18,7 @@
 #define ELFF_DWARF_DIE_H_
 
 #include "dwarf_defs.h"
-#include "elf_alloc.h"
+#include "elff/elf_alloc.h"
 
 class ElfFile;
 class DwarfCU;
diff --git a/elff/dwarf_utils.cc b/distrib/elff/elff/dwarf_utils.cc
similarity index 99%
rename from elff/dwarf_utils.cc
rename to distrib/elff/elff/dwarf_utils.cc
index 3b998b4..86178af 100644
--- a/elff/dwarf_utils.cc
+++ b/distrib/elff/elff/dwarf_utils.cc
@@ -17,7 +17,7 @@
 #include <stdio.h>
 #define __STDC_FORMAT_MACROS 1
 #include <inttypes.h>
-#include "dwarf_utils.h"
+#include "elff/dwarf_utils.h"
 
 /* "Stringifies" the parameter. */
 #define DWARF_NAMEFY(val) case val: return "" #val ""
diff --git a/elff/dwarf_utils.h b/distrib/elff/elff/dwarf_utils.h
similarity index 98%
rename from elff/dwarf_utils.h
rename to distrib/elff/elff/dwarf_utils.h
index fc007d1..38ee3b0 100644
--- a/elff/dwarf_utils.h
+++ b/distrib/elff/elff/dwarf_utils.h
@@ -17,7 +17,7 @@
 #ifndef ELFF_DWARF_UTILS_
 #define ELFF_DWARF_UTILS_
 
-#include "dwarf_defs.h"
+#include "elff/dwarf_defs.h"
 
 /* Gets DWARF attribute name string (DW_AT_Xxx) for a given attribute ID.
  * Param:
diff --git a/elff/elf_alloc.cc b/distrib/elff/elff/elf_alloc.cc
similarity index 96%
rename from elff/elf_alloc.cc
rename to distrib/elff/elff/elf_alloc.cc
index 10d740b..6eb0f43 100644
--- a/elff/elf_alloc.cc
+++ b/distrib/elff/elff/elf_alloc.cc
@@ -15,8 +15,8 @@
  * allocations for DWARF objects.
  */
 
-#include "elf_alloc.h"
-#include "elf_file.h"
+#include "elff/elf_alloc.h"
+#include "elff/elf_file.h"
 
 ElfAllocator::ElfAllocator()
     : current_chunk_(NULL) {
diff --git a/elff/elf_alloc.h b/distrib/elff/elff/elf_alloc.h
similarity index 99%
rename from elff/elf_alloc.h
rename to distrib/elff/elff/elf_alloc.h
index bf3103b..066e6dc 100644
--- a/elff/elf_alloc.h
+++ b/distrib/elff/elff/elf_alloc.h
@@ -19,7 +19,7 @@
 #define ELFF_ELF_ALLOC_H_
 
 #include <stdint.h>
-#include "elff-common.h"
+#include "elff/elff-common.h"
 
 class ElfFile;
 
diff --git a/elff/elf_defs.h b/distrib/elff/elff/elf_defs.h
similarity index 81%
rename from elff/elf_defs.h
rename to distrib/elff/elff/elf_defs.h
index 8687f0f..acc586a 100644
--- a/elff/elf_defs.h
+++ b/distrib/elff/elff/elf_defs.h
@@ -17,7 +17,7 @@
 #ifndef ELFF_ELF_DEFS_H_
 #define ELFF_ELF_DEFS_H_
 
-#include "elff_elf.h"
+#include "elff/elff_elf.h"
 
 //=============================================================================
 // Macros.
@@ -28,24 +28,18 @@
  *  p - Pointer to increment.
  *  n - Number of bytes to increment the pointer with.
  */
-#define INC_PTR(p, n)   (reinterpret_cast<uint8_t*>(p) + (n))
+static inline uint8_t* INC_PTR(void* p, size_t n) {
+    return reinterpret_cast<uint8_t*>(p) + n;
+}
 
 /* Increments a constant pointer by n bytes.
  * Param:
  *  p - Pointer to increment.
  *  n - Number of bytes to increment the pointer with.
  */
-#define INC_CPTR(p, n)  (reinterpret_cast<const uint8_t*>(p) + (n))
-
-/* Increments a pointer of a given type by n bytes.
- * Param:
- *  T - Pointer type
- *  p - Pointer to increment.
- *  n - Number of bytes to increment the pointer with.
- */
-#define INC_PTR_T(T, p, n)                              \
-    reinterpret_cast<T*>                                \
-        (reinterpret_cast<uint8_t*>(p) + (n))
+static inline const uint8_t* INC_CPTR(const void* p, size_t n) {
+    return reinterpret_cast<const uint8_t*>(p) + n;
+}
 
 /* Increments a constant pointer of a given type by n bytes.
  * Param:
@@ -53,9 +47,19 @@
  *  p - Pointer to increment.
  *  n - Number of bytes to increment the pointer with.
  */
-#define INC_CPTR_T(T, p, n)                                 \
-    reinterpret_cast<const T*>                              \
-        (reinterpret_cast<const uint8_t*>(p) + (n))
+template <typename T>
+static inline const T* INC_CPTR_T_INNER_TEMPLATE(const void* p, size_t n) {
+    union {
+        const void* p;
+        const uint8_t* p8;
+        const T* pt;
+    } u;
+    u.p = p;
+    u.p8 += n;
+    return u.pt;
+}
+
+#define INC_CPTR_T(T, p, n) INC_CPTR_T_INNER_TEMPLATE<T>(p, n)
 
 /* Calculates number of entries in a static array.
  * Param:
@@ -71,7 +75,7 @@
  *  T - Structure (or class) type.
  *  f - Name of a field (member variable) for this structure (or class).
  */
-#define ELFF_FIELD_OFFSET(T, f) ((size_t)(size_t*)&(((T *)0)->f))
+#define ELFF_FIELD_OFFSET(T, f)  offsetof(T, f)
 
 //=============================================================================
 // Inline routines.
@@ -132,12 +136,4 @@
   return get_byte(&tmp, 0) == 0xFF;
 }
 
-/* Use in printf() statements to dump 64-bit values
- */
-#ifdef _WIN32
-#  define FMT_I64  "I64"
-#else
-#  define FMT_I64  "ll"
-#endif
-
 #endif  // ELFF_ELF_DEFS_H_

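The elf_defs.h hunks above replace the INC_PTR/INC_CPTR/INC_CPTR_T macros with inline
helpers and a small template; the union in INC_CPTR_T_INNER_TEMPLATE presumably exists
to avoid the cast-alignment warnings the old reinterpret_cast chain could trigger. As a
quick sanity check, the new helpers can be copied into a standalone C++ file; the Header
struct and blob contents below are invented purely for illustration:

    // Standalone sketch: the helpers are copied from the new elf_defs.h above;
    // Header and the blob contents are invented for illustration.
    #include <stdint.h>
    #include <stddef.h>
    #include <stdio.h>

    static inline const uint8_t* INC_CPTR(const void* p, size_t n) {
        return reinterpret_cast<const uint8_t*>(p) + n;
    }

    template <typename T>
    static inline const T* INC_CPTR_T_INNER_TEMPLATE(const void* p, size_t n) {
        union {
            const void* p;
            const uint8_t* p8;
            const T* pt;
        } u;
        u.p = p;
        u.p8 += n;
        return u.pt;
    }
    #define INC_CPTR_T(T, p, n) INC_CPTR_T_INNER_TEMPLATE<T>(p, n)

    struct Header {
        uint32_t magic;
        uint32_t size;
    };

    int main() {
        uint8_t blob[16] = {0};
        blob[8] = 0x7f;   // pretend the payload starts with an ELF-like magic byte

        // Skip an 8-byte prefix, then view the rest as a Header, in the same
        // spirit as the elff code walking mapped section data.
        const uint8_t* payload = INC_CPTR(blob, 8);
        const Header* hdr = INC_CPTR_T(Header, blob, 8);
        printf("payload[0]=0x%02x header=%p\n", payload[0], (const void*)hdr);
        return 0;
    }
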
diff --git a/elff/elf_file.cc b/distrib/elff/elff/elf_file.cc
similarity index 98%
rename from elff/elf_file.cc
rename to distrib/elff/elff/elf_file.cc
index 3f1825e..4fbb261 100644
--- a/elff/elf_file.cc
+++ b/distrib/elff/elff/elf_file.cc
@@ -15,10 +15,10 @@
  */
 
 #include "string.h"
-#include "elf_file.h"
-#include "elf_alloc.h"
-#include "dwarf_cu.h"
-#include "dwarf_utils.h"
+#include "elff/elf_file.h"
+#include "elff/elf_alloc.h"
+#include "elff/dwarf_cu.h"
+#include "elff/dwarf_utils.h"
 
 #include <fcntl.h>
 #ifndef O_BINARY
diff --git a/elff/elf_file.h b/distrib/elff/elff/elf_file.h
similarity index 98%
rename from elff/elf_file.h
rename to distrib/elff/elff/elf_file.h
index c92fdfb..15e7158 100644
--- a/elff/elf_file.h
+++ b/distrib/elff/elff/elf_file.h
@@ -17,10 +17,10 @@
 #ifndef ELFF_ELF_FILE_H_
 #define ELFF_ELF_FILE_H_
 
-#include "dwarf_die.h"
-#include "elf_mapped_section.h"
-#include "elff_api.h"
-#include "android/utils/mapfile.h"
+#include "elff/dwarf_die.h"
+#include "elff/elf_mapped_section.h"
+#include "elff/elff_api.h"
+#include "elff/elff_map_file.h"
 
 /* Encapsulates architecture-independent functionality of an ELF file.
  *
diff --git a/elff/elf_mapped_section.cc b/distrib/elff/elff/elf_mapped_section.cc
similarity index 95%
rename from elff/elf_mapped_section.cc
rename to distrib/elff/elff/elf_mapped_section.cc
index 60cd21f..99229c2 100644
--- a/elff/elf_mapped_section.cc
+++ b/distrib/elff/elff/elf_mapped_section.cc
@@ -15,8 +15,8 @@
  * a section of an ELF file, mapped to memory.
  */
 
-#include "elf_defs.h"
-#include "elf_mapped_section.h"
+#include "elff/elf_defs.h"
+#include "elff/elf_mapped_section.h"
 
 ElfMappedSection::ElfMappedSection()
     : mapped_at_(NULL),
diff --git a/elff/elf_mapped_section.h b/distrib/elff/elff/elf_mapped_section.h
similarity index 97%
rename from elff/elf_mapped_section.h
rename to distrib/elff/elff/elf_mapped_section.h
index 7da608d..805c9e4 100644
--- a/elff/elf_mapped_section.h
+++ b/distrib/elff/elff/elf_mapped_section.h
@@ -18,8 +18,8 @@
 #ifndef ELFF_ELF_MAPPED_SECTION_H_
 #define ELFF_ELF_MAPPED_SECTION_H_
 
-#include "elf_defs.h"
-#include "android/utils/mapfile.h"
+#include "elff/elf_defs.h"
+#include "elff/elff_map_file.h"
 
 /* Encapsulates a section of an ELF file, mapped to memory. */
 class ElfMappedSection {
diff --git a/elff/elff-common.h b/distrib/elff/elff/elff-common.h
similarity index 100%
rename from elff/elff-common.h
rename to distrib/elff/elff/elff-common.h
diff --git a/elff/elff_api.cc b/distrib/elff/elff/elff_api.cc
similarity index 95%
rename from elff/elff_api.cc
rename to distrib/elff/elff/elff_api.cc
index 46b2ad1..7430a79 100644
--- a/elff/elff_api.cc
+++ b/distrib/elff/elff/elff_api.cc
@@ -15,9 +15,9 @@
  * an ELF file containing debugging information in DWARF format.
  */
 
-#include "elff_api.h"
-#include "elf_file.h"
-#include "dwarf_defs.h"
+#include "elff/elff_api.h"
+#include "elff/elf_file.h"
+#include "elff/dwarf_defs.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/elff/elff_api.h b/distrib/elff/elff/elff_api.h
similarity index 100%
rename from elff/elff_api.h
rename to distrib/elff/elff/elff_api.h
diff --git a/elff/elff_elf.h b/distrib/elff/elff/elff_elf.h
similarity index 99%
rename from elff/elff_elf.h
rename to distrib/elff/elff/elff_elf.h
index ea19233..00e73f7 100644
--- a/elff/elff_elf.h
+++ b/distrib/elff/elff/elff_elf.h
@@ -19,7 +19,7 @@
 #define ELFF_ELH_H_
 
 #include <stdint.h>
-#include "elff-common.h"
+#include "elff/elff-common.h"
 
 //=============================================================================
 // ELF file definitions
diff --git a/android/varint.h b/distrib/elff/elff/elff_map_file.h
similarity index 65%
rename from android/varint.h
rename to distrib/elff/elff/elff_map_file.h
index 7822756..a5fd08b 100644
--- a/android/varint.h
+++ b/distrib/elff/elff/elff_map_file.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2008 The Android Open Source Project
+/* Copyright (C) 2007-2010 The Android Open Source Project
 **
 ** This software is licensed under the terms of the GNU General Public
 ** License version 2, as published by the Free Software Foundation, and
@@ -9,7 +9,11 @@
 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 ** GNU General Public License for more details.
 */
-#include <inttypes.h>
 
-extern char *varint_encode(uint64_t value, char *buf);
-extern char *varint_encode_signed(int64_t value, char *buf);
+#ifndef ELFF_MAP_FILE_H
+#define ELFF_MAP_FILE_H
+
+// TODO(digit): Provide standalone implementation for the library.
+#include "android/utils/mapfile.h"
+
+#endif  // ELFF_MAP_FILE_H
diff --git a/distrib/package-release.sh b/distrib/package-release.sh
index a766937..2cd8e2c 100755
--- a/distrib/package-release.sh
+++ b/distrib/package-release.sh
@@ -165,8 +165,8 @@
     SHA1=$(cd $GIT_DIR && git log --oneline -1 .) || \
         panic "Not a Git directory: $GIT_DIR"
 
-    SHA1=$(printf "%s" "$SHA1" | sed -e s/\'/\\\'/g)
-    eval $VARNAME=\"$SHA1\"
+    SHA1=$(printf "%s" "$SHA1" | sed -e "s/'/\\'/g")
+    eval $VARNAME=\'$SHA1\'
 }
 
 # Defaults.
@@ -508,6 +508,7 @@
     PKG_FILE=$PKG_DIR/$PKG_PREFIX-$PKG_REVISION-$SYSTEM.tar.bz2
     (run cd "$TEMP_BUILD_DIR"/$SYSTEM && run tar cf $PKG_FILE $PKG_PREFIX-$PKG_REVISION)
 done
+
 if [ "$OPT_COPY_PREBUILTS" ]; then
     for SYSTEM in linux darwin; do
         SRC_DIR="$TEMP_BUILD_DIR"/$SYSTEM/$PKG_PREFIX-$PKG_REVISION
diff --git a/distrib/sdl-1.2.15/include/SDL_opengl.h b/distrib/sdl-1.2.15/include/SDL_opengl.h
index 3d791d6..e1e4169 100644
--- a/distrib/sdl-1.2.15/include/SDL_opengl.h
+++ b/distrib/sdl-1.2.15/include/SDL_opengl.h
@@ -27,7 +27,7 @@
 #include "SDL_config.h"
 
 #ifdef __WIN32__
-#define WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN 1
 #ifndef NOMINMAX
 #define NOMINMAX	/* Don't defined min() and max() */
 #endif
diff --git a/distrib/sdl-1.2.15/include/SDL_syswm.h b/distrib/sdl-1.2.15/include/SDL_syswm.h
index 771d6b1..f252767 100644
--- a/distrib/sdl-1.2.15/include/SDL_syswm.h
+++ b/distrib/sdl-1.2.15/include/SDL_syswm.h
@@ -129,7 +129,7 @@
 } SDL_SysWMinfo;
 
 #elif defined(SDL_VIDEO_DRIVER_WINDIB) || defined(SDL_VIDEO_DRIVER_DDRAW) || defined(SDL_VIDEO_DRIVER_GAPI)
-#define WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN 1
 #include <windows.h>
 
 /** The windows custom event structure */
diff --git a/distrib/sdl-1.2.15/src/thread/win32/SDL_systhread.c b/distrib/sdl-1.2.15/src/thread/win32/SDL_systhread.c
index 55cb88a..6912bd2 100644
--- a/distrib/sdl-1.2.15/src/thread/win32/SDL_systhread.c
+++ b/distrib/sdl-1.2.15/src/thread/win32/SDL_systhread.c
@@ -23,7 +23,7 @@
 
 /* Win32 thread management routines for SDL */
 
-#define WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN 1
 #include <windows.h>
 
 #include "SDL_thread.h"
diff --git a/distrib/sdl-1.2.15/src/thread/win32/SDL_systhread_c.h b/distrib/sdl-1.2.15/src/thread/win32/SDL_systhread_c.h
index 10b0a7d..9bd0089 100644
--- a/distrib/sdl-1.2.15/src/thread/win32/SDL_systhread_c.h
+++ b/distrib/sdl-1.2.15/src/thread/win32/SDL_systhread_c.h
@@ -21,7 +21,7 @@
 */
 #include "SDL_config.h"
 
-#define WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN 1
 #include <windows.h>
 
 typedef HANDLE SYS_ThreadHandle;
diff --git a/distrib/sdl-1.2.15/src/video/wincommon/SDL_lowvideo.h b/distrib/sdl-1.2.15/src/video/wincommon/SDL_lowvideo.h
index 89d1a88..eed13db 100644
--- a/distrib/sdl-1.2.15/src/video/wincommon/SDL_lowvideo.h
+++ b/distrib/sdl-1.2.15/src/video/wincommon/SDL_lowvideo.h
@@ -24,7 +24,7 @@
 #ifndef _SDL_lowvideo_h
 #define _SDL_lowvideo_h
 
-#define WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN 1
 #include <windows.h>
 
 #ifndef SetClassLongPtr
diff --git a/distrib/sdl-1.2.15/src/video/wincommon/SDL_sysevents.c b/distrib/sdl-1.2.15/src/video/wincommon/SDL_sysevents.c
index 76c67a1..2773bb9 100644
--- a/distrib/sdl-1.2.15/src/video/wincommon/SDL_sysevents.c
+++ b/distrib/sdl-1.2.15/src/video/wincommon/SDL_sysevents.c
@@ -21,7 +21,7 @@
 */
 #include "SDL_config.h"
 
-#define WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN 1
 #include <windows.h>
 
 /* Make sure XBUTTON stuff is defined that isn't in older Platform SDKs... */
diff --git a/distrib/sdl-1.2.15/src/video/wincommon/SDL_sysmouse.c b/distrib/sdl-1.2.15/src/video/wincommon/SDL_sysmouse.c
index 12d17e0..5e9bae0 100644
--- a/distrib/sdl-1.2.15/src/video/wincommon/SDL_sysmouse.c
+++ b/distrib/sdl-1.2.15/src/video/wincommon/SDL_sysmouse.c
@@ -21,7 +21,7 @@
 */
 #include "SDL_config.h"
 
-#define WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN 1
 #include <windows.h>
 
 #include "SDL_mouse.h"
diff --git a/distrib/sdl-1.2.15/src/video/wincommon/SDL_syswm.c b/distrib/sdl-1.2.15/src/video/wincommon/SDL_syswm.c
index 504d95d..7781814 100644
--- a/distrib/sdl-1.2.15/src/video/wincommon/SDL_syswm.c
+++ b/distrib/sdl-1.2.15/src/video/wincommon/SDL_syswm.c
@@ -21,7 +21,7 @@
 */
 #include "SDL_config.h"
 
-#define WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN 1
 #include <windows.h>
 
 #include "SDL_version.h"
diff --git a/distrib/sdl-1.2.15/src/video/windib/SDL_dibevents.c b/distrib/sdl-1.2.15/src/video/windib/SDL_dibevents.c
index 6cee54a..de4ad03 100644
--- a/distrib/sdl-1.2.15/src/video/windib/SDL_dibevents.c
+++ b/distrib/sdl-1.2.15/src/video/windib/SDL_dibevents.c
@@ -21,7 +21,7 @@
 */
 #include "SDL_config.h"
 
-#define WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN 1
 #include <windows.h>
 
 #include "SDL_main.h"
diff --git a/distrib/sdl-1.2.15/src/video/windib/SDL_dibvideo.c b/distrib/sdl-1.2.15/src/video/windib/SDL_dibvideo.c
index 9f1ffff..06c6ce2 100644
--- a/distrib/sdl-1.2.15/src/video/windib/SDL_dibvideo.c
+++ b/distrib/sdl-1.2.15/src/video/windib/SDL_dibvideo.c
@@ -21,7 +21,7 @@
 */
 #include "SDL_config.h"
 
-#define WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN 1
 #include <windows.h>
 
 /* Not yet in the mingw32 cross-compile headers */
diff --git a/distrib/sdl-1.2.15/src/video/windib/SDL_dibvideo.h b/distrib/sdl-1.2.15/src/video/windib/SDL_dibvideo.h
index 48b1943..535fa60 100644
--- a/distrib/sdl-1.2.15/src/video/windib/SDL_dibvideo.h
+++ b/distrib/sdl-1.2.15/src/video/windib/SDL_dibvideo.h
@@ -24,7 +24,7 @@
 #ifndef _SDL_dibvideo_h
 #define _SDL_dibvideo_h
 
-#define WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN 1
 #include <windows.h>
 
 
diff --git a/distrib/sdl-1.2.15/src/video/windib/SDL_gapidibvideo.h b/distrib/sdl-1.2.15/src/video/windib/SDL_gapidibvideo.h
index 64743d1..e5c079b 100644
--- a/distrib/sdl-1.2.15/src/video/windib/SDL_gapidibvideo.h
+++ b/distrib/sdl-1.2.15/src/video/windib/SDL_gapidibvideo.h
@@ -24,7 +24,7 @@
 #ifndef _SDL_gapidibvideo_h
 #define _SDL_gapidibvideo_h
 
-#define WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN 1
 #include <windows.h>
 
 /* Hidden "this" pointer for the video functions */
diff --git a/docs/ANDROID-MEMCHECK.TXT b/docs/ANDROID-MEMCHECK.TXT
index 01e8ccf..95e3959 100644
--- a/docs/ANDROID-MEMCHECK.TXT
+++ b/docs/ANDROID-MEMCHECK.TXT
@@ -53,5 +53,5 @@
 - etc.
 
 All the code related to memory checking is guarded in emulator's code by
-CONFIG_MEMCHECK macro, making it easy to spot changes related to it in the
+CONFIG_ANDROID_MEMCHECK macro, making it easy to spot changes related to it in the
 sources.
diff --git a/docs/ANDROID-QEMU-PIPE.TXT b/docs/ANDROID-QEMU-PIPE.TXT
index e0d295c..1f18599 100644
--- a/docs/ANDROID-QEMU-PIPE.TXT
+++ b/docs/ANDROID-QEMU-PIPE.TXT
@@ -12,6 +12,9 @@
 
   1/ Open the /dev/qemu_pipe device for read+write
 
+     NOTE: Starting with Linux 3.10, the device was renamed to
+           /dev/goldfish_pipe, but it behaves in exactly the same way.
+
   2/ Write a zero-terminated string describing which service you want to
      connect.
 
@@ -71,13 +74,37 @@
 to the device. Variable names beginning with REG_ correspond to 32-bit I/O
 registers:
 
+  0/ Channel and address values:
+
+     Each communication channel is identified by a unique non-zero value
+     which is either 32-bit or 64-bit, depending on the guest CPU
+     architecture.
+
+     The channel value is sent from the kernel to the emulator with:
+
+        void write_channel(channel) {
+        #if 64BIT_GUEST_CPU
+          REG_CHANNEL_HIGH = (channel >> 32);
+        #endif
+          REG_CHANNEL = (channel & 0xffffffffU);
+        }
+
+    Similarly, when passing a kernel address to the emulator:
+
+        void write_address(buffer_address) {
+        #if 64BIT_GUEST_CPU
+          REG_ADDRESS_HIGH = (buffer_address >> 32);
+        #endif
+          REG_ADDRESS = (buffer_address & 0xffffffffU);
+        }
+
   1/ Creating a new channel:
 
      Used by the driver to indicate that the guest just opened /dev/qemu_pipe
-     that will be identified by a 32-bit value named '<channel>' here:
+     that will be identified by a value named '<channel>':
 
-        REG_CHANNEL = <channel>
-        REG_CMD     = CMD_OPEN
+        write_channel(<channel>)
+        REG_CMD = CMD_OPEN
 
      IMPORTANT: <channel> should never be 0
 
@@ -86,8 +113,8 @@
      Used by the driver to indicate that the guest called 'close' on the
      channel file descriptor.
 
-        REG_CHANNEL = <channel>
-        REG_CMD     = CMD_CLOSE
+        write_channel(<channel>)
+        REG_CMD = CMD_CLOSE
 
   3/ Writing data to the channel:
 
@@ -95,8 +122,8 @@
      channel's file descriptor. This command is used to send a single
      memory buffer:
 
-        REG_CHANNEL = <channel>
-        REG_ADDRESS = <buffer-address>
+        write_channel(<channel>)
+        write_address(<buffer-address>)
         REG_SIZE    = <buffer-size>
         REG_CMD     = CMD_WRITE_BUFFER
 
@@ -127,8 +154,8 @@
     Corresponds to when the guest does a read() or readv() on the
     channel's file descriptor.
 
-        REG_CHANNEL = <channel>
-        REG_ADDRESS = <buffer-address>
+        write_channel(<channel>)
+        write_address(<buffer-address>)
         REG_SIZE    = <buffer-size>
         REG_CMD     = CMD_READ_BUFFER
 
@@ -145,7 +172,7 @@
 
     Before this, the driver will do:
 
-        REG_CHANNEL = <channel>
+        write_channel(<channel>)
         REG_CMD     = CMD_WAKE_ON_WRITE
 
     To indicate to the virtual device that it is waiting and should be woken
@@ -156,7 +183,7 @@
 
     This is the same as CMD_WAKE_ON_WRITE, but for readability instead.
 
-        REG_CHANNEL = <channel>
+        write_channel(<channel>)
         REG_CMD     = CMD_WAKE_ON_READ
 
   7/ Polling for write-able/read-able state:
@@ -164,7 +191,7 @@
     The following command is used by the driver to implement the select(),
     poll() and epoll() system calls where a pipe channel is involved.
 
-        REG_CHANNEL = <channel>
+        write_channel(<channel>)
         REG_CMD     = CMD_POLL
         mask = REG_STATUS
 
@@ -211,7 +238,9 @@
     This uses the following structure known to both the virtual device and
     the kernel, defined in $QEMU/hw/android/goldfish/pipe.h:
 
-        struct access_params{
+    For 32-bit guest CPUs:
+
+        struct access_params {
             uint32_t channel;
             uint32_t size;
             uint32_t address;
@@ -221,6 +250,18 @@
             uint32_t flags;
         };
 
+    And the 64-bit variant:
+
+        struct access_params_64 {
+            uint64_t channel;
+            uint32_t size;
+            uint64_t address;
+            uint32_t cmd;
+            uint32_t result;
+            /* reserved for future extension */
+            uint32_t flags;
+        };
+
     This is simply a way to pack several parameters into a single structure.
     Preliminary, e.g. at boot time, the kernel will allocate one such structure
     and pass its physical address with:
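
The doc changes above introduce write_channel()/write_address() as the portable way to
pass 64-bit channel identifiers and kernel buffer addresses to the virtual device. A
minimal compilable sketch of that pattern follows; reg_write() and the register offsets
are placeholders for illustration only, not the real goldfish_pipe register map:

    // Sketch only: reg_write() and the offsets below are placeholders.
    #include <stdint.h>
    #include <stdio.h>

    enum PipeReg {
        REG_CHANNEL      = 0x08,   // illustrative offsets
        REG_CHANNEL_HIGH = 0x30,
        REG_ADDRESS      = 0x10,
        REG_ADDRESS_HIGH = 0x34,
    };

    static void reg_write(PipeReg reg, uint32_t value) {
        // A real driver would perform an MMIO write here.
        printf("IO_WRITE(0x%02x, 0x%08x)\n", (unsigned)reg, value);
    }

    // Mirrors the doc's pseudo-code: the high half goes out first on
    // 64-bit guests; a 32-bit guest skips the *_HIGH write entirely.
    static void write_channel(uint64_t channel, bool guest_is_64bit) {
        if (guest_is_64bit)
            reg_write(REG_CHANNEL_HIGH, (uint32_t)(channel >> 32));
        reg_write(REG_CHANNEL, (uint32_t)(channel & 0xffffffffu));
    }

    static void write_address(uint64_t address, bool guest_is_64bit) {
        if (guest_is_64bit)
            reg_write(REG_ADDRESS_HIGH, (uint32_t)(address >> 32));
        reg_write(REG_ADDRESS, (uint32_t)(address & 0xffffffffu));
    }

    int main() {
        write_channel(0x100000001ull, true);          // hypothetical channel id
        write_address(0xffffffc000123000ull, true);   // hypothetical kernel address
        return 0;
    }
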
diff --git a/docs/GOLDFISH-VIRTUAL-HARDWARE.TXT b/docs/GOLDFISH-VIRTUAL-HARDWARE.TXT
index a0dc90d..b2e5ae9 100644
--- a/docs/GOLDFISH-VIRTUAL-HARDWARE.TXT
+++ b/docs/GOLDFISH-VIRTUAL-HARDWARE.TXT
@@ -83,6 +83,11 @@
     0x18 IRQ_BASE    R: Read base IRQ of current device.
     0x1c IRQ_COUNT   R: Read IRQ count of current device.
 
+    # For 64-bit guest architectures only:
+    0x20 NAME_ADDR_HIGH  W: Write high 32 bits of kernel address of name
+                            buffer used by GET_NAME. Must be written to
+                            before the GET_NAME write.
+
 The kernel iterates over the list of current devices with something like:
 
    IO_WRITE(BUS_OP, 0);    // Start iteration, any value other than 0 is invalid.
@@ -103,7 +108,10 @@
        dev.irq_count = IO_READ(IRQ_COUNT);
 
        dev.name = kalloc(dev.name_len + 1);  // allocate room for device name.
-       IO_WRITE(GET_NAME, dev.name);         // copy to kernel memory.
+    #if 64BIT_GUEST_CPU
+       IO_WRITE(NAME_ADDR_HIGH, (uint32_t)(dev.name >> 32));
+    #endif
+       IO_WRITE(GET_NAME, (uint32_t)dev.name);  // copy to kernel memory.
        dev.name[dev.name_len] = 0;
 
        .. add device to kernel's list.
@@ -294,6 +302,9 @@
     0x10  DATA_PTR      W: Write kernel buffer address.
     0x14  DATA_LEN      W: Write kernel buffer size.
 
+    # For 64-bit guest CPUs only:
+    0x18  DATA_PTR_HIGH    W: Write high 32 bits of kernel buffer address.
+
 This is the first case of a multi-instance goldfish device in this document.
 Each instance implements a virtual serial port that contains a small internal
 buffer where incoming data is stored until the kernel fetches it.
@@ -314,6 +325,9 @@
     if (len == 0) return;      // Nothing to do.
 
     available = get_buffer(len, &buffer);  // Get address of buffer and its size.
+    #if 64BIT_GUEST_CPU
+    IO_WRITE(DATA_PTR_HIGH, buffer >> 32);
+    #endif
     IO_WRITE(DATA_PTR, buffer);            // Write buffer address to device.
     IO_WRITE(DATA_LEN, available);         // Write buffer length to device.
     IO_WRITE(CMD, CMD_READ_BUFFER);        // Read the data into kernel buffer.
@@ -332,6 +346,9 @@
 
 Or use the more efficient sequence:
 
+  #if 64BIT_GUEST_CPU
+  IO_WRITE(DATA_PTR_HIGH, buffer >> 32)
+  #endif
   IO_WRITE(DATA_PTR, buffer)
   IO_WRITE(DATA_LEN, buffer_len)
   IO_WRITE(CMD, CMD_WRITE_BUFFER)
@@ -494,6 +511,11 @@
     0x20  START_READ
     0x24  READ_BUFFER_AVAILABLE
 
+    # For 64-bit guest CPUs
+    0x28  SET_WRITE_BUFFER_1_HIGH  W: Set high 32 bits of 1st kernel output buffer address.
+    0x30  SET_WRITE_BUFFER_2_HIGH  W: Set high 32 bits of 2nd kernel output buffer address.
+    0x34  SET_READ_BUFFER_HIGH     W: Set high 32 bits of kernel input buffer address.
+
 This device implements a virtual sound card with the following properties:
 
   - Stereo output at fixed 44.1 kHz frequency, using signed 16-bit samples.
@@ -503,9 +525,14 @@
     Optional.
 
 For output, the kernel driver allocates two internal buffers to hold output
-samples, and passes their physical address with
-IO_WRITE(SET_WRITE_BUFFER_1, <buffer1>) and
-IO_WRITE(SET_WRITE_BUFFER_2, <buffer2>).
+samples, and passes their physical address to the emulator as follows:
+
+  #if 64BIT_GUEST_CPU
+  IO_WRITE(SET_WRITE_BUFFER_1_HIGH, (uint32_t)(buffer1 >> 32));
+  IO_WRITE(SET_WRITE_BUFFER_2_HIGH, (uint32_t)(buffer2 >> 32));
+  #endif
+  IO_WRITE(SET_WRITE_BUFFER_1, (uint32_t)buffer1);
+  IO_WRITE(SET_WRITE_BUFFER_2, (uint32_t)buffer2);
 
 After this, samples will be sent from the driver to the virtual device by
 using one of IO_WRITE(WRITE_BUFFER_1, <length1>) or
@@ -532,9 +559,10 @@
 For input, the driver should first IO_READ(READ_SUPPORTED), which will return 1
 if the virtual device supports input, or 0 otherwise. If it does support it,
 the driver must allocate an internal buffer and send its physical address with
-IO_WRITE(SET_READ_BUFFER, <read-buffer>), then perform
-IO_WRITE(START_READ, <read-buffer-length>) to start recording and specify
-the kernel's buffer length.
+IO_WRITE(SET_READ_BUFFER, <read-buffer>) (with a previous write to
+SET_READ_BUFFER_HIGH on 64-bit guest CPUs), then perform
+IO_WRITE(START_READ, <read-buffer-length>) to start recording and
+specify the kernel's buffer length.
 
 Later, the device will raise its IRQ and set bit2 of 'int_status' to indicate
 there are incoming samples to the driver. In its interrupt handler, the latter
@@ -827,41 +855,6 @@
 TODO(digit): Complete this.
 
 
-XI. Goldfish memlog:
-=====================
-
-Relevant files:
-  $QEMU/hw/android/goldfish/memlog.c
-
-Device properties:
-  Name: goldfish_memlog
-  Id: -1
-  IrqCount: 0
-  I/O Registers:
-    ???
-
-Obsolete debugging device. Should be removed from both the kernel and the
-emulator sources.
-
-
-XII. Goldfish switch:
-=====================
-
-Relevant files:
-  $QEMU/hw/android/goldfish/switch.c
-  $KERNEL/arch/arm/mach-goldfish/switch.c
-  $KERNEL/arch/mips/goldfish/switch.c
-
-Device properties:
-  Name: goldfish_switch
-  Id: -1
-  IrqCount: 1
-  I/O Registers:
-
-Obsolete debugging device. Should be removed from both the kernel and the
-emulator sources.
-
-
 XIV. QEMU Pipe device:
 ======================
 
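The new *_HIGH registers documented above all follow the same convention: on a 64-bit
guest the driver writes the top half of a kernel address first, then issues the usual
32-bit sequence. A sketch of the goldfish_tty read path under that convention is below;
the DATA_* offsets come from the table above, while io_write(), the REG_CMD offset and
the CMD_READ_BUFFER value are assumptions made for the example:

    #include <stdint.h>
    #include <stdio.h>

    enum TtyReg {
        REG_CMD           = 0x00,  // assumed offset
        REG_DATA_PTR      = 0x10,
        REG_DATA_LEN      = 0x14,
        REG_DATA_PTR_HIGH = 0x18,
    };
    static const uint32_t CMD_READ_BUFFER = 3;  // illustrative command value

    static void io_write(uint32_t reg, uint32_t value) {
        printf("IO_WRITE(0x%02x, 0x%08x)\n", reg, value);  // stand-in for MMIO
    }

    // On 64-bit guests the high 32 bits of the kernel buffer address are
    // written to DATA_PTR_HIGH before the usual DATA_PTR/DATA_LEN/CMD trio.
    static void tty_read_into(uint64_t buffer, uint32_t available, bool guest_is_64bit) {
        if (guest_is_64bit)
            io_write(REG_DATA_PTR_HIGH, (uint32_t)(buffer >> 32));
        io_write(REG_DATA_PTR, (uint32_t)buffer);
        io_write(REG_DATA_LEN, available);
        io_write(REG_CMD, CMD_READ_BUFFER);
    }

    int main() {
        tty_read_into(0xffffffc000200000ull, 4096, true);
        return 0;
    }
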
diff --git a/docs/QEMU-MEMORY-MANAGEMENT.TXT b/docs/QEMU-MEMORY-MANAGEMENT.TXT
new file mode 100644
index 0000000..2f6e38a
--- /dev/null
+++ b/docs/QEMU-MEMORY-MANAGEMENT.TXT
@@ -0,0 +1,223 @@
+An overview of memory management in QEMU:
+
+I. RAM Management:
+==================
+
+I.1. RAM Address space:
+-----------------------
+
+All pages of virtual RAM used by QEMU at runtime are allocated from
+contiguous blocks in a specific abstract "RAM address space".
+|ram_addr_t| is the type of block addresses in this space.
+
+A single block of contiguous RAM is allocated with 'qemu_ram_alloc()', which
+takes a size in bytes, and allocates the pages through mmap() in the QEMU
+host process. It also sets up the corresponding KVM / Xen / HAX mappings,
+depending on each accelerator's specific needs.
+
+Each block has a name, which is used for snapshot support.
+
+'qemu_ram_alloc_from_ptr()' can also be used to allocate a new RAM
+block, by passing its content explicitly (can be useful for pages of
+ROM).
+
+'qemu_get_ram_ptr()' will translate a 'ram_addr_t' into the corresponding
+address in the QEMU host process. 'qemu_ram_addr_from_host()' does the
+opposite (i.e. translates a host address into a ram_addr_t if possible,
+or returns an error).
+
+Note that ram_addr_t addresses are an internal implementation detail of
+QEMU, i.e. the virtual CPU never sees their values directly; it relies
+instead of addresses in its virtual physical address space, described
+in section II. below.
+
+As an example, when emulating an Android/x86 virtual device, the following
+RAM space is being used:
+
+  0x0000_0000 ... 0x1000_0000   "pc.ram"
+  0x1000_0000 ... 0x1002_0000   "bios.bin"
+  0x1002_0000 ... 0x1004_0000   "pc.rom"
+
+
+I.2. RAM Dirty tracking:
+------------------------
+
+QEMU also associates with each RAM page an 8-bit 'dirty' bitmap. The
+main idea is that whenever a page is written to, the value 0xff is
+written to the page's 'dirty' bitmap. Various clients can later inspect
+some of the flags and clear them. I.e.:
+
+  VGA_DIRTY_FLAG (0x1) is typically used by framebuffer drivers to detect
+  which pages of video RAM were touched since the latest VSYNC. The driver
+  typically copies the pixel values to the real QEMU output, then clears
+  the bits. This is very useful to avoid needless copies if nothing
+  changed in the framebuffer.
+
+  MIGRATION_DIRTY_FLAG (0x8) is used to track modified RAM pages during
+  live migration (i.e. moving a QEMU virtual machine from one host to
+  another).
+
+  CODE_DIRTY_FLAG (0x2) is a bit more special, and is used to support
+  self-modifying code properly. More on this later.
+
+
+II. The physical address space:
+===============================
+
+Represents the address space that the virtual CPU can read from / write to.
+|hwaddr| is the type of addresses in this space, which is decomposed
+into 'pages'. Each page in the address space is either unassigned, or
+mapped to a specific kind of memory region.
+
+See |phys_page_find()| and |phys_page_find_alloc()| in translate-all.c for
+the implementation details.
+
+
+II.1. Memory region types:
+--------------------------
+
+There are several memory region types:
+
+  - Regions of RAM pages.
+  - Regions of ROM pages (similar to RAM, but cannot be written to).
+  - Regions of I/O pages, used to communicate with virtual hardware.
+
+Virtual devices can register a new I/O region type by calling
+|cpu_register_io_memory()|. This function allows them to provide
+callbacks that will be invoked every time the virtual CPU reads from
+or writes to any page of the corresponding type.
+
+The memory region type of a given page is encoded using PAGE_BITS bits
+in the following format:
+
+        +-------------------------------+
+        |    mem_type_index     | flags |
+        +-------------------------------+
+
+Where |mem_type_index| is a unique value identifying a given memory
+region type, and |flags| is a 3-bit bitmap used to store flags that are
+only relevant for I/O pages.
+
+The following memory region type values are important:
+
+  IO_MEM_RAM (mem_type_index=0, flags=0):
+    Used for regular RAM pages, always all zero on purpose.
+
+  IO_MEM_ROM (mem_type_index=1, flags=0):
+    Used for ROM pages.
+
+  IO_MEM_UNASSIGNED (mem_type_index=2, flags=0):
+    Used to identify unassigned pages of the physical address space.
+
+  IO_MEM_NOTDIRTY (mem_type_index=3, flags=0):
+    Used to implement tracking of dirty RAM pages. This is essentially
+    used for RAM pages that have not been written to yet.
+
+Any mem_type_index value of 4 or higher corresponds to a device-specific
+I/O memory region type (i.e. with custom read/write callbacks and a
+corresponding 'opaque' value), and can also use the following bits
+in |flags|:
+
+  IO_MEM_ROMD (0x1):
+    Used for ROM-like I/O pages, i.e. they are backed by a page from
+    the RAM address space, but writing to them triggers a device-specific
+    write callback (instead of being ignored or faulting the CPU).
+
+  IO_MEM_SUBPAGE (0x02)
+    Used to indicate that not all addresses in this page map to the same
+    I/O region type / callbacks.
+
+  IO_MEM_SUBWIDTH (0x04)
+    Probably obsolete. Set to indicate that the corresponding I/O region
+    type doesn't support reading/writing values of all possible sizes
+    (1, 2 and 4 bytes). This seems to be never used by the current code.
+
+Note that cpu_register_io_memory() returns a new memory region type value.
+
+II.2. Physical address map:
+---------------------------
+
+QEMU maintains for each assigned page in the physical address space
+two values:
+
+  |phys_offset|, a combination of ram address and memory region type.
+
+  |region_offset|, an optional offset into the region backing the
+  page. This is only useful for I/O pages.
+
+The |phys_offset| value has several interesting encodings which require
+further clarification:
+
+  - Generally speaking, a phys_offset value is decomposed into
+    the following bit fields:
+
+      +-----------------------------------------------------+
+      |         high_addr               |     mem_type      |
+      +-----------------------------------------------------+
+
+    where |mem_type| is a PAGE_BITS memory region type as described
+    previously, and |high_addr| may contain the high bits of a
+    ram_addr_t address for RAM-backed pages.
+
+More specifically:
+
+  - Unassigned pages always have the special value IO_MEM_UNASSIGNED
+    (high_addr=0, mem_type=IO_MEM_UNASSIGNED)
+
+  - RAM pages have mem_type=0 (i.e. IO_MEM_RAM) while high_addr holds
+    the high bits of the corresponding ram_addr_t. Hence, a simple call to
+    qemu_get_ram_ptr(phys_offset) will return the corresponding
+    address in host QEMU memory.
+
+    This is the reason why IO_MEM_RAM is always 0:
+
+    RAM page phys_offset value:
+      +-----------------------------------------------------+
+      |   high_addr                     |           0       |
+      +-----------------------------------------------------+
+
+
+  - ROM pages are like RAM pages, but have mem_type=IO_MEM_ROM.
+    QEMU ensures that writing to such a page is a no-op, except that on
+    some target architectures, like Sparc, this may cause a CPU fault.
+
+    ROM page phys_offset value:
+      +-----------------------------------------------------+
+      |   high_addr                     |     IO_MEM_ROM    |
+      +-----------------------------------------------------+
+
+  - Dirty RAM page tracking is implemented by using special
+    phys_offset values with mem_type=IO_MEM_NOTDIRTY. Note that these
+    values do not appear directly in the physical page map, but in
+    the CPU TLB cache (explained later).
+
+    non-dirty RAM page phys_offset value (CPU TLB cache only):
+      +-----------------------------------------------------+
+      |   high_addr                     |  IO_MEM_NOTDIRTY  |
+      +-----------------------------------------------------+
+
+   - Other pages are I/O pages, and their high_addr value will
+     be 0 / ignored:
+
+    I/O page phys_offset value:
+      +----------------------------------------------------------+
+      |  0                              | mem_type_index | flags |
+      +----------------------------------------------------------+
+
+    Note that when reading from or writing to I/O pages, the lowest
+    PAGE_BITS bits of the corresponding hwaddr value will be added
+    to the page's |region_offset| value. This new address is passed
+    to the read/write callback as the 'i/o address' for the operation.
+
+   - As a special exception, if the I/O page's IO_MEM_ROMD flag is
+     set, then high_addr is not 0, but the high bits of the corresponding
+     ram_addr_t backing the page's contents on reads. On write operations
+     though, the I/O region type's write callback will be called instead.
+
+     ROMD I/O page phys_offset value:
+      +----------------------------------------------------------+
+      |  high_addr                      | mem_type_index | flags |
+      +----------------------------------------------------------+
+
+     Note that |region_offset| is ignored when reading from such pages;
+     it's only used when writing to the I/O page.
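
As a reading aid for the phys_offset layout described above, here is a small decoder
sketch. The 12-bit page field and 3-bit flag field are assumptions made for the example;
the real widths come from TARGET_PAGE_BITS and the I/O index shift in the QEMU sources:

    #include <stdint.h>
    #include <stdio.h>

    static const int kPageBits = 12;   // assumed TARGET_PAGE_BITS
    static const int kFlagBits = 3;    // IO_MEM_ROMD / IO_MEM_SUBPAGE / IO_MEM_SUBWIDTH

    struct DecodedPhysOffset {
        uint64_t high_addr;        // high bits of the backing ram_addr_t
        uint32_t mem_type_index;   // 0=RAM 1=ROM 2=UNASSIGNED 3=NOTDIRTY >=4 I/O
        uint32_t flags;
    };

    static DecodedPhysOffset decode(uint64_t phys_offset) {
        const uint64_t page_mask = (1ull << kPageBits) - 1;
        DecodedPhysOffset d;
        d.high_addr      = phys_offset & ~page_mask;
        d.flags          = (uint32_t)(phys_offset & ((1u << kFlagBits) - 1));
        d.mem_type_index = (uint32_t)((phys_offset & page_mask) >> kFlagBits);
        return d;
    }

    int main() {
        // For a plain RAM page the low bits are all zero (IO_MEM_RAM == 0),
        // so phys_offset is simply the page's ram_addr_t.
        DecodedPhysOffset d = decode(0x00345000u);
        printf("high_addr=0x%llx mem_type_index=%u flags=%u\n",
               (unsigned long long)d.high_addr, d.mem_type_index, d.flags);
        return 0;
    }
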
diff --git a/dyngen-exec.h b/dyngen-exec.h
deleted file mode 100644
index db00fba..0000000
--- a/dyngen-exec.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- *  dyngen defines for micro operation code
- *
- *  Copyright (c) 2003 Fabrice Bellard
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-#if !defined(__DYNGEN_EXEC_H__)
-#define __DYNGEN_EXEC_H__
-
-#include "qemu-common.h"
-
-#ifdef __OpenBSD__
-#include <sys/types.h>
-#endif
-
-/* XXX: This may be wrong for 64-bit ILP32 hosts.  */
-typedef void * host_reg_t;
-
-#if defined(__i386__)
-#define AREG0 "ebp"
-#elif defined(__x86_64__)
-#define AREG0 "r14"
-#elif defined(_ARCH_PPC)
-#define AREG0 "r27"
-#elif defined(__arm__)
-#define AREG0 "r7"
-#elif defined(__hppa__)
-#define AREG0 "r17"
-#elif defined(__mips__)
-#define AREG0 "s0"
-#elif defined(__sparc__)
-#ifdef CONFIG_SOLARIS
-#define AREG0 "g2"
-#else
-#ifdef __sparc_v9__
-#define AREG0 "g5"
-#else
-#define AREG0 "g6"
-#endif
-#endif
-#elif defined(__s390__)
-#define AREG0 "r10"
-#elif defined(__alpha__)
-/* Note $15 is the frame pointer, so anything in op-i386.c that would
-   require a frame pointer, like alloca, would probably loose.  */
-#define AREG0 "$15"
-#elif defined(__mc68000)
-#define AREG0 "%a5"
-#elif defined(__ia64__)
-#define AREG0 "r7"
-#else
-#error unsupported CPU
-#endif
-
-#define xglue(x, y) x ## y
-#define glue(x, y) xglue(x, y)
-#define stringify(s)	tostring(s)
-#define tostring(s)	#s
-
-/* The return address may point to the start of the next instruction.
-   Subtracting one gets us the call instruction itself.  */
-#if defined(__s390__) && !defined(__s390x__)
-# define GETPC() ((void*)(((unsigned long)__builtin_return_address(0) & 0x7fffffffUL) - 1))
-#elif defined(__arm__)
-/* Thumb return addresses have the low bit set, so we need to subtract two.
-   This is still safe in ARM mode because instructions are 4 bytes.  */
-# define GETPC() ((void *)((unsigned long)__builtin_return_address(0) - 2))
-#else
-# define GETPC() ((void *)((unsigned long)__builtin_return_address(0) - 1))
-#endif
-
-#endif /* !defined(__DYNGEN_EXEC_H__) */
diff --git a/exec.c b/exec.c
index 2f44aa4..6f29389 100644
--- a/exec.c
+++ b/exec.c
@@ -37,7 +37,10 @@
 #include "qemu-common.h"
 #include "tcg.h"
 #include "hw/hw.h"
+#include "hw/qdev.h"
+#include "hw/xen/xen.h"
 #include "qemu/osdep.h"
+#include "qemu/tls.h"
 #include "sysemu/kvm.h"
 #include "exec/cputlb.h"
 #include "exec/hax.h"
@@ -45,70 +48,22 @@
 #if defined(CONFIG_USER_ONLY)
 #include <qemu.h>
 #endif
-#ifdef CONFIG_MEMCHECK
-#include "memcheck/memcheck_api.h"
-#endif  // CONFIG_MEMCHECK
+#ifdef CONFIG_ANDROID_MEMCHECK
+#include "android/qemu/memcheck/memcheck_api.h"
+#endif  // CONFIG_ANDROID_MEMCHECK
 
-//#define DEBUG_TB_INVALIDATE
-//#define DEBUG_FLUSH
-//#define DEBUG_TLB
-//#define DEBUG_UNASSIGNED
-
-/* make various TB consistency checks */
-//#define DEBUG_TB_CHECK
-//#define DEBUG_TLB_CHECK
-
-//#define DEBUG_IOPORT
 //#define DEBUG_SUBPAGE
 
 #if !defined(CONFIG_USER_ONLY)
-/* TB consistency checks only implemented for usermode emulation.  */
-#undef DEBUG_TB_CHECK
-#endif
-
-#define SMC_BITMAP_USE_THRESHOLD 10
-
-static TranslationBlock *tbs;
-int code_gen_max_blocks;
-TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
-static int nb_tbs;
-/* any access to the tbs or the page table must use this lock */
-spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
-
-#if defined(__arm__) || defined(__sparc_v9__)
-/* The prologue must be reachable with a direct jump. ARM and Sparc64
- have limited branch ranges (possibly also PPC) so place it in a
- section close to code segment. */
-#define code_gen_section                                \
-    __attribute__((__section__(".gen_code")))           \
-    __attribute__((aligned (32)))
-#elif defined(_WIN32)
-/* Maximum alignment for Win32 is 16. */
-#define code_gen_section                                \
-    __attribute__((aligned (16)))
-#else
-#define code_gen_section                                \
-    __attribute__((aligned (32)))
-#endif
-
-uint8_t code_gen_prologue[1024] code_gen_section;
-static uint8_t *code_gen_buffer;
-static unsigned long code_gen_buffer_size;
-/* threshold to flush the translated code buffer */
-static unsigned long code_gen_buffer_max_size;
-uint8_t *code_gen_ptr;
-
-#if !defined(CONFIG_USER_ONLY)
 int phys_ram_fd;
 static int in_migration;
 
-RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list) };
+RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
 #endif
 
-CPUArchState *first_cpu;
-/* current CPU in the current thread. It is only valid inside
-   cpu_exec() */
-CPUArchState *cpu_single_env;
+struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
+DEFINE_TLS(CPUState *, current_cpu);
+
 /* 0 = Do not count executed instructions.
    1 = Precise instruction counting.
    2 = Adaptive rate instruction counting.  */
@@ -117,46 +72,12 @@
    include some instructions that have not yet been executed.  */
 int64_t qemu_icount;
 
-typedef struct PageDesc {
-    /* list of TBs intersecting this ram page */
-    TranslationBlock *first_tb;
-    /* in order to optimize self modifying code, we count the number
-       of lookups we do to a given page to use a bitmap */
-    unsigned int code_write_count;
-    uint8_t *code_bitmap;
-#if defined(CONFIG_USER_ONLY)
-    unsigned long flags;
-#endif
-} PageDesc;
-
-#define L2_BITS 10
-#if defined(CONFIG_USER_ONLY) && defined(TARGET_VIRT_ADDR_SPACE_BITS)
-/* XXX: this is a temporary hack for alpha target.
- *      In the future, this is to be replaced by a multi-level table
- *      to actually be able to handle the complete 64 bits address space.
- */
-#define L1_BITS (TARGET_VIRT_ADDR_SPACE_BITS - L2_BITS - TARGET_PAGE_BITS)
-#else
-#define L1_BITS (32 - L2_BITS - TARGET_PAGE_BITS)
-#endif
-
-#define L1_SIZE (1 << L1_BITS)
-#define L2_SIZE (1 << L2_BITS)
-
-uintptr_t qemu_real_host_page_size;
-uintptr_t qemu_host_page_size;
-uintptr_t qemu_host_page_mask;
-
-/* XXX: for system emulation, it could just be an array */
-static PageDesc *l1_map[L1_SIZE];
-static PhysPageDesc **l1_phys_map;
-
 #if !defined(CONFIG_USER_ONLY)
 static void io_mem_init(void);
 
 /* io memory support */
-CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
-CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
+CPUWriteMemoryFunc *_io_mem_write[IO_MEM_NB_ENTRIES][4];
+CPUReadMemoryFunc *_io_mem_read[IO_MEM_NB_ENTRIES][4];
 void *io_mem_opaque[IO_MEM_NB_ENTRIES];
 static char io_mem_used[IO_MEM_NB_ENTRIES];
 int io_mem_watch;
@@ -172,10 +93,6 @@
 int loglevel;
 static int log_append = 0;
 
-/* statistics */
-static int tb_flush_count;
-static int tb_phys_invalidate_count;
-
 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
 typedef struct subpage_t {
     hwaddr base;
@@ -185,318 +102,20 @@
     ram_addr_t region_offset[TARGET_PAGE_SIZE][2][4];
 } subpage_t;
 
-#ifdef _WIN32
-static void map_exec(void *addr, long size)
-{
-    DWORD old_protect;
-    VirtualProtect(addr, size,
-                   PAGE_EXECUTE_READWRITE, &old_protect);
-
-}
-#else
-static void map_exec(void *addr, long size)
-{
-    unsigned long start, end, page_size;
-
-    page_size = getpagesize();
-    start = (unsigned long)addr;
-    start &= ~(page_size - 1);
-
-    end = (unsigned long)addr + size;
-    end += page_size - 1;
-    end &= ~(page_size - 1);
-
-    mprotect((void *)start, end - start,
-             PROT_READ | PROT_WRITE | PROT_EXEC);
-}
-#endif
-
-static void page_init(void)
-{
-    /* NOTE: we can always suppose that qemu_host_page_size >=
-       TARGET_PAGE_SIZE */
-#ifdef _WIN32
-    {
-        SYSTEM_INFO system_info;
-
-        GetSystemInfo(&system_info);
-        qemu_real_host_page_size = system_info.dwPageSize;
-    }
-#else
-    qemu_real_host_page_size = getpagesize();
-#endif
-    if (qemu_host_page_size == 0)
-        qemu_host_page_size = qemu_real_host_page_size;
-    if (qemu_host_page_size < TARGET_PAGE_SIZE)
-        qemu_host_page_size = TARGET_PAGE_SIZE;
-    qemu_host_page_mask = ~(qemu_host_page_size - 1);
-    l1_phys_map = qemu_vmalloc(L1_SIZE * sizeof(void *));
-    memset(l1_phys_map, 0, L1_SIZE * sizeof(void *));
-
-#if !defined(_WIN32) && defined(CONFIG_USER_ONLY)
-    {
-        long long startaddr, endaddr;
-        FILE *f;
-        int n;
-
-        mmap_lock();
-        last_brk = (unsigned long)sbrk(0);
-        f = fopen("/proc/self/maps", "r");
-        if (f) {
-            do {
-                n = fscanf (f, "%llx-%llx %*[^\n]\n", &startaddr, &endaddr);
-                if (n == 2) {
-                    startaddr = MIN(startaddr,
-                                    (1ULL << TARGET_PHYS_ADDR_SPACE_BITS) - 1);
-                    endaddr = MIN(endaddr,
-                                    (1ULL << TARGET_PHYS_ADDR_SPACE_BITS) - 1);
-                    page_set_flags(startaddr & TARGET_PAGE_MASK,
-                                   TARGET_PAGE_ALIGN(endaddr),
-                                   PAGE_RESERVED);
-                }
-            } while (!feof(f));
-            fclose(f);
-        }
-        mmap_unlock();
-    }
-#endif
-}
-
-static inline PageDesc **page_l1_map(target_ulong index)
-{
-#if TARGET_LONG_BITS > 32
-    /* Host memory outside guest VM.  For 32-bit targets we have already
-       excluded high addresses.  */
-    if (index > ((target_ulong)L2_SIZE * L1_SIZE))
-        return NULL;
-#endif
-    return &l1_map[index >> L2_BITS];
-}
-
-static inline PageDesc *page_find_alloc(target_ulong index)
-{
-    PageDesc **lp, *p;
-    lp = page_l1_map(index);
-    if (!lp)
-        return NULL;
-
-    p = *lp;
-    if (!p) {
-        /* allocate if not found */
-#if defined(CONFIG_USER_ONLY)
-        size_t len = sizeof(PageDesc) * L2_SIZE;
-        /* Don't use g_malloc because it may recurse.  */
-        p = mmap(NULL, len, PROT_READ | PROT_WRITE,
-                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-        *lp = p;
-        if (h2g_valid(p)) {
-            unsigned long addr = h2g(p);
-            page_set_flags(addr & TARGET_PAGE_MASK,
-                           TARGET_PAGE_ALIGN(addr + len),
-                           PAGE_RESERVED);
-        }
-#else
-        p = g_malloc0(sizeof(PageDesc) * L2_SIZE);
-        *lp = p;
-#endif
-    }
-    return p + (index & (L2_SIZE - 1));
-}
-
-static inline PageDesc *page_find(target_ulong index)
-{
-    PageDesc **lp, *p;
-    lp = page_l1_map(index);
-    if (!lp)
-        return NULL;
-
-    p = *lp;
-    if (!p) {
-        return NULL;
-    }
-    return p + (index & (L2_SIZE - 1));
-}
-
-static PhysPageDesc *phys_page_find_alloc(hwaddr index, int alloc)
-{
-    void **lp, **p;
-    PhysPageDesc *pd;
-
-    p = (void **)l1_phys_map;
-#if TARGET_PHYS_ADDR_SPACE_BITS > 32
-
-#if TARGET_PHYS_ADDR_SPACE_BITS > (32 + L1_BITS)
-#error unsupported TARGET_PHYS_ADDR_SPACE_BITS
-#endif
-    lp = p + ((index >> (L1_BITS + L2_BITS)) & (L1_SIZE - 1));
-    p = *lp;
-    if (!p) {
-        /* allocate if not found */
-        if (!alloc)
-            return NULL;
-        p = qemu_vmalloc(sizeof(void *) * L1_SIZE);
-        memset(p, 0, sizeof(void *) * L1_SIZE);
-        *lp = p;
-    }
-#endif
-    lp = p + ((index >> L2_BITS) & (L1_SIZE - 1));
-    pd = *lp;
-    if (!pd) {
-        int i;
-        /* allocate if not found */
-        if (!alloc)
-            return NULL;
-        pd = qemu_vmalloc(sizeof(PhysPageDesc) * L2_SIZE);
-        *lp = pd;
-        for (i = 0; i < L2_SIZE; i++) {
-          pd[i].phys_offset = IO_MEM_UNASSIGNED;
-          pd[i].region_offset = (index + i) << TARGET_PAGE_BITS;
-        }
-    }
-    return ((PhysPageDesc *)pd) + (index & (L2_SIZE - 1));
-}
-
-PhysPageDesc *phys_page_find(hwaddr index)
-{
-    return phys_page_find_alloc(index, 0);
-}
-
-#if !defined(CONFIG_USER_ONLY)
-#define mmap_lock() do { } while(0)
-#define mmap_unlock() do { } while(0)
-#endif
-
-#define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
-
-#if defined(CONFIG_USER_ONLY)
-/* Currently it is not recommended to allocate big chunks of data in
-   user mode. It will change when a dedicated libc will be used */
-#define USE_STATIC_CODE_GEN_BUFFER
-#endif
-
-#ifdef USE_STATIC_CODE_GEN_BUFFER
-static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE];
-#endif
-
-static void code_gen_alloc(unsigned long tb_size)
-{
-#ifdef USE_STATIC_CODE_GEN_BUFFER
-    code_gen_buffer = static_code_gen_buffer;
-    code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
-    map_exec(code_gen_buffer, code_gen_buffer_size);
-#else
-    code_gen_buffer_size = tb_size;
-    if (code_gen_buffer_size == 0) {
-#if defined(CONFIG_USER_ONLY)
-        /* in user mode, phys_ram_size is not meaningful */
-        code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
-#else
-        /* XXX: needs adjustments */
-        code_gen_buffer_size = (unsigned long)(ram_size / 4);
-#endif
-    }
-    if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
-        code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
-    /* The code gen buffer location may have constraints depending on
-       the host cpu and OS */
-#if defined(__linux__)
-    {
-        int flags;
-        void *start = NULL;
-
-        flags = MAP_PRIVATE | MAP_ANONYMOUS;
-#if defined(__x86_64__)
-        flags |= MAP_32BIT;
-        /* Cannot map more than that */
-        if (code_gen_buffer_size > (800 * 1024 * 1024))
-            code_gen_buffer_size = (800 * 1024 * 1024);
-#elif defined(__sparc_v9__)
-        // Map the buffer below 2G, so we can use direct calls and branches
-        flags |= MAP_FIXED;
-        start = (void *) 0x60000000UL;
-        if (code_gen_buffer_size > (512 * 1024 * 1024))
-            code_gen_buffer_size = (512 * 1024 * 1024);
-#elif defined(__arm__)
-        /* Map the buffer below 32M, so we can use direct calls and branches */
-        flags |= MAP_FIXED;
-        start = (void *) 0x01000000UL;
-        if (code_gen_buffer_size > 16 * 1024 * 1024)
-            code_gen_buffer_size = 16 * 1024 * 1024;
-#elif defined(__s390x__)
-        /* Map the buffer so that we can use direct calls and branches.  */
-        /* We have a +- 4GB range on the branches; leave some slop.  */
-        if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
-            code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
-        }
-        start = (void *)0x90000000UL;
-#endif
-        code_gen_buffer = mmap(start, code_gen_buffer_size,
-                               PROT_WRITE | PROT_READ | PROT_EXEC,
-                               flags, -1, 0);
-        if (code_gen_buffer == MAP_FAILED) {
-            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
-            exit(1);
-        }
-    }
-#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
-    || defined(__DragonFly__) || defined(__OpenBSD__)
-    {
-        int flags;
-        void *addr = NULL;
-        flags = MAP_PRIVATE | MAP_ANONYMOUS;
-#if defined(__x86_64__)
-        /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
-         * 0x40000000 is free */
-        flags |= MAP_FIXED;
-        addr = (void *)0x40000000;
-        /* Cannot map more than that */
-        if (code_gen_buffer_size > (800 * 1024 * 1024))
-            code_gen_buffer_size = (800 * 1024 * 1024);
-#elif defined(__sparc_v9__)
-        // Map the buffer below 2G, so we can use direct calls and branches
-        flags |= MAP_FIXED;
-        addr = (void *) 0x60000000UL;
-        if (code_gen_buffer_size > (512 * 1024 * 1024)) {
-            code_gen_buffer_size = (512 * 1024 * 1024);
-        }
-#endif
-        code_gen_buffer = mmap(addr, code_gen_buffer_size,
-                               PROT_WRITE | PROT_READ | PROT_EXEC,
-                               flags, -1, 0);
-        if (code_gen_buffer == MAP_FAILED) {
-            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
-            exit(1);
-        }
-    }
-#else
-    code_gen_buffer = g_malloc(code_gen_buffer_size);
-    map_exec(code_gen_buffer, code_gen_buffer_size);
-#endif
-#endif /* !USE_STATIC_CODE_GEN_BUFFER */
-    map_exec(code_gen_prologue, sizeof(code_gen_prologue));
-    code_gen_buffer_max_size = code_gen_buffer_size -
-        code_gen_max_block_size();
-    code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
-    tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
-}
-
 /* Must be called before using the QEMU cpus. 'tb_size' is the size
    (in bytes) allocated to the translation buffer. Zero means default
    size. */
 void cpu_exec_init_all(unsigned long tb_size)
 {
-    cpu_gen_init();
-    code_gen_alloc(tb_size);
-    code_gen_ptr = code_gen_buffer;
-    page_init();
+    //cpu_gen_init();
+    //code_gen_alloc(tb_size);
+    //code_gen_ptr = code_gen_buffer;
+    //page_init();
+    tcg_exec_init(tb_size);
 #if !defined(CONFIG_USER_ONLY)
+    qemu_mutex_init(&ram_list.mutex);
     io_mem_init();
 #endif
-#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
-    /* There's no guest base to take into account, so go ahead and
-       initialize the prologue now.  */
-    tcg_prologue_init(&tcg_ctx);
-#endif
 }
 
 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
@@ -506,806 +125,85 @@
 static void cpu_common_save(QEMUFile *f, void *opaque)
 {
     CPUOldState *env = opaque;
+    CPUState *cpu = ENV_GET_CPU(env);
 
-    cpu_synchronize_state(env, 0);
+    cpu_synchronize_state(cpu, 0);
 
-    qemu_put_be32s(f, &env->halted);
-    qemu_put_be32s(f, &env->interrupt_request);
+    qemu_put_be32s(f, &cpu->halted);
+    qemu_put_be32s(f, &cpu->interrupt_request);
 }
 
 static int cpu_common_load(QEMUFile *f, void *opaque, int version_id)
 {
     CPUOldState *env = opaque;
+    CPUState *cpu = ENV_GET_CPU(env);
 
     if (version_id != CPU_COMMON_SAVE_VERSION)
         return -EINVAL;
 
-    qemu_get_be32s(f, &env->halted);
-    qemu_get_be32s(f, &env->interrupt_request);
+    qemu_get_be32s(f, &cpu->halted);
+    qemu_get_be32s(f, &cpu->interrupt_request);
     /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
        version_id is increased. */
-    env->interrupt_request &= ~0x01;
+    cpu->interrupt_request &= ~0x01;
     tlb_flush(env, 1);
-    cpu_synchronize_state(env, 1);
+    cpu_synchronize_state(cpu, 1);
 
     return 0;
 }
 #endif
 
-CPUArchState *qemu_get_cpu(int cpu)
+CPUState *qemu_get_cpu(int cpu_index)
 {
-    CPUArchState *env = first_cpu;
+    CPUState *cpu;
 
-    while (env) {
-        if (env->cpu_index == cpu)
-            break;
-        env = env->next_cpu;
+    CPU_FOREACH(cpu) {
+        if (cpu->cpu_index == cpu_index)
+            return cpu;
     }
-
-    return env;
+    return NULL;
 }
 
 void cpu_exec_init(CPUArchState *env)
 {
-    CPUArchState **penv;
-    int cpu_index;
+    CPUState *cpu = ENV_GET_CPU(env);
 
 #if defined(CONFIG_USER_ONLY)
     cpu_list_lock();
 #endif
-    env->next_cpu = NULL;
-    penv = &first_cpu;
-    cpu_index = 0;
-    while (*penv != NULL) {
-        penv = &(*penv)->next_cpu;
+    // Compute CPU index from list position.
+    int cpu_index = 0;
+    CPUState *cpu1;
+    CPU_FOREACH(cpu1) {
         cpu_index++;
     }
-    env->cpu_index = cpu_index;
-    env->numa_node = 0;
+    cpu->cpu_index = cpu_index;
+    QTAILQ_INSERT_TAIL(&cpus, cpu, node);
+
+    cpu->numa_node = 0;
     QTAILQ_INIT(&env->breakpoints);
     QTAILQ_INIT(&env->watchpoints);
-    *penv = env;
 #if defined(CONFIG_USER_ONLY)
     cpu_list_unlock();
 #endif
 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
-    register_savevm("cpu_common", cpu_index, CPU_COMMON_SAVE_VERSION,
-                    cpu_common_save, cpu_common_load, env);
-    register_savevm("cpu", cpu_index, CPU_SAVE_VERSION,
-                    cpu_save, cpu_load, env);
+    register_savevm(NULL,
+                    "cpu_common",
+                    cpu_index,
+                    CPU_COMMON_SAVE_VERSION,
+                    cpu_common_save,
+                    cpu_common_load,
+                    env);
+    register_savevm(NULL,
+                    "cpu",
+                    cpu_index,
+                    CPU_SAVE_VERSION,
+                    cpu_save,
+                    cpu_load,
+                    env);
 #endif
 }
 
-static inline void invalidate_page_bitmap(PageDesc *p)
-{
-    if (p->code_bitmap) {
-        g_free(p->code_bitmap);
-        p->code_bitmap = NULL;
-    }
-    p->code_write_count = 0;
-}
-
-/* set to NULL all the 'first_tb' fields in all PageDescs */
-static void page_flush_tb(void)
-{
-    int i, j;
-    PageDesc *p;
-
-    for(i = 0; i < L1_SIZE; i++) {
-        p = l1_map[i];
-        if (p) {
-            for(j = 0; j < L2_SIZE; j++) {
-                p->first_tb = NULL;
-                invalidate_page_bitmap(p);
-                p++;
-            }
-        }
-    }
-}
-
-/* flush all the translation blocks */
-/* XXX: tb_flush is currently not thread safe */
-void tb_flush(CPUArchState *env1)
-{
-    CPUArchState *env;
-#if defined(DEBUG_FLUSH)
-    printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
-           (unsigned long)(code_gen_ptr - code_gen_buffer),
-           nb_tbs, nb_tbs > 0 ?
-           ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
-#endif
-    if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
-        cpu_abort(env1, "Internal error: code buffer overflow\n");
-
-    nb_tbs = 0;
-
-    for(env = first_cpu; env != NULL; env = env->next_cpu) {
-#ifdef CONFIG_MEMCHECK
-        int tb_to_clean;
-        for (tb_to_clean = 0; tb_to_clean < TB_JMP_CACHE_SIZE; tb_to_clean++) {
-            if (env->tb_jmp_cache[tb_to_clean] != NULL &&
-                env->tb_jmp_cache[tb_to_clean]->tpc2gpc != NULL) {
-                g_free(env->tb_jmp_cache[tb_to_clean]->tpc2gpc);
-                env->tb_jmp_cache[tb_to_clean]->tpc2gpc = NULL;
-                env->tb_jmp_cache[tb_to_clean]->tpc2gpc_pairs = 0;
-            }
-        }
-#endif  // CONFIG_MEMCHECK
-        memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
-    }
-
-    memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
-    page_flush_tb();
-
-    code_gen_ptr = code_gen_buffer;
-    /* XXX: flush processor icache at this point if cache flush is
-       expensive */
-    tb_flush_count++;
-}
-
-#ifdef DEBUG_TB_CHECK
-
-static void tb_invalidate_check(target_ulong address)
-{
-    TranslationBlock *tb;
-    int i;
-    address &= TARGET_PAGE_MASK;
-    for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
-        for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
-            if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
-                  address >= tb->pc + tb->size)) {
-                printf("ERROR invalidate: address=" TARGET_FMT_lx
-                       " PC=%08lx size=%04x\n",
-                       address, (long)tb->pc, tb->size);
-            }
-        }
-    }
-}
-
-/* verify that all the pages have correct rights for code */
-static void tb_page_check(void)
-{
-    TranslationBlock *tb;
-    int i, flags1, flags2;
-
-    for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
-        for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
-            flags1 = page_get_flags(tb->pc);
-            flags2 = page_get_flags(tb->pc + tb->size - 1);
-            if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
-                printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
-                       (long)tb->pc, tb->size, flags1, flags2);
-            }
-        }
-    }
-}
-
-#endif
-
-/* invalidate one TB */
-static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
-                             int next_offset)
-{
-    TranslationBlock *tb1;
-    for(;;) {
-        tb1 = *ptb;
-        if (tb1 == tb) {
-            *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
-            break;
-        }
-        ptb = (TranslationBlock **)((char *)tb1 + next_offset);
-    }
-}
-
-static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
-{
-    TranslationBlock *tb1;
-    unsigned int n1;
-
-    for(;;) {
-        tb1 = *ptb;
-        n1 = (long)tb1 & 3;
-        tb1 = (TranslationBlock *)((long)tb1 & ~3);
-        if (tb1 == tb) {
-            *ptb = tb1->page_next[n1];
-            break;
-        }
-        ptb = &tb1->page_next[n1];
-    }
-}
-
-static inline void tb_jmp_remove(TranslationBlock *tb, int n)
-{
-    TranslationBlock *tb1, **ptb;
-    unsigned int n1;
-
-    ptb = &tb->jmp_next[n];
-    tb1 = *ptb;
-    if (tb1) {
-        /* find tb(n) in circular list */
-        for(;;) {
-            tb1 = *ptb;
-            n1 = (long)tb1 & 3;
-            tb1 = (TranslationBlock *)((long)tb1 & ~3);
-            if (n1 == n && tb1 == tb)
-                break;
-            if (n1 == 2) {
-                ptb = &tb1->jmp_first;
-            } else {
-                ptb = &tb1->jmp_next[n1];
-            }
-        }
-        /* now we can suppress tb(n) from the list */
-        *ptb = tb->jmp_next[n];
-
-        tb->jmp_next[n] = NULL;
-    }
-}
-
-/* reset the jump entry 'n' of a TB so that it is not chained to
-   another TB */
-static inline void tb_reset_jump(TranslationBlock *tb, int n)
-{
-    tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
-}
-
-void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
-{
-    CPUArchState *env;
-    PageDesc *p;
-    unsigned int h, n1;
-    hwaddr phys_pc;
-    TranslationBlock *tb1, *tb2;
-
-    /* remove the TB from the hash list */
-    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
-    h = tb_phys_hash_func(phys_pc);
-    tb_remove(&tb_phys_hash[h], tb,
-              offsetof(TranslationBlock, phys_hash_next));
-
-    /* remove the TB from the page list */
-    if (tb->page_addr[0] != page_addr) {
-        p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
-        tb_page_remove(&p->first_tb, tb);
-        invalidate_page_bitmap(p);
-    }
-    if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
-        p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
-        tb_page_remove(&p->first_tb, tb);
-        invalidate_page_bitmap(p);
-    }
-
-    tb_invalidated_flag = 1;
-
-    /* remove the TB from the hash list */
-    h = tb_jmp_cache_hash_func(tb->pc);
-    for(env = first_cpu; env != NULL; env = env->next_cpu) {
-        if (env->tb_jmp_cache[h] == tb)
-            env->tb_jmp_cache[h] = NULL;
-    }
-
-    /* suppress this TB from the two jump lists */
-    tb_jmp_remove(tb, 0);
-    tb_jmp_remove(tb, 1);
-
-    /* suppress any remaining jumps to this TB */
-    tb1 = tb->jmp_first;
-    for(;;) {
-        n1 = (long)tb1 & 3;
-        if (n1 == 2)
-            break;
-        tb1 = (TranslationBlock *)((long)tb1 & ~3);
-        tb2 = tb1->jmp_next[n1];
-        tb_reset_jump(tb1, n1);
-        tb1->jmp_next[n1] = NULL;
-        tb1 = tb2;
-    }
-    tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
-
-#ifdef CONFIG_MEMCHECK
-    if (tb->tpc2gpc != NULL) {
-        g_free(tb->tpc2gpc);
-        tb->tpc2gpc = NULL;
-        tb->tpc2gpc_pairs = 0;
-    }
-#endif  // CONFIG_MEMCHECK
-
-    tb_phys_invalidate_count++;
-}
-
-static inline void set_bits(uint8_t *tab, int start, int len)
-{
-    int end, mask, end1;
-
-    end = start + len;
-    tab += start >> 3;
-    mask = 0xff << (start & 7);
-    if ((start & ~7) == (end & ~7)) {
-        if (start < end) {
-            mask &= ~(0xff << (end & 7));
-            *tab |= mask;
-        }
-    } else {
-        *tab++ |= mask;
-        start = (start + 8) & ~7;
-        end1 = end & ~7;
-        while (start < end1) {
-            *tab++ = 0xff;
-            start += 8;
-        }
-        if (start < end) {
-            mask = ~(0xff << (end & 7));
-            *tab |= mask;
-        }
-    }
-}
-
-static void build_page_bitmap(PageDesc *p)
-{
-    int n, tb_start, tb_end;
-    TranslationBlock *tb;
-
-    p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
-
-    tb = p->first_tb;
-    while (tb != NULL) {
-        n = (long)tb & 3;
-        tb = (TranslationBlock *)((long)tb & ~3);
-        /* NOTE: this is subtle as a TB may span two physical pages */
-        if (n == 0) {
-            /* NOTE: tb_end may be after the end of the page, but
-               it is not a problem */
-            tb_start = tb->pc & ~TARGET_PAGE_MASK;
-            tb_end = tb_start + tb->size;
-            if (tb_end > TARGET_PAGE_SIZE)
-                tb_end = TARGET_PAGE_SIZE;
-        } else {
-            tb_start = 0;
-            tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
-        }
-        set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
-        tb = tb->page_next[n];
-    }
-}
-
-TranslationBlock *tb_gen_code(CPUArchState *env,
-                              target_ulong pc, target_ulong cs_base,
-                              int flags, int cflags)
-{
-    TranslationBlock *tb;
-    uint8_t *tc_ptr;
-    target_ulong phys_pc, phys_page2, virt_page2;
-    int code_gen_size;
-
-    phys_pc = get_phys_addr_code(env, pc);
-    tb = tb_alloc(pc);
-    if (!tb) {
-        /* flush must be done */
-        tb_flush(env);
-        /* cannot fail at this point */
-        tb = tb_alloc(pc);
-        /* Don't forget to invalidate previous TB info.  */
-        tb_invalidated_flag = 1;
-    }
-    tc_ptr = code_gen_ptr;
-    tb->tc_ptr = tc_ptr;
-    tb->cs_base = cs_base;
-    tb->flags = flags;
-    tb->cflags = cflags;
-    cpu_gen_code(env, tb, &code_gen_size);
-    code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
-
-    /* check next page if needed */
-    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
-    phys_page2 = -1;
-    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
-        phys_page2 = get_phys_addr_code(env, virt_page2);
-    }
-    tb_link_phys(tb, phys_pc, phys_page2);
-    return tb;
-}
-
-/* invalidate all TBs which intersect with the target physical page
-   starting in range [start;end[. NOTE: start and end must refer to
-   the same physical page. 'is_cpu_write_access' should be true if called
-   from a real cpu write access: the virtual CPU will exit the current
-   TB if code is modified inside this TB. */
-void tb_invalidate_phys_page_range(hwaddr start, hwaddr end,
-                                   int is_cpu_write_access)
-{
-    TranslationBlock *tb, *tb_next, *saved_tb;
-    CPUArchState *env = cpu_single_env;
-    target_ulong tb_start, tb_end;
-    PageDesc *p;
-    int n;
-#ifdef TARGET_HAS_PRECISE_SMC
-    int current_tb_not_found = is_cpu_write_access;
-    TranslationBlock *current_tb = NULL;
-    int current_tb_modified = 0;
-    target_ulong current_pc = 0;
-    target_ulong current_cs_base = 0;
-    int current_flags = 0;
-#endif /* TARGET_HAS_PRECISE_SMC */
-
-    p = page_find(start >> TARGET_PAGE_BITS);
-    if (!p)
-        return;
-    if (!p->code_bitmap &&
-        ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
-        is_cpu_write_access) {
-        /* build code bitmap */
-        build_page_bitmap(p);
-    }
-
-    /* we remove all the TBs in the range [start, end[ */
-    /* XXX: see if in some cases it could be faster to invalidate all the code */
-    tb = p->first_tb;
-    while (tb != NULL) {
-        n = (long)tb & 3;
-        tb = (TranslationBlock *)((long)tb & ~3);
-        tb_next = tb->page_next[n];
-        /* NOTE: this is subtle as a TB may span two physical pages */
-        if (n == 0) {
-            /* NOTE: tb_end may be after the end of the page, but
-               it is not a problem */
-            tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
-            tb_end = tb_start + tb->size;
-        } else {
-            tb_start = tb->page_addr[1];
-            tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
-        }
-        if (!(tb_end <= start || tb_start >= end)) {
-#ifdef TARGET_HAS_PRECISE_SMC
-            if (current_tb_not_found) {
-                current_tb_not_found = 0;
-                current_tb = NULL;
-                if (env->mem_io_pc) {
-                    /* now we have a real cpu fault */
-                    current_tb = tb_find_pc(env->mem_io_pc);
-                }
-            }
-            if (current_tb == tb &&
-                (current_tb->cflags & CF_COUNT_MASK) != 1) {
-                /* If we are modifying the current TB, we must stop
-                its execution. We could be more precise by checking
-                that the modification is after the current PC, but it
-                would require a specialized function to partially
-                restore the CPU state */
-
-                current_tb_modified = 1;
-                cpu_restore_state(current_tb, env,  env->mem_io_pc);
-                cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
-                                     &current_flags);
-            }
-#endif /* TARGET_HAS_PRECISE_SMC */
-            /* we need to do that to handle the case where a signal
-               occurs while doing tb_phys_invalidate() */
-            saved_tb = NULL;
-            if (env) {
-                saved_tb = env->current_tb;
-                env->current_tb = NULL;
-            }
-            tb_phys_invalidate(tb, -1);
-            if (env) {
-                env->current_tb = saved_tb;
-                if (env->interrupt_request && env->current_tb)
-                    cpu_interrupt(env, env->interrupt_request);
-            }
-        }
-        tb = tb_next;
-    }
-#if !defined(CONFIG_USER_ONLY)
-    /* if no code remaining, no need to continue to use slow writes */
-    if (!p->first_tb) {
-        invalidate_page_bitmap(p);
-        if (is_cpu_write_access) {
-            tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
-        }
-    }
-#endif
-#ifdef TARGET_HAS_PRECISE_SMC
-    if (current_tb_modified) {
-        /* we generate a block containing just the instruction
-           modifying the memory. It will ensure that it cannot modify
-           itself */
-        env->current_tb = NULL;
-        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
-        cpu_resume_from_signal(env, NULL);
-    }
-#endif
-}
-
-/* len must be <= 8 and start must be a multiple of len */
-static inline void tb_invalidate_phys_page_fast(hwaddr start, int len)
-{
-    PageDesc *p;
-    int offset, b;
-#if 0
-    if (1) {
-        qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
-                  cpu_single_env->mem_io_vaddr, len,
-                  cpu_single_env->eip,
-                  cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
-    }
-#endif
-    p = page_find(start >> TARGET_PAGE_BITS);
-    if (!p)
-        return;
-    if (p->code_bitmap) {
-        offset = start & ~TARGET_PAGE_MASK;
-        b = p->code_bitmap[offset >> 3] >> (offset & 7);
-        if (b & ((1 << len) - 1))
-            goto do_invalidate;
-    } else {
-    do_invalidate:
-        tb_invalidate_phys_page_range(start, start + len, 1);
-    }
-}
-
-#if !defined(CONFIG_SOFTMMU)
-static void tb_invalidate_phys_page(hwaddr addr,
-                                    unsigned long pc, void *puc)
-{
-    TranslationBlock *tb;
-    PageDesc *p;
-    int n;
-#ifdef TARGET_HAS_PRECISE_SMC
-    TranslationBlock *current_tb = NULL;
-    CPUArchState *env = cpu_single_env;
-    int current_tb_modified = 0;
-    target_ulong current_pc = 0;
-    target_ulong current_cs_base = 0;
-    int current_flags = 0;
-#endif
-
-    addr &= TARGET_PAGE_MASK;
-    p = page_find(addr >> TARGET_PAGE_BITS);
-    if (!p)
-        return;
-    tb = p->first_tb;
-#ifdef TARGET_HAS_PRECISE_SMC
-    if (tb && pc != 0) {
-        current_tb = tb_find_pc(pc);
-    }
-#endif
-    while (tb != NULL) {
-        n = (long)tb & 3;
-        tb = (TranslationBlock *)((long)tb & ~3);
-#ifdef TARGET_HAS_PRECISE_SMC
-        if (current_tb == tb &&
-            (current_tb->cflags & CF_COUNT_MASK) != 1) {
-                /* If we are modifying the current TB, we must stop
-                   its execution. We could be more precise by checking
-                   that the modification is after the current PC, but it
-                   would require a specialized function to partially
-                   restore the CPU state */
-
-            current_tb_modified = 1;
-            cpu_restore_state(current_tb, env, pc);
-            cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
-                                 &current_flags);
-        }
-#endif /* TARGET_HAS_PRECISE_SMC */
-        tb_phys_invalidate(tb, addr);
-        tb = tb->page_next[n];
-    }
-    p->first_tb = NULL;
-#ifdef TARGET_HAS_PRECISE_SMC
-    if (current_tb_modified) {
-        /* we generate a block containing just the instruction
-           modifying the memory. It will ensure that it cannot modify
-           itself */
-        env->current_tb = NULL;
-        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
-        cpu_resume_from_signal(env, puc);
-    }
-#endif
-}
-#endif
-
-/* add the tb in the target page and protect it if necessary */
-static inline void tb_alloc_page(TranslationBlock *tb,
-                                 unsigned int n, target_ulong page_addr)
-{
-    PageDesc *p;
-    TranslationBlock *last_first_tb;
-
-    tb->page_addr[n] = page_addr;
-    p = page_find_alloc(page_addr >> TARGET_PAGE_BITS);
-    tb->page_next[n] = p->first_tb;
-    last_first_tb = p->first_tb;
-    p->first_tb = (TranslationBlock *)((long)tb | n);
-    invalidate_page_bitmap(p);
-
-#if defined(TARGET_HAS_SMC) || 1
-
-#if defined(CONFIG_USER_ONLY)
-    if (p->flags & PAGE_WRITE) {
-        target_ulong addr;
-        PageDesc *p2;
-        int prot;
-
-        /* force the host page as non writable (writes will have a
-           page fault + mprotect overhead) */
-        page_addr &= qemu_host_page_mask;
-        prot = 0;
-        for(addr = page_addr; addr < page_addr + qemu_host_page_size;
-            addr += TARGET_PAGE_SIZE) {
-
-            p2 = page_find (addr >> TARGET_PAGE_BITS);
-            if (!p2)
-                continue;
-            prot |= p2->flags;
-            p2->flags &= ~PAGE_WRITE;
-            page_get_flags(addr);
-          }
-        mprotect(g2h(page_addr), qemu_host_page_size,
-                 (prot & PAGE_BITS) & ~PAGE_WRITE);
-#ifdef DEBUG_TB_INVALIDATE
-        printf("protecting code page: 0x" TARGET_FMT_lx "\n",
-               page_addr);
-#endif
-    }
-#else
-    /* if some code is already present, then the pages are already
-       protected. So we handle the case where only the first TB is
-       allocated in a physical page */
-    if (!last_first_tb) {
-        tlb_protect_code(page_addr);
-    }
-#endif
-
-#endif /* TARGET_HAS_SMC */
-}
-
-/* Allocate a new translation block. Flush the translation buffer if
-   too many translation blocks or too much generated code. */
-TranslationBlock *tb_alloc(target_ulong pc)
-{
-    TranslationBlock *tb;
-
-    if (nb_tbs >= code_gen_max_blocks ||
-        (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
-        return NULL;
-    tb = &tbs[nb_tbs++];
-    tb->pc = pc;
-    tb->cflags = 0;
-#ifdef CONFIG_MEMCHECK
-    tb->tpc2gpc = NULL;
-    tb->tpc2gpc_pairs = 0;
-#endif  // CONFIG_MEMCHECK
-    return tb;
-}
-
-void tb_free(TranslationBlock *tb)
-{
-    /* In practice this is mostly used for single use temporary TB
-       Ignore the hard cases and just back up if this TB happens to
-       be the last one generated.  */
-    if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
-        code_gen_ptr = tb->tc_ptr;
-        nb_tbs--;
-    }
-}
-
-/* add a new TB and link it to the physical page tables. phys_page2 is
-   (-1) to indicate that only one page contains the TB. */
-void tb_link_phys(TranslationBlock *tb,
-                  target_ulong phys_pc, target_ulong phys_page2)
-{
-    unsigned int h;
-    TranslationBlock **ptb;
-
-    /* Grab the mmap lock to stop another thread invalidating this TB
-       before we are done.  */
-    mmap_lock();
-    /* add in the physical hash table */
-    h = tb_phys_hash_func(phys_pc);
-    ptb = &tb_phys_hash[h];
-    tb->phys_hash_next = *ptb;
-    *ptb = tb;
-
-    /* add in the page list */
-    tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
-    if (phys_page2 != -1)
-        tb_alloc_page(tb, 1, phys_page2);
-    else
-        tb->page_addr[1] = -1;
-
-    tb->jmp_first = (TranslationBlock *)((long)tb | 2);
-    tb->jmp_next[0] = NULL;
-    tb->jmp_next[1] = NULL;
-
-    /* init original jump addresses */
-    if (tb->tb_next_offset[0] != 0xffff)
-        tb_reset_jump(tb, 0);
-    if (tb->tb_next_offset[1] != 0xffff)
-        tb_reset_jump(tb, 1);
-
-#ifdef DEBUG_TB_CHECK
-    tb_page_check();
-#endif
-    mmap_unlock();
-}
-
-/* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
-   tb[1].tc_ptr. Return NULL if not found */
-TranslationBlock *tb_find_pc(unsigned long tc_ptr)
-{
-    int m_min, m_max, m;
-    unsigned long v;
-    TranslationBlock *tb;
-
-    if (nb_tbs <= 0)
-        return NULL;
-    if (tc_ptr < (unsigned long)code_gen_buffer ||
-        tc_ptr >= (unsigned long)code_gen_ptr)
-        return NULL;
-    /* binary search (cf Knuth) */
-    m_min = 0;
-    m_max = nb_tbs - 1;
-    while (m_min <= m_max) {
-        m = (m_min + m_max) >> 1;
-        tb = &tbs[m];
-        v = (unsigned long)tb->tc_ptr;
-        if (v == tc_ptr)
-            return tb;
-        else if (tc_ptr < v) {
-            m_max = m - 1;
-        } else {
-            m_min = m + 1;
-        }
-    }
-    return &tbs[m_max];
-}
-
-static void tb_reset_jump_recursive(TranslationBlock *tb);
-
-static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
-{
-    TranslationBlock *tb1, *tb_next, **ptb;
-    unsigned int n1;
-
-    tb1 = tb->jmp_next[n];
-    if (tb1 != NULL) {
-        /* find head of list */
-        for(;;) {
-            n1 = (long)tb1 & 3;
-            tb1 = (TranslationBlock *)((long)tb1 & ~3);
-            if (n1 == 2)
-                break;
-            tb1 = tb1->jmp_next[n1];
-        }
-        /* we are now sure now that tb jumps to tb1 */
-        tb_next = tb1;
-
-        /* remove tb from the jmp_first list */
-        ptb = &tb_next->jmp_first;
-        for(;;) {
-            tb1 = *ptb;
-            n1 = (long)tb1 & 3;
-            tb1 = (TranslationBlock *)((long)tb1 & ~3);
-            if (n1 == n && tb1 == tb)
-                break;
-            ptb = &tb1->jmp_next[n1];
-        }
-        *ptb = tb->jmp_next[n];
-        tb->jmp_next[n] = NULL;
-
-        /* suppress the jump to next tb in generated code */
-        tb_reset_jump(tb, n);
-
-        /* suppress jumps in the tb on which we could have jumped */
-        tb_reset_jump_recursive(tb_next);
-    }
-}
-
-static void tb_reset_jump_recursive(TranslationBlock *tb)
-{
-    tb_reset_jump_recursive2(tb, 0);
-    tb_reset_jump_recursive2(tb, 1);
-}
-
 #if defined(TARGET_HAS_ICE)
 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
 {
@@ -1485,17 +383,17 @@
 
 /* enable or disable single step mode. EXCP_DEBUG is returned by the
    CPU loop after each instruction */
-void cpu_single_step(CPUOldState *env, int enabled)
+void cpu_single_step(CPUState *cpu, int enabled)
 {
 #if defined(TARGET_HAS_ICE)
-    if (env->singlestep_enabled != enabled) {
-        env->singlestep_enabled = enabled;
-        if (kvm_enabled())
-            kvm_update_guest_debug(env, 0);
-        else {
+    if (cpu->singlestep_enabled != enabled) {
+        cpu->singlestep_enabled = enabled;
+        if (kvm_enabled()) {
+            kvm_update_guest_debug(cpu->env_ptr, 0);
+        } else {
             /* must flush all the translated code to avoid inconsistencies */
             /* XXX: only flush what is necessary */
-            tb_flush(env);
+            tb_flush(cpu->env_ptr);
         }
     }
 #endif
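A short usage sketch for the new signature (hypothetical debug hook, not part of the patch): cpu_single_step() is keyed off CPUState, so a debug front end looks the CPU up first and passes a non-zero value to enable stepping.

    /* Hypothetical debugger hook built only on functions in this patch. */
    static void enable_single_step_on_cpu0(void)
    {
        CPUState *cpu = qemu_get_cpu(0);

        if (cpu != NULL) {
            /* Non-KVM configurations flush all TBs as a side effect. */
            cpu_single_step(cpu, 1);
        }
    }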
@@ -1539,7 +437,7 @@
     cpu_set_log(loglevel);
 }
 
-static void cpu_unlink_tb(CPUOldState *env)
+void cpu_unlink_tb(CPUOldState *env)
 {
     /* FIXME: TB unchaining isn't SMP safe.  For now just ignore the
        problem and hope the cpu will stop of its own accord.  For userspace
@@ -1559,51 +457,21 @@
     spin_unlock(&interrupt_lock);
 }
 
-/* mask must never be zero, except for A20 change call */
-void cpu_interrupt(CPUOldState *env, int mask)
+void cpu_reset_interrupt(CPUState *cpu, int mask)
 {
-    int old_mask;
-
-    old_mask = env->interrupt_request;
-    env->interrupt_request |= mask;
-
-#ifndef CONFIG_USER_ONLY
-    /*
-     * If called from iothread context, wake the target cpu in
-     * case its halted.
-     */
-    if (!qemu_cpu_self(env)) {
-        qemu_cpu_kick(env);
-        return;
-    }
-#endif
-
-    if (use_icount) {
-        env->icount_decr.u16.high = 0xffff;
-#ifndef CONFIG_USER_ONLY
-        if (!can_do_io(env)
-            && (mask & ~old_mask) != 0) {
-            cpu_abort(env, "Raised interrupt while not in I/O function");
-        }
-#endif
-    } else {
-        cpu_unlink_tb(env);
-    }
+    cpu->interrupt_request &= ~mask;
 }
 
-void cpu_reset_interrupt(CPUOldState *env, int mask)
+void cpu_exit(CPUState *cpu)
 {
-    env->interrupt_request &= ~mask;
-}
-
-void cpu_exit(CPUOldState *env)
-{
-    env->exit_request = 1;
-    cpu_unlink_tb(env);
+    cpu->exit_request = 1;
+    cpu_unlink_tb(cpu->env_ptr);
 }
 
 void cpu_abort(CPUArchState *env, const char *fmt, ...)
 {
+    CPUState *cpu = ENV_GET_CPU(env);
+
     va_list ap;
     va_list ap2;
 
@@ -1613,18 +481,18 @@
     vfprintf(stderr, fmt, ap);
     fprintf(stderr, "\n");
 #ifdef TARGET_I386
-    cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
+    cpu_dump_state(cpu, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
 #else
-    cpu_dump_state(env, stderr, fprintf, 0);
+    cpu_dump_state(cpu, stderr, fprintf, 0);
 #endif
     if (qemu_log_enabled()) {
         qemu_log("qemu: fatal: ");
         qemu_log_vprintf(fmt, ap2);
         qemu_log("\n");
 #ifdef TARGET_I386
-        log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
+        log_cpu_state(cpu, X86_DUMP_FPU | X86_DUMP_CCOP);
 #else
-        log_cpu_state(env, 0);
+        log_cpu_state(cpu, 0);
 #endif
         qemu_log_flush();
         qemu_log_close();
@@ -1642,47 +510,34 @@
     abort();
 }
 
-CPUArchState *cpu_copy(CPUOldState *env)
-{
-    CPUArchState *new_env = cpu_init(env->cpu_model_str);
-    CPUArchState *next_cpu = new_env->next_cpu;
-    int cpu_index = new_env->cpu_index;
-#if defined(TARGET_HAS_ICE)
-    CPUBreakpoint *bp;
-    CPUWatchpoint *wp;
-#endif
-
-    memcpy(new_env, env, sizeof(CPUOldState));
-
-    /* Preserve chaining and index. */
-    new_env->next_cpu = next_cpu;
-    new_env->cpu_index = cpu_index;
-
-    /* Clone all break/watchpoints.
-       Note: Once we support ptrace with hw-debug register access, make sure
-       BP_CPU break/watchpoints are handled correctly on clone. */
-    QTAILQ_INIT(&env->breakpoints);
-    QTAILQ_INIT(&env->watchpoints);
-#if defined(TARGET_HAS_ICE)
-    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
-        cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
-    }
-    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
-        cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
-                              wp->flags, NULL);
-    }
-#endif
-
-    return new_env;
-}
-
 #if !defined(CONFIG_USER_ONLY)
+static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
+{
+    RAMBlock *block;
+
+    /* The list is protected by the iothread lock here.  */
+    block = ram_list.mru_block;
+    if (block && addr - block->offset < block->length) {
+        goto found;
+    }
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+        if (addr - block->offset < block->length) {
+            goto found;
+        }
+    }
+
+    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
+    abort();
+
+found:
+    ram_list.mru_block = block;
+    return block;
+}
 
 /* Note: start and end must be within the same ram block.  */
 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
                                      int dirty_flags)
 {
-    CPUOldState *env;
     unsigned long length, start1;
     int i;
 
@@ -1704,12 +559,15 @@
         abort();
     }
 
-    for(env = first_cpu; env != NULL; env = env->next_cpu) {
+    CPUState *cpu;
+    CPU_FOREACH(cpu) {
         int mmu_idx;
         for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
-            for(i = 0; i < CPU_TLB_SIZE; i++)
+            for(i = 0; i < CPU_TLB_SIZE; i++) {
+                CPUArchState* env = cpu->env_ptr;
                 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
                                       start1, length);
+            }
         }
     }
 }
@@ -1782,198 +640,6 @@
     return 0;
 }
 
-/*
- * Walks guest process memory "regions" one by one
- * and calls callback function 'fn' for each region.
- */
-int walk_memory_regions(void *priv,
-    int (*fn)(void *, unsigned long, unsigned long, unsigned long))
-{
-    unsigned long start, end;
-    PageDesc *p = NULL;
-    int i, j, prot, prot1;
-    int rc = 0;
-
-    start = end = -1;
-    prot = 0;
-
-    for (i = 0; i <= L1_SIZE; i++) {
-        p = (i < L1_SIZE) ? l1_map[i] : NULL;
-        for (j = 0; j < L2_SIZE; j++) {
-            prot1 = (p == NULL) ? 0 : p[j].flags;
-            /*
-             * "region" is one continuous chunk of memory
-             * that has same protection flags set.
-             */
-            if (prot1 != prot) {
-                end = (i << (32 - L1_BITS)) | (j << TARGET_PAGE_BITS);
-                if (start != -1) {
-                    rc = (*fn)(priv, start, end, prot);
-                    /* callback can stop iteration by returning != 0 */
-                    if (rc != 0)
-                        return (rc);
-                }
-                if (prot1 != 0)
-                    start = end;
-                else
-                    start = -1;
-                prot = prot1;
-            }
-            if (p == NULL)
-                break;
-        }
-    }
-    return (rc);
-}
-
-static int dump_region(void *priv, unsigned long start,
-    unsigned long end, unsigned long prot)
-{
-    FILE *f = (FILE *)priv;
-
-    (void) fprintf(f, "%08lx-%08lx %08lx %c%c%c\n",
-        start, end, end - start,
-        ((prot & PAGE_READ) ? 'r' : '-'),
-        ((prot & PAGE_WRITE) ? 'w' : '-'),
-        ((prot & PAGE_EXEC) ? 'x' : '-'));
-
-    return (0);
-}
-
-/* dump memory mappings */
-void page_dump(FILE *f)
-{
-    (void) fprintf(f, "%-8s %-8s %-8s %s\n",
-            "start", "end", "size", "prot");
-    walk_memory_regions(f, dump_region);
-}
-
-int page_get_flags(target_ulong address)
-{
-    PageDesc *p;
-
-    p = page_find(address >> TARGET_PAGE_BITS);
-    if (!p)
-        return 0;
-    return p->flags;
-}
-
-/* Modify the flags of a page and invalidate the code if necessary.
-   The flag PAGE_WRITE_ORG is positioned automatically depending
-   on PAGE_WRITE.  The mmap_lock should already be held.  */
-void page_set_flags(target_ulong start, target_ulong end, int flags)
-{
-    PageDesc *p;
-    target_ulong addr;
-
-    /* mmap_lock should already be held.  */
-    start = start & TARGET_PAGE_MASK;
-    end = TARGET_PAGE_ALIGN(end);
-    if (flags & PAGE_WRITE)
-        flags |= PAGE_WRITE_ORG;
-    for(addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
-        p = page_find_alloc(addr >> TARGET_PAGE_BITS);
-        /* We may be called for host regions that are outside guest
-           address space.  */
-        if (!p)
-            return;
-        /* if the write protection is set, then we invalidate the code
-           inside */
-        if (!(p->flags & PAGE_WRITE) &&
-            (flags & PAGE_WRITE) &&
-            p->first_tb) {
-            tb_invalidate_phys_page(addr, 0, NULL);
-        }
-        p->flags = flags;
-    }
-}
-
-int page_check_range(target_ulong start, target_ulong len, int flags)
-{
-    PageDesc *p;
-    target_ulong end;
-    target_ulong addr;
-
-    if (start + len < start)
-        /* we've wrapped around */
-        return -1;
-
-    end = TARGET_PAGE_ALIGN(start+len); /* must do before we loose bits in the next step */
-    start = start & TARGET_PAGE_MASK;
-
-    for(addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
-        p = page_find(addr >> TARGET_PAGE_BITS);
-        if( !p )
-            return -1;
-        if( !(p->flags & PAGE_VALID) )
-            return -1;
-
-        if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
-            return -1;
-        if (flags & PAGE_WRITE) {
-            if (!(p->flags & PAGE_WRITE_ORG))
-                return -1;
-            /* unprotect the page if it was put read-only because it
-               contains translated code */
-            if (!(p->flags & PAGE_WRITE)) {
-                if (!page_unprotect(addr, 0, NULL))
-                    return -1;
-            }
-            return 0;
-        }
-    }
-    return 0;
-}
-
-/* called from signal handler: invalidate the code and unprotect the
-   page. Return TRUE if the fault was successfully handled. */
-int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
-{
-    unsigned int page_index, prot, pindex;
-    PageDesc *p, *p1;
-    target_ulong host_start, host_end, addr;
-
-    /* Technically this isn't safe inside a signal handler.  However we
-       know this only ever happens in a synchronous SEGV handler, so in
-       practice it seems to be ok.  */
-    mmap_lock();
-
-    host_start = address & qemu_host_page_mask;
-    page_index = host_start >> TARGET_PAGE_BITS;
-    p1 = page_find(page_index);
-    if (!p1) {
-        mmap_unlock();
-        return 0;
-    }
-    host_end = host_start + qemu_host_page_size;
-    p = p1;
-    prot = 0;
-    for(addr = host_start;addr < host_end; addr += TARGET_PAGE_SIZE) {
-        prot |= p->flags;
-        p++;
-    }
-    /* if the page was really writable, then we change its
-       protection back to writable */
-    if (prot & PAGE_WRITE_ORG) {
-        pindex = (address - host_start) >> TARGET_PAGE_BITS;
-        if (!(p1[pindex].flags & PAGE_WRITE)) {
-            mprotect((void *)g2h(host_start), qemu_host_page_size,
-                     (prot & PAGE_BITS) | PAGE_WRITE);
-            p1[pindex].flags |= PAGE_WRITE;
-            /* and since the content will be modified, we must invalidate
-               the corresponding translated code. */
-            tb_invalidate_phys_page(address, pc, puc);
-#ifdef DEBUG_TB_CHECK
-            tb_invalidate_check(address);
-#endif
-            mmap_unlock();
-            return 1;
-        }
-    }
-    mmap_unlock();
-    return 0;
-}
-
 static inline void tlb_set_dirty(CPUOldState *env,
                                  unsigned long addr, target_ulong vaddr)
 {
@@ -1986,6 +652,19 @@
                              ram_addr_t memory, ram_addr_t region_offset);
 static void *subpage_init (hwaddr base, ram_addr_t *phys,
                            ram_addr_t orig_memory, ram_addr_t region_offset);
+
+static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
+
+/*
+ * Set a custom physical guest memory allocator.
+ * Accelerators with unusual needs may need this.  Hopefully, we can
+ * get rid of it eventually.
+ */
+void phys_mem_set_alloc(void *(*alloc)(size_t))
+{
+    phys_mem_alloc = alloc;
+}
+
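A minimal sketch of how an accelerator might use the new hook (the allocator and init function names below are hypothetical; qemu_anon_ram_alloc() is the default shown above):

    /* Hypothetical accelerator-specific allocator.  A real one might
     * carve the block out of memory already registered with the
     * hypervisor; here it simply delegates to the default. */
    static void *my_accel_ram_alloc(size_t size)
    {
        return qemu_anon_ram_alloc(size);
    }

    static void my_accel_setup_memory(void)
    {
        /* Install the hook before guest RAM blocks are allocated. */
        phys_mem_set_alloc(my_accel_ram_alloc);
    }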
 #define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
                       need_subpage)                                     \
     do {                                                                \
@@ -2022,7 +701,7 @@
 {
     hwaddr addr, end_addr;
     PhysPageDesc *p;
-    CPUOldState *env;
+    CPUState *cpu;
     ram_addr_t orig_size = size;
     subpage_t *subpage;
 
@@ -2099,8 +778,8 @@
     /* since each CPU stores ram addresses in its TLB cache, we must
        reset the modified entries */
     /* XXX: slow ! */
-    for(env = first_cpu; env != NULL; env = env->next_cpu) {
-        tlb_flush(env, 1);
+    CPU_FOREACH(cpu) {
+        tlb_flush(cpu->env_ptr, 1);
     }
 }
 
@@ -2127,129 +806,377 @@
         kvm_uncoalesce_mmio_region(addr, size);
 }
 
+void qemu_mutex_lock_ramlist(void)
+{
+    qemu_mutex_lock(&ram_list.mutex);
+}
+
+void qemu_mutex_unlock_ramlist(void)
+{
+    qemu_mutex_unlock(&ram_list.mutex);
+}
+
+#if defined(__linux__) && !defined(CONFIG_ANDROID)
+
+#include <sys/vfs.h>
+
+#define HUGETLBFS_MAGIC       0x958458f6
+
+static long gethugepagesize(const char *path)
+{
+    struct statfs fs;
+    int ret;
+
+    do {
+        ret = statfs(path, &fs);
+    } while (ret != 0 && errno == EINTR);
+
+    if (ret != 0) {
+        perror(path);
+        return 0;
+    }
+
+    if (fs.f_type != HUGETLBFS_MAGIC)
+        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
+
+    return fs.f_bsize;
+}
+
+static sigjmp_buf sigjump;
+
+static void sigbus_handler(int signal)
+{
+    siglongjmp(sigjump, 1);
+}
+
+static void *file_ram_alloc(RAMBlock *block,
+                            ram_addr_t memory,
+                            const char *path)
+{
+    char *filename;
+    char *sanitized_name;
+    char *c;
+    void *area;
+    int fd;
+    unsigned long hpagesize;
+
+    hpagesize = gethugepagesize(path);
+    if (!hpagesize) {
+        return NULL;
+    }
+
+    if (memory < hpagesize) {
+        return NULL;
+    }
+
+    if (kvm_enabled() && !kvm_has_sync_mmu()) {
+        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
+        return NULL;
+    }
+
+    /* Make name safe to use with mkstemp by replacing '/' with '_'. */
+    sanitized_name = g_strdup(block->mr->name);
+    for (c = sanitized_name; *c != '\0'; c++) {
+        if (*c == '/')
+            *c = '_';
+    }
+
+    filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
+                               sanitized_name);
+    g_free(sanitized_name);
+
+    fd = mkstemp(filename);
+    if (fd < 0) {
+        perror("unable to create backing store for hugepages");
+        g_free(filename);
+        return NULL;
+    }
+    unlink(filename);
+    g_free(filename);
+
+    memory = (memory+hpagesize-1) & ~(hpagesize-1);
+
+    /*
+     * ftruncate is not supported by hugetlbfs in older
+     * hosts, so don't bother bailing out on errors.
+     * If anything goes wrong with it under other filesystems,
+     * mmap will fail.
+     */
+    if (ftruncate(fd, memory))
+        perror("ftruncate");
+
+    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+    if (area == MAP_FAILED) {
+        perror("file_ram_alloc: can't mmap RAM pages");
+        close(fd);
+        return (NULL);
+    }
+
+    if (mem_prealloc) {
+        int ret, i;
+        struct sigaction act, oldact;
+        sigset_t set, oldset;
+
+        memset(&act, 0, sizeof(act));
+        act.sa_handler = &sigbus_handler;
+        act.sa_flags = 0;
+
+        ret = sigaction(SIGBUS, &act, &oldact);
+        if (ret) {
+            perror("file_ram_alloc: failed to install signal handler");
+            exit(1);
+        }
+
+        /* unblock SIGBUS */
+        sigemptyset(&set);
+        sigaddset(&set, SIGBUS);
+        pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
+
+        if (sigsetjmp(sigjump, 1)) {
+            fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
+            exit(1);
+        }
+
+        /* MAP_POPULATE silently ignores failures */
+        for (i = 0; i < (memory/hpagesize)-1; i++) {
+            memset(area + (hpagesize*i), 0, 1);
+        }
+
+        ret = sigaction(SIGBUS, &oldact, NULL);
+        if (ret) {
+            perror("file_ram_alloc: failed to reinstall signal handler");
+            exit(1);
+        }
+
+        pthread_sigmask(SIG_SETMASK, &oldset, NULL);
+    }
+
+    block->fd = fd;
+    return area;
+}
+#else
+static void *file_ram_alloc(RAMBlock *block,
+                            ram_addr_t memory,
+                            const char *path)
+{
+    fprintf(stderr, "-mem-path not supported on this host\n");
+    exit(1);
+}
+#endif
+
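The rounding expression in file_ram_alloc() above relies on hpagesize being a power of two (true for hugepage sizes). A standalone illustration, assuming 2 MiB huge pages:

    #include <assert.h>
    #include <stddef.h>

    /* Same round-up-to-hugepage expression as in file_ram_alloc(). */
    static size_t round_to_hugepage(size_t memory, size_t hpagesize)
    {
        return (memory + hpagesize - 1) & ~(hpagesize - 1);
    }

    int main(void)
    {
        const size_t hpage = 2u * 1024 * 1024;   /* 2 MiB huge pages */

        assert(round_to_hugepage(3u * 1024 * 1024, hpage) == 4u * 1024 * 1024);
        assert(round_to_hugepage(4u * 1024 * 1024, hpage) == 4u * 1024 * 1024);
        return 0;
    }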
 static ram_addr_t find_ram_offset(ram_addr_t size)
 {
     RAMBlock *block, *next_block;
-    ram_addr_t offset = 0, mingap = ULONG_MAX;
+    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
 
-    if (QLIST_EMPTY(&ram_list.blocks))
+    assert(size != 0); /* it would hand out the same offset multiple times */
+
+    if (QTAILQ_EMPTY(&ram_list.blocks))
         return 0;
 
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
-        ram_addr_t end, next = ULONG_MAX;
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+        ram_addr_t end, next = RAM_ADDR_MAX;
 
         end = block->offset + block->length;
 
-        QLIST_FOREACH(next_block, &ram_list.blocks, next) {
+        QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
             if (next_block->offset >= end) {
                 next = MIN(next, next_block->offset);
             }
         }
         if (next - end >= size && next - end < mingap) {
-            offset =  end;
+            offset = end;
             mingap = next - end;
         }
     }
+
+    if (offset == RAM_ADDR_MAX) {
+        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
+                (uint64_t)size);
+        abort();
+    }
+
     return offset;
 }
 
-static ram_addr_t last_ram_offset(void)
+ram_addr_t last_ram_offset(void)
 {
     RAMBlock *block;
     ram_addr_t last = 0;
 
-    QLIST_FOREACH(block, &ram_list.blocks, next)
+    QTAILQ_FOREACH(block, &ram_list.blocks, next)
         last = MAX(last, block->offset + block->length);
 
     return last;
 }
 
-ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
-                                   ram_addr_t size, void *host)
+static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
+{
+#ifndef CONFIG_ANDROID
+    int ret;
+
+    /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
+    if (!qemu_opt_get_bool(qemu_get_machine_opts(),
+                           "dump-guest-core", true)) {
+        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
+        if (ret) {
+            perror("qemu_madvise");
+            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
+                            "but dump_guest_core=off specified\n");
+        }
+    }
+#endif  // !CONFIG_ANDROID
+}
+
+void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
 {
     RAMBlock *new_block, *block;
 
-    size = TARGET_PAGE_ALIGN(size);
-    new_block = g_malloc0(sizeof(*new_block));
+    new_block = NULL;
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+        if (block->offset == addr) {
+            new_block = block;
+            break;
+        }
+    }
+    assert(new_block);
+    assert(!new_block->idstr[0]);
 
-#if 0
-    if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
-        char *id = dev->parent_bus->info->get_dev_path(dev);
+    if (dev) {
+        char *id = qdev_get_dev_path(dev);
         if (id) {
             snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
             g_free(id);
         }
     }
-#endif
     pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
 
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
-        if (!strcmp(block->idstr, new_block->idstr)) {
+    /* This assumes the iothread lock is taken here too.  */
+    qemu_mutex_lock_ramlist();
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
             fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
                     new_block->idstr);
             abort();
         }
     }
+    qemu_mutex_unlock_ramlist();
+}
 
+static int memory_try_enable_merging(void *addr, size_t len)
+{
+#ifndef CONFIG_ANDROID
+    if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
+        /* disabled by the user */
+        return 0;
+    }
+
+    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
+#else  // CONFIG_ANDROID
+    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
+#endif  // CONFIG_ANDROID
+}
+
+ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
+                                   ram_addr_t size, void *host)
+{
+    RAMBlock *block, *new_block;
+
+    size = TARGET_PAGE_ALIGN(size);
+    new_block = g_malloc0(sizeof(*new_block));
+    new_block->fd = -1;
+
+    /* This assumes the iothread lock is taken here too.  */
+    qemu_mutex_lock_ramlist();
+    //new_block->mr = mr;
+    new_block->offset = find_ram_offset(size);
     if (host) {
         new_block->host = host;
         new_block->flags |= RAM_PREALLOC_MASK;
+    } else if (xen_enabled()) {
+        if (mem_path) {
+            fprintf(stderr, "-mem-path not supported with Xen\n");
+            exit(1);
+        }
+        //xen_ram_alloc(new_block->offset, size, mr);
     } else {
         if (mem_path) {
-#if 0 && defined (__linux__) && !defined(TARGET_S390X)
+            if (phys_mem_alloc != qemu_anon_ram_alloc) {
+                /*
+                 * file_ram_alloc() needs to allocate just like
+                 * phys_mem_alloc, but we haven't bothered to provide
+                 * a hook there.
+                 */
+                fprintf(stderr,
+                        "-mem-path not supported with this accelerator\n");
+                exit(1);
+            }
             new_block->host = file_ram_alloc(new_block, size, mem_path);
-            if (!new_block->host) {
-                new_block->host = qemu_vmalloc(size);
-                qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
-            }
-#else
-            fprintf(stderr, "-mem-path option unsupported\n");
-            exit(1);
-#endif
-        } else {
-#if defined(TARGET_S390X) && defined(CONFIG_KVM)
-            /* XXX S390 KVM requires the topmost vma of the RAM to be < 256GB */
-            new_block->host = mmap((void*)0x1000000, size,
-                                   PROT_EXEC|PROT_READ|PROT_WRITE,
-                                   MAP_SHARED | MAP_ANONYMOUS, -1, 0);
-#else
-            new_block->host = qemu_vmalloc(size);
-
-#ifdef CONFIG_HAX
-        /*
-         * In HAX, qemu allocates the virtual address, and HAX kernel
-         * module populates the region with physical memory. Currently
-         * we don’t populate guest memory on demand, thus we should
-         * make sure that sufficient amount of memory is available in
-         * advance.
-         */
-        if (hax_enabled()) {
-            int ret = hax_populate_ram(
-                    (uint64_t)(uintptr_t)new_block->host,
-                    size);
-            if (ret < 0) {
-                fprintf(stderr, "Hax failed to populate ram\n");
-                exit(-1);
-            }
         }
-#endif
-
-#endif
-#ifdef MADV_MERGEABLE
-            madvise(new_block->host, size, MADV_MERGEABLE);
-#endif
+        if (!new_block->host) {
+            new_block->host = phys_mem_alloc(size);
+            if (!new_block->host) {
+                fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
+                        name, strerror(errno));
+                exit(1);
+            }
+#ifdef CONFIG_HAX
+            if (hax_enabled()) {
+                /*
+                 * In HAX, qemu allocates the virtual address, and HAX kernel
+                 * module populates the region with physical memory. Currently
+                 * we don’t populate guest memory on demand, thus we should
+                 * make sure that sufficient amount of memory is available in
+                 * advance.
+                 */
+                int ret = hax_populate_ram(
+                        (uint64_t)(uintptr_t)new_block->host,
+                        (uint32_t)size);
+                if (ret < 0) {
+                    fprintf(stderr, "Hax failed to populate ram\n");
+                    exit(-1);
+                }
+            }
+#endif  // CONFIG_HAX
+            memory_try_enable_merging(new_block->host, size);
         }
     }
-
-    new_block->offset = find_ram_offset(size);
     new_block->length = size;
 
-    QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
+    if (dev) {
+        char *id = qdev_get_dev_path(dev);
+        if (id) {
+            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
+            g_free(id);
+        }
+    }
+    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
+
+    /* Keep the list sorted from biggest to smallest block.  */
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+        if (block->length < new_block->length) {
+            break;
+        }
+    }
+    if (block) {
+        QTAILQ_INSERT_BEFORE(block, new_block, next);
+    } else {
+        QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
+    }
+    ram_list.mru_block = NULL;
+
+    ram_list.version++;
+    qemu_mutex_unlock_ramlist();
 
     ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
                                        last_ram_offset() >> TARGET_PAGE_BITS);
     memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
            0xff, size >> TARGET_PAGE_BITS);
+    //cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
+
+    qemu_ram_setup_dump(new_block->host, size);
+    //qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
+    //qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);
 
     if (kvm_enabled())
         kvm_setup_guest_memory(new_block->host, size);
@@ -2262,99 +1189,107 @@
     return qemu_ram_alloc_from_ptr(dev, name, size, NULL);
 }
 
+void qemu_ram_free_from_ptr(ram_addr_t addr)
+{
+    RAMBlock *block;
+
+    /* This assumes the iothread lock is taken here too.  */
+    qemu_mutex_lock_ramlist();
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+        if (addr == block->offset) {
+            QTAILQ_REMOVE(&ram_list.blocks, block, next);
+            ram_list.mru_block = NULL;
+            ram_list.version++;
+            g_free(block);
+            break;
+        }
+    }
+    qemu_mutex_unlock_ramlist();
+}
+
 void qemu_ram_free(ram_addr_t addr)
 {
     RAMBlock *block;
 
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    /* This assumes the iothread lock is taken here too.  */
+    qemu_mutex_lock_ramlist();
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         if (addr == block->offset) {
-            QLIST_REMOVE(block, next);
+            QTAILQ_REMOVE(&ram_list.blocks, block, next);
+            ram_list.mru_block = NULL;
+            ram_list.version++;
             if (block->flags & RAM_PREALLOC_MASK) {
                 ;
-            } else if (mem_path) {
-#if defined (__linux__) && !defined(TARGET_S390X)
-                if (block->fd) {
-                    munmap(block->host, block->length);
-                    close(block->fd);
-                } else {
-                    qemu_vfree(block->host);
-                }
-#else
-                abort();
+            } else if (xen_enabled()) {
+                //xen_invalidate_map_cache_entry(block->host);
+#ifndef _WIN32
+            } else if (block->fd >= 0) {
+                munmap(block->host, block->length);
+                close(block->fd);
 #endif
             } else {
-#if defined(TARGET_S390X) && defined(CONFIG_KVM)
-                munmap(block->host, block->length);
-#else
-                qemu_vfree(block->host);
-#endif
+                qemu_anon_ram_free(block->host, block->length);
             }
             g_free(block);
-            return;
+            break;
         }
     }
+    qemu_mutex_unlock_ramlist();
 
 }
 
 #ifndef _WIN32
 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
 {
-#ifndef CONFIG_ANDROID
     RAMBlock *block;
     ram_addr_t offset;
     int flags;
     void *area, *vaddr;
 
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         offset = addr - block->offset;
         if (offset < block->length) {
             vaddr = block->host + offset;
             if (block->flags & RAM_PREALLOC_MASK) {
                 ;
+            } else if (xen_enabled()) {
+                abort();
             } else {
                 flags = MAP_FIXED;
                 munmap(vaddr, length);
-                if (mem_path) {
-#if defined(__linux__) && !defined(TARGET_S390X)
-                    if (block->fd) {
+                if (block->fd >= 0) {
 #ifdef MAP_POPULATE
-                        flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
-                            MAP_PRIVATE;
+                    flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
+                        MAP_PRIVATE;
 #else
-                        flags |= MAP_PRIVATE;
+                    flags |= MAP_PRIVATE;
 #endif
-                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
-                                    flags, block->fd, offset);
-                    } else {
-                        flags |= MAP_PRIVATE | MAP_ANONYMOUS;
-                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
-                                    flags, -1, 0);
-                    }
-#else
-                    abort();
-#endif
+                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
+                                flags, block->fd, offset);
                 } else {
-#if defined(TARGET_S390X) && defined(CONFIG_KVM)
-                    flags |= MAP_SHARED | MAP_ANONYMOUS;
-                    area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
-                                flags, -1, 0);
-#else
+                    /*
+                     * Remap needs to match alloc.  Accelerators that
+                     * set phys_mem_alloc never remap.  If they did,
+                     * we'd need a remap hook here.
+                     */
+                    assert(phys_mem_alloc == qemu_anon_ram_alloc);
+
                     flags |= MAP_PRIVATE | MAP_ANONYMOUS;
                     area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                 flags, -1, 0);
-#endif
                 }
                 if (area != vaddr) {
-                    fprintf(stderr, "Could not remap addr: %lx@%lx\n",
+                    fprintf(stderr, "Could not remap addr: "
+                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
                             length, addr);
                     exit(1);
                 }
-                qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
+                memory_try_enable_merging(vaddr, length);
+                qemu_ram_setup_dump(vaddr, length);
             }
             return;
         }
     }
-#endif /* !CONFIG_ANDROID */
 }
 #endif /* !_WIN32 */
 
@@ -2368,24 +1303,23 @@
  */
 void *qemu_get_ram_ptr(ram_addr_t addr)
 {
-    RAMBlock *block;
-
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
-        if (addr - block->offset < block->length) {
-            /* Move this entry to to start of the list.  */
-            if (block != QLIST_FIRST(&ram_list.blocks)) {
-                QLIST_REMOVE(block, next);
-                QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
-    }
-            return block->host + (addr - block->offset);
+    RAMBlock *block = qemu_get_ram_block(addr);
+#if 0
+    if (xen_enabled()) {
+        /* We need to check if the requested address is in the RAM
+         * because we don't want to map the entire memory in QEMU.
+         * In that case just map until the end of the page.
+         */
+        if (block->offset == 0) {
+            return xen_map_cache(addr, 0, 0);
+        } else if (block->host == NULL) {
+            block->host =
+                xen_map_cache(block->offset, block->length, 1);
         }
     }
-
-        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
-        abort();
-
-    return NULL;
-    }
+#endif
+    return block->host + (addr - block->offset);
+}
 
 /* Return a host pointer to ram allocated with qemu_ram_alloc.
  * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
@@ -2394,10 +1328,10 @@
 {
     RAMBlock *block;
 
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         if (addr - block->offset < block->length) {
-    return block->host + (addr - block->offset);
-}
+            return block->host + (addr - block->offset);
+        }
     }
 
     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
@@ -2406,18 +1340,38 @@
     return NULL;
 }
 
+/* Some of the softmmu routines need to translate from a host pointer
+   (typically a TLB entry) back to a ram offset.  */
 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
 {
     RAMBlock *block;
     uint8_t *host = ptr;
+#if 0
+    if (xen_enabled()) {
+        *ram_addr = xen_ram_addr_from_mapcache(ptr);
+        return qemu_get_ram_block(*ram_addr)->mr;
+    }
+#endif
+    block = ram_list.mru_block;
+    if (block && block->host && host - block->host < block->length) {
+        goto found;
+    }
 
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+        /* This case happens when the block is not mapped. */
+        if (block->host == NULL) {
+            continue;
+        }
         if (host - block->host < block->length) {
-            *ram_addr = block->offset + (host - block->host);
-            return 0;
+            goto found;
+        }
     }
-    }
+
     return -1;
+
+found:
+    *ram_addr = block->offset + (host - block->host);
+    return 0;
 }
 
 /* Some of the softmmu routines need to translate from a host pointer
@@ -2439,7 +1393,7 @@
     printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
 #endif
 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
-    do_unassigned_access(addr, 0, 0, 0, 1);
+    cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 1);
 #endif
     return 0;
 }
@@ -2450,7 +1404,7 @@
     printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
 #endif
 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
-    do_unassigned_access(addr, 0, 0, 0, 2);
+    cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 2);
 #endif
     return 0;
 }
@@ -2461,7 +1415,7 @@
     printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
 #endif
 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
-    do_unassigned_access(addr, 0, 0, 0, 4);
+    cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 4);
 #endif
     return 0;
 }
@@ -2472,7 +1426,7 @@
     printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
 #endif
 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
-    do_unassigned_access(addr, 1, 0, 0, 1);
+    cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 1);
 #endif
 }
 
@@ -2482,7 +1436,7 @@
     printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
 #endif
 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
-    do_unassigned_access(addr, 1, 0, 0, 2);
+    cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 2);
 #endif
 }
 
@@ -2492,7 +1446,7 @@
     printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
 #endif
 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
-    do_unassigned_access(addr, 1, 0, 0, 4);
+    cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 4);
 #endif
 }
 
@@ -2515,7 +1469,7 @@
     dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
     if (!(dirty_flags & CODE_DIRTY_FLAG)) {
 #if !defined(CONFIG_USER_ONLY)
-        tb_invalidate_phys_page_fast(ram_addr, 1);
+        tb_invalidate_phys_page_fast0(ram_addr, 1);
         dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
 #endif
     }
@@ -2535,7 +1489,7 @@
     dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
     if (!(dirty_flags & CODE_DIRTY_FLAG)) {
 #if !defined(CONFIG_USER_ONLY)
-        tb_invalidate_phys_page_fast(ram_addr, 2);
+        tb_invalidate_phys_page_fast0(ram_addr, 2);
         dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
 #endif
     }
@@ -2555,7 +1509,7 @@
     dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
     if (!(dirty_flags & CODE_DIRTY_FLAG)) {
 #if !defined(CONFIG_USER_ONLY)
-        tb_invalidate_phys_page_fast(ram_addr, 4);
+        tb_invalidate_phys_page_fast0(ram_addr, 4);
         dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
 #endif
     }
@@ -2580,12 +1534,23 @@
     notdirty_mem_writel,
 };
 
+static void tb_check_watchpoint(CPUArchState* env)
+{
+    TranslationBlock *tb = tb_find_pc(env->mem_io_pc);
+    if (!tb) {
+        cpu_abort(env, "check_watchpoint: could not find TB for "
+                  "pc=%p", (void *)env->mem_io_pc);
+    }
+    cpu_restore_state(env, env->mem_io_pc);
+    tb_phys_invalidate(tb, -1);
+}
+
 /* Generate a debug exception if a watchpoint has been hit.  */
 static void check_watchpoint(int offset, int len_mask, int flags)
 {
-    CPUArchState *env = cpu_single_env;
+    CPUState *cpu = current_cpu;
+    CPUArchState *env = cpu->env_ptr;
     target_ulong pc, cs_base;
-    TranslationBlock *tb;
     target_ulong vaddr;
     CPUWatchpoint *wp;
     int cpu_flags;
@@ -2594,7 +1559,7 @@
         /* We re-entered the check after replacing the TB. Now raise
          * the debug interrupt so that it will trigger after the
          * current instruction. */
-        cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
+        cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
         return;
     }
     vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
@@ -2604,13 +1569,7 @@
             wp->flags |= BP_WATCHPOINT_HIT;
             if (!env->watchpoint_hit) {
                 env->watchpoint_hit = wp;
-                tb = tb_find_pc(env->mem_io_pc);
-                if (!tb) {
-                    cpu_abort(env, "check_watchpoint: could not find TB for "
-                              "pc=%p", (void *)env->mem_io_pc);
-                }
-                cpu_restore_state(tb, env, env->mem_io_pc);
-                tb_phys_invalidate(tb, -1);
+                tb_check_watchpoint(env);
                 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                     env->exception_index = EXCP_DEBUG;
                     cpu_loop_exit(env);
@@ -2795,13 +1754,13 @@
     memory >>= IO_MEM_SHIFT;
     for (; idx <= eidx; idx++) {
         for (i = 0; i < 4; i++) {
-            if (io_mem_read[memory][i]) {
-                mmio->mem_read[idx][i] = &io_mem_read[memory][i];
+            if (_io_mem_read[memory][i]) {
+                mmio->mem_read[idx][i] = &_io_mem_read[memory][i];
                 mmio->opaque[idx][0][i] = io_mem_opaque[memory];
                 mmio->region_offset[idx][0][i] = region_offset;
             }
-            if (io_mem_write[memory][i]) {
-                mmio->mem_write[idx][i] = &io_mem_write[memory][i];
+            if (_io_mem_write[memory][i]) {
+                mmio->mem_write[idx][i] = &_io_mem_write[memory][i];
                 mmio->opaque[idx][1][i] = io_mem_opaque[memory];
                 mmio->region_offset[idx][1][i] = region_offset;
             }
@@ -2872,8 +1831,8 @@
     for(i = 0;i < 3; i++) {
         if (!mem_read[i] || !mem_write[i])
             subwidth = IO_MEM_SUBWIDTH;
-        io_mem_read[io_index][i] = mem_read[i];
-        io_mem_write[io_index][i] = mem_write[i];
+        _io_mem_read[io_index][i] = mem_read[i];
+        _io_mem_write[io_index][i] = mem_write[i];
     }
     io_mem_opaque[io_index] = opaque;
     return (io_index << IO_MEM_SHIFT) | subwidth;
@@ -2892,8 +1851,8 @@
     int io_index = io_table_address >> IO_MEM_SHIFT;
 
     for (i=0;i < 3; i++) {
-        io_mem_read[io_index][i] = unassigned_mem_read[i];
-        io_mem_write[io_index][i] = unassigned_mem_write[i];
+        _io_mem_read[io_index][i] = unassigned_mem_read[i];
+        _io_mem_write[io_index][i] = unassigned_mem_write[i];
     }
     io_mem_opaque[io_index] = NULL;
     io_mem_used[io_index] = 0;
@@ -2977,7 +1936,7 @@
     uint8_t *ptr;
     uint32_t val;
     hwaddr page;
-    unsigned long pd;
+    ram_addr_t pd;
     uint8_t* buf8 = (uint8_t*)buf;
     PhysPageDesc *p;
 
@@ -3004,21 +1963,21 @@
                 if (l >= 4 && ((addr1 & 3) == 0)) {
                     /* 32 bit write access */
                     val = ldl_p(buf8);
-                    io_mem_write[io_index][2](io_mem_opaque[io_index], addr1, val);
+                    io_mem_write(io_index, addr1, val, 4);
                     l = 4;
                 } else if (l >= 2 && ((addr1 & 1) == 0)) {
                     /* 16 bit write access */
                     val = lduw_p(buf8);
-                    io_mem_write[io_index][1](io_mem_opaque[io_index], addr1, val);
+                    io_mem_write(io_index, addr1, val, 2);
                     l = 2;
                 } else {
                     /* 8 bit write access */
                     val = ldub_p(buf8);
-                    io_mem_write[io_index][0](io_mem_opaque[io_index], addr1, val);
+                    io_mem_write(io_index, addr1, val, 1);
                     l = 1;
                 }
             } else {
-                unsigned long addr1;
+                ram_addr_t addr1;
                 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
                 /* RAM case */
                 ptr = qemu_get_ram_ptr(addr1);
@@ -3035,17 +1994,17 @@
                     addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
                 if (l >= 4 && ((addr1 & 3) == 0)) {
                     /* 32 bit read access */
-                    val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr1);
+                    val = io_mem_read(io_index, addr1, 4);
                     stl_p(buf8, val);
                     l = 4;
                 } else if (l >= 2 && ((addr1 & 1) == 0)) {
                     /* 16 bit read access */
-                    val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr1);
+                    val = io_mem_read(io_index, addr1, 2);
                     stw_p(buf8, val);
                     l = 2;
                 } else {
                     /* 8 bit read access */
-                    val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr1);
+                    val = io_mem_read(io_index, addr1, 1);
                     stb_p(buf8, val);
                     l = 1;
                 }
@@ -3130,7 +2089,7 @@
     return client;
 }
 
-void cpu_unregister_map_client(void *_client)
+static void cpu_unregister_map_client(void *_client)
 {
     MapClient *client = (MapClient *)_client;
 
@@ -3145,7 +2104,7 @@
     while (!QLIST_EMPTY(&map_client_list)) {
         client = QLIST_FIRST(&map_client_list);
         client->callback(client->opaque);
-        QLIST_REMOVE(client, link);
+        cpu_unregister_map_client(client);
     }
 }
 
@@ -3190,7 +2149,7 @@
             bounce.addr = addr;
             bounce.len = l;
             if (!is_write) {
-                cpu_physical_memory_rw(addr, bounce.buffer, l, 0);
+                cpu_physical_memory_read(addr, bounce.buffer, l);
             }
             ptr = bounce.buffer;
         } else {
@@ -3242,7 +2201,8 @@
 }
 
 /* warning: addr must be aligned */
-uint32_t ldl_phys(hwaddr addr)
+static inline uint32_t ldl_phys_internal(hwaddr addr,
+                                         enum device_endian endian)
 {
     int io_index;
     uint8_t *ptr;
@@ -3263,18 +2223,53 @@
         io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
         if (p)
             addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
-        val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
+        val = io_mem_read(io_index, addr, 4);
+#if defined(TARGET_WORDS_BIGENDIAN)
+        if (endian == DEVICE_LITTLE_ENDIAN) {
+            val = bswap32(val);
+        }
+#else
+        if (endian == DEVICE_BIG_ENDIAN) {
+            val = bswap32(val);
+        }
+#endif
     } else {
         /* RAM case */
         ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
             (addr & ~TARGET_PAGE_MASK);
-        val = ldl_p(ptr);
+        switch (endian) {
+            case DEVICE_LITTLE_ENDIAN:
+                val = ldl_le_p(ptr);
+                break;
+            case DEVICE_BIG_ENDIAN:
+                val = ldl_be_p(ptr);
+                break;
+            default:
+                val = ldl_p(ptr);
+                break;
+        }
     }
     return val;
 }
 
+uint32_t ldl_phys(hwaddr addr)
+{
+    return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
+}
+
+uint32_t ldl_le_phys(hwaddr addr)
+{
+    return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
+}
+
+uint32_t ldl_be_phys(hwaddr addr)
+{
+    return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
+}
+
 /* warning: addr must be aligned */
-uint64_t ldq_phys(hwaddr addr)
+static inline uint64_t ldq_phys_internal(hwaddr addr,
+                                         enum device_endian endian)
 {
     int io_index;
     uint8_t *ptr;
@@ -3295,22 +2290,50 @@
         io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
         if (p)
             addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
+
+        /* XXX This is broken when device endian != cpu endian.
+               Fix and add "endian" variable check */
 #ifdef TARGET_WORDS_BIGENDIAN
-        val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32;
-        val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
+        val = (uint64_t)io_mem_read(io_index, addr, 4) << 32;
+        val |= io_mem_read(io_index, addr + 4, 4);
 #else
-        val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
-        val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4) << 32;
+        val = io_mem_read(io_index, addr, 4);
+        val |= (uint64_t)io_mem_read(io_index, addr + 4, 4) << 32;
 #endif
     } else {
         /* RAM case */
         ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
             (addr & ~TARGET_PAGE_MASK);
-        val = ldq_p(ptr);
+        switch (endian) {
+        case DEVICE_LITTLE_ENDIAN:
+            val = ldq_le_p(ptr);
+            break;
+        case DEVICE_BIG_ENDIAN:
+            val = ldq_be_p(ptr);
+            break;
+        default:
+            val = ldq_p(ptr);
+            break;
+        }
     }
     return val;
 }
 
+uint64_t ldq_phys(hwaddr addr)
+{
+    return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
+}
+
+uint64_t ldq_le_phys(hwaddr addr)
+{
+    return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
+}
+
+uint64_t ldq_be_phys(hwaddr addr)
+{
+    return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
+}
+
 /* XXX: optimize */
 uint32_t ldub_phys(hwaddr addr)
 {
@@ -3320,11 +2343,70 @@
 }
 
 /* XXX: optimize */
+static inline uint32_t lduw_phys_internal(hwaddr addr,
+                                          enum device_endian endian)
+{
+    int io_index;
+    uint8_t *ptr;
+    uint64_t val;
+    unsigned long pd;
+    PhysPageDesc *p;
+
+    p = phys_page_find(addr >> TARGET_PAGE_BITS);
+    if (!p) {
+        pd = IO_MEM_UNASSIGNED;
+    } else {
+        pd = p->phys_offset;
+    }
+
+    if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
+        !(pd & IO_MEM_ROMD)) {
+        /* I/O case */
+        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+        if (p)
+            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
+        val = io_mem_read(io_index, addr, 2);
+#if defined(TARGET_WORDS_BIGENDIAN)
+        if (endian == DEVICE_LITTLE_ENDIAN) {
+            val = bswap16(val);
+        }
+#else
+        if (endian == DEVICE_BIG_ENDIAN) {
+            val = bswap16(val);
+        }
+#endif
+    } else {
+        /* RAM case */
+        ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
+            (addr & ~TARGET_PAGE_MASK);
+        switch (endian) {
+        case DEVICE_LITTLE_ENDIAN:
+            val = lduw_le_p(ptr);
+            break;
+        case DEVICE_BIG_ENDIAN:
+            val = lduw_be_p(ptr);
+            break;
+        default:
+            val = lduw_p(ptr);
+            break;
+        }
+    }
+    return val;
+}
+
 uint32_t lduw_phys(hwaddr addr)
 {
-    uint16_t val;
-    cpu_physical_memory_read(addr, (uint8_t *)&val, 2);
-    return tswap16(val);
+    return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
+}
+
+uint32_t lduw_le_phys(hwaddr addr)
+{
+    return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
+}
+
+uint32_t lduw_be_phys(hwaddr addr)
+{
+    return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
 }
 
 /* warning: addr must be aligned. The ram page is not masked as dirty
@@ -3348,7 +2430,7 @@
         io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
         if (p)
             addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
-        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
+        io_mem_write(io_index, addr, val, 4);
     } else {
         unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
         ptr = qemu_get_ram_ptr(addr1);
@@ -3385,11 +2467,11 @@
         if (p)
             addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
 #ifdef TARGET_WORDS_BIGENDIAN
-        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 32);
-        io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val);
+        io_mem_write(io_index, addr, val >> 32, 4);
+        io_mem_write(io_index, addr + 4, val, 4);
 #else
-        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
-        io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val >> 32);
+        io_mem_write(io_index, addr, val, 4);
+        io_mem_write(io_index, addr + 4, val >> 32, 4);
 #endif
     } else {
         ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
@@ -3399,7 +2481,8 @@
 }
 
 /* warning: addr must be aligned */
-void stl_phys(hwaddr addr, uint32_t val)
+static inline void stl_phys_internal(hwaddr addr, uint32_t val,
+                                     enum device_endian endian)
 {
     int io_index;
     uint8_t *ptr;
@@ -3417,17 +2500,51 @@
         io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
         if (p)
             addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
-        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
+#if defined(TARGET_WORDS_BIGENDIAN)
+        if (endian == DEVICE_LITTLE_ENDIAN) {
+            val = bswap32(val);
+        }
+#else
+        if (endian == DEVICE_BIG_ENDIAN) {
+            val = bswap32(val);
+        }
+#endif
+        io_mem_write(io_index, addr, val, 4);
     } else {
         unsigned long addr1;
         addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
         /* RAM case */
         ptr = qemu_get_ram_ptr(addr1);
-        stl_p(ptr, val);
+        switch (endian) {
+        case DEVICE_LITTLE_ENDIAN:
+            stl_le_p(ptr, val);
+            break;
+        case DEVICE_BIG_ENDIAN:
+            stl_be_p(ptr, val);
+            break;
+        default:
+            stl_p(ptr, val);
+            break;
+        }
         invalidate_and_set_dirty(addr1, 4);
     }
 }
 
+void stl_phys(hwaddr addr, uint32_t val)
+{
+    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
+}
+
+void stl_le_phys(hwaddr addr, uint32_t val)
+{
+    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
+}
+
+void stl_be_phys(hwaddr addr, uint32_t val)
+{
+    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
+}
+
 /* XXX: optimize */
 void stb_phys(hwaddr addr, uint32_t val)
 {
@@ -3436,29 +2553,107 @@
 }
 
 /* XXX: optimize */
+static inline void stw_phys_internal(hwaddr addr, uint32_t val,
+                                     enum device_endian endian)
+{
+    int io_index;
+    uint8_t *ptr;
+    unsigned long pd;
+    PhysPageDesc *p;
+
+    p = phys_page_find(addr >> TARGET_PAGE_BITS);
+    if (!p) {
+        pd = IO_MEM_UNASSIGNED;
+    } else {
+        pd = p->phys_offset;
+    }
+
+    if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
+        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+        if (p)
+            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
+#if defined(TARGET_WORDS_BIGENDIAN)
+        if (endian == DEVICE_LITTLE_ENDIAN) {
+            val = bswap16(val);
+        }
+#else
+        if (endian == DEVICE_BIG_ENDIAN) {
+            val = bswap16(val);
+        }
+#endif
+        io_mem_write(io_index, addr, val, 2);
+    } else {
+        unsigned long addr1;
+        addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
+        /* RAM case */
+        ptr = qemu_get_ram_ptr(addr1);
+        switch (endian) {
+        case DEVICE_LITTLE_ENDIAN:
+            stw_le_p(ptr, val);
+            break;
+        case DEVICE_BIG_ENDIAN:
+            stw_be_p(ptr, val);
+            break;
+        default:
+            stw_p(ptr, val);
+            break;
+        }
+        if (!cpu_physical_memory_is_dirty(addr1)) {
+            /* invalidate code */
+            tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
+            /* set dirty bit */
+            cpu_physical_memory_set_dirty_flags(addr1,
+                (0xff & ~CODE_DIRTY_FLAG));
+        }
+    }
+}
+
 void stw_phys(hwaddr addr, uint32_t val)
 {
-    uint16_t v = tswap16(val);
-    cpu_physical_memory_write(addr, (const uint8_t *)&v, 2);
+    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
+}
+
+void stw_le_phys(hwaddr addr, uint32_t val)
+{
+    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
+}
+
+void stw_be_phys(hwaddr addr, uint32_t val)
+{
+    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
 }
 
 /* XXX: optimize */
 void stq_phys(hwaddr addr, uint64_t val)
 {
     val = tswap64(val);
-    cpu_physical_memory_write(addr, (const uint8_t *)&val, 8);
+    cpu_physical_memory_write(addr, &val, 8);
+}
+
+void stq_le_phys(hwaddr addr, uint64_t val)
+{
+    val = cpu_to_le64(val);
+    cpu_physical_memory_write(addr, &val, 8);
+}
+
+void stq_be_phys(hwaddr addr, uint64_t val)
+{
+    val = cpu_to_be64(val);
+    cpu_physical_memory_write(addr, &val, 8);
 }
 
 #endif
 
 /* virtual memory access for debug (includes writing to ROM) */
-int cpu_memory_rw_debug(CPUOldState *env, target_ulong addr,
+int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                         void *buf, int len, int is_write)
 {
     int l;
     hwaddr phys_addr;
     target_ulong page;
     uint8_t* buf8 = (uint8_t*)buf;
+    CPUArchState *env = cpu->env_ptr;
 
     while (len > 0) {
         page = addr & TARGET_PAGE_MASK;
@@ -3482,118 +2677,3 @@
     }
     return 0;
 }
-
-/* in deterministic execution mode, instructions doing device I/Os
-   must be at the end of the TB */
-void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
-{
-    TranslationBlock *tb;
-    uint32_t n, cflags;
-    target_ulong pc, cs_base;
-    uint64_t flags;
-
-    tb = tb_find_pc(retaddr);
-    if (!tb) {
-        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
-                  (void*)retaddr);
-    }
-    n = env->icount_decr.u16.low + tb->icount;
-    cpu_restore_state(tb, env, retaddr);
-    /* Calculate how many instructions had been executed before the fault
-       occurred.  */
-    n = n - env->icount_decr.u16.low;
-    /* Generate a new TB ending on the I/O insn.  */
-    n++;
-    /* On MIPS and SH, delay slot instructions can only be restarted if
-       they were already the first instruction in the TB.  If this is not
-       the first instruction in a TB then re-execute the preceding
-       branch.  */
-#if defined(TARGET_MIPS)
-    if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
-        env->active_tc.PC -= 4;
-        env->icount_decr.u16.low++;
-        env->hflags &= ~MIPS_HFLAG_BMASK;
-    }
-#elif defined(TARGET_SH4)
-    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
-            && n > 1) {
-        env->pc -= 2;
-        env->icount_decr.u16.low++;
-        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
-    }
-#endif
-    /* This should never happen.  */
-    if (n > CF_COUNT_MASK)
-        cpu_abort(env, "TB too big during recompile");
-
-    cflags = n | CF_LAST_IO;
-    pc = tb->pc;
-    cs_base = tb->cs_base;
-    flags = tb->flags;
-    tb_phys_invalidate(tb, -1);
-    /* FIXME: In theory this could raise an exception.  In practice
-       we have already translated the block once so it's probably ok.  */
-    tb_gen_code(env, pc, cs_base, flags, cflags);
-    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
-       the first in the TB) then we end up generating a whole new TB and
-       repeating the fault, which is horribly inefficient.
-       Better would be to execute just this insn uncached, or generate a
-       second new TB.  */
-    cpu_resume_from_signal(env, NULL);
-}
-
-#if !defined(CONFIG_USER_ONLY)
-
-void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
-{
-    int i, target_code_size, max_target_code_size;
-    int direct_jmp_count, direct_jmp2_count, cross_page;
-    TranslationBlock *tb;
-
-    target_code_size = 0;
-    max_target_code_size = 0;
-    cross_page = 0;
-    direct_jmp_count = 0;
-    direct_jmp2_count = 0;
-    for(i = 0; i < nb_tbs; i++) {
-        tb = &tbs[i];
-        target_code_size += tb->size;
-        if (tb->size > max_target_code_size)
-            max_target_code_size = tb->size;
-        if (tb->page_addr[1] != -1)
-            cross_page++;
-        if (tb->tb_next_offset[0] != 0xffff) {
-            direct_jmp_count++;
-            if (tb->tb_next_offset[1] != 0xffff) {
-                direct_jmp2_count++;
-            }
-        }
-    }
-    /* XXX: avoid using doubles ? */
-    cpu_fprintf(f, "Translation buffer state:\n");
-    cpu_fprintf(f, "gen code size       %td/%ld\n",
-                code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
-    cpu_fprintf(f, "TB count            %d/%d\n",
-                nb_tbs, code_gen_max_blocks);
-    cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
-                nb_tbs ? target_code_size / nb_tbs : 0,
-                max_target_code_size);
-    cpu_fprintf(f, "TB avg host size    %td bytes (expansion ratio: %0.1f)\n",
-                nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
-                target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
-    cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
-            cross_page,
-            nb_tbs ? (cross_page * 100) / nb_tbs : 0);
-    cpu_fprintf(f, "direct jump count   %d (%d%%) (2 jumps=%d %d%%)\n",
-                direct_jmp_count,
-                nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
-                direct_jmp2_count,
-                nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
-    cpu_fprintf(f, "\nStatistics:\n");
-    cpu_fprintf(f, "TB flush count      %d\n", tb_flush_count);
-    cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
-    cpu_fprintf(f, "TLB flush count     %d\n", tlb_flush_count);
-    tcg_dump_info(f, cpu_fprintf);
-}
-
-#endif
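
Note on the exec.c changes above: the table-indexed io_mem_read/io_mem_write calls are replaced by size-parameterized accessors, and the physical-memory loads/stores gain explicit little/big-endian variants. A minimal usage sketch follows (not part of the patch; the DMA descriptor layout and function name are invented for illustration, only ldl_le_phys/ldq_le_phys/stl_le_phys come from the patch):

    /* Walk a made-up DMA descriptor with the endian-explicit accessors;
     * they access guest physical memory as little-endian regardless of
     * TARGET_WORDS_BIGENDIAN. */
    static void example_complete_descriptor(hwaddr desc)
    {
        uint32_t len  = ldl_le_phys(desc);        /* length field at +0      */
        uint64_t next = ldq_le_phys(desc + 8);    /* next descriptor at +8   */

        stl_le_phys(desc + 4, len | 1u);          /* mark status "done" at +4 */
        (void)next;                               /* sketch only              */
    }
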
diff --git a/gdbstub.c b/gdbstub.c
index cea89dc..c684d9a 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -274,9 +274,9 @@
     RS_SYSCALL,
 };
 typedef struct GDBState {
-    CPUOldState *c_cpu; /* current CPU for step/continue ops */
-    CPUOldState *g_cpu; /* current CPU for other ops */
-    CPUOldState *query_cpu; /* for q{f|s}ThreadInfo */
+    CPUArchState *c_cpu; /* current CPU for step/continue ops */
+    CPUArchState *g_cpu; /* current CPU for other ops */
+    CPUArchState *query_cpu; /* for q{f|s}ThreadInfo */
     enum RSState state; /* parsing state */
     char line_buf[MAX_PACKET_LENGTH];
     int line_buf_index;
@@ -512,7 +512,7 @@
 
 #define NUM_CORE_REGS (CPU_NB_REGS * 2 + 25)
 
-static int cpu_gdb_read_register(CPUOldState *env, uint8_t *mem_buf, int n)
+static int cpu_gdb_read_register(CPUArchState *env, uint8_t *mem_buf, int n)
 {
     if (n < CPU_NB_REGS) {
         GET_REGL(env->regs[gpr_map[n]]);
@@ -559,7 +559,7 @@
     return 0;
 }
 
-static int cpu_gdb_write_register(CPUOldState *env, uint8_t *mem_buf, int i)
+static int cpu_gdb_write_register(CPUArchState *env, uint8_t *mem_buf, int i)
 {
     uint32_t tmp;
 
@@ -637,7 +637,7 @@
 #define GDB_CORE_XML "power-core.xml"
 #endif
 
-static int cpu_gdb_read_register(CPUOldState *env, uint8_t *mem_buf, int n)
+static int cpu_gdb_read_register(CPUArchState *env, uint8_t *mem_buf, int n)
 {
     if (n < 32) {
         /* gprs */
@@ -674,7 +674,7 @@
     return 0;
 }
 
-static int cpu_gdb_write_register(CPUOldState *env, uint8_t *mem_buf, int n)
+static int cpu_gdb_write_register(CPUArchState *env, uint8_t *mem_buf, int n)
 {
     if (n < 32) {
         /* gprs */
@@ -735,7 +735,7 @@
 #define GET_REGA(val) GET_REGL(val)
 #endif
 
-static int cpu_gdb_read_register(CPUOldState *env, uint8_t *mem_buf, int n)
+static int cpu_gdb_read_register(CPUArchState *env, uint8_t *mem_buf, int n)
 {
     if (n < 8) {
         /* g0..g7 */
@@ -790,7 +790,7 @@
     return 0;
 }
 
-static int cpu_gdb_write_register(CPUOldState *env, uint8_t *mem_buf, int n)
+static int cpu_gdb_write_register(CPUArchState *env, uint8_t *mem_buf, int n)
 {
 #if defined(TARGET_ABI32)
     abi_ulong tmp;
@@ -865,7 +865,7 @@
 #define NUM_CORE_REGS 26
 #define GDB_CORE_XML "arm-core.xml"
 
-static int cpu_gdb_read_register(CPUOldState *env, uint8_t *mem_buf, int n)
+static int cpu_gdb_read_register(CPUArchState *env, uint8_t *mem_buf, int n)
 {
     if (n < 16) {
         /* Core integer register.  */
@@ -892,7 +892,7 @@
     return 0;
 }
 
-static int cpu_gdb_write_register(CPUOldState *env, uint8_t *mem_buf, int n)
+static int cpu_gdb_write_register(CPUArchState *env, uint8_t *mem_buf, int n)
 {
     uint32_t tmp;
 
@@ -935,7 +935,7 @@
 
 #define GDB_CORE_XML "cf-core.xml"
 
-static int cpu_gdb_read_register(CPUOldState *env, uint8_t *mem_buf, int n)
+static int cpu_gdb_read_register(CPUArchState *env, uint8_t *mem_buf, int n)
 {
     if (n < 8) {
         /* D0-D7 */
@@ -954,7 +954,7 @@
     return 0;
 }
 
-static int cpu_gdb_write_register(CPUOldState *env, uint8_t *mem_buf, int n)
+static int cpu_gdb_write_register(CPUArchState *env, uint8_t *mem_buf, int n)
 {
     uint32_t tmp;
 
@@ -979,7 +979,7 @@
 
 #define NUM_CORE_REGS 73
 
-static int cpu_gdb_read_register(CPUOldState *env, uint8_t *mem_buf, int n)
+static int cpu_gdb_read_register(CPUArchState *env, uint8_t *mem_buf, int n)
 {
     if (n < 32) {
         GET_REGL(env->active_tc.gpr[n]);
@@ -1025,7 +1025,7 @@
 #define RESTORE_ROUNDING_MODE \
     set_float_rounding_mode(ieee_rm[env->active_fpu.fcr31 & 3], &env->active_fpu.fp_status)
 
-static int cpu_gdb_write_register(CPUOldState *env, uint8_t *mem_buf, int n)
+static int cpu_gdb_write_register(CPUArchState *env, uint8_t *mem_buf, int n)
 {
     target_ulong tmp;
 
@@ -1081,7 +1081,7 @@
 
 #define NUM_CORE_REGS 59
 
-static int cpu_gdb_read_register(CPUOldState *env, uint8_t *mem_buf, int n)
+static int cpu_gdb_read_register(CPUArchState *env, uint8_t *mem_buf, int n)
 {
     if (n < 8) {
         if ((env->sr & (SR_MD | SR_RB)) == (SR_MD | SR_RB)) {
@@ -1115,7 +1115,7 @@
     return 0;
 }
 
-static int cpu_gdb_write_register(CPUOldState *env, uint8_t *mem_buf, int n)
+static int cpu_gdb_write_register(CPUArchState *env, uint8_t *mem_buf, int n)
 {
     uint32_t tmp;
 
@@ -1161,7 +1161,7 @@
 
 #define NUM_CORE_REGS (32 + 5)
 
-static int cpu_gdb_read_register(CPUOldState *env, uint8_t *mem_buf, int n)
+static int cpu_gdb_read_register(CPUArchState *env, uint8_t *mem_buf, int n)
 {
     if (n < 32) {
 	GET_REG32(env->regs[n]);
@@ -1171,7 +1171,7 @@
     return 0;
 }
 
-static int cpu_gdb_write_register(CPUOldState *env, uint8_t *mem_buf, int n)
+static int cpu_gdb_write_register(CPUArchState *env, uint8_t *mem_buf, int n)
 {
     uint32_t tmp;
 
@@ -1191,7 +1191,7 @@
 
 #define NUM_CORE_REGS 49
 
-static int cpu_gdb_read_register(CPUOldState *env, uint8_t *mem_buf, int n)
+static int cpu_gdb_read_register(CPUArchState *env, uint8_t *mem_buf, int n)
 {
     uint8_t srs;
 
@@ -1218,7 +1218,7 @@
     return 0;
 }
 
-static int cpu_gdb_write_register(CPUOldState *env, uint8_t *mem_buf, int n)
+static int cpu_gdb_write_register(CPUArchState *env, uint8_t *mem_buf, int n)
 {
     uint32_t tmp;
 
@@ -1251,7 +1251,7 @@
 
 #define NUM_CORE_REGS 65
 
-static int cpu_gdb_read_register(CPUOldState *env, uint8_t *mem_buf, int n)
+static int cpu_gdb_read_register(CPUArchState *env, uint8_t *mem_buf, int n)
 {
     if (n < 31) {
        GET_REGL(env->ir[n]);
@@ -1278,7 +1278,7 @@
     return 0;
 }
 
-static int cpu_gdb_write_register(CPUOldState *env, uint8_t *mem_buf, int n)
+static int cpu_gdb_write_register(CPUArchState *env, uint8_t *mem_buf, int n)
 {
     target_ulong tmp;
     tmp = ldtul_p(mem_buf);
@@ -1301,12 +1301,12 @@
 
 #define NUM_CORE_REGS 0
 
-static int cpu_gdb_read_register(CPUOldState *env, uint8_t *mem_buf, int n)
+static int cpu_gdb_read_register(CPUArchState *env, uint8_t *mem_buf, int n)
 {
     return 0;
 }
 
-static int cpu_gdb_write_register(CPUOldState *env, uint8_t *mem_buf, int n)
+static int cpu_gdb_write_register(CPUArchState *env, uint8_t *mem_buf, int n)
 {
     return 0;
 }
@@ -1363,7 +1363,7 @@
                      "<xi:include href=\"%s\"/>",
                      GDB_CORE_XML);
 
-            for (r = first_cpu->gdb_regs; r; r = r->next) {
+            for (r = QTAILQ_FIRST(&cpus)->gdb_regs; r; r = r->next) {
                 pstrcat(target_xml, sizeof(target_xml), "<xi:include href=\"");
                 pstrcat(target_xml, sizeof(target_xml), r->xml);
                 pstrcat(target_xml, sizeof(target_xml), "\"/>");
@@ -1381,14 +1381,15 @@
 }
 #endif
 
-static int gdb_read_register(CPUOldState *env, uint8_t *mem_buf, int reg)
+static int gdb_read_register(CPUArchState *env, uint8_t *mem_buf, int reg)
 {
     GDBRegisterState *r;
+    CPUState *cpu = ENV_GET_CPU(env);
 
     if (reg < NUM_CORE_REGS)
         return cpu_gdb_read_register(env, mem_buf, reg);
 
-    for (r = env->gdb_regs; r; r = r->next) {
+    for (r = cpu->gdb_regs; r; r = r->next) {
         if (r->base_reg <= reg && reg < r->base_reg + r->num_regs) {
             return r->get_reg(env, mem_buf, reg - r->base_reg);
         }
@@ -1396,14 +1397,15 @@
     return 0;
 }
 
-static int gdb_write_register(CPUOldState *env, uint8_t *mem_buf, int reg)
+static int gdb_write_register(CPUArchState *env, uint8_t *mem_buf, int reg)
 {
     GDBRegisterState *r;
+    CPUState *cpu = ENV_GET_CPU(env);
 
     if (reg < NUM_CORE_REGS)
         return cpu_gdb_write_register(env, mem_buf, reg);
 
-    for (r = env->gdb_regs; r; r = r->next) {
+    for (r = cpu->gdb_regs; r; r = r->next) {
         if (r->base_reg <= reg && reg < r->base_reg + r->num_regs) {
             return r->set_reg(env, mem_buf, reg - r->base_reg);
         }
@@ -1417,7 +1419,7 @@
    gdb reading a CPU register, and set_reg is gdb modifying a CPU register.
  */
 
-void gdb_register_coprocessor(CPUOldState * env,
+void gdb_register_coprocessor(CPUState *cpu,
                              gdb_reg_cb get_reg, gdb_reg_cb set_reg,
                              int num_regs, const char *xml, int g_pos)
 {
@@ -1431,7 +1433,7 @@
     s->get_reg = get_reg;
     s->set_reg = set_reg;
     s->xml = xml;
-    p = &env->gdb_regs;
+    p = &cpu->gdb_regs;
     while (*p) {
         /* Check for duplicates.  */
         if (strcmp((*p)->xml, xml) == 0)
@@ -1461,17 +1463,17 @@
 
 static int gdb_breakpoint_insert(target_ulong addr, target_ulong len, int type)
 {
-    CPUOldState *env;
+    CPUState *cpu;
     int err = 0;
 
     if (kvm_enabled())
-        return kvm_insert_breakpoint(gdbserver_state->c_cpu, addr, len, type);
+        return kvm_insert_breakpoint(ENV_GET_CPU(gdbserver_state->c_cpu), addr, len, type);
 
     switch (type) {
     case GDB_BREAKPOINT_SW:
     case GDB_BREAKPOINT_HW:
-        for (env = first_cpu; env != NULL; env = env->next_cpu) {
-            err = cpu_breakpoint_insert(env, addr, BP_GDB, NULL);
+        CPU_FOREACH(cpu) {
+            err = cpu_breakpoint_insert(cpu->env_ptr, addr, BP_GDB, NULL);
             if (err)
                 break;
         }
@@ -1480,8 +1482,8 @@
     case GDB_WATCHPOINT_WRITE:
     case GDB_WATCHPOINT_READ:
     case GDB_WATCHPOINT_ACCESS:
-        for (env = first_cpu; env != NULL; env = env->next_cpu) {
-            err = cpu_watchpoint_insert(env, addr, len, xlat_gdb_type[type],
+        CPU_FOREACH(cpu) {
+            err = cpu_watchpoint_insert(cpu->env_ptr, addr, len, xlat_gdb_type[type],
                                         NULL);
             if (err)
                 break;
@@ -1495,17 +1497,17 @@
 
 static int gdb_breakpoint_remove(target_ulong addr, target_ulong len, int type)
 {
-    CPUOldState *env;
+    CPUState *cpu;
     int err = 0;
 
     if (kvm_enabled())
-        return kvm_remove_breakpoint(gdbserver_state->c_cpu, addr, len, type);
+        return kvm_remove_breakpoint(ENV_GET_CPU(gdbserver_state->c_cpu), addr, len, type);
 
     switch (type) {
     case GDB_BREAKPOINT_SW:
     case GDB_BREAKPOINT_HW:
-        for (env = first_cpu; env != NULL; env = env->next_cpu) {
-            err = cpu_breakpoint_remove(env, addr, BP_GDB);
+        CPU_FOREACH(cpu) {
+            err = cpu_breakpoint_remove(cpu->env_ptr, addr, BP_GDB);
             if (err)
                 break;
         }
@@ -1514,8 +1516,8 @@
     case GDB_WATCHPOINT_WRITE:
     case GDB_WATCHPOINT_READ:
     case GDB_WATCHPOINT_ACCESS:
-        for (env = first_cpu; env != NULL; env = env->next_cpu) {
-            err = cpu_watchpoint_remove(env, addr, len, xlat_gdb_type[type]);
+        CPU_FOREACH(cpu) {
+            err = cpu_watchpoint_remove(cpu->env_ptr, addr, len, xlat_gdb_type[type]);
             if (err)
                 break;
         }
@@ -1528,17 +1530,17 @@
 
 static void gdb_breakpoint_remove_all(void)
 {
-    CPUOldState *env;
+    CPUState *cpu;
 
     if (kvm_enabled()) {
-        kvm_remove_all_breakpoints(gdbserver_state->c_cpu);
+        kvm_remove_all_breakpoints(ENV_GET_CPU(gdbserver_state->c_cpu));
         return;
     }
 
-    for (env = first_cpu; env != NULL; env = env->next_cpu) {
-        cpu_breakpoint_remove_all(env, BP_GDB);
+    CPU_FOREACH(cpu) {
+        cpu_breakpoint_remove_all(cpu->env_ptr, BP_GDB);
 #ifndef CONFIG_USER_ONLY
-        cpu_watchpoint_remove_all(env, BP_GDB);
+        cpu_watchpoint_remove_all(cpu->env_ptr, BP_GDB);
 #endif
     }
 }
@@ -1547,7 +1549,7 @@
 {
 #if defined(TARGET_I386)
     s->c_cpu->eip = pc;
-    cpu_synchronize_state(s->c_cpu, 1);
+    cpu_synchronize_state(ENV_GET_CPU(s->c_cpu), 1);
 #elif defined (TARGET_PPC)
     s->c_cpu->nip = pc;
 #elif defined (TARGET_SPARC)
@@ -1568,22 +1570,22 @@
 #endif
 }
 
-static inline int gdb_id(CPUOldState *env)
+static inline int gdb_id(CPUState *cpu)
 {
 #if defined(CONFIG_USER_ONLY) && defined(USE_NPTL)
-    return env->host_tid;
+    return cpu->host_tid;
 #else
-    return env->cpu_index + 1;
+    return cpu->cpu_index + 1;
 #endif
 }
 
-static CPUOldState *find_cpu(uint32_t thread_id)
+static CPUArchState *find_cpu(uint32_t thread_id)
 {
-    CPUOldState *env;
+    CPUState *cpu;
 
-    for (env = first_cpu; env != NULL; env = env->next_cpu) {
-        if (gdb_id(env) == thread_id) {
-            return env;
+    CPU_FOREACH(cpu) {
+        if (gdb_id(cpu) == thread_id) {
+            return cpu->env_ptr;
         }
     }
 
@@ -1592,7 +1594,7 @@
 
 static int gdb_handle_packet(GDBState *s, const char *line_buf)
 {
-    CPUOldState *env;
+    CPUArchState *env;
     const char *p;
     uint32_t thread;
     int ch, reg_size, type, res;
@@ -1610,7 +1612,7 @@
     case '?':
         /* TODO: Make this return the correct value for user-mode.  */
         snprintf(buf, sizeof(buf), "T%02xthread:%02x;", GDB_SIGNAL_TRAP,
-                 gdb_id(s->c_cpu));
+                 gdb_id(ENV_GET_CPU(s->c_cpu)));
         put_packet(s, buf);
         /* Remove all the breakpoints when this query is issued,
          * because gdb is doing an initial connect and the state
@@ -1647,7 +1649,7 @@
             addr = strtoull(p, (char **)&p, 16);
             gdb_set_cpu_pc(s, addr);
         }
-        cpu_single_step(s->c_cpu, sstep_flags);
+        cpu_single_step(ENV_GET_CPU(s->c_cpu), sstep_flags);
         gdb_continue(s);
 	return RS_IDLE;
     case 'F':
@@ -1666,7 +1668,7 @@
                 p++;
             type = *p;
             if (gdb_current_syscall_cb)
-                gdb_current_syscall_cb(s->c_cpu, ret, err);
+                gdb_current_syscall_cb(ENV_GET_CPU(s->c_cpu), ret, err);
             if (type == 'C') {
                 put_packet(s, "T02");
             } else {
@@ -1675,7 +1677,7 @@
         }
         break;
     case 'g':
-        cpu_synchronize_state(s->g_cpu, 0);
+        cpu_synchronize_state(ENV_GET_CPU(s->g_cpu), 0);
         len = 0;
         for (addr = 0; addr < num_g_regs; addr++) {
             reg_size = gdb_read_register(s->g_cpu, mem_buf + len, addr);
@@ -1693,7 +1695,7 @@
             len -= reg_size;
             registers += reg_size;
         }
-        cpu_synchronize_state(s->g_cpu, 1);
+        cpu_synchronize_state(ENV_GET_CPU(s->g_cpu), 1);
         put_packet(s, "OK");
         break;
     case 'm':
@@ -1701,7 +1703,7 @@
         if (*p == ',')
             p++;
         len = strtoull(p, NULL, 16);
-        if (cpu_memory_rw_debug(s->g_cpu, addr, mem_buf, len, 0) != 0) {
+        if (cpu_memory_rw_debug(ENV_GET_CPU(s->g_cpu), addr, mem_buf, len, 0) != 0) {
             put_packet (s, "E14");
         } else {
             memtohex(buf, mem_buf, len);
@@ -1716,7 +1718,7 @@
         if (*p == ':')
             p++;
         hextomem(mem_buf, p, len);
-        if (cpu_memory_rw_debug(s->g_cpu, addr, mem_buf, len, 1) != 0)
+        if (cpu_memory_rw_debug(ENV_GET_CPU(s->g_cpu), addr, mem_buf, len, 1) != 0)
             put_packet(s, "E14");
         else
             put_packet(s, "OK");
@@ -1834,14 +1836,14 @@
             put_packet(s, "QC1");
             break;
         } else if (strcmp(p,"fThreadInfo") == 0) {
-            s->query_cpu = first_cpu;
+            s->query_cpu = QTAILQ_FIRST(&cpus)->env_ptr;
             goto report_cpuinfo;
         } else if (strcmp(p,"sThreadInfo") == 0) {
         report_cpuinfo:
             if (s->query_cpu) {
-                snprintf(buf, sizeof(buf), "m%x", gdb_id(s->query_cpu));
+                snprintf(buf, sizeof(buf), "m%x", gdb_id(ENV_GET_CPU(s->query_cpu)));
                 put_packet(s, buf);
-                s->query_cpu = s->query_cpu->next_cpu;
+                {
+                    CPUState *next_cpu =
+                            QTAILQ_NEXT(ENV_GET_CPU(s->query_cpu), node);
+                    s->query_cpu = next_cpu ? next_cpu->env_ptr : NULL;
+                }
             } else
                 put_packet(s, "l");
             break;
@@ -1849,10 +1851,10 @@
             thread = strtoull(p+16, (char **)&p, 16);
             env = find_cpu(thread);
             if (env != NULL) {
-                cpu_synchronize_state(env, 0);
+                cpu_synchronize_state(ENV_GET_CPU(env), 0);
                 len = snprintf((char *)mem_buf, sizeof(mem_buf),
-                               "CPU#%d [%s]", env->cpu_index,
-                               env->halted ? "halted " : "running");
+                               "CPU#%d [%s]", ENV_GET_CPU(env)->cpu_index,
+                               ENV_GET_CPU(env)->halted ? "halted " : "running");
                 memtohex(buf, mem_buf, len);
                 put_packet(s, buf);
             }
@@ -1948,17 +1950,18 @@
     return RS_IDLE;
 }
 
-void gdb_set_stop_cpu(CPUOldState *env)
+void gdb_set_stop_cpu(CPUState *cpu)
 {
-    gdbserver_state->c_cpu = env;
-    gdbserver_state->g_cpu = env;
+    gdbserver_state->c_cpu = cpu->env_ptr;
+    gdbserver_state->g_cpu = cpu->env_ptr;
 }
 
 #ifndef CONFIG_USER_ONLY
 static void gdb_vm_state_change(void *opaque, int running, int reason)
 {
     GDBState *s = gdbserver_state;
-    CPUOldState *env = s->c_cpu;
+    CPUArchState *env = s->c_cpu;
+    CPUState *cpu = ENV_GET_CPU(env);
     char buf[256];
     const char *type;
     int ret;
@@ -1968,7 +1971,7 @@
         return;
 
     /* disable single step if it was enable */
-    cpu_single_step(env, 0);
+    cpu_single_step(cpu, 0);
 
     if (reason == EXCP_DEBUG) {
         if (env->watchpoint_hit) {
@@ -1985,7 +1988,7 @@
             }
             snprintf(buf, sizeof(buf),
                      "T%02xthread:%02x;%swatch:" TARGET_FMT_lx ";",
-                     GDB_SIGNAL_TRAP, gdb_id(env), type,
+                     GDB_SIGNAL_TRAP, gdb_id(cpu), type,
                      env->watchpoint_hit->vaddr);
             put_packet(s, buf);
             env->watchpoint_hit = NULL;
@@ -1996,7 +1999,7 @@
     } else {
         ret = GDB_SIGNAL_INT;
     }
-    snprintf(buf, sizeof(buf), "T%02xthread:%02x;", ret, gdb_id(env));
+    snprintf(buf, sizeof(buf), "T%02xthread:%02x;", ret, gdb_id(cpu));
     put_packet(s, buf);
 }
 #endif
@@ -2062,7 +2065,7 @@
 #ifdef CONFIG_USER_ONLY
     gdb_handlesig(s->c_cpu, 0);
 #else
-    cpu_exit(s->c_cpu);
+    cpu_exit(ENV_GET_CPU(s->c_cpu));
 #endif
 }
 
@@ -2157,7 +2160,7 @@
 }
 
 int
-gdb_handlesig (CPUOldState *env, int sig)
+gdb_handlesig (CPUState *cpu, int sig)
 {
   GDBState *s;
   char buf[256];
@@ -2168,8 +2171,8 @@
     return sig;
 
   /* disable single step if it was enabled */
-  cpu_single_step(env, 0);
-  tb_flush(env);
+  cpu_single_step(cpu, 0);
+  tb_flush(cpu->env_ptr);
 
   if (sig != 0)
     {
@@ -2206,7 +2209,7 @@
 }
 
 /* Tell the remote gdb that the process has exited.  */
-void gdb_exit(CPUOldState *env, int code)
+void gdb_exit(CPUState *cpu, int code)
 {
   GDBState *s;
   char buf[4];
@@ -2220,7 +2223,7 @@
 }
 
 /* Tell the remote gdb that the process has exited due to SIG.  */
-void gdb_signalled(CPUOldState *env, int sig)
+void gdb_signalled(CPUState *cpu, int sig)
 {
   GDBState *s;
   char buf[4];
@@ -2256,8 +2259,8 @@
     qemu_setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *)&val, sizeof(val));
 
     s = g_malloc0(sizeof(GDBState));
-    s->c_cpu = first_cpu;
-    s->g_cpu = first_cpu;
+    s->c_cpu = QTAILQ_FIRST(&cpus)->env_ptr;
+    s->g_cpu = s->c_cpu;
     s->fd = fd;
     gdb_has_xml = 0;
 
@@ -2308,7 +2311,7 @@
 }
 
 /* Disable gdb stub for child processes.  */
-void gdbserver_fork(CPUOldState *env)
+void gdbserver_fork(CPUArchState *env)
 {
     GDBState *s = gdbserver_state;
     if (gdbserver_fd < 0 || s->fd < 0)
@@ -2434,8 +2437,8 @@
         mon_chr = s->mon_chr;
         memset(s, 0, sizeof(GDBState));
     }
-    s->c_cpu = first_cpu;
-    s->g_cpu = first_cpu;
+    s->c_cpu = QTAILQ_FIRST(&cpus)->env_ptr;
+    s->g_cpu = s->c_cpu;
     s->chr = chr;
     s->state = chr ? RS_IDLE : RS_INACTIVE;
     s->mon_chr = mon_chr;
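
Note on the gdbstub.c changes: they rely on the CPUState/CPUArchState split, where generic per-CPU state (cpu_index, halted, gdb_regs) lives on CPUState, iteration uses CPU_FOREACH, and code hops between the two views via cpu->env_ptr and ENV_GET_CPU(env). A tiny sketch of that pattern (not part of the patch; the function name is hypothetical):

    /* Count halted CPUs; shows the CPU_FOREACH / CPUState pattern used by
     * the breakpoint and thread-info handlers above. */
    static int example_count_halted_cpus(void)
    {
        CPUState *cpu;
        int n = 0;

        CPU_FOREACH(cpu) {
            if (cpu->halted) {
                n++;
            }
        }
        return n;
    }
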
diff --git a/hw/android/android_arm.c b/hw/android/android_arm.c
index ff0e569..2ee31db 100644
--- a/hw/android/android_arm.c
+++ b/hw/android/android_arm.c
@@ -22,9 +22,9 @@
 #include "ui/console.h"
 #include "sysemu/blockdev.h"
 #include "hw/android/goldfish/pipe.h"
-#ifdef CONFIG_MEMCHECK
-#include "memcheck/memcheck_api.h"
-#endif  // CONFIG_MEMCHECK
+#ifdef CONFIG_ANDROID_MEMCHECK
+#include "android/qemu/memcheck/memcheck_api.h"
+#endif  // CONFIG_ANDROID_MEMCHECK
 
 #include "android/utils/debug.h"
 
@@ -34,8 +34,6 @@
 
 char* audio_input_source = NULL;
 
-void goldfish_memlog_init(uint32_t base);
-
 static struct goldfish_device event0_device = {
     .name = "goldfish_events",
     .id = 0,
@@ -51,14 +49,6 @@
 
 /* Board init.  */
 
-#define TEST_SWITCH 1
-#if TEST_SWITCH
-uint32_t switch_test_write(void *opaque, uint32_t state)
-{
-    goldfish_switch_set_state(opaque, state);
-    return state;
-}
-#endif
 static void android_arm_init_(ram_addr_t ram_size,
     const char *boot_device,
     const char *kernel_filename,
@@ -127,8 +117,6 @@
         }
     }
 
-    goldfish_memlog_init(0xff006000);
-
     goldfish_battery_init(android_hw->hw_battery);
 
     goldfish_add_device_no_io(&event0_device);
@@ -138,22 +126,15 @@
     goldfish_add_device_no_io(&nand_device);
     nand_dev_init(nand_device.base);
 #endif
-#ifdef CONFIG_MEMCHECK
+#ifdef CONFIG_ANDROID_MEMCHECK
     if (memcheck_enabled) {
         trace_dev_init();
     }
-#endif  // CONFIG_MEMCHECK
+#endif  // CONFIG_ANDROID_MEMCHECK
 
-    pipe_dev_init();
-
-#if TEST_SWITCH
-    {
-        void *sw;
-        sw = goldfish_switch_add("test", NULL, NULL, 0);
-        goldfish_switch_set_state(sw, 1);
-        goldfish_switch_add("test2", switch_test_write, sw, 1);
-    }
-#endif
+    bool newDeviceNaming =
+            (androidHwConfig_getKernelDeviceNaming(android_hw) >= 1);
+    pipe_dev_init(newDeviceNaming);
 
     memset(&info, 0, sizeof info);
     info.ram_size        = ram_size;
@@ -161,6 +142,7 @@
     info.kernel_cmdline  = kernel_cmdline;
     info.initrd_filename = initrd_filename;
     info.nb_cpus         = 1;
+    info.is_linux        = 1;
     info.board_id        = 1441;
 
     arm_load_kernel(env, &info);
diff --git a/hw/android/android_mips.c b/hw/android/android_mips.c
index ed9f42f..4b25660 100644
--- a/hw/android/android_mips.c
+++ b/hw/android/android_mips.c
@@ -21,9 +21,9 @@
 #include "android/globals.h"
 #include "audio/audio.h"
 #include "sysemu/blockdev.h"
-#ifdef CONFIG_MEMCHECK
-#include "memcheck/memcheck_api.h"
-#endif  // CONFIG_MEMCHECK
+#ifdef CONFIG_ANDROID_MEMCHECK
+#include "android/qemu/memcheck/memcheck_api.h"
+#endif  // CONFIG_ANDROID_MEMCHECK
 
 #include "android/utils/debug.h"
 
@@ -42,8 +42,6 @@
 
 char* audio_input_source = NULL;
 
-void goldfish_memlog_init(uint32_t base);
-
 static struct goldfish_device event0_device = {
     .name = "goldfish_events",
     .id = 0,
@@ -59,15 +57,6 @@
 
 /* Board init.  */
 
-#define TEST_SWITCH 1
-#if TEST_SWITCH
-uint32_t switch_test_write(void *opaque, uint32_t state)
-{
-    goldfish_switch_set_state(opaque, state);
-    return state;
-}
-#endif
-
 #define VIRT_TO_PHYS_ADDEND (-((int64_t)(int32_t)0x80000000))
 
 #define PHYS_TO_VIRT(x) ((x) | ~(target_ulong)0x7fffffff)
@@ -167,7 +156,13 @@
 
     env = cpu_init(cpu_model);
 
-    register_savevm( "cpu", 0, MIPS_CPU_SAVE_VERSION, cpu_save, cpu_load, env );
+    register_savevm(NULL,
+                    "cpu",
+                    0,
+                    MIPS_CPU_SAVE_VERSION,
+                    cpu_save,
+                    cpu_load,
+                    env);
 
     if (ram_size > GOLDFISH_IO_SPACE)
         ram_size = GOLDFISH_IO_SPACE;   /* avoid overlap of ram and IO regs */
@@ -222,8 +217,6 @@
             goldfish_mmc_init(GOLDFISH_MMC, 0, info->bdrv);
 	}
     }
-    goldfish_memlog_init(GOLDFISH_MEMLOG);
-
     goldfish_battery_init(android_hw->hw_battery);
 
     goldfish_add_device_no_io(&event0_device);
@@ -234,21 +227,14 @@
     nand_dev_init(nand_device.base);
 #endif
 
-#ifdef CONFIG_MEMCHECK
+#ifdef CONFIG_ANDROID_MEMCHECK
     if (memcheck_enabled) {
         trace_dev_init();
     }
-#endif  // CONFIG_MEMCHECK
-    pipe_dev_init();
-
-#if TEST_SWITCH
-    {
-        void *sw;
-        sw = goldfish_switch_add("test", NULL, NULL, 0);
-        goldfish_switch_set_state(sw, 1);
-        goldfish_switch_add("test2", switch_test_write, sw, 1);
-    }
-#endif
+#endif  // CONFIG_ANDROID_MEMCHECK
+    bool newDeviceNaming =
+            (androidHwConfig_getKernelDeviceNaming(android_hw) >= 1);
+    pipe_dev_init(newDeviceNaming);
 
     android_load_kernel(env, ram_size, kernel_filename, kernel_cmdline, initrd_filename);
 }
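
The reflowed register_savevm() calls in these board and device files all follow the same shape: a leading DeviceState* owner (NULL here, since none of this is qdev-based) ahead of the section name, instance id, version, and the save/load callbacks. A generic sketch (not part of the patch; struct example_state, example_save and example_load are hypothetical):

    static void example_register_snapshot(struct example_state *s)
    {
        register_savevm(NULL,              /* DeviceState* owner (none here) */
                        "example_state",   /* hypothetical section name      */
                        0,                 /* instance id                    */
                        1,                 /* version id                     */
                        example_save,      /* hypothetical save handler      */
                        example_load,      /* hypothetical load handler      */
                        s);                /* opaque passed to the handlers  */
    }
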
diff --git a/hw/android/goldfish/audio.c b/hw/android/goldfish/audio.c
index ce0b642..9259a75 100644
--- a/hw/android/goldfish/audio.c
+++ b/hw/android/goldfish/audio.c
@@ -34,38 +34,43 @@
 #define  USE_QEMU_AUDIO_IN  1
 
 enum {
-	/* audio status register */
-	AUDIO_INT_STATUS	= 0x00,
-	/* set this to enable IRQ */
-	AUDIO_INT_ENABLE	= 0x04,
-	/* set these to specify buffer addresses */
-	AUDIO_SET_WRITE_BUFFER_1 = 0x08,
-	AUDIO_SET_WRITE_BUFFER_2 = 0x0C,
-	/* set number of bytes in buffer to write */
-	AUDIO_WRITE_BUFFER_1  = 0x10,
-	AUDIO_WRITE_BUFFER_2  = 0x14,
+    /* audio status register */
+    AUDIO_INT_STATUS	= 0x00,
+    /* set this to enable IRQ */
+    AUDIO_INT_ENABLE	= 0x04,
+    /* set these to specify buffer addresses */
+    AUDIO_SET_WRITE_BUFFER_1 = 0x08,
+    AUDIO_SET_WRITE_BUFFER_2 = 0x0C,
+    /* set number of bytes in buffer to write */
+    AUDIO_WRITE_BUFFER_1  = 0x10,
+    AUDIO_WRITE_BUFFER_2  = 0x14,
 
-	/* true if audio input is supported */
-	AUDIO_READ_SUPPORTED = 0x18,
-	/* buffer to use for audio input */
-	AUDIO_SET_READ_BUFFER = 0x1C,
+    /* true if audio input is supported */
+    AUDIO_READ_SUPPORTED = 0x18,
+    /* buffer to use for audio input */
+    AUDIO_SET_READ_BUFFER = 0x1C,
 
-	/* driver writes number of bytes to read */
-	AUDIO_START_READ  = 0x20,
+    /* driver writes number of bytes to read */
+    AUDIO_START_READ  = 0x20,
 
-	/* number of bytes available in read buffer */
-	AUDIO_READ_BUFFER_AVAILABLE  = 0x24,
+    /* number of bytes available in read buffer */
+    AUDIO_READ_BUFFER_AVAILABLE  = 0x24,
 
-	/* AUDIO_INT_STATUS bits */
+    /* for 64-bit guest CPUs only */
+    AUDIO_SET_WRITE_BUFFER_1_HIGH = 0x28,
+    AUDIO_SET_WRITE_BUFFER_2_HIGH = 0x30,
+    AUDIO_SET_READ_BUFFER_HIGH = 0x34,
 
-	/* this bit set when it is safe to write more bytes to the buffer */
-	AUDIO_INT_WRITE_BUFFER_1_EMPTY	= 1U << 0,
-	AUDIO_INT_WRITE_BUFFER_2_EMPTY	= 1U << 1,
-	AUDIO_INT_READ_BUFFER_FULL      = 1U << 2,
+    /* AUDIO_INT_STATUS bits */
+
+    /* this bit set when it is safe to write more bytes to the buffer */
+    AUDIO_INT_WRITE_BUFFER_1_EMPTY = 1U << 0,
+    AUDIO_INT_WRITE_BUFFER_2_EMPTY = 1U << 1,
+    AUDIO_INT_READ_BUFFER_FULL     = 1U << 2,
 };
 
 struct goldfish_audio_buff {
-    uint32_t  address;
+    uint64_t  address;
     uint32_t  length;
     uint8*    data;
     uint32_t  capacity;
@@ -147,7 +152,13 @@
 static void
 goldfish_audio_buff_set_address( struct goldfish_audio_buff*  b, uint32_t  addr )
 {
-    b->address = addr;
+    uint64_set_low(&b->address, addr);
+}
+
+static void
+goldfish_audio_buff_set_address_high( struct goldfish_audio_buff*  b, uint32_t  addr )
+{
+    uint64_set_high(&b->address, addr);
 }
 
 static void
@@ -210,27 +221,8 @@
     return read;
 }
 
-static void
-goldfish_audio_buff_put( struct goldfish_audio_buff*  b, QEMUFile*  f )
-{
-    qemu_put_be32(f, b->address );
-    qemu_put_be32(f, b->length );
-    qemu_put_be32(f, b->offset );
-    qemu_put_buffer(f, b->data, b->length );
-}
-
-static void
-goldfish_audio_buff_get( struct goldfish_audio_buff*  b, QEMUFile*  f )
-{
-    b->address = qemu_get_be32(f);
-    b->length  = qemu_get_be32(f);
-    b->offset  = qemu_get_be32(f);
-    goldfish_audio_buff_ensure(b, b->length);
-    qemu_get_buffer(f, b->data, b->length);
-}
-
 /* update this whenever you change the goldfish_audio_state structure */
-#define  AUDIO_STATE_SAVE_VERSION  2
+#define  AUDIO_STATE_SAVE_VERSION  3
 
 #define  QFIELD_STRUCT   struct goldfish_audio_state
 QFIELD_BEGIN(audio_state_fields)
@@ -240,6 +232,28 @@
     QFIELD_INT32(current_buffer),
 QFIELD_END
 
+static void
+goldfish_audio_buff_put( struct goldfish_audio_buff*  b, QEMUFile*  f )
+{
+    qemu_put_be64(f, b->address );
+    qemu_put_be32(f, b->length );
+    qemu_put_be32(f, b->offset );
+    qemu_put_buffer(f, b->data, b->length );
+}
+
+static void
+goldfish_audio_buff_get( struct goldfish_audio_buff*  b, QEMUFile*  f, int version_id )
+{
+    if (version_id == (AUDIO_STATE_SAVE_VERSION - 1))
+        b->address = (uint64_t)qemu_get_be32(f);
+    else
+        b->address = qemu_get_be64(f);
+    b->length  = qemu_get_be32(f);
+    b->offset  = qemu_get_be32(f);
+    goldfish_audio_buff_ensure(b, b->length);
+    qemu_get_buffer(f, b->data, b->length);
+}
+
 static void  audio_state_save( QEMUFile*  f, void* opaque )
 {
     struct goldfish_audio_state*  s = opaque;
@@ -256,14 +270,15 @@
     struct goldfish_audio_state*  s = opaque;
     int                           ret;
 
-    if (version_id != AUDIO_STATE_SAVE_VERSION)
+    if ((version_id != AUDIO_STATE_SAVE_VERSION) &&
+        (version_id != (AUDIO_STATE_SAVE_VERSION - 1))) {
         return -1;
-
+    }
     ret = qemu_get_struct(f, audio_state_fields, s);
     if (!ret) {
-        goldfish_audio_buff_get( s->out_buff1, f );
-        goldfish_audio_buff_get( s->out_buff2, f );
-        goldfish_audio_buff_get (s->in_buff, f);
+        goldfish_audio_buff_get( s->out_buff1, f, version_id);
+        goldfish_audio_buff_get( s->out_buff2, f, version_id);
+        goldfish_audio_buff_get (s->in_buff, f, version_id);
     }
 
     // Similar to enable_audio - without the buffer reset.
@@ -427,11 +442,21 @@
             D( "%s: AUDIO_SET_WRITE_BUFFER_1 %08x", __FUNCTION__, val);
             goldfish_audio_buff_set_address( s->out_buff1, val );
             break;
+        case AUDIO_SET_WRITE_BUFFER_1_HIGH:
+            /* save high 32 bits of pointer to buffer 1 */
+            D( "%s: AUDIO_SET_WRITE_BUFFER_1_HIGH %08x", __FUNCTION__, val);
+            goldfish_audio_buff_set_address_high( s->out_buff1, val );
+            break;
         case AUDIO_SET_WRITE_BUFFER_2:
             /* save pointer to buffer 2 */
             D( "%s: AUDIO_SET_WRITE_BUFFER_2 %08x", __FUNCTION__, val);
             goldfish_audio_buff_set_address( s->out_buff2, val );
             break;
+        case AUDIO_SET_WRITE_BUFFER_2_HIGH:
+            /* save high 32 bits of pointer to buffer 2 */
+            D( "%s: AUDIO_SET_WRITE_BUFFER_2_HIGH %08x", __FUNCTION__, val);
+            goldfish_audio_buff_set_address_high( s->out_buff2, val );
+            break;
         case AUDIO_WRITE_BUFFER_1:
             /* record that data in buffer 1 is ready to write */
             //D( "%s: AUDIO_WRITE_BUFFER_1 %08x", __FUNCTION__, val);
@@ -462,6 +487,12 @@
             goldfish_device_set_irq(&s->dev, 0, (s->int_status & s->int_enable));
             break;
 
+        case AUDIO_SET_READ_BUFFER_HIGH:
+            /* save high 32 bits of pointer to the read buffer */
+            goldfish_audio_buff_set_address_high( s->in_buff, val );
+            D( "%s: AUDIO_SET_READ_BUFFER_HIGH %08x", __FUNCTION__, val );
+            break;
+
         default:
             cpu_abort (cpu_single_env, "goldfish_audio_write: Bad offset %x\n", offset);
     }
@@ -634,7 +665,12 @@
 
     goldfish_device_add(&s->dev, goldfish_audio_readfn, goldfish_audio_writefn, s);
 
-    register_savevm( "audio_state", 0, AUDIO_STATE_SAVE_VERSION,
-                     audio_state_save, audio_state_load, s );
+    register_savevm(NULL,
+                    "audio_state",
+                    0,
+                    AUDIO_STATE_SAVE_VERSION,
+                    audio_state_save,
+                    audio_state_load,
+                    s);
 }
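
The audio device above illustrates the pattern this patch applies to most goldfish devices: widen the guest address to 64 bits, bump the savevm version, and keep loading snapshots written with the previous version by reading the old 32-bit encoding. A minimal sketch of that load-side pattern (the struct, field, and version names here are hypothetical; only the qemu_get_be32()/qemu_get_be64() usage mirrors the handlers above):

```c
#include <stdint.h>
#include "migration/qemu-file.h"

/* Sketch only -- MY_SAVE_VERSION and struct my_state are made up. */
#define MY_SAVE_VERSION         3   /* current: 64-bit guest address */
#define MY_SAVE_VERSION_LEGACY  2   /* older snapshots: 32-bit address */

struct my_state {
    uint64_t address;   /* was uint32_t before the widening */
};

static int my_state_load(QEMUFile *f, void *opaque, int version_id)
{
    struct my_state *s = opaque;

    if (version_id == MY_SAVE_VERSION) {
        s->address = qemu_get_be64(f);            /* new 64-bit encoding */
    } else if (version_id == MY_SAVE_VERSION_LEGACY) {
        s->address = (uint64_t)qemu_get_be32(f);  /* legacy 32-bit encoding */
    } else {
        return -1;                                /* unknown snapshot version */
    }
    return 0;
}
```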
 
diff --git a/hw/android/goldfish/battery.c b/hw/android/goldfish/battery.c
index 4797bbb..6624176 100644
--- a/hw/android/goldfish/battery.c
+++ b/hw/android/goldfish/battery.c
@@ -178,8 +178,13 @@
 
     goldfish_device_add(&s->dev, goldfish_battery_readfn, goldfish_battery_writefn, s);
 
-    register_savevm( "battery_state", 0, BATTERY_STATE_SAVE_VERSION,
-                     goldfish_battery_save, goldfish_battery_load, s);
+    register_savevm(NULL,
+                    "battery_state",
+                    0,
+                    BATTERY_STATE_SAVE_VERSION,
+                    goldfish_battery_save,
+                    goldfish_battery_load,
+                    s);
 }
 
 void goldfish_battery_set_prop(int ac, int property, int value)
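
Every register_savevm() call in this patch gains a leading NULL argument. For reference, the signature being targeted appears to be the upstream-QEMU one of this era, where the first parameter is an optional DeviceState* used to qualify the section name (the exact declaration in this tree may differ slightly):

```c
/* Assumed prototype -- matches upstream QEMU's savevm API of this period;
 * the goldfish devices pass NULL because they are not qdev devices. */
typedef void SaveStateHandler(QEMUFile *f, void *opaque);
typedef int  LoadStateHandler(QEMUFile *f, void *opaque, int version_id);

int register_savevm(DeviceState *dev,
                    const char *idstr,
                    int instance_id,
                    int version_id,
                    SaveStateHandler *save_state,
                    LoadStateHandler *load_state,
                    void *opaque);
```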
diff --git a/hw/android/goldfish/device.c b/hw/android/goldfish/device.c
index 7338423..bcba751 100644
--- a/hw/android/goldfish/device.c
+++ b/hw/android/goldfish/device.c
@@ -31,9 +31,12 @@
 #define PDEV_BUS_IRQ            (0x18)
 #define PDEV_BUS_IRQ_COUNT      (0x1c)
 
+#define PDEV_BUS_NAME_ADDR_HIGH  (0x20)
+
 struct bus_state {
     struct goldfish_device dev;
     struct goldfish_device *current;
+    uint64_t name_addr_high;
 };
 
 qemu_irq *goldfish_pic;
@@ -42,6 +45,13 @@
 uint32_t goldfish_free_base;
 uint32_t goldfish_free_irq;
 
+int goldfish_64bit_guest = 0;
+
+int goldfish_guest_is_64bit()
+{
+    return goldfish_64bit_guest;
+}
+
 void goldfish_device_set_irq(struct goldfish_device *dev, int irq, int level)
 {
     if(irq >= dev->irq_count)
@@ -165,9 +175,14 @@
             break;
         case PDEV_BUS_GET_NAME:
             if(s->current) {
-                safe_memory_rw_debug(cpu_single_env, value, (void*)s->current->name, strlen(s->current->name), 1);
+                target_ulong name = (target_ulong)(s->name_addr_high | value);
+                safe_memory_rw_debug(current_cpu, name, (void*)s->current->name, strlen(s->current->name), 1);
             }
             break;
+        case PDEV_BUS_NAME_ADDR_HIGH:
+            s->name_addr_high = ((uint64_t)value << 32);
+            goldfish_64bit_guest = 1;
+            break;
         default:
             cpu_abort (cpu_single_env, "goldfish_bus_write: Bad offset %x\n", offset);
     }
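
The bus changes above are how the emulator detects a 64-bit guest: a 64-bit kernel writes the upper half of its device-name buffer address to PDEV_BUS_NAME_ADDR_HIGH before issuing PDEV_BUS_GET_NAME, and that write also sets goldfish_64bit_guest. A rough guest-side sketch (illustrative only; reg_write32() stands in for the guest kernel's MMIO helper, and the PDEV_BUS_GET_NAME offset is defined earlier in device.c, outside this hunk):

```c
#include <stdint.h>

/* Hypothetical guest-driver fragment, not part of this patch. */
static inline void reg_write32(volatile uint8_t *base, uint32_t off, uint32_t val)
{
    *(volatile uint32_t *)(base + off) = val;
}

static void pdev_bus_get_name(volatile uint8_t *base, char *name_buf)
{
    uint64_t addr = (uint64_t)(uintptr_t)name_buf;

    /* Writing the high half is what flips goldfish_64bit_guest on the host. */
    reg_write32(base, PDEV_BUS_NAME_ADDR_HIGH, (uint32_t)(addr >> 32));
    /* The low half rides along with the GET_NAME command itself. */
    reg_write32(base, PDEV_BUS_GET_NAME, (uint32_t)addr);
}
```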
diff --git a/hw/android/goldfish/events_device.c b/hw/android/goldfish/events_device.c
index f11e7ab..6672678 100644
--- a/hw/android/goldfish/events_device.c
+++ b/hw/android/goldfish/events_device.c
@@ -537,6 +537,11 @@
      */
     user_event_register_generic(s, events_put_generic);
 
-    register_savevm( "events_state", 0, EVENTS_STATE_SAVE_VERSION,
-                      events_state_save, events_state_load, s );
+    register_savevm(NULL,
+                    "events_state",
+                    0,
+                    EVENTS_STATE_SAVE_VERSION,
+                    events_state_save,
+                    events_state_load,
+                    s);
 }
diff --git a/hw/android/goldfish/fb.c b/hw/android/goldfish/fb.c
index da7376f..8738321 100644
--- a/hw/android/goldfish/fb.c
+++ b/hw/android/goldfish/fb.c
@@ -672,6 +672,11 @@
 
     goldfish_device_add(&s->dev, goldfish_fb_readfn, goldfish_fb_writefn, s);
 
-    register_savevm( "goldfish_fb", 0, GOLDFISH_FB_SAVE_VERSION,
-                     goldfish_fb_save, goldfish_fb_load, s);
+    register_savevm(NULL,
+                    "goldfish_fb",
+                    0,
+                    GOLDFISH_FB_SAVE_VERSION,
+                    goldfish_fb_save,
+                    goldfish_fb_load,
+                    s);
 }
diff --git a/hw/android/goldfish/interrupt.c b/hw/android/goldfish/interrupt.c
index b7738e6..6b6abb0 100644
--- a/hw/android/goldfish/interrupt.c
+++ b/hw/android/goldfish/interrupt.c
@@ -181,8 +181,13 @@
         return NULL;
     }
 
-    register_savevm( "goldfish_int", 0, GOLDFISH_INT_SAVE_VERSION,
-                     goldfish_int_save, goldfish_int_load, s);
+    register_savevm(NULL,
+                    "goldfish_int",
+                    0,
+                    GOLDFISH_INT_SAVE_VERSION,
+                    goldfish_int_save,
+                    goldfish_int_load,
+                    s);
 
     return qi;
 }
diff --git a/hw/android/goldfish/memlog.c b/hw/android/goldfish/memlog.c
deleted file mode 100644
index 4716c9c..0000000
--- a/hw/android/goldfish/memlog.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/* Copyright (C) 2007-2008 The Android Open Source Project
-**
-** This software is licensed under the terms of the GNU General Public
-** License version 2, as published by the Free Software Foundation, and
-** may be copied, distributed, and modified under those terms.
-**
-** This program is distributed in the hope that it will be useful,
-** but WITHOUT ANY WARRANTY; without even the implied warranty of
-** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-** GNU General Public License for more details.
-*/
-#include <unistd.h>
-#include <fcntl.h>
-#include <string.h>
-
-#include "migration/qemu-file.h"
-#include "hw/android/goldfish/device.h"
-#include "audio/audio.h"
-
-extern void  dprint(const char*  fmt, ...);
-
-int fd = -1;
-
-static uint32_t memlog_read(void *opaque, hwaddr offset)
-{
-    (void)opaque;
-    (void)offset;
-    return 0;
-}
-
-unsigned info[8];
-
-static void memlog_write(void *opaque, hwaddr offset, uint32_t val)
-{
-    char buf[128];
-    struct goldfish_device *dev = opaque;
-    int ret;
-
-    (void)dev;
-
-    if (offset < 8*4)
-        info[offset / 4] = val;
-
-    if (offset == 0) {
-            /* write PID and VADDR to logfile */
-        snprintf(buf, sizeof buf, "%08x %08x\n", info[0], info[1]);
-        do {
-            ret = write(fd, buf, strlen(buf));
-        } while (ret < 0 && errno == EINTR);
-    }
-}
-
-
-static CPUReadMemoryFunc *memlog_readfn[] = {
-   memlog_read,
-   memlog_read,
-   memlog_read
-};
-
-static CPUWriteMemoryFunc *memlog_writefn[] = {
-   memlog_write,
-   memlog_write,
-   memlog_write
-};
-
-struct goldfish_device memlog_dev;
-
-void goldfish_memlog_init(uint32_t base)
-{
-    struct goldfish_device *dev = &memlog_dev;
-
-    dev->name = "goldfish_memlog";
-    dev->id = 0;
-    dev->base = base;
-    dev->size = 0x1000;
-    dev->irq_count = 0;
-
-    do {
-        fd = open("mem.log", /* O_CREAT | */ O_TRUNC | O_WRONLY, 0644);
-    } while (fd < 0 && errno == EINTR);
-
-    goldfish_device_add(dev, memlog_readfn, memlog_writefn, dev);
-}
-
diff --git a/hw/android/goldfish/mmc.c b/hw/android/goldfish/mmc.c
index f58e36b..a7d8ec4 100644
--- a/hw/android/goldfish/mmc.c
+++ b/hw/android/goldfish/mmc.c
@@ -10,6 +10,7 @@
 ** GNU General Public License for more details.
 */
 #include "cpu.h"
+#include "qemu-common.h"
 #include "migration/qemu-file.h"
 #include "hw/android/goldfish/device.h"
 #include "hw/hw.h"
@@ -49,6 +50,11 @@
     /* MMC state flags */
     MMC_STATE               = 0x2C,
 
+    /* 64-bit guest CPUs only */
+
+    /* Set high 32-bits of buffer address */
+    MMC_SET_BUFFER_HIGH     = 0x30,
+
     /* MMC_INT_STATUS bits */
 
     MMC_STAT_END_OF_CMD     = 1U << 0,
@@ -65,7 +71,7 @@
     struct goldfish_device dev;
     BlockDriverState *bs;
     // pointer to our buffer
-    uint32_t buffer_address;
+    uint64_t buffer_address;
     // offsets for read and write operations
     uint32_t read_offset, write_offset;
     // buffer status flags
@@ -84,10 +90,13 @@
     uint8_t* buf;
 };
 
-#define  GOLDFISH_MMC_SAVE_VERSION  2
+#define  GOLDFISH_MMC_SAVE_VERSION  3
+#define  GOLDFISH_MMC_SAVE_VERSION_LEGACY  2
+
+// Note: This doesn't include |buffer_address| which is saved and loaded
+//       explicitly below to support legacy encodings.
 #define  QFIELD_STRUCT  struct goldfish_mmc_state
 QFIELD_BEGIN(goldfish_mmc_fields)
-    QFIELD_INT32(buffer_address),
     QFIELD_INT32(read_offset),
     QFIELD_INT32(write_offset),
     QFIELD_INT32(int_status),
@@ -106,6 +115,7 @@
 {
     struct goldfish_mmc_state*  s = opaque;
 
+    qemu_put_be64(f, s->buffer_address);
     qemu_put_struct(f, goldfish_mmc_fields, s);
 }
 
@@ -113,9 +123,14 @@
 {
     struct goldfish_mmc_state*  s = opaque;
 
-    if (version_id != GOLDFISH_MMC_SAVE_VERSION)
+    if (version_id == GOLDFISH_MMC_SAVE_VERSION) {
+        s->buffer_address = qemu_get_be64(f);
+    } else if (version_id == GOLDFISH_MMC_SAVE_VERSION_LEGACY) {
+        s->buffer_address = qemu_get_be32(f);
+    } else {
+        // Unsupported version!
         return -1;
-
+    }
     return qemu_get_struct(f, goldfish_mmc_fields, s);
 }
 
@@ -252,7 +267,9 @@
                 m = (uint32_t)(capacity / (512*1024)) - 1;
                 // m must fit into 22 bits
                 if (m & 0xFFC00000) {
-                    fprintf(stderr, "SD card too big (%lld bytes).  Maximum SDHC card size is 128 gigabytes.\n", (long long)capacity);
+                    fprintf(stderr, "SD card too big (%" PRId64 " bytes).  "
+                            "Maximum SDHC card size is 128 gigabytes.\n",
+                            capacity);
                     abort();
                 }
 
@@ -278,7 +295,9 @@
                 exponent = 0;
                 capacity = sector_count * 512;
                 if (capacity > 2147483648U) {
-                    fprintf(stderr, "SD card too big (%lld bytes).  Maximum SD card size is 2 gigabytes.\n", (long long)capacity);
+                    fprintf(stderr, "SD card too big (%" PRIu64 " bytes). "
+                            "Maximum SD card size is 2 gigabytes.\n",
+                            capacity);
                     abort();
                 }
                 capacity >>= 10; // convert to Kbytes
@@ -468,7 +487,11 @@
             break;
         case MMC_SET_BUFFER:
             /* save pointer to buffer 1 */
-            s->buffer_address = val;
+            uint64_set_low(&s->buffer_address, val);
+            break;
+        case MMC_SET_BUFFER_HIGH:
+            /* save high 32 bits of the buffer pointer */
+            uint64_set_high(&s->buffer_address, val);
             break;
         case MMC_CMD:
             goldfish_mmc_do_command(s, val, s->arg);
@@ -515,7 +538,12 @@
 
     goldfish_device_add(&s->dev, goldfish_mmc_readfn, goldfish_mmc_writefn, s);
 
-    register_savevm( "goldfish_mmc", 0, GOLDFISH_MMC_SAVE_VERSION,
-                     goldfish_mmc_save, goldfish_mmc_load, s);
+    register_savevm(NULL,
+                    "goldfish_mmc",
+                    0,
+                    GOLDFISH_MMC_SAVE_VERSION,
+                    goldfish_mmc_save,
+                    goldfish_mmc_load,
+                    s);
 }
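
The MMC handler above (and the NAND, pipe, and tty handlers further down) assembles 64-bit guest addresses through uint64_set_low()/uint64_set_high(). Their definitions are not part of this diff; an implementation consistent with how they are used would look like this (a sketch -- the real helpers elsewhere in the tree may differ in detail):

```c
#include <stdint.h>

/* Update one 32-bit half of a 64-bit value, leaving the other half intact. */
static inline void uint64_set_low(uint64_t *addr, uint32_t value)
{
    *addr = (*addr & ~(uint64_t)0xFFFFFFFF) | value;
}

static inline void uint64_set_high(uint64_t *addr, uint32_t value)
{
    *addr = (*addr & 0xFFFFFFFF) | ((uint64_t)value << 32);
}
```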
 
diff --git a/hw/android/goldfish/nand.c b/hw/android/goldfish/nand.c
index 072b504..67fce20 100644
--- a/hw/android/goldfish/nand.c
+++ b/hw/android/goldfish/nand.c
@@ -11,6 +11,7 @@
 */
 #include "migration/qemu-file.h"
 #include "nand_reg.h"
+#include "hw/android/goldfish/device.h"
 #include "hw/android/goldfish/nand.h"
 #include "hw/android/goldfish/vmem.h"
 #include "hw/hw.h"
@@ -127,7 +128,7 @@
     uint32_t addr_low;
     uint32_t addr_high;
     uint32_t transfer_size;
-    uint32_t data;
+    uint64_t data;
     uint32_t batch_addr_low;
     uint32_t batch_addr_high;
     uint32_t result;
@@ -138,7 +139,8 @@
  * 2: saving actual disk contents as well
  * 3: use the correct data length and truncate to avoid padding.
  */
-#define  NAND_DEV_STATE_SAVE_VERSION  4
+#define  NAND_DEV_STATE_SAVE_VERSION  5
+#define  NAND_DEV_STATE_SAVE_VERSION_LEGACY  4
 
 #define  QFIELD_STRUCT  nand_dev_controller_state
 QFIELD_BEGIN(nand_dev_controller_state_fields)
@@ -146,12 +148,26 @@
     QFIELD_INT32(addr_low),
     QFIELD_INT32(addr_high),
     QFIELD_INT32(transfer_size),
-    QFIELD_INT32(data),
+    QFIELD_INT64(data),
     QFIELD_INT32(batch_addr_low),
     QFIELD_INT32(batch_addr_high),
     QFIELD_INT32(result),
 QFIELD_END
 
+// Legacy encoding support, split the structure in two halves, with
+// a 32-bit |data| field in the middle to be loaded explicitly.
+QFIELD_BEGIN(nand_dev_controller_state_legacy_1_fields)
+    QFIELD_INT32(dev),
+    QFIELD_INT32(addr_low),
+    QFIELD_INT32(addr_high),
+    QFIELD_INT32(transfer_size),
+QFIELD_END
+
+QFIELD_BEGIN(nand_dev_controller_state_legacy_2_fields)
+    QFIELD_INT32(batch_addr_low),
+    QFIELD_INT32(batch_addr_high),
+    QFIELD_INT32(result),
+QFIELD_END
 
 /* EINTR-proof read - due to SIGALRM in use elsewhere */
 static int  do_read(int  fd, void*  buf, size_t  size)
@@ -351,18 +367,24 @@
     nand_dev_controller_state*  s = opaque;
     int ret;
 
-    if (version_id != NAND_DEV_STATE_SAVE_VERSION)
-        return -1;
-
-    if ((ret = qemu_get_struct(f, nand_dev_controller_state_fields, s)))
-        return ret;
-    if ((ret = nand_dev_load_disks(f)))
-        return ret;
-
-    return 0;
+    if (version_id == NAND_DEV_STATE_SAVE_VERSION) {
+        ret = qemu_get_struct(f, nand_dev_controller_state_fields, s);
+    } else if (version_id == NAND_DEV_STATE_SAVE_VERSION_LEGACY) {
+        ret = qemu_get_struct(f, nand_dev_controller_state_legacy_1_fields, s);
+        if (!ret) {
+            // Read 32-bit data field directly.
+            s->data = (uint64_t)qemu_get_be32(f);
+            ret = qemu_get_struct(
+                    f, nand_dev_controller_state_legacy_2_fields, s);
+        }
+    } else {
+        // Invalid encoding.
+        ret = -1;
+    }
+    return ret ? ret : nand_dev_load_disks(f);
 }
 
-static uint32_t nand_dev_read_file(nand_dev *dev, uint32_t data, uint64_t addr, uint32_t total_len)
+static uint32_t nand_dev_read_file(nand_dev *dev, target_ulong data, uint64_t addr, uint32_t total_len)
 {
     uint32_t len = total_len;
     size_t read_len = dev->erase_size;
@@ -382,14 +404,14 @@
         if(!eof) {
             read_len = do_read(dev->fd, dev->data, read_len);
         }
-        safe_memory_rw_debug(cpu_single_env, data, dev->data, read_len, 1);
+        safe_memory_rw_debug(current_cpu, data, dev->data, read_len, 1);
         data += read_len;
         len -= read_len;
     }
     return total_len;
 }
 
-static uint32_t nand_dev_write_file(nand_dev *dev, uint32_t data, uint64_t addr, uint32_t total_len)
+static uint32_t nand_dev_write_file(nand_dev *dev, target_ulong data, uint64_t addr, uint32_t total_len)
 {
     uint32_t len = total_len;
     size_t write_len = dev->erase_size;
@@ -401,7 +423,7 @@
     while(len > 0) {
         if(len < write_len)
             write_len = len;
-        safe_memory_rw_debug(cpu_single_env, data, dev->data, write_len, 0);
+        safe_memory_rw_debug(current_cpu, data, dev->data, write_len, 0);
         ret = do_write(dev->fd, dev->data, write_len);
         if(ret < write_len) {
             XLOG("nand_dev_write_file, write failed: %s\n", strerror(errno));
@@ -456,14 +478,23 @@
     if (cmd == NAND_CMD_WRITE_BATCH || cmd == NAND_CMD_READ_BATCH ||
         cmd == NAND_CMD_ERASE_BATCH) {
         struct batch_data bd;
+        struct batch_data_64 bd64;
         uint64_t bd_addr = ((uint64_t)s->batch_addr_high << 32) | s->batch_addr_low;
-
-        cpu_physical_memory_read(bd_addr, (void*)&bd, sizeof(struct batch_data));
-        s->dev = bd.dev;
-        s->addr_low = bd.addr_low;
-        s->addr_high = bd.addr_high;
-        s->transfer_size = bd.transfer_size;
-        s->data = bd.data;
+        if (goldfish_guest_is_64bit()) {
+            cpu_physical_memory_read(bd_addr, (void*)&bd64, sizeof(struct batch_data_64));
+            s->dev = bd64.dev;
+            s->addr_low = bd64.addr_low;
+            s->addr_high = bd64.addr_high;
+            s->transfer_size = bd64.transfer_size;
+            s->data = bd64.data;
+        } else {
+            cpu_physical_memory_read(bd_addr, (void*)&bd, sizeof(struct batch_data));
+            s->dev = bd.dev;
+            s->addr_low = bd.addr_low;
+            s->addr_high = bd.addr_high;
+            s->transfer_size = bd.transfer_size;
+            s->data = bd.data;
+        }
     }
     addr = s->addr_low | ((uint64_t)s->addr_high << 32);
     size = s->transfer_size;
@@ -475,7 +506,7 @@
     case NAND_CMD_GET_DEV_NAME:
         if(size > dev->devname_len)
             size = dev->devname_len;
-        safe_memory_rw_debug(cpu_single_env, s->data, (uint8_t*)dev->devname, size, 1);
+        safe_memory_rw_debug(current_cpu, s->data, (uint8_t*)dev->devname, size, 1);
         return size;
     case NAND_CMD_READ_BATCH:
     case NAND_CMD_READ:
@@ -485,7 +516,7 @@
             size = dev->max_size - addr;
         if(dev->fd >= 0)
             return nand_dev_read_file(dev, s->data, addr, size);
-        safe_memory_rw_debug(cpu_single_env,s->data, &dev->data[addr], size, 1);
+        safe_memory_rw_debug(current_cpu, s->data, &dev->data[addr], size, 1);
         return size;
     case NAND_CMD_WRITE_BATCH:
     case NAND_CMD_WRITE:
@@ -497,7 +528,7 @@
             size = dev->max_size - addr;
         if(dev->fd >= 0)
             return nand_dev_write_file(dev, s->data, addr, size);
-        safe_memory_rw_debug(cpu_single_env,s->data, &dev->data[addr], size, 0);
+        safe_memory_rw_debug(current_cpu, s->data, &dev->data[addr], size, 0);
         return size;
     case NAND_CMD_ERASE_BATCH:
     case NAND_CMD_ERASE:
@@ -551,16 +582,25 @@
         s->transfer_size = value;
         break;
     case NAND_DATA:
-        s->data = value;
+        uint64_set_low(&s->data, value);
+        break;
+    case NAND_DATA_HIGH:
+        uint64_set_high(&s->data, value);
         break;
     case NAND_COMMAND:
         s->result = nand_dev_do_cmd(s, value);
         if (value == NAND_CMD_WRITE_BATCH || value == NAND_CMD_READ_BATCH ||
             value == NAND_CMD_ERASE_BATCH) {
             struct batch_data bd;
+            struct batch_data_64 bd64;
             uint64_t bd_addr = ((uint64_t)s->batch_addr_high << 32) | s->batch_addr_low;
-            bd.result = s->result;
-            cpu_physical_memory_write(bd_addr, (void*)&bd, sizeof(struct batch_data));
+            if (goldfish_guest_is_64bit()) {
+                bd64.result = s->result;
+                cpu_physical_memory_write(bd_addr, (void*)&bd64, sizeof(struct batch_data_64));
+            } else {
+                bd.result = s->result;
+                cpu_physical_memory_write(bd_addr, (void*)&bd, sizeof(struct batch_data));
+            }
         }
         break;
     default:
@@ -641,8 +681,13 @@
     cpu_register_physical_memory(base, 0x00000fff, iomemtype);
     s->base = base;
 
-    register_savevm( "nand_dev", instance_id++, NAND_DEV_STATE_SAVE_VERSION,
-                      nand_dev_controller_state_save, nand_dev_controller_state_load, s);
+    register_savevm(NULL,
+                    "nand_dev",
+                    instance_id++,
+                    NAND_DEV_STATE_SAVE_VERSION,
+                    nand_dev_controller_state_save,
+                    nand_dev_controller_state_load,
+                    s);
 }
 
 static int arg_match(const char *a, const char *b, size_t b_len)
diff --git a/hw/android/goldfish/nand_reg.h b/hw/android/goldfish/nand_reg.h
index 34d7c44..be89bcb 100644
--- a/hw/android/goldfish/nand_reg.h
+++ b/hw/android/goldfish/nand_reg.h
@@ -13,57 +13,68 @@
 #define NAND_DEVICE_REG_H
 
 enum nand_cmd {
-	NAND_CMD_GET_DEV_NAME,  // Write device name for NAND_DEV to NAND_DATA (vaddr)
-	NAND_CMD_READ,
-	NAND_CMD_WRITE,
-	NAND_CMD_ERASE,
-	NAND_CMD_BLOCK_BAD_GET, // NAND_RESULT is 1 if block is bad, 0 if it is not
-	NAND_CMD_BLOCK_BAD_SET,
-	NAND_CMD_READ_BATCH,	// BATCH OP extensions.
-	NAND_CMD_WRITE_BATCH,
-	NAND_CMD_ERASE_BATCH
+    NAND_CMD_GET_DEV_NAME,  // Write device name for NAND_DEV to NAND_DATA (vaddr)
+    NAND_CMD_READ,
+    NAND_CMD_WRITE,
+    NAND_CMD_ERASE,
+    NAND_CMD_BLOCK_BAD_GET, // NAND_RESULT is 1 if block is bad, 0 if it is not
+    NAND_CMD_BLOCK_BAD_SET,
+    NAND_CMD_READ_BATCH,	// BATCH OP extensions.
+    NAND_CMD_WRITE_BATCH,
+    NAND_CMD_ERASE_BATCH
 };
 
 struct batch_data{
-	uint32_t dev;
-	uint32_t addr_low;
-	uint32_t addr_high;
-	uint32_t transfer_size;
-	uint32_t data;
-	uint32_t result;
+    uint32_t dev;
+    uint32_t addr_low;
+    uint32_t addr_high;
+    uint32_t transfer_size;
+    uint32_t data;
+    uint32_t result;
+};
+
+struct batch_data_64 {
+    uint32_t dev;
+    uint32_t addr_low;
+    uint32_t addr_high;
+    uint32_t transfer_size;
+    uint64_t data;
+    uint32_t result;
 };
 
 enum nand_dev_flags {
-	NAND_DEV_FLAG_READ_ONLY = 0x00000001,
-	NAND_DEV_FLAG_BATCH_CAP = 0x00000002
+    NAND_DEV_FLAG_READ_ONLY = 0x00000001,
+    NAND_DEV_FLAG_BATCH_CAP = 0x00000002
 };
 
 #define NAND_VERSION_CURRENT (1)
 
 enum nand_reg {
-	// Global
-	NAND_VERSION        = 0x000,
-	NAND_NUM_DEV        = 0x004,
-	NAND_DEV            = 0x008,
+    // Global
+    NAND_VERSION        = 0x000,
+    NAND_NUM_DEV        = 0x004,
+    NAND_DEV            = 0x008,
 
-	// Dev info
-	NAND_DEV_FLAGS      = 0x010,
-	NAND_DEV_NAME_LEN   = 0x014,
-	NAND_DEV_PAGE_SIZE  = 0x018,
-	NAND_DEV_EXTRA_SIZE = 0x01c,
-	NAND_DEV_ERASE_SIZE = 0x020,
-	NAND_DEV_SIZE_LOW   = 0x028,
-	NAND_DEV_SIZE_HIGH  = 0x02c,
+    // Dev info
+    NAND_DEV_FLAGS      = 0x010,
+    NAND_DEV_NAME_LEN   = 0x014,
+    NAND_DEV_PAGE_SIZE  = 0x018,
+    NAND_DEV_EXTRA_SIZE = 0x01c,
+    NAND_DEV_ERASE_SIZE = 0x020,
+    NAND_DEV_SIZE_LOW   = 0x028,
+    NAND_DEV_SIZE_HIGH  = 0x02c,
 
-	// Command
-	NAND_RESULT         = 0x040,
-	NAND_COMMAND        = 0x044,
-	NAND_DATA           = 0x048,
-	NAND_TRANSFER_SIZE  = 0x04c,
-	NAND_ADDR_LOW       = 0x050,
-	NAND_ADDR_HIGH      = 0x054,
-	NAND_BATCH_ADDR_LOW = 0x058,
-	NAND_BATCH_ADDR_HIGH= 0x05c,
+    // Command
+    NAND_RESULT         = 0x040,
+    NAND_COMMAND        = 0x044,
+    NAND_DATA           = 0x048,
+    NAND_TRANSFER_SIZE  = 0x04c,
+    NAND_ADDR_LOW       = 0x050,
+    NAND_ADDR_HIGH      = 0x054,
+    NAND_BATCH_ADDR_LOW = 0x058,
+    NAND_BATCH_ADDR_HIGH= 0x05c,
+
+    NAND_DATA_HIGH      = 0x100,  // For 64-bit guest CPUs.
 };
 
 #endif
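
With batch_data and batch_data_64 both defined, the two read paths in nand_dev_do_cmd() above copy the same five fields. If that duplication ever becomes a maintenance issue, one possible cleanup (a sketch, not part of this patch) is a helper that normalizes either layout into the controller state:

```c
/* Sketch only; relies on the types and helpers already included by nand.c. */
static void nand_dev_read_batch(nand_dev_controller_state *s, uint64_t bd_addr)
{
    if (goldfish_guest_is_64bit()) {
        struct batch_data_64 bd64;
        cpu_physical_memory_read(bd_addr, (void*)&bd64, sizeof(bd64));
        s->dev           = bd64.dev;
        s->addr_low      = bd64.addr_low;
        s->addr_high     = bd64.addr_high;
        s->transfer_size = bd64.transfer_size;
        s->data          = bd64.data;
    } else {
        struct batch_data bd;
        cpu_physical_memory_read(bd_addr, (void*)&bd, sizeof(bd));
        s->dev           = bd.dev;
        s->addr_low      = bd.addr_low;
        s->addr_high     = bd.addr_high;
        s->transfer_size = bd.transfer_size;
        s->data          = bd.data;
    }
}
```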
diff --git a/hw/android/goldfish/pipe.c b/hw/android/goldfish/pipe.c
index df94f1c..574516c 100644
--- a/hw/android/goldfish/pipe.c
+++ b/hw/android/goldfish/pipe.c
@@ -53,7 +53,10 @@
 /* Maximum length of pipe service name, in characters (excluding final 0) */
 #define MAX_PIPE_SERVICE_NAME_SIZE  255
 
-#define GOLDFISH_PIPE_SAVE_VERSION  2
+#define GOLDFISH_PIPE_SAVE_VERSION  3
+
+// Up to Tools r22.6, the emulator saved with this version number.
+#define GOLDFISH_PIPE_SAVE_VERSION_LEGACY  2
 
 /***********************************************************************
  ***********************************************************************
@@ -128,7 +131,7 @@
     struct Pipe*              next;
     struct Pipe*              next_waked;
     PipeDevice*                device;
-    uint32_t                   channel;
+    uint64_t                   channel;
     void*                      opaque;
     const GoldfishPipeFuncs*   funcs;
     const PipeService*         service;
@@ -150,7 +153,7 @@
 }
 
 static Pipe*
-pipe_new(uint32_t channel, PipeDevice* dev)
+pipe_new(uint64_t channel, PipeDevice* dev)
 {
     Pipe*  pipe = pipe_new0(dev);
     pipe->channel = channel;
@@ -159,7 +162,7 @@
 }
 
 static Pipe**
-pipe_list_findp_channel( Pipe** list, uint32_t channel )
+pipe_list_findp_channel( Pipe** list, uint64_t channel )
 {
     Pipe** pnode = list;
     for (;;) {
@@ -229,7 +232,7 @@
     }
 
     /* Now save other common data */
-    qemu_put_be32(file, (unsigned int)pipe->channel);
+    qemu_put_be64(file, pipe->channel);
     qemu_put_byte(file, (int)pipe->wanted);
     qemu_put_byte(file, (int)pipe->closed);
 
@@ -247,12 +250,12 @@
 }
 
 static Pipe*
-pipe_load( PipeDevice* dev, QEMUFile* file )
+pipe_load( PipeDevice* dev, QEMUFile* file, int version_id )
 {
     Pipe*              pipe;
     const PipeService* service = NULL;
     int   state = qemu_get_byte(file);
-    uint32_t channel;
+    uint64_t channel;
 
     if (state != 0) {
         /* Pipe is associated with a service. */
@@ -268,7 +271,11 @@
         }
     }
 
-    channel = qemu_get_be32(file);
+    if (version_id == GOLDFISH_PIPE_SAVE_VERSION_LEGACY) {
+        channel = qemu_get_be32(file);
+    } else {
+        channel = qemu_get_be64(file);
+    }
     pipe = pipe_new(channel, dev);
     pipe->wanted  = qemu_get_byte(file);
     pipe->closed  = qemu_get_byte(file);
@@ -350,8 +357,8 @@
     const GoldfishPipeBuffer*  buffers_limit = buffers + numBuffers;
     int ret = 0;
 
-    DD("%s: channel=0x%x numBuffers=%d", __FUNCTION__,
-       pcon->pipe->channel,
+    DD("%s: channel=0x%llx numBuffers=%d", __FUNCTION__,
+       (unsigned long long)pcon->pipe->channel,
        numBuffers);
 
     while (buffers < buffers_limit) {
@@ -953,10 +960,10 @@
     Pipe*  signaled_pipes;
 
     /* i/o registers */
-    uint32_t  address;
+    uint64_t  address;
     uint32_t  size;
     uint32_t  status;
-    uint32_t  channel;
+    uint64_t  channel;
     uint32_t  wakes;
     uint64_t  params_addr;
 };
@@ -982,7 +989,7 @@
 
     switch (command) {
     case PIPE_CMD_OPEN:
-        DD("%s: CMD_OPEN channel=0x%x", __FUNCTION__, dev->channel);
+        DD("%s: CMD_OPEN channel=0x%llx", __FUNCTION__, (unsigned long long)dev->channel);
         if (pipe != NULL) {
             dev->status = PIPE_ERROR_INVAL;
             break;
@@ -994,7 +1001,7 @@
         break;
 
     case PIPE_CMD_CLOSE:
-        DD("%s: CMD_CLOSE channel=0x%x", __FUNCTION__, dev->channel);
+        DD("%s: CMD_CLOSE channel=0x%llx", __FUNCTION__, (unsigned long long)dev->channel);
         /* Remove from device's lists */
         *lookup = pipe->next;
         pipe->next = NULL;
@@ -1010,35 +1017,43 @@
     case PIPE_CMD_READ_BUFFER: {
         /* Translate virtual address into physical one, into emulator memory. */
         GoldfishPipeBuffer  buffer;
-        uint32_t            address = dev->address;
-        uint32_t            page    = address & TARGET_PAGE_MASK;
+        target_ulong        address = dev->address;
+        target_ulong        page    = address & TARGET_PAGE_MASK;
         hwaddr  phys;
-        phys = safe_get_phys_page_debug(env, page);
+        phys = safe_get_phys_page_debug(ENV_GET_CPU(env), page);
+#ifdef TARGET_X86_64
+        phys = phys & TARGET_PTE_MASK;
+#endif
         buffer.data = qemu_get_ram_ptr(phys) + (address - page);
         buffer.size = dev->size;
         dev->status = pipe->funcs->recvBuffers(pipe->opaque, &buffer, 1);
-        DD("%s: CMD_READ_BUFFER channel=0x%x address=0x%08x size=%d > status=%d",
-           __FUNCTION__, dev->channel, dev->address, dev->size, dev->status);
+        DD("%s: CMD_READ_BUFFER channel=0x%llx address=0x%16llx size=%d > status=%d",
+           __FUNCTION__, (unsigned long long)dev->channel, (unsigned long long)dev->address,
+           dev->size, dev->status);
         break;
     }
 
     case PIPE_CMD_WRITE_BUFFER: {
         /* Translate virtual address into physical one, into emulator memory. */
         GoldfishPipeBuffer  buffer;
-        uint32_t            address = dev->address;
-        uint32_t            page    = address & TARGET_PAGE_MASK;
+        target_ulong        address = dev->address;
+        target_ulong        page    = address & TARGET_PAGE_MASK;
         hwaddr  phys;
-        phys = safe_get_phys_page_debug(env, page);
+        phys = safe_get_phys_page_debug(ENV_GET_CPU(env), page);
+#ifdef TARGET_X86_64
+        phys = phys & TARGET_PTE_MASK;
+#endif
         buffer.data = qemu_get_ram_ptr(phys) + (address - page);
         buffer.size = dev->size;
         dev->status = pipe->funcs->sendBuffers(pipe->opaque, &buffer, 1);
-        DD("%s: CMD_WRITE_BUFFER channel=0x%x address=0x%08x size=%d > status=%d",
-           __FUNCTION__, dev->channel, dev->address, dev->size, dev->status);
+        DD("%s: CMD_WRITE_BUFFER channel=0x%llx address=0x%16llx size=%d > status=%d",
+           __FUNCTION__, (unsigned long long)dev->channel, (unsigned long long)dev->address,
+           dev->size, dev->status);
         break;
     }
 
     case PIPE_CMD_WAKE_ON_READ:
-        DD("%s: CMD_WAKE_ON_READ channel=0x%x", __FUNCTION__, dev->channel);
+        DD("%s: CMD_WAKE_ON_READ channel=0x%llx", __FUNCTION__, (unsigned long long)dev->channel);
         if ((pipe->wanted & PIPE_WAKE_READ) == 0) {
             pipe->wanted |= PIPE_WAKE_READ;
             pipe->funcs->wakeOn(pipe->opaque, pipe->wanted);
@@ -1047,7 +1062,7 @@
         break;
 
     case PIPE_CMD_WAKE_ON_WRITE:
-        DD("%s: CMD_WAKE_ON_WRITE channel=0x%x", __FUNCTION__, dev->channel);
+        DD("%s: CMD_WAKE_ON_WRITE channel=0x%llx", __FUNCTION__, (unsigned long long)dev->channel);
         if ((pipe->wanted & PIPE_WAKE_WRITE) == 0) {
             pipe->wanted |= PIPE_WAKE_WRITE;
             pipe->funcs->wakeOn(pipe->opaque, pipe->wanted);
@@ -1077,12 +1092,22 @@
 
     case PIPE_REG_ADDRESS:
         DR("%s: address=%d (0x%x)", __FUNCTION__, value, value);
-        s->address = value;
+        uint64_set_low(&s->address, value);
+        break;
+
+    case PIPE_REG_ADDRESS_HIGH:
+        DR("%s: address_high=%d (0x%x)", __FUNCTION__, value, value);
+        uint64_set_high(&s->address, value);
         break;
 
     case PIPE_REG_CHANNEL:
         DR("%s: channel=%d (0x%x)", __FUNCTION__, value, value);
-        s->channel = value;
+        uint64_set_low(&s->channel, value);
+        break;
+
+    case PIPE_REG_CHANNEL_HIGH:
+        DR("%s: channel_high=%d (0x%x)", __FUNCTION__, value, value);
+        uint64_set_high(&s->channel, value);
         break;
 
     case PIPE_REG_PARAMS_ADDR_HIGH:
@@ -1097,27 +1122,45 @@
     case PIPE_REG_ACCESS_PARAMS:
     {
         struct access_params aps;
+        struct access_params_64 aps64;
         uint32_t cmd;
 
         /* Don't touch aps.result if anything wrong */
         if (s->params_addr == 0)
             break;
 
-        cpu_physical_memory_read(s->params_addr, (void*)&aps,
-                        sizeof(struct access_params));
-
+        if (goldfish_guest_is_64bit()) {
+            cpu_physical_memory_read(s->params_addr, (void*)&aps64,
+                                     sizeof(aps64));
+        } else {
+            cpu_physical_memory_read(s->params_addr, (void*)&aps,
+                                     sizeof(aps));
+        }
         /* sync pipe device state from batch buffer */
-        s->channel = aps.channel;
-        s->size = aps.size;
-        s->address = aps.address;
-        cmd = aps.cmd;
+        if (goldfish_guest_is_64bit()) {
+            s->channel = aps64.channel;
+            s->size = aps64.size;
+            s->address = aps64.address;
+            cmd = aps64.cmd;
+        } else {
+            s->channel = aps.channel;
+            s->size = aps.size;
+            s->address = aps.address;
+            cmd = aps.cmd;
+        }
         if ((cmd != PIPE_CMD_READ_BUFFER) && (cmd != PIPE_CMD_WRITE_BUFFER))
             break;
 
         pipeDevice_doCommand(s, cmd);
-        aps.result = s->status;
-        cpu_physical_memory_write(s->params_addr, (void*)&aps,
-                    sizeof(struct access_params));
+        if (goldfish_guest_is_64bit()) {
+            aps64.result = s->status;
+            cpu_physical_memory_write(s->params_addr, (void*)&aps64,
+                                      sizeof(aps64));
+        } else {
+            aps.result = s->status;
+            cpu_physical_memory_write(s->params_addr, (void*)&aps,
+                                      sizeof(aps));
+        }
     }
     break;
 
@@ -1141,8 +1184,8 @@
     case PIPE_REG_CHANNEL:
         if (dev->signaled_pipes != NULL) {
             Pipe* pipe = dev->signaled_pipes;
-            DR("%s: channel=0x%x wanted=%d", __FUNCTION__,
-               pipe->channel, pipe->wanted);
+            DR("%s: channel=0x%llx wanted=%d", __FUNCTION__,
+               (unsigned long long)pipe->channel, pipe->wanted);
             dev->wakes = pipe->wanted;
             pipe->wanted = 0;
             dev->signaled_pipes = pipe->next_waked;
@@ -1151,7 +1194,17 @@
                 goldfish_device_set_irq(&dev->dev, 0, 0);
                 DD("%s: lowering IRQ", __FUNCTION__);
             }
-            return pipe->channel;
+            return (uint32_t)(pipe->channel & 0xFFFFFFFFUL);
+        }
+        DR("%s: no signaled channels", __FUNCTION__);
+        return 0;
+
+    case PIPE_REG_CHANNEL_HIGH:
+        if (dev->signaled_pipes != NULL) {
+            Pipe* pipe = dev->signaled_pipes;
+            DR("%s: channel_high=0x%llx wanted=%d", __FUNCTION__,
+               (unsigned long long)pipe->channel, pipe->wanted);
+            return (uint32_t)(pipe->channel >> 32);
         }
         DR("%s: no signaled channels", __FUNCTION__);
         return 0;
@@ -1161,10 +1214,10 @@
         return dev->wakes;
 
     case PIPE_REG_PARAMS_ADDR_HIGH:
-        return dev->params_addr >> 32;
+        return (uint32_t)(dev->params_addr >> 32);
 
     case PIPE_REG_PARAMS_ADDR_LOW:
-        return dev->params_addr & 0xFFFFFFFFUL;
+        return (uint32_t)(dev->params_addr & 0xFFFFFFFFUL);
 
     default:
         D("%s: offset=%d (0x%x)\n", __FUNCTION__, offset, offset);
@@ -1190,10 +1243,10 @@
     PipeDevice* dev = opaque;
     Pipe* pipe;
 
-    qemu_put_be32(file, dev->address);
+    qemu_put_be64(file, dev->address);
     qemu_put_be32(file, dev->size);
     qemu_put_be32(file, dev->status);
-    qemu_put_be32(file, dev->channel);
+    qemu_put_be64(file, dev->channel);
     qemu_put_be32(file, dev->wakes);
     qemu_put_be64(file, dev->params_addr);
 
@@ -1216,13 +1269,22 @@
     PipeDevice* dev = opaque;
     Pipe*       pipe;
 
-    if (version_id != GOLDFISH_PIPE_SAVE_VERSION)
+    if ((version_id != GOLDFISH_PIPE_SAVE_VERSION) &&
+        (version_id != GOLDFISH_PIPE_SAVE_VERSION_LEGACY)) {
         return -EINVAL;
-
-    dev->address = qemu_get_be32(file);
+    }
+    if (version_id == GOLDFISH_PIPE_SAVE_VERSION_LEGACY) {
+        dev->address = (uint64_t)qemu_get_be32(file);
+    } else {
+        dev->address = qemu_get_be64(file);
+    }
     dev->size    = qemu_get_be32(file);
     dev->status  = qemu_get_be32(file);
-    dev->channel = qemu_get_be32(file);
+    if (version_id == GOLDFISH_PIPE_SAVE_VERSION_LEGACY) {
+        dev->channel = (uint64_t)qemu_get_be32(file);
+    } else {
+        dev->channel = qemu_get_be64(file);
+    }
     dev->wakes   = qemu_get_be32(file);
     dev->params_addr   = qemu_get_be64(file);
 
@@ -1231,7 +1293,7 @@
 
     /* Load all pipe connections */
     for ( ; count > 0; count-- ) {
-        pipe = pipe_load(dev, file);
+        pipe = pipe_load(dev, file, version_id);
         if (pipe == NULL) {
             return -EIO;
         }
@@ -1250,13 +1312,13 @@
 }
 
 /* initialize the trace device */
-void pipe_dev_init()
+void pipe_dev_init(bool newDeviceNaming)
 {
     PipeDevice *s;
 
     s = (PipeDevice *) g_malloc0(sizeof(*s));
 
-    s->dev.name = "qemu_pipe";
+    s->dev.name = newDeviceNaming ? "goldfish_pipe" : "qemu_pipe";
     s->dev.id = -1;
     s->dev.base = 0;       // will be allocated dynamically
     s->dev.size = 0x2000;
@@ -1265,8 +1327,13 @@
 
     goldfish_device_add(&s->dev, pipe_dev_readfn, pipe_dev_writefn, s);
 
-    register_savevm( "goldfish_pipe", 0, GOLDFISH_PIPE_SAVE_VERSION,
-                      goldfish_pipe_save, goldfish_pipe_load, s);
+    register_savevm(NULL,
+                    "goldfish_pipe",
+                    0,
+                    GOLDFISH_PIPE_SAVE_VERSION,
+                    goldfish_pipe_save,
+                    goldfish_pipe_load,
+                    s);
 
 #if DEBUG_ZERO_PIPE
     goldfish_pipe_add_type("zero", NULL, &zeroPipe_funcs);
@@ -1286,7 +1353,7 @@
     Pipe** lookup;
     PipeDevice*  dev = pipe->device;
 
-    DD("%s: channel=0x%x flags=%d", __FUNCTION__, pipe->channel, flags);
+    DD("%s: channel=0x%llx flags=%d", __FUNCTION__, (unsigned long long)pipe->channel, flags);
 
     /* If not already there, add to the list of signaled pipes */
     lookup = pipe_list_findp_waked(&dev->signaled_pipes, pipe);
@@ -1306,7 +1373,7 @@
 {
     Pipe* pipe = hwpipe;
 
-    D("%s: channel=0x%x (closed=%d)", __FUNCTION__, pipe->channel, pipe->closed);
+    D("%s: channel=0x%llx (closed=%d)", __FUNCTION__, (unsigned long long)pipe->channel, pipe->closed);
 
     if (!pipe->closed) {
         pipe->closed = 1;
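
On the guest side, each widened pipe register is paired with a _HIGH counterpart. Something along these lines would drive a PIPE_CMD_WRITE_BUFFER from a 64-bit guest (illustrative only; reg_write32() and the PIPE_REG_SIZE/PIPE_REG_COMMAND names are assumptions, while the other register and command names appear in pipe.c above):

```c
#include <stdint.h>

/* Hypothetical guest-driver fragment, not part of this patch. */
static inline void reg_write32(volatile uint8_t *base, uint32_t off, uint32_t val)
{
    *(volatile uint32_t *)(base + off) = val;
}

static void pipe_cmd_write_buffer(volatile uint8_t *base, uint64_t channel,
                                  uint64_t vaddr, uint32_t size)
{
    reg_write32(base, PIPE_REG_CHANNEL,      (uint32_t)channel);
    reg_write32(base, PIPE_REG_CHANNEL_HIGH, (uint32_t)(channel >> 32));
    reg_write32(base, PIPE_REG_SIZE,         size);
    reg_write32(base, PIPE_REG_ADDRESS,      (uint32_t)vaddr);
    reg_write32(base, PIPE_REG_ADDRESS_HIGH, (uint32_t)(vaddr >> 32));
    reg_write32(base, PIPE_REG_COMMAND,      PIPE_CMD_WRITE_BUFFER);
}
```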
diff --git a/hw/android/goldfish/switch.c b/hw/android/goldfish/switch.c
deleted file mode 100644
index 5f5eaae..0000000
--- a/hw/android/goldfish/switch.c
+++ /dev/null
@@ -1,172 +0,0 @@
-/* Copyright (C) 2007-2008 The Android Open Source Project
-**
-** This software is licensed under the terms of the GNU General Public
-** License version 2, as published by the Free Software Foundation, and
-** may be copied, distributed, and modified under those terms.
-**
-** This program is distributed in the hope that it will be useful,
-** but WITHOUT ANY WARRANTY; without even the implied warranty of
-** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-** GNU General Public License for more details.
-*/
-#include "migration/qemu-file.h"
-#include "hw/android/goldfish/device.h"
-#include "hw/android/goldfish/vmem.h"
-#include "hw/hw.h"
-
-enum {
-    SW_NAME_LEN     = 0x00,
-    SW_NAME_PTR     = 0x04,
-    SW_FLAGS        = 0x08,
-    SW_STATE        = 0x0c,
-    SW_INT_STATUS   = 0x10,
-    SW_INT_ENABLE   = 0x14,
-
-    SW_FLAGS_OUTPUT = 1U << 0
-};
-
-
-struct switch_state {
-    struct goldfish_device dev;
-    char *name;
-    uint32_t state;
-    uint32_t state_changed : 1;
-    uint32_t int_enable : 1;
-    uint32_t (*writefn)(void *opaque, uint32_t state);
-    void *writeopaque;
-};
-
-#define  GOLDFISH_SWITCH_SAVE_VERSION  1
-
-static void  goldfish_switch_save(QEMUFile*  f, void*  opaque)
-{
-    struct switch_state*  s = opaque;
-
-    qemu_put_be32(f, s->state);
-    qemu_put_byte(f, s->state_changed);
-    qemu_put_byte(f, s->int_enable);
-}
-
-static int  goldfish_switch_load(QEMUFile*  f, void*  opaque, int  version_id)
-{
-    struct switch_state*  s = opaque;
-
-    if (version_id != GOLDFISH_SWITCH_SAVE_VERSION)
-        return -1;
-
-    s->state         = qemu_get_be32(f);
-    s->state_changed = qemu_get_byte(f);
-    s->int_enable    = qemu_get_byte(f);
-
-    return 0;
-}
-
-static uint32_t goldfish_switch_read(void *opaque, hwaddr offset)
-{
-    struct switch_state *s = (struct switch_state *)opaque;
-
-    //printf("goldfish_switch_read %x %x\n", offset, size);
-
-    switch (offset) {
-        case SW_NAME_LEN:
-            return strlen(s->name);
-        case SW_FLAGS:
-            return s->writefn ? SW_FLAGS_OUTPUT : 0;
-        case SW_STATE:
-            return s->state;
-        case SW_INT_STATUS:
-            if(s->state_changed && s->int_enable) {
-                s->state_changed = 0;
-                goldfish_device_set_irq(&s->dev, 0, 0);
-                return 1;
-            }
-            return 0;
-    default:
-        cpu_abort (cpu_single_env, "goldfish_switch_read: Bad offset %x\n", offset);
-        return 0;
-    }
-}
-
-static void goldfish_switch_write(void *opaque, hwaddr offset, uint32_t value)
-{
-    struct switch_state *s = (struct switch_state *)opaque;
-
-    //printf("goldfish_switch_read %x %x %x\n", offset, value, size);
-
-    switch(offset) {
-        case SW_NAME_PTR:
-            safe_memory_rw_debug(cpu_single_env, value, (void*)s->name, strlen(s->name), 1);
-            break;
-
-        case SW_STATE:
-            if(s->writefn) {
-                uint32_t new_state;
-                new_state = s->writefn(s->writeopaque, value);
-                if(new_state != s->state) {
-                    goldfish_switch_set_state(s, new_state);
-                }
-            }
-            else
-                cpu_abort (cpu_single_env, "goldfish_switch_write: write to SW_STATE on input\n");
-            break;
-
-        case SW_INT_ENABLE:
-            value &= 1;
-            if(s->state_changed && s->int_enable != value)
-                goldfish_device_set_irq(&s->dev, 0, value);
-            s->int_enable = value;
-            break;
-
-        default:
-            cpu_abort (cpu_single_env, "goldfish_switch_write: Bad offset %x\n", offset);
-    }
-}
-
-static CPUReadMemoryFunc *goldfish_switch_readfn[] = {
-    goldfish_switch_read,
-    goldfish_switch_read,
-    goldfish_switch_read
-};
-
-static CPUWriteMemoryFunc *goldfish_switch_writefn[] = {
-    goldfish_switch_write,
-    goldfish_switch_write,
-    goldfish_switch_write
-};
-
-void goldfish_switch_set_state(void *opaque, uint32_t state)
-{
-    struct switch_state *s = opaque;
-    s->state_changed = 1;
-    s->state = state;
-    if(s->int_enable)
-        goldfish_device_set_irq(&s->dev, 0, 1);
-}
-
-void *goldfish_switch_add(char *name, uint32_t (*writefn)(void *opaque, uint32_t state), void *writeopaque, int id)
-{
-    int ret;
-    struct switch_state *s;
-
-    s = g_malloc0(sizeof(*s));
-    s->dev.name = "goldfish-switch";
-    s->dev.id = id;
-    s->dev.size = 0x1000;
-    s->dev.irq_count = 1;
-    s->name = name;
-    s->writefn = writefn;
-    s->writeopaque = writeopaque;
-
-
-    ret = goldfish_device_add(&s->dev, goldfish_switch_readfn, goldfish_switch_writefn, s);
-    if(ret) {
-        g_free(s);
-        return NULL;
-    }
-
-    register_savevm( "goldfish_switch", 0, GOLDFISH_SWITCH_SAVE_VERSION,
-                     goldfish_switch_save, goldfish_switch_load, s);
-
-    return s;
-}
-
diff --git a/hw/android/goldfish/timer.c b/hw/android/goldfish/timer.c
index 3d46db0..b8921e5 100644
--- a/hw/android/goldfish/timer.c
+++ b/hw/android/goldfish/timer.c
@@ -238,11 +238,21 @@
     timer_state.dev.irq = timerirq;
     timer_state.timer = timer_new(QEMU_CLOCK_VIRTUAL, SCALE_NS, goldfish_timer_tick, &timer_state);
     goldfish_device_add(&timer_state.dev, goldfish_timer_readfn, goldfish_timer_writefn, &timer_state);
-    register_savevm( "goldfish_timer", 0, GOLDFISH_TIMER_SAVE_VERSION,
-                     goldfish_timer_save, goldfish_timer_load, &timer_state);
+    register_savevm(NULL,
+                    "goldfish_timer",
+                    0,
+                    GOLDFISH_TIMER_SAVE_VERSION,
+                    goldfish_timer_save,
+                    goldfish_timer_load,
+                    &timer_state);
 
     goldfish_device_add(&rtc_state.dev, goldfish_rtc_readfn, goldfish_rtc_writefn, &rtc_state);
-    register_savevm( "goldfish_rtc", 0, GOLDFISH_RTC_SAVE_VERSION,
-                     goldfish_rtc_save, goldfish_rtc_load, &rtc_state);
+    register_savevm(NULL,
+                    "goldfish_rtc",
+                    0,
+                    GOLDFISH_RTC_SAVE_VERSION,
+                    goldfish_rtc_save,
+                    goldfish_rtc_load,
+                    &rtc_state);
 }
 
diff --git a/hw/android/goldfish/trace.c b/hw/android/goldfish/trace.c
index d9f550f..27fada5 100644
--- a/hw/android/goldfish/trace.c
+++ b/hw/android/goldfish/trace.c
@@ -17,10 +17,10 @@
 #include "hw/android/goldfish/trace.h"
 #include "hw/android/goldfish/vmem.h"
 #include "sysemu/sysemu.h"
-#ifdef CONFIG_MEMCHECK
-#include "memcheck/memcheck.h"
-#include "memcheck/memcheck_util.h"
-#endif  // CONFIG_MEMCHECK
+#ifdef CONFIG_ANDROID_MEMCHECK
+#include "android/qemu/memcheck/memcheck.h"
+#include "android/qemu/memcheck/memcheck_util.h"
+#endif  // CONFIG_ANDROID_MEMCHECK
 
 /* Set to 1 to debug tracing */
 #define DEBUG   0
@@ -76,11 +76,11 @@
         if (trace_filename != NULL) {
             D("QEMU.trace: kernel, context switch %u\n", value);
         }
-#ifdef CONFIG_MEMCHECK
+#ifdef CONFIG_ANDROID_MEMCHECK
         if (memcheck_enabled) {
             memcheck_switch(value);
         }
-#endif  // CONFIG_MEMCHECK
+#endif  // CONFIG_ANDROID_MEMCHECK
         tid = (unsigned) value;
         break;
     case TRACE_DEV_REG_TGID:    // save the tgid for the following fork/clone
@@ -95,22 +95,22 @@
         if (trace_filename != NULL) {
             D("QEMU.trace: kernel, fork %u\n", value);
         }
-#ifdef CONFIG_MEMCHECK
+#ifdef CONFIG_ANDROID_MEMCHECK
         if (memcheck_enabled) {
             memcheck_fork(tgid, value);
         }
-#endif  // CONFIG_MEMCHECK
+#endif  // CONFIG_ANDROID_MEMCHECK
         break;
     case TRACE_DEV_REG_CLONE:    // fork, clone new pid (i.e. thread)
         DPID("QEMU.trace: clone (pid=%d tgid=%d value=%d)\n", pid, tgid, value);
         if (trace_filename != NULL) {
             D("QEMU.trace: kernel, clone %u\n", value);
         }
-#ifdef CONFIG_MEMCHECK
+#ifdef CONFIG_ANDROID_MEMCHECK
         if (memcheck_enabled) {
             memcheck_clone(tgid, value);
         }
-#endif  // CONFIG_MEMCHECK
+#endif  // CONFIG_ANDROID_MEMCHECK
         break;
     case TRACE_DEV_REG_EXECVE_VMSTART:  // execve, vstart
         vstart = value;
@@ -127,7 +127,7 @@
             D("QEMU.trace: kernel, init exec [%lx,%lx]@%lx [%s]\n",
               vstart, vend, eoff, exec_path);
         }
-#ifdef CONFIG_MEMCHECK
+#ifdef CONFIG_ANDROID_MEMCHECK
         if (memcheck_enabled) {
             if (exec_path[0] == '\0') {
                 // vstrcpy may fail to copy path. In this case lets do it
@@ -136,22 +136,22 @@
             }
             memcheck_mmap_exepath(vstart, vend, eoff, exec_path);
         }
-#endif  // CONFIG_MEMCHECK
+#endif  // CONFIG_ANDROID_MEMCHECK
         exec_path[0] = 0;
         break;
     case TRACE_DEV_REG_CMDLINE_LEN:     // execve, process cmdline length
         cmdlen = value;
         break;
     case TRACE_DEV_REG_CMDLINE:         // execve, process cmdline
-        safe_memory_rw_debug(cpu_single_env, value, (uint8_t*)exec_arg, cmdlen, 0);
+        safe_memory_rw_debug(current_cpu, value, (uint8_t*)exec_arg, cmdlen, 0);
         if (trace_filename != NULL) {
             D("QEMU.trace: kernel, execve [%.*s]\n", cmdlen, exec_arg);
         }
-#ifdef CONFIG_MEMCHECK
+#ifdef CONFIG_ANDROID_MEMCHECK
         if (memcheck_enabled) {
             memcheck_set_cmd_line(exec_arg, cmdlen);
         }
-#endif  // CONFIG_MEMCHECK
+#endif  // CONFIG_ANDROID_MEMCHECK
 #if DEBUG || DEBUG_PID
         if (trace_filename != NULL) {
             int i;
@@ -168,11 +168,11 @@
         if (trace_filename != NULL) {
             D("QEMU.trace: kernel, exit %x\n", value);
         }
-#ifdef CONFIG_MEMCHECK
+#ifdef CONFIG_ANDROID_MEMCHECK
         if (memcheck_enabled) {
             memcheck_exit(value);
         }
-#endif  // CONFIG_MEMCHECK
+#endif  // CONFIG_ANDROID_MEMCHECK
         break;
     case TRACE_DEV_REG_NAME:            // record thread name
         vstrcpy(value, exec_path, CLIENT_PAGE_SIZE);
@@ -193,7 +193,7 @@
         if (trace_filename != NULL) {
             D("QEMU.trace: kernel, mmap [%lx,%lx]@%lx [%s]\n", vstart, vend, eoff, exec_path);
         }
-#ifdef CONFIG_MEMCHECK
+#ifdef CONFIG_ANDROID_MEMCHECK
         if (memcheck_enabled) {
             if (exec_path[0] == '\0') {
                 // vstrcpy may fail to copy path. In this case lets do it
@@ -202,17 +202,17 @@
             }
             memcheck_mmap_exepath(vstart, vend, eoff, exec_path);
         }
-#endif  // CONFIG_MEMCHECK
+#endif  // CONFIG_ANDROID_MEMCHECK
         exec_path[0] = 0;
         break;
     case TRACE_DEV_REG_INIT_PID:        // init, name the pid that starts before device registered
         pid = value;
         DPID("QEMU.trace: pid=%d\n", value);
-#ifdef CONFIG_MEMCHECK
+#ifdef CONFIG_ANDROID_MEMCHECK
         if (memcheck_enabled) {
             memcheck_init_pid(value);
         }
-#endif  // CONFIG_MEMCHECK
+#endif  // CONFIG_ANDROID_MEMCHECK
         break;
     case TRACE_DEV_REG_INIT_NAME:       // init, the comm of the init pid
         vstrcpy(value, exec_path, CLIENT_PAGE_SIZE);
@@ -253,7 +253,7 @@
 
     case TRACE_DEV_REG_STOP_EMU:        // stop the VM execution
         cpu_single_env->exception_index = EXCP_HLT;
-        cpu_single_env->halted = 1;
+        current_cpu->halted = 1;
         qemu_system_shutdown_request();
         cpu_loop_exit(cpu_single_env);
         break;
@@ -265,11 +265,11 @@
         unmap_start = value;
         break;
     case TRACE_DEV_REG_UNMAP_END:
-#ifdef CONFIG_MEMCHECK
+#ifdef CONFIG_ANDROID_MEMCHECK
         if (memcheck_enabled) {
             memcheck_unmap(unmap_start, value);
         }
-#endif  // CONFIG_MEMCHECK
+#endif  // CONFIG_ANDROID_MEMCHECK
         break;
 
     case TRACE_DEV_REG_METHOD_ENTRY:
@@ -286,7 +286,7 @@
         }
         break;
 
-#ifdef CONFIG_MEMCHECK
+#ifdef CONFIG_ANDROID_MEMCHECK
     case TRACE_DEV_REG_MALLOC:
         if (memcheck_enabled) {
             memcheck_guest_alloc(value);
@@ -316,7 +316,7 @@
             memcheck_guest_print_str(value);
         }
         break;
-#endif // CONFIG_MEMCHECK
+#endif // CONFIG_ANDROID_MEMCHECK
 
     default:
         if (offset < 4096) {
diff --git a/hw/android/goldfish/tty.c b/hw/android/goldfish/tty.c
index 7b0823f..7f032b1 100644
--- a/hw/android/goldfish/tty.c
+++ b/hw/android/goldfish/tty.c
@@ -22,6 +22,7 @@
 
     TTY_DATA_PTR       = 0x10,
     TTY_DATA_LEN       = 0x14,
+    TTY_DATA_PTR_HIGH  = 0x18,
 
     TTY_CMD_INT_DISABLE    = 0,
     TTY_CMD_INT_ENABLE     = 1,
@@ -32,20 +33,20 @@
 struct tty_state {
     struct goldfish_device dev;
     CharDriverState *cs;
-    uint32_t ptr;
+    uint64_t ptr;
     uint32_t ptr_len;
     uint32_t ready;
     uint8_t data[128];
     uint32_t data_count;
 };
 
-#define  GOLDFISH_TTY_SAVE_VERSION  1
+#define  GOLDFISH_TTY_SAVE_VERSION  2
 
 static void  goldfish_tty_save(QEMUFile*  f, void*  opaque)
 {
     struct tty_state*  s = opaque;
 
-    qemu_put_be32( f, s->ptr );
+    qemu_put_be64( f, s->ptr );
     qemu_put_be32( f, s->ptr_len );
     qemu_put_byte( f, s->ready );
     qemu_put_byte( f, s->data_count );
@@ -56,10 +57,15 @@
 {
     struct tty_state*  s = opaque;
 
-    if (version_id != GOLDFISH_TTY_SAVE_VERSION)
+    if ((version_id != GOLDFISH_TTY_SAVE_VERSION) && 
+        (version_id != (GOLDFISH_TTY_SAVE_VERSION - 1))) {
         return -1;
-
-    s->ptr        = qemu_get_be32(f);
+    }
+    if (version_id == (GOLDFISH_TTY_SAVE_VERSION - 1)) {
+        s->ptr    = (uint64_t)qemu_get_be32(f);
+    } else {
+        s->ptr    = qemu_get_be64(f);
+    }
     s->ptr_len    = qemu_get_be32(f);
     s->ready      = qemu_get_byte(f);
     s->data_count = qemu_get_byte(f);
@@ -87,7 +93,7 @@
 {
     struct tty_state *s = (struct tty_state *)opaque;
 
-    //printf("goldfish_tty_read %x %x %x\n", offset, value, size);
+    //printf("goldfish_tty_write %x %x %x\n", offset, value, size);
 
     switch(offset) {
         case TTY_PUT_CHAR: {
@@ -117,7 +123,7 @@
                 case TTY_CMD_WRITE_BUFFER:
                     if(s->cs) {
                         int len;
-                        hwaddr  buf;
+                        target_ulong  buf;
 
                         buf = s->ptr;
                         len = s->ptr_len;
@@ -128,20 +134,20 @@
                             if (to_write > len)
                                 to_write = len;
 
-                            safe_memory_rw_debug(cpu_single_env, buf, (uint8_t*)temp, to_write, 0);
+                            safe_memory_rw_debug(current_cpu, buf, (uint8_t*)temp, to_write, 0);
                             qemu_chr_write(s->cs, (const uint8_t*)temp, to_write);
                             buf += to_write;
                             len -= to_write;
                         }
-                        //printf("goldfish_tty_write: got %d bytes from %x\n", s->ptr_len, s->ptr);
+                        //printf("goldfish_tty_write: got %d bytes from %llx\n", s->ptr_len, (unsigned long long)s->ptr);
                     }
                     break;
 
                 case TTY_CMD_READ_BUFFER:
                     if(s->ptr_len > s->data_count)
                         cpu_abort (cpu_single_env, "goldfish_tty_write: reading more data than available %d %d\n", s->ptr_len, s->data_count);
-                    safe_memory_rw_debug(cpu_single_env,s->ptr, s->data, s->ptr_len,1);
-                    //printf("goldfish_tty_write: read %d bytes to %x\n", s->ptr_len, s->ptr);
+                    safe_memory_rw_debug(current_cpu, s->ptr, s->data, s->ptr_len,1);
+                    //printf("goldfish_tty_write: read %d bytes to %llx\n", s->ptr_len, (unsigned long long)s->ptr);
                     if(s->data_count > s->ptr_len)
                         memmove(s->data, s->data + s->ptr_len, s->data_count - s->ptr_len);
                     s->data_count -= s->ptr_len;
@@ -155,7 +161,11 @@
             break;
 
         case TTY_DATA_PTR:
-            s->ptr = value;
+            uint64_set_low(&s->ptr, value);
+            break;
+
+        case TTY_DATA_PTR_HIGH:
+            uint64_set_high(&s->ptr, value);
             break;
 
         case TTY_DATA_LEN:
@@ -219,8 +229,13 @@
     if(ret) {
         g_free(s);
     } else {
-        register_savevm( "goldfish_tty", instance_id++, GOLDFISH_TTY_SAVE_VERSION,
-                         goldfish_tty_save, goldfish_tty_load, s);
+        register_savevm(NULL,
+                        "goldfish_tty",
+                        instance_id++,
+                        GOLDFISH_TTY_SAVE_VERSION,
+                        goldfish_tty_save,
+                        goldfish_tty_load,
+                        s);
     }
     return ret;
 }
diff --git a/hw/android/goldfish/vmem.c b/hw/android/goldfish/vmem.c
index 95c6a05..0d7c524 100644
--- a/hw/android/goldfish/vmem.c
+++ b/hw/android/goldfish/vmem.c
@@ -26,22 +26,24 @@
 // and on AMD some of those ioctls (in particular KVM_GET_MSRS) are 10 to 100x
 // slower than on Intel chips.
 
-int safe_memory_rw_debug(CPUOldState *env, target_ulong addr, uint8_t *buf,
+int safe_memory_rw_debug(CPUState *cpu, target_ulong addr, uint8_t *buf,
                          int len, int is_write)
 {
 #ifdef TARGET_I386
     if (kvm_enabled()) {
-        kvm_get_sregs(env);
+        kvm_get_sregs(cpu);
     }
 #endif
-    return cpu_memory_rw_debug(env, addr, buf, len, is_write);
+    return cpu_memory_rw_debug(cpu, addr, buf, len, is_write);
 }
 
-hwaddr safe_get_phys_page_debug(CPUOldState *env, target_ulong addr)
+hwaddr safe_get_phys_page_debug(CPUState *cpu, target_ulong addr)
 {
+    CPUArchState *env = cpu->env_ptr;
+
 #ifdef TARGET_I386
     if (kvm_enabled()) {
-        kvm_get_sregs(env);
+        kvm_get_sregs(cpu);
     }
 #endif
     return cpu_get_phys_page_debug(env, addr);
diff --git a/hw/arm/arm_gic.c b/hw/arm/arm_gic.c
index 1f5af8b..9eae169 100644
--- a/hw/arm/arm_gic.c
+++ b/hw/arm/arm_gic.c
@@ -728,5 +728,11 @@
     s->iomemtype = cpu_register_io_memory(gic_dist_readfn,
                                           gic_dist_writefn, s);
     gic_reset(s);
-    register_savevm("arm_gic", -1, 1, gic_save, gic_load, s);
+    register_savevm(NULL,
+                    "arm_gic",
+                    -1,
+                    1,
+                    gic_save,
+                    gic_load,
+                    s);
 }
diff --git a/hw/arm/armv7m_nvic.c b/hw/arm/armv7m_nvic.c
index c2c239b..7ca353b 100644
--- a/hw/arm/armv7m_nvic.c
+++ b/hw/arm/armv7m_nvic.c
@@ -397,7 +397,7 @@
     gic_init(&s->gic);
     cpu_register_physical_memory(0xe000e000, 0x1000, s->gic.iomemtype);
     s->systick.timer = timer_new(QEMU_CLOCK_VIRTUAL, SCALE_NS, systick_timer_tick, s);
-    register_savevm("armv7m_nvic", -1, 1, nvic_save, nvic_load, s);
+    register_savevm(NULL, "armv7m_nvic", -1, 1, nvic_save, nvic_load, s);
 }
 
 static void armv7m_nvic_register_devices(void)
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index b6ab870..4c90148 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -46,9 +46,9 @@
 {
     CPUARMState *env = opaque;
 
-    cpu_reset(env);
+    cpu_reset(ENV_GET_CPU(env));
     if (env->boot_info)
-        arm_load_kernel(env, env->boot_info);
+        arm_load_kernel(env, (struct arm_boot_info*)env->boot_info);
 
     /* TODO:  Reset secondary CPUs.  */
 }
@@ -58,7 +58,7 @@
     p += 4;                       \
 } while (0)
 
-static void set_kernel_args(struct arm_boot_info *info,
+static void set_kernel_args(const struct arm_boot_info *info,
                 int initrd_size, hwaddr base)
 {
     hwaddr p;
@@ -111,7 +111,7 @@
     WRITE_WORD(p, 0);
 }
 
-static void set_kernel_args_old(struct arm_boot_info *info,
+static void set_kernel_args_old(const struct arm_boot_info *info,
                 int initrd_size, hwaddr base)
 {
     hwaddr p;
@@ -190,6 +190,7 @@
     int initrd_size;
     int n;
     int is_linux = 0;
+    int nb_cpus;
     uint64_t elf_entry;
     hwaddr entry;
 
@@ -199,8 +200,7 @@
         exit(1);
     }
 
-    if (info->nb_cpus == 0)
-        info->nb_cpus = 1;
+    nb_cpus = info->nb_cpus ? info->nb_cpus : 1;
     env->boot_info = info;
 
     /* Assume that raw images are linux kernels, and ELF images are not.  */
@@ -246,7 +246,7 @@
         for (n = 0; n < sizeof(bootloader) / 4; n++) {
             stl_phys_notdirty(info->loader_start + (n * 4), bootloader[n]);
         }
-        if (info->nb_cpus > 1) {
+        if (nb_cpus > 1) {
             for (n = 0; n < sizeof(smpboot) / 4; n++) {
                 stl_phys_notdirty(info->smp_loader_start + (n * 4), smpboot[n]);
             }
@@ -256,6 +256,6 @@
         else
             set_kernel_args(info, initrd_size, info->loader_start);
     }
-    info->is_linux = is_linux;
+    //info->is_linux = is_linux;
     qemu_register_reset(main_cpu_reset, 0, env);
 }
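
The boot.c hunks above stop writing back into the boot descriptor: set_kernel_args() and set_kernel_args_old() now take a const pointer, and the default CPU count is derived into a local instead of being patched into info->nb_cpus. A small sketch of that read-only pattern, with a hypothetical helper name (the struct name comes from the hunks above):

    static int example_cpu_count(const struct arm_boot_info *info)
    {
        /* derive the default locally instead of mutating the (const) descriptor */
        return info->nb_cpus ? info->nb_cpus : 1;
    }

    /* ... later: if (example_cpu_count(info) > 1) load the SMP boot stub ... */
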
diff --git a/hw/arm/pic.c b/hw/arm/pic.c
index d542a82..8915f9e 100644
--- a/hw/arm/pic.c
+++ b/hw/arm/pic.c
@@ -25,18 +25,19 @@
 static void arm_pic_cpu_handler(void *opaque, int irq, int level)
 {
     CPUOldState *env = (CPUOldState *)opaque;
+    CPUState *cpu = ENV_GET_CPU(env);
     switch (irq) {
     case ARM_PIC_CPU_IRQ:
         if (level)
-            cpu_interrupt(env, CPU_INTERRUPT_HARD);
+            cpu_interrupt(cpu, CPU_INTERRUPT_HARD);
         else
-            cpu_reset_interrupt(env, CPU_INTERRUPT_HARD);
+            cpu_reset_interrupt(cpu, CPU_INTERRUPT_HARD);
         break;
     case ARM_PIC_CPU_FIQ:
         if (level)
-            cpu_interrupt(env, CPU_INTERRUPT_FIQ);
+            cpu_interrupt(cpu, CPU_INTERRUPT_FIQ);
         else
-            cpu_reset_interrupt(env, CPU_INTERRUPT_FIQ);
+            cpu_reset_interrupt(cpu, CPU_INTERRUPT_FIQ);
         break;
     default:
         hw_error("arm_pic_cpu_handler: Bad interrput line %d\n", irq);
diff --git a/hw/block/cdrom.c b/hw/block/cdrom.c
deleted file mode 100644
index b938234..0000000
--- a/hw/block/cdrom.c
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * QEMU ATAPI CD-ROM Emulator
- *
- * Copyright (c) 2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-/* ??? Most of the ATAPI emulation is still in ide.c.  It should be moved
-   here.  */
-
-#include "qemu-common.h"
-#include "hw/scsi/scsi-disk.h"
-
-static void lba_to_msf(uint8_t *buf, int lba)
-{
-    lba += 150;
-    buf[0] = (lba / 75) / 60;
-    buf[1] = (lba / 75) % 60;
-    buf[2] = lba % 75;
-}
-
-/* same toc as bochs. Return -1 if error or the toc length */
-/* XXX: check this */
-int cdrom_read_toc(int nb_sectors, uint8_t *buf, int msf, int start_track)
-{
-    uint8_t *q;
-    int len;
-
-    if (start_track > 1 && start_track != 0xaa)
-        return -1;
-    q = buf + 2;
-    *q++ = 1; /* first session */
-    *q++ = 1; /* last session */
-    if (start_track <= 1) {
-        *q++ = 0; /* reserved */
-        *q++ = 0x14; /* ADR, control */
-        *q++ = 1;    /* track number */
-        *q++ = 0; /* reserved */
-        if (msf) {
-            *q++ = 0; /* reserved */
-            lba_to_msf(q, 0);
-            q += 3;
-        } else {
-            /* sector 0 */
-            stl_be_p(q, 0);
-            q += 4;
-        }
-    }
-    /* lead out track */
-    *q++ = 0; /* reserved */
-    *q++ = 0x16; /* ADR, control */
-    *q++ = 0xaa; /* track number */
-    *q++ = 0; /* reserved */
-    if (msf) {
-        *q++ = 0; /* reserved */
-        lba_to_msf(q, nb_sectors);
-        q += 3;
-    } else {
-        stl_be_p(q, nb_sectors);
-        q += 4;
-    }
-    len = q - buf;
-    stw_be_p(buf, len - 2);
-    return len;
-}
-
-/* mostly same info as PearPc */
-int cdrom_read_toc_raw(int nb_sectors, uint8_t *buf, int msf, int session_num)
-{
-    uint8_t *q;
-    int len;
-
-    q = buf + 2;
-    *q++ = 1; /* first session */
-    *q++ = 1; /* last session */
-
-    *q++ = 1; /* session number */
-    *q++ = 0x14; /* data track */
-    *q++ = 0; /* track number */
-    *q++ = 0xa0; /* lead-in */
-    *q++ = 0; /* min */
-    *q++ = 0; /* sec */
-    *q++ = 0; /* frame */
-    *q++ = 0;
-    *q++ = 1; /* first track */
-    *q++ = 0x00; /* disk type */
-    *q++ = 0x00;
-
-    *q++ = 1; /* session number */
-    *q++ = 0x14; /* data track */
-    *q++ = 0; /* track number */
-    *q++ = 0xa1;
-    *q++ = 0; /* min */
-    *q++ = 0; /* sec */
-    *q++ = 0; /* frame */
-    *q++ = 0;
-    *q++ = 1; /* last track */
-    *q++ = 0x00;
-    *q++ = 0x00;
-
-    *q++ = 1; /* session number */
-    *q++ = 0x14; /* data track */
-    *q++ = 0; /* track number */
-    *q++ = 0xa2; /* lead-out */
-    *q++ = 0; /* min */
-    *q++ = 0; /* sec */
-    *q++ = 0; /* frame */
-    if (msf) {
-        *q++ = 0; /* reserved */
-        lba_to_msf(q, nb_sectors);
-        q += 3;
-    } else {
-        stl_be_p(q, nb_sectors);
-        q += 4;
-    }
-
-    *q++ = 1; /* session number */
-    *q++ = 0x14; /* ADR, control */
-    *q++ = 0;    /* track number */
-    *q++ = 1;    /* point */
-    *q++ = 0; /* min */
-    *q++ = 0; /* sec */
-    *q++ = 0; /* frame */
-    if (msf) {
-        *q++ = 0;
-        lba_to_msf(q, 0);
-        q += 3;
-    } else {
-        *q++ = 0;
-        *q++ = 0;
-        *q++ = 0;
-        *q++ = 0;
-    }
-
-    len = q - buf;
-    stw_be_p(buf, len - 2);
-    return len;
-}
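
The removed cdrom.c above carried the LBA-to-MSF conversion used by the TOC code; it adds the standard 150-sector (two-second) lead-in before splitting into minutes, seconds and frames. A self-contained worked check of that arithmetic, with the helper copied from the deleted file (the main() harness is only for illustration):

    #include <stdio.h>
    #include <stdint.h>

    static void lba_to_msf(uint8_t *buf, int lba)
    {
        lba += 150;                 /* CD lead-in: 75 frames/s for 2 s */
        buf[0] = (lba / 75) / 60;   /* minutes */
        buf[1] = (lba / 75) % 60;   /* seconds */
        buf[2] = lba % 75;          /* frames  */
    }

    int main(void)
    {
        uint8_t msf[3];
        lba_to_msf(msf, 0);
        /* LBA 0 prints 00:02:00, i.e. exactly the two-second lead-in offset */
        printf("%02d:%02d:%02d\n", msf[0], msf[1], msf[2]);
        return 0;
    }
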
diff --git a/hw/bt/core.c b/hw/bt/core.c
deleted file mode 100644
index 15062cc..0000000
--- a/hw/bt/core.c
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Convenience functions for bluetooth.
- *
- * Copyright (C) 2008 Andrzej Zaborowski  <balrog@zabor.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 or
- * (at your option) version 3 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "qemu-common.h"
-#include "net/net.h"
-#include "hw/bt.h"
-
-/* Slave implementations can ignore this */
-static void bt_dummy_lmp_mode_change(struct bt_link_s *link)
-{
-}
-
-/* Slaves should never receive these PDUs */
-static void bt_dummy_lmp_connection_complete(struct bt_link_s *link)
-{
-    if (link->slave->reject_reason)
-        fprintf(stderr, "%s: stray LMP_not_accepted received, fixme\n",
-                        __FUNCTION__);
-    else
-        fprintf(stderr, "%s: stray LMP_accepted received, fixme\n",
-                        __FUNCTION__);
-    exit(-1);
-}
-
-static void bt_dummy_lmp_disconnect_master(struct bt_link_s *link)
-{
-    fprintf(stderr, "%s: stray LMP_detach received, fixme\n", __FUNCTION__);
-    exit(-1);
-}
-
-static void bt_dummy_lmp_acl_resp(struct bt_link_s *link,
-                const uint8_t *data, int start, int len)
-{
-    fprintf(stderr, "%s: stray ACL response PDU, fixme\n", __FUNCTION__);
-    exit(-1);
-}
-
-/* Slaves that don't hold any additional per link state can use these */
-static void bt_dummy_lmp_connection_request(struct bt_link_s *req)
-{
-    struct bt_link_s *link = g_malloc0(sizeof(struct bt_link_s));
-
-    link->slave = req->slave;
-    link->host = req->host;
-
-    req->host->reject_reason = 0;
-    req->host->lmp_connection_complete(link);
-}
-
-static void bt_dummy_lmp_disconnect_slave(struct bt_link_s *link)
-{
-    g_free(link);
-}
-
-static void bt_dummy_destroy(struct bt_device_s *device)
-{
-    bt_device_done(device);
-    g_free(device);
-}
-
-static int bt_dev_idx = 0;
-
-void bt_device_init(struct bt_device_s *dev, struct bt_scatternet_s *net)
-{
-    memset(dev, 0, sizeof(*dev));
-    dev->inquiry_scan = 1;
-    dev->page_scan = 1;
-
-    dev->bd_addr.b[0] = bt_dev_idx & 0xff;
-    dev->bd_addr.b[1] = bt_dev_idx >> 8;
-    dev->bd_addr.b[2] = 0xd0;
-    dev->bd_addr.b[3] = 0xba;
-    dev->bd_addr.b[4] = 0xbe;
-    dev->bd_addr.b[5] = 0xba;
-    bt_dev_idx ++;
-
-    /* Simple slave-only devices need to implement only .lmp_acl_data */
-    dev->lmp_connection_complete = bt_dummy_lmp_connection_complete;
-    dev->lmp_disconnect_master = bt_dummy_lmp_disconnect_master;
-    dev->lmp_acl_resp = bt_dummy_lmp_acl_resp;
-    dev->lmp_mode_change = bt_dummy_lmp_mode_change;
-    dev->lmp_connection_request = bt_dummy_lmp_connection_request;
-    dev->lmp_disconnect_slave = bt_dummy_lmp_disconnect_slave;
-
-    dev->handle_destroy = bt_dummy_destroy;
-
-    dev->net = net;
-    dev->next = net->slave;
-    net->slave = dev;
-}
-
-void bt_device_done(struct bt_device_s *dev)
-{
-    struct bt_device_s **p = &dev->net->slave;
-
-    while (*p && *p != dev)
-        p = &(*p)->next;
-    if (*p != dev) {
-        fprintf(stderr, "%s: bad bt device \"%s\"\n", __FUNCTION__,
-                        dev->lmp_name ?: "(null)");
-        exit(-1);
-    }
-
-    *p = dev->next;
-}
diff --git a/hw/bt/hci-csr.c b/hw/bt/hci-csr.c
deleted file mode 100644
index 5f5198f..0000000
--- a/hw/bt/hci-csr.c
+++ /dev/null
@@ -1,454 +0,0 @@
-/*
- * Bluetooth serial HCI transport.
- * CSR41814 HCI with H4p vendor extensions.
- *
- * Copyright (C) 2008 Andrzej Zaborowski  <balrog@zabor.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 or
- * (at your option) version 3 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "qemu-common.h"
-#include "sysemu/char.h"
-#include "qemu/timer.h"
-#include "hw/irq.h"
-#include "sysemu/sysemu.h"
-#include "net/net.h"
-#include "hw/bt.h"
-
-struct csrhci_s {
-    int enable;
-    qemu_irq *pins;
-    int pin_state;
-    int modem_state;
-    CharDriverState chr;
-#define FIFO_LEN	4096
-    int out_start;
-    int out_len;
-    int out_size;
-    uint8_t outfifo[FIFO_LEN * 2];
-    uint8_t inpkt[FIFO_LEN];
-    int in_len;
-    int in_hdr;
-    int in_data;
-    QEMUTimer *out_tm;
-    int64_t baud_delay;
-
-    bdaddr_t bd_addr;
-    struct HCIInfo *hci;
-};
-
-/* H4+ packet types */
-enum {
-    H4_CMD_PKT   = 1,
-    H4_ACL_PKT   = 2,
-    H4_SCO_PKT   = 3,
-    H4_EVT_PKT   = 4,
-    H4_NEG_PKT   = 6,
-    H4_ALIVE_PKT = 7,
-};
-
-/* CSR41814 negotiation start magic packet */
-static const uint8_t csrhci_neg_packet[] = {
-    H4_NEG_PKT, 10,
-    0x00, 0xa0, 0x01, 0x00, 0x00,
-    0x4c, 0x00, 0x96, 0x00, 0x00,
-};
-
-/* CSR41814 vendor-specific command OCFs */
-enum {
-    OCF_CSR_SEND_FIRMWARE = 0x000,
-};
-
-static inline void csrhci_fifo_wake(struct csrhci_s *s)
-{
-    if (!s->enable || !s->out_len)
-        return;
-
-    /* XXX: Should wait for s->modem_state & CHR_TIOCM_RTS? */
-    if (s->chr.chr_can_read && s->chr.chr_can_read(s->chr.handler_opaque) &&
-                    s->chr.chr_read) {
-        s->chr.chr_read(s->chr.handler_opaque,
-                        s->outfifo + s->out_start ++, 1);
-        s->out_len --;
-        if (s->out_start >= s->out_size) {
-            s->out_start = 0;
-            s->out_size = FIFO_LEN;
-        }
-    }
-
-    if (s->out_len)
-        timer_mod(s->out_tm, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + s->baud_delay);
-}
-
-#define csrhci_out_packetz(s, len) memset(csrhci_out_packet(s, len), 0, len)
-static uint8_t *csrhci_out_packet(struct csrhci_s *s, int len)
-{
-    int off = s->out_start + s->out_len;
-
-    /* TODO: do the padding here, i.e. align len */
-    s->out_len += len;
-
-    if (off < FIFO_LEN) {
-        if (off + len > FIFO_LEN && (s->out_size = off + len) > FIFO_LEN * 2) {
-            fprintf(stderr, "%s: can't alloc %i bytes\n", __FUNCTION__, len);
-            exit(-1);
-        }
-        return s->outfifo + off;
-    }
-
-    if (s->out_len > s->out_size) {
-        fprintf(stderr, "%s: can't alloc %i bytes\n", __FUNCTION__, len);
-        exit(-1);
-    }
-
-    return s->outfifo + off - s->out_size;
-}
-
-static inline uint8_t *csrhci_out_packet_csr(struct csrhci_s *s,
-                int type, int len)
-{
-    uint8_t *ret = csrhci_out_packetz(s, len + 2);
-
-    *ret ++ = type;
-    *ret ++ = len;
-
-    return ret;
-}
-
-static inline uint8_t *csrhci_out_packet_event(struct csrhci_s *s,
-                int evt, int len)
-{
-    uint8_t *ret = csrhci_out_packetz(s,
-                    len + 1 + sizeof(struct hci_event_hdr));
-
-    *ret ++ = H4_EVT_PKT;
-    ((struct hci_event_hdr *) ret)->evt = evt;
-    ((struct hci_event_hdr *) ret)->plen = len;
-
-    return ret + sizeof(struct hci_event_hdr);
-}
-
-static void csrhci_in_packet_vendor(struct csrhci_s *s, int ocf,
-                uint8_t *data, int len)
-{
-    int offset;
-    uint8_t *rpkt;
-
-    switch (ocf) {
-    case OCF_CSR_SEND_FIRMWARE:
-        /* Check if this is the bd_address packet */
-        if (len >= 18 + 8 && data[12] == 0x01 && data[13] == 0x00) {
-            offset = 18;
-            s->bd_addr.b[0] = data[offset + 7];	/* Beyond cmd packet end(!?) */
-            s->bd_addr.b[1] = data[offset + 6];
-            s->bd_addr.b[2] = data[offset + 4];
-            s->bd_addr.b[3] = data[offset + 0];
-            s->bd_addr.b[4] = data[offset + 3];
-            s->bd_addr.b[5] = data[offset + 2];
-
-            s->hci->bdaddr_set(s->hci, s->bd_addr.b);
-            fprintf(stderr, "%s: bd_address loaded from firmware: "
-                            "%02x:%02x:%02x:%02x:%02x:%02x\n", __FUNCTION__,
-                            s->bd_addr.b[0], s->bd_addr.b[1], s->bd_addr.b[2],
-                            s->bd_addr.b[3], s->bd_addr.b[4], s->bd_addr.b[5]);
-        }
-
-        rpkt = csrhci_out_packet_event(s, EVT_VENDOR, 11);
-        /* Status bytes: no error */
-        rpkt[9] = 0x00;
-        rpkt[10] = 0x00;
-        break;
-
-    default:
-        fprintf(stderr, "%s: got a bad CMD packet\n", __FUNCTION__);
-        return;
-    }
-
-    csrhci_fifo_wake(s);
-}
-
-static void csrhci_in_packet(struct csrhci_s *s, uint8_t *pkt)
-{
-    uint8_t *rpkt;
-    int opc;
-
-    switch (*pkt ++) {
-    case H4_CMD_PKT:
-        opc = le16_to_cpu(((struct hci_command_hdr *) pkt)->opcode);
-        if (cmd_opcode_ogf(opc) == OGF_VENDOR_CMD) {
-            csrhci_in_packet_vendor(s, cmd_opcode_ocf(opc),
-                            pkt + sizeof(struct hci_command_hdr),
-                            s->in_len - sizeof(struct hci_command_hdr) - 1);
-            return;
-        }
-
-        /* TODO: if the command is OCF_READ_LOCAL_COMMANDS or the likes,
-         * we need to send it to the HCI layer and then add our supported
-         * commands to the returned mask (such as OGF_VENDOR_CMD).  With
-         * bt-hci.c we could just have hooks for this kind of commands but
-         * we can't with bt-host.c.  */
-
-        s->hci->cmd_send(s->hci, pkt, s->in_len - 1);
-        break;
-
-    case H4_EVT_PKT:
-        goto bad_pkt;
-
-    case H4_ACL_PKT:
-        s->hci->acl_send(s->hci, pkt, s->in_len - 1);
-        break;
-
-    case H4_SCO_PKT:
-        s->hci->sco_send(s->hci, pkt, s->in_len - 1);
-        break;
-
-    case H4_NEG_PKT:
-        if (s->in_hdr != sizeof(csrhci_neg_packet) ||
-                        memcmp(pkt - 1, csrhci_neg_packet, s->in_hdr)) {
-            fprintf(stderr, "%s: got a bad NEG packet\n", __FUNCTION__);
-            return;
-        }
-        pkt += 2;
-
-        rpkt = csrhci_out_packet_csr(s, H4_NEG_PKT, 10);
-
-        *rpkt ++ = 0x20;	/* Operational settings negotiation Ok */
-        memcpy(rpkt, pkt, 7); rpkt += 7;
-        *rpkt ++ = 0xff;
-        *rpkt = 0xff;
-        break;
-
-    case H4_ALIVE_PKT:
-        if (s->in_hdr != 4 || pkt[1] != 0x55 || pkt[2] != 0x00) {
-            fprintf(stderr, "%s: got a bad ALIVE packet\n", __FUNCTION__);
-            return;
-        }
-
-        rpkt = csrhci_out_packet_csr(s, H4_ALIVE_PKT, 2);
-
-        *rpkt ++ = 0xcc;
-        *rpkt = 0x00;
-        break;
-
-    default:
-    bad_pkt:
-        /* TODO: error out */
-        fprintf(stderr, "%s: got a bad packet\n", __FUNCTION__);
-        break;
-    }
-
-    csrhci_fifo_wake(s);
-}
-
-static int csrhci_header_len(const uint8_t *pkt)
-{
-    switch (pkt[0]) {
-    case H4_CMD_PKT:
-        return HCI_COMMAND_HDR_SIZE;
-    case H4_EVT_PKT:
-        return HCI_EVENT_HDR_SIZE;
-    case H4_ACL_PKT:
-        return HCI_ACL_HDR_SIZE;
-    case H4_SCO_PKT:
-        return HCI_SCO_HDR_SIZE;
-    case H4_NEG_PKT:
-        return pkt[1] + 1;
-    case H4_ALIVE_PKT:
-        return 3;
-    }
-
-    exit(-1);
-}
-
-static int csrhci_data_len(const uint8_t *pkt)
-{
-    switch (*pkt ++) {
-    case H4_CMD_PKT:
-        /* It seems that vendor-specific command packets for H4+ are all
-         * one byte longer than indicated in the standard header.  */
-        if (le16_to_cpu(((struct hci_command_hdr *) pkt)->opcode) == 0xfc00)
-            return (((struct hci_command_hdr *) pkt)->plen + 1) & ~1;
-
-        return ((struct hci_command_hdr *) pkt)->plen;
-    case H4_EVT_PKT:
-        return ((struct hci_event_hdr *) pkt)->plen;
-    case H4_ACL_PKT:
-        return le16_to_cpu(((struct hci_acl_hdr *) pkt)->dlen);
-    case H4_SCO_PKT:
-        return ((struct hci_sco_hdr *) pkt)->dlen;
-    case H4_NEG_PKT:
-    case H4_ALIVE_PKT:
-        return 0;
-    }
-
-    exit(-1);
-}
-
-static int csrhci_write(struct CharDriverState *chr,
-                const uint8_t *buf, int len)
-{
-    struct csrhci_s *s = (struct csrhci_s *) chr->opaque;
-    int plen = s->in_len;
-
-    if (!s->enable)
-        return 0;
-
-    s->in_len += len;
-    memcpy(s->inpkt + plen, buf, len);
-
-    while (1) {
-        if (s->in_len >= 2 && plen < 2)
-            s->in_hdr = csrhci_header_len(s->inpkt) + 1;
-
-        if (s->in_len >= s->in_hdr && plen < s->in_hdr)
-            s->in_data = csrhci_data_len(s->inpkt) + s->in_hdr;
-
-        if (s->in_len >= s->in_data) {
-            csrhci_in_packet(s, s->inpkt);
-
-            memmove(s->inpkt, s->inpkt + s->in_len, s->in_len - s->in_data);
-            s->in_len -= s->in_data;
-            s->in_hdr = INT_MAX;
-            s->in_data = INT_MAX;
-            plen = 0;
-        } else
-            break;
-    }
-
-    return len;
-}
-
-static void csrhci_out_hci_packet_event(void *opaque,
-                const uint8_t *data, int len)
-{
-    struct csrhci_s *s = (struct csrhci_s *) opaque;
-    uint8_t *pkt = csrhci_out_packet(s, (len + 2) & ~1);	/* Align */
-
-    *pkt ++ = H4_EVT_PKT;
-    memcpy(pkt, data, len);
-
-    csrhci_fifo_wake(s);
-}
-
-static void csrhci_out_hci_packet_acl(void *opaque,
-                const uint8_t *data, int len)
-{
-    struct csrhci_s *s = (struct csrhci_s *) opaque;
-    uint8_t *pkt = csrhci_out_packet(s, (len + 2) & ~1);	/* Align */
-
-    *pkt ++ = H4_ACL_PKT;
-    pkt[len & ~1] = 0;
-    memcpy(pkt, data, len);
-
-    csrhci_fifo_wake(s);
-}
-
-static int csrhci_ioctl(struct CharDriverState *chr, int cmd, void *arg)
-{
-    QEMUSerialSetParams *ssp;
-    struct csrhci_s *s = (struct csrhci_s *) chr->opaque;
-    int prev_state = s->modem_state;
-
-    switch (cmd) {
-    case CHR_IOCTL_SERIAL_SET_PARAMS:
-        ssp = (QEMUSerialSetParams *) arg;
-        s->baud_delay = get_ticks_per_sec() / ssp->speed;
-        /* Moments later... (but shorter than 100ms) */
-        s->modem_state |= CHR_TIOCM_CTS;
-        break;
-
-    case CHR_IOCTL_SERIAL_GET_TIOCM:
-        *(int *) arg = s->modem_state;
-        break;
-
-    case CHR_IOCTL_SERIAL_SET_TIOCM:
-        s->modem_state = *(int *) arg;
-        if (~s->modem_state & prev_state & CHR_TIOCM_RTS)
-            s->modem_state &= ~CHR_TIOCM_CTS;
-        break;
-
-    default:
-        return -ENOTSUP;
-    }
-    return 0;
-}
-
-static void csrhci_reset(struct csrhci_s *s)
-{
-    s->out_len = 0;
-    s->out_size = FIFO_LEN;
-    s->in_len = 0;
-    s->baud_delay = get_ticks_per_sec();
-    s->enable = 0;
-    s->in_hdr = INT_MAX;
-    s->in_data = INT_MAX;
-
-    s->modem_state = 0;
-    /* After a while... (but sooner than 10ms) */
-    s->modem_state |= CHR_TIOCM_CTS;
-
-    memset(&s->bd_addr, 0, sizeof(bdaddr_t));
-}
-
-static void csrhci_out_tick(void *opaque)
-{
-    csrhci_fifo_wake((struct csrhci_s *) opaque);
-}
-
-static void csrhci_pins(void *opaque, int line, int level)
-{
-    struct csrhci_s *s = (struct csrhci_s *) opaque;
-    int state = s->pin_state;
-
-    s->pin_state &= ~(1 << line);
-    s->pin_state |= (!!level) << line;
-
-    if ((state & ~s->pin_state) & (1 << csrhci_pin_reset)) {
-        /* TODO: Disappear from lower layers */
-        csrhci_reset(s);
-    }
-
-    if (s->pin_state == 3 && state != 3) {
-        s->enable = 1;
-        /* TODO: Wake lower layers up */
-    }
-}
-
-qemu_irq *csrhci_pins_get(CharDriverState *chr)
-{
-    struct csrhci_s *s = (struct csrhci_s *) chr->opaque;
-
-    return s->pins;
-}
-
-CharDriverState *uart_hci_init(qemu_irq wakeup)
-{
-    struct csrhci_s *s = (struct csrhci_s *)
-            g_malloc0(sizeof(struct csrhci_s));
-
-    s->chr.opaque = s;
-    s->chr.chr_write = csrhci_write;
-    s->chr.chr_ioctl = csrhci_ioctl;
-
-    s->hci = qemu_next_hci();
-    s->hci->opaque = s;
-    s->hci->evt_recv = csrhci_out_hci_packet_event;
-    s->hci->acl_recv = csrhci_out_hci_packet_acl;
-
-    s->out_tm = timer_new(QEMU_CLOCK_VIRTUAL, SCALE_NS, csrhci_out_tick, s);
-    s->pins = qemu_allocate_irqs(csrhci_pins, s, __csrhci_pins);
-    csrhci_reset(s);
-
-    return &s->chr;
-}
diff --git a/hw/bt/hci.c b/hw/bt/hci.c
deleted file mode 100644
index 2b36e56..0000000
--- a/hw/bt/hci.c
+++ /dev/null
@@ -1,2221 +0,0 @@
-/*
- * QEMU Bluetooth HCI logic.
- *
- * Copyright (C) 2007 OpenMoko, Inc.
- * Copyright (C) 2008 Andrzej Zaborowski  <balrog@zabor.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "qemu-common.h"
-#include "qemu/timer.h"
-#include "hw/usb.h"
-#include "net/net.h"
-#include "hw/bt.h"
-
-struct bt_hci_s {
-    uint8_t *(*evt_packet)(void *opaque);
-    void (*evt_submit)(void *opaque, int len);
-    void *opaque;
-    uint8_t evt_buf[256];
-
-    uint8_t acl_buf[4096];
-    int acl_len;
-
-    uint16_t asb_handle;
-    uint16_t psb_handle;
-
-    int last_cmd;	/* Note: Always little-endian */
-
-    struct bt_device_s *conn_req_host;
-
-    struct {
-        int inquire;
-        int periodic;
-        int responses_left;
-        int responses;
-        QEMUTimer *inquiry_done;
-        QEMUTimer *inquiry_next;
-        int inquiry_length;
-        int inquiry_period;
-        int inquiry_mode;
-
-#define HCI_HANDLE_OFFSET	0x20
-#define HCI_HANDLES_MAX		0x10
-        struct bt_hci_master_link_s {
-            struct bt_link_s *link;
-            void (*lmp_acl_data)(struct bt_link_s *link,
-                            const uint8_t *data, int start, int len);
-            QEMUTimer *acl_mode_timer;
-        } handle[HCI_HANDLES_MAX];
-        uint32_t role_bmp;
-        int last_handle;
-        int connecting;
-        bdaddr_t awaiting_bdaddr[HCI_HANDLES_MAX];
-    } lm;
-
-    uint8_t event_mask[8];
-    uint16_t voice_setting;	/* Note: Always little-endian */
-    uint16_t conn_accept_tout;
-    QEMUTimer *conn_accept_timer;
-
-    struct HCIInfo info;
-    struct bt_device_s device;
-};
-
-#define DEFAULT_RSSI_DBM	20
-
-#define hci_from_info(ptr)	container_of((ptr), struct bt_hci_s, info)
-#define hci_from_device(ptr)	container_of((ptr), struct bt_hci_s, device)
-
-struct bt_hci_link_s {
-    struct bt_link_s btlink;
-    uint16_t handle;	/* Local */
-};
-
-/* LMP layer emulation */
-#if 0
-static void bt_submit_lmp(struct bt_device_s *bt, int length, uint8_t *data)
-{
-    int resp, resplen, error, op, tr;
-    uint8_t respdata[17];
-
-    if (length < 1)
-        return;
-
-    tr = *data & 1;
-    op = *(data ++) >> 1;
-    resp = LMP_ACCEPTED;
-    resplen = 2;
-    respdata[1] = op;
-    error = 0;
-    length --;
-
-    if (op >= 0x7c) {	/* Extended opcode */
-        op |= *(data ++) << 8;
-        resp = LMP_ACCEPTED_EXT;
-        resplen = 4;
-        respdata[0] = op >> 8;
-        respdata[1] = op & 0xff;
-        length --;
-    }
-
-    switch (op) {
-    case LMP_ACCEPTED:
-        /* data[0]	Op code
-         */
-        if (length < 1) {
-            error = HCI_UNSUPPORTED_LMP_PARAMETER_VALUE;
-            goto not_accepted;
-        }
-        resp = 0;
-        break;
-
-    case LMP_ACCEPTED_EXT:
-        /* data[0]	Escape op code
-         * data[1]	Extended op code
-         */
-        if (length < 2) {
-            error = HCI_UNSUPPORTED_LMP_PARAMETER_VALUE;
-            goto not_accepted;
-        }
-        resp = 0;
-        break;
-
-    case LMP_NOT_ACCEPTED:
-        /* data[0]	Op code
-         * data[1]	Error code
-         */
-        if (length < 2) {
-            error = HCI_UNSUPPORTED_LMP_PARAMETER_VALUE;
-            goto not_accepted;
-        }
-        resp = 0;
-        break;
-
-    case LMP_NOT_ACCEPTED_EXT:
-        /* data[0]	Op code
-         * data[1]	Extended op code
-         * data[2]	Error code
-         */
-        if (length < 3) {
-            error = HCI_UNSUPPORTED_LMP_PARAMETER_VALUE;
-            goto not_accepted;
-        }
-        resp = 0;
-        break;
-
-    case LMP_HOST_CONNECTION_REQ:
-        break;
-
-    case LMP_SETUP_COMPLETE:
-        resp = LMP_SETUP_COMPLETE;
-        resplen = 1;
-        bt->setup = 1;
-        break;
-
-    case LMP_DETACH:
-        /* data[0]	Error code
-         */
-        if (length < 1) {
-            error = HCI_UNSUPPORTED_LMP_PARAMETER_VALUE;
-            goto not_accepted;
-        }
-        bt->setup = 0;
-        resp = 0;
-        break;
-
-    case LMP_SUPERVISION_TIMEOUT:
-        /* data[0,1]	Supervision timeout
-         */
-        if (length < 2) {
-            error = HCI_UNSUPPORTED_LMP_PARAMETER_VALUE;
-            goto not_accepted;
-        }
-        resp = 0;
-        break;
-
-    case LMP_QUALITY_OF_SERVICE:
-        resp = 0;
-        /* Fall through */
-    case LMP_QOS_REQ:
-        /* data[0,1]	Poll interval
-         * data[2]	N(BC)
-         */
-        if (length < 3) {
-            error = HCI_UNSUPPORTED_LMP_PARAMETER_VALUE;
-            goto not_accepted;
-        }
-        break;
-
-    case LMP_MAX_SLOT:
-        resp = 0;
-        /* Fall through */
-    case LMP_MAX_SLOT_REQ:
-        /* data[0]	Max slots
-         */
-        if (length < 1) {
-            error = HCI_UNSUPPORTED_LMP_PARAMETER_VALUE;
-            goto not_accepted;
-        }
-        break;
-
-    case LMP_AU_RAND:
-    case LMP_IN_RAND:
-    case LMP_COMB_KEY:
-        /* data[0-15]	Random number
-         */
-        if (length < 16) {
-            error = HCI_UNSUPPORTED_LMP_PARAMETER_VALUE;
-            goto not_accepted;
-        }
-        if (op == LMP_AU_RAND) {
-            if (bt->key_present) {
-                resp = LMP_SRES;
-                resplen = 5;
-                /* XXX: [Part H] Section 6.1 on page 801 */
-            } else {
-                error = HCI_PIN_OR_KEY_MISSING;
-                goto not_accepted;
-            }
-        } else if (op == LMP_IN_RAND) {
-            error = HCI_PAIRING_NOT_ALLOWED;
-            goto not_accepted;
-        } else {
-            /* XXX: [Part H] Section 3.2 on page 779 */
-            resp = LMP_UNIT_KEY;
-            resplen = 17;
-            memcpy(respdata + 1, bt->key, 16);
-
-            error = HCI_UNIT_LINK_KEY_USED;
-            goto not_accepted;
-        }
-        break;
-
-    case LMP_UNIT_KEY:
-        /* data[0-15]	Key
-         */
-        if (length < 16) {
-            error = HCI_UNSUPPORTED_LMP_PARAMETER_VALUE;
-            goto not_accepted;
-        }
-        memcpy(bt->key, data, 16);
-        bt->key_present = 1;
-        break;
-
-    case LMP_SRES:
-        /* data[0-3]	Authentication response
-         */
-        if (length < 4) {
-            error = HCI_UNSUPPORTED_LMP_PARAMETER_VALUE;
-            goto not_accepted;
-        }
-        break;
-
-    case LMP_CLKOFFSET_REQ:
-        resp = LMP_CLKOFFSET_RES;
-        resplen = 3;
-        respdata[1] = 0x33;
-        respdata[2] = 0x33;
-        break;
-
-    case LMP_CLKOFFSET_RES:
-        /* data[0,1]	Clock offset
-         * (Slave to master only)
-         */
-        if (length < 2) {
-            error = HCI_UNSUPPORTED_LMP_PARAMETER_VALUE;
-            goto not_accepted;
-        }
-        break;
-
-    case LMP_VERSION_REQ:
-    case LMP_VERSION_RES:
-        /* data[0]	VersNr
-         * data[1,2]	CompId
-         * data[3,4]	SubVersNr
-         */
-        if (length < 5) {
-            error = HCI_UNSUPPORTED_LMP_PARAMETER_VALUE;
-            goto not_accepted;
-        }
-        if (op == LMP_VERSION_REQ) {
-            resp = LMP_VERSION_RES;
-            resplen = 6;
-            respdata[1] = 0x20;
-            respdata[2] = 0xff;
-            respdata[3] = 0xff;
-            respdata[4] = 0xff;
-            respdata[5] = 0xff;
-        } else
-            resp = 0;
-        break;
-
-    case LMP_FEATURES_REQ:
-    case LMP_FEATURES_RES:
-        /* data[0-7]	Features
-         */
-        if (length < 8) {
-            error = HCI_UNSUPPORTED_LMP_PARAMETER_VALUE;
-            goto not_accepted;
-        }
-        if (op == LMP_FEATURES_REQ) {
-            resp = LMP_FEATURES_RES;
-            resplen = 9;
-            respdata[1] = (bt->lmp_caps >> 0) & 0xff;
-            respdata[2] = (bt->lmp_caps >> 8) & 0xff;
-            respdata[3] = (bt->lmp_caps >> 16) & 0xff;
-            respdata[4] = (bt->lmp_caps >> 24) & 0xff;
-            respdata[5] = (bt->lmp_caps >> 32) & 0xff;
-            respdata[6] = (bt->lmp_caps >> 40) & 0xff;
-            respdata[7] = (bt->lmp_caps >> 48) & 0xff;
-            respdata[8] = (bt->lmp_caps >> 56) & 0xff;
-        } else
-            resp = 0;
-        break;
-
-    case LMP_NAME_REQ:
-        /* data[0]	Name offset
-         */
-        if (length < 1) {
-            error = HCI_UNSUPPORTED_LMP_PARAMETER_VALUE;
-            goto not_accepted;
-        }
-        resp = LMP_NAME_RES;
-        resplen = 17;
-        respdata[1] = data[0];
-        respdata[2] = strlen(bt->lmp_name);
-        memset(respdata + 3, 0x00, 14);
-        if (respdata[2] > respdata[1])
-            memcpy(respdata + 3, bt->lmp_name + respdata[1],
-                            respdata[2] - respdata[1]);
-        break;
-
-    case LMP_NAME_RES:
-        /* data[0]	Name offset
-         * data[1]	Name length
-         * data[2-15]	Name fragment
-         */
-        if (length < 16) {
-            error = HCI_UNSUPPORTED_LMP_PARAMETER_VALUE;
-            goto not_accepted;
-        }
-        resp = 0;
-        break;
-
-    default:
-        error = HCI_UNKNOWN_LMP_PDU;
-        /* Fall through */
-    not_accepted:
-        if (op >> 8) {
-            resp = LMP_NOT_ACCEPTED_EXT;
-            resplen = 5;
-            respdata[0] = op >> 8;
-            respdata[1] = op & 0xff;
-            respdata[2] = error;
-        } else {
-            resp = LMP_NOT_ACCEPTED;
-            resplen = 3;
-            respdata[0] = op & 0xff;
-            respdata[1] = error;
-        }
-    }
-
-    if (resp == 0)
-        return;
-
-    if (resp >> 8) {
-        respdata[0] = resp >> 8;
-        respdata[1] = resp & 0xff;
-    } else
-        respdata[0] = resp & 0xff;
-
-    respdata[0] <<= 1;
-    respdata[0] |= tr;
-}
-
-static void bt_submit_raw_acl(struct bt_piconet_s *net, int length, uint8_t *data)
-{
-    struct bt_device_s *slave;
-    if (length < 1)
-        return;
-
-    slave = 0;
-#if 0
-    slave = net->slave;
-#endif
-
-    switch (data[0] & 3) {
-    case LLID_ACLC:
-        bt_submit_lmp(slave, length - 1, data + 1);
-        break;
-    case LLID_ACLU_START:
-#if 0
-        bt_sumbit_l2cap(slave, length - 1, data + 1, (data[0] >> 2) & 1);
-        break;
-#endif
-    default:
-    case LLID_ACLU_CONT:
-        break;
-    }
-}
-#endif
-
-/* HCI layer emulation */
-
-/* Note: we could ignore endianness because unswapped handles will still
- * be valid as connection identifiers for the guest - they don't have to
- * be continuously allocated.  We do it though, to preserve similar
- * behaviour between hosts.  Some things, like the BD_ADDR cannot be
- * preserved though (for example if a real hci is used).  */
-#ifdef HOST_WORDS_BIGENDIAN
-# define HNDL(raw)	bswap16(raw)
-#else
-# define HNDL(raw)	(raw)
-#endif
-
-static const uint8_t bt_event_reserved_mask[8] = {
-    0xff, 0x9f, 0xfb, 0xff, 0x07, 0x18, 0x00, 0x00,
-};
-
-static inline uint8_t *bt_hci_event_start(struct bt_hci_s *hci,
-                int evt, int len)
-{
-    uint8_t *packet, mask;
-    int mask_byte;
-
-    if (len > 255) {
-        fprintf(stderr, "%s: HCI event params too long (%ib)\n",
-                        __FUNCTION__, len);
-        exit(-1);
-    }
-
-    mask_byte = (evt - 1) >> 3;
-    mask = 1 << ((evt - 1) & 3);
-    if (mask & bt_event_reserved_mask[mask_byte] & ~hci->event_mask[mask_byte])
-        return NULL;
-
-    packet = hci->evt_packet(hci->opaque);
-    packet[0] = evt;
-    packet[1] = len;
-
-    return &packet[2];
-}
-
-static inline void bt_hci_event(struct bt_hci_s *hci, int evt,
-                void *params, int len)
-{
-    uint8_t *packet = bt_hci_event_start(hci, evt, len);
-
-    if (!packet)
-        return;
-
-    if (len)
-        memcpy(packet, params, len);
-
-    hci->evt_submit(hci->opaque, len + 2);
-}
-
-static inline void bt_hci_event_status(struct bt_hci_s *hci, int status)
-{
-    evt_cmd_status params = {
-        .status	= status,
-        .ncmd	= 1,
-        .opcode	= hci->last_cmd,
-    };
-
-    bt_hci_event(hci, EVT_CMD_STATUS, &params, EVT_CMD_STATUS_SIZE);
-}
-
-static inline void bt_hci_event_complete(struct bt_hci_s *hci,
-                void *ret, int len)
-{
-    uint8_t *packet = bt_hci_event_start(hci, EVT_CMD_COMPLETE,
-                    len + EVT_CMD_COMPLETE_SIZE);
-    evt_cmd_complete *params = (evt_cmd_complete *) packet;
-
-    if (!packet)
-        return;
-
-    params->ncmd	= 1;
-    params->opcode	= hci->last_cmd;
-    if (len)
-        memcpy(&packet[EVT_CMD_COMPLETE_SIZE], ret, len);
-
-    hci->evt_submit(hci->opaque, len + EVT_CMD_COMPLETE_SIZE + 2);
-}
-
-static void bt_hci_inquiry_done(void *opaque)
-{
-    struct bt_hci_s *hci = (struct bt_hci_s *) opaque;
-    uint8_t status = HCI_SUCCESS;
-
-    if (!hci->lm.periodic)
-        hci->lm.inquire = 0;
-
-    /* The specification is inconsistent about this one.  Page 565 reads
-     * "The event parameters of Inquiry Complete event will have a summary
-     * of the result from the Inquiry process, which reports the number of
-     * nearby Bluetooth devices that responded [so hci->responses].", but
-     * Event Parameters (see page 729) has only Status.  */
-    bt_hci_event(hci, EVT_INQUIRY_COMPLETE, &status, 1);
-}
-
-static void bt_hci_inquiry_result_standard(struct bt_hci_s *hci,
-                struct bt_device_s *slave)
-{
-    inquiry_info params = {
-        .num_responses		= 1,
-        .bdaddr			= BAINIT(&slave->bd_addr),
-        .pscan_rep_mode		= 0x00,	/* R0 */
-        .pscan_period_mode	= 0x00,	/* P0 - deprecated */
-        .pscan_mode		= 0x00,	/* Standard scan - deprecated */
-        .dev_class[0]		= slave->class[0],
-        .dev_class[1]		= slave->class[1],
-        .dev_class[2]		= slave->class[2],
-        /* TODO: return the clkoff *difference* */
-        .clock_offset		= slave->clkoff,	/* Note: no swapping */
-    };
-
-    bt_hci_event(hci, EVT_INQUIRY_RESULT, &params, INQUIRY_INFO_SIZE);
-}
-
-static void bt_hci_inquiry_result_with_rssi(struct bt_hci_s *hci,
-                struct bt_device_s *slave)
-{
-    inquiry_info_with_rssi params = {
-        .num_responses		= 1,
-        .bdaddr			= BAINIT(&slave->bd_addr),
-        .pscan_rep_mode		= 0x00,	/* R0 */
-        .pscan_period_mode	= 0x00,	/* P0 - deprecated */
-        .dev_class[0]		= slave->class[0],
-        .dev_class[1]		= slave->class[1],
-        .dev_class[2]		= slave->class[2],
-        /* TODO: return the clkoff *difference* */
-        .clock_offset		= slave->clkoff,	/* Note: no swapping */
-        .rssi			= DEFAULT_RSSI_DBM,
-    };
-
-    bt_hci_event(hci, EVT_INQUIRY_RESULT_WITH_RSSI,
-                    &params, INQUIRY_INFO_WITH_RSSI_SIZE);
-}
-
-static void bt_hci_inquiry_result(struct bt_hci_s *hci,
-                struct bt_device_s *slave)
-{
-    if (!slave->inquiry_scan || !hci->lm.responses_left)
-        return;
-
-    hci->lm.responses_left --;
-    hci->lm.responses ++;
-
-    switch (hci->lm.inquiry_mode) {
-    case 0x00:
-        bt_hci_inquiry_result_standard(hci, slave);
-        return;
-    case 0x01:
-        bt_hci_inquiry_result_with_rssi(hci, slave);
-        return;
-    default:
-        fprintf(stderr, "%s: bad inquiry mode %02x\n", __FUNCTION__,
-                        hci->lm.inquiry_mode);
-        exit(-1);
-    }
-}
-
-static void bt_hci_mod_timer_1280ms(QEMUTimer *timer, int period)
-{
-    timer_mod(timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
-                   muldiv64(period << 7, get_ticks_per_sec(), 100));
-}
-
-static void bt_hci_inquiry_start(struct bt_hci_s *hci, int length)
-{
-    struct bt_device_s *slave;
-
-    hci->lm.inquiry_length = length;
-    for (slave = hci->device.net->slave; slave; slave = slave->next)
-        /* Don't uncover ourselves.  */
-        if (slave != &hci->device)
-            bt_hci_inquiry_result(hci, slave);
-
-    /* TODO: register for a callback on a new device's addition to the
-     * scatternet so that if it's added before inquiry_length expires,
-     * an Inquiry Result is generated immediately.  Alternatively re-loop
-     * through the devices on the inquiry_length expiration and report
-     * devices not seen before.  */
-    if (hci->lm.responses_left)
-        bt_hci_mod_timer_1280ms(hci->lm.inquiry_done, hci->lm.inquiry_length);
-    else
-        bt_hci_inquiry_done(hci);
-
-    if (hci->lm.periodic)
-        bt_hci_mod_timer_1280ms(hci->lm.inquiry_next, hci->lm.inquiry_period);
-}
-
-static void bt_hci_inquiry_next(void *opaque)
-{
-    struct bt_hci_s *hci = (struct bt_hci_s *) opaque;
-
-    hci->lm.responses_left += hci->lm.responses;
-    hci->lm.responses = 0;
-    bt_hci_inquiry_start(hci,  hci->lm.inquiry_length);
-}
-
-static inline int bt_hci_handle_bad(struct bt_hci_s *hci, uint16_t handle)
-{
-    return !(handle & HCI_HANDLE_OFFSET) ||
-            handle >= (HCI_HANDLE_OFFSET | HCI_HANDLES_MAX) ||
-            !hci->lm.handle[handle & ~HCI_HANDLE_OFFSET].link;
-}
-
-static inline int bt_hci_role_master(struct bt_hci_s *hci, uint16_t handle)
-{
-    return !!(hci->lm.role_bmp & (1 << (handle & ~HCI_HANDLE_OFFSET)));
-}
-
-static inline struct bt_device_s *bt_hci_remote_dev(struct bt_hci_s *hci,
-                uint16_t handle)
-{
-    struct bt_link_s *link = hci->lm.handle[handle & ~HCI_HANDLE_OFFSET].link;
-
-    return bt_hci_role_master(hci, handle) ? link->slave : link->host;
-}
-
-static void bt_hci_mode_tick(void *opaque);
-static void bt_hci_lmp_link_establish(struct bt_hci_s *hci,
-                struct bt_link_s *link, int master)
-{
-    hci->lm.handle[hci->lm.last_handle].link = link;
-
-    if (master) {
-        /* We are the master side of an ACL link */
-        hci->lm.role_bmp |= 1 << hci->lm.last_handle;
-
-        hci->lm.handle[hci->lm.last_handle].lmp_acl_data =
-                link->slave->lmp_acl_data;
-    } else {
-        /* We are the slave side of an ACL link */
-        hci->lm.role_bmp &= ~(1 << hci->lm.last_handle);
-
-        hci->lm.handle[hci->lm.last_handle].lmp_acl_data =
-                link->host->lmp_acl_resp;
-    }
-
-    /* Mode */
-    if (master) {
-        link->acl_mode = acl_active;
-        hci->lm.handle[hci->lm.last_handle].acl_mode_timer =
-                timer_new(QEMU_CLOCK_VIRTUAL, SCALE_NS, bt_hci_mode_tick, link);
-    }
-}
-
-static void bt_hci_lmp_link_teardown(struct bt_hci_s *hci, uint16_t handle)
-{
-    handle &= ~HCI_HANDLE_OFFSET;
-    hci->lm.handle[handle].link = NULL;
-
-    if (bt_hci_role_master(hci, handle)) {
-        timer_del(hci->lm.handle[handle].acl_mode_timer);
-        timer_free(hci->lm.handle[handle].acl_mode_timer);
-    }
-}
-
-static int bt_hci_connect(struct bt_hci_s *hci, bdaddr_t *bdaddr)
-{
-    struct bt_device_s *slave;
-    struct bt_link_s link;
-
-    for (slave = hci->device.net->slave; slave; slave = slave->next)
-        if (slave->page_scan && !bacmp(&slave->bd_addr, bdaddr))
-            break;
-    if (!slave || slave == &hci->device)
-        return -ENODEV;
-
-    bacpy(&hci->lm.awaiting_bdaddr[hci->lm.connecting ++], &slave->bd_addr);
-
-    link.slave = slave;
-    link.host = &hci->device;
-    link.slave->lmp_connection_request(&link);	/* Always last */
-
-    return 0;
-}
-
-static void bt_hci_connection_reject(struct bt_hci_s *hci,
-                struct bt_device_s *host, uint8_t because)
-{
-    struct bt_link_s link = {
-        .slave	= &hci->device,
-        .host	= host,
-        /* Rest uninitialised */
-    };
-
-    host->reject_reason = because;
-    host->lmp_connection_complete(&link);
-}
-
-static void bt_hci_connection_reject_event(struct bt_hci_s *hci,
-                bdaddr_t *bdaddr)
-{
-    evt_conn_complete params;
-
-    params.status	= HCI_NO_CONNECTION;
-    params.handle	= 0;
-    bacpy(&params.bdaddr, bdaddr);
-    params.link_type	= ACL_LINK;
-    params.encr_mode	= 0x00;		/* Encryption not required */
-    bt_hci_event(hci, EVT_CONN_COMPLETE, &params, EVT_CONN_COMPLETE_SIZE);
-}
-
-static void bt_hci_connection_accept(struct bt_hci_s *hci,
-                struct bt_device_s *host)
-{
-    struct bt_hci_link_s *link = g_malloc0(sizeof(struct bt_hci_link_s));
-    evt_conn_complete params;
-    uint16_t handle;
-    uint8_t status = HCI_SUCCESS;
-    int tries = HCI_HANDLES_MAX;
-
-    /* Make a connection handle */
-    do {
-        while (hci->lm.handle[++ hci->lm.last_handle].link && -- tries)
-            hci->lm.last_handle &= HCI_HANDLES_MAX - 1;
-        handle = hci->lm.last_handle | HCI_HANDLE_OFFSET;
-    } while ((handle == hci->asb_handle || handle == hci->psb_handle) &&
-            tries);
-
-    if (!tries) {
-        g_free(link);
-        bt_hci_connection_reject(hci, host, HCI_REJECTED_LIMITED_RESOURCES);
-        status = HCI_NO_CONNECTION;
-        goto complete;
-    }
-
-    link->btlink.slave	= &hci->device;
-    link->btlink.host	= host;
-    link->handle = handle;
-
-    /* Link established */
-    bt_hci_lmp_link_establish(hci, &link->btlink, 0);
-
-complete:
-    params.status	= status;
-    params.handle	= HNDL(handle);
-    bacpy(&params.bdaddr, &host->bd_addr);
-    params.link_type	= ACL_LINK;
-    params.encr_mode	= 0x00;		/* Encryption not required */
-    bt_hci_event(hci, EVT_CONN_COMPLETE, &params, EVT_CONN_COMPLETE_SIZE);
-
-    /* Needs to be done at the very end because it can trigger a (nested)
-     * disconnect, in case the other end had cancelled the request
-     * locally.  */
-    if (status == HCI_SUCCESS) {
-        host->reject_reason = 0;
-        host->lmp_connection_complete(&link->btlink);
-    }
-}
-
-static void bt_hci_lmp_connection_request(struct bt_link_s *link)
-{
-    struct bt_hci_s *hci = hci_from_device(link->slave);
-    evt_conn_request params;
-
-    if (hci->conn_req_host) {
-        bt_hci_connection_reject(hci, link->host,
-                                 HCI_REJECTED_LIMITED_RESOURCES);
-        return;
-    }
-    hci->conn_req_host = link->host;
-    /* TODO: if masked and auto-accept, then auto-accept,
-     * if masked and not auto-accept, then auto-reject */
-    /* TODO: kick the hci->conn_accept_timer, timeout after
-     * hci->conn_accept_tout * 0.625 msec */
-
-    bacpy(&params.bdaddr, &link->host->bd_addr);
-    memcpy(&params.dev_class, &link->host->class, sizeof(params.dev_class));
-    params.link_type	= ACL_LINK;
-    bt_hci_event(hci, EVT_CONN_REQUEST, &params, EVT_CONN_REQUEST_SIZE);
-    return;
-}
-
-static void bt_hci_conn_accept_timeout(void *opaque)
-{
-    struct bt_hci_s *hci = (struct bt_hci_s *) opaque;
-
-    if (!hci->conn_req_host)
-        /* Already accepted or rejected.  If the other end cancelled the
-         * connection request then we still have to reject or accept it
-         * and then we'll get a disconnect.  */
-        return;
-
-    /* TODO */
-}
-
-/* Remove from the list of devices which we wanted to connect to and
- * are awaiting a response from.  If the callback sees a response from
- * a device which is not on the list it will assume it's a connection
- * that's been cancelled by the host in the meantime and immediately
- * try to detach the link and send a Connection Complete.  */
-static int bt_hci_lmp_connection_ready(struct bt_hci_s *hci,
-                bdaddr_t *bdaddr)
-{
-    int i;
-
-    for (i = 0; i < hci->lm.connecting; i ++)
-        if (!bacmp(&hci->lm.awaiting_bdaddr[i], bdaddr)) {
-            if (i < -- hci->lm.connecting)
-                bacpy(&hci->lm.awaiting_bdaddr[i],
-                                &hci->lm.awaiting_bdaddr[hci->lm.connecting]);
-            return 0;
-        }
-
-    return 1;
-}
-
-static void bt_hci_lmp_connection_complete(struct bt_link_s *link)
-{
-    struct bt_hci_s *hci = hci_from_device(link->host);
-    evt_conn_complete params;
-    uint16_t handle;
-    uint8_t status = HCI_SUCCESS;
-    int tries = HCI_HANDLES_MAX;
-
-    if (bt_hci_lmp_connection_ready(hci, &link->slave->bd_addr)) {
-        if (!hci->device.reject_reason)
-            link->slave->lmp_disconnect_slave(link);
-        handle = 0;
-        status = HCI_NO_CONNECTION;
-        goto complete;
-    }
-
-    if (hci->device.reject_reason) {
-        handle = 0;
-        status = hci->device.reject_reason;
-        goto complete;
-    }
-
-    /* Make a connection handle */
-    do {
-        while (hci->lm.handle[++ hci->lm.last_handle].link && -- tries)
-            hci->lm.last_handle &= HCI_HANDLES_MAX - 1;
-        handle = hci->lm.last_handle | HCI_HANDLE_OFFSET;
-    } while ((handle == hci->asb_handle || handle == hci->psb_handle) &&
-            tries);
-
-    if (!tries) {
-        link->slave->lmp_disconnect_slave(link);
-        status = HCI_NO_CONNECTION;
-        goto complete;
-    }
-
-    /* Link established */
-    link->handle = handle;
-    bt_hci_lmp_link_establish(hci, link, 1);
-
-complete:
-    params.status	= status;
-    params.handle	= HNDL(handle);
-    params.link_type	= ACL_LINK;
-    bacpy(&params.bdaddr, &link->slave->bd_addr);
-    params.encr_mode	= 0x00;		/* Encryption not required */
-    bt_hci_event(hci, EVT_CONN_COMPLETE, &params, EVT_CONN_COMPLETE_SIZE);
-}
-
-static void bt_hci_disconnect(struct bt_hci_s *hci,
-                uint16_t handle, int reason)
-{
-    struct bt_link_s *btlink =
-            hci->lm.handle[handle & ~HCI_HANDLE_OFFSET].link;
-    struct bt_hci_link_s *link;
-    evt_disconn_complete params;
-
-    if (bt_hci_role_master(hci, handle)) {
-        btlink->slave->reject_reason = reason;
-        btlink->slave->lmp_disconnect_slave(btlink);
-        /* The link pointer is invalid from now on */
-
-        goto complete;
-    }
-
-    btlink->host->reject_reason = reason;
-    btlink->host->lmp_disconnect_master(btlink);
-
-    /* We are the slave, we get to clean this burden */
-    link = (struct bt_hci_link_s *) btlink;
-    g_free(link);
-
-complete:
-    bt_hci_lmp_link_teardown(hci, handle);
-
-    params.status	= HCI_SUCCESS;
-    params.handle	= HNDL(handle);
-    params.reason	= HCI_CONNECTION_TERMINATED;
-    bt_hci_event(hci, EVT_DISCONN_COMPLETE,
-                    &params, EVT_DISCONN_COMPLETE_SIZE);
-}
-
-/* TODO: use only one function */
-static void bt_hci_lmp_disconnect_host(struct bt_link_s *link)
-{
-    struct bt_hci_s *hci = hci_from_device(link->host);
-    uint16_t handle = link->handle;
-    evt_disconn_complete params;
-
-    bt_hci_lmp_link_teardown(hci, handle);
-
-    params.status	= HCI_SUCCESS;
-    params.handle	= HNDL(handle);
-    params.reason	= hci->device.reject_reason;
-    bt_hci_event(hci, EVT_DISCONN_COMPLETE,
-                    &params, EVT_DISCONN_COMPLETE_SIZE);
-}
-
-static void bt_hci_lmp_disconnect_slave(struct bt_link_s *btlink)
-{
-    struct bt_hci_link_s *link = (struct bt_hci_link_s *) btlink;
-    struct bt_hci_s *hci = hci_from_device(btlink->slave);
-    uint16_t handle = link->handle;
-    evt_disconn_complete params;
-
-    g_free(link);
-
-    bt_hci_lmp_link_teardown(hci, handle);
-
-    params.status	= HCI_SUCCESS;
-    params.handle	= HNDL(handle);
-    params.reason	= hci->device.reject_reason;
-    bt_hci_event(hci, EVT_DISCONN_COMPLETE,
-                    &params, EVT_DISCONN_COMPLETE_SIZE);
-}
-
-static int bt_hci_name_req(struct bt_hci_s *hci, bdaddr_t *bdaddr)
-{
-    struct bt_device_s *slave;
-    evt_remote_name_req_complete params;
-    int len;
-
-    for (slave = hci->device.net->slave; slave; slave = slave->next)
-        if (slave->page_scan && !bacmp(&slave->bd_addr, bdaddr))
-            break;
-    if (!slave)
-        return -ENODEV;
-
-    bt_hci_event_status(hci, HCI_SUCCESS);
-
-    params.status       = HCI_SUCCESS;
-    bacpy(&params.bdaddr, &slave->bd_addr);
-    len = snprintf(params.name, sizeof(params.name),
-                    "%s", slave->lmp_name ?: "");
-    memset(params.name + len, 0, sizeof(params.name) - len);
-    bt_hci_event(hci, EVT_REMOTE_NAME_REQ_COMPLETE,
-                    &params, EVT_REMOTE_NAME_REQ_COMPLETE_SIZE);
-
-    return 0;
-}
-
-static int bt_hci_features_req(struct bt_hci_s *hci, uint16_t handle)
-{
-    struct bt_device_s *slave;
-    evt_read_remote_features_complete params;
-
-    if (bt_hci_handle_bad(hci, handle))
-        return -ENODEV;
-
-    slave = bt_hci_remote_dev(hci, handle);
-
-    bt_hci_event_status(hci, HCI_SUCCESS);
-
-    params.status	= HCI_SUCCESS;
-    params.handle	= HNDL(handle);
-    params.features[0]	= (slave->lmp_caps >>  0) & 0xff;
-    params.features[1]	= (slave->lmp_caps >>  8) & 0xff;
-    params.features[2]	= (slave->lmp_caps >> 16) & 0xff;
-    params.features[3]	= (slave->lmp_caps >> 24) & 0xff;
-    params.features[4]	= (slave->lmp_caps >> 32) & 0xff;
-    params.features[5]	= (slave->lmp_caps >> 40) & 0xff;
-    params.features[6]	= (slave->lmp_caps >> 48) & 0xff;
-    params.features[7]	= (slave->lmp_caps >> 56) & 0xff;
-    bt_hci_event(hci, EVT_READ_REMOTE_FEATURES_COMPLETE,
-                    &params, EVT_READ_REMOTE_FEATURES_COMPLETE_SIZE);
-
-    return 0;
-}
-
-static int bt_hci_version_req(struct bt_hci_s *hci, uint16_t handle)
-{
-    evt_read_remote_version_complete params;
-
-    if (bt_hci_handle_bad(hci, handle))
-        return -ENODEV;
-
-    bt_hci_remote_dev(hci, handle);
-
-    bt_hci_event_status(hci, HCI_SUCCESS);
-
-    params.status	= HCI_SUCCESS;
-    params.handle	= HNDL(handle);
-    params.lmp_ver	= 0x03;
-    params.manufacturer	= cpu_to_le16(0xa000);
-    params.lmp_subver	= cpu_to_le16(0xa607);
-    bt_hci_event(hci, EVT_READ_REMOTE_VERSION_COMPLETE,
-                    &params, EVT_READ_REMOTE_VERSION_COMPLETE_SIZE);
-
-    return 0;
-}
-
-static int bt_hci_clkoffset_req(struct bt_hci_s *hci, uint16_t handle)
-{
-    struct bt_device_s *slave;
-    evt_read_clock_offset_complete params;
-
-    if (bt_hci_handle_bad(hci, handle))
-        return -ENODEV;
-
-    slave = bt_hci_remote_dev(hci, handle);
-
-    bt_hci_event_status(hci, HCI_SUCCESS);
-
-    params.status	= HCI_SUCCESS;
-    params.handle	= HNDL(handle);
-    /* TODO: return the clkoff *difference* */
-    params.clock_offset	= slave->clkoff;	/* Note: no swapping */
-    bt_hci_event(hci, EVT_READ_CLOCK_OFFSET_COMPLETE,
-                    &params, EVT_READ_CLOCK_OFFSET_COMPLETE_SIZE);
-
-    return 0;
-}
-
-static void bt_hci_event_mode(struct bt_hci_s *hci, struct bt_link_s *link,
-                uint16_t handle)
-{
-    evt_mode_change params = {
-        .status		= HCI_SUCCESS,
-        .handle		= HNDL(handle),
-        .mode		= link->acl_mode,
-        .interval	= cpu_to_le16(link->acl_interval),
-    };
-
-    bt_hci_event(hci, EVT_MODE_CHANGE, &params, EVT_MODE_CHANGE_SIZE);
-}
-
-static void bt_hci_lmp_mode_change_master(struct bt_hci_s *hci,
-                struct bt_link_s *link, int mode, uint16_t interval)
-{
-    link->acl_mode = mode;
-    link->acl_interval = interval;
-
-    bt_hci_event_mode(hci, link, link->handle);
-
-    link->slave->lmp_mode_change(link);
-}
-
-static void bt_hci_lmp_mode_change_slave(struct bt_link_s *btlink)
-{
-    struct bt_hci_link_s *link = (struct bt_hci_link_s *) btlink;
-    struct bt_hci_s *hci = hci_from_device(btlink->slave);
-
-    bt_hci_event_mode(hci, btlink, link->handle);
-}
-
-static int bt_hci_mode_change(struct bt_hci_s *hci, uint16_t handle,
-                int interval, int mode)
-{
-    struct bt_hci_master_link_s *link;
-
-    if (bt_hci_handle_bad(hci, handle) || !bt_hci_role_master(hci, handle))
-        return -ENODEV;
-
-    link = &hci->lm.handle[handle & ~HCI_HANDLE_OFFSET];
-    if (link->link->acl_mode != acl_active) {
-        bt_hci_event_status(hci, HCI_COMMAND_DISALLOWED);
-        return 0;
-    }
-
-    bt_hci_event_status(hci, HCI_SUCCESS);
-
-    timer_mod(link->acl_mode_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
-                   muldiv64(interval * 625, get_ticks_per_sec(), 1000000));
-    bt_hci_lmp_mode_change_master(hci, link->link, mode, interval);
-
-    return 0;
-}
-
-static int bt_hci_mode_cancel(struct bt_hci_s *hci, uint16_t handle, int mode)
-{
-    struct bt_hci_master_link_s *link;
-
-    if (bt_hci_handle_bad(hci, handle) || !bt_hci_role_master(hci, handle))
-        return -ENODEV;
-
-    link = &hci->lm.handle[handle & ~HCI_HANDLE_OFFSET];
-    if (link->link->acl_mode != mode) {
-        bt_hci_event_status(hci, HCI_COMMAND_DISALLOWED);
-
-        return 0;
-    }
-
-    bt_hci_event_status(hci, HCI_SUCCESS);
-
-    timer_del(link->acl_mode_timer);
-    bt_hci_lmp_mode_change_master(hci, link->link, acl_active, 0);
-
-    return 0;
-}
-
-static void bt_hci_mode_tick(void *opaque)
-{
-    struct bt_link_s *link = opaque;
-    struct bt_hci_s *hci = hci_from_device(link->host);
-
-    bt_hci_lmp_mode_change_master(hci, link, acl_active, 0);
-}
-
-static void bt_hci_reset(struct bt_hci_s *hci)
-{
-    hci->acl_len = 0;
-    hci->last_cmd = 0;
-    hci->lm.connecting = 0;
-
-    hci->event_mask[0] = 0xff;
-    hci->event_mask[1] = 0xff;
-    hci->event_mask[2] = 0xff;
-    hci->event_mask[3] = 0xff;
-    hci->event_mask[4] = 0xff;
-    hci->event_mask[5] = 0x1f;
-    hci->event_mask[6] = 0x00;
-    hci->event_mask[7] = 0x00;
-    hci->device.inquiry_scan = 0;
-    hci->device.page_scan = 0;
-    if (hci->device.lmp_name)
-        g_free((void *) hci->device.lmp_name);
-    hci->device.lmp_name = NULL;
-    hci->device.class[0] = 0x00;
-    hci->device.class[1] = 0x00;
-    hci->device.class[2] = 0x00;
-    hci->voice_setting = 0x0000;
-    hci->conn_accept_tout = 0x1f40;
-    hci->lm.inquiry_mode = 0x00;
-
-    hci->psb_handle = 0x000;
-    hci->asb_handle = 0x000;
-
-    /* XXX: timer_del(sl->acl_mode_timer); for all links */
-    timer_del(hci->lm.inquiry_done);
-    timer_del(hci->lm.inquiry_next);
-    timer_del(hci->conn_accept_timer);
-}
-
-static void bt_hci_read_local_version_rp(struct bt_hci_s *hci)
-{
-    read_local_version_rp lv = {
-        .status		= HCI_SUCCESS,
-        .hci_ver	= 0x03,
-        .hci_rev	= cpu_to_le16(0xa607),
-        .lmp_ver	= 0x03,
-        .manufacturer	= cpu_to_le16(0xa000),
-        .lmp_subver	= cpu_to_le16(0xa607),
-    };
-
-    bt_hci_event_complete(hci, &lv, READ_LOCAL_VERSION_RP_SIZE);
-}
-
-static void bt_hci_read_local_commands_rp(struct bt_hci_s *hci)
-{
-    read_local_commands_rp lc = {
-        .status		= HCI_SUCCESS,
-        .commands	= {
-            /* Keep updated! */
-            /* Also, keep in sync with hci->device.lmp_caps in bt_new_hci */
-            0xbf, 0x80, 0xf9, 0x03, 0xb2, 0xc0, 0x03, 0xc3,
-            0x00, 0x0f, 0x80, 0x00, 0xc0, 0x00, 0xe8, 0x13,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-        },
-    };
-
-    bt_hci_event_complete(hci, &lc, READ_LOCAL_COMMANDS_RP_SIZE);
-}
-
-static void bt_hci_read_local_features_rp(struct bt_hci_s *hci)
-{
-    read_local_features_rp lf = {
-        .status		= HCI_SUCCESS,
-        .features	= {
-            (hci->device.lmp_caps >>  0) & 0xff,
-            (hci->device.lmp_caps >>  8) & 0xff,
-            (hci->device.lmp_caps >> 16) & 0xff,
-            (hci->device.lmp_caps >> 24) & 0xff,
-            (hci->device.lmp_caps >> 32) & 0xff,
-            (hci->device.lmp_caps >> 40) & 0xff,
-            (hci->device.lmp_caps >> 48) & 0xff,
-            (hci->device.lmp_caps >> 56) & 0xff,
-        },
-    };
-
-    bt_hci_event_complete(hci, &lf, READ_LOCAL_FEATURES_RP_SIZE);
-}
-
-static void bt_hci_read_local_ext_features_rp(struct bt_hci_s *hci, int page)
-{
-    read_local_ext_features_rp lef = {
-        .status		= HCI_SUCCESS,
-        .page_num	= page,
-        .max_page_num	= 0x00,
-        .features	= {
-            /* Keep updated! */
-            0x5f, 0x35, 0x85, 0x7e, 0x9b, 0x19, 0x00, 0x80,
-        },
-    };
-    if (page)
-        memset(lef.features, 0, sizeof(lef.features));
-
-    bt_hci_event_complete(hci, &lef, READ_LOCAL_EXT_FEATURES_RP_SIZE);
-}
-
-static void bt_hci_read_buffer_size_rp(struct bt_hci_s *hci)
-{
-    read_buffer_size_rp bs = {
-        /* This can be made configurable, for one standard USB dongle HCI
-         * the four values are cpu_to_le16(0x0180), 0x40,
-         * cpu_to_le16(0x0008), cpu_to_le16(0x0008).  */
-        .status		= HCI_SUCCESS,
-        .acl_mtu	= cpu_to_le16(0x0200),
-        .sco_mtu	= 0,
-        .acl_max_pkt	= cpu_to_le16(0x0001),
-        .sco_max_pkt	= cpu_to_le16(0x0000),
-    };
-
-    bt_hci_event_complete(hci, &bs, READ_BUFFER_SIZE_RP_SIZE);
-}
-
-/* Deprecated in V2.0 (page 661) */
-static void bt_hci_read_country_code_rp(struct bt_hci_s *hci)
-{
-    read_country_code_rp cc = {
-        .status		= HCI_SUCCESS,
-        .country_code	= 0x00,	/* North America & Europe^1 and Japan */
-    };
-
-    bt_hci_event_complete(hci, &cc, READ_COUNTRY_CODE_RP_SIZE);
-
-    /* ^1. Except France, sorry */
-}
-
-static void bt_hci_read_bd_addr_rp(struct bt_hci_s *hci)
-{
-    read_bd_addr_rp ba = {
-        .status = HCI_SUCCESS,
-        .bdaddr = BAINIT(&hci->device.bd_addr),
-    };
-
-    bt_hci_event_complete(hci, &ba, READ_BD_ADDR_RP_SIZE);
-}
-
-static int bt_hci_link_quality_rp(struct bt_hci_s *hci, uint16_t handle)
-{
-    read_link_quality_rp lq = {
-        .status		= HCI_SUCCESS,
-        .handle		= HNDL(handle),
-        .link_quality	= 0xff,
-    };
-
-    if (bt_hci_handle_bad(hci, handle))
-        lq.status = HCI_NO_CONNECTION;
-
-    bt_hci_event_complete(hci, &lq, READ_LINK_QUALITY_RP_SIZE);
-    return 0;
-}
-
-/* Generate a Command Complete event with only the Status parameter */
-static inline void bt_hci_event_complete_status(struct bt_hci_s *hci,
-                uint8_t status)
-{
-    bt_hci_event_complete(hci, &status, 1);
-}
-
-static inline void bt_hci_event_complete_conn_cancel(struct bt_hci_s *hci,
-                uint8_t status, bdaddr_t *bd_addr)
-{
-    create_conn_cancel_rp params = {
-        .status = status,
-        .bdaddr = BAINIT(bd_addr),
-    };
-
-    bt_hci_event_complete(hci, &params, CREATE_CONN_CANCEL_RP_SIZE);
-}
-
-static inline void bt_hci_event_auth_complete(struct bt_hci_s *hci,
-                uint16_t handle)
-{
-    evt_auth_complete params = {
-        .status = HCI_SUCCESS,
-        .handle = HNDL(handle),
-    };
-
-    bt_hci_event(hci, EVT_AUTH_COMPLETE, &params, EVT_AUTH_COMPLETE_SIZE);
-}
-
-static inline void bt_hci_event_encrypt_change(struct bt_hci_s *hci,
-                uint16_t handle, uint8_t mode)
-{
-    evt_encrypt_change params = {
-        .status		= HCI_SUCCESS,
-        .handle		= HNDL(handle),
-        .encrypt	= mode,
-    };
-
-    bt_hci_event(hci, EVT_ENCRYPT_CHANGE, &params, EVT_ENCRYPT_CHANGE_SIZE);
-}
-
-static inline void bt_hci_event_complete_name_cancel(struct bt_hci_s *hci,
-                bdaddr_t *bd_addr)
-{
-    remote_name_req_cancel_rp params = {
-        .status = HCI_INVALID_PARAMETERS,
-        .bdaddr = BAINIT(bd_addr),
-    };
-
-    bt_hci_event_complete(hci, &params, REMOTE_NAME_REQ_CANCEL_RP_SIZE);
-}
-
-static inline void bt_hci_event_read_remote_ext_features(struct bt_hci_s *hci,
-                uint16_t handle)
-{
-    evt_read_remote_ext_features_complete params = {
-        .status = HCI_UNSUPPORTED_FEATURE,
-        .handle = HNDL(handle),
-        /* Rest uninitialised */
-    };
-
-    bt_hci_event(hci, EVT_READ_REMOTE_EXT_FEATURES_COMPLETE,
-                    &params, EVT_READ_REMOTE_EXT_FEATURES_COMPLETE_SIZE);
-}
-
-static inline void bt_hci_event_complete_lmp_handle(struct bt_hci_s *hci,
-                uint16_t handle)
-{
-    read_lmp_handle_rp params = {
-        .status		= HCI_NO_CONNECTION,
-        .handle		= HNDL(handle),
-        .reserved	= 0,
-        /* Rest uninitialised */
-    };
-
-    bt_hci_event_complete(hci, &params, READ_LMP_HANDLE_RP_SIZE);
-}
-
-static inline void bt_hci_event_complete_role_discovery(struct bt_hci_s *hci,
-                int status, uint16_t handle, int master)
-{
-    role_discovery_rp params = {
-        .status		= status,
-        .handle		= HNDL(handle),
-        .role		= master ? 0x00 : 0x01,
-    };
-
-    bt_hci_event_complete(hci, &params, ROLE_DISCOVERY_RP_SIZE);
-}
-
-static inline void bt_hci_event_complete_flush(struct bt_hci_s *hci,
-                int status, uint16_t handle)
-{
-    flush_rp params = {
-        .status		= status,
-        .handle		= HNDL(handle),
-    };
-
-    bt_hci_event_complete(hci, &params, FLUSH_RP_SIZE);
-}
-
-static inline void bt_hci_event_complete_read_local_name(struct bt_hci_s *hci)
-{
-    read_local_name_rp params;
-    params.status = HCI_SUCCESS;
-    memset(params.name, 0, sizeof(params.name));
-    if (hci->device.lmp_name)
-        strncpy(params.name, hci->device.lmp_name, sizeof(params.name));
-
-    bt_hci_event_complete(hci, &params, READ_LOCAL_NAME_RP_SIZE);
-}
-
-static inline void bt_hci_event_complete_read_conn_accept_timeout(
-                struct bt_hci_s *hci)
-{
-    read_conn_accept_timeout_rp params = {
-        .status		= HCI_SUCCESS,
-        .timeout	= cpu_to_le16(hci->conn_accept_tout),
-    };
-
-    bt_hci_event_complete(hci, &params, READ_CONN_ACCEPT_TIMEOUT_RP_SIZE);
-}
-
-static inline void bt_hci_event_complete_read_scan_enable(struct bt_hci_s *hci)
-{
-    read_scan_enable_rp params = {
-        .status = HCI_SUCCESS,
-        .enable =
-                (hci->device.inquiry_scan ? SCAN_INQUIRY : 0) |
-                (hci->device.page_scan ? SCAN_PAGE : 0),
-    };
-
-    bt_hci_event_complete(hci, &params, READ_SCAN_ENABLE_RP_SIZE);
-}
-
-static inline void bt_hci_event_complete_read_local_class(struct bt_hci_s *hci)
-{
-    read_class_of_dev_rp params;
-
-    params.status = HCI_SUCCESS;
-    memcpy(params.dev_class, hci->device.class, sizeof(params.dev_class));
-
-    bt_hci_event_complete(hci, &params, READ_CLASS_OF_DEV_RP_SIZE);
-}
-
-static inline void bt_hci_event_complete_voice_setting(struct bt_hci_s *hci)
-{
-    read_voice_setting_rp params = {
-        .status		= HCI_SUCCESS,
-        .voice_setting	= hci->voice_setting,	/* Note: no swapping */
-    };
-
-    bt_hci_event_complete(hci, &params, READ_VOICE_SETTING_RP_SIZE);
-}
-
-static inline void bt_hci_event_complete_read_inquiry_mode(
-                struct bt_hci_s *hci)
-{
-    read_inquiry_mode_rp params = {
-        .status		= HCI_SUCCESS,
-        .mode		= hci->lm.inquiry_mode,
-    };
-
-    bt_hci_event_complete(hci, &params, READ_INQUIRY_MODE_RP_SIZE);
-}
-
-static inline void bt_hci_event_num_comp_pkts(struct bt_hci_s *hci,
-                uint16_t handle, int packets)
-{
-    uint16_t buf[EVT_NUM_COMP_PKTS_SIZE(1) / 2 + 1];
-    evt_num_comp_pkts *params = (void *) ((uint8_t *) buf + 1);
-
-    params->num_hndl			= 1;
-    params->connection->handle		= HNDL(handle);
-    params->connection->num_packets	= cpu_to_le16(packets);
-
-    bt_hci_event(hci, EVT_NUM_COMP_PKTS, params, EVT_NUM_COMP_PKTS_SIZE(1));
-}
-
-static void bt_submit_hci(struct HCIInfo *info,
-                const uint8_t *data, int length)
-{
-    struct bt_hci_s *hci = hci_from_info(info);
-    uint16_t cmd;
-    int paramlen, i;
-
-    if (length < HCI_COMMAND_HDR_SIZE)
-        goto short_hci;
-
-    memcpy(&hci->last_cmd, data, 2);
-
-    cmd = (data[1] << 8) | data[0];
-    paramlen = data[2];
-    if (cmd_opcode_ogf(cmd) == 0 || cmd_opcode_ocf(cmd) == 0)	/* NOP */
-        return;
-
-    data += HCI_COMMAND_HDR_SIZE;
-    length -= HCI_COMMAND_HDR_SIZE;
-
-    if (paramlen > length)
-        return;
-
-#define PARAM(cmd, param)	(((cmd##_cp *) data)->param)
-#define PARAM16(cmd, param)	le16_to_cpup(&PARAM(cmd, param))
-#define PARAMHANDLE(cmd)	HNDL(PARAM(cmd, handle))
-#define LENGTH_CHECK(cmd)	if (length < sizeof(cmd##_cp)) goto short_hci
-    /* Note: the supported commands bitmask in bt_hci_read_local_commands_rp
-     * needs to be updated every time a command is implemented here!  */
-    switch (cmd) {
-    case cmd_opcode_pack(OGF_LINK_CTL, OCF_INQUIRY):
-        LENGTH_CHECK(inquiry);
-
-        if (PARAM(inquiry, length) < 1) {
-            bt_hci_event_complete_status(hci, HCI_INVALID_PARAMETERS);
-            break;
-        }
-
-        hci->lm.inquire = 1;
-        hci->lm.periodic = 0;
-        hci->lm.responses_left = PARAM(inquiry, num_rsp) ?: INT_MAX;
-        hci->lm.responses = 0;
-        bt_hci_event_status(hci, HCI_SUCCESS);
-        bt_hci_inquiry_start(hci, PARAM(inquiry, length));
-        break;
-
-    case cmd_opcode_pack(OGF_LINK_CTL, OCF_INQUIRY_CANCEL):
-        if (!hci->lm.inquire || hci->lm.periodic) {
-            fprintf(stderr, "%s: Inquiry Cancel should only be issued after "
-                            "the Inquiry command has been issued, a Command "
-                            "Status event has been received for the Inquiry "
-                            "command, and before the Inquiry Complete event "
-                            "occurs", __FUNCTION__);
-            bt_hci_event_complete_status(hci, HCI_COMMAND_DISALLOWED);
-            break;
-        }
-
-        hci->lm.inquire = 0;
-        timer_del(hci->lm.inquiry_done);
-        bt_hci_event_complete_status(hci, HCI_SUCCESS);
-        break;
-
-    case cmd_opcode_pack(OGF_LINK_CTL, OCF_PERIODIC_INQUIRY):
-        LENGTH_CHECK(periodic_inquiry);
-
-        if (!(PARAM(periodic_inquiry, length) <
-                                PARAM16(periodic_inquiry, min_period) &&
-                                PARAM16(periodic_inquiry, min_period) <
-                                PARAM16(periodic_inquiry, max_period)) ||
-                        PARAM(periodic_inquiry, length) < 1 ||
-                        PARAM16(periodic_inquiry, min_period) < 2 ||
-                        PARAM16(periodic_inquiry, max_period) < 3) {
-            bt_hci_event_complete_status(hci, HCI_INVALID_PARAMETERS);
-            break;
-        }
-
-        hci->lm.inquire = 1;
-        hci->lm.periodic = 1;
-        hci->lm.responses_left = PARAM(periodic_inquiry, num_rsp);
-        hci->lm.responses = 0;
-        hci->lm.inquiry_period = PARAM16(periodic_inquiry, max_period);
-        bt_hci_event_complete_status(hci, HCI_SUCCESS);
-        bt_hci_inquiry_start(hci, PARAM(periodic_inquiry, length));
-        break;
-
-    case cmd_opcode_pack(OGF_LINK_CTL, OCF_EXIT_PERIODIC_INQUIRY):
-        if (!hci->lm.inquire || !hci->lm.periodic) {
-            fprintf(stderr, "%s: Inquiry Cancel should only be issued after "
-                            "the Inquiry command has been issued, a Command "
-                            "Status event has been received for the Inquiry "
-                            "command, and before the Inquiry Complete event "
-                            "occurs", __FUNCTION__);
-            bt_hci_event_complete_status(hci, HCI_COMMAND_DISALLOWED);
-            break;
-        }
-        hci->lm.inquire = 0;
-        timer_del(hci->lm.inquiry_done);
-        timer_del(hci->lm.inquiry_next);
-        bt_hci_event_complete_status(hci, HCI_SUCCESS);
-        break;
-
-    case cmd_opcode_pack(OGF_LINK_CTL, OCF_CREATE_CONN):
-        LENGTH_CHECK(create_conn);
-
-        if (hci->lm.connecting >= HCI_HANDLES_MAX) {
-            bt_hci_event_status(hci, HCI_REJECTED_LIMITED_RESOURCES);
-            break;
-        }
-        bt_hci_event_status(hci, HCI_SUCCESS);
-
-        if (bt_hci_connect(hci, &PARAM(create_conn, bdaddr)))
-            bt_hci_connection_reject_event(hci, &PARAM(create_conn, bdaddr));
-        break;
-
-    case cmd_opcode_pack(OGF_LINK_CTL, OCF_DISCONNECT):
-        LENGTH_CHECK(disconnect);
-
-        if (bt_hci_handle_bad(hci, PARAMHANDLE(disconnect))) {
-            bt_hci_event_status(hci, HCI_NO_CONNECTION);
-            break;
-        }
-
-        bt_hci_event_status(hci, HCI_SUCCESS);
-        bt_hci_disconnect(hci, PARAMHANDLE(disconnect),
-                        PARAM(disconnect, reason));
-        break;
-
-    case cmd_opcode_pack(OGF_LINK_CTL, OCF_CREATE_CONN_CANCEL):
-        LENGTH_CHECK(create_conn_cancel);
-
-        if (bt_hci_lmp_connection_ready(hci,
-                                &PARAM(create_conn_cancel, bdaddr))) {
-            for (i = 0; i < HCI_HANDLES_MAX; i ++)
-                if (bt_hci_role_master(hci, i) && hci->lm.handle[i].link &&
-                                !bacmp(&hci->lm.handle[i].link->slave->bd_addr,
-                                        &PARAM(create_conn_cancel, bdaddr)))
-                   break;
-
-            bt_hci_event_complete_conn_cancel(hci, i < HCI_HANDLES_MAX ?
-                            HCI_ACL_CONNECTION_EXISTS : HCI_NO_CONNECTION,
-                            &PARAM(create_conn_cancel, bdaddr));
-        } else
-            bt_hci_event_complete_conn_cancel(hci, HCI_SUCCESS,
-                            &PARAM(create_conn_cancel, bdaddr));
-        break;
-
-    case cmd_opcode_pack(OGF_LINK_CTL, OCF_ACCEPT_CONN_REQ):
-        LENGTH_CHECK(accept_conn_req);
-
-        if (!hci->conn_req_host ||
-                        bacmp(&PARAM(accept_conn_req, bdaddr),
-                                &hci->conn_req_host->bd_addr)) {
-            bt_hci_event_status(hci, HCI_INVALID_PARAMETERS);
-            break;
-        }
-
-        bt_hci_event_status(hci, HCI_SUCCESS);
-        bt_hci_connection_accept(hci, hci->conn_req_host);
-        hci->conn_req_host = NULL;
-        break;
-
-    case cmd_opcode_pack(OGF_LINK_CTL, OCF_REJECT_CONN_REQ):
-        LENGTH_CHECK(reject_conn_req);
-
-        if (!hci->conn_req_host ||
-                        bacmp(&PARAM(reject_conn_req, bdaddr),
-                                &hci->conn_req_host->bd_addr)) {
-            bt_hci_event_status(hci, HCI_INVALID_PARAMETERS);
-            break;
-        }
-
-        bt_hci_event_status(hci, HCI_SUCCESS);
-        bt_hci_connection_reject(hci, hci->conn_req_host,
-                        PARAM(reject_conn_req, reason));
-        bt_hci_connection_reject_event(hci, &hci->conn_req_host->bd_addr);
-        hci->conn_req_host = NULL;
-        break;
-
-    case cmd_opcode_pack(OGF_LINK_CTL, OCF_AUTH_REQUESTED):
-        LENGTH_CHECK(auth_requested);
-
-        if (bt_hci_handle_bad(hci, PARAMHANDLE(auth_requested)))
-            bt_hci_event_status(hci, HCI_NO_CONNECTION);
-        else {
-            bt_hci_event_status(hci, HCI_SUCCESS);
-            bt_hci_event_auth_complete(hci, PARAMHANDLE(auth_requested));
-        }
-        break;
-
-    case cmd_opcode_pack(OGF_LINK_CTL, OCF_SET_CONN_ENCRYPT):
-        LENGTH_CHECK(set_conn_encrypt);
-
-        if (bt_hci_handle_bad(hci, PARAMHANDLE(set_conn_encrypt)))
-            bt_hci_event_status(hci, HCI_NO_CONNECTION);
-        else {
-            bt_hci_event_status(hci, HCI_SUCCESS);
-            bt_hci_event_encrypt_change(hci,
-                            PARAMHANDLE(set_conn_encrypt),
-                            PARAM(set_conn_encrypt, encrypt));
-        }
-        break;
-
-    case cmd_opcode_pack(OGF_LINK_CTL, OCF_REMOTE_NAME_REQ):
-        LENGTH_CHECK(remote_name_req);
-
-        if (bt_hci_name_req(hci, &PARAM(remote_name_req, bdaddr)))
-            bt_hci_event_status(hci, HCI_NO_CONNECTION);
-        break;
-
-    case cmd_opcode_pack(OGF_LINK_CTL, OCF_REMOTE_NAME_REQ_CANCEL):
-        LENGTH_CHECK(remote_name_req_cancel);
-
-        bt_hci_event_complete_name_cancel(hci,
-                        &PARAM(remote_name_req_cancel, bdaddr));
-        break;
-
-    case cmd_opcode_pack(OGF_LINK_CTL, OCF_READ_REMOTE_FEATURES):
-        LENGTH_CHECK(read_remote_features);
-
-        if (bt_hci_features_req(hci, PARAMHANDLE(read_remote_features)))
-            bt_hci_event_status(hci, HCI_NO_CONNECTION);
-        break;
-
-    case cmd_opcode_pack(OGF_LINK_CTL, OCF_READ_REMOTE_EXT_FEATURES):
-        LENGTH_CHECK(read_remote_ext_features);
-
-        if (bt_hci_handle_bad(hci, PARAMHANDLE(read_remote_ext_features)))
-            bt_hci_event_status(hci, HCI_NO_CONNECTION);
-        else {
-            bt_hci_event_status(hci, HCI_SUCCESS);
-            bt_hci_event_read_remote_ext_features(hci,
-                            PARAMHANDLE(read_remote_ext_features));
-        }
-        break;
-
-    case cmd_opcode_pack(OGF_LINK_CTL, OCF_READ_REMOTE_VERSION):
-        LENGTH_CHECK(read_remote_version);
-
-        if (bt_hci_version_req(hci, PARAMHANDLE(read_remote_version)))
-            bt_hci_event_status(hci, HCI_NO_CONNECTION);
-        break;
-
-    case cmd_opcode_pack(OGF_LINK_CTL, OCF_READ_CLOCK_OFFSET):
-        LENGTH_CHECK(read_clock_offset);
-
-        if (bt_hci_clkoffset_req(hci, PARAMHANDLE(read_clock_offset)))
-            bt_hci_event_status(hci, HCI_NO_CONNECTION);
-        break;
-
-    case cmd_opcode_pack(OGF_LINK_CTL, OCF_READ_LMP_HANDLE):
-        LENGTH_CHECK(read_lmp_handle);
-
-        /* TODO: */
-        bt_hci_event_complete_lmp_handle(hci, PARAMHANDLE(read_lmp_handle));
-        break;
-
-    case cmd_opcode_pack(OGF_LINK_POLICY, OCF_HOLD_MODE):
-        LENGTH_CHECK(hold_mode);
-
-        if (PARAM16(hold_mode, min_interval) >
-                        PARAM16(hold_mode, max_interval) ||
-                        PARAM16(hold_mode, min_interval) < 0x0002 ||
-                        PARAM16(hold_mode, max_interval) > 0xff00 ||
-                        (PARAM16(hold_mode, min_interval) & 1) ||
-                        (PARAM16(hold_mode, max_interval) & 1)) {
-            bt_hci_event_status(hci, HCI_INVALID_PARAMETERS);
-            break;
-        }
-
-        if (bt_hci_mode_change(hci, PARAMHANDLE(hold_mode),
-                                PARAM16(hold_mode, max_interval),
-                                acl_hold))
-            bt_hci_event_status(hci, HCI_NO_CONNECTION);
-        break;
-
-    case cmd_opcode_pack(OGF_LINK_POLICY, OCF_PARK_MODE):
-        LENGTH_CHECK(park_mode);
-
-        if (PARAM16(park_mode, min_interval) >
-                        PARAM16(park_mode, max_interval) ||
-                        PARAM16(park_mode, min_interval) < 0x000e ||
-                        (PARAM16(park_mode, min_interval) & 1) ||
-                        (PARAM16(park_mode, max_interval) & 1)) {
-            bt_hci_event_status(hci, HCI_INVALID_PARAMETERS);
-            break;
-        }
-
-        if (bt_hci_mode_change(hci, PARAMHANDLE(park_mode),
-                                PARAM16(park_mode, max_interval),
-                                acl_parked))
-            bt_hci_event_status(hci, HCI_NO_CONNECTION);
-        break;
-
-    case cmd_opcode_pack(OGF_LINK_POLICY, OCF_EXIT_PARK_MODE):
-        LENGTH_CHECK(exit_park_mode);
-
-        if (bt_hci_mode_cancel(hci, PARAMHANDLE(exit_park_mode),
-                                acl_parked))
-            bt_hci_event_status(hci, HCI_NO_CONNECTION);
-        break;
-
-    case cmd_opcode_pack(OGF_LINK_POLICY, OCF_ROLE_DISCOVERY):
-        LENGTH_CHECK(role_discovery);
-
-        if (bt_hci_handle_bad(hci, PARAMHANDLE(role_discovery)))
-            bt_hci_event_complete_role_discovery(hci,
-                            HCI_NO_CONNECTION, PARAMHANDLE(role_discovery), 0);
-        else
-            bt_hci_event_complete_role_discovery(hci,
-                            HCI_SUCCESS, PARAMHANDLE(role_discovery),
-                            bt_hci_role_master(hci,
-                                    PARAMHANDLE(role_discovery)));
-        break;
-
-    case cmd_opcode_pack(OGF_HOST_CTL, OCF_SET_EVENT_MASK):
-        LENGTH_CHECK(set_event_mask);
-
-        memcpy(hci->event_mask, PARAM(set_event_mask, mask), 8);
-        bt_hci_event_complete_status(hci, HCI_SUCCESS);
-        break;
-
-    case cmd_opcode_pack(OGF_HOST_CTL, OCF_RESET):
-        bt_hci_reset(hci);
-        bt_hci_event_status(hci, HCI_SUCCESS);
-        break;
-
-    case cmd_opcode_pack(OGF_HOST_CTL, OCF_SET_EVENT_FLT):
-        if (length >= 1 && PARAM(set_event_flt, flt_type) == FLT_CLEAR_ALL)
-            /* No length check */;
-        else
-            LENGTH_CHECK(set_event_flt);
-
-        /* Filters are not implemented */
-        bt_hci_event_complete_status(hci, HCI_SUCCESS);
-        break;
-
-    case cmd_opcode_pack(OGF_HOST_CTL, OCF_FLUSH):
-        LENGTH_CHECK(flush);
-
-        if (bt_hci_handle_bad(hci, PARAMHANDLE(flush)))
-            bt_hci_event_complete_flush(hci,
-                            HCI_NO_CONNECTION, PARAMHANDLE(flush));
-        else {
-            /* TODO: ordering? */
-            bt_hci_event(hci, EVT_FLUSH_OCCURRED,
-                            &PARAM(flush, handle),
-                            EVT_FLUSH_OCCURRED_SIZE);
-            bt_hci_event_complete_flush(hci,
-                            HCI_SUCCESS, PARAMHANDLE(flush));
-        }
-        break;
-
-    case cmd_opcode_pack(OGF_HOST_CTL, OCF_CHANGE_LOCAL_NAME):
-        LENGTH_CHECK(change_local_name);
-
-        if (hci->device.lmp_name)
-            g_free((void *) hci->device.lmp_name);
-        hci->device.lmp_name = g_strndup(PARAM(change_local_name, name),
-                        sizeof(PARAM(change_local_name, name)));
-        bt_hci_event_complete_status(hci, HCI_SUCCESS);
-        break;
-
-    case cmd_opcode_pack(OGF_HOST_CTL, OCF_READ_LOCAL_NAME):
-        bt_hci_event_complete_read_local_name(hci);
-        break;
-
-    case cmd_opcode_pack(OGF_HOST_CTL, OCF_READ_CONN_ACCEPT_TIMEOUT):
-        bt_hci_event_complete_read_conn_accept_timeout(hci);
-        break;
-
-    case cmd_opcode_pack(OGF_HOST_CTL, OCF_WRITE_CONN_ACCEPT_TIMEOUT):
-        /* TODO */
-        LENGTH_CHECK(write_conn_accept_timeout);
-
-        if (PARAM16(write_conn_accept_timeout, timeout) < 0x0001 ||
-                        PARAM16(write_conn_accept_timeout, timeout) > 0xb540) {
-            bt_hci_event_complete_status(hci, HCI_INVALID_PARAMETERS);
-            break;
-        }
-
-        hci->conn_accept_tout = PARAM16(write_conn_accept_timeout, timeout);
-        bt_hci_event_complete_status(hci, HCI_SUCCESS);
-        break;
-
-    case cmd_opcode_pack(OGF_HOST_CTL, OCF_READ_SCAN_ENABLE):
-        bt_hci_event_complete_read_scan_enable(hci);
-        break;
-
-    case cmd_opcode_pack(OGF_HOST_CTL, OCF_WRITE_SCAN_ENABLE):
-        LENGTH_CHECK(write_scan_enable);
-
-        /* TODO: check that the remaining bits are all 0 */
-        hci->device.inquiry_scan =
-                !!(PARAM(write_scan_enable, scan_enable) & SCAN_INQUIRY);
-        hci->device.page_scan =
-                !!(PARAM(write_scan_enable, scan_enable) & SCAN_PAGE);
-        bt_hci_event_complete_status(hci, HCI_SUCCESS);
-        break;
-
-    case cmd_opcode_pack(OGF_HOST_CTL, OCF_READ_CLASS_OF_DEV):
-        bt_hci_event_complete_read_local_class(hci);
-        break;
-
-    case cmd_opcode_pack(OGF_HOST_CTL, OCF_WRITE_CLASS_OF_DEV):
-        LENGTH_CHECK(write_class_of_dev);
-
-        memcpy(hci->device.class, PARAM(write_class_of_dev, dev_class),
-                        sizeof(PARAM(write_class_of_dev, dev_class)));
-        bt_hci_event_complete_status(hci, HCI_SUCCESS);
-        break;
-
-    case cmd_opcode_pack(OGF_HOST_CTL, OCF_READ_VOICE_SETTING):
-        bt_hci_event_complete_voice_setting(hci);
-        break;
-
-    case cmd_opcode_pack(OGF_HOST_CTL, OCF_WRITE_VOICE_SETTING):
-        LENGTH_CHECK(write_voice_setting);
-
-        hci->voice_setting = PARAM(write_voice_setting, voice_setting);
-        bt_hci_event_complete_status(hci, HCI_SUCCESS);
-        break;
-
-    case cmd_opcode_pack(OGF_HOST_CTL, OCF_HOST_NUMBER_OF_COMPLETED_PACKETS):
-        if (length < data[0] * 2 + 1)
-            goto short_hci;
-
-        for (i = 0; i < data[0]; i ++)
-            if (bt_hci_handle_bad(hci,
-                                    data[i * 2 + 1] | (data[i * 2 + 2] << 8)))
-                bt_hci_event_complete_status(hci, HCI_INVALID_PARAMETERS);
-        break;
-
-    case cmd_opcode_pack(OGF_HOST_CTL, OCF_READ_INQUIRY_MODE):
-        /* Only if (local_features[3] & 0x40) && (local_commands[12] & 0x40)
-         * else
-         *     goto unknown_command */
-        bt_hci_event_complete_read_inquiry_mode(hci);
-        break;
-
-    case cmd_opcode_pack(OGF_HOST_CTL, OCF_WRITE_INQUIRY_MODE):
-        /* Only if (local_features[3] & 0x40) && (local_commands[12] & 0x80)
-         * else
-         *     goto unknown_command */
-        LENGTH_CHECK(write_inquiry_mode);
-
-        if (PARAM(write_inquiry_mode, mode) > 0x01) {
-            bt_hci_event_complete_status(hci, HCI_INVALID_PARAMETERS);
-            break;
-        }
-
-        hci->lm.inquiry_mode = PARAM(write_inquiry_mode, mode);
-        bt_hci_event_complete_status(hci, HCI_SUCCESS);
-        break;
-
-    case cmd_opcode_pack(OGF_INFO_PARAM, OCF_READ_LOCAL_VERSION):
-        bt_hci_read_local_version_rp(hci);
-        break;
-
-    case cmd_opcode_pack(OGF_INFO_PARAM, OCF_READ_LOCAL_COMMANDS):
-        bt_hci_read_local_commands_rp(hci);
-        break;
-
-    case cmd_opcode_pack(OGF_INFO_PARAM, OCF_READ_LOCAL_FEATURES):
-        bt_hci_read_local_features_rp(hci);
-        break;
-
-    case cmd_opcode_pack(OGF_INFO_PARAM, OCF_READ_LOCAL_EXT_FEATURES):
-        LENGTH_CHECK(read_local_ext_features);
-
-        bt_hci_read_local_ext_features_rp(hci,
-                        PARAM(read_local_ext_features, page_num));
-        break;
-
-    case cmd_opcode_pack(OGF_INFO_PARAM, OCF_READ_BUFFER_SIZE):
-        bt_hci_read_buffer_size_rp(hci);
-        break;
-
-    case cmd_opcode_pack(OGF_INFO_PARAM, OCF_READ_COUNTRY_CODE):
-        bt_hci_read_country_code_rp(hci);
-        break;
-
-    case cmd_opcode_pack(OGF_INFO_PARAM, OCF_READ_BD_ADDR):
-        bt_hci_read_bd_addr_rp(hci);
-        break;
-
-    case cmd_opcode_pack(OGF_STATUS_PARAM, OCF_READ_LINK_QUALITY):
-        LENGTH_CHECK(read_link_quality);
-
-        bt_hci_link_quality_rp(hci, PARAMHANDLE(read_link_quality));
-        break;
-
-    default:
-        bt_hci_event_status(hci, HCI_UNKNOWN_COMMAND);
-        break;
-
-    short_hci:
-        fprintf(stderr, "%s: HCI packet too short (%iB)\n",
-                        __FUNCTION__, length);
-        bt_hci_event_status(hci, HCI_INVALID_PARAMETERS);
-        break;
-    }
-}
-
-/* We could perform fragmentation here, we can't do "recombination" because
- * at this layer the length of the payload is not known ahead, so we only
- * know that a packet contained the last fragment of the SDU when the next
- * SDU starts.  */
-static inline void bt_hci_lmp_acl_data(struct bt_hci_s *hci, uint16_t handle,
-                const uint8_t *data, int start, int len)
-{
-    struct hci_acl_hdr *pkt = (void *) hci->acl_buf;
-
-    /* TODO: packet flags */
-    /* TODO: avoid memcpy'ing */
-
-    if (len + HCI_ACL_HDR_SIZE > sizeof(hci->acl_buf)) {
-        fprintf(stderr, "%s: can't take ACL packets %i bytes long\n",
-                        __FUNCTION__, len);
-        return;
-    }
-    memcpy(hci->acl_buf + HCI_ACL_HDR_SIZE, data, len);
-
-    pkt->handle = cpu_to_le16(
-                    acl_handle_pack(handle, start ? ACL_START : ACL_CONT));
-    pkt->dlen = cpu_to_le16(len);
-    hci->info.acl_recv(hci->info.opaque,
-                    hci->acl_buf, len + HCI_ACL_HDR_SIZE);
-}
-
-static void bt_hci_lmp_acl_data_slave(struct bt_link_s *btlink,
-                const uint8_t *data, int start, int len)
-{
-    struct bt_hci_link_s *link = (struct bt_hci_link_s *) btlink;
-
-    bt_hci_lmp_acl_data(hci_from_device(btlink->slave),
-                    link->handle, data, start, len);
-}
-
-static void bt_hci_lmp_acl_data_host(struct bt_link_s *link,
-                const uint8_t *data, int start, int len)
-{
-    bt_hci_lmp_acl_data(hci_from_device(link->host),
-                    link->handle, data, start, len);
-}
-
-static void bt_submit_acl(struct HCIInfo *info,
-                const uint8_t *data, int length)
-{
-    struct bt_hci_s *hci = hci_from_info(info);
-    uint16_t handle;
-    int datalen, flags;
-    struct bt_link_s *link;
-
-    if (length < HCI_ACL_HDR_SIZE) {
-        fprintf(stderr, "%s: ACL packet too short (%iB)\n",
-                        __FUNCTION__, length);
-        return;
-    }
-
-    handle = acl_handle((data[1] << 8) | data[0]);
-    flags = acl_flags((data[1] << 8) | data[0]);
-    datalen = (data[3] << 8) | data[2];
-    data += HCI_ACL_HDR_SIZE;
-    length -= HCI_ACL_HDR_SIZE;
-
-    if (bt_hci_handle_bad(hci, handle)) {
-        fprintf(stderr, "%s: invalid ACL handle %03x\n",
-                        __FUNCTION__, handle);
-        /* TODO: signal an error */
-        return;
-    }
-    handle &= ~HCI_HANDLE_OFFSET;
-
-    if (datalen > length) {
-        fprintf(stderr, "%s: ACL packet too short (%iB < %iB)\n",
-                        __FUNCTION__, length, datalen);
-        return;
-    }
-
-    link = hci->lm.handle[handle].link;
-
-    if ((flags & ~3) == ACL_ACTIVE_BCAST) {
-        if (!hci->asb_handle)
-            hci->asb_handle = handle;
-        else if (handle != hci->asb_handle) {
-            fprintf(stderr, "%s: Bad handle %03x in Active Slave Broadcast\n",
-                            __FUNCTION__, handle);
-            /* TODO: signal an error */
-            return;
-        }
-
-        /* TODO */
-    }
-
-    if ((flags & ~3) == ACL_PICO_BCAST) {
-        if (!hci->psb_handle)
-            hci->psb_handle = handle;
-        else if (handle != hci->psb_handle) {
-            fprintf(stderr, "%s: Bad handle %03x in Parked Slave Broadcast\n",
-                            __FUNCTION__, handle);
-            /* TODO: signal an error */
-            return;
-        }
-
-        /* TODO */
-    }
-
-    /* TODO: increase counter and send EVT_NUM_COMP_PKTS */
-    bt_hci_event_num_comp_pkts(hci, handle | HCI_HANDLE_OFFSET, 1);
-
-    /* Do this last as it can trigger further events even in this HCI */
-    hci->lm.handle[handle].lmp_acl_data(link, data,
-                    (flags & 3) == ACL_START, length);
-}
-
-static void bt_submit_sco(struct HCIInfo *info,
-                const uint8_t *data, int length)
-{
-    struct bt_hci_s *hci = hci_from_info(info);
-    uint16_t handle;
-    int datalen;
-
-    if (length < 3)
-        return;
-
-    handle = acl_handle((data[1] << 8) | data[0]);
-    datalen = data[2];
-    length -= 3;
-
-    if (bt_hci_handle_bad(hci, handle)) {
-        fprintf(stderr, "%s: invalid SCO handle %03x\n",
-                        __FUNCTION__, handle);
-        return;
-    }
-
-    if (datalen > length) {
-        fprintf(stderr, "%s: SCO packet too short (%iB < %iB)\n",
-                        __FUNCTION__, length, datalen);
-        return;
-    }
-
-    /* TODO */
-
-    /* TODO: increase counter and send EVT_NUM_COMP_PKTS if synchronous
-     * Flow Control is enabled.
-     * (See Read/Write_Synchronous_Flow_Control_Enable on page 513 and
-     * page 514.)  */
-}
-
-static uint8_t *bt_hci_evt_packet(void *opaque)
-{
-    /* TODO: allocate a packet from upper layer */
-    struct bt_hci_s *s = opaque;
-
-    return s->evt_buf;
-}
-
-static void bt_hci_evt_submit(void *opaque, int len)
-{
-    /* TODO: notify upper layer */
-    struct bt_hci_s *s = opaque;
-
-    s->info.evt_recv(s->info.opaque, s->evt_buf, len);
-}
-
-static int bt_hci_bdaddr_set(struct HCIInfo *info, const uint8_t *bd_addr)
-{
-    struct bt_hci_s *hci = hci_from_info(info);
-
-    bacpy(&hci->device.bd_addr, (const bdaddr_t *) bd_addr);
-    return 0;
-}
-
-static void bt_hci_done(struct HCIInfo *info);
-static void bt_hci_destroy(struct bt_device_s *dev)
-{
-    struct bt_hci_s *hci = hci_from_device(dev);
-
-    bt_hci_done(&hci->info);
-}
-
-struct HCIInfo *bt_new_hci(struct bt_scatternet_s *net)
-{
-    struct bt_hci_s *s = g_malloc0(sizeof(struct bt_hci_s));
-
-    s->lm.inquiry_done = timer_new(QEMU_CLOCK_VIRTUAL, SCALE_NS, bt_hci_inquiry_done, s);
-    s->lm.inquiry_next = timer_new(QEMU_CLOCK_VIRTUAL, SCALE_NS, bt_hci_inquiry_next, s);
-    s->conn_accept_timer =
-            timer_new(QEMU_CLOCK_VIRTUAL, SCALE_NS, bt_hci_conn_accept_timeout, s);
-
-    s->evt_packet = bt_hci_evt_packet;
-    s->evt_submit = bt_hci_evt_submit;
-    s->opaque = s;
-
-    bt_device_init(&s->device, net);
-    s->device.lmp_connection_request = bt_hci_lmp_connection_request;
-    s->device.lmp_connection_complete = bt_hci_lmp_connection_complete;
-    s->device.lmp_disconnect_master = bt_hci_lmp_disconnect_host;
-    s->device.lmp_disconnect_slave = bt_hci_lmp_disconnect_slave;
-    s->device.lmp_acl_data = bt_hci_lmp_acl_data_slave;
-    s->device.lmp_acl_resp = bt_hci_lmp_acl_data_host;
-    s->device.lmp_mode_change = bt_hci_lmp_mode_change_slave;
-
-    /* Keep updated! */
-    /* Also keep in sync with supported commands bitmask in
-     * bt_hci_read_local_commands_rp */
-    s->device.lmp_caps = 0x8000199b7e85355fll;
-
-    bt_hci_reset(s);
-
-    s->info.cmd_send = bt_submit_hci;
-    s->info.sco_send = bt_submit_sco;
-    s->info.acl_send = bt_submit_acl;
-    s->info.bdaddr_set = bt_hci_bdaddr_set;
-
-    s->device.handle_destroy = bt_hci_destroy;
-
-    return &s->info;
-}
-
-static void bt_hci_done(struct HCIInfo *info)
-{
-    struct bt_hci_s *hci = hci_from_info(info);
-    int handle;
-
-    bt_device_done(&hci->device);
-
-    if (hci->device.lmp_name)
-        g_free((void *) hci->device.lmp_name);
-
-    /* Be gentle and send DISCONNECT to all connected peers and those
-     * currently waiting for us to accept or reject a connection request.
-     * This frees the links.  */
-    if (hci->conn_req_host) {
-        bt_hci_connection_reject(hci,
-                                 hci->conn_req_host, HCI_OE_POWER_OFF);
-        return;
-    }
-
-    for (handle = HCI_HANDLE_OFFSET;
-                    handle < (HCI_HANDLE_OFFSET | HCI_HANDLES_MAX); handle ++)
-        if (!bt_hci_handle_bad(hci, handle))
-            bt_hci_disconnect(hci, handle, HCI_OE_POWER_OFF);
-
-    /* TODO: this is not enough actually, there may be slaves from whom
-     * we have requested a connection who will soon (or not) respond with
-     * an accept or a reject, so we should also check if hci->lm.connecting
-     * is non-zero and if so, avoid freeing the hci but otherwise disappear
-     * from all qemu social life (e.g. stop scanning and request to be
-     * removed from s->device.net) and arrange for
-     * s->device.lmp_connection_complete to free the remaining bits once
-     * hci->lm.awaiting_bdaddr[] is empty.  */
-
-    timer_free(hci->lm.inquiry_done);
-    timer_free(hci->lm.inquiry_next);
-    timer_free(hci->conn_accept_timer);
-
-    g_free(hci);
-}
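
(Context for the removal above, not part of the patch: bt_submit_hci() reads the
opcode as data[0] | (data[1] << 8) and dispatches on values built with
cmd_opcode_pack(OGF, OCF), treating a zero OGF or OCF as a NOP. The sketch below
shows the assumed packing -- OGF in the upper 6 bits, OCF in the lower 10 of the
16-bit opcode -- using illustrative demo_* names rather than the real hw/bt.h
macros.)

#include <stdint.h>

/* Illustrative sketch of the opcode split assumed by the dispatch code. */
static inline uint16_t demo_opcode_pack(uint16_t ogf, uint16_t ocf)
{
    return (uint16_t)((ogf << 10) | (ocf & 0x03ff)); /* OGF:6 | OCF:10 */
}

static inline uint16_t demo_opcode_ogf(uint16_t opcode)
{
    return opcode >> 10;    /* command group, e.g. OGF_LINK_CTL */
}

static inline uint16_t demo_opcode_ocf(uint16_t opcode)
{
    return opcode & 0x03ff; /* command within the group */
}
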
diff --git a/hw/bt/hid.c b/hw/bt/hid.c
deleted file mode 100644
index 37c4239..0000000
--- a/hw/bt/hid.c
+++ /dev/null
@@ -1,570 +0,0 @@
-/*
- * QEMU Bluetooth HID Profile wrapper for USB HID.
- *
- * Copyright (C) 2007-2008 OpenMoko, Inc.
- * Written by Andrzej Zaborowski <andrew@openedhand.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 or
- * (at your option) version 3 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "qemu-common.h"
-#include "hw/usb.h"
-#include "hw/bt.h"
-
-enum hid_transaction_req {
-    BT_HANDSHAKE			= 0x0,
-    BT_HID_CONTROL			= 0x1,
-    BT_GET_REPORT			= 0x4,
-    BT_SET_REPORT			= 0x5,
-    BT_GET_PROTOCOL			= 0x6,
-    BT_SET_PROTOCOL			= 0x7,
-    BT_GET_IDLE				= 0x8,
-    BT_SET_IDLE				= 0x9,
-    BT_DATA				= 0xa,
-    BT_DATC				= 0xb,
-};
-
-enum hid_transaction_handshake {
-    BT_HS_SUCCESSFUL			= 0x0,
-    BT_HS_NOT_READY			= 0x1,
-    BT_HS_ERR_INVALID_REPORT_ID		= 0x2,
-    BT_HS_ERR_UNSUPPORTED_REQUEST	= 0x3,
-    BT_HS_ERR_INVALID_PARAMETER		= 0x4,
-    BT_HS_ERR_UNKNOWN			= 0xe,
-    BT_HS_ERR_FATAL			= 0xf,
-};
-
-enum hid_transaction_control {
-    BT_HC_NOP				= 0x0,
-    BT_HC_HARD_RESET			= 0x1,
-    BT_HC_SOFT_RESET			= 0x2,
-    BT_HC_SUSPEND			= 0x3,
-    BT_HC_EXIT_SUSPEND			= 0x4,
-    BT_HC_VIRTUAL_CABLE_UNPLUG		= 0x5,
-};
-
-enum hid_protocol {
-    BT_HID_PROTO_BOOT			= 0,
-    BT_HID_PROTO_REPORT			= 1,
-};
-
-enum hid_boot_reportid {
-    BT_HID_BOOT_INVALID			= 0,
-    BT_HID_BOOT_KEYBOARD,
-    BT_HID_BOOT_MOUSE,
-};
-
-enum hid_data_pkt {
-    BT_DATA_OTHER			= 0,
-    BT_DATA_INPUT,
-    BT_DATA_OUTPUT,
-    BT_DATA_FEATURE,
-};
-
-#define BT_HID_MTU			48
-
-/* HID interface requests */
-#define GET_REPORT			0xa101
-#define GET_IDLE			0xa102
-#define GET_PROTOCOL			0xa103
-#define SET_REPORT			0x2109
-#define SET_IDLE			0x210a
-#define SET_PROTOCOL			0x210b
-
-struct bt_hid_device_s {
-    struct bt_l2cap_device_s btdev;
-    struct bt_l2cap_conn_params_s *control;
-    struct bt_l2cap_conn_params_s *interrupt;
-    USBDevice *usbdev;
-
-    int proto;
-    int connected;
-    int data_type;
-    int intr_state;
-    struct {
-        int len;
-        uint8_t buffer[1024];
-    } dataother, datain, dataout, feature, intrdataout;
-    enum {
-        bt_state_ready,
-        bt_state_transaction,
-        bt_state_suspend,
-    } state;
-};
-
-static void bt_hid_reset(struct bt_hid_device_s *s)
-{
-    struct bt_scatternet_s *net = s->btdev.device.net;
-
-    /* Go as far as... */
-    bt_l2cap_device_done(&s->btdev);
-    bt_l2cap_device_init(&s->btdev, net);
-
-    s->usbdev->handle_reset(s->usbdev);
-    s->proto = BT_HID_PROTO_REPORT;
-    s->state = bt_state_ready;
-    s->dataother.len = 0;
-    s->datain.len = 0;
-    s->dataout.len = 0;
-    s->feature.len = 0;
-    s->intrdataout.len = 0;
-    s->intr_state = 0;
-}
-
-static int bt_hid_out(struct bt_hid_device_s *s)
-{
-    USBPacket p;
-
-    if (s->data_type == BT_DATA_OUTPUT) {
-        p.pid = USB_TOKEN_OUT;
-        p.devep = 1;
-        p.data = s->dataout.buffer;
-        p.len = s->dataout.len;
-        s->dataout.len = s->usbdev->handle_data(s->usbdev, &p);
-
-        return s->dataout.len;
-    }
-
-    if (s->data_type == BT_DATA_FEATURE) {
-        /* XXX:
-         * does this send a USB_REQ_CLEAR_FEATURE/USB_REQ_SET_FEATURE
-         * or a SET_REPORT? */
-        p.devep = 0;
-    }
-
-    return -1;
-}
-
-static int bt_hid_in(struct bt_hid_device_s *s)
-{
-    USBPacket p;
-
-    p.pid = USB_TOKEN_IN;
-    p.devep = 1;
-    p.data = s->datain.buffer;
-    p.len = sizeof(s->datain.buffer);
-    s->datain.len = s->usbdev->handle_data(s->usbdev, &p);
-
-    return s->datain.len;
-}
-
-static void bt_hid_send_handshake(struct bt_hid_device_s *s, int result)
-{
-    *s->control->sdu_out(s->control, 1) =
-            (BT_HANDSHAKE << 4) | result;
-    s->control->sdu_submit(s->control);
-}
-
-static void bt_hid_send_control(struct bt_hid_device_s *s, int operation)
-{
-    *s->control->sdu_out(s->control, 1) =
-            (BT_HID_CONTROL << 4) | operation;
-    s->control->sdu_submit(s->control);
-}
-
-static void bt_hid_disconnect(struct bt_hid_device_s *s)
-{
-    /* Disconnect s->control and s->interrupt */
-}
-
-static void bt_hid_send_data(struct bt_l2cap_conn_params_s *ch, int type,
-                const uint8_t *data, int len)
-{
-    uint8_t *pkt, hdr = (BT_DATA << 4) | type;
-    int plen;
-
-    do {
-        plen = MIN(len, ch->remote_mtu - 1);
-        pkt = ch->sdu_out(ch, plen + 1);
-
-        pkt[0] = hdr;
-        if (plen)
-            memcpy(pkt + 1, data, plen);
-        ch->sdu_submit(ch);
-
-        len -= plen;
-        data += plen;
-        hdr = (BT_DATC << 4) | type;
-    } while (plen == ch->remote_mtu - 1);
-}
-
-static void bt_hid_control_transaction(struct bt_hid_device_s *s,
-                const uint8_t *data, int len)
-{
-    uint8_t type, parameter;
-    int rlen, ret = -1;
-    if (len < 1)
-        return;
-
-    type = data[0] >> 4;
-    parameter = data[0] & 0xf;
-
-    switch (type) {
-    case BT_HANDSHAKE:
-    case BT_DATA:
-        switch (parameter) {
-        default:
-            /* These are not expected to be sent this direction.  */
-            ret = BT_HS_ERR_INVALID_PARAMETER;
-        }
-        break;
-
-    case BT_HID_CONTROL:
-        if (len != 1 || (parameter != BT_HC_VIRTUAL_CABLE_UNPLUG &&
-                                s->state == bt_state_transaction)) {
-            ret = BT_HS_ERR_INVALID_PARAMETER;
-            break;
-        }
-        switch (parameter) {
-        case BT_HC_NOP:
-            break;
-        case BT_HC_HARD_RESET:
-        case BT_HC_SOFT_RESET:
-            bt_hid_reset(s);
-            break;
-        case BT_HC_SUSPEND:
-            if (s->state == bt_state_ready)
-                s->state = bt_state_suspend;
-            else
-                ret = BT_HS_ERR_INVALID_PARAMETER;
-            break;
-        case BT_HC_EXIT_SUSPEND:
-            if (s->state == bt_state_suspend)
-                s->state = bt_state_ready;
-            else
-                ret = BT_HS_ERR_INVALID_PARAMETER;
-            break;
-        case BT_HC_VIRTUAL_CABLE_UNPLUG:
-            bt_hid_disconnect(s);
-            break;
-        default:
-            ret = BT_HS_ERR_INVALID_PARAMETER;
-        }
-        break;
-
-    case BT_GET_REPORT:
-        /* No ReportIDs declared.  */
-        if (((parameter & 8) && len != 3) ||
-                        (!(parameter & 8) && len != 1) ||
-                        s->state != bt_state_ready) {
-            ret = BT_HS_ERR_INVALID_PARAMETER;
-            break;
-        }
-        if (parameter & 8)
-            rlen = data[2] | (data[3] << 8);
-        else
-            rlen = INT_MAX;
-        switch (parameter & 3) {
-        case BT_DATA_OTHER:
-            ret = BT_HS_ERR_INVALID_PARAMETER;
-            break;
-        case BT_DATA_INPUT:
-            /* Here we can as well poll s->usbdev */
-            bt_hid_send_data(s->control, BT_DATA_INPUT,
-                            s->datain.buffer, MIN(rlen, s->datain.len));
-            break;
-        case BT_DATA_OUTPUT:
-            bt_hid_send_data(s->control, BT_DATA_OUTPUT,
-                            s->dataout.buffer, MIN(rlen, s->dataout.len));
-            break;
-        case BT_DATA_FEATURE:
-            bt_hid_send_data(s->control, BT_DATA_FEATURE,
-                            s->feature.buffer, MIN(rlen, s->feature.len));
-            break;
-        }
-        break;
-
-    case BT_SET_REPORT:
-        if (len < 2 || len > BT_HID_MTU || s->state != bt_state_ready ||
-                        (parameter & 3) == BT_DATA_OTHER ||
-                        (parameter & 3) == BT_DATA_INPUT) {
-            ret = BT_HS_ERR_INVALID_PARAMETER;
-            break;
-        }
-        s->data_type = parameter & 3;
-        if (s->data_type == BT_DATA_OUTPUT) {
-            s->dataout.len = len - 1;
-            memcpy(s->dataout.buffer, data + 1, s->dataout.len);
-        } else {
-            s->feature.len = len - 1;
-            memcpy(s->feature.buffer, data + 1, s->feature.len);
-        }
-        if (len == BT_HID_MTU)
-            s->state = bt_state_transaction;
-        else
-            bt_hid_out(s);
-        break;
-
-    case BT_GET_PROTOCOL:
-        if (len != 1 || s->state == bt_state_transaction) {
-            ret = BT_HS_ERR_INVALID_PARAMETER;
-            break;
-        }
-        *s->control->sdu_out(s->control, 1) = s->proto;
-        s->control->sdu_submit(s->control);
-        break;
-
-    case BT_SET_PROTOCOL:
-        if (len != 1 || s->state == bt_state_transaction ||
-                        (parameter != BT_HID_PROTO_BOOT &&
-                         parameter != BT_HID_PROTO_REPORT)) {
-            ret = BT_HS_ERR_INVALID_PARAMETER;
-            break;
-        }
-        s->proto = parameter;
-        s->usbdev->handle_control(s->usbdev, SET_PROTOCOL, s->proto, 0, 0,
-                                  NULL);
-        ret = BT_HS_SUCCESSFUL;
-        break;
-
-    case BT_GET_IDLE:
-        if (len != 1 || s->state == bt_state_transaction) {
-            ret = BT_HS_ERR_INVALID_PARAMETER;
-            break;
-        }
-        s->usbdev->handle_control(s->usbdev, GET_IDLE, 0, 0, 1,
-                        s->control->sdu_out(s->control, 1));
-        s->control->sdu_submit(s->control);
-        break;
-
-    case BT_SET_IDLE:
-        if (len != 2 || s->state == bt_state_transaction) {
-            ret = BT_HS_ERR_INVALID_PARAMETER;
-            break;
-        }
-
-        /* We don't need to know about the Idle Rate here really,
-         * so just pass it on to the device.  */
-        ret = s->usbdev->handle_control(s->usbdev,
-                        SET_IDLE, data[1], 0, 0, NULL) ?
-                BT_HS_SUCCESSFUL : BT_HS_ERR_INVALID_PARAMETER;
-        /* XXX: Does this generate a handshake? */
-        break;
-
-    case BT_DATC:
-        if (len > BT_HID_MTU || s->state != bt_state_transaction) {
-            ret = BT_HS_ERR_INVALID_PARAMETER;
-            break;
-        }
-        if (s->data_type == BT_DATA_OUTPUT) {
-            memcpy(s->dataout.buffer + s->dataout.len, data + 1, len - 1);
-            s->dataout.len += len - 1;
-        } else {
-            memcpy(s->feature.buffer + s->feature.len, data + 1, len - 1);
-            s->feature.len += len - 1;
-        }
-        if (len < BT_HID_MTU) {
-            bt_hid_out(s);
-            s->state = bt_state_ready;
-        }
-        break;
-
-    default:
-        ret = BT_HS_ERR_UNSUPPORTED_REQUEST;
-    }
-
-    if (ret != -1)
-        bt_hid_send_handshake(s, ret);
-}
-
-static void bt_hid_control_sdu(void *opaque, const uint8_t *data, int len)
-{
-    struct bt_hid_device_s *hid = opaque;
-
-    bt_hid_control_transaction(hid, data, len);
-}
-
-static void bt_hid_datain(void *opaque)
-{
-    struct bt_hid_device_s *hid = opaque;
-
-    /* If suspended, wake-up and send a wake-up event first.  We might
- * want to also inspect the input report and ignore events like
-     * mouse movements until a button event occurs.  */
-    if (hid->state == bt_state_suspend) {
-        hid->state = bt_state_ready;
-    }
-
-    if (bt_hid_in(hid) > 0)
-        /* TODO: when in boot-mode precede any Input reports with the ReportID
-         * byte, here and in GetReport/SetReport on the Control channel.  */
-        bt_hid_send_data(hid->interrupt, BT_DATA_INPUT,
-                        hid->datain.buffer, hid->datain.len);
-}
-
-static void bt_hid_interrupt_sdu(void *opaque, const uint8_t *data, int len)
-{
-    struct bt_hid_device_s *hid = opaque;
-
-    if (len > BT_HID_MTU || len < 1)
-        goto bad;
-    if ((data[0] & 3) != BT_DATA_OUTPUT)
-        goto bad;
-    if ((data[0] >> 4) == BT_DATA) {
-        if (hid->intr_state)
-            goto bad;
-
-        hid->data_type = BT_DATA_OUTPUT;
-        hid->intrdataout.len = 0;
-    } else if ((data[0] >> 4) == BT_DATC) {
-        if (!hid->intr_state)
-            goto bad;
-    } else
-        goto bad;
-
-    memcpy(hid->intrdataout.buffer + hid->intrdataout.len, data + 1, len - 1);
-    hid->intrdataout.len += len - 1;
-    hid->intr_state = (len == BT_HID_MTU);
-    if (!hid->intr_state) {
-        memcpy(hid->dataout.buffer, hid->intrdataout.buffer,
-                        hid->dataout.len = hid->intrdataout.len);
-        bt_hid_out(hid);
-    }
-
-    return;
-bad:
-    fprintf(stderr, "%s: bad transaction on Interrupt channel.\n",
-                    __FUNCTION__);
-}
-
-/* "Virtual cable" plug/unplug event.  */
-static void bt_hid_connected_update(struct bt_hid_device_s *hid)
-{
-    int prev = hid->connected;
-
-    hid->connected = hid->control && hid->interrupt;
-
-    /* Stop page-/inquiry-scanning when a host is connected.  */
-    hid->btdev.device.page_scan = !hid->connected;
-    hid->btdev.device.inquiry_scan = !hid->connected;
-
-    if (hid->connected && !prev) {
-        hid->usbdev->handle_reset(hid->usbdev);
-        hid->proto = BT_HID_PROTO_REPORT;
-    }
-
-    /* Should set HIDVirtualCable in SDP (possibly need to check that SDP
-     * isn't destroyed yet, in case we're being called from handle_destroy) */
-}
-
-static void bt_hid_close_control(void *opaque)
-{
-    struct bt_hid_device_s *hid = opaque;
-
-    hid->control = NULL;
-    bt_hid_connected_update(hid);
-}
-
-static void bt_hid_close_interrupt(void *opaque)
-{
-    struct bt_hid_device_s *hid = opaque;
-
-    hid->interrupt = NULL;
-    bt_hid_connected_update(hid);
-}
-
-static int bt_hid_new_control_ch(struct bt_l2cap_device_s *dev,
-                struct bt_l2cap_conn_params_s *params)
-{
-    struct bt_hid_device_s *hid = (struct bt_hid_device_s *) dev;
-
-    if (hid->control)
-        return 1;
-
-    hid->control = params;
-    hid->control->opaque = hid;
-    hid->control->close = bt_hid_close_control;
-    hid->control->sdu_in = bt_hid_control_sdu;
-
-    bt_hid_connected_update(hid);
-
-    return 0;
-}
-
-static int bt_hid_new_interrupt_ch(struct bt_l2cap_device_s *dev,
-                struct bt_l2cap_conn_params_s *params)
-{
-    struct bt_hid_device_s *hid = (struct bt_hid_device_s *) dev;
-
-    if (hid->interrupt)
-        return 1;
-
-    hid->interrupt = params;
-    hid->interrupt->opaque = hid;
-    hid->interrupt->close = bt_hid_close_interrupt;
-    hid->interrupt->sdu_in = bt_hid_interrupt_sdu;
-
-    bt_hid_connected_update(hid);
-
-    return 0;
-}
-
-static void bt_hid_destroy(struct bt_device_s *dev)
-{
-    struct bt_hid_device_s *hid = (struct bt_hid_device_s *) dev;
-
-    if (hid->connected)
-        bt_hid_send_control(hid, BT_HC_VIRTUAL_CABLE_UNPLUG);
-    bt_l2cap_device_done(&hid->btdev);
-
-    hid->usbdev->handle_destroy(hid->usbdev);
-
-    g_free(hid);
-}
-
-enum peripheral_minor_class {
-    class_other		= 0 << 4,
-    class_keyboard	= 1 << 4,
-    class_pointing	= 2 << 4,
-    class_combo		= 3 << 4,
-};
-
-static struct bt_device_s *bt_hid_init(struct bt_scatternet_s *net,
-                USBDevice *dev, enum peripheral_minor_class minor)
-{
-    struct bt_hid_device_s *s = g_malloc0(sizeof(*s));
-    uint32_t class =
-            /* Format type */
-            (0 << 0) |
-            /* Device class */
-            (minor << 2) |
-            (5 << 8) |  /* "Peripheral" */
-            /* Service classes */
-            (1 << 13) | /* Limited discoverable mode */
-            (1 << 19);  /* Capturing device (?) */
-
-    bt_l2cap_device_init(&s->btdev, net);
-    bt_l2cap_sdp_init(&s->btdev);
-    bt_l2cap_psm_register(&s->btdev, BT_PSM_HID_CTRL,
-                    BT_HID_MTU, bt_hid_new_control_ch);
-    bt_l2cap_psm_register(&s->btdev, BT_PSM_HID_INTR,
-                    BT_HID_MTU, bt_hid_new_interrupt_ch);
-
-    s->usbdev = dev;
-    s->btdev.device.lmp_name = s->usbdev->devname;
-    usb_hid_datain_cb(s->usbdev, s, bt_hid_datain);
-
-    s->btdev.device.handle_destroy = bt_hid_destroy;
-
-    s->btdev.device.class[0] = (class >>  0) & 0xff;
-    s->btdev.device.class[1] = (class >>  8) & 0xff;
-    s->btdev.device.class[2] = (class >> 16) & 0xff;
-
-    return &s->btdev.device;
-}
-
-struct bt_device_s *bt_keyboard_init(struct bt_scatternet_s *net)
-{
-    return bt_hid_init(net, usb_keyboard_init(), class_keyboard);
-}
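
(Context for the removal above, not part of the patch: the HID profile code
fragments each report over L2CAP in bt_hid_send_data() -- the first packet
carries a (BT_DATA << 4) | type header, continuations switch to BT_DATC, and a
fragment shorter than the channel MTU marks the end of the report, which is how
bt_hid_interrupt_sdu() decides when to flush. The stand-alone sketch below
mirrors that framing, assuming a fixed 48-byte MTU in place of the channel's
remote_mtu; the demo_* names and the emit() callback are illustrative, not the
QEMU API.)

#include <stdint.h>
#include <string.h>

#define DEMO_MTU     48     /* stands in for ch->remote_mtu / BT_HID_MTU */
#define DEMO_BT_DATA 0xa    /* first fragment of a report */
#define DEMO_BT_DATC 0xb    /* continuation fragment */

/* Send one report as DATA/DATC fragments; emit() stands in for the
 * sdu_out()/sdu_submit() pair used by the deleted code. */
static void demo_send_report(uint8_t type, const uint8_t *data, int len,
                             void (*emit)(const uint8_t *pkt, int plen))
{
    uint8_t pkt[DEMO_MTU];
    uint8_t hdr = (DEMO_BT_DATA << 4) | type;
    int plen;

    do {
        plen = len < DEMO_MTU - 1 ? len : DEMO_MTU - 1;
        pkt[0] = hdr;
        if (plen)
            memcpy(pkt + 1, data, plen);
        emit(pkt, plen + 1);

        len -= plen;
        data += plen;
        hdr = (DEMO_BT_DATC << 4) | type;   /* later fragments use DATC */
    } while (plen == DEMO_MTU - 1);         /* a short fragment ends the SDU */
}
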
diff --git a/hw/bt/l2cap.c b/hw/bt/l2cap.c
deleted file mode 100644
index 05e0d28..0000000
--- a/hw/bt/l2cap.c
+++ /dev/null
@@ -1,1362 +0,0 @@
-/*
- * QEMU Bluetooth L2CAP logic.
- *
- * Copyright (C) 2008 Andrzej Zaborowski  <balrog@zabor.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "qemu-common.h"
-#include "qemu/timer.h"
-#include "hw/bt.h"
-
-#define L2CAP_CID_MAX	0x100	/* Between 0x40 and 0x10000 */
-
-struct l2cap_instance_s {
-    struct bt_link_s *link;
-    struct bt_l2cap_device_s *dev;
-    int role;
-
-    uint8_t frame_in[65535 + L2CAP_HDR_SIZE] __attribute__ ((aligned (4)));
-    int frame_in_len;
-
-    uint8_t frame_out[65535 + L2CAP_HDR_SIZE] __attribute__ ((aligned (4)));
-    int frame_out_len;
-
-    /* Signalling channel timers.  They exist per-request but we can make
-     * sure we have no more than one outstanding request at any time.  */
-    QEMUTimer *rtx;
-    QEMUTimer *ertx;
-
-    int last_id;
-    int next_id;
-
-    struct l2cap_chan_s {
-        struct bt_l2cap_conn_params_s params;
-
-        void (*frame_in)(struct l2cap_chan_s *chan, uint16_t cid,
-                        const l2cap_hdr *hdr, int len);
-        int mps;
-        int min_mtu;
-
-        struct l2cap_instance_s *l2cap;
-
-        /* Only allocated channels */
-        uint16_t remote_cid;
-#define L2CAP_CFG_INIT	2
-#define L2CAP_CFG_ACC	1
-        int config_req_id; /* TODO: handle outgoing requests generically */
-        int config;
-
-        /* Only connection-oriented channels.  Note: if we allow the tx and
-         * rx traffic to be in different modes at any time, we need two.  */
-        int mode;
-
-        /* Only flow-controlled, connection-oriented channels */
-        uint8_t sdu[65536]; /* TODO: dynamically allocate */
-        int len_cur, len_total;
-        int rexmit;
-        int monitor_timeout;
-        QEMUTimer *monitor_timer;
-        QEMUTimer *retransmission_timer;
-    } *cid[L2CAP_CID_MAX];
-    /* The channel state machine states map as following:
-     * CLOSED           -> !cid[N]
-     * WAIT_CONNECT     -> never occurs
-     * WAIT_CONNECT_RSP -> never occurs
-     * CONFIG           -> cid[N] && config < 3
-     *   WAIT_CONFIG         -> never occurs, cid[N] && config == 0 && !config_r
-     *   WAIT_SEND_CONFIG    -> never occurs, cid[N] && config == 1 && !config_r
-     *   WAIT_CONFIG_REQ_RSP -> cid[N] && config == 0 && config_req_id
-     *   WAIT_CONFIG_RSP     -> cid[N] && config == 1 && config_req_id
-     *   WAIT_CONFIG_REQ     -> cid[N] && config == 2
-     * OPEN             -> cid[N] && config == 3
-     * WAIT_DISCONNECT  -> never occurs
-     */
-
-    struct l2cap_chan_s signalling_ch;
-    struct l2cap_chan_s group_ch;
-};
-
-struct slave_l2cap_instance_s {
-    struct bt_link_s link;	/* Underlying logical link (ACL) */
-    struct l2cap_instance_s l2cap;
-};
-
-struct bt_l2cap_psm_s {
-    int psm;
-    int min_mtu;
-    int (*new_channel)(struct bt_l2cap_device_s *device,
-                    struct bt_l2cap_conn_params_s *params);
-    struct bt_l2cap_psm_s *next;
-};
-
-static const uint16_t l2cap_fcs16_table[256] = {
-    0x0000, 0xc0c1, 0xc181, 0x0140, 0xc301, 0x03c0, 0x0280, 0xc241,
-    0xc601, 0x06c0, 0x0780, 0xc741, 0x0500, 0xc5c1, 0xc481, 0x0440,
-    0xcc01, 0x0cc0, 0x0d80, 0xcd41, 0x0f00, 0xcfc1, 0xce81, 0x0e40,
-    0x0a00, 0xcac1, 0xcb81, 0x0b40, 0xc901, 0x09c0, 0x0880, 0xc841,
-    0xd801, 0x18c0, 0x1980, 0xd941, 0x1b00, 0xdbc1, 0xda81, 0x1a40,
-    0x1e00, 0xdec1, 0xdf81, 0x1f40, 0xdd01, 0x1dc0, 0x1c80, 0xdc41,
-    0x1400, 0xd4c1, 0xd581, 0x1540, 0xd701, 0x17c0, 0x1680, 0xd641,
-    0xd201, 0x12c0, 0x1380, 0xd341, 0x1100, 0xd1c1, 0xd081, 0x1040,
-    0xf001, 0x30c0, 0x3180, 0xf141, 0x3300, 0xf3c1, 0xf281, 0x3240,
-    0x3600, 0xf6c1, 0xf781, 0x3740, 0xf501, 0x35c0, 0x3480, 0xf441,
-    0x3c00, 0xfcc1, 0xfd81, 0x3d40, 0xff01, 0x3fc0, 0x3e80, 0xfe41,
-    0xfa01, 0x3ac0, 0x3b80, 0xfb41, 0x3900, 0xf9c1, 0xf881, 0x3840,
-    0x2800, 0xe8c1, 0xe981, 0x2940, 0xeb01, 0x2bc0, 0x2a80, 0xea41,
-    0xee01, 0x2ec0, 0x2f80, 0xef41, 0x2d00, 0xedc1, 0xec81, 0x2c40,
-    0xe401, 0x24c0, 0x2580, 0xe541, 0x2700, 0xe7c1, 0xe681, 0x2640,
-    0x2200, 0xe2c1, 0xe381, 0x2340, 0xe101, 0x21c0, 0x2080, 0xe041,
-    0xa001, 0x60c0, 0x6180, 0xa141, 0x6300, 0xa3c1, 0xa281, 0x6240,
-    0x6600, 0xa6c1, 0xa781, 0x6740, 0xa501, 0x65c0, 0x6480, 0xa441,
-    0x6c00, 0xacc1, 0xad81, 0x6d40, 0xaf01, 0x6fc0, 0x6e80, 0xae41,
-    0xaa01, 0x6ac0, 0x6b80, 0xab41, 0x6900, 0xa9c1, 0xa881, 0x6840,
-    0x7800, 0xb8c1, 0xb981, 0x7940, 0xbb01, 0x7bc0, 0x7a80, 0xba41,
-    0xbe01, 0x7ec0, 0x7f80, 0xbf41, 0x7d00, 0xbdc1, 0xbc81, 0x7c40,
-    0xb401, 0x74c0, 0x7580, 0xb541, 0x7700, 0xb7c1, 0xb681, 0x7640,
-    0x7200, 0xb2c1, 0xb381, 0x7340, 0xb101, 0x71c0, 0x7080, 0xb041,
-    0x5000, 0x90c1, 0x9181, 0x5140, 0x9301, 0x53c0, 0x5280, 0x9241,
-    0x9601, 0x56c0, 0x5780, 0x9741, 0x5500, 0x95c1, 0x9481, 0x5440,
-    0x9c01, 0x5cc0, 0x5d80, 0x9d41, 0x5f00, 0x9fc1, 0x9e81, 0x5e40,
-    0x5a00, 0x9ac1, 0x9b81, 0x5b40, 0x9901, 0x59c0, 0x5880, 0x9841,
-    0x8801, 0x48c0, 0x4980, 0x8941, 0x4b00, 0x8bc1, 0x8a81, 0x4a40,
-    0x4e00, 0x8ec1, 0x8f81, 0x4f40, 0x8d01, 0x4dc0, 0x4c80, 0x8c41,
-    0x4400, 0x84c1, 0x8581, 0x4540, 0x8701, 0x47c0, 0x4680, 0x8641,
-    0x8201, 0x42c0, 0x4380, 0x8341, 0x4100, 0x81c1, 0x8081, 0x4040,
-};
-
-static uint16_t l2cap_fcs16(const uint8_t *message, int len)
-{
-    uint16_t fcs = 0x0000;
-
-    while (len --)
-#if 0
-    {
-        int i;
-
-        fcs ^= *message ++;
-        for (i = 8; i; -- i)
-            if (fcs & 1)
-                fcs = (fcs >> 1) ^ 0xa001;
-            else
-                fcs = (fcs >> 1);
-    }
-#else
-        fcs = (fcs >> 8) ^ l2cap_fcs16_table[(fcs ^ *message ++) & 0xff];
-#endif
-
-    return fcs;
-}
-
-/* L2CAP layer logic (protocol) */
-
-static void l2cap_retransmission_timer_update(struct l2cap_chan_s *ch)
-{
-#if 0
-    if (ch->mode != L2CAP_MODE_BASIC && ch->rexmit)
-        timer_mod(ch->retransmission_timer);
-    else
-        timer_del(ch->retransmission_timer);
-#endif
-}
-
-static void l2cap_monitor_timer_update(struct l2cap_chan_s *ch)
-{
-#if 0
-    if (ch->mode != L2CAP_MODE_BASIC && !ch->rexmit)
-        timer_mod(ch->monitor_timer);
-    else
-        timer_del(ch->monitor_timer);
-#endif
-}
-
-static void l2cap_command_reject(struct l2cap_instance_s *l2cap, int id,
-                uint16_t reason, const void *data, int plen)
-{
-    uint8_t *pkt;
-    l2cap_cmd_hdr *hdr;
-    l2cap_cmd_rej *params;
-    uint16_t len;
-
-    reason = cpu_to_le16(reason);
-    len = cpu_to_le16(L2CAP_CMD_REJ_SIZE + plen);
-
-    pkt = l2cap->signalling_ch.params.sdu_out(&l2cap->signalling_ch.params,
-                    L2CAP_CMD_HDR_SIZE + L2CAP_CMD_REJ_SIZE + plen);
-    hdr = (void *) (pkt + 0);
-    params = (void *) (pkt + L2CAP_CMD_HDR_SIZE);
-
-    hdr->code = L2CAP_COMMAND_REJ;
-    hdr->ident = id;
-    memcpy(&hdr->len, &len, sizeof(hdr->len));
-    memcpy(&params->reason, &reason, sizeof(reason));
-    if (plen)
-       memcpy(pkt + L2CAP_CMD_HDR_SIZE + L2CAP_CMD_REJ_SIZE, data, plen);
-
-    l2cap->signalling_ch.params.sdu_submit(&l2cap->signalling_ch.params);
-}
-
-static void l2cap_command_reject_cid(struct l2cap_instance_s *l2cap, int id,
-                uint16_t reason, uint16_t dcid, uint16_t scid)
-{
-    l2cap_cmd_rej_cid params = {
-        .dcid = dcid,
-        .scid = scid,
-    };
-
-    l2cap_command_reject(l2cap, id, reason, &params, L2CAP_CMD_REJ_CID_SIZE);
-}
-
-static void l2cap_connection_response(struct l2cap_instance_s *l2cap,
-                int dcid, int scid, int result, int status)
-{
-    uint8_t *pkt;
-    l2cap_cmd_hdr *hdr;
-    l2cap_conn_rsp *params;
-
-    pkt = l2cap->signalling_ch.params.sdu_out(&l2cap->signalling_ch.params,
-                    L2CAP_CMD_HDR_SIZE + L2CAP_CONN_RSP_SIZE);
-    hdr = (void *) (pkt + 0);
-    params = (void *) (pkt + L2CAP_CMD_HDR_SIZE);
-
-    hdr->code = L2CAP_CONN_RSP;
-    hdr->ident = l2cap->last_id;
-    hdr->len = cpu_to_le16(L2CAP_CONN_RSP_SIZE);
-
-    params->dcid = cpu_to_le16(dcid);
-    params->scid = cpu_to_le16(scid);
-    params->result = cpu_to_le16(result);
-    params->status = cpu_to_le16(status);
-
-    l2cap->signalling_ch.params.sdu_submit(&l2cap->signalling_ch.params);
-}
-
-static void l2cap_configuration_request(struct l2cap_instance_s *l2cap,
-                int dcid, int flag, const uint8_t *data, int len)
-{
-    uint8_t *pkt;
-    l2cap_cmd_hdr *hdr;
-    l2cap_conf_req *params;
-
-    pkt = l2cap->signalling_ch.params.sdu_out(&l2cap->signalling_ch.params,
-                    L2CAP_CMD_HDR_SIZE + L2CAP_CONF_REQ_SIZE(len));
-    hdr = (void *) (pkt + 0);
-    params = (void *) (pkt + L2CAP_CMD_HDR_SIZE);
-
-    /* TODO: unify the id sequencing */
-    l2cap->last_id = l2cap->next_id;
-    l2cap->next_id = l2cap->next_id == 255 ? 1 : l2cap->next_id + 1;
-
-    hdr->code = L2CAP_CONF_REQ;
-    hdr->ident = l2cap->last_id;
-    hdr->len = cpu_to_le16(L2CAP_CONF_REQ_SIZE(len));
-
-    params->dcid = cpu_to_le16(dcid);
-    params->flags = cpu_to_le16(flag);
-    if (len)
-        memcpy(params->data, data, len);
-
-    l2cap->signalling_ch.params.sdu_submit(&l2cap->signalling_ch.params);
-}
-
-static void l2cap_configuration_response(struct l2cap_instance_s *l2cap,
-                int scid, int flag, int result, const uint8_t *data, int len)
-{
-    uint8_t *pkt;
-    l2cap_cmd_hdr *hdr;
-    l2cap_conf_rsp *params;
-
-    pkt = l2cap->signalling_ch.params.sdu_out(&l2cap->signalling_ch.params,
-                    L2CAP_CMD_HDR_SIZE + L2CAP_CONF_RSP_SIZE(len));
-    hdr = (void *) (pkt + 0);
-    params = (void *) (pkt + L2CAP_CMD_HDR_SIZE);
-
-    hdr->code = L2CAP_CONF_RSP;
-    hdr->ident = l2cap->last_id;
-    hdr->len = cpu_to_le16(L2CAP_CONF_RSP_SIZE(len));
-
-    params->scid = cpu_to_le16(scid);
-    params->flags = cpu_to_le16(flag);
-    params->result = cpu_to_le16(result);
-    if (len)
-        memcpy(params->data, data, len);
-
-    l2cap->signalling_ch.params.sdu_submit(&l2cap->signalling_ch.params);
-}
-
-static void l2cap_disconnection_response(struct l2cap_instance_s *l2cap,
-                int dcid, int scid)
-{
-    uint8_t *pkt;
-    l2cap_cmd_hdr *hdr;
-    l2cap_disconn_rsp *params;
-
-    pkt = l2cap->signalling_ch.params.sdu_out(&l2cap->signalling_ch.params,
-                    L2CAP_CMD_HDR_SIZE + L2CAP_DISCONN_RSP_SIZE);
-    hdr = (void *) (pkt + 0);
-    params = (void *) (pkt + L2CAP_CMD_HDR_SIZE);
-
-    hdr->code = L2CAP_DISCONN_RSP;
-    hdr->ident = l2cap->last_id;
-    hdr->len = cpu_to_le16(L2CAP_DISCONN_RSP_SIZE);
-
-    params->dcid = cpu_to_le16(dcid);
-    params->scid = cpu_to_le16(scid);
-
-    l2cap->signalling_ch.params.sdu_submit(&l2cap->signalling_ch.params);
-}
-
-static void l2cap_echo_response(struct l2cap_instance_s *l2cap,
-                const uint8_t *data, int len)
-{
-    uint8_t *pkt;
-    l2cap_cmd_hdr *hdr;
-    uint8_t *params;
-
-    pkt = l2cap->signalling_ch.params.sdu_out(&l2cap->signalling_ch.params,
-                    L2CAP_CMD_HDR_SIZE + len);
-    hdr = (void *) (pkt + 0);
-    params = (void *) (pkt + L2CAP_CMD_HDR_SIZE);
-
-    hdr->code = L2CAP_ECHO_RSP;
-    hdr->ident = l2cap->last_id;
-    hdr->len = cpu_to_le16(len);
-
-    memcpy(params, data, len);
-
-    l2cap->signalling_ch.params.sdu_submit(&l2cap->signalling_ch.params);
-}
-
-static void l2cap_info_response(struct l2cap_instance_s *l2cap, int type,
-                int result, const uint8_t *data, int len)
-{
-    uint8_t *pkt;
-    l2cap_cmd_hdr *hdr;
-    l2cap_info_rsp *params;
-
-    pkt = l2cap->signalling_ch.params.sdu_out(&l2cap->signalling_ch.params,
-                    L2CAP_CMD_HDR_SIZE + L2CAP_INFO_RSP_SIZE + len);
-    hdr = (void *) (pkt + 0);
-    params = (void *) (pkt + L2CAP_CMD_HDR_SIZE);
-
-    hdr->code = L2CAP_INFO_RSP;
-    hdr->ident = l2cap->last_id;
-    hdr->len = cpu_to_le16(L2CAP_INFO_RSP_SIZE + len);
-
-    params->type = cpu_to_le16(type);
-    params->result = cpu_to_le16(result);
-    if (len)
-       memcpy(params->data, data, len);
-
-    l2cap->signalling_ch.params.sdu_submit(&l2cap->signalling_ch.params);
-}
-
-static uint8_t *l2cap_bframe_out(struct bt_l2cap_conn_params_s *parm, int len);
-static void l2cap_bframe_submit(struct bt_l2cap_conn_params_s *parms);
-#if 0
-static uint8_t *l2cap_iframe_out(struct bt_l2cap_conn_params_s *parm, int len);
-static void l2cap_iframe_submit(struct bt_l2cap_conn_params_s *parm);
-#endif
-static void l2cap_bframe_in(struct l2cap_chan_s *ch, uint16_t cid,
-                const l2cap_hdr *hdr, int len);
-static void l2cap_iframe_in(struct l2cap_chan_s *ch, uint16_t cid,
-                const l2cap_hdr *hdr, int len);
-
-static int l2cap_cid_new(struct l2cap_instance_s *l2cap)
-{
-    int i;
-
-    for (i = L2CAP_CID_ALLOC; i < L2CAP_CID_MAX; i ++)
-        if (!l2cap->cid[i])
-            return i;
-
-    return L2CAP_CID_INVALID;
-}
-
-static inline struct bt_l2cap_psm_s *l2cap_psm(
-                struct bt_l2cap_device_s *device, int psm)
-{
-    struct bt_l2cap_psm_s *ret = device->first_psm;
-
-    while (ret && ret->psm != psm)
-        ret = ret->next;
-
-    return ret;
-}
-
-static struct l2cap_chan_s *l2cap_channel_open(struct l2cap_instance_s *l2cap,
-                int psm, int source_cid)
-{
-    struct l2cap_chan_s *ch = NULL;
-    struct bt_l2cap_psm_s *psm_info;
-    int result, status;
-    int cid = l2cap_cid_new(l2cap);
-
-    if (cid) {
-        /* See what the channel is to be used for.. */
-        psm_info = l2cap_psm(l2cap->dev, psm);
-
-        if (psm_info) {
-            /* Device supports this use-case.  */
-            ch = g_malloc0(sizeof(*ch));
-            ch->params.sdu_out = l2cap_bframe_out;
-            ch->params.sdu_submit = l2cap_bframe_submit;
-            ch->frame_in = l2cap_bframe_in;
-            ch->mps = 65536;
-            ch->min_mtu = MAX(48, psm_info->min_mtu);
-            ch->params.remote_mtu = MAX(672, ch->min_mtu);
-            ch->remote_cid = source_cid;
-            ch->mode = L2CAP_MODE_BASIC;
-            ch->l2cap = l2cap;
-
-            /* Does it feel like opening yet another channel though?  */
-            if (!psm_info->new_channel(l2cap->dev, &ch->params)) {
-                l2cap->cid[cid] = ch;
-
-                result = L2CAP_CR_SUCCESS;
-                status = L2CAP_CS_NO_INFO;
-            } else {
-                g_free(ch);
-
-                result = L2CAP_CR_NO_MEM;
-                status = L2CAP_CS_NO_INFO;
-            }
-        } else {
-            result = L2CAP_CR_BAD_PSM;
-            status = L2CAP_CS_NO_INFO;
-        }
-    } else {
-        result = L2CAP_CR_NO_MEM;
-        status = L2CAP_CS_NO_INFO;
-    }
-
-    l2cap_connection_response(l2cap, cid, source_cid, result, status);
-
-    return ch;
-}
-
-static void l2cap_channel_close(struct l2cap_instance_s *l2cap,
-                int cid, int source_cid)
-{
-    struct l2cap_chan_s *ch = NULL;
-
-    /* According to Volume 3, section 6.1.1, pg 1048 of BT Core V2.0, a
-     * connection in CLOSED state still responds with a L2CAP_DisconnectRsp
-     * message on an L2CAP_DisconnectReq event.  */
-    if (unlikely(cid < L2CAP_CID_ALLOC)) {
-        l2cap_command_reject_cid(l2cap, l2cap->last_id, L2CAP_REJ_CID_INVAL,
-                        cid, source_cid);
-        return;
-    }
-    if (likely(cid >= L2CAP_CID_ALLOC && cid < L2CAP_CID_MAX))
-        ch = l2cap->cid[cid];
-
-    if (likely(ch)) {
-        if (ch->remote_cid != source_cid) {
-            fprintf(stderr, "%s: Ignoring a Disconnection Request with the "
-                            "invalid SCID %04x.\n", __FUNCTION__, source_cid);
-            return;
-        }
-
-        l2cap->cid[cid] = NULL;
-
-        ch->params.close(ch->params.opaque);
-        g_free(ch);
-    }
-
-    l2cap_disconnection_response(l2cap, cid, source_cid);
-}
-
-static void l2cap_channel_config_null(struct l2cap_instance_s *l2cap,
-                struct l2cap_chan_s *ch)
-{
-    l2cap_configuration_request(l2cap, ch->remote_cid, 0, NULL, 0);
-    ch->config_req_id = l2cap->last_id;
-    ch->config &= ~L2CAP_CFG_INIT;
-}
-
-static void l2cap_channel_config_req_event(struct l2cap_instance_s *l2cap,
-                struct l2cap_chan_s *ch)
-{
-    /* Use all default channel options and terminate negotiation.  */
-    l2cap_channel_config_null(l2cap, ch);
-}
-
-static int l2cap_channel_config(struct l2cap_instance_s *l2cap,
-                struct l2cap_chan_s *ch, int flag,
-                const uint8_t *data, int len)
-{
-    l2cap_conf_opt *opt;
-    l2cap_conf_opt_qos *qos;
-    uint32_t val;
-    uint8_t rsp[len];
-    int result = L2CAP_CONF_SUCCESS;
-
-    data = memcpy(rsp, data, len);
-    while (len) {
-        opt = (void *) data;
-
-        if (len < L2CAP_CONF_OPT_SIZE ||
-                        len < L2CAP_CONF_OPT_SIZE + opt->len) {
-            result = L2CAP_CONF_REJECT;
-            break;
-        }
-        data += L2CAP_CONF_OPT_SIZE + opt->len;
-        len -= L2CAP_CONF_OPT_SIZE + opt->len;
-
-        switch (opt->type & 0x7f) {
-        case L2CAP_CONF_MTU:
-            if (opt->len != 2) {
-                result = L2CAP_CONF_REJECT;
-                break;
-            }
-
-            /* MTU */
-            val = le16_to_cpup((void *) opt->val);
-            if (val < ch->min_mtu) {
-                cpu_to_le16w((void *) opt->val, ch->min_mtu);
-                result = L2CAP_CONF_UNACCEPT;
-                break;
-            }
-
-            ch->params.remote_mtu = val;
-            break;
-
-        case L2CAP_CONF_FLUSH_TO:
-            if (opt->len != 2) {
-                result = L2CAP_CONF_REJECT;
-                break;
-            }
-
-            /* Flush Timeout */
-            val = le16_to_cpup((void *) opt->val);
-            if (val < 0x0001) {
-                opt->val[0] = 0xff;
-                opt->val[1] = 0xff;
-                result = L2CAP_CONF_UNACCEPT;
-                break;
-            }
-            break;
-
-        case L2CAP_CONF_QOS:
-            if (opt->len != L2CAP_CONF_OPT_QOS_SIZE) {
-                result = L2CAP_CONF_REJECT;
-                break;
-            }
-            qos = (void *) opt->val;
-
-            /* Flags */
-            val = qos->flags;
-            if (val) {
-                qos->flags = 0;
-                result = L2CAP_CONF_UNACCEPT;
-            }
-
-            /* Service type */
-            val = qos->service_type;
-            if (val != L2CAP_CONF_QOS_BEST_EFFORT &&
-                            val != L2CAP_CONF_QOS_NO_TRAFFIC) {
-                qos->service_type = L2CAP_CONF_QOS_BEST_EFFORT;
-                result = L2CAP_CONF_UNACCEPT;
-            }
-
-            if (val != L2CAP_CONF_QOS_NO_TRAFFIC) {
-                /* XXX: These values should possibly be calculated
-                 * based on LM / baseband properties also.  */
-
-                /* Token rate */
-                val = le32_to_cpu(qos->token_rate);
-                if (val == L2CAP_CONF_QOS_WILDCARD)
-                    qos->token_rate = cpu_to_le32(0x100000);
-
-                /* Token bucket size */
-                val = le32_to_cpu(qos->token_bucket_size);
-                if (val == L2CAP_CONF_QOS_WILDCARD)
-                    qos->token_bucket_size = cpu_to_le32(65500);
-
-                /* Any Peak bandwidth value is correct to return as-is */
-                /* Any Access latency value is correct to return as-is */
-                /* Any Delay variation value is correct to return as-is */
-            }
-            break;
-
-        case L2CAP_CONF_RFC:
-            if (opt->len != 9) {
-                result = L2CAP_CONF_REJECT;
-                break;
-            }
-
-            /* Mode */
-            val = opt->val[0];
-            switch (val) {
-            case L2CAP_MODE_BASIC:
-                ch->mode = val;
-                ch->frame_in = l2cap_bframe_in;
-
-                /* All other parameters shall be ignored */
-                break;
-
-            case L2CAP_MODE_RETRANS:
-            case L2CAP_MODE_FLOWCTL:
-                ch->mode = val;
-                ch->frame_in = l2cap_iframe_in;
-                /* Note: most of these parameters refer to incoming traffic
-                 * so we don't need to save them as long as we can accept
-                 * incoming PDUs at any values of the parameters.  */
-
-                /* TxWindow size */
-                val = opt->val[1];
-                if (val < 1 || val > 32) {
-                    opt->val[1] = 32;
-                    result = L2CAP_CONF_UNACCEPT;
-                    break;
-                }
-
-                /* MaxTransmit */
-                val = opt->val[2];
-                if (val < 1) {
-                    opt->val[2] = 1;
-                    result = L2CAP_CONF_UNACCEPT;
-                    break;
-                }
-
-                /* Remote Retransmission time-out shouldn't affect local
-                 * operation (?) */
-
-                /* The Monitor time-out drives the local Monitor timer (?),
-                 * so save the value.  */
-                val = (opt->val[6] << 8) | opt->val[5];
-                if (val < 30) {
-                    opt->val[5] = 100 & 0xff;
-                    opt->val[6] = 100 >> 8;
-                    result = L2CAP_CONF_UNACCEPT;
-                    break;
-                }
-                ch->monitor_timeout = val;
-                l2cap_monitor_timer_update(ch);
-
-                /* MPS */
-                val = (opt->val[8] << 8) | opt->val[7];
-                if (val < ch->min_mtu) {
-                    opt->val[7] = ch->min_mtu & 0xff;
-                    opt->val[8] = ch->min_mtu >> 8;
-                    result = L2CAP_CONF_UNACCEPT;
-                    break;
-                }
-                ch->mps = val;
-                break;
-
-            default:
-                result = L2CAP_CONF_UNACCEPT;
-                break;
-            }
-            break;
-
-        default:
-            if (!(opt->type >> 7))
-                result = L2CAP_CONF_UNKNOWN;
-            break;
-        }
-
-        if (result != L2CAP_CONF_SUCCESS)
-            break;	/* XXX: should continue? */
-    }
-
-    l2cap_configuration_response(l2cap, ch->remote_cid,
-                    flag, result, rsp, len);
-
-    return result == L2CAP_CONF_SUCCESS && !flag;
-}
-
-static void l2cap_channel_config_req_msg(struct l2cap_instance_s *l2cap,
-                int flag, int cid, const uint8_t *data, int len)
-{
-    struct l2cap_chan_s *ch;
-
-    if (unlikely(cid >= L2CAP_CID_MAX || !l2cap->cid[cid])) {
-        l2cap_command_reject_cid(l2cap, l2cap->last_id, L2CAP_REJ_CID_INVAL,
-                        cid, 0x0000);
-        return;
-    }
-    ch = l2cap->cid[cid];
-
-    /* From OPEN go to WAIT_CONFIG_REQ and from WAIT_CONFIG_REQ_RSP to
-     * WAIT_CONFIG_REQ_RSP.  This is assuming the transition chart for OPEN
-     * on pg 1053, section 6.1.5, volume 3 of BT Core V2.0 has a mistake
-     * and on options-acceptable we go back to OPEN and otherwise to
-     * WAIT_CONFIG_REQ and not the other way.  */
-    ch->config &= ~L2CAP_CFG_ACC;
-
-    if (l2cap_channel_config(l2cap, ch, flag, data, len))
-        /* Go to OPEN or WAIT_CONFIG_RSP */
-        ch->config |= L2CAP_CFG_ACC;
-
-    /* TODO: if the incoming traffic flow control or retransmission mode
-     * changed then we probably need to also generate the
-     * ConfigureChannel_Req event and set the outgoing traffic to the same
-     * mode.  */
-    if (!(ch->config & L2CAP_CFG_INIT) && (ch->config & L2CAP_CFG_ACC) &&
-                    !ch->config_req_id)
-        l2cap_channel_config_req_event(l2cap, ch);
-}
-
-static int l2cap_channel_config_rsp_msg(struct l2cap_instance_s *l2cap,
-                int result, int flag, int cid, const uint8_t *data, int len)
-{
-    struct l2cap_chan_s *ch;
-
-    if (unlikely(cid >= L2CAP_CID_MAX || !l2cap->cid[cid])) {
-        l2cap_command_reject_cid(l2cap, l2cap->last_id, L2CAP_REJ_CID_INVAL,
-                        cid, 0x0000);
-        return 0;
-    }
-    ch = l2cap->cid[cid];
-
-    if (ch->config_req_id != l2cap->last_id)
-        return 1;
-    ch->config_req_id = 0;
-
-    if (result == L2CAP_CONF_SUCCESS) {
-        if (!flag)
-            ch->config |= L2CAP_CFG_INIT;
-        else
-            l2cap_channel_config_null(l2cap, ch);
-    } else
-        /* Retry until we succeed */
-        l2cap_channel_config_req_event(l2cap, ch);
-
-    return 0;
-}
-
-static void l2cap_channel_open_req_msg(struct l2cap_instance_s *l2cap,
-                int psm, int source_cid)
-{
-    struct l2cap_chan_s *ch = l2cap_channel_open(l2cap, psm, source_cid);
-
-    if (!ch)
-        return;
-
-    /* Optional */
-    if (!(ch->config & L2CAP_CFG_INIT) && !ch->config_req_id)
-        l2cap_channel_config_req_event(l2cap, ch);
-}
-
-static void l2cap_info(struct l2cap_instance_s *l2cap, int type)
-{
-    uint8_t data[4];
-    int len = 0;
-    int result = L2CAP_IR_SUCCESS;
-
-    switch (type) {
-    case L2CAP_IT_CL_MTU:
-        data[len ++] = l2cap->group_ch.mps & 0xff;
-        data[len ++] = l2cap->group_ch.mps >> 8;
-        break;
-
-    case L2CAP_IT_FEAT_MASK:
-        /* (Prematurely) report Flow control and Retransmission modes.  */
-        data[len ++] = 0x03;
-        data[len ++] = 0x00;
-        data[len ++] = 0x00;
-        data[len ++] = 0x00;
-        break;
-
-    default:
-        result = L2CAP_IR_NOTSUPP;
-    }
-
-    l2cap_info_response(l2cap, type, result, data, len);
-}
-
-static void l2cap_command(struct l2cap_instance_s *l2cap, int code, int id,
-                const uint8_t *params, int len)
-{
-    int err;
-
-#if 0
-    /* TODO: do the IDs really have to be in sequence?  */
-    if (!id || (id != l2cap->last_id && id != l2cap->next_id)) {
-        fprintf(stderr, "%s: out of sequence command packet ignored.\n",
-                        __FUNCTION__);
-        return;
-    }
-#else
-    l2cap->next_id = id;
-#endif
-    if (id == l2cap->next_id) {
-        l2cap->last_id = l2cap->next_id;
-        l2cap->next_id = l2cap->next_id == 255 ? 1 : l2cap->next_id + 1;
-    } else {
-        /* TODO: Need to re-send the same response, without re-executing
-         * the corresponding command!  */
-    }
-
-    switch (code) {
-    case L2CAP_COMMAND_REJ:
-        if (unlikely(len != 2 && len != 4 && len != 6)) {
-            err = L2CAP_REJ_CMD_NOT_UNDERSTOOD;
-            goto reject;
-        }
-
-        /* We never issue commands other than Command Reject currently.  */
-        fprintf(stderr, "%s: stray Command Reject (%02x, %04x) "
-                        "packet, ignoring.\n", __FUNCTION__, id,
-                        le16_to_cpu(((l2cap_cmd_rej *) params)->reason));
-        break;
-
-    case L2CAP_CONN_REQ:
-        if (unlikely(len != L2CAP_CONN_REQ_SIZE)) {
-            err = L2CAP_REJ_CMD_NOT_UNDERSTOOD;
-            goto reject;
-        }
-
-        l2cap_channel_open_req_msg(l2cap,
-                        le16_to_cpu(((l2cap_conn_req *) params)->psm),
-                        le16_to_cpu(((l2cap_conn_req *) params)->scid));
-        break;
-
-    case L2CAP_CONN_RSP:
-        if (unlikely(len != L2CAP_CONN_RSP_SIZE)) {
-            err = L2CAP_REJ_CMD_NOT_UNDERSTOOD;
-            goto reject;
-        }
-
-        /* We never issue Connection Requests currently. TODO  */
-        fprintf(stderr, "%s: unexpected Connection Response (%02x) "
-                        "packet, ignoring.\n", __FUNCTION__, id);
-        break;
-
-    case L2CAP_CONF_REQ:
-        if (unlikely(len < L2CAP_CONF_REQ_SIZE(0))) {
-            err = L2CAP_REJ_CMD_NOT_UNDERSTOOD;
-            goto reject;
-        }
-
-        l2cap_channel_config_req_msg(l2cap,
-                        le16_to_cpu(((l2cap_conf_req *) params)->flags) & 1,
-                        le16_to_cpu(((l2cap_conf_req *) params)->dcid),
-                        ((l2cap_conf_req *) params)->data,
-                        len - L2CAP_CONF_REQ_SIZE(0));
-        break;
-
-    case L2CAP_CONF_RSP:
-        if (unlikely(len < L2CAP_CONF_RSP_SIZE(0))) {
-            err = L2CAP_REJ_CMD_NOT_UNDERSTOOD;
-            goto reject;
-        }
-
-        if (l2cap_channel_config_rsp_msg(l2cap,
-                        le16_to_cpu(((l2cap_conf_rsp *) params)->result),
-                        le16_to_cpu(((l2cap_conf_rsp *) params)->flags) & 1,
-                        le16_to_cpu(((l2cap_conf_rsp *) params)->scid),
-                        ((l2cap_conf_rsp *) params)->data,
-                        len - L2CAP_CONF_RSP_SIZE(0)))
-            fprintf(stderr, "%s: unexpected Configure Response (%02x) "
-                            "packet, ignoring.\n", __FUNCTION__, id);
-        break;
-
-    case L2CAP_DISCONN_REQ:
-        if (unlikely(len != L2CAP_DISCONN_REQ_SIZE)) {
-            err = L2CAP_REJ_CMD_NOT_UNDERSTOOD;
-            goto reject;
-        }
-
-        l2cap_channel_close(l2cap,
-                        le16_to_cpu(((l2cap_disconn_req *) params)->dcid),
-                        le16_to_cpu(((l2cap_disconn_req *) params)->scid));
-        break;
-
-    case L2CAP_DISCONN_RSP:
-        if (unlikely(len != L2CAP_DISCONN_RSP_SIZE)) {
-            err = L2CAP_REJ_CMD_NOT_UNDERSTOOD;
-            goto reject;
-        }
-
-        /* We never issue Disconnection Requests currently. TODO  */
-        fprintf(stderr, "%s: unexpected Disconnection Response (%02x) "
-                        "packet, ignoring.\n", __FUNCTION__, id);
-        break;
-
-    case L2CAP_ECHO_REQ:
-        l2cap_echo_response(l2cap, params, len);
-        break;
-
-    case L2CAP_ECHO_RSP:
-        /* We never issue Echo Requests currently. TODO  */
-        fprintf(stderr, "%s: unexpected Echo Response (%02x) "
-                        "packet, ignoring.\n", __FUNCTION__, id);
-        break;
-
-    case L2CAP_INFO_REQ:
-        if (unlikely(len != L2CAP_INFO_REQ_SIZE)) {
-            err = L2CAP_REJ_CMD_NOT_UNDERSTOOD;
-            goto reject;
-        }
-
-        l2cap_info(l2cap, le16_to_cpu(((l2cap_info_req *) params)->type));
-        break;
-
-    case L2CAP_INFO_RSP:
-        if (unlikely(len != L2CAP_INFO_RSP_SIZE)) {
-            err = L2CAP_REJ_CMD_NOT_UNDERSTOOD;
-            goto reject;
-        }
-
-        /* We never issue Information Requests currently. TODO  */
-        fprintf(stderr, "%s: unexpected Information Response (%02x) "
-                        "packet, ignoring.\n", __FUNCTION__, id);
-        break;
-
-    default:
-        err = L2CAP_REJ_CMD_NOT_UNDERSTOOD;
-    reject:
-        l2cap_command_reject(l2cap, id, err, 0, 0);
-        break;
-    }
-}
-
-static void l2cap_rexmit_enable(struct l2cap_chan_s *ch, int enable)
-{
-    ch->rexmit = enable;
-
-    l2cap_retransmission_timer_update(ch);
-    l2cap_monitor_timer_update(ch);
-}
-
-/* Command frame SDU */
-static void l2cap_cframe_in(void *opaque, const uint8_t *data, int len)
-{
-    struct l2cap_instance_s *l2cap = opaque;
-    const l2cap_cmd_hdr *hdr;
-    int clen;
-
-    while (len) {
-        hdr = (void *) data;
-        if (len < L2CAP_CMD_HDR_SIZE)
-            /* TODO: signal an error */
-            return;
-        len -= L2CAP_CMD_HDR_SIZE;
-        data += L2CAP_CMD_HDR_SIZE;
-
-        clen = le16_to_cpu(hdr->len);
-        if (len < clen) {
-            l2cap_command_reject(l2cap, hdr->ident,
-                            L2CAP_REJ_CMD_NOT_UNDERSTOOD, 0, 0);
-            break;
-        }
-
-        l2cap_command(l2cap, hdr->code, hdr->ident, data, clen);
-        len -= clen;
-        data += clen;
-    }
-}
-
-/* Group frame SDU */
-static void l2cap_gframe_in(void *opaque, const uint8_t *data, int len)
-{
-}
-
-/* Supervisory frame */
-static void l2cap_sframe_in(struct l2cap_chan_s *ch, uint16_t ctrl)
-{
-}
-
-/* Basic L2CAP mode Information frame */
-static void l2cap_bframe_in(struct l2cap_chan_s *ch, uint16_t cid,
-                const l2cap_hdr *hdr, int len)
-{
-    /* We have a full SDU, no further processing */
-    ch->params.sdu_in(ch->params.opaque, hdr->data, len);
-}
-
-/* Flow Control and Retransmission mode frame */
-static void l2cap_iframe_in(struct l2cap_chan_s *ch, uint16_t cid,
-                const l2cap_hdr *hdr, int len)
-{
-    uint16_t fcs = le16_to_cpup((void *) (hdr->data + len - 2));
-
-    if (len < 4)
-        goto len_error;
-    if (l2cap_fcs16((const uint8_t *) hdr, L2CAP_HDR_SIZE + len - 2) != fcs)
-        goto fcs_error;
-
-    if ((hdr->data[0] >> 7) == ch->rexmit)
-        l2cap_rexmit_enable(ch, !(hdr->data[0] >> 7));
-
-    if (hdr->data[0] & 1) {
-        if (len != 4) {
-            /* TODO: Signal an error? */
-            return;
-        }
-        return l2cap_sframe_in(ch, le16_to_cpup((void *) hdr->data));
-    }
-
-    switch (hdr->data[1] >> 6) {	/* SAR */
-    case L2CAP_SAR_NO_SEG:
-        if (ch->len_total)
-            goto seg_error;
-        if (len - 4 > ch->mps)
-            goto len_error;
-
-        return ch->params.sdu_in(ch->params.opaque, hdr->data + 2, len - 4);
-
-    case L2CAP_SAR_START:
-        if (ch->len_total || len < 6)
-            goto seg_error;
-        if (len - 6 > ch->mps)
-            goto len_error;
-
-        ch->len_total = le16_to_cpup((void *) (hdr->data + 2));
-        if (len >= 6 + ch->len_total)
-            goto seg_error;
-
-        ch->len_cur = len - 6;
-        memcpy(ch->sdu, hdr->data + 4, ch->len_cur);
-        break;
-
-    case L2CAP_SAR_END:
-        if (!ch->len_total || ch->len_cur + len - 4 < ch->len_total)
-            goto seg_error;
-        if (len - 4 > ch->mps)
-            goto len_error;
-
-        memcpy(ch->sdu + ch->len_cur, hdr->data + 2, len - 4);
-        return ch->params.sdu_in(ch->params.opaque, ch->sdu, ch->len_total);
-
-    case L2CAP_SAR_CONT:
-        if (!ch->len_total || ch->len_cur + len - 4 >= ch->len_total)
-            goto seg_error;
-        if (len - 4 > ch->mps)
-            goto len_error;
-
-        memcpy(ch->sdu + ch->len_cur, hdr->data + 2, len - 4);
-        ch->len_cur += len - 4;
-        break;
-
-    seg_error:
-    len_error:	/* TODO */
-    fcs_error:	/* TODO */
-        ch->len_cur = 0;
-        ch->len_total = 0;
-        break;
-    }
-}
-
-static void l2cap_frame_in(struct l2cap_instance_s *l2cap,
-                const l2cap_hdr *frame)
-{
-    uint16_t cid = le16_to_cpu(frame->cid);
-    uint16_t len = le16_to_cpu(frame->len);
-
-    if (unlikely(cid >= L2CAP_CID_MAX || !l2cap->cid[cid])) {
-        fprintf(stderr, "%s: frame addressed to a non-existent L2CAP "
-                        "channel %04x received.\n", __FUNCTION__, cid);
-        return;
-    }
-
-    l2cap->cid[cid]->frame_in(l2cap->cid[cid], cid, frame, len);
-}
-
-/* "Recombination" */
-static void l2cap_pdu_in(struct l2cap_instance_s *l2cap,
-                const uint8_t *data, int len)
-{
-    const l2cap_hdr *hdr = (void *) l2cap->frame_in;
-
-    if (unlikely(len + l2cap->frame_in_len > sizeof(l2cap->frame_in))) {
-        if (l2cap->frame_in_len < sizeof(l2cap->frame_in)) {
-            memcpy(l2cap->frame_in + l2cap->frame_in_len, data,
-                            sizeof(l2cap->frame_in) - l2cap->frame_in_len);
-            l2cap->frame_in_len = sizeof(l2cap->frame_in);
-            /* TODO: truncate */
-            l2cap_frame_in(l2cap, hdr);
-        }
-
-        return;
-    }
-
-    memcpy(l2cap->frame_in + l2cap->frame_in_len, data, len);
-    l2cap->frame_in_len += len;
-
-    if (len >= L2CAP_HDR_SIZE)
-        if (len >= L2CAP_HDR_SIZE + le16_to_cpu(hdr->len))
-            l2cap_frame_in(l2cap, hdr);
-            /* There is never a start of a new PDU in the same ACL packet, so
-             * no need to memmove the remaining payload and loop.  */
-}
-
-static inline uint8_t *l2cap_pdu_out(struct l2cap_instance_s *l2cap,
-                uint16_t cid, uint16_t len)
-{
-    l2cap_hdr *hdr = (void *) l2cap->frame_out;
-
-    l2cap->frame_out_len = len + L2CAP_HDR_SIZE;
-
-    hdr->cid = cpu_to_le16(cid);
-    hdr->len = cpu_to_le16(len);
-
-    return l2cap->frame_out + L2CAP_HDR_SIZE;
-}
-
-static inline void l2cap_pdu_submit(struct l2cap_instance_s *l2cap)
-{
-    /* TODO: Fragmentation */
-    (l2cap->role ?
-     l2cap->link->slave->lmp_acl_data : l2cap->link->host->lmp_acl_resp)
-            (l2cap->link, l2cap->frame_out, 1, l2cap->frame_out_len);
-}
-
-static uint8_t *l2cap_bframe_out(struct bt_l2cap_conn_params_s *parm, int len)
-{
-    struct l2cap_chan_s *chan = (struct l2cap_chan_s *) parm;
-
-    if (len > chan->params.remote_mtu) {
-        fprintf(stderr, "%s: B-Frame for CID %04x longer than %i octets.\n",
-                        __FUNCTION__,
-                        chan->remote_cid, chan->params.remote_mtu);
-        exit(-1);
-    }
-
-    return l2cap_pdu_out(chan->l2cap, chan->remote_cid, len);
-}
-
-static void l2cap_bframe_submit(struct bt_l2cap_conn_params_s *parms)
-{
-    struct l2cap_chan_s *chan = (struct l2cap_chan_s *) parms;
-
-    return l2cap_pdu_submit(chan->l2cap);
-}
-
-#if 0
-/* Stub: Only used if an emulated device requests outgoing flow control */
-static uint8_t *l2cap_iframe_out(struct bt_l2cap_conn_params_s *parm, int len)
-{
-    struct l2cap_chan_s *chan = (struct l2cap_chan_s *) parm;
-
-    if (len > chan->params.remote_mtu) {
-        /* TODO: slice into segments and queue each segment as a separate
-         * I-Frame in a FIFO of I-Frames, local to the CID.  */
-    } else {
-        /* TODO: add to the FIFO of I-Frames, local to the CID.  */
-        /* Possibly we need to return a pointer to a contiguous buffer
-         * for now and then memcpy from it into FIFOs in l2cap_iframe_submit
-         * while segmenting at the same time.  */
-    }
-    return 0;
-}
-
-static void l2cap_iframe_submit(struct bt_l2cap_conn_params_s *parm)
-{
-    /* TODO: If flow control indicates clear to send, start submitting the
-     * individual I-Frames from the FIFO, but don't remove them from there.
-     * Kick the appropriate timer until we get an S-Frame, and only then
-     * remove from FIFO or resubmit and re-kick the timer if the timer
-     * expired.  */
-}
-#endif
-
-static void l2cap_init(struct l2cap_instance_s *l2cap,
-                struct bt_link_s *link, int role)
-{
-    l2cap->link = link;
-    l2cap->role = role;
-    l2cap->dev = (struct bt_l2cap_device_s *)
-            (role ? link->host : link->slave);
-
-    l2cap->next_id = 1;
-
-    /* Establish the signalling channel */
-    l2cap->signalling_ch.params.sdu_in = l2cap_cframe_in;
-    l2cap->signalling_ch.params.sdu_out = l2cap_bframe_out;
-    l2cap->signalling_ch.params.sdu_submit = l2cap_bframe_submit;
-    l2cap->signalling_ch.params.opaque = l2cap;
-    l2cap->signalling_ch.params.remote_mtu = 48;
-    l2cap->signalling_ch.remote_cid = L2CAP_CID_SIGNALLING;
-    l2cap->signalling_ch.frame_in = l2cap_bframe_in;
-    l2cap->signalling_ch.mps = 65536;
-    l2cap->signalling_ch.min_mtu = 48;
-    l2cap->signalling_ch.mode = L2CAP_MODE_BASIC;
-    l2cap->signalling_ch.l2cap = l2cap;
-    l2cap->cid[L2CAP_CID_SIGNALLING] = &l2cap->signalling_ch;
-
-    /* Establish the connection-less data channel */
-    l2cap->group_ch.params.sdu_in = l2cap_gframe_in;
-    l2cap->group_ch.params.opaque = l2cap;
-    l2cap->group_ch.frame_in = l2cap_bframe_in;
-    l2cap->group_ch.mps = 65533;
-    l2cap->group_ch.l2cap = l2cap;
-    l2cap->group_ch.remote_cid = L2CAP_CID_INVALID;
-    l2cap->cid[L2CAP_CID_GROUP] = &l2cap->group_ch;
-}
-
-static void l2cap_teardown(struct l2cap_instance_s *l2cap, int send_disconnect)
-{
-    int cid;
-
-    /* Don't send DISCONNECT if we are currently handling a DISCONNECT
-     * sent from the other side.  */
-    if (send_disconnect) {
-        if (l2cap->role)
-            l2cap->dev->device.lmp_disconnect_slave(l2cap->link);
-            /* l2cap->link is invalid from now on.  */
-        else
-            l2cap->dev->device.lmp_disconnect_master(l2cap->link);
-    }
-
-    for (cid = L2CAP_CID_ALLOC; cid < L2CAP_CID_MAX; cid ++)
-        if (l2cap->cid[cid]) {
-            l2cap->cid[cid]->params.close(l2cap->cid[cid]->params.opaque);
-            g_free(l2cap->cid[cid]);
-        }
-
-    if (l2cap->role)
-        g_free(l2cap);
-    else
-        g_free(l2cap->link);
-}
-
-/* L2CAP glue to lower layers in bluetooth stack (LMP) */
-
-static void l2cap_lmp_connection_request(struct bt_link_s *link)
-{
-    struct bt_l2cap_device_s *dev = (struct bt_l2cap_device_s *) link->slave;
-    struct slave_l2cap_instance_s *l2cap;
-
-    /* Always accept - we only get called if (dev->device->page_scan).  */
-
-    l2cap = g_malloc0(sizeof(struct slave_l2cap_instance_s));
-    l2cap->link.slave = &dev->device;
-    l2cap->link.host = link->host;
-    l2cap_init(&l2cap->l2cap, &l2cap->link, 0);
-
-    /* Always at the end */
-    link->host->reject_reason = 0;
-    link->host->lmp_connection_complete(&l2cap->link);
-}
-
-/* Stub */
-static void l2cap_lmp_connection_complete(struct bt_link_s *link)
-{
-    struct bt_l2cap_device_s *dev = (struct bt_l2cap_device_s *) link->host;
-    struct l2cap_instance_s *l2cap;
-
-    if (dev->device.reject_reason) {
-        /* Signal to upper layer */
-        return;
-    }
-
-    l2cap = g_malloc0(sizeof(struct l2cap_instance_s));
-    l2cap_init(l2cap, link, 1);
-
-    link->acl_mode = acl_active;
-
-    /* Signal to upper layer */
-}
-
-/* Stub */
-static void l2cap_lmp_disconnect_host(struct bt_link_s *link)
-{
-    struct bt_l2cap_device_s *dev = (struct bt_l2cap_device_s *) link->host;
-    struct l2cap_instance_s *l2cap =
-            /* TODO: Retrieve from upper layer */ (void *) dev;
-
-    /* Signal to upper layer */
-
-    l2cap_teardown(l2cap, 0);
-}
-
-static void l2cap_lmp_disconnect_slave(struct bt_link_s *link)
-{
-    struct slave_l2cap_instance_s *l2cap =
-            (struct slave_l2cap_instance_s *) link;
-
-    l2cap_teardown(&l2cap->l2cap, 0);
-}
-
-static void l2cap_lmp_acl_data_slave(struct bt_link_s *link,
-                const uint8_t *data, int start, int len)
-{
-    struct slave_l2cap_instance_s *l2cap =
-            (struct slave_l2cap_instance_s *) link;
-
-    if (start)
-        l2cap->l2cap.frame_in_len = 0;
-
-    l2cap_pdu_in(&l2cap->l2cap, data, len);
-}
-
-/* Stub */
-static void l2cap_lmp_acl_data_host(struct bt_link_s *link,
-                const uint8_t *data, int start, int len)
-{
-    struct bt_l2cap_device_s *dev = (struct bt_l2cap_device_s *) link->host;
-    struct l2cap_instance_s *l2cap =
-            /* TODO: Retrieve from upper layer */ (void *) dev;
-
-    if (start)
-        l2cap->frame_in_len = 0;
-
-    l2cap_pdu_in(l2cap, data, len);
-}
-
-static void l2cap_dummy_destroy(struct bt_device_s *dev)
-{
-    struct bt_l2cap_device_s *l2cap_dev = (struct bt_l2cap_device_s *) dev;
-
-    bt_l2cap_device_done(l2cap_dev);
-}
-
-void bt_l2cap_device_init(struct bt_l2cap_device_s *dev,
-                struct bt_scatternet_s *net)
-{
-    bt_device_init(&dev->device, net);
-
-    dev->device.lmp_connection_request = l2cap_lmp_connection_request;
-    dev->device.lmp_connection_complete = l2cap_lmp_connection_complete;
-    dev->device.lmp_disconnect_master = l2cap_lmp_disconnect_host;
-    dev->device.lmp_disconnect_slave = l2cap_lmp_disconnect_slave;
-    dev->device.lmp_acl_data = l2cap_lmp_acl_data_slave;
-    dev->device.lmp_acl_resp = l2cap_lmp_acl_data_host;
-
-    dev->device.handle_destroy = l2cap_dummy_destroy;
-}
-
-void bt_l2cap_device_done(struct bt_l2cap_device_s *dev)
-{
-    bt_device_done(&dev->device);
-
-    /* Should keep a list of all instances and go through it and
-     * invoke l2cap_teardown() for each.  */
-}
-
-void bt_l2cap_psm_register(struct bt_l2cap_device_s *dev, int psm, int min_mtu,
-                int (*new_channel)(struct bt_l2cap_device_s *dev,
-                        struct bt_l2cap_conn_params_s *params))
-{
-    struct bt_l2cap_psm_s *new_psm = l2cap_psm(dev, psm);
-
-    if (new_psm) {
-        fprintf(stderr, "%s: PSM %04x already registered for device `%s'.\n",
-                        __FUNCTION__, psm, dev->device.lmp_name);
-        exit(-1);
-    }
-
-    new_psm = g_malloc0(sizeof(*new_psm));
-    new_psm->psm = psm;
-    new_psm->min_mtu = min_mtu;
-    new_psm->new_channel = new_channel;
-    new_psm->next = dev->first_psm;
-    dev->first_psm = new_psm;
-}
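
A note on the frame check sequence used by l2cap_iframe_in() above: the 256-entry l2cap_fcs16_table is the standard byte-wise lookup table for the reflected CRC-16 polynomial 0xa001, which is exactly what the bitwise branch kept under #if 0 inside l2cap_fcs16() computes. The standalone sketch below (my code, not from the deleted file) rebuilds the table from that polynomial and cross-checks the two formulations:

#include <assert.h>
#include <stdint.h>

static uint16_t table[256];

/* Precompute the eight shift/xor steps of the reflected CRC-16 (poly 0xa001)
 * for every possible input byte; this reproduces l2cap_fcs16_table. */
static void build_table(void)
{
    for (int b = 0; b < 256; b++) {
        uint16_t fcs = (uint16_t)b;
        for (int i = 0; i < 8; i++)
            fcs = (fcs & 1) ? (fcs >> 1) ^ 0xa001 : fcs >> 1;
        table[b] = fcs;
    }
}

/* Table-driven form, as in the live branch of l2cap_fcs16(). */
static uint16_t fcs16_table(const uint8_t *msg, int len)
{
    uint16_t fcs = 0;
    while (len--)
        fcs = (fcs >> 8) ^ table[(fcs ^ *msg++) & 0xff];
    return fcs;
}

/* Bit-at-a-time form, as in the #if 0 branch of l2cap_fcs16(). */
static uint16_t fcs16_bitwise(const uint8_t *msg, int len)
{
    uint16_t fcs = 0;
    while (len--) {
        fcs ^= *msg++;
        for (int i = 0; i < 8; i++)
            fcs = (fcs & 1) ? (fcs >> 1) ^ 0xa001 : fcs >> 1;
    }
    return fcs;
}

int main(void)
{
    const uint8_t sample[] = "L2CAP I-frame payload";

    build_table();
    assert(table[1] == 0xc0c1 && table[255] == 0x4040); /* matches the rows above */
    assert(fcs16_table(sample, (int)sizeof(sample)) ==
           fcs16_bitwise(sample, (int)sizeof(sample)));
    return 0;
}

The table simply caches the inner eight-step loop for each possible byte, which is why the two assertions hold for any buffer and why the live code can drop the per-bit loop.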
diff --git a/hw/bt/sdp.c b/hw/bt/sdp.c
deleted file mode 100644
index 1531007..0000000
--- a/hw/bt/sdp.c
+++ /dev/null
@@ -1,970 +0,0 @@
-/*
- * Service Discovery Protocol server for QEMU L2CAP devices
- *
- * Copyright (C) 2008 Andrzej Zaborowski  <balrog@zabor.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "qemu-common.h"
-#include "hw/bt.h"
-
-struct bt_l2cap_sdp_state_s {
-    struct bt_l2cap_conn_params_s *channel;
-
-    struct sdp_service_record_s {
-        int match;
-
-        int *uuid;
-        int uuids;
-        struct sdp_service_attribute_s {
-            int match;
-
-            int attribute_id;
-            int len;
-            void *pair;
-        } *attribute_list;
-        int attributes;
-    } *service_list;
-    int services;
-};
-
-static ssize_t sdp_datalen(const uint8_t **element, ssize_t *left)
-{
-    size_t len = *(*element) ++ & SDP_DSIZE_MASK;
-
-    if (!*left)
-        return -1;
-    (*left) --;
-
-    if (len < SDP_DSIZE_NEXT1)
-        return 1 << len;
-    else if (len == SDP_DSIZE_NEXT1) {
-        if (*left < 1)
-            return -1;
-        (*left) --;
-
-        return *(*element) ++;
-    } else if (len == SDP_DSIZE_NEXT2) {
-        if (*left < 2)
-            return -1;
-        (*left) -= 2;
-
-        len = (*(*element) ++) << 8;
-        return len | (*(*element) ++);
-    } else {
-        if (*left < 4)
-            return -1;
-        (*left) -= 4;
-
-        len = (*(*element) ++) << 24;
-        len |= (*(*element) ++) << 16;
-        len |= (*(*element) ++) << 8;
-        return len | (*(*element) ++);
-    }
-}
-
-static const uint8_t bt_base_uuid[12] = {
-    0x00, 0x00, 0x10, 0x00, 0x80, 0x00, 0x00, 0x80, 0x5f, 0x9b, 0x34, 0xfb,
-};
-
-static int sdp_uuid_match(struct sdp_service_record_s *record,
-                const uint8_t *uuid, ssize_t datalen)
-{
-    int *lo, hi, val;
-
-    if (datalen == 16 || datalen == 4) {
-        if (datalen == 16 && memcmp(uuid + 4, bt_base_uuid, 12))
-            return 0;
-
-        if (uuid[0] | uuid[1])
-            return 0;
-        uuid += 2;
-    }
-
-    val = (uuid[0] << 8) | uuid[1];
-    lo = record->uuid;
-    hi = record->uuids;
-    while (hi >>= 1)
-        if (lo[hi] <= val)
-            lo += hi;
-
-    return *lo == val;
-}
-
-#define CONTINUATION_PARAM_SIZE	(1 + sizeof(int))
-#define MAX_PDU_OUT_SIZE	96	/* Arbitrary */
-#define PDU_HEADER_SIZE		5
-#define MAX_RSP_PARAM_SIZE	(MAX_PDU_OUT_SIZE - PDU_HEADER_SIZE - \
-                CONTINUATION_PARAM_SIZE)
-
-static int sdp_svc_match(struct bt_l2cap_sdp_state_s *sdp,
-                const uint8_t **req, ssize_t *len)
-{
-    size_t datalen;
-    int i;
-
-    if ((**req & ~SDP_DSIZE_MASK) != SDP_DTYPE_UUID)
-        return 1;
-
-    datalen = sdp_datalen(req, len);
-    if (datalen != 2 && datalen != 4 && datalen != 16)
-        return 1;
-
-    for (i = 0; i < sdp->services; i ++)
-        if (sdp_uuid_match(&sdp->service_list[i], *req, datalen))
-            sdp->service_list[i].match = 1;
-
-    (*req) += datalen;
-    (*len) -= datalen;
-
-    return 0;
-}
-
-static ssize_t sdp_svc_search(struct bt_l2cap_sdp_state_s *sdp,
-                uint8_t *rsp, const uint8_t *req, ssize_t len)
-{
-    ssize_t seqlen;
-    int i, count, start, end, max;
-    int32_t handle;
-
-    /* Perform the search */
-    for (i = 0; i < sdp->services; i ++)
-        sdp->service_list[i].match = 0;
-
-    if (len < 1)
-        return -SDP_INVALID_SYNTAX;
-    if ((*req & ~SDP_DSIZE_MASK) == SDP_DTYPE_SEQ) {
-        seqlen = sdp_datalen(&req, &len);
-        if (seqlen < 3 || len < seqlen)
-            return -SDP_INVALID_SYNTAX;
-        len -= seqlen;
-
-        while (seqlen)
-            if (sdp_svc_match(sdp, &req, &seqlen))
-                return -SDP_INVALID_SYNTAX;
-    } else if (sdp_svc_match(sdp, &req, &seqlen))
-        return -SDP_INVALID_SYNTAX;
-
-    if (len < 3)
-        return -SDP_INVALID_SYNTAX;
-    max = (req[0] << 8) | req[1];
-    req += 2;
-    len -= 2;
-
-    if (*req) {
-        if (len <= sizeof(int))
-            return -SDP_INVALID_SYNTAX;
-        len -= sizeof(int);
-        memcpy(&start, req + 1, sizeof(int));
-    } else
-        start = 0;
-
-    if (len > 1)
-        return -SDP_INVALID_SYNTAX;
-
-    /* Output the results */
-    len = 4;
-    count = 0;
-    end = start;
-    for (i = 0; i < sdp->services; i ++)
-        if (sdp->service_list[i].match) {
-            if (count >= start && count < max && len + 4 < MAX_RSP_PARAM_SIZE) {
-                handle = i;
-                memcpy(rsp + len, &handle, 4);
-                len += 4;
-                end = count + 1;
-            }
-
-            count ++;
-        }
-
-    rsp[0] = count >> 8;
-    rsp[1] = count & 0xff;
-    rsp[2] = (end - start) >> 8;
-    rsp[3] = (end - start) & 0xff;
-
-    if (end < count) {
-        rsp[len ++] = sizeof(int);
-        memcpy(rsp + len, &end, sizeof(int));
-        len += 4;
-    } else
-        rsp[len ++] = 0;
-
-    return len;
-}
-
-static int sdp_attr_match(struct sdp_service_record_s *record,
-                const uint8_t **req, ssize_t *len)
-{
-    int i, start, end;
-
-    if (**req == (SDP_DTYPE_UINT | SDP_DSIZE_2)) {
-        (*req) ++;
-        if (*len < 3)
-            return 1;
-
-        start = (*(*req) ++) << 8;
-        start |= *(*req) ++;
-        end = start;
-        *len -= 3;
-    } else if (**req == (SDP_DTYPE_UINT | SDP_DSIZE_4)) {
-        (*req) ++;
-        if (*len < 5)
-            return 1;
-
-        start = (*(*req) ++) << 8;
-        start |= *(*req) ++;
-        end = (*(*req) ++) << 8;
-        end |= *(*req) ++;
-        *len -= 5;
-    } else
-        return 1;
-
-    for (i = 0; i < record->attributes; i ++)
-        if (record->attribute_list[i].attribute_id >= start &&
-                        record->attribute_list[i].attribute_id <= end)
-            record->attribute_list[i].match = 1;
-
-    return 0;
-}
-
-static ssize_t sdp_attr_get(struct bt_l2cap_sdp_state_s *sdp,
-                uint8_t *rsp, const uint8_t *req, ssize_t len)
-{
-    ssize_t seqlen;
-    int i, start, end, max;
-    int32_t handle;
-    struct sdp_service_record_s *record;
-    uint8_t *lst;
-
-    /* Perform the search */
-    if (len < 7)
-        return -SDP_INVALID_SYNTAX;
-    memcpy(&handle, req, 4);
-    req += 4;
-    len -= 4;
-
-    if (handle < 0 || handle > sdp->services)
-        return -SDP_INVALID_RECORD_HANDLE;
-    record = &sdp->service_list[handle];
-
-    for (i = 0; i < record->attributes; i ++)
-        record->attribute_list[i].match = 0;
-
-    max = (req[0] << 8) | req[1];
-    req += 2;
-    len -= 2;
-    if (max < 0x0007)
-        return -SDP_INVALID_SYNTAX;
-
-    if ((*req & ~SDP_DSIZE_MASK) == SDP_DTYPE_SEQ) {
-        seqlen = sdp_datalen(&req, &len);
-        if (seqlen < 3 || len < seqlen)
-            return -SDP_INVALID_SYNTAX;
-        len -= seqlen;
-
-        while (seqlen)
-            if (sdp_attr_match(record, &req, &seqlen))
-                return -SDP_INVALID_SYNTAX;
-    } else if (sdp_attr_match(record, &req, &seqlen))
-        return -SDP_INVALID_SYNTAX;
-
-    if (len < 1)
-        return -SDP_INVALID_SYNTAX;
-
-    if (*req) {
-        if (len <= sizeof(int))
-            return -SDP_INVALID_SYNTAX;
-        len -= sizeof(int);
-        memcpy(&start, req + 1, sizeof(int));
-    } else
-        start = 0;
-
-    if (len > 1)
-        return -SDP_INVALID_SYNTAX;
-
-    /* Output the results */
-    lst = rsp + 2;
-    max = MIN(max, MAX_RSP_PARAM_SIZE);
-    len = 3 - start;
-    end = 0;
-    for (i = 0; i < record->attributes; i ++)
-        if (record->attribute_list[i].match) {
-            if (len >= 0 && len + record->attribute_list[i].len < max) {
-                memcpy(lst + len, record->attribute_list[i].pair,
-                                record->attribute_list[i].len);
-                end = len + record->attribute_list[i].len;
-            }
-            len += record->attribute_list[i].len;
-        }
-    if (0 >= start) {
-       lst[0] = SDP_DTYPE_SEQ | SDP_DSIZE_NEXT2;
-       lst[1] = (len + start - 3) >> 8;
-       lst[2] = (len + start - 3) & 0xff;
-    }
-
-    rsp[0] = end >> 8;
-    rsp[1] = end & 0xff;
-
-    if (end < len) {
-        len = end + start;
-        lst[end ++] = sizeof(int);
-        memcpy(lst + end, &len, sizeof(int));
-        end += sizeof(int);
-    } else
-        lst[end ++] = 0;
-
-    return end + 2;
-}
-
-static int sdp_svc_attr_match(struct bt_l2cap_sdp_state_s *sdp,
-                const uint8_t **req, ssize_t *len)
-{
-    int i, j, start, end;
-    struct sdp_service_record_s *record;
-
-    if (**req == (SDP_DTYPE_UINT | SDP_DSIZE_2)) {
-        (*req) ++;
-        if (*len < 3)
-            return 1;
-
-        start = (*(*req) ++) << 8;
-        start |= *(*req) ++;
-        end = start;
-        *len -= 3;
-    } else if (**req == (SDP_DTYPE_UINT | SDP_DSIZE_4)) {
-        (*req) ++;
-        if (*len < 5)
-            return 1;
-
-        start = (*(*req) ++) << 8;
-        start |= *(*req) ++;
-        end = (*(*req) ++) << 8;
-        end |= *(*req) ++;
-        *len -= 5;
-    } else
-        return 1;
-
-    for (i = 0; i < sdp->services; i ++)
-        if ((record = &sdp->service_list[i])->match)
-            for (j = 0; j < record->attributes; j ++)
-                if (record->attribute_list[j].attribute_id >= start &&
-                                record->attribute_list[j].attribute_id <= end)
-                    record->attribute_list[j].match = 1;
-
-    return 0;
-}
-
-static ssize_t sdp_svc_search_attr_get(struct bt_l2cap_sdp_state_s *sdp,
-                uint8_t *rsp, const uint8_t *req, ssize_t len)
-{
-    ssize_t seqlen;
-    int i, j, start, end, max;
-    struct sdp_service_record_s *record;
-    uint8_t *lst;
-
-    /* Perform the search */
-    for (i = 0; i < sdp->services; i ++) {
-        sdp->service_list[i].match = 0;
-            for (j = 0; j < sdp->service_list[i].attributes; j ++)
-                sdp->service_list[i].attribute_list[j].match = 0;
-    }
-
-    if (len < 1)
-        return -SDP_INVALID_SYNTAX;
-    if ((*req & ~SDP_DSIZE_MASK) == SDP_DTYPE_SEQ) {
-        seqlen = sdp_datalen(&req, &len);
-        if (seqlen < 3 || len < seqlen)
-            return -SDP_INVALID_SYNTAX;
-        len -= seqlen;
-
-        while (seqlen)
-            if (sdp_svc_match(sdp, &req, &seqlen))
-                return -SDP_INVALID_SYNTAX;
-    } else if (sdp_svc_match(sdp, &req, &seqlen))
-        return -SDP_INVALID_SYNTAX;
-
-    if (len < 3)
-        return -SDP_INVALID_SYNTAX;
-    max = (req[0] << 8) | req[1];
-    req += 2;
-    len -= 2;
-    if (max < 0x0007)
-        return -SDP_INVALID_SYNTAX;
-
-    if ((*req & ~SDP_DSIZE_MASK) == SDP_DTYPE_SEQ) {
-        seqlen = sdp_datalen(&req, &len);
-        if (seqlen < 3 || len < seqlen)
-            return -SDP_INVALID_SYNTAX;
-        len -= seqlen;
-
-        while (seqlen)
-            if (sdp_svc_attr_match(sdp, &req, &seqlen))
-                return -SDP_INVALID_SYNTAX;
-    } else if (sdp_svc_attr_match(sdp, &req, &seqlen))
-        return -SDP_INVALID_SYNTAX;
-
-    if (len < 1)
-        return -SDP_INVALID_SYNTAX;
-
-    if (*req) {
-        if (len <= sizeof(int))
-            return -SDP_INVALID_SYNTAX;
-        len -= sizeof(int);
-        memcpy(&start, req + 1, sizeof(int));
-    } else
-        start = 0;
-
-    if (len > 1)
-        return -SDP_INVALID_SYNTAX;
-
-    /* Output the results */
-    /* This assumes empty attribute lists are never to be returned even
-     * for matching Service Records.  In practice this shouldn't happen
-     * as the requestor will usually include the always present
-     * ServiceRecordHandle AttributeID in AttributeIDList.  */
-    lst = rsp + 2;
-    max = MIN(max, MAX_RSP_PARAM_SIZE);
-    len = 3 - start;
-    end = 0;
-    for (i = 0; i < sdp->services; i ++)
-        if ((record = &sdp->service_list[i])->match) {
-            len += 3;
-            seqlen = len;
-            for (j = 0; j < record->attributes; j ++)
-                if (record->attribute_list[j].match) {
-                    if (len >= 0)
-                        if (len + record->attribute_list[j].len < max) {
-                            memcpy(lst + len, record->attribute_list[j].pair,
-                                            record->attribute_list[j].len);
-                            end = len + record->attribute_list[j].len;
-                        }
-                    len += record->attribute_list[j].len;
-                }
-            if (seqlen == len)
-                len -= 3;
-            else if (seqlen >= 3 && seqlen < max) {
-                lst[seqlen - 3] = SDP_DTYPE_SEQ | SDP_DSIZE_NEXT2;
-                lst[seqlen - 2] = (len - seqlen) >> 8;
-                lst[seqlen - 1] = (len - seqlen) & 0xff;
-            }
-        }
-    if (len == 3 - start)
-        len -= 3;
-    else if (0 >= start) {
-       lst[0] = SDP_DTYPE_SEQ | SDP_DSIZE_NEXT2;
-       lst[1] = (len + start - 3) >> 8;
-       lst[2] = (len + start - 3) & 0xff;
-    }
-
-    rsp[0] = end >> 8;
-    rsp[1] = end & 0xff;
-
-    if (end < len) {
-        len = end + start;
-        lst[end ++] = sizeof(int);
-        memcpy(lst + end, &len, sizeof(int));
-        end += sizeof(int);
-    } else
-        lst[end ++] = 0;
-
-    return end + 2;
-}
-
-static void bt_l2cap_sdp_sdu_in(void *opaque, const uint8_t *data, int len)
-{
-    struct bt_l2cap_sdp_state_s *sdp = opaque;
-    enum bt_sdp_cmd pdu_id;
-    uint8_t rsp[MAX_PDU_OUT_SIZE - PDU_HEADER_SIZE], *sdu_out;
-    int transaction_id, plen;
-    int err = 0;
-    int rsp_len = 0;
-
-    if (len < 5) {
-        fprintf(stderr, "%s: short SDP PDU (%iB).\n", __FUNCTION__, len);
-        return;
-    }
-
-    pdu_id = *data ++;
-    transaction_id = (data[0] << 8) | data[1];
-    plen = (data[2] << 8) | data[3];
-    data += 4;
-    len -= 5;
-
-    if (len != plen) {
-        fprintf(stderr, "%s: wrong SDP PDU length (%iB != %iB).\n",
-                        __FUNCTION__, plen, len);
-        err = SDP_INVALID_PDU_SIZE;
-        goto respond;
-    }
-
-    switch (pdu_id) {
-    case SDP_SVC_SEARCH_REQ:
-        rsp_len = sdp_svc_search(sdp, rsp, data, len);
-        pdu_id = SDP_SVC_SEARCH_RSP;
-        break;
-
-    case SDP_SVC_ATTR_REQ:
-        rsp_len = sdp_attr_get(sdp, rsp, data, len);
-        pdu_id = SDP_SVC_ATTR_RSP;
-        break;
-
-    case SDP_SVC_SEARCH_ATTR_REQ:
-        rsp_len = sdp_svc_search_attr_get(sdp, rsp, data, len);
-        pdu_id = SDP_SVC_SEARCH_ATTR_RSP;
-        break;
-
-    case SDP_ERROR_RSP:
-    case SDP_SVC_ATTR_RSP:
-    case SDP_SVC_SEARCH_RSP:
-    case SDP_SVC_SEARCH_ATTR_RSP:
-    default:
-        fprintf(stderr, "%s: unexpected SDP PDU ID %02x.\n",
-                        __FUNCTION__, pdu_id);
-        err = SDP_INVALID_SYNTAX;
-        break;
-    }
-
-    if (rsp_len < 0) {
-        err = -rsp_len;
-        rsp_len = 0;
-    }
-
-respond:
-    if (err) {
-        pdu_id = SDP_ERROR_RSP;
-        rsp[rsp_len ++] = err >> 8;
-        rsp[rsp_len ++] = err & 0xff;
-    }
-
-    sdu_out = sdp->channel->sdu_out(sdp->channel, rsp_len + PDU_HEADER_SIZE);
-
-    sdu_out[0] = pdu_id;
-    sdu_out[1] = transaction_id >> 8;
-    sdu_out[2] = transaction_id & 0xff;
-    sdu_out[3] = rsp_len >> 8;
-    sdu_out[4] = rsp_len & 0xff;
-    memcpy(sdu_out + PDU_HEADER_SIZE, rsp, rsp_len);
-
-    sdp->channel->sdu_submit(sdp->channel);
-}
-
-static void bt_l2cap_sdp_close_ch(void *opaque)
-{
-    struct bt_l2cap_sdp_state_s *sdp = opaque;
-    int i;
-
-    for (i = 0; i < sdp->services; i ++) {
-        g_free(sdp->service_list[i].attribute_list->pair);
-        g_free(sdp->service_list[i].attribute_list);
-        g_free(sdp->service_list[i].uuid);
-    }
-    g_free(sdp->service_list);
-    g_free(sdp);
-}
-
-struct sdp_def_service_s {
-    uint16_t class_uuid;
-    struct sdp_def_attribute_s {
-        uint16_t id;
-        struct sdp_def_data_element_s {
-            uint8_t type;
-            union {
-                uint32_t uint;
-                const char *str;
-                struct sdp_def_data_element_s *list;
-            } value;
-        } data;
-    } attributes[];
-};
-
-/* Calculate a safe byte count to allocate that will store the given
- * element, at the same time count elements of a UUID type.  */
-static int sdp_attr_max_size(struct sdp_def_data_element_s *element,
-                int *uuids)
-{
-    int type = element->type & ~SDP_DSIZE_MASK;
-    int len;
-
-    if (type == SDP_DTYPE_UINT || type == SDP_DTYPE_UUID ||
-                    type == SDP_DTYPE_BOOL) {
-        if (type == SDP_DTYPE_UUID)
-            (*uuids) ++;
-        return 1 + (1 << (element->type & SDP_DSIZE_MASK));
-    }
-
-    if (type == SDP_DTYPE_STRING || type == SDP_DTYPE_URL) {
-        if (element->type & SDP_DSIZE_MASK) {
-            for (len = 0; element->value.str[len] |
-                            element->value.str[len + 1]; len ++);
-            return len;
-        } else
-            return 2 + strlen(element->value.str);
-    }
-
-    if (type != SDP_DTYPE_SEQ)
-        exit(-1);
-    len = 2;
-    element = element->value.list;
-    while (element->type)
-        len += sdp_attr_max_size(element ++, uuids);
-    if (len > 255)
-        exit (-1);
-
-    return len;
-}
-
-static int sdp_attr_write(uint8_t *data,
-                struct sdp_def_data_element_s *element, int **uuid)
-{
-    int type = element->type & ~SDP_DSIZE_MASK;
-    int len = 0;
-
-    if (type == SDP_DTYPE_UINT || type == SDP_DTYPE_BOOL) {
-        data[len ++] = element->type;
-        if ((element->type & SDP_DSIZE_MASK) == SDP_DSIZE_1)
-            data[len ++] = (element->value.uint >>  0) & 0xff;
-        else if ((element->type & SDP_DSIZE_MASK) == SDP_DSIZE_2) {
-            data[len ++] = (element->value.uint >>  8) & 0xff;
-            data[len ++] = (element->value.uint >>  0) & 0xff;
-        } else if ((element->type & SDP_DSIZE_MASK) == SDP_DSIZE_4) {
-            data[len ++] = (element->value.uint >>  24) & 0xff;
-            data[len ++] = (element->value.uint >>  16) & 0xff;
-            data[len ++] = (element->value.uint >>  8) & 0xff;
-            data[len ++] = (element->value.uint >>  0) & 0xff;
-        }
-
-        return len;
-    }
-
-    if (type == SDP_DTYPE_UUID) {
-        *(*uuid) ++ = element->value.uint;
-
-        data[len ++] = element->type;
-        data[len ++] = (element->value.uint >>  24) & 0xff;
-        data[len ++] = (element->value.uint >>  16) & 0xff;
-        data[len ++] = (element->value.uint >>  8) & 0xff;
-        data[len ++] = (element->value.uint >>  0) & 0xff;
-        memcpy(data + len, bt_base_uuid, 12);
-
-        return len + 12;
-    }
-
-    data[0] = type | SDP_DSIZE_NEXT1;
-    if (type == SDP_DTYPE_STRING || type == SDP_DTYPE_URL) {
-        if (element->type & SDP_DSIZE_MASK)
-            for (len = 0; element->value.str[len] |
-                            element->value.str[len + 1]; len ++);
-        else
-            len = strlen(element->value.str);
-        memcpy(data + 2, element->value.str, data[1] = len);
-
-        return len + 2;
-    }
-
-    len = 2;
-    element = element->value.list;
-    while (element->type)
-        len += sdp_attr_write(data + len, element ++, uuid);
-    data[1] = len - 2;
-
-    return len;
-}
-
-static int sdp_attributeid_compare(const struct sdp_service_attribute_s *a,
-                const struct sdp_service_attribute_s *b)
-{
-    return (int) b->attribute_id - a->attribute_id;
-}
-
-static int sdp_uuid_compare(const int *a, const int *b)
-{
-    return *a - *b;
-}
-
-static void sdp_service_record_build(struct sdp_service_record_s *record,
-                struct sdp_def_service_s *def, int handle)
-{
-    int len = 0;
-    uint8_t *data;
-    int *uuid;
-
-    record->uuids = 0;
-    while (def->attributes[record->attributes].data.type) {
-        len += 3;
-        len += sdp_attr_max_size(&def->attributes[record->attributes ++].data,
-                        &record->uuids);
-    }
-    record->uuids = 1 << ffs(record->uuids - 1);
-    record->attribute_list =
-            g_malloc0(record->attributes * sizeof(*record->attribute_list));
-    record->uuid =
-            g_malloc0(record->uuids * sizeof(*record->uuid));
-    data = g_malloc(len);
-
-    record->attributes = 0;
-    uuid = record->uuid;
-    while (def->attributes[record->attributes].data.type) {
-        record->attribute_list[record->attributes].pair = data;
-
-        len = 0;
-        data[len ++] = SDP_DTYPE_UINT | SDP_DSIZE_2;
-        data[len ++] = def->attributes[record->attributes].id >> 8;
-        data[len ++] = def->attributes[record->attributes].id & 0xff;
-        len += sdp_attr_write(data + len,
-                        &def->attributes[record->attributes].data, &uuid);
-
-        /* Special case: assign a ServiceRecordHandle in sequence */
-        if (def->attributes[record->attributes].id == SDP_ATTR_RECORD_HANDLE)
-            def->attributes[record->attributes].data.value.uint = handle;
-        /* Note: we could also assign a ServiceDescription based on
-         * sdp->device.device->lmp_name.  */
-
-        record->attribute_list[record->attributes ++].len = len;
-        data += len;
-    }
-
-    /* Sort the attribute list by the AttributeID */
-    qsort(record->attribute_list, record->attributes,
-                    sizeof(*record->attribute_list),
-                    (void *) sdp_attributeid_compare);
-    /* Sort the searchable UUIDs list for bisection */
-    qsort(record->uuid, record->uuids,
-                    sizeof(*record->uuid),
-                    (void *) sdp_uuid_compare);
-}
-
-static void sdp_service_db_build(struct bt_l2cap_sdp_state_s *sdp,
-                struct sdp_def_service_s **service)
-{
-    sdp->services = 0;
-    while (service[sdp->services])
-        sdp->services ++;
-    sdp->service_list =
-            g_malloc0(sdp->services * sizeof(*sdp->service_list));
-
-    sdp->services = 0;
-    while (*service) {
-        sdp_service_record_build(&sdp->service_list[sdp->services],
-                        *service, sdp->services);
-        service ++;
-        sdp->services ++;
-    }
-}
-
-#undef TRUE
-#undef FALSE
-
-#define LAST { .type = 0 }
-#define SERVICE(name, attrs)				\
-    static struct sdp_def_service_s glue(glue(sdp_service_, name), _s) = { \
-        .attributes = { attrs { .data = LAST } },	\
-    };
-#define ATTRIBUTE(attrid, val)	{ .id = glue(SDP_ATTR_, attrid), .data = val },
-#define UINT8(val)	{				\
-        .type       = SDP_DTYPE_UINT | SDP_DSIZE_1,	\
-        .value.uint = val,				\
-    },
-#define UINT16(val)	{				\
-        .type       = SDP_DTYPE_UINT | SDP_DSIZE_2,	\
-        .value.uint = val,				\
-    },
-#define UINT32(val)	{				\
-        .type       = SDP_DTYPE_UINT | SDP_DSIZE_4,	\
-        .value.uint = val,				\
-    },
-#define UUID128(val)	{				\
-        .type       = SDP_DTYPE_UUID | SDP_DSIZE_16,	\
-        .value.uint = val,				\
-    },
-#define TRUE	{				\
-        .type       = SDP_DTYPE_BOOL | SDP_DSIZE_1,	\
-        .value.uint = 1,				\
-    },
-#define FALSE	{				\
-        .type       = SDP_DTYPE_BOOL | SDP_DSIZE_1,	\
-        .value.uint = 0,				\
-    },
-#define STRING(val)	{				\
-        .type       = SDP_DTYPE_STRING,			\
-        .value.str  = val,				\
-    },
-#define ARRAY(...)	{				\
-        .type       = SDP_DTYPE_STRING | SDP_DSIZE_2,	\
-        .value.str  = (char []) { __VA_ARGS__, 0, 0 },	\
-    },
-#define URL(val)	{				\
-        .type       = SDP_DTYPE_URL,			\
-        .value.str  = val,				\
-    },
-#if 1
-#define LIST(val)	{				\
-        .type       = SDP_DTYPE_SEQ,			\
-        .value.list = (struct sdp_def_data_element_s []) { val LAST }, \
-    },
-#endif
-
-/* Try to keep each single attribute below MAX_PDU_OUT_SIZE bytes
- * in resulting SDP data representation size.  */
-
-SERVICE(hid,
-    ATTRIBUTE(RECORD_HANDLE,   UINT32(0))	/* Filled in later */
-    ATTRIBUTE(SVCLASS_ID_LIST, LIST(UUID128(HID_SVCLASS_ID)))
-    ATTRIBUTE(RECORD_STATE,    UINT32(1))
-    ATTRIBUTE(PROTO_DESC_LIST, LIST(
-        LIST(UUID128(L2CAP_UUID) UINT16(BT_PSM_HID_CTRL))
-        LIST(UUID128(HIDP_UUID))
-    ))
-    ATTRIBUTE(BROWSE_GRP_LIST, LIST(UUID128(0x1002)))
-    ATTRIBUTE(LANG_BASE_ATTR_ID_LIST, LIST(
-        UINT16(0x656e) UINT16(0x006a) UINT16(0x0100)
-    ))
-    ATTRIBUTE(PFILE_DESC_LIST, LIST(
-        LIST(UUID128(HID_PROFILE_ID) UINT16(0x0100))
-    ))
-    ATTRIBUTE(DOC_URL,         URL("http://bellard.org/qemu/user-doc.html"))
-    ATTRIBUTE(SVCNAME_PRIMARY, STRING("QEMU Bluetooth HID"))
-    ATTRIBUTE(SVCDESC_PRIMARY, STRING("QEMU Keyboard/Mouse"))
-    ATTRIBUTE(SVCPROV_PRIMARY, STRING("QEMU " QEMU_VERSION))
-
-    /* Profile specific */
-    ATTRIBUTE(DEVICE_RELEASE_NUMBER,	UINT16(0x0091)) /* Deprecated, remove */
-    ATTRIBUTE(PARSER_VERSION,		UINT16(0x0111))
-    /* TODO: extract from l2cap_device->device.class[0] */
-    ATTRIBUTE(DEVICE_SUBCLASS,		UINT8(0x40))
-    ATTRIBUTE(COUNTRY_CODE,		UINT8(0x15))
-    ATTRIBUTE(VIRTUAL_CABLE,		TRUE)
-    ATTRIBUTE(RECONNECT_INITIATE,	FALSE)
-    /* TODO: extract from hid->usbdev->report_desc */
-    ATTRIBUTE(DESCRIPTOR_LIST,		LIST(
-        LIST(UINT8(0x22) ARRAY(
-            0x05, 0x01,	/* Usage Page (Generic Desktop) */
-            0x09, 0x06,	/* Usage (Keyboard) */
-            0xa1, 0x01,	/* Collection (Application) */
-            0x75, 0x01,	/*   Report Size (1) */
-            0x95, 0x08,	/*   Report Count (8) */
-            0x05, 0x07,	/*   Usage Page (Key Codes) */
-            0x19, 0xe0,	/*   Usage Minimum (224) */
-            0x29, 0xe7,	/*   Usage Maximum (231) */
-            0x15, 0x00,	/*   Logical Minimum (0) */
-            0x25, 0x01,	/*   Logical Maximum (1) */
-            0x81, 0x02,	/*   Input (Data, Variable, Absolute) */
-            0x95, 0x01,	/*   Report Count (1) */
-            0x75, 0x08,	/*   Report Size (8) */
-            0x81, 0x01,	/*   Input (Constant) */
-            0x95, 0x05,	/*   Report Count (5) */
-            0x75, 0x01,	/*   Report Size (1) */
-            0x05, 0x08,	/*   Usage Page (LEDs) */
-            0x19, 0x01,	/*   Usage Minimum (1) */
-            0x29, 0x05,	/*   Usage Maximum (5) */
-            0x91, 0x02,	/*   Output (Data, Variable, Absolute) */
-            0x95, 0x01,	/*   Report Count (1) */
-            0x75, 0x03,	/*   Report Size (3) */
-            0x91, 0x01,	/*   Output (Constant) */
-            0x95, 0x06,	/*   Report Count (6) */
-            0x75, 0x08,	/*   Report Size (8) */
-            0x15, 0x00,	/*   Logical Minimum (0) */
-            0x25, 0xff,	/*   Logical Maximum (255) */
-            0x05, 0x07,	/*   Usage Page (Key Codes) */
-            0x19, 0x00,	/*   Usage Minimum (0) */
-            0x29, 0xff,	/*   Usage Maximum (255) */
-            0x81, 0x00,	/*   Input (Data, Array) */
-            0xc0	/* End Collection */
-    ))))
-    ATTRIBUTE(LANG_ID_BASE_LIST,	LIST(
-        LIST(UINT16(0x0409) UINT16(0x0100))
-    ))
-    ATTRIBUTE(SDP_DISABLE,		FALSE)
-    ATTRIBUTE(BATTERY_POWER,		TRUE)
-    ATTRIBUTE(REMOTE_WAKEUP,		TRUE)
-    ATTRIBUTE(BOOT_DEVICE,		TRUE)	/* XXX: untested */
-    ATTRIBUTE(SUPERVISION_TIMEOUT,	UINT16(0x0c80))
-    ATTRIBUTE(NORMALLY_CONNECTABLE,	TRUE)
-    ATTRIBUTE(PROFILE_VERSION,		UINT16(0x0100))
-)
-
-SERVICE(sdp,
-    ATTRIBUTE(RECORD_HANDLE,   UINT32(0))	/* Filled in later */
-    ATTRIBUTE(SVCLASS_ID_LIST, LIST(UUID128(SDP_SERVER_SVCLASS_ID)))
-    ATTRIBUTE(RECORD_STATE,    UINT32(1))
-    ATTRIBUTE(PROTO_DESC_LIST, LIST(
-        LIST(UUID128(L2CAP_UUID) UINT16(BT_PSM_SDP))
-        LIST(UUID128(SDP_UUID))
-    ))
-    ATTRIBUTE(BROWSE_GRP_LIST, LIST(UUID128(0x1002)))
-    ATTRIBUTE(LANG_BASE_ATTR_ID_LIST, LIST(
-        UINT16(0x656e) UINT16(0x006a) UINT16(0x0100)
-    ))
-    ATTRIBUTE(PFILE_DESC_LIST, LIST(
-        LIST(UUID128(SDP_SERVER_PROFILE_ID) UINT16(0x0100))
-    ))
-    ATTRIBUTE(DOC_URL,         URL("http://bellard.org/qemu/user-doc.html"))
-    ATTRIBUTE(SVCPROV_PRIMARY, STRING("QEMU " QEMU_VERSION))
-
-    /* Profile specific */
-    ATTRIBUTE(VERSION_NUM_LIST, LIST(UINT16(0x0100)))
-    ATTRIBUTE(SVCDB_STATE    , UINT32(1))
-)
-
-SERVICE(pnp,
-    ATTRIBUTE(RECORD_HANDLE,   UINT32(0))	/* Filled in later */
-    ATTRIBUTE(SVCLASS_ID_LIST, LIST(UUID128(PNP_INFO_SVCLASS_ID)))
-    ATTRIBUTE(RECORD_STATE,    UINT32(1))
-    ATTRIBUTE(PROTO_DESC_LIST, LIST(
-        LIST(UUID128(L2CAP_UUID) UINT16(BT_PSM_SDP))
-        LIST(UUID128(SDP_UUID))
-    ))
-    ATTRIBUTE(BROWSE_GRP_LIST, LIST(UUID128(0x1002)))
-    ATTRIBUTE(LANG_BASE_ATTR_ID_LIST, LIST(
-        UINT16(0x656e) UINT16(0x006a) UINT16(0x0100)
-    ))
-    ATTRIBUTE(PFILE_DESC_LIST, LIST(
-        LIST(UUID128(PNP_INFO_PROFILE_ID) UINT16(0x0100))
-    ))
-    ATTRIBUTE(DOC_URL,         URL("http://bellard.org/qemu/user-doc.html"))
-    ATTRIBUTE(SVCPROV_PRIMARY, STRING("QEMU " QEMU_VERSION))
-
-    /* Profile specific */
-    ATTRIBUTE(SPECIFICATION_ID, UINT16(0x0100))
-    ATTRIBUTE(VERSION,         UINT16(0x0100))
-    ATTRIBUTE(PRIMARY_RECORD,  TRUE)
-)
-
-static int bt_l2cap_sdp_new_ch(struct bt_l2cap_device_s *dev,
-                struct bt_l2cap_conn_params_s *params)
-{
-    struct bt_l2cap_sdp_state_s *sdp = g_malloc0(sizeof(*sdp));
-    struct sdp_def_service_s *services[] = {
-        &sdp_service_sdp_s,
-        &sdp_service_hid_s,
-        &sdp_service_pnp_s,
-        NULL,
-    };
-
-    sdp->channel = params;
-    sdp->channel->opaque = sdp;
-    sdp->channel->close = bt_l2cap_sdp_close_ch;
-    sdp->channel->sdu_in = bt_l2cap_sdp_sdu_in;
-
-    sdp_service_db_build(sdp, services);
-
-    return 0;
-}
-
-void bt_l2cap_sdp_init(struct bt_l2cap_device_s *dev)
-{
-    bt_l2cap_psm_register(dev, BT_PSM_SDP,
-                    MAX_PDU_OUT_SIZE, bt_l2cap_sdp_new_ch);
-}
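
Before the smaller hunks below, one note on the removed SDP server: every response built by bt_l2cap_sdp_sdu_in() is framed with the same fixed five-byte PDU header (PDU ID, then big-endian transaction ID and parameter length) before being handed to the L2CAP channel. A minimal, self-contained sketch of that framing; pack_sdp_header() is a hypothetical helper written for illustration, not part of the original file:

    #include <stdint.h>

    /* Hypothetical helper (not in the original file) showing the five-byte,
     * big-endian SDP PDU header that bt_l2cap_sdp_sdu_in() writes above. */
    void pack_sdp_header(uint8_t out[5], uint8_t pdu_id,
                         uint16_t transaction_id, uint16_t param_len)
    {
        out[0] = pdu_id;
        out[1] = transaction_id >> 8;    /* 16-bit fields are big-endian */
        out[2] = transaction_id & 0xff;
        out[3] = param_len >> 8;
        out[4] = param_len & 0xff;
    }
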
diff --git a/hw/core/dma.c b/hw/core/dma.c
index 5e9bc8d..5a7d874 100644
--- a/hw/core/dma.c
+++ b/hw/core/dma.c
@@ -449,9 +449,9 @@
 /* request the emulator to transfer a new DMA memory block ASAP */
 void DMA_schedule(int nchan)
 {
-    CPUOldState *env = cpu_single_env;
-    if (env)
-        cpu_exit(env);
+    CPUState *cpu = current_cpu;
+    if (cpu)
+        cpu_exit(cpu);
 }
 
 static void dma_reset(void *opaque)
@@ -567,8 +567,21 @@
               high_page_enable ? 0x480 : -1);
     dma_init2(&dma_controllers[1], 0xc0, 1, 0x88,
               high_page_enable ? 0x488 : -1);
-    register_savevm ("dma", 0, 1, dma_save, dma_load, &dma_controllers[0]);
-    register_savevm ("dma", 1, 1, dma_save, dma_load, &dma_controllers[1]);
+    register_savevm(NULL,
+                    "dma",
+                    0,
+                    1,
+                    dma_save,
+                    dma_load,
+                    &dma_controllers[0]);
+
+    register_savevm(NULL,
+                    "dma",
+                    1,
+                    1,
+                    dma_save,
+                    dma_load,
+                    &dma_controllers[1]);
 
     dma_bh = qemu_bh_new(DMA_run_bh, NULL);
 }
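
The dma.c hunks show two migrations that recur through the rest of this diff: the cpu_single_env/CPUOldState pair gives way to current_cpu/CPUState, and register_savevm() gains a leading device argument, passed as NULL for these legacy, non-qdev devices. The signature implied by the updated call sites is roughly the following; the typedef names in the sketch are assumptions, and the authoritative declaration lives in the QEMU headers:

    /* Approximation of the prototype implied by the register_savevm(NULL, ...)
     * call sites in this diff; the typedefs below are assumed for the sketch. */
    typedef struct DeviceState DeviceState;
    typedef struct QEMUFile QEMUFile;
    typedef void SaveStateHandler(QEMUFile *f, void *opaque);
    typedef int LoadStateHandler(QEMUFile *f, void *opaque, int version_id);

    int register_savevm(DeviceState *dev,      /* NULL for legacy, non-qdev devices */
                        const char *idstr,     /* e.g. "dma", "pckbd", "apic" */
                        int instance_id,
                        int version_id,
                        SaveStateHandler *save_state,
                        LoadStateHandler *load_state,
                        void *opaque);
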
diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index 9c23047..227b7e0 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -521,3 +521,10 @@
     if (main_system_bus)
         qbus_print(mon, main_system_bus, 0);
 }
+
+char *qdev_get_dev_path(DeviceState *dev)
+{
+    // TODO(digit): Implement this properly.
+    return NULL;
+}
+
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 3588044..19be47b 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -119,7 +119,7 @@
 /* SMM support */
 void cpu_smm_update(CPUOldState *env)
 {
-    if (i440fx_state && env == first_cpu)
+    if (i440fx_state && ENV_GET_CPU(env) == first_cpu)
         i440fx_set_smm(i440fx_state, (env->hflags >> HF_SMM_SHIFT) & 1);
 }
 
@@ -146,19 +146,21 @@
 
 static void pic_irq_request(void *opaque, int irq, int level)
 {
-    CPUOldState *env = first_cpu;
+    CPUState *cpu = first_cpu;
+    CPUArchState *env = cpu->env_ptr;
 
     if (env->apic_state) {
-        while (env) {
+        while (cpu) {
             if (apic_accept_pic_intr(env))
                 apic_deliver_pic_intr(env, level);
-            env = env->next_cpu;
+            cpu = QTAILQ_NEXT(cpu, node);
+            env = cpu ? cpu->env_ptr : NULL;
         }
     } else {
         if (level)
-            cpu_interrupt(env, CPU_INTERRUPT_HARD);
+            cpu_interrupt(cpu, CPU_INTERRUPT_HARD);
         else
-            cpu_reset_interrupt(env, CPU_INTERRUPT_HARD);
+            cpu_reset_interrupt(cpu, CPU_INTERRUPT_HARD);
     }
 }
 
@@ -385,12 +387,13 @@
 void ioport_set_a20(int enable)
 {
     /* XXX: send to all CPUs ? */
-    cpu_x86_set_a20(first_cpu, enable);
+    cpu_x86_set_a20(first_cpu->env_ptr, enable);
 }
 
 int ioport_get_a20(void)
 {
-    return ((first_cpu->a20_mask >> 20) & 1);
+    CPUArchState *env = first_cpu->env_ptr;
+    return (env->a20_mask >> 20) & 1;
 }
 
 static void ioport92_write(void *opaque, uint32_t addr, uint32_t val)
@@ -761,7 +764,7 @@
 static void main_cpu_reset(void *opaque)
 {
     CPUOldState *env = opaque;
-    cpu_reset(env);
+    cpu_reset(ENV_GET_CPU(env));
 }
 
 static const int ide_iobase[2] = { 0x1f0, 0x170 };
@@ -862,8 +865,6 @@
     .size = 0x1000
 };
 
-void goldfish_memlog_init(uint32_t base);
-
 /* PC hardware initialisation */
 static void pc_init1(ram_addr_t ram_size,
                      const char *boot_device,
@@ -915,7 +916,7 @@
             exit(1);
         }
         if ((env->cpuid_features & CPUID_APIC) || smp_cpus > 1) {
-            env->cpuid_apic_id = env->cpu_index;
+            env->cpuid_apic_id = ENV_GET_CPU(env)->cpu_index;
             apic_init(env);
         }
         qemu_register_reset(main_cpu_reset, 0, env);
@@ -1037,13 +1038,13 @@
 
     goldfish_battery_init(android_hw->hw_battery);
 
-    goldfish_memlog_init(0);
-
 #ifdef CONFIG_NAND
     goldfish_add_device_no_io(&nand_device);
     nand_dev_init(nand_device.base);
-    pipe_dev_init();
 #endif
+    bool newDeviceNaming =
+            (androidHwConfig_getKernelDeviceNaming(android_hw) >= 1);
+    pipe_dev_init(newDeviceNaming);
 
     {
         DriveInfo* info = drive_get( IF_IDE, 0, 0 );
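
In pc.c, pic_irq_request() no longer walks the old env->next_cpu chain; it iterates the global CPU list via QTAILQ_NEXT(cpu, node) and recovers the per-architecture state through cpu->env_ptr at each step. A self-contained model of that traversal pattern; ModelCPUState and for_each_env are illustrative stand-ins, not QEMU types:

    #include <stddef.h>

    /* Model of the traversal in the updated pic_irq_request(): walk every CPU
     * in the global list and act on its per-arch state. ModelCPUState stands
     * in for CPUState; the next pointer stands in for QTAILQ_NEXT(cpu, node). */
    typedef struct ModelCPUState {
        void *env_ptr;                 /* opaque pointer to the CPUArchState */
        struct ModelCPUState *next;
    } ModelCPUState;

    void for_each_env(ModelCPUState *first_cpu, void (*fn)(void *env))
    {
        for (ModelCPUState *cpu = first_cpu; cpu != NULL; cpu = cpu->next) {
            fn(cpu->env_ptr);          /* act on the recovered per-arch env */
        }
    }
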
diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c
index e4c3816..d5effe6 100644
--- a/hw/input/pckbd.c
+++ b/hw/input/pckbd.c
@@ -370,7 +370,7 @@
     s->irq_mouse = mouse_irq;
 
     kbd_reset(s);
-    register_savevm("pckbd", 0, 3, kbd_save, kbd_load, s);
+    register_savevm(NULL, "pckbd", 0, 3, kbd_save, kbd_load, s);
     register_ioport_read(io_base, 1, 1, kbd_read_data, s);
     register_ioport_write(io_base, 1, 1, kbd_write_data, s);
     register_ioport_read(io_base + 4, 1, 1, kbd_read_status, s);
@@ -431,7 +431,7 @@
     s->mask = mask;
 
     kbd_reset(s);
-    register_savevm("pckbd", 0, 3, kbd_save, kbd_load, s);
+    register_savevm(NULL, "pckbd", 0, 3, kbd_save, kbd_load, s);
     s_io_memory = cpu_register_io_memory(kbd_mm_read, kbd_mm_write, s);
     cpu_register_physical_memory(base, size, s_io_memory);
 
diff --git a/hw/input/ps2.c b/hw/input/ps2.c
index e965da6..778ab67 100644
--- a/hw/input/ps2.c
+++ b/hw/input/ps2.c
@@ -591,7 +591,7 @@
     s->common.update_arg = update_arg;
     s->scancode_set = 2;
     ps2_reset(&s->common);
-    register_savevm("ps2kbd", 0, 3, ps2_kbd_save, ps2_kbd_load, s);
+    register_savevm(NULL, "ps2kbd", 0, 3, ps2_kbd_save, ps2_kbd_load, s);
     //qemu_add_kbd_event_handler(ps2_put_keycode, s);
     qemu_register_reset(ps2_reset, 0, &s->common);
     return s;
@@ -604,7 +604,7 @@
     s->common.update_irq = update_irq;
     s->common.update_arg = update_arg;
     ps2_reset(&s->common);
-    register_savevm("ps2mouse", 0, 2, ps2_mouse_save, ps2_mouse_load, s);
+    register_savevm(NULL, "ps2mouse", 0, 2, ps2_mouse_save, ps2_mouse_load, s);
     //qemu_add_mouse_event_handler(ps2_mouse_event, s, 0, "QEMU PS/2 Mouse");
     qemu_register_reset(ps2_reset, 0, &s->common);
     return s;
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 31a164e..2b81648 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -138,6 +138,7 @@
 
 static void apic_local_deliver(CPUOldState *env, int vector)
 {
+    CPUState *cpu = ENV_GET_CPU(env);
     APICState *s = env->apic_state;
     uint32_t lvt = s->lvt[vector];
     int trigger_mode;
@@ -147,15 +148,15 @@
 
     switch ((lvt >> 8) & 7) {
     case APIC_DM_SMI:
-        cpu_interrupt(env, CPU_INTERRUPT_SMI);
+        cpu_interrupt(cpu, CPU_INTERRUPT_SMI);
         break;
 
     case APIC_DM_NMI:
-        cpu_interrupt(env, CPU_INTERRUPT_NMI);
+        cpu_interrupt(cpu, CPU_INTERRUPT_NMI);
         break;
 
     case APIC_DM_EXTINT:
-        cpu_interrupt(env, CPU_INTERRUPT_HARD);
+        cpu_interrupt(cpu, CPU_INTERRUPT_HARD);
         break;
 
     case APIC_DM_FIXED:
@@ -182,7 +183,7 @@
             reset_bit(s->irr, lvt & 0xff);
             /* fall through */
         case APIC_DM_EXTINT:
-            cpu_reset_interrupt(env, CPU_INTERRUPT_HARD);
+            cpu_reset_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_HARD);
             break;
         }
     }
@@ -239,18 +240,18 @@
 
         case APIC_DM_SMI:
             foreach_apic(apic_iter, deliver_bitmask,
-                cpu_interrupt(apic_iter->cpu_env, CPU_INTERRUPT_SMI) );
+                cpu_interrupt(ENV_GET_CPU(apic_iter->cpu_env), CPU_INTERRUPT_SMI) );
             return;
 
         case APIC_DM_NMI:
             foreach_apic(apic_iter, deliver_bitmask,
-                cpu_interrupt(apic_iter->cpu_env, CPU_INTERRUPT_NMI) );
+                cpu_interrupt(ENV_GET_CPU(apic_iter->cpu_env), CPU_INTERRUPT_NMI) );
             return;
 
         case APIC_DM_INIT:
             /* normal INIT IPI sent to processors */
             foreach_apic(apic_iter, deliver_bitmask,
-                         cpu_interrupt(apic_iter->cpu_env, CPU_INTERRUPT_INIT) );
+                         cpu_interrupt(ENV_GET_CPU(apic_iter->cpu_env), CPU_INTERRUPT_INIT) );
             return;
 
         case APIC_DM_EXTINT:
@@ -365,7 +366,7 @@
     ppr = apic_get_ppr(s);
     if (ppr && (irrv & 0xf0) <= (ppr & 0xf0))
         return;
-    cpu_interrupt(s->cpu_env, CPU_INTERRUPT_HARD);
+    cpu_interrupt(ENV_GET_CPU(s->cpu_env), CPU_INTERRUPT_HARD);
 }
 
 void apic_reset_irq_delivered(void)
@@ -481,20 +482,21 @@
     s->next_time = 0;
     s->wait_for_sipi = 1;
 
-    env->halted = !(s->apicbase & MSR_IA32_APICBASE_BSP);
+    ENV_GET_CPU(env)->halted = !(s->apicbase & MSR_IA32_APICBASE_BSP);
 }
 
 static void apic_startup(APICState *s, int vector_num)
 {
     s->sipi_vector = vector_num;
-    cpu_interrupt(s->cpu_env, CPU_INTERRUPT_SIPI);
+    cpu_interrupt(ENV_GET_CPU(s->cpu_env), CPU_INTERRUPT_SIPI);
 }
 
 void apic_sipi(CPUOldState *env)
 {
+    CPUState *cpu = ENV_GET_CPU(env);
     APICState *s = env->apic_state;
 
-    cpu_reset_interrupt(env, CPU_INTERRUPT_SIPI);
+    cpu_reset_interrupt(cpu, CPU_INTERRUPT_SIPI);
 
     if (!s->wait_for_sipi)
         return;
@@ -502,7 +504,7 @@
     env->eip = 0;
     cpu_x86_load_seg_cache(env, R_CS, s->sipi_vector << 8, s->sipi_vector << 12,
                            0xffff, 0);
-    env->halted = 0;
+    cpu->halted = 0;
     s->wait_for_sipi = 0;
 }
 
@@ -908,7 +910,7 @@
     s->apicbase = 0xfee00000 |
         (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;
 
-    cpu_reset(s->cpu_env);
+    cpu_reset(ENV_GET_CPU(s->cpu_env));
     apic_init_reset(s->cpu_env);
 
     if (bsp) {
@@ -958,7 +960,7 @@
     }
     s->timer = timer_new(QEMU_CLOCK_VIRTUAL, SCALE_NS, apic_timer, s);
 
-    register_savevm("apic", s->idx, 2, apic_save, apic_load, s);
+    register_savevm(NULL, "apic", s->idx, 2, apic_save, apic_load, s);
     qemu_register_reset(apic_reset, 0, s);
 
     local_apics[s->idx] = s;
diff --git a/hw/intc/i8259.c b/hw/intc/i8259.c
index 3c38de0..e77ed8a 100644
--- a/hw/intc/i8259.c
+++ b/hw/intc/i8259.c
@@ -508,7 +508,7 @@
         register_ioport_write(elcr_addr, 1, 1, elcr_ioport_write, s);
         register_ioport_read(elcr_addr, 1, 1, elcr_ioport_read, s);
     }
-    register_savevm("i8259", io_addr, 1, pic_save, pic_load, s);
+    register_savevm(NULL, "i8259", io_addr, 1, pic_save, pic_load, s);
     qemu_register_reset(pic_reset, 0, s);
 }
 
diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c
index 8e0cd77..aff9b9c 100644
--- a/hw/intc/ioapic.c
+++ b/hw/intc/ioapic.c
@@ -254,7 +254,7 @@
                                        ioapic_mem_write, s);
     cpu_register_physical_memory(0xfec00000, 0x1000, io_memory);
 
-    register_savevm("ioapic", 0, 1, ioapic_save, ioapic_load, s);
+    register_savevm(NULL, "ioapic", 0, 1, ioapic_save, ioapic_load, s);
     qemu_register_reset(ioapic_reset, 0, s);
 
     return s;
diff --git a/hw/mips/mips_int.c b/hw/mips/mips_int.c
index 3a6afbf..6fc3d05 100644
--- a/hw/mips/mips_int.c
+++ b/hw/mips/mips_int.c
@@ -6,16 +6,18 @@
    IRQ may change */
 void cpu_mips_update_irq(CPUOldState *env)
 {
+    CPUState *cpu = ENV_GET_CPU(env);
+
     if ((env->CP0_Status & (1 << CP0St_IE)) &&
         !(env->CP0_Status & (1 << CP0St_EXL)) &&
         !(env->CP0_Status & (1 << CP0St_ERL)) &&
         !(env->hflags & MIPS_HFLAG_DM)) {
         if ((env->CP0_Status & env->CP0_Cause & CP0Ca_IP_mask) &&
-            !(env->interrupt_request & CPU_INTERRUPT_HARD)) {
-            cpu_interrupt(env, CPU_INTERRUPT_HARD);
-	}
+            !(cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
+            cpu_interrupt(cpu, CPU_INTERRUPT_HARD);
+        }
     } else
-        cpu_reset_interrupt(env, CPU_INTERRUPT_HARD);
+        cpu_reset_interrupt(cpu, CPU_INTERRUPT_HARD);
 }
 
 static void cpu_mips_irq_request(void *opaque, int irq, int level)
diff --git a/hw/mips/mips_pic.c b/hw/mips/mips_pic.c
index 7f6abab..bb54037 100644
--- a/hw/mips/mips_pic.c
+++ b/hw/mips/mips_pic.c
@@ -26,10 +26,10 @@
     causebit = 0x00000100 << irq;
     if (level) {
         env->CP0_Cause |= causebit;
-        cpu_interrupt(env, CPU_INTERRUPT_HARD);
+        cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_HARD);
     } else {
         env->CP0_Cause &= ~causebit;
-        cpu_reset_interrupt(env, CPU_INTERRUPT_HARD);
+        cpu_reset_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_HARD);
     }
 }
 
diff --git a/hw/net/ne2000.c b/hw/net/ne2000.c
index 98b15c1..9f0dd0b 100644
--- a/hw/net/ne2000.c
+++ b/hw/net/ne2000.c
@@ -504,7 +504,7 @@
     addr &= ~1; /* XXX: check exact behaviour if not even */
     if (addr < 32 ||
         (addr >= NE2000_PMEM_START && addr < NE2000_MEM_SIZE)) {
-        *(uint16_t *)(s->mem + addr) = cpu_to_le16(val);
+        stw_le_p(s->mem + addr, val);
     }
 }
 
@@ -533,7 +533,7 @@
     addr &= ~1; /* XXX: check exact behaviour if not even */
     if (addr < 32 ||
         (addr >= NE2000_PMEM_START && addr < NE2000_MEM_SIZE)) {
-        return le16_to_cpu(*(uint16_t *)(s->mem + addr));
+        return lduw_le_p(s->mem + addr);
     } else {
         return 0xffff;
     }
@@ -726,7 +726,7 @@
 {
     NE2000State *s = vc->opaque;
 
-    unregister_savevm("ne2000", s);
+    unregister_savevm(NULL, "ne2000", s);
 
     isa_unassign_ioport(s->isa_io_base, 16);
     isa_unassign_ioport(s->isa_io_base + 0x10, 2);
@@ -765,7 +765,7 @@
 
     qemu_format_nic_info_str(s->vc, s->macaddr);
 
-    register_savevm("ne2000", -1, 2, ne2000_save, ne2000_load, s);
+    register_savevm(NULL, "ne2000", -1, 2, ne2000_save, ne2000_load, s);
 }
 
 /***********************************************************/
@@ -800,7 +800,7 @@
 {
     NE2000State *s = vc->opaque;
 
-    unregister_savevm("ne2000", s);
+    unregister_savevm(NULL, "ne2000", s);
 }
 
 static void pci_ne2000_init(PCIDevice *pci_dev)
@@ -829,7 +829,7 @@
 
     qemu_format_nic_info_str(s->vc, s->macaddr);
 
-    register_savevm("ne2000", -1, 3, ne2000_save, ne2000_load, s);
+    register_savevm(NULL, "ne2000", -1, 3, ne2000_save, ne2000_load, s);
 }
 
 static void ne2000_register_devices(void)
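
The ne2000.c accessors drop the direct (uint16_t *) casts in favour of stw_le_p()/lduw_le_p(), which move 16-bit little-endian values one byte at a time and therefore stay correct on big-endian hosts and for unaligned addresses. Rough models of the two helpers; the real ones live in QEMU's byte-order headers and may be implemented differently:

    #include <stdint.h>

    /* Model of stw_le_p(): store a 16-bit value, least-significant byte first. */
    void model_stw_le_p(void *p, uint16_t v)
    {
        uint8_t *b = p;
        b[0] = v & 0xff;
        b[1] = v >> 8;
    }

    /* Model of lduw_le_p(): load a 16-bit little-endian value byte by byte. */
    uint16_t model_lduw_le_p(const void *p)
    {
        const uint8_t *b = p;
        return (uint16_t)(b[0] | (b[1] << 8));
    }
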
diff --git a/hw/net/smc91c111.c b/hw/net/smc91c111.c
index 755a296..feaa687 100644
--- a/hw/net/smc91c111.c
+++ b/hw/net/smc91c111.c
@@ -791,8 +791,8 @@
                                  smc91c111_cleanup, s);
     qemu_format_nic_info_str(s->vc, s->macaddr);
 
-    register_savevm( "smc91c111", 0, SMC91C111_SAVE_VERSION,
-                     smc91c111_save, smc91c111_load, s);
+    register_savevm(NULL, "smc91c111", 0, SMC91C111_SAVE_VERSION,
+                    smc91c111_save, smc91c111_load, s);
 }
 
 static void smc91c111_register_devices(void)
diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
index da637a6..3fe29ad 100644
--- a/hw/nvram/fw_cfg.c
+++ b/hw/nvram/fw_cfg.c
@@ -281,7 +281,7 @@
     fw_cfg_add_i16(s, FW_CFG_NOGRAPHIC, (uint16_t)(display_type == DT_NOGRAPHIC));
     fw_cfg_add_i16(s, FW_CFG_NB_CPUS, (uint16_t)smp_cpus);
 
-    register_savevm("fw_cfg", -1, 1, fw_cfg_save, fw_cfg_load, s);
+    register_savevm(NULL, "fw_cfg", -1, 1, fw_cfg_save, fw_cfg_load, s);
     qemu_register_reset(fw_cfg_reset, 0, s);
     fw_cfg_reset(s);
 
diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c
index 01bd623..c396909 100644
--- a/hw/pci-host/piix.c
+++ b/hw/pci-host/piix.c
@@ -201,7 +201,7 @@
 
     d->config[0x72] = 0x02; /* SMRAM */
 
-    register_savevm("I440FX", 0, 2, i440fx_save, i440fx_load, d);
+    register_savevm(NULL, "I440FX", 0, 2, i440fx_save, i440fx_load, d);
     *pi440fx_state = d;
     return b;
 }
@@ -333,7 +333,7 @@
 
     d = pci_register_device(bus, "PIIX3", sizeof(PCIDevice),
                                     devfn, NULL, NULL);
-    register_savevm("PIIX3", 0, 2, piix_save, piix_load, d);
+    register_savevm(NULL, "PIIX3", 0, 2, piix_save, piix_load, d);
 
     piix3_dev = d;
     pci_conf = d->config;
@@ -356,7 +356,7 @@
 
     d = pci_register_device(bus, "PIIX4", sizeof(PCIDevice),
                                     devfn, NULL, NULL);
-    register_savevm("PIIX4", 0, 2, piix_save, piix_load, d);
+    register_savevm(NULL, "PIIX4", 0, 2, piix_save, piix_load, d);
 
     piix4_dev = d;
     pci_conf = d->config;
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 42ac3f7..80ab1f4 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -105,7 +105,7 @@
     bus->nirq = nirq;
     bus->next = first_bus;
     first_bus = bus;
-    register_savevm("PCIBUS", nbus++, 1, pcibus_save, pcibus_load, bus);
+    register_savevm(NULL, "PCIBUS", nbus++, 1, pcibus_save, pcibus_load, bus);
     return bus;
 }
 
@@ -349,7 +349,7 @@
     } else {
         addr = 0x10 + region_num * 4;
     }
-    *(uint32_t *)(pci_dev->config + addr) = cpu_to_le32(type);
+    pci_set_long(pci_dev->config + addr, type);
 }
 
 static void pci_update_mappings(PCIDevice *d)
@@ -358,7 +358,7 @@
     int cmd, i;
     uint32_t last_addr, new_addr, config_ofs;
 
-    cmd = le16_to_cpu(*(uint16_t *)(d->config + PCI_COMMAND));
+    cmd = pci_get_word(d->config + PCI_COMMAND);
     for(i = 0; i < PCI_NUM_REGIONS; i++) {
         r = &d->io_regions[i];
         if (i == PCI_ROM_SLOT) {
@@ -369,8 +369,7 @@
         if (r->size != 0) {
             if (r->type & PCI_ADDRESS_SPACE_IO) {
                 if (cmd & PCI_COMMAND_IO) {
-                    new_addr = le32_to_cpu(*(uint32_t *)(d->config +
-                                                         config_ofs));
+                    new_addr = pci_get_long(d->config + config_ofs);
                     new_addr = new_addr & ~(r->size - 1);
                     last_addr = new_addr + r->size - 1;
                     /* NOTE: we have only 64K ioports on PC */
@@ -383,8 +382,7 @@
                 }
             } else {
                 if (cmd & PCI_COMMAND_MEMORY) {
-                    new_addr = le32_to_cpu(*(uint32_t *)(d->config +
-                                                         config_ofs));
+                    new_addr = pci_get_long(d->config + config_ofs);
                     /* the ROM slot has a specific enable bit */
                     if (i == PCI_ROM_SLOT && !(new_addr & 1))
                         goto no_mem_map;
@@ -441,13 +439,13 @@
     default:
     case 4:
 	if (address <= 0xfc) {
-	    val = le32_to_cpu(*(uint32_t *)(d->config + address));
+	    val = pci_get_long(d->config + address);
 	    break;
 	}
 	/* fall through */
     case 2:
         if (address <= 0xfe) {
-	    val = le16_to_cpu(*(uint16_t *)(d->config + address));
+	    val = pci_get_word(d->config + address);
 	    break;
 	}
 	/* fall through */
@@ -485,7 +483,7 @@
             val &= ~(r->size - 1);
             val |= r->type;
         }
-        *(uint32_t *)(d->config + address) = cpu_to_le32(val);
+        pci_set_long(d->config + address, val);
         pci_update_mappings(d);
         return;
     }
@@ -729,7 +727,7 @@
 
     monitor_printf(mon, "  Bus %2d, device %3d, function %d:\n",
                    d->bus->bus_num, d->devfn >> 3, d->devfn & 7);
-    class = le16_to_cpu(*((uint16_t *)(d->config + PCI_CLASS_DEVICE)));
+    class = pci_get_word(d->config + PCI_CLASS_DEVICE);
     monitor_printf(mon, "    ");
     desc = pci_class_descriptions;
     while (desc->desc && class != desc->class)
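
pci.c applies the same idea to configuration space: the raw pointer casts are replaced with the pci_get_word()/pci_get_long()/pci_set_long() accessors over the little-endian config array. Illustrative equivalents only, assuming byte-wise little-endian packing as in the sketch above:

    #include <stdint.h>

    /* Illustrative stand-ins for the config-space accessors adopted above;
     * PCI configuration space is defined as little-endian. */
    uint16_t model_pci_get_word(const uint8_t *config)
    {
        return (uint16_t)(config[0] | (config[1] << 8));
    }

    void model_pci_set_long(uint8_t *config, uint32_t val)
    {
        config[0] = val & 0xff;
        config[1] = (val >> 8) & 0xff;
        config[2] = (val >> 16) & 0xff;
        config[3] = (val >> 24) & 0xff;
    }
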
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
deleted file mode 100644
index 253ba19..0000000
--- a/hw/scsi/scsi-disk.c
+++ /dev/null
@@ -1,969 +0,0 @@
-/*
- * SCSI Device emulation
- *
- * Copyright (c) 2006 CodeSourcery.
- * Based on code by Fabrice Bellard
- *
- * Written by Paul Brook
- *
- * This code is licenced under the LGPL.
- *
- * Note that this file only handles the SCSI architecture model and device
- * commands.  Emulation of interface/link layer protocols is handled by
- * the host adapter emulator.
- */
-
-#include <qemu-common.h>
-#include "sysemu/sysemu.h"
-#include "sysemu/blockdev.h"
-//#define DEBUG_SCSI
-
-#ifdef DEBUG_SCSI
-#define DPRINTF(fmt, ...) \
-do { printf("scsi-disk: " fmt , ## __VA_ARGS__); } while (0)
-#else
-#define DPRINTF(fmt, ...) do {} while(0)
-#endif
-
-#define BADF(fmt, ...) \
-do { fprintf(stderr, "scsi-disk: " fmt , ## __VA_ARGS__); } while (0)
-
-#include "qemu-common.h"
-#include "block/block.h"
-#include "hw/scsi/scsi-disk.h"
-
-#define SENSE_NO_SENSE        0
-#define SENSE_NOT_READY       2
-#define SENSE_HARDWARE_ERROR  4
-#define SENSE_ILLEGAL_REQUEST 5
-
-#define STATUS_GOOD            0
-#define STATUS_CHECK_CONDITION 2
-
-#define SCSI_DMA_BUF_SIZE    131072
-#define SCSI_MAX_INQUIRY_LEN 256
-
-#define SCSI_REQ_STATUS_RETRY 0x01
-
-typedef struct SCSIRequest {
-    SCSIDeviceState *dev;
-    uint32_t tag;
-    /* ??? We should probably keep track of whether the data transfer is
-       a read or a write.  Currently we rely on the host getting it right.  */
-    /* Both sector and sector_count are in terms of qemu 512 byte blocks.  */
-    uint64_t sector;
-    uint32_t sector_count;
-    struct iovec iov;
-    QEMUIOVector qiov;
-    BlockDriverAIOCB *aiocb;
-    struct SCSIRequest *next;
-    uint32_t status;
-} SCSIRequest;
-
-struct SCSIDeviceState
-{
-    BlockDriverState *bdrv;
-    SCSIRequest *requests;
-    /* The qemu block layer uses a fixed 512 byte sector size.
-       This is the number of 512 byte blocks in a single scsi sector.  */
-    int cluster_size;
-    uint64_t max_lba;
-    int sense;
-    int tcq;
-    /* Completion functions may be called from either scsi_{read,write}_data
-       or from the AIO completion routines.  */
-    scsi_completionfn completion;
-    void *opaque;
-    char drive_serial_str[21];
-};
-
-/* Global pool of SCSIRequest structures.  */
-static SCSIRequest *free_requests = NULL;
-
-static SCSIRequest *scsi_new_request(SCSIDeviceState *s, uint32_t tag)
-{
-    SCSIRequest *r;
-
-    if (free_requests) {
-        r = free_requests;
-        free_requests = r->next;
-    } else {
-        r = g_malloc(sizeof(SCSIRequest));
-        r->iov.iov_base = qemu_memalign(512, SCSI_DMA_BUF_SIZE);
-    }
-    r->dev = s;
-    r->tag = tag;
-    r->sector_count = 0;
-    r->iov.iov_len = 0;
-    r->aiocb = NULL;
-    r->status = 0;
-
-    r->next = s->requests;
-    s->requests = r;
-    return r;
-}
-
-static void scsi_remove_request(SCSIRequest *r)
-{
-    SCSIRequest *last;
-    SCSIDeviceState *s = r->dev;
-
-    if (s->requests == r) {
-        s->requests = r->next;
-    } else {
-        last = s->requests;
-        while (last && last->next != r)
-            last = last->next;
-        if (last) {
-            last->next = r->next;
-        } else {
-            BADF("Orphaned request\n");
-        }
-    }
-    r->next = free_requests;
-    free_requests = r;
-}
-
-static SCSIRequest *scsi_find_request(SCSIDeviceState *s, uint32_t tag)
-{
-    SCSIRequest *r;
-
-    r = s->requests;
-    while (r && r->tag != tag)
-        r = r->next;
-
-    return r;
-}
-
-/* Helper function for command completion.  */
-static void scsi_command_complete(SCSIRequest *r, int status, int sense)
-{
-    SCSIDeviceState *s = r->dev;
-    uint32_t tag;
-    DPRINTF("Command complete tag=0x%x status=%d sense=%d\n", r->tag, status, sense);
-    s->sense = sense;
-    tag = r->tag;
-    scsi_remove_request(r);
-    s->completion(s->opaque, SCSI_REASON_DONE, tag, status);
-}
-
-/* Cancel a pending data transfer.  */
-static void scsi_cancel_io(SCSIDevice *d, uint32_t tag)
-{
-    SCSIDeviceState *s = d->state;
-    SCSIRequest *r;
-    DPRINTF("Cancel tag=0x%x\n", tag);
-    r = scsi_find_request(s, tag);
-    if (r) {
-        if (r->aiocb)
-            bdrv_aio_cancel(r->aiocb);
-        r->aiocb = NULL;
-        scsi_remove_request(r);
-    }
-}
-
-static void scsi_read_complete(void * opaque, int ret)
-{
-    SCSIRequest *r = (SCSIRequest *)opaque;
-    SCSIDeviceState *s = r->dev;
-
-    if (ret) {
-        DPRINTF("IO error\n");
-        s->completion(s->opaque, SCSI_REASON_DATA, r->tag, 0);
-        scsi_command_complete(r, STATUS_CHECK_CONDITION, SENSE_NO_SENSE);
-        return;
-    }
-    DPRINTF("Data ready tag=0x%x len=%d\n", r->tag, r->iov.iov_len);
-
-    s->completion(s->opaque, SCSI_REASON_DATA, r->tag, r->iov.iov_len);
-}
-
-/* Read more data from scsi device into buffer.  */
-static void scsi_read_data(SCSIDevice *d, uint32_t tag)
-{
-    SCSIDeviceState *s = d->state;
-    SCSIRequest *r;
-    uint32_t n;
-
-    r = scsi_find_request(s, tag);
-    if (!r) {
-        BADF("Bad read tag 0x%x\n", tag);
-        /* ??? This is the wrong error.  */
-        scsi_command_complete(r, STATUS_CHECK_CONDITION, SENSE_HARDWARE_ERROR);
-        return;
-    }
-    if (r->sector_count == (uint32_t)-1) {
-        DPRINTF("Read buf_len=%d\n", r->iov.iov_len);
-        r->sector_count = 0;
-        s->completion(s->opaque, SCSI_REASON_DATA, r->tag, r->iov.iov_len);
-        return;
-    }
-    DPRINTF("Read sector_count=%d\n", r->sector_count);
-    if (r->sector_count == 0) {
-        scsi_command_complete(r, STATUS_GOOD, SENSE_NO_SENSE);
-        return;
-    }
-
-    n = r->sector_count;
-    if (n > SCSI_DMA_BUF_SIZE / 512)
-        n = SCSI_DMA_BUF_SIZE / 512;
-
-    r->iov.iov_len = n * 512;
-    qemu_iovec_init_external(&r->qiov, &r->iov, 1);
-    r->aiocb = bdrv_aio_readv(s->bdrv, r->sector, &r->qiov, n,
-                              scsi_read_complete, r);
-    if (r->aiocb == NULL)
-        scsi_command_complete(r, STATUS_CHECK_CONDITION, SENSE_HARDWARE_ERROR);
-    r->sector += n;
-    r->sector_count -= n;
-}
-
-static int scsi_handle_write_error(SCSIRequest *r, int error)
-{
-    BlockErrorAction action = bdrv_get_on_error(r->dev->bdrv, 0);
-
-    if (action == BLOCK_ERR_IGNORE)
-        return 0;
-
-    if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC)
-            || action == BLOCK_ERR_STOP_ANY) {
-        r->status |= SCSI_REQ_STATUS_RETRY;
-        vm_stop(0);
-    } else {
-        scsi_command_complete(r, STATUS_CHECK_CONDITION,
-                SENSE_HARDWARE_ERROR);
-    }
-
-    return 1;
-}
-
-static void scsi_write_complete(void * opaque, int ret)
-{
-    SCSIRequest *r = (SCSIRequest *)opaque;
-    SCSIDeviceState *s = r->dev;
-    uint32_t len;
-    uint32_t n;
-
-    r->aiocb = NULL;
-
-    if (ret) {
-        if (scsi_handle_write_error(r, -ret))
-            return;
-    }
-
-    n = r->iov.iov_len / 512;
-    r->sector += n;
-    r->sector_count -= n;
-    if (r->sector_count == 0) {
-        scsi_command_complete(r, STATUS_GOOD, SENSE_NO_SENSE);
-    } else {
-        len = r->sector_count * 512;
-        if (len > SCSI_DMA_BUF_SIZE) {
-            len = SCSI_DMA_BUF_SIZE;
-        }
-        r->iov.iov_len = len;
-        DPRINTF("Write complete tag=0x%x more=%d\n", r->tag, len);
-        s->completion(s->opaque, SCSI_REASON_DATA, r->tag, len);
-    }
-}
-
-static void scsi_write_request(SCSIRequest *r)
-{
-    SCSIDeviceState *s = r->dev;
-    uint32_t n;
-
-    n = r->iov.iov_len / 512;
-    if (n) {
-        qemu_iovec_init_external(&r->qiov, &r->iov, 1);
-        r->aiocb = bdrv_aio_writev(s->bdrv, r->sector, &r->qiov, n,
-                                   scsi_write_complete, r);
-        if (r->aiocb == NULL)
-            scsi_command_complete(r, STATUS_CHECK_CONDITION,
-                                  SENSE_HARDWARE_ERROR);
-    } else {
-        /* Invoke completion routine to fetch data from host.  */
-        scsi_write_complete(r, 0);
-    }
-}
-
-/* Write data to a scsi device.  Returns nonzero on failure.
-   The transfer may complete asynchronously.  */
-static int scsi_write_data(SCSIDevice *d, uint32_t tag)
-{
-    SCSIDeviceState *s = d->state;
-    SCSIRequest *r;
-
-    DPRINTF("Write data tag=0x%x\n", tag);
-    r = scsi_find_request(s, tag);
-    if (!r) {
-        BADF("Bad write tag 0x%x\n", tag);
-        scsi_command_complete(r, STATUS_CHECK_CONDITION, SENSE_HARDWARE_ERROR);
-        return 1;
-    }
-
-    if (r->aiocb)
-        BADF("Data transfer already in progress\n");
-
-    scsi_write_request(r);
-
-    return 0;
-}
-
-static void scsi_dma_restart_cb(void *opaque, int running, int reason)
-{
-    SCSIDeviceState *s = opaque;
-    SCSIRequest *r = s->requests;
-    if (!running)
-        return;
-
-    while (r) {
-        if (r->status & SCSI_REQ_STATUS_RETRY) {
-            r->status &= ~SCSI_REQ_STATUS_RETRY;
-            scsi_write_request(r);
-        }
-        r = r->next;
-    }
-}
-
-/* Return a pointer to the data buffer.  */
-static uint8_t *scsi_get_buf(SCSIDevice *d, uint32_t tag)
-{
-    SCSIDeviceState *s = d->state;
-    SCSIRequest *r;
-
-    r = scsi_find_request(s, tag);
-    if (!r) {
-        BADF("Bad buffer tag 0x%x\n", tag);
-        return NULL;
-    }
-    return (uint8_t *)r->iov.iov_base;
-}
-
-/* Execute a scsi command.  Returns the length of the data expected by the
-   command.  This will be Positive for data transfers from the device
-   (eg. disk reads), negative for transfers to the device (eg. disk writes),
-   and zero if the command does not transfer any data.  */
-
-static int32_t scsi_send_command(SCSIDevice *d, uint32_t tag,
-                                 uint8_t *buf, int lun)
-{
-    SCSIDeviceState *s = d->state;
-    uint64_t nb_sectors;
-    uint64_t lba;
-    uint32_t len;
-    int __attribute__((unused)) cmdlen;
-    int is_write;
-    uint8_t command;
-    uint8_t *outbuf;
-    SCSIRequest *r;
-
-    command = buf[0];
-    r = scsi_find_request(s, tag);
-    if (r) {
-        BADF("Tag 0x%x already in use\n", tag);
-        scsi_cancel_io(d, tag);
-    }
-    /* ??? Tags are not unique for different luns.  We only implement a
-       single lun, so this should not matter.  */
-    r = scsi_new_request(s, tag);
-    outbuf = (uint8_t *)r->iov.iov_base;
-    is_write = 0;
-    DPRINTF("Command: lun=%d tag=0x%x data=0x%02x", lun, tag, buf[0]);
-    switch (command >> 5) {
-    case 0:
-        lba = (uint64_t) buf[3] | ((uint64_t) buf[2] << 8) |
-              (((uint64_t) buf[1] & 0x1f) << 16);
-        len = buf[4];
-        cmdlen = 6;
-        break;
-    case 1:
-    case 2:
-        lba = (uint64_t) buf[5] | ((uint64_t) buf[4] << 8) |
-              ((uint64_t) buf[3] << 16) | ((uint64_t) buf[2] << 24);
-        len = buf[8] | (buf[7] << 8);
-        cmdlen = 10;
-        break;
-    case 4:
-        lba = (uint64_t) buf[9] | ((uint64_t) buf[8] << 8) |
-              ((uint64_t) buf[7] << 16) | ((uint64_t) buf[6] << 24) |
-              ((uint64_t) buf[5] << 32) | ((uint64_t) buf[4] << 40) |
-              ((uint64_t) buf[3] << 48) | ((uint64_t) buf[2] << 56);
-        len = buf[13] | (buf[12] << 8) | (buf[11] << 16) | (buf[10] << 24);
-        cmdlen = 16;
-        break;
-    case 5:
-        lba = (uint64_t) buf[5] | ((uint64_t) buf[4] << 8) |
-              ((uint64_t) buf[3] << 16) | ((uint64_t) buf[2] << 24);
-        len = buf[9] | (buf[8] << 8) | (buf[7] << 16) | (buf[6] << 24);
-        cmdlen = 12;
-        break;
-    default:
-        BADF("Unsupported command length, command %x\n", command);
-        goto fail;
-    }
-#ifdef DEBUG_SCSI
-    {
-        int i;
-        for (i = 1; i < cmdlen; i++) {
-            printf(" 0x%02x", buf[i]);
-        }
-        printf("\n");
-    }
-#endif
-    if (lun || buf[1] >> 5) {
-        /* Only LUN 0 supported.  */
-        DPRINTF("Unimplemented LUN %d\n", lun ? lun : buf[1] >> 5);
-        if (command != 0x03 && command != 0x12) /* REQUEST SENSE and INQUIRY */
-            goto fail;
-    }
-    switch (command) {
-    case 0x0:
-	DPRINTF("Test Unit Ready\n");
-        if (!bdrv_is_inserted(s->bdrv))
-            goto notready;
-	break;
-    case 0x03:
-        DPRINTF("Request Sense (len %d)\n", len);
-        if (len < 4)
-            goto fail;
-        memset(outbuf, 0, 4);
-        r->iov.iov_len = 4;
-        if (s->sense == SENSE_NOT_READY && len >= 18) {
-            memset(outbuf, 0, 18);
-            r->iov.iov_len = 18;
-            outbuf[7] = 10;
-            /* asc 0x3a, ascq 0: Medium not present */
-            outbuf[12] = 0x3a;
-            outbuf[13] = 0;
-        }
-        outbuf[0] = 0xf0;
-        outbuf[1] = 0;
-        outbuf[2] = s->sense;
-        break;
-    case 0x12:
-        DPRINTF("Inquiry (len %d)\n", len);
-        if (buf[1] & 0x2) {
-            /* Command support data - optional, not implemented */
-            BADF("optional INQUIRY command support request not implemented\n");
-            goto fail;
-        }
-        else if (buf[1] & 0x1) {
-            /* Vital product data */
-            uint8_t page_code = buf[2];
-            if (len < 4) {
-                BADF("Error: Inquiry (EVPD[%02X]) buffer size %d is "
-                     "less than 4\n", page_code, len);
-                goto fail;
-            }
-
-            switch (page_code) {
-                case 0x00:
-                    {
-                        /* Supported page codes, mandatory */
-                        DPRINTF("Inquiry EVPD[Supported pages] "
-                                "buffer size %d\n", len);
-
-                        r->iov.iov_len = 0;
-
-                        if (bdrv_get_type_hint(s->bdrv) == BDRV_TYPE_CDROM) {
-                            outbuf[r->iov.iov_len++] = 5;
-                        } else {
-                            outbuf[r->iov.iov_len++] = 0;
-                        }
-
-                        outbuf[r->iov.iov_len++] = 0x00; // this page
-                        outbuf[r->iov.iov_len++] = 0x00;
-                        outbuf[r->iov.iov_len++] = 3;    // number of pages
-                        outbuf[r->iov.iov_len++] = 0x00; // list of supported pages (this page)
-                        outbuf[r->iov.iov_len++] = 0x80; // unit serial number
-                        outbuf[r->iov.iov_len++] = 0x83; // device identification
-                    }
-                    break;
-                case 0x80:
-                    {
-                        int l;
-
-                        /* Device serial number, optional */
-                        if (len < 4) {
-                            BADF("Error: EVPD[Serial number] Inquiry buffer "
-                                 "size %d too small, %d needed\n", len, 4);
-                            goto fail;
-                        }
-
-                        DPRINTF("Inquiry EVPD[Serial number] buffer size %d\n", len);
-                        l = MIN(len, strlen(s->drive_serial_str));
-
-                        r->iov.iov_len = 0;
-
-                        /* Supported page codes */
-                        if (bdrv_get_type_hint(s->bdrv) == BDRV_TYPE_CDROM) {
-                            outbuf[r->iov.iov_len++] = 5;
-                        } else {
-                            outbuf[r->iov.iov_len++] = 0;
-                        }
-
-                        outbuf[r->iov.iov_len++] = 0x80; // this page
-                        outbuf[r->iov.iov_len++] = 0x00;
-                        outbuf[r->iov.iov_len++] = l;
-                        memcpy(&outbuf[r->iov.iov_len], s->drive_serial_str, l);
-                        r->iov.iov_len += l;
-                    }
-
-                    break;
-                case 0x83:
-                    {
-                        /* Device identification page, mandatory */
-                        int max_len = 255 - 8;
-                        int id_len = strlen(bdrv_get_device_name(s->bdrv));
-                        if (id_len > max_len)
-                            id_len = max_len;
-
-                        DPRINTF("Inquiry EVPD[Device identification] "
-                                "buffer size %d\n", len);
-                        r->iov.iov_len = 0;
-                        if (bdrv_get_type_hint(s->bdrv) == BDRV_TYPE_CDROM) {
-                            outbuf[r->iov.iov_len++] = 5;
-                        } else {
-                            outbuf[r->iov.iov_len++] = 0;
-                        }
-
-                        outbuf[r->iov.iov_len++] = 0x83; // this page
-                        outbuf[r->iov.iov_len++] = 0x00;
-                        outbuf[r->iov.iov_len++] = 3 + id_len;
-
-                        outbuf[r->iov.iov_len++] = 0x2; // ASCII
-                        outbuf[r->iov.iov_len++] = 0;   // not officially assigned
-                        outbuf[r->iov.iov_len++] = 0;   // reserved
-                        outbuf[r->iov.iov_len++] = id_len; // length of data following
-
-                        memcpy(&outbuf[r->iov.iov_len],
-                               bdrv_get_device_name(s->bdrv), id_len);
-                        r->iov.iov_len += id_len;
-                    }
-                    break;
-                default:
-                    BADF("Error: unsupported Inquiry (EVPD[%02X]) "
-                         "buffer size %d\n", page_code, len);
-                    goto fail;
-            }
-            /* done with EVPD */
-            break;
-        }
-        else {
-            /* Standard INQUIRY data */
-            if (buf[2] != 0) {
-                BADF("Error: Inquiry (STANDARD) page or code "
-                     "is non-zero [%02X]\n", buf[2]);
-                goto fail;
-            }
-
-            /* PAGE CODE == 0 */
-            if (len < 5) {
-                BADF("Error: Inquiry (STANDARD) buffer size %d "
-                     "is less than 5\n", len);
-                goto fail;
-            }
-
-            if (len < 36) {
-                BADF("Error: Inquiry (STANDARD) buffer size %d "
-                     "is less than 36 (TODO: only 5 required)\n", len);
-            }
-        }
-
-        if(len > SCSI_MAX_INQUIRY_LEN)
-            len = SCSI_MAX_INQUIRY_LEN;
-
-        memset(outbuf, 0, len);
-
-        if (lun || buf[1] >> 5) {
-            outbuf[0] = 0x7f;	/* LUN not supported */
-	} else if (bdrv_get_type_hint(s->bdrv) == BDRV_TYPE_CDROM) {
-	    outbuf[0] = 5;
-            outbuf[1] = 0x80;
-	    memcpy(&outbuf[16], "QEMU CD-ROM    ", 16);
-	} else {
-	    outbuf[0] = 0;
-	    memcpy(&outbuf[16], "QEMU HARDDISK  ", 16);
-	}
-	memcpy(&outbuf[8], "QEMU   ", 8);
-        memcpy(&outbuf[32], QEMU_VERSION, 4);
-        /* Identify device as SCSI-3 rev 1.
-           Some later commands are also implemented. */
-	outbuf[2] = 3;
-	outbuf[3] = 2; /* Format 2 */
-	outbuf[4] = len - 5; /* Additional Length = (Len - 1) - 4 */
-        /* Sync data transfer and TCQ.  */
-        outbuf[7] = 0x10 | (s->tcq ? 0x02 : 0);
-	r->iov.iov_len = len;
-	break;
-    case 0x16:
-        DPRINTF("Reserve(6)\n");
-        if (buf[1] & 1)
-            goto fail;
-        break;
-    case 0x17:
-        DPRINTF("Release(6)\n");
-        if (buf[1] & 1)
-            goto fail;
-        break;
-    case 0x1a:
-    case 0x5a:
-        {
-            uint8_t *p;
-            int page;
-
-            page = buf[2] & 0x3f;
-            DPRINTF("Mode Sense (page %d, len %d)\n", page, len);
-            p = outbuf;
-            memset(p, 0, 4);
-            outbuf[1] = 0; /* Default media type.  */
-            outbuf[3] = 0; /* Block descriptor length.  */
-            if (bdrv_get_type_hint(s->bdrv) == BDRV_TYPE_CDROM) {
-                outbuf[2] = 0x80; /* Readonly.  */
-            }
-            p += 4;
-            if (page == 4) {
-                int cylinders, heads, secs;
-
-                /* Rigid disk device geometry page. */
-                p[0] = 4;
-                p[1] = 0x16;
-                /* if a geometry hint is available, use it */
-                bdrv_get_geometry_hint(s->bdrv, &cylinders, &heads, &secs);
-                p[2] = (cylinders >> 16) & 0xff;
-                p[3] = (cylinders >> 8) & 0xff;
-                p[4] = cylinders & 0xff;
-                p[5] = heads & 0xff;
-                /* Write precomp start cylinder, disabled */
-                p[6] = (cylinders >> 16) & 0xff;
-                p[7] = (cylinders >> 8) & 0xff;
-                p[8] = cylinders & 0xff;
-                /* Reduced current start cylinder, disabled */
-                p[9] = (cylinders >> 16) & 0xff;
-                p[10] = (cylinders >> 8) & 0xff;
-                p[11] = cylinders & 0xff;
-                /* Device step rate [ns], 200ns */
-                p[12] = 0;
-                p[13] = 200;
-                /* Landing zone cylinder */
-                p[14] = 0xff;
-                p[15] =  0xff;
-                p[16] = 0xff;
-                /* Medium rotation rate [rpm], 5400 rpm */
-                p[20] = (5400 >> 8) & 0xff;
-                p[21] = 5400 & 0xff;
-                p += 0x16;
-            } else if (page == 5) {
-                int cylinders, heads, secs;
-
-                /* Flexible disk device geometry page. */
-                p[0] = 5;
-                p[1] = 0x1e;
-                /* Transfer rate [kbit/s], 5Mbit/s */
-                p[2] = 5000 >> 8;
-                p[3] = 5000 & 0xff;
-                /* if a geometry hint is available, use it */
-                bdrv_get_geometry_hint(s->bdrv, &cylinders, &heads, &secs);
-                p[4] = heads & 0xff;
-                p[5] = secs & 0xff;
-                p[6] = s->cluster_size * 2;
-                p[8] = (cylinders >> 8) & 0xff;
-                p[9] = cylinders & 0xff;
-                /* Write precomp start cylinder, disabled */
-                p[10] = (cylinders >> 8) & 0xff;
-                p[11] = cylinders & 0xff;
-                /* Reduced current start cylinder, disabled */
-                p[12] = (cylinders >> 8) & 0xff;
-                p[13] = cylinders & 0xff;
-                /* Device step rate [100us], 100us */
-                p[14] = 0;
-                p[15] = 1;
-                /* Device step pulse width [us], 1us */
-                p[16] = 1;
-                /* Device head settle delay [100us], 100us */
-                p[17] = 0;
-                p[18] = 1;
-                /* Motor on delay [0.1s], 0.1s */
-                p[19] = 1;
-                /* Motor off delay [0.1s], 0.1s */
-                p[20] = 1;
-                /* Medium rotation rate [rpm], 5400 rpm */
-                p[28] = (5400 >> 8) & 0xff;
-                p[29] = 5400 & 0xff;
-                p += 0x1e;
-            } else if ((page == 8 || page == 0x3f)) {
-                /* Caching page.  */
-                memset(p,0,20);
-                p[0] = 8;
-                p[1] = 0x12;
-                p[2] = 4; /* WCE */
-                p += 20;
-            }
-            if ((page == 0x3f || page == 0x2a)
-                    && (bdrv_get_type_hint(s->bdrv) == BDRV_TYPE_CDROM)) {
-                /* CD Capabilities and Mechanical Status page. */
-                p[0] = 0x2a;
-                p[1] = 0x14;
-                p[2] = 3; // CD-R & CD-RW read
-                p[3] = 0; // Writing not supported
-                p[4] = 0x7f; /* Audio, composite, digital out,
-                                         mode 2 form 1&2, multi session */
-                p[5] = 0xff; /* CD DA, DA accurate, RW supported,
-                                         RW corrected, C2 errors, ISRC,
-                                         UPC, Bar code */
-                p[6] = 0x2d | (bdrv_is_locked(s->bdrv)? 2 : 0);
-                /* Locking supported, jumper present, eject, tray */
-                p[7] = 0; /* no volume & mute control, no
-                                      changer */
-                p[8] = (50 * 176) >> 8; // 50x read speed
-                p[9] = (50 * 176) & 0xff;
-                p[10] = 0 >> 8; // No volume
-                p[11] = 0 & 0xff;
-                p[12] = 2048 >> 8; // 2M buffer
-                p[13] = 2048 & 0xff;
-                p[14] = (16 * 176) >> 8; // 16x read speed current
-                p[15] = (16 * 176) & 0xff;
-                p[18] = (16 * 176) >> 8; // 16x write speed
-                p[19] = (16 * 176) & 0xff;
-                p[20] = (16 * 176) >> 8; // 16x write speed current
-                p[21] = (16 * 176) & 0xff;
-                p += 22;
-            }
-            r->iov.iov_len = p - outbuf;
-            outbuf[0] = r->iov.iov_len - 4;
-            if (r->iov.iov_len > len)
-                r->iov.iov_len = len;
-        }
-        break;
-    case 0x1b:
-        DPRINTF("Start Stop Unit\n");
-        if (bdrv_get_type_hint(s->bdrv) == BDRV_TYPE_CDROM &&
-            (buf[4] & 2))
-            /* load/eject medium */
-            bdrv_eject(s->bdrv, !(buf[4] & 1));
-	break;
-    case 0x1e:
-        DPRINTF("Prevent Allow Medium Removal (prevent = %d)\n", buf[4] & 3);
-        bdrv_set_locked(s->bdrv, buf[4] & 1);
-	break;
-    case 0x25:
-	DPRINTF("Read Capacity\n");
-        /* The normal LEN field for this command is zero.  */
-	memset(outbuf, 0, 8);
-	bdrv_get_geometry(s->bdrv, &nb_sectors);
-        nb_sectors /= s->cluster_size;
-        /* Returned value is the address of the last sector.  */
-        if (nb_sectors) {
-            nb_sectors--;
-            /* Remember the new size for read/write sanity checking. */
-            s->max_lba = nb_sectors;
-            /* Clip to 2TB, instead of returning capacity modulo 2TB. */
-            if (nb_sectors > UINT32_MAX)
-                nb_sectors = UINT32_MAX;
-            outbuf[0] = (nb_sectors >> 24) & 0xff;
-            outbuf[1] = (nb_sectors >> 16) & 0xff;
-            outbuf[2] = (nb_sectors >> 8) & 0xff;
-            outbuf[3] = nb_sectors & 0xff;
-            outbuf[4] = 0;
-            outbuf[5] = 0;
-            outbuf[6] = s->cluster_size * 2;
-            outbuf[7] = 0;
-            r->iov.iov_len = 8;
-        } else {
-        notready:
-            scsi_command_complete(r, STATUS_CHECK_CONDITION, SENSE_NOT_READY);
-            return 0;
-        }
-	break;
-    case 0x08:
-    case 0x28:
-    case 0x88:
-        DPRINTF("Read (sector %lld, count %d)\n", lba, len);
-        if (lba > s->max_lba)
-            goto illegal_lba;
-        r->sector = lba * s->cluster_size;
-        r->sector_count = len * s->cluster_size;
-        break;
-    case 0x0a:
-    case 0x2a:
-    case 0x8a:
-        DPRINTF("Write (sector %lld, count %d)\n", lba, len);
-        if (lba > s->max_lba)
-            goto illegal_lba;
-        r->sector = lba * s->cluster_size;
-        r->sector_count = len * s->cluster_size;
-        is_write = 1;
-        break;
-    case 0x35:
-        DPRINTF("Synchronise cache (sector %d, count %d)\n", lba, len);
-        bdrv_flush(s->bdrv);
-        break;
-    case 0x43:
-        {
-            int start_track, format, msf, toclen;
-
-            msf = buf[1] & 2;
-            format = buf[2] & 0xf;
-            start_track = buf[6];
-            bdrv_get_geometry(s->bdrv, &nb_sectors);
-            DPRINTF("Read TOC (track %d format %d msf %d)\n", start_track, format, msf >> 1);
-            nb_sectors /= s->cluster_size;
-            switch(format) {
-            case 0:
-                toclen = cdrom_read_toc(nb_sectors, outbuf, msf, start_track);
-                break;
-            case 1:
-                /* multi session : only a single session defined */
-                toclen = 12;
-                memset(outbuf, 0, 12);
-                outbuf[1] = 0x0a;
-                outbuf[2] = 0x01;
-                outbuf[3] = 0x01;
-                break;
-            case 2:
-                toclen = cdrom_read_toc_raw(nb_sectors, outbuf, msf, start_track);
-                break;
-            default:
-                goto error_cmd;
-            }
-            if (toclen > 0) {
-                if (len > toclen)
-                  len = toclen;
-                r->iov.iov_len = len;
-                break;
-            }
-        error_cmd:
-            DPRINTF("Read TOC error\n");
-            goto fail;
-        }
-    case 0x46:
-        DPRINTF("Get Configuration (rt %d, maxlen %d)\n", buf[1] & 3, len);
-        memset(outbuf, 0, 8);
-        /* ??? This should probably return much more information.  For now
-           just return the basic header indicating the CD-ROM profile.  */
-        outbuf[7] = 8; // CD-ROM
-        r->iov.iov_len = 8;
-        break;
-    case 0x56:
-        DPRINTF("Reserve(10)\n");
-        if (buf[1] & 3)
-            goto fail;
-        break;
-    case 0x57:
-        DPRINTF("Release(10)\n");
-        if (buf[1] & 3)
-            goto fail;
-        break;
-    case 0x9e:
-        /* Service Action In subcommands. */
-        if ((buf[1] & 31) == 0x10) {
-            DPRINTF("SAI READ CAPACITY(16)\n");
-            memset(outbuf, 0, len);
-            bdrv_get_geometry(s->bdrv, &nb_sectors);
-            nb_sectors /= s->cluster_size;
-            /* Returned value is the address of the last sector.  */
-            if (nb_sectors) {
-                nb_sectors--;
-                /* Remember the new size for read/write sanity checking. */
-                s->max_lba = nb_sectors;
-                outbuf[0] = (nb_sectors >> 56) & 0xff;
-                outbuf[1] = (nb_sectors >> 48) & 0xff;
-                outbuf[2] = (nb_sectors >> 40) & 0xff;
-                outbuf[3] = (nb_sectors >> 32) & 0xff;
-                outbuf[4] = (nb_sectors >> 24) & 0xff;
-                outbuf[5] = (nb_sectors >> 16) & 0xff;
-                outbuf[6] = (nb_sectors >> 8) & 0xff;
-                outbuf[7] = nb_sectors & 0xff;
-                outbuf[8] = 0;
-                outbuf[9] = 0;
-                outbuf[10] = s->cluster_size * 2;
-                outbuf[11] = 0;
-                /* Protection, exponent and lowest lba field left blank. */
-                r->iov.iov_len = len;
-            } else {
-                scsi_command_complete(r, STATUS_CHECK_CONDITION, SENSE_NOT_READY);
-                return 0;
-            }
-            break;
-        }
-        DPRINTF("Unsupported Service Action In\n");
-        goto fail;
-    case 0xa0:
-        DPRINTF("Report LUNs (len %d)\n", len);
-        if (len < 16)
-            goto fail;
-        memset(outbuf, 0, 16);
-        outbuf[3] = 8;
-        r->iov.iov_len = 16;
-        break;
-    case 0x2f:
-        DPRINTF("Verify (sector %d, count %d)\n", lba, len);
-        break;
-    default:
-	DPRINTF("Unknown SCSI command (%2.2x)\n", buf[0]);
-    fail:
-        scsi_command_complete(r, STATUS_CHECK_CONDITION, SENSE_ILLEGAL_REQUEST);
-	return 0;
-    illegal_lba:
-        scsi_command_complete(r, STATUS_CHECK_CONDITION, SENSE_HARDWARE_ERROR);
-        return 0;
-    }
-    if (r->sector_count == 0 && r->iov.iov_len == 0) {
-        scsi_command_complete(r, STATUS_GOOD, SENSE_NO_SENSE);
-    }
-    len = r->sector_count * 512 + r->iov.iov_len;
-    if (is_write) {
-        return -len;
-    } else {
-        if (!r->sector_count)
-            r->sector_count = -1;
-        return len;
-    }
-}
-
-static void scsi_destroy(SCSIDevice *d)
-{
-    g_free(d->state);
-    g_free(d);
-}
-
-SCSIDevice *scsi_disk_init(BlockDriverState *bdrv, int tcq,
-                           scsi_completionfn completion, void *opaque)
-{
-    SCSIDevice *d;
-    SCSIDeviceState *s;
-    uint64_t nb_sectors;
-
-    s = (SCSIDeviceState *)g_malloc0(sizeof(SCSIDeviceState));
-    s->bdrv = bdrv;
-    s->tcq = tcq;
-    s->completion = completion;
-    s->opaque = opaque;
-    if (bdrv_get_type_hint(s->bdrv) == BDRV_TYPE_CDROM) {
-        s->cluster_size = 4;
-    } else {
-        s->cluster_size = 1;
-    }
-    bdrv_get_geometry(s->bdrv, &nb_sectors);
-    nb_sectors /= s->cluster_size;
-    if (nb_sectors)
-        nb_sectors--;
-    s->max_lba = nb_sectors;
-#if 0
-    strncpy(s->drive_serial_str, drive_get_serial(s->bdrv),
-            sizeof(s->drive_serial_str));
-    if (strlen(s->drive_serial_str) == 0)
-#endif
-        pstrcpy(s->drive_serial_str, sizeof(s->drive_serial_str), "0");
-    qemu_add_vm_change_state_handler(scsi_dma_restart_cb, s);
-    d = (SCSIDevice *)g_malloc0(sizeof(SCSIDevice));
-    d->state = s;
-    d->destroy = scsi_destroy;
-    d->send_command = scsi_send_command;
-    d->read_data = scsi_read_data;
-    d->write_data = scsi_write_data;
-    d->cancel_io = scsi_cancel_io;
-    d->get_buf = scsi_get_buf;
-
-    return d;
-}
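Note on the scsi-disk code removed above: every multi-byte SCSI field (the READ CAPACITY payloads, the mode-sense geometry pages, the TOC lengths) is packed big-endian one byte at a time, and s->cluster_size is 4 for CD-ROM backends so that a 2048-byte logical block size is reported on top of QEMU's 512-byte sectors (hence outbuf[6] = s->cluster_size * 2). A minimal, self-contained sketch of the READ CAPACITY(10) encoding -- the helper name and exact layout below are illustrative, not part of the emulator sources:

    #include <stdint.h>

    /* READ CAPACITY(10): bytes 0..3 hold the last LBA, bytes 4..7 the block
     * length in bytes, both big-endian, which is why the device code above
     * shifts byte by byte instead of copying a host-endian integer. */
    static void encode_read_capacity10(uint8_t out[8],
                                       uint64_t nb_sectors,  /* in 512-byte units */
                                       uint32_t block_size)  /* 2048 for CD-ROM, 512 otherwise */
    {
        uint64_t last_lba = 0;
        if (nb_sectors >= block_size / 512)
            last_lba = nb_sectors / (block_size / 512) - 1;
        if (last_lba > UINT32_MAX)
            last_lba = UINT32_MAX;          /* clip to 2TB, as the removed code does */
        out[0] = (uint8_t)(last_lba >> 24);
        out[1] = (uint8_t)(last_lba >> 16);
        out[2] = (uint8_t)(last_lba >> 8);
        out[3] = (uint8_t)last_lba;
        out[4] = (uint8_t)(block_size >> 24);
        out[5] = (uint8_t)(block_size >> 16);
        out[6] = (uint8_t)(block_size >> 8);
        out[7] = (uint8_t)block_size;
    }
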
diff --git a/hw/timer/i8254.c b/hw/timer/i8254.c
index 1347511..2bd1ef0 100644
--- a/hw/timer/i8254.c
+++ b/hw/timer/i8254.c
@@ -495,7 +495,7 @@
     s->irq_timer = timer_new(QEMU_CLOCK_VIRTUAL, SCALE_NS, pit_irq_timer, s);
     s->irq = irq;
 
-    register_savevm("i8254", base, 1, pit_save, pit_load, pit);
+    register_savevm(NULL, "i8254", base, 1, pit_save, pit_load, pit);
 
     qemu_register_reset(pit_reset, 0, pit);
     register_ioport_write(base, 4, 1, pit_ioport_write, pit);
diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c
index 5d6c4a0..568a0c3 100644
--- a/hw/timer/mc146818rtc.c
+++ b/hw/timer/mc146818rtc.c
@@ -627,10 +627,16 @@
     register_ioport_write(base, 2, 1, cmos_ioport_write, s);
     register_ioport_read(base, 2, 1, cmos_ioport_read, s);
 
-    register_savevm("mc146818rtc", base, 1, rtc_save, rtc_load, s);
+    register_savevm(NULL, "mc146818rtc", base, 1, rtc_save, rtc_load, s);
 #ifdef TARGET_I386
     if (rtc_td_hack)
-        register_savevm("mc146818rtc-td", base, 1, rtc_save_td, rtc_load_td, s);
+        register_savevm(NULL,
+                        "mc146818rtc-td",
+                        base,
+                        1,
+                        rtc_save_td,
+                        rtc_load_td,
+                        s);
 #endif
     qemu_register_reset(rtc_reset, 0, s);
 
@@ -744,10 +750,16 @@
     io_memory = cpu_register_io_memory(rtc_mm_read, rtc_mm_write, s);
     cpu_register_physical_memory(base, 2 << it_shift, io_memory);
 
-    register_savevm("mc146818rtc", base, 1, rtc_save, rtc_load, s);
+    register_savevm(NULL, "mc146818rtc", base, 1, rtc_save, rtc_load, s);
 #ifdef TARGET_I386
     if (rtc_td_hack)
-        register_savevm("mc146818rtc-td", base, 1, rtc_save_td, rtc_load_td, s);
+        register_savevm(NULL,
+                        "mc146818rtc-td",
+                        base,
+                        1,
+                        rtc_save_td,
+                        rtc_load_td,
+                        s);
 #endif
     qemu_register_reset(rtc_reset, 0, s);
     return s;
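Both timer diffs above add a leading NULL argument to register_savevm(). Upstream QEMU of the same era gave this function an owning-device parameter so a savevm section can be tied to a DeviceState; board-level devices such as the PIT and RTC simply pass NULL. A paraphrased prototype, under the assumption that this fork mirrors the upstream signature (typedef names may differ in this tree):

    /* Sketch paraphrased from upstream QEMU of the period; treat as an
     * assumption, not this tree's authoritative declaration. */
    int register_savevm(DeviceState *dev,            /* owner, NULL for board-level devices */
                        const char *idstr,           /* section name, e.g. "mc146818rtc" */
                        int instance_id,             /* the I/O base is reused here */
                        int version_id,
                        SaveStateHandler *save_state,
                        LoadStateHandler *load_state,
                        void *opaque);
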
diff --git a/hw/usb/core.c b/hw/usb/core.c
deleted file mode 100644
index 04b4764..0000000
--- a/hw/usb/core.c
+++ /dev/null
@@ -1,231 +0,0 @@
-/*
- * QEMU USB emulation
- *
- * Copyright (c) 2005 Fabrice Bellard
- *
- * 2008 Generic packet handler rewrite by Max Krasnyansky
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu-common.h"
-#include "hw/usb.h"
-
-void usb_attach(USBPort *port, USBDevice *dev)
-{
-    port->attach(port, dev);
-}
-
-/**********************/
-
-/* generic USB device helpers (you are not forced to use them when
-   writing your USB device driver, but they help handling the
-   protocol)
-*/
-
-#define SETUP_STATE_IDLE 0
-#define SETUP_STATE_DATA 1
-#define SETUP_STATE_ACK  2
-
-static int do_token_setup(USBDevice *s, USBPacket *p)
-{
-    int request, value, index;
-    int ret = 0;
-
-    if (p->len != 8)
-        return USB_RET_STALL;
-
-    memcpy(s->setup_buf, p->data, 8);
-    s->setup_len   = (s->setup_buf[7] << 8) | s->setup_buf[6];
-    s->setup_index = 0;
-
-    request = (s->setup_buf[0] << 8) | s->setup_buf[1];
-    value   = (s->setup_buf[3] << 8) | s->setup_buf[2];
-    index   = (s->setup_buf[5] << 8) | s->setup_buf[4];
-
-    if (s->setup_buf[0] & USB_DIR_IN) {
-        ret = s->handle_control(s, request, value, index,
-                                s->setup_len, s->data_buf);
-        if (ret < 0)
-            return ret;
-
-        if (ret < s->setup_len)
-            s->setup_len = ret;
-        s->setup_state = SETUP_STATE_DATA;
-    } else {
-        if (s->setup_len == 0)
-            s->setup_state = SETUP_STATE_ACK;
-        else
-            s->setup_state = SETUP_STATE_DATA;
-    }
-
-    return ret;
-}
-
-static int do_token_in(USBDevice *s, USBPacket *p)
-{
-    int request, value, index;
-    int ret = 0;
-
-    if (p->devep != 0)
-        return s->handle_data(s, p);
-
-    request = (s->setup_buf[0] << 8) | s->setup_buf[1];
-    value   = (s->setup_buf[3] << 8) | s->setup_buf[2];
-    index   = (s->setup_buf[5] << 8) | s->setup_buf[4];
-
-    switch(s->setup_state) {
-    case SETUP_STATE_ACK:
-        if (!(s->setup_buf[0] & USB_DIR_IN)) {
-            s->setup_state = SETUP_STATE_IDLE;
-            ret = s->handle_control(s, request, value, index,
-                                    s->setup_len, s->data_buf);
-            if (ret > 0)
-                return 0;
-            return ret;
-        }
-
-        /* return 0 byte */
-        return 0;
-
-    case SETUP_STATE_DATA:
-        if (s->setup_buf[0] & USB_DIR_IN) {
-            int len = s->setup_len - s->setup_index;
-            if (len > p->len)
-                len = p->len;
-            memcpy(p->data, s->data_buf + s->setup_index, len);
-            s->setup_index += len;
-            if (s->setup_index >= s->setup_len)
-                s->setup_state = SETUP_STATE_ACK;
-            return len;
-        }
-
-        s->setup_state = SETUP_STATE_IDLE;
-        return USB_RET_STALL;
-
-    default:
-        return USB_RET_STALL;
-    }
-}
-
-static int do_token_out(USBDevice *s, USBPacket *p)
-{
-    if (p->devep != 0)
-        return s->handle_data(s, p);
-
-    switch(s->setup_state) {
-    case SETUP_STATE_ACK:
-        if (s->setup_buf[0] & USB_DIR_IN) {
-            s->setup_state = SETUP_STATE_IDLE;
-            /* transfer OK */
-        } else {
-            /* ignore additional output */
-        }
-        return 0;
-
-    case SETUP_STATE_DATA:
-        if (!(s->setup_buf[0] & USB_DIR_IN)) {
-            int len = s->setup_len - s->setup_index;
-            if (len > p->len)
-                len = p->len;
-            memcpy(s->data_buf + s->setup_index, p->data, len);
-            s->setup_index += len;
-            if (s->setup_index >= s->setup_len)
-                s->setup_state = SETUP_STATE_ACK;
-            return len;
-        }
-
-        s->setup_state = SETUP_STATE_IDLE;
-        return USB_RET_STALL;
-
-    default:
-        return USB_RET_STALL;
-    }
-}
-
-/*
- * Generic packet handler.
- * Called by the HC (host controller).
- *
- * Returns length of the transaction or one of the USB_RET_XXX codes.
- */
-int usb_generic_handle_packet(USBDevice *s, USBPacket *p)
-{
-    switch(p->pid) {
-    case USB_MSG_ATTACH:
-        s->state = USB_STATE_ATTACHED;
-        return 0;
-
-    case USB_MSG_DETACH:
-        s->state = USB_STATE_NOTATTACHED;
-        return 0;
-
-    case USB_MSG_RESET:
-        s->remote_wakeup = 0;
-        s->addr = 0;
-        s->state = USB_STATE_DEFAULT;
-        s->handle_reset(s);
-        return 0;
-    }
-
-    /* Rest of the PIDs must match our address */
-    if (s->state < USB_STATE_DEFAULT || p->devaddr != s->addr)
-        return USB_RET_NODEV;
-
-    switch (p->pid) {
-    case USB_TOKEN_SETUP:
-        return do_token_setup(s, p);
-
-    case USB_TOKEN_IN:
-        return do_token_in(s, p);
-
-    case USB_TOKEN_OUT:
-        return do_token_out(s, p);
-
-    default:
-        return USB_RET_STALL;
-    }
-}
-
-/* XXX: fix overflow */
-int set_usb_string(uint8_t *buf, const char *str)
-{
-    int len, i;
-    uint8_t *q;
-
-    q = buf;
-    len = strlen(str);
-    *q++ = 2 * len + 2;
-    *q++ = 3;
-    for(i = 0; i < len; i++) {
-        *q++ = str[i];
-        *q++ = 0;
-    }
-    return q - buf;
-}
-
-/* Send an internal message to a USB device.  */
-void usb_send_msg(USBDevice *dev, int msg)
-{
-    USBPacket p;
-    memset(&p, 0, sizeof(p));
-    p.pid = msg;
-    dev->handle_packet(dev, &p);
-
-    /* This _must_ be synchronous */
-}
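The deleted hw/usb/core.c above models a control transfer as a small state machine (SETUP_STATE_IDLE/DATA/ACK) driven by SETUP, IN and OUT tokens, with the transfer direction taken from bit 7 (USB_DIR_IN) of the first setup byte. A minimal sketch of the 8-byte SETUP packet decoding it performs -- the struct and helper below are illustrative; the emulator itself packs bmRequestType and bRequest into a single 'request' integer, as shown above:

    #include <stdint.h>

    struct usb_setup {
        uint8_t  bmRequestType;   /* bit 7 set => device-to-host, i.e. an IN data stage */
        uint8_t  bRequest;
        uint16_t wValue, wIndex, wLength;   /* little-endian on the wire */
    };

    static struct usb_setup parse_setup(const uint8_t p[8])
    {
        struct usb_setup s = {
            .bmRequestType = p[0],
            .bRequest      = p[1],
            .wValue        = (uint16_t)(p[2] | (p[3] << 8)),
            .wIndex        = (uint16_t)(p[4] | (p[5] << 8)),
            .wLength       = (uint16_t)(p[6] | (p[7] << 8)),  /* becomes setup_len above */
        };
        return s;
    }
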
diff --git a/hw/usb/dev-hid.c b/hw/usb/dev-hid.c
deleted file mode 100644
index 15c1fd6..0000000
--- a/hw/usb/dev-hid.c
+++ /dev/null
@@ -1,916 +0,0 @@
-/*
- * QEMU USB HID devices
- *
- * Copyright (c) 2005 Fabrice Bellard
- * Copyright (c) 2007 OpenMoko, Inc.  (andrew@openedhand.com)
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "hw/hw.h"
-#include "ui/console.h"
-#include "hw/usb.h"
-
-/* HID interface requests */
-#define GET_REPORT   0xa101
-#define GET_IDLE     0xa102
-#define GET_PROTOCOL 0xa103
-#define SET_REPORT   0x2109
-#define SET_IDLE     0x210a
-#define SET_PROTOCOL 0x210b
-
-/* HID descriptor types */
-#define USB_DT_HID    0x21
-#define USB_DT_REPORT 0x22
-#define USB_DT_PHY    0x23
-
-#define USB_MOUSE     1
-#define USB_TABLET    2
-#define USB_KEYBOARD  3
-
-typedef struct USBMouseState {
-    int dx, dy, dz, buttons_state;
-    int x, y;
-    int mouse_grabbed;
-    QEMUPutMouseEntry *eh_entry;
-} USBMouseState;
-
-typedef struct USBKeyboardState {
-    uint16_t modifiers;
-    uint8_t leds;
-    uint8_t key[16];
-    int keys;
-} USBKeyboardState;
-
-typedef struct USBHIDState {
-    USBDevice dev;
-    union {
-        USBMouseState ptr;
-        USBKeyboardState kbd;
-    };
-    int kind;
-    int protocol;
-    uint8_t idle;
-    int changed;
-    void *datain_opaque;
-    void (*datain)(void *);
-} USBHIDState;
-
-/* mostly the same values as the Bochs USB Mouse device */
-static const uint8_t qemu_mouse_dev_descriptor[] = {
-	0x12,       /*  u8 bLength; */
-	0x01,       /*  u8 bDescriptorType; Device */
-	0x00, 0x01, /*  u16 bcdUSB; v1.0 */
-
-	0x00,	    /*  u8  bDeviceClass; */
-	0x00,	    /*  u8  bDeviceSubClass; */
-	0x00,       /*  u8  bDeviceProtocol; [ low/full speeds only ] */
-	0x08,       /*  u8  bMaxPacketSize0; 8 Bytes */
-
-	0x27, 0x06, /*  u16 idVendor; */
- 	0x01, 0x00, /*  u16 idProduct; */
-	0x00, 0x00, /*  u16 bcdDevice */
-
-	0x03,       /*  u8  iManufacturer; */
-	0x02,       /*  u8  iProduct; */
-	0x01,       /*  u8  iSerialNumber; */
-	0x01        /*  u8  bNumConfigurations; */
-};
-
-static const uint8_t qemu_mouse_config_descriptor[] = {
-	/* one configuration */
-	0x09,       /*  u8  bLength; */
-	0x02,       /*  u8  bDescriptorType; Configuration */
-	0x22, 0x00, /*  u16 wTotalLength; */
-	0x01,       /*  u8  bNumInterfaces; (1) */
-	0x01,       /*  u8  bConfigurationValue; */
-	0x04,       /*  u8  iConfiguration; */
-	0xa0,       /*  u8  bmAttributes;
-				 Bit 7: must be set,
-				     6: Self-powered,
-				     5: Remote wakeup,
-				     4..0: resvd */
-	50,         /*  u8  MaxPower; */
-
-	/* USB 1.1:
-	 * USB 2.0, single TT organization (mandatory):
-	 *	one interface, protocol 0
-	 *
-	 * USB 2.0, multiple TT organization (optional):
-	 *	two interfaces, protocols 1 (like single TT)
-	 *	and 2 (multiple TT mode) ... config is
-	 *	sometimes settable
-	 *	NOT IMPLEMENTED
-	 */
-
-	/* one interface */
-	0x09,       /*  u8  if_bLength; */
-	0x04,       /*  u8  if_bDescriptorType; Interface */
-	0x00,       /*  u8  if_bInterfaceNumber; */
-	0x00,       /*  u8  if_bAlternateSetting; */
-	0x01,       /*  u8  if_bNumEndpoints; */
-	0x03,       /*  u8  if_bInterfaceClass; */
-	0x01,       /*  u8  if_bInterfaceSubClass; */
-	0x02,       /*  u8  if_bInterfaceProtocol; [usb1.1 or single tt] */
-	0x07,       /*  u8  if_iInterface; */
-
-        /* HID descriptor */
-        0x09,        /*  u8  bLength; */
-        0x21,        /*  u8 bDescriptorType; */
-        0x01, 0x00,  /*  u16 HID_class */
-        0x00,        /*  u8 country_code */
-        0x01,        /*  u8 num_descriptors */
-        0x22,        /*  u8 type; Report */
-        52, 0,       /*  u16 len */
-
-	/* one endpoint (status change endpoint) */
-	0x07,       /*  u8  ep_bLength; */
-	0x05,       /*  u8  ep_bDescriptorType; Endpoint */
-	0x81,       /*  u8  ep_bEndpointAddress; IN Endpoint 1 */
- 	0x03,       /*  u8  ep_bmAttributes; Interrupt */
- 	0x04, 0x00, /*  u16 ep_wMaxPacketSize; */
-	0x0a,       /*  u8  ep_bInterval; (255ms -- usb 2.0 spec) */
-};
-
-static const uint8_t qemu_tablet_config_descriptor[] = {
-	/* one configuration */
-	0x09,       /*  u8  bLength; */
-	0x02,       /*  u8  bDescriptorType; Configuration */
-	0x22, 0x00, /*  u16 wTotalLength; */
-	0x01,       /*  u8  bNumInterfaces; (1) */
-	0x01,       /*  u8  bConfigurationValue; */
-	0x05,       /*  u8  iConfiguration; */
-	0xa0,       /*  u8  bmAttributes;
-				 Bit 7: must be set,
-				     6: Self-powered,
-				     5: Remote wakeup,
-				     4..0: resvd */
-	50,         /*  u8  MaxPower; */
-
-	/* USB 1.1:
-	 * USB 2.0, single TT organization (mandatory):
-	 *	one interface, protocol 0
-	 *
-	 * USB 2.0, multiple TT organization (optional):
-	 *	two interfaces, protocols 1 (like single TT)
-	 *	and 2 (multiple TT mode) ... config is
-	 *	sometimes settable
-	 *	NOT IMPLEMENTED
-	 */
-
-	/* one interface */
-	0x09,       /*  u8  if_bLength; */
-	0x04,       /*  u8  if_bDescriptorType; Interface */
-	0x00,       /*  u8  if_bInterfaceNumber; */
-	0x00,       /*  u8  if_bAlternateSetting; */
-	0x01,       /*  u8  if_bNumEndpoints; */
-	0x03,       /*  u8  if_bInterfaceClass; */
-	0x01,       /*  u8  if_bInterfaceSubClass; */
-	0x02,       /*  u8  if_bInterfaceProtocol; [usb1.1 or single tt] */
-	0x07,       /*  u8  if_iInterface; */
-
-        /* HID descriptor */
-        0x09,        /*  u8  bLength; */
-        0x21,        /*  u8 bDescriptorType; */
-        0x01, 0x00,  /*  u16 HID_class */
-        0x00,        /*  u8 country_code */
-        0x01,        /*  u8 num_descriptors */
-        0x22,        /*  u8 type; Report */
-        74, 0,       /*  u16 len */
-
-	/* one endpoint (status change endpoint) */
-	0x07,       /*  u8  ep_bLength; */
-	0x05,       /*  u8  ep_bDescriptorType; Endpoint */
-	0x81,       /*  u8  ep_bEndpointAddress; IN Endpoint 1 */
- 	0x03,       /*  u8  ep_bmAttributes; Interrupt */
- 	0x08, 0x00, /*  u16 ep_wMaxPacketSize; */
-	0x0a,       /*  u8  ep_bInterval; (255ms -- usb 2.0 spec) */
-};
-
-static const uint8_t qemu_keyboard_config_descriptor[] = {
-    /* one configuration */
-    0x09,		/*  u8  bLength; */
-    USB_DT_CONFIG,	/*  u8  bDescriptorType; Configuration */
-    0x22, 0x00,		/*  u16 wTotalLength; */
-    0x01,		/*  u8  bNumInterfaces; (1) */
-    0x01,		/*  u8  bConfigurationValue; */
-    0x06,		/*  u8  iConfiguration; */
-    0xa0,		/*  u8  bmAttributes;
-				Bit 7: must be set,
-				    6: Self-powered,
-				    5: Remote wakeup,
-				    4..0: resvd */
-    0x32,		/*  u8  MaxPower; */
-
-    /* USB 1.1:
-     * USB 2.0, single TT organization (mandatory):
-     *	one interface, protocol 0
-     *
-     * USB 2.0, multiple TT organization (optional):
-     *	two interfaces, protocols 1 (like single TT)
-     *	and 2 (multiple TT mode) ... config is
-     *	sometimes settable
-     *	NOT IMPLEMENTED
-     */
-
-    /* one interface */
-    0x09,		/*  u8  if_bLength; */
-    USB_DT_INTERFACE,	/*  u8  if_bDescriptorType; Interface */
-    0x00,		/*  u8  if_bInterfaceNumber; */
-    0x00,		/*  u8  if_bAlternateSetting; */
-    0x01,		/*  u8  if_bNumEndpoints; */
-    0x03,		/*  u8  if_bInterfaceClass; HID */
-    0x01,		/*  u8  if_bInterfaceSubClass; Boot */
-    0x01,		/*  u8  if_bInterfaceProtocol; Keyboard */
-    0x07,		/*  u8  if_iInterface; */
-
-    /* HID descriptor */
-    0x09,		/*  u8  bLength; */
-    USB_DT_HID,		/*  u8  bDescriptorType; */
-    0x11, 0x01,		/*  u16 HID_class */
-    0x00,		/*  u8  country_code */
-    0x01,		/*  u8  num_descriptors */
-    USB_DT_REPORT,	/*  u8  type; Report */
-    0x3f, 0x00,		/*  u16 len */
-
-    /* one endpoint (status change endpoint) */
-    0x07,		/*  u8  ep_bLength; */
-    USB_DT_ENDPOINT,	/*  u8  ep_bDescriptorType; Endpoint */
-    USB_DIR_IN | 0x01,	/*  u8  ep_bEndpointAddress; IN Endpoint 1 */
-    0x03,		/*  u8  ep_bmAttributes; Interrupt */
-    0x08, 0x00,		/*  u16 ep_wMaxPacketSize; */
-    0x0a,		/*  u8  ep_bInterval; (255ms -- usb 2.0 spec) */
-};
-
-static const uint8_t qemu_mouse_hid_report_descriptor[] = {
-    0x05, 0x01,		/* Usage Page (Generic Desktop) */
-    0x09, 0x02,		/* Usage (Mouse) */
-    0xa1, 0x01,		/* Collection (Application) */
-    0x09, 0x01,		/*   Usage (Pointer) */
-    0xa1, 0x00,		/*   Collection (Physical) */
-    0x05, 0x09,		/*     Usage Page (Button) */
-    0x19, 0x01,		/*     Usage Minimum (1) */
-    0x29, 0x03,		/*     Usage Maximum (3) */
-    0x15, 0x00,		/*     Logical Minimum (0) */
-    0x25, 0x01,		/*     Logical Maximum (1) */
-    0x95, 0x03,		/*     Report Count (3) */
-    0x75, 0x01,		/*     Report Size (1) */
-    0x81, 0x02,		/*     Input (Data, Variable, Absolute) */
-    0x95, 0x01,		/*     Report Count (1) */
-    0x75, 0x05,		/*     Report Size (5) */
-    0x81, 0x01,		/*     Input (Constant) */
-    0x05, 0x01,		/*     Usage Page (Generic Desktop) */
-    0x09, 0x30,		/*     Usage (X) */
-    0x09, 0x31,		/*     Usage (Y) */
-    0x09, 0x38,		/*     Usage (Wheel) */
-    0x15, 0x81,		/*     Logical Minimum (-0x7f) */
-    0x25, 0x7f,		/*     Logical Maximum (0x7f) */
-    0x75, 0x08,		/*     Report Size (8) */
-    0x95, 0x03,		/*     Report Count (3) */
-    0x81, 0x06,		/*     Input (Data, Variable, Relative) */
-    0xc0,		/*   End Collection */
-    0xc0,		/* End Collection */
-};
-
-static const uint8_t qemu_tablet_hid_report_descriptor[] = {
-    0x05, 0x01,		/* Usage Page (Generic Desktop) */
-    0x09, 0x01,		/* Usage (Pointer) */
-    0xa1, 0x01,		/* Collection (Application) */
-    0x09, 0x01,		/*   Usage (Pointer) */
-    0xa1, 0x00,		/*   Collection (Physical) */
-    0x05, 0x09,		/*     Usage Page (Button) */
-    0x19, 0x01,		/*     Usage Minimum (1) */
-    0x29, 0x03,		/*     Usage Maximum (3) */
-    0x15, 0x00,		/*     Logical Minimum (0) */
-    0x25, 0x01,		/*     Logical Maximum (1) */
-    0x95, 0x03,		/*     Report Count (3) */
-    0x75, 0x01,		/*     Report Size (1) */
-    0x81, 0x02,		/*     Input (Data, Variable, Absolute) */
-    0x95, 0x01,		/*     Report Count (1) */
-    0x75, 0x05,		/*     Report Size (5) */
-    0x81, 0x01,		/*     Input (Constant) */
-    0x05, 0x01,		/*     Usage Page (Generic Desktop) */
-    0x09, 0x30,		/*     Usage (X) */
-    0x09, 0x31,		/*     Usage (Y) */
-    0x15, 0x00,		/*     Logical Minimum (0) */
-    0x26, 0xff, 0x7f,	/*     Logical Maximum (0x7fff) */
-    0x35, 0x00,		/*     Physical Minimum (0) */
-    0x46, 0xff, 0x7f,	/*     Physical Maximum (0x7fff) */
-    0x75, 0x10,		/*     Report Size (16) */
-    0x95, 0x02,		/*     Report Count (2) */
-    0x81, 0x02,		/*     Input (Data, Variable, Absolute) */
-    0x05, 0x01,		/*     Usage Page (Generic Desktop) */
-    0x09, 0x38,		/*     Usage (Wheel) */
-    0x15, 0x81,		/*     Logical Minimum (-0x7f) */
-    0x25, 0x7f,		/*     Logical Maximum (0x7f) */
-    0x35, 0x00,		/*     Physical Minimum (same as logical) */
-    0x45, 0x00,		/*     Physical Maximum (same as logical) */
-    0x75, 0x08,		/*     Report Size (8) */
-    0x95, 0x01,		/*     Report Count (1) */
-    0x81, 0x06,		/*     Input (Data, Variable, Relative) */
-    0xc0,		/*   End Collection */
-    0xc0,		/* End Collection */
-};
-
-static const uint8_t qemu_keyboard_hid_report_descriptor[] = {
-    0x05, 0x01,		/* Usage Page (Generic Desktop) */
-    0x09, 0x06,		/* Usage (Keyboard) */
-    0xa1, 0x01,		/* Collection (Application) */
-    0x75, 0x01,		/*   Report Size (1) */
-    0x95, 0x08,		/*   Report Count (8) */
-    0x05, 0x07,		/*   Usage Page (Key Codes) */
-    0x19, 0xe0,		/*   Usage Minimum (224) */
-    0x29, 0xe7,		/*   Usage Maximum (231) */
-    0x15, 0x00,		/*   Logical Minimum (0) */
-    0x25, 0x01,		/*   Logical Maximum (1) */
-    0x81, 0x02,		/*   Input (Data, Variable, Absolute) */
-    0x95, 0x01,		/*   Report Count (1) */
-    0x75, 0x08,		/*   Report Size (8) */
-    0x81, 0x01,		/*   Input (Constant) */
-    0x95, 0x05,		/*   Report Count (5) */
-    0x75, 0x01,		/*   Report Size (1) */
-    0x05, 0x08,		/*   Usage Page (LEDs) */
-    0x19, 0x01,		/*   Usage Minimum (1) */
-    0x29, 0x05,		/*   Usage Maximum (5) */
-    0x91, 0x02,		/*   Output (Data, Variable, Absolute) */
-    0x95, 0x01,		/*   Report Count (1) */
-    0x75, 0x03,		/*   Report Size (3) */
-    0x91, 0x01,		/*   Output (Constant) */
-    0x95, 0x06,		/*   Report Count (6) */
-    0x75, 0x08,		/*   Report Size (8) */
-    0x15, 0x00,		/*   Logical Minimum (0) */
-    0x25, 0xff,		/*   Logical Maximum (255) */
-    0x05, 0x07,		/*   Usage Page (Key Codes) */
-    0x19, 0x00,		/*   Usage Minimum (0) */
-    0x29, 0xff,		/*   Usage Maximum (255) */
-    0x81, 0x00,		/*   Input (Data, Array) */
-    0xc0,		/* End Collection */
-};
-
-#define USB_HID_USAGE_ERROR_ROLLOVER	0x01
-#define USB_HID_USAGE_POSTFAIL		0x02
-#define USB_HID_USAGE_ERROR_UNDEFINED	0x03
-
-/* Indices are QEMU keycodes, values are from HID Usage Table.  Indices
- * above 0x80 are for keys that come after 0xe0 or 0xe1+0x1d or 0xe1+0x9d.  */
-static const uint8_t usb_hid_usage_keys[0x100] = {
-    0x00, 0x29, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23,
-    0x24, 0x25, 0x26, 0x27, 0x2d, 0x2e, 0x2a, 0x2b,
-    0x14, 0x1a, 0x08, 0x15, 0x17, 0x1c, 0x18, 0x0c,
-    0x12, 0x13, 0x2f, 0x30, 0x28, 0xe0, 0x04, 0x16,
-    0x07, 0x09, 0x0a, 0x0b, 0x0d, 0x0e, 0x0f, 0x33,
-    0x34, 0x35, 0xe1, 0x31, 0x1d, 0x1b, 0x06, 0x19,
-    0x05, 0x11, 0x10, 0x36, 0x37, 0x38, 0xe5, 0x55,
-    0xe2, 0x2c, 0x32, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e,
-    0x3f, 0x40, 0x41, 0x42, 0x43, 0x53, 0x47, 0x5f,
-    0x60, 0x61, 0x56, 0x5c, 0x5d, 0x5e, 0x57, 0x59,
-    0x5a, 0x5b, 0x62, 0x63, 0x00, 0x00, 0x00, 0x44,
-    0x45, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e,
-    0xe8, 0xe9, 0x71, 0x72, 0x73, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x85, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0xe3, 0xe7, 0x65,
-
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x58, 0xe4, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x54, 0x00, 0x46,
-    0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x4a,
-    0x52, 0x4b, 0x00, 0x50, 0x00, 0x4f, 0x00, 0x4d,
-    0x51, 0x4e, 0x49, 0x4c, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-};
-
-static void usb_hid_changed(USBHIDState *hs)
-{
-    hs->changed = 1;
-
-    if (hs->datain)
-        hs->datain(hs->datain_opaque);
-}
-
-static void usb_mouse_event(void *opaque,
-                            int dx1, int dy1, int dz1, int buttons_state)
-{
-    USBHIDState *hs = opaque;
-    USBMouseState *s = &hs->ptr;
-
-    s->dx += dx1;
-    s->dy += dy1;
-    s->dz += dz1;
-    s->buttons_state = buttons_state;
-
-    usb_hid_changed(hs);
-}
-
-static void usb_tablet_event(void *opaque,
-			     int x, int y, int dz, int buttons_state)
-{
-    USBHIDState *hs = opaque;
-    USBMouseState *s = &hs->ptr;
-
-    s->x = x;
-    s->y = y;
-    s->dz += dz;
-    s->buttons_state = buttons_state;
-
-    usb_hid_changed(hs);
-}
-
-static void usb_keyboard_event(void *opaque, int keycode)
-{
-    USBHIDState *hs = opaque;
-    USBKeyboardState *s = &hs->kbd;
-    uint8_t hid_code, key;
-    int i;
-
-    key = keycode & 0x7f;
-    hid_code = usb_hid_usage_keys[key | ((s->modifiers >> 1) & (1 << 7))];
-    s->modifiers &= ~(1 << 8);
-
-    switch (hid_code) {
-    case 0x00:
-        return;
-
-    case 0xe0:
-        if (s->modifiers & (1 << 9)) {
-            s->modifiers ^= 3 << 8;
-            return;
-        }
-    case 0xe1 ... 0xe7:
-        if (keycode & (1 << 7)) {
-            s->modifiers &= ~(1 << (hid_code & 0x0f));
-            return;
-        }
-    case 0xe8 ... 0xef:
-        s->modifiers |= 1 << (hid_code & 0x0f);
-        return;
-    }
-
-    if (keycode & (1 << 7)) {
-        for (i = s->keys - 1; i >= 0; i --)
-            if (s->key[i] == hid_code) {
-                s->key[i] = s->key[-- s->keys];
-                s->key[s->keys] = 0x00;
-                usb_hid_changed(hs);
-                break;
-            }
-        if (i < 0)
-            return;
-    } else {
-        for (i = s->keys - 1; i >= 0; i --)
-            if (s->key[i] == hid_code)
-                break;
-        if (i < 0) {
-            if (s->keys < sizeof(s->key))
-                s->key[s->keys ++] = hid_code;
-        } else
-            return;
-    }
-
-    usb_hid_changed(hs);
-}
-
-static inline int int_clamp(int val, int vmin, int vmax)
-{
-    if (val < vmin)
-        return vmin;
-    else if (val > vmax)
-        return vmax;
-    else
-        return val;
-}
-
-static int usb_mouse_poll(USBHIDState *hs, uint8_t *buf, int len)
-{
-    int dx, dy, dz, b, l;
-    USBMouseState *s = &hs->ptr;
-
-    if (!s->mouse_grabbed) {
-	s->eh_entry = qemu_add_mouse_event_handler(usb_mouse_event, hs,
-                                                  0, "QEMU USB Mouse");
-	s->mouse_grabbed = 1;
-    }
-
-    dx = int_clamp(s->dx, -127, 127);
-    dy = int_clamp(s->dy, -127, 127);
-    dz = int_clamp(s->dz, -127, 127);
-
-    s->dx -= dx;
-    s->dy -= dy;
-    s->dz -= dz;
-
-    /* Appears we have to invert the wheel direction */
-    dz = 0 - dz;
-
-    b = 0;
-    if (s->buttons_state & MOUSE_EVENT_LBUTTON)
-        b |= 0x01;
-    if (s->buttons_state & MOUSE_EVENT_RBUTTON)
-        b |= 0x02;
-    if (s->buttons_state & MOUSE_EVENT_MBUTTON)
-        b |= 0x04;
-
-    l = 0;
-    if (len > l)
-        buf[l ++] = b;
-    if (len > l)
-        buf[l ++] = dx;
-    if (len > l)
-        buf[l ++] = dy;
-    if (len > l)
-        buf[l ++] = dz;
-    return l;
-}
-
-static int usb_tablet_poll(USBHIDState *hs, uint8_t *buf, int len)
-{
-    int dz, b, l;
-    USBMouseState *s = &hs->ptr;
-
-    if (!s->mouse_grabbed) {
-	s->eh_entry = qemu_add_mouse_event_handler(usb_tablet_event, hs,
-                                                  1, "QEMU USB Tablet");
-	s->mouse_grabbed = 1;
-    }
-
-    dz = int_clamp(s->dz, -127, 127);
-    s->dz -= dz;
-
-    /* Appears we have to invert the wheel direction */
-    dz = 0 - dz;
-    b = 0;
-    if (s->buttons_state & MOUSE_EVENT_LBUTTON)
-        b |= 0x01;
-    if (s->buttons_state & MOUSE_EVENT_RBUTTON)
-        b |= 0x02;
-    if (s->buttons_state & MOUSE_EVENT_MBUTTON)
-        b |= 0x04;
-
-    buf[0] = b;
-    buf[1] = s->x & 0xff;
-    buf[2] = s->x >> 8;
-    buf[3] = s->y & 0xff;
-    buf[4] = s->y >> 8;
-    buf[5] = dz;
-    l = 6;
-
-    return l;
-}
-
-static int usb_keyboard_poll(USBKeyboardState *s, uint8_t *buf, int len)
-{
-    if (len < 2)
-        return 0;
-
-    buf[0] = s->modifiers & 0xff;
-    buf[1] = 0;
-    if (s->keys > 6)
-        memset(buf + 2, USB_HID_USAGE_ERROR_ROLLOVER, MIN(8, len) - 2);
-    else
-        memcpy(buf + 2, s->key, MIN(8, len) - 2);
-
-    return MIN(8, len);
-}
-
-static int usb_keyboard_write(USBKeyboardState *s, uint8_t *buf, int len)
-{
-    if (len > 0) {
-        /* 0x01: Num Lock LED
-         * 0x02: Caps Lock LED
-         * 0x04: Scroll Lock LED
-         * 0x08: Compose LED
-         * 0x10: Kana LED */
-        s->leds = buf[0];
-    }
-    return 0;
-}
-
-static void usb_mouse_handle_reset(USBDevice *dev)
-{
-    USBHIDState *s = (USBHIDState *)dev;
-
-    s->ptr.dx = 0;
-    s->ptr.dy = 0;
-    s->ptr.dz = 0;
-    s->ptr.x = 0;
-    s->ptr.y = 0;
-    s->ptr.buttons_state = 0;
-    s->protocol = 1;
-}
-
-static void usb_keyboard_handle_reset(USBDevice *dev)
-{
-    USBHIDState *s = (USBHIDState *)dev;
-
-    qemu_add_kbd_event_handler(usb_keyboard_event, s);
-    s->protocol = 1;
-}
-
-static int usb_hid_handle_control(USBDevice *dev, int request, int value,
-                                  int index, int length, uint8_t *data)
-{
-    USBHIDState *s = (USBHIDState *)dev;
-    int ret = 0;
-
-    switch(request) {
-    case DeviceRequest | USB_REQ_GET_STATUS:
-        data[0] = (1 << USB_DEVICE_SELF_POWERED) |
-            (dev->remote_wakeup << USB_DEVICE_REMOTE_WAKEUP);
-        data[1] = 0x00;
-        ret = 2;
-        break;
-    case DeviceOutRequest | USB_REQ_CLEAR_FEATURE:
-        if (value == USB_DEVICE_REMOTE_WAKEUP) {
-            dev->remote_wakeup = 0;
-        } else {
-            goto fail;
-        }
-        ret = 0;
-        break;
-    case DeviceOutRequest | USB_REQ_SET_FEATURE:
-        if (value == USB_DEVICE_REMOTE_WAKEUP) {
-            dev->remote_wakeup = 1;
-        } else {
-            goto fail;
-        }
-        ret = 0;
-        break;
-    case DeviceOutRequest | USB_REQ_SET_ADDRESS:
-        dev->addr = value;
-        ret = 0;
-        break;
-    case DeviceRequest | USB_REQ_GET_DESCRIPTOR:
-        switch(value >> 8) {
-        case USB_DT_DEVICE:
-            memcpy(data, qemu_mouse_dev_descriptor,
-                   sizeof(qemu_mouse_dev_descriptor));
-            ret = sizeof(qemu_mouse_dev_descriptor);
-            break;
-        case USB_DT_CONFIG:
-	    if (s->kind == USB_MOUSE) {
-		memcpy(data, qemu_mouse_config_descriptor,
-		       sizeof(qemu_mouse_config_descriptor));
-		ret = sizeof(qemu_mouse_config_descriptor);
-	    } else if (s->kind == USB_TABLET) {
-		memcpy(data, qemu_tablet_config_descriptor,
-		       sizeof(qemu_tablet_config_descriptor));
-		ret = sizeof(qemu_tablet_config_descriptor);
-            } else if (s->kind == USB_KEYBOARD) {
-                memcpy(data, qemu_keyboard_config_descriptor,
-                       sizeof(qemu_keyboard_config_descriptor));
-                ret = sizeof(qemu_keyboard_config_descriptor);
-            }
-            break;
-        case USB_DT_STRING:
-            switch(value & 0xff) {
-            case 0:
-                /* language ids */
-                data[0] = 4;
-                data[1] = 3;
-                data[2] = 0x09;
-                data[3] = 0x04;
-                ret = 4;
-                break;
-            case 1:
-                /* serial number */
-                ret = set_usb_string(data, "1");
-                break;
-            case 2:
-                /* product description */
-                ret = set_usb_string(data, s->dev.devname);
-                break;
-            case 3:
-                /* vendor description */
-                ret = set_usb_string(data, "QEMU " QEMU_VERSION);
-                break;
-            case 4:
-                ret = set_usb_string(data, "HID Mouse");
-                break;
-            case 5:
-                ret = set_usb_string(data, "HID Tablet");
-                break;
-            case 6:
-                ret = set_usb_string(data, "HID Keyboard");
-                break;
-            case 7:
-                ret = set_usb_string(data, "Endpoint1 Interrupt Pipe");
-                break;
-            default:
-                goto fail;
-            }
-            break;
-        default:
-            goto fail;
-        }
-        break;
-    case DeviceRequest | USB_REQ_GET_CONFIGURATION:
-        data[0] = 1;
-        ret = 1;
-        break;
-    case DeviceOutRequest | USB_REQ_SET_CONFIGURATION:
-        ret = 0;
-        break;
-    case DeviceRequest | USB_REQ_GET_INTERFACE:
-        data[0] = 0;
-        ret = 1;
-        break;
-    case DeviceOutRequest | USB_REQ_SET_INTERFACE:
-        ret = 0;
-        break;
-        /* hid specific requests */
-    case InterfaceRequest | USB_REQ_GET_DESCRIPTOR:
-        switch(value >> 8) {
-        case 0x22:
-	    if (s->kind == USB_MOUSE) {
-		memcpy(data, qemu_mouse_hid_report_descriptor,
-		       sizeof(qemu_mouse_hid_report_descriptor));
-		ret = sizeof(qemu_mouse_hid_report_descriptor);
-	    } else if (s->kind == USB_TABLET) {
-		memcpy(data, qemu_tablet_hid_report_descriptor,
-		       sizeof(qemu_tablet_hid_report_descriptor));
-		ret = sizeof(qemu_tablet_hid_report_descriptor);
-            } else if (s->kind == USB_KEYBOARD) {
-                memcpy(data, qemu_keyboard_hid_report_descriptor,
-                       sizeof(qemu_keyboard_hid_report_descriptor));
-                ret = sizeof(qemu_keyboard_hid_report_descriptor);
-            }
-            break;
-        default:
-            goto fail;
-        }
-        break;
-    case GET_REPORT:
-	if (s->kind == USB_MOUSE)
-            ret = usb_mouse_poll(s, data, length);
-	else if (s->kind == USB_TABLET)
-            ret = usb_tablet_poll(s, data, length);
-        else if (s->kind == USB_KEYBOARD)
-            ret = usb_keyboard_poll(&s->kbd, data, length);
-        break;
-    case SET_REPORT:
-        if (s->kind == USB_KEYBOARD)
-            ret = usb_keyboard_write(&s->kbd, data, length);
-        else
-            goto fail;
-        break;
-    case GET_PROTOCOL:
-        if (s->kind != USB_KEYBOARD)
-            goto fail;
-        ret = 1;
-        data[0] = s->protocol;
-        break;
-    case SET_PROTOCOL:
-        if (s->kind != USB_KEYBOARD)
-            goto fail;
-        ret = 0;
-        s->protocol = value;
-        break;
-    case GET_IDLE:
-        ret = 1;
-        data[0] = s->idle;
-        break;
-    case SET_IDLE:
-        s->idle = (uint8_t) (value >> 8);
-        ret = 0;
-        break;
-    default:
-    fail:
-        ret = USB_RET_STALL;
-        break;
-    }
-    return ret;
-}
-
-static int usb_hid_handle_data(USBDevice *dev, USBPacket *p)
-{
-    USBHIDState *s = (USBHIDState *)dev;
-    int ret = 0;
-
-    switch(p->pid) {
-    case USB_TOKEN_IN:
-        if (p->devep == 1) {
-            /* TODO: Implement finite idle delays.  */
-            if (!(s->changed || s->idle))
-                return USB_RET_NAK;
-            s->changed = 0;
-            if (s->kind == USB_MOUSE)
-                ret = usb_mouse_poll(s, p->data, p->len);
-            else if (s->kind == USB_TABLET)
-                ret = usb_tablet_poll(s, p->data, p->len);
-            else if (s->kind == USB_KEYBOARD)
-                ret = usb_keyboard_poll(&s->kbd, p->data, p->len);
-        } else {
-            goto fail;
-        }
-        break;
-    case USB_TOKEN_OUT:
-    default:
-    fail:
-        ret = USB_RET_STALL;
-        break;
-    }
-    return ret;
-}
-
-static void usb_hid_handle_destroy(USBDevice *dev)
-{
-    USBHIDState *s = (USBHIDState *)dev;
-
-    if (s->kind != USB_KEYBOARD)
-        qemu_remove_mouse_event_handler(s->ptr.eh_entry);
-    /* TODO: else */
-    g_free(s);
-}
-
-USBDevice *usb_tablet_init(void)
-{
-    USBHIDState *s;
-
-    s = g_malloc0(sizeof(USBHIDState));
-    s->dev.speed = USB_SPEED_FULL;
-    s->dev.handle_packet = usb_generic_handle_packet;
-
-    s->dev.handle_reset = usb_mouse_handle_reset;
-    s->dev.handle_control = usb_hid_handle_control;
-    s->dev.handle_data = usb_hid_handle_data;
-    s->dev.handle_destroy = usb_hid_handle_destroy;
-    s->kind = USB_TABLET;
-    /* Force poll routine to be run and grab input the first time.  */
-    s->changed = 1;
-
-    pstrcpy(s->dev.devname, sizeof(s->dev.devname), "QEMU USB Tablet");
-
-    return (USBDevice *)s;
-}
-
-USBDevice *usb_mouse_init(void)
-{
-    USBHIDState *s;
-
-    s = g_malloc0(sizeof(USBHIDState));
-    s->dev.speed = USB_SPEED_FULL;
-    s->dev.handle_packet = usb_generic_handle_packet;
-
-    s->dev.handle_reset = usb_mouse_handle_reset;
-    s->dev.handle_control = usb_hid_handle_control;
-    s->dev.handle_data = usb_hid_handle_data;
-    s->dev.handle_destroy = usb_hid_handle_destroy;
-    s->kind = USB_MOUSE;
-    /* Force poll routine to be run and grab input the first time.  */
-    s->changed = 1;
-
-    pstrcpy(s->dev.devname, sizeof(s->dev.devname), "QEMU USB Mouse");
-
-    return (USBDevice *)s;
-}
-
-USBDevice *usb_keyboard_init(void)
-{
-    USBHIDState *s;
-
-    s = g_malloc0(sizeof(USBHIDState));
-    s->dev.speed = USB_SPEED_FULL;
-    s->dev.handle_packet = usb_generic_handle_packet;
-
-    s->dev.handle_reset = usb_keyboard_handle_reset;
-    s->dev.handle_control = usb_hid_handle_control;
-    s->dev.handle_data = usb_hid_handle_data;
-    s->dev.handle_destroy = usb_hid_handle_destroy;
-    s->kind = USB_KEYBOARD;
-
-    pstrcpy(s->dev.devname, sizeof(s->dev.devname), "QEMU USB Keyboard");
-
-    return (USBDevice *) s;
-}
-
-void usb_hid_datain_cb(USBDevice *dev, void *opaque, void (*datain)(void *))
-{
-    USBHIDState *s = (USBHIDState *)dev;
-
-    s->datain_opaque = opaque;
-    s->datain = datain;
-}
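The deleted HID device above emits the boot-protocol mouse format declared by qemu_mouse_hid_report_descriptor: three button bits plus five bits of padding, then 8-bit relative X, Y and wheel values clamped to [-127, 127], with the wheel sign inverted before sending. A compact sketch of that packing (function names are illustrative):

    #include <stdint.h>

    static int8_t clamp8(int v)
    {
        return (int8_t)(v < -127 ? -127 : v > 127 ? 127 : v);
    }

    /* 4-byte boot-protocol mouse report: buttons, dX, dY, wheel. */
    static void build_mouse_report(uint8_t out[4],
                                   int buttons, int dx, int dy, int dz)
    {
        out[0] = (uint8_t)(buttons & 0x07);   /* bit0=left, bit1=right, bit2=middle */
        out[1] = (uint8_t)clamp8(dx);
        out[2] = (uint8_t)clamp8(dy);
        out[3] = (uint8_t)(-clamp8(dz));      /* wheel direction inverted, as in usb_mouse_poll() */
    }
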
diff --git a/hw/usb/dev-hub.c b/hw/usb/dev-hub.c
deleted file mode 100644
index dcb7a07..0000000
--- a/hw/usb/dev-hub.c
+++ /dev/null
@@ -1,550 +0,0 @@
-/*
- * QEMU USB HUB emulation
- *
- * Copyright (c) 2005 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu-common.h"
-#include "hw/usb.h"
-
-//#define DEBUG
-
-#define MAX_PORTS 8
-
-typedef struct USBHubPort {
-    USBPort port;
-    uint16_t wPortStatus;
-    uint16_t wPortChange;
-} USBHubPort;
-
-typedef struct USBHubState {
-    USBDevice dev;
-    int nb_ports;
-    USBHubPort ports[MAX_PORTS];
-} USBHubState;
-
-#define ClearHubFeature		(0x2000 | USB_REQ_CLEAR_FEATURE)
-#define ClearPortFeature	(0x2300 | USB_REQ_CLEAR_FEATURE)
-#define GetHubDescriptor	(0xa000 | USB_REQ_GET_DESCRIPTOR)
-#define GetHubStatus		(0xa000 | USB_REQ_GET_STATUS)
-#define GetPortStatus		(0xa300 | USB_REQ_GET_STATUS)
-#define SetHubFeature		(0x2000 | USB_REQ_SET_FEATURE)
-#define SetPortFeature		(0x2300 | USB_REQ_SET_FEATURE)
-
-#define PORT_STAT_CONNECTION	0x0001
-#define PORT_STAT_ENABLE	0x0002
-#define PORT_STAT_SUSPEND	0x0004
-#define PORT_STAT_OVERCURRENT	0x0008
-#define PORT_STAT_RESET		0x0010
-#define PORT_STAT_POWER		0x0100
-#define PORT_STAT_LOW_SPEED	0x0200
-#define PORT_STAT_HIGH_SPEED    0x0400
-#define PORT_STAT_TEST          0x0800
-#define PORT_STAT_INDICATOR     0x1000
-
-#define PORT_STAT_C_CONNECTION	0x0001
-#define PORT_STAT_C_ENABLE	0x0002
-#define PORT_STAT_C_SUSPEND	0x0004
-#define PORT_STAT_C_OVERCURRENT	0x0008
-#define PORT_STAT_C_RESET	0x0010
-
-#define PORT_CONNECTION	        0
-#define PORT_ENABLE		1
-#define PORT_SUSPEND		2
-#define PORT_OVERCURRENT	3
-#define PORT_RESET		4
-#define PORT_POWER		8
-#define PORT_LOWSPEED		9
-#define PORT_HIGHSPEED		10
-#define PORT_C_CONNECTION	16
-#define PORT_C_ENABLE		17
-#define PORT_C_SUSPEND		18
-#define PORT_C_OVERCURRENT	19
-#define PORT_C_RESET		20
-#define PORT_TEST               21
-#define PORT_INDICATOR          22
-
-/* same as Linux kernel root hubs */
-
-static const uint8_t qemu_hub_dev_descriptor[] = {
-	0x12,       /*  u8 bLength; */
-	0x01,       /*  u8 bDescriptorType; Device */
-	0x10, 0x01, /*  u16 bcdUSB; v1.1 */
-
-	0x09,	    /*  u8  bDeviceClass; HUB_CLASSCODE */
-	0x00,	    /*  u8  bDeviceSubClass; */
-	0x00,       /*  u8  bDeviceProtocol; [ low/full speeds only ] */
-	0x08,       /*  u8  bMaxPacketSize0; 8 Bytes */
-
-	0x00, 0x00, /*  u16 idVendor; */
- 	0x00, 0x00, /*  u16 idProduct; */
-	0x01, 0x01, /*  u16 bcdDevice */
-
-	0x03,       /*  u8  iManufacturer; */
-	0x02,       /*  u8  iProduct; */
-	0x01,       /*  u8  iSerialNumber; */
-	0x01        /*  u8  bNumConfigurations; */
-};
-
-/* XXX: patch interrupt size */
-static const uint8_t qemu_hub_config_descriptor[] = {
-
-	/* one configuration */
-	0x09,       /*  u8  bLength; */
-	0x02,       /*  u8  bDescriptorType; Configuration */
-	0x19, 0x00, /*  u16 wTotalLength; */
-	0x01,       /*  u8  bNumInterfaces; (1) */
-	0x01,       /*  u8  bConfigurationValue; */
-	0x00,       /*  u8  iConfiguration; */
-	0xc0,       /*  u8  bmAttributes;
-				 Bit 7: must be set,
-				     6: Self-powered,
-				     5: Remote wakeup,
-				     4..0: resvd */
-	0x00,       /*  u8  MaxPower; */
-
-	/* USB 1.1:
-	 * USB 2.0, single TT organization (mandatory):
-	 *	one interface, protocol 0
-	 *
-	 * USB 2.0, multiple TT organization (optional):
-	 *	two interfaces, protocols 1 (like single TT)
-	 *	and 2 (multiple TT mode) ... config is
-	 *	sometimes settable
-	 *	NOT IMPLEMENTED
-	 */
-
-	/* one interface */
-	0x09,       /*  u8  if_bLength; */
-	0x04,       /*  u8  if_bDescriptorType; Interface */
-	0x00,       /*  u8  if_bInterfaceNumber; */
-	0x00,       /*  u8  if_bAlternateSetting; */
-	0x01,       /*  u8  if_bNumEndpoints; */
-	0x09,       /*  u8  if_bInterfaceClass; HUB_CLASSCODE */
-	0x00,       /*  u8  if_bInterfaceSubClass; */
-	0x00,       /*  u8  if_bInterfaceProtocol; [usb1.1 or single tt] */
-	0x00,       /*  u8  if_iInterface; */
-
-	/* one endpoint (status change endpoint) */
-	0x07,       /*  u8  ep_bLength; */
-	0x05,       /*  u8  ep_bDescriptorType; Endpoint */
-	0x81,       /*  u8  ep_bEndpointAddress; IN Endpoint 1 */
- 	0x03,       /*  u8  ep_bmAttributes; Interrupt */
- 	0x02, 0x00, /*  u16 ep_wMaxPacketSize; 1 + (MAX_ROOT_PORTS / 8) */
-	0xff        /*  u8  ep_bInterval; (255ms -- usb 2.0 spec) */
-};
-
-static const uint8_t qemu_hub_hub_descriptor[] =
-{
-	0x00,			/*  u8  bLength; patched in later */
-	0x29,			/*  u8  bDescriptorType; Hub-descriptor */
-	0x00,			/*  u8  bNbrPorts; (patched later) */
-	0x0a,			/* u16  wHubCharacteristics; */
-	0x00,			/*   (per-port OC, no power switching) */
-	0x01,			/*  u8  bPwrOn2pwrGood; 2ms */
-	0x00			/*  u8  bHubContrCurrent; 0 mA */
-
-        /* DeviceRemovable and PortPwrCtrlMask patched in later */
-};
-
-static void usb_hub_attach(USBPort *port1, USBDevice *dev)
-{
-    USBHubState *s = port1->opaque;
-    USBHubPort *port = &s->ports[port1->index];
-
-    if (dev) {
-        if (port->port.dev)
-            usb_attach(port1, NULL);
-
-        port->wPortStatus |= PORT_STAT_CONNECTION;
-        port->wPortChange |= PORT_STAT_C_CONNECTION;
-        if (dev->speed == USB_SPEED_LOW)
-            port->wPortStatus |= PORT_STAT_LOW_SPEED;
-        else
-            port->wPortStatus &= ~PORT_STAT_LOW_SPEED;
-        port->port.dev = dev;
-        /* send the attach message */
-        usb_send_msg(dev, USB_MSG_ATTACH);
-    } else {
-        dev = port->port.dev;
-        if (dev) {
-            port->wPortStatus &= ~PORT_STAT_CONNECTION;
-            port->wPortChange |= PORT_STAT_C_CONNECTION;
-            if (port->wPortStatus & PORT_STAT_ENABLE) {
-                port->wPortStatus &= ~PORT_STAT_ENABLE;
-                port->wPortChange |= PORT_STAT_C_ENABLE;
-            }
-            /* send the detach message */
-            usb_send_msg(dev, USB_MSG_DETACH);
-            port->port.dev = NULL;
-        }
-    }
-}
-
-static void usb_hub_handle_reset(USBDevice *dev)
-{
-    /* XXX: do it */
-}
-
-static int usb_hub_handle_control(USBDevice *dev, int request, int value,
-                                  int index, int length, uint8_t *data)
-{
-    USBHubState *s = (USBHubState *)dev;
-    int ret;
-
-    switch(request) {
-    case DeviceRequest | USB_REQ_GET_STATUS:
-        data[0] = (1 << USB_DEVICE_SELF_POWERED) |
-            (dev->remote_wakeup << USB_DEVICE_REMOTE_WAKEUP);
-        data[1] = 0x00;
-        ret = 2;
-        break;
-    case DeviceOutRequest | USB_REQ_CLEAR_FEATURE:
-        if (value == USB_DEVICE_REMOTE_WAKEUP) {
-            dev->remote_wakeup = 0;
-        } else {
-            goto fail;
-        }
-        ret = 0;
-        break;
-    case EndpointOutRequest | USB_REQ_CLEAR_FEATURE:
-        if (value == 0 && index != 0x81) { /* clear ep halt */
-            goto fail;
-        }
-        ret = 0;
-        break;
-    case DeviceOutRequest | USB_REQ_SET_FEATURE:
-        if (value == USB_DEVICE_REMOTE_WAKEUP) {
-            dev->remote_wakeup = 1;
-        } else {
-            goto fail;
-        }
-        ret = 0;
-        break;
-    case DeviceOutRequest | USB_REQ_SET_ADDRESS:
-        dev->addr = value;
-        ret = 0;
-        break;
-    case DeviceRequest | USB_REQ_GET_DESCRIPTOR:
-        switch(value >> 8) {
-        case USB_DT_DEVICE:
-            memcpy(data, qemu_hub_dev_descriptor,
-                   sizeof(qemu_hub_dev_descriptor));
-            ret = sizeof(qemu_hub_dev_descriptor);
-            break;
-        case USB_DT_CONFIG:
-            memcpy(data, qemu_hub_config_descriptor,
-                   sizeof(qemu_hub_config_descriptor));
-
-            /* status change endpoint size based on number
-             * of ports */
-            data[22] = (s->nb_ports + 1 + 7) / 8;
-
-            ret = sizeof(qemu_hub_config_descriptor);
-            break;
-        case USB_DT_STRING:
-            switch(value & 0xff) {
-            case 0:
-                /* language ids */
-                data[0] = 4;
-                data[1] = 3;
-                data[2] = 0x09;
-                data[3] = 0x04;
-                ret = 4;
-                break;
-            case 1:
-                /* serial number */
-                ret = set_usb_string(data, "314159");
-                break;
-            case 2:
-                /* product description */
-                ret = set_usb_string(data, "QEMU USB Hub");
-                break;
-            case 3:
-                /* vendor description */
-                ret = set_usb_string(data, "QEMU " QEMU_VERSION);
-                break;
-            default:
-                goto fail;
-            }
-            break;
-        default:
-            goto fail;
-        }
-        break;
-    case DeviceRequest | USB_REQ_GET_CONFIGURATION:
-        data[0] = 1;
-        ret = 1;
-        break;
-    case DeviceOutRequest | USB_REQ_SET_CONFIGURATION:
-        ret = 0;
-        break;
-    case DeviceRequest | USB_REQ_GET_INTERFACE:
-        data[0] = 0;
-        ret = 1;
-        break;
-    case DeviceOutRequest | USB_REQ_SET_INTERFACE:
-        ret = 0;
-        break;
-        /* usb specific requests */
-    case GetHubStatus:
-        data[0] = 0;
-        data[1] = 0;
-        data[2] = 0;
-        data[3] = 0;
-        ret = 4;
-        break;
-    case GetPortStatus:
-        {
-            unsigned int n = index - 1;
-            USBHubPort *port;
-            if (n >= s->nb_ports)
-                goto fail;
-            port = &s->ports[n];
-            data[0] = port->wPortStatus;
-            data[1] = port->wPortStatus >> 8;
-            data[2] = port->wPortChange;
-            data[3] = port->wPortChange >> 8;
-            ret = 4;
-        }
-        break;
-    case SetHubFeature:
-    case ClearHubFeature:
-        if (value == 0 || value == 1) {
-        } else {
-            goto fail;
-        }
-        ret = 0;
-        break;
-    case SetPortFeature:
-        {
-            unsigned int n = index - 1;
-            USBHubPort *port;
-            USBDevice *dev;
-            if (n >= s->nb_ports)
-                goto fail;
-            port = &s->ports[n];
-            dev = port->port.dev;
-            switch(value) {
-            case PORT_SUSPEND:
-                port->wPortStatus |= PORT_STAT_SUSPEND;
-                break;
-            case PORT_RESET:
-                if (dev) {
-                    usb_send_msg(dev, USB_MSG_RESET);
-                    port->wPortChange |= PORT_STAT_C_RESET;
-                    /* set enable bit */
-                    port->wPortStatus |= PORT_STAT_ENABLE;
-                }
-                break;
-            case PORT_POWER:
-                break;
-            default:
-                goto fail;
-            }
-            ret = 0;
-        }
-        break;
-    case ClearPortFeature:
-        {
-            unsigned int n = index - 1;
-            USBHubPort *port;
-            if (n >= s->nb_ports)
-                goto fail;
-            port = &s->ports[n];
-            switch(value) {
-            case PORT_ENABLE:
-                port->wPortStatus &= ~PORT_STAT_ENABLE;
-                break;
-            case PORT_C_ENABLE:
-                port->wPortChange &= ~PORT_STAT_C_ENABLE;
-                break;
-            case PORT_SUSPEND:
-                port->wPortStatus &= ~PORT_STAT_SUSPEND;
-                break;
-            case PORT_C_SUSPEND:
-                port->wPortChange &= ~PORT_STAT_C_SUSPEND;
-                break;
-            case PORT_C_CONNECTION:
-                port->wPortChange &= ~PORT_STAT_C_CONNECTION;
-                break;
-            case PORT_C_OVERCURRENT:
-                port->wPortChange &= ~PORT_STAT_C_OVERCURRENT;
-                break;
-            case PORT_C_RESET:
-                port->wPortChange &= ~PORT_STAT_C_RESET;
-                break;
-            default:
-                goto fail;
-            }
-            ret = 0;
-        }
-        break;
-    case GetHubDescriptor:
-        {
-            unsigned int n, limit, var_hub_size = 0;
-            memcpy(data, qemu_hub_hub_descriptor,
-                   sizeof(qemu_hub_hub_descriptor));
-            data[2] = s->nb_ports;
-
-            /* fill DeviceRemovable bits */
-            limit = ((s->nb_ports + 1 + 7) / 8) + 7;
-            for (n = 7; n < limit; n++) {
-                data[n] = 0x00;
-                var_hub_size++;
-            }
-
-            /* fill PortPwrCtrlMask bits */
-            limit = limit + ((s->nb_ports + 7) / 8);
-            for (;n < limit; n++) {
-                data[n] = 0xff;
-                var_hub_size++;
-            }
-
-            ret = sizeof(qemu_hub_hub_descriptor) + var_hub_size;
-            data[0] = ret;
-            break;
-        }
-    default:
-    fail:
-        ret = USB_RET_STALL;
-        break;
-    }
-    return ret;
-}
-
-static int usb_hub_handle_data(USBDevice *dev, USBPacket *p)
-{
-    USBHubState *s = (USBHubState *)dev;
-    int ret;
-
-    switch(p->pid) {
-    case USB_TOKEN_IN:
-        if (p->devep == 1) {
-            USBHubPort *port;
-            unsigned int status;
-            int i, n;
-            n = (s->nb_ports + 1 + 7) / 8;
-            if (p->len == 1) { /* FreeBSD workaround */
-                n = 1;
-            } else if (n > p->len) {
-                return USB_RET_BABBLE;
-            }
-            status = 0;
-            for(i = 0; i < s->nb_ports; i++) {
-                port = &s->ports[i];
-                if (port->wPortChange)
-                    status |= (1 << (i + 1));
-            }
-            if (status != 0) {
-                for(i = 0; i < n; i++) {
-                    p->data[i] = status >> (8 * i);
-                }
-                ret = n;
-            } else {
-                ret = USB_RET_NAK; /* usb11 11.13.1 */
-            }
-        } else {
-            goto fail;
-        }
-        break;
-    case USB_TOKEN_OUT:
-    default:
-    fail:
-        ret = USB_RET_STALL;
-        break;
-    }
-    return ret;
-}
-
-static int usb_hub_broadcast_packet(USBHubState *s, USBPacket *p)
-{
-    USBHubPort *port;
-    USBDevice *dev;
-    int i, ret;
-
-    for(i = 0; i < s->nb_ports; i++) {
-        port = &s->ports[i];
-        dev = port->port.dev;
-        if (dev && (port->wPortStatus & PORT_STAT_ENABLE)) {
-            ret = dev->handle_packet(dev, p);
-            if (ret != USB_RET_NODEV) {
-                return ret;
-            }
-        }
-    }
-    return USB_RET_NODEV;
-}
-
-static int usb_hub_handle_packet(USBDevice *dev, USBPacket *p)
-{
-    USBHubState *s = (USBHubState *)dev;
-
-#if defined(DEBUG) && 0
-    printf("usb_hub: pid=0x%x\n", pid);
-#endif
-    if (dev->state == USB_STATE_DEFAULT &&
-        dev->addr != 0 &&
-        p->devaddr != dev->addr &&
-        (p->pid == USB_TOKEN_SETUP ||
-         p->pid == USB_TOKEN_OUT ||
-         p->pid == USB_TOKEN_IN)) {
-        /* broadcast the packet to the devices */
-        return usb_hub_broadcast_packet(s, p);
-    }
-    return usb_generic_handle_packet(dev, p);
-}
-
-static void usb_hub_handle_destroy(USBDevice *dev)
-{
-    USBHubState *s = (USBHubState *)dev;
-
-    g_free(s);
-}
-
-USBDevice *usb_hub_init(int nb_ports)
-{
-    USBHubState *s;
-    USBHubPort *port;
-    int i;
-
-    if (nb_ports > MAX_PORTS)
-        return NULL;
-    s = g_malloc0(sizeof(USBHubState));
-    s->dev.speed = USB_SPEED_FULL;
-    s->dev.handle_packet = usb_hub_handle_packet;
-
-    /* generic USB device init */
-    s->dev.handle_reset = usb_hub_handle_reset;
-    s->dev.handle_control = usb_hub_handle_control;
-    s->dev.handle_data = usb_hub_handle_data;
-    s->dev.handle_destroy = usb_hub_handle_destroy;
-
-    pstrcpy(s->dev.devname, sizeof(s->dev.devname), "QEMU USB Hub");
-
-    s->nb_ports = nb_ports;
-    for(i = 0; i < s->nb_ports; i++) {
-        port = &s->ports[i];
-        qemu_register_usb_port(&port->port, s, i, usb_hub_attach);
-        port->wPortStatus = PORT_STAT_POWER;
-        port->wPortChange = 0;
-    }
-    return (USBDevice *)s;
-}
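
The GetHubDescriptor handler above builds a variable-length hub descriptor: 7 fixed
bytes followed by a DeviceRemovable bitmap (one bit per port plus a reserved bit 0,
zero-filled) and a PortPwrCtrlMask bitmap (one bit per port, filled with 0xff), and it
patches the resulting length back into bLength. A minimal standalone sketch of that
size arithmetic, illustrative only and not code from the tree:

    /* Sketch: reproduce the bLength computed by the removed GetHubDescriptor code. */
    #include <stdio.h>

    static int hub_descriptor_length(int nb_ports)
    {
        int device_removable = (nb_ports + 1 + 7) / 8;  /* bit 0 is reserved */
        int port_pwr_ctrl    = (nb_ports + 7) / 8;      /* filled with 0xff above */
        return 7 + device_removable + port_pwr_ctrl;    /* 7 fixed header bytes */
    }

    int main(void)
    {
        /* e.g. an 8-port hub: 7 + 2 + 1 = 10 bytes */
        printf("hub descriptor bLength for 8 ports: %d\n", hub_descriptor_length(8));
        return 0;
    }

The same (nb_ports + 1 + 7) / 8 value is what the handler patches into the config
descriptor's wMaxPacketSize (data[22]) and what usb_hub_handle_data uses to size the
status-change bitmap.
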
diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c
deleted file mode 100644
index 24d978c..0000000
--- a/hw/usb/dev-storage.c
+++ /dev/null
@@ -1,582 +0,0 @@
-/*
- * USB Mass Storage Device emulation
- *
- * Copyright (c) 2006 CodeSourcery.
- * Written by Paul Brook
- *
- * This code is licenced under the LGPL.
- */
-
-#include "qemu-common.h"
-#include "hw/usb.h"
-#include "block/block.h"
-#include "hw/scsi/scsi-disk.h"
-#include "ui/console.h"
-
-//#define DEBUG_MSD
-
-#ifdef DEBUG_MSD
-#define DPRINTF(fmt, ...) \
-do { printf("usb-msd: " fmt , ## __VA_ARGS__); } while (0)
-#else
-#define DPRINTF(fmt, ...) do {} while(0)
-#endif
-
-/* USB requests.  */
-#define MassStorageReset  0xff
-#define GetMaxLun         0xfe
-
-enum USBMSDMode {
-    USB_MSDM_CBW, /* Command Block.  */
-    USB_MSDM_DATAOUT, /* Transfer data to device.  */
-    USB_MSDM_DATAIN, /* Transfer data from device.  */
-    USB_MSDM_CSW /* Command Status.  */
-};
-
-typedef struct {
-    USBDevice dev;
-    enum USBMSDMode mode;
-    uint32_t scsi_len;
-    uint8_t *scsi_buf;
-    uint32_t usb_len;
-    uint8_t *usb_buf;
-    uint32_t data_len;
-    uint32_t residue;
-    uint32_t tag;
-    BlockDriverState *bs;
-    SCSIDevice *scsi_dev;
-    int result;
-    /* For async completion.  */
-    USBPacket *packet;
-} MSDState;
-
-struct usb_msd_cbw {
-    uint32_t sig;
-    uint32_t tag;
-    uint32_t data_len;
-    uint8_t flags;
-    uint8_t lun;
-    uint8_t cmd_len;
-    uint8_t cmd[16];
-};
-
-struct usb_msd_csw {
-    uint32_t sig;
-    uint32_t tag;
-    uint32_t residue;
-    uint8_t status;
-};
-
-static const uint8_t qemu_msd_dev_descriptor[] = {
-	0x12,       /*  u8 bLength; */
-	0x01,       /*  u8 bDescriptorType; Device */
-	0x00, 0x01, /*  u16 bcdUSB; v1.0 */
-
-	0x00,	    /*  u8  bDeviceClass; */
-	0x00,	    /*  u8  bDeviceSubClass; */
-	0x00,       /*  u8  bDeviceProtocol; [ low/full speeds only ] */
-	0x08,       /*  u8  bMaxPacketSize0; 8 Bytes */
-
-        /* Vendor and product id are arbitrary.  */
-	0x00, 0x00, /*  u16 idVendor; */
- 	0x00, 0x00, /*  u16 idProduct; */
-	0x00, 0x00, /*  u16 bcdDevice */
-
-	0x01,       /*  u8  iManufacturer; */
-	0x02,       /*  u8  iProduct; */
-	0x03,       /*  u8  iSerialNumber; */
-	0x01        /*  u8  bNumConfigurations; */
-};
-
-static const uint8_t qemu_msd_config_descriptor[] = {
-
-	/* one configuration */
-	0x09,       /*  u8  bLength; */
-	0x02,       /*  u8  bDescriptorType; Configuration */
-	0x20, 0x00, /*  u16 wTotalLength; */
-	0x01,       /*  u8  bNumInterfaces; (1) */
-	0x01,       /*  u8  bConfigurationValue; */
-	0x00,       /*  u8  iConfiguration; */
-	0xc0,       /*  u8  bmAttributes;
-				 Bit 7: must be set,
-				     6: Self-powered,
-				     5: Remote wakeup,
-				     4..0: resvd */
-	0x00,       /*  u8  MaxPower; */
-
-	/* one interface */
-	0x09,       /*  u8  if_bLength; */
-	0x04,       /*  u8  if_bDescriptorType; Interface */
-	0x00,       /*  u8  if_bInterfaceNumber; */
-	0x00,       /*  u8  if_bAlternateSetting; */
-	0x02,       /*  u8  if_bNumEndpoints; */
-	0x08,       /*  u8  if_bInterfaceClass; MASS STORAGE */
-	0x06,       /*  u8  if_bInterfaceSubClass; SCSI */
-	0x50,       /*  u8  if_bInterfaceProtocol; Bulk Only */
-	0x00,       /*  u8  if_iInterface; */
-
-	/* Bulk-In endpoint */
-	0x07,       /*  u8  ep_bLength; */
-	0x05,       /*  u8  ep_bDescriptorType; Endpoint */
-	0x81,       /*  u8  ep_bEndpointAddress; IN Endpoint 1 */
- 	0x02,       /*  u8  ep_bmAttributes; Bulk */
- 	0x40, 0x00, /*  u16 ep_wMaxPacketSize; */
-	0x00,       /*  u8  ep_bInterval; */
-
-	/* Bulk-Out endpoint */
-	0x07,       /*  u8  ep_bLength; */
-	0x05,       /*  u8  ep_bDescriptorType; Endpoint */
-	0x02,       /*  u8  ep_bEndpointAddress; OUT Endpoint 2 */
- 	0x02,       /*  u8  ep_bmAttributes; Bulk */
- 	0x40, 0x00, /*  u16 ep_wMaxPacketSize; */
-	0x00        /*  u8  ep_bInterval; */
-};
-
-static void usb_msd_copy_data(MSDState *s)
-{
-    uint32_t len;
-    len = s->usb_len;
-    if (len > s->scsi_len)
-        len = s->scsi_len;
-    if (s->mode == USB_MSDM_DATAIN) {
-        memcpy(s->usb_buf, s->scsi_buf, len);
-    } else {
-        memcpy(s->scsi_buf, s->usb_buf, len);
-    }
-    s->usb_len -= len;
-    s->scsi_len -= len;
-    s->usb_buf += len;
-    s->scsi_buf += len;
-    s->data_len -= len;
-    if (s->scsi_len == 0) {
-        if (s->mode == USB_MSDM_DATAIN) {
-            s->scsi_dev->read_data(s->scsi_dev, s->tag);
-        } else if (s->mode == USB_MSDM_DATAOUT) {
-            s->scsi_dev->write_data(s->scsi_dev, s->tag);
-        }
-    }
-}
-
-static void usb_msd_send_status(MSDState *s)
-{
-    struct usb_msd_csw csw;
-
-    csw.sig = cpu_to_le32(0x53425355);
-    csw.tag = cpu_to_le32(s->tag);
-    csw.residue = s->residue;
-    csw.status = s->result;
-    memcpy(s->usb_buf, &csw, 13);
-}
-
-static void usb_msd_command_complete(void *opaque, int reason, uint32_t tag,
-                                     uint32_t arg)
-{
-    MSDState *s = (MSDState *)opaque;
-    USBPacket *p = s->packet;
-
-    if (tag != s->tag) {
-        fprintf(stderr, "usb-msd: Unexpected SCSI Tag 0x%x\n", tag);
-    }
-    if (reason == SCSI_REASON_DONE) {
-        DPRINTF("Command complete %d\n", arg);
-        s->residue = s->data_len;
-        s->result = arg != 0;
-        if (s->packet) {
-            if (s->data_len == 0 && s->mode == USB_MSDM_DATAOUT) {
-                /* A deferred packet with no write data remaining must be
-                   the status read packet.  */
-                usb_msd_send_status(s);
-                s->mode = USB_MSDM_CBW;
-            } else {
-                if (s->data_len) {
-                    s->data_len -= s->usb_len;
-                    if (s->mode == USB_MSDM_DATAIN)
-                        memset(s->usb_buf, 0, s->usb_len);
-                    s->usb_len = 0;
-                }
-                if (s->data_len == 0)
-                    s->mode = USB_MSDM_CSW;
-            }
-            s->packet = NULL;
-            usb_packet_complete(p);
-        } else if (s->data_len == 0) {
-            s->mode = USB_MSDM_CSW;
-        }
-        return;
-    }
-    s->scsi_len = arg;
-    s->scsi_buf = s->scsi_dev->get_buf(s->scsi_dev, tag);
-    if (p) {
-        usb_msd_copy_data(s);
-        if (s->usb_len == 0) {
-            /* Set s->packet to NULL before calling usb_packet_complete
-               because another request may be issued before
-               usb_packet_complete returns.  */
-            DPRINTF("Packet complete %p\n", p);
-            s->packet = NULL;
-            usb_packet_complete(p);
-        }
-    }
-}
-
-static void usb_msd_handle_reset(USBDevice *dev)
-{
-    MSDState *s = (MSDState *)dev;
-
-    DPRINTF("Reset\n");
-    s->mode = USB_MSDM_CBW;
-}
-
-static int usb_msd_handle_control(USBDevice *dev, int request, int value,
-                                  int index, int length, uint8_t *data)
-{
-    MSDState *s = (MSDState *)dev;
-    int ret = 0;
-
-    switch (request) {
-    case DeviceRequest | USB_REQ_GET_STATUS:
-        data[0] = (1 << USB_DEVICE_SELF_POWERED) |
-            (dev->remote_wakeup << USB_DEVICE_REMOTE_WAKEUP);
-        data[1] = 0x00;
-        ret = 2;
-        break;
-    case DeviceOutRequest | USB_REQ_CLEAR_FEATURE:
-        if (value == USB_DEVICE_REMOTE_WAKEUP) {
-            dev->remote_wakeup = 0;
-        } else {
-            goto fail;
-        }
-        ret = 0;
-        break;
-    case DeviceOutRequest | USB_REQ_SET_FEATURE:
-        if (value == USB_DEVICE_REMOTE_WAKEUP) {
-            dev->remote_wakeup = 1;
-        } else {
-            goto fail;
-        }
-        ret = 0;
-        break;
-    case DeviceOutRequest | USB_REQ_SET_ADDRESS:
-        dev->addr = value;
-        ret = 0;
-        break;
-    case DeviceRequest | USB_REQ_GET_DESCRIPTOR:
-        switch(value >> 8) {
-        case USB_DT_DEVICE:
-            memcpy(data, qemu_msd_dev_descriptor,
-                   sizeof(qemu_msd_dev_descriptor));
-            ret = sizeof(qemu_msd_dev_descriptor);
-            break;
-        case USB_DT_CONFIG:
-            memcpy(data, qemu_msd_config_descriptor,
-                   sizeof(qemu_msd_config_descriptor));
-            ret = sizeof(qemu_msd_config_descriptor);
-            break;
-        case USB_DT_STRING:
-            switch(value & 0xff) {
-            case 0:
-                /* language ids */
-                data[0] = 4;
-                data[1] = 3;
-                data[2] = 0x09;
-                data[3] = 0x04;
-                ret = 4;
-                break;
-            case 1:
-                /* vendor description */
-                ret = set_usb_string(data, "QEMU " QEMU_VERSION);
-                break;
-            case 2:
-                /* product description */
-                ret = set_usb_string(data, "QEMU USB HARDDRIVE");
-                break;
-            case 3:
-                /* serial number */
-                ret = set_usb_string(data, "1");
-                break;
-            default:
-                goto fail;
-            }
-            break;
-        default:
-            goto fail;
-        }
-        break;
-    case DeviceRequest | USB_REQ_GET_CONFIGURATION:
-        data[0] = 1;
-        ret = 1;
-        break;
-    case DeviceOutRequest | USB_REQ_SET_CONFIGURATION:
-        ret = 0;
-        break;
-    case DeviceRequest | USB_REQ_GET_INTERFACE:
-        data[0] = 0;
-        ret = 1;
-        break;
-    case DeviceOutRequest | USB_REQ_SET_INTERFACE:
-        ret = 0;
-        break;
-    case EndpointOutRequest | USB_REQ_CLEAR_FEATURE:
-        if (value == 0 && index != 0x81) { /* clear ep halt */
-            goto fail;
-        }
-        ret = 0;
-        break;
-        /* Class specific requests.  */
-    case MassStorageReset:
-        /* Reset state ready for the next CBW.  */
-        s->mode = USB_MSDM_CBW;
-        ret = 0;
-        break;
-    case GetMaxLun:
-        data[0] = 0;
-        ret = 1;
-        break;
-    default:
-    fail:
-        ret = USB_RET_STALL;
-        break;
-    }
-    return ret;
-}
-
-static void usb_msd_cancel_io(USBPacket *p, void *opaque)
-{
-    MSDState *s = opaque;
-    s->scsi_dev->cancel_io(s->scsi_dev, s->tag);
-    s->packet = NULL;
-    s->scsi_len = 0;
-}
-
-static int usb_msd_handle_data(USBDevice *dev, USBPacket *p)
-{
-    MSDState *s = (MSDState *)dev;
-    int ret = 0;
-    struct usb_msd_cbw cbw;
-    uint8_t devep = p->devep;
-    uint8_t *data = p->data;
-    int len = p->len;
-
-    switch (p->pid) {
-    case USB_TOKEN_OUT:
-        if (devep != 2)
-            goto fail;
-
-        switch (s->mode) {
-        case USB_MSDM_CBW:
-            if (len != 31) {
-                fprintf(stderr, "usb-msd: Bad CBW size");
-                goto fail;
-            }
-            memcpy(&cbw, data, 31);
-            if (le32_to_cpu(cbw.sig) != 0x43425355) {
-                fprintf(stderr, "usb-msd: Bad signature %08x\n",
-                        le32_to_cpu(cbw.sig));
-                goto fail;
-            }
-            DPRINTF("Command on LUN %d\n", cbw.lun);
-            if (cbw.lun != 0) {
-                fprintf(stderr, "usb-msd: Bad LUN %d\n", cbw.lun);
-                goto fail;
-            }
-            s->tag = le32_to_cpu(cbw.tag);
-            s->data_len = le32_to_cpu(cbw.data_len);
-            if (s->data_len == 0) {
-                s->mode = USB_MSDM_CSW;
-            } else if (cbw.flags & 0x80) {
-                s->mode = USB_MSDM_DATAIN;
-            } else {
-                s->mode = USB_MSDM_DATAOUT;
-            }
-            DPRINTF("Command tag 0x%x flags %08x len %d data %d\n",
-                    s->tag, cbw.flags, cbw.cmd_len, s->data_len);
-            s->residue = 0;
-            s->scsi_dev->send_command(s->scsi_dev, s->tag, cbw.cmd, 0);
-            /* ??? Should check that USB and SCSI data transfer
-               directions match.  */
-            if (s->residue == 0) {
-                if (s->mode == USB_MSDM_DATAIN) {
-                    s->scsi_dev->read_data(s->scsi_dev, s->tag);
-                } else if (s->mode == USB_MSDM_DATAOUT) {
-                    s->scsi_dev->write_data(s->scsi_dev, s->tag);
-                }
-            }
-            ret = len;
-            break;
-
-        case USB_MSDM_DATAOUT:
-            DPRINTF("Data out %d/%d\n", len, s->data_len);
-            if (len > s->data_len)
-                goto fail;
-
-            s->usb_buf = data;
-            s->usb_len = len;
-            if (s->scsi_len) {
-                usb_msd_copy_data(s);
-            }
-            if (s->residue && s->usb_len) {
-                s->data_len -= s->usb_len;
-                if (s->data_len == 0)
-                    s->mode = USB_MSDM_CSW;
-                s->usb_len = 0;
-            }
-            if (s->usb_len) {
-                DPRINTF("Deferring packet %p\n", p);
-                usb_defer_packet(p, usb_msd_cancel_io, s);
-                s->packet = p;
-                ret = USB_RET_ASYNC;
-            } else {
-                ret = len;
-            }
-            break;
-
-        default:
-            DPRINTF("Unexpected write (len %d)\n", len);
-            goto fail;
-        }
-        break;
-
-    case USB_TOKEN_IN:
-        if (devep != 1)
-            goto fail;
-
-        switch (s->mode) {
-        case USB_MSDM_DATAOUT:
-            if (s->data_len != 0 || len < 13)
-                goto fail;
-            /* Waiting for SCSI write to complete.  */
-            usb_defer_packet(p, usb_msd_cancel_io, s);
-            s->packet = p;
-            ret = USB_RET_ASYNC;
-            break;
-
-        case USB_MSDM_CSW:
-            DPRINTF("Command status %d tag 0x%x, len %d\n",
-                    s->result, s->tag, len);
-            if (len < 13)
-                goto fail;
-
-            s->usb_len = len;
-            s->usb_buf = data;
-            usb_msd_send_status(s);
-            s->mode = USB_MSDM_CBW;
-            ret = 13;
-            break;
-
-        case USB_MSDM_DATAIN:
-            DPRINTF("Data in %d/%d\n", len, s->data_len);
-            if (len > s->data_len)
-                len = s->data_len;
-            s->usb_buf = data;
-            s->usb_len = len;
-            if (s->scsi_len) {
-                usb_msd_copy_data(s);
-            }
-            if (s->residue && s->usb_len) {
-                s->data_len -= s->usb_len;
-                memset(s->usb_buf, 0, s->usb_len);
-                if (s->data_len == 0)
-                    s->mode = USB_MSDM_CSW;
-                s->usb_len = 0;
-            }
-            if (s->usb_len) {
-                DPRINTF("Deferring packet %p\n", p);
-                usb_defer_packet(p, usb_msd_cancel_io, s);
-                s->packet = p;
-                ret = USB_RET_ASYNC;
-            } else {
-                ret = len;
-            }
-            break;
-
-        default:
-            DPRINTF("Unexpected read (len %d)\n", len);
-            goto fail;
-        }
-        break;
-
-    default:
-        DPRINTF("Bad token\n");
-    fail:
-        ret = USB_RET_STALL;
-        break;
-    }
-
-    return ret;
-}
-
-static void usb_msd_handle_destroy(USBDevice *dev)
-{
-    MSDState *s = (MSDState *)dev;
-
-    s->scsi_dev->destroy(s->scsi_dev);
-    bdrv_delete(s->bs);
-    g_free(s);
-}
-
-USBDevice *usb_msd_init(const char *filename)
-{
-    MSDState *s;
-    BlockDriverState *bdrv;
-    BlockDriver *drv = NULL;
-    const char *p1;
-    char fmt[32];
-
-    p1 = strchr(filename, ':');
-    if (p1++) {
-        const char *p2;
-
-        if (strstart(filename, "format=", &p2)) {
-            int len = MIN(p1 - p2, sizeof(fmt));
-            pstrcpy(fmt, len, p2);
-
-            drv = bdrv_find_format(fmt);
-            if (!drv) {
-                printf("invalid format %s\n", fmt);
-                return NULL;
-            }
-        } else if (*filename != ':') {
-            printf("unrecognized USB mass-storage option %s\n", filename);
-            return NULL;
-        }
-
-        filename = p1;
-    }
-
-    if (!*filename) {
-        printf("block device specification needed\n");
-        return NULL;
-    }
-
-    s = g_malloc0(sizeof(MSDState));
-
-    bdrv = bdrv_new("usb");
-    if (bdrv_open(bdrv, filename, 0, drv) < 0)
-        goto fail;
-    s->bs = bdrv;
-
-    s->dev.speed = USB_SPEED_FULL;
-    s->dev.handle_packet = usb_generic_handle_packet;
-
-    s->dev.handle_reset = usb_msd_handle_reset;
-    s->dev.handle_control = usb_msd_handle_control;
-    s->dev.handle_data = usb_msd_handle_data;
-    s->dev.handle_destroy = usb_msd_handle_destroy;
-
-    snprintf(s->dev.devname, sizeof(s->dev.devname), "QEMU USB MSD(%.16s)",
-             filename);
-
-    s->scsi_dev = scsi_disk_init(bdrv, 0, usb_msd_command_complete, s);
-    usb_msd_handle_reset((USBDevice *)s);
-    return (USBDevice *)s;
- fail:
-    g_free(s);
-    return NULL;
-}
-
-BlockDriverState *usb_msd_get_bdrv(USBDevice *dev)
-{
-    MSDState *s = (MSDState *)dev;
-
-    return s->bs;
-}
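
The usb-msd device removed above implements USB Mass Storage Bulk-Only Transport: the
host sends a 31-byte Command Block Wrapper (signature 0x43425355, "USBC") on the OUT
endpoint, the data stage runs in the direction selected by bit 7 of bmCBWFlags, and
the device finishes with a 13-byte Command Status Wrapper (signature 0x53425355,
"USBS"). A minimal standalone sketch of the CBW validation, with field offsets
matching the struct usb_msd_cbw layout above; illustrative only, not code from the
tree:

    /* Sketch (hosted C): validate and decode a Bulk-Only Transport CBW. */
    #include <stdint.h>
    #include <stdio.h>

    #define CBW_SIZE 31
    #define CBW_SIG  0x43425355u            /* "USBC", read as little-endian */

    static uint32_t get_le32(const uint8_t *p)
    {
        return p[0] | (p[1] << 8) | (p[2] << 16) | ((uint32_t)p[3] << 24);
    }

    /* Returns 0 if buf holds a plausible CBW, -1 otherwise. */
    static int check_cbw(const uint8_t *buf, size_t len)
    {
        if (len != CBW_SIZE || get_le32(buf) != CBW_SIG)
            return -1;
        printf("tag=0x%x data_len=%u dir=%s lun=%u cmd_len=%u\n",
               (unsigned)get_le32(buf + 4),      /* dCBWTag */
               (unsigned)get_le32(buf + 8),      /* dCBWDataTransferLength */
               (buf[12] & 0x80) ? "IN" : "OUT",  /* bmCBWFlags bit 7 */
               (unsigned)buf[13],                /* bCBWLUN */
               (unsigned)buf[14]);               /* bCBWCBLength */
        return 0;
    }

    int main(void)
    {
        /* Example: SCSI TEST UNIT READY (opcode 0x00), no data stage. */
        uint8_t cbw[CBW_SIZE] = {
            0x55, 0x53, 0x42, 0x43,  /* dCBWSignature = "USBC" */
            0x01, 0x00, 0x00, 0x00,  /* dCBWTag = 1 */
            0x00, 0x00, 0x00, 0x00,  /* dCBWDataTransferLength = 0 */
            0x00,                    /* bmCBWFlags: host-to-device */
            0x00,                    /* bCBWLUN = 0 */
            0x06,                    /* bCBWCBLength = 6 */
        };
        return check_cbw(cbw, sizeof(cbw));
    }

usb_msd_handle_data above performs the same length and signature checks on the real
packet buffer before handing the embedded CDB to the SCSI layer via send_command.
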
diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
deleted file mode 100644
index 5ed63d5..0000000
--- a/hw/usb/hcd-ohci.c
+++ /dev/null
@@ -1,1759 +0,0 @@
-/*
- * QEMU USB OHCI Emulation
- * Copyright (c) 2004 Gianni Tedesco
- * Copyright (c) 2006 CodeSourcery
- * Copyright (c) 2006 Openedhand Ltd.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- *
- * TODO:
- *  o Isochronous transfers
- *  o Allocate bandwidth in frames properly
- *  o Disable timers when nothing needs to be done, or remove timer usage
- *    altogether.
- *  o Handle unrecoverable errors properly
- *  o BIOS work to boot from USB storage
-*/
-
-#include "hw/hw.h"
-#include "qemu/timer.h"
-#include "hw/usb.h"
-#include "hw/pci/pci.h"
-#include "hw/arm/pxa.h"
-#include "hw/devices.h"
-
-//#define DEBUG_OHCI
-/* Dump packet contents.  */
-//#define DEBUG_PACKET
-//#define DEBUG_ISOCH
-/* This causes frames to occur 1000x slower */
-//#define OHCI_TIME_WARP 1
-
-#ifdef DEBUG_OHCI
-#define dprintf printf
-#else
-#define dprintf(...)
-#endif
-
-/* Number of Downstream Ports on the root hub.  */
-
-#define OHCI_MAX_PORTS 15
-
-static int64_t usb_frame_time;
-static int64_t usb_bit_time;
-
-typedef struct OHCIPort {
-    USBPort port;
-    uint32_t ctrl;
-} OHCIPort;
-
-enum ohci_type {
-    OHCI_TYPE_PCI,
-    OHCI_TYPE_PXA,
-    OHCI_TYPE_SM501,
-};
-
-typedef struct {
-    qemu_irq irq;
-    enum ohci_type type;
-    int mem;
-    int num_ports;
-    const char *name;
-
-    QEMUTimer *eof_timer;
-    int64_t sof_time;
-
-    /* OHCI state */
-    /* Control partition */
-    uint32_t ctl, status;
-    uint32_t intr_status;
-    uint32_t intr;
-
-    /* memory pointer partition */
-    uint32_t hcca;
-    uint32_t ctrl_head, ctrl_cur;
-    uint32_t bulk_head, bulk_cur;
-    uint32_t per_cur;
-    uint32_t done;
-    int done_count;
-
-    /* Frame counter partition */
-    uint32_t fsmps:15;
-    uint32_t fit:1;
-    uint32_t fi:14;
-    uint32_t frt:1;
-    uint16_t frame_number;
-    uint16_t padding;
-    uint32_t pstart;
-    uint32_t lst;
-
-    /* Root Hub partition */
-    uint32_t rhdesc_a, rhdesc_b;
-    uint32_t rhstatus;
-    OHCIPort rhport[OHCI_MAX_PORTS];
-
-    /* PXA27x Non-OHCI events */
-    uint32_t hstatus;
-    uint32_t hmask;
-    uint32_t hreset;
-    uint32_t htest;
-
-    /* SM501 local memory offset */
-    hwaddr localmem_base;
-
-    /* Active packets.  */
-    uint32_t old_ctl;
-    USBPacket usb_packet;
-    uint8_t usb_buf[8192];
-    uint32_t async_td;
-    int async_complete;
-
-} OHCIState;
-
-/* Host Controller Communications Area */
-struct ohci_hcca {
-    uint32_t intr[32];
-    uint16_t frame, pad;
-    uint32_t done;
-};
-
-static void ohci_bus_stop(OHCIState *ohci);
-
-/* Bitfields for the first word of an Endpoint Descriptor.  */
-#define OHCI_ED_FA_SHIFT  0
-#define OHCI_ED_FA_MASK   (0x7f<<OHCI_ED_FA_SHIFT)
-#define OHCI_ED_EN_SHIFT  7
-#define OHCI_ED_EN_MASK   (0xf<<OHCI_ED_EN_SHIFT)
-#define OHCI_ED_D_SHIFT   11
-#define OHCI_ED_D_MASK    (3<<OHCI_ED_D_SHIFT)
-#define OHCI_ED_S         (1<<13)
-#define OHCI_ED_K         (1<<14)
-#define OHCI_ED_F         (1<<15)
-#define OHCI_ED_MPS_SHIFT 16
-#define OHCI_ED_MPS_MASK  (0x7ff<<OHCI_ED_MPS_SHIFT)
-
-/* Flags in the head field of an Endpoint Descriptor.  */
-#define OHCI_ED_H         1
-#define OHCI_ED_C         2
-
-/* Bitfields for the first word of a Transfer Descriptor.  */
-#define OHCI_TD_R         (1<<18)
-#define OHCI_TD_DP_SHIFT  19
-#define OHCI_TD_DP_MASK   (3<<OHCI_TD_DP_SHIFT)
-#define OHCI_TD_DI_SHIFT  21
-#define OHCI_TD_DI_MASK   (7<<OHCI_TD_DI_SHIFT)
-#define OHCI_TD_T0        (1<<24)
-#define OHCI_TD_T1        (1<<24)
-#define OHCI_TD_EC_SHIFT  26
-#define OHCI_TD_EC_MASK   (3<<OHCI_TD_EC_SHIFT)
-#define OHCI_TD_CC_SHIFT  28
-#define OHCI_TD_CC_MASK   (0xf<<OHCI_TD_CC_SHIFT)
-
-/* Bitfields for the first word of an Isochronous Transfer Descriptor.  */
-/* CC & DI - same as in the General Transfer Descriptor */
-#define OHCI_TD_SF_SHIFT  0
-#define OHCI_TD_SF_MASK   (0xffff<<OHCI_TD_SF_SHIFT)
-#define OHCI_TD_FC_SHIFT  24
-#define OHCI_TD_FC_MASK   (7<<OHCI_TD_FC_SHIFT)
-
-/* Isochronous Transfer Descriptor - Offset / PacketStatusWord */
-#define OHCI_TD_PSW_CC_SHIFT 12
-#define OHCI_TD_PSW_CC_MASK  (0xf<<OHCI_TD_PSW_CC_SHIFT)
-#define OHCI_TD_PSW_SIZE_SHIFT 0
-#define OHCI_TD_PSW_SIZE_MASK  (0xfff<<OHCI_TD_PSW_SIZE_SHIFT)
-
-#define OHCI_PAGE_MASK    0xfffff000
-#define OHCI_OFFSET_MASK  0xfff
-
-#define OHCI_DPTR_MASK    0xfffffff0
-
-#define OHCI_BM(val, field) \
-  (((val) & OHCI_##field##_MASK) >> OHCI_##field##_SHIFT)
-
-#define OHCI_SET_BM(val, field, newval) do { \
-    val &= ~OHCI_##field##_MASK; \
-    val |= ((newval) << OHCI_##field##_SHIFT) & OHCI_##field##_MASK; \
-    } while(0)
-
-/* endpoint descriptor */
-struct ohci_ed {
-    uint32_t flags;
-    uint32_t tail;
-    uint32_t head;
-    uint32_t next;
-};
-
-/* General transfer descriptor */
-struct ohci_td {
-    uint32_t flags;
-    uint32_t cbp;
-    uint32_t next;
-    uint32_t be;
-};
-
-/* Isochronous transfer descriptor */
-struct ohci_iso_td {
-    uint32_t flags;
-    uint32_t bp;
-    uint32_t next;
-    uint32_t be;
-    uint16_t offset[8];
-};
-
-#define USB_HZ                      12000000
-
-/* OHCI Local stuff */
-#define OHCI_CTL_CBSR         ((1<<0)|(1<<1))
-#define OHCI_CTL_PLE          (1<<2)
-#define OHCI_CTL_IE           (1<<3)
-#define OHCI_CTL_CLE          (1<<4)
-#define OHCI_CTL_BLE          (1<<5)
-#define OHCI_CTL_HCFS         ((1<<6)|(1<<7))
-#define  OHCI_USB_RESET       0x00
-#define  OHCI_USB_RESUME      0x40
-#define  OHCI_USB_OPERATIONAL 0x80
-#define  OHCI_USB_SUSPEND     0xc0
-#define OHCI_CTL_IR           (1<<8)
-#define OHCI_CTL_RWC          (1<<9)
-#define OHCI_CTL_RWE          (1<<10)
-
-#define OHCI_STATUS_HCR       (1<<0)
-#define OHCI_STATUS_CLF       (1<<1)
-#define OHCI_STATUS_BLF       (1<<2)
-#define OHCI_STATUS_OCR       (1<<3)
-#define OHCI_STATUS_SOC       ((1<<6)|(1<<7))
-
-#define OHCI_INTR_SO          (1<<0) /* Scheduling overrun */
-#define OHCI_INTR_WD          (1<<1) /* HcDoneHead writeback */
-#define OHCI_INTR_SF          (1<<2) /* Start of frame */
-#define OHCI_INTR_RD          (1<<3) /* Resume detect */
-#define OHCI_INTR_UE          (1<<4) /* Unrecoverable error */
-#define OHCI_INTR_FNO         (1<<5) /* Frame number overflow */
-#define OHCI_INTR_RHSC        (1<<6) /* Root hub status change */
-#define OHCI_INTR_OC          (1<<30) /* Ownership change */
-#define OHCI_INTR_MIE         (1<<31) /* Master Interrupt Enable */
-
-#define OHCI_HCCA_SIZE        0x100
-#define OHCI_HCCA_MASK        0xffffff00
-
-#define OHCI_EDPTR_MASK       0xfffffff0
-
-#define OHCI_FMI_FI           0x00003fff
-#define OHCI_FMI_FSMPS        0xffff0000
-#define OHCI_FMI_FIT          0x80000000
-
-#define OHCI_FR_RT            (1<<31)
-
-#define OHCI_LS_THRESH        0x628
-
-#define OHCI_RHA_RW_MASK      0x00000000 /* Mask of supported features.  */
-#define OHCI_RHA_PSM          (1<<8)
-#define OHCI_RHA_NPS          (1<<9)
-#define OHCI_RHA_DT           (1<<10)
-#define OHCI_RHA_OCPM         (1<<11)
-#define OHCI_RHA_NOCP         (1<<12)
-#define OHCI_RHA_POTPGT_MASK  0xff000000
-
-#define OHCI_RHS_LPS          (1<<0)
-#define OHCI_RHS_OCI          (1<<1)
-#define OHCI_RHS_DRWE         (1<<15)
-#define OHCI_RHS_LPSC         (1<<16)
-#define OHCI_RHS_OCIC         (1<<17)
-#define OHCI_RHS_CRWE         (1<<31)
-
-#define OHCI_PORT_CCS         (1<<0)
-#define OHCI_PORT_PES         (1<<1)
-#define OHCI_PORT_PSS         (1<<2)
-#define OHCI_PORT_POCI        (1<<3)
-#define OHCI_PORT_PRS         (1<<4)
-#define OHCI_PORT_PPS         (1<<8)
-#define OHCI_PORT_LSDA        (1<<9)
-#define OHCI_PORT_CSC         (1<<16)
-#define OHCI_PORT_PESC        (1<<17)
-#define OHCI_PORT_PSSC        (1<<18)
-#define OHCI_PORT_OCIC        (1<<19)
-#define OHCI_PORT_PRSC        (1<<20)
-#define OHCI_PORT_WTC         (OHCI_PORT_CSC|OHCI_PORT_PESC|OHCI_PORT_PSSC \
-                               |OHCI_PORT_OCIC|OHCI_PORT_PRSC)
-
-#define OHCI_TD_DIR_SETUP     0x0
-#define OHCI_TD_DIR_OUT       0x1
-#define OHCI_TD_DIR_IN        0x2
-#define OHCI_TD_DIR_RESERVED  0x3
-
-#define OHCI_CC_NOERROR             0x0
-#define OHCI_CC_CRC                 0x1
-#define OHCI_CC_BITSTUFFING         0x2
-#define OHCI_CC_DATATOGGLEMISMATCH  0x3
-#define OHCI_CC_STALL               0x4
-#define OHCI_CC_DEVICENOTRESPONDING 0x5
-#define OHCI_CC_PIDCHECKFAILURE     0x6
-#define OHCI_CC_UNDEXPETEDPID       0x7
-#define OHCI_CC_DATAOVERRUN         0x8
-#define OHCI_CC_DATAUNDERRUN        0x9
-#define OHCI_CC_BUFFEROVERRUN       0xc
-#define OHCI_CC_BUFFERUNDERRUN      0xd
-
-#define OHCI_HRESET_FSBIR       (1 << 0)
-
-/* Update IRQ levels */
-static inline void ohci_intr_update(OHCIState *ohci)
-{
-    int level = 0;
-
-    if ((ohci->intr & OHCI_INTR_MIE) &&
-        (ohci->intr_status & ohci->intr))
-        level = 1;
-
-    qemu_set_irq(ohci->irq, level);
-}
-
-/* Set an interrupt */
-static inline void ohci_set_interrupt(OHCIState *ohci, uint32_t intr)
-{
-    ohci->intr_status |= intr;
-    ohci_intr_update(ohci);
-}
-
-/* Attach or detach a device on a root hub port.  */
-static void ohci_attach(USBPort *port1, USBDevice *dev)
-{
-    OHCIState *s = port1->opaque;
-    OHCIPort *port = &s->rhport[port1->index];
-    uint32_t old_state = port->ctrl;
-
-    if (dev) {
-        if (port->port.dev) {
-            usb_attach(port1, NULL);
-        }
-        /* set connect status */
-        port->ctrl |= OHCI_PORT_CCS | OHCI_PORT_CSC;
-
-        /* update speed */
-        if (dev->speed == USB_SPEED_LOW)
-            port->ctrl |= OHCI_PORT_LSDA;
-        else
-            port->ctrl &= ~OHCI_PORT_LSDA;
-        port->port.dev = dev;
-
-        /* notify of remote-wakeup */
-        if ((s->ctl & OHCI_CTL_HCFS) == OHCI_USB_SUSPEND)
-            ohci_set_interrupt(s, OHCI_INTR_RD);
-
-        /* send the attach message */
-        usb_send_msg(dev, USB_MSG_ATTACH);
-        dprintf("usb-ohci: Attached port %d\n", port1->index);
-    } else {
-        /* set connect status */
-        if (port->ctrl & OHCI_PORT_CCS) {
-            port->ctrl &= ~OHCI_PORT_CCS;
-            port->ctrl |= OHCI_PORT_CSC;
-        }
-        /* disable port */
-        if (port->ctrl & OHCI_PORT_PES) {
-            port->ctrl &= ~OHCI_PORT_PES;
-            port->ctrl |= OHCI_PORT_PESC;
-        }
-        dev = port->port.dev;
-        if (dev) {
-            /* send the detach message */
-            usb_send_msg(dev, USB_MSG_DETACH);
-        }
-        port->port.dev = NULL;
-        dprintf("usb-ohci: Detached port %d\n", port1->index);
-    }
-
-    if (old_state != port->ctrl)
-        ohci_set_interrupt(s, OHCI_INTR_RHSC);
-}
-
-/* Reset the controller */
-static void ohci_reset(void *opaque)
-{
-    OHCIState *ohci = opaque;
-    OHCIPort *port;
-    int i;
-
-    ohci_bus_stop(ohci);
-    ohci->ctl = 0;
-    ohci->old_ctl = 0;
-    ohci->status = 0;
-    ohci->intr_status = 0;
-    ohci->intr = OHCI_INTR_MIE;
-
-    ohci->hcca = 0;
-    ohci->ctrl_head = ohci->ctrl_cur = 0;
-    ohci->bulk_head = ohci->bulk_cur = 0;
-    ohci->per_cur = 0;
-    ohci->done = 0;
-    ohci->done_count = 7;
-
-    /* FSMPS is marked TBD in OHCI 1.0, so use the value Linux sets.
-     */
-    ohci->fsmps = 0x2778;
-    ohci->fi = 0x2edf;
-    ohci->fit = 0;
-    ohci->frt = 0;
-    ohci->frame_number = 0;
-    ohci->pstart = 0;
-    ohci->lst = OHCI_LS_THRESH;
-
-    ohci->rhdesc_a = OHCI_RHA_NPS | ohci->num_ports;
-    ohci->rhdesc_b = 0x0; /* Impl. specific */
-    ohci->rhstatus = 0;
-
-    for (i = 0; i < ohci->num_ports; i++)
-      {
-        port = &ohci->rhport[i];
-        port->ctrl = 0;
-        if (port->port.dev)
-            ohci_attach(&port->port, port->port.dev);
-      }
-    if (ohci->async_td) {
-        usb_cancel_packet(&ohci->usb_packet);
-        ohci->async_td = 0;
-    }
-    dprintf("usb-ohci: Reset %s\n", ohci->name);
-}
-
-/* Get an array of dwords from main memory */
-static inline int get_dwords(OHCIState *ohci,
-                             uint32_t addr, uint32_t *buf, int num)
-{
-    int i;
-
-    addr += ohci->localmem_base;
-
-    for (i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
-        cpu_physical_memory_rw(addr, (uint8_t *)buf, sizeof(*buf), 0);
-        *buf = le32_to_cpu(*buf);
-    }
-
-    return 1;
-}
-
-/* Put an array of dwords in to main memory */
-static inline int put_dwords(OHCIState *ohci,
-                             uint32_t addr, uint32_t *buf, int num)
-{
-    int i;
-
-    addr += ohci->localmem_base;
-
-    for (i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
-        uint32_t tmp = cpu_to_le32(*buf);
-        cpu_physical_memory_rw(addr, (uint8_t *)&tmp, sizeof(tmp), 1);
-    }
-
-    return 1;
-}
-
-/* Get an array of words from main memory */
-static inline int get_words(OHCIState *ohci,
-                            uint32_t addr, uint16_t *buf, int num)
-{
-    int i;
-
-    addr += ohci->localmem_base;
-
-    for (i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
-        cpu_physical_memory_rw(addr, (uint8_t *)buf, sizeof(*buf), 0);
-        *buf = le16_to_cpu(*buf);
-    }
-
-    return 1;
-}
-
-/* Put an array of words in to main memory */
-static inline int put_words(OHCIState *ohci,
-                            uint32_t addr, uint16_t *buf, int num)
-{
-    int i;
-
-    addr += ohci->localmem_base;
-
-    for (i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
-        uint16_t tmp = cpu_to_le16(*buf);
-        cpu_physical_memory_rw(addr, (uint8_t *)&tmp, sizeof(tmp), 1);
-    }
-
-    return 1;
-}
-
-static inline int ohci_read_ed(OHCIState *ohci,
-                               uint32_t addr, struct ohci_ed *ed)
-{
-    return get_dwords(ohci, addr, (uint32_t *)ed, sizeof(*ed) >> 2);
-}
-
-static inline int ohci_read_td(OHCIState *ohci,
-                               uint32_t addr, struct ohci_td *td)
-{
-    return get_dwords(ohci, addr, (uint32_t *)td, sizeof(*td) >> 2);
-}
-
-static inline int ohci_read_iso_td(OHCIState *ohci,
-                                   uint32_t addr, struct ohci_iso_td *td)
-{
-    return (get_dwords(ohci, addr, (uint32_t *)td, 4) &&
-            get_words(ohci, addr + 16, td->offset, 8));
-}
-
-static inline int ohci_read_hcca(OHCIState *ohci,
-                                 uint32_t addr, struct ohci_hcca *hcca)
-{
-    cpu_physical_memory_rw(addr + ohci->localmem_base,
-                           (uint8_t *)hcca, sizeof(*hcca), 0);
-    return 1;
-}
-
-static inline int ohci_put_ed(OHCIState *ohci,
-                              uint32_t addr, struct ohci_ed *ed)
-{
-    return put_dwords(ohci, addr, (uint32_t *)ed, sizeof(*ed) >> 2);
-}
-
-static inline int ohci_put_td(OHCIState *ohci,
-                              uint32_t addr, struct ohci_td *td)
-{
-    return put_dwords(ohci, addr, (uint32_t *)td, sizeof(*td) >> 2);
-}
-
-static inline int ohci_put_iso_td(OHCIState *ohci,
-                                  uint32_t addr, struct ohci_iso_td *td)
-{
-    return (put_dwords(ohci, addr, (uint32_t *)td, 4) &&
-            put_words(ohci, addr + 16, td->offset, 8));
-}
-
-static inline int ohci_put_hcca(OHCIState *ohci,
-                                uint32_t addr, struct ohci_hcca *hcca)
-{
-    cpu_physical_memory_rw(addr + ohci->localmem_base,
-                           (uint8_t *)hcca, sizeof(*hcca), 1);
-    return 1;
-}
-
-/* Read/Write the contents of a TD from/to main memory.  */
-static void ohci_copy_td(OHCIState *ohci, struct ohci_td *td,
-                         uint8_t *buf, int len, int write)
-{
-    uint32_t ptr;
-    uint32_t n;
-
-    ptr = td->cbp;
-    n = 0x1000 - (ptr & 0xfff);
-    if (n > len)
-        n = len;
-    cpu_physical_memory_rw(ptr + ohci->localmem_base, buf, n, write);
-    if (n == len)
-        return;
-    ptr = td->be & ~0xfffu;
-    buf += n;
-    cpu_physical_memory_rw(ptr + ohci->localmem_base, buf, len - n, write);
-}
-
-/* Read/Write the contents of an ISO TD from/to main memory.  */
-static void ohci_copy_iso_td(OHCIState *ohci,
-                             uint32_t start_addr, uint32_t end_addr,
-                             uint8_t *buf, int len, int write)
-{
-    uint32_t ptr;
-    uint32_t n;
-
-    ptr = start_addr;
-    n = 0x1000 - (ptr & 0xfff);
-    if (n > len)
-        n = len;
-    cpu_physical_memory_rw(ptr + ohci->localmem_base, buf, n, write);
-    if (n == len)
-        return;
-    ptr = end_addr & ~0xfffu;
-    buf += n;
-    cpu_physical_memory_rw(ptr + ohci->localmem_base, buf, len - n, write);
-}
-
-static void ohci_process_lists(OHCIState *ohci, int completion);
-
-static void ohci_async_complete_packet(USBPacket *packet, void *opaque)
-{
-    OHCIState *ohci = opaque;
-#ifdef DEBUG_PACKET
-    dprintf("Async packet complete\n");
-#endif
-    ohci->async_complete = 1;
-    ohci_process_lists(ohci, 1);
-}
-
-#define USUB(a, b) ((int16_t)((uint16_t)(a) - (uint16_t)(b)))
-
-static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed,
-                               int completion)
-{
-    int dir;
-    size_t len = 0;
-    const char * __attribute__((unused)) str = NULL;
-    int pid;
-    int ret;
-    int i;
-    USBDevice *dev;
-    struct ohci_iso_td iso_td;
-    uint32_t addr;
-    uint16_t starting_frame;
-    int16_t relative_frame_number;
-    int frame_count;
-    uint32_t start_offset, next_offset, end_offset = 0;
-    uint32_t start_addr, end_addr;
-
-    addr = ed->head & OHCI_DPTR_MASK;
-
-    if (!ohci_read_iso_td(ohci, addr, &iso_td)) {
-        printf("usb-ohci: ISO_TD read error at %x\n", addr);
-        return 0;
-    }
-
-    starting_frame = OHCI_BM(iso_td.flags, TD_SF);
-    frame_count = OHCI_BM(iso_td.flags, TD_FC);
-    relative_frame_number = USUB(ohci->frame_number, starting_frame);
-
-#ifdef DEBUG_ISOCH
-    printf("--- ISO_TD ED head 0x%.8x tailp 0x%.8x\n"
-           "0x%.8x 0x%.8x 0x%.8x 0x%.8x\n"
-           "0x%.8x 0x%.8x 0x%.8x 0x%.8x\n"
-           "0x%.8x 0x%.8x 0x%.8x 0x%.8x\n"
-           "frame_number 0x%.8x starting_frame 0x%.8x\n"
-           "frame_count  0x%.8x relative %d\n"
-           "di 0x%.8x cc 0x%.8x\n",
-           ed->head & OHCI_DPTR_MASK, ed->tail & OHCI_DPTR_MASK,
-           iso_td.flags, iso_td.bp, iso_td.next, iso_td.be,
-           iso_td.offset[0], iso_td.offset[1], iso_td.offset[2], iso_td.offset[3],
-           iso_td.offset[4], iso_td.offset[5], iso_td.offset[6], iso_td.offset[7],
-           ohci->frame_number, starting_frame,
-           frame_count, relative_frame_number,
-           OHCI_BM(iso_td.flags, TD_DI), OHCI_BM(iso_td.flags, TD_CC));
-#endif
-
-    if (relative_frame_number < 0) {
-        dprintf("usb-ohci: ISO_TD R=%d < 0\n", relative_frame_number);
-        return 1;
-    } else if (relative_frame_number > frame_count) {
-        /* ISO TD expired - retire the TD to the Done Queue and continue with
-           the next ISO TD of the same ED */
-        dprintf("usb-ohci: ISO_TD R=%d > FC=%d\n", relative_frame_number,
-               frame_count);
-        OHCI_SET_BM(iso_td.flags, TD_CC, OHCI_CC_DATAOVERRUN);
-        ed->head &= ~OHCI_DPTR_MASK;
-        ed->head |= (iso_td.next & OHCI_DPTR_MASK);
-        iso_td.next = ohci->done;
-        ohci->done = addr;
-        i = OHCI_BM(iso_td.flags, TD_DI);
-        if (i < ohci->done_count)
-            ohci->done_count = i;
-        ohci_put_iso_td(ohci, addr, &iso_td);
-        return 0;
-    }
-
-    dir = OHCI_BM(ed->flags, ED_D);
-    switch (dir) {
-    case OHCI_TD_DIR_IN:
-        str = "in";
-        pid = USB_TOKEN_IN;
-        break;
-    case OHCI_TD_DIR_OUT:
-        str = "out";
-        pid = USB_TOKEN_OUT;
-        break;
-    case OHCI_TD_DIR_SETUP:
-        str = "setup";
-        pid = USB_TOKEN_SETUP;
-        break;
-    default:
-        printf("usb-ohci: Bad direction %d\n", dir);
-        return 1;
-    }
-
-    if (!iso_td.bp || !iso_td.be) {
-        printf("usb-ohci: ISO_TD bp 0x%.8x be 0x%.8x\n", iso_td.bp, iso_td.be);
-        return 1;
-    }
-
-    start_offset = iso_td.offset[relative_frame_number];
-    next_offset = iso_td.offset[relative_frame_number + 1];
-
-    if (!(OHCI_BM(start_offset, TD_PSW_CC) & 0xe) ||
-        ((relative_frame_number < frame_count) &&
-         !(OHCI_BM(next_offset, TD_PSW_CC) & 0xe))) {
-        printf("usb-ohci: ISO_TD cc != not accessed 0x%.8x 0x%.8x\n",
-               start_offset, next_offset);
-        return 1;
-    }
-
-    if ((relative_frame_number < frame_count) && (start_offset > next_offset)) {
-        printf("usb-ohci: ISO_TD start_offset=0x%.8x > next_offset=0x%.8x\n",
-                start_offset, next_offset);
-        return 1;
-    }
-
-    if ((start_offset & 0x1000) == 0) {
-        start_addr = (iso_td.bp & OHCI_PAGE_MASK) |
-            (start_offset & OHCI_OFFSET_MASK);
-    } else {
-        start_addr = (iso_td.be & OHCI_PAGE_MASK) |
-            (start_offset & OHCI_OFFSET_MASK);
-    }
-
-    if (relative_frame_number < frame_count) {
-        end_offset = next_offset - 1;
-        if ((end_offset & 0x1000) == 0) {
-            end_addr = (iso_td.bp & OHCI_PAGE_MASK) |
-                (end_offset & OHCI_OFFSET_MASK);
-        } else {
-            end_addr = (iso_td.be & OHCI_PAGE_MASK) |
-                (end_offset & OHCI_OFFSET_MASK);
-        }
-    } else {
-        /* Last packet in the ISO TD */
-        end_addr = iso_td.be;
-    }
-
-    if ((start_addr & OHCI_PAGE_MASK) != (end_addr & OHCI_PAGE_MASK)) {
-        len = (end_addr & OHCI_OFFSET_MASK) + 0x1001
-            - (start_addr & OHCI_OFFSET_MASK);
-    } else {
-        len = end_addr - start_addr + 1;
-    }
-
-    if (len && dir != OHCI_TD_DIR_IN) {
-        ohci_copy_iso_td(ohci, start_addr, end_addr, ohci->usb_buf, len, 0);
-    }
-
-    if (completion) {
-        ret = ohci->usb_packet.len;
-    } else {
-        ret = USB_RET_NODEV;
-        for (i = 0; i < ohci->num_ports; i++) {
-            dev = ohci->rhport[i].port.dev;
-            if ((ohci->rhport[i].ctrl & OHCI_PORT_PES) == 0)
-                continue;
-            ohci->usb_packet.pid = pid;
-            ohci->usb_packet.devaddr = OHCI_BM(ed->flags, ED_FA);
-            ohci->usb_packet.devep = OHCI_BM(ed->flags, ED_EN);
-            ohci->usb_packet.data = ohci->usb_buf;
-            ohci->usb_packet.len = len;
-            ohci->usb_packet.complete_cb = ohci_async_complete_packet;
-            ohci->usb_packet.complete_opaque = ohci;
-            ret = dev->handle_packet(dev, &ohci->usb_packet);
-            if (ret != USB_RET_NODEV)
-                break;
-        }
-
-        if (ret == USB_RET_ASYNC) {
-            return 1;
-        }
-    }
-
-#ifdef DEBUG_ISOCH
-    printf("so 0x%.8x eo 0x%.8x\nsa 0x%.8x ea 0x%.8x\ndir %s len %zu ret %d\n",
-           start_offset, end_offset, start_addr, end_addr, str, len, ret);
-#endif
-
-    /* Writeback */
-    if (dir == OHCI_TD_DIR_IN && ret >= 0 && ret <= len) {
-        /* IN transfer succeeded */
-        ohci_copy_iso_td(ohci, start_addr, end_addr, ohci->usb_buf, ret, 1);
-        OHCI_SET_BM(iso_td.offset[relative_frame_number], TD_PSW_CC,
-                    OHCI_CC_NOERROR);
-        OHCI_SET_BM(iso_td.offset[relative_frame_number], TD_PSW_SIZE, ret);
-    } else if (dir == OHCI_TD_DIR_OUT && ret == len) {
-        /* OUT transfer succeeded */
-        OHCI_SET_BM(iso_td.offset[relative_frame_number], TD_PSW_CC,
-                    OHCI_CC_NOERROR);
-        OHCI_SET_BM(iso_td.offset[relative_frame_number], TD_PSW_SIZE, 0);
-    } else {
-        if (ret > (ssize_t) len) {
-            printf("usb-ohci: DataOverrun %d > %zu\n", ret, len);
-            OHCI_SET_BM(iso_td.offset[relative_frame_number], TD_PSW_CC,
-                        OHCI_CC_DATAOVERRUN);
-            OHCI_SET_BM(iso_td.offset[relative_frame_number], TD_PSW_SIZE,
-                        len);
-        } else if (ret >= 0) {
-            printf("usb-ohci: DataUnderrun %d\n", ret);
-            OHCI_SET_BM(iso_td.offset[relative_frame_number], TD_PSW_CC,
-                        OHCI_CC_DATAUNDERRUN);
-        } else {
-            switch (ret) {
-            case USB_RET_NODEV:
-                OHCI_SET_BM(iso_td.offset[relative_frame_number], TD_PSW_CC,
-                            OHCI_CC_DEVICENOTRESPONDING);
-                OHCI_SET_BM(iso_td.offset[relative_frame_number], TD_PSW_SIZE,
-                            0);
-                break;
-            case USB_RET_NAK:
-            case USB_RET_STALL:
-                printf("usb-ohci: got NAK/STALL %d\n", ret);
-                OHCI_SET_BM(iso_td.offset[relative_frame_number], TD_PSW_CC,
-                            OHCI_CC_STALL);
-                OHCI_SET_BM(iso_td.offset[relative_frame_number], TD_PSW_SIZE,
-                            0);
-                break;
-            default:
-                printf("usb-ohci: Bad device response %d\n", ret);
-                OHCI_SET_BM(iso_td.offset[relative_frame_number], TD_PSW_CC,
-                            OHCI_CC_UNDEXPETEDPID);
-                break;
-            }
-        }
-    }
-
-    if (relative_frame_number == frame_count) {
-        /* Last data packet of ISO TD - retire the TD to the Done Queue */
-        OHCI_SET_BM(iso_td.flags, TD_CC, OHCI_CC_NOERROR);
-        ed->head &= ~OHCI_DPTR_MASK;
-        ed->head |= (iso_td.next & OHCI_DPTR_MASK);
-        iso_td.next = ohci->done;
-        ohci->done = addr;
-        i = OHCI_BM(iso_td.flags, TD_DI);
-        if (i < ohci->done_count)
-            ohci->done_count = i;
-    }
-    ohci_put_iso_td(ohci, addr, &iso_td);
-    return 1;
-}
-
-/* Service a transfer descriptor.
-   Returns nonzero to terminate processing of this endpoint.  */
-
-static int ohci_service_td(OHCIState *ohci, struct ohci_ed *ed)
-{
-    int dir;
-    size_t len = 0;
-    const char * __attribute__((unused)) str = NULL;
-    int pid;
-    int ret;
-    int i;
-    USBDevice *dev;
-    struct ohci_td td;
-    uint32_t addr;
-    int flag_r;
-    int completion;
-
-    addr = ed->head & OHCI_DPTR_MASK;
-    /* See if this TD has already been submitted to the device.  */
-    completion = (addr == ohci->async_td);
-    if (completion && !ohci->async_complete) {
-#ifdef DEBUG_PACKET
-        dprintf("Skipping async TD\n");
-#endif
-        return 1;
-    }
-    if (!ohci_read_td(ohci, addr, &td)) {
-        fprintf(stderr, "usb-ohci: TD read error at %x\n", addr);
-        return 0;
-    }
-
-    dir = OHCI_BM(ed->flags, ED_D);
-    switch (dir) {
-    case OHCI_TD_DIR_OUT:
-    case OHCI_TD_DIR_IN:
-        /* Same value.  */
-        break;
-    default:
-        dir = OHCI_BM(td.flags, TD_DP);
-        break;
-    }
-
-    switch (dir) {
-    case OHCI_TD_DIR_IN:
-        str = "in";
-        pid = USB_TOKEN_IN;
-        break;
-    case OHCI_TD_DIR_OUT:
-        str = "out";
-        pid = USB_TOKEN_OUT;
-        break;
-    case OHCI_TD_DIR_SETUP:
-        str = "setup";
-        pid = USB_TOKEN_SETUP;
-        break;
-    default:
-        fprintf(stderr, "usb-ohci: Bad direction\n");
-        return 1;
-    }
-    if (td.cbp && td.be) {
-        if ((td.cbp & 0xfffff000) != (td.be & 0xfffff000)) {
-            len = (td.be & 0xfff) + 0x1001 - (td.cbp & 0xfff);
-        } else {
-            len = (td.be - td.cbp) + 1;
-        }
-
-        if (len && dir != OHCI_TD_DIR_IN && !completion) {
-            ohci_copy_td(ohci, &td, ohci->usb_buf, len, 0);
-        }
-    }
-
-    flag_r = (td.flags & OHCI_TD_R) != 0;
-#ifdef DEBUG_PACKET
-    dprintf(" TD @ 0x%.8x %u bytes %s r=%d cbp=0x%.8x be=0x%.8x\n",
-            addr, len, str, flag_r, td.cbp, td.be);
-
-    if (len > 0 && dir != OHCI_TD_DIR_IN) {
-        dprintf("  data:");
-        for (i = 0; i < len; i++)
-            printf(" %.2x", ohci->usb_buf[i]);
-        dprintf("\n");
-    }
-#endif
-    if (completion) {
-        ret = ohci->usb_packet.len;
-        ohci->async_td = 0;
-        ohci->async_complete = 0;
-    } else {
-        ret = USB_RET_NODEV;
-        for (i = 0; i < ohci->num_ports; i++) {
-            dev = ohci->rhport[i].port.dev;
-            if ((ohci->rhport[i].ctrl & OHCI_PORT_PES) == 0)
-                continue;
-
-            if (ohci->async_td) {
-                /* ??? The hardware should allow one active packet per
-                   endpoint.  We only allow one active packet per controller.
-                   This should be sufficient as long as devices respond in a
-                   timely manner.
-                 */
-#ifdef DEBUG_PACKET
-                dprintf("Too many pending packets\n");
-#endif
-                return 1;
-            }
-            ohci->usb_packet.pid = pid;
-            ohci->usb_packet.devaddr = OHCI_BM(ed->flags, ED_FA);
-            ohci->usb_packet.devep = OHCI_BM(ed->flags, ED_EN);
-            ohci->usb_packet.data = ohci->usb_buf;
-            ohci->usb_packet.len = len;
-            ohci->usb_packet.complete_cb = ohci_async_complete_packet;
-            ohci->usb_packet.complete_opaque = ohci;
-            ret = dev->handle_packet(dev, &ohci->usb_packet);
-            if (ret != USB_RET_NODEV)
-                break;
-        }
-#ifdef DEBUG_PACKET
-        dprintf("ret=%d\n", ret);
-#endif
-        if (ret == USB_RET_ASYNC) {
-            ohci->async_td = addr;
-            return 1;
-        }
-    }
-    if (ret >= 0) {
-        if (dir == OHCI_TD_DIR_IN) {
-            ohci_copy_td(ohci, &td, ohci->usb_buf, ret, 1);
-#ifdef DEBUG_PACKET
-            dprintf("  data:");
-            for (i = 0; i < ret; i++)
-                printf(" %.2x", ohci->usb_buf[i]);
-            dprintf("\n");
-#endif
-        } else {
-            ret = len;
-        }
-    }
-
-    /* Writeback */
-    if (ret == len || (dir == OHCI_TD_DIR_IN && ret >= 0 && flag_r)) {
-        /* Transmission succeeded.  */
-        if (ret == len) {
-            td.cbp = 0;
-        } else {
-            td.cbp += ret;
-            if ((td.cbp & 0xfff) + ret > 0xfff) {
-                td.cbp &= 0xfff;
-                td.cbp |= td.be & ~0xfff;
-            }
-        }
-        td.flags |= OHCI_TD_T1;
-        td.flags ^= OHCI_TD_T0;
-        OHCI_SET_BM(td.flags, TD_CC, OHCI_CC_NOERROR);
-        OHCI_SET_BM(td.flags, TD_EC, 0);
-
-        ed->head &= ~OHCI_ED_C;
-        if (td.flags & OHCI_TD_T0)
-            ed->head |= OHCI_ED_C;
-    } else {
-        if (ret >= 0) {
-            dprintf("usb-ohci: Underrun\n");
-            OHCI_SET_BM(td.flags, TD_CC, OHCI_CC_DATAUNDERRUN);
-        } else {
-            switch (ret) {
-            case USB_RET_NODEV:
-                OHCI_SET_BM(td.flags, TD_CC, OHCI_CC_DEVICENOTRESPONDING);
-            case USB_RET_NAK:
-                dprintf("usb-ohci: got NAK\n");
-                return 1;
-            case USB_RET_STALL:
-                dprintf("usb-ohci: got STALL\n");
-                OHCI_SET_BM(td.flags, TD_CC, OHCI_CC_STALL);
-                break;
-            case USB_RET_BABBLE:
-                dprintf("usb-ohci: got BABBLE\n");
-                OHCI_SET_BM(td.flags, TD_CC, OHCI_CC_DATAOVERRUN);
-                break;
-            default:
-                fprintf(stderr, "usb-ohci: Bad device response %d\n", ret);
-                OHCI_SET_BM(td.flags, TD_CC, OHCI_CC_UNDEXPETEDPID);
-                OHCI_SET_BM(td.flags, TD_EC, 3);
-                break;
-            }
-        }
-        ed->head |= OHCI_ED_H;
-    }
-
-    /* Retire this TD */
-    ed->head &= ~OHCI_DPTR_MASK;
-    ed->head |= td.next & OHCI_DPTR_MASK;
-    td.next = ohci->done;
-    ohci->done = addr;
-    i = OHCI_BM(td.flags, TD_DI);
-    if (i < ohci->done_count)
-        ohci->done_count = i;
-    ohci_put_td(ohci, addr, &td);
-    return OHCI_BM(td.flags, TD_CC) != OHCI_CC_NOERROR;
-}
-
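The length computation in ohci_service_td() above follows the OHCI rule that a TD data buffer may span at most two physical pages: when CurrentBufferPointer (cbp) and BufferEnd (be) sit in different 4 KiB pages, the count is the tail of the first page plus the offset into the second page plus one. A minimal standalone sketch of that arithmetic, using made-up addresses rather than values from this patch:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Mirror the cbp/be expression used in ohci_service_td(). */
    static uint32_t ohci_td_len(uint32_t cbp, uint32_t be)
    {
        if ((cbp & 0xfffff000) != (be & 0xfffff000)) {
            /* Buffer crosses a 4 KiB page boundary. */
            return (be & 0xfff) + 0x1001 - (cbp & 0xfff);
        }
        return (be - cbp) + 1;
    }

    int main(void)
    {
        /* Same page: 0x1010..0x101f -> 16 bytes. */
        printf("%" PRIu32 "\n", ohci_td_len(0x1010, 0x101f));
        /* Page crossing: 0x1ff0..0x2003 -> 16 + 4 = 20 bytes. */
        printf("%" PRIu32 "\n", ohci_td_len(0x1ff0, 0x2003));
        return 0;
    }
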
-/* Service an endpoint list.  Returns nonzero if active TD were found.  */
-static int ohci_service_ed_list(OHCIState *ohci, uint32_t head, int completion)
-{
-    struct ohci_ed ed;
-    uint32_t next_ed;
-    uint32_t cur;
-    int active;
-
-    active = 0;
-
-    if (head == 0)
-        return 0;
-
-    for (cur = head; cur; cur = next_ed) {
-        if (!ohci_read_ed(ohci, cur, &ed)) {
-            fprintf(stderr, "usb-ohci: ED read error at %x\n", cur);
-            return 0;
-        }
-
-        next_ed = ed.next & OHCI_DPTR_MASK;
-
-        if ((ed.head & OHCI_ED_H) || (ed.flags & OHCI_ED_K)) {
-            uint32_t addr;
-            /* Cancel the pending packet for an ED that has been paused.  */
-            addr = ed.head & OHCI_DPTR_MASK;
-            if (ohci->async_td && addr == ohci->async_td) {
-                usb_cancel_packet(&ohci->usb_packet);
-                ohci->async_td = 0;
-            }
-            continue;
-        }
-
-        while ((ed.head & OHCI_DPTR_MASK) != ed.tail) {
-#ifdef DEBUG_PACKET
-            dprintf("ED @ 0x%.8x fa=%u en=%u d=%u s=%u k=%u f=%u mps=%u "
-                    "h=%u c=%u\n  head=0x%.8x tailp=0x%.8x next=0x%.8x\n", cur,
-                    OHCI_BM(ed.flags, ED_FA), OHCI_BM(ed.flags, ED_EN),
-                    OHCI_BM(ed.flags, ED_D), (ed.flags & OHCI_ED_S)!= 0,
-                    (ed.flags & OHCI_ED_K) != 0, (ed.flags & OHCI_ED_F) != 0,
-                    OHCI_BM(ed.flags, ED_MPS), (ed.head & OHCI_ED_H) != 0,
-                    (ed.head & OHCI_ED_C) != 0, ed.head & OHCI_DPTR_MASK,
-                    ed.tail & OHCI_DPTR_MASK, ed.next & OHCI_DPTR_MASK);
-#endif
-            active = 1;
-
-            if ((ed.flags & OHCI_ED_F) == 0) {
-                if (ohci_service_td(ohci, &ed))
-                    break;
-            } else {
-                /* Handle isochronous endpoints */
-                if (ohci_service_iso_td(ohci, &ed, completion))
-                    break;
-            }
-        }
-
-        ohci_put_ed(ohci, cur, &ed);
-    }
-
-    return active;
-}
-
-/* Generate a SOF event, and set a timer for EOF */
-static void ohci_sof(OHCIState *ohci)
-{
-    ohci->sof_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
-    timer_mod(ohci->eof_timer, ohci->sof_time + usb_frame_time);
-    ohci_set_interrupt(ohci, OHCI_INTR_SF);
-}
-
-/* Process Control and Bulk lists.  */
-static void ohci_process_lists(OHCIState *ohci, int completion)
-{
-    if ((ohci->ctl & OHCI_CTL_CLE) && (ohci->status & OHCI_STATUS_CLF)) {
-        if (ohci->ctrl_cur && ohci->ctrl_cur != ohci->ctrl_head)
-          dprintf("usb-ohci: head %x, cur %x\n",
-                          ohci->ctrl_head, ohci->ctrl_cur);
-        if (!ohci_service_ed_list(ohci, ohci->ctrl_head, completion)) {
-            ohci->ctrl_cur = 0;
-            ohci->status &= ~OHCI_STATUS_CLF;
-        }
-    }
-
-    if ((ohci->ctl & OHCI_CTL_BLE) && (ohci->status & OHCI_STATUS_BLF)) {
-        if (!ohci_service_ed_list(ohci, ohci->bulk_head, completion)) {
-            ohci->bulk_cur = 0;
-            ohci->status &= ~OHCI_STATUS_BLF;
-        }
-    }
-}
-
-/* Do frame processing on frame boundary */
-static void ohci_frame_boundary(void *opaque)
-{
-    OHCIState *ohci = opaque;
-    struct ohci_hcca hcca;
-
-    ohci_read_hcca(ohci, ohci->hcca, &hcca);
-
-    /* Process all the lists at the end of the frame */
-    if (ohci->ctl & OHCI_CTL_PLE) {
-        int n;
-
-        n = ohci->frame_number & 0x1f;
-        ohci_service_ed_list(ohci, le32_to_cpu(hcca.intr[n]), 0);
-    }
-
-    /* Cancel all pending packets if either of the lists has been disabled.  */
-    if (ohci->async_td &&
-        ohci->old_ctl & (~ohci->ctl) & (OHCI_CTL_BLE | OHCI_CTL_CLE)) {
-        usb_cancel_packet(&ohci->usb_packet);
-        ohci->async_td = 0;
-    }
-    ohci->old_ctl = ohci->ctl;
-    ohci_process_lists(ohci, 0);
-
-    /* Frame boundary, so do EOF stuff here */
-    ohci->frt = ohci->fit;
-
-    /* XXX: endianness */
-    ohci->frame_number = (ohci->frame_number + 1) & 0xffff;
-    hcca.frame = cpu_to_le32(ohci->frame_number);
-
-    if (ohci->done_count == 0 && !(ohci->intr_status & OHCI_INTR_WD)) {
-        if (!ohci->done)
-            abort();
-        if (ohci->intr & ohci->intr_status)
-            ohci->done |= 1;
-        hcca.done = cpu_to_le32(ohci->done);
-        ohci->done = 0;
-        ohci->done_count = 7;
-        ohci_set_interrupt(ohci, OHCI_INTR_WD);
-    }
-
-    if (ohci->done_count != 7 && ohci->done_count != 0)
-        ohci->done_count--;
-
-    /* Do SOF stuff here */
-    ohci_sof(ohci);
-
-    /* Writeback HCCA */
-    ohci_put_hcca(ohci, ohci->hcca, &hcca);
-}
-
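When ohci_frame_boundary() retires the done queue above, it ORs 1 into the value written to HccaDoneHead whenever another enabled interrupt is pending; per the OHCI specification that low bit tells the guest driver to also consult HcInterruptStatus. A small illustrative helper (the name and split are this sketch's, not code from the patch) showing how the packed value comes apart on the consumer side:

    #include <stdbool.h>
    #include <stdint.h>

    /* Split an HccaDoneHead value as written by ohci_frame_boundary():
     * bit 0 flags "other interrupts pending", the rest is the address of
     * the first retired TD. */
    void parse_done_head(uint32_t done_head, uint32_t *td_addr, bool *other_irqs)
    {
        *other_irqs = (done_head & 1) != 0;
        *td_addr = done_head & ~(uint32_t)1;
    }
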
-/* Start sending SOF tokens across the USB bus; lists are processed in
- * the next frame
- */
-static int ohci_bus_start(OHCIState *ohci)
-{
-    ohci->eof_timer = timer_new(QEMU_CLOCK_VIRTUAL, SCALE_NS,
-                    ohci_frame_boundary,
-                    ohci);
-
-    if (ohci->eof_timer == NULL) {
-        fprintf(stderr, "usb-ohci: %s: qemu_new_timer_ns failed\n", ohci->name);
-        /* TODO: Signal unrecoverable error */
-        return 0;
-    }
-
-    dprintf("usb-ohci: %s: USB Operational\n", ohci->name);
-
-    ohci_sof(ohci);
-
-    return 1;
-}
-
-/* Stop sending SOF tokens on the bus */
-static void ohci_bus_stop(OHCIState *ohci)
-{
-    if (ohci->eof_timer)
-        timer_del(ohci->eof_timer);
-    ohci->eof_timer = NULL;
-}
-
-/* Set a flag in a port status register, but only if the port is
- * connected; if it is not, set the ConnectStatusChange flag instead.
- * Return 1 if the flag was newly enabled.
- */
-static int ohci_port_set_if_connected(OHCIState *ohci, int i, uint32_t val)
-{
-    int ret = 1;
-
-    /* writing a 0 has no effect */
-    if (val == 0)
-        return 0;
-
-    /* If CurrentConnectStatus is cleared we set
-     * ConnectStatusChange
-     */
-    if (!(ohci->rhport[i].ctrl & OHCI_PORT_CCS)) {
-        ohci->rhport[i].ctrl |= OHCI_PORT_CSC;
-        if (ohci->rhstatus & OHCI_RHS_DRWE) {
-            /* TODO: CSC is a wakeup event */
-        }
-        return 0;
-    }
-
-    if (ohci->rhport[i].ctrl & val)
-        ret = 0;
-
-    /* set the bit */
-    ohci->rhport[i].ctrl |= val;
-
-    return ret;
-}
-
-/* Set the frame interval - frame interval toggle is manipulated by the hcd only */
-static void ohci_set_frame_interval(OHCIState *ohci, uint16_t val)
-{
-    val &= OHCI_FMI_FI;
-
-    if (val != ohci->fi) {
-        dprintf("usb-ohci: %s: FrameInterval = 0x%x (%u)\n",
-            ohci->name, ohci->fi, ohci->fi);
-    }
-
-    ohci->fi = val;
-}
-
-static void ohci_port_power(OHCIState *ohci, int i, int p)
-{
-    if (p) {
-        ohci->rhport[i].ctrl |= OHCI_PORT_PPS;
-    } else {
-        ohci->rhport[i].ctrl &= ~(OHCI_PORT_PPS|
-                    OHCI_PORT_CCS|
-                    OHCI_PORT_PSS|
-                    OHCI_PORT_PRS);
-    }
-}
-
-/* Set HcControlRegister */
-static void ohci_set_ctl(OHCIState *ohci, uint32_t val)
-{
-    uint32_t old_state;
-    uint32_t new_state;
-
-    old_state = ohci->ctl & OHCI_CTL_HCFS;
-    ohci->ctl = val;
-    new_state = ohci->ctl & OHCI_CTL_HCFS;
-
-    /* no state change */
-    if (old_state == new_state)
-        return;
-
-    switch (new_state) {
-    case OHCI_USB_OPERATIONAL:
-        ohci_bus_start(ohci);
-        break;
-    case OHCI_USB_SUSPEND:
-        ohci_bus_stop(ohci);
-        dprintf("usb-ohci: %s: USB Suspended\n", ohci->name);
-        break;
-    case OHCI_USB_RESUME:
-        dprintf("usb-ohci: %s: USB Resume\n", ohci->name);
-        break;
-    case OHCI_USB_RESET:
-        ohci_reset(ohci);
-        dprintf("usb-ohci: %s: USB Reset\n", ohci->name);
-        break;
-    }
-}
-
-static uint32_t ohci_get_frame_remaining(OHCIState *ohci)
-{
-    uint16_t fr;
-    int64_t tks;
-
-    if ((ohci->ctl & OHCI_CTL_HCFS) != OHCI_USB_OPERATIONAL)
-        return (ohci->frt << 31);
-
-    /* Being in USB operational state guarantees sof_time was
-     * set already.
-     */
-    tks = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - ohci->sof_time;
-
-    /* avoid muldiv if possible */
-    if (tks >= usb_frame_time)
-        return (ohci->frt << 31);
-
-    tks = muldiv64(1, tks, usb_bit_time);
-    fr = (uint16_t)(ohci->fi - tks);
-
-    return (ohci->frt << 31) | fr;
-}
-
-
-/* Set root hub status */
-static void ohci_set_hub_status(OHCIState *ohci, uint32_t val)
-{
-    uint32_t old_state;
-
-    old_state = ohci->rhstatus;
-
-    /* write 1 to clear OCIC */
-    if (val & OHCI_RHS_OCIC)
-        ohci->rhstatus &= ~OHCI_RHS_OCIC;
-
-    if (val & OHCI_RHS_LPS) {
-        int i;
-
-        for (i = 0; i < ohci->num_ports; i++)
-            ohci_port_power(ohci, i, 0);
-        dprintf("usb-ohci: powered down all ports\n");
-    }
-
-    if (val & OHCI_RHS_LPSC) {
-        int i;
-
-        for (i = 0; i < ohci->num_ports; i++)
-            ohci_port_power(ohci, i, 1);
-        dprintf("usb-ohci: powered up all ports\n");
-    }
-
-    if (val & OHCI_RHS_DRWE)
-        ohci->rhstatus |= OHCI_RHS_DRWE;
-
-    if (val & OHCI_RHS_CRWE)
-        ohci->rhstatus &= ~OHCI_RHS_DRWE;
-
-    if (old_state != ohci->rhstatus)
-        ohci_set_interrupt(ohci, OHCI_INTR_RHSC);
-}
-
-/* Set root hub port status */
-static void ohci_port_set_status(OHCIState *ohci, int portnum, uint32_t val)
-{
-    uint32_t old_state;
-    OHCIPort *port;
-
-    port = &ohci->rhport[portnum];
-    old_state = port->ctrl;
-
-    /* Write to clear CSC, PESC, PSSC, OCIC, PRSC */
-    if (val & OHCI_PORT_WTC)
-        port->ctrl &= ~(val & OHCI_PORT_WTC);
-
-    if (val & OHCI_PORT_CCS)
-        port->ctrl &= ~OHCI_PORT_PES;
-
-    ohci_port_set_if_connected(ohci, portnum, val & OHCI_PORT_PES);
-
-    if (ohci_port_set_if_connected(ohci, portnum, val & OHCI_PORT_PSS))
-        dprintf("usb-ohci: port %d: SUSPEND\n", portnum);
-
-    if (ohci_port_set_if_connected(ohci, portnum, val & OHCI_PORT_PRS)) {
-        dprintf("usb-ohci: port %d: RESET\n", portnum);
-        usb_send_msg(port->port.dev, USB_MSG_RESET);
-        port->ctrl &= ~OHCI_PORT_PRS;
-        /* ??? Should this also set OHCI_PORT_PESC.  */
-        port->ctrl |= OHCI_PORT_PES | OHCI_PORT_PRSC;
-    }
-
-    /* Invert the order here to ensure that, in the ambiguous case, the
-     * device ends up powered up...
-     */
-    if (val & OHCI_PORT_LSDA)
-        ohci_port_power(ohci, portnum, 0);
-    if (val & OHCI_PORT_PPS)
-        ohci_port_power(ohci, portnum, 1);
-
-    if (old_state != port->ctrl)
-        ohci_set_interrupt(ohci, OHCI_INTR_RHSC);
-
-    return;
-}
-
-static uint32_t ohci_mem_read(void *ptr, hwaddr addr)
-{
-    OHCIState *ohci = ptr;
-    uint32_t retval;
-
-    /* Only aligned reads are allowed on OHCI */
-    if (addr & 3) {
-        fprintf(stderr, "usb-ohci: Mis-aligned read\n");
-        return 0xffffffff;
-    } else if (addr >= 0x54 && addr < 0x54 + ohci->num_ports * 4) {
-        /* HcRhPortStatus */
-        retval = ohci->rhport[(addr - 0x54) >> 2].ctrl | OHCI_PORT_PPS;
-    } else {
-        switch (addr >> 2) {
-        case 0: /* HcRevision */
-            retval = 0x10;
-            break;
-
-        case 1: /* HcControl */
-            retval = ohci->ctl;
-            break;
-
-        case 2: /* HcCommandStatus */
-            retval = ohci->status;
-            break;
-
-        case 3: /* HcInterruptStatus */
-            retval = ohci->intr_status;
-            break;
-
-        case 4: /* HcInterruptEnable */
-        case 5: /* HcInterruptDisable */
-            retval = ohci->intr;
-            break;
-
-        case 6: /* HcHCCA */
-            retval = ohci->hcca;
-            break;
-
-        case 7: /* HcPeriodCurrentED */
-            retval = ohci->per_cur;
-            break;
-
-        case 8: /* HcControlHeadED */
-            retval = ohci->ctrl_head;
-            break;
-
-        case 9: /* HcControlCurrentED */
-            retval = ohci->ctrl_cur;
-            break;
-
-        case 10: /* HcBulkHeadED */
-            retval = ohci->bulk_head;
-            break;
-
-        case 11: /* HcBulkCurrentED */
-            retval = ohci->bulk_cur;
-            break;
-
-        case 12: /* HcDoneHead */
-            retval = ohci->done;
-            break;
-
-        case 13: /* HcFmInterval */
-            retval = (ohci->fit << 31) | (ohci->fsmps << 16) | (ohci->fi);
-            break;
-
-        case 14: /* HcFmRemaining */
-            retval = ohci_get_frame_remaining(ohci);
-            break;
-
-        case 15: /* HcFmNumber */
-            retval = ohci->frame_number;
-            break;
-
-        case 16: /* HcPeriodicStart */
-            retval = ohci->pstart;
-            break;
-
-        case 17: /* HcLSThreshold */
-            retval = ohci->lst;
-            break;
-
-        case 18: /* HcRhDescriptorA */
-            retval = ohci->rhdesc_a;
-            break;
-
-        case 19: /* HcRhDescriptorB */
-            retval = ohci->rhdesc_b;
-            break;
-
-        case 20: /* HcRhStatus */
-            retval = ohci->rhstatus;
-            break;
-
-        /* PXA27x specific registers */
-        case 24: /* HcStatus */
-            retval = ohci->hstatus & ohci->hmask;
-            break;
-
-        case 25: /* HcHReset */
-            retval = ohci->hreset;
-            break;
-
-        case 26: /* HcHInterruptEnable */
-            retval = ohci->hmask;
-            break;
-
-        case 27: /* HcHInterruptTest */
-            retval = ohci->htest;
-            break;
-
-        default:
-            fprintf(stderr, "ohci_read: Bad offset %x\n", (int)addr);
-            retval = 0xffffffff;
-        }
-    }
-
-#ifdef TARGET_WORDS_BIGENDIAN
-    retval = bswap32(retval);
-#endif
-    return retval;
-}
-
-static void ohci_mem_write(void *ptr, hwaddr addr, uint32_t val)
-{
-    OHCIState *ohci = ptr;
-
-#ifdef TARGET_WORDS_BIGENDIAN
-    val = bswap32(val);
-#endif
-
-    /* Only aligned writes are allowed on OHCI */
-    if (addr & 3) {
-        fprintf(stderr, "usb-ohci: Mis-aligned write\n");
-        return;
-    }
-
-    if (addr >= 0x54 && addr < 0x54 + ohci->num_ports * 4) {
-        /* HcRhPortStatus */
-        ohci_port_set_status(ohci, (addr - 0x54) >> 2, val);
-        return;
-    }
-
-    switch (addr >> 2) {
-    case 1: /* HcControl */
-        ohci_set_ctl(ohci, val);
-        break;
-
-    case 2: /* HcCommandStatus */
-        /* SOC is read-only */
-        val = (val & ~OHCI_STATUS_SOC);
-
-        /* Bits written as '0' remain unchanged in the register */
-        ohci->status |= val;
-
-        if (ohci->status & OHCI_STATUS_HCR)
-            ohci_reset(ohci);
-        break;
-
-    case 3: /* HcInterruptStatus */
-        ohci->intr_status &= ~val;
-        ohci_intr_update(ohci);
-        break;
-
-    case 4: /* HcInterruptEnable */
-        ohci->intr |= val;
-        ohci_intr_update(ohci);
-        break;
-
-    case 5: /* HcInterruptDisable */
-        ohci->intr &= ~val;
-        ohci_intr_update(ohci);
-        break;
-
-    case 6: /* HcHCCA */
-        ohci->hcca = val & OHCI_HCCA_MASK;
-        break;
-
-    case 8: /* HcControlHeadED */
-        ohci->ctrl_head = val & OHCI_EDPTR_MASK;
-        break;
-
-    case 9: /* HcControlCurrentED */
-        ohci->ctrl_cur = val & OHCI_EDPTR_MASK;
-        break;
-
-    case 10: /* HcBulkHeadED */
-        ohci->bulk_head = val & OHCI_EDPTR_MASK;
-        break;
-
-    case 11: /* HcBulkCurrentED */
-        ohci->bulk_cur = val & OHCI_EDPTR_MASK;
-        break;
-
-    case 13: /* HcFmInterval */
-        ohci->fsmps = (val & OHCI_FMI_FSMPS) >> 16;
-        ohci->fit = (val & OHCI_FMI_FIT) >> 31;
-        ohci_set_frame_interval(ohci, val);
-        break;
-
-    case 15: /* HcFmNumber */
-        break;
-
-    case 16: /* HcPeriodicStart */
-        ohci->pstart = val & 0xffff;
-        break;
-
-    case 17: /* HcLSThreshold */
-        ohci->lst = val & 0xffff;
-        break;
-
-    case 18: /* HcRhDescriptorA */
-        ohci->rhdesc_a &= ~OHCI_RHA_RW_MASK;
-        ohci->rhdesc_a |= val & OHCI_RHA_RW_MASK;
-        break;
-
-    case 19: /* HcRhDescriptorB */
-        break;
-
-    case 20: /* HcRhStatus */
-        ohci_set_hub_status(ohci, val);
-        break;
-
-    /* PXA27x specific registers */
-    case 24: /* HcStatus */
-        ohci->hstatus &= ~(val & ohci->hmask);
-
-    case 25: /* HcHReset */
-        ohci->hreset = val & ~OHCI_HRESET_FSBIR;
-        if (val & OHCI_HRESET_FSBIR)
-            ohci_reset(ohci);
-        break;
-
-    case 26: /* HcHInterruptEnable */
-        ohci->hmask = val;
-        break;
-
-    case 27: /* HcHInterruptTest */
-        ohci->htest = val;
-        break;
-
-    default:
-        fprintf(stderr, "ohci_write: Bad offset %x\n", (int)addr);
-        break;
-    }
-}
-
-/* Only dword reads are defined on OHCI register space */
-static CPUReadMemoryFunc *ohci_readfn[3]={
-    ohci_mem_read,
-    ohci_mem_read,
-    ohci_mem_read
-};
-
-/* Only dword writes are defined on OHCI register space */
-static CPUWriteMemoryFunc *ohci_writefn[3]={
-    ohci_mem_write,
-    ohci_mem_write,
-    ohci_mem_write
-};
-
-static void usb_ohci_init(OHCIState *ohci, int num_ports, int devfn,
-                          qemu_irq irq, enum ohci_type type,
-                          const char *name, uint32_t localmem_base)
-{
-    int i;
-
-    if (usb_frame_time == 0) {
-#ifdef OHCI_TIME_WARP
-        usb_frame_time = get_ticks_per_sec();
-        usb_bit_time = muldiv64(1, get_ticks_per_sec(), USB_HZ/1000);
-#else
-        usb_frame_time = muldiv64(1, get_ticks_per_sec(), 1000);
-        if (get_ticks_per_sec() >= USB_HZ) {
-            usb_bit_time = muldiv64(1, get_ticks_per_sec(), USB_HZ);
-        } else {
-            usb_bit_time = 1;
-        }
-#endif
-        dprintf("usb-ohci: usb_bit_time=%lli usb_frame_time=%lli\n",
-                usb_frame_time, usb_bit_time);
-    }
-
-    ohci->mem = cpu_register_io_memory(ohci_readfn, ohci_writefn, ohci);
-    ohci->localmem_base = localmem_base;
-    ohci->name = name;
-
-    ohci->irq = irq;
-    ohci->type = type;
-
-    ohci->num_ports = num_ports;
-    for (i = 0; i < num_ports; i++) {
-        qemu_register_usb_port(&ohci->rhport[i].port, ohci, i, ohci_attach);
-    }
-
-    ohci->async_td = 0;
-    qemu_register_reset(ohci_reset, 0, ohci);
-    ohci_reset(ohci);
-}
-
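In the default (non-OHCI_TIME_WARP) branch of usb_ohci_init() above, and assuming a nanosecond tick source (get_ticks_per_sec() == 1 000 000 000) with USB_HZ naming the 12 MHz full-speed bit clock (both assumptions about the build, not stated in this patch), the timing constants work out to:

    usb_frame_time = ticks_per_sec / 1000   = 1 000 000 ticks   (one SOF frame per millisecond)
    usb_bit_time   = ticks_per_sec / USB_HZ ≈ 83 ticks          (one full-speed bit time, about 83 ns)
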
-typedef struct {
-    PCIDevice pci_dev;
-    OHCIState state;
-} OHCIPCIState;
-
-static void ohci_mapfunc(PCIDevice *pci_dev, int i,
-            uint32_t addr, uint32_t size, int type)
-{
-    OHCIPCIState *ohci = (OHCIPCIState *)pci_dev;
-    cpu_register_physical_memory(addr, size, ohci->state.mem);
-}
-
-void usb_ohci_init_pci(struct PCIBus *bus, int num_ports, int devfn)
-{
-    OHCIPCIState *ohci;
-
-    ohci = (OHCIPCIState *)pci_register_device(bus, "OHCI USB", sizeof(*ohci),
-                                               devfn, NULL, NULL);
-    if (ohci == NULL) {
-        fprintf(stderr, "usb-ohci: Failed to register PCI device\n");
-        return;
-    }
-
-    pci_config_set_vendor_id(ohci->pci_dev.config, PCI_VENDOR_ID_APPLE);
-    pci_config_set_device_id(ohci->pci_dev.config,
-                             PCI_DEVICE_ID_APPLE_IPID_USB);
-    ohci->pci_dev.config[0x09] = 0x10; /* OHCI */
-    pci_config_set_class(ohci->pci_dev.config, PCI_CLASS_SERIAL_USB);
-    ohci->pci_dev.config[0x3d] = 0x01; /* interrupt pin 1 */
-
-    usb_ohci_init(&ohci->state, num_ports, devfn, ohci->pci_dev.irq[0],
-                  OHCI_TYPE_PCI, ohci->pci_dev.name, 0);
-
-    pci_register_bar((struct PCIDevice *)ohci, 0, 256,
-                           PCI_ADDRESS_SPACE_MEM, ohci_mapfunc);
-}
-
-void usb_ohci_init_pxa(hwaddr base, int num_ports, int devfn,
-                       qemu_irq irq)
-{
-    OHCIState *ohci = (OHCIState *)g_malloc0(sizeof(OHCIState));
-
-    usb_ohci_init(ohci, num_ports, devfn, irq,
-                  OHCI_TYPE_PXA, "OHCI USB", 0);
-
-    cpu_register_physical_memory(base, 0x1000, ohci->mem);
-}
-
-void usb_ohci_init_sm501(uint32_t mmio_base, uint32_t localmem_base,
-                         int num_ports, int devfn, qemu_irq irq)
-{
-    OHCIState *ohci = (OHCIState *)g_malloc0(sizeof(OHCIState));
-
-    usb_ohci_init(ohci, num_ports, devfn, irq,
-                  OHCI_TYPE_SM501, "OHCI USB", localmem_base);
-
-    cpu_register_physical_memory(mmio_base, 0x1000, ohci->mem);
-}
-
diff --git a/hw/usb/usb-linux.c b/hw/usb/usb-linux.c
deleted file mode 100644
index d0c2249..0000000
--- a/hw/usb/usb-linux.c
+++ /dev/null
@@ -1,1691 +0,0 @@
-/*
- * Linux host USB redirector
- *
- * Copyright (c) 2005 Fabrice Bellard
- *
- * Copyright (c) 2008 Max Krasnyansky
- *      Support for host device auto connect & disconnect
- *      Major rewrite to support fully async operation
- *
- * Copyright 2008 TJ <linux@tjworld.net>
- *      Added flexible support for /dev/bus/usb /sys/bus/usb/devices in addition
- *      to the legacy /proc/bus/usb USB device discovery and handling
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu-common.h"
-#include "qemu/timer.h"
-#include "monitor/monitor.h"
-
-#include <dirent.h>
-#include <sys/ioctl.h>
-#include <signal.h>
-
-#include <linux/usbdevice_fs.h>
-#include <linux/version.h>
-#include "hw/usb.h"
-
-/* We redefine it to avoid version problems */
-struct usb_ctrltransfer {
-    uint8_t  bRequestType;
-    uint8_t  bRequest;
-    uint16_t wValue;
-    uint16_t wIndex;
-    uint16_t wLength;
-    uint32_t timeout;
-    void *data;
-};
-
-struct usb_ctrlrequest {
-    uint8_t bRequestType;
-    uint8_t bRequest;
-    uint16_t wValue;
-    uint16_t wIndex;
-    uint16_t wLength;
-};
-
-typedef int USBScanFunc(void *opaque, int bus_num, int addr, int class_id,
-                        int vendor_id, int product_id,
-                        const char *product_name, int speed);
-static int usb_host_find_device(int *pbus_num, int *paddr,
-                                char *product_name, int product_name_size,
-                                const char *devname);
-//#define DEBUG
-
-#ifdef DEBUG
-#define dprintf printf
-#else
-#define dprintf(...)
-#endif
-
-#define USBDBG_DEVOPENED "husb: opened %s/devices\n"
-
-#define USBPROCBUS_PATH "/proc/bus/usb"
-#define PRODUCT_NAME_SZ 32
-#define MAX_ENDPOINTS 16
-#define USBDEVBUS_PATH "/dev/bus/usb"
-#define USBSYSBUS_PATH "/sys/bus/usb"
-
-static char *usb_host_device_path;
-
-#define USB_FS_NONE 0
-#define USB_FS_PROC 1
-#define USB_FS_DEV 2
-#define USB_FS_SYS 3
-
-static int usb_fs_type;
-
-/* endpoint association data */
-struct endp_data {
-    uint8_t type;
-    uint8_t halted;
-};
-
-enum {
-    CTRL_STATE_IDLE = 0,
-    CTRL_STATE_SETUP,
-    CTRL_STATE_DATA,
-    CTRL_STATE_ACK
-};
-
-/*
- * Control transfer state.
- * Note that 'buffer' _must_ follow the 'req' field because
- * we need a contiguous buffer when we submit a control URB.
- */
-struct ctrl_struct {
-    uint16_t len;
-    uint16_t offset;
-    uint8_t  state;
-    struct   usb_ctrlrequest req;
-    uint8_t  buffer[1024];
-};
-
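Because the comment above makes the adjacency of 'req' and 'buffer' a hard requirement (the control URB is submitted from &req as one contiguous setup-packet-plus-data block), a compile-time check is one way to pin it down. A minimal sketch, assuming a C11 compiler and placement below these definitions in the same translation unit; it is not part of this patch:

    #include <assert.h>   /* static_assert */
    #include <stddef.h>   /* offsetof */

    /* The 8-byte setup packet must be immediately followed by the data
     * buffer, which is what usb_host_handle_control() relies on when it
     * sets urb->buffer = &s->ctrl.req and urb->buffer_length = 8 + len. */
    static_assert(sizeof(struct usb_ctrlrequest) == 8,
                  "setup packet must be 8 bytes");
    static_assert(offsetof(struct ctrl_struct, buffer) ==
                  offsetof(struct ctrl_struct, req) +
                  sizeof(struct usb_ctrlrequest),
                  "buffer must immediately follow req");
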
-typedef struct USBHostDevice {
-    USBDevice dev;
-    int       fd;
-
-    uint8_t   descr[1024];
-    int       descr_len;
-    int       configuration;
-    int       ninterfaces;
-    int       closing;
-
-    struct ctrl_struct ctrl;
-    struct endp_data endp_table[MAX_ENDPOINTS];
-
-    /* Host side address */
-    int bus_num;
-    int addr;
-
-    struct USBHostDevice *next;
-} USBHostDevice;
-
-static int is_isoc(USBHostDevice *s, int ep)
-{
-    return s->endp_table[ep - 1].type == USBDEVFS_URB_TYPE_ISO;
-}
-
-static int is_halted(USBHostDevice *s, int ep)
-{
-    return s->endp_table[ep - 1].halted;
-}
-
-static void clear_halt(USBHostDevice *s, int ep)
-{
-    s->endp_table[ep - 1].halted = 0;
-}
-
-static void set_halt(USBHostDevice *s, int ep)
-{
-    s->endp_table[ep - 1].halted = 1;
-}
-
-static USBHostDevice *hostdev_list;
-
-static void hostdev_link(USBHostDevice *dev)
-{
-    dev->next = hostdev_list;
-    hostdev_list = dev;
-}
-
-static void hostdev_unlink(USBHostDevice *dev)
-{
-    USBHostDevice *pdev = hostdev_list;
-    USBHostDevice **prev = &hostdev_list;
-
-    while (pdev) {
-	if (pdev == dev) {
-            *prev = dev->next;
-            return;
-        }
-
-        prev = &pdev->next;
-        pdev = pdev->next;
-    }
-}
-
-static USBHostDevice *hostdev_find(int bus_num, int addr)
-{
-    USBHostDevice *s = hostdev_list;
-    while (s) {
-        if (s->bus_num == bus_num && s->addr == addr)
-            return s;
-        s = s->next;
-    }
-    return NULL;
-}
-
-/*
- * Async URB state.
- * We always allocate one isoc descriptor even for bulk transfers
- * to simplify allocation and casts.
- */
-typedef struct AsyncURB
-{
-    struct usbdevfs_urb urb;
-    struct usbdevfs_iso_packet_desc isocpd;
-
-    USBPacket     *packet;
-    USBHostDevice *hdev;
-} AsyncURB;
-
-static AsyncURB *async_alloc(void)
-{
-    return (AsyncURB *) g_malloc0(sizeof(AsyncURB));
-}
-
-static void async_free(AsyncURB *aurb)
-{
-    g_free(aurb);
-}
-
-static void async_complete_ctrl(USBHostDevice *s, USBPacket *p)
-{
-    switch(s->ctrl.state) {
-    case CTRL_STATE_SETUP:
-        if (p->len < s->ctrl.len)
-            s->ctrl.len = p->len;
-        s->ctrl.state = CTRL_STATE_DATA;
-        p->len = 8;
-        break;
-
-    case CTRL_STATE_ACK:
-        s->ctrl.state = CTRL_STATE_IDLE;
-        p->len = 0;
-        break;
-
-    default:
-        break;
-    }
-}
-
-static void async_complete(void *opaque)
-{
-    USBHostDevice *s = opaque;
-    AsyncURB *aurb;
-
-    while (1) {
-    	USBPacket *p;
-
-	int r = ioctl(s->fd, USBDEVFS_REAPURBNDELAY, &aurb);
-        if (r < 0) {
-            if (errno == EAGAIN)
-                return;
-
-            if (errno == ENODEV && !s->closing) {
-                printf("husb: device %d.%d disconnected\n", s->bus_num, s->addr);
-	        usb_device_del_addr(0, s->dev.addr);
-                return;
-            }
-
-            dprintf("husb: async. reap urb failed errno %d\n", errno);
-            return;
-        }
-
-        p = aurb->packet;
-
-	dprintf("husb: async completed. aurb %p status %d alen %d\n",
-                aurb, aurb->urb.status, aurb->urb.actual_length);
-
-	if (p) {
-            switch (aurb->urb.status) {
-            case 0:
-                p->len = aurb->urb.actual_length;
-                if (aurb->urb.type == USBDEVFS_URB_TYPE_CONTROL)
-                    async_complete_ctrl(s, p);
-                break;
-
-            case -EPIPE:
-                set_halt(s, p->devep);
-                /* fall through */
-            default:
-                p->len = USB_RET_NAK;
-                break;
-            }
-
-            usb_packet_complete(p);
-	}
-
-        async_free(aurb);
-    }
-}
-
-static void async_cancel(USBPacket *unused, void *opaque)
-{
-    AsyncURB *aurb = opaque;
-    USBHostDevice *s = aurb->hdev;
-
-    dprintf("husb: async cancel. aurb %p\n", aurb);
-
-    /* Mark it as dead (see async_complete above) */
-    aurb->packet = NULL;
-
-    int r = ioctl(s->fd, USBDEVFS_DISCARDURB, aurb);
-    if (r < 0) {
-        dprintf("husb: async. discard urb failed errno %d\n", errno);
-    }
-}
-
-static int usb_host_claim_interfaces(USBHostDevice *dev, int configuration)
-{
-    int dev_descr_len, config_descr_len;
-    int interface, nb_interfaces;
-    int ret, i;
-
-    if (configuration == 0) /* address state - ignore */
-        return 1;
-
-    dprintf("husb: claiming interfaces. config %d\n", configuration);
-
-    i = 0;
-    dev_descr_len = dev->descr[0];
-    if (dev_descr_len > dev->descr_len)
-        goto fail;
-
-    i += dev_descr_len;
-    while (i < dev->descr_len) {
-        dprintf("husb: i is %d, descr_len is %d, dl %d, dt %d\n", i, dev->descr_len,
-               dev->descr[i], dev->descr[i+1]);
-
-        if (dev->descr[i+1] != USB_DT_CONFIG) {
-            i += dev->descr[i];
-            continue;
-        }
-        config_descr_len = dev->descr[i];
-
-	printf("husb: config #%d need %d\n", dev->descr[i + 5], configuration);
-
-        if (configuration < 0 || configuration == dev->descr[i + 5]) {
-            configuration = dev->descr[i + 5];
-            break;
-        }
-
-        i += config_descr_len;
-    }
-
-    if (i >= dev->descr_len) {
-        fprintf(stderr, "husb: update iface failed. no matching configuration\n");
-        goto fail;
-    }
-    nb_interfaces = dev->descr[i + 4];
-
-#ifdef USBDEVFS_DISCONNECT
-    /* earlier Linux 2.4 kernels do not support this */
-    {
-        struct usbdevfs_ioctl ctrl;
-        for (interface = 0; interface < nb_interfaces; interface++) {
-            ctrl.ioctl_code = USBDEVFS_DISCONNECT;
-            ctrl.ifno = interface;
-            ctrl.data = NULL;
-            ret = ioctl(dev->fd, USBDEVFS_IOCTL, &ctrl);
-            if (ret < 0 && errno != ENODATA) {
-                perror("USBDEVFS_DISCONNECT");
-                goto fail;
-            }
-        }
-    }
-#endif
-
-    /* XXX: only grab if all interfaces are free */
-    for (interface = 0; interface < nb_interfaces; interface++) {
-        ret = ioctl(dev->fd, USBDEVFS_CLAIMINTERFACE, &interface);
-        if (ret < 0) {
-            if (errno == EBUSY) {
-                printf("husb: update iface. device already grabbed\n");
-            } else {
-                perror("husb: failed to claim interface");
-            }
-        fail:
-            return 0;
-        }
-    }
-
-    printf("husb: %d interfaces claimed for configuration %d\n",
-           nb_interfaces, configuration);
-
-    dev->ninterfaces   = nb_interfaces;
-    dev->configuration = configuration;
-    return 1;
-}
-
-static int usb_host_release_interfaces(USBHostDevice *s)
-{
-    int ret, i;
-
-    dprintf("husb: releasing interfaces\n");
-
-    for (i = 0; i < s->ninterfaces; i++) {
-        ret = ioctl(s->fd, USBDEVFS_RELEASEINTERFACE, &i);
-        if (ret < 0) {
-            perror("husb: failed to release interface");
-            return 0;
-        }
-    }
-
-    return 1;
-}
-
-static void usb_host_handle_reset(USBDevice *dev)
-{
-    USBHostDevice *s = (USBHostDevice *) dev;
-
-    dprintf("husb: reset device %u.%u\n", s->bus_num, s->addr);
-
-    ioctl(s->fd, USBDEVFS_RESET);
-
-    usb_host_claim_interfaces(s, s->configuration);
-}
-
-static void usb_host_handle_destroy(USBDevice *dev)
-{
-    USBHostDevice *s = (USBHostDevice *)dev;
-
-    s->closing = 1;
-
-    qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
-
-    hostdev_unlink(s);
-
-    async_complete(s);
-
-    if (s->fd >= 0)
-        close(s->fd);
-
-    g_free(s);
-}
-
-static int usb_linux_update_endp_table(USBHostDevice *s);
-
-static int usb_host_handle_data(USBHostDevice *s, USBPacket *p)
-{
-    struct usbdevfs_urb *urb;
-    AsyncURB *aurb;
-    int ret;
-
-    aurb = async_alloc();
-    aurb->hdev   = s;
-    aurb->packet = p;
-
-    urb = &aurb->urb;
-
-    if (p->pid == USB_TOKEN_IN)
-    	urb->endpoint = p->devep | 0x80;
-    else
-    	urb->endpoint = p->devep;
-
-    if (is_halted(s, p->devep)) {
-	ret = ioctl(s->fd, USBDEVFS_CLEAR_HALT, &urb->endpoint);
-        if (ret < 0) {
-            dprintf("husb: failed to clear halt. ep 0x%x errno %d\n",
-                   urb->endpoint, errno);
-            return USB_RET_NAK;
-        }
-        clear_halt(s, p->devep);
-    }
-
-    urb->buffer        = p->data;
-    urb->buffer_length = p->len;
-
-    if (is_isoc(s, p->devep)) {
-        /* Setup ISOC transfer */
-        urb->type     = USBDEVFS_URB_TYPE_ISO;
-        urb->flags    = USBDEVFS_URB_ISO_ASAP;
-        urb->number_of_packets = 1;
-        urb->iso_frame_desc[0].length = p->len;
-    } else {
-        /* Setup bulk transfer */
-        urb->type     = USBDEVFS_URB_TYPE_BULK;
-    }
-
-    urb->usercontext = s;
-
-    ret = ioctl(s->fd, USBDEVFS_SUBMITURB, urb);
-
-    dprintf("husb: data submit. ep 0x%x len %u aurb %p\n", urb->endpoint, p->len, aurb);
-
-    if (ret < 0) {
-        dprintf("husb: submit failed. errno %d\n", errno);
-        async_free(aurb);
-
-        switch(errno) {
-        case ETIMEDOUT:
-            return USB_RET_NAK;
-        case EPIPE:
-        default:
-            return USB_RET_STALL;
-        }
-    }
-
-    usb_defer_packet(p, async_cancel, aurb);
-    return USB_RET_ASYNC;
-}
-
-static int ctrl_error(void)
-{
-    if (errno == ETIMEDOUT)
-        return USB_RET_NAK;
-    else
-        return USB_RET_STALL;
-}
-
-static int usb_host_set_address(USBHostDevice *s, int addr)
-{
-    dprintf("husb: ctrl set addr %u\n", addr);
-    s->dev.addr = addr;
-    return 0;
-}
-
-static int usb_host_set_config(USBHostDevice *s, int config)
-{
-    usb_host_release_interfaces(s);
-
-    int ret = ioctl(s->fd, USBDEVFS_SETCONFIGURATION, &config);
-
-    dprintf("husb: ctrl set config %d ret %d errno %d\n", config, ret, errno);
-
-    if (ret < 0)
-        return ctrl_error();
-
-    usb_host_claim_interfaces(s, config);
-    return 0;
-}
-
-static int usb_host_set_interface(USBHostDevice *s, int iface, int alt)
-{
-    struct usbdevfs_setinterface si;
-    int ret;
-
-    si.interface  = iface;
-    si.altsetting = alt;
-    ret = ioctl(s->fd, USBDEVFS_SETINTERFACE, &si);
-
-    dprintf("husb: ctrl set iface %d altset %d ret %d errno %d\n",
-    	iface, alt, ret, errno);
-
-    if (ret < 0)
-        return ctrl_error();
-
-    usb_linux_update_endp_table(s);
-    return 0;
-}
-
-static int usb_host_handle_control(USBHostDevice *s, USBPacket *p)
-{
-    struct usbdevfs_urb *urb;
-    AsyncURB *aurb;
-    int ret, value, index;
-
-    /*
-     * Process certain standard device requests.
-     * These are infrequent and are processed synchronously.
-     */
-    value = le16_to_cpu(s->ctrl.req.wValue);
-    index = le16_to_cpu(s->ctrl.req.wIndex);
-
-    dprintf("husb: ctrl type 0x%x req 0x%x val 0x%x index %u len %u\n",
-        s->ctrl.req.bRequestType, s->ctrl.req.bRequest, value, index,
-        s->ctrl.len);
-
-    if (s->ctrl.req.bRequestType == 0) {
-        switch (s->ctrl.req.bRequest) {
-        case USB_REQ_SET_ADDRESS:
-            return usb_host_set_address(s, value);
-
-        case USB_REQ_SET_CONFIGURATION:
-            return usb_host_set_config(s, value & 0xff);
-        }
-    }
-
-    if (s->ctrl.req.bRequestType == 1 &&
-                  s->ctrl.req.bRequest == USB_REQ_SET_INTERFACE)
-        return usb_host_set_interface(s, index, value);
-
-    /* The rest are asynchronous */
-
-    aurb = async_alloc();
-    aurb->hdev   = s;
-    aurb->packet = p;
-
-    /*
-     * Setup ctrl transfer.
-     *
-     * s->ctrl is laid out such that the data buffer immediately follows
-     * the 'req' struct, which is exactly what usbdevfs expects.
-     */
-    urb = &aurb->urb;
-
-    urb->type     = USBDEVFS_URB_TYPE_CONTROL;
-    urb->endpoint = p->devep;
-
-    urb->buffer        = &s->ctrl.req;
-    urb->buffer_length = 8 + s->ctrl.len;
-
-    urb->usercontext = s;
-
-    ret = ioctl(s->fd, USBDEVFS_SUBMITURB, urb);
-
-    dprintf("husb: submit ctrl. len %u aurb %p\n", urb->buffer_length, aurb);
-
-    if (ret < 0) {
-        dprintf("husb: submit failed. errno %d\n", errno);
-        async_free(aurb);
-
-        switch(errno) {
-        case ETIMEDOUT:
-            return USB_RET_NAK;
-        case EPIPE:
-        default:
-            return USB_RET_STALL;
-        }
-    }
-
-    usb_defer_packet(p, async_cancel, aurb);
-    return USB_RET_ASYNC;
-}
-
-static int do_token_setup(USBDevice *dev, USBPacket *p)
-{
-    USBHostDevice *s = (USBHostDevice *) dev;
-    int ret = 0;
-
-    if (p->len != 8)
-        return USB_RET_STALL;
-
-    memcpy(&s->ctrl.req, p->data, 8);
-    s->ctrl.len    = le16_to_cpu(s->ctrl.req.wLength);
-    s->ctrl.offset = 0;
-    s->ctrl.state  = CTRL_STATE_SETUP;
-
-    if (s->ctrl.req.bRequestType & USB_DIR_IN) {
-        ret = usb_host_handle_control(s, p);
-        if (ret < 0)
-            return ret;
-
-        if (ret < s->ctrl.len)
-            s->ctrl.len = ret;
-        s->ctrl.state = CTRL_STATE_DATA;
-    } else {
-        if (s->ctrl.len == 0)
-            s->ctrl.state = CTRL_STATE_ACK;
-        else
-            s->ctrl.state = CTRL_STATE_DATA;
-    }
-
-    return ret;
-}
-
-static int do_token_in(USBDevice *dev, USBPacket *p)
-{
-    USBHostDevice *s = (USBHostDevice *) dev;
-    int ret = 0;
-
-    if (p->devep != 0)
-        return usb_host_handle_data(s, p);
-
-    switch(s->ctrl.state) {
-    case CTRL_STATE_ACK:
-        if (!(s->ctrl.req.bRequestType & USB_DIR_IN)) {
-            ret = usb_host_handle_control(s, p);
-            if (ret == USB_RET_ASYNC)
-                return USB_RET_ASYNC;
-
-            s->ctrl.state = CTRL_STATE_IDLE;
-            return ret > 0 ? 0 : ret;
-        }
-
-        return 0;
-
-    case CTRL_STATE_DATA:
-        if (s->ctrl.req.bRequestType & USB_DIR_IN) {
-            int len = s->ctrl.len - s->ctrl.offset;
-            if (len > p->len)
-                len = p->len;
-            memcpy(p->data, s->ctrl.buffer + s->ctrl.offset, len);
-            s->ctrl.offset += len;
-            if (s->ctrl.offset >= s->ctrl.len)
-                s->ctrl.state = CTRL_STATE_ACK;
-            return len;
-        }
-
-        s->ctrl.state = CTRL_STATE_IDLE;
-        return USB_RET_STALL;
-
-    default:
-        return USB_RET_STALL;
-    }
-}
-
-static int do_token_out(USBDevice *dev, USBPacket *p)
-{
-    USBHostDevice *s = (USBHostDevice *) dev;
-
-    if (p->devep != 0)
-        return usb_host_handle_data(s, p);
-
-    switch(s->ctrl.state) {
-    case CTRL_STATE_ACK:
-        if (s->ctrl.req.bRequestType & USB_DIR_IN) {
-            s->ctrl.state = CTRL_STATE_IDLE;
-            /* transfer OK */
-        } else {
-            /* ignore additional output */
-        }
-        return 0;
-
-    case CTRL_STATE_DATA:
-        if (!(s->ctrl.req.bRequestType & USB_DIR_IN)) {
-            int len = s->ctrl.len - s->ctrl.offset;
-            if (len > p->len)
-                len = p->len;
-            memcpy(s->ctrl.buffer + s->ctrl.offset, p->data, len);
-            s->ctrl.offset += len;
-            if (s->ctrl.offset >= s->ctrl.len)
-                s->ctrl.state = CTRL_STATE_ACK;
-            return len;
-        }
-
-        s->ctrl.state = CTRL_STATE_IDLE;
-        return USB_RET_STALL;
-
-    default:
-        return USB_RET_STALL;
-    }
-}
-
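Taken together, async_complete_ctrl(), do_token_setup(), do_token_in() and do_token_out() above implement one control-transfer state machine per host device. A condensed recap of the transitions, as derived from that code (IN/OUT refer to the bRequestType direction bit):

    IDLE  -- SETUP token received --------------------------------> SETUP
    SETUP -- IN request, control URB submitted/answered ----------> DATA
    SETUP -- OUT request, wLength == 0 ---------------------------> ACK
    SETUP -- OUT request, wLength > 0 ----------------------------> DATA
    DATA  -- buffer fully copied to or from the guest ------------> ACK
    ACK   -- status-stage token in the opposite direction --------> IDLE
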
-/*
- * Packet handler.
- * Called by the HC (host controller).
- *
- * Returns length of the transaction or one of the USB_RET_XXX codes.
- */
-static int usb_host_handle_packet(USBDevice *s, USBPacket *p)
-{
-    switch(p->pid) {
-    case USB_MSG_ATTACH:
-        s->state = USB_STATE_ATTACHED;
-        return 0;
-
-    case USB_MSG_DETACH:
-        s->state = USB_STATE_NOTATTACHED;
-        return 0;
-
-    case USB_MSG_RESET:
-        s->remote_wakeup = 0;
-        s->addr = 0;
-        s->state = USB_STATE_DEFAULT;
-        s->handle_reset(s);
-        return 0;
-    }
-
-    /* Rest of the PIDs must match our address */
-    if (s->state < USB_STATE_DEFAULT || p->devaddr != s->addr)
-        return USB_RET_NODEV;
-
-    switch (p->pid) {
-    case USB_TOKEN_SETUP:
-        return do_token_setup(s, p);
-
-    case USB_TOKEN_IN:
-        return do_token_in(s, p);
-
-    case USB_TOKEN_OUT:
-        return do_token_out(s, p);
-
-    default:
-        return USB_RET_STALL;
-    }
-}
-
-/* Returns 1 if a problem was encountered, 0 on success */
-static int usb_linux_update_endp_table(USBHostDevice *s)
-{
-    uint8_t *descriptors;
-    uint8_t devep, type, configuration, alt_interface;
-    struct usb_ctrltransfer ct;
-    int interface, ret, length, i;
-
-    ct.bRequestType = USB_DIR_IN;
-    ct.bRequest = USB_REQ_GET_CONFIGURATION;
-    ct.wValue = 0;
-    ct.wIndex = 0;
-    ct.wLength = 1;
-    ct.data = &configuration;
-    ct.timeout = 50;
-
-    ret = ioctl(s->fd, USBDEVFS_CONTROL, &ct);
-    if (ret < 0) {
-        perror("usb_linux_update_endp_table");
-        return 1;
-    }
-
-    /* in address state */
-    if (configuration == 0)
-        return 1;
-
-    /* get the desired configuration, interface, and endpoint descriptors
-     * from device description */
-    descriptors = &s->descr[18];
-    length = s->descr_len - 18;
-    i = 0;
-
-    if (descriptors[i + 1] != USB_DT_CONFIG ||
-        descriptors[i + 5] != configuration) {
-        dprintf("invalid descriptor data - configuration\n");
-        return 1;
-    }
-    i += descriptors[i];
-
-    while (i < length) {
-        if (descriptors[i + 1] != USB_DT_INTERFACE ||
-            (descriptors[i + 1] == USB_DT_INTERFACE &&
-             descriptors[i + 4] == 0)) {
-            i += descriptors[i];
-            continue;
-        }
-
-        interface = descriptors[i + 2];
-
-        ct.bRequestType = USB_DIR_IN | USB_RECIP_INTERFACE;
-        ct.bRequest = USB_REQ_GET_INTERFACE;
-        ct.wValue = 0;
-        ct.wIndex = interface;
-        ct.wLength = 1;
-        ct.data = &alt_interface;
-        ct.timeout = 50;
-
-        ret = ioctl(s->fd, USBDEVFS_CONTROL, &ct);
-        if (ret < 0) {
-            alt_interface = interface;
-        }
-
-        /* the current interface descriptor is the active interface
-         * and has endpoints */
-        if (descriptors[i + 3] != alt_interface) {
-            i += descriptors[i];
-            continue;
-        }
-
-        /* advance to the endpoints */
-        while (i < length && descriptors[i +1] != USB_DT_ENDPOINT)
-            i += descriptors[i];
-
-        if (i >= length)
-            break;
-
-        while (i < length) {
-            if (descriptors[i + 1] != USB_DT_ENDPOINT)
-                break;
-
-            devep = descriptors[i + 2];
-            switch (descriptors[i + 3] & 0x3) {
-            case 0x00:
-                type = USBDEVFS_URB_TYPE_CONTROL;
-                break;
-            case 0x01:
-                type = USBDEVFS_URB_TYPE_ISO;
-                break;
-            case 0x02:
-                type = USBDEVFS_URB_TYPE_BULK;
-                break;
-            case 0x03:
-                type = USBDEVFS_URB_TYPE_INTERRUPT;
-                break;
-            default:
-                dprintf("usb_host: malformed endpoint type\n");
-                type = USBDEVFS_URB_TYPE_BULK;
-            }
-            s->endp_table[(devep & 0xf) - 1].type = type;
-            s->endp_table[(devep & 0xf) - 1].halted = 0;
-
-            i += descriptors[i];
-        }
-    }
-    return 0;
-}
-
-static USBDevice *usb_host_device_open_addr(int bus_num, int addr, const char *prod_name)
-{
-    int fd = -1, ret;
-    USBHostDevice *dev = NULL;
-    struct usbdevfs_connectinfo ci;
-    char buf[1024];
-
-    dev = g_malloc0(sizeof(USBHostDevice));
-
-    dev->bus_num = bus_num;
-    dev->addr = addr;
-
-    printf("husb: open device %d.%d\n", bus_num, addr);
-
-    if (!usb_host_device_path) {
-        perror("husb: USB Host Device Path not set");
-        goto fail;
-    }
-    snprintf(buf, sizeof(buf), "%s/%03d/%03d", usb_host_device_path,
-             bus_num, addr);
-    fd = open(buf, O_RDWR | O_NONBLOCK);
-    if (fd < 0) {
-        perror(buf);
-        goto fail;
-    }
-    dprintf("husb: opened %s\n", buf);
-
-    /* read the device description */
-    dev->descr_len = read(fd, dev->descr, sizeof(dev->descr));
-    if (dev->descr_len <= 0) {
-        perror("husb: reading device data failed");
-        goto fail;
-    }
-
-#ifdef DEBUG
-    {
-        int x;
-        printf("=== begin dumping device descriptor data ===\n");
-        for (x = 0; x < dev->descr_len; x++)
-            printf("%02x ", dev->descr[x]);
-        printf("\n=== end dumping device descriptor data ===\n");
-    }
-#endif
-
-    dev->fd = fd;
-
-    /*
-     * Initial configuration is -1 which makes us claim first
-     * available config. We used to start with 1, which does not
-     * always work. I've seen devices where first config starts
-     * with 2.
-     */
-    if (!usb_host_claim_interfaces(dev, -1))
-        goto fail;
-
-    ret = ioctl(fd, USBDEVFS_CONNECTINFO, &ci);
-    if (ret < 0) {
-        perror("usb_host_device_open: USBDEVFS_CONNECTINFO");
-        goto fail;
-    }
-
-    printf("husb: grabbed usb device %d.%d\n", bus_num, addr);
-
-    ret = usb_linux_update_endp_table(dev);
-    if (ret)
-        goto fail;
-
-    if (ci.slow)
-        dev->dev.speed = USB_SPEED_LOW;
-    else
-        dev->dev.speed = USB_SPEED_HIGH;
-
-    dev->dev.handle_packet  = usb_host_handle_packet;
-    dev->dev.handle_reset   = usb_host_handle_reset;
-    dev->dev.handle_destroy = usb_host_handle_destroy;
-
-    if (!prod_name || prod_name[0] == '\0')
-        snprintf(dev->dev.devname, sizeof(dev->dev.devname),
-                 "host:%d.%d", bus_num, addr);
-    else
-        pstrcpy(dev->dev.devname, sizeof(dev->dev.devname),
-                prod_name);
-
-    /* USB devio uses 'write' flag to check for async completions */
-    qemu_set_fd_handler(dev->fd, NULL, async_complete, dev);
-
-    hostdev_link(dev);
-
-    return (USBDevice *) dev;
-
-fail:
-    if (dev)
-        g_free(dev);
-
-    close(fd);
-    return NULL;
-}
-
-static int usb_host_auto_add(const char *spec);
-static int usb_host_auto_del(const char *spec);
-
-USBDevice *usb_host_device_open(const char *devname)
-{
-    Monitor *mon = cur_mon;
-    int bus_num, addr;
-    char product_name[PRODUCT_NAME_SZ];
-
-    if (strstr(devname, "auto:")) {
-        usb_host_auto_add(devname);
-        return NULL;
-    }
-
-    if (usb_host_find_device(&bus_num, &addr, product_name, sizeof(product_name),
-                             devname) < 0)
-        return NULL;
-
-    if (hostdev_find(bus_num, addr)) {
-       monitor_printf(mon, "husb: host usb device %d.%d is already open\n",
-                      bus_num, addr);
-       return NULL;
-    }
-
-    return usb_host_device_open_addr(bus_num, addr, product_name);
-}
-
-int usb_host_device_close(const char *devname)
-{
-    char product_name[PRODUCT_NAME_SZ];
-    int bus_num, addr;
-    USBHostDevice *s;
-
-    if (strstr(devname, "auto:"))
-        return usb_host_auto_del(devname);
-
-    if (usb_host_find_device(&bus_num, &addr, product_name, sizeof(product_name),
-                             devname) < 0)
-        return -1;
-
-    s = hostdev_find(bus_num, addr);
-    if (s) {
-        usb_device_del_addr(0, s->dev.addr);
-        return 0;
-    }
-
-    return -1;
-}
-
-static int get_tag_value(char *buf, int buf_size,
-                         const char *str, const char *tag,
-                         const char *stopchars)
-{
-    const char *p;
-    char *q;
-    p = strstr(str, tag);
-    if (!p)
-        return -1;
-    p += strlen(tag);
-    while (qemu_isspace(*p))
-        p++;
-    q = buf;
-    while (*p != '\0' && !strchr(stopchars, *p)) {
-        if ((q - buf) < (buf_size - 1))
-            *q++ = *p;
-        p++;
-    }
-    *q = '\0';
-    return q - buf;
-}
-
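A hypothetical, standalone usage sketch for get_tag_value(), fed a made-up 'T:' record in the format that usb_host_scan_dev() below parses; it assumes the function above is visible in the same translation unit (it is static and relies on qemu_isspace() from qemu-common.h):

    #include <stdio.h>

    int main(void)
    {
        const char *line =
            "T:  Bus=01 Lev=00 Prnt=00 Port=00 Cnt=00 Dev#=  3 Spd=12  MxCh= 0";
        char buf[64];

        if (get_tag_value(buf, sizeof(buf), line, "Bus=", " ") >= 0)
            printf("bus  = %s\n", buf);   /* prints "01" */
        if (get_tag_value(buf, sizeof(buf), line, "Dev#=", " ") >= 0)
            printf("dev# = %s\n", buf);   /* prints "3" */
        if (get_tag_value(buf, sizeof(buf), line, "Spd=", " ") >= 0)
            printf("spd  = %s\n", buf);   /* prints "12" */
        return 0;
    }
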
-/*
- * Use /proc/bus/usb/devices or /dev/bus/usb/devices file to determine
- * host's USB devices. This is legacy support since many distributions
- * are moving to /sys/bus/usb
- */
-static int usb_host_scan_dev(void *opaque, USBScanFunc *func)
-{
-    FILE *f = 0;
-    char line[1024];
-    char buf[1024];
-    int bus_num, addr, speed, device_count, class_id, product_id, vendor_id;
-    char product_name[512];
-    int ret = 0;
-
-    if (!usb_host_device_path) {
-        perror("husb: USB Host Device Path not set");
-        goto the_end;
-    }
-    snprintf(line, sizeof(line), "%s/devices", usb_host_device_path);
-    f = fopen(line, "r");
-    if (!f) {
-        perror("husb: cannot open devices file");
-        goto the_end;
-    }
-
-    device_count = 0;
-    bus_num = addr = speed = class_id = product_id = vendor_id = 0;
-    for(;;) {
-        if (fgets(line, sizeof(line), f) == NULL)
-            break;
-        if (strlen(line) > 0)
-            line[strlen(line) - 1] = '\0';
-        if (line[0] == 'T' && line[1] == ':') {
-            if (device_count && (vendor_id || product_id)) {
-                /* New device.  Add the previously discovered device.  */
-                ret = func(opaque, bus_num, addr, class_id, vendor_id,
-                           product_id, product_name, speed);
-                if (ret)
-                    goto the_end;
-            }
-            if (get_tag_value(buf, sizeof(buf), line, "Bus=", " ") < 0)
-                goto fail;
-            bus_num = atoi(buf);
-            if (get_tag_value(buf, sizeof(buf), line, "Dev#=", " ") < 0)
-                goto fail;
-            addr = atoi(buf);
-            if (get_tag_value(buf, sizeof(buf), line, "Spd=", " ") < 0)
-                goto fail;
-            if (!strcmp(buf, "480"))
-                speed = USB_SPEED_HIGH;
-            else if (!strcmp(buf, "1.5"))
-                speed = USB_SPEED_LOW;
-            else
-                speed = USB_SPEED_FULL;
-            product_name[0] = '\0';
-            class_id = 0xff;
-            device_count++;
-            product_id = 0;
-            vendor_id = 0;
-        } else if (line[0] == 'P' && line[1] == ':') {
-            if (get_tag_value(buf, sizeof(buf), line, "Vendor=", " ") < 0)
-                goto fail;
-            vendor_id = strtoul(buf, NULL, 16);
-            if (get_tag_value(buf, sizeof(buf), line, "ProdID=", " ") < 0)
-                goto fail;
-            product_id = strtoul(buf, NULL, 16);
-        } else if (line[0] == 'S' && line[1] == ':') {
-            if (get_tag_value(buf, sizeof(buf), line, "Product=", "") < 0)
-                goto fail;
-            pstrcpy(product_name, sizeof(product_name), buf);
-        } else if (line[0] == 'D' && line[1] == ':') {
-            if (get_tag_value(buf, sizeof(buf), line, "Cls=", " (") < 0)
-                goto fail;
-            class_id = strtoul(buf, NULL, 16);
-        }
-    fail: ;
-    }
-    if (device_count && (vendor_id || product_id)) {
-        /* Add the last device.  */
-        ret = func(opaque, bus_num, addr, class_id, vendor_id,
-                   product_id, product_name, speed);
-    }
- the_end:
-    if (f)
-        fclose(f);
-    return ret;
-}
-
-/*
- * Read sys file-system device file
- *
- * @line address of buffer to put file contents in
- * @line_size size of line
- * @device_file path to device file (printf format string)
- * @device_name device being opened (inserted into device_file)
- *
- * @return 0 failed, 1 succeeded ('line' contains data)
- */
-static int usb_host_read_file(char *line, size_t line_size, const char *device_file, const char *device_name)
-{
-    Monitor *mon = cur_mon;
-    FILE *f;
-    int ret = 0;
-    char filename[PATH_MAX];
-
-    snprintf(filename, PATH_MAX, USBSYSBUS_PATH "/devices/%s/%s", device_name,
-             device_file);
-    f = fopen(filename, "r");
-    if (f) {
-        ret = (fgets(line, line_size, f) != NULL);
-        fclose(f);
-    } else {
-        monitor_printf(mon, "husb: could not open %s\n", filename);
-    }
-
-    return ret;
-}
-
-/*
- * Use /sys/bus/usb/devices/ directory to determine host's USB
- * devices.
- *
- * This code is based on Robert Schiele's original patches posted to
- * the Novell bug-tracker https://bugzilla.novell.com/show_bug.cgi?id=241950
- */
-static int usb_host_scan_sys(void *opaque, USBScanFunc *func)
-{
-    DIR *dir = 0;
-    char line[1024];
-    int bus_num, addr, speed, class_id, product_id, vendor_id;
-    int ret = 0;
-    char product_name[512];
-    struct dirent *de;
-
-    dir = opendir(USBSYSBUS_PATH "/devices");
-    if (!dir) {
-        perror("husb: cannot open devices directory");
-        goto the_end;
-    }
-
-    while ((de = readdir(dir))) {
-        if (de->d_name[0] != '.' && !strchr(de->d_name, ':')) {
-            char *tmpstr = de->d_name;
-            if (!strncmp(de->d_name, "usb", 3))
-                tmpstr += 3;
-            bus_num = atoi(tmpstr);
-
-            if (!usb_host_read_file(line, sizeof(line), "devnum", de->d_name))
-                goto the_end;
-            if (sscanf(line, "%d", &addr) != 1)
-                goto the_end;
-
-            if (!usb_host_read_file(line, sizeof(line), "bDeviceClass",
-                                    de->d_name))
-                goto the_end;
-            if (sscanf(line, "%x", &class_id) != 1)
-                goto the_end;
-
-            if (!usb_host_read_file(line, sizeof(line), "idVendor", de->d_name))
-                goto the_end;
-            if (sscanf(line, "%x", &vendor_id) != 1)
-                goto the_end;
-
-            if (!usb_host_read_file(line, sizeof(line), "idProduct",
-                                    de->d_name))
-                goto the_end;
-            if (sscanf(line, "%x", &product_id) != 1)
-                goto the_end;
-
-            if (!usb_host_read_file(line, sizeof(line), "product",
-                                    de->d_name)) {
-                *product_name = 0;
-            } else {
-                if (strlen(line) > 0)
-                    line[strlen(line) - 1] = '\0';
-                pstrcpy(product_name, sizeof(product_name), line);
-            }
-
-            if (!usb_host_read_file(line, sizeof(line), "speed", de->d_name))
-                goto the_end;
-            if (!strcmp(line, "480\n"))
-                speed = USB_SPEED_HIGH;
-            else if (!strcmp(line, "1.5\n"))
-                speed = USB_SPEED_LOW;
-            else
-                speed = USB_SPEED_FULL;
-
-            ret = func(opaque, bus_num, addr, class_id, vendor_id,
-                       product_id, product_name, speed);
-            if (ret)
-                goto the_end;
-        }
-    }
- the_end:
-    if (dir)
-        closedir(dir);
-    return ret;
-}
-
-/*
- * Determine how to access the host's USB devices and call the
- * specific support function.
- */
-static int usb_host_scan(void *opaque, USBScanFunc *func)
-{
-    Monitor *mon = cur_mon;
-    FILE *f = 0;
-    DIR *dir = 0;
-    int ret = 0;
-    const char *fs_type[] = {"unknown", "proc", "dev", "sys"};
-    char devpath[PATH_MAX];
-
-    /* only check the host once */
-    if (!usb_fs_type) {
-        f = fopen(USBPROCBUS_PATH "/devices", "r");
-        if (f) {
-            /* devices found in /proc/bus/usb/ */
-            strcpy(devpath, USBPROCBUS_PATH);
-            usb_fs_type = USB_FS_PROC;
-            fclose(f);
-            dprintf(USBDBG_DEVOPENED, USBPROCBUS_PATH);
-            goto found_devices;
-        }
-        /* try additional methods if an access method hasn't been found yet */
-        f = fopen(USBDEVBUS_PATH "/devices", "r");
-        if (f) {
-            /* devices found in /dev/bus/usb/ */
-            strcpy(devpath, USBDEVBUS_PATH);
-            usb_fs_type = USB_FS_DEV;
-            fclose(f);
-            dprintf(USBDBG_DEVOPENED, USBDEVBUS_PATH);
-            goto found_devices;
-        }
-        dir = opendir(USBSYSBUS_PATH "/devices");
-        if (dir) {
-            /* devices found in /dev/bus/usb/ (yes - not a mistake!) */
-            strcpy(devpath, USBDEVBUS_PATH);
-            usb_fs_type = USB_FS_SYS;
-            closedir(dir);
-            dprintf(USBDBG_DEVOPENED, USBSYSBUS_PATH);
-            goto found_devices;
-        }
-    found_devices:
-        if (!usb_fs_type) {
-            monitor_printf(mon, "husb: unable to access USB devices\n");
-            return -ENOENT;
-        }
-
-        /* the module setting (used later for opening devices) */
-        usb_host_device_path = g_malloc0(strlen(devpath)+1);
-        strcpy(usb_host_device_path, devpath);
-        monitor_printf(mon, "husb: using %s file-system with %s\n",
-                       fs_type[usb_fs_type], usb_host_device_path);
-    }
-
-    switch (usb_fs_type) {
-    case USB_FS_PROC:
-    case USB_FS_DEV:
-        ret = usb_host_scan_dev(opaque, func);
-        break;
-    case USB_FS_SYS:
-        ret = usb_host_scan_sys(opaque, func);
-        break;
-    default:
-        ret = -EINVAL;
-        break;
-    }
-    return ret;
-}
-
-struct USBAutoFilter {
-    struct USBAutoFilter *next;
-    int bus_num;
-    int addr;
-    int vendor_id;
-    int product_id;
-};
-
-static QEMUTimer *usb_auto_timer;
-static struct USBAutoFilter *usb_auto_filter;
-
-static int usb_host_auto_scan(void *opaque, int bus_num, int addr,
-                     int class_id, int vendor_id, int product_id,
-                     const char *product_name, int speed)
-{
-    struct USBAutoFilter *f;
-    struct USBDevice *dev;
-
-    /* Ignore hubs */
-    if (class_id == 9)
-        return 0;
-
-    for (f = usb_auto_filter; f; f = f->next) {
-	if (f->bus_num >= 0 && f->bus_num != bus_num)
-            continue;
-
-	if (f->addr >= 0 && f->addr != addr)
-            continue;
-
-	if (f->vendor_id >= 0 && f->vendor_id != vendor_id)
-            continue;
-
-	if (f->product_id >= 0 && f->product_id != product_id)
-            continue;
-
-        /* We got a match */
-
-        /* Already attached? */
-        if (hostdev_find(bus_num, addr))
-            return 0;
-
-        dprintf("husb: auto open: bus_num %d addr %d\n", bus_num, addr);
-
-	dev = usb_host_device_open_addr(bus_num, addr, product_name);
-	if (dev)
-	    usb_device_add_dev(dev);
-    }
-
-    return 0;
-}
-
-static void usb_host_auto_timer(void *unused)
-{
-    usb_host_scan(NULL, usb_host_auto_scan);
-    timer_mod(usb_auto_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 2000);
-}
-
-/*
- * Autoconnect filter
- * Format:
- *    auto:bus:dev[:vid:pid]
- *    auto:bus.dev[:vid:pid]
- *
- *    bus  - bus number    (dec, * means any)
- *    dev  - device number (dec, * means any)
- *    vid  - vendor id     (hex, * means any)
- *    pid  - product id    (hex, * means any)
- *
- *    See 'lsusb' output.
- */
-static int parse_filter(const char *spec, struct USBAutoFilter *f)
-{
-    enum { BUS, DEV, VID, PID, DONE };
-    const char *p = spec;
-    int i;
-
-    f->bus_num    = -1;
-    f->addr       = -1;
-    f->vendor_id  = -1;
-    f->product_id = -1;
-
-    for (i = BUS; i < DONE; i++) {
-    	p = strpbrk(p, ":.");
-    	if (!p) break;
-        p++;
-
-    	if (*p == '*')
-            continue;
-
-        switch(i) {
-        case BUS: f->bus_num = strtol(p, NULL, 10);    break;
-        case DEV: f->addr    = strtol(p, NULL, 10);    break;
-        case VID: f->vendor_id  = strtol(p, NULL, 16); break;
-        case PID: f->product_id = strtol(p, NULL, 16); break;
-        }
-    }
-
-    if (i < DEV) {
-        fprintf(stderr, "husb: invalid auto filter spec %s\n", spec);
-        return -1;
-    }
-
-    return 0;
-}
-
-static int match_filter(const struct USBAutoFilter *f1,
-                        const struct USBAutoFilter *f2)
-{
-    return f1->bus_num    == f2->bus_num &&
-           f1->addr       == f2->addr &&
-           f1->vendor_id  == f2->vendor_id &&
-           f1->product_id == f2->product_id;
-}
-
-static int usb_host_auto_add(const char *spec)
-{
-    struct USBAutoFilter filter, *f;
-
-    if (parse_filter(spec, &filter) < 0)
-        return -1;
-
-    f = g_malloc0(sizeof(*f));
-
-    *f = filter;
-
-    if (!usb_auto_filter) {
-        /*
-         * First entry. Init and start the monitor.
-         * Right now we're using timer to check for new devices.
-         * If this turns out to be too expensive we can move that into a
-         * separate thread.
-         */
-	usb_auto_timer = timer_new(QEMU_CLOCK_REALTIME, SCALE_MS, usb_host_auto_timer, NULL);
-	if (!usb_auto_timer) {
-            fprintf(stderr, "husb: failed to allocate auto scan timer\n");
-            g_free(f);
-            return -1;
-        }
-
-        /* Check for new devices every two seconds */
-        timer_mod(usb_auto_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 2000);
-    }
-
-    dprintf("husb: added auto filter: bus_num %d addr %d vid %d pid %d\n",
-	f->bus_num, f->addr, f->vendor_id, f->product_id);
-
-    f->next = usb_auto_filter;
-    usb_auto_filter = f;
-
-    return 0;
-}
-
-static int usb_host_auto_del(const char *spec)
-{
-    struct USBAutoFilter *pf = usb_auto_filter;
-    struct USBAutoFilter **prev = &usb_auto_filter;
-    struct USBAutoFilter filter;
-
-    if (parse_filter(spec, &filter) < 0)
-        return -1;
-
-    while (pf) {
-        if (match_filter(pf, &filter)) {
-            dprintf("husb: removed auto filter: bus_num %d addr %d vid %d pid %d\n",
-	             pf->bus_num, pf->addr, pf->vendor_id, pf->product_id);
-
-            *prev = pf->next;
-
-	    if (!usb_auto_filter) {
-                /* No more filters. Stop scanning. */
-                timer_del(usb_auto_timer);
-                timer_free(usb_auto_timer);
-            }
-
-            return 0;
-        }
-
-        prev = &pf->next;
-        pf   = pf->next;
-    }
-
-    return -1;
-}
-
-typedef struct FindDeviceState {
-    int vendor_id;
-    int product_id;
-    int bus_num;
-    int addr;
-    char product_name[PRODUCT_NAME_SZ];
-} FindDeviceState;
-
-static int usb_host_find_device_scan(void *opaque, int bus_num, int addr,
-                                     int class_id,
-                                     int vendor_id, int product_id,
-                                     const char *product_name, int speed)
-{
-    FindDeviceState *s = opaque;
-    if ((vendor_id == s->vendor_id &&
-        product_id == s->product_id) ||
-        (bus_num == s->bus_num &&
-        addr == s->addr)) {
-        pstrcpy(s->product_name, PRODUCT_NAME_SZ, product_name);
-        s->bus_num = bus_num;
-        s->addr = addr;
-        return 1;
-    } else {
-        return 0;
-    }
-}
-
-/* the syntax is:
-   'bus.addr' (decimal numbers) or
-   'vendor_id:product_id' (hex numbers) */
-static int usb_host_find_device(int *pbus_num, int *paddr,
-                                char *product_name, int product_name_size,
-                                const char *devname)
-{
-    const char *p;
-    int ret;
-    FindDeviceState fs;
-
-    p = strchr(devname, '.');
-    if (p) {
-        *pbus_num = strtoul(devname, NULL, 0);
-        *paddr = strtoul(p + 1, NULL, 0);
-        fs.bus_num = *pbus_num;
-        fs.addr = *paddr;
-        fs.vendor_id = -1;
-        fs.product_id = -1;
-        ret = usb_host_scan(&fs, usb_host_find_device_scan);
-        if (ret)
-            pstrcpy(product_name, product_name_size, fs.product_name);
-        return 0;
-    }
-
-    p = strchr(devname, ':');
-    if (p) {
-        fs.vendor_id = strtoul(devname, NULL, 16);
-        fs.product_id = strtoul(p + 1, NULL, 16);
-        fs.bus_num = -1;
-        fs.addr = -1;
-        ret = usb_host_scan(&fs, usb_host_find_device_scan);
-        if (ret) {
-            *pbus_num = fs.bus_num;
-            *paddr = fs.addr;
-            pstrcpy(product_name, product_name_size, fs.product_name);
-            return 0;
-        }
-    }
-    return -1;
-}
-
-/**********************/
-/* USB host device info */
-
-struct usb_class_info {
-    int class;
-    const char *class_name;
-};
-
-static const struct usb_class_info usb_class_info[] = {
-    { USB_CLASS_AUDIO, "Audio"},
-    { USB_CLASS_COMM, "Communication"},
-    { USB_CLASS_HID, "HID"},
-    { USB_CLASS_HUB, "Hub" },
-    { USB_CLASS_PHYSICAL, "Physical" },
-    { USB_CLASS_PRINTER, "Printer" },
-    { USB_CLASS_MASS_STORAGE, "Storage" },
-    { USB_CLASS_CDC_DATA, "Data" },
-    { USB_CLASS_APP_SPEC, "Application Specific" },
-    { USB_CLASS_VENDOR_SPEC, "Vendor Specific" },
-    { USB_CLASS_STILL_IMAGE, "Still Image" },
-    { USB_CLASS_CSCID, "Smart Card" },
-    { USB_CLASS_CONTENT_SEC, "Content Security" },
-    { -1, NULL }
-};
-
-static const char *usb_class_str(uint8_t class)
-{
-    const struct usb_class_info *p;
-    for(p = usb_class_info; p->class != -1; p++) {
-        if (p->class == class)
-            break;
-    }
-    return p->class_name;
-}
-
-static void usb_info_device(int bus_num, int addr, int class_id,
-                            int vendor_id, int product_id,
-                            const char *product_name,
-                            int speed)
-{
-    Monitor *mon = cur_mon;
-    const char *class_str, *speed_str;
-
-    switch(speed) {
-    case USB_SPEED_LOW:
-        speed_str = "1.5";
-        break;
-    case USB_SPEED_FULL:
-        speed_str = "12";
-        break;
-    case USB_SPEED_HIGH:
-        speed_str = "480";
-        break;
-    default:
-        speed_str = "?";
-        break;
-    }
-
-    monitor_printf(mon, "  Device %d.%d, speed %s Mb/s\n",
-                bus_num, addr, speed_str);
-    class_str = usb_class_str(class_id);
-    if (class_str)
-        monitor_printf(mon, "    %s:", class_str);
-    else
-        monitor_printf(mon, "    Class %02x:", class_id);
-    monitor_printf(mon, " USB device %04x:%04x", vendor_id, product_id);
-    if (product_name[0] != '\0')
-        monitor_printf(mon, ", %s", product_name);
-    monitor_printf(mon, "\n");
-}
-
-static int usb_host_info_device(void *opaque, int bus_num, int addr,
-                                int class_id,
-                                int vendor_id, int product_id,
-                                const char *product_name,
-                                int speed)
-{
-    usb_info_device(bus_num, addr, class_id, vendor_id, product_id,
-                    product_name, speed);
-    return 0;
-}
-
-static void dec2str(int val, char *str, size_t size)
-{
-    if (val == -1)
-        snprintf(str, size, "*");
-    else
-        snprintf(str, size, "%d", val);
-}
-
-static void hex2str(int val, char *str, size_t size)
-{
-    if (val == -1)
-        snprintf(str, size, "*");
-    else
-        snprintf(str, size, "%x", val);
-}
-
-void usb_host_info(Monitor *mon)
-{
-    struct USBAutoFilter *f;
-
-    usb_host_scan(NULL, usb_host_info_device);
-
-    if (usb_auto_filter)
-        monitor_printf(mon, "  Auto filters:\n");
-    for (f = usb_auto_filter; f; f = f->next) {
-        char bus[10], addr[10], vid[10], pid[10];
-        dec2str(f->bus_num, bus, sizeof(bus));
-        dec2str(f->addr, addr, sizeof(addr));
-        hex2str(f->vendor_id, vid, sizeof(vid));
-        hex2str(f->product_id, pid, sizeof(pid));
-        monitor_printf(mon, "    Device %s.%s ID %s:%s\n",
-                       bus, addr, vid, pid);
-    }
-}
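For reference, a minimal standalone sketch of how the auto:bus:dev[:vid:pid] autoconnect spec documented above is interpreted; it mirrors the parse_filter() logic being removed here, and the struct name and demo spec string are illustrative only.

/* Fields default to the -1 wildcard; bus/dev are decimal, vid/pid hex,
 * and '*' leaves the wildcard in place. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct filter { int bus, dev, vid, pid; };

static void parse_spec(const char *spec, struct filter *f)
{
    enum { BUS, DEV, VID, PID, DONE };
    const char *p = spec;
    int i;

    f->bus = f->dev = f->vid = f->pid = -1;   /* -1 == '*' wildcard */
    for (i = BUS; i < DONE; i++) {
        p = strpbrk(p, ":.");                 /* fields split on ':' or '.' */
        if (!p)
            break;
        p++;
        if (*p == '*')
            continue;                         /* keep the wildcard */
        switch (i) {
        case BUS: f->bus = strtol(p, NULL, 10); break;  /* decimal */
        case DEV: f->dev = strtol(p, NULL, 10); break;  /* decimal */
        case VID: f->vid = strtol(p, NULL, 16); break;  /* hex */
        case PID: f->pid = strtol(p, NULL, 16); break;  /* hex */
        }
    }
}

int main(void)
{
    struct filter f;
    parse_spec("auto:1.7:04e8:6860", &f);
    printf("bus=%d dev=%d vid=0x%x pid=0x%x\n", f.bus, f.dev, f.vid, f.pid);
    return 0;
}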
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index a943fcb..48e3354 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -20,6 +20,9 @@
 #define CPU_ALL_H
 
 #include "qemu-common.h"
+#include "qemu/queue.h"
+#include "qemu/thread.h"
+#include "qemu/tls.h"
 #include "exec/cpu-common.h"
 
 /* some important defines:
@@ -321,6 +324,9 @@
 #define TARGET_PAGE_SIZE (1 << TARGET_PAGE_BITS)
 #define TARGET_PAGE_MASK ~(TARGET_PAGE_SIZE - 1)
 #define TARGET_PAGE_ALIGN(addr) (((addr) + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK)
+#ifdef TARGET_X86_64
+#define TARGET_PTE_MASK 0x7fffffffffffULL
+#endif
 
 /* ??? These should be the larger of uintptr_t and target_ulong.  */
 extern uintptr_t qemu_real_host_page_size;
@@ -355,20 +361,8 @@
 int page_check_range(target_ulong start, target_ulong len, int flags);
 #endif
 
-CPUArchState *cpu_copy(CPUArchState *env);
-CPUArchState *qemu_get_cpu(int cpu);
-
-#define CPU_DUMP_CODE 0x00010000
-
-void cpu_dump_state(CPUArchState *env, FILE *f, fprintf_function cpu_fprintf,
-                    int flags);
-void cpu_dump_statistics(CPUArchState *env, FILE *f, fprintf_function cpu_fprintf,
-                          int flags);
-
 void QEMU_NORETURN cpu_abort(CPUArchState *env, const char *fmt, ...)
     GCC_FMT_ATTR(2, 3);
-extern CPUArchState *first_cpu;
-extern CPUArchState *cpu_single_env;
 
 /* Flags for use in ENV->INTERRUPT_PENDING.
 
@@ -420,13 +414,6 @@
      | CPU_INTERRUPT_TGT_EXT_3   \
      | CPU_INTERRUPT_TGT_EXT_4)
 
-void cpu_interrupt(CPUOldState *s, int mask);
-void cpu_reset_interrupt(CPUOldState *env, int mask);
-
-void cpu_exit(CPUOldState *s);
-
-int qemu_cpu_has_work(CPUOldState *env);
-
 /* Breakpoint/watchpoint flags */
 #define BP_MEM_READ           0x01
 #define BP_MEM_WRITE          0x02
@@ -452,10 +439,7 @@
 #define SSTEP_NOIRQ   0x2  /* Do not use IRQ while single stepping */
 #define SSTEP_NOTIMER 0x4  /* Do not Timers while single stepping */
 
-void cpu_single_step(CPUOldState *env, int enabled);
-void cpu_reset(CPUOldState *s);
-int cpu_is_stopped(CPUOldState *env);
-void run_on_cpu(CPUOldState *env, void (*func)(void *data), void *data);
+void cpu_single_step(CPUState *cpu, int enabled);
 
 /* IO ports API */
 #include "exec/ioport.h"
@@ -463,7 +447,7 @@
 /* Return the physical page corresponding to a virtual one. Use it
    only for debugging because no protection checks are done. Return -1
    if no page found. */
-hwaddr cpu_get_phys_page_debug(CPUOldState *env, target_ulong addr);
+hwaddr cpu_get_phys_page_debug(CPUArchState *env, target_ulong addr);
 
 /* memory API */
 
@@ -479,15 +463,19 @@
     ram_addr_t length;
     uint32_t flags;
     char idstr[256];
-    QLIST_ENTRY(RAMBlock) next;
-#if defined(__linux__) && !defined(TARGET_S390X)
+    /* Reads can take either the iothread or the ramlist lock.
+     * Writes must take both locks.
+     */
+    QTAILQ_ENTRY(RAMBlock) next;
     int fd;
-#endif
 } RAMBlock;
 
 typedef struct RAMList {
+    QemuMutex mutex;
     uint8_t *phys_dirty;
-    QLIST_HEAD(ram, RAMBlock) blocks;
+    RAMBlock *mru_block;
+    QTAILQ_HEAD(ram, RAMBlock) blocks;
+    uint32_t version;
 } RAMList;
 extern RAMList ram_list;
 
@@ -555,7 +543,7 @@
 
 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
                                      int dirty_flags);
-void cpu_tlb_update_dirty(CPUOldState *env);
+void cpu_tlb_update_dirty(CPUArchState *env);
 
 int cpu_physical_memory_set_dirty_tracking(int enable);
 
@@ -591,10 +579,10 @@
 extern int64_t dev_time;
 #endif
 
-int cpu_memory_rw_debug(CPUOldState *env, target_ulong addr,
+int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                         void *buf, int len, int is_write);
 
-void cpu_inject_x86_mce(CPUOldState *cenv, int bank, uint64_t status,
+void cpu_inject_x86_mce(CPUArchState *cenv, int bank, uint64_t status,
                         uint64_t mcg_status, uint64_t addr, uint64_t misc);
 
 #endif /* CPU_ALL_H */
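A short sketch of the locking rule stated in the RAMBlock comment above: readers may hold either the iothread lock or ram_list.mutex, while writers take both, clear mru_block and bump version. The offset field and the lookup idiom are assumptions based on the usual QEMU RAM-list layout, not part of this hunk.

#include "cpu.h"            /* pulls in exec/cpu-all.h for RAMBlock/ram_list */
#include "qemu/thread.h"

static RAMBlock *example_find_ram_block(ram_addr_t addr)
{
    RAMBlock *block = NULL;

    qemu_mutex_lock(&ram_list.mutex);           /* one lock is enough to read */
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->length) {
            break;                              /* addr falls inside this block */
        }
    }
    qemu_mutex_unlock(&ram_list.mutex);
    return block;                               /* NULL if no block matched */
}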
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index b45cfd7..689d73c 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -118,18 +118,32 @@
 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                                int is_write, hwaddr access_len);
 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque));
-void cpu_unregister_map_client(void *cookie);
 
 uint32_t ldub_phys(hwaddr addr);
+uint32_t lduw_le_phys(hwaddr addr);
+uint32_t lduw_be_phys(hwaddr addr);
+uint32_t ldl_le_phys(hwaddr addr);
+uint32_t ldl_be_phys(hwaddr addr);
+uint64_t ldq_le_phys(hwaddr addr);
+uint64_t ldq_be_phys(hwaddr addr);
+void stb_phys(hwaddr addr, uint32_t val);
+void stw_le_phys(hwaddr addr, uint32_t val);
+void stw_be_phys(hwaddr addr, uint32_t val);
+void stl_le_phys(hwaddr addr, uint32_t val);
+void stl_be_phys(hwaddr addr, uint32_t val);
+void stq_le_phys(hwaddr addr, uint64_t val);
+void stq_be_phys(hwaddr addr, uint64_t val);
+
+#ifdef NEED_CPU_H
 uint32_t lduw_phys(hwaddr addr);
 uint32_t ldl_phys(hwaddr addr);
 uint64_t ldq_phys(hwaddr addr);
 void stl_phys_notdirty(hwaddr addr, uint32_t val);
 void stq_phys_notdirty(hwaddr addr, uint64_t val);
-void stb_phys(hwaddr addr, uint32_t val);
 void stw_phys(hwaddr addr, uint32_t val);
 void stl_phys(hwaddr addr, uint32_t val);
 void stq_phys(hwaddr addr, uint64_t val);
+#endif
 
 void cpu_physical_memory_write_rom(hwaddr addr,
                                    const void *buf, int len);
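The fixed-endian accessors declared above let device code that is compiled once for all targets spell out its bus endianness instead of relying on the guest's; the old target-endian ldl_phys/stl_phys family is now only visible to per-target code (NEED_CPU_H). A small illustrative use, with a made-up descriptor layout:

#include "exec/cpu-common.h"

/* Write a two-word, little-endian descriptor into guest RAM regardless of
 * the guest CPU's endianness. */
static void example_write_le_descriptor(hwaddr desc, uint32_t buf_addr,
                                        uint32_t buf_len)
{
    stl_le_phys(desc + 0, buf_addr);
    stl_le_phys(desc + 4, buf_len);
}

/* Read a 64-bit big-endian field the same way. */
static uint64_t example_read_be_counter(hwaddr addr)
{
    return ldq_be_phys(addr);
}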
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index 3b784cf..4f36369 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -27,6 +27,7 @@
 #include <setjmp.h>
 #include <inttypes.h>
 #include <signal.h>
+#include "qemu-common.h"
 #include "qemu/osdep.h"
 #include "qemu/queue.h"
 #include "exec/hwaddr.h"
@@ -94,20 +95,20 @@
     target_ulong addr_code;
     /* Addend to virtual address to get host address.  IO accesses
        use the corresponding iotlb value.  */
-    size_t addend;
+    uintptr_t addend;
     /* padding to get a power of two size */
     uint8_t dummy[(1 << CPU_TLB_ENTRY_BITS) -
                   (sizeof(target_ulong) * 3 +
-                   ((-sizeof(target_ulong) * 3) & (sizeof(size_t) - 1)) +
-                   sizeof(size_t))];
+                   ((-sizeof(target_ulong) * 3) & (sizeof(uintptr_t) - 1)) +
+                   sizeof(uintptr_t))];
 } CPUTLBEntry;
 
-extern int CPUTLBEntry_wrong_size[sizeof(CPUTLBEntry) == (1 << CPU_TLB_ENTRY_BITS) ? 1 : -1];
+QEMU_BUILD_BUG_ON(sizeof(CPUTLBEntry) != (1 << CPU_TLB_ENTRY_BITS));
 
 #define CPU_COMMON_TLB \
     /* The meaning of the MMU modes is defined in the target code. */   \
     CPUTLBEntry tlb_table[NB_MMU_MODES][CPU_TLB_SIZE];                  \
-    hwaddr iotlb[NB_MMU_MODES][CPU_TLB_SIZE];               \
+    hwaddr iotlb[NB_MMU_MODES][CPU_TLB_SIZE];                           \
     target_ulong tlb_flush_addr;                                        \
     target_ulong tlb_flush_mask;
 
@@ -154,17 +155,12 @@
     /* in order to avoid passing too many arguments to the MMIO         \
        helpers, we store some rarely used information in the CPU        \
        context) */                                                      \
-    unsigned long mem_io_pc; /* host pc at which the memory was         \
-                                accessed */                             \
+    uintptr_t  mem_io_pc; /* host pc at which the memory was            \
+                             accessed */                                \
     target_ulong mem_io_vaddr; /* target virtual addr at which the      \
                                      memory was accessed */             \
-    uint32_t halted; /* Nonzero if the CPU is in suspend state */       \
-    uint32_t interrupt_request;                                         \
-    volatile sig_atomic_t exit_request;                                 \
     CPU_COMMON_TLB                                                      \
     struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE];           \
-    /* buffer for temporaries in the code generator */                  \
-    long temp_buf[CPU_TEMP_BUF_NLONGS];                                 \
                                                                         \
     int64_t icount_extra; /* Instructions until next timer event.  */   \
     /* Number of cycles left, with interrupt flag in high bit.          \
@@ -179,38 +175,16 @@
     /* from this point: preserved by CPU reset */                       \
     /* ice debug support */                                             \
     QTAILQ_HEAD(breakpoints_head, CPUBreakpoint) breakpoints;            \
-    int singlestep_enabled;                                             \
                                                                         \
     QTAILQ_HEAD(watchpoints_head, CPUWatchpoint) watchpoints;            \
     CPUWatchpoint *watchpoint_hit;                                      \
                                                                         \
-    struct GDBRegisterState *gdb_regs;                                  \
-                                                                        \
     /* Core interrupt code */                                           \
     jmp_buf jmp_env;                                                    \
     int exception_index;                                                \
                                                                         \
-    CPUOldState *next_cpu; /* next CPU sharing TB cache */                 \
-    int cpu_index; /* CPU index (informative) */                        \
-    uint32_t host_tid; /* host thread ID */                             \
-    int numa_node; /* NUMA node this cpu is belonging to  */            \
-    int nr_cores;  /* number of cores within this CPU package */        \
-    int nr_threads;/* number of threads within this CPU */              \
-    int running; /* Nonzero if cpu is currently running(usermode).  */  \
     /* user data */                                                     \
     void *opaque;                                                       \
                                                                         \
-    uint32_t created;                                                   \
-    uint32_t stop;   /* Stop request */                                 \
-    uint32_t stopped; /* Artificially stopped */                        \
-    struct QemuThread *thread;                                          \
-    struct QemuCond *halt_cond;                                         \
-    struct qemu_work_item *queued_work_first, *queued_work_last;        \
-    const char *cpu_model_str;                                          \
-    struct KVMState *kvm_state;                                         \
-    struct kvm_run *kvm_run;                                            \
-    int kvm_fd;                                                         \
-    int kvm_vcpu_dirty;                                                 \
-    struct hax_vcpu_state *hax_vcpu;
 
 #endif
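Worked through for one common configuration (illustrative; assumes a 64-bit host, a 32-bit target_ulong and CPU_TLB_ENTRY_BITS == 5, i.e. a 32-byte entry): the three address tags take 3 * 4 = 12 bytes, ((-12) & (sizeof(uintptr_t) - 1)) = 4 bytes of alignment padding precede the 8-byte addend, leaving 32 - 24 = 8 bytes for dummy[]; QEMU_BUILD_BUG_ON() then checks the total at compile time instead of the old negative-array-size trick.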
diff --git a/include/exec/def-helper.h b/include/exec/def-helper.h
index 022a9ce..73d51f9 100644
--- a/include/exec/def-helper.h
+++ b/include/exec/def-helper.h
@@ -240,8 +240,7 @@
 #elif GEN_HELPER == 2
 /* Register helpers.  */
 
-#define DEF_HELPER_FLAGS_0(name, flags, ret) \
-tcg_register_helper(HELPER(name), #name);
+#define DEF_HELPER_FLAGS_0(name, flags, ret)  { HELPER(name), #name },
 
 #define DEF_HELPER_FLAGS_1(name, flags, ret, t1) \
 DEF_HELPER_FLAGS_0(name, flags, ret)
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index c1a1839..7befcdd 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -46,9 +46,21 @@
 typedef struct TranslationBlock TranslationBlock;
 
 /* XXX: make safe guess about sizes */
-#define MAX_OP_PER_INSTR 96
-/* A Call op needs up to 6 + 2N parameters (N = number of arguments).  */
-#define MAX_OPC_PARAM 10
+#define MAX_OP_PER_INSTR 208
+
+#if HOST_LONG_BITS == 32
+#define MAX_OPC_PARAM_PER_ARG 2
+#else
+#define MAX_OPC_PARAM_PER_ARG 1
+#endif
+#define MAX_OPC_PARAM_IARGS 5
+#define MAX_OPC_PARAM_OARGS 1
+#define MAX_OPC_PARAM_ARGS (MAX_OPC_PARAM_IARGS + MAX_OPC_PARAM_OARGS)
+
+/* A Call op needs up to 4 + 2N parameters on 32-bit archs,
+ * and up to 4 + N parameters on 64-bit archs
+ * (N = number of input arguments + output arguments).  */
+#define MAX_OPC_PARAM (4 + (MAX_OPC_PARAM_PER_ARG * MAX_OPC_PARAM_ARGS))
 #define OPC_BUF_SIZE 2048
 #define OPC_MAX_SIZE (OPC_BUF_SIZE - MAX_OP_PER_INSTR)
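Plugging in the constants above: MAX_OPC_PARAM_ARGS = 5 + 1 = 6, so MAX_OPC_PARAM works out to 4 + 2*6 = 16 on a 32-bit host and 4 + 1*6 = 10 on a 64-bit host.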
 
@@ -60,14 +72,6 @@
 
 #define OPPARAM_BUF_SIZE (OPC_BUF_SIZE * MAX_OPC_PARAM)
 
-extern target_ulong gen_opc_pc[OPC_BUF_SIZE];
-extern target_ulong gen_opc_npc[OPC_BUF_SIZE];
-extern uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
-extern uint8_t gen_opc_instr_start[OPC_BUF_SIZE];
-extern uint16_t gen_opc_icount[OPC_BUF_SIZE];
-extern target_ulong gen_opc_jump_pc[2];
-extern uint32_t gen_opc_hflags[OPC_BUF_SIZE];
-
 #include "qemu/log.h"
 
 void gen_intermediate_code(CPUArchState *env, struct TranslationBlock *tb);
@@ -77,29 +81,41 @@
 
 unsigned long code_gen_max_block_size(void);
 void cpu_gen_init(void);
+void tcg_exec_init(unsigned long tb_size);
 int cpu_gen_code(CPUArchState *env, struct TranslationBlock *tb,
                  int *gen_code_size_ptr);
-bool cpu_restore_state(struct TranslationBlock *tb,
-                       CPUArchState *env, uintptr_t searched_pc);
+bool cpu_restore_state(CPUArchState *env, uintptr_t searched_pc);
+
 void QEMU_NORETURN cpu_resume_from_signal(CPUArchState *env1, void *puc);
 void QEMU_NORETURN cpu_io_recompile(CPUArchState *env, uintptr_t retaddr);
-TranslationBlock *tb_gen_code(CPUArchState *env,
+TranslationBlock *tb_gen_code(CPUArchState *env,
                               target_ulong pc, target_ulong cs_base, int flags,
                               int cflags);
 void cpu_exec_init(CPUArchState *env);
 void QEMU_NORETURN cpu_loop_exit(CPUArchState *env1);
 int page_unprotect(target_ulong address, uintptr_t pc, void *puc);
-void tb_invalidate_phys_page_range(hwaddr start, hwaddr end,
+void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
                                    int is_cpu_write_access);
-void tb_invalidate_page_range(target_ulong start, target_ulong end);
+void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
+                              int is_cpu_write_access);
+#if !defined(CONFIG_USER_ONLY)
+/* cputlb.c */
 void tlb_flush_page(CPUArchState *env, target_ulong addr);
 void tlb_flush(CPUArchState *env, int flush_global);
-int tlb_set_page_exec(CPUArchState *env, target_ulong vaddr,
-                      hwaddr paddr, int prot,
-                      int mmu_idx, int is_softmmu);
-int tlb_set_page(CPUArchState *env1, target_ulong vaddr,
-                 hwaddr paddr, int prot,
-                 int mmu_idx, int is_softmmu);
+void tlb_set_page(CPUArchState *env, target_ulong vaddr,
+                  hwaddr paddr, int prot,
+                  int mmu_idx, target_ulong size);
+void tb_reset_jump_recursive(TranslationBlock *tb);
+void tb_invalidate_phys_addr(hwaddr addr);
+#else
+static inline void tlb_flush_page(CPUArchState *env, target_ulong addr)
+{
+}
+
+static inline void tlb_flush(CPUArchState *env, int flush_global)
+{
+}
+#endif
 
 typedef struct PhysPageDesc {
     /* offset in host memory of the page + io_index in the low bits */
@@ -108,6 +124,7 @@
 } PhysPageDesc;
 
 PhysPageDesc *phys_page_find(hwaddr index);
+PhysPageDesc *phys_page_find_alloc(hwaddr index, int alloc);
 
 int io_mem_watch;
 
@@ -116,8 +133,6 @@
 #define CODE_GEN_PHYS_HASH_BITS     15
 #define CODE_GEN_PHYS_HASH_SIZE     (1 << CODE_GEN_PHYS_HASH_BITS)
 
-#define MIN_CODE_GEN_BUFFER_SIZE     (1024 * 1024)
-
 /* estimated block size for TB allocation */
 /* XXX: use a per code average code fragment size and modulate it
    according to the host CPU */
@@ -168,7 +183,7 @@
     struct TranslationBlock *jmp_first;
     uint32_t icount;
 
-#ifdef CONFIG_MEMCHECK
+#ifdef CONFIG_ANDROID_MEMCHECK
     /* Maps PCs in this translation block to corresponding PCs in guest address
      * space. The array is arranged in such way, that every even entry contains
      * PC in the translation block, followed by an odd entry that contains
@@ -178,7 +193,26 @@
     uintptr_t*   tpc2gpc;
     /* Number of pairs (pc_tb, pc_guest) in tpc2gpc array. */
     unsigned int    tpc2gpc_pairs;
-#endif  // CONFIG_MEMCHECK
+#endif  // CONFIG_ANDROID_MEMCHECK
+};
+
+#include "exec/spinlock.h"
+
+typedef struct TBContext TBContext;
+
+struct TBContext {
+
+    TranslationBlock *tbs;
+    TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
+    int nb_tbs;
+    /* any access to the tbs or the page table must use this lock */
+    spinlock_t tb_lock;
+
+    /* statistics */
+    int tb_flush_count;
+    int tb_phys_invalidate_count;
+
+    int tb_invalidated_flag;
 };
 
 static inline unsigned int tb_jmp_cache_hash_page(target_ulong pc)
@@ -201,7 +235,7 @@
     return (pc >> 2) & (CODE_GEN_PHYS_HASH_SIZE - 1);
 }
 
-#ifdef CONFIG_MEMCHECK
+#ifdef CONFIG_ANDROID_MEMCHECK
 /* Gets translated PC for a given (translated PC, guest PC) pair.
  * Return:
  *  Translated PC, or NULL if pair index was too large.
@@ -253,16 +287,15 @@
     }
     return 0;
 }
-#endif  // CONFIG_MEMCHECK
+#endif  // CONFIG_ANDROID_MEMCHECK
 
-TranslationBlock *tb_alloc(target_ulong pc);
 void tb_free(TranslationBlock *tb);
 void tb_flush(CPUArchState *env);
 void tb_link_phys(TranslationBlock *tb,
                   target_ulong phys_pc, target_ulong phys_page2);
 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
-
-extern TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
+void tb_invalidate_phys_page_fast0(hwaddr start, int len);
+
 extern uint8_t *code_gen_ptr;
 extern int code_gen_max_blocks;
 
@@ -353,28 +386,54 @@
     }
 }
 
-TranslationBlock *tb_find_pc(unsigned long pc_ptr);
+/* GETRA is the true target of the return instruction that we'll execute,
+   defined here for simplicity of defining the follow-up macros.  */
+#if defined(CONFIG_TCG_INTERPRETER)
+extern uintptr_t tci_tb_ptr;
+# define GETRA() tci_tb_ptr
+#else
+# define GETRA() \
+    ((uintptr_t)__builtin_extract_return_addr(__builtin_return_address(0)))
+#endif
 
-extern CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
-extern CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
-extern void *io_mem_opaque[IO_MEM_NB_ENTRIES];
+/* The true return address will often point to a host insn that is part of
+   the next translated guest insn.  Adjust the address backward to point to
+   the middle of the call insn.  Subtracting one would do the job except for
+   several compressed mode architectures (arm, mips) which set the low bit
+   to indicate the compressed mode; subtracting two works around that.  It
+   is also the case that there are no host isas that contain a call insn
+   smaller than 4 bytes, so we don't worry about special-casing this.  */
+#if defined(CONFIG_TCG_INTERPRETER)
+# define GETPC_ADJ   0
+#else
+# define GETPC_ADJ   2
+#endif
 
-#include "exec/spinlock.h"
-
-extern spinlock_t tb_lock;
-
-extern int tb_invalidated_flag;
+#define GETPC()  (GETRA() - GETPC_ADJ)
 
 #if !defined(CONFIG_USER_ONLY)
 
-void tlb_fill(target_ulong addr, int is_write, int mmu_idx,
-              void *retaddr);
+void phys_mem_set_alloc(void *(*alloc)(size_t));
 
-#include "exec/softmmu_defs.h"
+TranslationBlock *tb_find_pc(uintptr_t pc_ptr);
+
+uint64_t io_mem_read(int index, hwaddr addr, unsigned size);
+void io_mem_write(int index, hwaddr addr, uint64_t value, unsigned size);
+
+extern CPUWriteMemoryFunc *_io_mem_write[IO_MEM_NB_ENTRIES][4];
+extern CPUReadMemoryFunc *_io_mem_read[IO_MEM_NB_ENTRIES][4];
+extern void *io_mem_opaque[IO_MEM_NB_ENTRIES];
+
+void tlb_fill(CPUArchState *env1, target_ulong addr, int is_write, int mmu_idx,
+              uintptr_t retaddr);
+
+uint8_t helper_ldb_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx);
+uint16_t helper_ldw_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx);
+uint32_t helper_ldl_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx);
+uint64_t helper_ldq_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx);
 
 #define ACCESS_TYPE (NB_MMU_MODES + 1)
 #define MEMSUFFIX _code
-#define env cpu_single_env
 
 #define DATA_SIZE 1
 #include "exec/softmmu_header.h"
@@ -390,51 +449,29 @@
 
 #undef ACCESS_TYPE
 #undef MEMSUFFIX
-#undef env
 
 #endif
 
 #if defined(CONFIG_USER_ONLY)
-static inline target_ulong get_phys_addr_code(CPUArchState *env1, target_ulong addr)
+static inline tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
 {
     return addr;
 }
 #else
-/* NOTE: this function can trigger an exception */
-/* NOTE2: the returned address is not exactly the physical address: it
-   is the offset relative to phys_ram_base */
-static inline target_ulong get_phys_addr_code(CPUArchState *env1, target_ulong addr)
-{
-    int mmu_idx, page_index, pd;
-    void *p;
-
-    page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
-    mmu_idx = cpu_mmu_index(env1);
-    if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code !=
-                 (addr & TARGET_PAGE_MASK))) {
-        ldub_code(addr);
-    }
-    pd = env1->tlb_table[mmu_idx][page_index].addr_code & ~TARGET_PAGE_MASK;
-    if (pd > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
-#if defined(TARGET_SPARC) || defined(TARGET_MIPS)
-        do_unassigned_access(addr, 0, 1, 0, 4);
-#else
-        cpu_abort(env1, "Trying to execute code outside RAM or ROM at 0x" TARGET_FMT_lx "\n", addr);
-#endif
-    }
-    p = (void *)(unsigned long)addr
-        + env1->tlb_table[mmu_idx][page_index].addend;
-    return qemu_ram_addr_from_host_nofail(p);
-}
+/* cputlb.c */
+tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr);
 #endif
 
 typedef void (CPUDebugExcpHandler)(CPUArchState *env);
 
-CPUDebugExcpHandler *cpu_set_debug_excp_handler(CPUDebugExcpHandler *handler);
+void cpu_set_debug_excp_handler(CPUDebugExcpHandler *handler);
 
 /* vl.c */
 extern int singlestep;
 
+/* cpu-exec.c */
+extern volatile sig_atomic_t exit_request;
+
 /* Deterministic execution requires that IO only be performed on the last
    instruction of a TB so that interrupts take effect immediately.  */
 static inline int can_do_io(CPUArchState *env)
@@ -443,7 +480,7 @@
         return 1;
     }
     /* If not executing code then assume we are ok.  */
-    if (!env->current_tb) {
+    if (env->current_tb == NULL) {
         return 1;
     }
     return env->can_do_io != 0;
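A minimal sketch of how a target op helper is expected to use the GETPC()/cpu_restore_state() pair declared above; the helper name and its exception argument are invented for illustration.

#include "cpu.h"
#include "exec/exec-all.h"

/* Called directly from TCG-generated code: recover the host return address
 * (already adjusted by GETPC_ADJ), resynchronize the guest CPU state for
 * that point, then raise the exception through the usual longjmp path. */
void example_helper_raise_exception(CPUArchState *env, int excp)
{
    uintptr_t retaddr = GETPC();

    if (retaddr) {
        cpu_restore_state(env, retaddr);   /* recover guest PC/flags */
    }
    env->exception_index = excp;
    cpu_loop_exit(env);
}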
diff --git a/include/exec/gdbstub.h b/include/exec/gdbstub.h
index 79c637d..4d122d0 100644
--- a/include/exec/gdbstub.h
+++ b/include/exec/gdbstub.h
@@ -13,22 +13,22 @@
 #ifdef NEED_CPU_H
 #include "cpu.h"
 
-typedef void (*gdb_syscall_complete_cb)(CPUOldState *env,
+typedef void (*gdb_syscall_complete_cb)(CPUState *env,
                                         target_ulong ret, target_ulong err);
 
 void gdb_do_syscall(gdb_syscall_complete_cb cb, const char *fmt, ...);
 int use_gdb_syscalls(void);
-void gdb_set_stop_cpu(CPUOldState *env);
-void gdb_exit(CPUOldState *, int);
+void gdb_set_stop_cpu(CPUState *env);
+void gdb_exit(CPUArchState *, int);
 #ifdef CONFIG_USER_ONLY
 int gdb_queuesig (void);
-int gdb_handlesig (CPUOldState *, int);
+int gdb_handlesig (CPUState *, int);
 void gdb_signalled(CPUArchState *, int);
 void gdbserver_fork(CPUArchState *);
 #endif
 /* Get or set a register.  Returns the size of the register.  */
 typedef int (*gdb_reg_cb)(CPUArchState *env, uint8_t *buf, int reg);
-void gdb_register_coprocessor(CPUOldState *env,
+void gdb_register_coprocessor(CPUState *cpu,
                               gdb_reg_cb get_reg, gdb_reg_cb set_reg,
                               int num_regs, const char *xml, int g_pos);
 
diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
index fd62e69..97f01d1 100644
--- a/include/exec/gen-icount.h
+++ b/include/exec/gen-icount.h
@@ -19,7 +19,7 @@
     count = tcg_temp_local_new_i32();
     tcg_gen_ld_i32(count, cpu_env, offsetof(CPUArchState, icount_decr.u32));
     /* This is a horrid hack to allow fixing up the value later.  */
-    icount_arg = gen_opparam_ptr + 1;
+    icount_arg = tcg_ctx.gen_opparam_ptr + 1;
     tcg_gen_subi_i32(count, count, 0xdeadbeef);
 
     tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, icount_label);
diff --git a/include/exec/hax.h b/include/exec/hax.h
index c8cd9a2..8db22ee 100644
--- a/include/exec/hax.h
+++ b/include/exec/hax.h
@@ -13,21 +13,21 @@
 int hax_enabled(void);
 int hax_set_ramsize(uint64_t ramsize);
 int hax_init(int smp_cpus);
-int hax_init_vcpu(CPUOldState *env);
+int hax_init_vcpu(CPUState *cpu);
 /* Execute vcpu in non-root mode */
-int hax_vcpu_exec(CPUOldState *env);
+int hax_vcpu_exec(CPUState *cpu);
 /* Sync vcpu state with HAX driver */
 int hax_sync_vcpus(void);
-void hax_vcpu_sync_state(CPUOldState *env, int modified);
+void hax_vcpu_sync_state(CPUState *cpu, int modified);
 int hax_populate_ram(uint64_t va, uint32_t size);
 int hax_set_phys_mem(hwaddr start_addr,
                      ram_addr_t size, ram_addr_t phys_offset);
 /* Check if QEMU need emulate guest execution */
-int hax_vcpu_emulation_mode(CPUOldState *env);
-int hax_stop_emulation(CPUOldState *env);
-int hax_stop_translate(CPUOldState *env);
-int hax_arch_get_registers(CPUOldState *env);
-void hax_raise_event(CPUOldState *env);
+int hax_vcpu_emulation_mode(CPUState *cpu);
+int hax_stop_emulation(CPUState *cpu);
+int hax_stop_translate(CPUState *cpu);
+int hax_arch_get_registers(CPUState *cpu);
+void hax_raise_event(CPUState *cpu);
 void hax_reset_vcpu_state(void *opaque);
 
 #include "target-i386/hax-interface.h"
diff --git a/include/exec/poison.h b/include/exec/poison.h
index b58caac..85ed1f4 100644
--- a/include/exec/poison.h
+++ b/include/exec/poison.h
@@ -14,6 +14,7 @@
 #pragma GCC poison TARGET_M68K
 #pragma GCC poison TARGET_MIPS
 #pragma GCC poison TARGET_MIPS64
+#pragma GCC poison TARGET_OPENRISC
 #pragma GCC poison TARGET_PPC
 #pragma GCC poison TARGET_PPCEMB
 #pragma GCC poison TARGET_PPC64
@@ -35,8 +36,18 @@
 #pragma GCC poison TARGET_PAGE_ALIGN
 
 #pragma GCC poison CPUOldState
+#pragma GCC poison CPUArchState
 #pragma GCC poison env
 
+#pragma GCC poison lduw_phys
+#pragma GCC poison ldl_phys
+#pragma GCC poison ldq_phys
+#pragma GCC poison stl_phys_notdirty
+#pragma GCC poison stq_phys_notdirty
+#pragma GCC poison stw_phys
+#pragma GCC poison stl_phys
+#pragma GCC poison stq_phys
+
 #pragma GCC poison CPU_INTERRUPT_HARD
 #pragma GCC poison CPU_INTERRUPT_EXITTB
 #pragma GCC poison CPU_INTERRUPT_TIMER
diff --git a/include/exec/softmmu-semi.h b/include/exec/softmmu-semi.h
index 7d484ca..8401f7d 100644
--- a/include/exec/softmmu-semi.h
+++ b/include/exec/softmmu-semi.h
@@ -13,14 +13,14 @@
 {
     uint32_t val;
 
-    cpu_memory_rw_debug(env, addr, (uint8_t *)&val, 4, 0);
+    cpu_memory_rw_debug(ENV_GET_CPU(env), addr, (uint8_t *)&val, 4, 0);
     return tswap32(val);
 }
 static inline uint32_t softmmu_tget8(CPUArchState *env, uint32_t addr)
 {
     uint8_t val;
 
-    cpu_memory_rw_debug(env, addr, &val, 1, 0);
+    cpu_memory_rw_debug(ENV_GET_CPU(env), addr, &val, 1, 0);
     return val;
 }
 
@@ -31,7 +31,7 @@
 static inline void softmmu_tput32(CPUArchState *env, uint32_t addr, uint32_t val)
 {
     val = tswap32(val);
-    cpu_memory_rw_debug(env, addr, (uint8_t *)&val, 4, 1);
+    cpu_memory_rw_debug(ENV_GET_CPU(env), addr, (uint8_t *)&val, 4, 1);
 }
 #define put_user_u32(arg, p) ({ softmmu_tput32(env, p, arg) ; 0; })
 #define put_user_ual(arg, p) put_user_u32(arg, p)
@@ -43,7 +43,7 @@
     /* TODO: Make this something that isn't fixed size.  */
     p = malloc(len);
     if (p && copy) {
-        cpu_memory_rw_debug(env, addr, p, len, 0);
+        cpu_memory_rw_debug(ENV_GET_CPU(env), addr, p, len, 0);
     }
     return p;
 }
@@ -59,7 +59,7 @@
         return NULL;
     }
     do {
-        cpu_memory_rw_debug(env, addr, &c, 1, 0);
+        cpu_memory_rw_debug(ENV_GET_CPU(env), addr, &c, 1, 0);
         addr++;
         *(p++) = c;
     } while (c);
@@ -70,7 +70,7 @@
                                 target_ulong len)
 {
     if (len) {
-        cpu_memory_rw_debug(env, addr, p, len, 1);
+        cpu_memory_rw_debug(ENV_GET_CPU(env), addr, p, len, 1);
     }
     free(p);
 }
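For context, a sketch of how these accessors are typically used by a semihosting call handler (the parameter-block layout and helper names are hypothetical); after this change all guest memory traffic goes through cpu_memory_rw_debug() on the CPUState returned by ENV_GET_CPU(env).

#include "exec/softmmu-semi.h"

/* Read the n-th 32-bit word of a guest-side parameter block. */
static uint32_t example_semi_arg(CPUArchState *env, uint32_t args, int n)
{
    return softmmu_tget32(env, args + n * 4);
}

/* Return a value to the guest through the same debug-access path. */
static void example_semi_ret(CPUArchState *env, uint32_t args, uint32_t val)
{
    softmmu_tput32(env, args, val);
}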
diff --git a/include/exec/softmmu_defs.h b/include/exec/softmmu_defs.h
deleted file mode 100644
index e38bb75..0000000
--- a/include/exec/softmmu_defs.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef SOFTMMU_DEFS_H
-#define SOFTMMU_DEFS_H
-
-uint8_t REGPARM __ldb_mmu(target_ulong addr, int mmu_idx);
-void REGPARM __stb_mmu(target_ulong addr, uint8_t val, int mmu_idx);
-uint16_t REGPARM __ldw_mmu(target_ulong addr, int mmu_idx);
-void REGPARM __stw_mmu(target_ulong addr, uint16_t val, int mmu_idx);
-uint32_t REGPARM __ldl_mmu(target_ulong addr, int mmu_idx);
-void REGPARM __stl_mmu(target_ulong addr, uint32_t val, int mmu_idx);
-uint64_t REGPARM __ldq_mmu(target_ulong addr, int mmu_idx);
-void REGPARM __stq_mmu(target_ulong addr, uint64_t val, int mmu_idx);
-
-uint8_t REGPARM __ldb_cmmu(target_ulong addr, int mmu_idx);
-void REGPARM __stb_cmmu(target_ulong addr, uint8_t val, int mmu_idx);
-uint16_t REGPARM __ldw_cmmu(target_ulong addr, int mmu_idx);
-void REGPARM __stw_cmmu(target_ulong addr, uint16_t val, int mmu_idx);
-uint32_t REGPARM __ldl_cmmu(target_ulong addr, int mmu_idx);
-void REGPARM __stl_cmmu(target_ulong addr, uint32_t val, int mmu_idx);
-uint64_t REGPARM __ldq_cmmu(target_ulong addr, int mmu_idx);
-void REGPARM __stq_cmmu(target_ulong addr, uint64_t val, int mmu_idx);
-
-#endif
diff --git a/include/exec/softmmu_exec.h b/include/exec/softmmu_exec.h
index 861b107..6fde154 100644
--- a/include/exec/softmmu_exec.h
+++ b/include/exec/softmmu_exec.h
@@ -1,4 +1,14 @@
-/* Common softmmu definitions and inline routines.  */
+/*
+ *  Software MMU support
+ *
+ * Generate inline load/store functions for all MMU modes (typically
+ * at least _user and _kernel) as well as _data versions, for all data
+ * sizes.
+ *
+ * Used by target op helpers.
+ *
+ * MMU mode suffixes are defined in target cpu.h.
+ */
 
 /* XXX: find something cleaner.
  * Furthermore, this is false for 64 bits targets
@@ -9,7 +19,8 @@
 #define ldul_executive  ldl_executive
 #define ldul_supervisor ldl_supervisor
 
-#include "exec/softmmu_defs.h"
+/* The memory helpers for tcg-generated code need tcg_target_long etc.  */
+#include "tcg.h"
 
 #define ACCESS_TYPE 0
 #define MEMSUFFIX MMU_MODE0_SUFFIX
diff --git a/include/exec/softmmu_header.h b/include/exec/softmmu_header.h
index 82916bd..d8d9c81 100644
--- a/include/exec/softmmu_header.h
+++ b/include/exec/softmmu_header.h
@@ -49,16 +49,8 @@
 
 #if ACCESS_TYPE < (NB_MMU_MODES)
 
-#if defined(OUTSIDE_JIT)
-/* Dispatch calls to __ldx_outside_jit / __stx_outside_jit, which don't
- * expect CPU environment. to be cached in ebp register, but rather uses
- * cpu_single_env variable for that purpose.
- */
-#define MMUSUFFIX _outside_jit
-#else   // OUTSIDE_JIT
-#define MMUSUFFIX _mmu
-#endif  // OUTSIDE_JIT
 #define CPU_MMU_INDEX ACCESS_TYPE
+#define MMUSUFFIX _mmu
 
 #elif ACCESS_TYPE == (NB_MMU_MODES)
 
@@ -88,12 +80,12 @@
 
 /* generic load/store macros */
 
-static inline RES_TYPE glue(glue(ld, USUFFIX), MEMSUFFIX)(target_ulong ptr)
+static inline RES_TYPE
+glue(glue(cpu_ld, USUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr)
 {
     int page_index;
     RES_TYPE res;
     target_ulong addr;
-    unsigned long physaddr;
     int mmu_idx;
 
     addr = ptr;
@@ -101,20 +93,20 @@
     mmu_idx = CPU_MMU_INDEX;
     if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
-        res = glue(glue(__ld, SUFFIX), MMUSUFFIX)(addr, mmu_idx);
+        res = glue(glue(helper_ld, SUFFIX), MMUSUFFIX)(env, addr, mmu_idx);
     } else {
-        physaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
-        res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)physaddr);
+        uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
+        res = glue(glue(ld, USUFFIX), _raw)(hostaddr);
     }
     return res;
 }
 
 #if DATA_SIZE <= 2
-static inline int glue(glue(lds, SUFFIX), MEMSUFFIX)(target_ulong ptr)
+static inline int
+glue(glue(cpu_lds, SUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr)
 {
     int res, page_index;
     target_ulong addr;
-    unsigned long physaddr;
     int mmu_idx;
 
     addr = ptr;
@@ -122,10 +114,11 @@
     mmu_idx = CPU_MMU_INDEX;
     if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
-        res = (DATA_STYPE)glue(glue(__ld, SUFFIX), MMUSUFFIX)(addr, mmu_idx);
+        res = (DATA_STYPE)glue(glue(helper_ld, SUFFIX),
+                               MMUSUFFIX)(env, addr, mmu_idx);
     } else {
-        physaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
-        res = glue(glue(lds, SUFFIX), _raw)((uint8_t *)physaddr);
+        uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
+        res = glue(glue(lds, SUFFIX), _raw)(hostaddr);
     }
     return res;
 }
@@ -135,11 +128,12 @@
 
 /* generic store macro */
 
-static inline void glue(glue(st, SUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE v)
+static inline void
+glue(glue(cpu_st, SUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr,
+                                      RES_TYPE v)
 {
     int page_index;
     target_ulong addr;
-    unsigned long physaddr;
     int mmu_idx;
 
     addr = ptr;
@@ -147,10 +141,10 @@
     mmu_idx = CPU_MMU_INDEX;
     if (unlikely(env->tlb_table[mmu_idx][page_index].addr_write !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
-        glue(glue(__st, SUFFIX), MMUSUFFIX)(addr, v, mmu_idx);
+        glue(glue(helper_st, SUFFIX), MMUSUFFIX)(env, addr, v, mmu_idx);
     } else {
-        physaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
-        glue(glue(st, SUFFIX), _raw)((uint8_t *)physaddr, v);
+        uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
+        glue(glue(st, SUFFIX), _raw)(hostaddr, v);
     }
 }
 
@@ -159,46 +153,50 @@
 #if ACCESS_TYPE != (NB_MMU_MODES + 1)
 
 #if DATA_SIZE == 8
-static inline float64 glue(ldfq, MEMSUFFIX)(target_ulong ptr)
+static inline float64 glue(cpu_ldfq, MEMSUFFIX)(CPUArchState *env,
+                                                target_ulong ptr)
 {
     union {
         float64 d;
         uint64_t i;
     } u;
-    u.i = glue(ldq, MEMSUFFIX)(ptr);
+    u.i = glue(cpu_ldq, MEMSUFFIX)(env, ptr);
     return u.d;
 }
 
-static inline void glue(stfq, MEMSUFFIX)(target_ulong ptr, float64 v)
+static inline void glue(cpu_stfq, MEMSUFFIX)(CPUArchState *env,
+                                             target_ulong ptr, float64 v)
 {
     union {
         float64 d;
         uint64_t i;
     } u;
     u.d = v;
-    glue(stq, MEMSUFFIX)(ptr, u.i);
+    glue(cpu_stq, MEMSUFFIX)(env, ptr, u.i);
 }
 #endif /* DATA_SIZE == 8 */
 
 #if DATA_SIZE == 4
-static inline float32 glue(ldfl, MEMSUFFIX)(target_ulong ptr)
+static inline float32 glue(cpu_ldfl, MEMSUFFIX)(CPUArchState *env,
+                                                target_ulong ptr)
 {
     union {
         float32 f;
         uint32_t i;
     } u;
-    u.i = glue(ldl, MEMSUFFIX)(ptr);
+    u.i = glue(cpu_ldl, MEMSUFFIX)(env, ptr);
     return u.f;
 }
 
-static inline void glue(stfl, MEMSUFFIX)(target_ulong ptr, float32 v)
+static inline void glue(cpu_stfl, MEMSUFFIX)(CPUArchState *env,
+                                             target_ulong ptr, float32 v)
 {
     union {
         float32 f;
         uint32_t i;
     } u;
     u.f = v;
-    glue(stl, MEMSUFFIX)(ptr, u.i);
+    glue(cpu_stl, MEMSUFFIX)(env, ptr, u.i);
 }
 #endif /* DATA_SIZE == 4 */
 
diff --git a/include/exec/softmmu_outside_jit.h b/include/exec/softmmu_outside_jit.h
deleted file mode 100644
index 716f79b..0000000
--- a/include/exec/softmmu_outside_jit.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/* Copyright (C) 2007-2009 The Android Open Source Project
-**
-** This software is licensed under the terms of the GNU General Public
-** License version 2, as published by the Free Software Foundation, and
-** may be copied, distributed, and modified under those terms.
-**
-** This program is distributed in the hope that it will be useful,
-** but WITHOUT ANY WARRANTY; without even the implied warranty of
-** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-** GNU General Public License for more details.
-*/
-
-/*
- * Contains SOFTMMU macros expansion for ldx_user and stx_user routines used
- * outside of JIT. The issue is that regular implementation of these routines
- * assumes that pointer to CPU environment is stored in ebp register, which
- * is true for calls made inside JIT, but is not necessarily true for calls
- * made outside of JIT. The way SOFTMMU macros are expanded in this header
- * enforces ldx/stx routines to use CPU environment stored in cpu_single_env
- * variable.
- */
-#ifndef QEMU_SOFTMMU_OUTSIDE_JIT_H
-#define QEMU_SOFTMMU_OUTSIDE_JIT_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-////////////////////////////////////////////////////////////////////////////////
-// Declares routines implemented in softmmu_outside_jit.c, that are used in
-// this macros expansion. Note that MMUSUFFIX _outside_jit is enforced in
-// softmmu_header.h by defining OUTSIDE_JIT macro.
-////////////////////////////////////////////////////////////////////////////////
-
-uint8_t REGPARM __ldb_outside_jit(target_ulong addr, int mmu_idx);
-void REGPARM __stb_outside_jit(target_ulong addr, uint8_t val, int mmu_idx);
-uint16_t REGPARM __ldw_outside_jit(target_ulong addr, int mmu_idx);
-void REGPARM __stw_outside_jit(target_ulong addr, uint16_t val, int mmu_idx);
-uint32_t REGPARM __ldl_outside_jit(target_ulong addr, int mmu_idx);
-void REGPARM __stl_outside_jit(target_ulong addr, uint32_t val, int mmu_idx);
-uint64_t REGPARM __ldq_outside_jit(target_ulong addr, int mmu_idx);
-void REGPARM __stq_outside_jit(target_ulong addr, uint64_t val, int mmu_idx);
-
-// Enforces MMUSUFFIX to be set to _outside_jit in softmmu_header.h
-#define OUTSIDE_JIT
-// Enforces use of cpu_single_env for CPU environment.
-#define env cpu_single_env
-
-// =============================================================================
-// Generate ld/stx_user
-// =============================================================================
-#if defined(TARGET_MIPS)
-#define MEMSUFFIX MMU_MODE2_SUFFIX
-#else
-#define MEMSUFFIX MMU_MODE1_SUFFIX
-#endif
-#define ACCESS_TYPE 1
-
-#define DATA_SIZE 1
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 2
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 4
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 8
-#include "exec/softmmu_header.h"
-
-#undef MEMSUFFIX
-#undef ACCESS_TYPE
-
-// =============================================================================
-// Generate ld/stx_kernel
-// =============================================================================
-#define MEMSUFFIX MMU_MODE0_SUFFIX
-#define ACCESS_TYPE 0
-
-#define DATA_SIZE 1
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 2
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 4
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 8
-#include "exec/softmmu_header.h"
-
-#undef MEMSUFFIX
-#undef ACCESS_TYPE
-
-#undef env
-#undef OUTSIDE_JIT
-
-#ifdef __cplusplus
-};  /* end of extern "C" */
-#endif
-
-#endif  // QEMU_SOFTMMU_OUTSIDE_JIT_H
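The deleted header above only existed because the old accessors located the CPU state implicitly (ebp / cpu_single_env). With the env-explicit accessors now generated by softmmu_header.h, the same load can be written once and called from anywhere; a sketch, where the cpu_ldl_kernel name assumes MMU_MODE0_SUFFIX is _kernel (as on x86/ARM) and the includes follow the usual target op-helper layout:

#include "cpu.h"
#include "exec/softmmu_exec.h"

/* Works identically whether called from generated code or from regular
 * emulator code, because env is passed explicitly instead of being assumed
 * to live in a fixed register. */
static uint32_t example_load_kernel_word(CPUArchState *env, target_ulong vaddr)
{
    return cpu_ldl_kernel(env, vaddr);
}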
diff --git a/include/exec/softmmu_template.h b/include/exec/softmmu_template.h
index 66f2844..21d9701 100644
--- a/include/exec/softmmu_template.h
+++ b/include/exec/softmmu_template.h
@@ -27,24 +27,40 @@
 
 #if DATA_SIZE == 8
 #define SUFFIX q
-#define USUFFIX q
-#define DATA_TYPE uint64_t
+#define LSUFFIX q
+#define SDATA_TYPE  int64_t
 #elif DATA_SIZE == 4
 #define SUFFIX l
-#define USUFFIX l
-#define DATA_TYPE uint32_t
+#define LSUFFIX l
+#define SDATA_TYPE  int32_t
 #elif DATA_SIZE == 2
 #define SUFFIX w
-#define USUFFIX uw
-#define DATA_TYPE uint16_t
+#define LSUFFIX uw
+#define SDATA_TYPE  int16_t
 #elif DATA_SIZE == 1
 #define SUFFIX b
-#define USUFFIX ub
-#define DATA_TYPE uint8_t
+#define LSUFFIX ub
+#define SDATA_TYPE  int8_t
 #else
 #error unsupported data size
 #endif
 
+#define DATA_TYPE   glue(u, SDATA_TYPE)
+
+/* For the benefit of TCG generated code, we want to avoid the complication
+   of ABI-specific return type promotion and always return a value extended
+   to the register size of the host.  This is tcg_target_long, except in the
+   case of a 32-bit host and 64-bit data, and for that we always have
+   uint64_t.  Don't bother with this widened value for SOFTMMU_CODE_ACCESS.  */
+#if defined(SOFTMMU_CODE_ACCESS) || DATA_SIZE == 8
+# define WORD_TYPE  DATA_TYPE
+# define USUFFIX    SUFFIX
+#else
+# define WORD_TYPE  tcg_target_ulong
+# define USUFFIX    glue(u, SUFFIX)
+# define SSUFFIX    glue(s, SUFFIX)
+#endif
+
 #ifdef SOFTMMU_CODE_ACCESS
 #define READ_ACCESS_TYPE 2
 #define ADDR_READ addr_code
@@ -53,7 +69,50 @@
 #define ADDR_READ addr_read
 #endif
 
-#if defined(CONFIG_MEMCHECK) && !defined(OUTSIDE_JIT) && !defined(SOFTMMU_CODE_ACCESS)
+#if DATA_SIZE == 8
+# define BSWAP(X)  bswap64(X)
+#elif DATA_SIZE == 4
+# define BSWAP(X)  bswap32(X)
+#elif DATA_SIZE == 2
+# define BSWAP(X)  bswap16(X)
+#else
+# define BSWAP(X)  (X)
+#endif
+
+#ifdef TARGET_WORDS_BIGENDIAN
+# define TGT_BE(X)  (X)
+# define TGT_LE(X)  BSWAP(X)
+#else
+# define TGT_BE(X)  BSWAP(X)
+# define TGT_LE(X)  (X)
+#endif
+
+#if DATA_SIZE == 1
+# define helper_le_ld_name  glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)
+# define helper_be_ld_name  helper_le_ld_name
+# define helper_le_lds_name glue(glue(helper_ret_ld, SSUFFIX), MMUSUFFIX)
+# define helper_be_lds_name helper_le_lds_name
+# define helper_le_st_name  glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)
+# define helper_be_st_name  helper_le_st_name
+#else
+# define helper_le_ld_name  glue(glue(helper_le_ld, USUFFIX), MMUSUFFIX)
+# define helper_be_ld_name  glue(glue(helper_be_ld, USUFFIX), MMUSUFFIX)
+# define helper_le_lds_name glue(glue(helper_le_ld, SSUFFIX), MMUSUFFIX)
+# define helper_be_lds_name glue(glue(helper_be_ld, SSUFFIX), MMUSUFFIX)
+# define helper_le_st_name  glue(glue(helper_le_st, SUFFIX), MMUSUFFIX)
+# define helper_be_st_name  glue(glue(helper_be_st, SUFFIX), MMUSUFFIX)
+#endif
+
+#ifdef TARGET_WORDS_BIGENDIAN
+# define helper_te_ld_name  helper_be_ld_name
+# define helper_te_st_name  helper_be_st_name
+#else
+# define helper_te_ld_name  helper_le_ld_name
+# define helper_te_st_name  helper_le_st_name
+#endif
+
+
+#if defined(CONFIG_ANDROID_MEMCHECK) && !defined(SOFTMMU_CODE_ACCESS)
 /*
  * Support for memory access checker.
  * We need to instrument __ldx/__stx_mmu routines implemented in this file with
@@ -63,355 +122,557 @@
  * to instrument code that is used by emulator itself (OUTSIDE_JIT controls
  * that).
  */
-#define CONFIG_MEMCHECK_MMU
-#include "memcheck/memcheck_api.h"
-#endif  // CONFIG_MEMCHECK && !OUTSIDE_JIT && !SOFTMMU_CODE_ACCESS
+#define CONFIG_ANDROID_MEMCHECK_MMU
+#include "android/qemu/memcheck/memcheck_api.h"
+#endif  // CONFIG_ANDROID_MEMCHECK && !SOFTMMU_CODE_ACCESS
 
-static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
-                                                        int mmu_idx,
-                                                        void *retaddr);
-static inline DATA_TYPE glue(io_read, SUFFIX)(hwaddr physaddr,
+static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
+                                              hwaddr physaddr,
                                               target_ulong addr,
-                                              void *retaddr)
+                                              uintptr_t retaddr)
 {
-    DATA_TYPE res;
-    int index;
-    index = (physaddr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+    uint64_t val;
+    int index = (physaddr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
     physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
-    env->mem_io_pc = (unsigned long)retaddr;
+    env->mem_io_pc = retaddr;
     if (index > (IO_MEM_NOTDIRTY >> IO_MEM_SHIFT)
             && !can_do_io(env)) {
-        cpu_io_recompile(env, (uintptr_t)retaddr);
+        cpu_io_recompile(env, retaddr);
     }
 
     env->mem_io_vaddr = addr;
 #if SHIFT <= 2
-    res = io_mem_read[index][SHIFT](io_mem_opaque[index], physaddr);
+    val = io_mem_read(index, physaddr, 1 << SHIFT);
 #else
 #ifdef TARGET_WORDS_BIGENDIAN
-    res = (uint64_t)io_mem_read[index][2](io_mem_opaque[index], physaddr) << 32;
-    res |= io_mem_read[index][2](io_mem_opaque[index], physaddr + 4);
+    val = (uint64_t)io_mem_read(index, physaddr, 4) << 32;
+    val |= io_mem_read(index, physaddr + 4, 4);
 #else
-    res = io_mem_read[index][2](io_mem_opaque[index], physaddr);
-    res |= (uint64_t)io_mem_read[index][2](io_mem_opaque[index], physaddr + 4) << 32;
+    val = io_mem_read(index, physaddr, 4);
+    val |= (uint64_t)io_mem_read(index, physaddr + 4, 4) << 32;
 #endif
 #endif /* SHIFT > 2 */
-    return res;
+    return val;
 }
 
-/* handle all cases except unaligned access which span two pages */
-DATA_TYPE REGPARM glue(glue(__ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
-                                                      int mmu_idx)
+#ifdef SOFTMMU_CODE_ACCESS
+static __attribute__((unused))
+#endif
+WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, int mmu_idx,
+                            uintptr_t retaddr)
 {
+    int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
+    uintptr_t haddr;
     DATA_TYPE res;
-    int index;
-    target_ulong tlb_addr;
-    hwaddr ioaddr;
-    unsigned long addend;
-    void *retaddr;
-#ifdef CONFIG_MEMCHECK_MMU
-    int invalidate_cache = 0;
-#endif  // CONFIG_MEMCHECK_MMU
 
-    /* test if there is match for unaligned or IO access */
-    /* XXX: could done more in memory macro in a non portable way */
-    index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
- redo:
-    tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
-    if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
-        if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            retaddr = GETPC();
-            ioaddr = env->iotlb[mmu_idx][index];
-            res = glue(io_read, SUFFIX)(ioaddr, addr, retaddr);
-        } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
-            /* This is not I/O access: do access verification. */
-#ifdef CONFIG_MEMCHECK_MMU
-            /* We only validate access to the guest's user space, for which
-             * mmu_idx is set to 1. */
-            if (memcheck_instrument_mmu && mmu_idx == 1 &&
-                memcheck_validate_ld(addr, DATA_SIZE, (target_ulong)(ptrdiff_t)GETPC())) {
-                /* Memory read breaks page boundary. So, if required, we
-                 * must invalidate two caches in TLB. */
-                invalidate_cache = 2;
-            }
-#endif  // CONFIG_MEMCHECK_MMU
-            /* slow unaligned access (it spans two pages or IO) */
-        do_unaligned_access:
-            retaddr = GETPC();
+    /* Adjust the given return address.  */
+    retaddr -= GETPC_ADJ;
+#ifdef CONFIG_ANDROID_MEMCHECK_MMU
+    int invalidate_cache = 0;
+#endif  // CONFIG_ANDROID_MEMCHECK_MMU
+
+    /* If the TLB entry is for a different page, reload and try again.  */
+    if ((addr & TARGET_PAGE_MASK)
+         != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
 #ifdef ALIGNED_ONLY
-            do_unaligned_access(addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
-#endif
-            res = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr,
-                                                         mmu_idx, retaddr);
-        } else {
-#ifdef CONFIG_MEMCHECK_MMU
-            /* We only validate access to the guest's user space, for which
-             * mmu_idx is set to 1. */
-            if (memcheck_instrument_mmu && mmu_idx == 1) {
-                invalidate_cache = memcheck_validate_ld(addr, DATA_SIZE,
-                                                        (target_ulong)(ptrdiff_t)GETPC());
-            }
-#endif  // CONFIG_MEMCHECK_MMU
-            /* unaligned/aligned access in the same page */
-#ifdef ALIGNED_ONLY
-            if ((addr & (DATA_SIZE - 1)) != 0) {
-                retaddr = GETPC();
-                do_unaligned_access(addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
-            }
-#endif
-            addend = env->tlb_table[mmu_idx][index].addend;
-            res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)(long)(addr+addend));
+        if ((addr & (DATA_SIZE - 1)) != 0) {
+            do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
         }
-#ifdef CONFIG_MEMCHECK_MMU
-        if (invalidate_cache) {
-            /* Accessed memory is under memchecker control. We must invalidate
-             * containing page(s) in order to make sure that next access to them
-             * will invoke _ld/_st_mmu. */
-            env->tlb_table[mmu_idx][index].addr_read ^= TARGET_PAGE_MASK;
-            env->tlb_table[mmu_idx][index].addr_write ^= TARGET_PAGE_MASK;
-            if ((invalidate_cache == 2) && (index < CPU_TLB_SIZE)) {
-                // Read crossed page boundaris. Invalidate second cache too.
-                env->tlb_table[mmu_idx][index + 1].addr_read ^= TARGET_PAGE_MASK;
-                env->tlb_table[mmu_idx][index + 1].addr_write ^= TARGET_PAGE_MASK;
-            }
-        }
-#endif  // CONFIG_MEMCHECK_MMU
-    } else {
-        /* the page is not in the TLB : fill it */
-        retaddr = GETPC();
-#ifdef ALIGNED_ONLY
-        if ((addr & (DATA_SIZE - 1)) != 0)
-            do_unaligned_access(addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
 #endif
-        tlb_fill(addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
-        goto redo;
+        tlb_fill(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+        tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
     }
+
+    /* Handle an IO access.  */
+    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
+        hwaddr ioaddr;
+        if ((addr & (DATA_SIZE - 1)) != 0) {
+            goto do_unaligned_access;
+        }
+        ioaddr = env->iotlb[mmu_idx][index];
+
+        /* ??? Note that the io helpers always read data in the target
+           byte ordering.  We should push the LE/BE request down into io.  */
+        res = glue(io_read, SUFFIX)(env, ioaddr, addr, retaddr);
+        res = TGT_LE(res);
+        return res;
+    }
+
+    /* Handle slow unaligned access (it spans two pages or IO).  */
+    if (DATA_SIZE > 1
+        && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
+                    >= TARGET_PAGE_SIZE)) {
+#ifdef CONFIG_ANDROID_MEMCHECK_MMU
+        /* We only validate access to the guest's user space, for which
+         * mmu_idx is set to 1. */
+        if (memcheck_instrument_mmu && mmu_idx == 1 &&
+            memcheck_validate_ld(addr, DATA_SIZE, (target_ulong)(retaddr - GETPC_ADJ))) {
+            /* Memory read breaks page boundary. So, if required, we
+             * must invalidate two caches in TLB. */
+            invalidate_cache = 2;
+        }
+#endif  // CONFIG_ANDROID_MEMCHECK_MMU
+        target_ulong addr1, addr2;
+        DATA_TYPE res1, res2;
+        unsigned shift;
+    do_unaligned_access:
+#ifdef ALIGNED_ONLY
+        do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+#endif
+        addr1 = addr & ~(DATA_SIZE - 1);
+        addr2 = addr1 + DATA_SIZE;
+        /* Note the adjustment at the beginning of the function.
+           Undo that for the recursion.  */
+        res1 = helper_le_ld_name(env, addr1, mmu_idx, retaddr + GETPC_ADJ);
+        res2 = helper_le_ld_name(env, addr2, mmu_idx, retaddr + GETPC_ADJ);
+        shift = (addr & (DATA_SIZE - 1)) * 8;
+
+        /* Little-endian combine.  */
+        res = (res1 >> shift) | (res2 << ((DATA_SIZE * 8) - shift));
+        return res;
+    }
+#ifdef CONFIG_ANDROID_MEMCHECK_MMU
+    if (DATA_SIZE == 1) {
+        if (memcheck_instrument_mmu && mmu_idx == 1) {
+            invalidate_cache = memcheck_validate_ld(addr, DATA_SIZE,
+                                                    (target_ulong)(retaddr + GETPC_ADJ));
+        }
+    }
+#endif  // CONFIG_ANDROID_MEMCHECK_MMU
+
+    /* Handle aligned access or unaligned access in the same page.  */
+#ifdef ALIGNED_ONLY
+    if ((addr & (DATA_SIZE - 1)) != 0) {
+        do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+    }
+#endif
+
+    haddr = addr + env->tlb_table[mmu_idx][index].addend;
+#if DATA_SIZE == 1
+    res = glue(glue(ld, LSUFFIX), _p)((uint8_t *)haddr);
+#else
+    res = glue(glue(ld, LSUFFIX), _le_p)((uint8_t *)haddr);
+#endif
+#ifdef CONFIG_ANDROID_MEMCHECK_MMU
+    if (invalidate_cache) {
+        /* Accessed memory is under memchecker control. We must invalidate
+         * containing page(s) in order to make sure that next access to them
+         * will invoke _ld/_st_mmu. */
+        env->tlb_table[mmu_idx][index].addr_read ^= TARGET_PAGE_MASK;
+        env->tlb_table[mmu_idx][index].addr_write ^= TARGET_PAGE_MASK;
+        if ((invalidate_cache == 2) && (index < CPU_TLB_SIZE)) {
+            // Read crossed page boundaries. Invalidate second cache too.
+            env->tlb_table[mmu_idx][index + 1].addr_read ^= TARGET_PAGE_MASK;
+            env->tlb_table[mmu_idx][index + 1].addr_write ^= TARGET_PAGE_MASK;
+        }
+    }
+#endif  // CONFIG_ANDROID_MEMCHECK_MMU
     return res;
 }
 
-/* handle all unaligned cases */
-static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
-                                                        int mmu_idx,
-                                                        void *retaddr)
-{
-    DATA_TYPE res, res1, res2;
-    int index, shift;
-    hwaddr ioaddr;
-    unsigned long addend;
-    target_ulong tlb_addr, addr1, addr2;
-
-    index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
- redo:
-    tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
-    if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
-        if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            ioaddr = env->iotlb[mmu_idx][index];
-            res = glue(io_read, SUFFIX)(ioaddr, addr, retaddr);
-        } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
-        do_unaligned_access:
-            /* slow unaligned access (it spans two pages) */
-            addr1 = addr & ~(DATA_SIZE - 1);
-            addr2 = addr1 + DATA_SIZE;
-            res1 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr1,
-                                                          mmu_idx, retaddr);
-            res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr2,
-                                                          mmu_idx, retaddr);
-            shift = (addr & (DATA_SIZE - 1)) * 8;
-#ifdef TARGET_WORDS_BIGENDIAN
-            res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift));
-#else
-            res = (res1 >> shift) | (res2 << ((DATA_SIZE * 8) - shift));
+#if DATA_SIZE > 1
+#ifdef SOFTMMU_CODE_ACCESS
+static __attribute__((unused))
 #endif
-            res = (DATA_TYPE)res;
-        } else {
-            /* unaligned/aligned access in the same page */
-            addend = env->tlb_table[mmu_idx][index].addend;
-            res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)(long)(addr+addend));
+WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, int mmu_idx,
+                            uintptr_t retaddr)
+{
+    int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
+    uintptr_t haddr;
+    DATA_TYPE res;
+
+    /* Adjust the given return address.  */
+    retaddr -= GETPC_ADJ;
+#ifdef CONFIG_ANDROID_MEMCHECK_MMU
+    int invalidate_cache = 0;
+#endif  // CONFIG_ANDROID_MEMCHECK_MMU
+
+    /* If the TLB entry is for a different page, reload and try again.  */
+    if ((addr & TARGET_PAGE_MASK)
+         != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+#ifdef ALIGNED_ONLY
+        if ((addr & (DATA_SIZE - 1)) != 0) {
+            do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
         }
-    } else {
-        /* the page is not in the TLB : fill it */
-        tlb_fill(addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
-        goto redo;
+#endif
+        tlb_fill(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+        tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
     }
+
+    /* Handle an IO access.  */
+    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
+        hwaddr ioaddr;
+        if ((addr & (DATA_SIZE - 1)) != 0) {
+            goto do_unaligned_access;
+        }
+        ioaddr = env->iotlb[mmu_idx][index];
+
+        /* ??? Note that the io helpers always read data in the target
+           byte ordering.  We should push the LE/BE request down into io.  */
+        res = glue(io_read, SUFFIX)(env, ioaddr, addr, retaddr);
+        res = TGT_BE(res);
+        return res;
+    }
+
+    /* Handle slow unaligned access (it spans two pages or IO).  */
+    if (DATA_SIZE > 1
+        && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
+                    >= TARGET_PAGE_SIZE)) {
+#ifdef CONFIG_ANDROID_MEMCHECK_MMU
+        /* We only validate access to the guest's user space, for which
+         * mmu_idx is set to 1. */
+        if (memcheck_instrument_mmu && mmu_idx == 1 &&
+            memcheck_validate_ld(addr, DATA_SIZE, (target_ulong)(retaddr - GETPC_ADJ))) {
+            /* Memory read breaks page boundary. So, if required, we
+             * must invalidate two caches in TLB. */
+            invalidate_cache = 2;
+        }
+#endif  // CONFIG_ANDROID_MEMCHECK_MMU
+        target_ulong addr1, addr2;
+        DATA_TYPE res1, res2;
+        unsigned shift;
+    do_unaligned_access:
+#ifdef ALIGNED_ONLY
+        do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+#endif
+        addr1 = addr & ~(DATA_SIZE - 1);
+        addr2 = addr1 + DATA_SIZE;
+        /* Note the adjustment at the beginning of the function.
+           Undo that for the recursion.  */
+        res1 = helper_be_ld_name(env, addr1, mmu_idx, retaddr + GETPC_ADJ);
+        res2 = helper_be_ld_name(env, addr2, mmu_idx, retaddr + GETPC_ADJ);
+        shift = (addr & (DATA_SIZE - 1)) * 8;
+
+        /* Big-endian combine.  */
+        res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift));
+        return res;
+    }
+#ifdef CONFIG_ANDROID_MEMCHECK_MMU
+    if (DATA_SIZE == 1) {
+        if (memcheck_instrument_mmu && mmu_idx == 1) {
+            invalidate_cache = memcheck_validate_ld(addr, DATA_SIZE,
+                                                    (target_ulong)(retaddr + GETPC_ADJ));
+        }
+    }
+#endif  // CONFIG_ANDROID_MEMCHECK_MMU
+
+    /* Handle aligned access or unaligned access in the same page.  */
+#ifdef ALIGNED_ONLY
+    if ((addr & (DATA_SIZE - 1)) != 0) {
+        do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+    }
+#endif
+
+    haddr = addr + env->tlb_table[mmu_idx][index].addend;
+    res = glue(glue(ld, LSUFFIX), _be_p)((uint8_t *)haddr);
+#ifdef CONFIG_ANDROID_MEMCHECK_MMU
+    if (invalidate_cache) {
+        /* Accessed memory is under memchecker control. We must invalidate
+         * containing page(s) in order to make sure that next access to them
+         * will invoke _ld/_st_mmu. */
+        env->tlb_table[mmu_idx][index].addr_read ^= TARGET_PAGE_MASK;
+        env->tlb_table[mmu_idx][index].addr_write ^= TARGET_PAGE_MASK;
+        if ((invalidate_cache == 2) && (index < CPU_TLB_SIZE)) {
+            // Read crossed page boundaries. Invalidate second cache too.
+            env->tlb_table[mmu_idx][index + 1].addr_read ^= TARGET_PAGE_MASK;
+            env->tlb_table[mmu_idx][index + 1].addr_write ^= TARGET_PAGE_MASK;
+        }
+    }
+#endif  // CONFIG_ANDROID_MEMCHECK_MMU
     return res;
 }
+#endif /* DATA_SIZE > 1 */
+
+DATA_TYPE
+glue(glue(helper_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr,
+                                         int mmu_idx)
+{
+    return helper_te_ld_name (env, addr, mmu_idx, GETRA());
+}
 
 #ifndef SOFTMMU_CODE_ACCESS
 
-static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(target_ulong addr,
-                                                   DATA_TYPE val,
-                                                   int mmu_idx,
-                                                   void *retaddr);
+/* Provide signed versions of the load routines as well.  We can of course
+   avoid this for 64-bit data, or for 32-bit data on 32-bit host.  */
+#if DATA_SIZE * 8 < TCG_TARGET_REG_BITS
+WORD_TYPE helper_le_lds_name(CPUArchState *env, target_ulong addr,
+                             int mmu_idx, uintptr_t retaddr)
+{
+    return (SDATA_TYPE)helper_le_ld_name(env, addr, mmu_idx, retaddr);
+}
 
-static inline void glue(io_write, SUFFIX)(hwaddr physaddr,
+# if DATA_SIZE > 1
+WORD_TYPE helper_be_lds_name(CPUArchState *env, target_ulong addr,
+                             int mmu_idx, uintptr_t retaddr)
+{
+    return (SDATA_TYPE)helper_be_ld_name(env, addr, mmu_idx, retaddr);
+}
+# endif
+#endif
+
+static inline void glue(io_write, SUFFIX)(CPUArchState *env,
+                                          hwaddr physaddr,
                                           DATA_TYPE val,
                                           target_ulong addr,
-                                          void *retaddr)
+                                          uintptr_t retaddr)
 {
-    int index;
-    index = (physaddr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+    int index = (physaddr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
     physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
     if (index > (IO_MEM_NOTDIRTY >> IO_MEM_SHIFT)
             && !can_do_io(env)) {
-        cpu_io_recompile(env, (uintptr_t)retaddr);
+        cpu_io_recompile(env, retaddr);
     }
 
     env->mem_io_vaddr = addr;
-    env->mem_io_pc = (unsigned long)retaddr;
+    env->mem_io_pc = retaddr;
 #if SHIFT <= 2
-    io_mem_write[index][SHIFT](io_mem_opaque[index], physaddr, val);
+    io_mem_write(index, physaddr, val, 1 << SHIFT);
 #else
 #ifdef TARGET_WORDS_BIGENDIAN
-    io_mem_write[index][2](io_mem_opaque[index], physaddr, val >> 32);
-    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, val);
+    io_mem_write(index, physaddr, val >> 32, 4);
+    io_mem_write(index, physaddr + 4, val, 4);
 #else
-    io_mem_write[index][2](io_mem_opaque[index], physaddr, val);
-    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, val >> 32);
+    io_mem_write(index, physaddr, val, 4);
+    io_mem_write(index, physaddr + 4, val >> 32, 4);
 #endif
 #endif /* SHIFT > 2 */
 }
 
-void REGPARM glue(glue(__st, SUFFIX), MMUSUFFIX)(target_ulong addr,
-                                                 DATA_TYPE val,
-                                                 int mmu_idx)
+void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
+                       int mmu_idx, uintptr_t retaddr)
 {
-    hwaddr ioaddr;
-    unsigned long addend;
-    target_ulong tlb_addr;
-    void *retaddr;
-    int index;
-#ifdef CONFIG_MEMCHECK_MMU
-    int invalidate_cache = 0;
-#endif  // CONFIG_MEMCHECK_MMU
+    int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+    uintptr_t haddr;
 
-    index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
- redo:
-    tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
-    if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
-        if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            retaddr = GETPC();
-            ioaddr = env->iotlb[mmu_idx][index];
-            glue(io_write, SUFFIX)(ioaddr, val, addr, retaddr);
-        } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
-            /* This is not I/O access: do access verification. */
-#ifdef CONFIG_MEMCHECK_MMU
-            /* We only validate access to the guest's user space, for which
-             * mmu_idx is set to 1. */
-            if (memcheck_instrument_mmu && mmu_idx == 1 &&
-                memcheck_validate_st(addr, DATA_SIZE, (uint64_t)val,
-                                     (target_ulong)(ptrdiff_t)GETPC())) {
-                /* Memory write breaks page boundary. So, if required, we
-                 * must invalidate two caches in TLB. */
-                invalidate_cache = 2;
-            }
-#endif  // CONFIG_MEMCHECK_MMU
-        do_unaligned_access:
-            retaddr = GETPC();
+#ifdef CONFIG_ANDROID_MEMCHECK_MMU
+    int invalidate_cache = 0;
+#endif  // CONFIG_ANDROID_MEMCHECK_MMU
+
+    /* Adjust the given return address.  */
+    retaddr -= GETPC_ADJ;
+
+    /* If the TLB entry is for a different page, reload and try again.  */
+    if ((addr & TARGET_PAGE_MASK)
+        != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
 #ifdef ALIGNED_ONLY
-            do_unaligned_access(addr, 1, mmu_idx, retaddr);
-#endif
-            glue(glue(slow_st, SUFFIX), MMUSUFFIX)(addr, val,
-                                                   mmu_idx, retaddr);
-        } else {
-#ifdef CONFIG_MEMCHECK_MMU
-            /* We only validate access to the guest's user space, for which
-             * mmu_idx is set to 1. */
-            if (memcheck_instrument_mmu && mmu_idx == 1) {
-                invalidate_cache = memcheck_validate_st(addr, DATA_SIZE,
-                                                        (uint64_t)val,
-                                                        (target_ulong)(ptrdiff_t)GETPC());
-            }
-#endif  // CONFIG_MEMCHECK_MMU
-            /* aligned/unaligned access in the same page */
-#ifdef ALIGNED_ONLY
-            if ((addr & (DATA_SIZE - 1)) != 0) {
-                retaddr = GETPC();
-                do_unaligned_access(addr, 1, mmu_idx, retaddr);
-            }
-#endif
-            addend = env->tlb_table[mmu_idx][index].addend;
-            glue(glue(st, SUFFIX), _raw)((uint8_t *)(long)(addr+addend), val);
+        if ((addr & (DATA_SIZE - 1)) != 0) {
+            do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
         }
-#ifdef CONFIG_MEMCHECK_MMU
-        if (invalidate_cache) {
-            /* Accessed memory is under memchecker control. We must invalidate
-             * containing page(s) in order to make sure that next access to them
-             * will invoke _ld/_st_mmu. */
-            env->tlb_table[mmu_idx][index].addr_read ^= TARGET_PAGE_MASK;
-            env->tlb_table[mmu_idx][index].addr_write ^= TARGET_PAGE_MASK;
-            if ((invalidate_cache == 2) && (index < CPU_TLB_SIZE)) {
-                // Write crossed page boundaris. Invalidate second cache too.
-                env->tlb_table[mmu_idx][index + 1].addr_read ^= TARGET_PAGE_MASK;
-                env->tlb_table[mmu_idx][index + 1].addr_write ^= TARGET_PAGE_MASK;
-            }
-        }
-#endif  // CONFIG_MEMCHECK_MMU
-    } else {
-        /* the page is not in the TLB : fill it */
-        retaddr = GETPC();
-#ifdef ALIGNED_ONLY
-        if ((addr & (DATA_SIZE - 1)) != 0)
-            do_unaligned_access(addr, 1, mmu_idx, retaddr);
 #endif
-        tlb_fill(addr, 1, mmu_idx, retaddr);
-        goto redo;
+        tlb_fill(env, addr, 1, mmu_idx, retaddr);
+        tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
     }
+
+    /* Handle an IO access.  */
+    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
+        hwaddr ioaddr;
+        if ((addr & (DATA_SIZE - 1)) != 0) {
+            goto do_unaligned_access;
+        }
+        ioaddr = env->iotlb[mmu_idx][index];
+
+        /* ??? Note that the io helpers always read data in the target
+           byte ordering.  We should push the LE/BE request down into io.  */
+        val = TGT_LE(val);
+        glue(io_write, SUFFIX)(env, ioaddr, val, addr, retaddr);
+        return;
+    }
+
+    /* Handle slow unaligned access (it spans two pages or IO).  */
+    if (DATA_SIZE > 1
+        && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
+                     >= TARGET_PAGE_SIZE)) {
+#ifdef CONFIG_ANDROID_MEMCHECK_MMU
+        /* We only validate access to the guest's user space, for which
+         * mmu_idx is set to 1. */
+        if (memcheck_instrument_mmu && mmu_idx == 1 &&
+            memcheck_validate_st(addr, DATA_SIZE, (uint64_t)val,
+                                 (target_ulong)(retaddr + GETPC_ADJ))) {
+            /* Memory write breaks page boundary. So, if required, we
+             * must invalidate two caches in TLB. */
+            invalidate_cache = 2;
+        }
+#endif  // CONFIG_ANDROID_MEMCHECK_MMU
+        int i;
+    do_unaligned_access:
+#ifdef ALIGNED_ONLY
+        do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
+#endif
+        /* XXX: not efficient, but simple */
+        /* Note: relies on the fact that tlb_fill() does not remove the
+         * previous page from the TLB cache.  */
+        for (i = DATA_SIZE - 1; i >= 0; i--) {
+            /* Little-endian extract.  */
+            uint8_t val8 = val >> (i * 8);
+            /* Note the adjustment at the beginning of the function.
+               Undo that for the recursion.  */
+            glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8,
+                                            mmu_idx, retaddr + GETPC_ADJ);
+        }
+        return;
+    }
+#ifdef CONFIG_ANDROID_MEMCHECK_MMU
+    if (DATA_SIZE == 1) {
+        /* We only validate access to the guest's user space, for which
+         * mmu_idx is set to 1. */
+        if (memcheck_instrument_mmu && mmu_idx == 1) {
+            invalidate_cache = memcheck_validate_st(addr, DATA_SIZE,
+                                                    (uint64_t)val,
+                                                    (target_ulong)(retaddr + GETPC_ADJ));
+        }
+    }
+#endif  // CONFIG_ANDROID_MEMCHECK_MMU
+
+    /* Handle aligned access or unaligned access in the same page.  */
+#ifdef ALIGNED_ONLY
+    if ((addr & (DATA_SIZE - 1)) != 0) {
+        do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
+    }
+#endif
+
+    haddr = addr + env->tlb_table[mmu_idx][index].addend;
+#if DATA_SIZE == 1
+    glue(glue(st, SUFFIX), _p)((uint8_t *)haddr, val);
+#else
+    glue(glue(st, SUFFIX), _le_p)((uint8_t *)haddr, val);
+#endif
+#ifdef CONFIG_ANDROID_MEMCHECK_MMU
+    if (invalidate_cache) {
+        /* Accessed memory is under memchecker control. We must invalidate
+         * containing page(s) in order to make sure that next access to them
+         * will invoke _ld/_st_mmu. */
+        env->tlb_table[mmu_idx][index].addr_read ^= TARGET_PAGE_MASK;
+        env->tlb_table[mmu_idx][index].addr_write ^= TARGET_PAGE_MASK;
+        if ((invalidate_cache == 2) && (index < CPU_TLB_SIZE)) {
+            // Write crossed page boundaries. Invalidate second cache too.
+            env->tlb_table[mmu_idx][index + 1].addr_read ^= TARGET_PAGE_MASK;
+            env->tlb_table[mmu_idx][index + 1].addr_write ^= TARGET_PAGE_MASK;
+        }
+    }
+#endif  // CONFIG_ANDROID_MEMCHECK_MMU
 }
 
-/* handles all unaligned cases */
-static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(target_ulong addr,
-                                                   DATA_TYPE val,
-                                                   int mmu_idx,
-                                                   void *retaddr)
+#if DATA_SIZE > 1
+void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
+                       int mmu_idx, uintptr_t retaddr)
 {
-    hwaddr ioaddr;
-    unsigned long addend;
-    target_ulong tlb_addr;
-    int index, i;
+    int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+    uintptr_t haddr;
 
-    index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
- redo:
-    tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
-    if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
-        if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            ioaddr = env->iotlb[mmu_idx][index];
-            glue(io_write, SUFFIX)(ioaddr, val, addr, retaddr);
-        } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
-        do_unaligned_access:
-            /* XXX: not efficient, but simple */
-            /* Note: relies on the fact that tlb_fill() does not remove the
-             * previous page from the TLB cache.  */
-            for(i = DATA_SIZE - 1; i >= 0; i--) {
-#ifdef TARGET_WORDS_BIGENDIAN
-                glue(slow_stb, MMUSUFFIX)(addr + i, val >> (((DATA_SIZE - 1) * 8) - (i * 8)),
-                                          mmu_idx, retaddr);
-#else
-                glue(slow_stb, MMUSUFFIX)(addr + i, val >> (i * 8),
-                                          mmu_idx, retaddr);
-#endif
-            }
-        } else {
-            /* aligned/unaligned access in the same page */
-            addend = env->tlb_table[mmu_idx][index].addend;
-            glue(glue(st, SUFFIX), _raw)((uint8_t *)(long)(addr+addend), val);
+#ifdef CONFIG_ANDROID_MEMCHECK_MMU
+    int invalidate_cache = 0;
+#endif  // CONFIG_ANDROID_MEMCHECK_MMU
+
+    /* Adjust the given return address.  */
+    retaddr -= GETPC_ADJ;
+
+    /* If the TLB entry is for a different page, reload and try again.  */
+    if ((addr & TARGET_PAGE_MASK)
+        != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+#ifdef ALIGNED_ONLY
+        if ((addr & (DATA_SIZE - 1)) != 0) {
+            do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
         }
-    } else {
-        /* the page is not in the TLB : fill it */
-        tlb_fill(addr, 1, mmu_idx, retaddr);
-        goto redo;
+#endif
+        tlb_fill(env, addr, 1, mmu_idx, retaddr);
+        tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
     }
+
+    /* Handle an IO access.  */
+    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
+        hwaddr ioaddr;
+        if ((addr & (DATA_SIZE - 1)) != 0) {
+            goto do_unaligned_access;
+        }
+        ioaddr = env->iotlb[mmu_idx][index];
+
+        /* ??? Note that the io helpers always read data in the target
+           byte ordering.  We should push the LE/BE request down into io.  */
+        val = TGT_BE(val);
+        glue(io_write, SUFFIX)(env, ioaddr, val, addr, retaddr);
+        return;
+    }
+
+    /* Handle slow unaligned access (it spans two pages or IO).  */
+    if (DATA_SIZE > 1
+        && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
+                     >= TARGET_PAGE_SIZE)) {
+#ifdef CONFIG_ANDROID_MEMCHECK_MMU
+        /* We only validate access to the guest's user space, for which
+         * mmu_idx is set to 1. */
+        if (memcheck_instrument_mmu && mmu_idx == 1 &&
+            memcheck_validate_st(addr, DATA_SIZE, (uint64_t)val,
+                                 (target_ulong)(retaddr + GETPC_ADJ))) {
+            /* Memory write breaks page boundary. So, if required, we
+             * must invalidate two caches in TLB. */
+            invalidate_cache = 2;
+        }
+#endif  // CONFIG_ANDROID_MEMCHECK_MMU
+        int i;
+    do_unaligned_access:
+#ifdef ALIGNED_ONLY
+        do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
+#endif
+        /* XXX: not efficient, but simple */
+        /* Note: relies on the fact that tlb_fill() does not remove the
+         * previous page from the TLB cache.  */
+        for (i = DATA_SIZE - 1; i >= 0; i--) {
+            /* Big-endian extract.  */
+            uint8_t val8 = val >> (((DATA_SIZE - 1) * 8) - (i * 8));
+            /* Note the adjustment at the beginning of the function.
+               Undo that for the recursion.  */
+            glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8,
+                                            mmu_idx, retaddr + GETPC_ADJ);
+        }
+        return;
+    }
+#ifdef CONFIG_ANDROID_MEMCHECK_MMU
+    if (DATA_SIZE == 1) {
+        /* We only validate access to the guest's user space, for which
+         * mmu_idx is set to 1. */
+        if (memcheck_instrument_mmu && mmu_idx == 1) {
+            invalidate_cache = memcheck_validate_st(addr, DATA_SIZE,
+                                                    (uint64_t)val,
+                                                    (target_ulong)(retaddr + GETPC_ADJ));
+        }
+    }
+#endif  // CONFIG_ANDROID_MEMCHECK_MMU
+
+    /* Handle aligned access or unaligned access in the same page.  */
+#ifdef ALIGNED_ONLY
+    if ((addr & (DATA_SIZE - 1)) != 0) {
+        do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
+    }
+#endif
+
+    haddr = addr + env->tlb_table[mmu_idx][index].addend;
+    glue(glue(st, SUFFIX), _be_p)((uint8_t *)haddr, val);
+#ifdef CONFIG_ANDROID_MEMCHECK_MMU
+    if (invalidate_cache) {
+        /* Accessed memory is under memchecker control. We must invalidate
+         * containing page(s) in order to make sure that next access to them
+         * will invoke _ld/_st_mmu. */
+        env->tlb_table[mmu_idx][index].addr_read ^= TARGET_PAGE_MASK;
+        env->tlb_table[mmu_idx][index].addr_write ^= TARGET_PAGE_MASK;
+        if ((invalidate_cache == 2) && (index < CPU_TLB_SIZE)) {
+            // Write crossed page boundaries. Invalidate second cache too.
+            env->tlb_table[mmu_idx][index + 1].addr_read ^= TARGET_PAGE_MASK;
+            env->tlb_table[mmu_idx][index + 1].addr_write ^= TARGET_PAGE_MASK;
+        }
+    }
+#endif  // CONFIG_ANDROID_MEMCHECK_MMU
+}
+#endif /* DATA_SIZE > 1 */
+
+void
+glue(glue(helper_st, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr,
+                                         DATA_TYPE val, int mmu_idx)
+{
+    helper_te_st_name(env, addr, val, mmu_idx, GETRA());
 }
 
 #endif /* !defined(SOFTMMU_CODE_ACCESS) */
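Editor's note: the unaligned slow path above issues two naturally aligned loads and merges them with shifts. Here is a standalone sketch of the little-endian combine, with the buffer contents invented and a little-endian host assumed for the memcpy-based "loads":

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        /* Eight bytes of "guest memory"; read a 32-bit value at offset 2,
         * i.e. an access that straddles two aligned 32-bit words. */
        uint8_t mem[8] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77 };
        unsigned addr = 2, size = 4;

        uint32_t res1, res2;
        memcpy(&res1, mem + (addr & ~(size - 1)), size);        /* first word  */
        memcpy(&res2, mem + (addr & ~(size - 1)) + size, size); /* second word */

        /* shift is nonzero here: an aligned access never crosses the word. */
        unsigned shift = (addr & (size - 1)) * 8;
        uint32_t res = (res1 >> shift) | (res2 << ((size * 8) - shift));

        uint32_t direct;
        memcpy(&direct, mem + addr, size);  /* what a plain unaligned read sees */
        printf("combined=0x%08x direct=0x%08x\n",
               (unsigned)res, (unsigned)direct);  /* both 0x55443322 */
        return 0;
    }

The big-endian helper uses the mirror-image combine, shifting the first word left instead of right.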
@@ -420,6 +681,23 @@
 #undef SHIFT
 #undef DATA_TYPE
 #undef SUFFIX
-#undef USUFFIX
+#undef LSUFFIX
 #undef DATA_SIZE
 #undef ADDR_READ
+#undef WORD_TYPE
+#undef SDATA_TYPE
+#undef USUFFIX
+#undef SSUFFIX
+#undef BSWAP
+#undef TGT_BE
+#undef TGT_LE
+#undef CPU_BE
+#undef CPU_LE
+#undef helper_le_ld_name
+#undef helper_be_ld_name
+#undef helper_le_lds_name
+#undef helper_be_lds_name
+#undef helper_le_st_name
+#undef helper_be_st_name
+#undef helper_te_ld_name
+#undef helper_te_st_name
diff --git a/include/hw/android/goldfish/device.h b/include/hw/android/goldfish/device.h
index caf6c7c..40b614e 100644
--- a/include/hw/android/goldfish/device.h
+++ b/include/hw/android/goldfish/device.h
@@ -50,8 +50,7 @@
 void goldfish_battery_set_prop(int ac, int property, int value);
 void goldfish_battery_display(void (* callback)(void *data, const char* string), void *data);
 void goldfish_mmc_init(uint32_t base, int id, BlockDriverState* bs);
-void *goldfish_switch_add(char *name, uint32_t (*writefn)(void *opaque, uint32_t state), void *writeopaque, int id);
-void goldfish_switch_set_state(void *opaque, uint32_t state);
+int goldfish_guest_is_64bit();
 
 // these do not add a device
 void trace_dev_init();
@@ -74,4 +73,14 @@
 #define GFD_MAX_IRQ     32
 #endif
 
+static inline void uint64_set_low(uint64_t *addr, uint32 value)
+{
+    *addr = (*addr & ~(0xFFFFFFFFULL)) | value;
+}
+
+static inline void uint64_set_high(uint64_t *addr, uint32 value)
+{
+    *addr = (*addr & 0xFFFFFFFFULL) | ((uint64_t)value << 32);
+}
+
 #endif  /* GOLDFISH_DEVICE_H */
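Editor's note: the two setters above let 32-bit device registers carry the halves of a 64-bit guest value (compare the PIPE_REG_*_HIGH registers added in the next file). A throwaway sketch follows; the register values are invented and the bare uint32 type is assumed to alias uint32_t as in the rest of the tree:

    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t uint32;   /* assumed alias, matching the header's spelling */

    static inline void uint64_set_low(uint64_t *addr, uint32 value)
    {
        *addr = (*addr & ~(0xFFFFFFFFULL)) | value;
    }

    static inline void uint64_set_high(uint64_t *addr, uint32 value)
    {
        *addr = (*addr & 0xFFFFFFFFULL) | ((uint64_t)value << 32);
    }

    int main(void)
    {
        uint64_t params_addr = 0;

        /* Two 32-bit register writes assemble one 64-bit guest address;
         * each helper preserves the other half, so the order is free. */
        uint64_set_low(&params_addr, 0xdeadbeefu);
        uint64_set_high(&params_addr, 0x00000001u);

        printf("params_addr = 0x%016llx\n",
               (unsigned long long)params_addr);   /* 0x00000001deadbeef */
        return 0;
    }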
diff --git a/include/hw/android/goldfish/pipe.h b/include/hw/android/goldfish/pipe.h
index f9c1d94..3d36e90 100644
--- a/include/hw/android/goldfish/pipe.h
+++ b/include/hw/android/goldfish/pipe.h
@@ -12,6 +12,7 @@
 #ifndef _HW_GOLDFISH_PIPE_H
 #define _HW_GOLDFISH_PIPE_H
 
+#include <stdbool.h>
 #include <stdint.h>
 #include "hw/hw.h"
 
@@ -158,6 +159,8 @@
 #define PIPE_REG_PARAMS_ADDR_HIGH    0x1c
 /* write: access with paremeter buffer */
 #define PIPE_REG_ACCESS_PARAMS       0x20
+#define PIPE_REG_CHANNEL_HIGH        0x30 /* read/write: high 32 bit channel id */
+#define PIPE_REG_ADDRESS_HIGH        0x34 /* write: high 32 bit physical address */
 
 /* list of commands for PIPE_REG_COMMAND */
 #define PIPE_CMD_OPEN               1  /* open new channel */
@@ -192,7 +195,7 @@
 #define PIPE_WAKE_READ         (1 << 1)  /* pipe can now be read from */
 #define PIPE_WAKE_WRITE        (1 << 2)  /* pipe can now be written to */
 
-void pipe_dev_init(void);
+void pipe_dev_init(bool newDeviceNaming);
 
 struct access_params{
     uint32_t channel;
@@ -204,4 +207,14 @@
     uint32_t flags;
 };
 
+struct access_params_64 {
+    uint64_t channel;
+    uint32_t size;
+    uint64_t address;
+    uint32_t cmd;
+    uint32_t result;
+    /* reserved for future extension */
+    uint32_t flags;
+};
+
 #endif /* _HW_GOLDFISH_PIPE_H */
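Editor's note: the widened parameter block above is presumably what a 64-bit guest hands over through PIPE_REG_ACCESS_PARAMS. A quick way to see how the uint64_t fields change the layout is to print the offsets; this is a throwaway sketch with the field order copied from the header and padding left to the host ABI:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Field order copied from the header above. */
    struct access_params_64 {
        uint64_t channel;
        uint32_t size;
        uint64_t address;
        uint32_t cmd;
        uint32_t result;
        /* reserved for future extension */
        uint32_t flags;
    };

    int main(void)
    {
        /* On most ABIs 'address' is 8-byte aligned, so padding follows 'size'. */
        printf("sizeof(access_params_64) = %zu\n",
               sizeof(struct access_params_64));
        printf("channel @ %zu, size @ %zu, address @ %zu, cmd @ %zu\n",
               offsetof(struct access_params_64, channel),
               offsetof(struct access_params_64, size),
               offsetof(struct access_params_64, address),
               offsetof(struct access_params_64, cmd));
        return 0;
    }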
diff --git a/include/hw/android/goldfish/vmem.h b/include/hw/android/goldfish/vmem.h
index be7974d..ff5d1c0 100644
--- a/include/hw/android/goldfish/vmem.h
+++ b/include/hw/android/goldfish/vmem.h
@@ -18,10 +18,10 @@
 // cpu_get_phys_page_debug to ensure virtual address translation always works
 // properly, and efficently, under KVM.
 
-int safe_memory_rw_debug(CPUOldState *env, target_ulong addr, uint8_t *buf,
+int safe_memory_rw_debug(CPUState *env, target_ulong addr, uint8_t *buf,
                          int len, int is_write);
 
-hwaddr safe_get_phys_page_debug(CPUOldState *env, target_ulong addr);
+hwaddr safe_get_phys_page_debug(CPUState *env, target_ulong addr);
 
 
 #endif  /* GOLDFISH_VMEM_H */
diff --git a/include/hw/arm/arm.h b/include/hw/arm/arm.h
index 10f8414..a498333 100644
--- a/include/hw/arm/arm.h
+++ b/include/hw/arm/arm.h
@@ -34,7 +34,7 @@
     hwaddr smp_priv_base;
     int nb_cpus;
     int board_id;
-    int (*atag_board)(struct arm_boot_info *info, void *p);
+    int (*atag_board)(const struct arm_boot_info *info, void *p);
     /* Used internally by arm_boot.c */
     int is_linux;
     hwaddr initrd_size;
diff --git a/include/hw/elf_ops.h b/include/hw/elf_ops.h
index 01be0fe..9a405d2 100644
--- a/include/hw/elf_ops.h
+++ b/include/hw/elf_ops.h
@@ -73,7 +73,7 @@
     return result;
 }
 
-static const char *glue(lookup_symbol, SZ)(struct syminfo *s, target_ulong orig_addr)
+static const char *glue(lookup_symbol, SZ)(struct syminfo *s, hwaddr orig_addr)
 {
     struct elf_sym *syms = glue(s->disas_symtab.elf, SZ);
     struct elf_sym key;
@@ -143,7 +143,7 @@
         }
 #if defined(TARGET_ARM) || defined (TARGET_MIPS)
         /* The bottom address bit marks a Thumb or MIPS16 symbol.  */
-        syms[i].st_value &= ~(target_ulong)1;
+        syms[i].st_value &= ~(hwaddr)1;
 #endif
         i++;
     }
diff --git a/include/hw/hw.h b/include/hw/hw.h
index 82f9767..21dc8ba 100644
--- a/include/hw/hw.h
+++ b/include/hw/hw.h
@@ -5,6 +5,7 @@
 #include "qemu-common.h"
 #include "hw/irq.h"
 #include "migration/qemu-file.h"
+#include "migration/vmstate.h"
 
 #ifdef NEED_CPU_H
 #include "cpu.h"
@@ -38,27 +39,6 @@
 #endif
 #endif
 
-typedef void SaveStateHandler(QEMUFile *f, void *opaque);
-typedef int SaveLiveStateHandler(QEMUFile *f, int stage, void *opaque);
-typedef int LoadStateHandler(QEMUFile *f, void *opaque, int version_id);
-
-int register_savevm(const char *idstr,
-                    int instance_id,
-                    int version_id,
-                    SaveStateHandler *save_state,
-                    LoadStateHandler *load_state,
-                    void *opaque);
-
-int register_savevm_live(const char *idstr,
-                         int instance_id,
-                         int version_id,
-                         SaveLiveStateHandler *save_live_state,
-                         SaveStateHandler *save_state,
-                         LoadStateHandler *load_state,
-                         void *opaque);
-
-void unregister_savevm(const char *idstr, void *opaque);
-
 typedef void QEMUResetHandler(void *opaque);
 
 void qemu_register_reset(QEMUResetHandler *func, int order, void *opaque);
diff --git a/include/hw/mips/mips.h b/include/hw/mips/mips.h
index 5b45e14..97094eb 100644
--- a/include/hw/mips/mips.h
+++ b/include/hw/mips/mips.h
@@ -7,7 +7,6 @@
 
 /* ds1225y.c */
 void *ds1225y_init(hwaddr mem_base, const char *filename);
-void ds1225y_set_protection(void *opaque, int protection);
 
 /* g364fb.c */
 int g364fb_mm_init(hwaddr vram_base,
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
new file mode 100644
index 0000000..e68656d
--- /dev/null
+++ b/include/hw/qdev-core.h
@@ -0,0 +1,129 @@
+#ifndef QDEV_CORE_H
+#define QDEV_CORE_H
+
+#include "hw/irq.h"
+#include "qemu/queue.h"
+
+typedef struct DeviceType DeviceType;
+
+typedef struct DeviceProperty DeviceProperty;
+
+/* This structure should not be accessed directly.  We declare it here
+   so that it can be embedded in individual device state structures.  */
+struct DeviceState {
+    DeviceType *type;
+    BusState *parent_bus;
+    DeviceProperty *props;
+    int num_gpio_out;
+    qemu_irq *gpio_out;
+    int num_gpio_in;
+    qemu_irq *gpio_in;
+    QLIST_HEAD(, BusState) child_bus;
+    NICInfo *nd;
+    QLIST_ENTRY(DeviceState) sibling;
+};
+
+typedef enum {
+    BUS_TYPE_SYSTEM,
+    BUS_TYPE_PCI,
+    BUS_TYPE_SCSI,
+    BUS_TYPE_I2C,
+    BUS_TYPE_SSI
+} BusType;
+
+struct BusState {
+    DeviceState *parent;
+    const char *name;
+    BusType type;
+    QLIST_HEAD(, DeviceState) children;
+    QLIST_ENTRY(BusState) sibling;
+};
+
+/*** Board API.  This should go away once we have a machine config file.  ***/
+
+DeviceState *qdev_create(BusState *bus, const char *name);
+void qdev_init(DeviceState *dev);
+void qdev_free(DeviceState *dev);
+
+/* Set properties between creation and init.  */
+void qdev_set_prop_int(DeviceState *dev, const char *name, uint64_t value);
+void qdev_set_prop_dev(DeviceState *dev, const char *name, DeviceState *value);
+void qdev_set_prop_ptr(DeviceState *dev, const char *name, void *value);
+void qdev_set_netdev(DeviceState *dev, NICInfo *nd);
+
+qemu_irq qdev_get_gpio_in(DeviceState *dev, int n);
+void qdev_connect_gpio_out(DeviceState *dev, int n, qemu_irq pin);
+
+BusState *qdev_get_child_bus(DeviceState *dev, const char *name);
+
+/*** Device API.  ***/
+
+typedef enum {
+    PROP_TYPE_INT,
+    PROP_TYPE_PTR,
+    PROP_TYPE_DEV
+} DevicePropType;
+
+typedef struct {
+    const char *name;
+    DevicePropType type;
+} DevicePropList;
+
+typedef struct DeviceInfo DeviceInfo;
+
+typedef void (*qdev_initfn)(DeviceState *dev, DeviceInfo *info);
+typedef void (*SCSIAttachFn)(DeviceState *host, BlockDriverState *bdrv,
+              int unit);
+
+struct DeviceInfo {
+    const char *name;
+    size_t size;
+    DevicePropList *props;
+
+    /* Private to qdev / bus.  */
+    qdev_initfn init;
+    BusType bus_type;
+};
+
+void qdev_register(DeviceInfo *info);
+
+/* Register device properties.  */
+/* GPIO inputs also double as IRQ sinks.  */
+void qdev_init_gpio_in(DeviceState *dev, qemu_irq_handler handler, int n);
+void qdev_init_gpio_out(DeviceState *dev, qemu_irq *pins, int n);
+
+void scsi_bus_new(DeviceState *host, SCSIAttachFn attach);
+
+CharDriverState *qdev_init_chardev(DeviceState *dev);
+
+BusState *qdev_get_parent_bus(DeviceState *dev);
+uint64_t qdev_get_prop_int(DeviceState *dev, const char *name, uint64_t def);
+DeviceState *qdev_get_prop_dev(DeviceState *dev, const char *name);
+/* FIXME: Remove opaque pointer properties.  */
+void *qdev_get_prop_ptr(DeviceState *dev, const char *name);
+
+/* Convert from a base type to a parent type, with compile time checking.  */
+#ifdef __GNUC__
+#define DO_UPCAST(type, field, dev) ( __extension__ ( { \
+    char __attribute__((unused)) offset_must_be_zero[ \
+        -offsetof(type, field)]; \
+    container_of(dev, type, field);}))
+#else
+#define DO_UPCAST(type, field, dev) container_of(dev, type, field)
+#endif
+
+/*** BUS API. ***/
+
+BusState *qbus_create(BusType type, size_t size,
+                      DeviceState *parent, const char *name);
+
+#define FROM_QBUS(type, dev) DO_UPCAST(type, qbus, dev)
+
+/*** monitor commands ***/
+
+void do_info_qtree(Monitor *mon);
+void sysbus_dev_print(Monitor *mon, DeviceState *dev, int indent);
+
+char *qdev_get_dev_path(DeviceState *dev);
+
+#endif  // QDEV_CORE_H
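Editor's note: DO_UPCAST above recovers the containing structure from an embedded field and uses a negative-size array to enforce, at compile time, that the field sits at offset zero. A sketch follows; MyPipeDevice, its qdev field, and the simplified container_of() are invented for illustration (QEMU's real container_of adds its own type check), and the macro body is copied from the header:

    #include <stddef.h>
    #include <stdio.h>

    /* Simplified container_of(). */
    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    /* Copied from the header above (GCC statement expression). */
    #define DO_UPCAST(type, field, dev) ( __extension__ ( { \
        char __attribute__((unused)) offset_must_be_zero[ \
            -offsetof(type, field)]; \
        container_of(dev, type, field);}))

    typedef struct DeviceState { int dummy; } DeviceState;  /* stand-in */

    typedef struct MyPipeDevice {
        DeviceState qdev;    /* must be first: otherwise the array above gets
                                a negative size and compilation fails */
        int channel_count;
    } MyPipeDevice;

    int main(void)
    {
        MyPipeDevice dev = { .channel_count = 4 };
        DeviceState *base = &dev.qdev;

        MyPipeDevice *self = DO_UPCAST(MyPipeDevice, qdev, base);
        printf("channel_count = %d\n", self->channel_count);
        return 0;
    }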
diff --git a/include/hw/qdev.h b/include/hw/qdev.h
index 6777aa0..85313af 100644
--- a/include/hw/qdev.h
+++ b/include/hw/qdev.h
@@ -2,126 +2,6 @@
 #define QDEV_H
 
 #include "hw/hw.h"
-#include "qemu/queue.h"
-
-typedef struct DeviceType DeviceType;
-
-typedef struct DeviceProperty DeviceProperty;
-
-/* This structure should not be accessed directly.  We declare it here
-   so that it can be embedded in individual device state structures.  */
-struct DeviceState {
-    DeviceType *type;
-    BusState *parent_bus;
-    DeviceProperty *props;
-    int num_gpio_out;
-    qemu_irq *gpio_out;
-    int num_gpio_in;
-    qemu_irq *gpio_in;
-    QLIST_HEAD(, BusState) child_bus;
-    NICInfo *nd;
-    QLIST_ENTRY(DeviceState) sibling;
-};
-
-typedef enum {
-    BUS_TYPE_SYSTEM,
-    BUS_TYPE_PCI,
-    BUS_TYPE_SCSI,
-    BUS_TYPE_I2C,
-    BUS_TYPE_SSI
-} BusType;
-
-struct BusState {
-    DeviceState *parent;
-    const char *name;
-    BusType type;
-    QLIST_HEAD(, DeviceState) children;
-    QLIST_ENTRY(BusState) sibling;
-};
-
-/*** Board API.  This should go away once we have a machine config file.  ***/
-
-DeviceState *qdev_create(BusState *bus, const char *name);
-void qdev_init(DeviceState *dev);
-void qdev_free(DeviceState *dev);
-
-/* Set properties between creation and init.  */
-void qdev_set_prop_int(DeviceState *dev, const char *name, uint64_t value);
-void qdev_set_prop_dev(DeviceState *dev, const char *name, DeviceState *value);
-void qdev_set_prop_ptr(DeviceState *dev, const char *name, void *value);
-void qdev_set_netdev(DeviceState *dev, NICInfo *nd);
-
-qemu_irq qdev_get_gpio_in(DeviceState *dev, int n);
-void qdev_connect_gpio_out(DeviceState *dev, int n, qemu_irq pin);
-
-BusState *qdev_get_child_bus(DeviceState *dev, const char *name);
-
-/*** Device API.  ***/
-
-typedef enum {
-    PROP_TYPE_INT,
-    PROP_TYPE_PTR,
-    PROP_TYPE_DEV
-} DevicePropType;
-
-typedef struct {
-    const char *name;
-    DevicePropType type;
-} DevicePropList;
-
-typedef struct DeviceInfo DeviceInfo;
-
-typedef void (*qdev_initfn)(DeviceState *dev, DeviceInfo *info);
-typedef void (*SCSIAttachFn)(DeviceState *host, BlockDriverState *bdrv,
-              int unit);
-
-struct DeviceInfo {
-    const char *name;
-    size_t size;
-    DevicePropList *props;
-
-    /* Private to qdev / bus.  */
-    qdev_initfn init;
-    BusType bus_type;
-};
-
-void qdev_register(DeviceInfo *info);
-
-/* Register device properties.  */
-/* GPIO inputs also double as IRQ sinks.  */
-void qdev_init_gpio_in(DeviceState *dev, qemu_irq_handler handler, int n);
-void qdev_init_gpio_out(DeviceState *dev, qemu_irq *pins, int n);
-
-void scsi_bus_new(DeviceState *host, SCSIAttachFn attach);
-
-CharDriverState *qdev_init_chardev(DeviceState *dev);
-
-BusState *qdev_get_parent_bus(DeviceState *dev);
-uint64_t qdev_get_prop_int(DeviceState *dev, const char *name, uint64_t def);
-DeviceState *qdev_get_prop_dev(DeviceState *dev, const char *name);
-/* FIXME: Remove opaque pointer properties.  */
-void *qdev_get_prop_ptr(DeviceState *dev, const char *name);
-
-/* Convery from a base type to a parent type, with compile time checking.  */
-#ifdef __GNUC__
-#define DO_UPCAST(type, field, dev) ( __extension__ ( { \
-    char __attribute__((unused)) offset_must_be_zero[ \
-        -offsetof(type, field)]; \
-    container_of(dev, type, field);}))
-#else
-#define DO_UPCAST(type, field, dev) container_of(dev, type, field)
-#endif
-
-/*** BUS API. ***/
-
-BusState *qbus_create(BusType type, size_t size,
-                      DeviceState *parent, const char *name);
-
-#define FROM_QBUS(type, dev) DO_UPCAST(type, qbus, dev)
-
-/*** monitor commands ***/
-
-void do_info_qtree(Monitor *mon);
-void sysbus_dev_print(Monitor *mon, DeviceState *dev, int indent);
+#include "hw/qdev-core.h"
 
 #endif
diff --git a/include/hw/scsi/scsi-disk.h b/include/hw/scsi/scsi-disk.h
deleted file mode 100644
index f42212b..0000000
--- a/include/hw/scsi/scsi-disk.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#ifndef SCSI_DISK_H
-#define SCSI_DISK_H
-
-/* scsi-disk.c */
-enum scsi_reason {
-    SCSI_REASON_DONE, /* Command complete.  */
-    SCSI_REASON_DATA  /* Transfer complete, more data required.  */
-};
-
-typedef struct SCSIDeviceState SCSIDeviceState;
-typedef struct SCSIDevice SCSIDevice;
-typedef void (*scsi_completionfn)(void *opaque, int reason, uint32_t tag,
-                                  uint32_t arg);
-
-struct SCSIDevice
-{
-    SCSIDeviceState *state;
-    void (*destroy)(SCSIDevice *s);
-    int32_t (*send_command)(SCSIDevice *s, uint32_t tag, uint8_t *buf,
-                            int lun);
-    void (*read_data)(SCSIDevice *s, uint32_t tag);
-    int (*write_data)(SCSIDevice *s, uint32_t tag);
-    void (*cancel_io)(SCSIDevice *s, uint32_t tag);
-    uint8_t *(*get_buf)(SCSIDevice *s, uint32_t tag);
-};
-
-SCSIDevice *scsi_disk_init(BlockDriverState *bdrv, int tcq,
-                           scsi_completionfn completion, void *opaque);
-SCSIDevice *scsi_generic_init(BlockDriverState *bdrv, int tcq,
-                           scsi_completionfn completion, void *opaque);
-
-/* cdrom.c */
-int cdrom_read_toc(int nb_sectors, uint8_t *buf, int msf, int start_track);
-int cdrom_read_toc_raw(int nb_sectors, uint8_t *buf, int msf, int session_num);
-
-#endif
diff --git a/include/hw/xen/xen.h b/include/hw/xen/xen.h
index 780dcf7..e1f88bf 100644
--- a/include/hw/xen/xen.h
+++ b/include/hw/xen/xen.h
@@ -8,6 +8,9 @@
  */
 #include <inttypes.h>
 
+#include "hw/irq.h"
+#include "qemu-common.h"
+
 /* xen-machine.c */
 enum xen_mode {
     XEN_EMULATE = 0,  // xen emulation, using xenner (default)
@@ -18,4 +21,35 @@
 extern uint32_t xen_domid;
 extern enum xen_mode xen_mode;
 
+extern bool xen_allowed;
+
+static inline bool xen_enabled(void)
+{
+    return xen_allowed;
+}
+
+int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num);
+void xen_piix3_set_irq(void *opaque, int irq_num, int level);
+void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len);
+void xen_hvm_inject_msi(uint64_t addr, uint32_t data);
+void xen_cmos_set_s3_resume(void *opaque, int irq, int level);
+
+qemu_irq *xen_interrupt_controller_init(void);
+
+int xen_init(void);
+int xen_hvm_init(MemoryRegion **ram_memory);
+void xenstore_store_pv_console_info(int i, struct CharDriverState *chr);
+
+#if defined(NEED_CPU_H) && !defined(CONFIG_USER_ONLY)
+void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size,
+                   struct MemoryRegion *mr);
+void xen_modified_memory(ram_addr_t start, ram_addr_t length);
+#endif
+
+void xen_register_framebuffer(struct MemoryRegion *mr);
+
+#if defined(CONFIG_XEN) && CONFIG_XEN_CTRL_INTERFACE_VERSION < 400
+#  define HVM_MAX_VCPUS 32
+#endif
+
 #endif /* QEMU_HW_XEN_H */
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
new file mode 100644
index 0000000..f52582b
--- /dev/null
+++ b/include/migration/vmstate.h
@@ -0,0 +1,758 @@
+/*
+ * QEMU migration/snapshot declarations
+ *
+ * Copyright (c) 2009-2011 Red Hat, Inc.
+ *
+ * Original author: Juan Quintela <quintela@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef QEMU_VMSTATE_H
+#define QEMU_VMSTATE_H 1
+
+#ifndef CONFIG_USER_ONLY
+#include <migration/qemu-file.h>
+#endif
+
+typedef void SaveStateHandler(QEMUFile *f, void *opaque);
+typedef int SaveLiveStateHandler(QEMUFile *f, int stage, void *opaque);
+typedef int LoadStateHandler(QEMUFile *f, void *opaque, int version_id);
+
+typedef struct SaveVMHandlers {
+#if 0    
+    /* This runs inside the iothread lock.  */
+    void (*set_params)(const MigrationParams *params, void * opaque);
+#endif    
+    SaveStateHandler *save_state;
+    SaveLiveStateHandler *save_live_state;
+#if 0
+    void (*cancel)(void *opaque);
+    int (*save_live_complete)(QEMUFile *f, void *opaque);
+
+    /* This runs both outside and inside the iothread lock.  */
+    bool (*is_active)(void *opaque);
+
+    /* This runs outside the iothread lock in the migration case, and
+     * within the lock in the savevm case.  The callback had better only
+     * use data that is local to the migration thread or protected
+     * by other locks.
+     */
+    int (*save_live_iterate)(QEMUFile *f, void *opaque);
+
+    /* This runs outside the iothread lock!  */
+    int (*save_live_setup)(QEMUFile *f, void *opaque);
+    uint64_t (*save_live_pending)(QEMUFile *f, void *opaque, uint64_t max_size);
+#endif
+    LoadStateHandler *load_state;
+} SaveVMHandlers;
+
+int register_savevm(DeviceState* dev,
+                    const char *idstr,
+                    int instance_id,
+                    int version_id,
+                    SaveStateHandler *save_state,
+                    LoadStateHandler *load_state,
+                    void *opaque);
+
+int register_savevm_live(DeviceState* dev,
+                         const char *idstr,
+                         int instance_id,
+                         int version_id,
+                         SaveVMHandlers *ops,
+                         void *opaque);
+
+void unregister_savevm(DeviceState* dev, const char *idstr, void *opaque);
+void register_device_unmigratable(DeviceState *dev, const char *idstr,
+                                                                void *opaque);
+
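Editor's note: with this header, register_savevm_live() takes its callbacks bundled in SaveVMHandlers rather than as separate arguments. The following is a self-contained sketch of that indirection only; QEMUFile is left opaque, and DemoDevice plus both callbacks are invented, so nothing here should be read as the real registration path:

    #include <stdio.h>

    typedef struct QEMUFile QEMUFile;   /* opaque stand-in */

    /* Mirrors the typedefs and the active members of SaveVMHandlers above. */
    typedef void SaveStateHandler(QEMUFile *f, void *opaque);
    typedef int SaveLiveStateHandler(QEMUFile *f, int stage, void *opaque);
    typedef int LoadStateHandler(QEMUFile *f, void *opaque, int version_id);

    typedef struct SaveVMHandlers {
        SaveStateHandler *save_state;
        SaveLiveStateHandler *save_live_state;
        LoadStateHandler *load_state;
    } SaveVMHandlers;

    typedef struct { int counter; } DemoDevice;   /* invented device state */

    static void demo_save(QEMUFile *f, void *opaque)
    {
        (void)f;
        printf("saving counter=%d\n", ((DemoDevice *)opaque)->counter);
    }

    static int demo_load(QEMUFile *f, void *opaque, int version_id)
    {
        (void)f;
        ((DemoDevice *)opaque)->counter = 42;
        printf("loaded version %d\n", version_id);
        return 0;
    }

    int main(void)
    {
        DemoDevice dev = { .counter = 7 };
        SaveVMHandlers ops = { .save_state = demo_save, .load_state = demo_load };

        /* register_savevm_live() would stash 'ops' plus the opaque pointer;
         * here the callbacks are simply invoked through the struct. */
        ops.save_state(NULL, &dev);
        ops.load_state(NULL, &dev, 1);
        return 0;
    }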
+typedef struct VMStateInfo VMStateInfo;
+typedef struct VMStateDescription VMStateDescription;
+
+struct VMStateInfo {
+    const char *name;
+    int (*get)(QEMUFile *f, void *pv, size_t size);
+    void (*put)(QEMUFile *f, void *pv, size_t size);
+};
+
+enum VMStateFlags {
+    VMS_SINGLE           = 0x001,
+    VMS_POINTER          = 0x002,
+    VMS_ARRAY            = 0x004,
+    VMS_STRUCT           = 0x008,
+    VMS_VARRAY_INT32     = 0x010,  /* Array with size in int32_t field */
+    VMS_BUFFER           = 0x020,  /* static sized buffer */
+    VMS_ARRAY_OF_POINTER = 0x040,
+    VMS_VARRAY_UINT16    = 0x080,  /* Array with size in uint16_t field */
+    VMS_VBUFFER          = 0x100,  /* Buffer with size in int32_t field */
+    VMS_MULTIPLY         = 0x200,  /* multiply "size" field by field_size */
+    VMS_VARRAY_UINT8     = 0x400,  /* Array with size in uint8_t field */
+    VMS_VARRAY_UINT32    = 0x800,  /* Array with size in uint32_t field */
+};
+
+typedef struct {
+    const char *name;
+    size_t offset;
+    size_t size;
+    size_t start;
+    int num;
+    size_t num_offset;
+    size_t size_offset;
+    const VMStateInfo *info;
+    enum VMStateFlags flags;
+    const VMStateDescription *vmsd;
+    int version_id;
+    bool (*field_exists)(void *opaque, int version_id);
+} VMStateField;
+
+typedef struct VMStateSubsection {
+    const VMStateDescription *vmsd;
+    bool (*needed)(void *opaque);
+} VMStateSubsection;
+
+struct VMStateDescription {
+    const char *name;
+    int unmigratable;
+    int version_id;
+    int minimum_version_id;
+    int minimum_version_id_old;
+    LoadStateHandler *load_state_old;
+    int (*pre_load)(void *opaque);
+    int (*post_load)(void *opaque, int version_id);
+    void (*pre_save)(void *opaque);
+    VMStateField *fields;
+    const VMStateSubsection *subsections;
+};
+
+#ifdef CONFIG_USER_ONLY
+extern const VMStateDescription vmstate_dummy;
+#endif
+
+extern const VMStateInfo vmstate_info_bool;
+
+extern const VMStateInfo vmstate_info_int8;
+extern const VMStateInfo vmstate_info_int16;
+extern const VMStateInfo vmstate_info_int32;
+extern const VMStateInfo vmstate_info_int64;
+
+extern const VMStateInfo vmstate_info_uint8_equal;
+extern const VMStateInfo vmstate_info_uint16_equal;
+extern const VMStateInfo vmstate_info_int32_equal;
+extern const VMStateInfo vmstate_info_uint32_equal;
+extern const VMStateInfo vmstate_info_uint64_equal;
+extern const VMStateInfo vmstate_info_int32_le;
+
+extern const VMStateInfo vmstate_info_uint8;
+extern const VMStateInfo vmstate_info_uint16;
+extern const VMStateInfo vmstate_info_uint32;
+extern const VMStateInfo vmstate_info_uint64;
+
+extern const VMStateInfo vmstate_info_float64;
+
+extern const VMStateInfo vmstate_info_timer;
+extern const VMStateInfo vmstate_info_buffer;
+extern const VMStateInfo vmstate_info_unused_buffer;
+extern const VMStateInfo vmstate_info_bitmap;
+
+#define type_check_2darray(t1,t2,n,m) ((t1(*)[n][m])0 - (t2*)0)
+#define type_check_array(t1,t2,n) ((t1(*)[n])0 - (t2*)0)
+#define type_check_pointer(t1,t2) ((t1**)0 - (t2*)0)
+
+#define vmstate_offset_value(_state, _field, _type)                  \
+    (offsetof(_state, _field) +                                      \
+     type_check(_type, typeof_field(_state, _field)))
+
+#define vmstate_offset_pointer(_state, _field, _type)                \
+    (offsetof(_state, _field) +                                      \
+     type_check_pointer(_type, typeof_field(_state, _field)))
+
+#define vmstate_offset_array(_state, _field, _type, _num)            \
+    (offsetof(_state, _field) +                                      \
+     type_check_array(_type, typeof_field(_state, _field), _num))
+
+#define vmstate_offset_2darray(_state, _field, _type, _n1, _n2)      \
+    (offsetof(_state, _field) +                                      \
+     type_check_2darray(_type, typeof_field(_state, _field), _n1, _n2))
+
+#define vmstate_offset_sub_array(_state, _field, _type, _start)      \
+    (offsetof(_state, _field[_start]))
+
+#define vmstate_offset_buffer(_state, _field)                        \
+    vmstate_offset_array(_state, _field, uint8_t,                    \
+                         sizeof(typeof_field(_state, _field)))
+
+#define VMSTATE_SINGLE_TEST(_field, _state, _test, _version, _info, _type) { \
+    .name         = (stringify(_field)),                             \
+    .version_id   = (_version),                                      \
+    .field_exists = (_test),                                         \
+    .size         = sizeof(_type),                                   \
+    .info         = &(_info),                                        \
+    .flags        = VMS_SINGLE,                                      \
+    .offset       = vmstate_offset_value(_state, _field, _type),     \
+}
+
+#define VMSTATE_POINTER(_field, _state, _version, _info, _type) {    \
+    .name       = (stringify(_field)),                               \
+    .version_id = (_version),                                        \
+    .info       = &(_info),                                          \
+    .size       = sizeof(_type),                                     \
+    .flags      = VMS_SINGLE|VMS_POINTER,                            \
+    .offset     = vmstate_offset_value(_state, _field, _type),       \
+}
+
+#define VMSTATE_POINTER_TEST(_field, _state, _test, _info, _type) {  \
+    .name       = (stringify(_field)),                               \
+    .info       = &(_info),                                          \
+    .field_exists = (_test),                                         \
+    .size       = sizeof(_type),                                     \
+    .flags      = VMS_SINGLE|VMS_POINTER,                            \
+    .offset     = vmstate_offset_value(_state, _field, _type),       \
+}
+
+#define VMSTATE_ARRAY(_field, _state, _num, _version, _info, _type) {\
+    .name       = (stringify(_field)),                               \
+    .version_id = (_version),                                        \
+    .num        = (_num),                                            \
+    .info       = &(_info),                                          \
+    .size       = sizeof(_type),                                     \
+    .flags      = VMS_ARRAY,                                         \
+    .offset     = vmstate_offset_array(_state, _field, _type, _num), \
+}
+
+#define VMSTATE_2DARRAY(_field, _state, _n1, _n2, _version, _info, _type) { \
+    .name       = (stringify(_field)),                                      \
+    .version_id = (_version),                                               \
+    .num        = (_n1) * (_n2),                                            \
+    .info       = &(_info),                                                 \
+    .size       = sizeof(_type),                                            \
+    .flags      = VMS_ARRAY,                                                \
+    .offset     = vmstate_offset_2darray(_state, _field, _type, _n1, _n2),  \
+}
+
+#define VMSTATE_ARRAY_TEST(_field, _state, _num, _test, _info, _type) {\
+    .name         = (stringify(_field)),                              \
+    .field_exists = (_test),                                          \
+    .num          = (_num),                                           \
+    .info         = &(_info),                                         \
+    .size         = sizeof(_type),                                    \
+    .flags        = VMS_ARRAY,                                        \
+    .offset       = vmstate_offset_array(_state, _field, _type, _num),\
+}
+
+#define VMSTATE_SUB_ARRAY(_field, _state, _start, _num, _version, _info, _type) { \
+    .name       = (stringify(_field)),                               \
+    .version_id = (_version),                                        \
+    .num        = (_num),                                            \
+    .info       = &(_info),                                          \
+    .size       = sizeof(_type),                                     \
+    .flags      = VMS_ARRAY,                                         \
+    .offset     = vmstate_offset_sub_array(_state, _field, _type, _start), \
+}
+
+#define VMSTATE_ARRAY_INT32_UNSAFE(_field, _state, _field_num, _info, _type) {\
+    .name       = (stringify(_field)),                               \
+    .num_offset = vmstate_offset_value(_state, _field_num, int32_t), \
+    .info       = &(_info),                                          \
+    .size       = sizeof(_type),                                     \
+    .flags      = VMS_VARRAY_INT32,                                  \
+    .offset     = offsetof(_state, _field),                          \
+}
+
+#define VMSTATE_VARRAY_INT32(_field, _state, _field_num, _version, _info, _type) {\
+    .name       = (stringify(_field)),                               \
+    .version_id = (_version),                                        \
+    .num_offset = vmstate_offset_value(_state, _field_num, int32_t), \
+    .info       = &(_info),                                          \
+    .size       = sizeof(_type),                                     \
+    .flags      = VMS_VARRAY_INT32|VMS_POINTER,                      \
+    .offset     = vmstate_offset_pointer(_state, _field, _type),     \
+}
+
+#define VMSTATE_VARRAY_UINT32(_field, _state, _field_num, _version, _info, _type) {\
+    .name       = (stringify(_field)),                               \
+    .version_id = (_version),                                        \
+    .num_offset = vmstate_offset_value(_state, _field_num, uint32_t),\
+    .info       = &(_info),                                          \
+    .size       = sizeof(_type),                                     \
+    .flags      = VMS_VARRAY_UINT32|VMS_POINTER,                     \
+    .offset     = vmstate_offset_pointer(_state, _field, _type),     \
+}
+
+#define VMSTATE_VARRAY_UINT16_UNSAFE(_field, _state, _field_num, _version, _info, _type) {\
+    .name       = (stringify(_field)),                               \
+    .version_id = (_version),                                        \
+    .num_offset = vmstate_offset_value(_state, _field_num, uint16_t),\
+    .info       = &(_info),                                          \
+    .size       = sizeof(_type),                                     \
+    .flags      = VMS_VARRAY_UINT16,                                 \
+    .offset     = offsetof(_state, _field),                          \
+}
+
+#define VMSTATE_STRUCT_TEST(_field, _state, _test, _version, _vmsd, _type) { \
+    .name         = (stringify(_field)),                             \
+    .version_id   = (_version),                                      \
+    .field_exists = (_test),                                         \
+    .vmsd         = &(_vmsd),                                        \
+    .size         = sizeof(_type),                                   \
+    .flags        = VMS_STRUCT,                                      \
+    .offset       = vmstate_offset_value(_state, _field, _type),     \
+}
+
+#define VMSTATE_STRUCT_POINTER_V(_field, _state, _version, _vmsd, _type) { \
+    .name         = (stringify(_field)),                             \
+    .version_id   = (_version),                                      \
+    .vmsd         = &(_vmsd),                                        \
+    .size         = sizeof(_type),                                   \
+    .flags        = VMS_STRUCT|VMS_POINTER,                          \
+    .offset       = vmstate_offset_value(_state, _field, _type),     \
+}
+
+#define VMSTATE_STRUCT_POINTER_TEST_V(_field, _state, _test, _version, _vmsd, _type) { \
+    .name         = (stringify(_field)),                             \
+    .version_id   = (_version),                                      \
+    .field_exists = (_test),                                         \
+    .vmsd         = &(_vmsd),                                        \
+    .size         = sizeof(_type),                                   \
+    .flags        = VMS_STRUCT|VMS_POINTER,                          \
+    .offset       = vmstate_offset_value(_state, _field, _type),     \
+}
+
+#define VMSTATE_ARRAY_OF_POINTER(_field, _state, _num, _version, _info, _type) {\
+    .name       = (stringify(_field)),                               \
+    .version_id = (_version),                                        \
+    .num        = (_num),                                            \
+    .info       = &(_info),                                          \
+    .size       = sizeof(_type),                                     \
+    .flags      = VMS_ARRAY|VMS_ARRAY_OF_POINTER,                    \
+    .offset     = vmstate_offset_array(_state, _field, _type, _num), \
+}
+
+#define VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, _test, _version, _vmsd, _type) { \
+    .name         = (stringify(_field)),                             \
+    .num          = (_num),                                          \
+    .field_exists = (_test),                                         \
+    .version_id   = (_version),                                      \
+    .vmsd         = &(_vmsd),                                        \
+    .size         = sizeof(_type),                                   \
+    .flags        = VMS_STRUCT|VMS_ARRAY,                            \
+    .offset       = vmstate_offset_array(_state, _field, _type, _num),\
+}
+
+#define VMSTATE_STRUCT_VARRAY_UINT8(_field, _state, _field_num, _version, _vmsd, _type) { \
+    .name       = (stringify(_field)),                               \
+    .num_offset = vmstate_offset_value(_state, _field_num, uint8_t), \
+    .version_id = (_version),                                        \
+    .vmsd       = &(_vmsd),                                          \
+    .size       = sizeof(_type),                                     \
+    .flags      = VMS_STRUCT|VMS_VARRAY_UINT8,                       \
+    .offset     = offsetof(_state, _field),                          \
+}
+
+#define VMSTATE_STRUCT_VARRAY_POINTER_INT32(_field, _state, _field_num, _vmsd, _type) { \
+    .name       = (stringify(_field)),                               \
+    .version_id = 0,                                                 \
+    .num_offset = vmstate_offset_value(_state, _field_num, int32_t), \
+    .size       = sizeof(_type),                                     \
+    .vmsd       = &(_vmsd),                                          \
+    .flags      = VMS_POINTER | VMS_VARRAY_INT32 | VMS_STRUCT,       \
+    .offset     = vmstate_offset_pointer(_state, _field, _type),     \
+}
+
+#define VMSTATE_STRUCT_VARRAY_POINTER_UINT32(_field, _state, _field_num, _vmsd, _type) { \
+    .name       = (stringify(_field)),                               \
+    .version_id = 0,                                                 \
+    .num_offset = vmstate_offset_value(_state, _field_num, uint32_t),\
+    .size       = sizeof(_type),                                     \
+    .vmsd       = &(_vmsd),                                          \
+    .flags      = VMS_POINTER | VMS_VARRAY_INT32 | VMS_STRUCT,       \
+    .offset     = vmstate_offset_pointer(_state, _field, _type),     \
+}
+
+#define VMSTATE_STRUCT_VARRAY_POINTER_UINT16(_field, _state, _field_num, _vmsd, _type) { \
+    .name       = (stringify(_field)),                               \
+    .version_id = 0,                                                 \
+    .num_offset = vmstate_offset_value(_state, _field_num, uint16_t),\
+    .size       = sizeof(_type),                                     \
+    .vmsd       = &(_vmsd),                                          \
+    .flags      = VMS_POINTER | VMS_VARRAY_UINT16 | VMS_STRUCT,      \
+    .offset     = vmstate_offset_pointer(_state, _field, _type),     \
+}
+
+#define VMSTATE_STRUCT_VARRAY_INT32(_field, _state, _field_num, _version, _vmsd, _type) { \
+    .name       = (stringify(_field)),                               \
+    .num_offset = vmstate_offset_value(_state, _field_num, int32_t), \
+    .version_id = (_version),                                        \
+    .vmsd       = &(_vmsd),                                          \
+    .size       = sizeof(_type),                                     \
+    .flags      = VMS_STRUCT|VMS_VARRAY_INT32,                       \
+    .offset     = offsetof(_state, _field),                          \
+}
+
+#define VMSTATE_STRUCT_VARRAY_UINT32(_field, _state, _field_num, _version, _vmsd, _type) { \
+    .name       = (stringify(_field)),                               \
+    .num_offset = vmstate_offset_value(_state, _field_num, uint32_t), \
+    .version_id = (_version),                                        \
+    .vmsd       = &(_vmsd),                                          \
+    .size       = sizeof(_type),                                     \
+    .flags      = VMS_STRUCT|VMS_VARRAY_UINT32,                      \
+    .offset     = offsetof(_state, _field),                          \
+}
+
+#define VMSTATE_STATIC_BUFFER(_field, _state, _version, _test, _start, _size) { \
+    .name         = (stringify(_field)),                             \
+    .version_id   = (_version),                                      \
+    .field_exists = (_test),                                         \
+    .size         = (_size - _start),                                \
+    .info         = &vmstate_info_buffer,                            \
+    .flags        = VMS_BUFFER,                                      \
+    .offset       = vmstate_offset_buffer(_state, _field) + _start,  \
+}
+
+#define VMSTATE_VBUFFER_MULTIPLY(_field, _state, _version, _test, _start, _field_size, _multiply) { \
+    .name         = (stringify(_field)),                             \
+    .version_id   = (_version),                                      \
+    .field_exists = (_test),                                         \
+    .size_offset  = vmstate_offset_value(_state, _field_size, uint32_t),\
+    .size         = (_multiply),                                      \
+    .info         = &vmstate_info_buffer,                            \
+    .flags        = VMS_VBUFFER|VMS_POINTER|VMS_MULTIPLY,            \
+    .offset       = offsetof(_state, _field),                        \
+    .start        = (_start),                                        \
+}
+
+#define VMSTATE_VBUFFER(_field, _state, _version, _test, _start, _field_size) { \
+    .name         = (stringify(_field)),                             \
+    .version_id   = (_version),                                      \
+    .field_exists = (_test),                                         \
+    .size_offset  = vmstate_offset_value(_state, _field_size, int32_t),\
+    .info         = &vmstate_info_buffer,                            \
+    .flags        = VMS_VBUFFER|VMS_POINTER,                         \
+    .offset       = offsetof(_state, _field),                        \
+    .start        = (_start),                                        \
+}
+
+#define VMSTATE_VBUFFER_UINT32(_field, _state, _version, _test, _start, _field_size) { \
+    .name         = (stringify(_field)),                             \
+    .version_id   = (_version),                                      \
+    .field_exists = (_test),                                         \
+    .size_offset  = vmstate_offset_value(_state, _field_size, uint32_t),\
+    .info         = &vmstate_info_buffer,                            \
+    .flags        = VMS_VBUFFER|VMS_POINTER,                         \
+    .offset       = offsetof(_state, _field),                        \
+    .start        = (_start),                                        \
+}
+
+#define VMSTATE_BUFFER_UNSAFE_INFO(_field, _state, _version, _info, _size) { \
+    .name       = (stringify(_field)),                               \
+    .version_id = (_version),                                        \
+    .size       = (_size),                                           \
+    .info       = &(_info),                                          \
+    .flags      = VMS_BUFFER,                                        \
+    .offset     = offsetof(_state, _field),                          \
+}
+
+#define VMSTATE_BUFFER_POINTER_UNSAFE(_field, _state, _version, _size) { \
+    .name       = (stringify(_field)),                               \
+    .version_id = (_version),                                        \
+    .size       = (_size),                                           \
+    .info       = &vmstate_info_buffer,                              \
+    .flags      = VMS_BUFFER|VMS_POINTER,                            \
+    .offset     = offsetof(_state, _field),                          \
+}
+
+#define VMSTATE_UNUSED_BUFFER(_test, _version, _size) {              \
+    .name         = "unused",                                        \
+    .field_exists = (_test),                                         \
+    .version_id   = (_version),                                      \
+    .size         = (_size),                                         \
+    .info         = &vmstate_info_unused_buffer,                     \
+    .flags        = VMS_BUFFER,                                      \
+}
+
+/* _field_size should be an int32_t field in the _state struct giving the
+ * size of the bitmap _field in bits.
+ */
+#define VMSTATE_BITMAP(_field, _state, _version, _field_size) {      \
+    .name         = (stringify(_field)),                             \
+    .version_id   = (_version),                                      \
+    .size_offset  = vmstate_offset_value(_state, _field_size, int32_t),\
+    .info         = &vmstate_info_bitmap,                            \
+    .flags        = VMS_VBUFFER|VMS_POINTER,                         \
+    .offset       = offsetof(_state, _field),                        \
+}
+
+/* _f   : field name
+   _f_n : name of the field holding the number of elements
+   _n   : number of elements
+   _s   : struct state name
+   _v   : version
+*/
+
+#define VMSTATE_SINGLE(_field, _state, _version, _info, _type)        \
+    VMSTATE_SINGLE_TEST(_field, _state, NULL, _version, _info, _type)
+
+#define VMSTATE_STRUCT(_field, _state, _version, _vmsd, _type)        \
+    VMSTATE_STRUCT_TEST(_field, _state, NULL, _version, _vmsd, _type)
+
+#define VMSTATE_STRUCT_POINTER(_field, _state, _vmsd, _type)          \
+    VMSTATE_STRUCT_POINTER_V(_field, _state, 0, _vmsd, _type)
+
+#define VMSTATE_STRUCT_POINTER_TEST(_field, _state, _test, _vmsd, _type)     \
+    VMSTATE_STRUCT_POINTER_TEST_V(_field, _state, _test, 0, _vmsd, _type)
+
+#define VMSTATE_STRUCT_ARRAY(_field, _state, _num, _version, _vmsd, _type) \
+    VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, NULL, _version,   \
+            _vmsd, _type)
+
+#define VMSTATE_BOOL_V(_f, _s, _v)                                    \
+    VMSTATE_SINGLE(_f, _s, _v, vmstate_info_bool, bool)
+
+#define VMSTATE_INT8_V(_f, _s, _v)                                    \
+    VMSTATE_SINGLE(_f, _s, _v, vmstate_info_int8, int8_t)
+#define VMSTATE_INT16_V(_f, _s, _v)                                   \
+    VMSTATE_SINGLE(_f, _s, _v, vmstate_info_int16, int16_t)
+#define VMSTATE_INT32_V(_f, _s, _v)                                   \
+    VMSTATE_SINGLE(_f, _s, _v, vmstate_info_int32, int32_t)
+#define VMSTATE_INT64_V(_f, _s, _v)                                   \
+    VMSTATE_SINGLE(_f, _s, _v, vmstate_info_int64, int64_t)
+
+#define VMSTATE_UINT8_V(_f, _s, _v)                                   \
+    VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint8, uint8_t)
+#define VMSTATE_UINT16_V(_f, _s, _v)                                  \
+    VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint16, uint16_t)
+#define VMSTATE_UINT32_V(_f, _s, _v)                                  \
+    VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint32, uint32_t)
+#define VMSTATE_UINT64_V(_f, _s, _v)                                  \
+    VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint64, uint64_t)
+
+#define VMSTATE_BOOL(_f, _s)                                          \
+    VMSTATE_BOOL_V(_f, _s, 0)
+
+#define VMSTATE_INT8(_f, _s)                                          \
+    VMSTATE_INT8_V(_f, _s, 0)
+#define VMSTATE_INT16(_f, _s)                                         \
+    VMSTATE_INT16_V(_f, _s, 0)
+#define VMSTATE_INT32(_f, _s)                                         \
+    VMSTATE_INT32_V(_f, _s, 0)
+#define VMSTATE_INT64(_f, _s)                                         \
+    VMSTATE_INT64_V(_f, _s, 0)
+
+#define VMSTATE_UINT8(_f, _s)                                         \
+    VMSTATE_UINT8_V(_f, _s, 0)
+#define VMSTATE_UINT16(_f, _s)                                        \
+    VMSTATE_UINT16_V(_f, _s, 0)
+#define VMSTATE_UINT32(_f, _s)                                        \
+    VMSTATE_UINT32_V(_f, _s, 0)
+#define VMSTATE_UINT64(_f, _s)                                        \
+    VMSTATE_UINT64_V(_f, _s, 0)
+
+#define VMSTATE_UINT8_EQUAL(_f, _s)                                   \
+    VMSTATE_SINGLE(_f, _s, 0, vmstate_info_uint8_equal, uint8_t)
+
+#define VMSTATE_UINT16_EQUAL(_f, _s)                                  \
+    VMSTATE_SINGLE(_f, _s, 0, vmstate_info_uint16_equal, uint16_t)
+
+#define VMSTATE_UINT16_EQUAL_V(_f, _s, _v)                            \
+    VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint16_equal, uint16_t)
+
+#define VMSTATE_INT32_EQUAL(_f, _s)                                   \
+    VMSTATE_SINGLE(_f, _s, 0, vmstate_info_int32_equal, int32_t)
+
+#define VMSTATE_UINT32_EQUAL_V(_f, _s, _v)                            \
+    VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint32_equal, uint32_t)
+
+#define VMSTATE_UINT32_EQUAL(_f, _s)                                  \
+    VMSTATE_UINT32_EQUAL_V(_f, _s, 0)
+
+#define VMSTATE_UINT64_EQUAL_V(_f, _s, _v)                            \
+    VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint64_equal, uint64_t)
+
+#define VMSTATE_UINT64_EQUAL(_f, _s)                                  \
+    VMSTATE_UINT64_EQUAL_V(_f, _s, 0)
+
+#define VMSTATE_INT32_LE(_f, _s)                                   \
+    VMSTATE_SINGLE(_f, _s, 0, vmstate_info_int32_le, int32_t)
+
+#define VMSTATE_UINT8_TEST(_f, _s, _t)                               \
+    VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_uint8, uint8_t)
+
+#define VMSTATE_UINT16_TEST(_f, _s, _t)                               \
+    VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_uint16, uint16_t)
+
+#define VMSTATE_UINT32_TEST(_f, _s, _t)                                  \
+    VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_uint32, uint32_t)
+
+
+#define VMSTATE_FLOAT64_V(_f, _s, _v)                                 \
+    VMSTATE_SINGLE(_f, _s, _v, vmstate_info_float64, float64)
+
+#define VMSTATE_FLOAT64(_f, _s)                                       \
+    VMSTATE_FLOAT64_V(_f, _s, 0)
+
+#define VMSTATE_TIMER_TEST(_f, _s, _test)                             \
+    VMSTATE_POINTER_TEST(_f, _s, _test, vmstate_info_timer, QEMUTimer *)
+
+#define VMSTATE_TIMER_V(_f, _s, _v)                                   \
+    VMSTATE_POINTER(_f, _s, _v, vmstate_info_timer, QEMUTimer *)
+
+#define VMSTATE_TIMER(_f, _s)                                         \
+    VMSTATE_TIMER_V(_f, _s, 0)
+
+#define VMSTATE_TIMER_ARRAY(_f, _s, _n)                              \
+    VMSTATE_ARRAY_OF_POINTER(_f, _s, _n, 0, vmstate_info_timer, QEMUTimer *)
+
+#define VMSTATE_BOOL_ARRAY_V(_f, _s, _n, _v)                         \
+    VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_bool, bool)
+
+#define VMSTATE_BOOL_ARRAY(_f, _s, _n)                               \
+    VMSTATE_BOOL_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_UINT16_ARRAY_V(_f, _s, _n, _v)                         \
+    VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_uint16, uint16_t)
+
+#define VMSTATE_UINT16_2DARRAY_V(_f, _s, _n1, _n2, _v)                \
+    VMSTATE_2DARRAY(_f, _s, _n1, _n2, _v, vmstate_info_uint16, uint16_t)
+
+#define VMSTATE_UINT16_ARRAY(_f, _s, _n)                               \
+    VMSTATE_UINT16_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_UINT16_2DARRAY(_f, _s, _n1, _n2)                      \
+    VMSTATE_UINT16_2DARRAY_V(_f, _s, _n1, _n2, 0)
+
+#define VMSTATE_UINT8_2DARRAY_V(_f, _s, _n1, _n2, _v)                 \
+    VMSTATE_2DARRAY(_f, _s, _n1, _n2, _v, vmstate_info_uint8, uint8_t)
+
+#define VMSTATE_UINT8_ARRAY_V(_f, _s, _n, _v)                         \
+    VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_uint8, uint8_t)
+
+#define VMSTATE_UINT8_ARRAY(_f, _s, _n)                               \
+    VMSTATE_UINT8_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_UINT8_2DARRAY(_f, _s, _n1, _n2)                       \
+    VMSTATE_UINT8_2DARRAY_V(_f, _s, _n1, _n2, 0)
+
+#define VMSTATE_UINT32_ARRAY_V(_f, _s, _n, _v)                        \
+    VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_uint32, uint32_t)
+
+#define VMSTATE_UINT32_ARRAY(_f, _s, _n)                              \
+    VMSTATE_UINT32_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_UINT64_ARRAY_V(_f, _s, _n, _v)                        \
+    VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_uint64, uint64_t)
+
+#define VMSTATE_UINT64_ARRAY(_f, _s, _n)                              \
+    VMSTATE_UINT64_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_INT16_ARRAY_V(_f, _s, _n, _v)                         \
+    VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_int16, int16_t)
+
+#define VMSTATE_INT16_ARRAY(_f, _s, _n)                               \
+    VMSTATE_INT16_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_INT32_ARRAY_V(_f, _s, _n, _v)                         \
+    VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_int32, int32_t)
+
+#define VMSTATE_INT32_ARRAY(_f, _s, _n)                               \
+    VMSTATE_INT32_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_UINT32_SUB_ARRAY(_f, _s, _start, _num)                \
+    VMSTATE_SUB_ARRAY(_f, _s, _start, _num, 0, vmstate_info_uint32, uint32_t)
+
+#define VMSTATE_INT64_ARRAY_V(_f, _s, _n, _v)                         \
+    VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_int64, int64_t)
+
+#define VMSTATE_INT64_ARRAY(_f, _s, _n)                               \
+    VMSTATE_INT64_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_FLOAT64_ARRAY_V(_f, _s, _n, _v)                       \
+    VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_float64, float64)
+
+#define VMSTATE_FLOAT64_ARRAY(_f, _s, _n)                             \
+    VMSTATE_FLOAT64_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_BUFFER_V(_f, _s, _v)                                  \
+    VMSTATE_STATIC_BUFFER(_f, _s, _v, NULL, 0, sizeof(typeof_field(_s, _f)))
+
+#define VMSTATE_BUFFER(_f, _s)                                        \
+    VMSTATE_BUFFER_V(_f, _s, 0)
+
+#define VMSTATE_PARTIAL_BUFFER(_f, _s, _size)                         \
+    VMSTATE_STATIC_BUFFER(_f, _s, 0, NULL, 0, _size)
+
+#define VMSTATE_BUFFER_START_MIDDLE(_f, _s, _start) \
+    VMSTATE_STATIC_BUFFER(_f, _s, 0, NULL, _start, sizeof(typeof_field(_s, _f)))
+
+#define VMSTATE_PARTIAL_VBUFFER(_f, _s, _size)                        \
+    VMSTATE_VBUFFER(_f, _s, 0, NULL, 0, _size)
+
+#define VMSTATE_PARTIAL_VBUFFER_UINT32(_f, _s, _size)                        \
+    VMSTATE_VBUFFER_UINT32(_f, _s, 0, NULL, 0, _size)
+
+#define VMSTATE_SUB_VBUFFER(_f, _s, _start, _size)                    \
+    VMSTATE_VBUFFER(_f, _s, 0, NULL, _start, _size)
+
+#define VMSTATE_BUFFER_TEST(_f, _s, _test)                            \
+    VMSTATE_STATIC_BUFFER(_f, _s, 0, _test, 0, sizeof(typeof_field(_s, _f)))
+
+#define VMSTATE_BUFFER_UNSAFE(_field, _state, _version, _size)        \
+    VMSTATE_BUFFER_UNSAFE_INFO(_field, _state, _version, vmstate_info_buffer, _size)
+
+#define VMSTATE_UNUSED_V(_v, _size)                                   \
+    VMSTATE_UNUSED_BUFFER(NULL, _v, _size)
+
+#define VMSTATE_UNUSED(_size)                                         \
+    VMSTATE_UNUSED_V(0, _size)
+
+#define VMSTATE_UNUSED_TEST(_test, _size)                             \
+    VMSTATE_UNUSED_BUFFER(_test, 0, _size)
+
+#define VMSTATE_END_OF_LIST()                                         \
+    {}
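+
+/* Putting the helpers above together, a device would typically describe its
+ * migratable state like this (sketch only; FooState and its members are
+ * placeholders):
+ *
+ *   typedef struct FooState {
+ *       uint32_t counter;
+ *       uint8_t  buf[16];
+ *   } FooState;
+ *
+ *   static const VMStateDescription vmstate_foo = {
+ *       .name = "foo",
+ *       .version_id = 1,
+ *       .minimum_version_id = 1,
+ *       .fields = (VMStateField[]) {
+ *           VMSTATE_UINT32(counter, FooState),
+ *           VMSTATE_UINT8_ARRAY(buf, FooState, 16),
+ *           VMSTATE_END_OF_LIST()
+ *       }
+ *   };
+ */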
+
+int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
+                       void *opaque, int version_id);
+void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
+                        void *opaque);
+
+int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
+                                   const VMStateDescription *vmsd,
+                                   void *base, int alias_id,
+                                   int required_for_version);
+
+static inline int vmstate_register(DeviceState *dev, int instance_id,
+                                   const VMStateDescription *vmsd,
+                                   void *opaque)
+{
+    return vmstate_register_with_alias_id(dev, instance_id, vmsd,
+                                          opaque, -1, 0);
+}
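+
+/* For instance, the hypothetical vmstate_foo description sketched above
+ * would be hooked up from device init code with:
+ *
+ *   vmstate_register(NULL, 0, &vmstate_foo, foo);
+ *
+ * where foo is the FooState instance to be saved and restored.
+ */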
+
+void vmstate_unregister(DeviceState *dev, const VMStateDescription *vmsd,
+                        void *opaque);
+
+#ifndef CONFIG_ANDROID
+struct MemoryRegion;
+void vmstate_register_ram(struct MemoryRegion *memory, DeviceState *dev);
+void vmstate_unregister_ram(struct MemoryRegion *memory, DeviceState *dev);
+void vmstate_register_ram_global(struct MemoryRegion *memory);
+#endif  // !CONFIG_ANDROID
+
+#endif
diff --git a/include/qemu-common.h b/include/qemu-common.h
index b203354..293c5ab 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -12,6 +12,7 @@
 #ifndef QEMU_COMMON_H
 #define QEMU_COMMON_H
 
+#include <inttypes.h>
 #include <setjmp.h>
 
 #include "qemu/compiler.h"
@@ -80,6 +81,15 @@
 #define TIME_MAX LONG_MAX
 #endif
 
+/* HOST_LONG_BITS is the size of a native pointer in bits. */
+#if UINTPTR_MAX == UINT32_MAX
+# define HOST_LONG_BITS 32
+#elif UINTPTR_MAX == UINT64_MAX
+# define HOST_LONG_BITS 64
+#else
+# error Unknown pointer size
+#endif
+
 typedef int (*fprintf_function)(FILE *f, const char *fmt, ...)
     GCC_FMT_ATTR(2, 3);
 
@@ -98,12 +108,7 @@
     _fullpath(resolved_path, path, _MAX_PATH);
     return resolved_path;
 }
-
-#define PRId64 "I64d"
-#define PRIx64 "I64x"
-#define PRIu64 "I64u"
-#define PRIo64 "I64o"
-#endif
+#endif /* _WIN32 */
 
 /* bottom halves */
 typedef void QEMUBHFunc(void *opaque);
@@ -310,10 +315,6 @@
 /* Force QEMU to process pending events */
 void qemu_notify_event(void);
 
-/* Unblock cpu */
-void qemu_cpu_kick(void *env);
-int qemu_cpu_self(void *env);
-
 /* work queue */
 struct qemu_work_item {
     struct qemu_work_item *next;
@@ -322,12 +323,6 @@
     int done;
 };
 
-#ifdef CONFIG_USER_ONLY
-#define qemu_init_vcpu(env) do { } while (0)
-#else
-void qemu_init_vcpu(void *env);
-#endif
-
 typedef struct QEMUIOVector {
     struct iovec *iov;
     int niov;
diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h
new file mode 100644
index 0000000..308bbb7
--- /dev/null
+++ b/include/qemu/bitmap.h
@@ -0,0 +1,222 @@
+/*
+ * Bitmap Module
+ *
+ * Copyright (C) 2010 Corentin Chary <corentin.chary@gmail.com>
+ *
+ * Mostly inspired by (stolen from) linux/bitmap.h and linux/bitops.h
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef BITMAP_H
+#define BITMAP_H
+
+#include "qemu-common.h"
+#include "qemu/bitops.h"
+
+/*
+ * The available bitmap operations and their rough meaning in the
+ * case that the bitmap is a single unsigned long are thus:
+ *
+ * Note that nbits should always be a compile-time evaluable constant.
+ * Otherwise many inlines will generate horrible code.
+ *
+ * bitmap_zero(dst, nbits)			*dst = 0UL
+ * bitmap_fill(dst, nbits)			*dst = ~0UL
+ * bitmap_copy(dst, src, nbits)			*dst = *src
+ * bitmap_and(dst, src1, src2, nbits)		*dst = *src1 & *src2
+ * bitmap_or(dst, src1, src2, nbits)		*dst = *src1 | *src2
+ * bitmap_xor(dst, src1, src2, nbits)		*dst = *src1 ^ *src2
+ * bitmap_andnot(dst, src1, src2, nbits)	*dst = *src1 & ~(*src2)
+ * bitmap_complement(dst, src, nbits)		*dst = ~(*src)
+ * bitmap_equal(src1, src2, nbits)		Are *src1 and *src2 equal?
+ * bitmap_intersects(src1, src2, nbits) 	Do *src1 and *src2 overlap?
+ * bitmap_empty(src, nbits)			Are all bits zero in *src?
+ * bitmap_full(src, nbits)			Are all bits set in *src?
+ * bitmap_set(dst, pos, nbits)			Set specified bit area
+ * bitmap_clear(dst, pos, nbits)		Clear specified bit area
+ * bitmap_find_next_zero_area(buf, len, pos, n, mask)	Find bit free area
+ */
+
+/*
+ * Also the following operations apply to bitmaps.
+ *
+ * set_bit(bit, addr)			*addr |= bit
+ * clear_bit(bit, addr)			*addr &= ~bit
+ * change_bit(bit, addr)		*addr ^= bit
+ * test_bit(bit, addr)			Is bit set in *addr?
+ * test_and_set_bit(bit, addr)		Set bit and return old value
+ * test_and_clear_bit(bit, addr)	Clear bit and return old value
+ * test_and_change_bit(bit, addr)	Change bit and return old value
+ * find_first_zero_bit(addr, nbits)	Position first zero bit in *addr
+ * find_first_bit(addr, nbits)		Position first set bit in *addr
+ * find_next_zero_bit(addr, nbits, bit)	Position next zero bit in *addr >= bit
+ * find_next_bit(addr, nbits, bit)	Position next set bit in *addr >= bit
+ */
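+
+/* A minimal usage sketch of the operations listed above ('dirty' and
+ * NB_PAGES are made up for illustration; NB_PAGES should be a compile-time
+ * constant, as noted above):
+ *
+ *   DECLARE_BITMAP(dirty, NB_PAGES);
+ *
+ *   bitmap_zero(dirty, NB_PAGES);
+ *   bitmap_set(dirty, 3, 2);               (marks bits 3 and 4)
+ *   if (test_bit(3, dirty)) {
+ *       ... handle a dirty page ...
+ *   }
+ */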
+
+#define BITMAP_LAST_WORD_MASK(nbits)                                    \
+    (                                                                   \
+        ((nbits) % BITS_PER_LONG) ?                                     \
+        (1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL                       \
+        )
+
+#define DECLARE_BITMAP(name,bits)                  \
+	unsigned long name[BITS_TO_LONGS(bits)]
+
+#define small_nbits(nbits)                      \
+	((nbits) <= BITS_PER_LONG)
+
+int slow_bitmap_empty(const unsigned long *bitmap, int bits);
+int slow_bitmap_full(const unsigned long *bitmap, int bits);
+int slow_bitmap_equal(const unsigned long *bitmap1,
+                   const unsigned long *bitmap2, int bits);
+void slow_bitmap_complement(unsigned long *dst, const unsigned long *src,
+                         int bits);
+void slow_bitmap_shift_right(unsigned long *dst,
+                          const unsigned long *src, int shift, int bits);
+void slow_bitmap_shift_left(unsigned long *dst,
+                         const unsigned long *src, int shift, int bits);
+int slow_bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
+                 const unsigned long *bitmap2, int bits);
+void slow_bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
+                 const unsigned long *bitmap2, int bits);
+void slow_bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
+                  const unsigned long *bitmap2, int bits);
+int slow_bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
+                    const unsigned long *bitmap2, int bits);
+int slow_bitmap_intersects(const unsigned long *bitmap1,
+			const unsigned long *bitmap2, int bits);
+
+static inline unsigned long *bitmap_new(int nbits)
+{
+    int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+    return g_malloc0(len);
+}
+
+static inline void bitmap_zero(unsigned long *dst, int nbits)
+{
+    if (small_nbits(nbits)) {
+        *dst = 0UL;
+    } else {
+        int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+        memset(dst, 0, len);
+    }
+}
+
+static inline void bitmap_fill(unsigned long *dst, int nbits)
+{
+    size_t nlongs = BITS_TO_LONGS(nbits);
+    if (!small_nbits(nbits)) {
+        int len = (nlongs - 1) * sizeof(unsigned long);
+        memset(dst, 0xff,  len);
+    }
+    dst[nlongs - 1] = BITMAP_LAST_WORD_MASK(nbits);
+}
+
+static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
+                               int nbits)
+{
+    if (small_nbits(nbits)) {
+        *dst = *src;
+    } else {
+        int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+        memcpy(dst, src, len);
+    }
+}
+
+static inline int bitmap_and(unsigned long *dst, const unsigned long *src1,
+                             const unsigned long *src2, int nbits)
+{
+    if (small_nbits(nbits)) {
+        return (*dst = *src1 & *src2) != 0;
+    }
+    return slow_bitmap_and(dst, src1, src2, nbits);
+}
+
+static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
+			const unsigned long *src2, int nbits)
+{
+    if (small_nbits(nbits)) {
+        *dst = *src1 | *src2;
+    } else {
+        slow_bitmap_or(dst, src1, src2, nbits);
+    }
+}
+
+static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
+			const unsigned long *src2, int nbits)
+{
+    if (small_nbits(nbits)) {
+        *dst = *src1 ^ *src2;
+    } else {
+        slow_bitmap_xor(dst, src1, src2, nbits);
+    }
+}
+
+static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1,
+			const unsigned long *src2, int nbits)
+{
+    if (small_nbits(nbits)) {
+        return (*dst = *src1 & ~(*src2)) != 0;
+    }
+    return slow_bitmap_andnot(dst, src1, src2, nbits);
+}
+
+static inline void bitmap_complement(unsigned long *dst, const unsigned long *src,
+			int nbits)
+{
+    if (small_nbits(nbits)) {
+        *dst = ~(*src) & BITMAP_LAST_WORD_MASK(nbits);
+    } else {
+        slow_bitmap_complement(dst, src, nbits);
+    }
+}
+
+static inline int bitmap_equal(const unsigned long *src1,
+			const unsigned long *src2, int nbits)
+{
+    if (small_nbits(nbits)) {
+        return ! ((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits));
+    } else {
+        return slow_bitmap_equal(src1, src2, nbits);
+    }
+}
+
+static inline int bitmap_empty(const unsigned long *src, int nbits)
+{
+    if (small_nbits(nbits)) {
+        return ! (*src & BITMAP_LAST_WORD_MASK(nbits));
+    } else {
+        return slow_bitmap_empty(src, nbits);
+    }
+}
+
+static inline int bitmap_full(const unsigned long *src, int nbits)
+{
+    if (small_nbits(nbits)) {
+        return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits));
+    } else {
+        return slow_bitmap_full(src, nbits);
+    }
+}
+
+static inline int bitmap_intersects(const unsigned long *src1,
+			const unsigned long *src2, int nbits)
+{
+    if (small_nbits(nbits)) {
+        return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
+    } else {
+        return slow_bitmap_intersects(src1, src2, nbits);
+    }
+}
+
+void bitmap_set(unsigned long *map, int i, int len);
+void bitmap_clear(unsigned long *map, int start, int nr);
+unsigned long bitmap_find_next_zero_area(unsigned long *map,
+					 unsigned long size,
+					 unsigned long start,
+					 unsigned int nr,
+					 unsigned long align_mask);
+
+#endif /* BITMAP_H */
diff --git a/include/qemu/log.h b/include/qemu/log.h
index 9072588..fe47eca 100644
--- a/include/qemu/log.h
+++ b/include/qemu/log.h
@@ -81,7 +81,7 @@
  *
  * Logs the output of cpu_dump_state().
  */
-static inline void log_cpu_state(CPUOldState *cpu, int flags)
+static inline void log_cpu_state(CPUState *cpu, int flags)
 {
     if (qemu_log_enabled()) {
         cpu_dump_state(cpu, qemu_logfile, fprintf, flags);
@@ -95,7 +95,7 @@
  *
  * Logs the output of cpu_dump_state() if loglevel includes @mask.
  */
-static inline void log_cpu_state_mask(int mask, CPUOldState *cpu, int flags)
+static inline void log_cpu_state_mask(int mask, CPUState *cpu, int flags)
 {
     if (qemu_loglevel & mask) {
         log_cpu_state(cpu, flags);
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 2d6ff85..f4a3fcc 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -92,12 +92,6 @@
 #define inline always_inline
 #endif
 
-#ifdef __i386__
-#define REGPARM __attribute((regparm(3)))
-#else
-#define REGPARM
-#endif
-
 #define qemu_printf printf
 
 int qemu_daemon(int nochdir, int noclose);
diff --git a/include/qemu/tls.h b/include/qemu/tls.h
new file mode 100644
index 0000000..b92ea9d
--- /dev/null
+++ b/include/qemu/tls.h
@@ -0,0 +1,52 @@
+/*
+ * Abstraction layer for defining and using TLS variables
+ *
+ * Copyright (c) 2011 Red Hat, Inc
+ * Copyright (c) 2011 Linaro Limited
+ *
+ * Authors:
+ *  Paolo Bonzini <pbonzini@redhat.com>
+ *  Peter Maydell <peter.maydell@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef QEMU_TLS_H
+#define QEMU_TLS_H
+
+/* Per-thread variables. Note that we only have implementations
+ * which are really thread-local on Linux; the dummy implementations
+ * define plain global variables.
+ *
+ * This means that for the moment use should be restricted to
+ * per-VCPU variables, which are OK because:
+ *  - the only -user mode supporting multiple VCPU threads is linux-user
+ *  - TCG system mode is single-threaded regarding VCPUs
+ *  - KVM system mode is multi-threaded but limited to Linux
+ *
+ * TODO: proper implementations via Win32 .tls sections and
+ * POSIX pthread_getspecific.
+ */
+#ifdef __linux__
+#define DECLARE_TLS(type, x) extern DEFINE_TLS(type, x)
+#define DEFINE_TLS(type, x)  __thread __typeof__(type) tls__##x
+#define tls_var(x)           tls__##x
+#else
+/* Dummy implementations which define plain global variables */
+#define DECLARE_TLS(type, x) extern DEFINE_TLS(type, x)
+#define DEFINE_TLS(type, x)  __typeof__(type) tls__##x
+#define tls_var(x)           tls__##x
+#endif
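+
+/* Typical usage of the macros above (sketch; 'tls_counter' is a made-up
+ * variable name):
+ *
+ *   in a header:      DECLARE_TLS(int, tls_counter);
+ *   in one .c file:   DEFINE_TLS(int, tls_counter);
+ *   to access it:     tls_var(tls_counter)++;
+ */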
+
+#endif
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index db7f1d1..a8a7ba8 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -73,8 +73,7 @@
 // NOTE(digit): Remove typedefs below when everything is upstreamed.
 typedef struct DisplayAllocator DisplayAllocator;
 typedef struct IRQState *qemu_irq;
-typedef struct TextConsole TextConsole;
-typedef TextConsole QEMUConsole;
+typedef struct QEMUConsole QEMUConsole;
 typedef struct VLANState VLANState;
 typedef struct VLANClientState VLANClientState;
 
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
new file mode 100644
index 0000000..dcab7d4
--- /dev/null
+++ b/include/qom/cpu.h
@@ -0,0 +1,266 @@
+/*
+ * QEMU CPU model
+ *
+ * Copyright (c) 2012 SUSE LINUX Products GmbH
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see
+ * <http://www.gnu.org/licenses/gpl-2.0.html>
+ */
+#ifndef QEMU_CPU_H
+#define QEMU_CPU_H
+
+#include <signal.h>
+#include "hw/qdev-core.h"
+#include "exec/hwaddr.h"
+#include "qemu/queue.h"
+#include "qemu/thread.h"
+#include "qemu/tls.h"
+#include "qemu/typedefs.h"
+
+typedef int (*WriteCoreDumpFunction)(void *buf, size_t size, void *opaque);
+
+/**
+ * vaddr:
+ * Type wide enough to contain any #target_ulong virtual address.
+ */
+typedef uint64_t vaddr;
+#define VADDR_PRId PRId64
+#define VADDR_PRIu PRIu64
+#define VADDR_PRIo PRIo64
+#define VADDR_PRIx PRIx64
+#define VADDR_PRIX PRIX64
+#define VADDR_MAX UINT64_MAX
+
+typedef struct CPUState CPUState;
+
+typedef void (*CPUUnassignedAccess)(CPUState *cpu, hwaddr addr,
+                                    bool is_write, bool is_exec, int opaque,
+                                    unsigned size);
+
+struct TranslationBlock;
+
+// TODO(digit): Make this a proper QOM object that inherits from
+// DeviceState/DeviceClass.
+struct CPUState {
+    int nr_cores;
+    int nr_threads;
+    int numa_node;
+
+    struct QemuThread *thread;
+
+    uint32_t host_tid; /* host thread ID */
+    int running; /* Nonzero if cpu is currently running (usermode). */
+    struct QemuCond *halt_cond;
+    struct qemu_work_item *queued_work_first, *queued_work_last;
+
+    uint32_t created;
+    uint32_t stop;   /* Stop request */
+    uint32_t stopped; /* Artificially stopped */
+
+    volatile sig_atomic_t exit_request;
+    uint32_t interrupt_request;
+
+    void *env_ptr; /* CPUArchState */
+    struct TranslationBlock *current_tb; /* currently executing TB  */
+    int singlestep_enabled;
+    struct GDBRegisterState *gdb_regs;
+    QTAILQ_ENTRY(CPUState) node;   /* next CPU sharing TB cache */
+
+    const char *cpu_model_str;
+
+    int kvm_fd;
+    int kvm_vcpu_dirty;
+    struct KVMState *kvm_state;
+    struct kvm_run *kvm_run;
+
+    struct hax_vcpu_state *hax_vcpu;
+
+    /* TODO Move common fields from CPUArchState here. */
+    int cpu_index; /* used by alpha TCG */
+    uint32_t halted; /* used by alpha, cris, ppc TCG */
+};
+
+#define CPU(obj)  ((CPUState*)(obj))
+
+QTAILQ_HEAD(CPUTailQ, CPUState);
+extern struct CPUTailQ cpus;
+#define CPU_NEXT(cpu) QTAILQ_NEXT(cpu, node)
+#define CPU_FOREACH(cpu) QTAILQ_FOREACH(cpu, &cpus, node)
+#define CPU_FOREACH_SAFE(cpu, next_cpu) \
+    QTAILQ_FOREACH_SAFE(cpu, &cpus, node, next_cpu)
+#define first_cpu QTAILQ_FIRST(&cpus)
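+
+/* Example (sketch): the iteration macros above are meant to be used like
+ *
+ *   CPUState *cpu;
+ *   CPU_FOREACH(cpu) {
+ *       if (qemu_cpu_has_work(cpu)) {
+ *           qemu_cpu_kick(cpu);
+ *       }
+ *   }
+ *
+ * qemu_cpu_has_work() and qemu_cpu_kick() are declared further below.
+ */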
+
+DECLARE_TLS(CPUState *, current_cpu);
+#define current_cpu tls_var(current_cpu)
+
+// TODO(digit): Remove this.
+#define cpu_single_env ((CPUArchState*)current_cpu->env_ptr)
+
+/**
+ * CPUDumpFlags:
+ * @CPU_DUMP_CODE:
+ * @CPU_DUMP_FPU: dump FPU register state, not just integer
+ * @CPU_DUMP_CCOP: dump info about TCG QEMU's condition code optimization state
+ */
+enum CPUDumpFlags {
+    CPU_DUMP_CODE = 0x00010000,
+    CPU_DUMP_FPU  = 0x00020000,
+    CPU_DUMP_CCOP = 0x00040000,
+};
+
+/**
+ * cpu_dump_state:
+ * @cpu: The CPU whose state is to be dumped.
+ * @f: File to dump to.
+ * @cpu_fprintf: Function to dump with.
+ * @flags: Flags specifying what to dump.
+ *
+ * Dumps CPU state.
+ */
+void cpu_dump_state(CPUState *cpu, FILE *f, fprintf_function cpu_fprintf,
+                    int flags);
+
+/**
+ * cpu_dump_statistics:
+ * @cpu: The CPU whose state is to be dumped.
+ * @f: File to dump to.
+ * @cpu_fprintf: Function to dump with.
+ * @flags: Flags specifying what to dump.
+ *
+ * Dumps CPU statistics.
+ */
+void cpu_dump_statistics(CPUState *cpu, FILE *f, fprintf_function cpu_fprintf,
+                         int flags);
+
+/**
+ * cpu_reset:
+ * @cpu: The CPU whose state is to be reset.
+ */
+void cpu_reset(CPUState *cpu);
+
+/**
+ * qemu_cpu_has_work:
+ * @cpu: The vCPU to check.
+ *
+ * Checks whether the CPU has work to do.
+ *
+ * Returns: %true if the CPU has work, %false otherwise.
+ */
+bool qemu_cpu_has_work(CPUState *cpu);
+
+/**
+ * qemu_cpu_is_self:
+ * @cpu: The vCPU to check against.
+ *
+ * Checks whether the caller is executing on the vCPU thread.
+ *
+ * Returns: %true if called from @cpu's thread, %false otherwise.
+ */
+bool qemu_cpu_is_self(CPUState *cpu);
+
+/**
+ * qemu_cpu_kick:
+ * @cpu: The vCPU to kick.
+ *
+ * Kicks @cpu's thread.
+ */
+void qemu_cpu_kick(CPUState *cpu);
+
+/**
+ * cpu_is_stopped:
+ * @cpu: The CPU to check.
+ *
+ * Checks whether the CPU is stopped.
+ *
+ * Returns: %true if run state is not running or if artificially stopped;
+ * %false otherwise.
+ */
+bool cpu_is_stopped(CPUState *cpu);
+
+/**
+ * run_on_cpu:
+ * @cpu: The vCPU to run on.
+ * @func: The function to be executed.
+ * @data: Data to pass to the function.
+ *
+ * Schedules the function @func for execution on the vCPU @cpu.
+ */
+void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data);
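+
+/* For example (sketch; do_full_reset is a hypothetical callback), work can
+ * be queued onto a vCPU thread with:
+ *
+ *   static void do_full_reset(void *data)
+ *   {
+ *       cpu_reset((CPUState *)data);
+ *   }
+ *
+ *   run_on_cpu(cpu, do_full_reset, cpu);
+ */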
+
+/**
+ * qemu_get_cpu:
+ * @index: The CPUState@cpu_index value of the CPU to obtain.
+ *
+ * Gets a CPU matching @index.
+ *
+ * Returns: The CPU or %NULL if there is no matching CPU.
+ */
+CPUState *qemu_get_cpu(int index);
+
+/**
+ * cpu_interrupt:
+ * @cpu: The CPU to set an interrupt on.
+ * @mask: The interrupts to set.
+ *
+ * Invokes the interrupt handler.
+ */
+void cpu_interrupt(CPUState *cpu, int mask);
+
+/**
+ * cpu_reset_interrupt:
+ * @cpu: The CPU to clear the interrupt on.
+ * @mask: The interrupt mask to clear.
+ *
+ * Resets interrupts on the vCPU @cpu.
+ */
+void cpu_reset_interrupt(CPUState *cpu, int mask);
+
+/**
+ * cpu_exit:
+ * @cpu: The CPU to exit.
+ *
+ * Requests the CPU @cpu to exit execution.
+ */
+void cpu_exit(CPUState *cpu);
+
+/**
+ * cpu_resume:
+ * @cpu: The CPU to resume.
+ *
+ * Resumes CPU, i.e. puts CPU into runnable state.
+ */
+void cpu_resume(CPUState *cpu);
+
+/**
+ * qemu_init_vcpu:
+ * @cpu: The vCPU to initialize.
+ *
+ * Initializes a vCPU.
+ */
+void qemu_init_vcpu(CPUState *cpu);
+
+#define SSTEP_ENABLE  0x1  /* Enable simulated HW single stepping */
+#define SSTEP_NOIRQ   0x2  /* Do not use IRQ while single stepping */
+#define SSTEP_NOTIMER 0x4  /* Do not run timers while single stepping */
+
+/**
+ * cpu_single_step:
+ * @cpu: CPU to set the single-step flags for.
+ * @enabled: Flags to enable.
+ *
+ * Enables or disables single-stepping for @cpu.
+ */
+void cpu_single_step(CPUState *cpu, int enabled);
+
+#endif  // QEMU_CPU_H
diff --git a/include/qom/object.h b/include/qom/object.h
index e420e10..a275db2 100644
--- a/include/qom/object.h
+++ b/include/qom/object.h
@@ -985,12 +985,12 @@
  *   ambiguous match
  *
  * There are two types of supported paths--absolute paths and partial paths.
- *
+ * 
  * Absolute paths are derived from the root object and can follow child<> or
  * link<> properties.  Since they can follow link<> properties, they can be
  * arbitrarily long.  Absolute paths look like absolute filenames and are
  * prefixed with a leading slash.
- *
+ * 
  * Partial paths look like relative filenames.  They do not begin with a
  * prefix.  The matching rules for partial paths are subtle but designed to make
  * specifying objects easy.  At each level of the composition tree, the partial
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 7f86d89..f86026a 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -38,10 +38,10 @@
 
 int kvm_init(int smp_cpus);
 
-int kvm_init_vcpu(CPUOldState *env);
+int kvm_init_vcpu(CPUState *env);
 int kvm_sync_vcpus(void);
 
-int kvm_cpu_exec(CPUOldState *env);
+int kvm_cpu_exec(CPUState *env);
 
 void kvm_set_phys_mem(hwaddr start_addr,
                       ram_addr_t size,
@@ -61,12 +61,12 @@
 int kvm_coalesce_mmio_region(hwaddr start, ram_addr_t size);
 int kvm_uncoalesce_mmio_region(hwaddr start, ram_addr_t size);
 
-int kvm_insert_breakpoint(CPUOldState *current_env, target_ulong addr,
+int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                           target_ulong len, int type);
-int kvm_remove_breakpoint(CPUOldState *current_env, target_ulong addr,
+int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                           target_ulong len, int type);
-void kvm_remove_all_breakpoints(CPUOldState *current_env);
-int kvm_update_guest_debug(CPUOldState *env, unsigned long reinject_trap);
+void kvm_remove_all_breakpoints(CPUState *current_env);
+int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap);
 
 /* internal API */
 
@@ -77,28 +77,28 @@
 
 int kvm_vm_ioctl(KVMState *s, int type, ...);
 
-int kvm_vcpu_ioctl(CPUOldState *env, int type, ...);
+int kvm_vcpu_ioctl(CPUState *env, int type, ...);
 
-int kvm_get_mp_state(CPUOldState *env);
-int kvm_put_mp_state(CPUOldState *env);
+int kvm_get_mp_state(CPUState *env);
+int kvm_put_mp_state(CPUState *env);
 
 /* Arch specific hooks */
 
-int kvm_arch_post_run(CPUOldState *env, struct kvm_run *run);
+int kvm_arch_post_run(CPUState *env, struct kvm_run *run);
 
-int kvm_arch_vcpu_run(CPUOldState *env);
+int kvm_arch_vcpu_run(CPUState *env);
 
-int kvm_arch_handle_exit(CPUOldState *env, struct kvm_run *run);
+int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run);
 
-int kvm_arch_pre_run(CPUOldState *env, struct kvm_run *run);
+int kvm_arch_pre_run(CPUState *env, struct kvm_run *run);
 
-int kvm_arch_get_registers(CPUOldState *env);
+int kvm_arch_get_registers(CPUState *env);
 
-int kvm_arch_put_registers(CPUOldState *env);
+int kvm_arch_put_registers(CPUState *env);
 
 int kvm_arch_init(KVMState *s, int smp_cpus);
 
-int kvm_arch_init_vcpu(CPUOldState *env);
+int kvm_arch_init_vcpu(CPUState *env);
 
 struct kvm_guest_debug;
 struct kvm_debug_exit_arch;
@@ -114,14 +114,14 @@
 
 int kvm_arch_debug(struct kvm_debug_exit_arch *arch_info);
 
-struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUOldState *env,
+struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
                                                  target_ulong pc);
 
-int kvm_sw_breakpoints_active(CPUOldState *env);
+int kvm_sw_breakpoints_active(CPUState *env);
 
-int kvm_arch_insert_sw_breakpoint(CPUOldState *current_env,
+int kvm_arch_insert_sw_breakpoint(CPUState *current_env,
                                   struct kvm_sw_breakpoint *bp);
-int kvm_arch_remove_sw_breakpoint(CPUOldState *current_env,
+int kvm_arch_remove_sw_breakpoint(CPUState *current_env,
                                   struct kvm_sw_breakpoint *bp);
 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
                                   target_ulong len, int type);
@@ -129,31 +129,31 @@
                                   target_ulong len, int type);
 void kvm_arch_remove_all_hw_breakpoints(void);
 
-void kvm_arch_update_guest_debug(CPUOldState *env, struct kvm_guest_debug *dbg);
+void kvm_arch_update_guest_debug(CPUState *env, struct kvm_guest_debug *dbg);
 
 int kvm_check_extension(KVMState *s, unsigned int extension);
 
-uint32_t kvm_arch_get_supported_cpuid(CPUOldState *env, uint32_t function,
+uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function,
                                       int reg);
 
 /* generic hooks - to be moved/refactored once there are more users */
 #ifdef CONFIG_HAX
-void hax_vcpu_sync_state(CPUOldState *env, int modified);
+void hax_vcpu_sync_state(CPUState *cpu, int modified);
 #endif
-static inline void cpu_synchronize_state(CPUOldState *env, int modified)
+static inline void cpu_synchronize_state(CPUState *cpu, int modified)
 {
     if (kvm_enabled()) {
         if (modified)
-            kvm_arch_put_registers(env);
+            kvm_arch_put_registers(cpu);
         else
-            kvm_arch_get_registers(env);
+            kvm_arch_get_registers(cpu);
     }
 #ifdef CONFIG_HAX
-    hax_vcpu_sync_state(env, modified);
+    hax_vcpu_sync_state(cpu, modified);
 #endif
 }
 
-int kvm_get_sregs(CPUOldState *env);
+int kvm_get_sregs(CPUState *env);
 
 
 #endif
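The header now passes the generic CPUState into the KVM entry points and keeps the architecture registers behind env_ptr, as the updated cpu_synchronize_state() shows. A minimal standalone sketch of that indirection, using simplified stand-in structs rather than the real QEMU types:

    #include <stdio.h>

    /* Simplified stand-ins: only kvm_fd and env_ptr mirror real fields,
     * everything else in the actual structures is omitted. */
    typedef struct CPUArchState {
        unsigned int mp_state;          /* arch-specific register state */
    } CPUArchState;

    typedef struct CPUState {
        int kvm_fd;                     /* generic per-vcpu state */
        void *env_ptr;                  /* points at the CPUArchState */
    } CPUState;

    /* Same access pattern as kvm_put_mp_state()/kvm_cpu_exec() in this patch:
     * take the generic CPUState, then reach the arch registers via env_ptr. */
    static void dump_vcpu(CPUState *cpu)
    {
        CPUArchState *env = cpu->env_ptr;
        printf("vcpu fd=%d mp_state=%u\n", cpu->kvm_fd, env->mp_state);
    }

    int main(void)
    {
        CPUArchState env = { .mp_state = 1 };
        CPUState cpu = { .kvm_fd = 42, .env_ptr = &env };
        dump_vcpu(&cpu);
        return 0;
    }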
diff --git a/kvm-all.c b/kvm-all.c
index d92868b..8f0838b 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -87,15 +87,13 @@
 
 static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
                                          hwaddr start_addr,
-                                         hwaddr end_addr)
+                                         ram_addr_t size)
 {
     int i;
 
     for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
         KVMSlot *mem = &s->slots[i];
-
-        if (start_addr == mem->start_addr &&
-            end_addr == mem->start_addr + mem->memory_size) {
+        if (start_addr == mem->start_addr && size == mem->memory_size) {
             return mem;
         }
     }
@@ -108,7 +106,7 @@
  */
 static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
                                             hwaddr start_addr,
-                                            hwaddr end_addr)
+                                            ram_addr_t size)
 {
     KVMSlot *found = NULL;
     int i;
@@ -116,15 +114,29 @@
     for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
         KVMSlot *mem = &s->slots[i];
 
-        if (mem->memory_size == 0 ||
-            (found && found->start_addr < mem->start_addr)) {
+        // Skip empty slots.
+        if (!mem->memory_size)
+            continue;
+
+        // Skip non-overlapping slots. The conditions are:
+        //    start_addr + size <= mem->start_addr ||
+        //    start_addr >= mem->start_addr + mem->memory_size
+        //
+        // However, we want to avoid wrapping errors, so avoid
+        // additions and only compare positive values.
+        if (start_addr <= mem->start_addr) {
+            if (mem->start_addr - start_addr >= size) {
+                continue;
+            }
+        } else if (start_addr - mem->start_addr >= mem->memory_size) {
             continue;
         }
 
-        if (end_addr > mem->start_addr &&
-            start_addr < mem->start_addr + mem->memory_size) {
-            found = mem;
+        if (found && found->start_addr < mem->start_addr) {
+            continue;
         }
+
+        found = mem;
     }
 
     return found;
@@ -146,7 +158,7 @@
 }
 
 
-int kvm_init_vcpu(CPUOldState *env)
+int kvm_init_vcpu(CPUState *cpu)
 {
     KVMState *s = kvm_state;
     long mmap_size;
@@ -154,14 +166,14 @@
 
     dprintf("kvm_init_vcpu\n");
 
-    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
+    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, cpu->cpu_index);
     if (ret < 0) {
         dprintf("kvm_create_vcpu failed\n");
         goto err;
     }
 
-    env->kvm_fd = ret;
-    env->kvm_state = s;
+    cpu->kvm_fd = ret;
+    cpu->kvm_state = s;
 
     mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
     if (mmap_size < 0) {
@@ -169,48 +181,48 @@
         goto err;
     }
 
-    env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
-                        env->kvm_fd, 0);
-    if (env->kvm_run == MAP_FAILED) {
+    cpu->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+                        cpu->kvm_fd, 0);
+    if (cpu->kvm_run == MAP_FAILED) {
         ret = -errno;
         dprintf("mmap'ing vcpu state failed\n");
         goto err;
     }
 
-    ret = kvm_arch_init_vcpu(env);
+    ret = kvm_arch_init_vcpu(cpu);
 
 err:
     return ret;
 }
 
-int kvm_put_mp_state(CPUOldState *env)
+int kvm_put_mp_state(CPUState *cpu)
 {
+    CPUArchState *env = cpu->env_ptr;
     struct kvm_mp_state mp_state = { .mp_state = env->mp_state };
 
-    return kvm_vcpu_ioctl(env, KVM_SET_MP_STATE, &mp_state);
+    return kvm_vcpu_ioctl(cpu, KVM_SET_MP_STATE, &mp_state);
 }
 
-int kvm_get_mp_state(CPUOldState *env)
+int kvm_get_mp_state(CPUState *cpu)
 {
     struct kvm_mp_state mp_state;
     int ret;
 
-    ret = kvm_vcpu_ioctl(env, KVM_GET_MP_STATE, &mp_state);
+    ret = kvm_vcpu_ioctl(cpu, KVM_GET_MP_STATE, &mp_state);
     if (ret < 0) {
         return ret;
     }
+    CPUArchState *env = cpu->env_ptr;
     env->mp_state = mp_state.mp_state;
     return 0;
 }
 
 int kvm_sync_vcpus(void)
 {
-    CPUOldState *env;
+    CPUState *cpu;
 
-    for (env = first_cpu; env != NULL; env = env->next_cpu) {
-        int ret;
-
-        ret = kvm_arch_put_registers(env);
+    CPU_FOREACH(cpu) {
+        int ret = kvm_arch_put_registers(cpu);
         if (ret)
             return ret;
     }
@@ -225,7 +237,7 @@
                                       ram_addr_t size, int flags, int mask)
 {
     KVMState *s = kvm_state;
-    KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size);
+    KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, size);
     int old_flags;
 
     if (mem == NULL)  {
@@ -308,19 +320,19 @@
 
     d.dirty_bitmap = NULL;
     while (start_addr < end_addr) {
-        mem = kvm_lookup_overlapping_slot(s, start_addr, end_addr);
+        ram_addr_t start_size = (ram_addr_t)(end_addr - start_addr);
+
+        mem = kvm_lookup_overlapping_slot(s, start_addr, start_size);
         if (mem == NULL) {
             break;
         }
 
         size = ((mem->memory_size >> TARGET_PAGE_BITS) + 7) / 8;
-        if (!d.dirty_bitmap) {
-            d.dirty_bitmap = g_malloc(size);
-        } else if (size > allocated_size) {
+        if (size > allocated_size) {
             d.dirty_bitmap = g_realloc(d.dirty_bitmap, size);
+            allocated_size = size;
         }
-        allocated_size = size;
-        memset(d.dirty_bitmap, 0, allocated_size);
+        memset(d.dirty_bitmap, 0, size);
 
         d.slot = mem->slot;
 
@@ -331,7 +343,7 @@
         }
 
         for (phys_addr = mem->start_addr, addr = mem->phys_offset;
-             phys_addr < mem->start_addr + mem->memory_size;
+             phys_addr - mem->start_addr < mem->memory_size;
              phys_addr += TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
             unsigned long *bitmap = (unsigned long *)d.dirty_bitmap;
             unsigned nr = (phys_addr - mem->start_addr) >> TARGET_PAGE_BITS;
@@ -343,6 +355,11 @@
             }
         }
         start_addr = phys_addr;
+        if (!start_addr) {
+            // Handle wrap-around, which happens when a slot is mapped
+            // at the end of the physical address space.
+            break;
+        }
     }
     g_free(d.dirty_bitmap);
 
@@ -523,7 +540,7 @@
     return ret;
 }
 
-static int kvm_handle_io(CPUOldState *env, uint16_t port, void *data,
+static int kvm_handle_io(CPUState *cpu, uint16_t port, void *data,
                          int direction, int size, uint32_t count)
 {
     int i;
@@ -562,7 +579,7 @@
     return 1;
 }
 
-static void kvm_run_coalesced_mmio(CPUOldState *env, struct kvm_run *run)
+static void kvm_run_coalesced_mmio(CPUState *cpu, struct kvm_run *run)
 {
 #ifdef KVM_CAP_COALESCED_MMIO
     KVMState *s = kvm_state;
@@ -583,23 +600,24 @@
 #endif
 }
 
-int kvm_cpu_exec(CPUOldState *env)
+int kvm_cpu_exec(CPUState *cpu)
 {
-    struct kvm_run *run = env->kvm_run;
+    CPUArchState *env = cpu->env_ptr;
+    struct kvm_run *run = cpu->kvm_run;
     int ret;
 
     dprintf("kvm_cpu_exec()\n");
 
     do {
-        if (env->exit_request) {
+        if (cpu->exit_request) {
             dprintf("interrupt exit requested\n");
             ret = 0;
             break;
         }
 
-        kvm_arch_pre_run(env, run);
-        ret = kvm_arch_vcpu_run(env);
-        kvm_arch_post_run(env, run);
+        kvm_arch_pre_run(cpu, run);
+        ret = kvm_arch_vcpu_run(cpu);
+        kvm_arch_post_run(cpu, run);
 
         if (ret == -EINTR || ret == -EAGAIN) {
             dprintf("io window exit\n");
@@ -612,13 +630,13 @@
             abort();
         }
 
-        kvm_run_coalesced_mmio(env, run);
+        kvm_run_coalesced_mmio(cpu, run);
 
         ret = 0; /* exit loop */
         switch (run->exit_reason) {
         case KVM_EXIT_IO:
             dprintf("handle_io\n");
-            ret = kvm_handle_io(env, run->io.port,
+            ret = kvm_handle_io(cpu, run->io.port,
                                 (uint8_t *)run + run->io.data_offset,
                                 run->io.direction,
                                 run->io.size,
@@ -653,8 +671,8 @@
             dprintf("kvm_exit_debug\n");
 #ifdef KVM_CAP_SET_GUEST_DEBUG
             if (kvm_arch_debug(&run->debug.arch)) {
-                gdb_set_stop_cpu(env);
-                vm_stop(EXCP_DEBUG);
+                gdb_set_stop_cpu(cpu);
+                vm_stop(EXCP_DEBUG);
                 env->exception_index = EXCP_DEBUG;
                 return 0;
             }
@@ -664,13 +682,13 @@
             break;
         default:
             dprintf("kvm_arch_handle_exit\n");
-            ret = kvm_arch_handle_exit(env, run);
+            ret = kvm_arch_handle_exit(cpu, run);
             break;
         }
     } while (ret > 0);
 
-    if (env->exit_request) {
-        env->exit_request = 0;
+    if (cpu->exit_request) {
+        cpu->exit_request = 0;
         env->exception_index = EXCP_INTERRUPT;
     }
 
@@ -688,8 +706,7 @@
 
     if (start_addr & ~TARGET_PAGE_MASK) {
         if (flags >= IO_MEM_UNASSIGNED) {
-            if (!kvm_lookup_overlapping_slot(s, start_addr,
-                                             start_addr + size)) {
+            if (!kvm_lookup_overlapping_slot(s, start_addr, size)) {
                 return;
             }
             fprintf(stderr, "Unaligned split of a KVM memory slot\n");
@@ -703,7 +720,7 @@
     phys_offset &= ~IO_MEM_ROM;
 
     while (1) {
-        mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
+        mem = kvm_lookup_overlapping_slot(s, start_addr, size);
         if (!mem) {
             break;
         }
@@ -849,7 +866,7 @@
     return ret;
 }
 
-int kvm_vcpu_ioctl(CPUOldState *env, int type, ...)
+int kvm_vcpu_ioctl(CPUState *cpu, int type, ...)
 {
     int ret;
     void *arg;
@@ -859,7 +876,7 @@
     arg = va_arg(ap, void *);
     va_end(ap);
 
-    ret = ioctl(env->kvm_fd, type, arg);
+    ret = ioctl(cpu->kvm_fd, type, arg);
     if (ret == -1)
         ret = -errno;
 
@@ -896,46 +913,45 @@
 }
 
 #ifdef KVM_CAP_SET_GUEST_DEBUG
-struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUOldState *env,
+struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *cpu,
                                                  target_ulong pc)
 {
     struct kvm_sw_breakpoint *bp;
 
-    QTAILQ_FOREACH(bp, &env->kvm_state->kvm_sw_breakpoints, entry) {
+    QTAILQ_FOREACH(bp, &cpu->kvm_state->kvm_sw_breakpoints, entry) {
         if (bp->pc == pc)
             return bp;
     }
     return NULL;
 }
 
-int kvm_sw_breakpoints_active(CPUOldState *env)
+int kvm_sw_breakpoints_active(CPUState *cpu)
 {
-    return !QTAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints);
+    return !QTAILQ_EMPTY(&cpu->kvm_state->kvm_sw_breakpoints);
 }
 
-int kvm_update_guest_debug(CPUOldState *env, unsigned long reinject_trap)
+int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap)
 {
     struct kvm_guest_debug dbg;
 
     dbg.control = 0;
-    if (env->singlestep_enabled)
+    if (cpu->singlestep_enabled)
         dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
 
-    kvm_arch_update_guest_debug(env, &dbg);
+    kvm_arch_update_guest_debug(cpu, &dbg);
     dbg.control |= reinject_trap;
 
-    return kvm_vcpu_ioctl(env, KVM_SET_GUEST_DEBUG, &dbg);
+    return kvm_vcpu_ioctl(cpu, KVM_SET_GUEST_DEBUG, &dbg);
 }
 
-int kvm_insert_breakpoint(CPUOldState *current_env, target_ulong addr,
+int kvm_insert_breakpoint(CPUState *cpu, target_ulong addr,
                           target_ulong len, int type)
 {
     struct kvm_sw_breakpoint *bp;
-    CPUOldState *env;
     int err;
 
     if (type == GDB_BREAKPOINT_SW) {
-        bp = kvm_find_sw_breakpoint(current_env, addr);
+        bp = kvm_find_sw_breakpoint(cpu, addr);
         if (bp) {
             bp->use_count++;
             return 0;
@@ -947,13 +963,13 @@
 
         bp->pc = addr;
         bp->use_count = 1;
-        err = kvm_arch_insert_sw_breakpoint(current_env, bp);
+        err = kvm_arch_insert_sw_breakpoint(cpu, bp);
         if (err) {
             free(bp);
             return err;
         }
 
-        QTAILQ_INSERT_HEAD(&current_env->kvm_state->kvm_sw_breakpoints,
+        QTAILQ_INSERT_HEAD(&cpu->kvm_state->kvm_sw_breakpoints,
                           bp, entry);
     } else {
         err = kvm_arch_insert_hw_breakpoint(addr, len, type);
@@ -961,23 +977,22 @@
             return err;
     }
 
-    for (env = first_cpu; env != NULL; env = env->next_cpu) {
-        err = kvm_update_guest_debug(env, 0);
+    CPU_FOREACH(cpu) {
+        err = kvm_update_guest_debug(cpu, 0);
         if (err)
             return err;
     }
     return 0;
 }
 
-int kvm_remove_breakpoint(CPUOldState *current_env, target_ulong addr,
+int kvm_remove_breakpoint(CPUState *cpu, target_ulong addr,
                           target_ulong len, int type)
 {
     struct kvm_sw_breakpoint *bp;
-    CPUOldState *env;
     int err;
 
     if (type == GDB_BREAKPOINT_SW) {
-        bp = kvm_find_sw_breakpoint(current_env, addr);
+        bp = kvm_find_sw_breakpoint(cpu, addr);
         if (!bp)
             return -ENOENT;
 
@@ -986,11 +1001,11 @@
             return 0;
         }
 
-        err = kvm_arch_remove_sw_breakpoint(current_env, bp);
+        err = kvm_arch_remove_sw_breakpoint(cpu, bp);
         if (err)
             return err;
 
-        QTAILQ_REMOVE(&current_env->kvm_state->kvm_sw_breakpoints, bp, entry);
+        QTAILQ_REMOVE(&cpu->kvm_state->kvm_sw_breakpoints, bp, entry);
         g_free(bp);
     } else {
         err = kvm_arch_remove_hw_breakpoint(addr, len, type);
@@ -998,55 +1013,55 @@
             return err;
     }
 
-    for (env = first_cpu; env != NULL; env = env->next_cpu) {
-        err = kvm_update_guest_debug(env, 0);
+    CPU_FOREACH(cpu) {
+        err = kvm_update_guest_debug(cpu, 0);
         if (err)
             return err;
     }
     return 0;
 }
 
-void kvm_remove_all_breakpoints(CPUOldState *current_env)
+void kvm_remove_all_breakpoints(CPUState *cpu)
 {
     struct kvm_sw_breakpoint *bp, *next;
-    KVMState *s = current_env->kvm_state;
-    CPUOldState *env;
+    KVMState *s = cpu->kvm_state;
 
     QTAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
-        if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
+        if (kvm_arch_remove_sw_breakpoint(cpu, bp) != 0) {
             /* Try harder to find a CPU that currently sees the breakpoint. */
-            for (env = first_cpu; env != NULL; env = env->next_cpu) {
-                if (kvm_arch_remove_sw_breakpoint(env, bp) == 0)
+            CPU_FOREACH(cpu) {
+                if (kvm_arch_remove_sw_breakpoint(cpu, bp) == 0)
                     break;
             }
         }
     }
     kvm_arch_remove_all_hw_breakpoints();
 
-    for (env = first_cpu; env != NULL; env = env->next_cpu)
-        kvm_update_guest_debug(env, 0);
+    CPU_FOREACH(cpu) {
+        kvm_update_guest_debug(cpu, 0);
+    }
 }
 
 #else /* !KVM_CAP_SET_GUEST_DEBUG */
 
-int kvm_update_guest_debug(CPUOldState *env, unsigned long reinject_trap)
+int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap)
 {
     return -EINVAL;
 }
 
-int kvm_insert_breakpoint(CPUOldState *current_env, target_ulong addr,
+int kvm_insert_breakpoint(CPUState *cpu, target_ulong addr,
                           target_ulong len, int type)
 {
     return -EINVAL;
 }
 
-int kvm_remove_breakpoint(CPUOldState *current_env, target_ulong addr,
+int kvm_remove_breakpoint(CPUState *cpu, target_ulong addr,
                           target_ulong len, int type)
 {
     return -EINVAL;
 }
 
-void kvm_remove_all_breakpoints(CPUOldState *current_env)
+void kvm_remove_all_breakpoints(CPUState *cpu)
 {
 }
 #endif /* !KVM_CAP_SET_GUEST_DEBUG */
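The reworked kvm_lookup_overlapping_slot() above replaces the end-address arithmetic with subtraction-only comparisons so a slot mapped at the very top of the address space cannot wrap. A standalone check of that overlap test, with plain uint64_t standing in for hwaddr/ram_addr_t:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* True when [start, start+size) overlaps [slot_start, slot_start+slot_size),
     * written without additions, mirroring the comparisons in the patch. */
    static bool slots_overlap(uint64_t start, uint64_t size,
                              uint64_t slot_start, uint64_t slot_size)
    {
        if (start <= slot_start) {
            return slot_start - start < size;
        }
        return start - slot_start < slot_size;
    }

    int main(void)
    {
        /* Adjacent but disjoint regions. */
        assert(!slots_overlap(0x0000, 0x1000, 0x1000, 0x1000));
        /* One byte of overlap. */
        assert(slots_overlap(0x0000, 0x1001, 0x1000, 0x1000));
        /* Slot at the very end of the address space: start+size would wrap. */
        assert(slots_overlap(UINT64_MAX - 0xfff, 0x1000,
                             UINT64_MAX - 0x7ff, 0x800));
        return 0;
    }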
diff --git a/main-loop.c b/main-loop.c
index 0634ef0..e8d40ce 100644
--- a/main-loop.c
+++ b/main-loop.c
@@ -74,7 +74,7 @@
     } while ((len == -1 && errno == EINTR) || len > 0);
 }
 
-static int qemu_event_init(void)
+static int qemu_main_loop_event_init(void)
 {
     int err;
     int fds[2];
@@ -109,7 +109,7 @@
 {
 }
 
-static int qemu_event_init(void)
+static int qemu_main_loop_event_init(void)
 {
     qemu_event_handle = CreateEvent(NULL, FALSE, FALSE, NULL);
     if (!qemu_event_handle) {
@@ -123,7 +123,7 @@
 
 int qemu_init_main_loop(void)
 {
-    return qemu_event_init();
+    return qemu_main_loop_event_init();
 }
 
 #ifndef _WIN32
diff --git a/memory-android.c b/memory-android.c
new file mode 100644
index 0000000..777f6e5
--- /dev/null
+++ b/memory-android.c
@@ -0,0 +1,16 @@
+#include "cpu.h"
+#include "exec/exec-all.h"
+#include "qemu/host-utils.h"
+
+uint64_t io_mem_read(int io_index, hwaddr addr, unsigned size)
+{
+    return _io_mem_read[io_index][ctzl(size)](io_mem_opaque[io_index],
+                                              addr);
+}
+
+void io_mem_write(int io_index, hwaddr addr,
+                  uint64_t val, unsigned size)
+{
+    _io_mem_write[io_index][ctzl(size)](io_mem_opaque[io_index],
+                                        addr, val);
+}
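The new memory-android.c helpers above index the accessor tables by ctzl(size), i.e. log2 of the access width. QEMU's ctzl() reduces to a count-trailing-zeros; this quick standalone check uses the GCC/Clang builtin directly instead of the emulator's wrapper:

    #include <assert.h>

    int main(void)
    {
        assert(__builtin_ctzl(1) == 0);  /* 1-byte access -> table slot 0 */
        assert(__builtin_ctzl(2) == 1);  /* 2-byte access -> table slot 1 */
        assert(__builtin_ctzl(4) == 2);  /* 4-byte access -> table slot 2 */
        assert(__builtin_ctzl(8) == 3);  /* 8-byte access -> table slot 3 */
        return 0;
    }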
diff --git a/qemu_timers.h b/qemu_timers.h
deleted file mode 100644
index 9940e0f..0000000
--- a/qemu_timers.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef _QEMU_TIMERS_H
-#define _QEMU_TIMERS_H
-
-#include "migration/qemu-file.h"
-#include "qemu/timer.h"
-
-#endif /* _QEMU_TIMERS_H */
diff --git a/savevm.c b/savevm.c
index 69f56b9..504b2ce 100644
--- a/savevm.c
+++ b/savevm.c
@@ -72,19 +72,22 @@
 
 #include "qemu-common.h"
 #include "hw/hw.h"
+#include "hw/qdev.h"
 #include "net/net.h"
 #include "monitor/monitor.h"
 #include "sysemu/sysemu.h"
-#include "qemu/iov.h"
-#include "qemu/timer.h"
 #include "sysemu/char.h"
 #include "sysemu/blockdev.h"
 #include "block/block.h"
 #include "audio/audio.h"
 #include "migration/migration.h"
-#include "qemu/sockets.h"
-#include "qemu/queue.h"
 #include "migration/qemu-file.h"
+#include "migration/vmstate.h"
+#include "qemu/bitmap.h"
+#include "qemu/iov.h"
+#include "qemu/sockets.h"
+#include "qemu/timer.h"
+#include "qemu/queue.h"
 #include "android/snapshot.h"
 
 
@@ -1081,28 +1084,6 @@
 }
 
 
-/* timer */
-
-void timer_put(QEMUFile *f, QEMUTimer *ts)
-{
-    uint64_t expire_time;
-
-    expire_time = timer_expire_time_ns(ts);
-    qemu_put_be64(f, expire_time);
-}
-
-void timer_get(QEMUFile *f, QEMUTimer *ts)
-{
-    uint64_t expire_time;
-
-    expire_time = qemu_get_be64(f);
-    if (expire_time != -1) {
-        timer_mod_ns(ts, expire_time);
-    } else {
-        timer_del(ts);
-    }
-}
-
 void  qemu_put_struct(QEMUFile*  f, const QField*  fields, const void*  s)
 {
     const QField*  qf = fields;
@@ -1214,84 +1195,841 @@
     return *((float*) bytes);
 }
 
-typedef struct SaveStateEntry {
+/* timer */
+
+void timer_put(QEMUFile *f, QEMUTimer *ts)
+{
+    uint64_t expire_time;
+
+    expire_time = timer_expire_time_ns(ts);
+    qemu_put_be64(f, expire_time);
+}
+
+void timer_get(QEMUFile *f, QEMUTimer *ts)
+{
+    uint64_t expire_time;
+
+    expire_time = qemu_get_be64(f);
+    if (expire_time != -1) {
+        timer_mod_ns(ts, expire_time);
+    } else {
+        timer_del(ts);
+    }
+}
+
+
+/* bool */
+
+static int get_bool(QEMUFile *f, void *pv, size_t size)
+{
+    bool *v = pv;
+    *v = qemu_get_byte(f);
+    return 0;
+}
+
+static void put_bool(QEMUFile *f, void *pv, size_t size)
+{
+    bool *v = pv;
+    qemu_put_byte(f, *v);
+}
+
+const VMStateInfo vmstate_info_bool = {
+    .name = "bool",
+    .get  = get_bool,
+    .put  = put_bool,
+};
+
+/* 8 bit int */
+
+static int get_int8(QEMUFile *f, void *pv, size_t size)
+{
+    int8_t *v = pv;
+    qemu_get_s8s(f, v);
+    return 0;
+}
+
+static void put_int8(QEMUFile *f, void *pv, size_t size)
+{
+    int8_t *v = pv;
+    qemu_put_s8s(f, v);
+}
+
+const VMStateInfo vmstate_info_int8 = {
+    .name = "int8",
+    .get  = get_int8,
+    .put  = put_int8,
+};
+
+/* 16 bit int */
+
+static int get_int16(QEMUFile *f, void *pv, size_t size)
+{
+    int16_t *v = pv;
+    qemu_get_sbe16s(f, v);
+    return 0;
+}
+
+static void put_int16(QEMUFile *f, void *pv, size_t size)
+{
+    int16_t *v = pv;
+    qemu_put_sbe16s(f, v);
+}
+
+const VMStateInfo vmstate_info_int16 = {
+    .name = "int16",
+    .get  = get_int16,
+    .put  = put_int16,
+};
+
+/* 32 bit int */
+
+static int get_int32(QEMUFile *f, void *pv, size_t size)
+{
+    int32_t *v = pv;
+    qemu_get_sbe32s(f, v);
+    return 0;
+}
+
+static void put_int32(QEMUFile *f, void *pv, size_t size)
+{
+    int32_t *v = pv;
+    qemu_put_sbe32s(f, v);
+}
+
+const VMStateInfo vmstate_info_int32 = {
+    .name = "int32",
+    .get  = get_int32,
+    .put  = put_int32,
+};
+
+/* 32 bit int. Check that the received value is the same as the one
+   in the field */
+
+static int get_int32_equal(QEMUFile *f, void *pv, size_t size)
+{
+    int32_t *v = pv;
+    int32_t v2;
+    qemu_get_sbe32s(f, &v2);
+
+    if (*v == v2)
+        return 0;
+    return -EINVAL;
+}
+
+const VMStateInfo vmstate_info_int32_equal = {
+    .name = "int32 equal",
+    .get  = get_int32_equal,
+    .put  = put_int32,
+};
+
+/* 32 bit int. Check that the received value is less than or equal
+   to the one in the field */
+
+static int get_int32_le(QEMUFile *f, void *pv, size_t size)
+{
+    int32_t *old = pv;
+    int32_t new;
+    qemu_get_sbe32s(f, &new);
+
+    if (*old <= new)
+        return 0;
+    return -EINVAL;
+}
+
+const VMStateInfo vmstate_info_int32_le = {
+    .name = "int32 equal",
+    .get  = get_int32_le,
+    .put  = put_int32,
+};
+
+/* 64 bit int */
+
+static int get_int64(QEMUFile *f, void *pv, size_t size)
+{
+    int64_t *v = pv;
+    qemu_get_sbe64s(f, v);
+    return 0;
+}
+
+static void put_int64(QEMUFile *f, void *pv, size_t size)
+{
+    int64_t *v = pv;
+    qemu_put_sbe64s(f, v);
+}
+
+const VMStateInfo vmstate_info_int64 = {
+    .name = "int64",
+    .get  = get_int64,
+    .put  = put_int64,
+};
+
+/* 8 bit unsigned int */
+
+static int get_uint8(QEMUFile *f, void *pv, size_t size)
+{
+    uint8_t *v = pv;
+    qemu_get_8s(f, v);
+    return 0;
+}
+
+static void put_uint8(QEMUFile *f, void *pv, size_t size)
+{
+    uint8_t *v = pv;
+    qemu_put_8s(f, v);
+}
+
+const VMStateInfo vmstate_info_uint8 = {
+    .name = "uint8",
+    .get  = get_uint8,
+    .put  = put_uint8,
+};
+
+/* 16 bit unsigned int */
+
+static int get_uint16(QEMUFile *f, void *pv, size_t size)
+{
+    uint16_t *v = pv;
+    qemu_get_be16s(f, v);
+    return 0;
+}
+
+static void put_uint16(QEMUFile *f, void *pv, size_t size)
+{
+    uint16_t *v = pv;
+    qemu_put_be16s(f, v);
+}
+
+const VMStateInfo vmstate_info_uint16 = {
+    .name = "uint16",
+    .get  = get_uint16,
+    .put  = put_uint16,
+};
+
+/* 32 bit unsigned int */
+
+static int get_uint32(QEMUFile *f, void *pv, size_t size)
+{
+    uint32_t *v = pv;
+    qemu_get_be32s(f, v);
+    return 0;
+}
+
+static void put_uint32(QEMUFile *f, void *pv, size_t size)
+{
+    uint32_t *v = pv;
+    qemu_put_be32s(f, v);
+}
+
+const VMStateInfo vmstate_info_uint32 = {
+    .name = "uint32",
+    .get  = get_uint32,
+    .put  = put_uint32,
+};
+
+/* 32 bit uint. Check that the received value is the same as the one
+   in the field */
+
+static int get_uint32_equal(QEMUFile *f, void *pv, size_t size)
+{
+    uint32_t *v = pv;
+    uint32_t v2;
+    qemu_get_be32s(f, &v2);
+
+    if (*v == v2) {
+        return 0;
+    }
+    return -EINVAL;
+}
+
+const VMStateInfo vmstate_info_uint32_equal = {
+    .name = "uint32 equal",
+    .get  = get_uint32_equal,
+    .put  = put_uint32,
+};
+
+/* 64 bit unsigned int */
+
+static int get_uint64(QEMUFile *f, void *pv, size_t size)
+{
+    uint64_t *v = pv;
+    qemu_get_be64s(f, v);
+    return 0;
+}
+
+static void put_uint64(QEMUFile *f, void *pv, size_t size)
+{
+    uint64_t *v = pv;
+    qemu_put_be64s(f, v);
+}
+
+const VMStateInfo vmstate_info_uint64 = {
+    .name = "uint64",
+    .get  = get_uint64,
+    .put  = put_uint64,
+};
+
+/* 64 bit unsigned int. Check that the received value is the same as the
+   one in the field */
+
+static int get_uint64_equal(QEMUFile *f, void *pv, size_t size)
+{
+    uint64_t *v = pv;
+    uint64_t v2;
+    qemu_get_be64s(f, &v2);
+
+    if (*v == v2) {
+        return 0;
+    }
+    return -EINVAL;
+}
+
+const VMStateInfo vmstate_info_uint64_equal = {
+    .name = "int64 equal",
+    .get  = get_uint64_equal,
+    .put  = put_uint64,
+};
+
+/* 8 bit unsigned int. Check that the received value is the same as the
+   one in the field */
+
+static int get_uint8_equal(QEMUFile *f, void *pv, size_t size)
+{
+    uint8_t *v = pv;
+    uint8_t v2;
+    qemu_get_8s(f, &v2);
+
+    if (*v == v2)
+        return 0;
+    return -EINVAL;
+}
+
+const VMStateInfo vmstate_info_uint8_equal = {
+    .name = "uint8 equal",
+    .get  = get_uint8_equal,
+    .put  = put_uint8,
+};
+
+/* 16 bit unsigned int. Check that the received value is the same as the
+   one in the field */
+
+static int get_uint16_equal(QEMUFile *f, void *pv, size_t size)
+{
+    uint16_t *v = pv;
+    uint16_t v2;
+    qemu_get_be16s(f, &v2);
+
+    if (*v == v2)
+        return 0;
+    return -EINVAL;
+}
+
+const VMStateInfo vmstate_info_uint16_equal = {
+    .name = "uint16 equal",
+    .get  = get_uint16_equal,
+    .put  = put_uint16,
+};
+
+/* floating point */
+
+static int get_float64(QEMUFile *f, void *pv, size_t size)
+{
+    float64 *v = pv;
+
+    *v = make_float64(qemu_get_be64(f));
+    return 0;
+}
+
+static void put_float64(QEMUFile *f, void *pv, size_t size)
+{
+    uint64_t *v = pv;
+
+    qemu_put_be64(f, float64_val(*v));
+}
+
+const VMStateInfo vmstate_info_float64 = {
+    .name = "float64",
+    .get  = get_float64,
+    .put  = put_float64,
+};
+
+/* timers  */
+
+static int get_timer(QEMUFile *f, void *pv, size_t size)
+{
+    QEMUTimer *v = pv;
+    timer_get(f, v);
+    return 0;
+}
+
+static void put_timer(QEMUFile *f, void *pv, size_t size)
+{
+    QEMUTimer *v = pv;
+    timer_put(f, v);
+}
+
+const VMStateInfo vmstate_info_timer = {
+    .name = "timer",
+    .get  = get_timer,
+    .put  = put_timer,
+};
+
+/* uint8_t buffers */
+
+static int get_buffer(QEMUFile *f, void *pv, size_t size)
+{
+    uint8_t *v = pv;
+    qemu_get_buffer(f, v, size);
+    return 0;
+}
+
+static void put_buffer(QEMUFile *f, void *pv, size_t size)
+{
+    uint8_t *v = pv;
+    qemu_put_buffer(f, v, size);
+}
+
+const VMStateInfo vmstate_info_buffer = {
+    .name = "buffer",
+    .get  = get_buffer,
+    .put  = put_buffer,
+};
+
+/* unused buffers: space that was used for some fields that are
+   not useful anymore */
+
+static int get_unused_buffer(QEMUFile *f, void *pv, size_t size)
+{
+    uint8_t buf[1024];
+    int block_len;
+
+    while (size > 0) {
+        block_len = MIN(sizeof(buf), size);
+        size -= block_len;
+        qemu_get_buffer(f, buf, block_len);
+    }
+    return 0;
+}
+
+static void put_unused_buffer(QEMUFile *f, void *pv, size_t size)
+{
+    static const uint8_t buf[1024];
+    int block_len;
+
+    while (size > 0) {
+        block_len = MIN(sizeof(buf), size);
+        size -= block_len;
+        qemu_put_buffer(f, buf, block_len);
+    }
+}
+
+const VMStateInfo vmstate_info_unused_buffer = {
+    .name = "unused_buffer",
+    .get  = get_unused_buffer,
+    .put  = put_unused_buffer,
+};
+
+/* bitmaps (as defined by bitmap.h). Note that size here is the size
+ * of the bitmap in bits. The on-the-wire format of a bitmap is 64
+ * bit words with the bits in big endian order. The in-memory format
+ * is an array of 'unsigned long', which may be either 32 or 64 bits.
+ */
+/* This is the number of 64 bit words sent over the wire */
+#define BITS_TO_U64S(nr) DIV_ROUND_UP(nr, 64)
+static int get_bitmap(QEMUFile *f, void *pv, size_t size)
+{
+    unsigned long *bmp = pv;
+    int i, idx = 0;
+    for (i = 0; i < BITS_TO_U64S(size); i++) {
+        uint64_t w = qemu_get_be64(f);
+        bmp[idx++] = w;
+        if (sizeof(unsigned long) == 4 && idx < BITS_TO_LONGS(size)) {
+            bmp[idx++] = w >> 32;
+        }
+    }
+    return 0;
+}
+
+static void put_bitmap(QEMUFile *f, void *pv, size_t size)
+{
+    unsigned long *bmp = pv;
+    int i, idx = 0;
+    for (i = 0; i < BITS_TO_U64S(size); i++) {
+        uint64_t w = bmp[idx++];
+        if (sizeof(unsigned long) == 4 && idx < BITS_TO_LONGS(size)) {
+            w |= ((uint64_t)bmp[idx++]) << 32;
+        }
+        qemu_put_be64(f, w);
+    }
+}
+
+const VMStateInfo vmstate_info_bitmap = {
+    .name = "bitmap",
+    .get = get_bitmap,
+    .put = put_bitmap,
+};
+
+typedef struct CompatEntry {
     char idstr[256];
     int instance_id;
+} CompatEntry;
+
+typedef struct SaveStateEntry {
+    QTAILQ_ENTRY(SaveStateEntry) entry;
+    char idstr[256];
+    int instance_id;
+    int alias_id;
     int version_id;
     int section_id;
-    SaveLiveStateHandler *save_live_state;
-    SaveStateHandler *save_state;
-    LoadStateHandler *load_state;
+    SaveVMHandlers *ops;
+    const VMStateDescription *vmsd;
     void *opaque;
-    struct SaveStateEntry *next;
+    CompatEntry *compat;
+    int no_migrate;
+    int is_ram;
 } SaveStateEntry;
 
-static SaveStateEntry *first_se;
+
+static QTAILQ_HEAD(savevm_handlers, SaveStateEntry) savevm_handlers =
+    QTAILQ_HEAD_INITIALIZER(savevm_handlers);
+static int global_section_id;
+
+static int calculate_new_instance_id(const char *idstr)
+{
+    SaveStateEntry *se;
+    int instance_id = 0;
+
+    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+        if (strcmp(idstr, se->idstr) == 0
+            && instance_id <= se->instance_id) {
+            instance_id = se->instance_id + 1;
+        }
+    }
+    return instance_id;
+}
+
+static int calculate_compat_instance_id(const char *idstr)
+{
+    SaveStateEntry *se;
+    int instance_id = 0;
+
+    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+        if (!se->compat)
+            continue;
+
+        if (strcmp(idstr, se->compat->idstr) == 0
+            && instance_id <= se->compat->instance_id) {
+            instance_id = se->compat->instance_id + 1;
+        }
+    }
+    return instance_id;
+}
 
 /* TODO: Individual devices generally have very little idea about the rest
    of the system, so instance_id should be removed/replaced.
    Meanwhile pass -1 as instance_id if you do not already have a clearly
    distinguishing id for all instances of your device class. */
-int register_savevm_live(const char *idstr,
+int register_savevm_live(DeviceState *dev,
+                         const char *idstr,
                          int instance_id,
                          int version_id,
-                         SaveLiveStateHandler *save_live_state,
-                         SaveStateHandler *save_state,
-                         LoadStateHandler *load_state,
+                         SaveVMHandlers *ops,
                          void *opaque)
 {
-    SaveStateEntry *se, **pse;
-    static int global_section_id;
+    SaveStateEntry *se;
 
-    se = g_malloc(sizeof(SaveStateEntry));
-    pstrcpy(se->idstr, sizeof(se->idstr), idstr);
-    se->instance_id = (instance_id == -1) ? 0 : instance_id;
+    se = g_malloc0(sizeof(SaveStateEntry));
     se->version_id = version_id;
     se->section_id = global_section_id++;
-    se->save_live_state = save_live_state;
-    se->save_state = save_state;
-    se->load_state = load_state;
+    se->ops = ops;
     se->opaque = opaque;
-    se->next = NULL;
-
-    /* add at the end of list */
-    pse = &first_se;
-    while (*pse != NULL) {
-        if (instance_id == -1
-                && strcmp(se->idstr, (*pse)->idstr) == 0
-                && se->instance_id <= (*pse)->instance_id)
-            se->instance_id = (*pse)->instance_id + 1;
-        pse = &(*pse)->next;
+    se->vmsd = NULL;
+    se->no_migrate = 0;
+    /* if this is a live_savevm handler then set is_ram */
+    if (ops->save_live_state != NULL) {
+        se->is_ram = 1;
     }
-    *pse = se;
+
+    if (dev) {
+        char *id = qdev_get_dev_path(dev);
+        if (id) {
+            pstrcpy(se->idstr, sizeof(se->idstr), id);
+            pstrcat(se->idstr, sizeof(se->idstr), "/");
+            g_free(id);
+
+            se->compat = g_malloc0(sizeof(CompatEntry));
+            pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), idstr);
+            se->compat->instance_id = instance_id == -1 ?
+                         calculate_compat_instance_id(idstr) : instance_id;
+            instance_id = -1;
+        }
+    }
+    pstrcat(se->idstr, sizeof(se->idstr), idstr);
+
+    if (instance_id == -1) {
+        se->instance_id = calculate_new_instance_id(se->idstr);
+    } else {
+        se->instance_id = instance_id;
+    }
+    assert(!se->compat || se->instance_id == 0);
+    /* add at the end of list */
+    QTAILQ_INSERT_TAIL(&savevm_handlers, se, entry);
     return 0;
 }
 
-int register_savevm(const char *idstr,
+int register_savevm(DeviceState *dev,
+                    const char *idstr,
                     int instance_id,
                     int version_id,
                     SaveStateHandler *save_state,
                     LoadStateHandler *load_state,
                     void *opaque)
 {
-    return register_savevm_live(idstr, instance_id, version_id,
-                                NULL, save_state, load_state, opaque);
+    SaveVMHandlers *ops = g_malloc0(sizeof(SaveVMHandlers));
+    ops->save_state = save_state;
+    ops->load_state = load_state;
+    return register_savevm_live(dev, idstr, instance_id, version_id,
+                                ops, opaque);
 }
 
-void unregister_savevm(const char *idstr, void *opaque)
+void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque)
 {
-    SaveStateEntry **pse;
+    SaveStateEntry *se, *new_se;
+    char id[256] = "";
 
-    pse = &first_se;
-    while (*pse != NULL) {
-        if (strcmp((*pse)->idstr, idstr) == 0 && (*pse)->opaque == opaque) {
-            SaveStateEntry *next = (*pse)->next;
-            g_free(*pse);
-            *pse = next;
-            continue;
+    if (dev) {
+        char *path = qdev_get_dev_path(dev);
+        if (path) {
+            pstrcpy(id, sizeof(id), path);
+            pstrcat(id, sizeof(id), "/");
+            g_free(path);
         }
-        pse = &(*pse)->next;
     }
+    pstrcat(id, sizeof(id), idstr);
+
+    QTAILQ_FOREACH_SAFE(se, &savevm_handlers, entry, new_se) {
+        if (strcmp(se->idstr, id) == 0 && se->opaque == opaque) {
+            QTAILQ_REMOVE(&savevm_handlers, se, entry);
+            if (se->compat) {
+                g_free(se->compat);
+            }
+            g_free(se->ops);
+            g_free(se);
+        }
+    }
+}
+
+int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
+                                   const VMStateDescription *vmsd,
+                                   void *opaque, int alias_id,
+                                   int required_for_version)
+{
+    SaveStateEntry *se;
+
+    /* If this triggers, alias support can be dropped for the vmsd. */
+    assert(alias_id == -1 || required_for_version >= vmsd->minimum_version_id);
+
+    se = g_malloc0(sizeof(SaveStateEntry));
+    se->version_id = vmsd->version_id;
+    se->section_id = global_section_id++;
+    se->opaque = opaque;
+    se->vmsd = vmsd;
+    se->alias_id = alias_id;
+    se->no_migrate = vmsd->unmigratable;
+
+    if (dev) {
+        char *id = qdev_get_dev_path(dev);
+        if (id) {
+            pstrcpy(se->idstr, sizeof(se->idstr), id);
+            pstrcat(se->idstr, sizeof(se->idstr), "/");
+            g_free(id);
+
+            se->compat = g_malloc0(sizeof(CompatEntry));
+            pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name);
+            se->compat->instance_id = instance_id == -1 ?
+                         calculate_compat_instance_id(vmsd->name) : instance_id;
+            instance_id = -1;
+        }
+    }
+    pstrcat(se->idstr, sizeof(se->idstr), vmsd->name);
+
+    if (instance_id == -1) {
+        se->instance_id = calculate_new_instance_id(se->idstr);
+    } else {
+        se->instance_id = instance_id;
+    }
+    assert(!se->compat || se->instance_id == 0);
+    /* add at the end of list */
+    QTAILQ_INSERT_TAIL(&savevm_handlers, se, entry);
+    return 0;
+}
+
+void vmstate_unregister(DeviceState *dev, const VMStateDescription *vmsd,
+                        void *opaque)
+{
+    SaveStateEntry *se, *new_se;
+
+    QTAILQ_FOREACH_SAFE(se, &savevm_handlers, entry, new_se) {
+        if (se->vmsd == vmsd && se->opaque == opaque) {
+            QTAILQ_REMOVE(&savevm_handlers, se, entry);
+            if (se->compat) {
+                g_free(se->compat);
+            }
+            g_free(se);
+        }
+    }
+}
+
+static void vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd,
+                                    void *opaque);
+static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd,
+                                   void *opaque);
+
+int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
+                       void *opaque, int version_id)
+{
+    VMStateField *field = vmsd->fields;
+    int ret;
+
+    if (version_id > vmsd->version_id) {
+        return -EINVAL;
+    }
+    if (version_id < vmsd->minimum_version_id_old) {
+        return -EINVAL;
+    }
+    if  (version_id < vmsd->minimum_version_id) {
+        return vmsd->load_state_old(f, opaque, version_id);
+    }
+    if (vmsd->pre_load) {
+        int ret = vmsd->pre_load(opaque);
+        if (ret)
+            return ret;
+    }
+    while(field->name) {
+        if ((field->field_exists &&
+             field->field_exists(opaque, version_id)) ||
+            (!field->field_exists &&
+             field->version_id <= version_id)) {
+            void *base_addr = opaque + field->offset;
+            int i, n_elems = 1;
+            int size = field->size;
+
+            if (field->flags & VMS_VBUFFER) {
+                size = *(int32_t *)(opaque+field->size_offset);
+                if (field->flags & VMS_MULTIPLY) {
+                    size *= field->size;
+                }
+            }
+            if (field->flags & VMS_ARRAY) {
+                n_elems = field->num;
+            } else if (field->flags & VMS_VARRAY_INT32) {
+                n_elems = *(int32_t *)(opaque+field->num_offset);
+            } else if (field->flags & VMS_VARRAY_UINT32) {
+                n_elems = *(uint32_t *)(opaque+field->num_offset);
+            } else if (field->flags & VMS_VARRAY_UINT16) {
+                n_elems = *(uint16_t *)(opaque+field->num_offset);
+            } else if (field->flags & VMS_VARRAY_UINT8) {
+                n_elems = *(uint8_t *)(opaque+field->num_offset);
+            }
+            if (field->flags & VMS_POINTER) {
+                base_addr = *(void **)base_addr + field->start;
+            }
+            for (i = 0; i < n_elems; i++) {
+                void *addr = base_addr + size * i;
+
+                if (field->flags & VMS_ARRAY_OF_POINTER) {
+                    addr = *(void **)addr;
+                }
+                if (field->flags & VMS_STRUCT) {
+                    ret = vmstate_load_state(f, field->vmsd, addr, field->vmsd->version_id);
+                } else {
+                    ret = field->info->get(f, addr, size);
+
+                }
+                if (ret < 0) {
+                    return ret;
+                }
+            }
+        }
+        field++;
+    }
+    ret = vmstate_subsection_load(f, vmsd, opaque);
+    if (ret != 0) {
+        return ret;
+    }
+    if (vmsd->post_load) {
+        return vmsd->post_load(opaque, version_id);
+    }
+    return 0;
+}
+
+void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
+                        void *opaque)
+{
+    VMStateField *field = vmsd->fields;
+
+    if (vmsd->pre_save) {
+        vmsd->pre_save(opaque);
+    }
+    while(field->name) {
+        if (!field->field_exists ||
+            field->field_exists(opaque, vmsd->version_id)) {
+            void *base_addr = opaque + field->offset;
+            int i, n_elems = 1;
+            int size = field->size;
+
+            if (field->flags & VMS_VBUFFER) {
+                size = *(int32_t *)(opaque+field->size_offset);
+                if (field->flags & VMS_MULTIPLY) {
+                    size *= field->size;
+                }
+            }
+            if (field->flags & VMS_ARRAY) {
+                n_elems = field->num;
+            } else if (field->flags & VMS_VARRAY_INT32) {
+                n_elems = *(int32_t *)(opaque+field->num_offset);
+            } else if (field->flags & VMS_VARRAY_UINT32) {
+                n_elems = *(uint32_t *)(opaque+field->num_offset);
+            } else if (field->flags & VMS_VARRAY_UINT16) {
+                n_elems = *(uint16_t *)(opaque+field->num_offset);
+            } else if (field->flags & VMS_VARRAY_UINT8) {
+                n_elems = *(uint8_t *)(opaque+field->num_offset);
+            }
+            if (field->flags & VMS_POINTER) {
+                base_addr = *(void **)base_addr + field->start;
+            }
+            for (i = 0; i < n_elems; i++) {
+                void *addr = base_addr + size * i;
+
+                if (field->flags & VMS_ARRAY_OF_POINTER) {
+                    addr = *(void **)addr;
+                }
+                if (field->flags & VMS_STRUCT) {
+                    vmstate_save_state(f, field->vmsd, addr);
+                } else {
+                    field->info->put(f, addr, size);
+                }
+            }
+        }
+        field++;
+    }
+    vmstate_subsection_save(f, vmsd, opaque);
+}
+
+static int vmstate_load(QEMUFile *f, SaveStateEntry *se, int version_id)
+{
+    if (!se->vmsd) {         /* Old style */
+        return se->ops->load_state(f, se->opaque, version_id);
+    }
+    return vmstate_load_state(f, se->vmsd, se->opaque, version_id);
+}
+
+static void vmstate_save(QEMUFile *f, SaveStateEntry *se)
+{
+    if (!se->vmsd) {         /* Old style */
+        se->ops->save_state(f, se->opaque);
+        return;
+    }
+    vmstate_save_state(f, se->vmsd, se->opaque);
 }
 
 #define QEMU_VM_FILE_MAGIC           0x5145564d
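Of the VMStateInfo primitives added above, the bitmap one has the least obvious encoding: 64-bit big-endian words on the wire, an array of unsigned long (32 or 64 bits wide) in memory. A minimal standalone sketch of the packing step done by put_bitmap(), leaving out the byte swapping that qemu_put_be64() performs:

    #include <assert.h>
    #include <limits.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Pack one 64-bit wire word from an in-memory bitmap of unsigned long:
     * on 32-bit hosts two longs are combined, on 64-bit hosts one long is
     * taken as-is. nbits is the bitmap size in bits, *idx the position in
     * the long array. */
    static uint64_t pack_wire_word(const unsigned long *bmp, size_t *idx,
                                   size_t nbits)
    {
        size_t bits_per_long = sizeof(unsigned long) * CHAR_BIT;
        size_t nlongs = (nbits + bits_per_long - 1) / bits_per_long;
        uint64_t w = bmp[(*idx)++];

        if (sizeof(unsigned long) == 4 && *idx < nlongs) {
            w |= (uint64_t)bmp[(*idx)++] << 32;
        }
        return w;
    }

    int main(void)
    {
        unsigned long bmp[2] = { 0x5UL, 0x0UL };  /* bits 0 and 2 set */
        size_t idx = 0;

        assert(pack_wire_word(bmp, &idx, 64) == 0x5);
        return 0;
    }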
@@ -1303,6 +2041,20 @@
 #define QEMU_VM_SECTION_PART         0x02
 #define QEMU_VM_SECTION_END          0x03
 #define QEMU_VM_SECTION_FULL         0x04
+#define QEMU_VM_SUBSECTION           0x05
+
+bool qemu_savevm_state_blocked(Error **errp)
+{
+    SaveStateEntry *se;
+
+    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+        if (se->no_migrate) {
+            error_set(errp, QERR_MIGRATION_NOT_SUPPORTED, se->idstr);
+            return true;
+        }
+    }
+    return false;
+}
 
 int qemu_savevm_state_begin(QEMUFile *f)
 {
@@ -1311,12 +2063,19 @@
     qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
     qemu_put_be32(f, QEMU_VM_FILE_VERSION);
 
-    for (se = first_se; se != NULL; se = se->next) {
+    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
         int len;
 
-        if (se->save_live_state == NULL)
+        if (!se->ops || !se->ops->save_live_state) {
             continue;
-
+        }
+#if 0
+        if (se->ops && se->ops->is_active) {
+            if (!se->ops->is_active(se->opaque)) {
+                continue;
+            }
+        }
+#endif
         /* Section type */
         qemu_put_byte(f, QEMU_VM_SECTION_START);
         qemu_put_be32(f, se->section_id);
@@ -1329,7 +2088,7 @@
         qemu_put_be32(f, se->instance_id);
         qemu_put_be32(f, se->version_id);
 
-        se->save_live_state(f, QEMU_VM_SECTION_START, se->opaque);
+        se->ops->save_live_state(f, QEMU_VM_SECTION_START, se->opaque);
     }
 
     return qemu_file_get_error(f);
@@ -1340,15 +2099,26 @@
     SaveStateEntry *se;
     int ret = 1;
 
-    for (se = first_se; se != NULL; se = se->next) {
-        if (se->save_live_state == NULL)
+    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+        if (!se->ops || !se->ops->save_live_state) {
             continue;
-
+        }
+#if 0
+        if (se->ops && se->ops->is_active) {
+            if (!se->ops->is_active(se->opaque)) {
+                continue;
+            }
+        }
+#endif
+        if (qemu_file_rate_limit(f)) {
+            return 0;
+        }
+        //trace_savevm_section_start();
         /* Section type */
         qemu_put_byte(f, QEMU_VM_SECTION_PART);
         qemu_put_be32(f, se->section_id);
 
-        ret &= !!se->save_live_state(f, QEMU_VM_SECTION_PART, se->opaque);
+        ret &= !!se->ops->save_live_state(f, QEMU_VM_SECTION_PART, se->opaque);
     }
 
     if (ret)
@@ -1361,22 +2131,33 @@
 {
     SaveStateEntry *se;
 
-    for (se = first_se; se != NULL; se = se->next) {
-        if (se->save_live_state == NULL)
-            continue;
+    // cpu_synchronize_all_states();
 
+    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+        if (!se->ops || se->ops->save_live_state == NULL) {
+            continue;
+        }
+#if 0
+        if (se->ops && se->ops->is_active) {
+            if (!se->ops->is_active(se->opaque)) {
+                continue;
+            }
+        }
+#endif
+        //trace_savevm_section_start();
         /* Section type */
         qemu_put_byte(f, QEMU_VM_SECTION_END);
         qemu_put_be32(f, se->section_id);
 
-        se->save_live_state(f, QEMU_VM_SECTION_END, se->opaque);
+        se->ops->save_live_state(f, QEMU_VM_SECTION_END, se->opaque);
     }
 
-    for(se = first_se; se != NULL; se = se->next) {
+    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
         int len;
 
-        if (se->save_state == NULL)
+        if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
             continue;
+        }
 
         /* Section type */
         qemu_put_byte(f, QEMU_VM_SECTION_FULL);
@@ -1390,10 +2171,11 @@
         qemu_put_be32(f, se->instance_id);
         qemu_put_be32(f, se->version_id);
 
-        se->save_state(f, se->opaque);
+        vmstate_save(f, se);
     }
 
     qemu_put_byte(f, QEMU_VM_EOF);
+    qemu_fflush(f);
 
     return qemu_file_get_error(f);
 }
@@ -1403,6 +2185,10 @@
     int saved_vm_running;
     int ret;
 
+    if (qemu_savevm_state_blocked(NULL)) {
+        return -EINVAL;
+    }
+
     saved_vm_running = vm_running;
     vm_stop(0);
 
@@ -1433,73 +2219,120 @@
 {
     SaveStateEntry *se;
 
-    for(se = first_se; se != NULL; se = se->next) {
+    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
         if (!strcmp(se->idstr, idstr) &&
-            instance_id == se->instance_id)
+            (instance_id == se->instance_id ||
+             instance_id == se->alias_id))
             return se;
+        /* Migrating from an older version? */
+        if (strstr(se->idstr, idstr) && se->compat) {
+            if (!strcmp(se->compat->idstr, idstr) &&
+                (instance_id == se->compat->instance_id ||
+                 instance_id == se->alias_id))
+                return se;
+        }
     }
     return NULL;
 }
 
+static const VMStateDescription *vmstate_get_subsection(const VMStateSubsection *sub, char *idstr)
+{
+    while(sub && sub->needed) {
+        if (strcmp(idstr, sub->vmsd->name) == 0) {
+            return sub->vmsd;
+        }
+        sub++;
+    }
+    return NULL;
+}
+
+static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd,
+                                   void *opaque)
+{
+    while (qemu_peek_byte(f, 0) == QEMU_VM_SUBSECTION) {
+        char idstr[256];
+        int ret;
+        uint8_t version_id, len, size;
+        const VMStateDescription *sub_vmsd;
+
+        len = qemu_peek_byte(f, 1);
+        if (len < strlen(vmsd->name) + 1) {
+            /* subsection name has to be "section_name/a" */
+            return 0;
+        }
+        size = qemu_peek_buffer(f, (uint8_t *)idstr, len, 2);
+        if (size != len) {
+            return 0;
+        }
+        idstr[size] = 0;
+
+        if (strncmp(vmsd->name, idstr, strlen(vmsd->name)) != 0) {
+            /* it doesn't have a valid subsection name */
+            return 0;
+        }
+        sub_vmsd = vmstate_get_subsection(vmsd->subsections, idstr);
+        if (sub_vmsd == NULL) {
+            return -ENOENT;
+        }
+        qemu_file_skip(f, 1); /* subsection */
+        qemu_file_skip(f, 1); /* len */
+        qemu_file_skip(f, len); /* idstr */
+        version_id = qemu_get_be32(f);
+
+        ret = vmstate_load_state(f, sub_vmsd, opaque, version_id);
+        if (ret) {
+            return ret;
+        }
+    }
+    return 0;
+}
+
+static void vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd,
+                                    void *opaque)
+{
+    const VMStateSubsection *sub = vmsd->subsections;
+
+    while (sub && sub->needed) {
+        if (sub->needed(opaque)) {
+            const VMStateDescription *vmsd = sub->vmsd;
+            uint8_t len;
+
+            qemu_put_byte(f, QEMU_VM_SUBSECTION);
+            len = strlen(vmsd->name);
+            qemu_put_byte(f, len);
+            qemu_put_buffer(f, (uint8_t *)vmsd->name, len);
+            qemu_put_be32(f, vmsd->version_id);
+            vmstate_save_state(f, vmsd, opaque);
+        }
+        sub++;
+    }
+}
+
 typedef struct LoadStateEntry {
+    QLIST_ENTRY(LoadStateEntry) entry;
     SaveStateEntry *se;
     int section_id;
     int version_id;
-    struct LoadStateEntry *next;
 } LoadStateEntry;
 
-static int qemu_loadvm_state_v2(QEMUFile *f)
-{
-    SaveStateEntry *se;
-    int len, ret, instance_id, record_len, version_id;
-    int64_t total_len, end_pos, cur_pos;
-    char idstr[256];
-
-    total_len = qemu_get_be64(f);
-    end_pos = total_len + qemu_ftell(f);
-    for(;;) {
-        if (qemu_ftell(f) >= end_pos)
-            break;
-        len = qemu_get_byte(f);
-        qemu_get_buffer(f, (uint8_t *)idstr, len);
-        idstr[len] = '\0';
-        instance_id = qemu_get_be32(f);
-        version_id = qemu_get_be32(f);
-        record_len = qemu_get_be32(f);
-        cur_pos = qemu_ftell(f);
-        se = find_se(idstr, instance_id);
-        if (!se) {
-            fprintf(stderr, "qemu: warning: instance 0x%x of device '%s' not present in current VM\n",
-                    instance_id, idstr);
-        } else {
-            ret = se->load_state(f, se->opaque, version_id);
-            if (ret < 0) {
-                fprintf(stderr, "qemu: warning: error while loading state for instance 0x%x of device '%s'\n",
-                        instance_id, idstr);
-                return ret;
-            }
-        }
-        /* always seek to exact end of record */
-        qemu_file_skip(f, cur_pos + record_len - qemu_ftell(f));
-    }
-
-    return qemu_file_get_error(f);
-}
-
 int qemu_loadvm_state(QEMUFile *f)
 {
-    LoadStateEntry *first_le = NULL;
+    QLIST_HEAD(, LoadStateEntry) loadvm_handlers =
+        QLIST_HEAD_INITIALIZER(loadvm_handlers);
+    LoadStateEntry *le, *new_le;
     uint8_t section_type;
     unsigned int v;
     int ret;
 
+    if (qemu_savevm_state_blocked(NULL)) {
+        return -EINVAL;
+    }
+
     v = qemu_get_be32(f);
     if (v != QEMU_VM_FILE_MAGIC)
         return -EINVAL;
 
     v = qemu_get_be32(f);
-    if (v == QEMU_VM_FILE_VERSION_COMPAT)
-        return qemu_loadvm_state_v2(f);
     if (v < QEMU_VM_FILE_VERSION) {
         fprintf(stderr, "Snapshot format %d is too old for this version of the emulator, please create a new one.\n", v);
         return -ENOTSUP;
@@ -1510,7 +2343,6 @@
 
     while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) {
         uint32_t instance_id, version_id, section_id;
-        LoadStateEntry *le;
         SaveStateEntry *se;
         char idstr[257];
         int len;
@@ -1548,12 +2380,12 @@
             le->se = se;
             le->section_id = section_id;
             le->version_id = version_id;
-            le->next = first_le;
-            first_le = le;
+            QLIST_INSERT_HEAD(&loadvm_handlers, le, entry);
 
-            if (le->se->load_state(f, le->se->opaque, le->version_id)) {
-                fprintf(stderr, "savevm: unable to load section %s\n", idstr);
-                ret = -EINVAL;
+            ret = vmstate_load(f, le->se, le->version_id);
+            if (ret < 0) {
+                fprintf(stderr, "qemu: warning: error while loading state for instance 0x%x of device '%s'\n",
+                        instance_id, idstr);
                 goto out;
             }
             break;
@@ -1561,14 +2393,23 @@
         case QEMU_VM_SECTION_END:
             section_id = qemu_get_be32(f);
 
-            for (le = first_le; le && le->section_id != section_id; le = le->next);
+            QLIST_FOREACH(le, &loadvm_handlers, entry) {
+                if (le->section_id == section_id) {
+                    break;
+                }
+            }
             if (le == NULL) {
                 fprintf(stderr, "Unknown savevm section %d\n", section_id);
                 ret = -EINVAL;
                 goto out;
             }
 
-            le->se->load_state(f, le->se->opaque, le->version_id);
+            ret = vmstate_load(f, le->se, le->version_id);
+            if (ret < 0) {
+                fprintf(stderr, "qemu: warning: error while loading state section id %d\n",
+                        section_id);
+                goto out;
+            }
             break;
         default:
             fprintf(stderr, "Unknown savevm section type %d\n", section_type);
@@ -1577,17 +2418,19 @@
         }
     }
 
+    //cpu_synchronize_all_post_init();
+
     ret = 0;
 
 out:
-    while (first_le) {
-        LoadStateEntry *le = first_le;
-        first_le = first_le->next;
+    QLIST_FOREACH_SAFE(le, &loadvm_handlers, entry, new_le) {
+        QLIST_REMOVE(le, entry);
         g_free(le);
     }
 
-    if (qemu_file_get_error(f))
-      ret = qemu_file_get_error(f);
+    if (ret == 0) {
+        ret = qemu_file_get_error(f);
+    }
 
     return ret;
 }
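Both register_savevm_live() and vmstate_register_with_alias_id() above build the section id string the same way: qdev path, '/', then the legacy name. A standalone sketch of that composition, with snprintf/strncat standing in for QEMU's pstrcpy/pstrcat and a made-up device path:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        char idstr[256] = "";
        const char *dev_path = "0000:00:03.0";  /* hypothetical qdev_get_dev_path() result */
        const char *legacy_id = "mydev";        /* illustrative old-style idstr */

        if (dev_path) {
            snprintf(idstr, sizeof(idstr), "%s/", dev_path);
        }
        strncat(idstr, legacy_id, sizeof(idstr) - strlen(idstr) - 1);

        printf("savevm section id: %s\n", idstr);  /* "0000:00:03.0/mydev" */
        return 0;
    }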
diff --git a/slirp-android/slirp.c b/slirp-android/slirp.c
index 4d3dc9f..b739320 100644
--- a/slirp-android/slirp.c
+++ b/slirp-android/slirp.c
@@ -264,7 +264,7 @@
 
     alias_addr_ip = special_addr_ip | CTL_ALIAS;
     getouraddr();
-    register_savevm("slirp", 0, 1, slirp_state_save, slirp_state_load, NULL);
+    register_savevm(NULL, "slirp", 0, 1, slirp_state_save, slirp_state_load, NULL);
 
     slirp_net_forward_init();
 }
diff --git a/slirp-android/slirp.h b/slirp-android/slirp.h
index b592093..4068a4f 100644
--- a/slirp-android/slirp.h
+++ b/slirp-android/slirp.h
@@ -18,6 +18,7 @@
 #include "version.h"
 #endif
 #include "config-host.h"
+#include <stdint.h>
 #include "slirp_config.h"
 
 #include <stddef.h>
diff --git a/slirp-android/slirp_config.h b/slirp-android/slirp_config.h
index 79cbc72..1b2f50d 100644
--- a/slirp-android/slirp_config.h
+++ b/slirp-android/slirp_config.h
@@ -151,6 +151,17 @@
 /* Define to sizeof(int) */
 #define SIZEOF_INT 4
 
+/* Should come from qemu-common.h - however there's a lot of stuff there. */
+#ifndef HOST_LONG_BITS
+#if UINTPTR_MAX == UINT32_MAX
+# define HOST_LONG_BITS 32
+#elif UINTPTR_MAX == UINT64_MAX
+# define HOST_LONG_BITS 64
+#else
+# error Unknown pointer size
+#endif
+#endif
+
 /* Define to sizeof(char *) */
 #define SIZEOF_CHAR_P (HOST_LONG_BITS / 8)
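/* Illustrative sketch only (not part of the patch): the fallback above derives
 * HOST_LONG_BITS from UINTPTR_MAX in <stdint.h>, which is also why slirp.h now
 * includes <stdint.h> before slirp_config.h.  A quick stand-alone way to
 * sanity-check that derivation against the real pointer size: */
#include <stdint.h>
#include <stdio.h>

#if UINTPTR_MAX == UINT32_MAX
# define HOST_LONG_BITS 32
#elif UINTPTR_MAX == UINT64_MAX
# define HOST_LONG_BITS 64
#else
# error Unknown pointer size
#endif

/* C11 static assertion: compilation fails if the derived width is wrong. */
_Static_assert(sizeof(char *) == HOST_LONG_BITS / 8,
               "HOST_LONG_BITS does not match the actual pointer size");

int main(void)
{
    printf("host pointers are %d bits wide\n", HOST_LONG_BITS);
    return 0;
}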
 
diff --git a/softmmu_outside_jit.c b/softmmu_outside_jit.c
deleted file mode 100644
index b72ca0f..0000000
--- a/softmmu_outside_jit.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/* Copyright (C) 2007-2009 The Android Open Source Project
-**
-** This software is licensed under the terms of the GNU General Public
-** License version 2, as published by the Free Software Foundation, and
-** may be copied, distributed, and modified under those terms.
-**
-** This program is distributed in the hope that it will be useful,
-** but WITHOUT ANY WARRANTY; without even the implied warranty of
-** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-** GNU General Public License for more details.
-*/
-
-/*
- * Contains SOFTMMU macros expansion for ldx_user and stx_user routines used
- * outside of JIT. The issue is that regular implementation of these routines
- * assumes that pointer to CPU environment is stored in ebp register, which
- * is true for calls made inside JIT, but is not necessarily true for calls
- * made outside of JIT. The way SOFTMMU macros are expanded in this header
- * enforces ldx/stx routines to use CPU environment stored in cpu_single_env
- * variable.
- */
-
-#include "qemu-common.h"
-#include "cpu.h"
-#include "exec/exec-all.h"
-
-#define OUTSIDE_JIT
-#define MMUSUFFIX       _outside_jit
-#define GETPC()         NULL
-#define env             cpu_single_env
-#define ACCESS_TYPE     1
-#define CPU_MMU_INDEX   (cpu_mmu_index(env))
-
-#define SHIFT 0
-#include "exec/softmmu_template.h"
-
-#define SHIFT 1
-#include "exec/softmmu_template.h"
-
-#define SHIFT 2
-#include "exec/softmmu_template.h"
-
-#define SHIFT 3
-#include "exec/softmmu_template.h"
-
-#undef CPU_MMU_INDEX
-#undef ACCESS_TYPE
-#undef env
-#undef MMUSUFFIX
diff --git a/tap-win32.c b/tap-win32.c
index 734ad59..53a6bd7 100644
--- a/tap-win32.c
+++ b/tap-win32.c
@@ -576,7 +576,6 @@
     } version;
     DWORD version_len;
     DWORD idThread;
-    HANDLE hThread;
 
     if (prefered_name != NULL)
         snprintf(name_buffer, sizeof(name_buffer), "%s", prefered_name);
@@ -620,8 +619,8 @@
 
     *phandle = &tap_overlapped;
 
-    hThread = CreateThread(NULL, 0, tap_win32_thread_entry,
-                           (LPVOID)&tap_overlapped, 0, &idThread);
+    (void) CreateThread(NULL, 0, tap_win32_thread_entry,
+                        (LPVOID)&tap_overlapped, 0, &idThread);
     return 0;
 }
 
diff --git a/target-arm/arm-semi.c b/target-arm/arm-semi.c
index f3726e3..ed8d740 100644
--- a/target-arm/arm-semi.c
+++ b/target-arm/arm-semi.c
@@ -122,8 +122,9 @@
 static target_ulong syscall_err;
 #endif
 
-static void arm_semi_cb(CPUARMState *env, target_ulong ret, target_ulong err)
+static void arm_semi_cb(CPUState *cpu, target_ulong ret, target_ulong err)
 {
+    CPUARMState *env = cpu->env_ptr;
 #ifdef CONFIG_USER_ONLY
     TaskState *ts = env->opaque;
 #endif
@@ -152,12 +153,13 @@
     }
 }
 
-static void arm_semi_flen_cb(CPUARMState *env, target_ulong ret, target_ulong err)
+static void arm_semi_flen_cb(CPUState *cpu, target_ulong ret, target_ulong err)
 {
     /* The size is always stored in big-endian order, extract
        the value. We assume the size always fit in 32 bits.  */
+    CPUARMState *env = cpu->env_ptr;
     uint32_t size;
-    cpu_memory_rw_debug(env, env->regs[13]-64+32, (uint8_t *)&size, 4, 0);
+    cpu_memory_rw_debug(cpu, env->regs[13]-64+32, (uint8_t *)&size, 4, 0);
     env->regs[0] = be32_to_cpu(size);
 #ifdef CONFIG_USER_ONLY
     ((TaskState *)env->opaque)->swi_errno = err;
@@ -549,7 +551,7 @@
         exit(0);
     default:
         fprintf(stderr, "qemu: Unsupported SemiHosting SWI 0x%02x\n", nr);
-        cpu_dump_state(env, stderr, fprintf, 0);
+        cpu_dump_state(ENV_GET_CPU(env), stderr, fprintf, 0);
         abort();
     }
 }
diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h
new file mode 100644
index 0000000..fcb5c1a
--- /dev/null
+++ b/target-arm/cpu-qom.h
@@ -0,0 +1,21 @@
+#ifndef QEMU_ARM_CPU_QOM_H
+#define QEMU_ARM_CPU_QOM_H
+
+#include "qemu/osdep.h"
+#include "qom/cpu.h"
+
+typedef struct ARMCPU {
+    CPUState parent_obj;
+
+    CPUARMState env;
+} ARMCPU;
+
+static inline ARMCPU *arm_env_get_cpu(CPUARMState *env)
+{
+    return container_of(env, ARMCPU, env);
+}
+
+#define ENV_GET_CPU(e)  CPU(arm_env_get_cpu(e))
+#define ENV_OFFSET offsetof(ARMCPU, env)
+
+#endif  // QEMU_ARM_CPU_QOM_H
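/* Illustrative sketch only (not part of the patch): the new cpu-qom.h header
 * recovers the ARMCPU wrapper from a pointer to its embedded CPUARMState via
 * container_of(), and ENV_GET_CPU() builds on that.  The self-contained example
 * below uses invented Outer/Inner names to show how container_of maps a member
 * pointer back to the enclosing struct. */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

typedef struct Inner { int value; } Inner;

typedef struct Outer {
    int id;
    Inner inner;                  /* embedded, like CPUARMState inside ARMCPU */
} Outer;

static Outer *outer_from_inner(Inner *in)
{
    return container_of(in, Outer, inner);   /* mirrors arm_env_get_cpu() */
}

int main(void)
{
    Outer o = { .id = 7, .inner = { .value = 42 } };
    Inner *in = &o.inner;

    printf("recovered id=%d value=%d\n",
           outer_from_inner(in)->id, in->value);
    return 0;
}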
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 35b802c..61754e9 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -168,9 +168,12 @@
         uint32_t c7_par;  /* Translation result. */
         uint32_t c9_insn; /* Cache lockdown registers.  */
         uint32_t c9_data;
-        uint32_t c9_pmcr_data; /* Performance Monitor Control Register */
-        uint32_t c9_useren; /* user enable register */
-        uint32_t c9_inten; /* interrupt enable set/clear register */
+        uint32_t c9_pmcr; /* performance monitor control register */
+        uint32_t c9_pmcnten; /* perf monitor counter enables */
+        uint32_t c9_pmovsr; /* perf monitor overflow status */
+        uint32_t c9_pmxevtyper; /* perf monitor event type */
+        uint32_t c9_pmuserenr; /* perf monitor user enable */
+        uint32_t c9_pminten; /* perf monitor interrupt enables */
         uint32_t c12_vbar; /* secure/nonsecure vector base address register. */
         uint32_t c12_mvbar; /* monitor vector base address register. */
         uint32_t c13_fcse; /* FCSE PID.  */
@@ -264,9 +267,11 @@
         void *opaque;
     } cp[15];
     void *nvic;
-    struct arm_boot_info *boot_info;
+    const struct arm_boot_info *boot_info;
 } CPUARMState;
 
+#include "cpu-qom.h"
+
 CPUARMState *cpu_arm_init(const char *cpu_model);
 void arm_translate_init(void);
 int cpu_arm_exec(CPUARMState *s);
@@ -299,7 +304,7 @@
 int cpu_arm_signal_handler(int host_signum, void *pinfo,
                            void *puc);
 int cpu_arm_handle_mmu_fault (CPUARMState *env, target_ulong address, int rw,
-                              int mmu_idx, int is_softmuu);
+                              int mmu_idx);
 #define cpu_handle_mmu_fault cpu_arm_handle_mmu_fault
 
 static inline void cpu_set_tls(CPUARMState *env, target_ulong newtls)
@@ -434,6 +439,7 @@
     ARM_FEATURE_V4T,
     ARM_FEATURE_V5,
     ARM_FEATURE_STRONGARM,
+    ARM_FEATURE_VAPA, /* cp15 VA to PA lookups */
     ARM_FEATURE_TRUSTZONE, /* TrustZone Security Extensions. */
 };
 
@@ -762,6 +768,7 @@
 #define ARM_CPUID_PXA270_C5   0x69054117
 #define ARM_CPUID_ARM1136     0x4117b363
 #define ARM_CPUID_ARM1136_R2  0x4107b362
+#define ARM_CPUID_ARM1176     0x410fb767
 #define ARM_CPUID_ARM11MPCORE 0x410fb022
 #define ARM_CPUID_CORTEXA8    0x410fc080
 #define ARM_CPUID_CORTEXA8_R2 0x412fc083
@@ -792,7 +799,7 @@
 #define cpu_signal_handler cpu_arm_signal_handler
 #define cpu_list arm_cpu_list
 
-#define CPU_SAVE_VERSION 3
+#define CPU_SAVE_VERSION 4
 
 /* MMU modes definitions */
 #define MMU_MODE0_SUFFIX _kernel
@@ -895,4 +902,17 @@
     *cs_base = 0;
 }
 
+static inline bool cpu_has_work(CPUState *cpu)
+{
+    return (cpu->interrupt_request &
+            (CPU_INTERRUPT_FIQ | CPU_INTERRUPT_HARD | CPU_INTERRUPT_EXITTB));
+}
+
+#include "exec/exec-all.h"
+
+static inline void cpu_pc_from_tb(CPUARMState *env, TranslationBlock *tb)
+{
+    env->regs[15] = tb->pc;
+}
+
 #endif
diff --git a/target-arm/exec.h b/target-arm/exec.h
deleted file mode 100644
index 664ae86..0000000
--- a/target-arm/exec.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- *  ARM execution defines
- *
- *  Copyright (c) 2003 Fabrice Bellard
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-#include "config.h"
-#include "dyngen-exec.h"
-
-GLOBAL_REGISTER_VARIABLE_DECL struct CPUARMState *env asm(AREG0);
-
-#include "cpu.h"
-#include "exec/exec-all.h"
-
-static inline void env_to_regs(void)
-{
-}
-
-static inline void regs_to_env(void)
-{
-}
-
-static inline int cpu_has_work(CPUARMState *env)
-{
-    return (env->interrupt_request &
-            (CPU_INTERRUPT_FIQ | CPU_INTERRUPT_HARD | CPU_INTERRUPT_EXITTB));
-}
-
-static inline int cpu_halted(CPUARMState *env) {
-    if (!env->halted)
-        return 0;
-    /* An interrupt wakes the CPU even if the I and F CPSR bits are
-       set.  We use EXITTB to silently wake CPU without causing an
-       actual interrupt.  */
-    if (cpu_has_work(env)) {
-        env->halted = 0;
-        return 0;
-    }
-    return EXCP_HALTED;
-}
-
-#if !defined(CONFIG_USER_ONLY)
-#include "exec/softmmu_exec.h"
-#endif
-
-void raise_exception(int);
-static inline void cpu_pc_from_tb(CPUARMState *env, TranslationBlock *tb)
-{
-    env->regs[15] = tb->pc;
-}
-
diff --git a/target-arm/helper-android.c b/target-arm/helper-android.c
index 809e8f4..ec82f13 100644
--- a/target-arm/helper-android.c
+++ b/target-arm/helper-android.c
@@ -23,8 +23,8 @@
     }
 }
 
-#ifdef CONFIG_MEMCHECK
-#include "memcheck/memcheck_api.h"
+#ifdef CONFIG_ANDROID_MEMCHECK
+#include "android/qemu/memcheck/memcheck_api.h"
 
 void HELPER(on_call)(target_ulong pc, target_ulong ret) {
     memcheck_on_call(pc, ret);
@@ -33,4 +33,4 @@
 void HELPER(on_ret)(target_ulong ret) {
     memcheck_on_ret(ret);
 }
-#endif  // CONFIG_MEMCHECK
+#endif  // CONFIG_ANDROID_MEMCHECK
diff --git a/target-arm/helper-android.h b/target-arm/helper-android.h
index a13b39c..f4d4500 100644
--- a/target-arm/helper-android.h
+++ b/target-arm/helper-android.h
@@ -1,5 +1,5 @@
 /* This file must be included from helper.h */
-#ifdef CONFIG_MEMCHECK
+#ifdef CONFIG_ANDROID_MEMCHECK
 /* Hooks to translated BL/BLX. This callback is used to build thread's
  * calling stack.
  * Param:
@@ -13,5 +13,4 @@
  *  Pointer contains guest PC where BL/BLX will return.
  */
 DEF_HELPER_1(on_ret, void, i32)
-#endif  // CONFIG_MEMCHECK
-#include "exec/def-helper.h"
+#endif  // CONFIG_ANDROID_MEMCHECK
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 6cbdcd4..15fa0f5 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -39,6 +39,12 @@
 static uint32_t arm1136_cp15_c0_c2[8] =
 { 0x00140011, 0x12002111, 0x11231111, 0x01102131, 0x141, 0, 0, 0 };
 
+static uint32_t arm1176_cp15_c0_c1[8] =
+{ 0x111, 0x11, 0x33, 0, 0x01130003, 0x10030302, 0x01222100, 0 };
+
+static uint32_t arm1176_cp15_c0_c2[8] =
+{ 0x0140011, 0x12002111, 0x11231121, 0x01102131, 0x01141, 0, 0, 0 };
+
 static uint32_t cpu_arm_find_by_name(const char *name);
 
 static inline void set_feature(CPUARMState *env, int feature)
@@ -74,13 +80,24 @@
         env->cp15.c0_cachetype = 0x1dd20d2;
         env->cp15.c1_sys = 0x00090078;
         break;
-    case ARM_CPUID_ARM1136_R2:
     case ARM_CPUID_ARM1136:
+        /* This is the 1136 r1, which is a v6K core */
+        set_feature(env, ARM_FEATURE_V6K);
+        /* Fall through */
+    case ARM_CPUID_ARM1136_R2:
+        /* What qemu calls "arm1136_r2" is actually the 1136 r0p2, i.e. an
+         * older core than plain "arm1136". In particular this does not
+         * have the v6K features.
+         */
         set_feature(env, ARM_FEATURE_V4T);
         set_feature(env, ARM_FEATURE_V5);
         set_feature(env, ARM_FEATURE_V6);
         set_feature(env, ARM_FEATURE_VFP);
         set_feature(env, ARM_FEATURE_AUXCR);
+        /* These ID register values are correct for 1136 but may be wrong
+         * for 1136_r2 (in particular r0p2 does not actually implement most
+         * of the ID registers).
+         */
         env->vfp.xregs[ARM_VFP_FPSID] = 0x410120b4;
         env->vfp.xregs[ARM_VFP_MVFR0] = 0x11111111;
         env->vfp.xregs[ARM_VFP_MVFR1] = 0x00000000;
@@ -89,6 +106,21 @@
         env->cp15.c0_cachetype = 0x1dd20d2;
         env->cp15.c1_sys = 0x00050078;
         break;
+    case ARM_CPUID_ARM1176:
+        set_feature(env, ARM_FEATURE_V4T);
+        set_feature(env, ARM_FEATURE_V5);
+        set_feature(env, ARM_FEATURE_V6);
+        set_feature(env, ARM_FEATURE_V6K);
+        set_feature(env, ARM_FEATURE_VFP);
+        set_feature(env, ARM_FEATURE_AUXCR);
+        env->vfp.xregs[ARM_VFP_FPSID] = 0x410120b5;
+        env->vfp.xregs[ARM_VFP_MVFR0] = 0x11111111;
+        env->vfp.xregs[ARM_VFP_MVFR1] = 0x00000000;
+        memcpy(env->cp15.c0_c1, arm1176_cp15_c0_c1, 8 * sizeof(uint32_t));
+        memcpy(env->cp15.c0_c2, arm1176_cp15_c0_c2, 8 * sizeof(uint32_t));
+        env->cp15.c0_cachetype = 0x1dd20d2;
+        env->cp15.c1_sys = 0x00050078;
+        break;
     case ARM_CPUID_ARM11MPCORE:
         set_feature(env, ARM_FEATURE_V4T);
         set_feature(env, ARM_FEATURE_V5);
@@ -96,6 +128,7 @@
         set_feature(env, ARM_FEATURE_V6K);
         set_feature(env, ARM_FEATURE_VFP);
         set_feature(env, ARM_FEATURE_AUXCR);
+        set_feature(env, ARM_FEATURE_VAPA);
         env->vfp.xregs[ARM_VFP_FPSID] = 0x410120b4;
         env->vfp.xregs[ARM_VFP_MVFR0] = 0x11111111;
         env->vfp.xregs[ARM_VFP_MVFR1] = 0x00000000;
@@ -253,15 +286,21 @@
         cpu_abort(env, "Bad CPU ID: %x\n", id);
         break;
     }
+
+    /* Some features automatically imply others: */
+    if (arm_feature(env, ARM_FEATURE_V7)) {
+        set_feature(env, ARM_FEATURE_VAPA);
+    }
 }
 
-void cpu_reset(CPUARMState *env)
+void cpu_reset(CPUState *cpu)
 {
+    CPUARMState *env = cpu->env_ptr;
     uint32_t id;
 
     if (qemu_loglevel_mask(CPU_LOG_RESET)) {
-        qemu_log("CPU Reset (CPU %d)\n", env->cpu_index);
-        log_cpu_state(env, 0);
+        qemu_log("CPU Reset (CPU %d)\n", cpu->cpu_index);
+        log_cpu_state(cpu, 0);
     }
 
     id = env->cp15.c0_cpuid;
@@ -306,6 +345,10 @@
     }
     env->vfp.xregs[ARM_VFP_FPEXC] = 0;
     env->cp15.c2_base_mask = 0xffffc000u;
+    /* v7 performance monitor control register: same implementor
+     * field as main ID register, and we implement no event counters.
+     */
+    env->cp15.c9_pmcr = (id & 0xff000000);
 #endif
     set_flush_to_zero(1, &env->vfp.standard_fp_status);
     set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status);
@@ -371,6 +414,7 @@
 
 CPUARMState *cpu_arm_init(const char *cpu_model)
 {
+    ARMCPU *arm_cpu;
     CPUARMState *env;
     uint32_t id;
     static int inited = 0;
@@ -378,27 +422,31 @@
     id = cpu_arm_find_by_name(cpu_model);
     if (id == 0)
         return NULL;
-    env = g_malloc0(sizeof(CPUARMState));
+    arm_cpu = g_malloc0(sizeof(ARMCPU));
+    env = &arm_cpu->env;
+    ENV_GET_CPU(env)->env_ptr = env;
+
+    CPUState *cpu = ENV_GET_CPU(env);
     cpu_exec_init(env);
     if (!inited) {
         inited = 1;
         arm_translate_init();
     }
 
-    env->cpu_model_str = cpu_model;
+    cpu->cpu_model_str = cpu_model;
     env->cp15.c0_cpuid = id;
-    cpu_reset(env);
+    cpu_reset(cpu);
     if (arm_feature(env, ARM_FEATURE_NEON)) {
-        gdb_register_coprocessor(env, vfp_gdb_get_reg, vfp_gdb_set_reg,
+        gdb_register_coprocessor(cpu, vfp_gdb_get_reg, vfp_gdb_set_reg,
                                  51, "arm-neon.xml", 0);
     } else if (arm_feature(env, ARM_FEATURE_VFP3)) {
-        gdb_register_coprocessor(env, vfp_gdb_get_reg, vfp_gdb_set_reg,
+        gdb_register_coprocessor(cpu, vfp_gdb_get_reg, vfp_gdb_set_reg,
                                  35, "arm-vfp3.xml", 0);
     } else if (arm_feature(env, ARM_FEATURE_VFP)) {
-        gdb_register_coprocessor(env, vfp_gdb_get_reg, vfp_gdb_set_reg,
+        gdb_register_coprocessor(cpu, vfp_gdb_get_reg, vfp_gdb_set_reg,
                                  19, "arm-vfp.xml", 0);
     }
-    qemu_init_vcpu(env);
+    qemu_init_vcpu(cpu);
     return env;
 }
 
@@ -413,6 +461,7 @@
     { ARM_CPUID_ARM1026, "arm1026"},
     { ARM_CPUID_ARM1136, "arm1136"},
     { ARM_CPUID_ARM1136_R2, "arm1136-r2"},
+    { ARM_CPUID_ARM1176, "arm1176"},
     { ARM_CPUID_ARM11MPCORE, "arm11mpcore"},
     { ARM_CPUID_CORTEXM3, "cortex-m3"},
     { ARM_CPUID_CORTEXA8, "cortex-a8"},
@@ -576,7 +625,7 @@
 }
 
 int cpu_arm_handle_mmu_fault (CPUARMState *env, target_ulong address, int rw,
-                              int mmu_idx, int is_softmmu)
+                              int mmu_idx)
 {
     if (rw == 2) {
         env->exception_index = EXCP_PREFETCH_ABORT;
@@ -634,7 +683,7 @@
 extern int semihosting_enabled;
 
 /* Map CPU modes onto saved register banks.  */
-static inline int bank_number (int mode)
+static inline int bank_number (CPUARMState *env, int mode)
 {
     switch (mode) {
     case ARM_CPU_MODE_USR:
@@ -653,7 +702,7 @@
     case ARM_CPU_MODE_SMC:
         return 6;
     }
-    cpu_abort(cpu_single_env, "Bad mode %x\n", mode);
+    cpu_abort(env, "Bad mode %x\n", mode);
     return -1;
 }
 
@@ -674,12 +723,12 @@
         memcpy (env->regs + 8, env->fiq_regs, 5 * sizeof(uint32_t));
     }
 
-    i = bank_number(old_mode);
+    i = bank_number(env, old_mode);
     env->banked_r13[i] = env->regs[13];
     env->banked_r14[i] = env->regs[14];
     env->banked_spsr[i] = env->spsr;
 
-    i = bank_number(mode);
+    i = bank_number(env, mode);
     env->regs[13] = env->banked_r13[i];
     env->regs[14] = env->banked_r14[i];
     env->spsr = env->banked_spsr[i];
@@ -773,7 +822,7 @@
     case EXCP_BKPT:
         if (semihosting_enabled) {
             int nr;
-            nr = lduw_code(env->regs[15]) & 0xff;
+            nr = cpu_lduw_code(env, env->regs[15]) & 0xff;
             if (nr == 0xab) {
                 env->regs[15] += 2;
                 env->regs[0] = do_arm_semihosting(env);
@@ -844,9 +893,9 @@
         if (semihosting_enabled) {
             /* Check for semihosting interrupt.  */
             if (env->thumb) {
-                mask = lduw_code(env->regs[15] - 2) & 0xff;
+                mask = cpu_lduw_code(env, env->regs[15] - 2) & 0xff;
             } else {
-                mask = ldl_code(env->regs[15] - 4) & 0xffffff;
+                mask = cpu_ldl_code(env, env->regs[15] - 4) & 0xffffff;
             }
             /* Only intercept calls from privileged modes, to provide some
                semblance of security.  */
@@ -866,7 +915,7 @@
     case EXCP_BKPT:
         /* See if this is a semihosting syscall.  */
         if (env->thumb && semihosting_enabled) {
-            mask = lduw_code(env->regs[15]) & 0xff;
+            mask = cpu_lduw_code(env, env->regs[15]) & 0xff;
             if (mask == 0xab
                   && (env->uncached_cpsr & CPSR_M) != ARM_CPU_MODE_USR) {
                 env->regs[15] += 2;
@@ -949,7 +998,7 @@
     }
     env->regs[14] = env->regs[15] + offset;
     env->regs[15] = addr;
-    env->interrupt_request |= CPU_INTERRUPT_EXITTB;
+    ENV_GET_CPU(env)->interrupt_request |= CPU_INTERRUPT_EXITTB;
 }
 
 /* Check section/page access permissions.
@@ -996,7 +1045,7 @@
   case 6:
       return prot_ro;
   case 7:
-      if (!arm_feature (env, ARM_FEATURE_V7))
+      if (!arm_feature (env, ARM_FEATURE_V6K))
           return 0;
       return prot_ro;
   default:
@@ -1282,9 +1331,9 @@
 static
 #endif
 int get_phys_addr(CPUARMState *env, uint32_t address,
-                                int access_type, int is_user,
-                                uint32_t *phys_ptr, int *prot,
-                                target_ulong *page_size)
+                  int access_type, int is_user,
+                  uint32_t *phys_ptr, int *prot,
+                  target_ulong *page_size)
 {
     /* Fast Context Switch Extension.  */
     if (address < 0x02000000)
@@ -1298,8 +1347,8 @@
         return 0;
     } else if (arm_feature(env, ARM_FEATURE_MPU)) {
         *page_size = TARGET_PAGE_SIZE;
-	return get_phys_addr_mpu(env, address, access_type, is_user, phys_ptr,
-				 prot);
+    return get_phys_addr_mpu(env, address, access_type, is_user, phys_ptr,
+                             prot);
     } else if (env->cp15.c1_sys & (1 << 23)) {
         return get_phys_addr_v6(env, address, access_type, is_user, phys_ptr,
                                 prot, page_size);
@@ -1310,7 +1359,7 @@
 }
 
 int cpu_arm_handle_mmu_fault (CPUARMState *env, target_ulong address,
-                              int access_type, int mmu_idx, int is_softmmu)
+                              int access_type, int mmu_idx)
 {
     uint32_t phys_addr;
     target_ulong page_size;
@@ -1324,7 +1373,8 @@
         /* Map a single [sub]page.  */
         phys_addr &= ~(uint32_t)0x3ff;
         address &= ~(uint32_t)0x3ff;
-        tlb_set_page (env, address, phys_addr, prot, mmu_idx, page_size);
+        tlb_set_page (env, address, phys_addr, prot | PAGE_EXEC, mmu_idx,
+                      page_size);
         return 0;
     }
 
@@ -1563,7 +1613,7 @@
             goto bad_reg;
         }
         /* No cache, so nothing to do except VA->PA translations. */
-        if (arm_feature(env, ARM_FEATURE_V6K)) {
+        if (arm_feature(env, ARM_FEATURE_VAPA)) {
             switch (crm) {
             case 4:
                 if (arm_feature(env, ARM_FEATURE_V7)) {
@@ -1660,68 +1710,80 @@
         case 2:
             /* Not implemented.  */
             goto bad_reg;
-        case 12: /* performance monitor control */
-            if (arm_feature(env, ARM_FEATURE_V7)) {
-                switch (op2) {
-                case 0: /* performance monitor control */
-                    env->cp15.c9_pmcr_data = val;
-                    break;
-                case 1: /* count enable set */
-                case 2: /* count enable clear */
-                case 3: /* overflow flag status */
-                case 4: /* software increment */
-                case 5: /* performance counter selection */
-                    /* not implemented */
-                    goto bad_reg;
-                default:
-                    goto bad_reg;
-                }
-            } else {
+        case 12: /* Performance monitor control */
+            /* Performance monitors are implementation defined in v7,
+             * but with an ARM recommended set of registers, which we
+             * follow (although we don't actually implement any counters)
+             */
+            if (!arm_feature(env, ARM_FEATURE_V7)) {
+                goto bad_reg;
+            }
+            switch (op2) {
+            case 0: /* performance monitor control register */
+                /* only the DP, X, D and E bits are writable */
+                env->cp15.c9_pmcr &= ~0x39;
+                env->cp15.c9_pmcr |= (val & 0x39);
+                break;
+            case 1: /* Count enable set register */
+                val &= (1 << 31);
+                env->cp15.c9_pmcnten |= val;
+                break;
+            case 2: /* Count enable clear */
+                val &= (1 << 31);
+                env->cp15.c9_pmcnten &= ~val;
+                break;
+            case 3: /* Overflow flag status */
+                env->cp15.c9_pmovsr &= ~val;
+                break;
+            case 4: /* Software increment */
+                /* RAZ/WI since we don't implement the software-count event */
+                break;
+            case 5: /* Event counter selection register */
+                /* Since we don't implement any events, writing to this register
+                 * is actually UNPREDICTABLE. So we choose to RAZ/WI.
+                 */
+                break;
+            default:
                 goto bad_reg;
             }
             break;
-        case 13: /* performance counters */
-            if (arm_feature(env, ARM_FEATURE_V7)) {
-                switch (op2) {
-                case 0: /* cycle count */
-                case 1: /* event selection */
-                case 2: /* performance monitor count */
-                    /* not implemented */
-                    goto bad_reg;
-                default:
-                    goto bad_reg;
-                }
-            } else {
+        case 13: /* Performance counters */
+            if (!arm_feature(env, ARM_FEATURE_V7)) {
+                goto bad_reg;
+            }
+            switch (op2) {
+            case 0: /* Cycle count register: not implemented, so RAZ/WI */
+                break;
+            case 1: /* Event type select */
+                env->cp15.c9_pmxevtyper = val & 0xff;
+                break;
+            case 2: /* Event count register */
+                /* Unimplemented (we have no events), RAZ/WI */
+                break;
+            default:
                 goto bad_reg;
             }
             break;
-        case 14: /* performance monitor control */
-            if (arm_feature(env, ARM_FEATURE_V7)) {
-                switch (op2) {
-                case 0: /* user enable */
-                    if ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR) {
-                        goto bad_reg;
-                    }
-                    env->cp15.c9_useren = val & 1;
-                    break;
-                case 1: /* interrupt enable set */
-                    if ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR) {
-                        goto bad_reg;
-                    }
-                    env->cp15.c9_inten |= val & 0xf;
-                    break;
-                case 2: /* interrupt enable clear */
-                    if ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR) {
-                        goto bad_reg;
-                    }
-                    env->cp15.c9_inten &= ~(val & 0xf);
-                    break;
-                default:
-                    goto bad_reg;
-                }
-            } else {
+        case 14: /* Performance monitor control */
+            if (!arm_feature(env, ARM_FEATURE_V7)) {
                 goto bad_reg;
             }
+            switch (op2) {
+            case 0: /* user enable */
+                env->cp15.c9_pmuserenr = val & 1;
+                /* changes access rights for cp registers, so flush tbs */
+                tb_flush(env);
+                break;
+            case 1: /* interrupt enable set */
+                /* We have no event counters so only the C bit can be changed */
+                val &= (1 << 31);
+                env->cp15.c9_pminten |= val;
+                break;
+            case 2: /* interrupt enable clear */
+                val &= (1 << 31);
+                env->cp15.c9_pminten &= ~val;
+                break;
+            }
             break;
         default:
             goto bad_reg;
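/* Illustrative sketch only (not part of the patch): the PMCR write above keeps
 * the read-only implementor field intact and updates only the architecturally
 * writable bits (mask 0x39 = DP, X, D, E).  The stand-alone helper below shows
 * the same read-modify-write pattern on a plain uint32_t register. */
#include <stdint.h>
#include <stdio.h>

#define PMCR_WRITABLE_MASK 0x39u    /* DP, X, D and E bits */

static uint32_t pmcr_write(uint32_t old_pmcr, uint32_t val)
{
    old_pmcr &= ~PMCR_WRITABLE_MASK;           /* clear the writable bits */
    old_pmcr |= (val & PMCR_WRITABLE_MASK);    /* copy them from the new value */
    return old_pmcr;                           /* read-only bits survive untouched */
}

int main(void)
{
    uint32_t pmcr = 0x41000000;                /* implementor field, as set at reset */
    pmcr = pmcr_write(pmcr, 0xffffffffu);
    printf("PMCR after write: 0x%08x\n", pmcr);   /* prints 0x41000039 */
    return 0;
}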
@@ -1807,7 +1869,7 @@
                 env->cp15.c15_threadid = val & 0xffff;
                 break;
             case 8: /* Wait-for-interrupt (deprecated).  */
-                cpu_interrupt(env, CPU_INTERRUPT_HALT);
+                cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_HALT);
                 break;
             default:
                 goto bad_reg;
@@ -1853,7 +1915,7 @@
                      */
                     if (arm_feature(env, ARM_FEATURE_V7) ||
                         ARM_CPUID(env) == ARM_CPUID_ARM11MPCORE) {
-                        int mpidr = env->cpu_index;
+                        int mpidr = ENV_GET_CPU(env)->cpu_index;
                         /* We don't support setting cluster ID ([8..11])
                          * so these bits always RAZ.
                          */
@@ -2063,60 +2125,81 @@
         return 0;
     case 8: /* MMU TLB control.  */
         goto bad_reg;
-    case 9: /* Cache lockdown.  */
-        switch (op1) {
-        case 0:
-	    if (arm_feature(env, ARM_FEATURE_OMAPCP))
-		return 0;
-            switch (crm) {
-            case 0: /* L1 cache */
-            switch (op2) {
-            case 0:
-                return env->cp15.c9_data;
-            case 1:
-                return env->cp15.c9_insn;
-            default:
-                goto bad_reg;
-            }
-                break;
-            case 12:
+    case 9:
+        switch (crm) {
+        case 0: /* Cache lockdown */
+            switch (op1) {
+            case 0: /* L1 cache.  */
+                if (arm_feature(env, ARM_FEATURE_OMAPCP)) {
+                    return 0;
+                }
                 switch (op2) {
                 case 0:
-                    return env->cp15.c9_pmcr_data;
+                    return env->cp15.c9_data;
+                case 1:
+                    return env->cp15.c9_insn;
                 default:
                     goto bad_reg;
                 }
-                break;
-            case 14: /* performance monitor control */
-                if (arm_feature(env, ARM_FEATURE_V7)) {
-                    switch (op2) {
-                    case 0: /* user enable */
-                        return env->cp15.c9_useren;
-                    case 1: /* interrupt enable set */
-                    case 2: /* interrupt enable clear */
-                        if ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR) {
-                            goto bad_reg;
-                        }
-                        return env->cp15.c9_inten;
-                    default:
-                        goto bad_reg;
-                    }
-                } else {
+            case 1: /* L2 cache */
+                if (crm != 0) {
                     goto bad_reg;
                 }
-                break;
+                /* L2 Lockdown and Auxiliary control.  */
+                return 0;
             default:
                 goto bad_reg;
             }
             break;
-        case 1: /* L2 cache */
-            if (crm != 0)
+        case 12: /* Performance monitor control */
+            if (!arm_feature(env, ARM_FEATURE_V7)) {
                 goto bad_reg;
-            /* L2 Lockdown and Auxiliary control.  */
-            return 0;
+            }
+            switch (op2) {
+            case 0: /* performance monitor control register */
+                return env->cp15.c9_pmcr;
+            case 1: /* count enable set */
+            case 2: /* count enable clear */
+                return env->cp15.c9_pmcnten;
+            case 3: /* overflow flag status */
+                return env->cp15.c9_pmovsr;
+            case 4: /* software increment */
+            case 5: /* event counter selection register */
+                return 0; /* Unimplemented, RAZ/WI */
+            default:
+                goto bad_reg;
+            }
+        case 13: /* Performance counters */
+            if (!arm_feature(env, ARM_FEATURE_V7)) {
+                goto bad_reg;
+            }
+            switch (op2) {
+            case 1: /* Event type select */
+                return env->cp15.c9_pmxevtyper;
+            case 0: /* Cycle count register */
+            case 2: /* Event count register */
+                /* Unimplemented, so RAZ/WI */
+                return 0;
+            default:
+                goto bad_reg;
+            }
+        case 14: /* Performance monitor control */
+            if (!arm_feature(env, ARM_FEATURE_V7)) {
+                goto bad_reg;
+            }
+            switch (op2) {
+            case 0: /* user enable */
+                return env->cp15.c9_pmuserenr;
+            case 1: /* interrupt enable set */
+            case 2: /* interrupt enable clear */
+                return env->cp15.c9_pminten;
+            default:
+                goto bad_reg;
+            }
         default:
             goto bad_reg;
         }
+        break;
     case 10: /* MMU TLB lockdown.  */
         /* ??? TLB lockdown not implemented.  */
         return 0;
@@ -2192,7 +2275,7 @@
     if ((env->uncached_cpsr & CPSR_M) == mode) {
         env->regs[13] = val;
     } else {
-    env->banked_r13[bank_number(mode)] = val;
+    env->banked_r13[bank_number(env, mode)] = val;
 }
 }
 
@@ -2201,7 +2284,7 @@
     if ((env->uncached_cpsr & CPSR_M) == mode) {
         return env->regs[13];
     } else {
-    return env->banked_r13[bank_number(mode)];
+    return env->banked_r13[bank_number(env, mode)];
     }
 }
 
@@ -2228,11 +2311,11 @@
         return env->v7m.current_sp ? env->regs[13] : env->v7m.other_sp;
     case 16: /* PRIMASK */
         return (env->uncached_cpsr & CPSR_I) != 0;
-    case 17: /* FAULTMASK */
-        return (env->uncached_cpsr & CPSR_F) != 0;
-    case 18: /* BASEPRI */
-    case 19: /* BASEPRI_MAX */
+    case 17: /* BASEPRI */
+    case 18: /* BASEPRI_MAX */
         return env->v7m.basepri;
+    case 19: /* FAULTMASK */
+        return (env->uncached_cpsr & CPSR_F) != 0;
     case 20: /* CONTROL */
         return env->v7m.control;
     default:
@@ -2284,20 +2367,20 @@
         else
             env->uncached_cpsr &= ~CPSR_I;
         break;
-    case 17: /* FAULTMASK */
+    case 17: /* BASEPRI */
+        env->v7m.basepri = val & 0xff;
+        break;
+    case 18: /* BASEPRI_MAX */
+        val &= 0xff;
+        if (val != 0 && (val < env->v7m.basepri || env->v7m.basepri == 0))
+            env->v7m.basepri = val;
+        break;
+    case 19: /* FAULTMASK */
         if (val & 1)
             env->uncached_cpsr |= CPSR_F;
         else
             env->uncached_cpsr &= ~CPSR_F;
         break;
-    case 18: /* BASEPRI */
-        env->v7m.basepri = val & 0xff;
-        break;
-    case 19: /* BASEPRI_MAX */
-        val &= 0xff;
-        if (val != 0 && (val < env->v7m.basepri || env->v7m.basepri == 0))
-            env->v7m.basepri = val;
-        break;
     case 20: /* CONTROL */
         env->v7m.control = val & 3;
         switch_v7m_sp(env, (val & 2) != 0);
@@ -2686,13 +2769,15 @@
 #define VFP_HELPER(name, p) HELPER(glue(glue(vfp_,name),p))
 
 #define VFP_BINOP(name) \
-float32 VFP_HELPER(name, s)(float32 a, float32 b, CPUARMState *env) \
+float32 VFP_HELPER(name, s)(float32 a, float32 b, void *fpstp) \
 { \
-    return float32_ ## name (a, b, &env->vfp.fp_status); \
+    float_status *fpst = fpstp; \
+    return float32_ ## name (a, b, fpst); \
 } \
-float64 VFP_HELPER(name, d)(float64 a, float64 b, CPUARMState *env) \
+float64 VFP_HELPER(name, d)(float64 a, float64 b, void *fpstp) \
 { \
-    return float64_ ## name (a, b, &env->vfp.fp_status); \
+    float_status *fpst = fpstp; \
+    return float64_ ## name (a, b, fpst); \
 }
 VFP_BINOP(add)
 VFP_BINOP(sub)
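/* Illustrative sketch only (not part of the patch): the VFP_BINOP rewrite above
 * passes a float_status pointer as an opaque void *fpstp instead of reaching
 * into CPUARMState, so one helper body can serve either the normal or the
 * standard (short-vector) FP status.  The toy version below uses an invented
 * status struct just to show the cast-then-use pattern. */
#include <stdio.h>

typedef struct toy_float_status { int flush_to_zero; } toy_float_status;

static float toy_vfp_adds(float a, float b, void *fpstp)
{
    toy_float_status *fpst = fpstp;   /* same cast as the rewritten helper */
    (void)fpst->flush_to_zero;        /* rounding/flush state would be consulted here */
    return a + b;
}

int main(void)
{
    toy_float_status st = { .flush_to_zero = 1 };
    printf("%f\n", toy_vfp_adds(1.5f, 2.25f, &st));
    return 0;
}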
diff --git a/target-arm/helper.h b/target-arm/helper.h
index b40bdb0..5b5566e 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -4,12 +4,12 @@
 DEF_HELPER_1(sxtb16, i32, i32)
 DEF_HELPER_1(uxtb16, i32, i32)
 
-DEF_HELPER_2(add_setq, i32, i32, i32)
-DEF_HELPER_2(add_saturate, i32, i32, i32)
-DEF_HELPER_2(sub_saturate, i32, i32, i32)
-DEF_HELPER_2(add_usaturate, i32, i32, i32)
-DEF_HELPER_2(sub_usaturate, i32, i32, i32)
-DEF_HELPER_1(double_saturate, i32, s32)
+DEF_HELPER_3(add_setq, i32, env, i32, i32)
+DEF_HELPER_3(add_saturate, i32, env, i32, i32)
+DEF_HELPER_3(sub_saturate, i32, env, i32, i32)
+DEF_HELPER_3(add_usaturate, i32, env, i32, i32)
+DEF_HELPER_3(sub_usaturate, i32, env, i32, i32)
+DEF_HELPER_2(double_saturate, i32, env, s32)
 DEF_HELPER_2(sdiv, s32, s32, s32)
 DEF_HELPER_2(udiv, i32, i32, i32)
 DEF_HELPER_1(rbit, i32, i32)
@@ -40,21 +40,21 @@
 PAS_OP(uh)
 #undef PAS_OP
 
-DEF_HELPER_2(ssat, i32, i32, i32)
-DEF_HELPER_2(usat, i32, i32, i32)
-DEF_HELPER_2(ssat16, i32, i32, i32)
-DEF_HELPER_2(usat16, i32, i32, i32)
+DEF_HELPER_3(ssat, i32, env, i32, i32)
+DEF_HELPER_3(usat, i32, env, i32, i32)
+DEF_HELPER_3(ssat16, i32, env, i32, i32)
+DEF_HELPER_3(usat16, i32, env, i32, i32)
 
 DEF_HELPER_2(usad8, i32, i32, i32)
 
 DEF_HELPER_1(logicq_cc, i32, i64)
 
 DEF_HELPER_3(sel_flags, i32, i32, i32, i32)
-DEF_HELPER_1(exception, void, i32)
-DEF_HELPER_0(wfi, void)
+DEF_HELPER_2(exception, void, env, i32)
+DEF_HELPER_1(wfi, void, env)
 
-DEF_HELPER_2(cpsr_write, void, i32, i32)
-DEF_HELPER_0(cpsr_read, i32)
+DEF_HELPER_3(cpsr_write, void, env, i32, i32)
+DEF_HELPER_1(cpsr_read, i32, env)
 
 DEF_HELPER_3(v7m_msr, void, env, i32, i32)
 DEF_HELPER_2(v7m_mrs, i32, env, i32)
@@ -68,20 +68,20 @@
 DEF_HELPER_2(get_r13_banked, i32, env, i32)
 DEF_HELPER_3(set_r13_banked, void, env, i32, i32)
 
-DEF_HELPER_1(get_user_reg, i32, i32)
-DEF_HELPER_2(set_user_reg, void, i32, i32)
+DEF_HELPER_2(get_user_reg, i32, env, i32)
+DEF_HELPER_3(set_user_reg, void, env, i32, i32)
 
 DEF_HELPER_1(vfp_get_fpscr, i32, env)
 DEF_HELPER_2(vfp_set_fpscr, void, env, i32)
 
-DEF_HELPER_3(vfp_adds, f32, f32, f32, env)
-DEF_HELPER_3(vfp_addd, f64, f64, f64, env)
-DEF_HELPER_3(vfp_subs, f32, f32, f32, env)
-DEF_HELPER_3(vfp_subd, f64, f64, f64, env)
-DEF_HELPER_3(vfp_muls, f32, f32, f32, env)
-DEF_HELPER_3(vfp_muld, f64, f64, f64, env)
-DEF_HELPER_3(vfp_divs, f32, f32, f32, env)
-DEF_HELPER_3(vfp_divd, f64, f64, f64, env)
+DEF_HELPER_3(vfp_adds, f32, f32, f32, ptr)
+DEF_HELPER_3(vfp_addd, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_subs, f32, f32, f32, ptr)
+DEF_HELPER_3(vfp_subd, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_muls, f32, f32, f32, ptr)
+DEF_HELPER_3(vfp_muld, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_divs, f32, f32, f32, ptr)
+DEF_HELPER_3(vfp_divd, f64, f64, f64, ptr)
 DEF_HELPER_1(vfp_negs, f32, f32)
 DEF_HELPER_1(vfp_negd, f64, f64)
 DEF_HELPER_1(vfp_abss, f32, f32)
@@ -138,38 +138,38 @@
 DEF_HELPER_2(rsqrte_f32, f32, f32, env)
 DEF_HELPER_2(recpe_u32, i32, i32, env)
 DEF_HELPER_2(rsqrte_u32, i32, i32, env)
-DEF_HELPER_4(neon_tbl, i32, i32, i32, i32, i32)
+DEF_HELPER_5(neon_tbl, i32, env, i32, i32, i32, i32)
 
-DEF_HELPER_2(add_cc, i32, i32, i32)
-DEF_HELPER_2(adc_cc, i32, i32, i32)
-DEF_HELPER_2(sub_cc, i32, i32, i32)
-DEF_HELPER_2(sbc_cc, i32, i32, i32)
+DEF_HELPER_3(add_cc, i32, env, i32, i32)
+DEF_HELPER_3(adc_cc, i32, env, i32, i32)
+DEF_HELPER_3(sub_cc, i32, env, i32, i32)
+DEF_HELPER_3(sbc_cc, i32, env, i32, i32)
 
 DEF_HELPER_2(shl, i32, i32, i32)
 DEF_HELPER_2(shr, i32, i32, i32)
 DEF_HELPER_2(sar, i32, i32, i32)
-DEF_HELPER_2(shl_cc, i32, i32, i32)
-DEF_HELPER_2(shr_cc, i32, i32, i32)
-DEF_HELPER_2(sar_cc, i32, i32, i32)
-DEF_HELPER_2(ror_cc, i32, i32, i32)
+DEF_HELPER_3(shl_cc, i32, env, i32, i32)
+DEF_HELPER_3(shr_cc, i32, env, i32, i32)
+DEF_HELPER_3(sar_cc, i32, env, i32, i32)
+DEF_HELPER_3(ror_cc, i32, env, i32, i32)
 
 /* neon_helper.c */
-DEF_HELPER_2(neon_qadd_u8, i32, i32, i32)
-DEF_HELPER_2(neon_qadd_s8, i32, i32, i32)
-DEF_HELPER_2(neon_qadd_u16, i32, i32, i32)
-DEF_HELPER_2(neon_qadd_s16, i32, i32, i32)
-DEF_HELPER_2(neon_qadd_u32, i32, i32, i32)
-DEF_HELPER_2(neon_qadd_s32, i32, i32, i32)
-DEF_HELPER_2(neon_qsub_u8, i32, i32, i32)
-DEF_HELPER_2(neon_qsub_s8, i32, i32, i32)
-DEF_HELPER_2(neon_qsub_u16, i32, i32, i32)
-DEF_HELPER_2(neon_qsub_s16, i32, i32, i32)
-DEF_HELPER_2(neon_qsub_u32, i32, i32, i32)
-DEF_HELPER_2(neon_qsub_s32, i32, i32, i32)
-DEF_HELPER_2(neon_qadd_u64, i64, i64, i64)
-DEF_HELPER_2(neon_qadd_s64, i64, i64, i64)
-DEF_HELPER_2(neon_qsub_u64, i64, i64, i64)
-DEF_HELPER_2(neon_qsub_s64, i64, i64, i64)
+DEF_HELPER_3(neon_qadd_u8, i32, env, i32, i32)
+DEF_HELPER_3(neon_qadd_s8, i32, env, i32, i32)
+DEF_HELPER_3(neon_qadd_u16, i32, env, i32, i32)
+DEF_HELPER_3(neon_qadd_s16, i32, env, i32, i32)
+DEF_HELPER_3(neon_qadd_u32, i32, env, i32, i32)
+DEF_HELPER_3(neon_qadd_s32, i32, env, i32, i32)
+DEF_HELPER_3(neon_qsub_u8, i32, env, i32, i32)
+DEF_HELPER_3(neon_qsub_s8, i32, env, i32, i32)
+DEF_HELPER_3(neon_qsub_u16, i32, env, i32, i32)
+DEF_HELPER_3(neon_qsub_s16, i32, env, i32, i32)
+DEF_HELPER_3(neon_qsub_u32, i32, env, i32, i32)
+DEF_HELPER_3(neon_qsub_s32, i32, env, i32, i32)
+DEF_HELPER_3(neon_qadd_u64, i64, env, i64, i64)
+DEF_HELPER_3(neon_qadd_s64, i64, env, i64, i64)
+DEF_HELPER_3(neon_qsub_u64, i64, env, i64, i64)
+DEF_HELPER_3(neon_qsub_s64, i64, env, i64, i64)
 
 DEF_HELPER_2(neon_hadd_s8, i32, i32, i32)
 DEF_HELPER_2(neon_hadd_u8, i32, i32, i32)
@@ -247,26 +247,26 @@
 DEF_HELPER_2(neon_rshl_s32, i32, i32, i32)
 DEF_HELPER_2(neon_rshl_u64, i64, i64, i64)
 DEF_HELPER_2(neon_rshl_s64, i64, i64, i64)
-DEF_HELPER_2(neon_qshl_u8, i32, i32, i32)
-DEF_HELPER_2(neon_qshl_s8, i32, i32, i32)
-DEF_HELPER_2(neon_qshl_u16, i32, i32, i32)
-DEF_HELPER_2(neon_qshl_s16, i32, i32, i32)
-DEF_HELPER_2(neon_qshl_u32, i32, i32, i32)
-DEF_HELPER_2(neon_qshl_s32, i32, i32, i32)
-DEF_HELPER_2(neon_qshl_u64, i64, i64, i64)
-DEF_HELPER_2(neon_qshl_s64, i64, i64, i64)
-DEF_HELPER_2(neon_qshlu_s8, i32, i32, i32);
-DEF_HELPER_2(neon_qshlu_s16, i32, i32, i32);
-DEF_HELPER_2(neon_qshlu_s32, i32, i32, i32);
-DEF_HELPER_2(neon_qshlu_s64, i64, i64, i64);
-DEF_HELPER_2(neon_qrshl_u8, i32, i32, i32)
-DEF_HELPER_2(neon_qrshl_s8, i32, i32, i32)
-DEF_HELPER_2(neon_qrshl_u16, i32, i32, i32)
-DEF_HELPER_2(neon_qrshl_s16, i32, i32, i32)
-DEF_HELPER_2(neon_qrshl_u32, i32, i32, i32)
-DEF_HELPER_2(neon_qrshl_s32, i32, i32, i32)
-DEF_HELPER_2(neon_qrshl_u64, i64, i64, i64)
-DEF_HELPER_2(neon_qrshl_s64, i64, i64, i64)
+DEF_HELPER_3(neon_qshl_u8, i32, env, i32, i32)
+DEF_HELPER_3(neon_qshl_s8, i32, env, i32, i32)
+DEF_HELPER_3(neon_qshl_u16, i32, env, i32, i32)
+DEF_HELPER_3(neon_qshl_s16, i32, env, i32, i32)
+DEF_HELPER_3(neon_qshl_u32, i32, env, i32, i32)
+DEF_HELPER_3(neon_qshl_s32, i32, env, i32, i32)
+DEF_HELPER_3(neon_qshl_u64, i64, env, i64, i64)
+DEF_HELPER_3(neon_qshl_s64, i64, env, i64, i64)
+DEF_HELPER_3(neon_qshlu_s8, i32, env, i32, i32)
+DEF_HELPER_3(neon_qshlu_s16, i32, env, i32, i32)
+DEF_HELPER_3(neon_qshlu_s32, i32, env, i32, i32)
+DEF_HELPER_3(neon_qshlu_s64, i64, env, i64, i64)
+DEF_HELPER_3(neon_qrshl_u8, i32, env, i32, i32)
+DEF_HELPER_3(neon_qrshl_s8, i32, env, i32, i32)
+DEF_HELPER_3(neon_qrshl_u16, i32, env, i32, i32)
+DEF_HELPER_3(neon_qrshl_s16, i32, env, i32, i32)
+DEF_HELPER_3(neon_qrshl_u32, i32, env, i32, i32)
+DEF_HELPER_3(neon_qrshl_s32, i32, env, i32, i32)
+DEF_HELPER_3(neon_qrshl_u64, i64, env, i64, i64)
+DEF_HELPER_3(neon_qrshl_s64, i64, env, i64, i64)
 
 DEF_HELPER_2(neon_add_u8, i32, i32, i32)
 DEF_HELPER_2(neon_add_u16, i32, i32, i32)
@@ -295,22 +295,22 @@
 DEF_HELPER_1(neon_cls_s32, i32, i32)
 DEF_HELPER_1(neon_cnt_u8, i32, i32)
 
-DEF_HELPER_2(neon_qdmulh_s16, i32, i32, i32)
-DEF_HELPER_2(neon_qrdmulh_s16, i32, i32, i32)
-DEF_HELPER_2(neon_qdmulh_s32, i32, i32, i32)
-DEF_HELPER_2(neon_qrdmulh_s32, i32, i32, i32)
+DEF_HELPER_3(neon_qdmulh_s16, i32, env, i32, i32)
+DEF_HELPER_3(neon_qrdmulh_s16, i32, env, i32, i32)
+DEF_HELPER_3(neon_qdmulh_s32, i32, env, i32, i32)
+DEF_HELPER_3(neon_qrdmulh_s32, i32, env, i32, i32)
 
 DEF_HELPER_1(neon_narrow_u8, i32, i64)
 DEF_HELPER_1(neon_narrow_u16, i32, i64)
-DEF_HELPER_1(neon_unarrow_sat8, i32, i64)
-DEF_HELPER_1(neon_narrow_sat_u8, i32, i64)
-DEF_HELPER_1(neon_narrow_sat_s8, i32, i64)
-DEF_HELPER_1(neon_unarrow_sat16, i32, i64)
-DEF_HELPER_1(neon_narrow_sat_u16, i32, i64)
-DEF_HELPER_1(neon_narrow_sat_s16, i32, i64)
-DEF_HELPER_1(neon_unarrow_sat32, i32, i64)
-DEF_HELPER_1(neon_narrow_sat_u32, i32, i64)
-DEF_HELPER_1(neon_narrow_sat_s32, i32, i64)
+DEF_HELPER_2(neon_unarrow_sat8, i32, env, i64)
+DEF_HELPER_2(neon_narrow_sat_u8, i32, env, i64)
+DEF_HELPER_2(neon_narrow_sat_s8, i32, env, i64)
+DEF_HELPER_2(neon_unarrow_sat16, i32, env, i64)
+DEF_HELPER_2(neon_narrow_sat_u16, i32, env, i64)
+DEF_HELPER_2(neon_narrow_sat_s16, i32, env, i64)
+DEF_HELPER_2(neon_unarrow_sat32, i32, env, i64)
+DEF_HELPER_2(neon_narrow_sat_u32, i32, env, i64)
+DEF_HELPER_2(neon_narrow_sat_s32, i32, env, i64)
 DEF_HELPER_1(neon_narrow_high_u8, i32, i64)
 DEF_HELPER_1(neon_narrow_high_u16, i32, i64)
 DEF_HELPER_1(neon_narrow_round_high_u8, i32, i64)
@@ -326,8 +326,8 @@
 DEF_HELPER_2(neon_paddl_u32, i64, i64, i64)
 DEF_HELPER_2(neon_subl_u16, i64, i64, i64)
 DEF_HELPER_2(neon_subl_u32, i64, i64, i64)
-DEF_HELPER_2(neon_addl_saturate_s32, i64, i64, i64)
-DEF_HELPER_2(neon_addl_saturate_s64, i64, i64, i64)
+DEF_HELPER_3(neon_addl_saturate_s32, i64, env, i64, i64)
+DEF_HELPER_3(neon_addl_saturate_s64, i64, env, i64, i64)
 DEF_HELPER_2(neon_abdl_u16, i64, i32, i32)
 DEF_HELPER_2(neon_abdl_s16, i64, i32, i32)
 DEF_HELPER_2(neon_abdl_u32, i64, i32, i32)
@@ -343,24 +343,21 @@
 DEF_HELPER_1(neon_negl_u32, i64, i64)
 DEF_HELPER_1(neon_negl_u64, i64, i64)
 
-DEF_HELPER_1(neon_qabs_s8, i32, i32)
-DEF_HELPER_1(neon_qabs_s16, i32, i32)
-DEF_HELPER_1(neon_qabs_s32, i32, i32)
-DEF_HELPER_1(neon_qneg_s8, i32, i32)
-DEF_HELPER_1(neon_qneg_s16, i32, i32)
-DEF_HELPER_1(neon_qneg_s32, i32, i32)
+DEF_HELPER_2(neon_qabs_s8, i32, env, i32)
+DEF_HELPER_2(neon_qabs_s16, i32, env, i32)
+DEF_HELPER_2(neon_qabs_s32, i32, env, i32)
+DEF_HELPER_2(neon_qneg_s8, i32, env, i32)
+DEF_HELPER_2(neon_qneg_s16, i32, env, i32)
+DEF_HELPER_2(neon_qneg_s32, i32, env, i32)
 
-DEF_HELPER_2(neon_min_f32, i32, i32, i32)
-DEF_HELPER_2(neon_max_f32, i32, i32, i32)
-DEF_HELPER_2(neon_abd_f32, i32, i32, i32)
-DEF_HELPER_2(neon_add_f32, i32, i32, i32)
-DEF_HELPER_2(neon_sub_f32, i32, i32, i32)
-DEF_HELPER_2(neon_mul_f32, i32, i32, i32)
-DEF_HELPER_2(neon_ceq_f32, i32, i32, i32)
-DEF_HELPER_2(neon_cge_f32, i32, i32, i32)
-DEF_HELPER_2(neon_cgt_f32, i32, i32, i32)
-DEF_HELPER_2(neon_acge_f32, i32, i32, i32)
-DEF_HELPER_2(neon_acgt_f32, i32, i32, i32)
+DEF_HELPER_3(neon_min_f32, i32, i32, i32, ptr)
+DEF_HELPER_3(neon_max_f32, i32, i32, i32, ptr)
+DEF_HELPER_3(neon_abd_f32, i32, i32, i32, ptr)
+DEF_HELPER_3(neon_ceq_f32, i32, i32, i32, ptr)
+DEF_HELPER_3(neon_cge_f32, i32, i32, i32, ptr)
+DEF_HELPER_3(neon_cgt_f32, i32, i32, i32, ptr)
+DEF_HELPER_3(neon_acge_f32, i32, i32, i32, ptr)
+DEF_HELPER_3(neon_acgt_f32, i32, i32, i32, ptr)
 
 /* iwmmxt_helper.c */
 DEF_HELPER_2(iwmmxt_maddsq, i64, i64, i64)
@@ -375,47 +372,47 @@
 DEF_HELPER_2(iwmmxt_macuw, i64, i64, i64)
 DEF_HELPER_1(iwmmxt_setpsr_nz, i32, i64)
 
-#define DEF_IWMMXT_HELPER_SIZE(name) \
-DEF_HELPER_2(iwmmxt_##name##b, i64, i64, i64) \
-DEF_HELPER_2(iwmmxt_##name##w, i64, i64, i64) \
-DEF_HELPER_2(iwmmxt_##name##l, i64, i64, i64) \
+#define DEF_IWMMXT_HELPER_SIZE_ENV(name) \
+DEF_HELPER_3(iwmmxt_##name##b, i64, env, i64, i64) \
+DEF_HELPER_3(iwmmxt_##name##w, i64, env, i64, i64) \
+DEF_HELPER_3(iwmmxt_##name##l, i64, env, i64, i64) \
 
-DEF_IWMMXT_HELPER_SIZE(unpackl)
-DEF_IWMMXT_HELPER_SIZE(unpackh)
+DEF_IWMMXT_HELPER_SIZE_ENV(unpackl)
+DEF_IWMMXT_HELPER_SIZE_ENV(unpackh)
 
-DEF_HELPER_1(iwmmxt_unpacklub, i64, i64)
-DEF_HELPER_1(iwmmxt_unpackluw, i64, i64)
-DEF_HELPER_1(iwmmxt_unpacklul, i64, i64)
-DEF_HELPER_1(iwmmxt_unpackhub, i64, i64)
-DEF_HELPER_1(iwmmxt_unpackhuw, i64, i64)
-DEF_HELPER_1(iwmmxt_unpackhul, i64, i64)
-DEF_HELPER_1(iwmmxt_unpacklsb, i64, i64)
-DEF_HELPER_1(iwmmxt_unpacklsw, i64, i64)
-DEF_HELPER_1(iwmmxt_unpacklsl, i64, i64)
-DEF_HELPER_1(iwmmxt_unpackhsb, i64, i64)
-DEF_HELPER_1(iwmmxt_unpackhsw, i64, i64)
-DEF_HELPER_1(iwmmxt_unpackhsl, i64, i64)
+DEF_HELPER_2(iwmmxt_unpacklub, i64, env, i64)
+DEF_HELPER_2(iwmmxt_unpackluw, i64, env, i64)
+DEF_HELPER_2(iwmmxt_unpacklul, i64, env, i64)
+DEF_HELPER_2(iwmmxt_unpackhub, i64, env, i64)
+DEF_HELPER_2(iwmmxt_unpackhuw, i64, env, i64)
+DEF_HELPER_2(iwmmxt_unpackhul, i64, env, i64)
+DEF_HELPER_2(iwmmxt_unpacklsb, i64, env, i64)
+DEF_HELPER_2(iwmmxt_unpacklsw, i64, env, i64)
+DEF_HELPER_2(iwmmxt_unpacklsl, i64, env, i64)
+DEF_HELPER_2(iwmmxt_unpackhsb, i64, env, i64)
+DEF_HELPER_2(iwmmxt_unpackhsw, i64, env, i64)
+DEF_HELPER_2(iwmmxt_unpackhsl, i64, env, i64)
 
-DEF_IWMMXT_HELPER_SIZE(cmpeq)
-DEF_IWMMXT_HELPER_SIZE(cmpgtu)
-DEF_IWMMXT_HELPER_SIZE(cmpgts)
+DEF_IWMMXT_HELPER_SIZE_ENV(cmpeq)
+DEF_IWMMXT_HELPER_SIZE_ENV(cmpgtu)
+DEF_IWMMXT_HELPER_SIZE_ENV(cmpgts)
 
-DEF_IWMMXT_HELPER_SIZE(mins)
-DEF_IWMMXT_HELPER_SIZE(minu)
-DEF_IWMMXT_HELPER_SIZE(maxs)
-DEF_IWMMXT_HELPER_SIZE(maxu)
+DEF_IWMMXT_HELPER_SIZE_ENV(mins)
+DEF_IWMMXT_HELPER_SIZE_ENV(minu)
+DEF_IWMMXT_HELPER_SIZE_ENV(maxs)
+DEF_IWMMXT_HELPER_SIZE_ENV(maxu)
 
-DEF_IWMMXT_HELPER_SIZE(subn)
-DEF_IWMMXT_HELPER_SIZE(addn)
-DEF_IWMMXT_HELPER_SIZE(subu)
-DEF_IWMMXT_HELPER_SIZE(addu)
-DEF_IWMMXT_HELPER_SIZE(subs)
-DEF_IWMMXT_HELPER_SIZE(adds)
+DEF_IWMMXT_HELPER_SIZE_ENV(subn)
+DEF_IWMMXT_HELPER_SIZE_ENV(addn)
+DEF_IWMMXT_HELPER_SIZE_ENV(subu)
+DEF_IWMMXT_HELPER_SIZE_ENV(addu)
+DEF_IWMMXT_HELPER_SIZE_ENV(subs)
+DEF_IWMMXT_HELPER_SIZE_ENV(adds)
 
-DEF_HELPER_2(iwmmxt_avgb0, i64, i64, i64)
-DEF_HELPER_2(iwmmxt_avgb1, i64, i64, i64)
-DEF_HELPER_2(iwmmxt_avgw0, i64, i64, i64)
-DEF_HELPER_2(iwmmxt_avgw1, i64, i64, i64)
+DEF_HELPER_3(iwmmxt_avgb0, i64, env, i64, i64)
+DEF_HELPER_3(iwmmxt_avgb1, i64, env, i64, i64)
+DEF_HELPER_3(iwmmxt_avgw0, i64, env, i64, i64)
+DEF_HELPER_3(iwmmxt_avgw1, i64, env, i64, i64)
 
 DEF_HELPER_2(iwmmxt_msadb, i64, i64, i64)
 
@@ -434,26 +431,26 @@
 DEF_HELPER_1(iwmmxt_msbw, i32, i64)
 DEF_HELPER_1(iwmmxt_msbl, i32, i64)
 
-DEF_HELPER_2(iwmmxt_srlw, i64, i64, i32)
-DEF_HELPER_2(iwmmxt_srll, i64, i64, i32)
-DEF_HELPER_2(iwmmxt_srlq, i64, i64, i32)
-DEF_HELPER_2(iwmmxt_sllw, i64, i64, i32)
-DEF_HELPER_2(iwmmxt_slll, i64, i64, i32)
-DEF_HELPER_2(iwmmxt_sllq, i64, i64, i32)
-DEF_HELPER_2(iwmmxt_sraw, i64, i64, i32)
-DEF_HELPER_2(iwmmxt_sral, i64, i64, i32)
-DEF_HELPER_2(iwmmxt_sraq, i64, i64, i32)
-DEF_HELPER_2(iwmmxt_rorw, i64, i64, i32)
-DEF_HELPER_2(iwmmxt_rorl, i64, i64, i32)
-DEF_HELPER_2(iwmmxt_rorq, i64, i64, i32)
-DEF_HELPER_2(iwmmxt_shufh, i64, i64, i32)
+DEF_HELPER_3(iwmmxt_srlw, i64, env, i64, i32)
+DEF_HELPER_3(iwmmxt_srll, i64, env, i64, i32)
+DEF_HELPER_3(iwmmxt_srlq, i64, env, i64, i32)
+DEF_HELPER_3(iwmmxt_sllw, i64, env, i64, i32)
+DEF_HELPER_3(iwmmxt_slll, i64, env, i64, i32)
+DEF_HELPER_3(iwmmxt_sllq, i64, env, i64, i32)
+DEF_HELPER_3(iwmmxt_sraw, i64, env, i64, i32)
+DEF_HELPER_3(iwmmxt_sral, i64, env, i64, i32)
+DEF_HELPER_3(iwmmxt_sraq, i64, env, i64, i32)
+DEF_HELPER_3(iwmmxt_rorw, i64, env, i64, i32)
+DEF_HELPER_3(iwmmxt_rorl, i64, env, i64, i32)
+DEF_HELPER_3(iwmmxt_rorq, i64, env, i64, i32)
+DEF_HELPER_3(iwmmxt_shufh, i64, env, i64, i32)
 
-DEF_HELPER_2(iwmmxt_packuw, i64, i64, i64)
-DEF_HELPER_2(iwmmxt_packul, i64, i64, i64)
-DEF_HELPER_2(iwmmxt_packuq, i64, i64, i64)
-DEF_HELPER_2(iwmmxt_packsw, i64, i64, i64)
-DEF_HELPER_2(iwmmxt_packsl, i64, i64, i64)
-DEF_HELPER_2(iwmmxt_packsq, i64, i64, i64)
+DEF_HELPER_3(iwmmxt_packuw, i64, env, i64, i64)
+DEF_HELPER_3(iwmmxt_packul, i64, env, i64, i64)
+DEF_HELPER_3(iwmmxt_packuq, i64, env, i64, i64)
+DEF_HELPER_3(iwmmxt_packsw, i64, env, i64, i64)
+DEF_HELPER_3(iwmmxt_packsl, i64, env, i64, i64)
+DEF_HELPER_3(iwmmxt_packsq, i64, env, i64, i64)
 
 DEF_HELPER_3(iwmmxt_muladdsl, i64, i64, i32, i32)
 DEF_HELPER_3(iwmmxt_muladdsw, i64, i64, i32, i32)
@@ -461,17 +458,17 @@
 
 DEF_HELPER_2(set_teecr, void, env, i32)
 
-DEF_HELPER_2(neon_unzip8, void, i32, i32)
-DEF_HELPER_2(neon_unzip16, void, i32, i32)
-DEF_HELPER_2(neon_qunzip8, void, i32, i32)
-DEF_HELPER_2(neon_qunzip16, void, i32, i32)
-DEF_HELPER_2(neon_qunzip32, void, i32, i32)
-DEF_HELPER_2(neon_zip8, void, i32, i32)
-DEF_HELPER_2(neon_zip16, void, i32, i32)
-DEF_HELPER_2(neon_qzip8, void, i32, i32)
-DEF_HELPER_2(neon_qzip16, void, i32, i32)
-DEF_HELPER_2(neon_qzip32, void, i32, i32)
-DEF_HELPER_1(neon_vldst_all, void, i32)
+DEF_HELPER_3(neon_unzip8, void, env, i32, i32)
+DEF_HELPER_3(neon_unzip16, void, env, i32, i32)
+DEF_HELPER_3(neon_qunzip8, void, env, i32, i32)
+DEF_HELPER_3(neon_qunzip16, void, env, i32, i32)
+DEF_HELPER_3(neon_qunzip32, void, env, i32, i32)
+DEF_HELPER_3(neon_zip8, void, env, i32, i32)
+DEF_HELPER_3(neon_zip16, void, env, i32, i32)
+DEF_HELPER_3(neon_qzip8, void, env, i32, i32)
+DEF_HELPER_3(neon_qzip16, void, env, i32, i32)
+DEF_HELPER_3(neon_qzip32, void, env, i32, i32)
+DEF_HELPER_2(neon_vldst_all, void, env, i32)
 
 #include "helper-android.h"
 #include "exec/def-helper.h"
diff --git a/target-arm/iwmmxt_helper.c b/target-arm/iwmmxt_helper.c
index ebe6eb9..ff5e733 100644
--- a/target-arm/iwmmxt_helper.c
+++ b/target-arm/iwmmxt_helper.c
@@ -23,7 +23,8 @@
 #include <stdio.h>
 
 #include "cpu.h"
-#include "exec.h"
+#include "exec/exec-all.h"
+#include "qemu/bitops.h"
 #include "helper.h"
 
 /* iwMMXt macros extracted from GNU gdb.  */
@@ -162,7 +163,8 @@
     SIMD64_SET(NBIT64(x), SIMD_NBIT) | \
     SIMD64_SET(ZBIT64(x), SIMD_ZBIT)
 #define IWMMXT_OP_UNPACK(S, SH0, SH1, SH2, SH3)			\
-uint64_t HELPER(glue(iwmmxt_unpack, glue(S, b)))(uint64_t a, uint64_t b) \
+uint64_t HELPER(glue(iwmmxt_unpack, glue(S, b)))(CPUARMState *env, \
+                                                 uint64_t a, uint64_t b) \
 {								\
     a =							        \
         (((a >> SH0) & 0xff) << 0) | (((b >> SH0) & 0xff) << 8) |	\
@@ -176,7 +178,8 @@
         NZBIT8(a >> 48, 6) | NZBIT8(a >> 56, 7);		\
     return a;                                                   \
 }								\
-uint64_t HELPER(glue(iwmmxt_unpack, glue(S, w)))(uint64_t a, uint64_t b) \
+uint64_t HELPER(glue(iwmmxt_unpack, glue(S, w)))(CPUARMState *env, \
+                                        uint64_t a, uint64_t b) \
 {								\
     a =							        \
         (((a >> SH0) & 0xffff) << 0) |				\
@@ -188,7 +191,8 @@
         NZBIT8(a >> 32, 2) | NZBIT8(a >> 48, 3);		\
     return a;                                                   \
 }								\
-uint64_t HELPER(glue(iwmmxt_unpack, glue(S, l)))(uint64_t a, uint64_t b) \
+uint64_t HELPER(glue(iwmmxt_unpack, glue(S, l)))(CPUARMState *env, \
+                                        uint64_t a, uint64_t b) \
 {								\
     a =							        \
         (((a >> SH0) & 0xffffffff) << 0) |			\
@@ -197,7 +201,8 @@
         NZBIT32(a >> 0, 0) | NZBIT32(a >> 32, 1);		\
     return a;                                                   \
 }								\
-uint64_t HELPER(glue(iwmmxt_unpack, glue(S, ub)))(uint64_t x)   \
+uint64_t HELPER(glue(iwmmxt_unpack, glue(S, ub)))(CPUARMState *env, \
+                                                  uint64_t x)   \
 {								\
     x =							        \
         (((x >> SH0) & 0xff) << 0) |				\
@@ -209,7 +214,8 @@
         NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3);		\
     return x;                                                   \
 }								\
-uint64_t HELPER(glue(iwmmxt_unpack, glue(S, uw)))(uint64_t x)   \
+uint64_t HELPER(glue(iwmmxt_unpack, glue(S, uw)))(CPUARMState *env, \
+                                                  uint64_t x)   \
 {								\
     x =							        \
         (((x >> SH0) & 0xffff) << 0) |				\
@@ -218,13 +224,15 @@
         NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1);		\
     return x;                                                   \
 }								\
-uint64_t HELPER(glue(iwmmxt_unpack, glue(S, ul)))(uint64_t x)   \
+uint64_t HELPER(glue(iwmmxt_unpack, glue(S, ul)))(CPUARMState *env, \
+                                                  uint64_t x)   \
 {								\
     x = (((x >> SH0) & 0xffffffff) << 0);			\
     env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x >> 0);	\
     return x;                                                   \
 }								\
-uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sb)))(uint64_t x)   \
+uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sb)))(CPUARMState *env, \
+                                                  uint64_t x)   \
 {								\
     x =							        \
         ((uint64_t) EXTEND8H((x >> SH0) & 0xff) << 0) |	        \
@@ -236,7 +244,8 @@
         NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3);		\
     return x;                                                   \
 }								\
-uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sw)))(uint64_t x)   \
+uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sw)))(CPUARMState *env, \
+                                                  uint64_t x)   \
 {								\
     x =							        \
         ((uint64_t) EXTEND16((x >> SH0) & 0xffff) << 0) |	\
@@ -245,7 +254,8 @@
         NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1);		\
     return x;                                                   \
 }								\
-uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sl)))(uint64_t x)   \
+uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sl)))(CPUARMState *env, \
+                                                  uint64_t x)   \
 {								\
     x = EXTEND32((x >> SH0) & 0xffffffff);			\
     env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x >> 0);	\
@@ -255,7 +265,8 @@
 IWMMXT_OP_UNPACK(h, 32, 40, 48, 56)
 
 #define IWMMXT_OP_CMP(SUFF, Tb, Tw, Tl, O)			\
-uint64_t HELPER(glue(iwmmxt_, glue(SUFF, b)))(uint64_t a, uint64_t b) \
+uint64_t HELPER(glue(iwmmxt_, glue(SUFF, b)))(CPUARMState *env,    \
+                                        uint64_t a, uint64_t b) \
 {								\
     a =							        \
         CMP(0, Tb, O, 0xff) | CMP(8, Tb, O, 0xff) |		\
@@ -269,7 +280,8 @@
         NZBIT8(a >> 48, 6) | NZBIT8(a >> 56, 7);		\
     return a;                                                   \
 }								\
-uint64_t HELPER(glue(iwmmxt_, glue(SUFF, w)))(uint64_t a, uint64_t b) \
+uint64_t HELPER(glue(iwmmxt_, glue(SUFF, w)))(CPUARMState *env,    \
+                                        uint64_t a, uint64_t b) \
 {								\
     a = CMP(0, Tw, O, 0xffff) | CMP(16, Tw, O, 0xffff) |	\
         CMP(32, Tw, O, 0xffff) | CMP(48, Tw, O, 0xffff);	\
@@ -278,7 +290,8 @@
         NZBIT16(a >> 32, 2) | NZBIT16(a >> 48, 3);		\
     return a;                                                   \
 }								\
-uint64_t HELPER(glue(iwmmxt_, glue(SUFF, l)))(uint64_t a, uint64_t b) \
+uint64_t HELPER(glue(iwmmxt_, glue(SUFF, l)))(CPUARMState *env,    \
+                                        uint64_t a, uint64_t b) \
 {								\
     a = CMP(0, Tl, O, 0xffffffff) |				\
         CMP(32, Tl, O, 0xffffffff);				\
@@ -317,7 +330,7 @@
 #define AVGB(SHR) ((( \
         ((a >> SHR) & 0xff) + ((b >> SHR) & 0xff) + round) >> 1) << SHR)
 #define IWMMXT_OP_AVGB(r)                                                 \
-uint64_t HELPER(iwmmxt_avgb##r)(uint64_t a, uint64_t b)                   \
+uint64_t HELPER(iwmmxt_avgb##r)(CPUARMState *env, uint64_t a, uint64_t b)    \
 {                                                                         \
     const int round = r;                                                  \
     a = AVGB(0) | AVGB(8) | AVGB(16) | AVGB(24) |                         \
@@ -341,7 +354,7 @@
 #define AVGW(SHR) ((( \
         ((a >> SHR) & 0xffff) + ((b >> SHR) & 0xffff) + round) >> 1) << SHR)
 #define IWMMXT_OP_AVGW(r)                                               \
-uint64_t HELPER(iwmmxt_avgw##r)(uint64_t a, uint64_t b)                 \
+uint64_t HELPER(iwmmxt_avgw##r)(CPUARMState *env, uint64_t a, uint64_t b)  \
 {                                                                       \
     const int round = r;                                                \
     a = AVGW(0) | AVGW(16) | AVGW(32) | AVGW(48);                       \
@@ -452,7 +465,7 @@
 }
 
 /* FIXME: Split wCASF setting into a separate op to avoid env use.  */
-uint64_t HELPER(iwmmxt_srlw)(uint64_t x, uint32_t n)
+uint64_t HELPER(iwmmxt_srlw)(CPUARMState *env, uint64_t x, uint32_t n)
 {
     x = (((x & (0xffffll << 0)) >> n) & (0xffffll << 0)) |
         (((x & (0xffffll << 16)) >> n) & (0xffffll << 16)) |
@@ -464,7 +477,7 @@
     return x;
 }
 
-uint64_t HELPER(iwmmxt_srll)(uint64_t x, uint32_t n)
+uint64_t HELPER(iwmmxt_srll)(CPUARMState *env, uint64_t x, uint32_t n)
 {
     x = ((x & (0xffffffffll << 0)) >> n) |
         ((x >> n) & (0xffffffffll << 32));
@@ -473,14 +486,14 @@
     return x;
 }
 
-uint64_t HELPER(iwmmxt_srlq)(uint64_t x, uint32_t n)
+uint64_t HELPER(iwmmxt_srlq)(CPUARMState *env, uint64_t x, uint32_t n)
 {
     x >>= n;
     env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x);
     return x;
 }
 
-uint64_t HELPER(iwmmxt_sllw)(uint64_t x, uint32_t n)
+uint64_t HELPER(iwmmxt_sllw)(CPUARMState *env, uint64_t x, uint32_t n)
 {
     x = (((x & (0xffffll << 0)) << n) & (0xffffll << 0)) |
         (((x & (0xffffll << 16)) << n) & (0xffffll << 16)) |
@@ -492,7 +505,7 @@
     return x;
 }
 
-uint64_t HELPER(iwmmxt_slll)(uint64_t x, uint32_t n)
+uint64_t HELPER(iwmmxt_slll)(CPUARMState *env, uint64_t x, uint32_t n)
 {
     x = ((x << n) & (0xffffffffll << 0)) |
         ((x & (0xffffffffll << 32)) << n);
@@ -501,14 +514,14 @@
     return x;
 }
 
-uint64_t HELPER(iwmmxt_sllq)(uint64_t x, uint32_t n)
+uint64_t HELPER(iwmmxt_sllq)(CPUARMState *env, uint64_t x, uint32_t n)
 {
     x <<= n;
     env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x);
     return x;
 }
 
-uint64_t HELPER(iwmmxt_sraw)(uint64_t x, uint32_t n)
+uint64_t HELPER(iwmmxt_sraw)(CPUARMState *env, uint64_t x, uint32_t n)
 {
     x = ((uint64_t) ((EXTEND16(x >> 0) >> n) & 0xffff) << 0) |
         ((uint64_t) ((EXTEND16(x >> 16) >> n) & 0xffff) << 16) |
@@ -520,7 +533,7 @@
     return x;
 }
 
-uint64_t HELPER(iwmmxt_sral)(uint64_t x, uint32_t n)
+uint64_t HELPER(iwmmxt_sral)(CPUARMState *env, uint64_t x, uint32_t n)
 {
     x = (((EXTEND32(x >> 0) >> n) & 0xffffffff) << 0) |
         (((EXTEND32(x >> 32) >> n) & 0xffffffff) << 32);
@@ -529,14 +542,14 @@
     return x;
 }
 
-uint64_t HELPER(iwmmxt_sraq)(uint64_t x, uint32_t n)
+uint64_t HELPER(iwmmxt_sraq)(CPUARMState *env, uint64_t x, uint32_t n)
 {
     x = (int64_t) x >> n;
     env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x);
     return x;
 }
 
-uint64_t HELPER(iwmmxt_rorw)(uint64_t x, uint32_t n)
+uint64_t HELPER(iwmmxt_rorw)(CPUARMState *env, uint64_t x, uint32_t n)
 {
     x = ((((x & (0xffffll << 0)) >> n) |
           ((x & (0xffffll << 0)) << (16 - n))) & (0xffffll << 0)) |
@@ -552,7 +565,7 @@
     return x;
 }
 
-uint64_t HELPER(iwmmxt_rorl)(uint64_t x, uint32_t n)
+uint64_t HELPER(iwmmxt_rorl)(CPUARMState *env, uint64_t x, uint32_t n)
 {
     x = ((x & (0xffffffffll << 0)) >> n) |
         ((x >> n) & (0xffffffffll << 32)) |
@@ -563,14 +576,14 @@
     return x;
 }
 
-uint64_t HELPER(iwmmxt_rorq)(uint64_t x, uint32_t n)
+uint64_t HELPER(iwmmxt_rorq)(CPUARMState *env, uint64_t x, uint32_t n)
 {
-    x = (x >> n) | (x << (64 - n));
+    x = ror64(x, n);
     env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x);
     return x;
 }
 
-uint64_t HELPER(iwmmxt_shufh)(uint64_t x, uint32_t n)
+uint64_t HELPER(iwmmxt_shufh)(CPUARMState *env, uint64_t x, uint32_t n)
 {
     x = (((x >> ((n << 4) & 0x30)) & 0xffff) << 0) |
         (((x >> ((n << 2) & 0x30)) & 0xffff) << 16) |
@@ -583,7 +596,7 @@
 }
 
 /* TODO: Unsigned-Saturation */
-uint64_t HELPER(iwmmxt_packuw)(uint64_t a, uint64_t b)
+uint64_t HELPER(iwmmxt_packuw)(CPUARMState *env, uint64_t a, uint64_t b)
 {
     a = (((a >> 0) & 0xff) << 0) | (((a >> 16) & 0xff) << 8) |
         (((a >> 32) & 0xff) << 16) | (((a >> 48) & 0xff) << 24) |
@@ -597,7 +610,7 @@
     return a;
 }
 
-uint64_t HELPER(iwmmxt_packul)(uint64_t a, uint64_t b)
+uint64_t HELPER(iwmmxt_packul)(CPUARMState *env, uint64_t a, uint64_t b)
 {
     a = (((a >> 0) & 0xffff) << 0) | (((a >> 32) & 0xffff) << 16) |
         (((b >> 0) & 0xffff) << 32) | (((b >> 32) & 0xffff) << 48);
@@ -607,7 +620,7 @@
     return a;
 }
 
-uint64_t HELPER(iwmmxt_packuq)(uint64_t a, uint64_t b)
+uint64_t HELPER(iwmmxt_packuq)(CPUARMState *env, uint64_t a, uint64_t b)
 {
     a = (a & 0xffffffff) | ((b & 0xffffffff) << 32);
     env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =
@@ -616,7 +629,7 @@
 }
 
 /* TODO: Signed-Saturation */
-uint64_t HELPER(iwmmxt_packsw)(uint64_t a, uint64_t b)
+uint64_t HELPER(iwmmxt_packsw)(CPUARMState *env, uint64_t a, uint64_t b)
 {
     a = (((a >> 0) & 0xff) << 0) | (((a >> 16) & 0xff) << 8) |
         (((a >> 32) & 0xff) << 16) | (((a >> 48) & 0xff) << 24) |
@@ -630,7 +643,7 @@
     return a;
 }
 
-uint64_t HELPER(iwmmxt_packsl)(uint64_t a, uint64_t b)
+uint64_t HELPER(iwmmxt_packsl)(CPUARMState *env, uint64_t a, uint64_t b)
 {
     a = (((a >> 0) & 0xffff) << 0) | (((a >> 32) & 0xffff) << 16) |
         (((b >> 0) & 0xffff) << 32) | (((b >> 32) & 0xffff) << 48);
@@ -640,7 +653,7 @@
     return a;
 }
 
-uint64_t HELPER(iwmmxt_packsq)(uint64_t a, uint64_t b)
+uint64_t HELPER(iwmmxt_packsq)(CPUARMState *env, uint64_t a, uint64_t b)
 {
     a = (a & 0xffffffff) | ((b & 0xffffffff) << 32);
     env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =
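
The iwmmxt helpers above now take an explicit CPUARMState *env as their first argument so they can update wCASF without relying on a global env. The matching declarations in target-arm/helper.h are not part of this excerpt; presumably they move up one arity and gain an env slot in the usual way, roughly:

    -DEF_HELPER_2(iwmmxt_packuw, i64, i64, i64)
    +DEF_HELPER_3(iwmmxt_packuw, i64, env, i64, i64)
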
diff --git a/target-arm/machine.c b/target-arm/machine.c
index 61239c3..557e3e0 100644
--- a/target-arm/machine.c
+++ b/target-arm/machine.c
@@ -48,9 +48,12 @@
     qemu_put_be32(f, env->cp15.c7_par);
     qemu_put_be32(f, env->cp15.c9_insn);
     qemu_put_be32(f, env->cp15.c9_data);
-    qemu_put_be32(f, env->cp15.c9_pmcr_data);
-    qemu_put_be32(f, env->cp15.c9_useren);
-    qemu_put_be32(f, env->cp15.c9_inten);
+    qemu_put_be32(f, env->cp15.c9_pmcr);
+    qemu_put_be32(f, env->cp15.c9_pmcnten);
+    qemu_put_be32(f, env->cp15.c9_pmovsr);
+    qemu_put_be32(f, env->cp15.c9_pmxevtyper);
+    qemu_put_be32(f, env->cp15.c9_pmuserenr);
+    qemu_put_be32(f, env->cp15.c9_pminten);
     qemu_put_be32(f, env->cp15.c13_fcse);
     qemu_put_be32(f, env->cp15.c13_context);
     qemu_put_be32(f, env->cp15.c13_tls1);
@@ -111,13 +114,15 @@
     }
 }
 
+#define CPU_SAVE_VERSION_LEGACY  3
+
 int cpu_load(QEMUFile *f, void *opaque, int version_id)
 {
     CPUARMState *env = (CPUARMState *)opaque;
     int i;
     uint32_t val;
 
-    if (version_id != CPU_SAVE_VERSION)
+    if (version_id != CPU_SAVE_VERSION && version_id != CPU_SAVE_VERSION_LEGACY)
         return -EINVAL;
 
     for (i = 0; i < 16; i++) {
@@ -164,9 +169,18 @@
     env->cp15.c7_par = qemu_get_be32(f);
     env->cp15.c9_insn = qemu_get_be32(f);
     env->cp15.c9_data = qemu_get_be32(f);
-    env->cp15.c9_pmcr_data = qemu_get_be32(f);
-    env->cp15.c9_useren = qemu_get_be32(f);
-    env->cp15.c9_inten = qemu_get_be32(f);
+    if (version_id == CPU_SAVE_VERSION_LEGACY) {
+        (void)qemu_get_be32(f);
+        (void)qemu_get_be32(f);
+        (void)qemu_get_be32(f);
+    } else {
+        env->cp15.c9_pmcr = qemu_get_be32(f);
+        env->cp15.c9_pmcnten = qemu_get_be32(f);
+        env->cp15.c9_pmovsr = qemu_get_be32(f);
+        env->cp15.c9_pmxevtyper = qemu_get_be32(f);
+        env->cp15.c9_pmuserenr = qemu_get_be32(f);
+        env->cp15.c9_pminten = qemu_get_be32(f);
+    }
     env->cp15.c13_fcse = qemu_get_be32(f);
     env->cp15.c13_context = qemu_get_be32(f);
     env->cp15.c13_tls1 = qemu_get_be32(f);
diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index 9165519..1d48e30 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -4,21 +4,19 @@
  * Copyright (c) 2007, 2008 CodeSourcery.
  * Written by Paul Brook
  *
- * This code is licenced under the GNU GPL v2.
+ * This code is licensed under the GNU GPL v2.
  */
 #include <stdlib.h>
 #include <stdio.h>
 
 #include "cpu.h"
-#include "exec.h"
+#include "exec/exec-all.h"
 #include "helper.h"
 
 #define SIGNBIT (uint32_t)0x80000000
 #define SIGNBIT64 ((uint64_t)1 << 63)
 
-#define SET_QC() env->vfp.xregs[ARM_VFP_FPSCR] = CPSR_Q
-
-#define NFS (&env->vfp.standard_fp_status)
+#define SET_QC() env->vfp.xregs[ARM_VFP_FPSCR] |= CPSR_Q
 
 #define NEON_TYPE1(name, type) \
 typedef struct \
@@ -115,6 +113,10 @@
 uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \
 NEON_VOP_BODY(vtype, n)
 
+#define NEON_VOP_ENV(name, vtype, n) \
+uint32_t HELPER(glue(neon_,name))(CPUARMState *env, uint32_t arg1, uint32_t arg2) \
+NEON_VOP_BODY(vtype, n)
+
 /* Pairwise operations.  */
 /* For 32-bit elements each segment only contains a single element, so
    the elementwise and pairwise operations are the same.  */
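
NEON_VOP_ENV only changes the generated signature: the helper gains CPUARMState *env so that SET_QC(), which now ORs CPSR_Q into FPSCR instead of overwriting it, has a state pointer to work with. As a sketch, NEON_VOP_ENV(qadd_u8, neon_u8, 4) expands to:

    uint32_t HELPER(neon_qadd_u8)(CPUARMState *env, uint32_t arg1, uint32_t arg2)
    NEON_VOP_BODY(neon_u8, 4)
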
@@ -163,14 +165,14 @@
         dest = tmp; \
     }} while(0)
 #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)
-NEON_VOP(qadd_u8, neon_u8, 4)
+NEON_VOP_ENV(qadd_u8, neon_u8, 4)
 #undef NEON_FN
 #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)
-NEON_VOP(qadd_u16, neon_u16, 2)
+NEON_VOP_ENV(qadd_u16, neon_u16, 2)
 #undef NEON_FN
 #undef NEON_USAT
 
-uint32_t HELPER(neon_qadd_u32)(uint32_t a, uint32_t b)
+uint32_t HELPER(neon_qadd_u32)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t res = a + b;
     if (res < a) {
@@ -180,7 +182,7 @@
     return res;
 }
 
-uint64_t HELPER(neon_qadd_u64)(uint64_t src1, uint64_t src2)
+uint64_t HELPER(neon_qadd_u64)(CPUARMState *env, uint64_t src1, uint64_t src2)
 {
     uint64_t res;
 
@@ -205,14 +207,14 @@
     dest = tmp; \
     } while(0)
 #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)
-NEON_VOP(qadd_s8, neon_s8, 4)
+NEON_VOP_ENV(qadd_s8, neon_s8, 4)
 #undef NEON_FN
 #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)
-NEON_VOP(qadd_s16, neon_s16, 2)
+NEON_VOP_ENV(qadd_s16, neon_s16, 2)
 #undef NEON_FN
 #undef NEON_SSAT
 
-uint32_t HELPER(neon_qadd_s32)(uint32_t a, uint32_t b)
+uint32_t HELPER(neon_qadd_s32)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t res = a + b;
     if (((res ^ a) & SIGNBIT) && !((a ^ b) & SIGNBIT)) {
@@ -222,7 +224,7 @@
     return res;
 }
 
-uint64_t HELPER(neon_qadd_s64)(uint64_t src1, uint64_t src2)
+uint64_t HELPER(neon_qadd_s64)(CPUARMState *env, uint64_t src1, uint64_t src2)
 {
     uint64_t res;
 
@@ -243,14 +245,14 @@
         dest = tmp; \
     }} while(0)
 #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)
-NEON_VOP(qsub_u8, neon_u8, 4)
+NEON_VOP_ENV(qsub_u8, neon_u8, 4)
 #undef NEON_FN
 #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)
-NEON_VOP(qsub_u16, neon_u16, 2)
+NEON_VOP_ENV(qsub_u16, neon_u16, 2)
 #undef NEON_FN
 #undef NEON_USAT
 
-uint32_t HELPER(neon_qsub_u32)(uint32_t a, uint32_t b)
+uint32_t HELPER(neon_qsub_u32)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t res = a - b;
     if (res > a) {
@@ -260,7 +262,7 @@
     return res;
 }
 
-uint64_t HELPER(neon_qsub_u64)(uint64_t src1, uint64_t src2)
+uint64_t HELPER(neon_qsub_u64)(CPUARMState *env, uint64_t src1, uint64_t src2)
 {
     uint64_t res;
 
@@ -286,14 +288,14 @@
     dest = tmp; \
     } while(0)
 #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)
-NEON_VOP(qsub_s8, neon_s8, 4)
+NEON_VOP_ENV(qsub_s8, neon_s8, 4)
 #undef NEON_FN
 #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)
-NEON_VOP(qsub_s16, neon_s16, 2)
+NEON_VOP_ENV(qsub_s16, neon_s16, 2)
 #undef NEON_FN
 #undef NEON_SSAT
 
-uint32_t HELPER(neon_qsub_s32)(uint32_t a, uint32_t b)
+uint32_t HELPER(neon_qsub_s32)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t res = a - b;
     if (((res ^ a) & SIGNBIT) && ((a ^ b) & SIGNBIT)) {
@@ -303,7 +305,7 @@
     return res;
 }
 
-uint64_t HELPER(neon_qsub_s64)(uint64_t src1, uint64_t src2)
+uint64_t HELPER(neon_qsub_s64)(CPUARMState *env, uint64_t src1, uint64_t src2)
 {
     uint64_t res;
 
@@ -528,7 +530,7 @@
 #undef NEON_FN
 
 /* The addition of the rounding constant may overflow, so we use an
- * intermediate 64 bits accumulator.  */
+ * intermediate 64 bit accumulator.  */
 uint32_t HELPER(neon_rshl_s32)(uint32_t valop, uint32_t shiftop)
 {
     int32_t dest;
@@ -545,8 +547,8 @@
     return dest;
 }
 
-/* Handling addition overflow with 64 bits inputs values is more
- * tricky than with 32 bits values.  */
+/* Handling addition overflow with 64 bit input values is more
+ * tricky than with 32 bit values.  */
 uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
 {
     int8_t shift = (int8_t)shiftop;
@@ -588,7 +590,7 @@
 #undef NEON_FN
 
 /* The addition of the rounding constant may overflow, so we use an
- * intermediate 64 bits accumulator.  */
+ * intermediate 64 bit accumulator.  */
 uint32_t HELPER(neon_rshl_u32)(uint32_t val, uint32_t shiftop)
 {
     uint32_t dest;
@@ -606,8 +608,8 @@
     return dest;
 }
 
-/* Handling addition overflow with 64 bits inputs values is more
- * tricky than with 32 bits values.  */
+/* Handling addition overflow with 64 bit input values is more
+ * tricky than with 32 bit values.  */
 uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop)
 {
     int8_t shift = (uint8_t)shiftop;
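
The "intermediate 64 bit accumulator" comments refer to the rounding constant added before a right shift: for a negative shift the sum val + (1 << (-shift - 1)) can carry out of 32 bits, so it has to be formed in 64 bits. A small illustration (assumed formula, not shown in this hunk):

    uint32_t val   = 0xffffffffu;
    int8_t   shift = -1;                                   /* shift right by 1, rounding */
    uint64_t big   = (uint64_t)val + (1u << (-shift - 1)); /* 0x100000000: needs 33 bits */
    uint32_t dest  = (uint32_t)(big >> -shift);            /* 0x80000000, the rounded result */
    /* keeping the same computation in 32 bits would wrap to 0 before the shift */
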
@@ -654,12 +656,12 @@
             dest = ~0; \
         } \
     }} while (0)
-NEON_VOP(qshl_u8, neon_u8, 4)
-NEON_VOP(qshl_u16, neon_u16, 2)
-NEON_VOP(qshl_u32, neon_u32, 1)
+NEON_VOP_ENV(qshl_u8, neon_u8, 4)
+NEON_VOP_ENV(qshl_u16, neon_u16, 2)
+NEON_VOP_ENV(qshl_u32, neon_u32, 1)
 #undef NEON_FN
 
-uint64_t HELPER(neon_qshl_u64)(uint64_t val, uint64_t shiftop)
+uint64_t HELPER(neon_qshl_u64)(CPUARMState *env, uint64_t val, uint64_t shiftop)
 {
     int8_t shift = (int8_t)shiftop;
     if (shift >= 64) {
@@ -709,12 +711,12 @@
             } \
         } \
     }} while (0)
-NEON_VOP(qshl_s8, neon_s8, 4)
-NEON_VOP(qshl_s16, neon_s16, 2)
-NEON_VOP(qshl_s32, neon_s32, 1)
+NEON_VOP_ENV(qshl_s8, neon_s8, 4)
+NEON_VOP_ENV(qshl_s16, neon_s16, 2)
+NEON_VOP_ENV(qshl_s32, neon_s32, 1)
 #undef NEON_FN
 
-uint64_t HELPER(neon_qshl_s64)(uint64_t valop, uint64_t shiftop)
+uint64_t HELPER(neon_qshl_s64)(CPUARMState *env, uint64_t valop, uint64_t shiftop)
 {
     int8_t shift = (uint8_t)shiftop;
     int64_t val = valop;
@@ -764,29 +766,28 @@
             } \
         } \
     }} while (0)
-NEON_VOP(qshlu_s8, neon_u8, 4)
-NEON_VOP(qshlu_s16, neon_u16, 2)
+NEON_VOP_ENV(qshlu_s8, neon_u8, 4)
+NEON_VOP_ENV(qshlu_s16, neon_u16, 2)
 #undef NEON_FN
 
-uint32_t HELPER(neon_qshlu_s32)(uint32_t valop, uint32_t shiftop)
+uint32_t HELPER(neon_qshlu_s32)(CPUARMState *env, uint32_t valop, uint32_t shiftop)
 {
     if ((int32_t)valop < 0) {
         SET_QC();
         return 0;
     }
-    return helper_neon_qshl_u32(valop, shiftop);
+    return helper_neon_qshl_u32(env, valop, shiftop);
 }
 
-uint64_t HELPER(neon_qshlu_s64)(uint64_t valop, uint64_t shiftop)
+uint64_t HELPER(neon_qshlu_s64)(CPUARMState *env, uint64_t valop, uint64_t shiftop)
 {
     if ((int64_t)valop < 0) {
         SET_QC();
         return 0;
     }
-    return helper_neon_qshl_u64(valop, shiftop);
+    return helper_neon_qshl_u64(env, valop, shiftop);
 }
 
-/* FIXME: This is wrong.  */
 #define NEON_FN(dest, src1, src2) do { \
     int8_t tmp; \
     tmp = (int8_t)src2; \
@@ -810,13 +811,13 @@
             dest = ~0; \
         } \
     }} while (0)
-NEON_VOP(qrshl_u8, neon_u8, 4)
-NEON_VOP(qrshl_u16, neon_u16, 2)
+NEON_VOP_ENV(qrshl_u8, neon_u8, 4)
+NEON_VOP_ENV(qrshl_u16, neon_u16, 2)
 #undef NEON_FN
 
 /* The addition of the rounding constant may overflow, so we use an
- * intermediate 64 bits accumulator.  */
-uint32_t HELPER(neon_qrshl_u32)(uint32_t val, uint32_t shiftop)
+ * intermediate 64 bit accumulator.  */
+uint32_t HELPER(neon_qrshl_u32)(CPUARMState *env, uint32_t val, uint32_t shiftop)
 {
     uint32_t dest;
     int8_t shift = (int8_t)shiftop;
@@ -844,9 +845,9 @@
     return dest;
 }
 
-/* Handling addition overflow with 64 bits inputs values is more
- * tricky than with 32 bits values.  */
-uint64_t HELPER(neon_qrshl_u64)(uint64_t val, uint64_t shiftop)
+/* Handling addition overflow with 64 bit input values is more
+ * tricky than with 32 bit values.  */
+uint64_t HELPER(neon_qrshl_u64)(CPUARMState *env, uint64_t val, uint64_t shiftop)
 {
     int8_t shift = (int8_t)shiftop;
     if (shift >= 64) {
@@ -907,13 +908,13 @@
             } \
         } \
     }} while (0)
-NEON_VOP(qrshl_s8, neon_s8, 4)
-NEON_VOP(qrshl_s16, neon_s16, 2)
+NEON_VOP_ENV(qrshl_s8, neon_s8, 4)
+NEON_VOP_ENV(qrshl_s16, neon_s16, 2)
 #undef NEON_FN
 
 /* The addition of the rounding constant may overflow, so we use an
- * intermediate 64 bits accumulator.  */
-uint32_t HELPER(neon_qrshl_s32)(uint32_t valop, uint32_t shiftop)
+ * intermediate 64 bit accumulator.  */
+uint32_t HELPER(neon_qrshl_s32)(CPUARMState *env, uint32_t valop, uint32_t shiftop)
 {
     int32_t dest;
     int32_t val = (int32_t)valop;
@@ -940,9 +941,9 @@
     return dest;
 }
 
-/* Handling addition overflow with 64 bits inputs values is more
- * tricky than with 32 bits values.  */
-uint64_t HELPER(neon_qrshl_s64)(uint64_t valop, uint64_t shiftop)
+/* Handling addition overflow with 64 bit input values is more
+ * tricky than with 32 bit values.  */
+uint64_t HELPER(neon_qrshl_s64)(CPUARMState *env, uint64_t valop, uint64_t shiftop)
 {
     int8_t shift = (uint8_t)shiftop;
     int64_t val = valop;
@@ -1151,10 +1152,10 @@
     dest = tmp >> 16; \
     } while(0)
 #define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 0)
-NEON_VOP(qdmulh_s16, neon_s16, 2)
+NEON_VOP_ENV(qdmulh_s16, neon_s16, 2)
 #undef NEON_FN
 #define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 1)
-NEON_VOP(qrdmulh_s16, neon_s16, 2)
+NEON_VOP_ENV(qrdmulh_s16, neon_s16, 2)
 #undef NEON_FN
 #undef NEON_QDMULH16
 
@@ -1177,10 +1178,10 @@
     dest = tmp >> 32; \
     } while(0)
 #define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 0)
-NEON_VOP(qdmulh_s32, neon_s32, 1)
+NEON_VOP_ENV(qdmulh_s32, neon_s32, 1)
 #undef NEON_FN
 #define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 1)
-NEON_VOP(qrdmulh_s32, neon_s32, 1)
+NEON_VOP_ENV(qrdmulh_s32, neon_s32, 1)
 #undef NEON_FN
 #undef NEON_QDMULH32
 
@@ -1221,7 +1222,7 @@
     return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000);
 }
 
-uint32_t HELPER(neon_unarrow_sat8)(uint64_t x)
+uint32_t HELPER(neon_unarrow_sat8)(CPUARMState *env, uint64_t x)
 {
     uint16_t s;
     uint8_t d;
@@ -1248,7 +1249,7 @@
     return res;
 }
 
-uint32_t HELPER(neon_narrow_sat_u8)(uint64_t x)
+uint32_t HELPER(neon_narrow_sat_u8)(CPUARMState *env, uint64_t x)
 {
     uint16_t s;
     uint8_t d;
@@ -1271,7 +1272,7 @@
     return res;
 }
 
-uint32_t HELPER(neon_narrow_sat_s8)(uint64_t x)
+uint32_t HELPER(neon_narrow_sat_s8)(CPUARMState *env, uint64_t x)
 {
     int16_t s;
     uint8_t d;
@@ -1294,7 +1295,7 @@
     return res;
 }
 
-uint32_t HELPER(neon_unarrow_sat16)(uint64_t x)
+uint32_t HELPER(neon_unarrow_sat16)(CPUARMState *env, uint64_t x)
 {
     uint32_t high;
     uint32_t low;
@@ -1317,7 +1318,7 @@
     return low | (high << 16);
 }
 
-uint32_t HELPER(neon_narrow_sat_u16)(uint64_t x)
+uint32_t HELPER(neon_narrow_sat_u16)(CPUARMState *env, uint64_t x)
 {
     uint32_t high;
     uint32_t low;
@@ -1334,7 +1335,7 @@
     return low | (high << 16);
 }
 
-uint32_t HELPER(neon_narrow_sat_s16)(uint64_t x)
+uint32_t HELPER(neon_narrow_sat_s16)(CPUARMState *env, uint64_t x)
 {
     int32_t low;
     int32_t high;
@@ -1351,7 +1352,7 @@
     return (uint16_t)low | (high << 16);
 }
 
-uint32_t HELPER(neon_unarrow_sat32)(uint64_t x)
+uint32_t HELPER(neon_unarrow_sat32)(CPUARMState *env, uint64_t x)
 {
     if (x & 0x8000000000000000ull) {
         SET_QC();
@@ -1364,7 +1365,7 @@
     return x;
 }
 
-uint32_t HELPER(neon_narrow_sat_u32)(uint64_t x)
+uint32_t HELPER(neon_narrow_sat_u32)(CPUARMState *env, uint64_t x)
 {
     if (x > 0xffffffffu) {
         SET_QC();
@@ -1373,7 +1374,7 @@
     return x;
 }
 
-uint32_t HELPER(neon_narrow_sat_s32)(uint64_t x)
+uint32_t HELPER(neon_narrow_sat_s32)(CPUARMState *env, uint64_t x)
 {
     if ((int64_t)x != (int32_t)x) {
         SET_QC();
@@ -1480,7 +1481,7 @@
     return (a - b) ^ mask;
 }
 
-uint64_t HELPER(neon_addl_saturate_s32)(uint64_t a, uint64_t b)
+uint64_t HELPER(neon_addl_saturate_s32)(CPUARMState *env, uint64_t a, uint64_t b)
 {
     uint32_t x, y;
     uint32_t low, high;
@@ -1502,7 +1503,7 @@
     return low | ((uint64_t)high << 32);
 }
 
-uint64_t HELPER(neon_addl_saturate_s64)(uint64_t a, uint64_t b)
+uint64_t HELPER(neon_addl_saturate_s64)(CPUARMState *env, uint64_t a, uint64_t b)
 {
     uint64_t result;
 
@@ -1669,7 +1670,7 @@
     return -x;
 }
 
-/* Saturnating sign manuipulation.  */
+/* Saturating sign manipulation.  */
 /* ??? Make these use NEON_VOP1 */
 #define DO_QABS8(x) do { \
     if (x == (int8_t)0x80) { \
@@ -1678,7 +1679,7 @@
     } else if (x < 0) { \
         x = -x; \
     }} while (0)
-uint32_t HELPER(neon_qabs_s8)(uint32_t x)
+uint32_t HELPER(neon_qabs_s8)(CPUARMState *env, uint32_t x)
 {
     neon_s8 vec;
     NEON_UNPACK(neon_s8, vec, x);
@@ -1698,7 +1699,7 @@
     } else { \
         x = -x; \
     }} while (0)
-uint32_t HELPER(neon_qneg_s8)(uint32_t x)
+uint32_t HELPER(neon_qneg_s8)(CPUARMState *env, uint32_t x)
 {
     neon_s8 vec;
     NEON_UNPACK(neon_s8, vec, x);
@@ -1718,7 +1719,7 @@
     } else if (x < 0) { \
         x = -x; \
     }} while (0)
-uint32_t HELPER(neon_qabs_s16)(uint32_t x)
+uint32_t HELPER(neon_qabs_s16)(CPUARMState *env, uint32_t x)
 {
     neon_s16 vec;
     NEON_UNPACK(neon_s16, vec, x);
@@ -1736,7 +1737,7 @@
     } else { \
         x = -x; \
     }} while (0)
-uint32_t HELPER(neon_qneg_s16)(uint32_t x)
+uint32_t HELPER(neon_qneg_s16)(CPUARMState *env, uint32_t x)
 {
     neon_s16 vec;
     NEON_UNPACK(neon_s16, vec, x);
@@ -1747,7 +1748,7 @@
 }
 #undef DO_QNEG16
 
-uint32_t HELPER(neon_qabs_s32)(uint32_t x)
+uint32_t HELPER(neon_qabs_s32)(CPUARMState *env, uint32_t x)
 {
     if (x == SIGNBIT) {
         SET_QC();
@@ -1758,7 +1759,7 @@
     return x;
 }
 
-uint32_t HELPER(neon_qneg_s32)(uint32_t x)
+uint32_t HELPER(neon_qneg_s32)(CPUARMState *env, uint32_t x)
 {
     if (x == SIGNBIT) {
         SET_QC();
@@ -1770,74 +1771,67 @@
 }
 
 /* NEON Float helpers.  */
-uint32_t HELPER(neon_min_f32)(uint32_t a, uint32_t b)
+uint32_t HELPER(neon_min_f32)(uint32_t a, uint32_t b, void *fpstp)
 {
-    return float32_val(float32_min(make_float32(a), make_float32(b), NFS));
+    float_status *fpst = fpstp;
+    return float32_val(float32_min(make_float32(a), make_float32(b), fpst));
 }
 
-uint32_t HELPER(neon_max_f32)(uint32_t a, uint32_t b)
+uint32_t HELPER(neon_max_f32)(uint32_t a, uint32_t b, void *fpstp)
 {
-    return float32_val(float32_max(make_float32(a), make_float32(b), NFS));
+    float_status *fpst = fpstp;
+    return float32_val(float32_max(make_float32(a), make_float32(b), fpst));
 }
 
-uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b)
+uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b, void *fpstp)
 {
+    float_status *fpst = fpstp;
     float32 f0 = make_float32(a);
     float32 f1 = make_float32(b);
-    return float32_val(float32_abs(float32_sub(f0, f1, NFS)));
-}
-
-uint32_t HELPER(neon_add_f32)(uint32_t a, uint32_t b)
-{
-    return float32_val(float32_add(make_float32(a), make_float32(b), NFS));
-}
-
-uint32_t HELPER(neon_sub_f32)(uint32_t a, uint32_t b)
-{
-    return float32_val(float32_sub(make_float32(a), make_float32(b), NFS));
-}
-
-uint32_t HELPER(neon_mul_f32)(uint32_t a, uint32_t b)
-{
-    return float32_val(float32_mul(make_float32(a), make_float32(b), NFS));
+    return float32_val(float32_abs(float32_sub(f0, f1, fpst)));
 }
 
 /* Floating point comparisons produce an integer result.
  * Note that EQ doesn't signal InvalidOp for QNaNs but GE and GT do.
  * Softfloat routines return 0/1, which we convert to the 0/-1 Neon requires.
  */
-uint32_t HELPER(neon_ceq_f32)(uint32_t a, uint32_t b)
+uint32_t HELPER(neon_ceq_f32)(uint32_t a, uint32_t b, void *fpstp)
 {
-    return -float32_eq_quiet(make_float32(a), make_float32(b), NFS);
+    float_status *fpst = fpstp;
+    return -float32_eq_quiet(make_float32(a), make_float32(b), fpst);
 }
 
-uint32_t HELPER(neon_cge_f32)(uint32_t a, uint32_t b)
+uint32_t HELPER(neon_cge_f32)(uint32_t a, uint32_t b, void *fpstp)
 {
-    return -float32_le(make_float32(b), make_float32(a), NFS);
+    float_status *fpst = fpstp;
+    return -float32_le(make_float32(b), make_float32(a), fpst);
 }
 
-uint32_t HELPER(neon_cgt_f32)(uint32_t a, uint32_t b)
+uint32_t HELPER(neon_cgt_f32)(uint32_t a, uint32_t b, void *fpstp)
 {
-    return -float32_lt(make_float32(b), make_float32(a), NFS);
+    float_status *fpst = fpstp;
+    return -float32_lt(make_float32(b), make_float32(a), fpst);
 }
 
-uint32_t HELPER(neon_acge_f32)(uint32_t a, uint32_t b)
+uint32_t HELPER(neon_acge_f32)(uint32_t a, uint32_t b, void *fpstp)
 {
+    float_status *fpst = fpstp;
     float32 f0 = float32_abs(make_float32(a));
     float32 f1 = float32_abs(make_float32(b));
-    return -float32_le(f1, f0, NFS);
+    return -float32_le(f1, f0, fpst);
 }
 
-uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b)
+uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b, void *fpstp)
 {
+    float_status *fpst = fpstp;
     float32 f0 = float32_abs(make_float32(a));
     float32 f1 = float32_abs(make_float32(b));
-    return -float32_lt(f1, f0, NFS);
+    return -float32_lt(f1, f0, fpst);
 }
 
 #define ELEM(V, N, SIZE) (((V) >> ((N) * (SIZE))) & ((1ull << (SIZE)) - 1))
 
-void HELPER(neon_qunzip8)(uint32_t rd, uint32_t rm)
+void HELPER(neon_qunzip8)(CPUARMState *env, uint32_t rd, uint32_t rm)
 {
     uint64_t zm0 = float64_val(env->vfp.regs[rm]);
     uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
@@ -1865,7 +1859,7 @@
     env->vfp.regs[rd + 1] = make_float64(d1);
 }
 
-void HELPER(neon_qunzip16)(uint32_t rd, uint32_t rm)
+void HELPER(neon_qunzip16)(CPUARMState *env, uint32_t rd, uint32_t rm)
 {
     uint64_t zm0 = float64_val(env->vfp.regs[rm]);
     uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
@@ -1885,7 +1879,7 @@
     env->vfp.regs[rd + 1] = make_float64(d1);
 }
 
-void HELPER(neon_qunzip32)(uint32_t rd, uint32_t rm)
+void HELPER(neon_qunzip32)(CPUARMState *env, uint32_t rd, uint32_t rm)
 {
     uint64_t zm0 = float64_val(env->vfp.regs[rm]);
     uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
@@ -1901,7 +1895,7 @@
     env->vfp.regs[rd + 1] = make_float64(d1);
 }
 
-void HELPER(neon_unzip8)(uint32_t rd, uint32_t rm)
+void HELPER(neon_unzip8)(CPUARMState *env, uint32_t rd, uint32_t rm)
 {
     uint64_t zm = float64_val(env->vfp.regs[rm]);
     uint64_t zd = float64_val(env->vfp.regs[rd]);
@@ -1917,7 +1911,7 @@
     env->vfp.regs[rd] = make_float64(d0);
 }
 
-void HELPER(neon_unzip16)(uint32_t rd, uint32_t rm)
+void HELPER(neon_unzip16)(CPUARMState *env, uint32_t rd, uint32_t rm)
 {
     uint64_t zm = float64_val(env->vfp.regs[rm]);
     uint64_t zd = float64_val(env->vfp.regs[rd]);
@@ -1929,7 +1923,7 @@
     env->vfp.regs[rd] = make_float64(d0);
 }
 
-void HELPER(neon_qzip8)(uint32_t rd, uint32_t rm)
+void HELPER(neon_qzip8)(CPUARMState *env, uint32_t rd, uint32_t rm)
 {
     uint64_t zm0 = float64_val(env->vfp.regs[rm]);
     uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
@@ -1957,7 +1951,7 @@
     env->vfp.regs[rd + 1] = make_float64(d1);
 }
 
-void HELPER(neon_qzip16)(uint32_t rd, uint32_t rm)
+void HELPER(neon_qzip16)(CPUARMState *env, uint32_t rd, uint32_t rm)
 {
     uint64_t zm0 = float64_val(env->vfp.regs[rm]);
     uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
@@ -1977,7 +1971,7 @@
     env->vfp.regs[rd + 1] = make_float64(d1);
 }
 
-void HELPER(neon_qzip32)(uint32_t rd, uint32_t rm)
+void HELPER(neon_qzip32)(CPUARMState *env, uint32_t rd, uint32_t rm)
 {
     uint64_t zm0 = float64_val(env->vfp.regs[rm]);
     uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
@@ -1993,7 +1987,7 @@
     env->vfp.regs[rd + 1] = make_float64(d1);
 }
 
-void HELPER(neon_zip8)(uint32_t rd, uint32_t rm)
+void HELPER(neon_zip8)(CPUARMState *env, uint32_t rd, uint32_t rm)
 {
     uint64_t zm = float64_val(env->vfp.regs[rm]);
     uint64_t zd = float64_val(env->vfp.regs[rd]);
@@ -2009,7 +2003,7 @@
     env->vfp.regs[rd] = make_float64(d0);
 }
 
-void HELPER(neon_zip16)(uint32_t rd, uint32_t rm)
+void HELPER(neon_zip16)(CPUARMState *env, uint32_t rd, uint32_t rm)
 {
     uint64_t zm = float64_val(env->vfp.regs[rm]);
     uint64_t zd = float64_val(env->vfp.regs[rd]);
diff --git a/target-arm/op_addsub.h b/target-arm/op_addsub.h
index c02c92a..ca4a189 100644
--- a/target-arm/op_addsub.h
+++ b/target-arm/op_addsub.h
@@ -4,7 +4,7 @@
  * Copyright (c) 2007 CodeSourcery.
  * Written by Paul Brook
  *
- * This code is licenced under the GPL.
+ * This code is licensed under the GPL.
  */
 
 #ifdef ARITH_GE
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index 168edd1..e3c4644 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -16,19 +16,22 @@
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
-#include "exec.h"
+#include "cpu.h"
+#include "tcg.h"
 #include "helper.h"
 
 #define SIGNBIT (uint32_t)0x80000000
 #define SIGNBIT64 ((uint64_t)1 << 63)
 
-void raise_exception(int tt)
+#if !defined(CONFIG_USER_ONLY)
+static void raise_exception(CPUARMState *env, int tt)
 {
     env->exception_index = tt;
     cpu_loop_exit(env);
 }
+#endif
 
-uint32_t HELPER(neon_tbl)(uint32_t ireg, uint32_t def,
+uint32_t HELPER(neon_tbl)(CPUARMState *env, uint32_t ireg, uint32_t def,
                           uint32_t rn, uint32_t maxindex)
 {
     uint32_t val;
@@ -50,8 +53,11 @@
     return val;
 }
 
+#undef env
+
 #if !defined(CONFIG_USER_ONLY)
 
+
 #define MMUSUFFIX _mmu
 
 #define SHIFT 0
@@ -69,33 +75,19 @@
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
    from generated code or from helper.c) */
-/* XXX: fix it to restore all registers */
-void tlb_fill (target_ulong addr, int is_write, int mmu_idx, void *retaddr)
+void tlb_fill(CPUARMState *env, target_ulong addr, int is_write, int mmu_idx,
+              uintptr_t retaddr)
 {
-    TranslationBlock *tb;
-    CPUARMState *saved_env;
-    unsigned long pc;
     int ret;
 
-    /* XXX: hack to restore env in all cases, even if not called from
-       generated code */
-    saved_env = env;
-    env = cpu_single_env;
-    ret = cpu_arm_handle_mmu_fault(env, addr, is_write, mmu_idx, 1);
+    ret = cpu_arm_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (unlikely(ret)) {
         if (retaddr) {
             /* now we have a real cpu fault */
-            pc = (unsigned long)retaddr;
-            tb = tb_find_pc(pc);
-            if (tb) {
-                /* the PC is inside the translated code. It means that we have
-                   a virtual CPU fault */
-                cpu_restore_state(tb, env, pc);
-            }
+            cpu_restore_state(env, retaddr);
         }
-        raise_exception(env->exception_index);
+        raise_exception(env, env->exception_index);
     }
-    env = saved_env;
 }
 
 void HELPER(set_cp)(CPUARMState *env, uint32_t insn, uint32_t val)
@@ -107,7 +99,7 @@
 
     if (env->cp[cp_num].cp_write)
         env->cp[cp_num].cp_write(env->cp[cp_num].opaque,
-                                 cp_info, src, operand, val, GETPC());
+                                 cp_info, src, operand, val, (void*)GETPC());
         }
 
 uint32_t HELPER(get_cp)(CPUARMState *env, uint32_t insn)
@@ -119,7 +111,7 @@
 
     if (env->cp[cp_num].cp_read)
         return env->cp[cp_num].cp_read(env->cp[cp_num].opaque,
-                                       cp_info, dest, operand, GETPC());
+                                       cp_info, dest, operand, (void*)GETPC());
         return 0;
 }
 
@@ -141,9 +133,7 @@
 
 #endif
 
-/* FIXME: Pass an axplicit pointer to QF to CPUARMState, and move saturating
-   instructions into helper.c  */
-uint32_t HELPER(add_setq)(uint32_t a, uint32_t b)
+uint32_t HELPER(add_setq)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t res = a + b;
     if (((res ^ a) & SIGNBIT) && !((a ^ b) & SIGNBIT))
@@ -151,7 +141,7 @@
     return res;
 }
 
-uint32_t HELPER(add_saturate)(uint32_t a, uint32_t b)
+uint32_t HELPER(add_saturate)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t res = a + b;
     if (((res ^ a) & SIGNBIT) && !((a ^ b) & SIGNBIT)) {
@@ -161,7 +151,7 @@
     return res;
 }
 
-uint32_t HELPER(sub_saturate)(uint32_t a, uint32_t b)
+uint32_t HELPER(sub_saturate)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t res = a - b;
     if (((res ^ a) & SIGNBIT) && ((a ^ b) & SIGNBIT)) {
@@ -171,7 +161,7 @@
     return res;
 }
 
-uint32_t HELPER(double_saturate)(int32_t val)
+uint32_t HELPER(double_saturate)(CPUARMState *env, int32_t val)
 {
     uint32_t res;
     if (val >= 0x40000000) {
@@ -186,7 +176,7 @@
     return res;
 }
 
-uint32_t HELPER(add_usaturate)(uint32_t a, uint32_t b)
+uint32_t HELPER(add_usaturate)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t res = a + b;
     if (res < a) {
@@ -196,7 +186,7 @@
     return res;
 }
 
-uint32_t HELPER(sub_usaturate)(uint32_t a, uint32_t b)
+uint32_t HELPER(sub_usaturate)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t res = a - b;
     if (res > a) {
@@ -207,7 +197,7 @@
 }
 
 /* Signed saturation.  */
-static inline uint32_t do_ssat(int32_t val, int shift)
+static inline uint32_t do_ssat(CPUARMState *env, int32_t val, int shift)
 {
     int32_t top;
     uint32_t mask;
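
do_ssat() and do_usat() only gain the env parameter here; their bodies (not shown) are unchanged. Assuming the usual semantics, clamping to a signed (shift+1)-bit range and an unsigned shift-bit range respectively and setting Q whenever clamping happens, representative results are:

    do_ssat(env,  300, 7);   /* ->  127 (0x0000007f), Q set */
    do_ssat(env, -300, 7);   /* -> -128 (0xffffff80), Q set */
    do_usat(env,   -5, 8);   /* ->    0,              Q set */
    do_usat(env,  300, 8);   /* ->  255 (0x000000ff), Q set */
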
@@ -225,7 +215,7 @@
 }
 
 /* Unsigned saturation.  */
-static inline uint32_t do_usat(int32_t val, int shift)
+static inline uint32_t do_usat(CPUARMState *env, int32_t val, int shift)
 {
     uint32_t max;
 
@@ -241,62 +231,62 @@
 }
 
 /* Signed saturate.  */
-uint32_t HELPER(ssat)(uint32_t x, uint32_t shift)
+uint32_t HELPER(ssat)(CPUARMState *env, uint32_t x, uint32_t shift)
 {
-    return do_ssat(x, shift);
+    return do_ssat(env, x, shift);
 }
 
 /* Dual halfword signed saturate.  */
-uint32_t HELPER(ssat16)(uint32_t x, uint32_t shift)
+uint32_t HELPER(ssat16)(CPUARMState *env, uint32_t x, uint32_t shift)
 {
     uint32_t res;
 
-    res = (uint16_t)do_ssat((int16_t)x, shift);
-    res |= do_ssat(((int32_t)x) >> 16, shift) << 16;
+    res = (uint16_t)do_ssat(env, (int16_t)x, shift);
+    res |= do_ssat(env, ((int32_t)x) >> 16, shift) << 16;
     return res;
 }
 
 /* Unsigned saturate.  */
-uint32_t HELPER(usat)(uint32_t x, uint32_t shift)
+uint32_t HELPER(usat)(CPUARMState *env, uint32_t x, uint32_t shift)
 {
-    return do_usat(x, shift);
+    return do_usat(env, x, shift);
 }
 
 /* Dual halfword unsigned saturate.  */
-uint32_t HELPER(usat16)(uint32_t x, uint32_t shift)
+uint32_t HELPER(usat16)(CPUARMState *env, uint32_t x, uint32_t shift)
 {
     uint32_t res;
 
-    res = (uint16_t)do_usat((int16_t)x, shift);
-    res |= do_usat(((int32_t)x) >> 16, shift) << 16;
+    res = (uint16_t)do_usat(env, (int16_t)x, shift);
+    res |= do_usat(env, ((int32_t)x) >> 16, shift) << 16;
     return res;
 }
 
-void HELPER(wfi)(void)
+void HELPER(wfi)(CPUARMState *env)
 {
     env->exception_index = EXCP_HLT;
-    env->halted = 1;
+    ENV_GET_CPU(env)->halted = 1;
     cpu_loop_exit(env);
 }
 
-void HELPER(exception)(uint32_t excp)
+void HELPER(exception)(CPUARMState *env, uint32_t excp)
 {
     env->exception_index = excp;
     cpu_loop_exit(env);
 }
 
-uint32_t HELPER(cpsr_read)(void)
+uint32_t HELPER(cpsr_read)(CPUARMState *env)
 {
     return cpsr_read(env) & ~CPSR_EXEC;
 }
 
-void HELPER(cpsr_write)(uint32_t val, uint32_t mask)
+void HELPER(cpsr_write)(CPUARMState *env, uint32_t val, uint32_t mask)
 {
     cpsr_write(env, val, mask);
 }
 
 /* Access to user mode registers from privileged modes.  */
-uint32_t HELPER(get_user_reg)(uint32_t regno)
+uint32_t HELPER(get_user_reg)(CPUARMState *env, uint32_t regno)
 {
     uint32_t val;
 
@@ -313,7 +303,7 @@
     return val;
 }
 
-void HELPER(set_user_reg)(uint32_t regno, uint32_t val)
+void HELPER(set_user_reg)(CPUARMState *env, uint32_t regno, uint32_t val)
 {
     if (regno == 13) {
         env->banked_r13[0] = val;
@@ -331,7 +321,7 @@
    The only way to do that in TCG is a conditional branch, which clobbers
    all our temporaries.  For now implement these as helper functions.  */
 
-uint32_t HELPER (add_cc)(uint32_t a, uint32_t b)
+uint32_t HELPER (add_cc)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t result;
     result = a + b;
@@ -341,7 +331,7 @@
     return result;
 }
 
-uint32_t HELPER(adc_cc)(uint32_t a, uint32_t b)
+uint32_t HELPER(adc_cc)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t result;
     if (!env->CF) {
@@ -356,7 +346,7 @@
     return result;
 }
 
-uint32_t HELPER(sub_cc)(uint32_t a, uint32_t b)
+uint32_t HELPER(sub_cc)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t result;
     result = a - b;
@@ -366,7 +356,7 @@
     return result;
 }
 
-uint32_t HELPER(sbc_cc)(uint32_t a, uint32_t b)
+uint32_t HELPER(sbc_cc)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t result;
     if (!env->CF) {
@@ -407,7 +397,7 @@
     return (int32_t)x >> shift;
 }
 
-uint32_t HELPER(shl_cc)(uint32_t x, uint32_t i)
+uint32_t HELPER(shl_cc)(CPUARMState *env, uint32_t x, uint32_t i)
 {
     int shift = i & 0xff;
     if (shift >= 32) {
@@ -423,7 +413,7 @@
     return x;
 }
 
-uint32_t HELPER(shr_cc)(uint32_t x, uint32_t i)
+uint32_t HELPER(shr_cc)(CPUARMState *env, uint32_t x, uint32_t i)
 {
     int shift = i & 0xff;
     if (shift >= 32) {
@@ -439,7 +429,7 @@
     return x;
 }
 
-uint32_t HELPER(sar_cc)(uint32_t x, uint32_t i)
+uint32_t HELPER(sar_cc)(CPUARMState *env, uint32_t x, uint32_t i)
 {
     int shift = i & 0xff;
     if (shift >= 32) {
@@ -452,7 +442,7 @@
     return x;
 }
 
-uint32_t HELPER(ror_cc)(uint32_t x, uint32_t i)
+uint32_t HELPER(ror_cc)(CPUARMState *env, uint32_t x, uint32_t i)
 {
     int shift1, shift;
     shift1 = i & 0xff;
@@ -467,7 +457,7 @@
     }
 }
 
-void HELPER(neon_vldst_all)(uint32_t insn)
+void HELPER(neon_vldst_all)(CPUARMState *env, uint32_t insn)
 {
 #if defined(CONFIG_USER_ONLY)
 #define LDB(addr) ldub(addr)
@@ -480,14 +470,14 @@
 #define STQ(addr, val) stq(addr, val)
 #else
     int user = cpu_mmu_index(env);
-#define LDB(addr) slow_ldb_mmu(addr, user, GETPC())
-#define LDW(addr) slow_ldw_mmu(addr, user, GETPC())
-#define LDL(addr) slow_ldl_mmu(addr, user, GETPC())
-#define LDQ(addr) slow_ldq_mmu(addr, user, GETPC())
-#define STB(addr, val) slow_stb_mmu(addr, val, user, GETPC())
-#define STW(addr, val) slow_stw_mmu(addr, val, user, GETPC())
-#define STL(addr, val) slow_stl_mmu(addr, val, user, GETPC())
-#define STQ(addr, val) slow_stq_mmu(addr, val, user, GETPC())
+#define LDB(addr) helper_ldb_mmu(env, addr, user)
+#define LDW(addr) helper_le_lduw_mmu(env, addr, user, GETPC())
+#define LDL(addr) helper_le_ldul_mmu(env, addr, user, GETPC())
+#define LDQ(addr) helper_le_ldq_mmu(env, addr, user, GETPC())
+#define STB(addr, val) helper_stb_mmu(env, addr, val, user)
+#define STW(addr, val) helper_le_stw_mmu(env, addr, val, user, GETPC())
+#define STL(addr, val) helper_le_stl_mmu(env, addr, val, user, GETPC())
+#define STQ(addr, val) helper_le_stq_mmu(env, addr, val, user, GETPC())
 #endif
     static const struct {
         int nregs;
diff --git a/target-arm/qom-cpu.h b/target-arm/qom-cpu.h
new file mode 100644
index 0000000..db5718b
--- /dev/null
+++ b/target-arm/qom-cpu.h
@@ -0,0 +1,21 @@
+#ifndef QEMU_ARM_CPU_QOM_H
+#define QEMU_ARM_CPU_QOM_H
+
+#include "qemu/osdep.h"
+#include "qom/cpu.h"
+
+typedef struct ARMCPU {
+    CPUState parent_obj;
+
+    CPUARMState env;
+} ARMCPU;
+
+static inline ARMCPU *arm_env_get_cpu(CPUARMState *env)
+{
+    return container_of(env, ARMCPU, env);
+}
+
+#define ENV_GET_CPU(e)  CPU(arm_env_get_cpu(e))
+#define ENV_OFFSET offsetof(ARMCPU, env)
+
+#endif  // QEMU_ARM_CPU_QOM_H
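
This header gives env-only code a way back to the QOM CPUState wrapper, which is what HELPER(wfi) above relies on for the halted flag. A minimal usage sketch, assuming a valid CPUARMState *env:

    ARMCPU   *cpu = arm_env_get_cpu(env);  /* container_of(env, ARMCPU, env) */
    CPUState *cs  = ENV_GET_CPU(env);      /* the same object viewed as a CPUState */
    cs->halted = 1;                        /* as HELPER(wfi) now does */
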
diff --git a/target-arm/translate-android.h b/target-arm/translate-android.h
index 61e934d..d5f4749 100644
--- a/target-arm/translate-android.h
+++ b/target-arm/translate-android.h
@@ -8,7 +8,7 @@
  *****
  *****/
 
-#ifdef CONFIG_MEMCHECK
+#ifdef CONFIG_ANDROID_MEMCHECK
 
 /*
  * Memchecker addition in this module is intended to inject qemu callback into
@@ -25,8 +25,8 @@
  * guaranteed.
  */
 
-#include "memcheck/memcheck_proc_management.h"
-#include "memcheck/memcheck_api.h"
+#include "android/qemu/memcheck/memcheck_proc_management.h"
+#include "android/qemu/memcheck/memcheck_api.h"
 
 /* Array of return addresses detected in gen_intermediate_code_internal. */
 AddrArray   ret_addresses = { 0 };
@@ -93,7 +93,7 @@
  *  boolean: 1 if THUMB instruction is BL/BLX, or 0 if it's not.
  */
 static inline int
-is_thumb_bl_or_blx(uint16_t insn, target_ulong pc, target_ulong* ret_off)
+is_thumb_bl_or_blx(CPUARMState* env, uint16_t insn, target_ulong pc, target_ulong* ret_off)
 {
     /* THUMB BLX(register):      0100 0111 1xxx xxxx
      * THUMB BL(1-stimmediate):  1111 0xxx xxxx xxxx
@@ -104,7 +104,7 @@
         return 1;
     } else if ((insn & 0xF800) == 0xF000) {     // THUMB BL(X)(imm)
         // This is a 32-bit THUMB. Get the second half of the instruction.
-        insn = lduw_code(pc + 2);
+        insn = cpu_lduw_code(env, pc + 2);
         if ((insn & 0xC000) == 0xC000) {
             *ret_off = 4;
             return 1;
@@ -135,9 +135,9 @@
     if ((0x90000000 <= addr && addr <= 0xBFFFFFFF)) {
         /* Address belongs to a module that always loads at this fixed address.
          * So, we can keep this address in the global array. */
-        ret = addrarray_add(&ret_addresses, get_phys_addr_code(env, addr));
+        ret = addrarray_add(&ret_addresses, get_page_addr_code(env, addr));
     } else {
-        ret = addrarray_add(&ret_addresses, get_phys_addr_code(env, addr));
+        ret = addrarray_add(&ret_addresses, get_page_addr_code(env, addr));
     }
     assert(ret != 0);
 
@@ -153,10 +153,10 @@
          * code contains it. This inconsistency will lead to an imminent
          * segmentation fault.*/
         TranslationBlock* tb;
-        const target_ulong phys_pc = get_phys_addr_code(env, addr);
+        const target_ulong phys_pc = get_page_addr_code(env, addr);
         const target_ulong phys_page1 = phys_pc & TARGET_PAGE_MASK;
 
-        for(tb = tb_phys_hash[tb_phys_hash_func(phys_pc)]; tb != NULL;
+        for(tb = tcg_ctx.tb_ctx.tb_phys_hash[tb_phys_hash_func(phys_pc)]; tb != NULL;
             tb = tb->phys_hash_next) {
             if (tb->pc == addr && tb->page_addr[0] == phys_page1) {
                 tb_phys_invalidate(tb, -1);
@@ -175,9 +175,9 @@
 is_ret_address(CPUARMState* env, target_ulong addr)
 {
     if ((0x90000000 <= addr && addr <= 0xBFFFFFFF)) {
-        return addrarray_check(&ret_addresses, get_phys_addr_code(env, addr));
+        return addrarray_check(&ret_addresses, get_page_addr_code(env, addr));
     } else {
-        return addrarray_check(&ret_addresses, get_phys_addr_code(env, addr));
+        return addrarray_check(&ret_addresses, get_page_addr_code(env, addr));
     }
 }
 
@@ -225,7 +225,7 @@
         if (is_ret_address(env, s->pc)) { \
             set_on_ret(s->pc); \
         } \
-        if (is_thumb_bl_or_blx(insn, s->pc, &ret_off)) { \
+        if (is_thumb_bl_or_blx(env, insn, s->pc, &ret_off)) { \
             set_on_call(s->pc, s->pc + ret_off); \
             if (!s->search_pc) { \
                 register_ret_address(env, s->pc + ret_off); \
@@ -248,14 +248,14 @@
 #  define ANDROID_END_CODEGEN() \
     do { \
         if (memcheck_enabled && dc->user) { \
-            j = gen_opc_ptr - gen_opc_buf; \
+            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf; \
             lj++; \
             while (lj <= j) \
-                gen_opc_instr_start[lj++] = 0; \
+                tcg_ctx.gen_opc_instr_start[lj++] = 0; \
         } \
     } while (0)
 
-#else /* !CONFIG_MEMCHECK */
+#else /* !CONFIG_ANDROID_MEMCHECK */
 
 #  define ANDROID_WATCH_CALLSTACK_ARM     ((void)0)
 #  define ANDROID_WATCH_CALLSTACK_THUMB   ((void)0)
@@ -264,4 +264,4 @@
 #  define ANDROID_CHECK_CODEGEN_PC(s)      (s)
 #  define ANDROID_END_CODEGEN()            ((void)0)
 
-#endif  /* !CONFIG_MEMCHECK */
+#endif  /* !CONFIG_ANDROID_MEMCHECK */
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 83d0e8f..20e2e89 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -66,13 +66,12 @@
     int vfp_enabled;
     int vec_len;
     int vec_stride;
-#ifdef CONFIG_MEMCHECK
+#ifdef CONFIG_ANDROID_MEMCHECK
     int search_pc;
 #endif
 } DisasContext;
 
 #include "translate-android.h"
-
 static uint32_t gen_opc_condexec_bits[OPC_BUF_SIZE];
 
 #if defined(CONFIG_USER_ONLY)
@@ -133,9 +132,6 @@
     cpu_exclusive_info = tcg_global_mem_new_i32(TCG_AREG0,
         offsetof(CPUARMState, exclusive_info), "exclusive_info");
 #endif
-
-#define GEN_HELPER 2
-#include "helper.h"
 }
 
 static inline TCGv load_cpu_offset(int offset)
@@ -205,7 +201,7 @@
 static inline void gen_set_cpsr(TCGv var, uint32_t mask)
 {
     TCGv tmp_mask = tcg_const_i32(mask);
-    gen_helper_cpsr_write(var, tmp_mask);
+    gen_helper_cpsr_write(cpu_env, var, tmp_mask);
     tcg_temp_free_i32(tmp_mask);
 }
 /* Set NZCV flags from the high 4 bits of var.  */
@@ -215,7 +211,7 @@
 {
     TCGv tmp = tcg_temp_new_i32();
     tcg_gen_movi_i32(tmp, excp);
-    gen_helper_exception(tmp);
+    gen_helper_exception(cpu_env, tmp);
     tcg_temp_free_i32(tmp);
 }
 
@@ -496,10 +492,10 @@
 {
     if (flags) {
         switch (shiftop) {
-        case 0: gen_helper_shl_cc(var, var, shift); break;
-        case 1: gen_helper_shr_cc(var, var, shift); break;
-        case 2: gen_helper_sar_cc(var, var, shift); break;
-        case 3: gen_helper_ror_cc(var, var, shift); break;
+        case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
+        case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
+        case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
+        case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
         }
     } else {
         switch (shiftop) {
@@ -905,13 +901,29 @@
     }
 }
 
+static TCGv_ptr get_fpstatus_ptr(int neon)
+{
+    TCGv_ptr statusptr = tcg_temp_new_ptr();
+    int offset;
+    if (neon) {
+        offset = offsetof(CPUARMState, vfp.standard_fp_status);
+    } else {
+        offset = offsetof(CPUARMState, vfp.fp_status);
+    }
+    tcg_gen_addi_ptr(statusptr, cpu_env, offset);
+    return statusptr;
+}
+
 #define VFP_OP2(name)                                                 \
 static inline void gen_vfp_##name(int dp)                             \
 {                                                                     \
-    if (dp)                                                           \
-        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, cpu_F1d, cpu_env); \
-    else                                                              \
-        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, cpu_F1s, cpu_env); \
+    TCGv_ptr fpst = get_fpstatus_ptr(0);                              \
+    if (dp) {                                                         \
+        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, cpu_F1d, fpst);    \
+    } else {                                                          \
+        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, cpu_F1s, fpst);    \
+    }                                                                 \
+    tcg_temp_free_ptr(fpst);                                          \
 }
 
 VFP_OP2(add)
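
get_fpstatus_ptr() centralizes the choice between the VFP float_status and the NEON standard one. The NEON float helpers changed earlier in this diff (neon_min_f32 and friends, which now take a trailing void *fpstp) are called with such a pointer; a representative sequence, with dest/rn/rm standing in for the usual TCGv temporaries:

    TCGv_ptr fpst = get_fpstatus_ptr(1);           /* 1 selects vfp.standard_fp_status */
    gen_helper_neon_min_f32(dest, rn, rm, fpst);   /* reaches the helper as void *fpstp */
    tcg_temp_free_ptr(fpst);
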
@@ -924,10 +936,11 @@
 static inline void gen_vfp_F1_mul(int dp)
 {
     /* Like gen_vfp_mul() but put result in F1 */
+    TCGv_ptr fpst = get_fpstatus_ptr(0);
     if (dp) {
-        gen_helper_vfp_muld(cpu_F1d, cpu_F0d, cpu_F1d, cpu_env);
+        gen_helper_vfp_muld(cpu_F1d, cpu_F0d, cpu_F1d, fpst);
     } else {
-        gen_helper_vfp_muls(cpu_F1s, cpu_F0s, cpu_F1s, cpu_env);
+        gen_helper_vfp_muls(cpu_F1s, cpu_F0s, cpu_F1s, fpst);
     }
 }
 
@@ -992,14 +1005,7 @@
 #define VFP_GEN_ITOF(name) \
 static inline void gen_vfp_##name(int dp, int neon) \
 { \
-    TCGv_ptr statusptr = tcg_temp_new_ptr(); \
-    int offset; \
-    if (neon) { \
-        offset = offsetof(CPUARMState, vfp.standard_fp_status); \
-    } else { \
-        offset = offsetof(CPUARMState, vfp.fp_status); \
-    } \
-    tcg_gen_addi_ptr(statusptr, cpu_env, offset); \
+    TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
     if (dp) { \
         gen_helper_vfp_##name##d(cpu_F0d, cpu_F0s, statusptr); \
     } else { \
@@ -1015,14 +1021,7 @@
 #define VFP_GEN_FTOI(name) \
 static inline void gen_vfp_##name(int dp, int neon) \
 { \
-    TCGv_ptr statusptr = tcg_temp_new_ptr(); \
-    int offset; \
-    if (neon) { \
-        offset = offsetof(CPUARMState, vfp.standard_fp_status); \
-    } else { \
-        offset = offsetof(CPUARMState, vfp.fp_status); \
-    } \
-    tcg_gen_addi_ptr(statusptr, cpu_env, offset); \
+    TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
     if (dp) { \
         gen_helper_vfp_##name##d(cpu_F0s, cpu_F0d, statusptr); \
     } else { \
@@ -1041,14 +1040,7 @@
 static inline void gen_vfp_##name(int dp, int shift, int neon) \
 { \
     TCGv tmp_shift = tcg_const_i32(shift); \
-    TCGv_ptr statusptr = tcg_temp_new_ptr(); \
-    int offset; \
-    if (neon) { \
-        offset = offsetof(CPUARMState, vfp.standard_fp_status); \
-    } else { \
-        offset = offsetof(CPUARMState, vfp.fp_status); \
-    } \
-    tcg_gen_addi_ptr(statusptr, cpu_env, offset); \
+    TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
     if (dp) { \
         gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, tmp_shift, statusptr); \
     } else { \
@@ -1219,15 +1211,22 @@
     gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
 }
 
-#define IWMMXT_OP_SIZE(name) \
-IWMMXT_OP(name##b) \
-IWMMXT_OP(name##w) \
-IWMMXT_OP(name##l)
+#define IWMMXT_OP_ENV(name) \
+static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
+{ \
+    iwmmxt_load_reg(cpu_V1, rn); \
+    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
+}
 
-#define IWMMXT_OP_1(name) \
+#define IWMMXT_OP_ENV_SIZE(name) \
+IWMMXT_OP_ENV(name##b) \
+IWMMXT_OP_ENV(name##w) \
+IWMMXT_OP_ENV(name##l)
+
+#define IWMMXT_OP_ENV1(name) \
 static inline void gen_op_iwmmxt_##name##_M0(void) \
 { \
-    gen_helper_iwmmxt_##name(cpu_M0, cpu_M0); \
+    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
 }
 
 IWMMXT_OP(maddsq)
@@ -1241,51 +1240,51 @@
 IWMMXT_OP(macsw)
 IWMMXT_OP(macuw)
 
-IWMMXT_OP_SIZE(unpackl)
-IWMMXT_OP_SIZE(unpackh)
+IWMMXT_OP_ENV_SIZE(unpackl)
+IWMMXT_OP_ENV_SIZE(unpackh)
 
-IWMMXT_OP_1(unpacklub)
-IWMMXT_OP_1(unpackluw)
-IWMMXT_OP_1(unpacklul)
-IWMMXT_OP_1(unpackhub)
-IWMMXT_OP_1(unpackhuw)
-IWMMXT_OP_1(unpackhul)
-IWMMXT_OP_1(unpacklsb)
-IWMMXT_OP_1(unpacklsw)
-IWMMXT_OP_1(unpacklsl)
-IWMMXT_OP_1(unpackhsb)
-IWMMXT_OP_1(unpackhsw)
-IWMMXT_OP_1(unpackhsl)
+IWMMXT_OP_ENV1(unpacklub)
+IWMMXT_OP_ENV1(unpackluw)
+IWMMXT_OP_ENV1(unpacklul)
+IWMMXT_OP_ENV1(unpackhub)
+IWMMXT_OP_ENV1(unpackhuw)
+IWMMXT_OP_ENV1(unpackhul)
+IWMMXT_OP_ENV1(unpacklsb)
+IWMMXT_OP_ENV1(unpacklsw)
+IWMMXT_OP_ENV1(unpacklsl)
+IWMMXT_OP_ENV1(unpackhsb)
+IWMMXT_OP_ENV1(unpackhsw)
+IWMMXT_OP_ENV1(unpackhsl)
 
-IWMMXT_OP_SIZE(cmpeq)
-IWMMXT_OP_SIZE(cmpgtu)
-IWMMXT_OP_SIZE(cmpgts)
+IWMMXT_OP_ENV_SIZE(cmpeq)
+IWMMXT_OP_ENV_SIZE(cmpgtu)
+IWMMXT_OP_ENV_SIZE(cmpgts)
 
-IWMMXT_OP_SIZE(mins)
-IWMMXT_OP_SIZE(minu)
-IWMMXT_OP_SIZE(maxs)
-IWMMXT_OP_SIZE(maxu)
+IWMMXT_OP_ENV_SIZE(mins)
+IWMMXT_OP_ENV_SIZE(minu)
+IWMMXT_OP_ENV_SIZE(maxs)
+IWMMXT_OP_ENV_SIZE(maxu)
 
-IWMMXT_OP_SIZE(subn)
-IWMMXT_OP_SIZE(addn)
-IWMMXT_OP_SIZE(subu)
-IWMMXT_OP_SIZE(addu)
-IWMMXT_OP_SIZE(subs)
-IWMMXT_OP_SIZE(adds)
+IWMMXT_OP_ENV_SIZE(subn)
+IWMMXT_OP_ENV_SIZE(addn)
+IWMMXT_OP_ENV_SIZE(subu)
+IWMMXT_OP_ENV_SIZE(addu)
+IWMMXT_OP_ENV_SIZE(subs)
+IWMMXT_OP_ENV_SIZE(adds)
 
-IWMMXT_OP(avgb0)
-IWMMXT_OP(avgb1)
-IWMMXT_OP(avgw0)
-IWMMXT_OP(avgw1)
+IWMMXT_OP_ENV(avgb0)
+IWMMXT_OP_ENV(avgb1)
+IWMMXT_OP_ENV(avgw0)
+IWMMXT_OP_ENV(avgw1)
 
 IWMMXT_OP(msadb)
 
-IWMMXT_OP(packuw)
-IWMMXT_OP(packul)
-IWMMXT_OP(packuq)
-IWMMXT_OP(packsw)
-IWMMXT_OP(packsl)
-IWMMXT_OP(packsq)
+IWMMXT_OP_ENV(packuw)
+IWMMXT_OP_ENV(packul)
+IWMMXT_OP_ENV(packuq)
+IWMMXT_OP_ENV(packsw)
+IWMMXT_OP_ENV(packsl)
+IWMMXT_OP_ENV(packsq)
 
 static void gen_op_iwmmxt_set_mup(void)
 {
@@ -2019,13 +2018,13 @@
         }
         switch ((insn >> 22) & 3) {
         case 1:
-            gen_helper_iwmmxt_srlw(cpu_M0, cpu_M0, tmp);
+            gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
             break;
         case 2:
-            gen_helper_iwmmxt_srll(cpu_M0, cpu_M0, tmp);
+            gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
             break;
         case 3:
-            gen_helper_iwmmxt_srlq(cpu_M0, cpu_M0, tmp);
+            gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
             break;
         }
         tcg_temp_free_i32(tmp);
@@ -2047,13 +2046,13 @@
         }
         switch ((insn >> 22) & 3) {
         case 1:
-            gen_helper_iwmmxt_sraw(cpu_M0, cpu_M0, tmp);
+            gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
             break;
         case 2:
-            gen_helper_iwmmxt_sral(cpu_M0, cpu_M0, tmp);
+            gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
             break;
         case 3:
-            gen_helper_iwmmxt_sraq(cpu_M0, cpu_M0, tmp);
+            gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
             break;
         }
         tcg_temp_free_i32(tmp);
@@ -2075,13 +2074,13 @@
         }
         switch ((insn >> 22) & 3) {
         case 1:
-            gen_helper_iwmmxt_sllw(cpu_M0, cpu_M0, tmp);
+            gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
             break;
         case 2:
-            gen_helper_iwmmxt_slll(cpu_M0, cpu_M0, tmp);
+            gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
             break;
         case 3:
-            gen_helper_iwmmxt_sllq(cpu_M0, cpu_M0, tmp);
+            gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
             break;
         }
         tcg_temp_free_i32(tmp);
@@ -2103,21 +2102,21 @@
                 tcg_temp_free_i32(tmp);
                 return 1;
             }
-            gen_helper_iwmmxt_rorw(cpu_M0, cpu_M0, tmp);
+            gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
             break;
         case 2:
             if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
                 tcg_temp_free_i32(tmp);
                 return 1;
             }
-            gen_helper_iwmmxt_rorl(cpu_M0, cpu_M0, tmp);
+            gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
             break;
         case 3:
             if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
                 tcg_temp_free_i32(tmp);
                 return 1;
             }
-            gen_helper_iwmmxt_rorq(cpu_M0, cpu_M0, tmp);
+            gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
             break;
         }
         tcg_temp_free_i32(tmp);
@@ -2251,7 +2250,7 @@
         rd0 = (insn >> 16) & 0xf;
         gen_op_iwmmxt_movq_M0_wRn(rd0);
         tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
-        gen_helper_iwmmxt_shufh(cpu_M0, cpu_M0, tmp);
+        gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
         tcg_temp_free(tmp);
         gen_op_iwmmxt_movq_wRn_M0(wrd);
         gen_op_iwmmxt_set_mup();
@@ -2477,23 +2476,33 @@
     return 0;
 }
 
-static int cp15_user_ok(uint32_t insn)
+static int cp15_user_ok(CPUARMState *env, uint32_t insn)
 {
     int cpn = (insn >> 16) & 0xf;
     int cpm = insn & 0xf;
     int op = ((insn >> 5) & 7) | ((insn >> 18) & 0x38);
 
+    if (arm_feature(env, ARM_FEATURE_V7) && cpn == 9) {
+        /* Performance monitor registers fall into three categories:
+         *  (a) always UNDEF in usermode
+         *  (b) UNDEF only if PMUSERENR.EN is 0
+         *  (c) always read OK and UNDEF on write (PMUSERENR only)
+         */
+        if ((cpm == 12 && (op < 6)) ||
+            (cpm == 13 && (op < 3))) {
+            return env->cp15.c9_pmuserenr;
+        } else if (cpm == 14 && op == 0 && (insn & ARM_CP_RW_BIT)) {
+            /* PMUSERENR, read only */
+            return 1;
+        }
+        return 0;
+    }
+
     if (cpn == 13 && cpm == 0) {
         /* TLS register.  */
         if (op == 2 || (op == 3 && (insn & ARM_CP_RW_BIT)))
             return 1;
     }
-    if (cpn == 7) {
-        /* ISB, DSB, DMB.  */
-        if ((cpm == 5 && op == 4)
-                || (cpm == 10 && (op == 4 || op == 5)))
-            return 1;
-    }
     return 0;
 }
 
@@ -2569,39 +2578,60 @@
         /* cdp */
         return 1;
     }
-    if (IS_USER(s) && !cp15_user_ok(insn)) {
-        return 1;
-    }
-
-    /* Pre-v7 versions of the architecture implemented WFI via coprocessor
-     * instructions rather than a separate instruction.
+    /* We special-case a number of cp15 instructions which were used
+     * for things that are real instructions in ARMv7. This allows
+     * them to work in linux-user mode, which doesn't provide functional
+     * get_cp15/set_cp15 helpers, and is more efficient anyway.
      */
-    if ((insn & 0x0fff0fff) == 0x0e070f90) {
+    switch ((insn & 0x0fff0fff)) {
+    case 0x0e070f90:
         /* 0,c7,c0,4: Standard v6 WFI (also used in some pre-v6 cores).
          * In v7, this must NOP.
          */
+        if (IS_USER(s)) {
+            return 1;
+        }
         if (!arm_feature(env, ARM_FEATURE_V7)) {
             /* Wait for interrupt.  */
             gen_set_pc_im(s->pc);
             s->is_jmp = DISAS_WFI;
         }
         return 0;
-    }
-
-    if ((insn & 0x0fff0fff) == 0x0e070f58) {
+    case 0x0e070f58:
         /* 0,c7,c8,2: Not all pre-v6 cores implemented this WFI,
          * so this is slightly over-broad.
          */
-        if (!arm_feature(env, ARM_FEATURE_V6)) {
+        if (!IS_USER(s) && !arm_feature(env, ARM_FEATURE_V6)) {
             /* Wait for interrupt.  */
             gen_set_pc_im(s->pc);
             s->is_jmp = DISAS_WFI;
             return 0;
         }
-        /* Otherwise fall through to handle via helper function.
+        /* Otherwise continue to handle via helper function.
          * In particular, on v7 and some v6 cores this is one of
          * the VA-PA registers.
          */
+        break;
+    case 0x0e070f3d:
+        /* 0,c7,c13,1: prefetch-by-MVA in v6, NOP in v7 */
+        if (arm_feature(env, ARM_FEATURE_V6)) {
+            return IS_USER(s) ? 1 : 0;
+        }
+        break;
+    case 0x0e070f95: /* 0,c7,c5,4 : ISB */
+    case 0x0e070f9a: /* 0,c7,c10,4: DSB */
+    case 0x0e070fba: /* 0,c7,c10,5: DMB */
+        /* Barriers in both v6 and v7 */
+        if (arm_feature(env, ARM_FEATURE_V6)) {
+            return 0;
+        }
+        break;
+    default:
+        break;
+    }
+
+    if (IS_USER(s) && !cp15_user_ok(env, insn)) {
+        return 1;
     }
 
     rd = (insn >> 12) & 0xf;
@@ -3046,6 +3076,17 @@
                     /* Source and destination the same.  */
                     gen_mov_F0_vreg(dp, rd);
                     break;
+                case 4:
+                case 5:
+                case 6:
+                case 7:
+                    /* VCVTB, VCVTT: only present with the halfprec extension,
+                     * UNPREDICTABLE if bit 8 is set (we choose to UNDEF)
+                     */
+                    if (dp || !arm_feature(env, ARM_FEATURE_VFP_FP16)) {
+                        return 1;
+                    }
+                    /* Otherwise fall through */
                 default:
                     /* One source operand.  */
                     gen_mov_F0_vreg(dp, rm);
@@ -3142,24 +3183,18 @@
                         gen_vfp_sqrt(dp);
                         break;
                     case 4: /* vcvtb.f32.f16 */
-                        if (!arm_feature(env, ARM_FEATURE_VFP_FP16))
-                          return 1;
                         tmp = gen_vfp_mrs();
                         tcg_gen_ext16u_i32(tmp, tmp);
                         gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env);
                         tcg_temp_free_i32(tmp);
                         break;
                     case 5: /* vcvtt.f32.f16 */
-                        if (!arm_feature(env, ARM_FEATURE_VFP_FP16))
-                          return 1;
                         tmp = gen_vfp_mrs();
                         tcg_gen_shri_i32(tmp, tmp, 16);
                         gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env);
                         tcg_temp_free_i32(tmp);
                         break;
                     case 6: /* vcvtb.f16.f32 */
-                        if (!arm_feature(env, ARM_FEATURE_VFP_FP16))
-                          return 1;
                         tmp = tcg_temp_new_i32();
                         gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
                         gen_mov_F0_vreg(0, rd);
@@ -3170,8 +3205,6 @@
                         gen_vfp_msr(tmp);
                         break;
                     case 7: /* vcvtt.f16.f32 */
-                        if (!arm_feature(env, ARM_FEATURE_VFP_FP16))
-                          return 1;
                         tmp = tcg_temp_new_i32();
                         gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
                         tcg_gen_shli_i32(tmp, tmp, 16);
@@ -3372,17 +3405,18 @@
                 VFP_DREG_D(rd, insn);
             else
                 rd = VFP_SREG_D(insn);
-            if (s->thumb && rn == 15) {
-                addr = tcg_temp_new_i32();
-                tcg_gen_movi_i32(addr, s->pc & ~2);
-            } else {
-                addr = load_reg(s, rn);
-            }
             if ((insn & 0x01200000) == 0x01000000) {
                 /* Single load/store */
                 offset = (insn & 0xff) << 2;
                 if ((insn & (1 << 23)) == 0)
                     offset = -offset;
+                if (s->thumb && rn == 15) {
+                    /* This is actually UNPREDICTABLE */
+                    addr = tcg_temp_new_i32();
+                    tcg_gen_movi_i32(addr, s->pc & ~2);
+                } else {
+                    addr = load_reg(s, rn);
+                }
                 tcg_gen_addi_i32(addr, addr, offset);
                 if (insn & (1 << 20)) {
                     gen_vfp_ld(s, dp, addr);
@@ -3394,11 +3428,34 @@
                 tcg_temp_free_i32(addr);
             } else {
                 /* load/store multiple */
+                int w = insn & (1 << 21);
                 if (dp)
                     n = (insn >> 1) & 0x7f;
                 else
                     n = insn & 0xff;
 
+                if (w && !(((insn >> 23) ^ (insn >> 24)) & 1)) {
+                    /* P == U , W == 1  => UNDEF */
+                    return 1;
+                }
+                if (n == 0 || (rd + n) > 32 || (dp && n > 16)) {
+                    /* UNPREDICTABLE cases for bad immediates: we choose to
+                     * UNDEF to avoid generating huge numbers of TCG ops
+                     */
+                    return 1;
+                }
+                if (rn == 15 && w) {
+                    /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
+                    return 1;
+                }
+
+                if (s->thumb && rn == 15) {
+                    /* This is actually UNPREDICTABLE */
+                    addr = tcg_temp_new_i32();
+                    tcg_gen_movi_i32(addr, s->pc & ~2);
+                } else {
+                    addr = load_reg(s, rn);
+                }
                 if (insn & (1 << 24)) /* pre-decrement */
                     tcg_gen_addi_i32(addr, addr, -((insn & 0xff) << 2));
 
@@ -3420,7 +3477,7 @@
                     tcg_gen_add_i32(addr, addr, tmp);
                 }
                 tcg_temp_free_i32(tmp);
-                if (insn & (1 << 21)) {
+                if (w) {
                     /* writeback */
                     if (insn & (1 << 24))
                         offset = -offset * n;
@@ -3632,6 +3689,29 @@
 #define gen_helper_neon_pmin_s32  gen_helper_neon_min_s32
 #define gen_helper_neon_pmin_u32  gen_helper_neon_min_u32
 
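+/* Variant of GEN_NEON_INTEGER_OP (below) for helpers that also take
+ * cpu_env, i.e. the saturating ops that may need to set QC in FPSCR;
+ * it dispatches on the (size, u) pair exactly like the plain macro.
+ */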
+#define GEN_NEON_INTEGER_OP_ENV(name) do { \
+    switch ((size << 1) | u) { \
+    case 0: \
+        gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
+        break; \
+    case 1: \
+        gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
+        break; \
+    case 2: \
+        gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
+        break; \
+    case 3: \
+        gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
+        break; \
+    case 4: \
+        gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
+        break; \
+    case 5: \
+        gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
+        break; \
+    default: return 1; \
+    }} while (0)
+
 #define GEN_NEON_INTEGER_OP(name) do { \
     switch ((size << 1) | u) { \
     case 0: \
@@ -3695,13 +3775,13 @@
     if (q) {
         switch (size) {
         case 0:
-            gen_helper_neon_qunzip8(tmp, tmp2);
+            gen_helper_neon_qunzip8(cpu_env, tmp, tmp2);
             break;
         case 1:
-            gen_helper_neon_qunzip16(tmp, tmp2);
+            gen_helper_neon_qunzip16(cpu_env, tmp, tmp2);
             break;
         case 2:
-            gen_helper_neon_qunzip32(tmp, tmp2);
+            gen_helper_neon_qunzip32(cpu_env, tmp, tmp2);
             break;
         default:
             abort();
@@ -3709,10 +3789,10 @@
     } else {
         switch (size) {
         case 0:
-            gen_helper_neon_unzip8(tmp, tmp2);
+            gen_helper_neon_unzip8(cpu_env, tmp, tmp2);
             break;
         case 1:
-            gen_helper_neon_unzip16(tmp, tmp2);
+            gen_helper_neon_unzip16(cpu_env, tmp, tmp2);
             break;
         default:
             abort();
@@ -3734,13 +3814,13 @@
     if (q) {
         switch (size) {
         case 0:
-            gen_helper_neon_qzip8(tmp, tmp2);
+            gen_helper_neon_qzip8(cpu_env, tmp, tmp2);
             break;
         case 1:
-            gen_helper_neon_qzip16(tmp, tmp2);
+            gen_helper_neon_qzip16(cpu_env, tmp, tmp2);
             break;
         case 2:
-            gen_helper_neon_qzip32(tmp, tmp2);
+            gen_helper_neon_qzip32(cpu_env, tmp, tmp2);
             break;
         default:
             abort();
@@ -3748,10 +3828,10 @@
     } else {
         switch (size) {
         case 0:
-            gen_helper_neon_zip8(tmp, tmp2);
+            gen_helper_neon_zip8(cpu_env, tmp, tmp2);
             break;
         case 1:
-            gen_helper_neon_zip16(tmp, tmp2);
+            gen_helper_neon_zip16(cpu_env, tmp, tmp2);
             break;
         default:
             abort();
@@ -3875,7 +3955,7 @@
             return 1;
         }
         addr = tcg_const_i32(insn);
-        gen_helper_neon_vldst_all(addr);
+        gen_helper_neon_vldst_all(cpu_env, addr);
         tcg_temp_free_i32(addr);
         stride = nregs * 8;
     } else {
@@ -4066,9 +4146,9 @@
 static inline void gen_neon_narrow_sats(int size, TCGv dest, TCGv_i64 src)
 {
     switch (size) {
-    case 0: gen_helper_neon_narrow_sat_s8(dest, src); break;
-    case 1: gen_helper_neon_narrow_sat_s16(dest, src); break;
-    case 2: gen_helper_neon_narrow_sat_s32(dest, src); break;
+    case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
+    case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
+    case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
     default: abort();
     }
 }
@@ -4076,9 +4156,9 @@
 static inline void gen_neon_narrow_satu(int size, TCGv dest, TCGv_i64 src)
 {
     switch (size) {
-    case 0: gen_helper_neon_narrow_sat_u8(dest, src); break;
-    case 1: gen_helper_neon_narrow_sat_u16(dest, src); break;
-    case 2: gen_helper_neon_narrow_sat_u32(dest, src); break;
+    case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
+    case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
+    case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
     default: abort();
     }
 }
@@ -4086,9 +4166,9 @@
 static inline void gen_neon_unarrow_sats(int size, TCGv dest, TCGv_i64 src)
 {
     switch (size) {
-    case 0: gen_helper_neon_unarrow_sat8(dest, src); break;
-    case 1: gen_helper_neon_unarrow_sat16(dest, src); break;
-    case 2: gen_helper_neon_unarrow_sat32(dest, src); break;
+    case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
+    case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
+    case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
     default: abort();
     }
 }
@@ -4180,8 +4260,8 @@
 static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
 {
     switch (size) {
-    case 1: gen_helper_neon_addl_saturate_s32(op0, op0, op1); break;
-    case 2: gen_helper_neon_addl_saturate_s64(op0, op0, op1); break;
+    case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
+    case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
     default: abort();
     }
 }
@@ -4457,16 +4537,20 @@
                 switch (op) {
                 case NEON_3R_VQADD:
                     if (u) {
-                        gen_helper_neon_qadd_u64(cpu_V0, cpu_V0, cpu_V1);
+                        gen_helper_neon_qadd_u64(cpu_V0, cpu_env,
+                                                 cpu_V0, cpu_V1);
                     } else {
-                        gen_helper_neon_qadd_s64(cpu_V0, cpu_V0, cpu_V1);
+                        gen_helper_neon_qadd_s64(cpu_V0, cpu_env,
+                                                 cpu_V0, cpu_V1);
                     }
                     break;
                 case NEON_3R_VQSUB:
                     if (u) {
-                        gen_helper_neon_qsub_u64(cpu_V0, cpu_V0, cpu_V1);
+                        gen_helper_neon_qsub_u64(cpu_V0, cpu_env,
+                                                 cpu_V0, cpu_V1);
                     } else {
-                        gen_helper_neon_qsub_s64(cpu_V0, cpu_V0, cpu_V1);
+                        gen_helper_neon_qsub_s64(cpu_V0, cpu_env,
+                                                 cpu_V0, cpu_V1);
                     }
                     break;
                 case NEON_3R_VSHL:
@@ -4478,9 +4562,11 @@
                     break;
                 case NEON_3R_VQSHL:
                     if (u) {
-                        gen_helper_neon_qshl_u64(cpu_V0, cpu_V1, cpu_V0);
+                        gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
+                                                 cpu_V1, cpu_V0);
                     } else {
-                        gen_helper_neon_qshl_s64(cpu_V0, cpu_V1, cpu_V0);
+                        gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
+                                                 cpu_V1, cpu_V0);
                     }
                     break;
                 case NEON_3R_VRSHL:
@@ -4492,9 +4578,11 @@
                     break;
                 case NEON_3R_VQRSHL:
                     if (u) {
-                        gen_helper_neon_qrshl_u64(cpu_V0, cpu_V1, cpu_V0);
+                        gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
+                                                  cpu_V1, cpu_V0);
                     } else {
-                        gen_helper_neon_qrshl_s64(cpu_V0, cpu_V1, cpu_V0);
+                        gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
+                                                  cpu_V1, cpu_V0);
                     }
                     break;
                 case NEON_3R_VADD_VSUB:
@@ -4592,7 +4680,7 @@
             GEN_NEON_INTEGER_OP(hadd);
             break;
         case NEON_3R_VQADD:
-            GEN_NEON_INTEGER_OP(qadd);
+            GEN_NEON_INTEGER_OP_ENV(qadd);
             break;
         case NEON_3R_VRHADD:
             GEN_NEON_INTEGER_OP(rhadd);
@@ -4635,7 +4723,7 @@
             GEN_NEON_INTEGER_OP(hsub);
             break;
         case NEON_3R_VQSUB:
-            GEN_NEON_INTEGER_OP(qsub);
+            GEN_NEON_INTEGER_OP_ENV(qsub);
             break;
         case NEON_3R_VCGT:
             GEN_NEON_INTEGER_OP(cgt);
@@ -4647,13 +4735,13 @@
             GEN_NEON_INTEGER_OP(shl);
             break;
         case NEON_3R_VQSHL:
-            GEN_NEON_INTEGER_OP(qshl);
+            GEN_NEON_INTEGER_OP_ENV(qshl);
             break;
         case NEON_3R_VRSHL:
             GEN_NEON_INTEGER_OP(rshl);
             break;
         case NEON_3R_VQRSHL:
-            GEN_NEON_INTEGER_OP(qrshl);
+            GEN_NEON_INTEGER_OP_ENV(qrshl);
             break;
         case NEON_3R_VMAX:
             GEN_NEON_INTEGER_OP(max);
@@ -4735,14 +4823,22 @@
         case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high.  */
             if (!u) { /* VQDMULH */
                 switch (size) {
-                case 1: gen_helper_neon_qdmulh_s16(tmp, tmp, tmp2); break;
-                case 2: gen_helper_neon_qdmulh_s32(tmp, tmp, tmp2); break;
+                case 1:
+                    gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
+                    break;
+                case 2:
+                    gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
+                    break;
                 default: abort();
                 }
             } else { /* VQRDMULH */
                 switch (size) {
-                case 1: gen_helper_neon_qrdmulh_s16(tmp, tmp, tmp2); break;
-                case 2: gen_helper_neon_qrdmulh_s32(tmp, tmp, tmp2); break;
+                case 1:
+                    gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
+                    break;
+                case 2:
+                    gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
+                    break;
                 default: abort();
                 }
             }
@@ -4756,57 +4852,78 @@
             }
             break;
         case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
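+        /* The Neon single-precision arithmetic helpers take an explicit
+         * float_status; get_fpstatus_ptr(1) selects the "standard FPSCR"
+         * status that Neon data-processing instructions are defined to use.
+         */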
+        {
+            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
             switch ((u << 2) | size) {
             case 0: /* VADD */
-                gen_helper_neon_add_f32(tmp, tmp, tmp2);
+            case 4: /* VPADD */
+                gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
                 break;
             case 2: /* VSUB */
-                gen_helper_neon_sub_f32(tmp, tmp, tmp2);
-                break;
-            case 4: /* VPADD */
-                gen_helper_neon_add_f32(tmp, tmp, tmp2);
+                gen_helper_vfp_subs(tmp, tmp, tmp2, fpstatus);
                 break;
             case 6: /* VABD */
-                gen_helper_neon_abd_f32(tmp, tmp, tmp2);
+                gen_helper_neon_abd_f32(tmp, tmp, tmp2, fpstatus);
                 break;
             default:
                 abort();
             }
+            tcg_temp_free_ptr(fpstatus);
             break;
+        }
         case NEON_3R_FLOAT_MULTIPLY:
-            gen_helper_neon_mul_f32(tmp, tmp, tmp2);
+        {
+            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+            gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
             if (!u) {
                 tcg_temp_free_i32(tmp2);
                 tmp2 = neon_load_reg(rd, pass);
                 if (size == 0) {
-                    gen_helper_neon_add_f32(tmp, tmp, tmp2);
+                    gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
                 } else {
-                    gen_helper_neon_sub_f32(tmp, tmp2, tmp);
+                    gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
                 }
             }
+            tcg_temp_free_ptr(fpstatus);
             break;
+        }
         case NEON_3R_FLOAT_CMP:
+        {
+            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
             if (!u) {
-                gen_helper_neon_ceq_f32(tmp, tmp, tmp2);
+                gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
             } else {
-                if (size == 0)
-                    gen_helper_neon_cge_f32(tmp, tmp, tmp2);
-                else
-                    gen_helper_neon_cgt_f32(tmp, tmp, tmp2);
+                if (size == 0) {
+                    gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
+                } else {
+                    gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
+                }
             }
+            tcg_temp_free_ptr(fpstatus);
             break;
+        }
         case NEON_3R_FLOAT_ACMP:
-            if (size == 0)
-                gen_helper_neon_acge_f32(tmp, tmp, tmp2);
-            else
-                gen_helper_neon_acgt_f32(tmp, tmp, tmp2);
+        {
+            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+            if (size == 0) {
+                gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus);
+            } else {
+                gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus);
+            }
+            tcg_temp_free_ptr(fpstatus);
             break;
+        }
         case NEON_3R_FLOAT_MINMAX:
-            if (size == 0)
-                gen_helper_neon_max_f32(tmp, tmp, tmp2);
-            else
-                gen_helper_neon_min_f32(tmp, tmp, tmp2);
+        {
+            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+            if (size == 0) {
+                gen_helper_neon_max_f32(tmp, tmp, tmp2, fpstatus);
+            } else {
+                gen_helper_neon_min_f32(tmp, tmp, tmp2, fpstatus);
+            }
+            tcg_temp_free_ptr(fpstatus);
             break;
+        }
         case NEON_3R_VRECPS_VRSQRTS:
             if (size == 0)
                 gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
@@ -4913,14 +5030,15 @@
                             gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
                             break;
                         case 6: /* VQSHLU */
-                            gen_helper_neon_qshlu_s64(cpu_V0, cpu_V0, cpu_V1);
+                            gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
+                                                      cpu_V0, cpu_V1);
                             break;
                         case 7: /* VQSHL */
                             if (u) {
-                                gen_helper_neon_qshl_u64(cpu_V0,
+                                gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
                                                          cpu_V0, cpu_V1);
                             } else {
-                                gen_helper_neon_qshl_s64(cpu_V0,
+                                gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
                                                          cpu_V0, cpu_V1);
                             }
                             break;
@@ -4971,20 +5089,23 @@
                         case 6: /* VQSHLU */
                             switch (size) {
                             case 0:
-                                gen_helper_neon_qshlu_s8(tmp, tmp, tmp2);
+                                gen_helper_neon_qshlu_s8(tmp, cpu_env,
+                                                         tmp, tmp2);
                                 break;
                             case 1:
-                                gen_helper_neon_qshlu_s16(tmp, tmp, tmp2);
+                                gen_helper_neon_qshlu_s16(tmp, cpu_env,
+                                                          tmp, tmp2);
                                 break;
                             case 2:
-                                gen_helper_neon_qshlu_s32(tmp, tmp, tmp2);
+                                gen_helper_neon_qshlu_s32(tmp, cpu_env,
+                                                          tmp, tmp2);
                                 break;
                             default:
                                 abort();
                             }
                             break;
                         case 7: /* VQSHL */
-                            GEN_NEON_INTEGER_OP(qshl);
+                            GEN_NEON_INTEGER_OP_ENV(qshl);
                             break;
                         }
                         tcg_temp_free_i32(tmp2);
@@ -5493,18 +5614,20 @@
                         tmp2 = neon_load_reg(rn, pass);
                         if (op == 12) {
                             if (size == 1) {
-                                gen_helper_neon_qdmulh_s16(tmp, tmp, tmp2);
+                                gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
                             } else {
-                                gen_helper_neon_qdmulh_s32(tmp, tmp, tmp2);
+                                gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
                             }
                         } else if (op == 13) {
                             if (size == 1) {
-                                gen_helper_neon_qrdmulh_s16(tmp, tmp, tmp2);
+                                gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
                             } else {
-                                gen_helper_neon_qrdmulh_s32(tmp, tmp, tmp2);
+                                gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
                             }
                         } else if (op & 1) {
-                            gen_helper_neon_mul_f32(tmp, tmp, tmp2);
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+                            gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
+                            tcg_temp_free_ptr(fpstatus);
                         } else {
                             switch (size) {
                             case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
@@ -5522,14 +5645,22 @@
                                 gen_neon_add(size, tmp, tmp2);
                                 break;
                             case 1:
-                                gen_helper_neon_add_f32(tmp, tmp, tmp2);
+                            {
+                                TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+                                gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
+                                tcg_temp_free_ptr(fpstatus);
                                 break;
+                            }
                             case 4:
                                 gen_neon_rsb(size, tmp, tmp2);
                                 break;
                             case 5:
-                                gen_helper_neon_sub_f32(tmp, tmp2, tmp);
+                            {
+                                TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+                                gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
+                                tcg_temp_free_ptr(fpstatus);
                                 break;
+                            }
                             default:
                                 abort();
                             }
@@ -5863,17 +5994,29 @@
                             break;
                         case NEON_2RM_VQABS:
                             switch (size) {
-                            case 0: gen_helper_neon_qabs_s8(tmp, tmp); break;
-                            case 1: gen_helper_neon_qabs_s16(tmp, tmp); break;
-                            case 2: gen_helper_neon_qabs_s32(tmp, tmp); break;
+                            case 0:
+                                gen_helper_neon_qabs_s8(tmp, cpu_env, tmp);
+                                break;
+                            case 1:
+                                gen_helper_neon_qabs_s16(tmp, cpu_env, tmp);
+                                break;
+                            case 2:
+                                gen_helper_neon_qabs_s32(tmp, cpu_env, tmp);
+                                break;
                             default: abort();
                             }
                             break;
                         case NEON_2RM_VQNEG:
                             switch (size) {
-                            case 0: gen_helper_neon_qneg_s8(tmp, tmp); break;
-                            case 1: gen_helper_neon_qneg_s16(tmp, tmp); break;
-                            case 2: gen_helper_neon_qneg_s32(tmp, tmp); break;
+                            case 0:
+                                gen_helper_neon_qneg_s8(tmp, cpu_env, tmp);
+                                break;
+                            case 1:
+                                gen_helper_neon_qneg_s16(tmp, cpu_env, tmp);
+                                break;
+                            case 2:
+                                gen_helper_neon_qneg_s32(tmp, cpu_env, tmp);
+                                break;
                             default: abort();
                             }
                             break;
@@ -5927,30 +6070,49 @@
                             tcg_temp_free(tmp2);
                             break;
                         case NEON_2RM_VCGT0_F:
+                        {
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
                             tmp2 = tcg_const_i32(0);
-                            gen_helper_neon_cgt_f32(tmp, tmp, tmp2);
+                            gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
                             tcg_temp_free(tmp2);
+                            tcg_temp_free_ptr(fpstatus);
                             break;
+                        }
                         case NEON_2RM_VCGE0_F:
+                        {
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
                             tmp2 = tcg_const_i32(0);
-                            gen_helper_neon_cge_f32(tmp, tmp, tmp2);
+                            gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
                             tcg_temp_free(tmp2);
+                            tcg_temp_free_ptr(fpstatus);
                             break;
+                        }
                         case NEON_2RM_VCEQ0_F:
+                        {
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
                             tmp2 = tcg_const_i32(0);
-                            gen_helper_neon_ceq_f32(tmp, tmp, tmp2);
+                            gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
                             tcg_temp_free(tmp2);
+                            tcg_temp_free_ptr(fpstatus);
                             break;
+                        }
                         case NEON_2RM_VCLE0_F:
+                        {
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
                             tmp2 = tcg_const_i32(0);
-                            gen_helper_neon_cge_f32(tmp, tmp2, tmp);
+                            gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus);
                             tcg_temp_free(tmp2);
+                            tcg_temp_free_ptr(fpstatus);
                             break;
+                        }
                         case NEON_2RM_VCLT0_F:
+                        {
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
                             tmp2 = tcg_const_i32(0);
-                            gen_helper_neon_cgt_f32(tmp, tmp2, tmp);
+                            gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus);
                             tcg_temp_free(tmp2);
+                            tcg_temp_free_ptr(fpstatus);
                             break;
+                        }
                         case NEON_2RM_VABS_F:
                             gen_vfp_abs(0);
                             break;
@@ -6028,7 +6190,7 @@
                 tmp2 = neon_load_reg(rm, 0);
                 tmp4 = tcg_const_i32(rn);
                 tmp5 = tcg_const_i32(n);
-                gen_helper_neon_tbl(tmp2, tmp2, tmp, tmp4, tmp5);
+                gen_helper_neon_tbl(tmp2, cpu_env, tmp2, tmp, tmp4, tmp5);
                 tcg_temp_free_i32(tmp);
                 if (insn & (1 << 6)) {
                     tmp = neon_load_reg(rd, 1);
@@ -6037,7 +6199,7 @@
                     tcg_gen_movi_i32(tmp, 0);
                 }
                 tmp3 = neon_load_reg(rm, 1);
-                gen_helper_neon_tbl(tmp3, tmp3, tmp, tmp4, tmp5);
+                gen_helper_neon_tbl(tmp3, cpu_env, tmp3, tmp, tmp4, tmp5);
                 tcg_temp_free_i32(tmp5);
                 tcg_temp_free_i32(tmp4);
                 neon_store_reg(rd, 0, tmp2);
@@ -6421,7 +6583,7 @@
     TCGv addr;
     TCGv_i64 tmp64;
 
-    insn = ldl_code(s->pc);
+    insn = cpu_ldl_code(env, s->pc);
 
     ANDROID_WATCH_CALLSTACK_ARM(s);
 
@@ -6711,7 +6873,7 @@
                     tmp = load_cpu_field(spsr);
                 } else {
                     tmp = tcg_temp_new_i32();
-                    gen_helper_cpsr_read(tmp);
+                    gen_helper_cpsr_read(tmp, cpu_env);
                 }
                 store_reg(s, rd, tmp);
             }
@@ -6762,11 +6924,11 @@
             tmp = load_reg(s, rm);
             tmp2 = load_reg(s, rn);
             if (op1 & 2)
-                gen_helper_double_saturate(tmp2, tmp2);
+                gen_helper_double_saturate(tmp2, cpu_env, tmp2);
             if (op1 & 1)
-                gen_helper_sub_saturate(tmp, tmp, tmp2);
+                gen_helper_sub_saturate(tmp, cpu_env, tmp, tmp2);
             else
-                gen_helper_add_saturate(tmp, tmp, tmp2);
+                gen_helper_add_saturate(tmp, cpu_env, tmp, tmp2);
             tcg_temp_free_i32(tmp2);
             store_reg(s, rd, tmp);
             break;
@@ -6808,7 +6970,7 @@
                 tcg_temp_free_i64(tmp64);
                 if ((sh & 2) == 0) {
                     tmp2 = load_reg(s, rn);
-                    gen_helper_add_setq(tmp, tmp, tmp2);
+                    gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
                     tcg_temp_free_i32(tmp2);
                 }
                 store_reg(s, rd, tmp);
@@ -6828,7 +6990,7 @@
                 } else {
                     if (op1 == 0) {
                         tmp2 = load_reg(s, rn);
-                        gen_helper_add_setq(tmp, tmp, tmp2);
+                        gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
                         tcg_temp_free_i32(tmp2);
                     }
                     store_reg(s, rd, tmp);
@@ -6902,11 +7064,11 @@
                 if (IS_USER(s)) {
                     goto illegal_op;
                 }
-                gen_helper_sub_cc(tmp, tmp, tmp2);
+                gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
                 gen_exception_return(s, tmp);
             } else {
                 if (set_cc) {
-                    gen_helper_sub_cc(tmp, tmp, tmp2);
+                    gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
                 } else {
                     tcg_gen_sub_i32(tmp, tmp, tmp2);
                 }
@@ -6915,7 +7077,7 @@
             break;
         case 0x03:
             if (set_cc) {
-                gen_helper_sub_cc(tmp, tmp2, tmp);
+                gen_helper_sub_cc(tmp, cpu_env, tmp2, tmp);
             } else {
                 tcg_gen_sub_i32(tmp, tmp2, tmp);
             }
@@ -6923,7 +7085,7 @@
             break;
         case 0x04:
             if (set_cc) {
-                gen_helper_add_cc(tmp, tmp, tmp2);
+                gen_helper_add_cc(tmp, cpu_env, tmp, tmp2);
             } else {
                 tcg_gen_add_i32(tmp, tmp, tmp2);
             }
@@ -6931,7 +7093,7 @@
             break;
         case 0x05:
             if (set_cc) {
-                gen_helper_adc_cc(tmp, tmp, tmp2);
+                gen_helper_adc_cc(tmp, cpu_env, tmp, tmp2);
             } else {
                 gen_add_carry(tmp, tmp, tmp2);
             }
@@ -6939,7 +7101,7 @@
             break;
         case 0x06:
             if (set_cc) {
-                gen_helper_sbc_cc(tmp, tmp, tmp2);
+                gen_helper_sbc_cc(tmp, cpu_env, tmp, tmp2);
             } else {
                 gen_sub_carry(tmp, tmp, tmp2);
             }
@@ -6947,7 +7109,7 @@
             break;
         case 0x07:
             if (set_cc) {
-                gen_helper_sbc_cc(tmp, tmp2, tmp);
+                gen_helper_sbc_cc(tmp, cpu_env, tmp2, tmp);
             } else {
                 gen_sub_carry(tmp, tmp2, tmp);
             }
@@ -6969,13 +7131,13 @@
             break;
         case 0x0a:
             if (set_cc) {
-                gen_helper_sub_cc(tmp, tmp, tmp2);
+                gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
             }
             tcg_temp_free_i32(tmp);
             break;
         case 0x0b:
             if (set_cc) {
-                gen_helper_add_cc(tmp, tmp, tmp2);
+                gen_helper_add_cc(tmp, cpu_env, tmp, tmp2);
             }
             tcg_temp_free_i32(tmp);
             break;
@@ -7292,9 +7454,9 @@
                         sh = (insn >> 16) & 0x1f;
                         tmp2 = tcg_const_i32(sh);
                         if (insn & (1 << 22))
-                          gen_helper_usat(tmp, tmp, tmp2);
+                          gen_helper_usat(tmp, cpu_env, tmp, tmp2);
                         else
-                          gen_helper_ssat(tmp, tmp, tmp2);
+                          gen_helper_ssat(tmp, cpu_env, tmp, tmp2);
                         tcg_temp_free_i32(tmp2);
                         store_reg(s, rd, tmp);
                     } else if ((insn & 0x00300fe0) == 0x00200f20) {
@@ -7303,9 +7465,9 @@
                         sh = (insn >> 16) & 0x1f;
                         tmp2 = tcg_const_i32(sh);
                         if (insn & (1 << 22))
-                          gen_helper_usat16(tmp, tmp, tmp2);
+                          gen_helper_usat16(tmp, cpu_env, tmp, tmp2);
                         else
-                          gen_helper_ssat16(tmp, tmp, tmp2);
+                          gen_helper_ssat16(tmp, cpu_env, tmp, tmp2);
                         tcg_temp_free_i32(tmp2);
                         store_reg(s, rd, tmp);
                     } else if ((insn & 0x00700fe0) == 0x00000fa0) {
@@ -7402,7 +7564,7 @@
                              * however it may overflow considered as a signed
                              * operation, in which case we must set the Q flag.
                              */
-                            gen_helper_add_setq(tmp, tmp, tmp2);
+                            gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
                         }
                         tcg_temp_free_i32(tmp2);
                         if (insn & (1 << 22)) {
@@ -7418,7 +7580,7 @@
                             if (rd != 15)
                               {
                                 tmp2 = load_reg(s, rd);
-                                gen_helper_add_setq(tmp, tmp, tmp2);
+                                gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
                                 tcg_temp_free_i32(tmp2);
                               }
                             store_reg(s, rn, tmp);
@@ -7582,7 +7744,7 @@
                             tmp = gen_ld32(addr, IS_USER(s));
                             if (user) {
                                 tmp2 = tcg_const_i32(i);
-                                gen_helper_set_user_reg(tmp2, tmp);
+                                gen_helper_set_user_reg(cpu_env, tmp2, tmp);
                                 tcg_temp_free_i32(tmp2);
                                 tcg_temp_free_i32(tmp);
                             } else if (i == rn) {
@@ -7601,7 +7763,7 @@
                             } else if (user) {
                                 tmp = tcg_temp_new_i32();
                                 tmp2 = tcg_const_i32(i);
-                                gen_helper_get_user_reg(tmp, tmp2);
+                                gen_helper_get_user_reg(tmp, cpu_env, tmp2);
                                 tcg_temp_free_i32(tmp2);
                             } else {
                                 tmp = load_reg(s, i);
@@ -7729,31 +7891,31 @@
         break;
     case 8: /* add */
         if (conds)
-            gen_helper_add_cc(t0, t0, t1);
+            gen_helper_add_cc(t0, cpu_env, t0, t1);
         else
             tcg_gen_add_i32(t0, t0, t1);
         break;
     case 10: /* adc */
         if (conds)
-            gen_helper_adc_cc(t0, t0, t1);
+            gen_helper_adc_cc(t0, cpu_env, t0, t1);
         else
             gen_adc(t0, t1);
         break;
     case 11: /* sbc */
         if (conds)
-            gen_helper_sbc_cc(t0, t0, t1);
+            gen_helper_sbc_cc(t0, cpu_env, t0, t1);
         else
             gen_sub_carry(t0, t0, t1);
         break;
     case 13: /* sub */
         if (conds)
-            gen_helper_sub_cc(t0, t0, t1);
+            gen_helper_sub_cc(t0, cpu_env, t0, t1);
         else
             tcg_gen_sub_i32(t0, t0, t1);
         break;
     case 14: /* rsb */
         if (conds)
-            gen_helper_sub_cc(t0, t1, t0);
+            gen_helper_sub_cc(t0, cpu_env, t1, t0);
         else
             tcg_gen_sub_i32(t0, t1, t0);
         break;
@@ -7826,7 +7988,7 @@
         /* Fall through to 32-bit decode.  */
     }
 
-    insn = lduw_code(s->pc);
+    insn = cpu_lduw_code(env, s->pc);
     s->pc += 2;
     insn |= (uint32_t)insn_hw1 << 16;
 
@@ -7975,7 +8137,7 @@
                     gen_st32(tmp, addr, 0);
                     tcg_gen_addi_i32(addr, addr, 4);
                     tmp = tcg_temp_new_i32();
-                    gen_helper_cpsr_read(tmp);
+                    gen_helper_cpsr_read(tmp, cpu_env);
                     gen_st32(tmp, addr, 0);
                     if (insn & (1 << 21)) {
                         if ((insn & (1 << 24)) == 0) {
@@ -8159,11 +8321,11 @@
                 tmp = load_reg(s, rn);
                 tmp2 = load_reg(s, rm);
                 if (op & 1)
-                    gen_helper_double_saturate(tmp, tmp);
+                    gen_helper_double_saturate(tmp, cpu_env, tmp);
                 if (op & 2)
-                    gen_helper_sub_saturate(tmp, tmp2, tmp);
+                    gen_helper_sub_saturate(tmp, cpu_env, tmp2, tmp);
                 else
-                    gen_helper_add_saturate(tmp, tmp, tmp2);
+                    gen_helper_add_saturate(tmp, cpu_env, tmp, tmp2);
                 tcg_temp_free_i32(tmp2);
             } else {
                 tmp = load_reg(s, rn);
@@ -8219,7 +8381,7 @@
                 tcg_temp_free_i32(tmp2);
                 if (rs != 15) {
                     tmp2 = load_reg(s, rs);
-                    gen_helper_add_setq(tmp, tmp, tmp2);
+                    gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
                     tcg_temp_free_i32(tmp2);
                 }
                 break;
@@ -8236,13 +8398,13 @@
                      * however it may overflow considered as a signed
                      * operation, in which case we must set the Q flag.
                      */
-                    gen_helper_add_setq(tmp, tmp, tmp2);
+                    gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
                 }
                 tcg_temp_free_i32(tmp2);
                 if (rs != 15)
                   {
                     tmp2 = load_reg(s, rs);
-                    gen_helper_add_setq(tmp, tmp, tmp2);
+                    gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
                     tcg_temp_free_i32(tmp2);
                   }
                 break;
@@ -8259,7 +8421,7 @@
                 if (rs != 15)
                   {
                     tmp2 = load_reg(s, rs);
-                    gen_helper_add_setq(tmp, tmp, tmp2);
+                    gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
                     tcg_temp_free_i32(tmp2);
                   }
                 break;
@@ -8500,7 +8662,7 @@
                             gen_helper_v7m_mrs(tmp, cpu_env, addr);
                             tcg_temp_free_i32(addr);
                         } else {
-                            gen_helper_cpsr_read(tmp);
+                            gen_helper_cpsr_read(tmp, cpu_env);
                         }
                         store_reg(s, rd, tmp);
                         break;
@@ -8589,15 +8751,15 @@
                         if (op & 4) {
                             /* Unsigned.  */
                             if ((op & 1) && shift == 0)
-                                gen_helper_usat16(tmp, tmp, tmp2);
+                                gen_helper_usat16(tmp, cpu_env, tmp, tmp2);
                             else
-                                gen_helper_usat(tmp, tmp, tmp2);
+                                gen_helper_usat(tmp, cpu_env, tmp, tmp2);
                         } else {
                             /* Signed.  */
                             if ((op & 1) && shift == 0)
-                                gen_helper_ssat16(tmp, tmp, tmp2);
+                                gen_helper_ssat16(tmp, cpu_env, tmp, tmp2);
                             else
-                                gen_helper_ssat(tmp, tmp, tmp2);
+                                gen_helper_ssat(tmp, cpu_env, tmp, tmp2);
                         }
                         tcg_temp_free_i32(tmp2);
                         break;
@@ -8858,7 +9020,7 @@
         }
     }
 
-    insn = lduw_code(s->pc);
+    insn = cpu_lduw_code(env, s->pc);
 
     ANDROID_WATCH_CALLSTACK_THUMB(s);
 
@@ -8886,12 +9048,12 @@
                 if (s->condexec_mask)
                     tcg_gen_sub_i32(tmp, tmp, tmp2);
                 else
-                    gen_helper_sub_cc(tmp, tmp, tmp2);
+                    gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
             } else {
                 if (s->condexec_mask)
                     tcg_gen_add_i32(tmp, tmp, tmp2);
                 else
-                    gen_helper_add_cc(tmp, tmp, tmp2);
+                    gen_helper_add_cc(tmp, cpu_env, tmp, tmp2);
             }
             tcg_temp_free_i32(tmp2);
             store_reg(s, rd, tmp);
@@ -8922,7 +9084,7 @@
             tcg_gen_movi_i32(tmp2, insn & 0xff);
             switch (op) {
             case 1: /* cmp */
-                gen_helper_sub_cc(tmp, tmp, tmp2);
+                gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
                 tcg_temp_free_i32(tmp);
                 tcg_temp_free_i32(tmp2);
                 break;
@@ -8930,7 +9092,7 @@
                 if (s->condexec_mask)
                     tcg_gen_add_i32(tmp, tmp, tmp2);
                 else
-                    gen_helper_add_cc(tmp, tmp, tmp2);
+                    gen_helper_add_cc(tmp, cpu_env, tmp, tmp2);
                 tcg_temp_free_i32(tmp2);
                 store_reg(s, rd, tmp);
                 break;
@@ -8938,7 +9100,7 @@
                 if (s->condexec_mask)
                     tcg_gen_sub_i32(tmp, tmp, tmp2);
                 else
-                    gen_helper_sub_cc(tmp, tmp, tmp2);
+                    gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
                 tcg_temp_free_i32(tmp2);
                 store_reg(s, rd, tmp);
                 break;
@@ -8974,7 +9136,7 @@
             case 1: /* cmp */
                 tmp = load_reg(s, rd);
                 tmp2 = load_reg(s, rm);
-                gen_helper_sub_cc(tmp, tmp, tmp2);
+                gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
                 tcg_temp_free_i32(tmp2);
                 tcg_temp_free_i32(tmp);
                 break;
@@ -9037,7 +9199,7 @@
             if (s->condexec_mask) {
                 gen_helper_shl(tmp2, tmp2, tmp);
             } else {
-                gen_helper_shl_cc(tmp2, tmp2, tmp);
+                gen_helper_shl_cc(tmp2, cpu_env, tmp2, tmp);
                 gen_logic_CC(tmp2);
             }
             break;
@@ -9045,7 +9207,7 @@
             if (s->condexec_mask) {
                 gen_helper_shr(tmp2, tmp2, tmp);
             } else {
-                gen_helper_shr_cc(tmp2, tmp2, tmp);
+                gen_helper_shr_cc(tmp2, cpu_env, tmp2, tmp);
                 gen_logic_CC(tmp2);
             }
             break;
@@ -9053,7 +9215,7 @@
             if (s->condexec_mask) {
                 gen_helper_sar(tmp2, tmp2, tmp);
             } else {
-                gen_helper_sar_cc(tmp2, tmp2, tmp);
+                gen_helper_sar_cc(tmp2, cpu_env, tmp2, tmp);
                 gen_logic_CC(tmp2);
             }
             break;
@@ -9061,20 +9223,20 @@
             if (s->condexec_mask)
                 gen_adc(tmp, tmp2);
             else
-                gen_helper_adc_cc(tmp, tmp, tmp2);
+                gen_helper_adc_cc(tmp, cpu_env, tmp, tmp2);
             break;
         case 0x6: /* sbc */
             if (s->condexec_mask)
                 gen_sub_carry(tmp, tmp, tmp2);
             else
-                gen_helper_sbc_cc(tmp, tmp, tmp2);
+                gen_helper_sbc_cc(tmp, cpu_env, tmp, tmp2);
             break;
         case 0x7: /* ror */
             if (s->condexec_mask) {
                 tcg_gen_andi_i32(tmp, tmp, 0x1f);
                 tcg_gen_rotr_i32(tmp2, tmp2, tmp);
             } else {
-                gen_helper_ror_cc(tmp2, tmp2, tmp);
+                gen_helper_ror_cc(tmp2, cpu_env, tmp2, tmp);
                 gen_logic_CC(tmp2);
             }
             break;
@@ -9087,14 +9249,14 @@
             if (s->condexec_mask)
                 tcg_gen_neg_i32(tmp, tmp2);
             else
-                gen_helper_sub_cc(tmp, tmp, tmp2);
+                gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
             break;
         case 0xa: /* cmp */
-            gen_helper_sub_cc(tmp, tmp, tmp2);
+            gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
             rd = 16;
             break;
         case 0xb: /* cmn */
-            gen_helper_add_cc(tmp, tmp, tmp2);
+            gen_helper_add_cc(tmp, cpu_env, tmp, tmp2);
             rd = 16;
             break;
         case 0xc: /* orr */
@@ -9548,11 +9710,11 @@
 
     dc->tb = tb;
 
-    gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
+    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
 
     dc->is_jmp = DISAS_NEXT;
     dc->pc = pc_start;
-    dc->singlestep_enabled = env->singlestep_enabled;
+    dc->singlestep_enabled = ENV_GET_CPU(env)->singlestep_enabled;
     dc->condjmp = 0;
     dc->thumb = ARM_TBFLAG_THUMB(tb->flags);
     dc->condexec_mask = (ARM_TBFLAG_CONDEXEC(tb->flags) & 0xf) << 1;
@@ -9656,16 +9818,16 @@
         }
 
         if (ANDROID_CHECK_CODEGEN_PC(search_pc)) {
-            j = gen_opc_ptr - gen_opc_buf;
+            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
             if (lj < j) {
                 lj++;
                 while (lj < j)
-                    gen_opc_instr_start[lj++] = 0;
+                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
             }
-            gen_opc_pc[lj] = dc->pc;
+            tcg_ctx.gen_opc_pc[lj] = dc->pc;
             gen_opc_condexec_bits[lj] = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
-            gen_opc_instr_start[lj] = 1;
-            gen_opc_icount[lj] = num_insns;
+            tcg_ctx.gen_opc_instr_start[lj] = 1;
+            tcg_ctx.gen_opc_icount[lj] = num_insns;
         }
 
         if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
@@ -9703,8 +9865,8 @@
          * Also stop translation when a page boundary is reached.  This
          * ensures prefetch aborts occur at the right place.  */
         num_insns ++;
-    } while (!dc->is_jmp && gen_opc_ptr < gen_opc_end &&
-             !env->singlestep_enabled &&
+    } while (!dc->is_jmp && tcg_ctx.gen_opc_ptr < gen_opc_end &&
+             !ENV_GET_CPU(env)->singlestep_enabled &&
              !singlestep &&
              dc->pc < next_page_start &&
              num_insns < max_insns);
@@ -9721,7 +9883,7 @@
     /* At this stage dc->condjmp will only be set when the skipped
        instruction was a conditional branch or trap, and the PC has
        already been written.  */
-    if (unlikely(env->singlestep_enabled)) {
+    if (unlikely(ENV_GET_CPU(env)->singlestep_enabled)) {
         /* Make sure the pc is updated, and raise a debug exception.  */
         if (dc->condjmp) {
             gen_set_condexec(dc);
@@ -9772,7 +9934,7 @@
             /* nothing more to generate */
             break;
         case DISAS_WFI:
-            gen_helper_wfi();
+            gen_helper_wfi(cpu_env);
             break;
         case DISAS_SWI:
             gen_exception(EXCP_SWI);
@@ -9791,7 +9953,7 @@
 
 done_generating:
     gen_icount_end(tb, num_insns);
-    *gen_opc_ptr = INDEX_op_end;
+    *tcg_ctx.gen_opc_ptr = INDEX_op_end;
 
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
@@ -9802,10 +9964,10 @@
     }
 #endif
     if (search_pc) {
-        j = gen_opc_ptr - gen_opc_buf;
+        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
         lj++;
         while (lj <= j)
-            gen_opc_instr_start[lj++] = 0;
+            tcg_ctx.gen_opc_instr_start[lj++] = 0;
     } else {
         ANDROID_END_CODEGEN();
         tb->size = dc->pc - pc_start;
@@ -9828,9 +9990,10 @@
   "???", "???", "???", "und", "???", "???", "???", "sys"
 };
 
-void cpu_dump_state(CPUARMState *env, FILE *f, fprintf_function cpu_fprintf,
+void cpu_dump_state(CPUState *cpu, FILE *f, fprintf_function cpu_fprintf,
                     int flags)
 {
+    CPUARMState *env = cpu->env_ptr;
     int i;
 #if 0
     union {
@@ -9882,6 +10045,6 @@
 
 void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb, int pc_pos)
 {
-    env->regs[15] = gen_opc_pc[pc_pos];
+    env->regs[15] = tcg_ctx.gen_opc_pc[pc_pos];
     env->condexec_bits = gen_opc_condexec_bits[pc_pos];
 }
diff --git a/target-i386/cc_helper.c b/target-i386/cc_helper.c
new file mode 100644
index 0000000..91c9c25
--- /dev/null
+++ b/target-i386/cc_helper.c
@@ -0,0 +1,278 @@
+/*
+ *  x86 condition code helpers
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
+ */
+
+#include "cpu.h"
+#include "helper.h"
+
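+/* PF is set when the low 8 bits of the result contain an even number of
+ * set bits; this table maps each byte value directly to CC_P or 0.
+ */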
+const uint8_t parity_table[256] = {
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+};
+
+#define SHIFT 0
+#include "cc_helper_template.h"
+#undef SHIFT
+
+#define SHIFT 1
+#include "cc_helper_template.h"
+#undef SHIFT
+
+#define SHIFT 2
+#include "cc_helper_template.h"
+#undef SHIFT
+
+#ifdef TARGET_X86_64
+#define SHIFT 3
+#include "cc_helper_template.h"
+#undef SHIFT
+#endif
+
+static int compute_all_eflags(CPUX86State *env)
+{
+    return CC_SRC;
+}
+
+static int compute_c_eflags(CPUX86State *env)
+{
+    return CC_SRC & CC_C;
+}
+
+uint32_t helper_cc_compute_all(CPUX86State *env, int op)
+{
+    switch (op) {
+    default: /* should never happen */ return 0;
+
+    case CC_OP_EFLAGS: return compute_all_eflags(env);
+
+    case CC_OP_MULB: return compute_all_mulb(env);
+    case CC_OP_MULW: return compute_all_mulw(env);
+    case CC_OP_MULL: return compute_all_mull(env);
+
+    case CC_OP_ADDB: return compute_all_addb(env);
+    case CC_OP_ADDW: return compute_all_addw(env);
+    case CC_OP_ADDL: return compute_all_addl(env);
+
+    case CC_OP_ADCB: return compute_all_adcb(env);
+    case CC_OP_ADCW: return compute_all_adcw(env);
+    case CC_OP_ADCL: return compute_all_adcl(env);
+
+    case CC_OP_SUBB: return compute_all_subb(env);
+    case CC_OP_SUBW: return compute_all_subw(env);
+    case CC_OP_SUBL: return compute_all_subl(env);
+
+    case CC_OP_SBBB: return compute_all_sbbb(env);
+    case CC_OP_SBBW: return compute_all_sbbw(env);
+    case CC_OP_SBBL: return compute_all_sbbl(env);
+
+    case CC_OP_LOGICB: return compute_all_logicb(env);
+    case CC_OP_LOGICW: return compute_all_logicw(env);
+    case CC_OP_LOGICL: return compute_all_logicl(env);
+
+    case CC_OP_INCB: return compute_all_incb(env);
+    case CC_OP_INCW: return compute_all_incw(env);
+    case CC_OP_INCL: return compute_all_incl(env);
+
+    case CC_OP_DECB: return compute_all_decb(env);
+    case CC_OP_DECW: return compute_all_decw(env);
+    case CC_OP_DECL: return compute_all_decl(env);
+
+    case CC_OP_SHLB: return compute_all_shlb(env);
+    case CC_OP_SHLW: return compute_all_shlw(env);
+    case CC_OP_SHLL: return compute_all_shll(env);
+
+    case CC_OP_SARB: return compute_all_sarb(env);
+    case CC_OP_SARW: return compute_all_sarw(env);
+    case CC_OP_SARL: return compute_all_sarl(env);
+
+#ifdef TARGET_X86_64
+    case CC_OP_MULQ: return compute_all_mulq(env);
+
+    case CC_OP_ADDQ: return compute_all_addq(env);
+
+    case CC_OP_ADCQ: return compute_all_adcq(env);
+
+    case CC_OP_SUBQ: return compute_all_subq(env);
+
+    case CC_OP_SBBQ: return compute_all_sbbq(env);
+
+    case CC_OP_LOGICQ: return compute_all_logicq(env);
+
+    case CC_OP_INCQ: return compute_all_incq(env);
+
+    case CC_OP_DECQ: return compute_all_decq(env);
+
+    case CC_OP_SHLQ: return compute_all_shlq(env);
+
+    case CC_OP_SARQ: return compute_all_sarq(env);
+#endif
+    }
+}
+
+uint32_t cpu_cc_compute_all(CPUArchState *env1, int op)
+{
+    return helper_cc_compute_all(env1, op);
+}
+
+uint32_t helper_cc_compute_c(CPUX86State *env, int op)
+{
+    switch (op) {
+    default: /* should never happen */ return 0;
+
+    case CC_OP_EFLAGS: return compute_c_eflags(env);
+
+    case CC_OP_MULB: return compute_c_mull(env);
+    case CC_OP_MULW: return compute_c_mull(env);
+    case CC_OP_MULL: return compute_c_mull(env);
+
+    case CC_OP_ADDB: return compute_c_addb(env);
+    case CC_OP_ADDW: return compute_c_addw(env);
+    case CC_OP_ADDL: return compute_c_addl(env);
+
+    case CC_OP_ADCB: return compute_c_adcb(env);
+    case CC_OP_ADCW: return compute_c_adcw(env);
+    case CC_OP_ADCL: return compute_c_adcl(env);
+
+    case CC_OP_SUBB: return compute_c_subb(env);
+    case CC_OP_SUBW: return compute_c_subw(env);
+    case CC_OP_SUBL: return compute_c_subl(env);
+
+    case CC_OP_SBBB: return compute_c_sbbb(env);
+    case CC_OP_SBBW: return compute_c_sbbw(env);
+    case CC_OP_SBBL: return compute_c_sbbl(env);
+
+    case CC_OP_LOGICB: return compute_c_logicb(env);
+    case CC_OP_LOGICW: return compute_c_logicw(env);
+    case CC_OP_LOGICL: return compute_c_logicl(env);
+
+    case CC_OP_INCB: return compute_c_incl(env);
+    case CC_OP_INCW: return compute_c_incl(env);
+    case CC_OP_INCL: return compute_c_incl(env);
+
+    case CC_OP_DECB: return compute_c_incl(env);
+    case CC_OP_DECW: return compute_c_incl(env);
+    case CC_OP_DECL: return compute_c_incl(env);
+
+    case CC_OP_SHLB: return compute_c_shlb(env);
+    case CC_OP_SHLW: return compute_c_shlw(env);
+    case CC_OP_SHLL: return compute_c_shll(env);
+
+    case CC_OP_SARB: return compute_c_sarl(env);
+    case CC_OP_SARW: return compute_c_sarl(env);
+    case CC_OP_SARL: return compute_c_sarl(env);
+
+#ifdef TARGET_X86_64
+    case CC_OP_MULQ: return compute_c_mull(env);
+
+    case CC_OP_ADDQ: return compute_c_addq(env);
+
+    case CC_OP_ADCQ: return compute_c_adcq(env);
+
+    case CC_OP_SUBQ: return compute_c_subq(env);
+
+    case CC_OP_SBBQ: return compute_c_sbbq(env);
+
+    case CC_OP_LOGICQ: return compute_c_logicq(env);
+
+    case CC_OP_INCQ: return compute_c_incl(env);
+
+    case CC_OP_DECQ: return compute_c_incl(env);
+
+    case CC_OP_SHLQ: return compute_c_shlq(env);
+
+    case CC_OP_SARQ: return compute_c_sarl(env);
+#endif
+    }
+}
+
+void helper_write_eflags(CPUX86State *env,
+                         target_ulong t0, uint32_t update_mask)
+{
+    cpu_load_eflags(env, t0, update_mask);
+}
+
+target_ulong helper_read_eflags(CPUX86State *env)
+{
+    uint32_t eflags;
+    eflags = helper_cc_compute_all(env, CC_OP);
+    eflags |= (DF & DF_MASK);
+    eflags |= env->eflags & ~(VM_MASK | RF_MASK);
+    return eflags;
+}
+
+void helper_clts(CPUX86State *env)
+{
+    env->cr[0] &= ~CR0_TS_MASK;
+    env->hflags &= ~HF_TS_MASK;
+}
+
+void helper_reset_rf(CPUX86State *env)
+{
+    env->eflags &= ~RF_MASK;
+}
+
+void helper_cli(CPUX86State *env)
+{
+    env->eflags &= ~IF_MASK;
+}
+
+void helper_sti(CPUX86State *env)
+{
+    env->eflags |= IF_MASK;
+}
+
+void helper_set_inhibit_irq(CPUX86State *env)
+{
+    env->hflags |= HF_INHIBIT_IRQ_MASK;
+}
+
+void helper_reset_inhibit_irq(CPUX86State *env)
+{
+    env->hflags &= ~HF_INHIBIT_IRQ_MASK;
+}
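
The new cc_helper.c carries QEMU's lazy condition-code scheme: translated code only records which operation last set the flags (cc_op) together with its operands/result (cc_src, cc_dst), and helper_cc_compute_all() reconstructs EFLAGS on demand, now taking env explicitly instead of reading a global. A self-contained sketch of the idea, reduced to CF/ZF/SF and using invented Sk* names (not code from this patch):

    #include <stdint.h>
    #include <stdio.h>

    enum { CC_C = 0x0001, CC_Z = 0x0040, CC_S = 0x0080 };   /* x86 flag bits */
    typedef enum { SK_CC_OP_EFLAGS, SK_CC_OP_ADDL, SK_CC_OP_LOGICL } SkCCOp;

    typedef struct {
        uint32_t cc_src, cc_dst;   /* operands/result of the last flag op */
        SkCCOp   cc_op;
    } SkEnv;

    static uint32_t sk_cc_compute_all(SkEnv *env)
    {
        switch (env->cc_op) {
        case SK_CC_OP_EFLAGS:           /* flags were materialized already */
            return env->cc_src;
        case SK_CC_OP_ADDL: {
            uint32_t src1 = env->cc_dst - env->cc_src;   /* recover addend */
            return ((env->cc_dst < src1) ? CC_C : 0) |
                   ((env->cc_dst == 0) ? CC_Z : 0) |
                   ((env->cc_dst & 0x80000000u) ? CC_S : 0);
        }
        case SK_CC_OP_LOGICL:           /* logic ops clear CF, keep ZF/SF */
            return ((env->cc_dst == 0) ? CC_Z : 0) |
                   ((env->cc_dst & 0x80000000u) ? CC_S : 0);
        }
        return 0;
    }

    int main(void)
    {
        /* model an ADD of -5 and 5: result 0, carry out */
        SkEnv env = { .cc_src = 5, .cc_dst = 0, .cc_op = SK_CC_OP_ADDL };
        printf("flags = 0x%x\n", sk_cc_compute_all(&env));  /* 0x41 = CC_C|CC_Z */
        return 0;
    }

Running it prints 0x41: the modelled -5 + 5 sets CF and ZF, which is what the full compute_all_addl() would report for those bits.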
diff --git a/target-i386/helper_template.h b/target-i386/cc_helper_template.h
similarity index 72%
rename from target-i386/helper_template.h
rename to target-i386/cc_helper_template.h
index c1087ac..16f5d91 100644
--- a/target-i386/helper_template.h
+++ b/target-i386/cc_helper_template.h
@@ -52,7 +52,7 @@
 
 /* dynamic flags computation */
 
-static int glue(compute_all_add, SUFFIX)(void)
+static int glue(compute_all_add, SUFFIX)(CPUX86State *env)
 {
     int cf, pf, af, zf, sf, of;
     target_long src1, src2;
@@ -67,7 +67,7 @@
     return cf | pf | af | zf | sf | of;
 }
 
-static int glue(compute_c_add, SUFFIX)(void)
+static int glue(compute_c_add, SUFFIX)(CPUX86State *env)
 {
     int cf;
     target_long src1;
@@ -76,7 +76,7 @@
     return cf;
 }
 
-static int glue(compute_all_adc, SUFFIX)(void)
+static int glue(compute_all_adc, SUFFIX)(CPUX86State *env)
 {
     int cf, pf, af, zf, sf, of;
     target_long src1, src2;
@@ -91,7 +91,7 @@
     return cf | pf | af | zf | sf | of;
 }
 
-static int glue(compute_c_adc, SUFFIX)(void)
+static int glue(compute_c_adc, SUFFIX)(CPUX86State *env)
 {
     int cf;
     target_long src1;
@@ -100,7 +100,7 @@
     return cf;
 }
 
-static int glue(compute_all_sub, SUFFIX)(void)
+static int glue(compute_all_sub, SUFFIX)(CPUX86State *env)
 {
     int cf, pf, af, zf, sf, of;
     target_long src1, src2;
@@ -115,7 +115,7 @@
     return cf | pf | af | zf | sf | of;
 }
 
-static int glue(compute_c_sub, SUFFIX)(void)
+static int glue(compute_c_sub, SUFFIX)(CPUX86State *env)
 {
     int cf;
     target_long src1, src2;
@@ -125,7 +125,7 @@
     return cf;
 }
 
-static int glue(compute_all_sbb, SUFFIX)(void)
+static int glue(compute_all_sbb, SUFFIX)(CPUX86State *env)
 {
     int cf, pf, af, zf, sf, of;
     target_long src1, src2;
@@ -140,7 +140,7 @@
     return cf | pf | af | zf | sf | of;
 }
 
-static int glue(compute_c_sbb, SUFFIX)(void)
+static int glue(compute_c_sbb, SUFFIX)(CPUX86State *env)
 {
     int cf;
     target_long src1, src2;
@@ -150,7 +150,7 @@
     return cf;
 }
 
-static int glue(compute_all_logic, SUFFIX)(void)
+static int glue(compute_all_logic, SUFFIX)(CPUX86State *env)
 {
     int cf, pf, af, zf, sf, of;
     cf = 0;
@@ -162,12 +162,12 @@
     return cf | pf | af | zf | sf | of;
 }
 
-static int glue(compute_c_logic, SUFFIX)(void)
+static int glue(compute_c_logic, SUFFIX)(CPUX86State *env)
 {
     return 0;
 }
 
-static int glue(compute_all_inc, SUFFIX)(void)
+static int glue(compute_all_inc, SUFFIX)(CPUX86State *env)
 {
     int cf, pf, af, zf, sf, of;
     target_long src1, src2;
@@ -183,13 +183,13 @@
 }
 
 #if DATA_BITS == 32
-static int glue(compute_c_inc, SUFFIX)(void)
+static int glue(compute_c_inc, SUFFIX)(CPUX86State *env)
 {
     return CC_SRC;
 }
 #endif
 
-static int glue(compute_all_dec, SUFFIX)(void)
+static int glue(compute_all_dec, SUFFIX)(CPUX86State *env)
 {
     int cf, pf, af, zf, sf, of;
     target_long src1, src2;
@@ -204,7 +204,7 @@
     return cf | pf | af | zf | sf | of;
 }
 
-static int glue(compute_all_shl, SUFFIX)(void)
+static int glue(compute_all_shl, SUFFIX)(CPUX86State *env)
 {
     int cf, pf, af, zf, sf, of;
     cf = (CC_SRC >> (DATA_BITS - 1)) & CC_C;
@@ -217,19 +217,19 @@
     return cf | pf | af | zf | sf | of;
 }
 
-static int glue(compute_c_shl, SUFFIX)(void)
+static int glue(compute_c_shl, SUFFIX)(CPUX86State *env)
 {
     return (CC_SRC >> (DATA_BITS - 1)) & CC_C;
 }
 
 #if DATA_BITS == 32
-static int glue(compute_c_sar, SUFFIX)(void)
+static int glue(compute_c_sar, SUFFIX)(CPUX86State *env)
 {
     return CC_SRC & 1;
 }
 #endif
 
-static int glue(compute_all_sar, SUFFIX)(void)
+static int glue(compute_all_sar, SUFFIX)(CPUX86State *env)
 {
     int cf, pf, af, zf, sf, of;
     cf = CC_SRC & 1;
@@ -243,7 +243,7 @@
 }
 
 #if DATA_BITS == 32
-static int glue(compute_c_mul, SUFFIX)(void)
+static int glue(compute_c_mul, SUFFIX)(CPUX86State *env)
 {
     int cf;
     cf = (CC_SRC != 0);
@@ -253,7 +253,7 @@
 
 /* NOTE: we compute the flags like the P4. On older CPUs, only OF and
    CF are modified and it is slower to do that. */
-static int glue(compute_all_mul, SUFFIX)(void)
+static int glue(compute_all_mul, SUFFIX)(CPUX86State *env)
 {
     int cf, pf, af, zf, sf, of;
     cf = (CC_SRC != 0);
@@ -265,66 +265,6 @@
     return cf | pf | af | zf | sf | of;
 }
 
-/* shifts */
-
-target_ulong glue(helper_rcl, SUFFIX)(target_ulong t0, target_ulong t1)
-{
-    int count, eflags;
-    target_ulong src;
-    target_long res;
-
-    count = t1 & SHIFT1_MASK;
-#if DATA_BITS == 16
-    count = rclw_table[count];
-#elif DATA_BITS == 8
-    count = rclb_table[count];
-#endif
-    if (count) {
-        eflags = helper_cc_compute_all(CC_OP);
-        t0 &= DATA_MASK;
-        src = t0;
-        res = (t0 << count) | ((target_ulong)(eflags & CC_C) << (count - 1));
-        if (count > 1)
-            res |= t0 >> (DATA_BITS + 1 - count);
-        t0 = res;
-        env->cc_tmp = (eflags & ~(CC_C | CC_O)) |
-            (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) |
-            ((src >> (DATA_BITS - count)) & CC_C);
-    } else {
-        env->cc_tmp = -1;
-    }
-    return t0;
-}
-
-target_ulong glue(helper_rcr, SUFFIX)(target_ulong t0, target_ulong t1)
-{
-    int count, eflags;
-    target_ulong src;
-    target_long res;
-
-    count = t1 & SHIFT1_MASK;
-#if DATA_BITS == 16
-    count = rclw_table[count];
-#elif DATA_BITS == 8
-    count = rclb_table[count];
-#endif
-    if (count) {
-        eflags = helper_cc_compute_all(CC_OP);
-        t0 &= DATA_MASK;
-        src = t0;
-        res = (t0 >> count) | ((target_ulong)(eflags & CC_C) << (DATA_BITS - count));
-        if (count > 1)
-            res |= t0 << (DATA_BITS + 1 - count);
-        t0 = res;
-        env->cc_tmp = (eflags & ~(CC_C | CC_O)) |
-            (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) |
-            ((src >> (count - 1)) & CC_C);
-    } else {
-        env->cc_tmp = -1;
-    }
-    return t0;
-}
-
 #undef DATA_BITS
 #undef SHIFT_MASK
 #undef SHIFT1_MASK
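
cc_helper_template.h keeps the same width-template trick as the old helper_template.h: the header is included once per SHIFT value, and glue() pastes a size suffix (b/w/l/q) onto every helper name; the only change in this patch is that each generated function now receives CPUX86State *env. A compilable single-file caricature of the pattern, where a macro stands in for the repeated #include and is_zero_* is an invented example:

    #include <stdint.h>
    #include <stdio.h>

    #define glue2(a, b) a##b
    #define glue(a, b)  glue2(a, b)

    /* One body, stamped out once per operand width, exactly as
       cc_helper_template.h is re-included with SHIFT 0/1/2(/3). */
    #define DEFINE_IS_ZERO(SUFFIX, DATA_TYPE)                 \
        static int glue(is_zero_, SUFFIX)(uint32_t x)         \
        {                                                     \
            return (DATA_TYPE)x == 0;                         \
        }

    DEFINE_IS_ZERO(b, uint8_t)   /* SHIFT 0 in the real template */
    DEFINE_IS_ZERO(w, uint16_t)  /* SHIFT 1 */
    DEFINE_IS_ZERO(l, uint32_t)  /* SHIFT 2 */

    int main(void)
    {
        printf("%d %d %d\n",
               is_zero_b(0x100), is_zero_w(0x10000), is_zero_l(0x100));
        return 0;
    }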
diff --git a/target-i386/cpu-qom.h b/target-i386/cpu-qom.h
new file mode 100644
index 0000000..a1678bc
--- /dev/null
+++ b/target-i386/cpu-qom.h
@@ -0,0 +1,21 @@
+#ifndef QEMU_X86_CPU_QOM_H
+#define QEMU_X86_CPU_QOM_H
+
+#include "qemu/osdep.h"
+#include "qom/cpu.h"
+
+typedef struct X86CPU {
+    CPUState parent_obj;
+
+    CPUX86State env;
+} X86CPU;
+
+static inline X86CPU *x86_env_get_cpu(CPUX86State *env)
+{
+    return container_of(env, X86CPU, env);
+}
+
+#define ENV_GET_CPU(e)  CPU(x86_env_get_cpu(e))
+#define ENV_OFFSET offsetof(X86CPU, env)
+
+#endif  // QEMU_X86_CPU_QOM_H
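
cpu-qom.h introduces the X86CPU wrapper and the ENV_GET_CPU() macro used elsewhere in this patch (for example in hax-all.c below): given a CPUX86State embedded in an X86CPU, container_of() walks back to the enclosing object and from there to the generic CPUState. A standalone sketch of that pointer arithmetic, with invented Sk* stand-ins for the real types:

    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    typedef struct { int halted; }  SkCPUState;     /* stands in for CPUState    */
    typedef struct { int regs[8]; } SkCPUX86State;  /* stands in for CPUX86State */

    typedef struct SkX86CPU {
        SkCPUState    parent_obj;
        SkCPUX86State env;
    } SkX86CPU;

    static SkX86CPU *sk_env_get_cpu(SkCPUX86State *env)
    {
        return container_of(env, SkX86CPU, env);    /* as x86_env_get_cpu() */
    }

    int main(void)
    {
        SkX86CPU cpu = { .parent_obj = { .halted = 1 } };
        SkCPUX86State *env = &cpu.env;              /* what helpers receive */
        printf("halted=%d\n", sk_env_get_cpu(env)->parent_obj.halted);
        return 0;
    }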
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 44f29e1..45bac7d 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -604,7 +604,7 @@
 #define CPU_INTERRUPT_TPR       CPU_INTERRUPT_TGT_INT_3
 
 
-enum {
+typedef enum {
     CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */
     CC_OP_EFLAGS,  /* all cc are explicitly computed, CC_SRC = flags */
 
@@ -659,7 +659,7 @@
     CC_OP_SARQ,
 
     CC_OP_NB,
-};
+} CCOp;
 
 typedef struct SegmentCache {
     uint32_t selector;
@@ -777,7 +777,6 @@
     XMMReg xmm_regs[CPU_NB_REGS];
     XMMReg xmm_t0;
     MMXReg mmx_t0;
-    target_ulong cc_tmp; /* temporary for rcr/rcl */
 
     /* sysenter registers */
     uint32_t sysenter_cs;
@@ -859,6 +858,8 @@
     uint64 *mce_banks;
 } CPUX86State;
 
+#include "cpu-qom.h"
+
 CPUX86State *cpu_x86_init(const char *cpu_model);
 int cpu_x86_exec(CPUX86State *s);
 void cpu_x86_close(CPUX86State *s);
@@ -956,7 +957,7 @@
 
 /* helper.c */
 int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr,
-                             int is_write, int mmu_idx, int is_softmmu);
+                             int is_write, int mmu_idx);
 void cpu_x86_set_a20(CPUX86State *env, int a20_state);
 void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
                    uint32_t *eax, uint32_t *ebx,
@@ -1033,6 +1034,45 @@
     return (env->hflags & HF_CPL_MASK) == 3 ? 1 : 0;
 }
 
+#undef EAX
+#define EAX (env->regs[R_EAX])
+#undef ECX
+#define ECX (env->regs[R_ECX])
+#undef EDX
+#define EDX (env->regs[R_EDX])
+#undef EBX
+#define EBX (env->regs[R_EBX])
+#undef ESP
+#define ESP (env->regs[R_ESP])
+#undef EBP
+#define EBP (env->regs[R_EBP])
+#undef ESI
+#define ESI (env->regs[R_ESI])
+#undef EDI
+#define EDI (env->regs[R_EDI])
+#undef EIP
+#define EIP (env->eip)
+#define DF  (env->df)
+
+#define CC_SRC (env->cc_src)
+#define CC_DST (env->cc_dst)
+#define CC_OP  (env->cc_op)
+
+/* n must be a constant to be efficient */
+static inline target_long lshift(target_long x, int n)
+{
+    if (n >= 0)
+        return x << n;
+    else
+        return x >> (-n);
+}
+
+/* float macros */
+#define FT0    (env->ft0)
+#define ST0    (env->fpregs[env->fpstt].d)
+#define ST(n)  (env->fpregs[(env->fpstt + (n)) & 7].d)
+#define ST1    ST(1)
+
 /* translate.c */
 void optimize_flags_init(void);
 
@@ -1056,13 +1096,25 @@
 }
 #endif
 
-//#include "exec/cpu-all.h"
-#include "exec/exec-all.h"
-
-struct TranslationBlock;
-
+#include "exec/cpu-all.h"
 #include "svm.h"
 
+static inline bool cpu_has_work(CPUState *cpu)
+{
+    int work;
+    CPUX86State *env = cpu->env_ptr;
+
+    work = (cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
+           (env->eflags & IF_MASK);
+    work |= cpu->interrupt_request & CPU_INTERRUPT_NMI;
+    work |= cpu->interrupt_request & CPU_INTERRUPT_INIT;
+    work |= cpu->interrupt_request & CPU_INTERRUPT_SIPI;
+
+    return work;
+}
+
+#include "exec/exec-all.h"
+
 static inline void cpu_pc_from_tb(CPUX86State *env, TranslationBlock *tb)
 {
     env->eip = tb->pc - tb->cs_base;
@@ -1081,4 +1133,54 @@
 void apic_sipi(CPUX86State *env);
 void do_cpu_init(CPUX86State *env);
 void do_cpu_sipi(CPUX86State *env);
+
+/* excp_helper.c */
+void do_interrupt(CPUArchState *env);
+void do_interrupt_x86_hardirq(CPUArchState *env, int intno, int is_hw);
+//void QEMU_NORETURN raise_exception_err(int exception_index, int error_code);
+void QEMU_NORETURN raise_exception(CPUArchState *env, int exception_index);
+void QEMU_NORETURN raise_exception_err(CPUX86State *env,
+                                       int exception_index,
+                                       int error_code);
+
+void QEMU_NORETURN raise_interrupt(CPUX86State *env,
+                                   int intno, int is_int, int error_code,
+                                   int next_eip_addend);
+
+void do_smm_enter(CPUArchState *env1);
+
+void svm_check_intercept(CPUArchState *env1, uint32_t type);
+
+/* cc_helper.c */
+extern const uint8_t parity_table[256];
+uint32_t cpu_cc_compute_all(CPUArchState *env1, int op);
+
+static inline uint32_t cpu_compute_eflags(CPUX86State *env)
+{
+    return env->eflags | cpu_cc_compute_all(env, CC_OP) | (DF & DF_MASK);
+}
+
+/* NOTE: CC_OP must be modified manually to CC_OP_EFLAGS */
+static inline void cpu_load_eflags(CPUX86State *env,
+                                   int eflags, int update_mask)
+{
+    CC_SRC = eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
+    DF = 1 - (2 * ((eflags >> 10) & 1));
+    env->eflags = (env->eflags & ~update_mask) |
+        (eflags & update_mask) | 0x2;
+}
+
+/* load efer and update the corresponding hflags. XXX: do consistency
+   checks with cpuid bits ? */
+static inline void cpu_load_efer(CPUX86State *env, uint64_t val)
+{
+    env->efer = val;
+    env->hflags &= ~(HF_LMA_MASK | HF_SVME_MASK);
+    if (env->efer & MSR_EFER_LMA)
+        env->hflags |= HF_LMA_MASK;
+    if (env->efer & MSR_EFER_SVME)
+        env->hflags |= HF_SVME_MASK;
+}
+
+
 #endif /* CPU_I386_H */
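
One detail worth spelling out in the eflags helpers that moved into cpu.h: the direction flag is not stored as EFLAGS bit 10 but as env->df = +1/-1, so string instructions can step pointers by df times the operand size. cpu_load_eflags() unpacks it with 1 - 2*((eflags >> 10) & 1), and cpu_compute_eflags() repacks it with DF & DF_MASK, which works because -1 & 0x400 == 0x400. A tiny worked example (standalone sketch, not patch code):

    #include <stdio.h>

    #define DF_MASK 0x00000400   /* EFLAGS.DF, bit 10 */

    int main(void)
    {
        int eflags = 0x400;                       /* DF set: string ops go down */
        int df = 1 - (2 * ((eflags >> 10) & 1));  /* -> -1, as in cpu_load_eflags */
        int index = 100;
        index += df * 4;                          /* one 32-bit string step: 96 */
        int df_bit = df & DF_MASK;                /* -1 & 0x400 == 0x400 */
        printf("df=%d index=%d df_bit=0x%x\n", df, index, df_bit);
        return 0;
    }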
diff --git a/target-i386/excp_helper.c b/target-i386/excp_helper.c
new file mode 100644
index 0000000..5d973e6
--- /dev/null
+++ b/target-i386/excp_helper.c
@@ -0,0 +1,120 @@
+/*
+ *  x86 exception helpers
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "cpu.h"
+#include "qemu/log.h"
+#include "sysemu/sysemu.h"
+#include "helper.h"
+
+#if 0
+#define raise_exception_err(a, b)\
+do {\
+    qemu_log("raise_exception line=%d\n", __LINE__);\
+    (raise_exception_err)(a, b);\
+} while (0)
+#endif
+
+void helper_raise_interrupt(CPUX86State *env, int intno, int next_eip_addend)
+{
+    raise_interrupt(env, intno, 1, 0, next_eip_addend);
+}
+
+void helper_raise_exception(CPUX86State *env, int exception_index)
+{
+    raise_exception(env, exception_index);
+}
+
+/*
+ * Check nested exceptions and change to double or triple fault if
+ * needed. It should only be called if this is not an interrupt.
+ * Returns the new exception number.
+ */
+static int check_exception(CPUX86State *env, int intno, int *error_code)
+{
+    int first_contributory = env->old_exception == 0 ||
+                              (env->old_exception >= 10 &&
+                               env->old_exception <= 13);
+    int second_contributory = intno == 0 ||
+                               (intno >= 10 && intno <= 13);
+
+    qemu_log_mask(CPU_LOG_INT, "check_exception old: 0x%x new 0x%x\n",
+                env->old_exception, intno);
+
+#if !defined(CONFIG_USER_ONLY)
+    if (env->old_exception == EXCP08_DBLE) {
+        if (env->hflags & HF_SVMI_MASK)
+            helper_vmexit(env, SVM_EXIT_SHUTDOWN, 0); /* does not return */
+
+        qemu_log_mask(CPU_LOG_RESET, "Triple fault\n");
+
+        qemu_system_reset_request();
+        return EXCP_HLT;
+    }
+#endif
+
+    if ((first_contributory && second_contributory)
+        || (env->old_exception == EXCP0E_PAGE &&
+            (second_contributory || (intno == EXCP0E_PAGE)))) {
+        intno = EXCP08_DBLE;
+        *error_code = 0;
+    }
+
+    if (second_contributory || (intno == EXCP0E_PAGE) ||
+        (intno == EXCP08_DBLE))
+        env->old_exception = intno;
+
+    return intno;
+}
+
+/*
+ * Signal an interrupt or exception. It is executed in the main CPU loop.
+ * is_int is TRUE if coming from the int instruction. next_eip is the
+ * EIP value AFTER the interrupt instruction. It is only relevant if
+ * is_int is TRUE.
+ */
+void QEMU_NORETURN raise_interrupt(CPUX86State *env,
+                                   int intno, int is_int, int error_code,
+                                   int next_eip_addend)
+{
+    if (!is_int) {
+        helper_svm_check_intercept_param(env, SVM_EXIT_EXCP_BASE + intno, error_code);
+        intno = check_exception(env, intno, &error_code);
+    } else {
+        helper_svm_check_intercept_param(env, SVM_EXIT_SWINT, 0);
+    }
+
+    env->exception_index = intno;
+    env->error_code = error_code;
+    env->exception_is_int = is_int;
+    env->exception_next_eip = env->eip + next_eip_addend;
+    cpu_loop_exit(env);
+}
+
+/* shortcuts to generate exceptions */
+
+void raise_exception_err(CPUX86State *env,
+                         int exception_index, int error_code)
+{
+    raise_interrupt(env, exception_index, 0, error_code, 0);
+}
+
+void raise_exception(CPUX86State *env, int exception_index)
+{
+    raise_interrupt(env, exception_index, 0, 0, 0);
+}
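
check_exception() above encodes the x86 fault-classification rule: two contributory faults in a row (vector 0 or vectors 10-13), or a page fault followed by a contributory fault or another page fault, are promoted to a double fault (vector 8), and a pending double fault escalates to the triple-fault/reset path. A standalone sketch of just the promotion decision, with the env bookkeeping and the reset path left out:

    #include <stdio.h>

    static int is_contributory(int vector)        /* #DE or vectors 10..13 */
    {
        return vector == 0 || (vector >= 10 && vector <= 13);
    }

    static int promote(int old_vector, int new_vector)
    {
        if ((is_contributory(old_vector) && is_contributory(new_vector)) ||
            (old_vector == 14 /* #PF */ &&
             (is_contributory(new_vector) || new_vector == 14))) {
            return 8;                             /* EXCP08_DBLE */
        }
        return new_vector;
    }

    int main(void)
    {
        printf("#GP then #NP -> %d\n", promote(13, 11));  /* 8: double fault   */
        printf("#PF then #PF -> %d\n", promote(14, 14));  /* 8: double fault   */
        printf("#DB then #GP -> %d\n", promote(1, 13));   /* 13: #DB is benign */
        return 0;
    }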
diff --git a/target-i386/exec.h b/target-i386/exec.h
deleted file mode 100644
index 00a248f..0000000
--- a/target-i386/exec.h
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
- *  i386 execution defines
- *
- *  Copyright (c) 2003 Fabrice Bellard
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
- */
-#include "config.h"
-#include "dyngen-exec.h"
-
-/* XXX: factorize this mess */
-#ifdef TARGET_X86_64
-#define TARGET_LONG_BITS 64
-#else
-#define TARGET_LONG_BITS 32
-#endif
-
-#include "exec/cpu-defs.h"
-
-GLOBAL_REGISTER_VARIABLE_DECL struct CPUX86State *env asm(AREG0);
-
-#include "qemu-common.h"
-#include "qemu/log.h"
-
-#define EAX (env->regs[R_EAX])
-#define ECX (env->regs[R_ECX])
-#define EDX (env->regs[R_EDX])
-#define EBX (env->regs[R_EBX])
-#define ESP (env->regs[R_ESP])
-#define EBP (env->regs[R_EBP])
-#define ESI (env->regs[R_ESI])
-#define EDI (env->regs[R_EDI])
-#define EIP (env->eip)
-#define DF  (env->df)
-
-#define CC_SRC (env->cc_src)
-#define CC_DST (env->cc_dst)
-#define CC_OP  (env->cc_op)
-
-/* float macros */
-#define FT0    (env->ft0)
-#define ST0    (env->fpregs[env->fpstt].d)
-#define ST(n)  (env->fpregs[(env->fpstt + (n)) & 7].d)
-#define ST1    ST(1)
-
-#include "cpu.h"
-#include "exec/exec-all.h"
-
-/* op_helper.c */
-void do_interrupt(int intno, int is_int, int error_code,
-                  target_ulong next_eip, int is_hw);
-void do_interrupt_user(int intno, int is_int, int error_code,
-                       target_ulong next_eip);
-void QEMU_NORETURN raise_exception_err(int exception_index, int error_code);
-void QEMU_NORETURN raise_exception(int exception_index);
-void do_smm_enter(void);
-
-/* n must be a constant to be efficient */
-static inline target_long lshift(target_long x, int n)
-{
-    if (n >= 0)
-        return x << n;
-    else
-        return x >> (-n);
-}
-
-#include "helper.h"
-
-static inline void svm_check_intercept(uint32_t type)
-{
-    helper_svm_check_intercept_param(type, 0);
-}
-
-#if !defined(CONFIG_USER_ONLY)
-
-#include "exec/softmmu_exec.h"
-
-#endif /* !defined(CONFIG_USER_ONLY) */
-
-#define RC_MASK         0xc00
-#define RC_NEAR		0x000
-#define RC_DOWN		0x400
-#define RC_UP		0x800
-#define RC_CHOP		0xc00
-
-#define MAXTAN 9223372036854775808.0
-
-/* the following deal with x86 long double-precision numbers */
-#define MAXEXPD 0x7fff
-#define EXPBIAS 16383
-#define EXPD(fp)	(fp.l.upper & 0x7fff)
-#define SIGND(fp)	((fp.l.upper) & 0x8000)
-#define MANTD(fp)       (fp.l.lower)
-#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
-
-static inline void fpush(void)
-{
-    env->fpstt = (env->fpstt - 1) & 7;
-    env->fptags[env->fpstt] = 0; /* validate stack entry */
-}
-
-static inline void fpop(void)
-{
-    env->fptags[env->fpstt] = 1; /* invvalidate stack entry */
-    env->fpstt = (env->fpstt + 1) & 7;
-}
-
-static inline floatx80 helper_fldt(target_ulong ptr)
-{
-    floatx80 temp;
-
-    temp.low = ldq(ptr);
-    temp.high = lduw(ptr + 8);
-    return temp;
-}
-
-static inline void helper_fstt(floatx80 f, target_ulong ptr)
-{
-    stq(ptr, f.low);
-    stw(ptr + 8, f.high);
-}
-
-#define FPUS_IE (1 << 0)
-#define FPUS_DE (1 << 1)
-#define FPUS_ZE (1 << 2)
-#define FPUS_OE (1 << 3)
-#define FPUS_UE (1 << 4)
-#define FPUS_PE (1 << 5)
-#define FPUS_SF (1 << 6)
-#define FPUS_SE (1 << 7)
-#define FPUS_B  (1 << 15)
-
-#define FPUC_EM 0x3f
-
-static inline uint32_t compute_eflags(void)
-{
-    return env->eflags | helper_cc_compute_all(CC_OP) | (DF & DF_MASK);
-}
-
-/* NOTE: CC_OP must be modified manually to CC_OP_EFLAGS */
-static inline void load_eflags(int eflags, int update_mask)
-{
-    CC_SRC = eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
-    DF = 1 - (2 * ((eflags >> 10) & 1));
-    env->eflags = (env->eflags & ~update_mask) |
-        (eflags & update_mask) | 0x2;
-}
-
-static inline void env_to_regs(void)
-{
-#ifdef reg_EAX
-    EAX = env->regs[R_EAX];
-#endif
-#ifdef reg_ECX
-    ECX = env->regs[R_ECX];
-#endif
-#ifdef reg_EDX
-    EDX = env->regs[R_EDX];
-#endif
-#ifdef reg_EBX
-    EBX = env->regs[R_EBX];
-#endif
-#ifdef reg_ESP
-    ESP = env->regs[R_ESP];
-#endif
-#ifdef reg_EBP
-    EBP = env->regs[R_EBP];
-#endif
-#ifdef reg_ESI
-    ESI = env->regs[R_ESI];
-#endif
-#ifdef reg_EDI
-    EDI = env->regs[R_EDI];
-#endif
-}
-
-static inline void regs_to_env(void)
-{
-#ifdef reg_EAX
-    env->regs[R_EAX] = EAX;
-#endif
-#ifdef reg_ECX
-    env->regs[R_ECX] = ECX;
-#endif
-#ifdef reg_EDX
-    env->regs[R_EDX] = EDX;
-#endif
-#ifdef reg_EBX
-    env->regs[R_EBX] = EBX;
-#endif
-#ifdef reg_ESP
-    env->regs[R_ESP] = ESP;
-#endif
-#ifdef reg_EBP
-    env->regs[R_EBP] = EBP;
-#endif
-#ifdef reg_ESI
-    env->regs[R_ESI] = ESI;
-#endif
-#ifdef reg_EDI
-    env->regs[R_EDI] = EDI;
-#endif
-}
-
-static inline int cpu_has_work(CPUX86State *env)
-{
-    int work;
-
-    work = (env->interrupt_request & CPU_INTERRUPT_HARD) &&
-           (env->eflags & IF_MASK);
-    work |= env->interrupt_request & CPU_INTERRUPT_NMI;
-    work |= env->interrupt_request & CPU_INTERRUPT_INIT;
-    work |= env->interrupt_request & CPU_INTERRUPT_SIPI;
-
-    return work;
-}
-
-static inline int cpu_halted(CPUX86State *env) {
-    /* handle exit of HALTED state */
-    if (!env->halted)
-        return 0;
-    /* disable halt condition */
-    if (cpu_has_work(env)) {
-        env->halted = 0;
-        return 0;
-    }
-    return EXCP_HALTED;
-}
-
-/* load efer and update the corresponding hflags. XXX: do consistency
-   checks with cpuid bits ? */
-static inline void cpu_load_efer(CPUX86State *env, uint64_t val)
-{
-    env->efer = val;
-    env->hflags &= ~(HF_LMA_MASK | HF_SVME_MASK);
-    if (env->efer & MSR_EFER_LMA)
-        env->hflags |= HF_LMA_MASK;
-    if (env->efer & MSR_EFER_SVME)
-        env->hflags |= HF_SVME_MASK;
-}
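
Deleting exec.h is the other half of the env-parameter conversion seen in the new helper files: helpers used to reach guest state through the global register variable declared here via dyngen-exec.h and AREG0, whereas they now receive CPUX86State *env as an explicit argument. A compilable caricature of the two calling styles (invented SkEnv type; 0x200 is the IF bit, as in IF_MASK):

    #include <stdio.h>

    typedef struct { unsigned eflags; } SkEnv;

    static SkEnv *global_env;                      /* old style: implicit state */
    static void old_helper_cli(void)     { global_env->eflags &= ~0x200u; }

    static void new_helper_cli(SkEnv *env) { env->eflags &= ~0x200u; }  /* as helper_cli(env) */

    int main(void)
    {
        SkEnv e = { .eflags = 0x202 };
        global_env = &e;
        old_helper_cli();                          /* relies on the global being set up */
        new_helper_cli(&e);                        /* state is explicit at the call site */
        printf("eflags=0x%x\n", e.eflags);         /* 0x2: IF (bit 9) cleared */
        return 0;
    }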
diff --git a/target-i386/fpu_helper.c b/target-i386/fpu_helper.c
new file mode 100644
index 0000000..b67f82b
--- /dev/null
+++ b/target-i386/fpu_helper.c
@@ -0,0 +1,1195 @@
+/*
+ *  x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <math.h>
+#include "cpu.h"
+#include "helper.h"
+#include "qemu/aes.h"
+#include "qemu/host-utils.h"
+
+#if !defined(CONFIG_USER_ONLY)
+#include "exec/softmmu_exec.h"
+#endif /* !defined(CONFIG_USER_ONLY) */
+
+#define RC_MASK         0xc00
+#define RC_NEAR         0x000
+#define RC_DOWN         0x400
+#define RC_UP           0x800
+#define RC_CHOP         0xc00
+
+#define MAXTAN 9223372036854775808.0
+
+/* the following deal with x86 long double-precision numbers */
+#define MAXEXPD 0x7fff
+#define EXPBIAS 16383
+#define EXPD(fp)        (fp.l.upper & 0x7fff)
+#define SIGND(fp)       ((fp.l.upper) & 0x8000)
+#define MANTD(fp)       (fp.l.lower)
+#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
+
+#define floatx80_lg2 make_floatx80( 0x3ffd, 0x9a209a84fbcff799LL )
+#define floatx80_l2e make_floatx80( 0x3fff, 0xb8aa3b295c17f0bcLL )
+#define floatx80_l2t make_floatx80( 0x4000, 0xd49a784bcd1b8afeLL )
+
+static const floatx80 f15rk[7] =
+{
+    floatx80_zero,
+    floatx80_one,
+    floatx80_pi,
+    floatx80_lg2,
+    floatx80_ln2,
+    floatx80_l2e,
+    floatx80_l2t,
+};
+
+static inline void fpush(CPUX86State *env)
+{
+    env->fpstt = (env->fpstt - 1) & 7;
+    env->fptags[env->fpstt] = 0; /* validate stack entry */
+}
+
+static inline void fpop(CPUX86State *env)
+{
+    env->fptags[env->fpstt] = 1; /* invalidate stack entry */
+    env->fpstt = (env->fpstt + 1) & 7;
+}
+
+static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr)
+{
+    floatx80 temp;
+
+    temp.low = cpu_ldq_data(env, ptr);
+    temp.high = cpu_lduw_data(env, ptr + 8);
+    return temp;
+}
+
+static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr)
+{
+    cpu_stq_data(env, ptr, f.low);
+    cpu_stw_data(env, ptr + 8, f.high);
+}
+
+#define FPUS_IE (1 << 0)
+#define FPUS_DE (1 << 1)
+#define FPUS_ZE (1 << 2)
+#define FPUS_OE (1 << 3)
+#define FPUS_UE (1 << 4)
+#define FPUS_PE (1 << 5)
+#define FPUS_SF (1 << 6)
+#define FPUS_SE (1 << 7)
+#define FPUS_B  (1 << 15)
+
+#define FPUC_EM 0x3f
+
+/* x87 FPU helpers */
+
+static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
+{
+    union {
+        float64 f64;
+        double d;
+    } u;
+
+    u.f64 = floatx80_to_float64(a, &env->fp_status);
+    return u.d;
+}
+
+static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
+{
+    union {
+        float64 f64;
+        double d;
+    } u;
+
+    u.d = a;
+    return float64_to_floatx80(u.f64, &env->fp_status);
+}
+
+static void fpu_set_exception(CPUX86State *env, int mask)
+{
+    env->fpus |= mask;
+    if (env->fpus & (~env->fpuc & FPUC_EM))
+        env->fpus |= FPUS_SE | FPUS_B;
+}
+
+static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
+{
+    if (floatx80_is_zero(b)) {
+        fpu_set_exception(env, FPUS_ZE);
+    }
+    return floatx80_div(a, b, &env->fp_status);
+}
+
+static void fpu_raise_exception(CPUX86State *env)
+{
+    if (env->cr[0] & CR0_NE_MASK) {
+        raise_exception(env, EXCP10_COPR);
+    }
+#if !defined(CONFIG_USER_ONLY)
+    else {
+        cpu_set_ferr(env);
+    }
+#endif
+}
+
+void helper_flds_FT0(CPUX86State *env, uint32_t val)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+    u.i = val;
+    FT0 = float32_to_floatx80(u.f, &env->fp_status);
+}
+
+void helper_fldl_FT0(CPUX86State *env, uint64_t val)
+{
+    union {
+        float64 f;
+        uint64_t i;
+    } u;
+    u.i = val;
+    FT0 = float64_to_floatx80(u.f, &env->fp_status);
+}
+
+void helper_fildl_FT0(CPUX86State *env, int32_t val)
+{
+    FT0 = int32_to_floatx80(val, &env->fp_status);
+}
+
+void helper_flds_ST0(CPUX86State *env, uint32_t val)
+{
+    int new_fpstt;
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+    new_fpstt = (env->fpstt - 1) & 7;
+    u.i = val;
+    env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
+    env->fpstt = new_fpstt;
+    env->fptags[new_fpstt] = 0; /* validate stack entry */
+}
+
+void helper_fldl_ST0(CPUX86State *env, uint64_t val)
+{
+    int new_fpstt;
+    union {
+        float64 f;
+        uint64_t i;
+    } u;
+    new_fpstt = (env->fpstt - 1) & 7;
+    u.i = val;
+    env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
+    env->fpstt = new_fpstt;
+    env->fptags[new_fpstt] = 0; /* validate stack entry */
+}
+
+void helper_fildl_ST0(CPUX86State *env, int32_t val)
+{
+    int new_fpstt;
+    new_fpstt = (env->fpstt - 1) & 7;
+    env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
+    env->fpstt = new_fpstt;
+    env->fptags[new_fpstt] = 0; /* validate stack entry */
+}
+
+void helper_fildll_ST0(CPUX86State *env, int64_t val)
+{
+    int new_fpstt;
+    new_fpstt = (env->fpstt - 1) & 7;
+    env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
+    env->fpstt = new_fpstt;
+    env->fptags[new_fpstt] = 0; /* validate stack entry */
+}
+
+uint32_t helper_fsts_ST0(CPUX86State *env)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+    u.f = floatx80_to_float32(ST0, &env->fp_status);
+    return u.i;
+}
+
+uint64_t helper_fstl_ST0(CPUX86State *env)
+{
+    union {
+        float64 f;
+        uint64_t i;
+    } u;
+    u.f = floatx80_to_float64(ST0, &env->fp_status);
+    return u.i;
+}
+
+int32_t helper_fist_ST0(CPUX86State *env)
+{
+    int32_t val;
+    val = floatx80_to_int32(ST0, &env->fp_status);
+    if (val != (int16_t)val)
+        val = -32768;
+    return val;
+}
+
+int32_t helper_fistl_ST0(CPUX86State *env)
+{
+    int32_t val;
+    val = floatx80_to_int32(ST0, &env->fp_status);
+    return val;
+}
+
+int64_t helper_fistll_ST0(CPUX86State *env)
+{
+    int64_t val;
+    val = floatx80_to_int64(ST0, &env->fp_status);
+    return val;
+}
+
+int32_t helper_fistt_ST0(CPUX86State *env)
+{
+    int32_t val;
+    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
+    if (val != (int16_t)val)
+        val = -32768;
+    return val;
+}
+
+int32_t helper_fisttl_ST0(CPUX86State *env)
+{
+    int32_t val;
+    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
+    return val;
+}
+
+int64_t helper_fisttll_ST0(CPUX86State *env)
+{
+    int64_t val;
+    val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
+    return val;
+}
+
+void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
+{
+    int new_fpstt;
+    new_fpstt = (env->fpstt - 1) & 7;
+    env->fpregs[new_fpstt].d = helper_fldt(env, ptr);
+    env->fpstt = new_fpstt;
+    env->fptags[new_fpstt] = 0; /* validate stack entry */
+}
+
+void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
+{
+    helper_fstt(env, ST0, ptr);
+}
+
+void helper_fpush(CPUX86State *env)
+{
+    fpush(env);
+}
+
+void helper_fpop(CPUX86State *env)
+{
+    fpop(env);
+}
+
+void helper_fdecstp(CPUX86State *env)
+{
+    env->fpstt = (env->fpstt - 1) & 7;
+    env->fpus &= (~0x4700);
+}
+
+void helper_fincstp(CPUX86State *env)
+{
+    env->fpstt = (env->fpstt + 1) & 7;
+    env->fpus &= (~0x4700);
+}
+
+/* FPU move */
+
+void helper_ffree_STN(CPUX86State *env, int st_index)
+{
+    env->fptags[(env->fpstt + st_index) & 7] = 1;
+}
+
+void helper_fmov_ST0_FT0(CPUX86State *env)
+{
+    ST0 = FT0;
+}
+
+void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
+{
+    FT0 = ST(st_index);
+}
+
+void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
+{
+    ST0 = ST(st_index);
+}
+
+void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
+{
+    ST(st_index) = ST0;
+}
+
+void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
+{
+    floatx80 tmp;
+    tmp = ST(st_index);
+    ST(st_index) = ST0;
+    ST0 = tmp;
+}
+
+/* FPU operations */
+
+static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
+
+void helper_fcom_ST0_FT0(CPUX86State *env)
+{
+    int ret;
+
+    ret = floatx80_compare(ST0, FT0, &env->fp_status);
+    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
+}
+
+void helper_fucom_ST0_FT0(CPUX86State *env)
+{
+    int ret;
+
+    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
+    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
+}
+
+static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
+
+void helper_fcomi_ST0_FT0(CPUX86State *env)
+{
+    int eflags;
+    int ret;
+
+    ret = floatx80_compare(ST0, FT0, &env->fp_status);
+    eflags = helper_cc_compute_all(env, CC_OP);
+    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
+    CC_SRC = eflags;
+}
+
+void helper_fucomi_ST0_FT0(CPUX86State *env)
+{
+    int eflags;
+    int ret;
+
+    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
+    eflags = helper_cc_compute_all(env, CC_OP);
+    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
+    CC_SRC = eflags;
+}
+
+void helper_fadd_ST0_FT0(CPUX86State *env)
+{
+    ST0 = floatx80_add(ST0, FT0, &env->fp_status);
+}
+
+void helper_fmul_ST0_FT0(CPUX86State *env)
+{
+    ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
+}
+
+void helper_fsub_ST0_FT0(CPUX86State *env)
+{
+    ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
+}
+
+void helper_fsubr_ST0_FT0(CPUX86State *env)
+{
+    ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
+}
+
+void helper_fdiv_ST0_FT0(CPUX86State *env)
+{
+    ST0 = helper_fdiv(env, ST0, FT0);
+}
+
+void helper_fdivr_ST0_FT0(CPUX86State *env)
+{
+    ST0 = helper_fdiv(env, FT0, ST0);
+}
+
+/* fp operations between STN and ST0 */
+
+void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
+{
+    ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
+}
+
+void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
+{
+    ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
+}
+
+void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
+{
+    ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
+}
+
+void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
+{
+    floatx80 *p;
+    p = &ST(st_index);
+    *p = floatx80_sub(ST0, *p, &env->fp_status);
+}
+
+void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
+{
+    floatx80 *p;
+    p = &ST(st_index);
+    *p = helper_fdiv(env, *p, ST0);
+}
+
+void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
+{
+    floatx80 *p;
+    p = &ST(st_index);
+    *p = helper_fdiv(env, ST0, *p);
+}
+
+/* misc FPU operations */
+void helper_fchs_ST0(CPUX86State *env)
+{
+    ST0 = floatx80_chs(ST0);
+}
+
+void helper_fabs_ST0(CPUX86State *env)
+{
+    ST0 = floatx80_abs(ST0);
+}
+
+void helper_fld1_ST0(CPUX86State *env)
+{
+    ST0 = f15rk[1];
+}
+
+void helper_fldl2t_ST0(CPUX86State *env)
+{
+    ST0 = f15rk[6];
+}
+
+void helper_fldl2e_ST0(CPUX86State *env)
+{
+    ST0 = f15rk[5];
+}
+
+void helper_fldpi_ST0(CPUX86State *env)
+{
+    ST0 = f15rk[2];
+}
+
+void helper_fldlg2_ST0(CPUX86State *env)
+{
+    ST0 = f15rk[3];
+}
+
+void helper_fldln2_ST0(CPUX86State *env)
+{
+    ST0 = f15rk[4];
+}
+
+void helper_fldz_ST0(CPUX86State *env)
+{
+    ST0 = f15rk[0];
+}
+
+void helper_fldz_FT0(CPUX86State *env)
+{
+    FT0 = f15rk[0];
+}
+
+uint32_t helper_fnstsw(CPUX86State *env)
+{
+    return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
+}
+
+uint32_t helper_fnstcw(CPUX86State *env)
+{
+    return env->fpuc;
+}
+
+static void update_fp_status(CPUX86State *env)
+{
+    int rnd_type;
+
+    /* set rounding mode */
+    switch(env->fpuc & RC_MASK) {
+    default:
+    case RC_NEAR:
+        rnd_type = float_round_nearest_even;
+        break;
+    case RC_DOWN:
+        rnd_type = float_round_down;
+        break;
+    case RC_UP:
+        rnd_type = float_round_up;
+        break;
+    case RC_CHOP:
+        rnd_type = float_round_to_zero;
+        break;
+    }
+    set_float_rounding_mode(rnd_type, &env->fp_status);
+    switch((env->fpuc >> 8) & 3) {
+    case 0:
+        rnd_type = 32;
+        break;
+    case 2:
+        rnd_type = 64;
+        break;
+    case 3:
+    default:
+        rnd_type = 80;
+        break;
+    }
+    set_floatx80_rounding_precision(rnd_type, &env->fp_status);
+}
+
+void helper_fldcw(CPUX86State *env, uint32_t val)
+{
+    env->fpuc = val;
+    update_fp_status(env);
+}
+
+void helper_fclex(CPUX86State *env)
+{
+    env->fpus &= 0x7f00;
+}
+
+void helper_fwait(CPUX86State *env)
+{
+    if (env->fpus & FPUS_SE)
+        fpu_raise_exception(env);
+}
+
+void helper_fninit(CPUX86State *env)
+{
+    env->fpus = 0;
+    env->fpstt = 0;
+    env->fpuc = 0x37f;
+    env->fptags[0] = 1;
+    env->fptags[1] = 1;
+    env->fptags[2] = 1;
+    env->fptags[3] = 1;
+    env->fptags[4] = 1;
+    env->fptags[5] = 1;
+    env->fptags[6] = 1;
+    env->fptags[7] = 1;
+}
+
+/* BCD ops */
+
+void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
+{
+    floatx80 tmp;
+    uint64_t val;
+    unsigned int v;
+    int i;
+
+    val = 0;
+    for(i = 8; i >= 0; i--) {
+        v = cpu_ldub_data(env, ptr + i);
+        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
+    }
+    tmp = int64_to_floatx80(val, &env->fp_status);
+    if (cpu_ldub_data(env, ptr + 9) & 0x80) {
+        floatx80_chs(tmp);
+    }
+    fpush(env);
+    ST0 = tmp;
+}
+
+void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
+{
+    int v;
+    target_ulong mem_ref, mem_end;
+    int64_t val;
+
+    val = floatx80_to_int64(ST0, &env->fp_status);
+    mem_ref = ptr;
+    mem_end = mem_ref + 9;
+    if (val < 0) {
+        cpu_stb_data(env, mem_end, 0x80);
+        val = -val;
+    } else {
+        cpu_stb_data(env, mem_end, 0x00);
+    }
+    while (mem_ref < mem_end) {
+        if (val == 0)
+            break;
+        v = val % 100;
+        val = val / 100;
+        v = ((v / 10) << 4) | (v % 10);
+        cpu_stb_data(env, mem_ref++, v);
+    }
+    while (mem_ref < mem_end) {
+        cpu_stb_data(env, mem_ref++, 0);
+    }
+}
+
+void helper_f2xm1(CPUX86State *env)
+{
+    double val = floatx80_to_double(env, ST0);
+    val = pow(2.0, val) - 1.0;
+    ST0 = double_to_floatx80(env, val);
+}
+
+void helper_fyl2x(CPUX86State *env)
+{
+    double fptemp = floatx80_to_double(env, ST0);
+
+    if (fptemp>0.0){
+        fptemp = log(fptemp)/log(2.0);   /* log2(ST) */
+        fptemp *= floatx80_to_double(env, ST1);
+        ST1 = double_to_floatx80(env, fptemp);
+        fpop(env);
+    } else {
+        env->fpus &= (~0x4700);
+        env->fpus |= 0x400;
+    }
+}
+
+void helper_fptan(CPUX86State *env)
+{
+    double fptemp = floatx80_to_double(env, ST0);
+
+    if((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
+        env->fpus |= 0x400;
+    } else {
+        fptemp = tan(fptemp);
+        ST0 = double_to_floatx80(env, fptemp);
+        fpush(env);
+        ST0 = floatx80_one;
+        env->fpus &= (~0x400);  /* C2 <-- 0 */
+        /* the above code is for  |arg| < 2**52 only */
+    }
+}
+
+void helper_fpatan(CPUX86State *env)
+{
+    double fptemp, fpsrcop;
+
+    fpsrcop = floatx80_to_double(env, ST1);
+    fptemp = floatx80_to_double(env, ST0);
+    ST1 = double_to_floatx80(env, atan2(fpsrcop,fptemp));
+    fpop(env);
+}
+
+void helper_fxtract(CPUX86State *env)
+{
+    CPU_LDoubleU temp;
+    unsigned int expdif;
+
+    temp.d = ST0;
+    expdif = EXPD(temp) - EXPBIAS;
+    /*DP exponent bias*/
+    ST0 = int32_to_floatx80(expdif, &env->fp_status);
+    fpush(env);
+    BIASEXPONENT(temp);
+    ST0 = temp.d;
+}
+
+void helper_fprem1(CPUX86State *env)
+{
+    double st0, st1, dblq, fpsrcop, fptemp;
+    CPU_LDoubleU fpsrcop1, fptemp1;
+    int expdif;
+    signed long long int q;
+
+    st0 = floatx80_to_double(env, ST0);
+    st1 = floatx80_to_double(env, ST1);
+
+    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
+        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
+        env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+        return;
+    }
+
+    fpsrcop = st0;
+    fptemp = st1;
+    fpsrcop1.d = ST0;
+    fptemp1.d = ST1;
+    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
+
+    if (expdif < 0) {
+        /* optimisation? taken from the AMD docs */
+        env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+        /* ST0 is unchanged */
+        return;
+    }
+
+    if (expdif < 53) {
+        dblq = fpsrcop / fptemp;
+        /* round dblq towards nearest integer */
+        dblq = rint(dblq);
+        st0 = fpsrcop - fptemp * dblq;
+
+        /* convert dblq to q by truncating towards zero */
+        if (dblq < 0.0)
+           q = (signed long long int)(-dblq);
+        else
+           q = (signed long long int)dblq;
+
+        env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+                                /* (C0,C3,C1) <-- (q2,q1,q0) */
+        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
+        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
+        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
+    } else {
+        env->fpus |= 0x400;  /* C2 <-- 1 */
+        fptemp = pow(2.0, expdif - 50);
+        fpsrcop = (st0 / st1) / fptemp;
+        /* fpsrcop = integer obtained by chopping */
+        fpsrcop = (fpsrcop < 0.0) ?
+                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
+        st0 -= (st1 * fpsrcop * fptemp);
+    }
+    ST0 = double_to_floatx80(env, st0);
+}
+
+void helper_fprem(CPUX86State *env)
+{
+    double st0, st1, dblq, fpsrcop, fptemp;
+    CPU_LDoubleU fpsrcop1, fptemp1;
+    int expdif;
+    signed long long int q;
+
+    st0 = floatx80_to_double(env, ST0);
+    st1 = floatx80_to_double(env, ST1);
+
+    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
+       ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
+       env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+       return;
+    }
+
+    fpsrcop = st0;
+    fptemp = st1;
+    fpsrcop1.d = ST0;
+    fptemp1.d = ST1;
+    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
+
+    if (expdif < 0) {
+        /* optimisation? taken from the AMD docs */
+        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
+        /* ST0 is unchanged */
+        return;
+    }
+
+    if (expdif < 53) {
+        dblq = fpsrcop / fptemp; /* ST0 / ST1 */
+        /* round dblq towards zero */
+        dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
+        st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
+
+        /* convert dblq to q by truncating towards zero */
+        if (dblq < 0.0) {
+           q = (signed long long int)(-dblq);
+        } else {
+           q = (signed long long int)dblq;
+        }
+
+        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
+                              /* (C0,C3,C1) <-- (q2,q1,q0) */
+        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
+        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
+        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
+    } else {
+        int N = 32 + (expdif % 32); /* as per AMD docs */
+        env->fpus |= 0x400;  /* C2 <-- 1 */
+        fptemp = pow(2.0, (double)(expdif - N));
+        fpsrcop = (st0 / st1) / fptemp;
+        /* fpsrcop = integer obtained by chopping */
+        fpsrcop = (fpsrcop < 0.0) ?
+                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
+        st0 -= (st1 * fpsrcop * fptemp);
+    }
+    ST0 = double_to_floatx80(env, st0);
+}
+
+void helper_fyl2xp1(CPUX86State *env)
+{
+    double fptemp = floatx80_to_double(env, ST0);
+
+    if ((fptemp+1.0)>0.0) {
+        fptemp = log(fptemp+1.0) / log(2.0); /* log2(ST+1.0) */
+        fptemp *= floatx80_to_double(env, ST1);
+        ST1 = double_to_floatx80(env, fptemp);
+        fpop(env);
+    } else {
+        env->fpus &= (~0x4700);
+        env->fpus |= 0x400;
+    }
+}
+
+void helper_fsqrt(CPUX86State *env)
+{
+    double fptemp = floatx80_to_double(env, ST0);
+
+    if (fptemp<0.0) {
+        env->fpus &= (~0x4700);  /* (C3,C2,C1,C0) <-- 0000 */
+        env->fpus |= 0x400;
+    }
+    ST0 = floatx80_sqrt(ST0, &env->fp_status);
+}
+
+void helper_fsincos(CPUX86State *env)
+{
+    double fptemp = floatx80_to_double(env, ST0);
+
+    if ((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
+        env->fpus |= 0x400;
+    } else {
+        ST0 = double_to_floatx80(env, sin(fptemp));
+        fpush(env);
+        ST0 = double_to_floatx80(env, cos(fptemp));
+        env->fpus &= (~0x400);  /* C2 <-- 0 */
+        /* the above code is for  |arg| < 2**63 only */
+    }
+}
+
+void helper_frndint(CPUX86State *env)
+{
+    ST0 = floatx80_round_to_int(ST0, &env->fp_status);
+}
+
+void helper_fscale(CPUX86State *env)
+{
+    double st0 = floatx80_to_double(env, ST0);
+    double st1 = floatx80_to_double(env, ST1);
+    double val = ldexp(st0, (int)st1);
+    ST0 = double_to_floatx80(env, val);
+}
+
+void helper_fsin(CPUX86State *env)
+{
+    double fptemp = floatx80_to_double(env, ST0);
+
+    if ((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
+        env->fpus |= 0x400;
+    } else {
+        ST0 = double_to_floatx80(env, sin(fptemp));
+        env->fpus &= (~0x400);  /* C2 <-- 0 */
+        /* the above code is for  |arg| < 2**53 only */
+    }
+}
+
+void helper_fcos(CPUX86State *env)
+{
+    double fptemp = floatx80_to_double(env, ST0);
+
+    if((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
+        env->fpus |= 0x400;
+    } else {
+        ST0 = double_to_floatx80(env, cos(fptemp));
+        env->fpus &= (~0x400);  /* C2 <-- 0 */
+        /* the above code is for |arg| < 2**63 only */
+    }
+}
+
+void helper_fxam_ST0(CPUX86State *env)
+{
+    CPU_LDoubleU temp;
+    int expdif;
+
+    temp.d = ST0;
+
+    env->fpus &= (~0x4700);  /* (C3,C2,C1,C0) <-- 0000 */
+    if (SIGND(temp))
+        env->fpus |= 0x200; /* C1 <-- 1 */
+
+    /* XXX: test fptags too */
+    expdif = EXPD(temp);
+    if (expdif == MAXEXPD) {
+        if (MANTD(temp) == 0x8000000000000000ULL) {
+            env->fpus |=  0x500 /*Infinity*/;
+        } else {
+            env->fpus |=  0x100 /*NaN*/;
+        }
+    } else if (expdif == 0) {
+        if (MANTD(temp) == 0) {
+            env->fpus |=  0x4000 /*Zero*/;
+        } else {
+            env->fpus |= 0x4400 /*Denormal*/;
+        }
+    } else {
+        env->fpus |= 0x400;
+    }
+}
+
+void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
+{
+    int fpus, fptag, exp, i;
+    uint64_t mant;
+    CPU_LDoubleU tmp;
+
+    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
+    fptag = 0;
+    for (i=7; i>=0; i--) {
+        fptag <<= 2;
+        if (env->fptags[i]) {
+            fptag |= 3;
+        } else {
+            tmp.d = env->fpregs[i].d;
+            exp = EXPD(tmp);
+            mant = MANTD(tmp);
+            if (exp == 0 && mant == 0) {
+                /* zero */
+                fptag |= 1;
+            } else if (exp == 0 || exp == MAXEXPD
+                       || (mant & (1LL << 63)) == 0) {
+                /* NaNs, infinity, denormal */
+                fptag |= 2;
+            }
+        }
+    }
+    if (data32) {
+        /* 32 bit */
+        cpu_stl_data(env, ptr, env->fpuc);
+        cpu_stl_data(env, ptr + 4, fpus);
+        cpu_stl_data(env, ptr + 8, fptag);
+        cpu_stl_data(env, ptr + 12, 0); /* fpip */
+        cpu_stl_data(env, ptr + 16, 0); /* fpcs */
+        cpu_stl_data(env, ptr + 20, 0); /* fpoo */
+        cpu_stl_data(env, ptr + 24, 0); /* fpos */
+    } else {
+        /* 16 bit */
+        cpu_stw_data(env, ptr, env->fpuc);
+        cpu_stw_data(env, ptr + 2, fpus);
+        cpu_stw_data(env, ptr + 4, fptag);
+        cpu_stw_data(env, ptr + 6, 0);
+        cpu_stw_data(env, ptr + 8, 0);
+        cpu_stw_data(env, ptr + 10, 0);
+        cpu_stw_data(env, ptr + 12, 0);
+    }
+}
+
+void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
+{
+    int i, fpus, fptag;
+
+    if (data32) {
+        env->fpuc = cpu_lduw_data(env, ptr);
+        fpus = cpu_lduw_data(env, ptr + 4);
+        fptag = cpu_lduw_data(env, ptr + 8);
+    }
+    else {
+        env->fpuc = cpu_lduw_data(env, ptr);
+        fpus = cpu_lduw_data(env, ptr + 2);
+        fptag = cpu_lduw_data(env, ptr + 4);
+    }
+    env->fpstt = (fpus >> 11) & 7;
+    env->fpus = fpus & ~0x3800;
+    for(i = 0;i < 8; i++) {
+        env->fptags[i] = ((fptag & 3) == 3);
+        fptag >>= 2;
+    }
+}
+
+void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
+{
+    floatx80 tmp;
+    int i;
+
+    helper_fstenv(env, ptr, data32);
+
+    ptr += (14 << data32);
+    for(i = 0;i < 8; i++) {
+        tmp = ST(i);
+        helper_fstt(env, tmp, ptr);
+        ptr += 10;
+    }
+
+    /* fninit */
+    env->fpus = 0;
+    env->fpstt = 0;
+    env->fpuc = 0x37f;
+    env->fptags[0] = 1;
+    env->fptags[1] = 1;
+    env->fptags[2] = 1;
+    env->fptags[3] = 1;
+    env->fptags[4] = 1;
+    env->fptags[5] = 1;
+    env->fptags[6] = 1;
+    env->fptags[7] = 1;
+}
+
+void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
+{
+    floatx80 tmp;
+    int i;
+
+    helper_fldenv(env, ptr, data32);
+    ptr += (14 << data32);
+
+    for(i = 0;i < 8; i++) {
+        tmp = helper_fldt(env, ptr);
+        ST(i) = tmp;
+        ptr += 10;
+    }
+}
+
+void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
+{
+    int fpus, fptag, i, nb_xmm_regs;
+    floatx80 tmp;
+    target_ulong addr;
+
+    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
+    fptag = 0;
+    for(i = 0; i < 8; i++) {
+        fptag |= (env->fptags[i] << i);
+    }
+    cpu_stw_data(env, ptr, env->fpuc);
+    cpu_stw_data(env, ptr + 2, fpus);
+    cpu_stw_data(env, ptr + 4, fptag ^ 0xff);
+#ifdef TARGET_X86_64
+    if (data64) {
+        cpu_stq_data(env, ptr + 0x08, 0); /* rip */
+        cpu_stq_data(env, ptr + 0x10, 0); /* rdp */
+    } else
+#endif
+    {
+        cpu_stl_data(env, ptr + 0x08, 0); /* eip */
+        cpu_stl_data(env, ptr + 0x0c, 0); /* sel  */
+        cpu_stl_data(env, ptr + 0x10, 0); /* dp */
+        cpu_stl_data(env, ptr + 0x14, 0); /* sel  */
+    }
+
+    addr = ptr + 0x20;
+    for(i = 0;i < 8; i++) {
+        tmp = ST(i);
+        helper_fstt(env, tmp, addr);
+        addr += 16;
+    }
+
+    if (env->cr[4] & CR4_OSFXSR_MASK) {
+        /* XXX: finish it */
+        cpu_stl_data(env, ptr + 0x18, env->mxcsr); /* mxcsr */
+        cpu_stl_data(env, ptr + 0x1c, 0x0000ffff); /* mxcsr_mask */
+        if (env->hflags & HF_CS64_MASK)
+            nb_xmm_regs = 16;
+        else
+            nb_xmm_regs = 8;
+        addr = ptr + 0xa0;
+        /* Fast FXSAVE leaves out the XMM registers */
+        if (!(env->efer & MSR_EFER_FFXSR)
+          || (env->hflags & HF_CPL_MASK)
+          || !(env->hflags & HF_LMA_MASK)) {
+            for(i = 0; i < nb_xmm_regs; i++) {
+                cpu_stq_data(env, addr, env->xmm_regs[i].XMM_Q(0));
+                cpu_stq_data(env, addr + 8, env->xmm_regs[i].XMM_Q(1));
+                addr += 16;
+            }
+        }
+    }
+}
+
+void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
+{
+    int i, fpus, fptag, nb_xmm_regs;
+    floatx80 tmp;
+    target_ulong addr;
+
+    env->fpuc = cpu_lduw_data(env, ptr);
+    fpus = cpu_lduw_data(env, ptr + 2);
+    fptag = cpu_lduw_data(env, ptr + 4);
+    env->fpstt = (fpus >> 11) & 7;
+    env->fpus = fpus & ~0x3800;
+    fptag ^= 0xff;
+    for(i = 0;i < 8; i++) {
+        env->fptags[i] = ((fptag >> i) & 1);
+    }
+
+    addr = ptr + 0x20;
+    for(i = 0;i < 8; i++) {
+        tmp = helper_fldt(env, addr);
+        ST(i) = tmp;
+        addr += 16;
+    }
+
+    if (env->cr[4] & CR4_OSFXSR_MASK) {
+        /* XXX: finish it */
+        env->mxcsr = cpu_ldl_data(env, ptr + 0x18);
+        //ldl(ptr + 0x1c);
+        if (env->hflags & HF_CS64_MASK)
+            nb_xmm_regs = 16;
+        else
+            nb_xmm_regs = 8;
+        addr = ptr + 0xa0;
+        /* Fast FXRESTORE leaves out the XMM registers */
+        if (!(env->efer & MSR_EFER_FFXSR)
+          || (env->hflags & HF_CPL_MASK)
+          || !(env->hflags & HF_LMA_MASK)) {
+            for(i = 0; i < nb_xmm_regs; i++) {
+                env->xmm_regs[i].XMM_Q(0) = cpu_ldq_data(env, addr);
+                env->xmm_regs[i].XMM_Q(1) = cpu_ldq_data(env, addr + 8);
+                addr += 16;
+            }
+        }
+    }
+}
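The abridged FXSAVE tag word handled above can be summarized with a minimal
sketch (same convention as this file: env->fptags[i] == 1 means ST(i) is
empty; "env" is assumed to be in scope):

    /* FXSAVE stores one "valid" bit per register rather than the 2-bit tags: */
    uint8_t abridged = 0;
    for (int i = 0; i < 8; i++) {
        abridged |= (!env->fptags[i]) << i;    /* 1 = valid, 0 = empty */
    }
    /* This is exactly what "fptag ^ 0xff" computes in helper_fxsave(), and
       what helper_fxrstor() undoes with the reverse XOR and per-bit loop. */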
+
+void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
+{
+    CPU_LDoubleU temp;
+
+    temp.d = f;
+    *pmant = temp.l.lower;
+    *pexp = temp.l.upper;
+}
+
+floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper)
+{
+    CPU_LDoubleU temp;
+
+    temp.l.upper = upper;
+    temp.l.lower = mant;
+    return temp.d;
+}
+
+/* MMX/SSE */
+/* XXX: optimize by storing fptt and fptags in the static cpu state */
+void helper_enter_mmx(CPUX86State *env)
+{
+    env->fpstt = 0;
+    memset(env->fptags, 0, sizeof(env->fptags));
+}
+
+void helper_emms(CPUX86State *env)
+{
+    /* set to empty state */
+    memset(env->fptags, 1, sizeof(env->fptags));
+}
+
+/* XXX: suppress */
+void helper_movq(CPUX86State *env, void *d, void *s)
+{
+    *(uint64_t *)d = *(uint64_t *)s;
+}
+
+#define SHIFT 0
+#include "ops_sse.h"
+
+#define SHIFT 1
+#include "ops_sse.h"
diff --git a/target-i386/hax-all.c b/target-i386/hax-all.c
index c41a874..05f01ed 100644
--- a/target-i386/hax-all.c
+++ b/target-i386/hax-all.c
@@ -40,8 +40,9 @@
 }
 
 /* Currently non-PG modes are emulated by QEMU */
-int hax_vcpu_emulation_mode(CPUX86State *env)
+int hax_vcpu_emulation_mode(CPUState *cpu)
 {
+    CPUX86State *env = cpu->env_ptr;
     return !(env->cr[0] & CR0_PG_MASK);
 }
 
@@ -51,7 +52,7 @@
     tlb_flush(env, 1);
     tb_flush(env);
     /* Sync the vcpu state from hax kernel module */
-    hax_vcpu_sync_state(env, 0);
+    hax_vcpu_sync_state(ENV_GET_CPU(env), 0);
     return 0;
 }
 
@@ -59,47 +60,47 @@
  * Check whether to break the translation block loop
  * Break tbloop after one MMIO emulation, or after finish emulation mode
  */
-static int hax_stop_tbloop(CPUX86State *env)
+static int hax_stop_tbloop(CPUState *cpu)
 {
-    switch (env->hax_vcpu->emulation_state)
+    switch (cpu->hax_vcpu->emulation_state)
     {
         case HAX_EMULATE_STATE_MMIO:
             return 1;
         case HAX_EMULATE_STATE_INITIAL:
         case HAX_EMULATE_STATE_REAL:
-            if (!hax_vcpu_emulation_mode(env))
+            if (!hax_vcpu_emulation_mode(cpu))
                 return 1;
             break;
         default:
             dprint("Invalid emulation state in hax_sto_tbloop state %x\n",
-              env->hax_vcpu->emulation_state);
+              cpu->hax_vcpu->emulation_state);
             break;
     }
 
     return 0;
 }
 
-int hax_stop_emulation(CPUX86State *env)
+int hax_stop_emulation(CPUState *cpu)
 {
-    if (hax_stop_tbloop(env))
+    if (hax_stop_tbloop(cpu))
     {
-        env->hax_vcpu->emulation_state =  HAX_EMULATE_STATE_NONE;
+        cpu->hax_vcpu->emulation_state =  HAX_EMULATE_STATE_NONE;
         /*
          * QEMU emulation changes vcpu state,
          * Sync the vcpu state to HAX kernel module
          */
-        hax_vcpu_sync_state(env, 1);
+        hax_vcpu_sync_state(cpu, 1);
         return 1;
     }
 
     return 0;
 }
 
-int hax_stop_translate(CPUX86State *env)
+int hax_stop_translate(CPUState *cpu)
 {
     struct hax_vcpu_state *vstate;
 
-    vstate = env->hax_vcpu;
+    vstate = cpu->hax_vcpu;
     assert(vstate->emulation_state);
     if (vstate->emulation_state == HAX_EMULATE_STATE_MMIO )
         return 1;
@@ -112,9 +113,9 @@
     return size >= sizeof(struct hax_tunnel);
 }
 
-hax_fd hax_vcpu_get_fd(CPUX86State *env)
+hax_fd hax_vcpu_get_fd(CPUState *cpu)
 {
-    struct hax_vcpu_state *vcpu = env->hax_vcpu;
+    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
     if (!vcpu)
         return HAX_INVALID_FD;
     return vcpu->fd;
@@ -234,9 +235,9 @@
     return -1;
 }
 
-int hax_vcpu_destroy(CPUX86State *env)
+int hax_vcpu_destroy(CPUState *cpu)
 {
-    struct hax_vcpu_state *vcpu = env->hax_vcpu;
+    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
 
     if (!hax_global.vm)
     {
@@ -257,19 +258,19 @@
     return 0;
 }
 
-int hax_init_vcpu(CPUX86State *env)
+int hax_init_vcpu(CPUState *cpu)
 {
     int ret;
 
-    ret = hax_vcpu_create(env->cpu_index);
+    ret = hax_vcpu_create(cpu->cpu_index);
     if (ret < 0)
     {
         dprint("Failed to create HAX vcpu\n");
         exit(-1);
     }
 
-    env->hax_vcpu = hax_global.vm->vcpus[env->cpu_index];
-    env->hax_vcpu->emulation_state = HAX_EMULATE_STATE_INITIAL;
+    cpu->hax_vcpu = hax_global.vm->vcpus[cpu->cpu_index];
+    cpu->hax_vcpu->emulation_state = HAX_EMULATE_STATE_INITIAL;
 
     return ret;
 }
@@ -467,24 +468,25 @@
     return 0;
 }
 
-static int hax_vcpu_interrupt(CPUX86State *env)
+static int hax_vcpu_interrupt(CPUState *cpu)
 {
-    struct hax_vcpu_state *vcpu = env->hax_vcpu;
+    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
     struct hax_tunnel *ht = vcpu->tunnel;
+    CPUX86State *env = cpu->env_ptr;
 
     /*
      * Try to inject an interrupt if the guest can accept it
      * Unlike KVM, the HAX kernel module checks the eflags, instead.
      */
     if (ht->ready_for_interrupt_injection &&
-      (env->interrupt_request & CPU_INTERRUPT_HARD))
+      (cpu->interrupt_request & CPU_INTERRUPT_HARD))
     {
         int irq;
 
-        env->interrupt_request &= ~CPU_INTERRUPT_HARD;
+        cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
         irq = cpu_get_pic_interrupt(env);
         if (irq >= 0) {
-            hax_inject_interrupt(env, irq);
+            hax_inject_interrupt(cpu, irq);
         }
     }
 
@@ -494,16 +496,16 @@
      * a return to userspace as soon as the guest is ready to receive
      * an interrupt.
      */
-    if ((env->interrupt_request & CPU_INTERRUPT_HARD))
+    if ((cpu->interrupt_request & CPU_INTERRUPT_HARD))
         ht->request_interrupt_window = 1;
     else
         ht->request_interrupt_window = 0;
     return 0;
 }
 
-void hax_raise_event(CPUX86State *env)
+void hax_raise_event(CPUState *cpu)
 {
-    struct hax_vcpu_state *vcpu = env->hax_vcpu;
+    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
 
     if (!vcpu)
         return;
@@ -521,13 +523,14 @@
  * 5. An unknown VMX-exit happens
  */
 extern void qemu_system_reset_request(void);
-static int hax_vcpu_hax_exec(CPUX86State *env)
+static int hax_vcpu_hax_exec(CPUState *cpu)
 {
     int ret = 0;
-    struct hax_vcpu_state *vcpu = env->hax_vcpu;
+    CPUX86State *env = cpu->env_ptr;
+    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
     struct hax_tunnel *ht = vcpu->tunnel;
 
-    if (hax_vcpu_emulation_mode(env))
+    if (hax_vcpu_emulation_mode(cpu))
     {
         dprint("Trying to vcpu execute at eip:%lx\n", env->eip);
         return  HAX_EMUL_EXITLOOP;
@@ -536,12 +539,12 @@
     do {
         int hax_ret;
 
-        if (env->exit_request) {
+        if (cpu->exit_request) {
             ret = HAX_EMUL_EXITLOOP ;
             break;
         }
 
-        hax_vcpu_interrupt(env);
+        hax_vcpu_interrupt(cpu);
 
         hax_ret = hax_vcpu_run(vcpu);
 
@@ -580,22 +583,22 @@
                 dprint("VCPU shutdown request\n");
                 qemu_system_reset_request();
                 hax_prepare_emulation(env);
-                cpu_dump_state(env, stderr, fprintf, 0);
+                cpu_dump_state(cpu, stderr, fprintf, 0);
                 ret = HAX_EMUL_EXITLOOP;
                 break;
             case HAX_EXIT_UNKNOWN_VMEXIT:
                 dprint("Unknown VMX exit %x from guest\n", ht->_exit_reason);
                 qemu_system_reset_request();
                 hax_prepare_emulation(env);
-                cpu_dump_state(env, stderr, fprintf, 0);
+                cpu_dump_state(cpu, stderr, fprintf, 0);
                 ret = HAX_EMUL_EXITLOOP;
                 break;
             case HAX_EXIT_HLT:
-                if (!(env->interrupt_request & CPU_INTERRUPT_HARD) &&
-                  !(env->interrupt_request & CPU_INTERRUPT_NMI)) {
+                if (!(cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
+                  !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
                     /* hlt instruction with interrupt disabled is shutdown */
                     env->eflags |= IF_MASK;
-                    env->halted = 1;
+                    cpu->halted = 1;
                     env->exception_index = EXCP_HLT;
                     ret = HAX_EMUL_HLT;
                 }
@@ -608,14 +611,14 @@
                 dprint("Unknow exit %x from hax\n", ht->_exit_status);
                 qemu_system_reset_request();
                 hax_prepare_emulation(env);
-                cpu_dump_state(env, stderr, fprintf, 0);
+                cpu_dump_state(cpu, stderr, fprintf, 0);
                 ret = HAX_EMUL_EXITLOOP;
                 break;
         }
     }while (!ret);
 
-    if (env->exit_request) {
-        env->exit_request = 0;
+    if (cpu->exit_request) {
+        cpu->exit_request = 0;
         env->exception_index = EXCP_INTERRUPT;
     }
     return ret;
@@ -624,27 +627,27 @@
 /*
  * return 1 when need to emulate, 0 when need to exit loop
  */
-int hax_vcpu_exec(CPUX86State *env)
+int hax_vcpu_exec(CPUState *cpu)
 {
     int next = 0, ret = 0;
     struct hax_vcpu_state *vcpu;
+    CPUX86State *env = cpu->env_ptr;
 
-    if (env->hax_vcpu->emulation_state != HAX_EMULATE_STATE_NONE)
+    if (cpu->hax_vcpu->emulation_state != HAX_EMULATE_STATE_NONE)
         return 1;
 
-    vcpu = env->hax_vcpu;
-    next = hax_vcpu_hax_exec(env);
+    vcpu = cpu->hax_vcpu;
+    next = hax_vcpu_hax_exec(cpu);
     switch (next)
     {
         case HAX_EMUL_ONE:
             ret = 1;
-            env->hax_vcpu->emulation_state = HAX_EMULATE_STATE_MMIO;
+            vcpu->emulation_state = HAX_EMULATE_STATE_MMIO;
             hax_prepare_emulation(env);
             break;
         case HAX_EMUL_REAL:
             ret = 1;
-            env->hax_vcpu->emulation_state =
-              HAX_EMULATE_STATE_REAL;
+            vcpu->emulation_state = HAX_EMULATE_STATE_REAL;
             hax_prepare_emulation(env);
             break;
         case HAX_EMUL_HLT:
@@ -824,13 +827,14 @@
 
 static int hax_sync_vcpu_register(CPUX86State *env, int set)
 {
+    CPUState *cpu = ENV_GET_CPU(env);
     struct vcpu_state_t regs;
     int ret;
     memset(&regs, 0, sizeof(struct vcpu_state_t));
 
     if (!set)
     {
-        ret = hax_sync_vcpu_state(env, &regs, 0);
+        ret = hax_sync_vcpu_state(cpu, &regs, 0);
         if (ret < 0)
             return -1;
     }
@@ -844,7 +848,16 @@
     hax_getput_reg(&regs._rdi, &env->regs[R_EDI], set);
     hax_getput_reg(&regs._rsp, &env->regs[R_ESP], set);
     hax_getput_reg(&regs._rbp, &env->regs[R_EBP], set);
-
+#ifdef TARGET_X86_64
+    hax_getput_reg(&regs._r8, &env->regs[8], set);
+    hax_getput_reg(&regs._r9, &env->regs[9], set);
+    hax_getput_reg(&regs._r10, &env->regs[10], set);
+    hax_getput_reg(&regs._r11, &env->regs[11], set);
+    hax_getput_reg(&regs._r12, &env->regs[12], set);
+    hax_getput_reg(&regs._r13, &env->regs[13], set);
+    hax_getput_reg(&regs._r14, &env->regs[14], set);
+    hax_getput_reg(&regs._r15, &env->regs[15], set);
+#endif
     hax_getput_reg(&regs._rflags, &env->eflags, set);
     hax_getput_reg(&regs._rip, &env->eip, set);
 
@@ -868,7 +881,7 @@
 
     if (set)
     {
-        ret = hax_sync_vcpu_state(env, &regs, 1);
+        ret = hax_sync_vcpu_state(cpu, &regs, 1);
         if (ret < 0)
             return -1;
     }
@@ -895,8 +908,16 @@
     msrs[n++].entry = MSR_IA32_SYSENTER_ESP;
     msrs[n++].entry = MSR_IA32_SYSENTER_EIP;
     msrs[n++].entry = MSR_IA32_TSC;
+#ifdef TARGET_X86_64
+    msrs[n++].entry = MSR_STAR;
+    msrs[n++].entry = MSR_LSTAR;
+    msrs[n++].entry = MSR_CSTAR;
+    msrs[n++].entry = MSR_FMASK;
+    msrs[n++].entry = MSR_KERNELGSBASE;
+#endif
+
     md.nr_msr = n;
-    ret = hax_sync_msr(env, &md, 0);
+    ret = hax_sync_msr(ENV_GET_CPU(env), &md, 0);
     if (ret < 0)
         return ret;
 
@@ -914,6 +935,23 @@
             case MSR_IA32_TSC:
                 env->tsc = msrs[i].value;
                 break;
+#ifdef TARGET_X86_64
+            case MSR_STAR:
+                env->star = msrs[i].value;
+                break;
+            case MSR_LSTAR:
+                env->lstar = msrs[i].value;
+                break;
+            case MSR_CSTAR:
+                env->cstar = msrs[i].value;
+                break;
+            case MSR_FMASK:
+                env->fmask = msrs[i].value;
+                break;
+            case MSR_KERNELGSBASE:
+                env->kernelgsbase = msrs[i].value;
+                break;
+#endif
         }
     }
 
@@ -932,10 +970,19 @@
     hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
     hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
     hax_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
+#ifdef TARGET_X86_64
+    hax_msr_entry_set(&msrs[n++], MSR_EFER, env->efer);
+    hax_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
+    hax_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
+    hax_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
+    hax_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
+    hax_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
+#endif
+
     md.nr_msr = n;
     md.done = 0;
 
-    return hax_sync_msr(env, &md, 1);
+    return hax_sync_msr(ENV_GET_CPU(env), &md, 1);
 
 }
 
@@ -944,7 +991,7 @@
     struct fx_layout fpu;
     int i, ret;
 
-    ret = hax_sync_fpu(env, &fpu, 0);
+    ret = hax_sync_fpu(ENV_GET_CPU(env), &fpu, 0);
     if (ret < 0)
         return ret;
 
@@ -981,11 +1028,12 @@
 
     fpu.mxcsr = env->mxcsr;
 
-    return hax_sync_fpu(env, &fpu, 1);
+    return hax_sync_fpu(ENV_GET_CPU(env), &fpu, 1);
 }
 
-int hax_arch_get_registers(CPUX86State *env)
+int hax_arch_get_registers(CPUState *cpu)
 {
+    CPUX86State *env = cpu->env_ptr;
     int ret;
 
     ret = hax_sync_vcpu_register(env, 0);
@@ -1003,9 +1051,10 @@
     return 0;
 }
 
-static int hax_arch_set_registers(CPUX86State *env)
+static int hax_arch_set_registers(CPUState *cpu)
 {
     int ret;
+    CPUX86State *env = cpu->env_ptr;
     ret = hax_sync_vcpu_register(env, 1);
 
     if (ret < 0)
@@ -1029,13 +1078,13 @@
     return 0;
 }
 
-void hax_vcpu_sync_state(CPUX86State *env, int modified)
+void hax_vcpu_sync_state(CPUState *cpu, int modified)
 {
     if (hax_enabled()) {
         if (modified)
-            hax_arch_set_registers(env);
+            hax_arch_set_registers(cpu);
         else
-            hax_arch_get_registers(env);
+            hax_arch_get_registers(cpu);
     }
 }
 
@@ -1045,20 +1094,12 @@
  */
 int hax_sync_vcpus(void)
 {
-    if (hax_enabled())
-    {
-        CPUX86State *env;
+    if (hax_enabled()) {
+        CPUState *cpu;
 
-        env = first_cpu;
-        if (!env)
-            return 0;
-
-        for (; env != NULL; env = env->next_cpu) {
-            int ret;
-
-            ret = hax_arch_set_registers(env);
-            if (ret < 0)
-            {
+        CPU_FOREACH(cpu) {
+            int ret = hax_arch_set_registers(cpu);
+            if (ret < 0) {
                 dprint("Failed to sync HAX vcpu context\n");
                 exit(1);
             }
@@ -1070,14 +1111,12 @@
 
 void hax_reset_vcpu_state(void *opaque)
 {
-    CPUX86State *env;
-    for (env = first_cpu; env != NULL; env = env->next_cpu)
-    {
-        if (env->hax_vcpu)
-        {
-            env->hax_vcpu->emulation_state  = HAX_EMULATE_STATE_INITIAL;
-            env->hax_vcpu->tunnel->user_event_pending = 0;
-            env->hax_vcpu->tunnel->ready_for_interrupt_injection = 0;
+    CPUState *cpu;
+    CPU_FOREACH(cpu) {
+        if (cpu->hax_vcpu) {
+            cpu->hax_vcpu->emulation_state  = HAX_EMULATE_STATE_INITIAL;
+            cpu->hax_vcpu->tunnel->user_event_pending = 0;
+            cpu->hax_vcpu->tunnel->ready_for_interrupt_injection = 0;
         }
     }
 }
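The changes in this file follow one mechanical pattern: per-vcpu bookkeeping
(hax_vcpu, interrupt_request, exit_request, halted) moves from CPUX86State to
the generic CPUState, and the code converts between the two views with
cpu->env_ptr and ENV_GET_CPU(), both of which are used throughout the patch.
A minimal sketch of the two directions:

    static void hax_conversion_example(CPUState *cpu)
    {
        CPUX86State *env = cpu->env_ptr;        /* generic -> x86 state */
        CPUState *same_cpu = ENV_GET_CPU(env);  /* x86 state -> generic */
        (void)env;
        (void)same_cpu;
    }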
diff --git a/target-i386/hax-darwin.c b/target-i386/hax-darwin.c
index c5fd741..3e85f1f 100644
--- a/target-i386/hax-darwin.c
+++ b/target-i386/hax-darwin.c
@@ -73,7 +73,7 @@
 
     info.pa_start = start_addr;
     info.size = size;
-    info.va = (uint64_t)qemu_get_ram_ptr(phys_offset);
+    info.va = (uint64_t)(uintptr_t)qemu_get_ram_ptr(phys_offset);
     info.flags = (flags & IO_MEM_ROM) ? 1 : 0;
 
     ret = ioctl(hax_global.vm->fd, HAX_VM_IOCTL_SET_RAM, pinfo);
@@ -252,8 +252,8 @@
         return ret;
     }
 
-    vcpu->tunnel = (struct hax_tunnel *)(info.va);
-    vcpu->iobuf = (unsigned char *)(info.io_va);
+    vcpu->tunnel = (struct hax_tunnel *)(uintptr_t)(info.va);
+    vcpu->iobuf = (unsigned char *)(uintptr_t)(info.io_va);
     return 0;
 }
 
@@ -265,11 +265,11 @@
     return ret;
 }
 
-int hax_sync_fpu(CPUX86State *env, struct fx_layout *fl, int set)
+int hax_sync_fpu(CPUState *cpu, struct fx_layout *fl, int set)
 {
     int ret, fd;
 
-    fd = hax_vcpu_get_fd(env);
+    fd = hax_vcpu_get_fd(cpu);
     if (fd <= 0)
         return -1;
 
@@ -280,11 +280,11 @@
     return ret;
 }
 
-int hax_sync_msr(CPUX86State *env, struct hax_msr_data *msrs, int set)
+int hax_sync_msr(CPUState *cpu, struct hax_msr_data *msrs, int set)
 {
     int ret, fd;
 
-    fd = hax_vcpu_get_fd(env);
+    fd = hax_vcpu_get_fd(cpu);
     if (fd <= 0)
         return -1;
     if (set)
@@ -294,11 +294,11 @@
     return ret;
 }
 
-int hax_sync_vcpu_state(CPUX86State *env, struct vcpu_state_t *state, int set)
+int hax_sync_vcpu_state(CPUState *cpu, struct vcpu_state_t *state, int set)
 {
     int ret, fd;
 
-    fd = hax_vcpu_get_fd(env);
+    fd = hax_vcpu_get_fd(cpu);
     if (fd <= 0)
         return -1;
 
@@ -309,11 +309,11 @@
     return ret;
 }
 
-int hax_inject_interrupt(CPUX86State *env, int vector)
+int hax_inject_interrupt(CPUState *cpu, int vector)
 {
     int ret, fd;
 
-    fd = hax_vcpu_get_fd(env);
+    fd = hax_vcpu_get_fd(cpu);
     if (fd <= 0)
         return -1;
 
diff --git a/target-i386/hax-i386.h b/target-i386/hax-i386.h
index 3dd91a0..6feafd8 100644
--- a/target-i386/hax-i386.h
+++ b/target-i386/hax-i386.h
@@ -55,18 +55,18 @@
 };
 
 /* Functions exported to host specific mode */
-hax_fd hax_vcpu_get_fd(CPUX86State *env);
+hax_fd hax_vcpu_get_fd(CPUState *cpu);
 int valid_hax_tunnel_size(uint16_t size);
 
 /* Host specific functions */
 int hax_mod_version(struct hax_state *hax, struct hax_module_version *version);
-int hax_inject_interrupt(CPUX86State *env, int vector);
+int hax_inject_interrupt(CPUState *cpu, int vector);
 struct hax_vm *hax_vm_create(struct hax_state *hax);
 int hax_vcpu_run(struct hax_vcpu_state *vcpu);
 int hax_vcpu_create(int id);
-int hax_sync_vcpu_state(CPUX86State *env, struct vcpu_state_t *state, int set);
-int hax_sync_msr(CPUX86State *env, struct hax_msr_data *msrs, int set);
-int hax_sync_fpu(CPUX86State *env, struct fx_layout *fl, int set);
+int hax_sync_vcpu_state(CPUState *cpu, struct vcpu_state_t *state, int set);
+int hax_sync_msr(CPUState *cpu, struct hax_msr_data *msrs, int set);
+int hax_sync_fpu(CPUState *cpu, struct fx_layout *fl, int set);
 int hax_vm_destroy(struct hax_vm *vm);
 int hax_capability(struct hax_state *hax, struct hax_capabilityinfo *cap);
 int hax_notify_qemu_version(hax_fd vm_fd, struct hax_qemu_version *qversion);
diff --git a/target-i386/hax-windows.c b/target-i386/hax-windows.c
index 46d6bf6..5db687b 100644
--- a/target-i386/hax-windows.c
+++ b/target-i386/hax-windows.c
@@ -87,8 +87,9 @@
       (LPOVERLAPPED) NULL);
 
     if (!ret) {
-        dprint("Failed to allocate %x memory\n", size);
-        return ret;
+        dprint("HAX: Failed to allocate %x memory (address %llx)\n", 
+               size, (unsigned long long)va);
+        return -1;
     }
 
     return 0;
@@ -398,14 +399,14 @@
         return 0;
 }
 
-int hax_sync_fpu(CPUX86State *env, struct fx_layout *fl, int set)
+int hax_sync_fpu(CPUState *cpu, struct fx_layout *fl, int set)
 {
     int ret;
     hax_fd fd;
     HANDLE hDeviceVCPU;
     DWORD dSize = 0;
 
-    fd = hax_vcpu_get_fd(env);
+    fd = hax_vcpu_get_fd(cpu);
     if (hax_invalid_fd(fd))
         return -1;
 
@@ -431,14 +432,14 @@
         return 0;
 }
 
-int hax_sync_msr(CPUX86State *env, struct hax_msr_data *msrs, int set)
+int hax_sync_msr(CPUState *cpu, struct hax_msr_data *msrs, int set)
 {
     int ret;
     hax_fd fd;
     HANDLE hDeviceVCPU;
     DWORD dSize = 0;
 
-    fd = hax_vcpu_get_fd(env);
+    fd = hax_vcpu_get_fd(cpu);
     if (hax_invalid_fd(fd))
         return -1;
     hDeviceVCPU = fd;
@@ -463,14 +464,14 @@
         return 0;
 }
 
-int hax_sync_vcpu_state(CPUX86State *env, struct vcpu_state_t *state, int set)
+int hax_sync_vcpu_state(CPUState *cpu, struct vcpu_state_t *state, int set)
 {
     int ret;
     hax_fd fd;
     HANDLE hDeviceVCPU;
     DWORD dSize;
 
-    fd = hax_vcpu_get_fd(env);
+    fd = hax_vcpu_get_fd(cpu);
     if (hax_invalid_fd(fd))
         return -1;
 
@@ -496,14 +497,14 @@
         return 0;
 }
 
-int hax_inject_interrupt(CPUX86State *env, int vector)
+int hax_inject_interrupt(CPUState *cpu, int vector)
 {
     int ret;
     hax_fd fd;
     HANDLE hDeviceVCPU;
     DWORD dSize;
 
-    fd = hax_vcpu_get_fd(env);
+    fd = hax_vcpu_get_fd(cpu);
     if (hax_invalid_fd(fd))
         return -1;
 
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 29af579..3e25e3a 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -25,7 +25,6 @@
 #include <signal.h>
 
 #include "cpu.h"
-#include "exec/exec-all.h"
 #include "qemu-common.h"
 #include "sysemu/kvm.h"
 #include "exec/hax.h"
@@ -463,13 +462,14 @@
 }
 
 /* NOTE: must be called outside the CPU execute loop */
-void cpu_reset(CPUX86State *env)
+void cpu_reset(CPUState *cpu)
 {
+    CPUX86State *env = cpu->env_ptr;
     int i;
 
     if (qemu_loglevel_mask(CPU_LOG_RESET)) {
-        qemu_log("CPU Reset (CPU %d)\n", env->cpu_index);
-        log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
+        qemu_log("CPU Reset (CPU %d)\n", cpu->cpu_index);
+        log_cpu_state(cpu, X86_DUMP_FPU | X86_DUMP_CCOP);
     }
 
     memset(env, 0, offsetof(CPUX86State, breakpoints));
@@ -652,20 +652,21 @@
     cpu_fprintf(f, "\n");
 }
 
-void cpu_dump_state(CPUX86State *env, FILE *f,
+void cpu_dump_state(CPUState *cpu, FILE *f,
                     int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
                     int flags)
 {
+    CPUX86State *env = cpu->env_ptr;
     int eflags, i, nb;
     char cc_op_name[32];
     static const char *seg_name[6] = { "ES", "CS", "SS", "DS", "FS", "GS" };
 
     if (kvm_enabled())
-        kvm_arch_get_registers(env);
+        kvm_arch_get_registers(cpu);
 
 #ifdef CONFIG_HAX
     if (hax_enabled())
-        hax_arch_get_registers(env);
+        hax_arch_get_registers(cpu);
 #endif
 
     eflags = env->eflags;
@@ -705,7 +706,7 @@
                     (env->hflags >> HF_INHIBIT_IRQ_SHIFT) & 1,
                     (int)(env->a20_mask >> 20) & 1,
                     (env->hflags >> HF_SMM_SHIFT) & 1,
-                    env->halted);
+                    cpu->halted);
     } else
 #endif
     {
@@ -732,7 +733,7 @@
                     (env->hflags >> HF_INHIBIT_IRQ_SHIFT) & 1,
                     (int)(env->a20_mask >> 20) & 1,
                     (env->hflags >> HF_SMM_SHIFT) & 1,
-                    env->halted);
+                    cpu->halted);
     }
 
     for(i = 0; i < 6; i++) {
@@ -843,7 +844,7 @@
 #endif
         /* if the cpu is currently executing code, we must unlink it and
            all the potentially executing TB */
-        cpu_interrupt(env, CPU_INTERRUPT_EXITTB);
+        cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_EXITTB);
 
         /* when a20 is changed, all the MMU mappings are invalid, so
            we must flush everything */
@@ -929,7 +930,7 @@
 #if defined(CONFIG_USER_ONLY)
 
 int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr,
-                             int is_write, int mmu_idx, int is_softmmu)
+                             int is_write, int mmu_idx)
 {
     /* user mode only emulation */
     is_write &= 1;
@@ -959,14 +960,13 @@
    -1 = cannot handle fault
    0  = nothing more to do
    1  = generate PF fault
-   2  = soft MMU activation required for this block
 */
 int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr,
-                             int is_write1, int mmu_idx, int is_softmmu)
+                             int is_write1, int mmu_idx)
 {
     uint64_t ptep, pte;
     target_ulong pde_addr, pte_addr;
-    int error_code, is_dirty, prot, page_size, ret, is_write, is_user;
+    int error_code, is_dirty, prot, page_size, is_write, is_user;
     hwaddr paddr;
     uint32_t page_offset;
     target_ulong vaddr, virt_addr;
@@ -1227,8 +1227,8 @@
     paddr = (pte & TARGET_PAGE_MASK) + page_offset;
     vaddr = virt_addr + page_offset;
 
-    ret = tlb_set_page_exec(env, vaddr, paddr, prot, mmu_idx, is_softmmu);
-    return ret;
+    tlb_set_page(env, vaddr, paddr, prot, mmu_idx, page_size);
+    return 0;
  do_fault_protect:
     error_code = PG_ERROR_P_MASK;
  do_fault:
@@ -1414,10 +1414,6 @@
     return hit_enabled;
 }
 
-static CPUDebugExcpHandler *prev_debug_excp_handler;
-
-void raise_exception(int exception_index);
-
 static void breakpoint_handler(CPUX86State *env)
 {
     CPUBreakpoint *bp;
@@ -1426,7 +1422,7 @@
         if (env->watchpoint_hit->flags & BP_CPU) {
             env->watchpoint_hit = NULL;
             if (check_hw_breakpoints(env, 0))
-                raise_exception(EXCP01_DB);
+                raise_exception(env, EXCP01_DB);
             else
                 cpu_resume_from_signal(env, NULL);
         }
@@ -1435,13 +1431,11 @@
             if (bp->pc == env->eip) {
                 if (bp->flags & BP_CPU) {
                     check_hw_breakpoints(env, 1);
-                    raise_exception(EXCP01_DB);
+                    raise_exception(env, EXCP01_DB);
                 }
                 break;
             }
     }
-    if (prev_debug_excp_handler)
-        prev_debug_excp_handler(env);
 }
 
 
@@ -1487,7 +1481,7 @@
         banks[3] = misc;
         cenv->mcg_status = mcg_status;
         banks[1] = status;
-        cpu_interrupt(cenv, CPU_INTERRUPT_MCE);
+        cpu_interrupt(ENV_GET_CPU(cenv), CPU_INTERRUPT_MCE);
     } else if (!(banks[1] & MCI_STATUS_VAL)
                || !(banks[1] & MCI_STATUS_UC)) {
         if (banks[1] & MCI_STATUS_VAL)
@@ -1752,20 +1746,25 @@
 
 CPUX86State *cpu_x86_init(const char *cpu_model)
 {
+    X86CPU *x86_cpu;
     CPUX86State *env;
     static int inited;
 
-    env = g_malloc0(sizeof(CPUX86State));
+    x86_cpu = g_malloc0(sizeof(X86CPU));
+    env = &x86_cpu->env;
+    ENV_GET_CPU(env)->env_ptr = env;
+    CPUState *cpu = ENV_GET_CPU(env);
+
     cpu_exec_init(env);
-    env->cpu_model_str = cpu_model;
+    cpu->cpu_model_str = cpu_model;
+
 
     /* init various static tables */
     if (!inited) {
         inited = 1;
         optimize_flags_init();
 #ifndef CONFIG_USER_ONLY
-        prev_debug_excp_handler =
-            cpu_set_debug_excp_handler(breakpoint_handler);
+        cpu_set_debug_excp_handler(breakpoint_handler);
 #endif
     }
     if (cpu_x86_register(env, cpu_model) < 0) {
@@ -1773,22 +1772,22 @@
         return NULL;
     }
     mce_init(env);
-    cpu_reset(env);
+    cpu_reset(cpu);
 
-    qemu_init_vcpu(env);
+    qemu_init_vcpu(cpu);
 
     if (kvm_enabled()) {
         kvm_trim_features(&env->cpuid_features,
-                          kvm_arch_get_supported_cpuid(env, 1, R_EDX),
+                          kvm_arch_get_supported_cpuid(cpu, 1, R_EDX),
                           feature_name);
         kvm_trim_features(&env->cpuid_ext_features,
-                          kvm_arch_get_supported_cpuid(env, 1, R_ECX),
+                          kvm_arch_get_supported_cpuid(cpu, 1, R_ECX),
                           ext_feature_name);
         kvm_trim_features(&env->cpuid_ext2_features,
-                          kvm_arch_get_supported_cpuid(env, 0x80000001, R_EDX),
+                          kvm_arch_get_supported_cpuid(cpu, 0x80000001, R_EDX),
                           ext2_feature_name);
         kvm_trim_features(&env->cpuid_ext3_features,
-                          kvm_arch_get_supported_cpuid(env, 0x80000001, R_ECX),
+                          kvm_arch_get_supported_cpuid(cpu, 0x80000001, R_ECX),
                           ext3_feature_name);
     }
 
@@ -1798,9 +1797,10 @@
 #if !defined(CONFIG_USER_ONLY)
 void do_cpu_init(CPUX86State *env)
 {
-    int sipi = env->interrupt_request & CPU_INTERRUPT_SIPI;
-    cpu_reset(env);
-    env->interrupt_request = sipi;
+    CPUState *cpu = ENV_GET_CPU(env);
+    int sipi = cpu->interrupt_request & CPU_INTERRUPT_SIPI;
+    cpu_reset(cpu);
+    cpu->interrupt_request = sipi;
     apic_init_reset(env);
 }
 
diff --git a/target-i386/helper.h b/target-i386/helper.h
index d03e00e..24f9e20 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -1,92 +1,92 @@
 #include "exec/def-helper.h"
 
-DEF_HELPER_FLAGS_1(cc_compute_all, TCG_CALL_PURE, i32, int)
-DEF_HELPER_FLAGS_1(cc_compute_c, TCG_CALL_PURE, i32, int)
+DEF_HELPER_FLAGS_2(cc_compute_all, TCG_CALL_NO_SE, i32, env, int)
+DEF_HELPER_FLAGS_2(cc_compute_c, TCG_CALL_NO_SE, i32, env, int)
 
 DEF_HELPER_0(lock, void)
 DEF_HELPER_0(unlock, void)
-DEF_HELPER_2(write_eflags, void, tl, i32)
-DEF_HELPER_0(read_eflags, tl)
-DEF_HELPER_1(divb_AL, void, tl)
-DEF_HELPER_1(idivb_AL, void, tl)
-DEF_HELPER_1(divw_AX, void, tl)
-DEF_HELPER_1(idivw_AX, void, tl)
-DEF_HELPER_1(divl_EAX, void, tl)
-DEF_HELPER_1(idivl_EAX, void, tl)
+DEF_HELPER_3(write_eflags, void, env, tl, i32)
+DEF_HELPER_1(read_eflags, tl, env)
+DEF_HELPER_2(divb_AL, void, env, tl)
+DEF_HELPER_2(idivb_AL, void, env, tl)
+DEF_HELPER_2(divw_AX, void, env, tl)
+DEF_HELPER_2(idivw_AX, void, env, tl)
+DEF_HELPER_2(divl_EAX, void, env, tl)
+DEF_HELPER_2(idivl_EAX, void, env, tl)
 #ifdef TARGET_X86_64
-DEF_HELPER_1(mulq_EAX_T0, void, tl)
-DEF_HELPER_1(imulq_EAX_T0, void, tl)
-DEF_HELPER_2(imulq_T0_T1, tl, tl, tl)
-DEF_HELPER_1(divq_EAX, void, tl)
-DEF_HELPER_1(idivq_EAX, void, tl)
+DEF_HELPER_2(mulq_EAX_T0, void, env, tl)
+DEF_HELPER_2(imulq_EAX_T0, void, env, tl)
+DEF_HELPER_3(imulq_T0_T1, tl, env, tl, tl)
+DEF_HELPER_2(divq_EAX, void, env, tl)
+DEF_HELPER_2(idivq_EAX, void, env, tl)
 #endif
 
-DEF_HELPER_1(aam, void, int)
-DEF_HELPER_1(aad, void, int)
-DEF_HELPER_0(aaa, void)
-DEF_HELPER_0(aas, void)
-DEF_HELPER_0(daa, void)
-DEF_HELPER_0(das, void)
+DEF_HELPER_2(aam, void, env, int)
+DEF_HELPER_2(aad, void, env, int)
+DEF_HELPER_1(aaa, void, env)
+DEF_HELPER_1(aas, void, env)
+DEF_HELPER_1(daa, void, env)
+DEF_HELPER_1(das, void, env)
 
-DEF_HELPER_1(lsl, tl, tl)
-DEF_HELPER_1(lar, tl, tl)
-DEF_HELPER_1(verr, void, tl)
-DEF_HELPER_1(verw, void, tl)
-DEF_HELPER_1(lldt, void, int)
-DEF_HELPER_1(ltr, void, int)
-DEF_HELPER_2(load_seg, void, int, int)
-DEF_HELPER_3(ljmp_protected, void, int, tl, int)
-DEF_HELPER_4(lcall_real, void, int, tl, int, int)
-DEF_HELPER_4(lcall_protected, void, int, tl, int, int)
-DEF_HELPER_1(iret_real, void, int)
-DEF_HELPER_2(iret_protected, void, int, int)
-DEF_HELPER_2(lret_protected, void, int, int)
-DEF_HELPER_1(read_crN, tl, int)
-DEF_HELPER_2(write_crN, void, int, tl)
-DEF_HELPER_1(lmsw, void, tl)
-DEF_HELPER_0(clts, void)
-DEF_HELPER_2(movl_drN_T0, void, int, tl)
-DEF_HELPER_1(invlpg, void, tl)
+DEF_HELPER_2(lsl, tl, env, tl)
+DEF_HELPER_2(lar, tl, env, tl)
+DEF_HELPER_2(verr, void, env, tl)
+DEF_HELPER_2(verw, void, env, tl)
+DEF_HELPER_2(lldt, void, env, int)
+DEF_HELPER_2(ltr, void, env, int)
+DEF_HELPER_3(load_seg, void, env, int, int)
+DEF_HELPER_4(ljmp_protected, void, env, int, tl, int)
+DEF_HELPER_5(lcall_real, void, env, int, tl, int, int)
+DEF_HELPER_5(lcall_protected, void, env, int, tl, int, int)
+DEF_HELPER_2(iret_real, void, env, int)
+DEF_HELPER_3(iret_protected, void, env, int, int)
+DEF_HELPER_3(lret_protected, void, env, int, int)
+DEF_HELPER_2(read_crN, tl, env, int)
+DEF_HELPER_3(write_crN, void, env, int, tl)
+DEF_HELPER_2(lmsw, void, env, tl)
+DEF_HELPER_1(clts, void, env)
+DEF_HELPER_3(movl_drN_T0, void, env, int, tl)
+DEF_HELPER_2(invlpg, void, env, tl)
 
-DEF_HELPER_3(enter_level, void, int, int, tl)
+DEF_HELPER_4(enter_level, void, env, int, int, tl)
 #ifdef TARGET_X86_64
-DEF_HELPER_3(enter64_level, void, int, int, tl)
+DEF_HELPER_4(enter64_level, void, env, int, int, tl)
 #endif
-DEF_HELPER_0(sysenter, void)
-DEF_HELPER_1(sysexit, void, int)
+DEF_HELPER_1(sysenter, void, env)
+DEF_HELPER_2(sysexit, void, env, int)
 #ifdef TARGET_X86_64
-DEF_HELPER_1(syscall, void, int)
-DEF_HELPER_1(sysret, void, int)
+DEF_HELPER_2(syscall, void, env, int)
+DEF_HELPER_2(sysret, void, env, int)
 #endif
-DEF_HELPER_1(hlt, void, int)
-DEF_HELPER_1(monitor, void, tl)
-DEF_HELPER_1(mwait, void, int)
-DEF_HELPER_0(debug, void)
-DEF_HELPER_0(reset_rf, void)
-DEF_HELPER_2(raise_interrupt, void, int, int)
-DEF_HELPER_1(raise_exception, void, int)
-DEF_HELPER_0(cli, void)
-DEF_HELPER_0(sti, void)
-DEF_HELPER_0(set_inhibit_irq, void)
-DEF_HELPER_0(reset_inhibit_irq, void)
-DEF_HELPER_2(boundw, void, tl, int)
-DEF_HELPER_2(boundl, void, tl, int)
-DEF_HELPER_0(rsm, void)
-DEF_HELPER_1(into, void, int)
-DEF_HELPER_1(cmpxchg8b, void, tl)
+DEF_HELPER_2(hlt, void, env, int)
+DEF_HELPER_2(monitor, void, env, tl)
+DEF_HELPER_2(mwait, void, env, int)
+DEF_HELPER_1(debug, void, env)
+DEF_HELPER_1(reset_rf, void, env)
+DEF_HELPER_3(raise_interrupt, void, env, int, int)
+DEF_HELPER_2(raise_exception, void, env, int)
+DEF_HELPER_1(cli, void, env)
+DEF_HELPER_1(sti, void, env)
+DEF_HELPER_1(set_inhibit_irq, void, env)
+DEF_HELPER_1(reset_inhibit_irq, void, env)
+DEF_HELPER_3(boundw, void, env, tl, int)
+DEF_HELPER_3(boundl, void, env, tl, int)
+DEF_HELPER_1(rsm, void, env)
+DEF_HELPER_2(into, void, env, int)
+DEF_HELPER_2(cmpxchg8b, void, env, tl)
 #ifdef TARGET_X86_64
-DEF_HELPER_1(cmpxchg16b, void, tl)
+DEF_HELPER_2(cmpxchg16b, void, env, tl)
 #endif
-DEF_HELPER_0(single_step, void)
-DEF_HELPER_0(cpuid, void)
-DEF_HELPER_0(rdtsc, void)
-DEF_HELPER_0(rdpmc, void)
-DEF_HELPER_0(rdmsr, void)
-DEF_HELPER_0(wrmsr, void)
+DEF_HELPER_1(single_step, void, env)
+DEF_HELPER_1(cpuid, void, env)
+DEF_HELPER_1(rdtsc, void, env)
+DEF_HELPER_1(rdpmc, void, env)
+DEF_HELPER_1(rdmsr, void, env)
+DEF_HELPER_1(wrmsr, void, env)
 
-DEF_HELPER_1(check_iob, void, i32)
-DEF_HELPER_1(check_iow, void, i32)
-DEF_HELPER_1(check_iol, void, i32)
+DEF_HELPER_2(check_iob, void, env, i32)
+DEF_HELPER_2(check_iow, void, env, i32)
+DEF_HELPER_2(check_iol, void, env, i32)
 DEF_HELPER_2(outb, void, i32, i32)
 DEF_HELPER_1(inb, tl, i32)
 DEF_HELPER_2(outw, void, i32, i32)
@@ -94,125 +94,125 @@
 DEF_HELPER_2(outl, void, i32, i32)
 DEF_HELPER_1(inl, tl, i32)
 
-DEF_HELPER_2(svm_check_intercept_param, void, i32, i64)
-DEF_HELPER_2(vmexit, void, i32, i64)
-DEF_HELPER_3(svm_check_io, void, i32, i32, i32)
-DEF_HELPER_2(vmrun, void, int, int)
-DEF_HELPER_0(vmmcall, void)
-DEF_HELPER_1(vmload, void, int)
-DEF_HELPER_1(vmsave, void, int)
-DEF_HELPER_0(stgi, void)
-DEF_HELPER_0(clgi, void)
-DEF_HELPER_0(skinit, void)
-DEF_HELPER_1(invlpga, void, int)
+DEF_HELPER_3(svm_check_intercept_param, void, env, i32, i64)
+DEF_HELPER_3(vmexit, void, env, i32, i64)
+DEF_HELPER_4(svm_check_io, void, env, i32, i32, i32)
+DEF_HELPER_3(vmrun, void, env, int, int)
+DEF_HELPER_1(vmmcall, void, env)
+DEF_HELPER_2(vmload, void, env, int)
+DEF_HELPER_2(vmsave, void, env, int)
+DEF_HELPER_1(stgi, void, env)
+DEF_HELPER_1(clgi, void, env)
+DEF_HELPER_1(skinit, void, env)
+DEF_HELPER_2(invlpga, void, env, int)
 
 /* x86 FPU */
 
-DEF_HELPER_1(flds_FT0, void, i32)
-DEF_HELPER_1(fldl_FT0, void, i64)
-DEF_HELPER_1(fildl_FT0, void, s32)
-DEF_HELPER_1(flds_ST0, void, i32)
-DEF_HELPER_1(fldl_ST0, void, i64)
-DEF_HELPER_1(fildl_ST0, void, s32)
-DEF_HELPER_1(fildll_ST0, void, s64)
-DEF_HELPER_0(fsts_ST0, i32)
-DEF_HELPER_0(fstl_ST0, i64)
-DEF_HELPER_0(fist_ST0, s32)
-DEF_HELPER_0(fistl_ST0, s32)
-DEF_HELPER_0(fistll_ST0, s64)
-DEF_HELPER_0(fistt_ST0, s32)
-DEF_HELPER_0(fisttl_ST0, s32)
-DEF_HELPER_0(fisttll_ST0, s64)
-DEF_HELPER_1(fldt_ST0, void, tl)
-DEF_HELPER_1(fstt_ST0, void, tl)
-DEF_HELPER_0(fpush, void)
-DEF_HELPER_0(fpop, void)
-DEF_HELPER_0(fdecstp, void)
-DEF_HELPER_0(fincstp, void)
-DEF_HELPER_1(ffree_STN, void, int)
-DEF_HELPER_0(fmov_ST0_FT0, void)
-DEF_HELPER_1(fmov_FT0_STN, void, int)
-DEF_HELPER_1(fmov_ST0_STN, void, int)
-DEF_HELPER_1(fmov_STN_ST0, void, int)
-DEF_HELPER_1(fxchg_ST0_STN, void, int)
-DEF_HELPER_0(fcom_ST0_FT0, void)
-DEF_HELPER_0(fucom_ST0_FT0, void)
-DEF_HELPER_0(fcomi_ST0_FT0, void)
-DEF_HELPER_0(fucomi_ST0_FT0, void)
-DEF_HELPER_0(fadd_ST0_FT0, void)
-DEF_HELPER_0(fmul_ST0_FT0, void)
-DEF_HELPER_0(fsub_ST0_FT0, void)
-DEF_HELPER_0(fsubr_ST0_FT0, void)
-DEF_HELPER_0(fdiv_ST0_FT0, void)
-DEF_HELPER_0(fdivr_ST0_FT0, void)
-DEF_HELPER_1(fadd_STN_ST0, void, int)
-DEF_HELPER_1(fmul_STN_ST0, void, int)
-DEF_HELPER_1(fsub_STN_ST0, void, int)
-DEF_HELPER_1(fsubr_STN_ST0, void, int)
-DEF_HELPER_1(fdiv_STN_ST0, void, int)
-DEF_HELPER_1(fdivr_STN_ST0, void, int)
-DEF_HELPER_0(fchs_ST0, void)
-DEF_HELPER_0(fabs_ST0, void)
-DEF_HELPER_0(fxam_ST0, void)
-DEF_HELPER_0(fld1_ST0, void)
-DEF_HELPER_0(fldl2t_ST0, void)
-DEF_HELPER_0(fldl2e_ST0, void)
-DEF_HELPER_0(fldpi_ST0, void)
-DEF_HELPER_0(fldlg2_ST0, void)
-DEF_HELPER_0(fldln2_ST0, void)
-DEF_HELPER_0(fldz_ST0, void)
-DEF_HELPER_0(fldz_FT0, void)
-DEF_HELPER_0(fnstsw, i32)
-DEF_HELPER_0(fnstcw, i32)
-DEF_HELPER_1(fldcw, void, i32)
-DEF_HELPER_0(fclex, void)
-DEF_HELPER_0(fwait, void)
-DEF_HELPER_0(fninit, void)
-DEF_HELPER_1(fbld_ST0, void, tl)
-DEF_HELPER_1(fbst_ST0, void, tl)
-DEF_HELPER_0(f2xm1, void)
-DEF_HELPER_0(fyl2x, void)
-DEF_HELPER_0(fptan, void)
-DEF_HELPER_0(fpatan, void)
-DEF_HELPER_0(fxtract, void)
-DEF_HELPER_0(fprem1, void)
-DEF_HELPER_0(fprem, void)
-DEF_HELPER_0(fyl2xp1, void)
-DEF_HELPER_0(fsqrt, void)
-DEF_HELPER_0(fsincos, void)
-DEF_HELPER_0(frndint, void)
-DEF_HELPER_0(fscale, void)
-DEF_HELPER_0(fsin, void)
-DEF_HELPER_0(fcos, void)
-DEF_HELPER_2(fstenv, void, tl, int)
-DEF_HELPER_2(fldenv, void, tl, int)
-DEF_HELPER_2(fsave, void, tl, int)
-DEF_HELPER_2(frstor, void, tl, int)
-DEF_HELPER_2(fxsave, void, tl, int)
-DEF_HELPER_2(fxrstor, void, tl, int)
+DEF_HELPER_2(flds_FT0, void, env, i32)
+DEF_HELPER_2(fldl_FT0, void, env, i64)
+DEF_HELPER_2(fildl_FT0, void, env, s32)
+DEF_HELPER_2(flds_ST0, void, env, i32)
+DEF_HELPER_2(fldl_ST0, void, env, i64)
+DEF_HELPER_2(fildl_ST0, void, env, s32)
+DEF_HELPER_2(fildll_ST0, void, env, s64)
+DEF_HELPER_1(fsts_ST0, i32, env)
+DEF_HELPER_1(fstl_ST0, i64, env)
+DEF_HELPER_1(fist_ST0, s32, env)
+DEF_HELPER_1(fistl_ST0, s32, env)
+DEF_HELPER_1(fistll_ST0, s64, env)
+DEF_HELPER_1(fistt_ST0, s32, env)
+DEF_HELPER_1(fisttl_ST0, s32, env)
+DEF_HELPER_1(fisttll_ST0, s64, env)
+DEF_HELPER_2(fldt_ST0, void, env, tl)
+DEF_HELPER_2(fstt_ST0, void, env, tl)
+DEF_HELPER_1(fpush, void, env)
+DEF_HELPER_1(fpop, void, env)
+DEF_HELPER_1(fdecstp, void, env)
+DEF_HELPER_1(fincstp, void, env)
+DEF_HELPER_2(ffree_STN, void, env, int)
+DEF_HELPER_1(fmov_ST0_FT0, void, env)
+DEF_HELPER_2(fmov_FT0_STN, void, env, int)
+DEF_HELPER_2(fmov_ST0_STN, void, env, int)
+DEF_HELPER_2(fmov_STN_ST0, void, env, int)
+DEF_HELPER_2(fxchg_ST0_STN, void, env, int)
+DEF_HELPER_1(fcom_ST0_FT0, void, env)
+DEF_HELPER_1(fucom_ST0_FT0, void, env)
+DEF_HELPER_1(fcomi_ST0_FT0, void, env)
+DEF_HELPER_1(fucomi_ST0_FT0, void, env)
+DEF_HELPER_1(fadd_ST0_FT0, void, env)
+DEF_HELPER_1(fmul_ST0_FT0, void, env)
+DEF_HELPER_1(fsub_ST0_FT0, void, env)
+DEF_HELPER_1(fsubr_ST0_FT0, void, env)
+DEF_HELPER_1(fdiv_ST0_FT0, void, env)
+DEF_HELPER_1(fdivr_ST0_FT0, void, env)
+DEF_HELPER_2(fadd_STN_ST0, void, env, int)
+DEF_HELPER_2(fmul_STN_ST0, void, env, int)
+DEF_HELPER_2(fsub_STN_ST0, void, env, int)
+DEF_HELPER_2(fsubr_STN_ST0, void, env, int)
+DEF_HELPER_2(fdiv_STN_ST0, void, env, int)
+DEF_HELPER_2(fdivr_STN_ST0, void, env, int)
+DEF_HELPER_1(fchs_ST0, void, env)
+DEF_HELPER_1(fabs_ST0, void, env)
+DEF_HELPER_1(fxam_ST0, void, env)
+DEF_HELPER_1(fld1_ST0, void, env)
+DEF_HELPER_1(fldl2t_ST0, void, env)
+DEF_HELPER_1(fldl2e_ST0, void, env)
+DEF_HELPER_1(fldpi_ST0, void, env)
+DEF_HELPER_1(fldlg2_ST0, void, env)
+DEF_HELPER_1(fldln2_ST0, void, env)
+DEF_HELPER_1(fldz_ST0, void, env)
+DEF_HELPER_1(fldz_FT0, void, env)
+DEF_HELPER_1(fnstsw, i32, env)
+DEF_HELPER_1(fnstcw, i32, env)
+DEF_HELPER_2(fldcw, void, env, i32)
+DEF_HELPER_1(fclex, void, env)
+DEF_HELPER_1(fwait, void, env)
+DEF_HELPER_1(fninit, void, env)
+DEF_HELPER_2(fbld_ST0, void, env, tl)
+DEF_HELPER_2(fbst_ST0, void, env, tl)
+DEF_HELPER_1(f2xm1, void, env)
+DEF_HELPER_1(fyl2x, void, env)
+DEF_HELPER_1(fptan, void, env)
+DEF_HELPER_1(fpatan, void, env)
+DEF_HELPER_1(fxtract, void, env)
+DEF_HELPER_1(fprem1, void, env)
+DEF_HELPER_1(fprem, void, env)
+DEF_HELPER_1(fyl2xp1, void, env)
+DEF_HELPER_1(fsqrt, void, env)
+DEF_HELPER_1(fsincos, void, env)
+DEF_HELPER_1(frndint, void, env)
+DEF_HELPER_1(fscale, void, env)
+DEF_HELPER_1(fsin, void, env)
+DEF_HELPER_1(fcos, void, env)
+DEF_HELPER_3(fstenv, void, env, tl, int)
+DEF_HELPER_3(fldenv, void, env, tl, int)
+DEF_HELPER_3(fsave, void, env, tl, int)
+DEF_HELPER_3(frstor, void, env, tl, int)
+DEF_HELPER_3(fxsave, void, env, tl, int)
+DEF_HELPER_3(fxrstor, void, env, tl, int)
 DEF_HELPER_1(bsf, tl, tl)
 DEF_HELPER_1(bsr, tl, tl)
 
 /* MMX/SSE */
 
-DEF_HELPER_0(enter_mmx, void)
-DEF_HELPER_0(emms, void)
-DEF_HELPER_2(movq, void, ptr, ptr)
+DEF_HELPER_1(enter_mmx, void, env)
+DEF_HELPER_1(emms, void, env)
+DEF_HELPER_3(movq, void, env, ptr, ptr)
 
 #define SHIFT 0
 #include "ops_sse_header.h"
 #define SHIFT 1
 #include "ops_sse_header.h"
 
-DEF_HELPER_2(rclb, tl, tl, tl)
-DEF_HELPER_2(rclw, tl, tl, tl)
-DEF_HELPER_2(rcll, tl, tl, tl)
-DEF_HELPER_2(rcrb, tl, tl, tl)
-DEF_HELPER_2(rcrw, tl, tl, tl)
-DEF_HELPER_2(rcrl, tl, tl, tl)
+DEF_HELPER_3(rclb, tl, env, tl, tl)
+DEF_HELPER_3(rclw, tl, env, tl, tl)
+DEF_HELPER_3(rcll, tl, env, tl, tl)
+DEF_HELPER_3(rcrb, tl, env, tl, tl)
+DEF_HELPER_3(rcrw, tl, env, tl, tl)
+DEF_HELPER_3(rcrl, tl, env, tl, tl)
 #ifdef TARGET_X86_64
-DEF_HELPER_2(rclq, tl, tl, tl)
-DEF_HELPER_2(rcrq, tl, tl, tl)
+DEF_HELPER_3(rclq, tl, env, tl, tl)
+DEF_HELPER_3(rcrq, tl, env, tl, tl)
 #endif
 
 #include "exec/def-helper.h"
diff --git a/target-i386/int_helper.c b/target-i386/int_helper.c
new file mode 100644
index 0000000..81159d1
--- /dev/null
+++ b/target-i386/int_helper.c
@@ -0,0 +1,467 @@
+/*
+ *  x86 integer helpers
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "cpu.h"
+#include "qemu/host-utils.h"
+#include "helper.h"
+
+//#define DEBUG_MULDIV
+
+/* modulo 17 table */
+static const uint8_t rclw_table[32] = {
+    0, 1, 2, 3, 4, 5, 6, 7,
+    8, 9,10,11,12,13,14,15,
+   16, 0, 1, 2, 3, 4, 5, 6,
+    7, 8, 9,10,11,12,13,14,
+};
+
+/* modulo 9 table */
+static const uint8_t rclb_table[32] = {
+    0, 1, 2, 3, 4, 5, 6, 7,
+    8, 0, 1, 2, 3, 4, 5, 6,
+    7, 8, 0, 1, 2, 3, 4, 5,
+    6, 7, 8, 0, 1, 2, 3, 4,
+};
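RCL on an N-bit operand effectively rotates through N+1 bits because CF takes
part in the rotation, so only count mod (N+1) matters; the two tables above
precompute that reduction for counts 0..31. An illustrative check, assuming
the tables are in scope:

    #include <assert.h>

    static void check_rcl_tables(void)
    {
        for (int c = 0; c < 32; c++) {
            assert(rclb_table[c] == c % 9);    /* 8-bit RCL: 9-bit rotate   */
            assert(rclw_table[c] == c % 17);   /* 16-bit RCL: 17-bit rotate */
        }
    }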
+
+/* division, flags are undefined */
+
+void helper_divb_AL(CPUX86State *env, target_ulong t0)
+{
+    unsigned int num, den, q, r;
+
+    num = (EAX & 0xffff);
+    den = (t0 & 0xff);
+    if (den == 0) {
+        raise_exception(env, EXCP00_DIVZ);
+    }
+    q = (num / den);
+    if (q > 0xff)
+        raise_exception(env, EXCP00_DIVZ);
+    q &= 0xff;
+    r = (num % den) & 0xff;
+    EAX = (EAX & ~0xffff) | (r << 8) | q;
+}
+
+void helper_idivb_AL(CPUX86State *env, target_ulong t0)
+{
+    int num, den, q, r;
+
+    num = (int16_t)EAX;
+    den = (int8_t)t0;
+    if (den == 0) {
+        raise_exception(env, EXCP00_DIVZ);
+    }
+    q = (num / den);
+    if (q != (int8_t)q)
+        raise_exception(env, EXCP00_DIVZ);
+    q &= 0xff;
+    r = (num % den) & 0xff;
+    EAX = (EAX & ~0xffff) | (r << 8) | q;
+}
+
+void helper_divw_AX(CPUX86State *env, target_ulong t0)
+{
+    unsigned int num, den, q, r;
+
+    num = (EAX & 0xffff) | ((EDX & 0xffff) << 16);
+    den = (t0 & 0xffff);
+    if (den == 0) {
+        raise_exception(env, EXCP00_DIVZ);
+    }
+    q = (num / den);
+    if (q > 0xffff)
+        raise_exception(env, EXCP00_DIVZ);
+    q &= 0xffff;
+    r = (num % den) & 0xffff;
+    EAX = (EAX & ~0xffff) | q;
+    EDX = (EDX & ~0xffff) | r;
+}
+
+void helper_idivw_AX(CPUX86State *env, target_ulong t0)
+{
+    int num, den, q, r;
+
+    num = (EAX & 0xffff) | ((EDX & 0xffff) << 16);
+    den = (int16_t)t0;
+    if (den == 0) {
+        raise_exception(env, EXCP00_DIVZ);
+    }
+    q = (num / den);
+    if (q != (int16_t)q)
+        raise_exception(env, EXCP00_DIVZ);
+    q &= 0xffff;
+    r = (num % den) & 0xffff;
+    EAX = (EAX & ~0xffff) | q;
+    EDX = (EDX & ~0xffff) | r;
+}
+
+void helper_divl_EAX(CPUX86State *env, target_ulong t0)
+{
+    unsigned int den, r;
+    uint64_t num, q;
+
+    num = ((uint32_t)EAX) | ((uint64_t)((uint32_t)EDX) << 32);
+    den = t0;
+    if (den == 0) {
+        raise_exception(env, EXCP00_DIVZ);
+    }
+    q = (num / den);
+    r = (num % den);
+    if (q > 0xffffffff)
+        raise_exception(env, EXCP00_DIVZ);
+    EAX = (uint32_t)q;
+    EDX = (uint32_t)r;
+}
+
+void helper_idivl_EAX(CPUX86State *env, target_ulong t0)
+{
+    int den, r;
+    int64_t num, q;
+
+    num = ((uint32_t)EAX) | ((uint64_t)((uint32_t)EDX) << 32);
+    den = t0;
+    if (den == 0) {
+        raise_exception(env, EXCP00_DIVZ);
+    }
+    q = (num / den);
+    r = (num % den);
+    if (q != (int32_t)q)
+        raise_exception(env, EXCP00_DIVZ);
+    EAX = (uint32_t)q;
+    EDX = (uint32_t)r;
+}
+
+/* bcd */
+
+/* XXX: exception */
+void helper_aam(CPUX86State *env, int base)
+{
+    int al, ah;
+    al = EAX & 0xff;
+    ah = al / base;
+    al = al % base;
+    EAX = (EAX & ~0xffff) | al | (ah << 8);
+    CC_DST = al;
+}
+
+void helper_aad(CPUX86State *env, int base)
+{
+    int al, ah;
+    al = EAX & 0xff;
+    ah = (EAX >> 8) & 0xff;
+    al = ((ah * base) + al) & 0xff;
+    EAX = (EAX & ~0xffff) | al;
+    CC_DST = al;
+}
+
+void helper_aaa(CPUX86State *env)
+{
+    int icarry;
+    int al, ah, af;
+    int eflags;
+
+    eflags = helper_cc_compute_all(env, CC_OP);
+    af = eflags & CC_A;
+    al = EAX & 0xff;
+    ah = (EAX >> 8) & 0xff;
+
+    icarry = (al > 0xf9);
+    if (((al & 0x0f) > 9 ) || af) {
+        al = (al + 6) & 0x0f;
+        ah = (ah + 1 + icarry) & 0xff;
+        eflags |= CC_C | CC_A;
+    } else {
+        eflags &= ~(CC_C | CC_A);
+        al &= 0x0f;
+    }
+    EAX = (EAX & ~0xffff) | al | (ah << 8);
+    CC_SRC = eflags;
+}
+
+void helper_aas(CPUX86State *env)
+{
+    int icarry;
+    int al, ah, af;
+    int eflags;
+
+    eflags = helper_cc_compute_all(env, CC_OP);
+    af = eflags & CC_A;
+    al = EAX & 0xff;
+    ah = (EAX >> 8) & 0xff;
+
+    icarry = (al < 6);
+    if (((al & 0x0f) > 9 ) || af) {
+        al = (al - 6) & 0x0f;
+        ah = (ah - 1 - icarry) & 0xff;
+        eflags |= CC_C | CC_A;
+    } else {
+        eflags &= ~(CC_C | CC_A);
+        al &= 0x0f;
+    }
+    EAX = (EAX & ~0xffff) | al | (ah << 8);
+    CC_SRC = eflags;
+}
+
+void helper_daa(CPUX86State *env)
+{
+    int al, af, cf;
+    int eflags;
+
+    eflags = helper_cc_compute_all(env, CC_OP);
+    cf = eflags & CC_C;
+    af = eflags & CC_A;
+    al = EAX & 0xff;
+
+    eflags = 0;
+    if (((al & 0x0f) > 9 ) || af) {
+        al = (al + 6) & 0xff;
+        eflags |= CC_A;
+    }
+    if ((al > 0x9f) || cf) {
+        al = (al + 0x60) & 0xff;
+        eflags |= CC_C;
+    }
+    EAX = (EAX & ~0xff) | al;
+    /* well, speed is not an issue here, so we compute the flags by hand */
+    eflags |= (al == 0) << 6; /* zf */
+    eflags |= parity_table[al]; /* pf */
+    eflags |= (al & 0x80); /* sf */
+    CC_SRC = eflags;
+}
+
+void helper_das(CPUX86State *env)
+{
+    int al, al1, af, cf;
+    int eflags;
+
+    eflags = helper_cc_compute_all(env, CC_OP);
+    cf = eflags & CC_C;
+    af = eflags & CC_A;
+    al = EAX & 0xff;
+
+    eflags = 0;
+    al1 = al;
+    if (((al & 0x0f) > 9 ) || af) {
+        eflags |= CC_A;
+        if (al < 6 || cf)
+            eflags |= CC_C;
+        al = (al - 6) & 0xff;
+    }
+    if ((al1 > 0x99) || cf) {
+        al = (al - 0x60) & 0xff;
+        eflags |= CC_C;
+    }
+    EAX = (EAX & ~0xff) | al;
+    /* well, speed is not an issue here, so we compute the flags by hand */
+    eflags |= (al == 0) << 6; /* zf */
+    eflags |= parity_table[al]; /* pf */
+    eflags |= (al & 0x80); /* sf */
+    CC_SRC = eflags;
+}
+
+#ifdef TARGET_X86_64
+
+static void add128(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b)
+{
+    *plow += a;
+    /* carry test */
+    if (*plow < a)
+        (*phigh)++;
+    *phigh += b;
+}
+
+static void neg128(uint64_t *plow, uint64_t *phigh)
+{
+    *plow = ~ *plow;
+    *phigh = ~ *phigh;
+    add128(plow, phigh, 1, 0);
+}
+
+/* return TRUE if overflow */
+static int div64(uint64_t *plow, uint64_t *phigh, uint64_t b)
+{
+    uint64_t q, r, a1, a0;
+    int i, qb, ab;
+
+    a0 = *plow;
+    a1 = *phigh;
+    if (a1 == 0) {
+        q = a0 / b;
+        r = a0 % b;
+        *plow = q;
+        *phigh = r;
+    } else {
+        if (a1 >= b)
+            return 1;
+        /* XXX: use a better algorithm */
+        for(i = 0; i < 64; i++) {
+            ab = a1 >> 63;
+            a1 = (a1 << 1) | (a0 >> 63);
+            if (ab || a1 >= b) {
+                a1 -= b;
+                qb = 1;
+            } else {
+                qb = 0;
+            }
+            a0 = (a0 << 1) | qb;
+        }
+#if defined(DEBUG_MULDIV)
+        printf("div: 0x%016" PRIx64 "%016" PRIx64 " / 0x%016" PRIx64 ": q=0x%016" PRIx64 " r=0x%016" PRIx64 "\n",
+               *phigh, *plow, b, a0, a1);
+#endif
+        *plow = a0;
+        *phigh = a1;
+    }
+    return 0;
+}
+
+/* return TRUE if overflow */
+static int idiv64(uint64_t *plow, uint64_t *phigh, int64_t b)
+{
+    int sa, sb;
+    sa = ((int64_t)*phigh < 0);
+    if (sa)
+        neg128(plow, phigh);
+    sb = (b < 0);
+    if (sb)
+        b = -b;
+    if (div64(plow, phigh, b) != 0)
+        return 1;
+    if (sa ^ sb) {
+        if (*plow > (1ULL << 63))
+            return 1;
+        *plow = - *plow;
+    } else {
+        if (*plow >= (1ULL << 63))
+            return 1;
+    }
+    if (sa)
+        *phigh = - *phigh;
+    return 0;
+}
+
+void helper_mulq_EAX_T0(CPUX86State *env, target_ulong t0)
+{
+    uint64_t r0, r1;
+
+    mulu64(&r0, &r1, EAX, t0);
+    EAX = r0;
+    EDX = r1;
+    CC_DST = r0;
+    CC_SRC = r1;
+}
+
+void helper_imulq_EAX_T0(CPUX86State *env, target_ulong t0)
+{
+    uint64_t r0, r1;
+
+    muls64(&r0, &r1, EAX, t0);
+    EAX = r0;
+    EDX = r1;
+    CC_DST = r0;
+    CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63));
+}
+
+target_ulong helper_imulq_T0_T1(CPUX86State *env, target_ulong t0, target_ulong t1)
+{
+    uint64_t r0, r1;
+
+    muls64(&r0, &r1, t0, t1);
+    CC_DST = r0;
+    CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63));
+    return r0;
+}
+
+void helper_divq_EAX(CPUX86State *env, target_ulong t0)
+{
+    uint64_t r0, r1;
+    if (t0 == 0) {
+        raise_exception(env, EXCP00_DIVZ);
+    }
+    r0 = EAX;
+    r1 = EDX;
+    if (div64(&r0, &r1, t0))
+        raise_exception(env, EXCP00_DIVZ);
+    EAX = r0;
+    EDX = r1;
+}
+
+void helper_idivq_EAX(CPUX86State *env, target_ulong t0)
+{
+    uint64_t r0, r1;
+    if (t0 == 0) {
+        raise_exception(env, EXCP00_DIVZ);
+    }
+    r0 = EAX;
+    r1 = EDX;
+    if (idiv64(&r0, &r1, t0))
+        raise_exception(env, EXCP00_DIVZ);
+    EAX = r0;
+    EDX = r1;
+}
+
+#endif
+
+#define SHIFT 0
+#include "shift_helper_template.h"
+#undef SHIFT
+
+#define SHIFT 1
+#include "shift_helper_template.h"
+#undef SHIFT
+
+#define SHIFT 2
+#include "shift_helper_template.h"
+#undef SHIFT
+
+#ifdef TARGET_X86_64
+#define SHIFT 3
+#include "shift_helper_template.h"
+#undef SHIFT
+#endif
+
+/* bit operations */
+target_ulong helper_bsf(target_ulong t0)
+{
+    int count;
+    target_ulong res;
+
+    res = t0;
+    count = 0;
+    while ((res & 1) == 0) {
+        count++;
+        res >>= 1;
+    }
+    return count;
+}
+
+target_ulong helper_bsr(target_ulong t0)
+{
+    int count;
+    target_ulong res, mask;
+
+    res = t0;
+    count = TARGET_LONG_BITS - 1;
+    mask = (target_ulong)1 << (TARGET_LONG_BITS - 1);
+    while ((res & mask) == 0) {
+        count--;
+        res <<= 1;
+    }
+    return count;
+}
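helper_bsf()/helper_bsr() scan for the lowest/highest set bit by shifting the
operand one bit at a time. As written, neither loop terminates for a zero
operand, so the generated code is presumably expected to handle the source == 0
case (where BSF/BSR leave the destination undefined and set ZF) before calling
these helpers. Illustrative expectations, with values chosen for the example:

    assert(helper_bsf(0x0008) == 3);   /* only bit 3 is set         */
    assert(helper_bsr(0x0008) == 3);
    assert(helper_bsf(0x0050) == 4);   /* 0x50 has bits 4 and 6 set */
    assert(helper_bsr(0x0050) == 6);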
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index e96d5b5..465ae4e 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -67,19 +67,19 @@
     return cpuid;
 }
 
-uint32_t kvm_arch_get_supported_cpuid(CPUX86State *env, uint32_t function, int reg)
+uint32_t kvm_arch_get_supported_cpuid(CPUState *cpu, uint32_t function, int reg)
 {
     struct kvm_cpuid2 *cpuid;
     int i, max;
     uint32_t ret = 0;
     uint32_t cpuid_1_edx;
 
-    if (!kvm_check_extension(env->kvm_state, KVM_CAP_EXT_CPUID)) {
+    if (!kvm_check_extension(cpu->kvm_state, KVM_CAP_EXT_CPUID)) {
         return -1U;
     }
 
     max = 1;
-    while ((cpuid = try_get_cpuid(env->kvm_state, max)) == NULL) {
+    while ((cpuid = try_get_cpuid(cpu->kvm_state, max)) == NULL) {
         max *= 2;
     }
 
@@ -101,7 +101,7 @@
                     /* On Intel, kvm returns cpuid according to the Intel spec,
                      * so add missing bits according to the AMD spec:
                      */
-                    cpuid_1_edx = kvm_arch_get_supported_cpuid(env, 1, R_EDX);
+                    cpuid_1_edx = kvm_arch_get_supported_cpuid(cpu, 1, R_EDX);
                     ret |= cpuid_1_edx & 0xdfeff7ff;
                 }
                 break;
@@ -116,7 +116,7 @@
 
 #else
 
-uint32_t kvm_arch_get_supported_cpuid(CPUX86State *env, uint32_t function, int reg)
+uint32_t kvm_arch_get_supported_cpuid(CPUState *cpu, uint32_t function, int reg)
 {
     return -1U;
 }
@@ -127,7 +127,7 @@
 #define KVM_MP_STATE_RUNNABLE 0
 #endif
 
-int kvm_arch_init_vcpu(CPUX86State *env)
+int kvm_arch_init_vcpu(CPUState *cpu)
 {
     struct {
         struct kvm_cpuid2 cpuid;
@@ -135,6 +135,7 @@
     } __attribute__((packed)) cpuid_data;
     uint32_t limit, i, j, cpuid_i;
     uint32_t unused;
+    CPUX86State *env = cpu->env_ptr;
 
     env->mp_state = KVM_MP_STATE_RUNNABLE;
 
@@ -202,13 +203,14 @@
 
     cpuid_data.cpuid.nent = cpuid_i;
 
-    return kvm_vcpu_ioctl(env, KVM_SET_CPUID2, &cpuid_data);
+    return kvm_vcpu_ioctl(cpu, KVM_SET_CPUID2, &cpuid_data);
 }
 
 static int kvm_has_msr_star(CPUX86State *env)
 {
     static int has_msr_star;
     int ret;
+    CPUState *cpu = ENV_GET_CPU(env);
 
     /* first time */
     if (has_msr_star == 0) {
@@ -219,7 +221,7 @@
         /* Obtain MSR list from KVM.  These are the MSRs that we must
          * save/restore */
         msr_list.nmsrs = 0;
-        ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, &msr_list);
+        ret = kvm_ioctl(cpu->kvm_state, KVM_GET_MSR_INDEX_LIST, &msr_list);
         if (ret < 0)
             return 0;
 
@@ -227,7 +229,7 @@
                                     msr_list.nmsrs * sizeof(msr_list.indices[0]));
 
         kvm_msr_list->nmsrs = msr_list.nmsrs;
-        ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, kvm_msr_list);
+        ret = kvm_ioctl(cpu->kvm_state, KVM_GET_MSR_INDEX_LIST, kvm_msr_list);
         if (ret >= 0) {
             int i;
 
@@ -333,7 +335,7 @@
     int ret = 0;
 
     if (!set) {
-        ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
+        ret = kvm_vcpu_ioctl(ENV_GET_CPU(env), KVM_GET_REGS, &regs);
         if (ret < 0)
             return ret;
     }
@@ -361,7 +363,7 @@
     kvm_getput_reg(&regs.rip, &env->eip, set);
 
     if (set)
-        ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
+        ret = kvm_vcpu_ioctl(ENV_GET_CPU(env), KVM_SET_REGS, &regs);
 
     return ret;
 }
@@ -381,7 +383,7 @@
     memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs);
     fpu.mxcsr = env->mxcsr;
 
-    return kvm_vcpu_ioctl(env, KVM_SET_FPU, &fpu);
+    return kvm_vcpu_ioctl(ENV_GET_CPU(env), KVM_SET_FPU, &fpu);
 }
 
 static int kvm_put_sregs(CPUX86State *env)
@@ -433,7 +435,7 @@
 
     sregs.efer = env->efer;
 
-    return kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
+    return kvm_vcpu_ioctl(ENV_GET_CPU(env), KVM_SET_SREGS, &sregs);
 }
 
 static void kvm_msr_entry_set(struct kvm_msr_entry *entry,
@@ -467,7 +469,7 @@
 #endif
     msr_data.info.nmsrs = n;
 
-    return kvm_vcpu_ioctl(env, KVM_SET_MSRS, &msr_data);
+    return kvm_vcpu_ioctl(ENV_GET_CPU(env), KVM_SET_MSRS, &msr_data);
 
 }
 
@@ -477,7 +479,7 @@
     struct kvm_fpu fpu;
     int i, ret;
 
-    ret = kvm_vcpu_ioctl(env, KVM_GET_FPU, &fpu);
+    ret = kvm_vcpu_ioctl(ENV_GET_CPU(env), KVM_GET_FPU, &fpu);
     if (ret < 0)
         return ret;
 
@@ -493,13 +495,14 @@
     return 0;
 }
 
-int kvm_get_sregs(CPUX86State *env)
+int kvm_get_sregs(CPUState *cpu)
 {
+    CPUX86State *env = cpu->env_ptr;
     struct kvm_sregs sregs;
     uint32_t hflags;
     int ret;
 
-    ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
+    ret = kvm_vcpu_ioctl(cpu, KVM_GET_SREGS, &sregs);
     if (ret < 0)
         return ret;
 
@@ -599,7 +602,7 @@
     msrs[n++].index = MSR_LSTAR;
 #endif
     msr_data.info.nmsrs = n;
-    ret = kvm_vcpu_ioctl(env, KVM_GET_MSRS, &msr_data);
+    ret = kvm_vcpu_ioctl(ENV_GET_CPU(env), KVM_GET_MSRS, &msr_data);
     if (ret < 0)
         return ret;
 
@@ -640,8 +643,9 @@
     return 0;
 }
 
-int kvm_arch_put_registers(CPUX86State *env)
+int kvm_arch_put_registers(CPUState *cpu)
 {
+    CPUX86State *env = cpu->env_ptr;
     int ret;
 
     ret = kvm_getput_regs(env, 1);
@@ -660,20 +664,21 @@
     if (ret < 0)
         return ret;
 
-    ret = kvm_put_mp_state(env);
+    ret = kvm_put_mp_state(cpu);
     if (ret < 0)
         return ret;
 
-    ret = kvm_get_mp_state(env);
+    ret = kvm_get_mp_state(cpu);
     if (ret < 0)
         return ret;
 
     return 0;
 }
 
-int kvm_arch_get_registers(CPUX86State *env)
+int kvm_arch_get_registers(CPUState *cpu)
 {
     int ret;
+    CPUX86State *env = cpu->env_ptr;
 
     ret = kvm_getput_regs(env, 0);
     if (ret < 0)
@@ -683,7 +688,7 @@
     if (ret < 0)
         return ret;
 
-    ret = kvm_get_sregs(env);
+    ret = kvm_get_sregs(cpu);
     if (ret < 0)
         return ret;
 
@@ -694,32 +699,34 @@
     return 0;
 }
 
-int kvm_arch_vcpu_run(CPUX86State *env)
+int kvm_arch_vcpu_run(CPUState *cpu)
 {
 #ifdef CONFIG_KVM_GS_RESTORE
     if (gs_need_restore  != KVM_GS_RESTORE_NO)
-        return no_gs_ioctl(env->kvm_fd, KVM_RUN, 0);
+        return no_gs_ioctl(cpu->kvm_fd, KVM_RUN, 0);
     else
 #endif
-        return kvm_vcpu_ioctl(env, KVM_RUN, 0);
+        return kvm_vcpu_ioctl(cpu, KVM_RUN, 0);
 }
 
-int kvm_arch_pre_run(CPUX86State *env, struct kvm_run *run)
+int kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
 {
+    CPUX86State *env = cpu->env_ptr;
+
     /* Try to inject an interrupt if the guest can accept it */
     if (run->ready_for_interrupt_injection &&
-        (env->interrupt_request & CPU_INTERRUPT_HARD) &&
+        (cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
         (env->eflags & IF_MASK)) {
         int irq;
 
-        env->interrupt_request &= ~CPU_INTERRUPT_HARD;
+        cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
         irq = cpu_get_pic_interrupt(env);
         if (irq >= 0) {
             struct kvm_interrupt intr;
             intr.irq = irq;
             /* FIXME: errors */
             dprintf("injected interrupt %d\n", irq);
-            kvm_vcpu_ioctl(env, KVM_INTERRUPT, &intr);
+            kvm_vcpu_ioctl(cpu, KVM_INTERRUPT, &intr);
         }
     }
 
@@ -727,7 +734,7 @@
      * interrupt, request an interrupt window exit.  This will
      * cause a return to userspace as soon as the guest is ready to
      * receive interrupts. */
-    if ((env->interrupt_request & CPU_INTERRUPT_HARD))
+    if ((cpu->interrupt_request & CPU_INTERRUPT_HARD))
         run->request_interrupt_window = 1;
     else
         run->request_interrupt_window = 0;
@@ -742,8 +749,9 @@
     return 0;
 }
 
-int kvm_arch_post_run(CPUX86State *env, struct kvm_run *run)
+int kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
 {
+    CPUX86State *env = cpu->env_ptr;
 #ifdef CONFIG_KVM_GS_RESTORE
     gs_base_post_run();
 #endif
@@ -758,12 +766,14 @@
     return 0;
 }
 
-static int kvm_handle_halt(CPUX86State *env)
+static int kvm_handle_halt(CPUState *cpu)
 {
-    if (!((env->interrupt_request & CPU_INTERRUPT_HARD) &&
+    CPUX86State *env = cpu->env_ptr;
+
+    if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
           (env->eflags & IF_MASK)) &&
-        !(env->interrupt_request & CPU_INTERRUPT_NMI)) {
-        env->halted = 1;
+        !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
+        cpu->halted = 1;
         env->exception_index = EXCP_HLT;
         return 0;
     }
@@ -771,14 +781,14 @@
     return 1;
 }
 
-int kvm_arch_handle_exit(CPUX86State *env, struct kvm_run *run)
+int kvm_arch_handle_exit(CPUState *cpu, struct kvm_run *run)
 {
     int ret = 0;
 
     switch (run->exit_reason) {
     case KVM_EXIT_HLT:
         dprintf("handle_hlt\n");
-        ret = kvm_handle_halt(env);
+        ret = kvm_handle_halt(cpu);
         break;
     }
 
@@ -786,22 +796,22 @@
 }
 
 #ifdef KVM_CAP_SET_GUEST_DEBUG
-int kvm_arch_insert_sw_breakpoint(CPUX86State *env, struct kvm_sw_breakpoint *bp)
+int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
 {
     const static uint8_t int3 = 0xcc;
 
-    if (cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0) ||
-        cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&int3, 1, 1))
+    if (cpu_memory_rw_debug(cpu, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0) ||
+        cpu_memory_rw_debug(cpu, bp->pc, (uint8_t *)&int3, 1, 1))
         return -EINVAL;
     return 0;
 }
 
-int kvm_arch_remove_sw_breakpoint(CPUX86State *env, struct kvm_sw_breakpoint *bp)
+int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
 {
     uint8_t int3;
 
-    if (cpu_memory_rw_debug(env, bp->pc, &int3, 1, 0) || int3 != 0xcc ||
-        cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1))
+    if (cpu_memory_rw_debug(cpu, bp->pc, &int3, 1, 0) || int3 != 0xcc ||
+        cpu_memory_rw_debug(cpu, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1))
         return -EINVAL;
     return 0;
 }
@@ -894,7 +904,7 @@
 
     if (arch_info->exception == 1) {
         if (arch_info->dr6 & (1 << 14)) {
-            if (cpu_single_env->singlestep_enabled)
+            if (current_cpu->singlestep_enabled)
                 handle = 1;
         } else {
             for (n = 0; n < 4; n++)
@@ -917,18 +927,18 @@
                         break;
                     }
         }
-    } else if (kvm_find_sw_breakpoint(cpu_single_env, arch_info->pc))
+    } else if (kvm_find_sw_breakpoint(current_cpu, arch_info->pc))
         handle = 1;
 
     if (!handle)
-        kvm_update_guest_debug(cpu_single_env,
+        kvm_update_guest_debug(current_cpu,
                         (arch_info->exception == 1) ?
                         KVM_GUESTDBG_INJECT_DB : KVM_GUESTDBG_INJECT_BP);
 
     return handle;
 }
 
-void kvm_arch_update_guest_debug(CPUX86State *env, struct kvm_guest_debug *dbg)
+void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
 {
     const uint8_t type_code[] = {
         [GDB_BREAKPOINT_HW] = 0x0,
@@ -940,7 +950,7 @@
     };
     int n;
 
-    if (kvm_sw_breakpoints_active(env))
+    if (kvm_sw_breakpoints_active(cpu))
         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
 
     if (nb_hw_breakpoint > 0) {
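The kvm.c changes above all follow one mechanical pattern: vcpu-level entry points now take a CPUState * and reach the x86 register file through cpu->env_ptr, while env-only helpers go the other way via ENV_GET_CPU(env). A minimal sketch of that container/contained pattern, using simplified stand-in structs (the field layout here is illustrative, not QEMU's real definitions):

/* Simplified stand-ins for CPUState / CPUX86State to show the refactoring
 * pattern used above; the real QEMU structs are much larger. */
#include <stdio.h>
#include <stdint.h>

typedef struct CPUX86State CPUX86State;

typedef struct CPUState {
    uint32_t interrupt_request;  /* generic per-vcpu state lives here...    */
    int halted;
    CPUX86State *env_ptr;        /* ...and points at the arch-specific part */
} CPUState;

struct CPUX86State {
    uint64_t eip;                /* arch-specific registers stay in env */
    CPUState *parent;            /* stand-in for what ENV_GET_CPU(env) returns */
};

#define ENV_GET_CPU(e) ((e)->parent)

/* New-style entry point: takes the generic CPUState... */
static void arch_get_registers(CPUState *cpu)
{
    CPUX86State *env = cpu->env_ptr;      /* ...and derives env when needed */
    printf("halted=%d eip=%llu\n", cpu->halted, (unsigned long long)env->eip);
}

/* Legacy helper that still receives env goes the other way. */
static void legacy_helper(CPUX86State *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
    cpu->halted = 1;                      /* halted/interrupt_request moved to CPUState */
}

int main(void)
{
    CPUX86State env = { .eip = 0x1000 };
    CPUState cpu = { .env_ptr = &env };
    env.parent = &cpu;

    legacy_helper(&env);
    arch_get_registers(&cpu);
    return 0;
}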
diff --git a/target-i386/machine.c b/target-i386/machine.c
index 65409ac..396cafb 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -3,7 +3,7 @@
 #include "hw/i386/pc.h"
 #include "hw/isa/isa.h"
 
-#include "exec/exec-all.h"
+#include "cpu.h"
 #include "sysemu/kvm.h"
 
 static void cpu_put_seg(QEMUFile *f, SegmentCache *dt)
@@ -30,7 +30,7 @@
     int32_t a20_mask;
     int i;
 
-    cpu_synchronize_state(env, 0);
+    cpu_synchronize_state(ENV_GET_CPU(env), 0);
 
     for(i = 0; i < CPU_NB_REGS; i++)
         qemu_put_betls(f, &env->regs[i]);
@@ -255,7 +255,7 @@
         qemu_get_be64s(f, &env->pat);
         qemu_get_be32s(f, &env->hflags2);
         if (version_id < 6)
-            qemu_get_be32s(f, &env->halted);
+            qemu_get_be32s(f, &ENV_GET_CPU(env)->halted);
 
         qemu_get_be64s(f, &env->vm_hsave);
         qemu_get_be64s(f, &env->vm_vmcb);
@@ -306,6 +306,6 @@
     /* XXX: compute redundant hflags bits */
     env->hflags = hflags;
     tlb_flush(env, 1);
-    cpu_synchronize_state(env, 1);
+    cpu_synchronize_state(ENV_GET_CPU(env), 1);
     return 0;
 }
diff --git a/target-i386/mem_helper.c b/target-i386/mem_helper.c
new file mode 100644
index 0000000..9500bab
--- /dev/null
+++ b/target-i386/mem_helper.c
@@ -0,0 +1,146 @@
+/*
+ *  x86 memory access helpers
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "cpu.h"
+#include "helper.h"
+
+#if !defined(CONFIG_USER_ONLY)
+#include "exec/softmmu_exec.h"
+#endif /* !defined(CONFIG_USER_ONLY) */
+
+/* broken thread support */
+
+static spinlock_t global_cpu_lock = SPIN_LOCK_UNLOCKED;
+
+void helper_lock(void)
+{
+    spin_lock(&global_cpu_lock);
+}
+
+void helper_unlock(void)
+{
+    spin_unlock(&global_cpu_lock);
+}
+
+void helper_cmpxchg8b(CPUX86State *env, target_ulong a0)
+{
+    uint64_t d;
+    int eflags;
+
+    eflags = helper_cc_compute_all(env, CC_OP);
+    d = cpu_ldq_data(env, a0);
+    if (d == (((uint64_t)EDX << 32) | (uint32_t)EAX)) {
+        cpu_stq_data(env, a0, ((uint64_t)ECX << 32) | (uint32_t)EBX);
+        eflags |= CC_Z;
+    } else {
+        /* always do the store */
+        cpu_stq_data(env, a0, d);
+        EDX = (uint32_t)(d >> 32);
+        EAX = (uint32_t)d;
+        eflags &= ~CC_Z;
+    }
+    CC_SRC = eflags;
+}
+
+#ifdef TARGET_X86_64
+void helper_cmpxchg16b(CPUX86State *env, target_ulong a0)
+{
+    uint64_t d0, d1;
+    int eflags;
+
+    if ((a0 & 0xf) != 0)
+        raise_exception(env, EXCP0D_GPF);
+    eflags = helper_cc_compute_all(env, CC_OP);
+    d0 = cpu_ldq_data(env, a0);
+    d1 = cpu_ldq_data(env, a0 + 8);
+    if (d0 == EAX && d1 == EDX) {
+        cpu_stq_data(env, a0, EBX);
+        cpu_stq_data(env, a0 + 8, ECX);
+        eflags |= CC_Z;
+    } else {
+        /* always do the store */
+        cpu_stq_data(env, a0, d0);
+        cpu_stq_data(env, a0 + 8, d1);
+        EDX = d1;
+        EAX = d0;
+        eflags &= ~CC_Z;
+    }
+    CC_SRC = eflags;
+}
+#endif
+
+void helper_boundw(CPUX86State *env, target_ulong a0, int v)
+{
+    int low, high;
+    low = cpu_ldsw_data(env, a0);
+    high = cpu_ldsw_data(env, a0 + 2);
+    v = (int16_t)v;
+    if (v < low || v > high) {
+        raise_exception(env, EXCP05_BOUND);
+    }
+}
+
+void helper_boundl(CPUX86State *env, target_ulong a0, int v)
+{
+    int low, high;
+    low = cpu_ldl_data(env, a0);
+    high = cpu_ldl_data(env, a0 + 4);
+    if (v < low || v > high) {
+        raise_exception(env, EXCP05_BOUND);
+    }
+}
+
+#if !defined(CONFIG_USER_ONLY)
+
+#define MMUSUFFIX _mmu
+
+#define SHIFT 0
+#include "exec/softmmu_template.h"
+
+#define SHIFT 1
+#include "exec/softmmu_template.h"
+
+#define SHIFT 2
+#include "exec/softmmu_template.h"
+
+#define SHIFT 3
+#include "exec/softmmu_template.h"
+
+#endif
+
+#if !defined(CONFIG_USER_ONLY)
+/* Try to fill the TLB and raise an exception on error. If retaddr is
+   NULL, the function was called from C code (i.e. not from generated
+   code or from helper.c). */
+/* XXX: fix it to restore all registers */
+void tlb_fill(CPUX86State* env, target_ulong addr, int is_write, int mmu_idx,
+              uintptr_t retaddr)
+{
+    int ret;
+
+    ret = cpu_x86_handle_mmu_fault(env, addr, is_write, mmu_idx);
+    if (ret) {
+        if (retaddr) {
+            /* now we have a real cpu fault */
+            cpu_restore_state(env, retaddr);
+        }
+        raise_exception_err(env, env->exception_index, env->error_code);
+    }
+}
+#endif
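helper_cmpxchg8b above implements the CMPXCHG8B data path: EDX:EAX is compared with the 64-bit memory operand; on a match ECX:EBX is stored and ZF is set, otherwise the old value is written back and reloaded into EDX:EAX with ZF cleared (the store always happens). A standalone model of the same logic with the registers passed explicitly instead of through the EAX/EDX/... macros; the names here are illustrative:

/* Standalone model of the CMPXCHG8B data path used by helper_cmpxchg8b above. */
#include <assert.h>
#include <stdint.h>

typedef struct {
    uint32_t eax, edx;   /* comparand, low/high  */
    uint32_t ebx, ecx;   /* replacement, low/high */
    int zf;
} Cmpxchg8bRegs;

static void cmpxchg8b_model(uint64_t *mem, Cmpxchg8bRegs *r)
{
    uint64_t old = *mem;

    if (old == (((uint64_t)r->edx << 32) | r->eax)) {
        *mem = ((uint64_t)r->ecx << 32) | r->ebx;    /* swap in ECX:EBX */
        r->zf = 1;
    } else {
        *mem = old;                                  /* store happens either way */
        r->edx = (uint32_t)(old >> 32);              /* reload EDX:EAX */
        r->eax = (uint32_t)old;
        r->zf = 0;
    }
}

int main(void)
{
    uint64_t mem = 0x1111222233334444ULL;
    Cmpxchg8bRegs r = { .eax = 0x33334444, .edx = 0x11112222,
                        .ebx = 0xdeadbeef, .ecx = 0xcafef00d };

    cmpxchg8b_model(&mem, &r);                       /* match: memory replaced */
    assert(r.zf == 1 && mem == 0xcafef00ddeadbeefULL);

    r.eax = 0;                                       /* force a mismatch */
    cmpxchg8b_model(&mem, &r);
    assert(r.zf == 0 && r.edx == 0xcafef00d && r.eax == 0xdeadbeef);
    return 0;
}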
diff --git a/target-i386/misc_helper.c b/target-i386/misc_helper.c
new file mode 100644
index 0000000..d8dc895
--- /dev/null
+++ b/target-i386/misc_helper.c
@@ -0,0 +1,564 @@
+/*
+ *  x86 misc helpers
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "cpu.h"
+#include "exec/ioport.h"
+#include "helper.h"
+
+#if !defined(CONFIG_USER_ONLY)
+#include "exec/softmmu_exec.h"
+#endif /* !defined(CONFIG_USER_ONLY) */
+
+/* check whether port I/O is allowed by the TSS I/O permission bitmap */
+static inline void check_io(CPUX86State *env, int addr, int size)
+{
+    int io_offset, val, mask;
+
+    /* TSS must be a valid 32 bit one */
+    if (!(env->tr.flags & DESC_P_MASK) ||
+        ((env->tr.flags >> DESC_TYPE_SHIFT) & 0xf) != 9 ||
+        env->tr.limit < 103)
+        goto fail;
+    io_offset = cpu_lduw_kernel(env, env->tr.base + 0x66);
+    io_offset += (addr >> 3);
+    /* Note: the check needs two bytes */
+    if ((io_offset + 1) > env->tr.limit)
+        goto fail;
+    val = cpu_lduw_kernel(env, env->tr.base + io_offset);
+    val >>= (addr & 7);
+    mask = (1 << size) - 1;
+    /* all bits must be zero to allow the I/O */
+    if ((val & mask) != 0) {
+    fail:
+        raise_exception_err(env, EXCP0D_GPF, 0);
+    }
+}
+
+void helper_check_iob(CPUX86State *env, uint32_t t0)
+{
+    check_io(env, t0, 1);
+}
+
+void helper_check_iow(CPUX86State *env, uint32_t t0)
+{
+    check_io(env, t0, 2);
+}
+
+void helper_check_iol(CPUX86State *env, uint32_t t0)
+{
+    check_io(env, t0, 4);
+}
+
+void helper_outb(uint32_t port, uint32_t data)
+{
+    cpu_outb(port, data & 0xff);
+}
+
+target_ulong helper_inb(uint32_t port)
+{
+    return cpu_inb(port);
+}
+
+void helper_outw(uint32_t port, uint32_t data)
+{
+    cpu_outw(port, data & 0xffff);
+}
+
+target_ulong helper_inw(uint32_t port)
+{
+    return cpu_inw(port);
+}
+
+void helper_outl(uint32_t port, uint32_t data)
+{
+    cpu_outl(port, data);
+}
+
+target_ulong helper_inl(uint32_t port)
+{
+    return cpu_inl(port);
+}
+
+void helper_into(CPUX86State *env, int next_eip_addend)
+{
+    int eflags;
+    eflags = helper_cc_compute_all(env, CC_OP);
+    if (eflags & CC_O) {
+        raise_interrupt(env, EXCP04_INTO, 1, 0, next_eip_addend);
+    }
+}
+
+void helper_single_step(CPUX86State *env)
+{
+#ifndef CONFIG_USER_ONLY
+    check_hw_breakpoints(env, 1);
+    env->dr[6] |= DR6_BS;
+#endif
+    raise_exception(env, EXCP01_DB);
+}
+
+void helper_cpuid(CPUX86State *env)
+{
+    uint32_t eax, ebx, ecx, edx;
+
+    helper_svm_check_intercept_param(env, SVM_EXIT_CPUID, 0);
+
+    cpu_x86_cpuid(env, (uint32_t)EAX, (uint32_t)ECX, &eax, &ebx, &ecx, &edx);
+    EAX = eax;
+    EBX = ebx;
+    ECX = ecx;
+    EDX = edx;
+}
+
+#if defined(CONFIG_USER_ONLY)
+target_ulong helper_read_crN(CPUX86State *env, int reg)
+{
+    return 0;
+}
+
+void helper_write_crN(CPUX86State *env, int reg, target_ulong t0)
+{
+}
+
+void helper_movl_drN_T0(CPUX86State *env, int reg, target_ulong t0)
+{
+}
+#else
+target_ulong helper_read_crN(CPUX86State *env, int reg)
+{
+    target_ulong val;
+
+    helper_svm_check_intercept_param(env, SVM_EXIT_READ_CR0 + reg, 0);
+    switch(reg) {
+    default:
+        val = env->cr[reg];
+        break;
+    case 8:
+        if (!(env->hflags2 & HF2_VINTR_MASK)) {
+            val = cpu_get_apic_tpr(env);
+        } else {
+            val = env->v_tpr;
+        }
+        break;
+    }
+    return val;
+}
+
+void helper_write_crN(CPUX86State *env, int reg, target_ulong t0)
+{
+    helper_svm_check_intercept_param(env, SVM_EXIT_WRITE_CR0 + reg, 0);
+    switch(reg) {
+    case 0:
+        cpu_x86_update_cr0(env, t0);
+        break;
+    case 3:
+        cpu_x86_update_cr3(env, t0);
+        break;
+    case 4:
+        cpu_x86_update_cr4(env, t0);
+        break;
+    case 8:
+        if (!(env->hflags2 & HF2_VINTR_MASK)) {
+            cpu_set_apic_tpr(env, t0);
+        }
+        env->v_tpr = t0 & 0x0f;
+        break;
+    default:
+        env->cr[reg] = t0;
+        break;
+    }
+}
+
+void helper_movl_drN_T0(CPUX86State *env, int reg, target_ulong t0)
+{
+    int i;
+
+    if (reg < 4) {
+        hw_breakpoint_remove(env, reg);
+        env->dr[reg] = t0;
+        hw_breakpoint_insert(env, reg);
+    } else if (reg == 7) {
+        for (i = 0; i < 4; i++)
+            hw_breakpoint_remove(env, i);
+        env->dr[7] = t0;
+        for (i = 0; i < 4; i++)
+            hw_breakpoint_insert(env, i);
+    } else
+        env->dr[reg] = t0;
+}
+#endif
+
+void helper_lmsw(CPUX86State *env, target_ulong t0)
+{
+    /* only 4 lower bits of CR0 are modified. PE cannot be set to zero
+       if already set to one. */
+    t0 = (env->cr[0] & ~0xe) | (t0 & 0xf);
+    helper_write_crN(env, 0, t0);
+}
+
+void helper_invlpg(CPUX86State *env, target_ulong addr)
+{
+    helper_svm_check_intercept_param(env, SVM_EXIT_INVLPG, 0);
+    tlb_flush_page(env, addr);
+}
+
+void helper_rdtsc(CPUX86State *env)
+{
+    uint64_t val;
+
+    if ((env->cr[4] & CR4_TSD_MASK) && ((env->hflags & HF_CPL_MASK) != 0)) {
+        raise_exception(env, EXCP0D_GPF);
+    }
+    helper_svm_check_intercept_param(env, SVM_EXIT_RDTSC, 0);
+
+    val = cpu_get_tsc(env) + env->tsc_offset;
+    EAX = (uint32_t)(val);
+    EDX = (uint32_t)(val >> 32);
+}
+
+void helper_rdpmc(CPUX86State *env)
+{
+    if ((env->cr[4] & CR4_PCE_MASK) && ((env->hflags & HF_CPL_MASK) != 0)) {
+        raise_exception(env, EXCP0D_GPF);
+    }
+    helper_svm_check_intercept_param(env, SVM_EXIT_RDPMC, 0);
+
+    /* currently unimplemented */
+    raise_exception_err(env, EXCP06_ILLOP, 0);
+}
+
+#if defined(CONFIG_USER_ONLY)
+void helper_wrmsr(CPUX86State *env)
+{
+}
+
+void helper_rdmsr(CPUX86State *env)
+{
+}
+#else
+void helper_wrmsr(CPUX86State *env)
+{
+    uint64_t val;
+
+    helper_svm_check_intercept_param(env, SVM_EXIT_MSR, 1);
+
+    val = ((uint32_t)EAX) | ((uint64_t)((uint32_t)EDX) << 32);
+
+    switch((uint32_t)ECX) {
+    case MSR_IA32_SYSENTER_CS:
+        env->sysenter_cs = val & 0xffff;
+        break;
+    case MSR_IA32_SYSENTER_ESP:
+        env->sysenter_esp = val;
+        break;
+    case MSR_IA32_SYSENTER_EIP:
+        env->sysenter_eip = val;
+        break;
+    case MSR_IA32_APICBASE:
+        cpu_set_apic_base(env, val);
+        break;
+    case MSR_EFER:
+        {
+            uint64_t update_mask;
+            update_mask = 0;
+            if (env->cpuid_ext2_features & CPUID_EXT2_SYSCALL)
+                update_mask |= MSR_EFER_SCE;
+            if (env->cpuid_ext2_features & CPUID_EXT2_LM)
+                update_mask |= MSR_EFER_LME;
+            if (env->cpuid_ext2_features & CPUID_EXT2_FFXSR)
+                update_mask |= MSR_EFER_FFXSR;
+            if (env->cpuid_ext2_features & CPUID_EXT2_NX)
+                update_mask |= MSR_EFER_NXE;
+            if (env->cpuid_ext3_features & CPUID_EXT3_SVM)
+                update_mask |= MSR_EFER_SVME;
+            if (env->cpuid_ext2_features & CPUID_EXT2_FFXSR)
+                update_mask |= MSR_EFER_FFXSR;
+            cpu_load_efer(env, (env->efer & ~update_mask) |
+                          (val & update_mask));
+        }
+        break;
+    case MSR_STAR:
+        env->star = val;
+        break;
+    case MSR_PAT:
+        env->pat = val;
+        break;
+    case MSR_VM_HSAVE_PA:
+        env->vm_hsave = val;
+        break;
+#ifdef TARGET_X86_64
+    case MSR_LSTAR:
+        env->lstar = val;
+        break;
+    case MSR_CSTAR:
+        env->cstar = val;
+        break;
+    case MSR_FMASK:
+        env->fmask = val;
+        break;
+    case MSR_FSBASE:
+        env->segs[R_FS].base = val;
+        break;
+    case MSR_GSBASE:
+        env->segs[R_GS].base = val;
+        break;
+    case MSR_KERNELGSBASE:
+        env->kernelgsbase = val;
+        break;
+#endif
+    case MSR_MTRRphysBase(0):
+    case MSR_MTRRphysBase(1):
+    case MSR_MTRRphysBase(2):
+    case MSR_MTRRphysBase(3):
+    case MSR_MTRRphysBase(4):
+    case MSR_MTRRphysBase(5):
+    case MSR_MTRRphysBase(6):
+    case MSR_MTRRphysBase(7):
+        env->mtrr_var[((uint32_t)ECX - MSR_MTRRphysBase(0)) / 2].base = val;
+        break;
+    case MSR_MTRRphysMask(0):
+    case MSR_MTRRphysMask(1):
+    case MSR_MTRRphysMask(2):
+    case MSR_MTRRphysMask(3):
+    case MSR_MTRRphysMask(4):
+    case MSR_MTRRphysMask(5):
+    case MSR_MTRRphysMask(6):
+    case MSR_MTRRphysMask(7):
+        env->mtrr_var[((uint32_t)ECX - MSR_MTRRphysMask(0)) / 2].mask = val;
+        break;
+    case MSR_MTRRfix64K_00000:
+        env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix64K_00000] = val;
+        break;
+    case MSR_MTRRfix16K_80000:
+    case MSR_MTRRfix16K_A0000:
+        env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix16K_80000 + 1] = val;
+        break;
+    case MSR_MTRRfix4K_C0000:
+    case MSR_MTRRfix4K_C8000:
+    case MSR_MTRRfix4K_D0000:
+    case MSR_MTRRfix4K_D8000:
+    case MSR_MTRRfix4K_E0000:
+    case MSR_MTRRfix4K_E8000:
+    case MSR_MTRRfix4K_F0000:
+    case MSR_MTRRfix4K_F8000:
+        env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix4K_C0000 + 3] = val;
+        break;
+    case MSR_MTRRdefType:
+        env->mtrr_deftype = val;
+        break;
+    case MSR_MCG_STATUS:
+        env->mcg_status = val;
+        break;
+    case MSR_MCG_CTL:
+        if ((env->mcg_cap & MCG_CTL_P)
+            && (val == 0 || val == ~(uint64_t)0))
+            env->mcg_ctl = val;
+        break;
+    default:
+        if ((uint32_t)ECX >= MSR_MC0_CTL
+            && (uint32_t)ECX < MSR_MC0_CTL + (4 * env->mcg_cap & 0xff)) {
+            uint32_t offset = (uint32_t)ECX - MSR_MC0_CTL;
+            if ((offset & 0x3) != 0
+                || (val == 0 || val == ~(uint64_t)0))
+                env->mce_banks[offset] = val;
+            break;
+        }
+        /* XXX: exception ? */
+        break;
+    }
+}
+
+void helper_rdmsr(CPUX86State *env)
+{
+    uint64_t val;
+
+    helper_svm_check_intercept_param(env, SVM_EXIT_MSR, 0);
+
+    switch((uint32_t)ECX) {
+    case MSR_IA32_SYSENTER_CS:
+        val = env->sysenter_cs;
+        break;
+    case MSR_IA32_SYSENTER_ESP:
+        val = env->sysenter_esp;
+        break;
+    case MSR_IA32_SYSENTER_EIP:
+        val = env->sysenter_eip;
+        break;
+    case MSR_IA32_APICBASE:
+        val = cpu_get_apic_base(env);
+        break;
+    case MSR_EFER:
+        val = env->efer;
+        break;
+    case MSR_STAR:
+        val = env->star;
+        break;
+    case MSR_PAT:
+        val = env->pat;
+        break;
+    case MSR_VM_HSAVE_PA:
+        val = env->vm_hsave;
+        break;
+    case MSR_IA32_PERF_STATUS:
+        /* tsc_increment_by_tick */
+        val = 1000ULL;
+        /* CPU multiplier */
+        val |= (((uint64_t)4ULL) << 40);
+        break;
+#ifdef TARGET_X86_64
+    case MSR_LSTAR:
+        val = env->lstar;
+        break;
+    case MSR_CSTAR:
+        val = env->cstar;
+        break;
+    case MSR_FMASK:
+        val = env->fmask;
+        break;
+    case MSR_FSBASE:
+        val = env->segs[R_FS].base;
+        break;
+    case MSR_GSBASE:
+        val = env->segs[R_GS].base;
+        break;
+    case MSR_KERNELGSBASE:
+        val = env->kernelgsbase;
+        break;
+#endif
+    case MSR_MTRRphysBase(0):
+    case MSR_MTRRphysBase(1):
+    case MSR_MTRRphysBase(2):
+    case MSR_MTRRphysBase(3):
+    case MSR_MTRRphysBase(4):
+    case MSR_MTRRphysBase(5):
+    case MSR_MTRRphysBase(6):
+    case MSR_MTRRphysBase(7):
+        val = env->mtrr_var[((uint32_t)ECX - MSR_MTRRphysBase(0)) / 2].base;
+        break;
+    case MSR_MTRRphysMask(0):
+    case MSR_MTRRphysMask(1):
+    case MSR_MTRRphysMask(2):
+    case MSR_MTRRphysMask(3):
+    case MSR_MTRRphysMask(4):
+    case MSR_MTRRphysMask(5):
+    case MSR_MTRRphysMask(6):
+    case MSR_MTRRphysMask(7):
+        val = env->mtrr_var[((uint32_t)ECX - MSR_MTRRphysMask(0)) / 2].mask;
+        break;
+    case MSR_MTRRfix64K_00000:
+        val = env->mtrr_fixed[0];
+        break;
+    case MSR_MTRRfix16K_80000:
+    case MSR_MTRRfix16K_A0000:
+        val = env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix16K_80000 + 1];
+        break;
+    case MSR_MTRRfix4K_C0000:
+    case MSR_MTRRfix4K_C8000:
+    case MSR_MTRRfix4K_D0000:
+    case MSR_MTRRfix4K_D8000:
+    case MSR_MTRRfix4K_E0000:
+    case MSR_MTRRfix4K_E8000:
+    case MSR_MTRRfix4K_F0000:
+    case MSR_MTRRfix4K_F8000:
+        val = env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix4K_C0000 + 3];
+        break;
+    case MSR_MTRRdefType:
+        val = env->mtrr_deftype;
+        break;
+    case MSR_MTRRcap:
+        if (env->cpuid_features & CPUID_MTRR)
+            val = MSR_MTRRcap_VCNT | MSR_MTRRcap_FIXRANGE_SUPPORT | MSR_MTRRcap_WC_SUPPORTED;
+        else
+            /* XXX: exception ? */
+            val = 0;
+        break;
+    case MSR_MCG_CAP:
+        val = env->mcg_cap;
+        break;
+    case MSR_MCG_CTL:
+        if (env->mcg_cap & MCG_CTL_P)
+            val = env->mcg_ctl;
+        else
+            val = 0;
+        break;
+    case MSR_MCG_STATUS:
+        val = env->mcg_status;
+        break;
+    default:
+        if ((uint32_t)ECX >= MSR_MC0_CTL
+            && (uint32_t)ECX < MSR_MC0_CTL + (4 * env->mcg_cap & 0xff)) {
+            uint32_t offset = (uint32_t)ECX - MSR_MC0_CTL;
+            val = env->mce_banks[offset];
+            break;
+        }
+        /* XXX: exception ? */
+        val = 0;
+        break;
+    }
+    EAX = (uint32_t)(val);
+    EDX = (uint32_t)(val >> 32);
+}
+#endif
+
+static void do_hlt(CPUX86State *env)
+{
+    env->hflags &= ~HF_INHIBIT_IRQ_MASK; /* needed if sti is just before */
+    ENV_GET_CPU(env)->halted = 1;
+    env->exception_index = EXCP_HLT;
+    cpu_loop_exit(env);
+}
+
+void helper_hlt(CPUX86State *env, int next_eip_addend)
+{
+    helper_svm_check_intercept_param(env, SVM_EXIT_HLT, 0);
+    EIP += next_eip_addend;
+
+    do_hlt(env);
+}
+
+void helper_monitor(CPUX86State *env, target_ulong ptr)
+{
+    if ((uint32_t)ECX != 0)
+        raise_exception(env, EXCP0D_GPF);
+    /* XXX: store address ? */
+    helper_svm_check_intercept_param(env, SVM_EXIT_MONITOR, 0);
+}
+
+void helper_mwait(CPUX86State *env, int next_eip_addend)
+{
+    if ((uint32_t)ECX != 0)
+        raise_exception(env, EXCP0D_GPF);
+    helper_svm_check_intercept_param(env, SVM_EXIT_MWAIT, 0);
+    EIP += next_eip_addend;
+
+    /* XXX: not complete but not completely erroneous */
+    CPUState *cpu = ENV_GET_CPU(env);
+    if (cpu->cpu_index != 0 || QTAILQ_NEXT(cpu, node) != NULL) {
+        /* more than one CPU: do not sleep because another CPU may
+           wake this one */
+    } else {
+        do_hlt(env);
+    }
+}
+
+void helper_debug(CPUX86State *env)
+{
+    env->exception_index = EXCP_DEBUG;
+    cpu_loop_exit(env);
+}
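check_io above consults the TSS I/O permission bitmap: the 16-bit bitmap offset is read from TSS+0x66, the word covering the port is loaded, and the access is allowed only if all "size" consecutive bits starting at the port are clear (the TSS type and limit validation is omitted here). A standalone sketch of that bit test, with a flat byte array standing in for the guest memory reached through cpu_lduw_kernel:

/* Standalone model of the TSS I/O-permission-bitmap test done by check_io above. */
#include <assert.h>
#include <stdint.h>
#include <string.h>

#define TSS_IOMAP_OFFSET_FIELD 0x66   /* where the 16-bit bitmap offset lives */

static int io_allowed(const uint8_t *tss, unsigned tss_limit,
                      unsigned port, unsigned size)
{
    unsigned io_offset, bits;

    /* 16-bit bitmap offset stored at TSS+0x66, then index by port/8 */
    io_offset = tss[TSS_IOMAP_OFFSET_FIELD] |
                (tss[TSS_IOMAP_OFFSET_FIELD + 1] << 8);
    io_offset += port >> 3;
    if (io_offset + 1 > tss_limit) {              /* the check reads two bytes */
        return 0;
    }
    bits = tss[io_offset] | (tss[io_offset + 1] << 8);
    bits >>= port & 7;
    return (bits & ((1u << size) - 1)) == 0;      /* all bits clear => allowed */
}

int main(void)
{
    uint8_t tss[256];
    memset(tss, 0, sizeof(tss));
    tss[TSS_IOMAP_OFFSET_FIELD] = 0x80;              /* bitmap starts at offset 0x80 */

    assert(io_allowed(tss, sizeof(tss), 0x60, 1));   /* bit clear: access allowed */
    tss[0x80 + (0x60 >> 3)] |= 1 << (0x60 & 7);      /* set the bit for port 0x60 */
    assert(!io_allowed(tss, sizeof(tss), 0x60, 1));  /* now denied */
    return 0;
}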
diff --git a/target-i386/op_helper.c b/target-i386/op_helper.c
deleted file mode 100644
index 1cdc2bf..0000000
--- a/target-i386/op_helper.c
+++ /dev/null
@@ -1,5619 +0,0 @@
-/*
- *  i386 helpers
- *
- *  Copyright (c) 2003 Fabrice Bellard
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
- */
-#include <math.h>
-
-#define CPU_NO_GLOBAL_REGS
-#include "exec.h"
-#include "exec/exec-all.h"
-#include "qemu/host-utils.h"
-
-//#define DEBUG_PCALL
-
-
-#ifdef DEBUG_PCALL
-#  define LOG_PCALL(...) qemu_log_mask(CPU_LOG_PCALL, ## __VA_ARGS__)
-#  define LOG_PCALL_STATE(env) \
-          log_cpu_state_mask(CPU_LOG_PCALL, (env), X86_DUMP_CCOP)
-#else
-#  define LOG_PCALL(...) do { } while (0)
-#  define LOG_PCALL_STATE(env) do { } while (0)
-#endif
-
-
-#if 0
-#define raise_exception_err(a, b)\
-do {\
-    qemu_log("raise_exception line=%d\n", __LINE__);\
-    (raise_exception_err)(a, b);\
-} while (0)
-#endif
-
-static const uint8_t parity_table[256] = {
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-};
-
-/* modulo 17 table */
-static const uint8_t rclw_table[32] = {
-    0, 1, 2, 3, 4, 5, 6, 7,
-    8, 9,10,11,12,13,14,15,
-   16, 0, 1, 2, 3, 4, 5, 6,
-    7, 8, 9,10,11,12,13,14,
-};
-
-/* modulo 9 table */
-static const uint8_t rclb_table[32] = {
-    0, 1, 2, 3, 4, 5, 6, 7,
-    8, 0, 1, 2, 3, 4, 5, 6,
-    7, 8, 0, 1, 2, 3, 4, 5,
-    6, 7, 8, 0, 1, 2, 3, 4,
-};
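The rclw_table/rclb_table above encode the effective rotate count for rotate-through-carry: a 16-bit RCL/RCR cycles through 17 positions (16 data bits plus CF), so the masked count is reduced modulo 17, and the 8-bit form cycles through 9 positions, modulo 9. A short standalone check that the tables are exactly count % 17 and count % 9 for 5-bit counts:

/* Verify that the rclw/rclb tables above are simply (count & 0x1f) reduced
 * modulo 17 and 9, i.e. the number of positions a rotate-through-carry
 * actually moves for 16-bit and 8-bit operands (data bits + CF). */
#include <assert.h>
#include <stdint.h>

static const uint8_t rclw_table[32] = {
    0, 1, 2, 3, 4, 5, 6, 7,
    8, 9, 10, 11, 12, 13, 14, 15,
    16, 0, 1, 2, 3, 4, 5, 6,
    7, 8, 9, 10, 11, 12, 13, 14,
};

static const uint8_t rclb_table[32] = {
    0, 1, 2, 3, 4, 5, 6, 7,
    8, 0, 1, 2, 3, 4, 5, 6,
    7, 8, 0, 1, 2, 3, 4, 5,
    6, 7, 8, 0, 1, 2, 3, 4,
};

int main(void)
{
    for (int count = 0; count < 32; count++) {
        assert(rclw_table[count] == count % 17);   /* 16 data bits + CF */
        assert(rclb_table[count] == count % 9);    /*  8 data bits + CF */
    }
    return 0;
}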
-
-#define floatx80_lg2 make_floatx80( 0x3ffd, 0x9a209a84fbcff799LL )
-#define floatx80_l2e make_floatx80( 0x3fff, 0xb8aa3b295c17f0bcLL )
-#define floatx80_l2t make_floatx80( 0x4000, 0xd49a784bcd1b8afeLL )
-
-static const floatx80 f15rk[7] =
-{
-    floatx80_zero,
-    floatx80_one,
-    floatx80_pi,
-    floatx80_lg2,
-    floatx80_ln2,
-    floatx80_l2e,
-    floatx80_l2t,
-};
-
-/* broken thread support */
-
-static spinlock_t global_cpu_lock = SPIN_LOCK_UNLOCKED;
-
-void helper_lock(void)
-{
-    spin_lock(&global_cpu_lock);
-}
-
-void helper_unlock(void)
-{
-    spin_unlock(&global_cpu_lock);
-}
-
-void helper_write_eflags(target_ulong t0, uint32_t update_mask)
-{
-    load_eflags(t0, update_mask);
-}
-
-target_ulong helper_read_eflags(void)
-{
-    uint32_t eflags;
-    eflags = helper_cc_compute_all(CC_OP);
-    eflags |= (DF & DF_MASK);
-    eflags |= env->eflags & ~(VM_MASK | RF_MASK);
-    return eflags;
-}
-
-/* return non zero if error */
-static inline int load_segment(uint32_t *e1_ptr, uint32_t *e2_ptr,
-                               int selector)
-{
-    SegmentCache *dt;
-    int index;
-    target_ulong ptr;
-
-    if (selector & 0x4)
-        dt = &env->ldt;
-    else
-        dt = &env->gdt;
-    index = selector & ~7;
-    if ((index + 7) > dt->limit)
-        return -1;
-    ptr = dt->base + index;
-    *e1_ptr = ldl_kernel(ptr);
-    *e2_ptr = ldl_kernel(ptr + 4);
-    return 0;
-}
-
-static inline unsigned int get_seg_limit(uint32_t e1, uint32_t e2)
-{
-    unsigned int limit;
-    limit = (e1 & 0xffff) | (e2 & 0x000f0000);
-    if (e2 & DESC_G_MASK)
-        limit = (limit << 12) | 0xfff;
-    return limit;
-}
-
-static inline uint32_t get_seg_base(uint32_t e1, uint32_t e2)
-{
-    return ((e1 >> 16) | ((e2 & 0xff) << 16) | (e2 & 0xff000000));
-}
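get_seg_base/get_seg_limit above reassemble the base and limit fields that are scattered across the two descriptor words, scaling the limit by 4K when the granularity bit is set. A standalone check of that decoding against the classic flat 4G descriptor (DESC_G_MASK is assumed here to be bit 23 of the high word, matching the x86 descriptor layout):

/* Standalone check of the descriptor decoding done by get_seg_base /
 * get_seg_limit above: e1/e2 are the low and high 32 bits of a GDT entry. */
#include <assert.h>
#include <stdint.h>

#define DESC_G_MASK (1u << 23)   /* granularity bit in e2 (assumed, per x86 layout) */

static uint32_t seg_base(uint32_t e1, uint32_t e2)
{
    return (e1 >> 16) | ((e2 & 0xff) << 16) | (e2 & 0xff000000);
}

static uint32_t seg_limit(uint32_t e1, uint32_t e2)
{
    uint32_t limit = (e1 & 0xffff) | (e2 & 0x000f0000);
    if (e2 & DESC_G_MASK) {
        limit = (limit << 12) | 0xfff;    /* 4K granularity */
    }
    return limit;
}

int main(void)
{
    /* Flat 4G code segment: base 0, limit 0xfffff pages, G=1 */
    uint32_t e1 = 0x0000ffff;
    uint32_t e2 = 0x00cf9a00;

    assert(seg_base(e1, e2) == 0);
    assert(seg_limit(e1, e2) == 0xffffffff);
    return 0;
}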
-
-static inline void load_seg_cache_raw_dt(SegmentCache *sc, uint32_t e1, uint32_t e2)
-{
-    sc->base = get_seg_base(e1, e2);
-    sc->limit = get_seg_limit(e1, e2);
-    sc->flags = e2;
-}
-
-/* init the segment cache in vm86 mode. */
-static inline void load_seg_vm(int seg, int selector)
-{
-    selector &= 0xffff;
-    cpu_x86_load_seg_cache(env, seg, selector,
-                           (selector << 4), 0xffff, 0);
-}
-
-static inline void get_ss_esp_from_tss(uint32_t *ss_ptr,
-                                       uint32_t *esp_ptr, int dpl)
-{
-    int type, index, shift;
-
-#if 0
-    {
-        int i;
-        printf("TR: base=%p limit=%x\n", env->tr.base, env->tr.limit);
-        for(i=0;i<env->tr.limit;i++) {
-            printf("%02x ", env->tr.base[i]);
-            if ((i & 7) == 7) printf("\n");
-        }
-        printf("\n");
-    }
-#endif
-
-    if (!(env->tr.flags & DESC_P_MASK))
-        cpu_abort(env, "invalid tss");
-    type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf;
-    if ((type & 7) != 1)
-        cpu_abort(env, "invalid tss type");
-    shift = type >> 3;
-    index = (dpl * 4 + 2) << shift;
-    if (index + (4 << shift) - 1 > env->tr.limit)
-        raise_exception_err(EXCP0A_TSS, env->tr.selector & 0xfffc);
-    if (shift == 0) {
-        *esp_ptr = lduw_kernel(env->tr.base + index);
-        *ss_ptr = lduw_kernel(env->tr.base + index + 2);
-    } else {
-        *esp_ptr = ldl_kernel(env->tr.base + index);
-        *ss_ptr = lduw_kernel(env->tr.base + index + 4);
-    }
-}
-
-/* XXX: merge with load_seg() */
-static void tss_load_seg(int seg_reg, int selector)
-{
-    uint32_t e1, e2;
-    int rpl, dpl, cpl;
-
-    if ((selector & 0xfffc) != 0) {
-        if (load_segment(&e1, &e2, selector) != 0)
-            raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
-        if (!(e2 & DESC_S_MASK))
-            raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
-        rpl = selector & 3;
-        dpl = (e2 >> DESC_DPL_SHIFT) & 3;
-        cpl = env->hflags & HF_CPL_MASK;
-        if (seg_reg == R_CS) {
-            if (!(e2 & DESC_CS_MASK))
-                raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
-            /* XXX: is it correct ? */
-            if (dpl != rpl)
-                raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
-            if ((e2 & DESC_C_MASK) && dpl > rpl)
-                raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
-        } else if (seg_reg == R_SS) {
-            /* SS must be writable data */
-            if ((e2 & DESC_CS_MASK) || !(e2 & DESC_W_MASK))
-                raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
-            if (dpl != cpl || dpl != rpl)
-                raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
-        } else {
-            /* not readable code */
-            if ((e2 & DESC_CS_MASK) && !(e2 & DESC_R_MASK))
-                raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
-            /* if data or non-conforming code, check the rights */
-            if (((e2 >> DESC_TYPE_SHIFT) & 0xf) < 12) {
-                if (dpl < cpl || dpl < rpl)
-                    raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
-            }
-        }
-        if (!(e2 & DESC_P_MASK))
-            raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
-        cpu_x86_load_seg_cache(env, seg_reg, selector,
-                       get_seg_base(e1, e2),
-                       get_seg_limit(e1, e2),
-                       e2);
-    } else {
-        if (seg_reg == R_SS || seg_reg == R_CS)
-            raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
-    }
-}
-
-#define SWITCH_TSS_JMP  0
-#define SWITCH_TSS_IRET 1
-#define SWITCH_TSS_CALL 2
-
-/* XXX: restore CPU state in registers (PowerPC case) */
-static void switch_tss(int tss_selector,
-                       uint32_t e1, uint32_t e2, int source,
-                       uint32_t next_eip)
-{
-    int tss_limit, tss_limit_max, type, old_tss_limit_max, old_type, v1, v2, i;
-    target_ulong tss_base;
-    uint32_t new_regs[8], new_segs[6];
-    uint32_t new_eflags, new_eip, new_cr3, new_ldt;
-    uint32_t old_eflags, eflags_mask;
-    SegmentCache *dt;
-    int index;
-    target_ulong ptr;
-
-    type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
-    LOG_PCALL("switch_tss: sel=0x%04x type=%d src=%d\n", tss_selector, type, source);
-
-    /* if task gate, we read the TSS segment and we load it */
-    if (type == 5) {
-        if (!(e2 & DESC_P_MASK))
-            raise_exception_err(EXCP0B_NOSEG, tss_selector & 0xfffc);
-        tss_selector = e1 >> 16;
-        if (tss_selector & 4)
-            raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc);
-        if (load_segment(&e1, &e2, tss_selector) != 0)
-            raise_exception_err(EXCP0D_GPF, tss_selector & 0xfffc);
-        if (e2 & DESC_S_MASK)
-            raise_exception_err(EXCP0D_GPF, tss_selector & 0xfffc);
-        type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
-        if ((type & 7) != 1)
-            raise_exception_err(EXCP0D_GPF, tss_selector & 0xfffc);
-    }
-
-    if (!(e2 & DESC_P_MASK))
-        raise_exception_err(EXCP0B_NOSEG, tss_selector & 0xfffc);
-
-    if (type & 8)
-        tss_limit_max = 103;
-    else
-        tss_limit_max = 43;
-    tss_limit = get_seg_limit(e1, e2);
-    tss_base = get_seg_base(e1, e2);
-    if ((tss_selector & 4) != 0 ||
-        tss_limit < tss_limit_max)
-        raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc);
-    old_type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf;
-    if (old_type & 8)
-        old_tss_limit_max = 103;
-    else
-        old_tss_limit_max = 43;
-
-    /* read all the registers from the new TSS */
-    if (type & 8) {
-        /* 32 bit */
-        new_cr3 = ldl_kernel(tss_base + 0x1c);
-        new_eip = ldl_kernel(tss_base + 0x20);
-        new_eflags = ldl_kernel(tss_base + 0x24);
-        for(i = 0; i < 8; i++)
-            new_regs[i] = ldl_kernel(tss_base + (0x28 + i * 4));
-        for(i = 0; i < 6; i++)
-            new_segs[i] = lduw_kernel(tss_base + (0x48 + i * 4));
-        new_ldt = lduw_kernel(tss_base + 0x60);
-        ldl_kernel(tss_base + 0x64);
-    } else {
-        /* 16 bit */
-        new_cr3 = 0;
-        new_eip = lduw_kernel(tss_base + 0x0e);
-        new_eflags = lduw_kernel(tss_base + 0x10);
-        for(i = 0; i < 8; i++)
-            new_regs[i] = lduw_kernel(tss_base + (0x12 + i * 2)) | 0xffff0000;
-        for(i = 0; i < 4; i++)
-            new_segs[i] = lduw_kernel(tss_base + (0x22 + i * 4));
-        new_ldt = lduw_kernel(tss_base + 0x2a);
-        new_segs[R_FS] = 0;
-        new_segs[R_GS] = 0;
-    }
-
-    /* NOTE: we must avoid memory exceptions during the task switch,
-       so we make dummy accesses before */
-    /* XXX: it can still fail in some cases, so a bigger hack is
-       necessary to validate the TLB after having done the accesses */
-
-    v1 = ldub_kernel(env->tr.base);
-    v2 = ldub_kernel(env->tr.base + old_tss_limit_max);
-    stb_kernel(env->tr.base, v1);
-    stb_kernel(env->tr.base + old_tss_limit_max, v2);
-
-    /* clear busy bit (it is restartable) */
-    if (source == SWITCH_TSS_JMP || source == SWITCH_TSS_IRET) {
-        target_ulong ptr;
-        uint32_t e2;
-        ptr = env->gdt.base + (env->tr.selector & ~7);
-        e2 = ldl_kernel(ptr + 4);
-        e2 &= ~DESC_TSS_BUSY_MASK;
-        stl_kernel(ptr + 4, e2);
-    }
-    old_eflags = compute_eflags();
-    if (source == SWITCH_TSS_IRET)
-        old_eflags &= ~NT_MASK;
-
-    /* save the current state in the old TSS */
-    if (type & 8) {
-        /* 32 bit */
-        stl_kernel(env->tr.base + 0x20, next_eip);
-        stl_kernel(env->tr.base + 0x24, old_eflags);
-        stl_kernel(env->tr.base + (0x28 + 0 * 4), EAX);
-        stl_kernel(env->tr.base + (0x28 + 1 * 4), ECX);
-        stl_kernel(env->tr.base + (0x28 + 2 * 4), EDX);
-        stl_kernel(env->tr.base + (0x28 + 3 * 4), EBX);
-        stl_kernel(env->tr.base + (0x28 + 4 * 4), ESP);
-        stl_kernel(env->tr.base + (0x28 + 5 * 4), EBP);
-        stl_kernel(env->tr.base + (0x28 + 6 * 4), ESI);
-        stl_kernel(env->tr.base + (0x28 + 7 * 4), EDI);
-        for(i = 0; i < 6; i++)
-            stw_kernel(env->tr.base + (0x48 + i * 4), env->segs[i].selector);
-    } else {
-        /* 16 bit */
-        stw_kernel(env->tr.base + 0x0e, next_eip);
-        stw_kernel(env->tr.base + 0x10, old_eflags);
-        stw_kernel(env->tr.base + (0x12 + 0 * 2), EAX);
-        stw_kernel(env->tr.base + (0x12 + 1 * 2), ECX);
-        stw_kernel(env->tr.base + (0x12 + 2 * 2), EDX);
-        stw_kernel(env->tr.base + (0x12 + 3 * 2), EBX);
-        stw_kernel(env->tr.base + (0x12 + 4 * 2), ESP);
-        stw_kernel(env->tr.base + (0x12 + 5 * 2), EBP);
-        stw_kernel(env->tr.base + (0x12 + 6 * 2), ESI);
-        stw_kernel(env->tr.base + (0x12 + 7 * 2), EDI);
-        for(i = 0; i < 4; i++)
-            stw_kernel(env->tr.base + (0x22 + i * 4), env->segs[i].selector);
-    }
-
-    /* now if an exception occurs, it will occur in the next task
-       context */
-
-    if (source == SWITCH_TSS_CALL) {
-        stw_kernel(tss_base, env->tr.selector);
-        new_eflags |= NT_MASK;
-    }
-
-    /* set busy bit */
-    if (source == SWITCH_TSS_JMP || source == SWITCH_TSS_CALL) {
-        target_ulong ptr;
-        uint32_t e2;
-        ptr = env->gdt.base + (tss_selector & ~7);
-        e2 = ldl_kernel(ptr + 4);
-        e2 |= DESC_TSS_BUSY_MASK;
-        stl_kernel(ptr + 4, e2);
-    }
-
-    /* set the new CPU state */
-    /* from this point, any exception which occurs can give problems */
-    env->cr[0] |= CR0_TS_MASK;
-    env->hflags |= HF_TS_MASK;
-    env->tr.selector = tss_selector;
-    env->tr.base = tss_base;
-    env->tr.limit = tss_limit;
-    env->tr.flags = e2 & ~DESC_TSS_BUSY_MASK;
-
-    if ((type & 8) && (env->cr[0] & CR0_PG_MASK)) {
-        cpu_x86_update_cr3(env, new_cr3);
-    }
-
-    /* load all registers without an exception, then reload them with
-       possible exception */
-    env->eip = new_eip;
-    eflags_mask = TF_MASK | AC_MASK | ID_MASK |
-        IF_MASK | IOPL_MASK | VM_MASK | RF_MASK | NT_MASK;
-    if (!(type & 8))
-        eflags_mask &= 0xffff;
-    load_eflags(new_eflags, eflags_mask);
-    /* XXX: what to do in 16 bit case ? */
-    EAX = new_regs[0];
-    ECX = new_regs[1];
-    EDX = new_regs[2];
-    EBX = new_regs[3];
-    ESP = new_regs[4];
-    EBP = new_regs[5];
-    ESI = new_regs[6];
-    EDI = new_regs[7];
-    if (new_eflags & VM_MASK) {
-        for(i = 0; i < 6; i++)
-            load_seg_vm(i, new_segs[i]);
-        /* in vm86, CPL is always 3 */
-        cpu_x86_set_cpl(env, 3);
-    } else {
-        /* CPL is set the RPL of CS */
-        cpu_x86_set_cpl(env, new_segs[R_CS] & 3);
-        /* first just selectors as the rest may trigger exceptions */
-        for(i = 0; i < 6; i++)
-            cpu_x86_load_seg_cache(env, i, new_segs[i], 0, 0, 0);
-    }
-
-    env->ldt.selector = new_ldt & ~4;
-    env->ldt.base = 0;
-    env->ldt.limit = 0;
-    env->ldt.flags = 0;
-
-    /* load the LDT */
-    if (new_ldt & 4)
-        raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
-
-    if ((new_ldt & 0xfffc) != 0) {
-        dt = &env->gdt;
-        index = new_ldt & ~7;
-        if ((index + 7) > dt->limit)
-            raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
-        ptr = dt->base + index;
-        e1 = ldl_kernel(ptr);
-        e2 = ldl_kernel(ptr + 4);
-        if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2)
-            raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
-        if (!(e2 & DESC_P_MASK))
-            raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
-        load_seg_cache_raw_dt(&env->ldt, e1, e2);
-    }
-
-    /* load the segments */
-    if (!(new_eflags & VM_MASK)) {
-        tss_load_seg(R_CS, new_segs[R_CS]);
-        tss_load_seg(R_SS, new_segs[R_SS]);
-        tss_load_seg(R_ES, new_segs[R_ES]);
-        tss_load_seg(R_DS, new_segs[R_DS]);
-        tss_load_seg(R_FS, new_segs[R_FS]);
-        tss_load_seg(R_GS, new_segs[R_GS]);
-    }
-
-    /* check that EIP is in the CS segment limits */
-    if (new_eip > env->segs[R_CS].limit) {
-        /* XXX: different exception if CALL ? */
-        raise_exception_err(EXCP0D_GPF, 0);
-    }
-
-#ifndef CONFIG_USER_ONLY
-    /* reset local breakpoints */
-    if (env->dr[7] & 0x55) {
-        for (i = 0; i < 4; i++) {
-            if (hw_breakpoint_enabled(env->dr[7], i) == 0x1)
-                hw_breakpoint_remove(env, i);
-        }
-        env->dr[7] &= ~0x55;
-    }
-#endif
-}
-
-/* check if Port I/O is allowed in TSS */
-static inline void check_io(int addr, int size)
-{
-    int io_offset, val, mask;
-
-    /* TSS must be a valid 32 bit one */
-    if (!(env->tr.flags & DESC_P_MASK) ||
-        ((env->tr.flags >> DESC_TYPE_SHIFT) & 0xf) != 9 ||
-        env->tr.limit < 103)
-        goto fail;
-    io_offset = lduw_kernel(env->tr.base + 0x66);
-    io_offset += (addr >> 3);
-    /* Note: the check needs two bytes */
-    if ((io_offset + 1) > env->tr.limit)
-        goto fail;
-    val = lduw_kernel(env->tr.base + io_offset);
-    val >>= (addr & 7);
-    mask = (1 << size) - 1;
-    /* all bits must be zero to allow the I/O */
-    if ((val & mask) != 0) {
-    fail:
-        raise_exception_err(EXCP0D_GPF, 0);
-    }
-}
-
-void helper_check_iob(uint32_t t0)
-{
-    check_io(t0, 1);
-}
-
-void helper_check_iow(uint32_t t0)
-{
-    check_io(t0, 2);
-}
-
-void helper_check_iol(uint32_t t0)
-{
-    check_io(t0, 4);
-}
-
-void helper_outb(uint32_t port, uint32_t data)
-{
-    cpu_outb(port, data & 0xff);
-}
-
-target_ulong helper_inb(uint32_t port)
-{
-    return cpu_inb(port);
-}
-
-void helper_outw(uint32_t port, uint32_t data)
-{
-    cpu_outw(port, data & 0xffff);
-}
-
-target_ulong helper_inw(uint32_t port)
-{
-    return cpu_inw(port);
-}
-
-void helper_outl(uint32_t port, uint32_t data)
-{
-    cpu_outl(port, data);
-}
-
-target_ulong helper_inl(uint32_t port)
-{
-    return cpu_inl(port);
-}
-
-static inline unsigned int get_sp_mask(unsigned int e2)
-{
-    if (e2 & DESC_B_MASK)
-        return 0xffffffff;
-    else
-        return 0xffff;
-}
-
-static int exeption_has_error_code(int intno)
-{
-        switch(intno) {
-        case 8:
-        case 10:
-        case 11:
-        case 12:
-        case 13:
-        case 14:
-        case 17:
-            return 1;
-        }
-	return 0;
-}
-
-#ifdef TARGET_X86_64
-#define SET_ESP(val, sp_mask)\
-do {\
-    if ((sp_mask) == 0xffff)\
-        ESP = (ESP & ~0xffff) | ((val) & 0xffff);\
-    else if ((sp_mask) == 0xffffffffLL)\
-        ESP = (uint32_t)(val);\
-    else\
-        ESP = (val);\
-} while (0)
-#else
-#define SET_ESP(val, sp_mask) ESP = (ESP & ~(sp_mask)) | ((val) & (sp_mask))
-#endif
-
-/* in 64-bit machines, this can overflow. So this segment addition macro
- * can be used to trim the value to 32-bit whenever needed */
-#define SEG_ADDL(ssp, sp, sp_mask) ((uint32_t)((ssp) + (sp & (sp_mask))))
-
-/* XXX: add a is_user flag to have proper security support */
-#define PUSHW(ssp, sp, sp_mask, val)\
-{\
-    sp -= 2;\
-    stw_kernel((ssp) + (sp & (sp_mask)), (val));\
-}
-
-#define PUSHL(ssp, sp, sp_mask, val)\
-{\
-    sp -= 4;\
-    stl_kernel(SEG_ADDL(ssp, sp, sp_mask), (uint32_t)(val));\
-}
-
-#define POPW(ssp, sp, sp_mask, val)\
-{\
-    val = lduw_kernel((ssp) + (sp & (sp_mask)));\
-    sp += 2;\
-}
-
-#define POPL(ssp, sp, sp_mask, val)\
-{\
-    val = (uint32_t)ldl_kernel(SEG_ADDL(ssp, sp, sp_mask));\
-    sp += 4;\
-}
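The PUSHW/PUSHL/POPW/POPL macros above (like SET_ESP before them) confine every stack access to the current stack segment: the offset is masked with 0xffff or 0xffffffff depending on the SS descriptor's B bit and added to the segment base. A standalone model of the masked 32-bit push, with a flat byte array standing in for the guest memory reached through stl_kernel:

/* Standalone model of the masked stack push done by PUSHL above: the stack
 * pointer wraps inside sp_mask and the offset is added to the segment base. */
#include <assert.h>
#include <stdint.h>
#include <string.h>

static uint8_t guest_ram[0x20010];        /* stand-in for guest memory */

static void stl(uint32_t addr, uint32_t val)
{
    memcpy(&guest_ram[addr], &val, 4);    /* raw bytes; round-trip check below */
}

static void pushl(uint32_t ssp, uint32_t *sp, uint32_t sp_mask, uint32_t val)
{
    *sp -= 4;
    stl(ssp + (*sp & sp_mask), val);      /* offset wraps within the mask */
}

int main(void)
{
    uint32_t ssp = 0x10000;               /* SS base */
    uint32_t sp_mask = 0xffff;            /* SS.B clear: 16-bit stack */
    uint32_t sp = 0x0002;                 /* near the bottom of the segment */

    pushl(ssp, &sp, sp_mask, 0x11223344); /* SP wraps to 0xfffe inside the segment */
    assert((sp & sp_mask) == 0xfffe);

    uint32_t stored;
    memcpy(&stored, &guest_ram[ssp + (sp & sp_mask)], 4);
    assert(stored == 0x11223344);
    return 0;
}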
-
-/* protected mode interrupt */
-static void do_interrupt_protected(int intno, int is_int, int error_code,
-                                   unsigned int next_eip, int is_hw)
-{
-    SegmentCache *dt;
-    target_ulong ptr, ssp;
-    int type, dpl, selector, ss_dpl, cpl;
-    int has_error_code, new_stack, shift;
-    uint32_t e1, e2, offset, ss = 0, esp, ss_e1 = 0, ss_e2 = 0;
-    uint32_t old_eip, sp_mask;
-
-    has_error_code = 0;
-    if (!is_int && !is_hw)
-        has_error_code = exeption_has_error_code(intno);
-    if (is_int)
-        old_eip = next_eip;
-    else
-        old_eip = env->eip;
-
-    dt = &env->idt;
-    if (intno * 8 + 7 > dt->limit)
-        raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
-    ptr = dt->base + intno * 8;
-    e1 = ldl_kernel(ptr);
-    e2 = ldl_kernel(ptr + 4);
-    /* check gate type */
-    type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
-    switch(type) {
-    case 5: /* task gate */
-        /* must do that check here to return the correct error code */
-        if (!(e2 & DESC_P_MASK))
-            raise_exception_err(EXCP0B_NOSEG, intno * 8 + 2);
-        switch_tss(intno * 8, e1, e2, SWITCH_TSS_CALL, old_eip);
-        if (has_error_code) {
-            int type;
-            uint32_t mask;
-            /* push the error code */
-            type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf;
-            shift = type >> 3;
-            if (env->segs[R_SS].flags & DESC_B_MASK)
-                mask = 0xffffffff;
-            else
-                mask = 0xffff;
-            esp = (ESP - (2 << shift)) & mask;
-            ssp = env->segs[R_SS].base + esp;
-            if (shift)
-                stl_kernel(ssp, error_code);
-            else
-                stw_kernel(ssp, error_code);
-            SET_ESP(esp, mask);
-        }
-        return;
-    case 6: /* 286 interrupt gate */
-    case 7: /* 286 trap gate */
-    case 14: /* 386 interrupt gate */
-    case 15: /* 386 trap gate */
-        break;
-    default:
-        raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
-        break;
-    }
-    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
-    cpl = env->hflags & HF_CPL_MASK;
-    /* check privilege if software int */
-    if (is_int && dpl < cpl)
-        raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
-    /* check valid bit */
-    if (!(e2 & DESC_P_MASK))
-        raise_exception_err(EXCP0B_NOSEG, intno * 8 + 2);
-    selector = e1 >> 16;
-    offset = (e2 & 0xffff0000) | (e1 & 0x0000ffff);
-    if ((selector & 0xfffc) == 0)
-        raise_exception_err(EXCP0D_GPF, 0);
-
-    if (load_segment(&e1, &e2, selector) != 0)
-        raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
-    if (!(e2 & DESC_S_MASK) || !(e2 & (DESC_CS_MASK)))
-        raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
-    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
-    if (dpl > cpl)
-        raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
-    if (!(e2 & DESC_P_MASK))
-        raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
-    if (!(e2 & DESC_C_MASK) && dpl < cpl) {
-        /* to inner privilege */
-        get_ss_esp_from_tss(&ss, &esp, dpl);
-        if ((ss & 0xfffc) == 0)
-            raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
-        if ((ss & 3) != dpl)
-            raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
-        if (load_segment(&ss_e1, &ss_e2, ss) != 0)
-            raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
-        ss_dpl = (ss_e2 >> DESC_DPL_SHIFT) & 3;
-        if (ss_dpl != dpl)
-            raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
-        if (!(ss_e2 & DESC_S_MASK) ||
-            (ss_e2 & DESC_CS_MASK) ||
-            !(ss_e2 & DESC_W_MASK))
-            raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
-        if (!(ss_e2 & DESC_P_MASK))
-            raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
-        new_stack = 1;
-        sp_mask = get_sp_mask(ss_e2);
-        ssp = get_seg_base(ss_e1, ss_e2);
-    } else if ((e2 & DESC_C_MASK) || dpl == cpl) {
-        /* to same privilege */
-        if (env->eflags & VM_MASK)
-            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
-        new_stack = 0;
-        sp_mask = get_sp_mask(env->segs[R_SS].flags);
-        ssp = env->segs[R_SS].base;
-        esp = ESP;
-        dpl = cpl;
-    } else {
-        raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
-        new_stack = 0; /* avoid warning */
-        sp_mask = 0; /* avoid warning */
-        ssp = 0; /* avoid warning */
-        esp = 0; /* avoid warning */
-    }
-
-    shift = type >> 3;
-
-#if 0
-    /* XXX: check that enough room is available */
-    push_size = 6 + (new_stack << 2) + (has_error_code << 1);
-    if (env->eflags & VM_MASK)
-        push_size += 8;
-    push_size <<= shift;
-#endif
-    if (shift == 1) {
-        if (new_stack) {
-            if (env->eflags & VM_MASK) {
-                PUSHL(ssp, esp, sp_mask, env->segs[R_GS].selector);
-                PUSHL(ssp, esp, sp_mask, env->segs[R_FS].selector);
-                PUSHL(ssp, esp, sp_mask, env->segs[R_DS].selector);
-                PUSHL(ssp, esp, sp_mask, env->segs[R_ES].selector);
-            }
-            PUSHL(ssp, esp, sp_mask, env->segs[R_SS].selector);
-            PUSHL(ssp, esp, sp_mask, ESP);
-        }
-        PUSHL(ssp, esp, sp_mask, compute_eflags());
-        PUSHL(ssp, esp, sp_mask, env->segs[R_CS].selector);
-        PUSHL(ssp, esp, sp_mask, old_eip);
-        if (has_error_code) {
-            PUSHL(ssp, esp, sp_mask, error_code);
-        }
-    } else {
-        if (new_stack) {
-            if (env->eflags & VM_MASK) {
-                PUSHW(ssp, esp, sp_mask, env->segs[R_GS].selector);
-                PUSHW(ssp, esp, sp_mask, env->segs[R_FS].selector);
-                PUSHW(ssp, esp, sp_mask, env->segs[R_DS].selector);
-                PUSHW(ssp, esp, sp_mask, env->segs[R_ES].selector);
-            }
-            PUSHW(ssp, esp, sp_mask, env->segs[R_SS].selector);
-            PUSHW(ssp, esp, sp_mask, ESP);
-        }
-        PUSHW(ssp, esp, sp_mask, compute_eflags());
-        PUSHW(ssp, esp, sp_mask, env->segs[R_CS].selector);
-        PUSHW(ssp, esp, sp_mask, old_eip);
-        if (has_error_code) {
-            PUSHW(ssp, esp, sp_mask, error_code);
-        }
-    }
-
-    if (new_stack) {
-        if (env->eflags & VM_MASK) {
-            cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0, 0);
-            cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0, 0);
-            cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0, 0);
-            cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0, 0);
-        }
-        ss = (ss & ~3) | dpl;
-        cpu_x86_load_seg_cache(env, R_SS, ss,
-                               ssp, get_seg_limit(ss_e1, ss_e2), ss_e2);
-    }
-    SET_ESP(esp, sp_mask);
-
-    selector = (selector & ~3) | dpl;
-    cpu_x86_load_seg_cache(env, R_CS, selector,
-                   get_seg_base(e1, e2),
-                   get_seg_limit(e1, e2),
-                   e2);
-    cpu_x86_set_cpl(env, dpl);
-    env->eip = offset;
-
-    /* interrupt gate clear IF mask */
-    if ((type & 1) == 0) {
-        env->eflags &= ~IF_MASK;
-    }
-    env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK);
-}
-
-#ifdef TARGET_X86_64
-
-#define PUSHQ(sp, val)\
-{\
-    sp -= 8;\
-    stq_kernel(sp, (val));\
-}
-
-#define POPQ(sp, val)\
-{\
-    val = ldq_kernel(sp);\
-    sp += 8;\
-}
-
-static inline target_ulong get_rsp_from_tss(int level)
-{
-    int index;
-
-#if 0
-    printf("TR: base=" TARGET_FMT_lx " limit=%x\n",
-           env->tr.base, env->tr.limit);
-#endif
-
-    if (!(env->tr.flags & DESC_P_MASK))
-        cpu_abort(env, "invalid tss");
-    index = 8 * level + 4;
-    if ((index + 7) > env->tr.limit)
-        raise_exception_err(EXCP0A_TSS, env->tr.selector & 0xfffc);
-    return ldq_kernel(env->tr.base + index);
-}
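
The index computation above covers both the RSP0-RSP2 slots and, through the get_rsp_from_tss(ist + 3) call made by do_interrupt64 below, the IST1-IST7 slots of the 64-bit TSS. A minimal restatement of the implied byte offsets, kept outside the emulator (the helper names here are illustrative only):

    /* Byte offsets inside the 64-bit TSS, as implied by index = 8 * level + 4. */
    static int tss64_rsp_offset(int n) { return 8 * n + 4; }       /* RSP0..RSP2 at 4, 12, 20 */
    static int tss64_ist_offset(int n) { return 8 * (n + 3) + 4; } /* IST1..IST7 at 36 .. 84  */
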
-
-/* 64 bit interrupt */
-static void do_interrupt64(int intno, int is_int, int error_code,
-                           target_ulong next_eip, int is_hw)
-{
-    SegmentCache *dt;
-    target_ulong ptr;
-    int type, dpl, selector, cpl, ist;
-    int has_error_code, new_stack;
-    uint32_t e1, e2, e3, ss;
-    target_ulong old_eip, esp, offset;
-
-    has_error_code = 0;
-    if (!is_int && !is_hw)
-        has_error_code = exeption_has_error_code(intno);
-    if (is_int)
-        old_eip = next_eip;
-    else
-        old_eip = env->eip;
-
-    dt = &env->idt;
-    if (intno * 16 + 15 > dt->limit)
-        raise_exception_err(EXCP0D_GPF, intno * 16 + 2);
-    ptr = dt->base + intno * 16;
-    e1 = ldl_kernel(ptr);
-    e2 = ldl_kernel(ptr + 4);
-    e3 = ldl_kernel(ptr + 8);
-    /* check gate type */
-    type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
-    switch(type) {
-    case 14: /* 386 interrupt gate */
-    case 15: /* 386 trap gate */
-        break;
-    default:
-        raise_exception_err(EXCP0D_GPF, intno * 16 + 2);
-        break;
-    }
-    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
-    cpl = env->hflags & HF_CPL_MASK;
-    /* check privilege if software int */
-    if (is_int && dpl < cpl)
-        raise_exception_err(EXCP0D_GPF, intno * 16 + 2);
-    /* check valid bit */
-    if (!(e2 & DESC_P_MASK))
-        raise_exception_err(EXCP0B_NOSEG, intno * 16 + 2);
-    selector = e1 >> 16;
-    offset = ((target_ulong)e3 << 32) | (e2 & 0xffff0000) | (e1 & 0x0000ffff);
-    ist = e2 & 7;
-    if ((selector & 0xfffc) == 0)
-        raise_exception_err(EXCP0D_GPF, 0);
-
-    if (load_segment(&e1, &e2, selector) != 0)
-        raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
-    if (!(e2 & DESC_S_MASK) || !(e2 & (DESC_CS_MASK)))
-        raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
-    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
-    if (dpl > cpl)
-        raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
-    if (!(e2 & DESC_P_MASK))
-        raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
-    if (!(e2 & DESC_L_MASK) || (e2 & DESC_B_MASK))
-        raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
-    if ((!(e2 & DESC_C_MASK) && dpl < cpl) || ist != 0) {
-        /* to inner privilege */
-        if (ist != 0)
-            esp = get_rsp_from_tss(ist + 3);
-        else
-            esp = get_rsp_from_tss(dpl);
-        esp &= ~0xfLL; /* align stack */
-        ss = 0;
-        new_stack = 1;
-    } else if ((e2 & DESC_C_MASK) || dpl == cpl) {
-        /* to same privilege */
-        if (env->eflags & VM_MASK)
-            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
-        new_stack = 0;
-        if (ist != 0)
-            esp = get_rsp_from_tss(ist + 3);
-        else
-            esp = ESP;
-        esp &= ~0xfLL; /* align stack */
-        dpl = cpl;
-    } else {
-        raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
-        new_stack = 0; /* avoid warning */
-        esp = 0; /* avoid warning */
-    }
-
-    PUSHQ(esp, env->segs[R_SS].selector);
-    PUSHQ(esp, ESP);
-    PUSHQ(esp, compute_eflags());
-    PUSHQ(esp, env->segs[R_CS].selector);
-    PUSHQ(esp, old_eip);
-    if (has_error_code) {
-        PUSHQ(esp, error_code);
-    }
-
-    if (new_stack) {
-        ss = 0 | dpl;
-        cpu_x86_load_seg_cache(env, R_SS, ss, 0, 0, 0);
-    }
-    ESP = esp;
-
-    selector = (selector & ~3) | dpl;
-    cpu_x86_load_seg_cache(env, R_CS, selector,
-                   get_seg_base(e1, e2),
-                   get_seg_limit(e1, e2),
-                   e2);
-    cpu_x86_set_cpl(env, dpl);
-    env->eip = offset;
-
-    /* interrupt gates clear the IF mask */
-    if ((type & 1) == 0) {
-        env->eflags &= ~IF_MASK;
-    }
-    env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK);
-}
-#endif
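
As an aside on the descriptor parsing above: a long-mode IDT entry is 16 bytes, and do_interrupt64 reassembles the 64-bit handler address from the three dwords it reads (low 16 bits from e1, bits 31:16 from e2, bits 63:32 from e3). A minimal standalone sketch of that reassembly, mirroring the expression in the code (the function name is illustrative):

    #include <stdint.h>

    /* Rebuild the 64-bit gate target from the descriptor dwords. */
    static uint64_t gate_offset64(uint32_t e1, uint32_t e2, uint32_t e3)
    {
        return ((uint64_t)e3 << 32) | (e2 & 0xffff0000u) | (e1 & 0x0000ffffu);
    }
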
-
-#ifdef TARGET_X86_64
-#if defined(CONFIG_USER_ONLY)
-void helper_syscall(int next_eip_addend)
-{
-    env->exception_index = EXCP_SYSCALL;
-    env->exception_next_eip = env->eip + next_eip_addend;
-    cpu_loop_exit(env);
-}
-#else
-void helper_syscall(int next_eip_addend)
-{
-    int selector;
-
-    if (!(env->efer & MSR_EFER_SCE)) {
-        raise_exception_err(EXCP06_ILLOP, 0);
-    }
-    selector = (env->star >> 32) & 0xffff;
-    if (env->hflags & HF_LMA_MASK) {
-        int code64;
-
-        ECX = env->eip + next_eip_addend;
-        env->regs[11] = compute_eflags();
-
-        code64 = env->hflags & HF_CS64_MASK;
-
-        cpu_x86_set_cpl(env, 0);
-        cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
-                           0, 0xffffffff,
-                               DESC_G_MASK | DESC_P_MASK |
-                               DESC_S_MASK |
-                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
-        cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
-                               0, 0xffffffff,
-                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
-                               DESC_S_MASK |
-                               DESC_W_MASK | DESC_A_MASK);
-        env->eflags &= ~env->fmask;
-        load_eflags(env->eflags, 0);
-        if (code64)
-            env->eip = env->lstar;
-        else
-            env->eip = env->cstar;
-    } else {
-        ECX = (uint32_t)(env->eip + next_eip_addend);
-
-        cpu_x86_set_cpl(env, 0);
-        cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
-                           0, 0xffffffff,
-                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
-                               DESC_S_MASK |
-                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
-        cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
-                               0, 0xffffffff,
-                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
-                               DESC_S_MASK |
-                               DESC_W_MASK | DESC_A_MASK);
-        env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK);
-        env->eip = (uint32_t)env->star;
-    }
-}
-#endif
-#endif
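
For reference, the selectors loaded by helper_syscall come straight out of the IA32_STAR MSR: bits 47:32 give the kernel CS, and SS is defined as that value plus 8, both with the RPL bits cleared. A minimal sketch of just the selector math, assuming a plain uint64_t MSR value (the helper name is illustrative):

    #include <stdint.h>

    /* SYSCALL target selectors: CS = STAR[47:32], SS = CS + 8, RPL forced to 0. */
    static void syscall_selectors(uint64_t star, uint16_t *cs, uint16_t *ss)
    {
        *cs = (uint16_t)((star >> 32) & 0xfffc);
        *ss = (uint16_t)(((star >> 32) + 8) & 0xfffc);
    }
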
-
-#ifdef TARGET_X86_64
-void helper_sysret(int dflag)
-{
-    int cpl, selector;
-
-    if (!(env->efer & MSR_EFER_SCE)) {
-        raise_exception_err(EXCP06_ILLOP, 0);
-    }
-    cpl = env->hflags & HF_CPL_MASK;
-    if (!(env->cr[0] & CR0_PE_MASK) || cpl != 0) {
-        raise_exception_err(EXCP0D_GPF, 0);
-    }
-    selector = (env->star >> 48) & 0xffff;
-    if (env->hflags & HF_LMA_MASK) {
-        if (dflag == 2) {
-            cpu_x86_load_seg_cache(env, R_CS, (selector + 16) | 3,
-                                   0, 0xffffffff,
-                                   DESC_G_MASK | DESC_P_MASK |
-                                   DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
-                                   DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK |
-                                   DESC_L_MASK);
-            env->eip = ECX;
-        } else {
-            cpu_x86_load_seg_cache(env, R_CS, selector | 3,
-                                   0, 0xffffffff,
-                                   DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
-                                   DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
-                                   DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
-            env->eip = (uint32_t)ECX;
-        }
-        cpu_x86_load_seg_cache(env, R_SS, selector + 8,
-                               0, 0xffffffff,
-                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
-                               DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
-                               DESC_W_MASK | DESC_A_MASK);
-        load_eflags((uint32_t)(env->regs[11]), TF_MASK | AC_MASK | ID_MASK |
-                    IF_MASK | IOPL_MASK | VM_MASK | RF_MASK | NT_MASK);
-        cpu_x86_set_cpl(env, 3);
-    } else {
-        cpu_x86_load_seg_cache(env, R_CS, selector | 3,
-                               0, 0xffffffff,
-                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
-                               DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
-                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
-        env->eip = (uint32_t)ECX;
-        cpu_x86_load_seg_cache(env, R_SS, selector + 8,
-                               0, 0xffffffff,
-                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
-                               DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
-                               DESC_W_MASK | DESC_A_MASK);
-        env->eflags |= IF_MASK;
-        cpu_x86_set_cpl(env, 3);
-    }
-}
-#endif
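
helper_sysret uses the other half of the same MSR: bits 63:48 give a base selector, the 64-bit return CS is that base plus 16 with RPL forced to 3, the 32-bit return CS is the base itself, and SS is always base plus 8. A small sketch of the selector math only, with illustrative names and no descriptor loading:

    #include <stdint.h>

    /* SYSRET return selectors derived from STAR[63:48]. */
    static void sysret_selectors(uint64_t star, int to_64bit,
                                 uint16_t *cs, uint16_t *ss)
    {
        uint16_t base = (uint16_t)((star >> 48) & 0xffff);
        *cs = (to_64bit ? base + 16 : base) | 3;  /* RPL = 3 */
        *ss = base + 8;
    }
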
-
-/* real mode interrupt */
-static void do_interrupt_real(int intno, int is_int, int error_code,
-                              unsigned int next_eip)
-{
-    SegmentCache *dt;
-    target_ulong ptr, ssp;
-    int selector;
-    uint32_t offset, esp;
-    uint32_t old_cs, old_eip;
-
-    /* real mode (simpler!) */
-    dt = &env->idt;
-    if (intno * 4 + 3 > dt->limit)
-        raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
-    ptr = dt->base + intno * 4;
-    offset = lduw_kernel(ptr);
-    selector = lduw_kernel(ptr + 2);
-    esp = ESP;
-    ssp = env->segs[R_SS].base;
-    if (is_int)
-        old_eip = next_eip;
-    else
-        old_eip = env->eip;
-    old_cs = env->segs[R_CS].selector;
-    /* XXX: use SS segment size? */
-    PUSHW(ssp, esp, 0xffff, compute_eflags());
-    PUSHW(ssp, esp, 0xffff, old_cs);
-    PUSHW(ssp, esp, 0xffff, old_eip);
-
-    /* update processor state */
-    ESP = (ESP & ~0xffff) | (esp & 0xffff);
-    env->eip = offset;
-    env->segs[R_CS].selector = selector;
-    env->segs[R_CS].base = (selector << 4);
-    env->eflags &= ~(IF_MASK | TF_MASK | AC_MASK | RF_MASK);
-}
-
-/* fake user mode interrupt */
-void do_interrupt_user(int intno, int is_int, int error_code,
-                       target_ulong next_eip)
-{
-    SegmentCache *dt;
-    target_ulong ptr;
-    int dpl, cpl, shift;
-    uint32_t e2;
-
-    dt = &env->idt;
-    if (env->hflags & HF_LMA_MASK) {
-        shift = 4;
-    } else {
-        shift = 3;
-    }
-    ptr = dt->base + (intno << shift);
-    e2 = ldl_kernel(ptr + 4);
-
-    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
-    cpl = env->hflags & HF_CPL_MASK;
-    /* check privilege if software int */
-    if (is_int && dpl < cpl)
-        raise_exception_err(EXCP0D_GPF, (intno << shift) + 2);
-
-    /* Since we emulate only user space, we cannot do more than exit
-       the emulation with the appropriate exception and error code. */
-    if (is_int)
-        EIP = next_eip;
-}
-
-#if !defined(CONFIG_USER_ONLY)
-static void handle_even_inj(int intno, int is_int, int error_code,
-                            int is_hw, int rm)
-{
-    uint32_t event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
-    if (!(event_inj & SVM_EVTINJ_VALID)) {
-        int type;
-        if (is_int)
-            type = SVM_EVTINJ_TYPE_SOFT;
-        else
-            type = SVM_EVTINJ_TYPE_EXEPT;
-        event_inj = intno | type | SVM_EVTINJ_VALID;
-        if (!rm && exeption_has_error_code(intno)) {
-            event_inj |= SVM_EVTINJ_VALID_ERR;
-            stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err), error_code);
-        }
-        stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj);
-    }
-}
-#endif
-
-/*
- * Begin execution of an interrupt. is_int is TRUE if it comes from
- * the int instruction. next_eip is the EIP value AFTER the interrupt
- * instruction; it is only relevant if is_int is TRUE.
- */
-void do_interrupt(int intno, int is_int, int error_code,
-                  target_ulong next_eip, int is_hw)
-{
-    if (qemu_loglevel_mask(CPU_LOG_INT)) {
-        if ((env->cr[0] & CR0_PE_MASK)) {
-            static int count;
-            qemu_log("%6d: v=%02x e=%04x i=%d cpl=%d IP=%04x:" TARGET_FMT_lx " pc=" TARGET_FMT_lx " SP=%04x:" TARGET_FMT_lx,
-                    count, intno, error_code, is_int,
-                    env->hflags & HF_CPL_MASK,
-                    env->segs[R_CS].selector, EIP,
-                    (int)env->segs[R_CS].base + EIP,
-                    env->segs[R_SS].selector, ESP);
-            if (intno == 0x0e) {
-                qemu_log(" CR2=" TARGET_FMT_lx, env->cr[2]);
-            } else {
-                qemu_log(" EAX=" TARGET_FMT_lx, EAX);
-            }
-            qemu_log("\n");
-            log_cpu_state(env, X86_DUMP_CCOP);
-#if 0
-            {
-                int i;
-                uint8_t *ptr;
-                qemu_log("       code=");
-                ptr = env->segs[R_CS].base + env->eip;
-                for(i = 0; i < 16; i++) {
-                    qemu_log(" %02x", ldub(ptr + i));
-                }
-                qemu_log("\n");
-            }
-#endif
-            count++;
-        }
-    }
-    if (env->cr[0] & CR0_PE_MASK) {
-#if !defined(CONFIG_USER_ONLY)
-        if (env->hflags & HF_SVMI_MASK)
-            handle_even_inj(intno, is_int, error_code, is_hw, 0);
-#endif
-#ifdef TARGET_X86_64
-        if (env->hflags & HF_LMA_MASK) {
-            do_interrupt64(intno, is_int, error_code, next_eip, is_hw);
-        } else
-#endif
-        {
-            do_interrupt_protected(intno, is_int, error_code, next_eip, is_hw);
-        }
-    } else {
-#if !defined(CONFIG_USER_ONLY)
-        if (env->hflags & HF_SVMI_MASK)
-            handle_even_inj(intno, is_int, error_code, is_hw, 1);
-#endif
-        do_interrupt_real(intno, is_int, error_code, next_eip);
-    }
-
-#if !defined(CONFIG_USER_ONLY)
-    if (env->hflags & HF_SVMI_MASK) {
-        uint32_t event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
-        stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj & ~SVM_EVTINJ_VALID);
-    }
-#endif
-}
-
-/* This should come from sysemu.h - if we could include it here... */
-void qemu_system_reset_request(void);
-
-/*
- * Check for nested exceptions and escalate to a double or triple
- * fault if needed. It should only be called if this is not an
- * interrupt. Returns the new exception number.
- */
-static int check_exception(int intno, int *error_code)
-{
-    int first_contributory = env->old_exception == 0 ||
-                              (env->old_exception >= 10 &&
-                               env->old_exception <= 13);
-    int second_contributory = intno == 0 ||
-                               (intno >= 10 && intno <= 13);
-
-    qemu_log_mask(CPU_LOG_INT, "check_exception old: 0x%x new 0x%x\n",
-                env->old_exception, intno);
-
-#if !defined(CONFIG_USER_ONLY)
-    if (env->old_exception == EXCP08_DBLE) {
-        if (env->hflags & HF_SVMI_MASK)
-            helper_vmexit(SVM_EXIT_SHUTDOWN, 0); /* does not return */
-
-        qemu_log_mask(CPU_LOG_RESET, "Triple fault\n");
-
-        qemu_system_reset_request();
-        return EXCP_HLT;
-    }
-#endif
-
-    if ((first_contributory && second_contributory)
-        || (env->old_exception == EXCP0E_PAGE &&
-            (second_contributory || (intno == EXCP0E_PAGE)))) {
-        intno = EXCP08_DBLE;
-        *error_code = 0;
-    }
-
-    if (second_contributory || (intno == EXCP0E_PAGE) ||
-        (intno == EXCP08_DBLE))
-        env->old_exception = intno;
-
-    return intno;
-}
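
The escalation rule above is easier to read as a classification: vector 0 (#DE) and vectors 10-13 (#TS, #NP, #SS, #GP) are "contributory"; a contributory fault raised while another contributory fault (or a page fault) is being delivered becomes #DF, and any fault raised while a #DF is pending shuts the CPU down (triple fault). A minimal standalone restatement of the classification used by check_exception (the function name is illustrative):

    /* Contributory vectors per the test above: #DE and #TS/#NP/#SS/#GP. */
    static int is_contributory(int vector)
    {
        return vector == 0 || (vector >= 10 && vector <= 13);
    }
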
-
-/*
- * Signal an interrupt. It is executed in the main CPU loop.
- * is_int is TRUE if it comes from the int instruction. next_eip is
- * the EIP value AFTER the interrupt instruction; it is only relevant
- * if is_int is TRUE.
- */
-static void QEMU_NORETURN raise_interrupt(int intno, int is_int, int error_code,
-                                          int next_eip_addend)
-{
-    if (!is_int) {
-        helper_svm_check_intercept_param(SVM_EXIT_EXCP_BASE + intno, error_code);
-        intno = check_exception(intno, &error_code);
-    } else {
-        helper_svm_check_intercept_param(SVM_EXIT_SWINT, 0);
-    }
-
-    env->exception_index = intno;
-    env->error_code = error_code;
-    env->exception_is_int = is_int;
-    env->exception_next_eip = env->eip + next_eip_addend;
-    cpu_loop_exit(env);
-}
-
-/* shortcuts to generate exceptions */
-
-void raise_exception_err(int exception_index, int error_code)
-{
-    raise_interrupt(exception_index, 0, error_code, 0);
-}
-
-void raise_exception(int exception_index)
-{
-    raise_interrupt(exception_index, 0, 0, 0);
-}
-
-/* SMM support */
-
-#if defined(CONFIG_USER_ONLY)
-
-void do_smm_enter(void)
-{
-}
-
-void helper_rsm(void)
-{
-}
-
-#else
-
-#ifdef TARGET_X86_64
-#define SMM_REVISION_ID 0x00020064
-#else
-#define SMM_REVISION_ID 0x00020000
-#endif
-
-void do_smm_enter(void)
-{
-    target_ulong sm_state;
-    SegmentCache *dt;
-    int i, offset;
-
-    qemu_log_mask(CPU_LOG_INT, "SMM: enter\n");
-    log_cpu_state_mask(CPU_LOG_INT, env, X86_DUMP_CCOP);
-
-    env->hflags |= HF_SMM_MASK;
-    cpu_smm_update(env);
-
-    sm_state = env->smbase + 0x8000;
-
-#ifdef TARGET_X86_64
-    for(i = 0; i < 6; i++) {
-        dt = &env->segs[i];
-        offset = 0x7e00 + i * 16;
-        stw_phys(sm_state + offset, dt->selector);
-        stw_phys(sm_state + offset + 2, (dt->flags >> 8) & 0xf0ff);
-        stl_phys(sm_state + offset + 4, dt->limit);
-        stq_phys(sm_state + offset + 8, dt->base);
-    }
-
-    stq_phys(sm_state + 0x7e68, env->gdt.base);
-    stl_phys(sm_state + 0x7e64, env->gdt.limit);
-
-    stw_phys(sm_state + 0x7e70, env->ldt.selector);
-    stq_phys(sm_state + 0x7e78, env->ldt.base);
-    stl_phys(sm_state + 0x7e74, env->ldt.limit);
-    stw_phys(sm_state + 0x7e72, (env->ldt.flags >> 8) & 0xf0ff);
-
-    stq_phys(sm_state + 0x7e88, env->idt.base);
-    stl_phys(sm_state + 0x7e84, env->idt.limit);
-
-    stw_phys(sm_state + 0x7e90, env->tr.selector);
-    stq_phys(sm_state + 0x7e98, env->tr.base);
-    stl_phys(sm_state + 0x7e94, env->tr.limit);
-    stw_phys(sm_state + 0x7e92, (env->tr.flags >> 8) & 0xf0ff);
-
-    stq_phys(sm_state + 0x7ed0, env->efer);
-
-    stq_phys(sm_state + 0x7ff8, EAX);
-    stq_phys(sm_state + 0x7ff0, ECX);
-    stq_phys(sm_state + 0x7fe8, EDX);
-    stq_phys(sm_state + 0x7fe0, EBX);
-    stq_phys(sm_state + 0x7fd8, ESP);
-    stq_phys(sm_state + 0x7fd0, EBP);
-    stq_phys(sm_state + 0x7fc8, ESI);
-    stq_phys(sm_state + 0x7fc0, EDI);
-    for(i = 8; i < 16; i++)
-        stq_phys(sm_state + 0x7ff8 - i * 8, env->regs[i]);
-    stq_phys(sm_state + 0x7f78, env->eip);
-    stl_phys(sm_state + 0x7f70, compute_eflags());
-    stl_phys(sm_state + 0x7f68, env->dr[6]);
-    stl_phys(sm_state + 0x7f60, env->dr[7]);
-
-    stl_phys(sm_state + 0x7f48, env->cr[4]);
-    stl_phys(sm_state + 0x7f50, env->cr[3]);
-    stl_phys(sm_state + 0x7f58, env->cr[0]);
-
-    stl_phys(sm_state + 0x7efc, SMM_REVISION_ID);
-    stl_phys(sm_state + 0x7f00, env->smbase);
-#else
-    stl_phys(sm_state + 0x7ffc, env->cr[0]);
-    stl_phys(sm_state + 0x7ff8, env->cr[3]);
-    stl_phys(sm_state + 0x7ff4, compute_eflags());
-    stl_phys(sm_state + 0x7ff0, env->eip);
-    stl_phys(sm_state + 0x7fec, EDI);
-    stl_phys(sm_state + 0x7fe8, ESI);
-    stl_phys(sm_state + 0x7fe4, EBP);
-    stl_phys(sm_state + 0x7fe0, ESP);
-    stl_phys(sm_state + 0x7fdc, EBX);
-    stl_phys(sm_state + 0x7fd8, EDX);
-    stl_phys(sm_state + 0x7fd4, ECX);
-    stl_phys(sm_state + 0x7fd0, EAX);
-    stl_phys(sm_state + 0x7fcc, env->dr[6]);
-    stl_phys(sm_state + 0x7fc8, env->dr[7]);
-
-    stl_phys(sm_state + 0x7fc4, env->tr.selector);
-    stl_phys(sm_state + 0x7f64, env->tr.base);
-    stl_phys(sm_state + 0x7f60, env->tr.limit);
-    stl_phys(sm_state + 0x7f5c, (env->tr.flags >> 8) & 0xf0ff);
-
-    stl_phys(sm_state + 0x7fc0, env->ldt.selector);
-    stl_phys(sm_state + 0x7f80, env->ldt.base);
-    stl_phys(sm_state + 0x7f7c, env->ldt.limit);
-    stl_phys(sm_state + 0x7f78, (env->ldt.flags >> 8) & 0xf0ff);
-
-    stl_phys(sm_state + 0x7f74, env->gdt.base);
-    stl_phys(sm_state + 0x7f70, env->gdt.limit);
-
-    stl_phys(sm_state + 0x7f58, env->idt.base);
-    stl_phys(sm_state + 0x7f54, env->idt.limit);
-
-    for(i = 0; i < 6; i++) {
-        dt = &env->segs[i];
-        if (i < 3)
-            offset = 0x7f84 + i * 12;
-        else
-            offset = 0x7f2c + (i - 3) * 12;
-        stl_phys(sm_state + 0x7fa8 + i * 4, dt->selector);
-        stl_phys(sm_state + offset + 8, dt->base);
-        stl_phys(sm_state + offset + 4, dt->limit);
-        stl_phys(sm_state + offset, (dt->flags >> 8) & 0xf0ff);
-    }
-    stl_phys(sm_state + 0x7f14, env->cr[4]);
-
-    stl_phys(sm_state + 0x7efc, SMM_REVISION_ID);
-    stl_phys(sm_state + 0x7ef8, env->smbase);
-#endif
-    /* init SMM cpu state */
-
-#ifdef TARGET_X86_64
-    cpu_load_efer(env, 0);
-#endif
-    load_eflags(0, ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
-    env->eip = 0x00008000;
-    cpu_x86_load_seg_cache(env, R_CS, (env->smbase >> 4) & 0xffff, env->smbase,
-                           0xffffffff, 0);
-    cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffffffff, 0);
-    cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0xffffffff, 0);
-    cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffffffff, 0);
-    cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffffffff, 0);
-    cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffffffff, 0);
-
-    cpu_x86_update_cr0(env,
-                       env->cr[0] & ~(CR0_PE_MASK | CR0_EM_MASK | CR0_TS_MASK | CR0_PG_MASK));
-    cpu_x86_update_cr4(env, 0);
-    env->dr[7] = 0x00000400;
-    CC_OP = CC_OP_EFLAGS;
-}
-
-void helper_rsm(void)
-{
-    target_ulong sm_state;
-    int i, offset;
-    uint32_t val;
-
-    sm_state = env->smbase + 0x8000;
-#ifdef TARGET_X86_64
-    cpu_load_efer(env, ldq_phys(sm_state + 0x7ed0));
-
-    for(i = 0; i < 6; i++) {
-        offset = 0x7e00 + i * 16;
-        cpu_x86_load_seg_cache(env, i,
-                               lduw_phys(sm_state + offset),
-                               ldq_phys(sm_state + offset + 8),
-                               ldl_phys(sm_state + offset + 4),
-                               (lduw_phys(sm_state + offset + 2) & 0xf0ff) << 8);
-    }
-
-    env->gdt.base = ldq_phys(sm_state + 0x7e68);
-    env->gdt.limit = ldl_phys(sm_state + 0x7e64);
-
-    env->ldt.selector = lduw_phys(sm_state + 0x7e70);
-    env->ldt.base = ldq_phys(sm_state + 0x7e78);
-    env->ldt.limit = ldl_phys(sm_state + 0x7e74);
-    env->ldt.flags = (lduw_phys(sm_state + 0x7e72) & 0xf0ff) << 8;
-
-    env->idt.base = ldq_phys(sm_state + 0x7e88);
-    env->idt.limit = ldl_phys(sm_state + 0x7e84);
-
-    env->tr.selector = lduw_phys(sm_state + 0x7e90);
-    env->tr.base = ldq_phys(sm_state + 0x7e98);
-    env->tr.limit = ldl_phys(sm_state + 0x7e94);
-    env->tr.flags = (lduw_phys(sm_state + 0x7e92) & 0xf0ff) << 8;
-
-    EAX = ldq_phys(sm_state + 0x7ff8);
-    ECX = ldq_phys(sm_state + 0x7ff0);
-    EDX = ldq_phys(sm_state + 0x7fe8);
-    EBX = ldq_phys(sm_state + 0x7fe0);
-    ESP = ldq_phys(sm_state + 0x7fd8);
-    EBP = ldq_phys(sm_state + 0x7fd0);
-    ESI = ldq_phys(sm_state + 0x7fc8);
-    EDI = ldq_phys(sm_state + 0x7fc0);
-    for(i = 8; i < 16; i++)
-        env->regs[i] = ldq_phys(sm_state + 0x7ff8 - i * 8);
-    env->eip = ldq_phys(sm_state + 0x7f78);
-    load_eflags(ldl_phys(sm_state + 0x7f70),
-                ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
-    env->dr[6] = ldl_phys(sm_state + 0x7f68);
-    env->dr[7] = ldl_phys(sm_state + 0x7f60);
-
-    cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f48));
-    cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7f50));
-    cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7f58));
-
-    val = ldl_phys(sm_state + 0x7efc); /* revision ID */
-    if (val & 0x20000) {
-        env->smbase = ldl_phys(sm_state + 0x7f00) & ~0x7fff;
-    }
-#else
-    cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7ffc));
-    cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7ff8));
-    load_eflags(ldl_phys(sm_state + 0x7ff4),
-                ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
-    env->eip = ldl_phys(sm_state + 0x7ff0);
-    EDI = ldl_phys(sm_state + 0x7fec);
-    ESI = ldl_phys(sm_state + 0x7fe8);
-    EBP = ldl_phys(sm_state + 0x7fe4);
-    ESP = ldl_phys(sm_state + 0x7fe0);
-    EBX = ldl_phys(sm_state + 0x7fdc);
-    EDX = ldl_phys(sm_state + 0x7fd8);
-    ECX = ldl_phys(sm_state + 0x7fd4);
-    EAX = ldl_phys(sm_state + 0x7fd0);
-    env->dr[6] = ldl_phys(sm_state + 0x7fcc);
-    env->dr[7] = ldl_phys(sm_state + 0x7fc8);
-
-    env->tr.selector = ldl_phys(sm_state + 0x7fc4) & 0xffff;
-    env->tr.base = ldl_phys(sm_state + 0x7f64);
-    env->tr.limit = ldl_phys(sm_state + 0x7f60);
-    env->tr.flags = (ldl_phys(sm_state + 0x7f5c) & 0xf0ff) << 8;
-
-    env->ldt.selector = ldl_phys(sm_state + 0x7fc0) & 0xffff;
-    env->ldt.base = ldl_phys(sm_state + 0x7f80);
-    env->ldt.limit = ldl_phys(sm_state + 0x7f7c);
-    env->ldt.flags = (ldl_phys(sm_state + 0x7f78) & 0xf0ff) << 8;
-
-    env->gdt.base = ldl_phys(sm_state + 0x7f74);
-    env->gdt.limit = ldl_phys(sm_state + 0x7f70);
-
-    env->idt.base = ldl_phys(sm_state + 0x7f58);
-    env->idt.limit = ldl_phys(sm_state + 0x7f54);
-
-    for(i = 0; i < 6; i++) {
-        if (i < 3)
-            offset = 0x7f84 + i * 12;
-        else
-            offset = 0x7f2c + (i - 3) * 12;
-        cpu_x86_load_seg_cache(env, i,
-                               ldl_phys(sm_state + 0x7fa8 + i * 4) & 0xffff,
-                               ldl_phys(sm_state + offset + 8),
-                               ldl_phys(sm_state + offset + 4),
-                               (ldl_phys(sm_state + offset) & 0xf0ff) << 8);
-    }
-    cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f14));
-
-    val = ldl_phys(sm_state + 0x7efc); /* revision ID */
-    if (val & 0x20000) {
-        env->smbase = ldl_phys(sm_state + 0x7ef8) & ~0x7fff;
-    }
-#endif
-    CC_OP = CC_OP_EFLAGS;
-    env->hflags &= ~HF_SMM_MASK;
-    cpu_smm_update(env);
-
-    qemu_log_mask(CPU_LOG_INT, "SMM: after RSM\n");
-    log_cpu_state_mask(CPU_LOG_INT, env, X86_DUMP_CCOP);
-}
-
-#endif /* !CONFIG_USER_ONLY */
-
-
-/* division, flags are undefined */
-
-void helper_divb_AL(target_ulong t0)
-{
-    unsigned int num, den, q, r;
-
-    num = (EAX & 0xffff);
-    den = (t0 & 0xff);
-    if (den == 0) {
-        raise_exception(EXCP00_DIVZ);
-    }
-    q = (num / den);
-    if (q > 0xff)
-        raise_exception(EXCP00_DIVZ);
-    q &= 0xff;
-    r = (num % den) & 0xff;
-    EAX = (EAX & ~0xffff) | (r << 8) | q;
-}
-
-void helper_idivb_AL(target_ulong t0)
-{
-    int num, den, q, r;
-
-    num = (int16_t)EAX;
-    den = (int8_t)t0;
-    if (den == 0) {
-        raise_exception(EXCP00_DIVZ);
-    }
-    q = (num / den);
-    if (q != (int8_t)q)
-        raise_exception(EXCP00_DIVZ);
-    q &= 0xff;
-    r = (num % den) & 0xff;
-    EAX = (EAX & ~0xffff) | (r << 8) | q;
-}
-
-void helper_divw_AX(target_ulong t0)
-{
-    unsigned int num, den, q, r;
-
-    num = (EAX & 0xffff) | ((EDX & 0xffff) << 16);
-    den = (t0 & 0xffff);
-    if (den == 0) {
-        raise_exception(EXCP00_DIVZ);
-    }
-    q = (num / den);
-    if (q > 0xffff)
-        raise_exception(EXCP00_DIVZ);
-    q &= 0xffff;
-    r = (num % den) & 0xffff;
-    EAX = (EAX & ~0xffff) | q;
-    EDX = (EDX & ~0xffff) | r;
-}
-
-void helper_idivw_AX(target_ulong t0)
-{
-    int num, den, q, r;
-
-    num = (EAX & 0xffff) | ((EDX & 0xffff) << 16);
-    den = (int16_t)t0;
-    if (den == 0) {
-        raise_exception(EXCP00_DIVZ);
-    }
-    q = (num / den);
-    if (q != (int16_t)q)
-        raise_exception(EXCP00_DIVZ);
-    q &= 0xffff;
-    r = (num % den) & 0xffff;
-    EAX = (EAX & ~0xffff) | q;
-    EDX = (EDX & ~0xffff) | r;
-}
-
-void helper_divl_EAX(target_ulong t0)
-{
-    unsigned int den, r;
-    uint64_t num, q;
-
-    num = ((uint32_t)EAX) | ((uint64_t)((uint32_t)EDX) << 32);
-    den = t0;
-    if (den == 0) {
-        raise_exception(EXCP00_DIVZ);
-    }
-    q = (num / den);
-    r = (num % den);
-    if (q > 0xffffffff)
-        raise_exception(EXCP00_DIVZ);
-    EAX = (uint32_t)q;
-    EDX = (uint32_t)r;
-}
-
-void helper_idivl_EAX(target_ulong t0)
-{
-    int den, r;
-    int64_t num, q;
-
-    num = ((uint32_t)EAX) | ((uint64_t)((uint32_t)EDX) << 32);
-    den = t0;
-    if (den == 0) {
-        raise_exception(EXCP00_DIVZ);
-    }
-    q = (num / den);
-    r = (num % den);
-    if (q != (int32_t)q)
-        raise_exception(EXCP00_DIVZ);
-    EAX = (uint32_t)q;
-    EDX = (uint32_t)r;
-}
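
All six division helpers above follow the same hardware-mandated pattern: divide the widened dividend by the operand, raise #DE both for a zero divisor and for a quotient that does not fit in the destination, and otherwise split quotient and remainder across the register pair. A minimal host-side model of the 32-bit unsigned case, detached from the CPU state (the name and the return convention are illustrative):

    #include <stdint.h>

    /* Model of DIV r/m32 on EDX:EAX. Returns 0 on success, -1 where the
     * helper would raise #DE (divide error). */
    static int div32_model(uint32_t *eax, uint32_t *edx, uint32_t divisor)
    {
        uint64_t num = ((uint64_t)*edx << 32) | *eax;
        if (divisor == 0)
            return -1;                        /* #DE: divide by zero */
        uint64_t q = num / divisor;
        if (q > 0xffffffffu)
            return -1;                        /* #DE: quotient overflow */
        *eax = (uint32_t)q;                   /* quotient */
        *edx = (uint32_t)(num % divisor);     /* remainder */
        return 0;
    }
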
-
-/* bcd */
-
-/* XXX: exception */
-void helper_aam(int base)
-{
-    int al, ah;
-    al = EAX & 0xff;
-    ah = al / base;
-    al = al % base;
-    EAX = (EAX & ~0xffff) | al | (ah << 8);
-    CC_DST = al;
-}
-
-void helper_aad(int base)
-{
-    int al, ah;
-    al = EAX & 0xff;
-    ah = (EAX >> 8) & 0xff;
-    al = ((ah * base) + al) & 0xff;
-    EAX = (EAX & ~0xffff) | al;
-    CC_DST = al;
-}
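
A quick worked example of the two helpers above with the usual base of 10: starting from AL = 53 (0x35), AAM leaves AH = 5 and AL = 3, and feeding that pair back through AAD recombines them as 5 * 10 + 3 = 53. A tiny self-checking sketch, standalone rather than emulator code:

    #include <assert.h>

    static void aam_aad_example(void)
    {
        unsigned al = 53, ah;
        ah = al / 10;                   /* AAM: AH = 5 */
        al = al % 10;                   /* AAM: AL = 3 */
        al = (ah * 10 + al) & 0xff;     /* AAD: back to 53 */
        assert(al == 53 && ah == 5);
    }
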
-
-void helper_aaa(void)
-{
-    int icarry;
-    int al, ah, af;
-    int eflags;
-
-    eflags = helper_cc_compute_all(CC_OP);
-    af = eflags & CC_A;
-    al = EAX & 0xff;
-    ah = (EAX >> 8) & 0xff;
-
-    icarry = (al > 0xf9);
-    if (((al & 0x0f) > 9 ) || af) {
-        al = (al + 6) & 0x0f;
-        ah = (ah + 1 + icarry) & 0xff;
-        eflags |= CC_C | CC_A;
-    } else {
-        eflags &= ~(CC_C | CC_A);
-        al &= 0x0f;
-    }
-    EAX = (EAX & ~0xffff) | al | (ah << 8);
-    CC_SRC = eflags;
-}
-
-void helper_aas(void)
-{
-    int icarry;
-    int al, ah, af;
-    int eflags;
-
-    eflags = helper_cc_compute_all(CC_OP);
-    af = eflags & CC_A;
-    al = EAX & 0xff;
-    ah = (EAX >> 8) & 0xff;
-
-    icarry = (al < 6);
-    if (((al & 0x0f) > 9 ) || af) {
-        al = (al - 6) & 0x0f;
-        ah = (ah - 1 - icarry) & 0xff;
-        eflags |= CC_C | CC_A;
-    } else {
-        eflags &= ~(CC_C | CC_A);
-        al &= 0x0f;
-    }
-    EAX = (EAX & ~0xffff) | al | (ah << 8);
-    CC_SRC = eflags;
-}
-
-void helper_daa(void)
-{
-    int al, af, cf;
-    int eflags;
-
-    eflags = helper_cc_compute_all(CC_OP);
-    cf = eflags & CC_C;
-    af = eflags & CC_A;
-    al = EAX & 0xff;
-
-    eflags = 0;
-    if (((al & 0x0f) > 9 ) || af) {
-        al = (al + 6) & 0xff;
-        eflags |= CC_A;
-    }
-    if ((al > 0x9f) || cf) {
-        al = (al + 0x60) & 0xff;
-        eflags |= CC_C;
-    }
-    EAX = (EAX & ~0xff) | al;
-    /* well, speed is not an issue here, so we compute the flags by hand */
-    eflags |= (al == 0) << 6; /* zf */
-    eflags |= parity_table[al]; /* pf */
-    eflags |= (al & 0x80); /* sf */
-    CC_SRC = eflags;
-}
-
-void helper_das(void)
-{
-    int al, al1, af, cf;
-    int eflags;
-
-    eflags = helper_cc_compute_all(CC_OP);
-    cf = eflags & CC_C;
-    af = eflags & CC_A;
-    al = EAX & 0xff;
-
-    eflags = 0;
-    al1 = al;
-    if (((al & 0x0f) > 9 ) || af) {
-        eflags |= CC_A;
-        if (al < 6 || cf)
-            eflags |= CC_C;
-        al = (al - 6) & 0xff;
-    }
-    if ((al1 > 0x99) || cf) {
-        al = (al - 0x60) & 0xff;
-        eflags |= CC_C;
-    }
-    EAX = (EAX & ~0xff) | al;
-    /* well, speed is not an issue here, so we compute the flags by hand */
-    eflags |= (al == 0) << 6; /* zf */
-    eflags |= parity_table[al]; /* pf */
-    eflags |= (al & 0x80); /* sf */
-    CC_SRC = eflags;
-}
-
-void helper_into(int next_eip_addend)
-{
-    int eflags;
-    eflags = helper_cc_compute_all(CC_OP);
-    if (eflags & CC_O) {
-        raise_interrupt(EXCP04_INTO, 1, 0, next_eip_addend);
-    }
-}
-
-void helper_cmpxchg8b(target_ulong a0)
-{
-    uint64_t d;
-    int eflags;
-
-    eflags = helper_cc_compute_all(CC_OP);
-    d = ldq(a0);
-    if (d == (((uint64_t)EDX << 32) | (uint32_t)EAX)) {
-        stq(a0, ((uint64_t)ECX << 32) | (uint32_t)EBX);
-        eflags |= CC_Z;
-    } else {
-        /* always do the store */
-        stq(a0, d);
-        EDX = (uint32_t)(d >> 32);
-        EAX = (uint32_t)d;
-        eflags &= ~CC_Z;
-    }
-    CC_SRC = eflags;
-}
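
The helper above implements the documented CMPXCHG8B contract: compare EDX:EAX with the 64-bit memory operand; on a match store ECX:EBX and set ZF, otherwise load the memory value into EDX:EAX and clear ZF (the unconditional store-back in the mismatch path keeps the access a plain read-modify-write). A minimal host-side model of that contract, using plain pointers instead of guest addresses (names are illustrative):

    #include <stdint.h>

    /* Returns the resulting ZF: 1 on match, 0 on mismatch. */
    static int cmpxchg8b_model(uint64_t *mem, uint64_t *edx_eax, uint64_t ecx_ebx)
    {
        if (*mem == *edx_eax) {
            *mem = ecx_ebx;         /* match: write ECX:EBX */
            return 1;
        }
        *edx_eax = *mem;            /* mismatch: load the old value */
        return 0;
    }
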
-
-#ifdef TARGET_X86_64
-void helper_cmpxchg16b(target_ulong a0)
-{
-    uint64_t d0, d1;
-    int eflags;
-
-    if ((a0 & 0xf) != 0)
-        raise_exception(EXCP0D_GPF);
-    eflags = helper_cc_compute_all(CC_OP);
-    d0 = ldq(a0);
-    d1 = ldq(a0 + 8);
-    if (d0 == EAX && d1 == EDX) {
-        stq(a0, EBX);
-        stq(a0 + 8, ECX);
-        eflags |= CC_Z;
-    } else {
-        /* always do the store */
-        stq(a0, d0);
-        stq(a0 + 8, d1);
-        EDX = d1;
-        EAX = d0;
-        eflags &= ~CC_Z;
-    }
-    CC_SRC = eflags;
-}
-#endif
-
-void helper_single_step(void)
-{
-#ifndef CONFIG_USER_ONLY
-    check_hw_breakpoints(env, 1);
-    env->dr[6] |= DR6_BS;
-#endif
-    raise_exception(EXCP01_DB);
-}
-
-void helper_cpuid(void)
-{
-    uint32_t eax, ebx, ecx, edx;
-
-    helper_svm_check_intercept_param(SVM_EXIT_CPUID, 0);
-
-    cpu_x86_cpuid(env, (uint32_t)EAX, (uint32_t)ECX, &eax, &ebx, &ecx, &edx);
-    EAX = eax;
-    EBX = ebx;
-    ECX = ecx;
-    EDX = edx;
-}
-
-void helper_enter_level(int level, int data32, target_ulong t1)
-{
-    target_ulong ssp;
-    uint32_t esp_mask, esp, ebp;
-
-    esp_mask = get_sp_mask(env->segs[R_SS].flags);
-    ssp = env->segs[R_SS].base;
-    ebp = EBP;
-    esp = ESP;
-    if (data32) {
-        /* 32 bit */
-        esp -= 4;
-        while (--level) {
-            esp -= 4;
-            ebp -= 4;
-            stl(ssp + (esp & esp_mask), ldl(ssp + (ebp & esp_mask)));
-        }
-        esp -= 4;
-        stl(ssp + (esp & esp_mask), t1);
-    } else {
-        /* 16 bit */
-        esp -= 2;
-        while (--level) {
-            esp -= 2;
-            ebp -= 2;
-            stw(ssp + (esp & esp_mask), lduw(ssp + (ebp & esp_mask)));
-        }
-        esp -= 2;
-        stw(ssp + (esp & esp_mask), t1);
-    }
-}
-
-#ifdef TARGET_X86_64
-void helper_enter64_level(int level, int data64, target_ulong t1)
-{
-    target_ulong esp, ebp;
-    ebp = EBP;
-    esp = ESP;
-
-    if (data64) {
-        /* 64 bit */
-        esp -= 8;
-        while (--level) {
-            esp -= 8;
-            ebp -= 8;
-            stq(esp, ldq(ebp));
-        }
-        esp -= 8;
-        stq(esp, t1);
-    } else {
-        /* 16 bit */
-        esp -= 2;
-        while (--level) {
-            esp -= 2;
-            ebp -= 2;
-            stw(esp, lduw(ebp));
-        }
-        esp -= 2;
-        stw(esp, t1);
-    }
-}
-#endif
-
-void helper_lldt(int selector)
-{
-    SegmentCache *dt;
-    uint32_t e1, e2;
-    int index, entry_limit;
-    target_ulong ptr;
-
-    selector &= 0xffff;
-    if ((selector & 0xfffc) == 0) {
-        /* XXX: NULL selector case: invalid LDT */
-        env->ldt.base = 0;
-        env->ldt.limit = 0;
-    } else {
-        if (selector & 0x4)
-            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
-        dt = &env->gdt;
-        index = selector & ~7;
-#ifdef TARGET_X86_64
-        if (env->hflags & HF_LMA_MASK)
-            entry_limit = 15;
-        else
-#endif
-            entry_limit = 7;
-        if ((index + entry_limit) > dt->limit)
-            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
-        ptr = dt->base + index;
-        e1 = ldl_kernel(ptr);
-        e2 = ldl_kernel(ptr + 4);
-        if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2)
-            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
-        if (!(e2 & DESC_P_MASK))
-            raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
-#ifdef TARGET_X86_64
-        if (env->hflags & HF_LMA_MASK) {
-            uint32_t e3;
-            e3 = ldl_kernel(ptr + 8);
-            load_seg_cache_raw_dt(&env->ldt, e1, e2);
-            env->ldt.base |= (target_ulong)e3 << 32;
-        } else
-#endif
-        {
-            load_seg_cache_raw_dt(&env->ldt, e1, e2);
-        }
-    }
-    env->ldt.selector = selector;
-}
-
-void helper_ltr(int selector)
-{
-    SegmentCache *dt;
-    uint32_t e1, e2;
-    int index, type, entry_limit;
-    target_ulong ptr;
-
-    selector &= 0xffff;
-    if ((selector & 0xfffc) == 0) {
-        /* NULL selector case: invalid TR */
-        env->tr.base = 0;
-        env->tr.limit = 0;
-        env->tr.flags = 0;
-    } else {
-        if (selector & 0x4)
-            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
-        dt = &env->gdt;
-        index = selector & ~7;
-#ifdef TARGET_X86_64
-        if (env->hflags & HF_LMA_MASK)
-            entry_limit = 15;
-        else
-#endif
-            entry_limit = 7;
-        if ((index + entry_limit) > dt->limit)
-            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
-        ptr = dt->base + index;
-        e1 = ldl_kernel(ptr);
-        e2 = ldl_kernel(ptr + 4);
-        type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
-        if ((e2 & DESC_S_MASK) ||
-            (type != 1 && type != 9))
-            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
-        if (!(e2 & DESC_P_MASK))
-            raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
-#ifdef TARGET_X86_64
-        if (env->hflags & HF_LMA_MASK) {
-            uint32_t e3, e4;
-            e3 = ldl_kernel(ptr + 8);
-            e4 = ldl_kernel(ptr + 12);
-            if ((e4 >> DESC_TYPE_SHIFT) & 0xf)
-                raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
-            load_seg_cache_raw_dt(&env->tr, e1, e2);
-            env->tr.base |= (target_ulong)e3 << 32;
-        } else
-#endif
-        {
-            load_seg_cache_raw_dt(&env->tr, e1, e2);
-        }
-        e2 |= DESC_TSS_BUSY_MASK;
-        stl_kernel(ptr + 4, e2);
-    }
-    env->tr.selector = selector;
-}
-
-/* only works in protected mode outside VM86. seg_reg must be != R_CS */
-void helper_load_seg(int seg_reg, int selector)
-{
-    uint32_t e1, e2;
-    int cpl, dpl, rpl;
-    SegmentCache *dt;
-    int index;
-    target_ulong ptr;
-
-    selector &= 0xffff;
-    cpl = env->hflags & HF_CPL_MASK;
-    if ((selector & 0xfffc) == 0) {
-        /* null selector case */
-        if (seg_reg == R_SS
-#ifdef TARGET_X86_64
-            && (!(env->hflags & HF_CS64_MASK) || cpl == 3)
-#endif
-            )
-            raise_exception_err(EXCP0D_GPF, 0);
-        cpu_x86_load_seg_cache(env, seg_reg, selector, 0, 0, 0);
-    } else {
-
-        if (selector & 0x4)
-            dt = &env->ldt;
-        else
-            dt = &env->gdt;
-        index = selector & ~7;
-        if ((index + 7) > dt->limit)
-            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
-        ptr = dt->base + index;
-        e1 = ldl_kernel(ptr);
-        e2 = ldl_kernel(ptr + 4);
-
-        if (!(e2 & DESC_S_MASK))
-            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
-        rpl = selector & 3;
-        dpl = (e2 >> DESC_DPL_SHIFT) & 3;
-        if (seg_reg == R_SS) {
-            /* must be writable segment */
-            if ((e2 & DESC_CS_MASK) || !(e2 & DESC_W_MASK))
-                raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
-            if (rpl != cpl || dpl != cpl)
-                raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
-        } else {
-            /* must be readable segment */
-            if ((e2 & (DESC_CS_MASK | DESC_R_MASK)) == DESC_CS_MASK)
-                raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
-
-            if (!(e2 & DESC_CS_MASK) || !(e2 & DESC_C_MASK)) {
-                /* if not conforming code, test rights */
-                if (dpl < cpl || dpl < rpl)
-                    raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
-            }
-        }
-
-        if (!(e2 & DESC_P_MASK)) {
-            if (seg_reg == R_SS)
-                raise_exception_err(EXCP0C_STACK, selector & 0xfffc);
-            else
-                raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
-        }
-
-        /* set the access bit if not already set */
-        if (!(e2 & DESC_A_MASK)) {
-            e2 |= DESC_A_MASK;
-            stl_kernel(ptr + 4, e2);
-        }
-
-        cpu_x86_load_seg_cache(env, seg_reg, selector,
-                       get_seg_base(e1, e2),
-                       get_seg_limit(e1, e2),
-                       e2);
-#if 0
-        qemu_log("load_seg: sel=0x%04x base=0x%08lx limit=0x%08lx flags=%08x\n",
-                selector, (unsigned long)sc->base, sc->limit, sc->flags);
-#endif
-    }
-}
-
-/* protected mode jump */
-void helper_ljmp_protected(int new_cs, target_ulong new_eip,
-                           int next_eip_addend)
-{
-    int gate_cs, type;
-    uint32_t e1, e2, cpl, dpl, rpl, limit;
-    target_ulong next_eip;
-
-    if ((new_cs & 0xfffc) == 0)
-        raise_exception_err(EXCP0D_GPF, 0);
-    if (load_segment(&e1, &e2, new_cs) != 0)
-        raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
-    cpl = env->hflags & HF_CPL_MASK;
-    if (e2 & DESC_S_MASK) {
-        if (!(e2 & DESC_CS_MASK))
-            raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
-        dpl = (e2 >> DESC_DPL_SHIFT) & 3;
-        if (e2 & DESC_C_MASK) {
-            /* conforming code segment */
-            if (dpl > cpl)
-                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
-        } else {
-            /* non conforming code segment */
-            rpl = new_cs & 3;
-            if (rpl > cpl)
-                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
-            if (dpl != cpl)
-                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
-        }
-        if (!(e2 & DESC_P_MASK))
-            raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc);
-        limit = get_seg_limit(e1, e2);
-        if (new_eip > limit &&
-            !(env->hflags & HF_LMA_MASK) && !(e2 & DESC_L_MASK))
-            raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
-        cpu_x86_load_seg_cache(env, R_CS, (new_cs & 0xfffc) | cpl,
-                       get_seg_base(e1, e2), limit, e2);
-        EIP = new_eip;
-    } else {
-        /* jump to call or task gate */
-        dpl = (e2 >> DESC_DPL_SHIFT) & 3;
-        rpl = new_cs & 3;
-        cpl = env->hflags & HF_CPL_MASK;
-        type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
-        switch(type) {
-        case 1: /* 286 TSS */
-        case 9: /* 386 TSS */
-        case 5: /* task gate */
-            if (dpl < cpl || dpl < rpl)
-                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
-            next_eip = env->eip + next_eip_addend;
-            switch_tss(new_cs, e1, e2, SWITCH_TSS_JMP, next_eip);
-            CC_OP = CC_OP_EFLAGS;
-            break;
-        case 4: /* 286 call gate */
-        case 12: /* 386 call gate */
-            if ((dpl < cpl) || (dpl < rpl))
-                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
-            if (!(e2 & DESC_P_MASK))
-                raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc);
-            gate_cs = e1 >> 16;
-            new_eip = (e1 & 0xffff);
-            if (type == 12)
-                new_eip |= (e2 & 0xffff0000);
-            if (load_segment(&e1, &e2, gate_cs) != 0)
-                raise_exception_err(EXCP0D_GPF, gate_cs & 0xfffc);
-            dpl = (e2 >> DESC_DPL_SHIFT) & 3;
-            /* must be code segment */
-            if (((e2 & (DESC_S_MASK | DESC_CS_MASK)) !=
-                 (DESC_S_MASK | DESC_CS_MASK)))
-                raise_exception_err(EXCP0D_GPF, gate_cs & 0xfffc);
-            if (((e2 & DESC_C_MASK) && (dpl > cpl)) ||
-                (!(e2 & DESC_C_MASK) && (dpl != cpl)))
-                raise_exception_err(EXCP0D_GPF, gate_cs & 0xfffc);
-            if (!(e2 & DESC_P_MASK))
-                raise_exception_err(EXCP0D_GPF, gate_cs & 0xfffc);
-            limit = get_seg_limit(e1, e2);
-            if (new_eip > limit)
-                raise_exception_err(EXCP0D_GPF, 0);
-            cpu_x86_load_seg_cache(env, R_CS, (gate_cs & 0xfffc) | cpl,
-                                   get_seg_base(e1, e2), limit, e2);
-            EIP = new_eip;
-            break;
-        default:
-            raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
-            break;
-        }
-    }
-}
-
-/* real mode call */
-void helper_lcall_real(int new_cs, target_ulong new_eip1,
-                       int shift, int next_eip)
-{
-    int new_eip;
-    uint32_t esp, esp_mask;
-    target_ulong ssp;
-
-    new_eip = new_eip1;
-    esp = ESP;
-    esp_mask = get_sp_mask(env->segs[R_SS].flags);
-    ssp = env->segs[R_SS].base;
-    if (shift) {
-        PUSHL(ssp, esp, esp_mask, env->segs[R_CS].selector);
-        PUSHL(ssp, esp, esp_mask, next_eip);
-    } else {
-        PUSHW(ssp, esp, esp_mask, env->segs[R_CS].selector);
-        PUSHW(ssp, esp, esp_mask, next_eip);
-    }
-
-    SET_ESP(esp, esp_mask);
-    env->eip = new_eip;
-    env->segs[R_CS].selector = new_cs;
-    env->segs[R_CS].base = (new_cs << 4);
-}
-
-/* protected mode call */
-void helper_lcall_protected(int new_cs, target_ulong new_eip,
-                            int shift, int next_eip_addend)
-{
-    int new_stack, i;
-    uint32_t e1, e2, cpl, dpl, rpl, selector, offset, param_count;
-    uint32_t ss = 0, ss_e1 = 0, ss_e2 = 0, sp, type, ss_dpl, sp_mask;
-    uint32_t val, limit, old_sp_mask;
-    target_ulong ssp, old_ssp, next_eip;
-
-    next_eip = env->eip + next_eip_addend;
-    LOG_PCALL("lcall %04x:%08x s=%d\n", new_cs, (uint32_t)new_eip, shift);
-    LOG_PCALL_STATE(env);
-    if ((new_cs & 0xfffc) == 0)
-        raise_exception_err(EXCP0D_GPF, 0);
-    if (load_segment(&e1, &e2, new_cs) != 0)
-        raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
-    cpl = env->hflags & HF_CPL_MASK;
-    LOG_PCALL("desc=%08x:%08x\n", e1, e2);
-    if (e2 & DESC_S_MASK) {
-        if (!(e2 & DESC_CS_MASK))
-            raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
-        dpl = (e2 >> DESC_DPL_SHIFT) & 3;
-        if (e2 & DESC_C_MASK) {
-            /* conforming code segment */
-            if (dpl > cpl)
-                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
-        } else {
-            /* non conforming code segment */
-            rpl = new_cs & 3;
-            if (rpl > cpl)
-                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
-            if (dpl != cpl)
-                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
-        }
-        if (!(e2 & DESC_P_MASK))
-            raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc);
-
-#ifdef TARGET_X86_64
-        /* XXX: check 16/32 bit cases in long mode */
-        if (shift == 2) {
-            target_ulong rsp;
-            /* 64 bit case */
-            rsp = ESP;
-            PUSHQ(rsp, env->segs[R_CS].selector);
-            PUSHQ(rsp, next_eip);
-            /* from this point, not restartable */
-            ESP = rsp;
-            cpu_x86_load_seg_cache(env, R_CS, (new_cs & 0xfffc) | cpl,
-                                   get_seg_base(e1, e2),
-                                   get_seg_limit(e1, e2), e2);
-            EIP = new_eip;
-        } else
-#endif
-        {
-            sp = ESP;
-            sp_mask = get_sp_mask(env->segs[R_SS].flags);
-            ssp = env->segs[R_SS].base;
-            if (shift) {
-                PUSHL(ssp, sp, sp_mask, env->segs[R_CS].selector);
-                PUSHL(ssp, sp, sp_mask, next_eip);
-            } else {
-                PUSHW(ssp, sp, sp_mask, env->segs[R_CS].selector);
-                PUSHW(ssp, sp, sp_mask, next_eip);
-            }
-
-            limit = get_seg_limit(e1, e2);
-            if (new_eip > limit)
-                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
-            /* from this point, not restartable */
-            SET_ESP(sp, sp_mask);
-            cpu_x86_load_seg_cache(env, R_CS, (new_cs & 0xfffc) | cpl,
-                                   get_seg_base(e1, e2), limit, e2);
-            EIP = new_eip;
-        }
-    } else {
-        /* check gate type */
-        type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
-        dpl = (e2 >> DESC_DPL_SHIFT) & 3;
-        rpl = new_cs & 3;
-        switch(type) {
-        case 1: /* available 286 TSS */
-        case 9: /* available 386 TSS */
-        case 5: /* task gate */
-            if (dpl < cpl || dpl < rpl)
-                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
-            switch_tss(new_cs, e1, e2, SWITCH_TSS_CALL, next_eip);
-            CC_OP = CC_OP_EFLAGS;
-            return;
-        case 4: /* 286 call gate */
-        case 12: /* 386 call gate */
-            break;
-        default:
-            raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
-            break;
-        }
-        shift = type >> 3;
-
-        if (dpl < cpl || dpl < rpl)
-            raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
-        /* check valid bit */
-        if (!(e2 & DESC_P_MASK))
-            raise_exception_err(EXCP0B_NOSEG,  new_cs & 0xfffc);
-        selector = e1 >> 16;
-        offset = (e2 & 0xffff0000) | (e1 & 0x0000ffff);
-        param_count = e2 & 0x1f;
-        if ((selector & 0xfffc) == 0)
-            raise_exception_err(EXCP0D_GPF, 0);
-
-        if (load_segment(&e1, &e2, selector) != 0)
-            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
-        if (!(e2 & DESC_S_MASK) || !(e2 & (DESC_CS_MASK)))
-            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
-        dpl = (e2 >> DESC_DPL_SHIFT) & 3;
-        if (dpl > cpl)
-            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
-        if (!(e2 & DESC_P_MASK))
-            raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
-
-        if (!(e2 & DESC_C_MASK) && dpl < cpl) {
-            /* to inner privilege */
-            get_ss_esp_from_tss(&ss, &sp, dpl);
-            LOG_PCALL("new ss:esp=%04x:%08x param_count=%d ESP=" TARGET_FMT_lx "\n",
-                        ss, sp, param_count, ESP);
-            if ((ss & 0xfffc) == 0)
-                raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
-            if ((ss & 3) != dpl)
-                raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
-            if (load_segment(&ss_e1, &ss_e2, ss) != 0)
-                raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
-            ss_dpl = (ss_e2 >> DESC_DPL_SHIFT) & 3;
-            if (ss_dpl != dpl)
-                raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
-            if (!(ss_e2 & DESC_S_MASK) ||
-                (ss_e2 & DESC_CS_MASK) ||
-                !(ss_e2 & DESC_W_MASK))
-                raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
-            if (!(ss_e2 & DESC_P_MASK))
-                raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
-
-            //            push_size = ((param_count * 2) + 8) << shift;
-
-            old_sp_mask = get_sp_mask(env->segs[R_SS].flags);
-            old_ssp = env->segs[R_SS].base;
-
-            sp_mask = get_sp_mask(ss_e2);
-            ssp = get_seg_base(ss_e1, ss_e2);
-            if (shift) {
-                PUSHL(ssp, sp, sp_mask, env->segs[R_SS].selector);
-                PUSHL(ssp, sp, sp_mask, ESP);
-                for(i = param_count - 1; i >= 0; i--) {
-                    val = ldl_kernel(old_ssp + ((ESP + i * 4) & old_sp_mask));
-                    PUSHL(ssp, sp, sp_mask, val);
-                }
-            } else {
-                PUSHW(ssp, sp, sp_mask, env->segs[R_SS].selector);
-                PUSHW(ssp, sp, sp_mask, ESP);
-                for(i = param_count - 1; i >= 0; i--) {
-                    val = lduw_kernel(old_ssp + ((ESP + i * 2) & old_sp_mask));
-                    PUSHW(ssp, sp, sp_mask, val);
-                }
-            }
-            new_stack = 1;
-        } else {
-            /* to same privilege */
-            sp = ESP;
-            sp_mask = get_sp_mask(env->segs[R_SS].flags);
-            ssp = env->segs[R_SS].base;
-            //            push_size = (4 << shift);
-            new_stack = 0;
-        }
-
-        if (shift) {
-            PUSHL(ssp, sp, sp_mask, env->segs[R_CS].selector);
-            PUSHL(ssp, sp, sp_mask, next_eip);
-        } else {
-            PUSHW(ssp, sp, sp_mask, env->segs[R_CS].selector);
-            PUSHW(ssp, sp, sp_mask, next_eip);
-        }
-
-        /* from this point, not restartable */
-
-        if (new_stack) {
-            ss = (ss & ~3) | dpl;
-            cpu_x86_load_seg_cache(env, R_SS, ss,
-                                   ssp,
-                                   get_seg_limit(ss_e1, ss_e2),
-                                   ss_e2);
-        }
-
-        selector = (selector & ~3) | dpl;
-        cpu_x86_load_seg_cache(env, R_CS, selector,
-                       get_seg_base(e1, e2),
-                       get_seg_limit(e1, e2),
-                       e2);
-        cpu_x86_set_cpl(env, dpl);
-        SET_ESP(sp, sp_mask);
-        EIP = offset;
-    }
-}
-
-/* real and vm86 mode iret */
-void helper_iret_real(int shift)
-{
-    uint32_t sp, new_cs, new_eip, new_eflags, sp_mask;
-    target_ulong ssp;
-    int eflags_mask;
-
-    sp_mask = 0xffff; /* XXX: use SS segment size? */
-    sp = ESP;
-    ssp = env->segs[R_SS].base;
-    if (shift == 1) {
-        /* 32 bits */
-        POPL(ssp, sp, sp_mask, new_eip);
-        POPL(ssp, sp, sp_mask, new_cs);
-        new_cs &= 0xffff;
-        POPL(ssp, sp, sp_mask, new_eflags);
-    } else {
-        /* 16 bits */
-        POPW(ssp, sp, sp_mask, new_eip);
-        POPW(ssp, sp, sp_mask, new_cs);
-        POPW(ssp, sp, sp_mask, new_eflags);
-    }
-    ESP = (ESP & ~sp_mask) | (sp & sp_mask);
-    env->segs[R_CS].selector = new_cs;
-    env->segs[R_CS].base = (new_cs << 4);
-    env->eip = new_eip;
-    if (env->eflags & VM_MASK)
-        eflags_mask = TF_MASK | AC_MASK | ID_MASK | IF_MASK | RF_MASK | NT_MASK;
-    else
-        eflags_mask = TF_MASK | AC_MASK | ID_MASK | IF_MASK | IOPL_MASK | RF_MASK | NT_MASK;
-    if (shift == 0)
-        eflags_mask &= 0xffff;
-    load_eflags(new_eflags, eflags_mask);
-    env->hflags2 &= ~HF2_NMI_MASK;
-}
-
-static inline void validate_seg(int seg_reg, int cpl)
-{
-    int dpl;
-    uint32_t e2;
-
-    /* XXX: on x86_64, we do not want to nullify FS and GS because
-       they may still contain a valid base. I would be interested to
-       know how a real x86_64 CPU behaves */
-    if ((seg_reg == R_FS || seg_reg == R_GS) &&
-        (env->segs[seg_reg].selector & 0xfffc) == 0)
-        return;
-
-    e2 = env->segs[seg_reg].flags;
-    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
-    if (!(e2 & DESC_CS_MASK) || !(e2 & DESC_C_MASK)) {
-        /* data or non conforming code segment */
-        if (dpl < cpl) {
-            cpu_x86_load_seg_cache(env, seg_reg, 0, 0, 0, 0);
-        }
-    }
-}
-
-/* protected mode iret */
-static inline void helper_ret_protected(int shift, int is_iret, int addend)
-{
-    uint32_t new_cs, new_eflags, new_ss;
-    uint32_t new_es, new_ds, new_fs, new_gs;
-    uint32_t e1, e2, ss_e1, ss_e2;
-    int cpl, dpl, rpl, eflags_mask, iopl;
-    target_ulong ssp, sp, new_eip, new_esp, sp_mask;
-
-#ifdef TARGET_X86_64
-    if (shift == 2)
-        sp_mask = -1;
-    else
-#endif
-        sp_mask = get_sp_mask(env->segs[R_SS].flags);
-    sp = ESP;
-    ssp = env->segs[R_SS].base;
-    new_eflags = 0; /* avoid warning */
-#ifdef TARGET_X86_64
-    if (shift == 2) {
-        POPQ(sp, new_eip);
-        POPQ(sp, new_cs);
-        new_cs &= 0xffff;
-        if (is_iret) {
-            POPQ(sp, new_eflags);
-        }
-    } else
-#endif
-    if (shift == 1) {
-        /* 32 bits */
-        POPL(ssp, sp, sp_mask, new_eip);
-        POPL(ssp, sp, sp_mask, new_cs);
-        new_cs &= 0xffff;
-        if (is_iret) {
-            POPL(ssp, sp, sp_mask, new_eflags);
-            if (new_eflags & VM_MASK)
-                goto return_to_vm86;
-        }
-    } else {
-        /* 16 bits */
-        POPW(ssp, sp, sp_mask, new_eip);
-        POPW(ssp, sp, sp_mask, new_cs);
-        if (is_iret)
-            POPW(ssp, sp, sp_mask, new_eflags);
-    }
-    LOG_PCALL("lret new %04x:" TARGET_FMT_lx " s=%d addend=0x%x\n",
-              new_cs, new_eip, shift, addend);
-    LOG_PCALL_STATE(env);
-    if ((new_cs & 0xfffc) == 0)
-        raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
-    if (load_segment(&e1, &e2, new_cs) != 0)
-        raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
-    if (!(e2 & DESC_S_MASK) ||
-        !(e2 & DESC_CS_MASK))
-        raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
-    cpl = env->hflags & HF_CPL_MASK;
-    rpl = new_cs & 3;
-    if (rpl < cpl)
-        raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
-    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
-    if (e2 & DESC_C_MASK) {
-        if (dpl > rpl)
-            raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
-    } else {
-        if (dpl != rpl)
-            raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
-    }
-    if (!(e2 & DESC_P_MASK))
-        raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc);
-
-    sp += addend;
-    if (rpl == cpl && (!(env->hflags & HF_CS64_MASK) ||
-                       ((env->hflags & HF_CS64_MASK) && !is_iret))) {
-        /* return to same privilege level */
-        cpu_x86_load_seg_cache(env, R_CS, new_cs,
-                       get_seg_base(e1, e2),
-                       get_seg_limit(e1, e2),
-                       e2);
-    } else {
-        /* return to different privilege level */
-#ifdef TARGET_X86_64
-        if (shift == 2) {
-            POPQ(sp, new_esp);
-            POPQ(sp, new_ss);
-            new_ss &= 0xffff;
-        } else
-#endif
-        if (shift == 1) {
-            /* 32 bits */
-            POPL(ssp, sp, sp_mask, new_esp);
-            POPL(ssp, sp, sp_mask, new_ss);
-            new_ss &= 0xffff;
-        } else {
-            /* 16 bits */
-            POPW(ssp, sp, sp_mask, new_esp);
-            POPW(ssp, sp, sp_mask, new_ss);
-        }
-        LOG_PCALL("new ss:esp=%04x:" TARGET_FMT_lx "\n",
-                    new_ss, new_esp);
-        if ((new_ss & 0xfffc) == 0) {
-#ifdef TARGET_X86_64
-            /* NULL ss is allowed in long mode if cpl != 3 */
-            /* XXX: test CS64 ? */
-            if ((env->hflags & HF_LMA_MASK) && rpl != 3) {
-                cpu_x86_load_seg_cache(env, R_SS, new_ss,
-                                       0, 0xffffffff,
-                                       DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
-                                       DESC_S_MASK | (rpl << DESC_DPL_SHIFT) |
-                                       DESC_W_MASK | DESC_A_MASK);
-                ss_e2 = DESC_B_MASK; /* XXX: should not be needed ? */
-            } else
-#endif
-            {
-                raise_exception_err(EXCP0D_GPF, 0);
-            }
-        } else {
-            if ((new_ss & 3) != rpl)
-                raise_exception_err(EXCP0D_GPF, new_ss & 0xfffc);
-            if (load_segment(&ss_e1, &ss_e2, new_ss) != 0)
-                raise_exception_err(EXCP0D_GPF, new_ss & 0xfffc);
-            if (!(ss_e2 & DESC_S_MASK) ||
-                (ss_e2 & DESC_CS_MASK) ||
-                !(ss_e2 & DESC_W_MASK))
-                raise_exception_err(EXCP0D_GPF, new_ss & 0xfffc);
-            dpl = (ss_e2 >> DESC_DPL_SHIFT) & 3;
-            if (dpl != rpl)
-                raise_exception_err(EXCP0D_GPF, new_ss & 0xfffc);
-            if (!(ss_e2 & DESC_P_MASK))
-                raise_exception_err(EXCP0B_NOSEG, new_ss & 0xfffc);
-            cpu_x86_load_seg_cache(env, R_SS, new_ss,
-                                   get_seg_base(ss_e1, ss_e2),
-                                   get_seg_limit(ss_e1, ss_e2),
-                                   ss_e2);
-        }
-
-        cpu_x86_load_seg_cache(env, R_CS, new_cs,
-                       get_seg_base(e1, e2),
-                       get_seg_limit(e1, e2),
-                       e2);
-        cpu_x86_set_cpl(env, rpl);
-        sp = new_esp;
-#ifdef TARGET_X86_64
-        if (env->hflags & HF_CS64_MASK)
-            sp_mask = -1;
-        else
-#endif
-            sp_mask = get_sp_mask(ss_e2);
-
-        /* validate data segments */
-        validate_seg(R_ES, rpl);
-        validate_seg(R_DS, rpl);
-        validate_seg(R_FS, rpl);
-        validate_seg(R_GS, rpl);
-
-        sp += addend;
-    }
-    SET_ESP(sp, sp_mask);
-    env->eip = new_eip;
-    if (is_iret) {
-        /* NOTE: 'cpl' is the _old_ CPL */
-        eflags_mask = TF_MASK | AC_MASK | ID_MASK | RF_MASK | NT_MASK;
-        if (cpl == 0)
-            eflags_mask |= IOPL_MASK;
-        iopl = (env->eflags >> IOPL_SHIFT) & 3;
-        if (cpl <= iopl)
-            eflags_mask |= IF_MASK;
-        if (shift == 0)
-            eflags_mask &= 0xffff;
-        load_eflags(new_eflags, eflags_mask);
-    }
-    return;
-
- return_to_vm86:
-    POPL(ssp, sp, sp_mask, new_esp);
-    POPL(ssp, sp, sp_mask, new_ss);
-    POPL(ssp, sp, sp_mask, new_es);
-    POPL(ssp, sp, sp_mask, new_ds);
-    POPL(ssp, sp, sp_mask, new_fs);
-    POPL(ssp, sp, sp_mask, new_gs);
-
-    /* modify processor state */
-    load_eflags(new_eflags, TF_MASK | AC_MASK | ID_MASK |
-                IF_MASK | IOPL_MASK | VM_MASK | NT_MASK | VIF_MASK | VIP_MASK);
-    load_seg_vm(R_CS, new_cs & 0xffff);
-    cpu_x86_set_cpl(env, 3);
-    load_seg_vm(R_SS, new_ss & 0xffff);
-    load_seg_vm(R_ES, new_es & 0xffff);
-    load_seg_vm(R_DS, new_ds & 0xffff);
-    load_seg_vm(R_FS, new_fs & 0xffff);
-    load_seg_vm(R_GS, new_gs & 0xffff);
-
-    env->eip = new_eip & 0xffff;
-    ESP = new_esp;
-}
-
-void helper_iret_protected(int shift, int next_eip)
-{
-    int tss_selector, type;
-    uint32_t e1, e2;
-
-    /* specific case for TSS */
-    if (env->eflags & NT_MASK) {
-#ifdef TARGET_X86_64
-        if (env->hflags & HF_LMA_MASK)
-            raise_exception_err(EXCP0D_GPF, 0);
-#endif
-        tss_selector = lduw_kernel(env->tr.base + 0);
-        if (tss_selector & 4)
-            raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc);
-        if (load_segment(&e1, &e2, tss_selector) != 0)
-            raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc);
-        type = (e2 >> DESC_TYPE_SHIFT) & 0x17;
-        /* NOTE: we check both segment and busy TSS */
-        if (type != 3)
-            raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc);
-        switch_tss(tss_selector, e1, e2, SWITCH_TSS_IRET, next_eip);
-    } else {
-        helper_ret_protected(shift, 1, 0);
-    }
-    env->hflags2 &= ~HF2_NMI_MASK;
-}
-
-void helper_lret_protected(int shift, int addend)
-{
-    helper_ret_protected(shift, 0, addend);
-}
-
-void helper_sysenter(void)
-{
-    if (env->sysenter_cs == 0) {
-        raise_exception_err(EXCP0D_GPF, 0);
-    }
-    env->eflags &= ~(VM_MASK | IF_MASK | RF_MASK);
-    cpu_x86_set_cpl(env, 0);
-
-#ifdef TARGET_X86_64
-    if (env->hflags & HF_LMA_MASK) {
-        cpu_x86_load_seg_cache(env, R_CS, env->sysenter_cs & 0xfffc,
-                               0, 0xffffffff,
-                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
-                               DESC_S_MASK |
-                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
-    } else
-#endif
-    {
-        cpu_x86_load_seg_cache(env, R_CS, env->sysenter_cs & 0xfffc,
-                               0, 0xffffffff,
-                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
-                               DESC_S_MASK |
-                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
-    }
-    cpu_x86_load_seg_cache(env, R_SS, (env->sysenter_cs + 8) & 0xfffc,
-                           0, 0xffffffff,
-                           DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
-                           DESC_S_MASK |
-                           DESC_W_MASK | DESC_A_MASK);
-    ESP = env->sysenter_esp;
-    EIP = env->sysenter_eip;
-}
-
-void helper_sysexit(int dflag)
-{
-    int cpl;
-
-    cpl = env->hflags & HF_CPL_MASK;
-    if (env->sysenter_cs == 0 || cpl != 0) {
-        raise_exception_err(EXCP0D_GPF, 0);
-    }
-    cpu_x86_set_cpl(env, 3);
-#ifdef TARGET_X86_64
-    if (dflag == 2) {
-        cpu_x86_load_seg_cache(env, R_CS, ((env->sysenter_cs + 32) & 0xfffc) | 3,
-                               0, 0xffffffff,
-                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
-                               DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
-                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
-        cpu_x86_load_seg_cache(env, R_SS, ((env->sysenter_cs + 40) & 0xfffc) | 3,
-                               0, 0xffffffff,
-                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
-                               DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
-                               DESC_W_MASK | DESC_A_MASK);
-    } else
-#endif
-    {
-        cpu_x86_load_seg_cache(env, R_CS, ((env->sysenter_cs + 16) & 0xfffc) | 3,
-                               0, 0xffffffff,
-                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
-                               DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
-                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
-        cpu_x86_load_seg_cache(env, R_SS, ((env->sysenter_cs + 24) & 0xfffc) | 3,
-                               0, 0xffffffff,
-                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
-                               DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
-                               DESC_W_MASK | DESC_A_MASK);
-    }
-    ESP = ECX;
-    EIP = EDX;
-}
-
-#if defined(CONFIG_USER_ONLY)
-target_ulong helper_read_crN(int reg)
-{
-    return 0;
-}
-
-void helper_write_crN(int reg, target_ulong t0)
-{
-}
-
-void helper_movl_drN_T0(int reg, target_ulong t0)
-{
-}
-#else
-target_ulong helper_read_crN(int reg)
-{
-    target_ulong val;
-
-    helper_svm_check_intercept_param(SVM_EXIT_READ_CR0 + reg, 0);
-    switch(reg) {
-    default:
-        val = env->cr[reg];
-        break;
-    case 8:
-        if (!(env->hflags2 & HF2_VINTR_MASK)) {
-            val = cpu_get_apic_tpr(env);
-        } else {
-            val = env->v_tpr;
-        }
-        break;
-    }
-    return val;
-}
-
-void helper_write_crN(int reg, target_ulong t0)
-{
-    helper_svm_check_intercept_param(SVM_EXIT_WRITE_CR0 + reg, 0);
-    switch(reg) {
-    case 0:
-        cpu_x86_update_cr0(env, t0);
-        break;
-    case 3:
-        cpu_x86_update_cr3(env, t0);
-        break;
-    case 4:
-        cpu_x86_update_cr4(env, t0);
-        break;
-    case 8:
-        if (!(env->hflags2 & HF2_VINTR_MASK)) {
-            cpu_set_apic_tpr(env, t0);
-        }
-        env->v_tpr = t0 & 0x0f;
-        break;
-    default:
-        env->cr[reg] = t0;
-        break;
-    }
-}
-
-void helper_movl_drN_T0(int reg, target_ulong t0)
-{
-    int i;
-
-    if (reg < 4) {
-        hw_breakpoint_remove(env, reg);
-        env->dr[reg] = t0;
-        hw_breakpoint_insert(env, reg);
-    } else if (reg == 7) {
-        for (i = 0; i < 4; i++)
-            hw_breakpoint_remove(env, i);
-        env->dr[7] = t0;
-        for (i = 0; i < 4; i++)
-            hw_breakpoint_insert(env, i);
-    } else
-        env->dr[reg] = t0;
-}
-#endif
-
-void helper_lmsw(target_ulong t0)
-{
-    /* only 4 lower bits of CR0 are modified. PE cannot be set to zero
-       if already set to one. */
-    t0 = (env->cr[0] & ~0xe) | (t0 & 0xf);
-    helper_write_crN(0, t0);
-}
-
-void helper_clts(void)
-{
-    env->cr[0] &= ~CR0_TS_MASK;
-    env->hflags &= ~HF_TS_MASK;
-}
-
-void helper_invlpg(target_ulong addr)
-{
-    helper_svm_check_intercept_param(SVM_EXIT_INVLPG, 0);
-    tlb_flush_page(env, addr);
-}
-
-void helper_rdtsc(void)
-{
-    uint64_t val;
-
-    if ((env->cr[4] & CR4_TSD_MASK) && ((env->hflags & HF_CPL_MASK) != 0)) {
-        raise_exception(EXCP0D_GPF);
-    }
-    helper_svm_check_intercept_param(SVM_EXIT_RDTSC, 0);
-
-    val = cpu_get_tsc(env) + env->tsc_offset;
-    EAX = (uint32_t)(val);
-    EDX = (uint32_t)(val >> 32);
-}
-
-void helper_rdpmc(void)
-{
-    if ((env->cr[4] & CR4_PCE_MASK) && ((env->hflags & HF_CPL_MASK) != 0)) {
-        raise_exception(EXCP0D_GPF);
-    }
-    helper_svm_check_intercept_param(SVM_EXIT_RDPMC, 0);
-
-    /* currently unimplemented */
-    raise_exception_err(EXCP06_ILLOP, 0);
-}
-
-#if defined(CONFIG_USER_ONLY)
-void helper_wrmsr(void)
-{
-}
-
-void helper_rdmsr(void)
-{
-}
-#else
-void helper_wrmsr(void)
-{
-    uint64_t val;
-
-    helper_svm_check_intercept_param(SVM_EXIT_MSR, 1);
-
-    val = ((uint32_t)EAX) | ((uint64_t)((uint32_t)EDX) << 32);
-
-    switch((uint32_t)ECX) {
-    case MSR_IA32_SYSENTER_CS:
-        env->sysenter_cs = val & 0xffff;
-        break;
-    case MSR_IA32_SYSENTER_ESP:
-        env->sysenter_esp = val;
-        break;
-    case MSR_IA32_SYSENTER_EIP:
-        env->sysenter_eip = val;
-        break;
-    case MSR_IA32_APICBASE:
-        cpu_set_apic_base(env, val);
-        break;
-    case MSR_EFER:
-        {
-            uint64_t update_mask;
-            update_mask = 0;
-            if (env->cpuid_ext2_features & CPUID_EXT2_SYSCALL)
-                update_mask |= MSR_EFER_SCE;
-            if (env->cpuid_ext2_features & CPUID_EXT2_LM)
-                update_mask |= MSR_EFER_LME;
-            if (env->cpuid_ext2_features & CPUID_EXT2_FFXSR)
-                update_mask |= MSR_EFER_FFXSR;
-            if (env->cpuid_ext2_features & CPUID_EXT2_NX)
-                update_mask |= MSR_EFER_NXE;
-            if (env->cpuid_ext3_features & CPUID_EXT3_SVM)
-                update_mask |= MSR_EFER_SVME;
-            if (env->cpuid_ext2_features & CPUID_EXT2_FFXSR)
-                update_mask |= MSR_EFER_FFXSR;
-            cpu_load_efer(env, (env->efer & ~update_mask) |
-                          (val & update_mask));
-        }
-        break;
-    case MSR_STAR:
-        env->star = val;
-        break;
-    case MSR_PAT:
-        env->pat = val;
-        break;
-    case MSR_VM_HSAVE_PA:
-        env->vm_hsave = val;
-        break;
-#ifdef TARGET_X86_64
-    case MSR_LSTAR:
-        env->lstar = val;
-        break;
-    case MSR_CSTAR:
-        env->cstar = val;
-        break;
-    case MSR_FMASK:
-        env->fmask = val;
-        break;
-    case MSR_FSBASE:
-        env->segs[R_FS].base = val;
-        break;
-    case MSR_GSBASE:
-        env->segs[R_GS].base = val;
-        break;
-    case MSR_KERNELGSBASE:
-        env->kernelgsbase = val;
-        break;
-#endif
-    case MSR_MTRRphysBase(0):
-    case MSR_MTRRphysBase(1):
-    case MSR_MTRRphysBase(2):
-    case MSR_MTRRphysBase(3):
-    case MSR_MTRRphysBase(4):
-    case MSR_MTRRphysBase(5):
-    case MSR_MTRRphysBase(6):
-    case MSR_MTRRphysBase(7):
-        env->mtrr_var[((uint32_t)ECX - MSR_MTRRphysBase(0)) / 2].base = val;
-        break;
-    case MSR_MTRRphysMask(0):
-    case MSR_MTRRphysMask(1):
-    case MSR_MTRRphysMask(2):
-    case MSR_MTRRphysMask(3):
-    case MSR_MTRRphysMask(4):
-    case MSR_MTRRphysMask(5):
-    case MSR_MTRRphysMask(6):
-    case MSR_MTRRphysMask(7):
-        env->mtrr_var[((uint32_t)ECX - MSR_MTRRphysMask(0)) / 2].mask = val;
-        break;
-    case MSR_MTRRfix64K_00000:
-        env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix64K_00000] = val;
-        break;
-    case MSR_MTRRfix16K_80000:
-    case MSR_MTRRfix16K_A0000:
-        env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix16K_80000 + 1] = val;
-        break;
-    case MSR_MTRRfix4K_C0000:
-    case MSR_MTRRfix4K_C8000:
-    case MSR_MTRRfix4K_D0000:
-    case MSR_MTRRfix4K_D8000:
-    case MSR_MTRRfix4K_E0000:
-    case MSR_MTRRfix4K_E8000:
-    case MSR_MTRRfix4K_F0000:
-    case MSR_MTRRfix4K_F8000:
-        env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix4K_C0000 + 3] = val;
-        break;
-    case MSR_MTRRdefType:
-        env->mtrr_deftype = val;
-        break;
-    case MSR_MCG_STATUS:
-        env->mcg_status = val;
-        break;
-    case MSR_MCG_CTL:
-        if ((env->mcg_cap & MCG_CTL_P)
-            && (val == 0 || val == ~(uint64_t)0))
-            env->mcg_ctl = val;
-        break;
-    default:
-        if ((uint32_t)ECX >= MSR_MC0_CTL
-            && (uint32_t)ECX < MSR_MC0_CTL + (4 * env->mcg_cap & 0xff)) {
-            uint32_t offset = (uint32_t)ECX - MSR_MC0_CTL;
-            if ((offset & 0x3) != 0
-                || (val == 0 || val == ~(uint64_t)0))
-                env->mce_banks[offset] = val;
-            break;
-        }
-        /* XXX: exception ? */
-        break;
-    }
-}
-
-void helper_rdmsr(void)
-{
-    uint64_t val;
-
-    helper_svm_check_intercept_param(SVM_EXIT_MSR, 0);
-
-    switch((uint32_t)ECX) {
-    case MSR_IA32_SYSENTER_CS:
-        val = env->sysenter_cs;
-        break;
-    case MSR_IA32_SYSENTER_ESP:
-        val = env->sysenter_esp;
-        break;
-    case MSR_IA32_SYSENTER_EIP:
-        val = env->sysenter_eip;
-        break;
-    case MSR_IA32_APICBASE:
-        val = cpu_get_apic_base(env);
-        break;
-    case MSR_EFER:
-        val = env->efer;
-        break;
-    case MSR_STAR:
-        val = env->star;
-        break;
-    case MSR_PAT:
-        val = env->pat;
-        break;
-    case MSR_VM_HSAVE_PA:
-        val = env->vm_hsave;
-        break;
-    case MSR_IA32_PERF_STATUS:
-        /* tsc_increment_by_tick */
-        val = 1000ULL;
-        /* CPU multiplier */
-        val |= (((uint64_t)4ULL) << 40);
-        break;
-#ifdef TARGET_X86_64
-    case MSR_LSTAR:
-        val = env->lstar;
-        break;
-    case MSR_CSTAR:
-        val = env->cstar;
-        break;
-    case MSR_FMASK:
-        val = env->fmask;
-        break;
-    case MSR_FSBASE:
-        val = env->segs[R_FS].base;
-        break;
-    case MSR_GSBASE:
-        val = env->segs[R_GS].base;
-        break;
-    case MSR_KERNELGSBASE:
-        val = env->kernelgsbase;
-        break;
-#endif
-    case MSR_MTRRphysBase(0):
-    case MSR_MTRRphysBase(1):
-    case MSR_MTRRphysBase(2):
-    case MSR_MTRRphysBase(3):
-    case MSR_MTRRphysBase(4):
-    case MSR_MTRRphysBase(5):
-    case MSR_MTRRphysBase(6):
-    case MSR_MTRRphysBase(7):
-        val = env->mtrr_var[((uint32_t)ECX - MSR_MTRRphysBase(0)) / 2].base;
-        break;
-    case MSR_MTRRphysMask(0):
-    case MSR_MTRRphysMask(1):
-    case MSR_MTRRphysMask(2):
-    case MSR_MTRRphysMask(3):
-    case MSR_MTRRphysMask(4):
-    case MSR_MTRRphysMask(5):
-    case MSR_MTRRphysMask(6):
-    case MSR_MTRRphysMask(7):
-        val = env->mtrr_var[((uint32_t)ECX - MSR_MTRRphysMask(0)) / 2].mask;
-        break;
-    case MSR_MTRRfix64K_00000:
-        val = env->mtrr_fixed[0];
-        break;
-    case MSR_MTRRfix16K_80000:
-    case MSR_MTRRfix16K_A0000:
-        val = env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix16K_80000 + 1];
-        break;
-    case MSR_MTRRfix4K_C0000:
-    case MSR_MTRRfix4K_C8000:
-    case MSR_MTRRfix4K_D0000:
-    case MSR_MTRRfix4K_D8000:
-    case MSR_MTRRfix4K_E0000:
-    case MSR_MTRRfix4K_E8000:
-    case MSR_MTRRfix4K_F0000:
-    case MSR_MTRRfix4K_F8000:
-        val = env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix4K_C0000 + 3];
-        break;
-    case MSR_MTRRdefType:
-        val = env->mtrr_deftype;
-        break;
-    case MSR_MTRRcap:
-        if (env->cpuid_features & CPUID_MTRR)
-            val = MSR_MTRRcap_VCNT | MSR_MTRRcap_FIXRANGE_SUPPORT | MSR_MTRRcap_WC_SUPPORTED;
-        else
-            /* XXX: exception ? */
-            val = 0;
-        break;
-    case MSR_MCG_CAP:
-        val = env->mcg_cap;
-        break;
-    case MSR_MCG_CTL:
-        if (env->mcg_cap & MCG_CTL_P)
-            val = env->mcg_ctl;
-        else
-            val = 0;
-        break;
-    case MSR_MCG_STATUS:
-        val = env->mcg_status;
-        break;
-    default:
-        if ((uint32_t)ECX >= MSR_MC0_CTL
-            && (uint32_t)ECX < MSR_MC0_CTL + (4 * env->mcg_cap & 0xff)) {
-            uint32_t offset = (uint32_t)ECX - MSR_MC0_CTL;
-            val = env->mce_banks[offset];
-            break;
-        }
-        /* XXX: exception ? */
-        val = 0;
-        break;
-    }
-    EAX = (uint32_t)(val);
-    EDX = (uint32_t)(val >> 32);
-}
-#endif
-
-target_ulong helper_lsl(target_ulong selector1)
-{
-    unsigned int limit;
-    uint32_t e1, e2, eflags, selector;
-    int rpl, dpl, cpl, type;
-
-    selector = selector1 & 0xffff;
-    eflags = helper_cc_compute_all(CC_OP);
-    if ((selector & 0xfffc) == 0)
-        goto fail;
-    if (load_segment(&e1, &e2, selector) != 0)
-        goto fail;
-    rpl = selector & 3;
-    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
-    cpl = env->hflags & HF_CPL_MASK;
-    if (e2 & DESC_S_MASK) {
-        if ((e2 & DESC_CS_MASK) && (e2 & DESC_C_MASK)) {
-            /* conforming */
-        } else {
-            if (dpl < cpl || dpl < rpl)
-                goto fail;
-        }
-    } else {
-        type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
-        switch(type) {
-        case 1:
-        case 2:
-        case 3:
-        case 9:
-        case 11:
-            break;
-        default:
-            goto fail;
-        }
-        if (dpl < cpl || dpl < rpl) {
-        fail:
-            CC_SRC = eflags & ~CC_Z;
-            return 0;
-        }
-    }
-    limit = get_seg_limit(e1, e2);
-    CC_SRC = eflags | CC_Z;
-    return limit;
-}
-
-target_ulong helper_lar(target_ulong selector1)
-{
-    uint32_t e1, e2, eflags, selector;
-    int rpl, dpl, cpl, type;
-
-    selector = selector1 & 0xffff;
-    eflags = helper_cc_compute_all(CC_OP);
-    if ((selector & 0xfffc) == 0)
-        goto fail;
-    if (load_segment(&e1, &e2, selector) != 0)
-        goto fail;
-    rpl = selector & 3;
-    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
-    cpl = env->hflags & HF_CPL_MASK;
-    if (e2 & DESC_S_MASK) {
-        if ((e2 & DESC_CS_MASK) && (e2 & DESC_C_MASK)) {
-            /* conforming */
-        } else {
-            if (dpl < cpl || dpl < rpl)
-                goto fail;
-        }
-    } else {
-        type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
-        switch(type) {
-        case 1:
-        case 2:
-        case 3:
-        case 4:
-        case 5:
-        case 9:
-        case 11:
-        case 12:
-            break;
-        default:
-            goto fail;
-        }
-        if (dpl < cpl || dpl < rpl) {
-        fail:
-            CC_SRC = eflags & ~CC_Z;
-            return 0;
-        }
-    }
-    CC_SRC = eflags | CC_Z;
-    return e2 & 0x00f0ff00;
-}
-
-void helper_verr(target_ulong selector1)
-{
-    uint32_t e1, e2, eflags, selector;
-    int rpl, dpl, cpl;
-
-    selector = selector1 & 0xffff;
-    eflags = helper_cc_compute_all(CC_OP);
-    if ((selector & 0xfffc) == 0)
-        goto fail;
-    if (load_segment(&e1, &e2, selector) != 0)
-        goto fail;
-    if (!(e2 & DESC_S_MASK))
-        goto fail;
-    rpl = selector & 3;
-    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
-    cpl = env->hflags & HF_CPL_MASK;
-    if (e2 & DESC_CS_MASK) {
-        if (!(e2 & DESC_R_MASK))
-            goto fail;
-        if (!(e2 & DESC_C_MASK)) {
-            if (dpl < cpl || dpl < rpl)
-                goto fail;
-        }
-    } else {
-        if (dpl < cpl || dpl < rpl) {
-        fail:
-            CC_SRC = eflags & ~CC_Z;
-            return;
-        }
-    }
-    CC_SRC = eflags | CC_Z;
-}
-
-void helper_verw(target_ulong selector1)
-{
-    uint32_t e1, e2, eflags, selector;
-    int rpl, dpl, cpl;
-
-    selector = selector1 & 0xffff;
-    eflags = helper_cc_compute_all(CC_OP);
-    if ((selector & 0xfffc) == 0)
-        goto fail;
-    if (load_segment(&e1, &e2, selector) != 0)
-        goto fail;
-    if (!(e2 & DESC_S_MASK))
-        goto fail;
-    rpl = selector & 3;
-    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
-    cpl = env->hflags & HF_CPL_MASK;
-    if (e2 & DESC_CS_MASK) {
-        goto fail;
-    } else {
-        if (dpl < cpl || dpl < rpl)
-            goto fail;
-        if (!(e2 & DESC_W_MASK)) {
-        fail:
-            CC_SRC = eflags & ~CC_Z;
-            return;
-        }
-    }
-    CC_SRC = eflags | CC_Z;
-}
-
-/* x87 FPU helpers */
-
-static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
-{
-    union {
-        float64 f64;
-        double d;
-    } u;
-
-    u.f64 = floatx80_to_float64(a, &env->fp_status);
-    return u.d;
-}
-
-static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
-{
-    union {
-        float64 f64;
-        double d;
-    } u;
-
-    u.d = a;
-    return float64_to_floatx80(u.f64, &env->fp_status);
-}
-
-static void fpu_set_exception(int mask)
-{
-    env->fpus |= mask;
-    if (env->fpus & (~env->fpuc & FPUC_EM))
-        env->fpus |= FPUS_SE | FPUS_B;
-}
-
-static inline floatx80 helper_fdiv(floatx80 a, floatx80 b)
-{
-    if (floatx80_is_zero(b)) {
-        fpu_set_exception(FPUS_ZE);
-    }
-    return floatx80_div(a, b, &env->fp_status);
-}
-
-static void fpu_raise_exception(void)
-{
-    if (env->cr[0] & CR0_NE_MASK) {
-        raise_exception(EXCP10_COPR);
-    }
-#if !defined(CONFIG_USER_ONLY)
-    else {
-        cpu_set_ferr(env);
-    }
-#endif
-}
-
-void helper_flds_FT0(uint32_t val)
-{
-    union {
-        float32 f;
-        uint32_t i;
-    } u;
-    u.i = val;
-    FT0 = float32_to_floatx80(u.f, &env->fp_status);
-}
-
-void helper_fldl_FT0(uint64_t val)
-{
-    union {
-        float64 f;
-        uint64_t i;
-    } u;
-    u.i = val;
-    FT0 = float64_to_floatx80(u.f, &env->fp_status);
-}
-
-void helper_fildl_FT0(int32_t val)
-{
-    FT0 = int32_to_floatx80(val, &env->fp_status);
-}
-
-void helper_flds_ST0(uint32_t val)
-{
-    int new_fpstt;
-    union {
-        float32 f;
-        uint32_t i;
-    } u;
-    new_fpstt = (env->fpstt - 1) & 7;
-    u.i = val;
-    env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
-    env->fpstt = new_fpstt;
-    env->fptags[new_fpstt] = 0; /* validate stack entry */
-}
-
-void helper_fldl_ST0(uint64_t val)
-{
-    int new_fpstt;
-    union {
-        float64 f;
-        uint64_t i;
-    } u;
-    new_fpstt = (env->fpstt - 1) & 7;
-    u.i = val;
-    env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
-    env->fpstt = new_fpstt;
-    env->fptags[new_fpstt] = 0; /* validate stack entry */
-}
-
-void helper_fildl_ST0(int32_t val)
-{
-    int new_fpstt;
-    new_fpstt = (env->fpstt - 1) & 7;
-    env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
-    env->fpstt = new_fpstt;
-    env->fptags[new_fpstt] = 0; /* validate stack entry */
-}
-
-void helper_fildll_ST0(int64_t val)
-{
-    int new_fpstt;
-    new_fpstt = (env->fpstt - 1) & 7;
-    env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
-    env->fpstt = new_fpstt;
-    env->fptags[new_fpstt] = 0; /* validate stack entry */
-}
-
-uint32_t helper_fsts_ST0(void)
-{
-    union {
-        float32 f;
-        uint32_t i;
-    } u;
-    u.f = floatx80_to_float32(ST0, &env->fp_status);
-    return u.i;
-}
-
-uint64_t helper_fstl_ST0(void)
-{
-    union {
-        float64 f;
-        uint64_t i;
-    } u;
-    u.f = floatx80_to_float64(ST0, &env->fp_status);
-    return u.i;
-}
-
-int32_t helper_fist_ST0(void)
-{
-    int32_t val;
-    val = floatx80_to_int32(ST0, &env->fp_status);
-    if (val != (int16_t)val)
-        val = -32768;
-    return val;
-}
-
-int32_t helper_fistl_ST0(void)
-{
-    int32_t val;
-    val = floatx80_to_int32(ST0, &env->fp_status);
-    return val;
-}
-
-int64_t helper_fistll_ST0(void)
-{
-    int64_t val;
-    val = floatx80_to_int64(ST0, &env->fp_status);
-    return val;
-}
-
-int32_t helper_fistt_ST0(void)
-{
-    int32_t val;
-    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
-    if (val != (int16_t)val)
-        val = -32768;
-    return val;
-}
-
-int32_t helper_fisttl_ST0(void)
-{
-    int32_t val;
-    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
-    return val;
-}
-
-int64_t helper_fisttll_ST0(void)
-{
-    int64_t val;
-    val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
-    return val;
-}
-
-void helper_fldt_ST0(target_ulong ptr)
-{
-    int new_fpstt;
-    new_fpstt = (env->fpstt - 1) & 7;
-    env->fpregs[new_fpstt].d = helper_fldt(ptr);
-    env->fpstt = new_fpstt;
-    env->fptags[new_fpstt] = 0; /* validate stack entry */
-}
-
-void helper_fstt_ST0(target_ulong ptr)
-{
-    helper_fstt(ST0, ptr);
-}
-
-void helper_fpush(void)
-{
-    fpush();
-}
-
-void helper_fpop(void)
-{
-    fpop();
-}
-
-void helper_fdecstp(void)
-{
-    env->fpstt = (env->fpstt - 1) & 7;
-    env->fpus &= (~0x4700);
-}
-
-void helper_fincstp(void)
-{
-    env->fpstt = (env->fpstt + 1) & 7;
-    env->fpus &= (~0x4700);
-}
-
-/* FPU move */
-
-void helper_ffree_STN(int st_index)
-{
-    env->fptags[(env->fpstt + st_index) & 7] = 1;
-}
-
-void helper_fmov_ST0_FT0(void)
-{
-    ST0 = FT0;
-}
-
-void helper_fmov_FT0_STN(int st_index)
-{
-    FT0 = ST(st_index);
-}
-
-void helper_fmov_ST0_STN(int st_index)
-{
-    ST0 = ST(st_index);
-}
-
-void helper_fmov_STN_ST0(int st_index)
-{
-    ST(st_index) = ST0;
-}
-
-void helper_fxchg_ST0_STN(int st_index)
-{
-    floatx80 tmp;
-    tmp = ST(st_index);
-    ST(st_index) = ST0;
-    ST0 = tmp;
-}
-
-/* FPU operations */
-
-static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
-
-void helper_fcom_ST0_FT0(void)
-{
-    int ret;
-
-    ret = floatx80_compare(ST0, FT0, &env->fp_status);
-    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
-}
-
-void helper_fucom_ST0_FT0(void)
-{
-    int ret;
-
-    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
-    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
-}
-
-static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
-
-void helper_fcomi_ST0_FT0(void)
-{
-    int eflags;
-    int ret;
-
-    ret = floatx80_compare(ST0, FT0, &env->fp_status);
-    eflags = helper_cc_compute_all(CC_OP);
-    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
-    CC_SRC = eflags;
-}
-
-void helper_fucomi_ST0_FT0(void)
-{
-    int eflags;
-    int ret;
-
-    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
-    eflags = helper_cc_compute_all(CC_OP);
-    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
-    CC_SRC = eflags;
-}
-
-void helper_fadd_ST0_FT0(void)
-{
-    ST0 = floatx80_add(ST0, FT0, &env->fp_status);
-}
-
-void helper_fmul_ST0_FT0(void)
-{
-    ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
-}
-
-void helper_fsub_ST0_FT0(void)
-{
-    ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
-}
-
-void helper_fsubr_ST0_FT0(void)
-{
-    ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
-}
-
-void helper_fdiv_ST0_FT0(void)
-{
-    ST0 = helper_fdiv(ST0, FT0);
-}
-
-void helper_fdivr_ST0_FT0(void)
-{
-    ST0 = helper_fdiv(FT0, ST0);
-}
-
-/* fp operations between STN and ST0 */
-
-void helper_fadd_STN_ST0(int st_index)
-{
-    ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
-}
-
-void helper_fmul_STN_ST0(int st_index)
-{
-    ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
-}
-
-void helper_fsub_STN_ST0(int st_index)
-{
-    ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
-}
-
-void helper_fsubr_STN_ST0(int st_index)
-{
-    floatx80 *p;
-    p = &ST(st_index);
-    *p = floatx80_sub(ST0, *p, &env->fp_status);
-}
-
-void helper_fdiv_STN_ST0(int st_index)
-{
-    floatx80 *p;
-    p = &ST(st_index);
-    *p = helper_fdiv(*p, ST0);
-}
-
-void helper_fdivr_STN_ST0(int st_index)
-{
-    floatx80 *p;
-    p = &ST(st_index);
-    *p = helper_fdiv(ST0, *p);
-}
-
-/* misc FPU operations */
-void helper_fchs_ST0(void)
-{
-    ST0 = floatx80_chs(ST0);
-}
-
-void helper_fabs_ST0(void)
-{
-    ST0 = floatx80_abs(ST0);
-}
-
-void helper_fld1_ST0(void)
-{
-    ST0 = f15rk[1];
-}
-
-void helper_fldl2t_ST0(void)
-{
-    ST0 = f15rk[6];
-}
-
-void helper_fldl2e_ST0(void)
-{
-    ST0 = f15rk[5];
-}
-
-void helper_fldpi_ST0(void)
-{
-    ST0 = f15rk[2];
-}
-
-void helper_fldlg2_ST0(void)
-{
-    ST0 = f15rk[3];
-}
-
-void helper_fldln2_ST0(void)
-{
-    ST0 = f15rk[4];
-}
-
-void helper_fldz_ST0(void)
-{
-    ST0 = f15rk[0];
-}
-
-void helper_fldz_FT0(void)
-{
-    FT0 = f15rk[0];
-}
-
-uint32_t helper_fnstsw(void)
-{
-    return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
-}
-
-uint32_t helper_fnstcw(void)
-{
-    return env->fpuc;
-}
-
-static void update_fp_status(void)
-{
-    int rnd_type;
-
-    /* set rounding mode */
-    switch(env->fpuc & RC_MASK) {
-    default:
-    case RC_NEAR:
-        rnd_type = float_round_nearest_even;
-        break;
-    case RC_DOWN:
-        rnd_type = float_round_down;
-        break;
-    case RC_UP:
-        rnd_type = float_round_up;
-        break;
-    case RC_CHOP:
-        rnd_type = float_round_to_zero;
-        break;
-    }
-    set_float_rounding_mode(rnd_type, &env->fp_status);
-    switch((env->fpuc >> 8) & 3) {
-    case 0:
-        rnd_type = 32;
-        break;
-    case 2:
-        rnd_type = 64;
-        break;
-    case 3:
-    default:
-        rnd_type = 80;
-        break;
-    }
-    set_floatx80_rounding_precision(rnd_type, &env->fp_status);
-}
-
-void helper_fldcw(uint32_t val)
-{
-    env->fpuc = val;
-    update_fp_status();
-}
-
-void helper_fclex(void)
-{
-    env->fpus &= 0x7f00;
-}
-
-void helper_fwait(void)
-{
-    if (env->fpus & FPUS_SE)
-        fpu_raise_exception();
-}
-
-void helper_fninit(void)
-{
-    env->fpus = 0;
-    env->fpstt = 0;
-    env->fpuc = 0x37f;
-    env->fptags[0] = 1;
-    env->fptags[1] = 1;
-    env->fptags[2] = 1;
-    env->fptags[3] = 1;
-    env->fptags[4] = 1;
-    env->fptags[5] = 1;
-    env->fptags[6] = 1;
-    env->fptags[7] = 1;
-}
-
-/* BCD ops */
-
-void helper_fbld_ST0(target_ulong ptr)
-{
-    floatx80 tmp;
-    uint64_t val;
-    unsigned int v;
-    int i;
-
-    val = 0;
-    for(i = 8; i >= 0; i--) {
-        v = ldub(ptr + i);
-        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
-    }
-    tmp = int64_to_floatx80(val, &env->fp_status);
-    if (ldub(ptr + 9) & 0x80) {
-        floatx80_chs(tmp);
-    }
-    fpush();
-    ST0 = tmp;
-}
-
-void helper_fbst_ST0(target_ulong ptr)
-{
-    int v;
-    target_ulong mem_ref, mem_end;
-    int64_t val;
-
-    val = floatx80_to_int64(ST0, &env->fp_status);
-    mem_ref = ptr;
-    mem_end = mem_ref + 9;
-    if (val < 0) {
-        stb(mem_end, 0x80);
-        val = -val;
-    } else {
-        stb(mem_end, 0x00);
-    }
-    while (mem_ref < mem_end) {
-        if (val == 0)
-            break;
-        v = val % 100;
-        val = val / 100;
-        v = ((v / 10) << 4) | (v % 10);
-        stb(mem_ref++, v);
-    }
-    while (mem_ref < mem_end) {
-        stb(mem_ref++, 0);
-    }
-}
-
-void helper_f2xm1(void)
-{
-    double val = floatx80_to_double(env, ST0);
-    val = pow(2.0, val) - 1.0;
-    ST0 = double_to_floatx80(env, val);
-}
-
-void helper_fyl2x(void)
-{
-    double fptemp = floatx80_to_double(env, ST0);
-
-    if (fptemp>0.0){
-        fptemp = log(fptemp)/log(2.0);	 /* log2(ST) */
-        fptemp *= floatx80_to_double(env, ST1);
-        ST1 = double_to_floatx80(env, fptemp);
-        fpop();
-    } else {
-        env->fpus &= (~0x4700);
-        env->fpus |= 0x400;
-    }
-}
-
-void helper_fptan(void)
-{
-    double fptemp = floatx80_to_double(env, ST0);
-
-    if((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
-        env->fpus |= 0x400;
-    } else {
-        fptemp = tan(fptemp);
-        ST0 = double_to_floatx80(env, fptemp);
-        fpush();
-        ST0 = floatx80_one;
-        env->fpus &= (~0x400);  /* C2 <-- 0 */
-        /* the above code is for  |arg| < 2**52 only */
-    }
-}
-
-void helper_fpatan(void)
-{
-    double fptemp, fpsrcop;
-
-    fpsrcop = floatx80_to_double(env, ST1);
-    fptemp = floatx80_to_double(env, ST0);
-    ST1 = double_to_floatx80(env, atan2(fpsrcop,fptemp));
-    fpop();
-}
-
-void helper_fxtract(void)
-{
-    CPU_LDoubleU temp;
-    unsigned int expdif;
-
-    temp.d = ST0;
-    expdif = EXPD(temp) - EXPBIAS;
-    /*DP exponent bias*/
-    ST0 = int32_to_floatx80(expdif, &env->fp_status);
-    fpush();
-    BIASEXPONENT(temp);
-    ST0 = temp.d;
-}
-
-void helper_fprem1(void)
-{
-    double st0, st1, dblq, fpsrcop, fptemp;
-    CPU_LDoubleU fpsrcop1, fptemp1;
-    int expdif;
-    signed long long int q;
-
-    st0 = floatx80_to_double(env, ST0);
-    st1 = floatx80_to_double(env, ST1);
-
-    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
-        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
-        env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
-        return;
-    }
-
-    fpsrcop = st0;
-    fptemp = st1;
-    fpsrcop1.d = ST0;
-    fptemp1.d = ST1;
-    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
-
-    if (expdif < 0) {
-        /* optimisation? taken from the AMD docs */
-        env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
-        /* ST0 is unchanged */
-        return;
-    }
-
-    if (expdif < 53) {
-        dblq = fpsrcop / fptemp;
-        /* round dblq towards nearest integer */
-        dblq = rint(dblq);
-        st0 = fpsrcop - fptemp * dblq;
-
-        /* convert dblq to q by truncating towards zero */
-        if (dblq < 0.0)
-           q = (signed long long int)(-dblq);
-        else
-           q = (signed long long int)dblq;
-
-        env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
-                                /* (C0,C3,C1) <-- (q2,q1,q0) */
-        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
-        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
-        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
-    } else {
-        env->fpus |= 0x400;  /* C2 <-- 1 */
-        fptemp = pow(2.0, expdif - 50);
-        fpsrcop = (st0 / st1) / fptemp;
-        /* fpsrcop = integer obtained by chopping */
-        fpsrcop = (fpsrcop < 0.0) ?
-                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
-        st0 -= (st1 * fpsrcop * fptemp);
-    }
-    ST0 = double_to_floatx80(env, st0);
-}
-
-void helper_fprem(void)
-{
-    double st0, st1, dblq, fpsrcop, fptemp;
-    CPU_LDoubleU fpsrcop1, fptemp1;
-    int expdif;
-    signed long long int q;
-
-    st0 = floatx80_to_double(env, ST0);
-    st1 = floatx80_to_double(env, ST1);
-
-    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
-       ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
-       env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
-       return;
-    }
-
-    fpsrcop = st0;
-    fptemp = st1;
-    fpsrcop1.d = ST0;
-    fptemp1.d = ST1;
-    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
-
-    if (expdif < 0) {
-        /* optimisation? taken from the AMD docs */
-        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
-        /* ST0 is unchanged */
-        return;
-    }
-
-    if (expdif < 53) {
-        dblq = fpsrcop / fptemp; /* ST0 / ST1 */
-        /* round dblq towards zero */
-        dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
-        st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
-
-        /* convert dblq to q by truncating towards zero */
-        if (dblq < 0.0) {
-           q = (signed long long int)(-dblq);
-        } else {
-           q = (signed long long int)dblq;
-        }
-
-        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
-                              /* (C0,C3,C1) <-- (q2,q1,q0) */
-        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
-        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
-        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
-    } else {
-        int N = 32 + (expdif % 32); /* as per AMD docs */
-        env->fpus |= 0x400;  /* C2 <-- 1 */
-        fptemp = pow(2.0, (double)(expdif - N));
-        fpsrcop = (st0 / st1) / fptemp;
-        /* fpsrcop = integer obtained by chopping */
-        fpsrcop = (fpsrcop < 0.0) ?
-                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
-        st0 -= (st1 * fpsrcop * fptemp);
-    }
-    ST0 = double_to_floatx80(env, st0);
-}
-
-void helper_fyl2xp1(void)
-{
-    double fptemp = floatx80_to_double(env, ST0);
-
-    if ((fptemp+1.0)>0.0) {
-        fptemp = log(fptemp+1.0) / log(2.0); /* log2(ST+1.0) */
-        fptemp *= floatx80_to_double(env, ST1);
-        ST1 = double_to_floatx80(env, fptemp);
-        fpop();
-    } else {
-        env->fpus &= (~0x4700);
-        env->fpus |= 0x400;
-    }
-}
-
-void helper_fsqrt(void)
-{
-    double fptemp = floatx80_to_double(env, ST0);
-
-    if (fptemp<0.0) {
-        env->fpus &= (~0x4700);  /* (C3,C2,C1,C0) <-- 0000 */
-        env->fpus |= 0x400;
-    }
-    ST0 = floatx80_sqrt(ST0, &env->fp_status);
-}
-
-void helper_fsincos(void)
-{
-    double fptemp = floatx80_to_double(env, ST0);
-
-    if ((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
-        env->fpus |= 0x400;
-    } else {
-        ST0 = double_to_floatx80(env, sin(fptemp));
-        fpush();
-        ST0 = double_to_floatx80(env, cos(fptemp));
-        env->fpus &= (~0x400);  /* C2 <-- 0 */
-        /* the above code is for  |arg| < 2**63 only */
-    }
-}
-
-void helper_frndint(void)
-{
-    ST0 = floatx80_round_to_int(ST0, &env->fp_status);
-}
-
-void helper_fscale(void)
-{
-    double st0 = floatx80_to_double(env, ST0);
-    double st1 = floatx80_to_double(env, ST1);
-    double val = ldexp(st0, (int)st1);
-    ST0 = double_to_floatx80(env, val);
-}
-
-void helper_fsin(void)
-{
-    double fptemp = floatx80_to_double(env, ST0);
-
-    if ((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
-        env->fpus |= 0x400;
-    } else {
-        ST0 = double_to_floatx80(env, sin(fptemp));
-        env->fpus &= (~0x400);  /* C2 <-- 0 */
-        /* the above code is for  |arg| < 2**53 only */
-    }
-}
-
-void helper_fcos(void)
-{
-    double fptemp = floatx80_to_double(env, ST0);
-
-    if((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
-        env->fpus |= 0x400;
-    } else {
-        ST0 = double_to_floatx80(env, cos(fptemp));
-        env->fpus &= (~0x400);  /* C2 <-- 0 */
-        /* the above code is for  |arg| < 2**63 only */
-    }
-}
-
-void helper_fxam_ST0(void)
-{
-    CPU_LDoubleU temp;
-    int expdif;
-
-    temp.d = ST0;
-
-    env->fpus &= (~0x4700);  /* (C3,C2,C1,C0) <-- 0000 */
-    if (SIGND(temp))
-        env->fpus |= 0x200; /* C1 <-- 1 */
-
-    /* XXX: test fptags too */
-    expdif = EXPD(temp);
-    if (expdif == MAXEXPD) {
-        if (MANTD(temp) == 0x8000000000000000ULL) {
-            env->fpus |=  0x500 /*Infinity*/;
-        } else {
-            env->fpus |=  0x100 /*NaN*/;
-        }
-    } else if (expdif == 0) {
-        if (MANTD(temp) == 0) {
-            env->fpus |=  0x4000 /*Zero*/;
-        } else {
-            env->fpus |= 0x4400 /*Denormal*/;
-        }
-    } else {
-        env->fpus |= 0x400;
-    }
-}
-
-void helper_fstenv(target_ulong ptr, int data32)
-{
-    int fpus, fptag, exp, i;
-    uint64_t mant;
-    CPU_LDoubleU tmp;
-
-    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
-    fptag = 0;
-    for (i=7; i>=0; i--) {
-	fptag <<= 2;
-	if (env->fptags[i]) {
-            fptag |= 3;
-	} else {
-            tmp.d = env->fpregs[i].d;
-            exp = EXPD(tmp);
-            mant = MANTD(tmp);
-            if (exp == 0 && mant == 0) {
-                /* zero */
-	        fptag |= 1;
-	    } else if (exp == 0 || exp == MAXEXPD
-                       || (mant & (1LL << 63)) == 0) {
-                /* NaNs, infinity, denormal */
-                fptag |= 2;
-            }
-        }
-    }
-    if (data32) {
-        /* 32 bit */
-        stl(ptr, env->fpuc);
-        stl(ptr + 4, fpus);
-        stl(ptr + 8, fptag);
-        stl(ptr + 12, 0); /* fpip */
-        stl(ptr + 16, 0); /* fpcs */
-        stl(ptr + 20, 0); /* fpoo */
-        stl(ptr + 24, 0); /* fpos */
-    } else {
-        /* 16 bit */
-        stw(ptr, env->fpuc);
-        stw(ptr + 2, fpus);
-        stw(ptr + 4, fptag);
-        stw(ptr + 6, 0);
-        stw(ptr + 8, 0);
-        stw(ptr + 10, 0);
-        stw(ptr + 12, 0);
-    }
-}
-
-void helper_fldenv(target_ulong ptr, int data32)
-{
-    int i, fpus, fptag;
-
-    if (data32) {
-        env->fpuc = lduw(ptr);
-        fpus = lduw(ptr + 4);
-        fptag = lduw(ptr + 8);
-    }
-    else {
-        env->fpuc = lduw(ptr);
-        fpus = lduw(ptr + 2);
-        fptag = lduw(ptr + 4);
-    }
-    env->fpstt = (fpus >> 11) & 7;
-    env->fpus = fpus & ~0x3800;
-    for(i = 0;i < 8; i++) {
-        env->fptags[i] = ((fptag & 3) == 3);
-        fptag >>= 2;
-    }
-}
-
-void helper_fsave(target_ulong ptr, int data32)
-{
-    floatx80 tmp;
-    int i;
-
-    helper_fstenv(ptr, data32);
-
-    ptr += (14 << data32);
-    for(i = 0;i < 8; i++) {
-        tmp = ST(i);
-        helper_fstt(tmp, ptr);
-        ptr += 10;
-    }
-
-    /* fninit */
-    env->fpus = 0;
-    env->fpstt = 0;
-    env->fpuc = 0x37f;
-    env->fptags[0] = 1;
-    env->fptags[1] = 1;
-    env->fptags[2] = 1;
-    env->fptags[3] = 1;
-    env->fptags[4] = 1;
-    env->fptags[5] = 1;
-    env->fptags[6] = 1;
-    env->fptags[7] = 1;
-}
-
-void helper_frstor(target_ulong ptr, int data32)
-{
-    floatx80 tmp;
-    int i;
-
-    helper_fldenv(ptr, data32);
-    ptr += (14 << data32);
-
-    for(i = 0;i < 8; i++) {
-        tmp = helper_fldt(ptr);
-        ST(i) = tmp;
-        ptr += 10;
-    }
-}
-
-void helper_fxsave(target_ulong ptr, int data64)
-{
-    int fpus, fptag, i, nb_xmm_regs;
-    floatx80 tmp;
-    target_ulong addr;
-
-    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
-    fptag = 0;
-    for(i = 0; i < 8; i++) {
-        fptag |= (env->fptags[i] << i);
-    }
-    stw(ptr, env->fpuc);
-    stw(ptr + 2, fpus);
-    stw(ptr + 4, fptag ^ 0xff);
-#ifdef TARGET_X86_64
-    if (data64) {
-        stq(ptr + 0x08, 0); /* rip */
-        stq(ptr + 0x10, 0); /* rdp */
-    } else
-#endif
-    {
-        stl(ptr + 0x08, 0); /* eip */
-        stl(ptr + 0x0c, 0); /* sel  */
-        stl(ptr + 0x10, 0); /* dp */
-        stl(ptr + 0x14, 0); /* sel  */
-    }
-
-    addr = ptr + 0x20;
-    for(i = 0;i < 8; i++) {
-        tmp = ST(i);
-        helper_fstt(tmp, addr);
-        addr += 16;
-    }
-
-    if (env->cr[4] & CR4_OSFXSR_MASK) {
-        /* XXX: finish it */
-        stl(ptr + 0x18, env->mxcsr); /* mxcsr */
-        stl(ptr + 0x1c, 0x0000ffff); /* mxcsr_mask */
-        if (env->hflags & HF_CS64_MASK)
-            nb_xmm_regs = 16;
-        else
-            nb_xmm_regs = 8;
-        addr = ptr + 0xa0;
-        /* Fast FXSAVE leaves out the XMM registers */
-        if (!(env->efer & MSR_EFER_FFXSR)
-          || (env->hflags & HF_CPL_MASK)
-          || !(env->hflags & HF_LMA_MASK)) {
-            for(i = 0; i < nb_xmm_regs; i++) {
-                stq(addr, env->xmm_regs[i].XMM_Q(0));
-                stq(addr + 8, env->xmm_regs[i].XMM_Q(1));
-                addr += 16;
-            }
-        }
-    }
-}
-
-void helper_fxrstor(target_ulong ptr, int data64)
-{
-    int i, fpus, fptag, nb_xmm_regs;
-    floatx80 tmp;
-    target_ulong addr;
-
-    env->fpuc = lduw(ptr);
-    fpus = lduw(ptr + 2);
-    fptag = lduw(ptr + 4);
-    env->fpstt = (fpus >> 11) & 7;
-    env->fpus = fpus & ~0x3800;
-    fptag ^= 0xff;
-    for(i = 0;i < 8; i++) {
-        env->fptags[i] = ((fptag >> i) & 1);
-    }
-
-    addr = ptr + 0x20;
-    for(i = 0;i < 8; i++) {
-        tmp = helper_fldt(addr);
-        ST(i) = tmp;
-        addr += 16;
-    }
-
-    if (env->cr[4] & CR4_OSFXSR_MASK) {
-        /* XXX: finish it */
-        env->mxcsr = ldl(ptr + 0x18);
-        //ldl(ptr + 0x1c);
-        if (env->hflags & HF_CS64_MASK)
-            nb_xmm_regs = 16;
-        else
-            nb_xmm_regs = 8;
-        addr = ptr + 0xa0;
-        /* Fast FXRSTOR leaves out the XMM registers */
-        if (!(env->efer & MSR_EFER_FFXSR)
-          || (env->hflags & HF_CPL_MASK)
-          || !(env->hflags & HF_LMA_MASK)) {
-            for(i = 0; i < nb_xmm_regs; i++) {
-                env->xmm_regs[i].XMM_Q(0) = ldq(addr);
-                env->xmm_regs[i].XMM_Q(1) = ldq(addr + 8);
-                addr += 16;
-            }
-        }
-    }
-}
-
-void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
-{
-    CPU_LDoubleU temp;
-
-    temp.d = f;
-    *pmant = temp.l.lower;
-    *pexp = temp.l.upper;
-}
-
-floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper)
-{
-    CPU_LDoubleU temp;
-
-    temp.l.upper = upper;
-    temp.l.lower = mant;
-    return temp.d;
-}
-
-#ifdef TARGET_X86_64
-
-//#define DEBUG_MULDIV
-
-static void add128(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b)
-{
-    *plow += a;
-    /* carry test */
-    if (*plow < a)
-        (*phigh)++;
-    *phigh += b;
-}
-
-static void neg128(uint64_t *plow, uint64_t *phigh)
-{
-    *plow = ~ *plow;
-    *phigh = ~ *phigh;
-    add128(plow, phigh, 1, 0);
-}
-
-/* return TRUE if overflow */
-static int div64(uint64_t *plow, uint64_t *phigh, uint64_t b)
-{
-    uint64_t q, r, a1, a0;
-    int i, qb, ab;
-
-    a0 = *plow;
-    a1 = *phigh;
-    if (a1 == 0) {
-        q = a0 / b;
-        r = a0 % b;
-        *plow = q;
-        *phigh = r;
-    } else {
-        if (a1 >= b)
-            return 1;
-        /* XXX: use a better algorithm */
-        for(i = 0; i < 64; i++) {
-            ab = a1 >> 63;
-            a1 = (a1 << 1) | (a0 >> 63);
-            if (ab || a1 >= b) {
-                a1 -= b;
-                qb = 1;
-            } else {
-                qb = 0;
-            }
-            a0 = (a0 << 1) | qb;
-        }
-#if defined(DEBUG_MULDIV)
-        printf("div: 0x%016" PRIx64 "%016" PRIx64 " / 0x%016" PRIx64 ": q=0x%016" PRIx64 " r=0x%016" PRIx64 "\n",
-               *phigh, *plow, b, a0, a1);
-#endif
-        *plow = a0;
-        *phigh = a1;
-    }
-    return 0;
-}
-
-/* return TRUE if overflow */
-static int idiv64(uint64_t *plow, uint64_t *phigh, int64_t b)
-{
-    int sa, sb;
-    sa = ((int64_t)*phigh < 0);
-    if (sa)
-        neg128(plow, phigh);
-    sb = (b < 0);
-    if (sb)
-        b = -b;
-    if (div64(plow, phigh, b) != 0)
-        return 1;
-    if (sa ^ sb) {
-        if (*plow > (1ULL << 63))
-            return 1;
-        *plow = - *plow;
-    } else {
-        if (*plow >= (1ULL << 63))
-            return 1;
-    }
-    if (sa)
-        *phigh = - *phigh;
-    return 0;
-}
-
-void helper_mulq_EAX_T0(target_ulong t0)
-{
-    uint64_t r0, r1;
-
-    mulu64(&r0, &r1, EAX, t0);
-    EAX = r0;
-    EDX = r1;
-    CC_DST = r0;
-    CC_SRC = r1;
-}
-
-void helper_imulq_EAX_T0(target_ulong t0)
-{
-    uint64_t r0, r1;
-
-    muls64(&r0, &r1, EAX, t0);
-    EAX = r0;
-    EDX = r1;
-    CC_DST = r0;
-    CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63));
-}
-
-target_ulong helper_imulq_T0_T1(target_ulong t0, target_ulong t1)
-{
-    uint64_t r0, r1;
-
-    muls64(&r0, &r1, t0, t1);
-    CC_DST = r0;
-    CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63));
-    return r0;
-}
-
-void helper_divq_EAX(target_ulong t0)
-{
-    uint64_t r0, r1;
-    if (t0 == 0) {
-        raise_exception(EXCP00_DIVZ);
-    }
-    r0 = EAX;
-    r1 = EDX;
-    if (div64(&r0, &r1, t0))
-        raise_exception(EXCP00_DIVZ);
-    EAX = r0;
-    EDX = r1;
-}
-
-void helper_idivq_EAX(target_ulong t0)
-{
-    uint64_t r0, r1;
-    if (t0 == 0) {
-        raise_exception(EXCP00_DIVZ);
-    }
-    r0 = EAX;
-    r1 = EDX;
-    if (idiv64(&r0, &r1, t0))
-        raise_exception(EXCP00_DIVZ);
-    EAX = r0;
-    EDX = r1;
-}
-#endif
-
-static void do_hlt(void)
-{
-    env->hflags &= ~HF_INHIBIT_IRQ_MASK; /* needed if sti is just before */
-    env->halted = 1;
-    env->exception_index = EXCP_HLT;
-    cpu_loop_exit(env);
-}
-
-void helper_hlt(int next_eip_addend)
-{
-    helper_svm_check_intercept_param(SVM_EXIT_HLT, 0);
-    EIP += next_eip_addend;
-
-    do_hlt();
-}
-
-void helper_monitor(target_ulong ptr)
-{
-    if ((uint32_t)ECX != 0)
-        raise_exception(EXCP0D_GPF);
-    /* XXX: store address ? */
-    helper_svm_check_intercept_param(SVM_EXIT_MONITOR, 0);
-}
-
-void helper_mwait(int next_eip_addend)
-{
-    if ((uint32_t)ECX != 0)
-        raise_exception(EXCP0D_GPF);
-    helper_svm_check_intercept_param(SVM_EXIT_MWAIT, 0);
-    EIP += next_eip_addend;
-
-    /* XXX: not complete but not completely erroneous */
-    if (env->cpu_index != 0 || env->next_cpu != NULL) {
-        /* more than one CPU: do not sleep because another CPU may
-           wake this one */
-    } else {
-        do_hlt();
-    }
-}
-
-void helper_debug(void)
-{
-    env->exception_index = EXCP_DEBUG;
-    cpu_loop_exit(env);
-}
-
-void helper_reset_rf(void)
-{
-    env->eflags &= ~RF_MASK;
-}
-
-void helper_raise_interrupt(int intno, int next_eip_addend)
-{
-    raise_interrupt(intno, 1, 0, next_eip_addend);
-}
-
-void helper_raise_exception(int exception_index)
-{
-    raise_exception(exception_index);
-}
-
-void helper_cli(void)
-{
-    env->eflags &= ~IF_MASK;
-}
-
-void helper_sti(void)
-{
-    env->eflags |= IF_MASK;
-}
-
-#if 0
-/* vm86plus instructions */
-void helper_cli_vm(void)
-{
-    env->eflags &= ~VIF_MASK;
-}
-
-void helper_sti_vm(void)
-{
-    env->eflags |= VIF_MASK;
-    if (env->eflags & VIP_MASK) {
-        raise_exception(EXCP0D_GPF);
-    }
-}
-#endif
-
-void helper_set_inhibit_irq(void)
-{
-    env->hflags |= HF_INHIBIT_IRQ_MASK;
-}
-
-void helper_reset_inhibit_irq(void)
-{
-    env->hflags &= ~HF_INHIBIT_IRQ_MASK;
-}
-
-void helper_boundw(target_ulong a0, int v)
-{
-    int low, high;
-    low = ldsw(a0);
-    high = ldsw(a0 + 2);
-    v = (int16_t)v;
-    if (v < low || v > high) {
-        raise_exception(EXCP05_BOUND);
-    }
-}
-
-void helper_boundl(target_ulong a0, int v)
-{
-    int low, high;
-    low = ldl(a0);
-    high = ldl(a0 + 4);
-    if (v < low || v > high) {
-        raise_exception(EXCP05_BOUND);
-    }
-}
-
-static float approx_rsqrt(float a)
-{
-    return 1.0 / sqrt(a);
-}
-
-static float approx_rcp(float a)
-{
-    return 1.0 / a;
-}
-
-#if !defined(CONFIG_USER_ONLY)
-
-#define MMUSUFFIX _mmu
-
-#define SHIFT 0
-#include "exec/softmmu_template.h"
-
-#define SHIFT 1
-#include "exec/softmmu_template.h"
-
-#define SHIFT 2
-#include "exec/softmmu_template.h"
-
-#define SHIFT 3
-#include "exec/softmmu_template.h"
-
-#endif
-
-#if !defined(CONFIG_USER_ONLY)
-/* try to fill the TLB and raise an exception on error. If retaddr is
-   NULL, it means that the function was called from C code (i.e. not
-   from generated code or from helper.c) */
-/* XXX: fix it to restore all registers */
-void tlb_fill(target_ulong addr, int is_write, int mmu_idx, void *retaddr)
-{
-    TranslationBlock *tb;
-    int ret;
-    unsigned long pc;
-    CPUX86State *saved_env;
-
-    /* XXX: hack to restore env in all cases, even if not called from
-       generated code */
-    saved_env = env;
-    env = cpu_single_env;
-
-    ret = cpu_x86_handle_mmu_fault(env, addr, is_write, mmu_idx, 1);
-    if (ret) {
-        if (retaddr) {
-            /* now we have a real cpu fault */
-            pc = (unsigned long)retaddr;
-            tb = tb_find_pc(pc);
-            if (tb) {
-                /* the PC is inside the translated code. It means that we have
-                   a virtual CPU fault */
-                cpu_restore_state(tb, env, pc);
-            }
-        }
-        raise_exception_err(env->exception_index, env->error_code);
-    }
-    env = saved_env;
-}
-#endif
-
-/* Secure Virtual Machine helpers */
-
-#if defined(CONFIG_USER_ONLY)
-
-void helper_vmrun(int aflag, int next_eip_addend)
-{
-}
-void helper_vmmcall(void)
-{
-}
-void helper_vmload(int aflag)
-{
-}
-void helper_vmsave(int aflag)
-{
-}
-void helper_stgi(void)
-{
-}
-void helper_clgi(void)
-{
-}
-void helper_skinit(void)
-{
-}
-void helper_invlpga(int aflag)
-{
-}
-void helper_vmexit(uint32_t exit_code, uint64_t exit_info_1)
-{
-}
-void helper_svm_check_intercept_param(uint32_t type, uint64_t param)
-{
-}
-
-void helper_svm_check_io(uint32_t port, uint32_t param,
-                         uint32_t next_eip_addend)
-{
-}
-#else
-
-static inline void svm_save_seg(hwaddr addr,
-                                const SegmentCache *sc)
-{
-    stw_phys(addr + offsetof(struct vmcb_seg, selector),
-             sc->selector);
-    stq_phys(addr + offsetof(struct vmcb_seg, base),
-             sc->base);
-    stl_phys(addr + offsetof(struct vmcb_seg, limit),
-             sc->limit);
-    stw_phys(addr + offsetof(struct vmcb_seg, attrib),
-             ((sc->flags >> 8) & 0xff) | ((sc->flags >> 12) & 0x0f00));
-}
-
-static inline void svm_load_seg(hwaddr addr, SegmentCache *sc)
-{
-    unsigned int flags;
-
-    sc->selector = lduw_phys(addr + offsetof(struct vmcb_seg, selector));
-    sc->base = ldq_phys(addr + offsetof(struct vmcb_seg, base));
-    sc->limit = ldl_phys(addr + offsetof(struct vmcb_seg, limit));
-    flags = lduw_phys(addr + offsetof(struct vmcb_seg, attrib));
-    sc->flags = ((flags & 0xff) << 8) | ((flags & 0x0f00) << 12);
-}
-
-static inline void svm_load_seg_cache(hwaddr addr,
-                                      CPUX86State *env, int seg_reg)
-{
-    SegmentCache sc1, *sc = &sc1;
-    svm_load_seg(addr, sc);
-    cpu_x86_load_seg_cache(env, seg_reg, sc->selector,
-                           sc->base, sc->limit, sc->flags);
-}
-
-void helper_vmrun(int aflag, int next_eip_addend)
-{
-    target_ulong addr;
-    uint32_t event_inj;
-    uint32_t int_ctl;
-
-    helper_svm_check_intercept_param(SVM_EXIT_VMRUN, 0);
-
-    if (aflag == 2)
-        addr = EAX;
-    else
-        addr = (uint32_t)EAX;
-
-    qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmrun! " TARGET_FMT_lx "\n", addr);
-
-    env->vm_vmcb = addr;
-
-    /* save the current CPU state in the hsave page */
-    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.base), env->gdt.base);
-    stl_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit), env->gdt.limit);
-
-    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.base), env->idt.base);
-    stl_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.limit), env->idt.limit);
-
-    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr0), env->cr[0]);
-    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr2), env->cr[2]);
-    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr3), env->cr[3]);
-    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr4), env->cr[4]);
-    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr6), env->dr[6]);
-    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr7), env->dr[7]);
-
-    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.efer), env->efer);
-    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rflags), compute_eflags());
-
-    svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.es),
-                  &env->segs[R_ES]);
-    svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.cs),
-                 &env->segs[R_CS]);
-    svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.ss),
-                 &env->segs[R_SS]);
-    svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.ds),
-                 &env->segs[R_DS]);
-
-    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rip),
-             EIP + next_eip_addend);
-    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rsp), ESP);
-    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rax), EAX);
-
-    /* load the interception bitmaps so we do not need to access the
-       vmcb in svm mode */
-    env->intercept            = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept));
-    env->intercept_cr_read    = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_cr_read));
-    env->intercept_cr_write   = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_cr_write));
-    env->intercept_dr_read    = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_read));
-    env->intercept_dr_write   = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_write));
-    env->intercept_exceptions = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_exceptions));
-
-    /* enable intercepts */
-    env->hflags |= HF_SVMI_MASK;
-
-    env->tsc_offset = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.tsc_offset));
-
-    env->gdt.base  = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base));
-    env->gdt.limit = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit));
-
-    env->idt.base  = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.base));
-    env->idt.limit = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit));
-
-    /* clear exit_info_2 so we behave like the real hardware */
-    stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), 0);
-
-    cpu_x86_update_cr0(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr0)));
-    cpu_x86_update_cr4(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4)));
-    cpu_x86_update_cr3(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3)));
-    env->cr[2] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr2));
-    int_ctl = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl));
-    env->hflags2 &= ~(HF2_HIF_MASK | HF2_VINTR_MASK);
-    if (int_ctl & V_INTR_MASKING_MASK) {
-        env->v_tpr = int_ctl & V_TPR_MASK;
-        env->hflags2 |= HF2_VINTR_MASK;
-        if (env->eflags & IF_MASK)
-            env->hflags2 |= HF2_HIF_MASK;
-    }
-
-    cpu_load_efer(env,
-                  ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.efer)));
-    env->eflags = 0;
-    load_eflags(ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rflags)),
-                ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
-    CC_OP = CC_OP_EFLAGS;
-
-    svm_load_seg_cache(env->vm_vmcb + offsetof(struct vmcb, save.es),
-                       env, R_ES);
-    svm_load_seg_cache(env->vm_vmcb + offsetof(struct vmcb, save.cs),
-                       env, R_CS);
-    svm_load_seg_cache(env->vm_vmcb + offsetof(struct vmcb, save.ss),
-                       env, R_SS);
-    svm_load_seg_cache(env->vm_vmcb + offsetof(struct vmcb, save.ds),
-                       env, R_DS);
-
-    EIP = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rip));
-    env->eip = EIP;
-    ESP = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rsp));
-    EAX = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rax));
-    env->dr[7] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr7));
-    env->dr[6] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr6));
-    cpu_x86_set_cpl(env, ldub_phys(env->vm_vmcb + offsetof(struct vmcb, save.cpl)));
-
-    /* FIXME: guest state consistency checks */
-
-    switch(ldub_phys(env->vm_vmcb + offsetof(struct vmcb, control.tlb_ctl))) {
-        case TLB_CONTROL_DO_NOTHING:
-            break;
-        case TLB_CONTROL_FLUSH_ALL_ASID:
-            /* FIXME: this is not 100% correct but should work for now */
-            tlb_flush(env, 1);
-            break;
-    }
-
-    env->hflags2 |= HF2_GIF_MASK;
-
-    if (int_ctl & V_IRQ_MASK) {
-        env->interrupt_request |= CPU_INTERRUPT_VIRQ;
-    }
-
-    /* maybe we need to inject an event */
-    event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
-    if (event_inj & SVM_EVTINJ_VALID) {
-        uint8_t vector = event_inj & SVM_EVTINJ_VEC_MASK;
-        uint16_t valid_err = event_inj & SVM_EVTINJ_VALID_ERR;
-        uint32_t event_inj_err = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err));
-
-        qemu_log_mask(CPU_LOG_TB_IN_ASM, "Injecting(%#hx): ", valid_err);
-        /* FIXME: need to implement valid_err */
-        switch (event_inj & SVM_EVTINJ_TYPE_MASK) {
-        case SVM_EVTINJ_TYPE_INTR:
-                env->exception_index = vector;
-                env->error_code = event_inj_err;
-                env->exception_is_int = 0;
-                env->exception_next_eip = -1;
-                qemu_log_mask(CPU_LOG_TB_IN_ASM, "INTR");
-                /* XXX: is it always correct ? */
-                do_interrupt(vector, 0, 0, 0, 1);
-                break;
-        case SVM_EVTINJ_TYPE_NMI:
-                env->exception_index = EXCP02_NMI;
-                env->error_code = event_inj_err;
-                env->exception_is_int = 0;
-                env->exception_next_eip = EIP;
-                qemu_log_mask(CPU_LOG_TB_IN_ASM, "NMI");
-                cpu_loop_exit(env);
-                break;
-        case SVM_EVTINJ_TYPE_EXEPT:
-                env->exception_index = vector;
-                env->error_code = event_inj_err;
-                env->exception_is_int = 0;
-                env->exception_next_eip = -1;
-                qemu_log_mask(CPU_LOG_TB_IN_ASM, "EXEPT");
-                cpu_loop_exit(env);
-                break;
-        case SVM_EVTINJ_TYPE_SOFT:
-                env->exception_index = vector;
-                env->error_code = event_inj_err;
-                env->exception_is_int = 1;
-                env->exception_next_eip = EIP;
-                qemu_log_mask(CPU_LOG_TB_IN_ASM, "SOFT");
-                cpu_loop_exit(env);
-                break;
-        }
-        qemu_log_mask(CPU_LOG_TB_IN_ASM, " %#x %#x\n", env->exception_index, env->error_code);
-    }
-}
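
The event-injection decoding at the end of helper_vmrun() can be sanity-checked against a concrete value. The bit layout assumed below (vector in bits 0-7, type in bits 8-10, error-code-valid in bit 11, valid in bit 31) mirrors the SVM_EVTINJ_* masks used above; the macro names and the sample value are purely illustrative:

#include <assert.h>
#include <stdint.h>

/* Field layout assumed from the SVM_EVTINJ_* macros used above. */
#define EVTINJ_VEC_MASK   0x000000ffu
#define EVTINJ_TYPE_SHIFT 8
#define EVTINJ_TYPE_MASK  (7u << EVTINJ_TYPE_SHIFT)
#define EVTINJ_TYPE_EXEPT (3u << EVTINJ_TYPE_SHIFT)
#define EVTINJ_VALID_ERR  (1u << 11)
#define EVTINJ_VALID      (1u << 31)

int main(void)
{
    /* Hypothetical VMCB value: inject a #PF (vector 14) exception
     * together with an error code. */
    uint32_t event_inj = EVTINJ_VALID | EVTINJ_VALID_ERR |
                         EVTINJ_TYPE_EXEPT | 14;

    assert(event_inj == 0x80000b0eu);
    assert((event_inj & EVTINJ_VEC_MASK) == 14);
    assert((event_inj & EVTINJ_TYPE_MASK) == EVTINJ_TYPE_EXEPT);
    return 0;
}
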
-
-void helper_vmmcall(void)
-{
-    helper_svm_check_intercept_param(SVM_EXIT_VMMCALL, 0);
-    raise_exception(EXCP06_ILLOP);
-}
-
-void helper_vmload(int aflag)
-{
-    target_ulong addr;
-    helper_svm_check_intercept_param(SVM_EXIT_VMLOAD, 0);
-
-    if (aflag == 2)
-        addr = EAX;
-    else
-        addr = (uint32_t)EAX;
-
-    qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmload! " TARGET_FMT_lx "\nFS: %016" PRIx64 " | " TARGET_FMT_lx "\n",
-                addr, ldq_phys(addr + offsetof(struct vmcb, save.fs.base)),
-                env->segs[R_FS].base);
-
-    svm_load_seg_cache(addr + offsetof(struct vmcb, save.fs),
-                       env, R_FS);
-    svm_load_seg_cache(addr + offsetof(struct vmcb, save.gs),
-                       env, R_GS);
-    svm_load_seg(addr + offsetof(struct vmcb, save.tr),
-                 &env->tr);
-    svm_load_seg(addr + offsetof(struct vmcb, save.ldtr),
-                 &env->ldt);
-
-#ifdef TARGET_X86_64
-    env->kernelgsbase = ldq_phys(addr + offsetof(struct vmcb, save.kernel_gs_base));
-    env->lstar = ldq_phys(addr + offsetof(struct vmcb, save.lstar));
-    env->cstar = ldq_phys(addr + offsetof(struct vmcb, save.cstar));
-    env->fmask = ldq_phys(addr + offsetof(struct vmcb, save.sfmask));
-#endif
-    env->star = ldq_phys(addr + offsetof(struct vmcb, save.star));
-    env->sysenter_cs = ldq_phys(addr + offsetof(struct vmcb, save.sysenter_cs));
-    env->sysenter_esp = ldq_phys(addr + offsetof(struct vmcb, save.sysenter_esp));
-    env->sysenter_eip = ldq_phys(addr + offsetof(struct vmcb, save.sysenter_eip));
-}
-
-void helper_vmsave(int aflag)
-{
-    target_ulong addr;
-    helper_svm_check_intercept_param(SVM_EXIT_VMSAVE, 0);
-
-    if (aflag == 2)
-        addr = EAX;
-    else
-        addr = (uint32_t)EAX;
-
-    qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmsave! " TARGET_FMT_lx "\nFS: %016" PRIx64 " | " TARGET_FMT_lx "\n",
-                addr, ldq_phys(addr + offsetof(struct vmcb, save.fs.base)),
-                env->segs[R_FS].base);
-
-    svm_save_seg(addr + offsetof(struct vmcb, save.fs),
-                 &env->segs[R_FS]);
-    svm_save_seg(addr + offsetof(struct vmcb, save.gs),
-                 &env->segs[R_GS]);
-    svm_save_seg(addr + offsetof(struct vmcb, save.tr),
-                 &env->tr);
-    svm_save_seg(addr + offsetof(struct vmcb, save.ldtr),
-                 &env->ldt);
-
-#ifdef TARGET_X86_64
-    stq_phys(addr + offsetof(struct vmcb, save.kernel_gs_base), env->kernelgsbase);
-    stq_phys(addr + offsetof(struct vmcb, save.lstar), env->lstar);
-    stq_phys(addr + offsetof(struct vmcb, save.cstar), env->cstar);
-    stq_phys(addr + offsetof(struct vmcb, save.sfmask), env->fmask);
-#endif
-    stq_phys(addr + offsetof(struct vmcb, save.star), env->star);
-    stq_phys(addr + offsetof(struct vmcb, save.sysenter_cs), env->sysenter_cs);
-    stq_phys(addr + offsetof(struct vmcb, save.sysenter_esp), env->sysenter_esp);
-    stq_phys(addr + offsetof(struct vmcb, save.sysenter_eip), env->sysenter_eip);
-}
-
-void helper_stgi(void)
-{
-    helper_svm_check_intercept_param(SVM_EXIT_STGI, 0);
-    env->hflags2 |= HF2_GIF_MASK;
-}
-
-void helper_clgi(void)
-{
-    helper_svm_check_intercept_param(SVM_EXIT_CLGI, 0);
-    env->hflags2 &= ~HF2_GIF_MASK;
-}
-
-void helper_skinit(void)
-{
-    helper_svm_check_intercept_param(SVM_EXIT_SKINIT, 0);
-    /* XXX: not implemented */
-    raise_exception(EXCP06_ILLOP);
-}
-
-void helper_invlpga(int aflag)
-{
-    target_ulong addr;
-    helper_svm_check_intercept_param(SVM_EXIT_INVLPGA, 0);
-
-    if (aflag == 2)
-        addr = EAX;
-    else
-        addr = (uint32_t)EAX;
-
-    /* XXX: could use the ASID to check whether the flush is
-       actually needed */
-    tlb_flush_page(env, addr);
-}
-
-void helper_svm_check_intercept_param(uint32_t type, uint64_t param)
-{
-    if (likely(!(env->hflags & HF_SVMI_MASK)))
-        return;
-    switch(type) {
-    case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR0 + 8:
-        if (env->intercept_cr_read & (1 << (type - SVM_EXIT_READ_CR0))) {
-            helper_vmexit(type, param);
-        }
-        break;
-    case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR0 + 8:
-        if (env->intercept_cr_write & (1 << (type - SVM_EXIT_WRITE_CR0))) {
-            helper_vmexit(type, param);
-        }
-        break;
-    case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR0 + 7:
-        if (env->intercept_dr_read & (1 << (type - SVM_EXIT_READ_DR0))) {
-            helper_vmexit(type, param);
-        }
-        break;
-    case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR0 + 7:
-        if (env->intercept_dr_write & (1 << (type - SVM_EXIT_WRITE_DR0))) {
-            helper_vmexit(type, param);
-        }
-        break;
-    case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 31:
-        if (env->intercept_exceptions & (1 << (type - SVM_EXIT_EXCP_BASE))) {
-            helper_vmexit(type, param);
-        }
-        break;
-    case SVM_EXIT_MSR:
-        if (env->intercept & (1ULL << (SVM_EXIT_MSR - SVM_EXIT_INTR))) {
-            /* FIXME: this should be read in at vmrun (faster this way?) */
-            uint64_t addr = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.msrpm_base_pa));
-            uint32_t t0, t1;
-            switch((uint32_t)ECX) {
-            case 0 ... 0x1fff:
-                t0 = (ECX * 2) % 8;
-                t1 = ECX / 8;
-                break;
-            case 0xc0000000 ... 0xc0001fff:
-                t0 = (8192 + ECX - 0xc0000000) * 2;
-                t1 = (t0 / 8);
-                t0 %= 8;
-                break;
-            case 0xc0010000 ... 0xc0011fff:
-                t0 = (16384 + ECX - 0xc0010000) * 2;
-                t1 = (t0 / 8);
-                t0 %= 8;
-                break;
-            default:
-                helper_vmexit(type, param);
-                t0 = 0;
-                t1 = 0;
-                break;
-            }
-            if (ldub_phys(addr + t1) & ((1 << param) << t0))
-                helper_vmexit(type, param);
-        }
-        break;
-    default:
-        if (env->intercept & (1ULL << (type - SVM_EXIT_INTR))) {
-            helper_vmexit(type, param);
-        }
-        break;
-    }
-}
-
-void helper_svm_check_io(uint32_t port, uint32_t param,
-                         uint32_t next_eip_addend)
-{
-    if (env->intercept & (1ULL << (SVM_EXIT_IOIO - SVM_EXIT_INTR))) {
-        /* FIXME: this should be read in at vmrun (faster this way?) */
-        uint64_t addr = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.iopm_base_pa));
-        uint16_t mask = (1 << ((param >> 4) & 7)) - 1;
-        if(lduw_phys(addr + port / 8) & (mask << (port & 7))) {
-            /* next EIP */
-            stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2),
-                     env->eip + next_eip_addend);
-            helper_vmexit(SVM_EXIT_IOIO, param | (port << 16));
-        }
-    }
-}
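
To make the bit arithmetic in helper_svm_check_io() concrete: assuming the size field in exit_info_1 bits 4-6 is one-hot (1 = byte, 2 = word, 4 = dword access), an N-byte access tests N consecutive bits of the I/O permission map starting at bit 'port'. A small hypothetical sketch of just that selection:

#include <assert.h>
#include <stdint.h>

/* Reproduce the IOPM bit selection used by helper_svm_check_io() above:
 * 'size_bits' is the one-hot size field from exit_info_1 bits 4-6, and
 * the returned mask covers that many permission bits, shifted to the
 * port's position within its IOPM byte. */
static uint16_t iopm_test_mask(uint32_t port, uint32_t size_bits)
{
    uint16_t mask = (1u << size_bits) - 1;

    return mask << (port & 7);
}

int main(void)
{
    /* A 4-byte access to port 0x1f1 tests bits 1..4 of IOPM byte 0x3e. */
    assert(0x1f1 / 8 == 0x3e);
    assert(iopm_test_mask(0x1f1, 4) == 0x1e);
    return 0;
}
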
-
-/* Note: currently only 32 bits of exit_code are used */
-void helper_vmexit(uint32_t exit_code, uint64_t exit_info_1)
-{
-    uint32_t int_ctl;
-
-    qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmexit(%08x, %016" PRIx64 ", %016" PRIx64 ", " TARGET_FMT_lx ")!\n",
-                exit_code, exit_info_1,
-                ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2)),
-                EIP);
-
-    if(env->hflags & HF_INHIBIT_IRQ_MASK) {
-        stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_state), SVM_INTERRUPT_SHADOW_MASK);
-        env->hflags &= ~HF_INHIBIT_IRQ_MASK;
-    } else {
-        stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_state), 0);
-    }
-
-    /* Save the VM state in the vmcb */
-    svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.es),
-                 &env->segs[R_ES]);
-    svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.cs),
-                 &env->segs[R_CS]);
-    svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.ss),
-                 &env->segs[R_SS]);
-    svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.ds),
-                 &env->segs[R_DS]);
-
-    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base), env->gdt.base);
-    stl_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit), env->gdt.limit);
-
-    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.base), env->idt.base);
-    stl_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit), env->idt.limit);
-
-    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.efer), env->efer);
-    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr0), env->cr[0]);
-    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr2), env->cr[2]);
-    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3), env->cr[3]);
-    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4), env->cr[4]);
-
-    int_ctl = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl));
-    int_ctl &= ~(V_TPR_MASK | V_IRQ_MASK);
-    int_ctl |= env->v_tpr & V_TPR_MASK;
-    if (env->interrupt_request & CPU_INTERRUPT_VIRQ)
-        int_ctl |= V_IRQ_MASK;
-    stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl), int_ctl);
-
-    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rflags), compute_eflags());
-    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rip), env->eip);
-    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rsp), ESP);
-    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rax), EAX);
-    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr7), env->dr[7]);
-    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr6), env->dr[6]);
-    stb_phys(env->vm_vmcb + offsetof(struct vmcb, save.cpl), env->hflags & HF_CPL_MASK);
-
-    /* Reload the host state from vm_hsave */
-    env->hflags2 &= ~(HF2_HIF_MASK | HF2_VINTR_MASK);
-    env->hflags &= ~HF_SVMI_MASK;
-    env->intercept = 0;
-    env->intercept_exceptions = 0;
-    env->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
-    env->tsc_offset = 0;
-
-    env->gdt.base  = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.base));
-    env->gdt.limit = ldl_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit));
-
-    env->idt.base  = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.base));
-    env->idt.limit = ldl_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.limit));
-
-    cpu_x86_update_cr0(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr0)) | CR0_PE_MASK);
-    cpu_x86_update_cr4(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr4)));
-    cpu_x86_update_cr3(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr3)));
-    /* we need to set the efer after the crs so the hidden flags get
-       set properly */
-    cpu_load_efer(env,
-                  ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.efer)));
-    env->eflags = 0;
-    load_eflags(ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rflags)),
-                ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
-    CC_OP = CC_OP_EFLAGS;
-
-    svm_load_seg_cache(env->vm_hsave + offsetof(struct vmcb, save.es),
-                       env, R_ES);
-    svm_load_seg_cache(env->vm_hsave + offsetof(struct vmcb, save.cs),
-                       env, R_CS);
-    svm_load_seg_cache(env->vm_hsave + offsetof(struct vmcb, save.ss),
-                       env, R_SS);
-    svm_load_seg_cache(env->vm_hsave + offsetof(struct vmcb, save.ds),
-                       env, R_DS);
-
-    EIP = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rip));
-    ESP = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rsp));
-    EAX = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rax));
-
-    env->dr[6] = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr6));
-    env->dr[7] = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr7));
-
-    /* other setups */
-    cpu_x86_set_cpl(env, 0);
-    stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_code), exit_code);
-    stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_1), exit_info_1);
-
-    stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_int_info),
-             ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj)));
-    stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_int_info_err),
-             ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err)));
-
-    env->hflags2 &= ~HF2_GIF_MASK;
-    /* FIXME: Resets the current ASID register to zero (host ASID). */
-
-    /* Clears the V_IRQ and V_INTR_MASKING bits inside the processor. */
-
-    /* Clears the TSC_OFFSET inside the processor. */
-
-    /* If the host is in PAE mode, the processor reloads the host's PDPEs
-       from the page table indicated by the host's CR3. If the PDPEs contain
-       illegal state, the processor causes a shutdown. */
-
-    /* Forces CR0.PE = 1, RFLAGS.VM = 0. */
-    env->cr[0] |= CR0_PE_MASK;
-    env->eflags &= ~VM_MASK;
-
-    /* Disables all breakpoints in the host DR7 register. */
-
-    /* Checks the reloaded host state for consistency. */
-
-    /* If the host's rIP reloaded by #VMEXIT is outside the limit of the
-       host's code segment or non-canonical (in the case of long mode), a
-       #GP fault is delivered inside the host. */
-
-    /* remove any pending exception */
-    env->exception_index = -1;
-    env->error_code = 0;
-    env->old_exception = -1;
-
-    cpu_loop_exit(env);
-}
-
-#endif
-
-/* MMX/SSE */
-/* XXX: optimize by storing fptt and fptags in the static cpu state */
-void helper_enter_mmx(void)
-{
-    env->fpstt = 0;
-    memset(env->fptags, 0, sizeof(env->fptags));
-}
-
-void helper_emms(void)
-{
-    /* set to empty state */
-    memset(env->fptags, 1, sizeof(env->fptags));
-}
-
-/* XXX: suppress */
-void helper_movq(void *d, void *s)
-{
-    *(uint64_t *)d = *(uint64_t *)s;
-}
-
-#define SHIFT 0
-#include "ops_sse.h"
-
-#define SHIFT 1
-#include "ops_sse.h"
-
-#define SHIFT 0
-#include "helper_template.h"
-#undef SHIFT
-
-#define SHIFT 1
-#include "helper_template.h"
-#undef SHIFT
-
-#define SHIFT 2
-#include "helper_template.h"
-#undef SHIFT
-
-#ifdef TARGET_X86_64
-
-#define SHIFT 3
-#include "helper_template.h"
-#undef SHIFT
-
-#endif
-
-/* bit operations */
-target_ulong helper_bsf(target_ulong t0)
-{
-    int count;
-    target_ulong res;
-
-    res = t0;
-    count = 0;
-    while ((res & 1) == 0) {
-        count++;
-        res >>= 1;
-    }
-    return count;
-}
-
-target_ulong helper_bsr(target_ulong t0)
-{
-    int count;
-    target_ulong res, mask;
-
-    res = t0;
-    count = TARGET_LONG_BITS - 1;
-    mask = (target_ulong)1 << (TARGET_LONG_BITS - 1);
-    while ((res & mask) == 0) {
-        count--;
-        res <<= 1;
-    }
-    return count;
-}
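
helper_bsf()/helper_bsr() above are plain linear scans for the lowest and highest set bit; like the hardware BSF/BSR instructions they assume a non-zero input (the zero case is expected to be handled by the caller). A 32-bit stand-alone sketch with a worked example:

#include <assert.h>
#include <stdint.h>

/* Minimal stand-ins for helper_bsf/helper_bsr above (32-bit flavour);
 * like the originals they assume val != 0. */
static int bsf32(uint32_t val)
{
    int count = 0;

    while ((val & 1) == 0) {
        count++;
        val >>= 1;
    }
    return count;
}

static int bsr32(uint32_t val)
{
    int count = 31;

    while ((val & 0x80000000u) == 0) {
        count--;
        val <<= 1;
    }
    return count;
}

int main(void)
{
    assert(bsf32(0x48) == 3);   /* 0x48 = 01001000b: lowest set bit is 3 */
    assert(bsr32(0x48) == 6);   /* ...and the highest set bit is 6 */
    return 0;
}
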
-
-
-static int compute_all_eflags(void)
-{
-    return CC_SRC;
-}
-
-static int compute_c_eflags(void)
-{
-    return CC_SRC & CC_C;
-}
-
-uint32_t helper_cc_compute_all(int op)
-{
-    switch (op) {
-    default: /* should never happen */ return 0;
-
-    case CC_OP_EFLAGS: return compute_all_eflags();
-
-    case CC_OP_MULB: return compute_all_mulb();
-    case CC_OP_MULW: return compute_all_mulw();
-    case CC_OP_MULL: return compute_all_mull();
-
-    case CC_OP_ADDB: return compute_all_addb();
-    case CC_OP_ADDW: return compute_all_addw();
-    case CC_OP_ADDL: return compute_all_addl();
-
-    case CC_OP_ADCB: return compute_all_adcb();
-    case CC_OP_ADCW: return compute_all_adcw();
-    case CC_OP_ADCL: return compute_all_adcl();
-
-    case CC_OP_SUBB: return compute_all_subb();
-    case CC_OP_SUBW: return compute_all_subw();
-    case CC_OP_SUBL: return compute_all_subl();
-
-    case CC_OP_SBBB: return compute_all_sbbb();
-    case CC_OP_SBBW: return compute_all_sbbw();
-    case CC_OP_SBBL: return compute_all_sbbl();
-
-    case CC_OP_LOGICB: return compute_all_logicb();
-    case CC_OP_LOGICW: return compute_all_logicw();
-    case CC_OP_LOGICL: return compute_all_logicl();
-
-    case CC_OP_INCB: return compute_all_incb();
-    case CC_OP_INCW: return compute_all_incw();
-    case CC_OP_INCL: return compute_all_incl();
-
-    case CC_OP_DECB: return compute_all_decb();
-    case CC_OP_DECW: return compute_all_decw();
-    case CC_OP_DECL: return compute_all_decl();
-
-    case CC_OP_SHLB: return compute_all_shlb();
-    case CC_OP_SHLW: return compute_all_shlw();
-    case CC_OP_SHLL: return compute_all_shll();
-
-    case CC_OP_SARB: return compute_all_sarb();
-    case CC_OP_SARW: return compute_all_sarw();
-    case CC_OP_SARL: return compute_all_sarl();
-
-#ifdef TARGET_X86_64
-    case CC_OP_MULQ: return compute_all_mulq();
-
-    case CC_OP_ADDQ: return compute_all_addq();
-
-    case CC_OP_ADCQ: return compute_all_adcq();
-
-    case CC_OP_SUBQ: return compute_all_subq();
-
-    case CC_OP_SBBQ: return compute_all_sbbq();
-
-    case CC_OP_LOGICQ: return compute_all_logicq();
-
-    case CC_OP_INCQ: return compute_all_incq();
-
-    case CC_OP_DECQ: return compute_all_decq();
-
-    case CC_OP_SHLQ: return compute_all_shlq();
-
-    case CC_OP_SARQ: return compute_all_sarq();
-#endif
-    }
-}
-
-uint32_t helper_cc_compute_c(int op)
-{
-    switch (op) {
-    default: /* should never happen */ return 0;
-
-    case CC_OP_EFLAGS: return compute_c_eflags();
-
-    case CC_OP_MULB: return compute_c_mull();
-    case CC_OP_MULW: return compute_c_mull();
-    case CC_OP_MULL: return compute_c_mull();
-
-    case CC_OP_ADDB: return compute_c_addb();
-    case CC_OP_ADDW: return compute_c_addw();
-    case CC_OP_ADDL: return compute_c_addl();
-
-    case CC_OP_ADCB: return compute_c_adcb();
-    case CC_OP_ADCW: return compute_c_adcw();
-    case CC_OP_ADCL: return compute_c_adcl();
-
-    case CC_OP_SUBB: return compute_c_subb();
-    case CC_OP_SUBW: return compute_c_subw();
-    case CC_OP_SUBL: return compute_c_subl();
-
-    case CC_OP_SBBB: return compute_c_sbbb();
-    case CC_OP_SBBW: return compute_c_sbbw();
-    case CC_OP_SBBL: return compute_c_sbbl();
-
-    case CC_OP_LOGICB: return compute_c_logicb();
-    case CC_OP_LOGICW: return compute_c_logicw();
-    case CC_OP_LOGICL: return compute_c_logicl();
-
-    case CC_OP_INCB: return compute_c_incl();
-    case CC_OP_INCW: return compute_c_incl();
-    case CC_OP_INCL: return compute_c_incl();
-
-    case CC_OP_DECB: return compute_c_incl();
-    case CC_OP_DECW: return compute_c_incl();
-    case CC_OP_DECL: return compute_c_incl();
-
-    case CC_OP_SHLB: return compute_c_shlb();
-    case CC_OP_SHLW: return compute_c_shlw();
-    case CC_OP_SHLL: return compute_c_shll();
-
-    case CC_OP_SARB: return compute_c_sarl();
-    case CC_OP_SARW: return compute_c_sarl();
-    case CC_OP_SARL: return compute_c_sarl();
-
-#ifdef TARGET_X86_64
-    case CC_OP_MULQ: return compute_c_mull();
-
-    case CC_OP_ADDQ: return compute_c_addq();
-
-    case CC_OP_ADCQ: return compute_c_adcq();
-
-    case CC_OP_SUBQ: return compute_c_subq();
-
-    case CC_OP_SBBQ: return compute_c_sbbq();
-
-    case CC_OP_LOGICQ: return compute_c_logicq();
-
-    case CC_OP_INCQ: return compute_c_incl();
-
-    case CC_OP_DECQ: return compute_c_incl();
-
-    case CC_OP_SHLQ: return compute_c_shlq();
-
-    case CC_OP_SARQ: return compute_c_sarl();
-#endif
-    }
-}
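
helper_cc_compute_all()/helper_cc_compute_c() above are the read side of QEMU's lazy condition-code scheme: the translator records which operation last wrote the flags (CC_OP) plus its operand and result (CC_SRC/CC_DST), and the EFLAGS bits are only materialised when something reads them. A simplified, hypothetical sketch of the idea for the 32-bit ADD case (PF/AF omitted for brevity):

#include <assert.h>
#include <stdint.h>

/* Hypothetical lazy-flags record: instead of EFLAGS, keep the last
 * flag-setting operation and its values. */
enum { LAZY_OP_ADDL };

struct lazy_cc {
    int op;        /* which operation wrote the flags (cf. CC_OP) */
    uint32_t src;  /* second operand (cf. CC_SRC)                 */
    uint32_t dst;  /* result of the operation (cf. CC_DST)        */
};

/* Materialise CF/ZF/SF/OF on demand. The first operand of the ADD is
 * recovered as dst - src, just as the real helper templates do. */
static uint32_t lazy_cc_compute_all(const struct lazy_cc *cc)
{
    switch (cc->op) {
    case LAZY_OP_ADDL: {
        uint32_t src1 = cc->dst - cc->src;
        uint32_t cf = cc->dst < src1;                              /* CF */
        uint32_t zf = (cc->dst == 0) << 6;                         /* ZF */
        uint32_t sf = (cc->dst >> 24) & 0x80;                      /* SF */
        uint32_t of = (((src1 ^ cc->src ^ ~0u) &
                        (src1 ^ cc->dst)) >> 20) & 0x800;          /* OF */

        return cf | zf | sf | of;
    }
    default:
        return 0;
    }
}

int main(void)
{
    /* 0x7fffffff + 1 overflows: OF and SF set, CF and ZF clear. */
    struct lazy_cc cc = { LAZY_OP_ADDL, 1u, 0x80000000u };

    assert(lazy_cc_compute_all(&cc) == (0x800 | 0x80));
    return 0;
}
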
diff --git a/target-i386/ops_sse.h b/target-i386/ops_sse.h
index 35ac211..eb24b5f 100644
--- a/target-i386/ops_sse.h
+++ b/target-i386/ops_sse.h
@@ -15,8 +15,7 @@
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
 #if SHIFT == 0
 #define Reg MMXReg
@@ -36,7 +35,7 @@
 #define SUFFIX _xmm
 #endif
 
-void glue(helper_psrlw, SUFFIX)(Reg *d, Reg *s)
+void glue(helper_psrlw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
     int shift;
 
@@ -60,7 +59,7 @@
     }
 }
 
-void glue(helper_psraw, SUFFIX)(Reg *d, Reg *s)
+void glue(helper_psraw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
     int shift;
 
@@ -81,7 +80,7 @@
 #endif
 }
 
-void glue(helper_psllw, SUFFIX)(Reg *d, Reg *s)
+void glue(helper_psllw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
     int shift;
 
@@ -105,7 +104,7 @@
     }
 }
 
-void glue(helper_psrld, SUFFIX)(Reg *d, Reg *s)
+void glue(helper_psrld, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
     int shift;
 
@@ -125,7 +124,7 @@
     }
 }
 
-void glue(helper_psrad, SUFFIX)(Reg *d, Reg *s)
+void glue(helper_psrad, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
     int shift;
 
@@ -142,7 +141,7 @@
 #endif
 }
 
-void glue(helper_pslld, SUFFIX)(Reg *d, Reg *s)
+void glue(helper_pslld, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
     int shift;
 
@@ -162,7 +161,7 @@
     }
 }
 
-void glue(helper_psrlq, SUFFIX)(Reg *d, Reg *s)
+void glue(helper_psrlq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
     int shift;
 
@@ -180,7 +179,7 @@
     }
 }
 
-void glue(helper_psllq, SUFFIX)(Reg *d, Reg *s)
+void glue(helper_psllq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
     int shift;
 
@@ -199,130 +198,140 @@
 }
 
 #if SHIFT == 1
-void glue(helper_psrldq, SUFFIX)(Reg *d, Reg *s)
+void glue(helper_psrldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
     int shift, i;
 
     shift = s->L(0);
-    if (shift > 16)
+    if (shift > 16) {
         shift = 16;
-    for(i = 0; i < 16 - shift; i++)
+    }
+    for (i = 0; i < 16 - shift; i++) {
         d->B(i) = d->B(i + shift);
-    for(i = 16 - shift; i < 16; i++)
+    }
+    for (i = 16 - shift; i < 16; i++) {
         d->B(i) = 0;
+    }
 }
 
-void glue(helper_pslldq, SUFFIX)(Reg *d, Reg *s)
+void glue(helper_pslldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
     int shift, i;
 
     shift = s->L(0);
-    if (shift > 16)
+    if (shift > 16) {
         shift = 16;
-    for(i = 15; i >= shift; i--)
+    }
+    for (i = 15; i >= shift; i--) {
         d->B(i) = d->B(i - shift);
-    for(i = 0; i < shift; i++)
+    }
+    for (i = 0; i < shift; i++) {
         d->B(i) = 0;
+    }
 }
 #endif
 
-#define SSE_HELPER_B(name, F)\
-void glue(name, SUFFIX) (Reg *d, Reg *s)\
-{\
-    d->B(0) = F(d->B(0), s->B(0));\
-    d->B(1) = F(d->B(1), s->B(1));\
-    d->B(2) = F(d->B(2), s->B(2));\
-    d->B(3) = F(d->B(3), s->B(3));\
-    d->B(4) = F(d->B(4), s->B(4));\
-    d->B(5) = F(d->B(5), s->B(5));\
-    d->B(6) = F(d->B(6), s->B(6));\
-    d->B(7) = F(d->B(7), s->B(7));\
-    XMM_ONLY(\
-    d->B(8) = F(d->B(8), s->B(8));\
-    d->B(9) = F(d->B(9), s->B(9));\
-    d->B(10) = F(d->B(10), s->B(10));\
-    d->B(11) = F(d->B(11), s->B(11));\
-    d->B(12) = F(d->B(12), s->B(12));\
-    d->B(13) = F(d->B(13), s->B(13));\
-    d->B(14) = F(d->B(14), s->B(14));\
-    d->B(15) = F(d->B(15), s->B(15));\
-    )\
-}
+#define SSE_HELPER_B(name, F)                                   \
+    void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)   \
+    {                                                           \
+        d->B(0) = F(d->B(0), s->B(0));                          \
+        d->B(1) = F(d->B(1), s->B(1));                          \
+        d->B(2) = F(d->B(2), s->B(2));                          \
+        d->B(3) = F(d->B(3), s->B(3));                          \
+        d->B(4) = F(d->B(4), s->B(4));                          \
+        d->B(5) = F(d->B(5), s->B(5));                          \
+        d->B(6) = F(d->B(6), s->B(6));                          \
+        d->B(7) = F(d->B(7), s->B(7));                          \
+        XMM_ONLY(                                               \
+                 d->B(8) = F(d->B(8), s->B(8));                 \
+                 d->B(9) = F(d->B(9), s->B(9));                 \
+                 d->B(10) = F(d->B(10), s->B(10));              \
+                 d->B(11) = F(d->B(11), s->B(11));              \
+                 d->B(12) = F(d->B(12), s->B(12));              \
+                 d->B(13) = F(d->B(13), s->B(13));              \
+                 d->B(14) = F(d->B(14), s->B(14));              \
+                 d->B(15) = F(d->B(15), s->B(15));              \
+                                                        )       \
+            }
 
-#define SSE_HELPER_W(name, F)\
-void glue(name, SUFFIX) (Reg *d, Reg *s)\
-{\
-    d->W(0) = F(d->W(0), s->W(0));\
-    d->W(1) = F(d->W(1), s->W(1));\
-    d->W(2) = F(d->W(2), s->W(2));\
-    d->W(3) = F(d->W(3), s->W(3));\
-    XMM_ONLY(\
-    d->W(4) = F(d->W(4), s->W(4));\
-    d->W(5) = F(d->W(5), s->W(5));\
-    d->W(6) = F(d->W(6), s->W(6));\
-    d->W(7) = F(d->W(7), s->W(7));\
-    )\
-}
+#define SSE_HELPER_W(name, F)                                   \
+    void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)   \
+    {                                                           \
+        d->W(0) = F(d->W(0), s->W(0));                          \
+        d->W(1) = F(d->W(1), s->W(1));                          \
+        d->W(2) = F(d->W(2), s->W(2));                          \
+        d->W(3) = F(d->W(3), s->W(3));                          \
+        XMM_ONLY(                                               \
+                 d->W(4) = F(d->W(4), s->W(4));                 \
+                 d->W(5) = F(d->W(5), s->W(5));                 \
+                 d->W(6) = F(d->W(6), s->W(6));                 \
+                 d->W(7) = F(d->W(7), s->W(7));                 \
+                                                        )       \
+            }
 
-#define SSE_HELPER_L(name, F)\
-void glue(name, SUFFIX) (Reg *d, Reg *s)\
-{\
-    d->L(0) = F(d->L(0), s->L(0));\
-    d->L(1) = F(d->L(1), s->L(1));\
-    XMM_ONLY(\
-    d->L(2) = F(d->L(2), s->L(2));\
-    d->L(3) = F(d->L(3), s->L(3));\
-    )\
-}
+#define SSE_HELPER_L(name, F)                                   \
+    void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)   \
+    {                                                           \
+        d->L(0) = F(d->L(0), s->L(0));                          \
+        d->L(1) = F(d->L(1), s->L(1));                          \
+        XMM_ONLY(                                               \
+                 d->L(2) = F(d->L(2), s->L(2));                 \
+                 d->L(3) = F(d->L(3), s->L(3));                 \
+                                                        )       \
+            }
 
-#define SSE_HELPER_Q(name, F)\
-void glue(name, SUFFIX) (Reg *d, Reg *s)\
-{\
-    d->Q(0) = F(d->Q(0), s->Q(0));\
-    XMM_ONLY(\
-    d->Q(1) = F(d->Q(1), s->Q(1));\
-    )\
-}
+#define SSE_HELPER_Q(name, F)                                   \
+    void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)   \
+    {                                                           \
+        d->Q(0) = F(d->Q(0), s->Q(0));                          \
+        XMM_ONLY(                                               \
+                 d->Q(1) = F(d->Q(1), s->Q(1));                 \
+                                                        )       \
+            }
 
 #if SHIFT == 0
 static inline int satub(int x)
 {
-    if (x < 0)
+    if (x < 0) {
         return 0;
-    else if (x > 255)
+    } else if (x > 255) {
         return 255;
-    else
+    } else {
         return x;
+    }
 }
 
 static inline int satuw(int x)
 {
-    if (x < 0)
+    if (x < 0) {
         return 0;
-    else if (x > 65535)
+    } else if (x > 65535) {
         return 65535;
-    else
+    } else {
         return x;
+    }
 }
 
 static inline int satsb(int x)
 {
-    if (x < -128)
+    if (x < -128) {
         return -128;
-    else if (x > 127)
+    } else if (x > 127) {
         return 127;
-    else
+    } else {
         return x;
+    }
 }
 
 static inline int satsw(int x)
 {
-    if (x < -32768)
+    if (x < -32768) {
         return -32768;
-    else if (x > 32767)
+    } else if (x > 32767) {
         return 32767;
-    else
+    } else {
         return x;
+    }
 }
 
 #define FADD(a, b) ((a) + (b))
@@ -341,22 +350,22 @@
 #define FMAXUB(a, b) ((a) > (b)) ? (a) : (b)
 #define FMAXSW(a, b) ((int16_t)(a) > (int16_t)(b)) ? (a) : (b)
 
-#define FAND(a, b) (a) & (b)
+#define FAND(a, b) ((a) & (b))
 #define FANDN(a, b) ((~(a)) & (b))
-#define FOR(a, b) (a) | (b)
-#define FXOR(a, b) (a) ^ (b)
+#define FOR(a, b) ((a) | (b))
+#define FXOR(a, b) ((a) ^ (b))
 
-#define FCMPGTB(a, b) (int8_t)(a) > (int8_t)(b) ? -1 : 0
-#define FCMPGTW(a, b) (int16_t)(a) > (int16_t)(b) ? -1 : 0
-#define FCMPGTL(a, b) (int32_t)(a) > (int32_t)(b) ? -1 : 0
-#define FCMPEQ(a, b) (a) == (b) ? -1 : 0
+#define FCMPGTB(a, b) ((int8_t)(a) > (int8_t)(b) ? -1 : 0)
+#define FCMPGTW(a, b) ((int16_t)(a) > (int16_t)(b) ? -1 : 0)
+#define FCMPGTL(a, b) ((int32_t)(a) > (int32_t)(b) ? -1 : 0)
+#define FCMPEQ(a, b) ((a) == (b) ? -1 : 0)
 
-#define FMULLW(a, b) (a) * (b)
-#define FMULHRW(a, b) ((int16_t)(a) * (int16_t)(b) + 0x8000) >> 16
-#define FMULHUW(a, b) (a) * (b) >> 16
-#define FMULHW(a, b) (int16_t)(a) * (int16_t)(b) >> 16
+#define FMULLW(a, b) ((a) * (b))
+#define FMULHRW(a, b) (((int16_t)(a) * (int16_t)(b) + 0x8000) >> 16)
+#define FMULHUW(a, b) ((a) * (b) >> 16)
+#define FMULHW(a, b) ((int16_t)(a) * (int16_t)(b) >> 16)
 
-#define FAVG(a, b) ((a) + (b) + 1) >> 1
+#define FAVG(a, b) (((a) + (b) + 1) >> 1)
 #endif
 
 SSE_HELPER_B(helper_paddb, FADD)
@@ -408,7 +417,7 @@
 SSE_HELPER_B(helper_pavgb, FAVG)
 SSE_HELPER_W(helper_pavgw, FAVG)
 
-void glue(helper_pmuludq, SUFFIX) (Reg *d, Reg *s)
+void glue(helper_pmuludq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
     d->Q(0) = (uint64_t)s->L(0) * (uint64_t)d->L(0);
 #if SHIFT == 1
@@ -416,26 +425,27 @@
 #endif
 }
 
-void glue(helper_pmaddwd, SUFFIX) (Reg *d, Reg *s)
+void glue(helper_pmaddwd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
     int i;
 
-    for(i = 0; i < (2 << SHIFT); i++) {
-        d->L(i) = (int16_t)s->W(2*i) * (int16_t)d->W(2*i) +
-            (int16_t)s->W(2*i+1) * (int16_t)d->W(2*i+1);
+    for (i = 0; i < (2 << SHIFT); i++) {
+        d->L(i) = (int16_t)s->W(2 * i) * (int16_t)d->W(2 * i) +
+            (int16_t)s->W(2 * i + 1) * (int16_t)d->W(2 * i + 1);
     }
 }
 
 #if SHIFT == 0
 static inline int abs1(int a)
 {
-    if (a < 0)
+    if (a < 0) {
         return -a;
-    else
+    } else {
         return a;
+    }
 }
 #endif
-void glue(helper_psadbw, SUFFIX) (Reg *d, Reg *s)
+void glue(helper_psadbw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
     unsigned int val;
 
@@ -463,16 +473,19 @@
 #endif
 }
 
-void glue(helper_maskmov, SUFFIX) (Reg *d, Reg *s, target_ulong a0)
+void glue(helper_maskmov, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
+                                  target_ulong a0)
 {
     int i;
-    for(i = 0; i < (8 << SHIFT); i++) {
-        if (s->B(i) & 0x80)
-            stb(a0 + i, d->B(i));
+
+    for (i = 0; i < (8 << SHIFT); i++) {
+        if (s->B(i) & 0x80) {
+            cpu_stb_data(env, a0 + i, d->B(i));
+        }
     }
 }
 
-void glue(helper_movl_mm_T0, SUFFIX) (Reg *d, uint32_t val)
+void glue(helper_movl_mm_T0, SUFFIX)(Reg *d, uint32_t val)
 {
     d->L(0) = val;
     d->L(1) = 0;
@@ -482,7 +495,7 @@
 }
 
 #ifdef TARGET_X86_64
-void glue(helper_movq_mm_T0, SUFFIX) (Reg *d, uint64_t val)
+void glue(helper_movq_mm_T0, SUFFIX)(Reg *d, uint64_t val)
 {
     d->Q(0) = val;
 #if SHIFT == 1
@@ -492,9 +505,10 @@
 #endif
 
 #if SHIFT == 0
-void glue(helper_pshufw, SUFFIX) (Reg *d, Reg *s, int order)
+void glue(helper_pshufw, SUFFIX)(Reg *d, Reg *s, int order)
 {
     Reg r;
+
     r.W(0) = s->W(order & 3);
     r.W(1) = s->W((order >> 2) & 3);
     r.W(2) = s->W((order >> 4) & 3);
@@ -505,6 +519,7 @@
 void helper_shufps(Reg *d, Reg *s, int order)
 {
     Reg r;
+
     r.L(0) = d->L(order & 3);
     r.L(1) = d->L((order >> 2) & 3);
     r.L(2) = s->L((order >> 4) & 3);
@@ -515,14 +530,16 @@
 void helper_shufpd(Reg *d, Reg *s, int order)
 {
     Reg r;
+
     r.Q(0) = d->Q(order & 1);
     r.Q(1) = s->Q((order >> 1) & 1);
     *d = r;
 }
 
-void glue(helper_pshufd, SUFFIX) (Reg *d, Reg *s, int order)
+void glue(helper_pshufd, SUFFIX)(Reg *d, Reg *s, int order)
 {
     Reg r;
+
     r.L(0) = s->L(order & 3);
     r.L(1) = s->L((order >> 2) & 3);
     r.L(2) = s->L((order >> 4) & 3);
@@ -530,9 +547,10 @@
     *d = r;
 }
 
-void glue(helper_pshuflw, SUFFIX) (Reg *d, Reg *s, int order)
+void glue(helper_pshuflw, SUFFIX)(Reg *d, Reg *s, int order)
 {
     Reg r;
+
     r.W(0) = s->W(order & 3);
     r.W(1) = s->W((order >> 2) & 3);
     r.W(2) = s->W((order >> 4) & 3);
@@ -541,9 +559,10 @@
     *d = r;
 }
 
-void glue(helper_pshufhw, SUFFIX) (Reg *d, Reg *s, int order)
+void glue(helper_pshufhw, SUFFIX)(Reg *d, Reg *s, int order)
 {
     Reg r;
+
     r.Q(0) = s->Q(0);
     r.W(4) = s->W(4 + (order & 3));
     r.W(5) = s->W(4 + ((order >> 2) & 3));
@@ -557,38 +576,46 @@
 /* FPU ops */
 /* XXX: not accurate */
 
-#define SSE_HELPER_S(name, F)\
-void helper_ ## name ## ps (Reg *d, Reg *s)\
-{\
-    d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
-    d->XMM_S(1) = F(32, d->XMM_S(1), s->XMM_S(1));\
-    d->XMM_S(2) = F(32, d->XMM_S(2), s->XMM_S(2));\
-    d->XMM_S(3) = F(32, d->XMM_S(3), s->XMM_S(3));\
-}\
-\
-void helper_ ## name ## ss (Reg *d, Reg *s)\
-{\
-    d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
-}\
-void helper_ ## name ## pd (Reg *d, Reg *s)\
-{\
-    d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
-    d->XMM_D(1) = F(64, d->XMM_D(1), s->XMM_D(1));\
-}\
-\
-void helper_ ## name ## sd (Reg *d, Reg *s)\
-{\
-    d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
-}
+#define SSE_HELPER_S(name, F)                                           \
+    void helper_ ## name ## ps(CPUX86State *env, Reg *d, Reg *s)        \
+    {                                                                   \
+        d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));                  \
+        d->XMM_S(1) = F(32, d->XMM_S(1), s->XMM_S(1));                  \
+        d->XMM_S(2) = F(32, d->XMM_S(2), s->XMM_S(2));                  \
+        d->XMM_S(3) = F(32, d->XMM_S(3), s->XMM_S(3));                  \
+    }                                                                   \
+                                                                        \
+    void helper_ ## name ## ss(CPUX86State *env, Reg *d, Reg *s)        \
+    {                                                                   \
+        d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));                  \
+    }                                                                   \
+                                                                        \
+    void helper_ ## name ## pd(CPUX86State *env, Reg *d, Reg *s)        \
+    {                                                                   \
+        d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));                  \
+        d->XMM_D(1) = F(64, d->XMM_D(1), s->XMM_D(1));                  \
+    }                                                                   \
+                                                                        \
+    void helper_ ## name ## sd(CPUX86State *env, Reg *d, Reg *s)        \
+    {                                                                   \
+        d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));                  \
+    }
 
 #define FPU_ADD(size, a, b) float ## size ## _add(a, b, &env->sse_status)
 #define FPU_SUB(size, a, b) float ## size ## _sub(a, b, &env->sse_status)
 #define FPU_MUL(size, a, b) float ## size ## _mul(a, b, &env->sse_status)
 #define FPU_DIV(size, a, b) float ## size ## _div(a, b, &env->sse_status)
-#define FPU_MIN(size, a, b) (a) < (b) ? (a) : (b)
-#define FPU_MAX(size, a, b) (a) > (b) ? (a) : (b)
 #define FPU_SQRT(size, a, b) float ## size ## _sqrt(b, &env->sse_status)
 
+/* Note that the choice of comparison op here is important to get the
+ * special cases right: for min and max Intel specifies that (-0,0),
+ * (NaN, anything) and (anything, NaN) return the second argument.
+ */
+#define FPU_MIN(size, a, b)                                     \
+    (float ## size ## _lt(a, b, &env->sse_status) ? (a) : (b))
+#define FPU_MAX(size, a, b)                                     \
+    (float ## size ## _lt(b, a, &env->sse_status) ? (a) : (b))
+
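The asymmetry described in the comment above can be demonstrated directly: an ordered less-than is false for any NaN operand and for the (-0, +0) pair, so the macros fall through to the second argument in exactly those cases. A tiny sketch using host floats instead of the softfloat types:

#include <assert.h>
#include <math.h>

/* Mirror of the FPU_MIN macro above, with an ordered host '<' standing
 * in for float32_lt(): NaN comparisons are false, so the second operand
 * is returned. */
static float min_like_sse(float a, float b)
{
    return (a < b) ? a : b;
}

int main(void)
{
    /* (NaN, x) and (x, NaN) both return the second argument... */
    assert(min_like_sse(NAN, 1.0f) == 1.0f);
    assert(isnan(min_like_sse(1.0f, NAN)));
    /* ...and so does (-0, +0), because -0 < +0 is false. */
    assert(!signbit(min_like_sse(-0.0f, 0.0f)));
    return 0;
}
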
 SSE_HELPER_S(add, FPU_ADD)
 SSE_HELPER_S(sub, FPU_SUB)
 SSE_HELPER_S(mul, FPU_MUL)
@@ -599,34 +626,35 @@
 
 
 /* float to float conversions */
-void helper_cvtps2pd(Reg *d, Reg *s)
+void helper_cvtps2pd(CPUX86State *env, Reg *d, Reg *s)
 {
     float32 s0, s1;
+
     s0 = s->XMM_S(0);
     s1 = s->XMM_S(1);
     d->XMM_D(0) = float32_to_float64(s0, &env->sse_status);
     d->XMM_D(1) = float32_to_float64(s1, &env->sse_status);
 }
 
-void helper_cvtpd2ps(Reg *d, Reg *s)
+void helper_cvtpd2ps(CPUX86State *env, Reg *d, Reg *s)
 {
     d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status);
     d->XMM_S(1) = float64_to_float32(s->XMM_D(1), &env->sse_status);
     d->Q(1) = 0;
 }
 
-void helper_cvtss2sd(Reg *d, Reg *s)
+void helper_cvtss2sd(CPUX86State *env, Reg *d, Reg *s)
 {
     d->XMM_D(0) = float32_to_float64(s->XMM_S(0), &env->sse_status);
 }
 
-void helper_cvtsd2ss(Reg *d, Reg *s)
+void helper_cvtsd2ss(CPUX86State *env, Reg *d, Reg *s)
 {
     d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status);
 }
 
 /* integer to float */
-void helper_cvtdq2ps(Reg *d, Reg *s)
+void helper_cvtdq2ps(CPUX86State *env, Reg *d, Reg *s)
 {
     d->XMM_S(0) = int32_to_float32(s->XMM_L(0), &env->sse_status);
     d->XMM_S(1) = int32_to_float32(s->XMM_L(1), &env->sse_status);
@@ -634,51 +662,52 @@
     d->XMM_S(3) = int32_to_float32(s->XMM_L(3), &env->sse_status);
 }
 
-void helper_cvtdq2pd(Reg *d, Reg *s)
+void helper_cvtdq2pd(CPUX86State *env, Reg *d, Reg *s)
 {
     int32_t l0, l1;
+
     l0 = (int32_t)s->XMM_L(0);
     l1 = (int32_t)s->XMM_L(1);
     d->XMM_D(0) = int32_to_float64(l0, &env->sse_status);
     d->XMM_D(1) = int32_to_float64(l1, &env->sse_status);
 }
 
-void helper_cvtpi2ps(XMMReg *d, MMXReg *s)
+void helper_cvtpi2ps(CPUX86State *env, XMMReg *d, MMXReg *s)
 {
     d->XMM_S(0) = int32_to_float32(s->MMX_L(0), &env->sse_status);
     d->XMM_S(1) = int32_to_float32(s->MMX_L(1), &env->sse_status);
 }
 
-void helper_cvtpi2pd(XMMReg *d, MMXReg *s)
+void helper_cvtpi2pd(CPUX86State *env, XMMReg *d, MMXReg *s)
 {
     d->XMM_D(0) = int32_to_float64(s->MMX_L(0), &env->sse_status);
     d->XMM_D(1) = int32_to_float64(s->MMX_L(1), &env->sse_status);
 }
 
-void helper_cvtsi2ss(XMMReg *d, uint32_t val)
+void helper_cvtsi2ss(CPUX86State *env, XMMReg *d, uint32_t val)
 {
     d->XMM_S(0) = int32_to_float32(val, &env->sse_status);
 }
 
-void helper_cvtsi2sd(XMMReg *d, uint32_t val)
+void helper_cvtsi2sd(CPUX86State *env, XMMReg *d, uint32_t val)
 {
     d->XMM_D(0) = int32_to_float64(val, &env->sse_status);
 }
 
 #ifdef TARGET_X86_64
-void helper_cvtsq2ss(XMMReg *d, uint64_t val)
+void helper_cvtsq2ss(CPUX86State *env, XMMReg *d, uint64_t val)
 {
     d->XMM_S(0) = int64_to_float32(val, &env->sse_status);
 }
 
-void helper_cvtsq2sd(XMMReg *d, uint64_t val)
+void helper_cvtsq2sd(CPUX86State *env, XMMReg *d, uint64_t val)
 {
     d->XMM_D(0) = int64_to_float64(val, &env->sse_status);
 }
 #endif
 
 /* float to integer */
-void helper_cvtps2dq(XMMReg *d, XMMReg *s)
+void helper_cvtps2dq(CPUX86State *env, XMMReg *d, XMMReg *s)
 {
     d->XMM_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status);
     d->XMM_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status);
@@ -686,49 +715,49 @@
     d->XMM_L(3) = float32_to_int32(s->XMM_S(3), &env->sse_status);
 }
 
-void helper_cvtpd2dq(XMMReg *d, XMMReg *s)
+void helper_cvtpd2dq(CPUX86State *env, XMMReg *d, XMMReg *s)
 {
     d->XMM_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status);
     d->XMM_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status);
     d->XMM_Q(1) = 0;
 }
 
-void helper_cvtps2pi(MMXReg *d, XMMReg *s)
+void helper_cvtps2pi(CPUX86State *env, MMXReg *d, XMMReg *s)
 {
     d->MMX_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status);
     d->MMX_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status);
 }
 
-void helper_cvtpd2pi(MMXReg *d, XMMReg *s)
+void helper_cvtpd2pi(CPUX86State *env, MMXReg *d, XMMReg *s)
 {
     d->MMX_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status);
     d->MMX_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status);
 }
 
-int32_t helper_cvtss2si(XMMReg *s)
+int32_t helper_cvtss2si(CPUX86State *env, XMMReg *s)
 {
     return float32_to_int32(s->XMM_S(0), &env->sse_status);
 }
 
-int32_t helper_cvtsd2si(XMMReg *s)
+int32_t helper_cvtsd2si(CPUX86State *env, XMMReg *s)
 {
     return float64_to_int32(s->XMM_D(0), &env->sse_status);
 }
 
 #ifdef TARGET_X86_64
-int64_t helper_cvtss2sq(XMMReg *s)
+int64_t helper_cvtss2sq(CPUX86State *env, XMMReg *s)
 {
     return float32_to_int64(s->XMM_S(0), &env->sse_status);
 }
 
-int64_t helper_cvtsd2sq(XMMReg *s)
+int64_t helper_cvtsd2sq(CPUX86State *env, XMMReg *s)
 {
     return float64_to_int64(s->XMM_D(0), &env->sse_status);
 }
 #endif
 
 /* float to integer truncated */
-void helper_cvttps2dq(XMMReg *d, XMMReg *s)
+void helper_cvttps2dq(CPUX86State *env, XMMReg *d, XMMReg *s)
 {
     d->XMM_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status);
     d->XMM_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status);
@@ -736,156 +765,223 @@
     d->XMM_L(3) = float32_to_int32_round_to_zero(s->XMM_S(3), &env->sse_status);
 }
 
-void helper_cvttpd2dq(XMMReg *d, XMMReg *s)
+void helper_cvttpd2dq(CPUX86State *env, XMMReg *d, XMMReg *s)
 {
     d->XMM_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status);
     d->XMM_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status);
     d->XMM_Q(1) = 0;
 }
 
-void helper_cvttps2pi(MMXReg *d, XMMReg *s)
+void helper_cvttps2pi(CPUX86State *env, MMXReg *d, XMMReg *s)
 {
     d->MMX_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status);
     d->MMX_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status);
 }
 
-void helper_cvttpd2pi(MMXReg *d, XMMReg *s)
+void helper_cvttpd2pi(CPUX86State *env, MMXReg *d, XMMReg *s)
 {
     d->MMX_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status);
     d->MMX_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status);
 }
 
-int32_t helper_cvttss2si(XMMReg *s)
+int32_t helper_cvttss2si(CPUX86State *env, XMMReg *s)
 {
     return float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status);
 }
 
-int32_t helper_cvttsd2si(XMMReg *s)
+int32_t helper_cvttsd2si(CPUX86State *env, XMMReg *s)
 {
     return float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status);
 }
 
 #ifdef TARGET_X86_64
-int64_t helper_cvttss2sq(XMMReg *s)
+int64_t helper_cvttss2sq(CPUX86State *env, XMMReg *s)
 {
     return float32_to_int64_round_to_zero(s->XMM_S(0), &env->sse_status);
 }
 
-int64_t helper_cvttsd2sq(XMMReg *s)
+int64_t helper_cvttsd2sq(CPUX86State *env, XMMReg *s)
 {
     return float64_to_int64_round_to_zero(s->XMM_D(0), &env->sse_status);
 }
 #endif
 
-void helper_rsqrtps(XMMReg *d, XMMReg *s)
+void helper_rsqrtps(CPUX86State *env, XMMReg *d, XMMReg *s)
 {
-    d->XMM_S(0) = approx_rsqrt(s->XMM_S(0));
-    d->XMM_S(1) = approx_rsqrt(s->XMM_S(1));
-    d->XMM_S(2) = approx_rsqrt(s->XMM_S(2));
-    d->XMM_S(3) = approx_rsqrt(s->XMM_S(3));
+    d->XMM_S(0) = float32_div(float32_one,
+                              float32_sqrt(s->XMM_S(0), &env->sse_status),
+                              &env->sse_status);
+    d->XMM_S(1) = float32_div(float32_one,
+                              float32_sqrt(s->XMM_S(1), &env->sse_status),
+                              &env->sse_status);
+    d->XMM_S(2) = float32_div(float32_one,
+                              float32_sqrt(s->XMM_S(2), &env->sse_status),
+                              &env->sse_status);
+    d->XMM_S(3) = float32_div(float32_one,
+                              float32_sqrt(s->XMM_S(3), &env->sse_status),
+                              &env->sse_status);
 }
 
-void helper_rsqrtss(XMMReg *d, XMMReg *s)
+void helper_rsqrtss(CPUX86State *env, XMMReg *d, XMMReg *s)
 {
-    d->XMM_S(0) = approx_rsqrt(s->XMM_S(0));
+    d->XMM_S(0) = float32_div(float32_one,
+                              float32_sqrt(s->XMM_S(0), &env->sse_status),
+                              &env->sse_status);
 }
 
-void helper_rcpps(XMMReg *d, XMMReg *s)
+void helper_rcpps(CPUX86State *env, XMMReg *d, XMMReg *s)
 {
-    d->XMM_S(0) = approx_rcp(s->XMM_S(0));
-    d->XMM_S(1) = approx_rcp(s->XMM_S(1));
-    d->XMM_S(2) = approx_rcp(s->XMM_S(2));
-    d->XMM_S(3) = approx_rcp(s->XMM_S(3));
+    d->XMM_S(0) = float32_div(float32_one, s->XMM_S(0), &env->sse_status);
+    d->XMM_S(1) = float32_div(float32_one, s->XMM_S(1), &env->sse_status);
+    d->XMM_S(2) = float32_div(float32_one, s->XMM_S(2), &env->sse_status);
+    d->XMM_S(3) = float32_div(float32_one, s->XMM_S(3), &env->sse_status);
 }
 
-void helper_rcpss(XMMReg *d, XMMReg *s)
+void helper_rcpss(CPUX86State *env, XMMReg *d, XMMReg *s)
 {
-    d->XMM_S(0) = approx_rcp(s->XMM_S(0));
+    d->XMM_S(0) = float32_div(float32_one, s->XMM_S(0), &env->sse_status);
 }
 
-void helper_haddps(XMMReg *d, XMMReg *s)
+static inline uint64_t helper_extrq(uint64_t src, int shift, int len)
+{
+    uint64_t mask;
+
+    if (len == 0) {
+        mask = ~0LL;
+    } else {
+        mask = (1ULL << len) - 1;
+    }
+    return (src >> shift) & mask;
+}
+
+void helper_extrq_r(CPUX86State *env, XMMReg *d, XMMReg *s)
+{
+    d->XMM_Q(0) = helper_extrq(d->XMM_Q(0), s->XMM_B(1), s->XMM_B(0));
+}
+
+void helper_extrq_i(CPUX86State *env, XMMReg *d, int index, int length)
+{
+    d->XMM_Q(0) = helper_extrq(d->XMM_Q(0), index, length);
+}
+
+static inline uint64_t helper_insertq(uint64_t src, int shift, int len)
+{
+    uint64_t mask;
+
+    if (len == 0) {
+        mask = ~0ULL;
+    } else {
+        mask = (1ULL << len) - 1;
+    }
+    return (src & ~(mask << shift)) | ((src & mask) << shift);
+}
+
+void helper_insertq_r(CPUX86State *env, XMMReg *d, XMMReg *s)
+{
+    d->XMM_Q(0) = helper_insertq(s->XMM_Q(0), s->XMM_B(9), s->XMM_B(8));
+}
+
+void helper_insertq_i(CPUX86State *env, XMMReg *d, int index, int length)
+{
+    d->XMM_Q(0) = helper_insertq(d->XMM_Q(0), index, length);
+}
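
For reference, the field arithmetic shared by helper_extrq()/helper_insertq() above: a length of 0 encodes "all 64 bits", otherwise the low 'len' bits starting at bit 'shift' are selected. A short sketch with arbitrary sample values:

#include <assert.h>
#include <stdint.h>

/* Same field-extract computation as helper_extrq() above. */
static uint64_t extrq_bits(uint64_t src, int shift, int len)
{
    uint64_t mask = len ? (1ULL << len) - 1 : ~0ULL;

    return (src >> shift) & mask;
}

int main(void)
{
    /* Extract the 16-bit field starting at bit 8. */
    assert(extrq_bits(0x1122334455667788ULL, 8, 16) == 0x6677);
    /* len == 0 is the "whole 64 bits" encoding. */
    assert(extrq_bits(0x1122334455667788ULL, 0, 0) == 0x1122334455667788ULL);
    return 0;
}
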
+
+void helper_haddps(CPUX86State *env, XMMReg *d, XMMReg *s)
 {
     XMMReg r;
-    r.XMM_S(0) = d->XMM_S(0) + d->XMM_S(1);
-    r.XMM_S(1) = d->XMM_S(2) + d->XMM_S(3);
-    r.XMM_S(2) = s->XMM_S(0) + s->XMM_S(1);
-    r.XMM_S(3) = s->XMM_S(2) + s->XMM_S(3);
+
+    r.XMM_S(0) = float32_add(d->XMM_S(0), d->XMM_S(1), &env->sse_status);
+    r.XMM_S(1) = float32_add(d->XMM_S(2), d->XMM_S(3), &env->sse_status);
+    r.XMM_S(2) = float32_add(s->XMM_S(0), s->XMM_S(1), &env->sse_status);
+    r.XMM_S(3) = float32_add(s->XMM_S(2), s->XMM_S(3), &env->sse_status);
     *d = r;
 }
 
-void helper_haddpd(XMMReg *d, XMMReg *s)
+void helper_haddpd(CPUX86State *env, XMMReg *d, XMMReg *s)
 {
     XMMReg r;
-    r.XMM_D(0) = d->XMM_D(0) + d->XMM_D(1);
-    r.XMM_D(1) = s->XMM_D(0) + s->XMM_D(1);
+
+    r.XMM_D(0) = float64_add(d->XMM_D(0), d->XMM_D(1), &env->sse_status);
+    r.XMM_D(1) = float64_add(s->XMM_D(0), s->XMM_D(1), &env->sse_status);
     *d = r;
 }
 
-void helper_hsubps(XMMReg *d, XMMReg *s)
+void helper_hsubps(CPUX86State *env, XMMReg *d, XMMReg *s)
 {
     XMMReg r;
-    r.XMM_S(0) = d->XMM_S(0) - d->XMM_S(1);
-    r.XMM_S(1) = d->XMM_S(2) - d->XMM_S(3);
-    r.XMM_S(2) = s->XMM_S(0) - s->XMM_S(1);
-    r.XMM_S(3) = s->XMM_S(2) - s->XMM_S(3);
+
+    r.XMM_S(0) = float32_sub(d->XMM_S(0), d->XMM_S(1), &env->sse_status);
+    r.XMM_S(1) = float32_sub(d->XMM_S(2), d->XMM_S(3), &env->sse_status);
+    r.XMM_S(2) = float32_sub(s->XMM_S(0), s->XMM_S(1), &env->sse_status);
+    r.XMM_S(3) = float32_sub(s->XMM_S(2), s->XMM_S(3), &env->sse_status);
     *d = r;
 }
 
-void helper_hsubpd(XMMReg *d, XMMReg *s)
+void helper_hsubpd(CPUX86State *env, XMMReg *d, XMMReg *s)
 {
     XMMReg r;
-    r.XMM_D(0) = d->XMM_D(0) - d->XMM_D(1);
-    r.XMM_D(1) = s->XMM_D(0) - s->XMM_D(1);
+
+    r.XMM_D(0) = float64_sub(d->XMM_D(0), d->XMM_D(1), &env->sse_status);
+    r.XMM_D(1) = float64_sub(s->XMM_D(0), s->XMM_D(1), &env->sse_status);
     *d = r;
 }
 
-void helper_addsubps(XMMReg *d, XMMReg *s)
+void helper_addsubps(CPUX86State *env, XMMReg *d, XMMReg *s)
 {
-    d->XMM_S(0) = d->XMM_S(0) - s->XMM_S(0);
-    d->XMM_S(1) = d->XMM_S(1) + s->XMM_S(1);
-    d->XMM_S(2) = d->XMM_S(2) - s->XMM_S(2);
-    d->XMM_S(3) = d->XMM_S(3) + s->XMM_S(3);
+    d->XMM_S(0) = float32_sub(d->XMM_S(0), s->XMM_S(0), &env->sse_status);
+    d->XMM_S(1) = float32_add(d->XMM_S(1), s->XMM_S(1), &env->sse_status);
+    d->XMM_S(2) = float32_sub(d->XMM_S(2), s->XMM_S(2), &env->sse_status);
+    d->XMM_S(3) = float32_add(d->XMM_S(3), s->XMM_S(3), &env->sse_status);
 }
 
-void helper_addsubpd(XMMReg *d, XMMReg *s)
+void helper_addsubpd(CPUX86State *env, XMMReg *d, XMMReg *s)
 {
-    d->XMM_D(0) = d->XMM_D(0) - s->XMM_D(0);
-    d->XMM_D(1) = d->XMM_D(1) + s->XMM_D(1);
+    d->XMM_D(0) = float64_sub(d->XMM_D(0), s->XMM_D(0), &env->sse_status);
+    d->XMM_D(1) = float64_add(d->XMM_D(1), s->XMM_D(1), &env->sse_status);
 }
 
 /* XXX: unordered */
-#define SSE_HELPER_CMP(name, F)\
-void helper_ ## name ## ps (Reg *d, Reg *s)\
-{\
-    d->XMM_L(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
-    d->XMM_L(1) = F(32, d->XMM_S(1), s->XMM_S(1));\
-    d->XMM_L(2) = F(32, d->XMM_S(2), s->XMM_S(2));\
-    d->XMM_L(3) = F(32, d->XMM_S(3), s->XMM_S(3));\
-}\
-\
-void helper_ ## name ## ss (Reg *d, Reg *s)\
-{\
-    d->XMM_L(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
-}\
-void helper_ ## name ## pd (Reg *d, Reg *s)\
-{\
-    d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
-    d->XMM_Q(1) = F(64, d->XMM_D(1), s->XMM_D(1));\
-}\
-\
-void helper_ ## name ## sd (Reg *d, Reg *s)\
-{\
-    d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
-}
+#define SSE_HELPER_CMP(name, F)                                         \
+    void helper_ ## name ## ps(CPUX86State *env, Reg *d, Reg *s)        \
+    {                                                                   \
+        d->XMM_L(0) = F(32, d->XMM_S(0), s->XMM_S(0));                  \
+        d->XMM_L(1) = F(32, d->XMM_S(1), s->XMM_S(1));                  \
+        d->XMM_L(2) = F(32, d->XMM_S(2), s->XMM_S(2));                  \
+        d->XMM_L(3) = F(32, d->XMM_S(3), s->XMM_S(3));                  \
+    }                                                                   \
+                                                                        \
+    void helper_ ## name ## ss(CPUX86State *env, Reg *d, Reg *s)        \
+    {                                                                   \
+        d->XMM_L(0) = F(32, d->XMM_S(0), s->XMM_S(0));                  \
+    }                                                                   \
+                                                                        \
+    void helper_ ## name ## pd(CPUX86State *env, Reg *d, Reg *s)        \
+    {                                                                   \
+        d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0));                  \
+        d->XMM_Q(1) = F(64, d->XMM_D(1), s->XMM_D(1));                  \
+    }                                                                   \
+                                                                        \
+    void helper_ ## name ## sd(CPUX86State *env, Reg *d, Reg *s)        \
+    {                                                                   \
+        d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0));                  \
+    }
 
-#define FPU_CMPEQ(size, a, b) float ## size ## _eq(a, b, &env->sse_status) ? -1 : 0
-#define FPU_CMPLT(size, a, b) float ## size ## _lt(a, b, &env->sse_status) ? -1 : 0
-#define FPU_CMPLE(size, a, b) float ## size ## _le(a, b, &env->sse_status) ? -1 : 0
-#define FPU_CMPUNORD(size, a, b) float ## size ## _unordered(a, b, &env->sse_status) ? - 1 : 0
-#define FPU_CMPNEQ(size, a, b) float ## size ## _eq(a, b, &env->sse_status) ? 0 : -1
-#define FPU_CMPNLT(size, a, b) float ## size ## _lt(a, b, &env->sse_status) ? 0 : -1
-#define FPU_CMPNLE(size, a, b) float ## size ## _le(a, b, &env->sse_status) ? 0 : -1
-#define FPU_CMPORD(size, a, b) float ## size ## _unordered(a, b, &env->sse_status) ? 0 : -1
+#define FPU_CMPEQ(size, a, b)                                           \
+    (float ## size ## _eq_quiet(a, b, &env->sse_status) ? -1 : 0)
+#define FPU_CMPLT(size, a, b)                                           \
+    (float ## size ## _lt(a, b, &env->sse_status) ? -1 : 0)
+#define FPU_CMPLE(size, a, b)                                           \
+    (float ## size ## _le(a, b, &env->sse_status) ? -1 : 0)
+#define FPU_CMPUNORD(size, a, b)                                        \
+    (float ## size ## _unordered_quiet(a, b, &env->sse_status) ? -1 : 0)
+#define FPU_CMPNEQ(size, a, b)                                          \
+    (float ## size ## _eq_quiet(a, b, &env->sse_status) ? 0 : -1)
+#define FPU_CMPNLT(size, a, b)                                          \
+    (float ## size ## _lt(a, b, &env->sse_status) ? 0 : -1)
+#define FPU_CMPNLE(size, a, b)                                          \
+    (float ## size ## _le(a, b, &env->sse_status) ? 0 : -1)
+#define FPU_CMPORD(size, a, b)                                          \
+    (float ## size ## _unordered_quiet(a, b, &env->sse_status) ? 0 : -1)
 
 SSE_HELPER_CMP(cmpeq, FPU_CMPEQ)
 SSE_HELPER_CMP(cmplt, FPU_CMPLT)
@@ -896,9 +992,9 @@
 SSE_HELPER_CMP(cmpnle, FPU_CMPNLE)
 SSE_HELPER_CMP(cmpord, FPU_CMPORD)
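The cmp*ps/ss/pd/sd helpers generated above keep the SSE convention of writing an all-ones (-1) or all-zero mask per lane; the *_quiet comparison variants are the non-signalling forms, so quiet NaNs do not raise the invalid flag. A one-lane host-float sketch of the mask convention (illustration only):

#include <assert.h>
#include <stdint.h>

/* One lane of a CMPEQPS-style predicate: full mask on match, zero otherwise. */
static uint32_t cmpeq_lane_sketch(float a, float b)
{
    return (a == b) ? (uint32_t)-1 : 0;
}

int main(void)
{
    assert(cmpeq_lane_sketch(1.5f, 1.5f) == 0xFFFFFFFFu);
    assert(cmpeq_lane_sketch(1.5f, 2.5f) == 0u);
    return 0;
}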
 
-const int comis_eflags[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
+static const int comis_eflags[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
 
-void helper_ucomiss(Reg *d, Reg *s)
+void helper_ucomiss(CPUX86State *env, Reg *d, Reg *s)
 {
     int ret;
     float32 s0, s1;
@@ -909,7 +1005,7 @@
     CC_SRC = comis_eflags[ret + 1];
 }
 
-void helper_comiss(Reg *d, Reg *s)
+void helper_comiss(CPUX86State *env, Reg *d, Reg *s)
 {
     int ret;
     float32 s0, s1;
@@ -920,7 +1016,7 @@
     CC_SRC = comis_eflags[ret + 1];
 }
 
-void helper_ucomisd(Reg *d, Reg *s)
+void helper_ucomisd(CPUX86State *env, Reg *d, Reg *s)
 {
     int ret;
     float64 d0, d1;
@@ -931,7 +1027,7 @@
     CC_SRC = comis_eflags[ret + 1];
 }
 
-void helper_comisd(Reg *d, Reg *s)
+void helper_comisd(CPUX86State *env, Reg *d, Reg *s)
 {
     int ret;
     float64 d0, d1;
@@ -942,9 +1038,10 @@
     CC_SRC = comis_eflags[ret + 1];
 }
 
-uint32_t helper_movmskps(Reg *s)
+uint32_t helper_movmskps(CPUX86State *env, Reg *s)
 {
     int b0, b1, b2, b3;
+
     b0 = s->XMM_L(0) >> 31;
     b1 = s->XMM_L(1) >> 31;
     b2 = s->XMM_L(2) >> 31;
@@ -952,9 +1049,10 @@
     return b0 | (b1 << 1) | (b2 << 2) | (b3 << 3);
 }
 
-uint32_t helper_movmskpd(Reg *s)
+uint32_t helper_movmskpd(CPUX86State *env, Reg *s)
 {
     int b0, b1;
+
     b0 = s->XMM_L(1) >> 31;
     b1 = s->XMM_L(3) >> 31;
     return b0 | (b1 << 1);
@@ -962,9 +1060,10 @@
 
 #endif
 
-uint32_t glue(helper_pmovmskb, SUFFIX)(Reg *s)
+uint32_t glue(helper_pmovmskb, SUFFIX)(CPUX86State *env, Reg *s)
 {
     uint32_t val;
+
     val = 0;
     val |= (s->B(0) >> 7);
     val |= (s->B(1) >> 6) & 0x02;
@@ -987,7 +1086,7 @@
     return val;
 }
 
-void glue(helper_packsswb, SUFFIX) (Reg *d, Reg *s)
+void glue(helper_packsswb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
     Reg r;
 
@@ -1014,7 +1113,7 @@
     *d = r;
 }
 
-void glue(helper_packuswb, SUFFIX) (Reg *d, Reg *s)
+void glue(helper_packuswb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
     Reg r;
 
@@ -1041,7 +1140,7 @@
     *d = r;
 }
 
-void glue(helper_packssdw, SUFFIX) (Reg *d, Reg *s)
+void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
     Reg r;
 
@@ -1060,202 +1159,226 @@
     *d = r;
 }
 
-#define UNPCK_OP(base_name, base)                               \
-                                                                \
-void glue(helper_punpck ## base_name ## bw, SUFFIX) (Reg *d, Reg *s)   \
-{                                                               \
-    Reg r;                                              \
-                                                                \
-    r.B(0) = d->B((base << (SHIFT + 2)) + 0);                   \
-    r.B(1) = s->B((base << (SHIFT + 2)) + 0);                   \
-    r.B(2) = d->B((base << (SHIFT + 2)) + 1);                   \
-    r.B(3) = s->B((base << (SHIFT + 2)) + 1);                   \
-    r.B(4) = d->B((base << (SHIFT + 2)) + 2);                   \
-    r.B(5) = s->B((base << (SHIFT + 2)) + 2);                   \
-    r.B(6) = d->B((base << (SHIFT + 2)) + 3);                   \
-    r.B(7) = s->B((base << (SHIFT + 2)) + 3);                   \
-XMM_ONLY(                                                       \
-    r.B(8) = d->B((base << (SHIFT + 2)) + 4);                   \
-    r.B(9) = s->B((base << (SHIFT + 2)) + 4);                   \
-    r.B(10) = d->B((base << (SHIFT + 2)) + 5);                  \
-    r.B(11) = s->B((base << (SHIFT + 2)) + 5);                  \
-    r.B(12) = d->B((base << (SHIFT + 2)) + 6);                  \
-    r.B(13) = s->B((base << (SHIFT + 2)) + 6);                  \
-    r.B(14) = d->B((base << (SHIFT + 2)) + 7);                  \
-    r.B(15) = s->B((base << (SHIFT + 2)) + 7);                  \
-)                                                               \
-    *d = r;                                                     \
-}                                                               \
-                                                                \
-void glue(helper_punpck ## base_name ## wd, SUFFIX) (Reg *d, Reg *s)   \
-{                                                               \
-    Reg r;                                              \
-                                                                \
-    r.W(0) = d->W((base << (SHIFT + 1)) + 0);                   \
-    r.W(1) = s->W((base << (SHIFT + 1)) + 0);                   \
-    r.W(2) = d->W((base << (SHIFT + 1)) + 1);                   \
-    r.W(3) = s->W((base << (SHIFT + 1)) + 1);                   \
-XMM_ONLY(                                                       \
-    r.W(4) = d->W((base << (SHIFT + 1)) + 2);                   \
-    r.W(5) = s->W((base << (SHIFT + 1)) + 2);                   \
-    r.W(6) = d->W((base << (SHIFT + 1)) + 3);                   \
-    r.W(7) = s->W((base << (SHIFT + 1)) + 3);                   \
-)                                                               \
-    *d = r;                                                     \
-}                                                               \
-                                                                \
-void glue(helper_punpck ## base_name ## dq, SUFFIX) (Reg *d, Reg *s)   \
-{                                                               \
-    Reg r;                                              \
-                                                                \
-    r.L(0) = d->L((base << SHIFT) + 0);                         \
-    r.L(1) = s->L((base << SHIFT) + 0);                         \
-XMM_ONLY(                                                       \
-    r.L(2) = d->L((base << SHIFT) + 1);                         \
-    r.L(3) = s->L((base << SHIFT) + 1);                         \
-)                                                               \
-    *d = r;                                                     \
-}                                                               \
-                                                                \
-XMM_ONLY(                                                       \
-void glue(helper_punpck ## base_name ## qdq, SUFFIX) (Reg *d, Reg *s)  \
-{                                                               \
-    Reg r;                                              \
-                                                                \
-    r.Q(0) = d->Q(base);                                        \
-    r.Q(1) = s->Q(base);                                        \
-    *d = r;                                                     \
-}                                                               \
-)
+#define UNPCK_OP(base_name, base)                                       \
+                                                                        \
+    void glue(helper_punpck ## base_name ## bw, SUFFIX)(CPUX86State *env,\
+                                                        Reg *d, Reg *s) \
+    {                                                                   \
+        Reg r;                                                          \
+                                                                        \
+        r.B(0) = d->B((base << (SHIFT + 2)) + 0);                       \
+        r.B(1) = s->B((base << (SHIFT + 2)) + 0);                       \
+        r.B(2) = d->B((base << (SHIFT + 2)) + 1);                       \
+        r.B(3) = s->B((base << (SHIFT + 2)) + 1);                       \
+        r.B(4) = d->B((base << (SHIFT + 2)) + 2);                       \
+        r.B(5) = s->B((base << (SHIFT + 2)) + 2);                       \
+        r.B(6) = d->B((base << (SHIFT + 2)) + 3);                       \
+        r.B(7) = s->B((base << (SHIFT + 2)) + 3);                       \
+        XMM_ONLY(                                                       \
+                 r.B(8) = d->B((base << (SHIFT + 2)) + 4);              \
+                 r.B(9) = s->B((base << (SHIFT + 2)) + 4);              \
+                 r.B(10) = d->B((base << (SHIFT + 2)) + 5);             \
+                 r.B(11) = s->B((base << (SHIFT + 2)) + 5);             \
+                 r.B(12) = d->B((base << (SHIFT + 2)) + 6);             \
+                 r.B(13) = s->B((base << (SHIFT + 2)) + 6);             \
+                 r.B(14) = d->B((base << (SHIFT + 2)) + 7);             \
+                 r.B(15) = s->B((base << (SHIFT + 2)) + 7);             \
+                                                                      ) \
+            *d = r;                                                     \
+    }                                                                   \
+                                                                        \
+    void glue(helper_punpck ## base_name ## wd, SUFFIX)(CPUX86State *env,\
+                                                        Reg *d, Reg *s) \
+    {                                                                   \
+        Reg r;                                                          \
+                                                                        \
+        r.W(0) = d->W((base << (SHIFT + 1)) + 0);                       \
+        r.W(1) = s->W((base << (SHIFT + 1)) + 0);                       \
+        r.W(2) = d->W((base << (SHIFT + 1)) + 1);                       \
+        r.W(3) = s->W((base << (SHIFT + 1)) + 1);                       \
+        XMM_ONLY(                                                       \
+                 r.W(4) = d->W((base << (SHIFT + 1)) + 2);              \
+                 r.W(5) = s->W((base << (SHIFT + 1)) + 2);              \
+                 r.W(6) = d->W((base << (SHIFT + 1)) + 3);              \
+                 r.W(7) = s->W((base << (SHIFT + 1)) + 3);              \
+                                                                      ) \
+            *d = r;                                                     \
+    }                                                                   \
+                                                                        \
+    void glue(helper_punpck ## base_name ## dq, SUFFIX)(CPUX86State *env,\
+                                                        Reg *d, Reg *s) \
+    {                                                                   \
+        Reg r;                                                          \
+                                                                        \
+        r.L(0) = d->L((base << SHIFT) + 0);                             \
+        r.L(1) = s->L((base << SHIFT) + 0);                             \
+        XMM_ONLY(                                                       \
+                 r.L(2) = d->L((base << SHIFT) + 1);                    \
+                 r.L(3) = s->L((base << SHIFT) + 1);                    \
+                                                                      ) \
+            *d = r;                                                     \
+    }                                                                   \
+                                                                        \
+    XMM_ONLY(                                                           \
+             void glue(helper_punpck ## base_name ## qdq, SUFFIX)(CPUX86State \
+                                                                  *env, \
+                                                                  Reg *d, \
+                                                                  Reg *s) \
+             {                                                          \
+                 Reg r;                                                 \
+                                                                        \
+                 r.Q(0) = d->Q(base);                                   \
+                 r.Q(1) = s->Q(base);                                   \
+                 *d = r;                                                \
+             }                                                          \
+                                                                        )
 
 UNPCK_OP(l, 0)
 UNPCK_OP(h, 1)
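UNPCK_OP(l, 0) and UNPCK_OP(h, 1) instantiate the byte/word/dword/qword interleave helpers. A standalone sketch of the low byte interleave for the MMX (SHIFT == 0) case, ignoring the B() element-order macro (illustration only, not part of the patch):

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* PUNPCKLBW-style interleave of the low halves of d and s. */
static void punpcklbw_sketch(uint8_t d[8], const uint8_t s[8])
{
    uint8_t r[8];
    int i;

    for (i = 0; i < 4; i++) {
        r[2 * i]     = d[i];
        r[2 * i + 1] = s[i];
    }
    memcpy(d, r, sizeof(r));
}

int main(void)
{
    uint8_t d[8] = {0, 1, 2, 3, 4, 5, 6, 7};
    uint8_t s[8] = {10, 11, 12, 13, 14, 15, 16, 17};
    uint8_t expect[8] = {0, 10, 1, 11, 2, 12, 3, 13};

    punpcklbw_sketch(d, s);
    assert(memcmp(d, expect, sizeof(expect)) == 0);
    return 0;
}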
 
 /* 3DNow! float ops */
 #if SHIFT == 0
-void helper_pi2fd(MMXReg *d, MMXReg *s)
+void helper_pi2fd(CPUX86State *env, MMXReg *d, MMXReg *s)
 {
     d->MMX_S(0) = int32_to_float32(s->MMX_L(0), &env->mmx_status);
     d->MMX_S(1) = int32_to_float32(s->MMX_L(1), &env->mmx_status);
 }
 
-void helper_pi2fw(MMXReg *d, MMXReg *s)
+void helper_pi2fw(CPUX86State *env, MMXReg *d, MMXReg *s)
 {
     d->MMX_S(0) = int32_to_float32((int16_t)s->MMX_W(0), &env->mmx_status);
     d->MMX_S(1) = int32_to_float32((int16_t)s->MMX_W(2), &env->mmx_status);
 }
 
-void helper_pf2id(MMXReg *d, MMXReg *s)
+void helper_pf2id(CPUX86State *env, MMXReg *d, MMXReg *s)
 {
     d->MMX_L(0) = float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status);
     d->MMX_L(1) = float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status);
 }
 
-void helper_pf2iw(MMXReg *d, MMXReg *s)
+void helper_pf2iw(CPUX86State *env, MMXReg *d, MMXReg *s)
 {
-    d->MMX_L(0) = satsw(float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status));
-    d->MMX_L(1) = satsw(float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status));
+    d->MMX_L(0) = satsw(float32_to_int32_round_to_zero(s->MMX_S(0),
+                                                       &env->mmx_status));
+    d->MMX_L(1) = satsw(float32_to_int32_round_to_zero(s->MMX_S(1),
+                                                       &env->mmx_status));
 }
 
-void helper_pfacc(MMXReg *d, MMXReg *s)
+void helper_pfacc(CPUX86State *env, MMXReg *d, MMXReg *s)
 {
     MMXReg r;
+
     r.MMX_S(0) = float32_add(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
     r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
     *d = r;
 }
 
-void helper_pfadd(MMXReg *d, MMXReg *s)
+void helper_pfadd(CPUX86State *env, MMXReg *d, MMXReg *s)
 {
     d->MMX_S(0) = float32_add(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
     d->MMX_S(1) = float32_add(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
 }
 
-void helper_pfcmpeq(MMXReg *d, MMXReg *s)
+void helper_pfcmpeq(CPUX86State *env, MMXReg *d, MMXReg *s)
 {
-    d->MMX_L(0) = float32_eq(d->MMX_S(0), s->MMX_S(0), &env->mmx_status) ? -1 : 0;
-    d->MMX_L(1) = float32_eq(d->MMX_S(1), s->MMX_S(1), &env->mmx_status) ? -1 : 0;
+    d->MMX_L(0) = float32_eq_quiet(d->MMX_S(0), s->MMX_S(0),
+                                   &env->mmx_status) ? -1 : 0;
+    d->MMX_L(1) = float32_eq_quiet(d->MMX_S(1), s->MMX_S(1),
+                                   &env->mmx_status) ? -1 : 0;
 }
 
-void helper_pfcmpge(MMXReg *d, MMXReg *s)
+void helper_pfcmpge(CPUX86State *env, MMXReg *d, MMXReg *s)
 {
-    d->MMX_L(0) = float32_le(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0;
-    d->MMX_L(1) = float32_le(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? -1 : 0;
+    d->MMX_L(0) = float32_le(s->MMX_S(0), d->MMX_S(0),
+                             &env->mmx_status) ? -1 : 0;
+    d->MMX_L(1) = float32_le(s->MMX_S(1), d->MMX_S(1),
+                             &env->mmx_status) ? -1 : 0;
 }
 
-void helper_pfcmpgt(MMXReg *d, MMXReg *s)
+void helper_pfcmpgt(CPUX86State *env, MMXReg *d, MMXReg *s)
 {
-    d->MMX_L(0) = float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0;
-    d->MMX_L(1) = float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? -1 : 0;
+    d->MMX_L(0) = float32_lt(s->MMX_S(0), d->MMX_S(0),
+                             &env->mmx_status) ? -1 : 0;
+    d->MMX_L(1) = float32_lt(s->MMX_S(1), d->MMX_S(1),
+                             &env->mmx_status) ? -1 : 0;
 }
 
-void helper_pfmax(MMXReg *d, MMXReg *s)
+void helper_pfmax(CPUX86State *env, MMXReg *d, MMXReg *s)
 {
-    if (float32_lt(d->MMX_S(0), s->MMX_S(0), &env->mmx_status))
+    if (float32_lt(d->MMX_S(0), s->MMX_S(0), &env->mmx_status)) {
         d->MMX_S(0) = s->MMX_S(0);
-    if (float32_lt(d->MMX_S(1), s->MMX_S(1), &env->mmx_status))
+    }
+    if (float32_lt(d->MMX_S(1), s->MMX_S(1), &env->mmx_status)) {
         d->MMX_S(1) = s->MMX_S(1);
+    }
 }
 
-void helper_pfmin(MMXReg *d, MMXReg *s)
+void helper_pfmin(CPUX86State *env, MMXReg *d, MMXReg *s)
 {
-    if (float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status))
+    if (float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status)) {
         d->MMX_S(0) = s->MMX_S(0);
-    if (float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status))
+    }
+    if (float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status)) {
         d->MMX_S(1) = s->MMX_S(1);
+    }
 }
 
-void helper_pfmul(MMXReg *d, MMXReg *s)
+void helper_pfmul(CPUX86State *env, MMXReg *d, MMXReg *s)
 {
     d->MMX_S(0) = float32_mul(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
     d->MMX_S(1) = float32_mul(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
 }
 
-void helper_pfnacc(MMXReg *d, MMXReg *s)
+void helper_pfnacc(CPUX86State *env, MMXReg *d, MMXReg *s)
 {
     MMXReg r;
+
     r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
     r.MMX_S(1) = float32_sub(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
     *d = r;
 }
 
-void helper_pfpnacc(MMXReg *d, MMXReg *s)
+void helper_pfpnacc(CPUX86State *env, MMXReg *d, MMXReg *s)
 {
     MMXReg r;
+
     r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
     r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
     *d = r;
 }
 
-void helper_pfrcp(MMXReg *d, MMXReg *s)
+void helper_pfrcp(CPUX86State *env, MMXReg *d, MMXReg *s)
 {
-    d->MMX_S(0) = approx_rcp(s->MMX_S(0));
+    d->MMX_S(0) = float32_div(float32_one, s->MMX_S(0), &env->mmx_status);
     d->MMX_S(1) = d->MMX_S(0);
 }
 
-void helper_pfrsqrt(MMXReg *d, MMXReg *s)
+void helper_pfrsqrt(CPUX86State *env, MMXReg *d, MMXReg *s)
 {
     d->MMX_L(1) = s->MMX_L(0) & 0x7fffffff;
-    d->MMX_S(1) = approx_rsqrt(d->MMX_S(1));
+    d->MMX_S(1) = float32_div(float32_one,
+                              float32_sqrt(d->MMX_S(1), &env->mmx_status),
+                              &env->mmx_status);
     d->MMX_L(1) |= s->MMX_L(0) & 0x80000000;
     d->MMX_L(0) = d->MMX_L(1);
 }
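With the two changes above, PFRCP and PFRSQRT are now computed as exact softfloat 1/x and 1/sqrt(x) in env->mmx_status rather than the previous host-float approximations. Numerically, for example (host-float illustration only):

#include <assert.h>
#include <math.h>

int main(void)
{
    float x = 4.0f;

    assert(1.0f / x == 0.25f);        /* PFRCP-style result for x = 4 */
    assert(1.0f / sqrtf(x) == 0.5f);  /* PFRSQRT-style result for x = 4 */
    return 0;
}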
 
-void helper_pfsub(MMXReg *d, MMXReg *s)
+void helper_pfsub(CPUX86State *env, MMXReg *d, MMXReg *s)
 {
     d->MMX_S(0) = float32_sub(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
     d->MMX_S(1) = float32_sub(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
 }
 
-void helper_pfsubr(MMXReg *d, MMXReg *s)
+void helper_pfsubr(CPUX86State *env, MMXReg *d, MMXReg *s)
 {
     d->MMX_S(0) = float32_sub(s->MMX_S(0), d->MMX_S(0), &env->mmx_status);
     d->MMX_S(1) = float32_sub(s->MMX_S(1), d->MMX_S(1), &env->mmx_status);
 }
 
-void helper_pswapd(MMXReg *d, MMXReg *s)
+void helper_pswapd(CPUX86State *env, MMXReg *d, MMXReg *s)
 {
     MMXReg r;
+
     r.MMX_L(0) = s->MMX_L(1);
     r.MMX_L(1) = s->MMX_L(0);
     *d = r;
@@ -1263,18 +1386,19 @@
 #endif
 
 /* SSSE3 op helpers */
-void glue(helper_pshufb, SUFFIX) (Reg *d, Reg *s)
+void glue(helper_pshufb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
     int i;
     Reg r;
 
-    for (i = 0; i < (8 << SHIFT); i++)
+    for (i = 0; i < (8 << SHIFT); i++) {
         r.B(i) = (s->B(i) & 0x80) ? 0 : (d->B(s->B(i) & ((8 << SHIFT) - 1)));
+    }
 
     *d = r;
 }
 
-void glue(helper_phaddw, SUFFIX) (Reg *d, Reg *s)
+void glue(helper_phaddw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
     d->W(0) = (int16_t)d->W(0) + (int16_t)d->W(1);
     d->W(1) = (int16_t)d->W(2) + (int16_t)d->W(3);
@@ -1286,7 +1410,7 @@
     XMM_ONLY(d->W(7) = (int16_t)s->W(6) + (int16_t)s->W(7));
 }
 
-void glue(helper_phaddd, SUFFIX) (Reg *d, Reg *s)
+void glue(helper_phaddd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
     d->L(0) = (int32_t)d->L(0) + (int32_t)d->L(1);
     XMM_ONLY(d->L(1) = (int32_t)d->L(2) + (int32_t)d->L(3));
@@ -1294,7 +1418,7 @@
     XMM_ONLY(d->L(3) = (int32_t)s->L(2) + (int32_t)s->L(3));
 }
 
-void glue(helper_phaddsw, SUFFIX) (Reg *d, Reg *s)
+void glue(helper_phaddsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
     d->W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1));
     d->W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3));
@@ -1306,19 +1430,19 @@
     XMM_ONLY(d->W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7)));
 }
 
-void glue(helper_pmaddubsw, SUFFIX) (Reg *d, Reg *s)
+void glue(helper_pmaddubsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
-    d->W(0) = satsw((int8_t)s->B( 0) * (uint8_t)d->B( 0) +
-                    (int8_t)s->B( 1) * (uint8_t)d->B( 1));
-    d->W(1) = satsw((int8_t)s->B( 2) * (uint8_t)d->B( 2) +
-                    (int8_t)s->B( 3) * (uint8_t)d->B( 3));
-    d->W(2) = satsw((int8_t)s->B( 4) * (uint8_t)d->B( 4) +
-                    (int8_t)s->B( 5) * (uint8_t)d->B( 5));
-    d->W(3) = satsw((int8_t)s->B( 6) * (uint8_t)d->B( 6) +
-                    (int8_t)s->B( 7) * (uint8_t)d->B( 7));
+    d->W(0) = satsw((int8_t)s->B(0) * (uint8_t)d->B(0) +
+                    (int8_t)s->B(1) * (uint8_t)d->B(1));
+    d->W(1) = satsw((int8_t)s->B(2) * (uint8_t)d->B(2) +
+                    (int8_t)s->B(3) * (uint8_t)d->B(3));
+    d->W(2) = satsw((int8_t)s->B(4) * (uint8_t)d->B(4) +
+                    (int8_t)s->B(5) * (uint8_t)d->B(5));
+    d->W(3) = satsw((int8_t)s->B(6) * (uint8_t)d->B(6) +
+                    (int8_t)s->B(7) * (uint8_t)d->B(7));
 #if SHIFT == 1
-    d->W(4) = satsw((int8_t)s->B( 8) * (uint8_t)d->B( 8) +
-                    (int8_t)s->B( 9) * (uint8_t)d->B( 9));
+    d->W(4) = satsw((int8_t)s->B(8) * (uint8_t)d->B(8) +
+                    (int8_t)s->B(9) * (uint8_t)d->B(9));
     d->W(5) = satsw((int8_t)s->B(10) * (uint8_t)d->B(10) +
                     (int8_t)s->B(11) * (uint8_t)d->B(11));
     d->W(6) = satsw((int8_t)s->B(12) * (uint8_t)d->B(12) +
@@ -1328,7 +1452,7 @@
 #endif
 }
 
-void glue(helper_phsubw, SUFFIX) (Reg *d, Reg *s)
+void glue(helper_phsubw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
     d->W(0) = (int16_t)d->W(0) - (int16_t)d->W(1);
     d->W(1) = (int16_t)d->W(2) - (int16_t)d->W(3);
@@ -1340,7 +1464,7 @@
     XMM_ONLY(d->W(7) = (int16_t)s->W(6) - (int16_t)s->W(7));
 }
 
-void glue(helper_phsubd, SUFFIX) (Reg *d, Reg *s)
+void glue(helper_phsubd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
     d->L(0) = (int32_t)d->L(0) - (int32_t)d->L(1);
     XMM_ONLY(d->L(1) = (int32_t)d->L(2) - (int32_t)d->L(3));
@@ -1348,7 +1472,7 @@
     XMM_ONLY(d->L(3) = (int32_t)s->L(2) - (int32_t)s->L(3));
 }
 
-void glue(helper_phsubsw, SUFFIX) (Reg *d, Reg *s)
+void glue(helper_phsubsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
     d->W(0) = satsw((int16_t)d->W(0) - (int16_t)d->W(1));
     d->W(1) = satsw((int16_t)d->W(2) - (int16_t)d->W(3));
@@ -1360,24 +1484,25 @@
     XMM_ONLY(d->W(7) = satsw((int16_t)s->W(6) - (int16_t)s->W(7)));
 }
 
-#define FABSB(_, x) x > INT8_MAX  ? -(int8_t ) x : x
-#define FABSW(_, x) x > INT16_MAX ? -(int16_t) x : x
-#define FABSL(_, x) x > INT32_MAX ? -(int32_t) x : x
+#define FABSB(_, x) (x > INT8_MAX  ? -(int8_t)x : x)
+#define FABSW(_, x) (x > INT16_MAX ? -(int16_t)x : x)
+#define FABSL(_, x) (x > INT32_MAX ? -(int32_t)x : x)
 SSE_HELPER_B(helper_pabsb, FABSB)
 SSE_HELPER_W(helper_pabsw, FABSW)
 SSE_HELPER_L(helper_pabsd, FABSL)
 
-#define FMULHRSW(d, s) ((int16_t) d * (int16_t) s + 0x4000) >> 15
+#define FMULHRSW(d, s) (((int16_t) d * (int16_t)s + 0x4000) >> 15)
 SSE_HELPER_W(helper_pmulhrsw, FMULHRSW)
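FMULHRSW is the Q15 rounded multiply used by PMULHRSW: multiply two signed 16-bit fixed-point values, add the 0x4000 rounding bias, and keep the high half. A worked standalone example (illustration only, not part of the patch):

#include <assert.h>
#include <stdint.h>

static int16_t mulhrsw_sketch(int16_t a, int16_t b)
{
    return (int16_t)(((int32_t)a * b + 0x4000) >> 15);
}

int main(void)
{
    /* 0x4000 (0.5 in Q15) * 0x2000 (0.25) -> 0x1000 (0.125). */
    assert(mulhrsw_sketch(0x4000, 0x2000) == 0x1000);
    return 0;
}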
 
-#define FSIGNB(d, s) s <= INT8_MAX  ? s ? d : 0 : -(int8_t ) d
-#define FSIGNW(d, s) s <= INT16_MAX ? s ? d : 0 : -(int16_t) d
-#define FSIGNL(d, s) s <= INT32_MAX ? s ? d : 0 : -(int32_t) d
+#define FSIGNB(d, s) (s <= INT8_MAX  ? s ? d : 0 : -(int8_t)d)
+#define FSIGNW(d, s) (s <= INT16_MAX ? s ? d : 0 : -(int16_t)d)
+#define FSIGNL(d, s) (s <= INT32_MAX ? s ? d : 0 : -(int32_t)d)
 SSE_HELPER_B(helper_psignb, FSIGNB)
 SSE_HELPER_W(helper_psignw, FSIGNW)
 SSE_HELPER_L(helper_psignd, FSIGNL)
 
-void glue(helper_palignr, SUFFIX) (Reg *d, Reg *s, int32_t shift)
+void glue(helper_palignr, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
+                                  int32_t shift)
 {
     Reg r;
 
@@ -1389,17 +1514,17 @@
         shift <<= 3;
 #define SHR(v, i) (i < 64 && i > -64 ? i > 0 ? v >> (i) : (v << -(i)) : 0)
 #if SHIFT == 0
-        r.Q(0) = SHR(s->Q(0), shift -   0) |
-                 SHR(d->Q(0), shift -  64);
+        r.Q(0) = SHR(s->Q(0), shift - 0) |
+            SHR(d->Q(0), shift -  64);
 #else
-        r.Q(0) = SHR(s->Q(0), shift -   0) |
-                 SHR(s->Q(1), shift -  64) |
-                 SHR(d->Q(0), shift - 128) |
-                 SHR(d->Q(1), shift - 192);
-        r.Q(1) = SHR(s->Q(0), shift +  64) |
-                 SHR(s->Q(1), shift -   0) |
-                 SHR(d->Q(0), shift -  64) |
-                 SHR(d->Q(1), shift - 128);
+        r.Q(0) = SHR(s->Q(0), shift - 0) |
+            SHR(s->Q(1), shift -  64) |
+            SHR(d->Q(0), shift - 128) |
+            SHR(d->Q(1), shift - 192);
+        r.Q(1) = SHR(s->Q(0), shift + 64) |
+            SHR(s->Q(1), shift -   0) |
+            SHR(d->Q(0), shift -  64) |
+            SHR(d->Q(1), shift - 128);
 #endif
 #undef SHR
     }
@@ -1407,72 +1532,78 @@
     *d = r;
 }
 
-#define XMM0 env->xmm_regs[0]
+#define XMM0 (env->xmm_regs[0])
 
 #if SHIFT == 1
-#define SSE_HELPER_V(name, elem, num, F)\
-void glue(name, SUFFIX) (Reg *d, Reg *s)\
-{\
-    d->elem(0) = F(d->elem(0), s->elem(0), XMM0.elem(0));\
-    d->elem(1) = F(d->elem(1), s->elem(1), XMM0.elem(1));\
-    if (num > 2) {\
-        d->elem(2) = F(d->elem(2), s->elem(2), XMM0.elem(2));\
-        d->elem(3) = F(d->elem(3), s->elem(3), XMM0.elem(3));\
-        if (num > 4) {\
-            d->elem(4) = F(d->elem(4), s->elem(4), XMM0.elem(4));\
-            d->elem(5) = F(d->elem(5), s->elem(5), XMM0.elem(5));\
-            d->elem(6) = F(d->elem(6), s->elem(6), XMM0.elem(6));\
-            d->elem(7) = F(d->elem(7), s->elem(7), XMM0.elem(7));\
-            if (num > 8) {\
-                d->elem(8) = F(d->elem(8), s->elem(8), XMM0.elem(8));\
-                d->elem(9) = F(d->elem(9), s->elem(9), XMM0.elem(9));\
-                d->elem(10) = F(d->elem(10), s->elem(10), XMM0.elem(10));\
-                d->elem(11) = F(d->elem(11), s->elem(11), XMM0.elem(11));\
-                d->elem(12) = F(d->elem(12), s->elem(12), XMM0.elem(12));\
-                d->elem(13) = F(d->elem(13), s->elem(13), XMM0.elem(13));\
-                d->elem(14) = F(d->elem(14), s->elem(14), XMM0.elem(14));\
-                d->elem(15) = F(d->elem(15), s->elem(15), XMM0.elem(15));\
-            }\
-        }\
-    }\
-}
+#define SSE_HELPER_V(name, elem, num, F)                                \
+    void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)           \
+    {                                                                   \
+        d->elem(0) = F(d->elem(0), s->elem(0), XMM0.elem(0));           \
+        d->elem(1) = F(d->elem(1), s->elem(1), XMM0.elem(1));           \
+        if (num > 2) {                                                  \
+            d->elem(2) = F(d->elem(2), s->elem(2), XMM0.elem(2));       \
+            d->elem(3) = F(d->elem(3), s->elem(3), XMM0.elem(3));       \
+            if (num > 4) {                                              \
+                d->elem(4) = F(d->elem(4), s->elem(4), XMM0.elem(4));   \
+                d->elem(5) = F(d->elem(5), s->elem(5), XMM0.elem(5));   \
+                d->elem(6) = F(d->elem(6), s->elem(6), XMM0.elem(6));   \
+                d->elem(7) = F(d->elem(7), s->elem(7), XMM0.elem(7));   \
+                if (num > 8) {                                          \
+                    d->elem(8) = F(d->elem(8), s->elem(8), XMM0.elem(8)); \
+                    d->elem(9) = F(d->elem(9), s->elem(9), XMM0.elem(9)); \
+                    d->elem(10) = F(d->elem(10), s->elem(10), XMM0.elem(10)); \
+                    d->elem(11) = F(d->elem(11), s->elem(11), XMM0.elem(11)); \
+                    d->elem(12) = F(d->elem(12), s->elem(12), XMM0.elem(12)); \
+                    d->elem(13) = F(d->elem(13), s->elem(13), XMM0.elem(13)); \
+                    d->elem(14) = F(d->elem(14), s->elem(14), XMM0.elem(14)); \
+                    d->elem(15) = F(d->elem(15), s->elem(15), XMM0.elem(15)); \
+                }                                                       \
+            }                                                           \
+        }                                                               \
+    }
 
-#define SSE_HELPER_I(name, elem, num, F)\
-void glue(name, SUFFIX) (Reg *d, Reg *s, uint32_t imm)\
-{\
-    d->elem(0) = F(d->elem(0), s->elem(0), ((imm >> 0) & 1));\
-    d->elem(1) = F(d->elem(1), s->elem(1), ((imm >> 1) & 1));\
-    if (num > 2) {\
-        d->elem(2) = F(d->elem(2), s->elem(2), ((imm >> 2) & 1));\
-        d->elem(3) = F(d->elem(3), s->elem(3), ((imm >> 3) & 1));\
-        if (num > 4) {\
-            d->elem(4) = F(d->elem(4), s->elem(4), ((imm >> 4) & 1));\
-            d->elem(5) = F(d->elem(5), s->elem(5), ((imm >> 5) & 1));\
-            d->elem(6) = F(d->elem(6), s->elem(6), ((imm >> 6) & 1));\
-            d->elem(7) = F(d->elem(7), s->elem(7), ((imm >> 7) & 1));\
-            if (num > 8) {\
-                d->elem(8) = F(d->elem(8), s->elem(8), ((imm >> 8) & 1));\
-                d->elem(9) = F(d->elem(9), s->elem(9), ((imm >> 9) & 1));\
-                d->elem(10) = F(d->elem(10), s->elem(10), ((imm >> 10) & 1));\
-                d->elem(11) = F(d->elem(11), s->elem(11), ((imm >> 11) & 1));\
-                d->elem(12) = F(d->elem(12), s->elem(12), ((imm >> 12) & 1));\
-                d->elem(13) = F(d->elem(13), s->elem(13), ((imm >> 13) & 1));\
-                d->elem(14) = F(d->elem(14), s->elem(14), ((imm >> 14) & 1));\
-                d->elem(15) = F(d->elem(15), s->elem(15), ((imm >> 15) & 1));\
-            }\
-        }\
-    }\
-}
+#define SSE_HELPER_I(name, elem, num, F)                                \
+    void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t imm) \
+    {                                                                   \
+        d->elem(0) = F(d->elem(0), s->elem(0), ((imm >> 0) & 1));       \
+        d->elem(1) = F(d->elem(1), s->elem(1), ((imm >> 1) & 1));       \
+        if (num > 2) {                                                  \
+            d->elem(2) = F(d->elem(2), s->elem(2), ((imm >> 2) & 1));   \
+            d->elem(3) = F(d->elem(3), s->elem(3), ((imm >> 3) & 1));   \
+            if (num > 4) {                                              \
+                d->elem(4) = F(d->elem(4), s->elem(4), ((imm >> 4) & 1)); \
+                d->elem(5) = F(d->elem(5), s->elem(5), ((imm >> 5) & 1)); \
+                d->elem(6) = F(d->elem(6), s->elem(6), ((imm >> 6) & 1)); \
+                d->elem(7) = F(d->elem(7), s->elem(7), ((imm >> 7) & 1)); \
+                if (num > 8) {                                          \
+                    d->elem(8) = F(d->elem(8), s->elem(8), ((imm >> 8) & 1)); \
+                    d->elem(9) = F(d->elem(9), s->elem(9), ((imm >> 9) & 1)); \
+                    d->elem(10) = F(d->elem(10), s->elem(10),           \
+                                    ((imm >> 10) & 1));                 \
+                    d->elem(11) = F(d->elem(11), s->elem(11),           \
+                                    ((imm >> 11) & 1));                 \
+                    d->elem(12) = F(d->elem(12), s->elem(12),           \
+                                    ((imm >> 12) & 1));                 \
+                    d->elem(13) = F(d->elem(13), s->elem(13),           \
+                                    ((imm >> 13) & 1));                 \
+                    d->elem(14) = F(d->elem(14), s->elem(14),           \
+                                    ((imm >> 14) & 1));                 \
+                    d->elem(15) = F(d->elem(15), s->elem(15),           \
+                                    ((imm >> 15) & 1));                 \
+                }                                                       \
+            }                                                           \
+        }                                                               \
+    }
 
 /* SSE4.1 op helpers */
-#define FBLENDVB(d, s, m) (m & 0x80) ? s : d
-#define FBLENDVPS(d, s, m) (m & 0x80000000) ? s : d
-#define FBLENDVPD(d, s, m) (m & 0x8000000000000000LL) ? s : d
+#define FBLENDVB(d, s, m) ((m & 0x80) ? s : d)
+#define FBLENDVPS(d, s, m) ((m & 0x80000000) ? s : d)
+#define FBLENDVPD(d, s, m) ((m & 0x8000000000000000LL) ? s : d)
 SSE_HELPER_V(helper_pblendvb, B, 16, FBLENDVB)
 SSE_HELPER_V(helper_blendvps, L, 4, FBLENDVPS)
 SSE_HELPER_V(helper_blendvpd, Q, 2, FBLENDVPD)
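SSE_HELPER_V expands the variable-blend helpers, which select each result lane from s when the corresponding lane of the implicit mask register XMM0 has its top bit set. A one-lane sketch of the BLENDVPS case (illustration only):

#include <assert.h>
#include <stdint.h>

static uint32_t blendv_lane_sketch(uint32_t d, uint32_t s, uint32_t m)
{
    return (m & 0x80000000u) ? s : d;
}

int main(void)
{
    assert(blendv_lane_sketch(1, 2, 0x80000000u) == 2);  /* mask sign set: take s */
    assert(blendv_lane_sketch(1, 2, 0x7FFFFFFFu) == 1);  /* sign clear: keep d */
    return 0;
}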
 
-void glue(helper_ptest, SUFFIX) (Reg *d, Reg *s)
+void glue(helper_ptest, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
     uint64_t zf = (s->Q(0) &  d->Q(0)) | (s->Q(1) &  d->Q(1));
     uint64_t cf = (s->Q(0) & ~d->Q(0)) | (s->Q(1) & ~d->Q(1));
@@ -1480,22 +1611,22 @@
     CC_SRC = (zf ? 0 : CC_Z) | (cf ? 0 : CC_C);
 }
 
-#define SSE_HELPER_F(name, elem, num, F)\
-void glue(name, SUFFIX) (Reg *d, Reg *s)\
-{\
-    d->elem(0) = F(0);\
-    d->elem(1) = F(1);\
-    if (num > 2) {\
-        d->elem(2) = F(2);\
-        d->elem(3) = F(3);\
-        if (num > 4) {\
-            d->elem(4) = F(4);\
-            d->elem(5) = F(5);\
-            d->elem(6) = F(6);\
-            d->elem(7) = F(7);\
-        }\
-    }\
-}
+#define SSE_HELPER_F(name, elem, num, F)        \
+    void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)     \
+    {                                           \
+        d->elem(0) = F(0);                      \
+        d->elem(1) = F(1);                      \
+        if (num > 2) {                          \
+            d->elem(2) = F(2);                  \
+            d->elem(3) = F(3);                  \
+            if (num > 4) {                      \
+                d->elem(4) = F(4);              \
+                d->elem(5) = F(5);              \
+                d->elem(6) = F(6);              \
+                d->elem(7) = F(7);              \
+            }                                   \
+        }                                       \
+    }
 
 SSE_HELPER_F(helper_pmovsxbw, W, 8, (int8_t) s->B)
 SSE_HELPER_F(helper_pmovsxbd, L, 4, (int8_t) s->B)
@@ -1510,16 +1641,16 @@
 SSE_HELPER_F(helper_pmovzxwq, Q, 2, s->W)
 SSE_HELPER_F(helper_pmovzxdq, Q, 2, s->L)
 
-void glue(helper_pmuldq, SUFFIX) (Reg *d, Reg *s)
+void glue(helper_pmuldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
-    d->Q(0) = (int64_t) (int32_t) d->L(0) * (int32_t) s->L(0);
-    d->Q(1) = (int64_t) (int32_t) d->L(2) * (int32_t) s->L(2);
+    d->Q(0) = (int64_t)(int32_t) d->L(0) * (int32_t) s->L(0);
+    d->Q(1) = (int64_t)(int32_t) d->L(2) * (int32_t) s->L(2);
 }
 
-#define FCMPEQQ(d, s) d == s ? -1 : 0
+#define FCMPEQQ(d, s) (d == s ? -1 : 0)
 SSE_HELPER_Q(helper_pcmpeqq, FCMPEQQ)
 
-void glue(helper_packusdw, SUFFIX) (Reg *d, Reg *s)
+void glue(helper_packusdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
     d->W(0) = satuw((int32_t) d->L(0));
     d->W(1) = satuw((int32_t) d->L(1));
@@ -1531,10 +1662,10 @@
     d->W(7) = satuw((int32_t) s->L(3));
 }
 
-#define FMINSB(d, s) MIN((int8_t) d, (int8_t) s)
-#define FMINSD(d, s) MIN((int32_t) d, (int32_t) s)
-#define FMAXSB(d, s) MAX((int8_t) d, (int8_t) s)
-#define FMAXSD(d, s) MAX((int32_t) d, (int32_t) s)
+#define FMINSB(d, s) MIN((int8_t)d, (int8_t)s)
+#define FMINSD(d, s) MIN((int32_t)d, (int32_t)s)
+#define FMAXSB(d, s) MAX((int8_t)d, (int8_t)s)
+#define FMAXSD(d, s) MAX((int32_t)d, (int32_t)s)
 SSE_HELPER_B(helper_pminsb, FMINSB)
 SSE_HELPER_L(helper_pminsd, FMINSD)
 SSE_HELPER_W(helper_pminuw, MIN)
@@ -1544,27 +1675,34 @@
 SSE_HELPER_W(helper_pmaxuw, MAX)
 SSE_HELPER_L(helper_pmaxud, MAX)
 
-#define FMULLD(d, s) (int32_t) d * (int32_t) s
+#define FMULLD(d, s) ((int32_t)d * (int32_t)s)
 SSE_HELPER_L(helper_pmulld, FMULLD)
 
-void glue(helper_phminposuw, SUFFIX) (Reg *d, Reg *s)
+void glue(helper_phminposuw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
     int idx = 0;
 
-    if (s->W(1) < s->W(idx))
+    if (s->W(1) < s->W(idx)) {
         idx = 1;
-    if (s->W(2) < s->W(idx))
+    }
+    if (s->W(2) < s->W(idx)) {
         idx = 2;
-    if (s->W(3) < s->W(idx))
+    }
+    if (s->W(3) < s->W(idx)) {
         idx = 3;
-    if (s->W(4) < s->W(idx))
+    }
+    if (s->W(4) < s->W(idx)) {
         idx = 4;
-    if (s->W(5) < s->W(idx))
+    }
+    if (s->W(5) < s->W(idx)) {
         idx = 5;
-    if (s->W(6) < s->W(idx))
+    }
+    if (s->W(6) < s->W(idx)) {
         idx = 6;
-    if (s->W(7) < s->W(idx))
+    }
+    if (s->W(7) < s->W(idx)) {
         idx = 7;
+    }
 
     d->Q(1) = 0;
     d->L(1) = 0;
@@ -1572,12 +1710,13 @@
     d->W(0) = s->W(idx);
 }
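PHMINPOSUW scans the eight unsigned words of the source, returns the minimum in word 0 and the index of its first occurrence in word 1, and zeroes the remaining lanes; the strict < comparisons above keep the first occurrence. A standalone sketch (illustration only, not part of the patch):

#include <assert.h>
#include <stdint.h>

static void phminposuw_sketch(uint16_t d[8], const uint16_t s[8])
{
    int i, idx = 0;

    for (i = 1; i < 8; i++) {
        if (s[i] < s[idx]) {
            idx = i;
        }
    }
    d[0] = s[idx];
    d[1] = (uint16_t)idx;
    for (i = 2; i < 8; i++) {
        d[i] = 0;
    }
}

int main(void)
{
    uint16_t s[8] = {9, 4, 7, 4, 100, 8, 3, 3};
    uint16_t d[8];

    phminposuw_sketch(d, s);
    assert(d[0] == 3 && d[1] == 6);   /* first occurrence of the minimum */
    return 0;
}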
 
-void glue(helper_roundps, SUFFIX) (Reg *d, Reg *s, uint32_t mode)
+void glue(helper_roundps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
+                                  uint32_t mode)
 {
     signed char prev_rounding_mode;
 
     prev_rounding_mode = env->sse_status.float_rounding_mode;
-    if (!(mode & (1 << 2)))
+    if (!(mode & (1 << 2))) {
         switch (mode & 3) {
         case 0:
             set_float_rounding_mode(float_round_nearest_even, &env->sse_status);
@@ -1592,28 +1731,30 @@
             set_float_rounding_mode(float_round_to_zero, &env->sse_status);
             break;
         }
+    }
 
-    d->L(0) = float64_round_to_int(s->L(0), &env->sse_status);
-    d->L(1) = float64_round_to_int(s->L(1), &env->sse_status);
-    d->L(2) = float64_round_to_int(s->L(2), &env->sse_status);
-    d->L(3) = float64_round_to_int(s->L(3), &env->sse_status);
+    d->XMM_S(0) = float32_round_to_int(s->XMM_S(0), &env->sse_status);
+    d->XMM_S(1) = float32_round_to_int(s->XMM_S(1), &env->sse_status);
+    d->XMM_S(2) = float32_round_to_int(s->XMM_S(2), &env->sse_status);
+    d->XMM_S(3) = float32_round_to_int(s->XMM_S(3), &env->sse_status);
 
 #if 0 /* TODO */
-    if (mode & (1 << 3))
-        set_float_exception_flags(
-                        get_float_exception_flags(&env->sse_status) &
-                        ~float_flag_inexact,
-                        &env->sse_status);
+    if (mode & (1 << 3)) {
+        set_float_exception_flags(get_float_exception_flags(&env->sse_status) &
+                                  ~float_flag_inexact,
+                                  &env->sse_status);
+    }
 #endif
     env->sse_status.float_rounding_mode = prev_rounding_mode;
 }
 
-void glue(helper_roundpd, SUFFIX) (Reg *d, Reg *s, uint32_t mode)
+void glue(helper_roundpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
+                                  uint32_t mode)
 {
     signed char prev_rounding_mode;
 
     prev_rounding_mode = env->sse_status.float_rounding_mode;
-    if (!(mode & (1 << 2)))
+    if (!(mode & (1 << 2))) {
         switch (mode & 3) {
         case 0:
             set_float_rounding_mode(float_round_nearest_even, &env->sse_status);
@@ -1628,26 +1769,28 @@
             set_float_rounding_mode(float_round_to_zero, &env->sse_status);
             break;
         }
+    }
 
-    d->Q(0) = float64_round_to_int(s->Q(0), &env->sse_status);
-    d->Q(1) = float64_round_to_int(s->Q(1), &env->sse_status);
+    d->XMM_D(0) = float64_round_to_int(s->XMM_D(0), &env->sse_status);
+    d->XMM_D(1) = float64_round_to_int(s->XMM_D(1), &env->sse_status);
 
 #if 0 /* TODO */
-    if (mode & (1 << 3))
-        set_float_exception_flags(
-                        get_float_exception_flags(&env->sse_status) &
-                        ~float_flag_inexact,
-                        &env->sse_status);
+    if (mode & (1 << 3)) {
+        set_float_exception_flags(get_float_exception_flags(&env->sse_status) &
+                                  ~float_flag_inexact,
+                                  &env->sse_status);
+    }
 #endif
     env->sse_status.float_rounding_mode = prev_rounding_mode;
 }
 
-void glue(helper_roundss, SUFFIX) (Reg *d, Reg *s, uint32_t mode)
+void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
+                                  uint32_t mode)
 {
     signed char prev_rounding_mode;
 
     prev_rounding_mode = env->sse_status.float_rounding_mode;
-    if (!(mode & (1 << 2)))
+    if (!(mode & (1 << 2))) {
         switch (mode & 3) {
         case 0:
             set_float_rounding_mode(float_round_nearest_even, &env->sse_status);
@@ -1662,25 +1805,27 @@
             set_float_rounding_mode(float_round_to_zero, &env->sse_status);
             break;
         }
+    }
 
-    d->L(0) = float64_round_to_int(s->L(0), &env->sse_status);
+    d->XMM_S(0) = float32_round_to_int(s->XMM_S(0), &env->sse_status);
 
 #if 0 /* TODO */
-    if (mode & (1 << 3))
-        set_float_exception_flags(
-                        get_float_exception_flags(&env->sse_status) &
-                        ~float_flag_inexact,
-                        &env->sse_status);
+    if (mode & (1 << 3)) {
+        set_float_exception_flags(get_float_exception_flags(&env->sse_status) &
+                                  ~float_flag_inexact,
+                                  &env->sse_status);
+    }
 #endif
     env->sse_status.float_rounding_mode = prev_rounding_mode;
 }
 
-void glue(helper_roundsd, SUFFIX) (Reg *d, Reg *s, uint32_t mode)
+void glue(helper_roundsd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
+                                  uint32_t mode)
 {
     signed char prev_rounding_mode;
 
     prev_rounding_mode = env->sse_status.float_rounding_mode;
-    if (!(mode & (1 << 2)))
+    if (!(mode & (1 << 2))) {
         switch (mode & 3) {
         case 0:
             set_float_rounding_mode(float_round_nearest_even, &env->sse_status);
@@ -1695,67 +1840,81 @@
             set_float_rounding_mode(float_round_to_zero, &env->sse_status);
             break;
         }
+    }
 
-    d->Q(0) = float64_round_to_int(s->Q(0), &env->sse_status);
+    d->XMM_D(0) = float64_round_to_int(s->XMM_D(0), &env->sse_status);
 
 #if 0 /* TODO */
-    if (mode & (1 << 3))
-        set_float_exception_flags(
-                        get_float_exception_flags(&env->sse_status) &
-                        ~float_flag_inexact,
-                        &env->sse_status);
+    if (mode & (1 << 3)) {
+        set_float_exception_flags(get_float_exception_flags(&env->sse_status) &
+                                  ~float_flag_inexact,
+                                  &env->sse_status);
+    }
 #endif
     env->sse_status.float_rounding_mode = prev_rounding_mode;
 }
 
-#define FBLENDP(d, s, m) m ? s : d
+#define FBLENDP(d, s, m) (m ? s : d)
 SSE_HELPER_I(helper_blendps, L, 4, FBLENDP)
 SSE_HELPER_I(helper_blendpd, Q, 2, FBLENDP)
 SSE_HELPER_I(helper_pblendw, W, 8, FBLENDP)
 
-void glue(helper_dpps, SUFFIX) (Reg *d, Reg *s, uint32_t mask)
+void glue(helper_dpps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mask)
 {
-    float32 iresult = 0 /*float32_zero*/;
+    float32 iresult = float32_zero;
 
-    if (mask & (1 << 4))
+    if (mask & (1 << 4)) {
         iresult = float32_add(iresult,
-                        float32_mul(d->L(0), s->L(0), &env->sse_status),
-                        &env->sse_status);
-    if (mask & (1 << 5))
+                              float32_mul(d->XMM_S(0), s->XMM_S(0),
+                                          &env->sse_status),
+                              &env->sse_status);
+    }
+    if (mask & (1 << 5)) {
         iresult = float32_add(iresult,
-                        float32_mul(d->L(1), s->L(1), &env->sse_status),
-                        &env->sse_status);
-    if (mask & (1 << 6))
+                              float32_mul(d->XMM_S(1), s->XMM_S(1),
+                                          &env->sse_status),
+                              &env->sse_status);
+    }
+    if (mask & (1 << 6)) {
         iresult = float32_add(iresult,
-                        float32_mul(d->L(2), s->L(2), &env->sse_status),
-                        &env->sse_status);
-    if (mask & (1 << 7))
+                              float32_mul(d->XMM_S(2), s->XMM_S(2),
+                                          &env->sse_status),
+                              &env->sse_status);
+    }
+    if (mask & (1 << 7)) {
         iresult = float32_add(iresult,
-                        float32_mul(d->L(3), s->L(3), &env->sse_status),
-                        &env->sse_status);
-    d->L(0) = (mask & (1 << 0)) ? iresult : 0 /*float32_zero*/;
-    d->L(1) = (mask & (1 << 1)) ? iresult : 0 /*float32_zero*/;
-    d->L(2) = (mask & (1 << 2)) ? iresult : 0 /*float32_zero*/;
-    d->L(3) = (mask & (1 << 3)) ? iresult : 0 /*float32_zero*/;
+                              float32_mul(d->XMM_S(3), s->XMM_S(3),
+                                          &env->sse_status),
+                              &env->sse_status);
+    }
+    d->XMM_S(0) = (mask & (1 << 0)) ? iresult : float32_zero;
+    d->XMM_S(1) = (mask & (1 << 1)) ? iresult : float32_zero;
+    d->XMM_S(2) = (mask & (1 << 2)) ? iresult : float32_zero;
+    d->XMM_S(3) = (mask & (1 << 3)) ? iresult : float32_zero;
 }
 
-void glue(helper_dppd, SUFFIX) (Reg *d, Reg *s, uint32_t mask)
+void glue(helper_dppd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mask)
 {
-    float64 iresult = 0 /*float64_zero*/;
+    float64 iresult = float64_zero;
 
-    if (mask & (1 << 4))
+    if (mask & (1 << 4)) {
         iresult = float64_add(iresult,
-                        float64_mul(d->Q(0), s->Q(0), &env->sse_status),
-                        &env->sse_status);
-    if (mask & (1 << 5))
+                              float64_mul(d->XMM_D(0), s->XMM_D(0),
+                                          &env->sse_status),
+                              &env->sse_status);
+    }
+    if (mask & (1 << 5)) {
         iresult = float64_add(iresult,
-                        float64_mul(d->Q(1), s->Q(1), &env->sse_status),
-                        &env->sse_status);
-    d->Q(0) = (mask & (1 << 0)) ? iresult : 0 /*float64_zero*/;
-    d->Q(1) = (mask & (1 << 1)) ? iresult : 0 /*float64_zero*/;
+                              float64_mul(d->XMM_D(1), s->XMM_D(1),
+                                          &env->sse_status),
+                              &env->sse_status);
+    }
+    d->XMM_D(0) = (mask & (1 << 0)) ? iresult : float64_zero;
+    d->XMM_D(1) = (mask & (1 << 1)) ? iresult : float64_zero;
 }
 
-void glue(helper_mpsadbw, SUFFIX) (Reg *d, Reg *s, uint32_t offset)
+void glue(helper_mpsadbw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
+                                  uint32_t offset)
 {
     int s0 = (offset & 3) << 2;
     int d0 = (offset & 4) << 0;
@@ -1774,27 +1933,29 @@
 }
 
 /* SSE4.2 op helpers */
-/* it's unclear whether signed or unsigned */
-#define FCMPGTQ(d, s) d > s ? -1 : 0
+#define FCMPGTQ(d, s) ((int64_t)d > (int64_t)s ? -1 : 0)
 SSE_HELPER_Q(helper_pcmpgtq, FCMPGTQ)
 
-static inline int pcmp_elen(int reg, uint32_t ctrl)
+static inline int pcmp_elen(CPUX86State *env, int reg, uint32_t ctrl)
 {
     int val;
 
     /* Presence of REX.W is indicated by a bit higher than 7 set */
-    if (ctrl >> 8)
-        val = abs1((int64_t) env->regs[reg]);
-    else
-        val = abs1((int32_t) env->regs[reg]);
+    if (ctrl >> 8) {
+        val = abs1((int64_t)env->regs[reg]);
+    } else {
+        val = abs1((int32_t)env->regs[reg]);
+    }
 
     if (ctrl & 1) {
-        if (val > 8)
+        if (val > 8) {
             return 8;
-    } else
-        if (val > 16)
+        }
+    } else {
+        if (val > 16) {
             return 16;
-
+        }
+    }
     return val;
 }
 
@@ -1803,11 +1964,14 @@
     int val = 0;
 
     if (ctrl & 1) {
-        while (val < 8 && r->W(val))
+        while (val < 8 && r->W(val)) {
             val++;
-    } else
-        while (val < 16 && r->B(val))
+        }
+    } else {
+        while (val < 16 && r->B(val)) {
             val++;
+        }
+    }
 
     return val;
 }
@@ -1820,15 +1984,15 @@
     case 1:
         return r->W(i);
     case 2:
-        return (int8_t) r->B(i);
+        return (int8_t)r->B(i);
     case 3:
     default:
-        return (int16_t) r->W(i);
+        return (int16_t)r->W(i);
     }
 }
 
-static inline unsigned pcmpxstrx(Reg *d, Reg *s,
-                int8_t ctrl, int valids, int validd)
+static inline unsigned pcmpxstrx(CPUX86State *env, Reg *d, Reg *s,
+                                 int8_t ctrl, int valids, int validd)
 {
     unsigned int res = 0;
     int v;
@@ -1845,21 +2009,23 @@
         for (j = valids; j >= 0; j--) {
             res <<= 1;
             v = pcmp_val(s, ctrl, j);
-            for (i = validd; i >= 0; i--)
+            for (i = validd; i >= 0; i--) {
                 res |= (v == pcmp_val(d, ctrl, i));
+            }
         }
         break;
     case 1:
         for (j = valids; j >= 0; j--) {
             res <<= 1;
             v = pcmp_val(s, ctrl, j);
-            for (i = ((validd - 1) | 1); i >= 0; i -= 2)
-                res |= (pcmp_val(d, ctrl, i - 0) <= v &&
-                        pcmp_val(d, ctrl, i - 1) >= v);
+            for (i = ((validd - 1) | 1); i >= 0; i -= 2) {
+                res |= (pcmp_val(d, ctrl, i - 0) >= v &&
+                        pcmp_val(d, ctrl, i - 1) <= v);
+            }
         }
         break;
     case 2:
-        res = (2 << (upper - MAX(valids, validd))) - 1;
+        res = (1 << (upper - MAX(valids, validd))) - 1;
         res <<= MAX(valids, validd) - MIN(valids, validd);
         for (i = MIN(valids, validd); i >= 0; i--) {
             res <<= 1;
@@ -1870,9 +2036,11 @@
     case 3:
         for (j = valids - validd; j >= 0; j--) {
             res <<= 1;
-            res |= 1;
-            for (i = MIN(upper - j, validd); i >= 0; i--)
-                res &= (pcmp_val(s, ctrl, i + j) == pcmp_val(d, ctrl, i));
+            v = 1;
+            for (i = MIN(upper - j, validd); i >= 0; i--) {
+                v &= (pcmp_val(s, ctrl, i + j) == pcmp_val(d, ctrl, i));
+            }
+            res |= v;
         }
         break;
     }
@@ -1882,105 +2050,93 @@
         res ^= (2 << upper) - 1;
         break;
     case 3:
-        res ^= (2 << valids) - 1;
+        res ^= (1 << (valids + 1)) - 1;
         break;
     }
 
-    if (res)
-       CC_SRC |= CC_C;
-    if (res & 1)
-       CC_SRC |= CC_O;
+    if (res) {
+        CC_SRC |= CC_C;
+    }
+    if (res & 1) {
+        CC_SRC |= CC_O;
+    }
 
     return res;
 }
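Case 1 of pcmpxstrx() is the "equal ranges" aggregation: the first operand holds (lower, upper) pairs, with the lower bound at the even index, and a source element matches when it falls inside any pair; the hunk above flips the two comparisons so the test reads lower <= v <= upper. A standalone sketch of that matching rule (illustration only, not part of the patch):

#include <assert.h>

static int in_ranges_sketch(int v, const int *pairs, int npairs)
{
    int i;

    for (i = 0; i < npairs; i++) {
        if (v >= pairs[2 * i] && v <= pairs[2 * i + 1]) {
            return 1;
        }
    }
    return 0;
}

int main(void)
{
    /* Ranges 'a'..'z' and '0'..'9', a typical PCMPISTRI use case. */
    int pairs[] = { 'a', 'z', '0', '9' };

    assert(in_ranges_sketch('q', pairs, 2));
    assert(!in_ranges_sketch('@', pairs, 2));
    return 0;
}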
 
-static inline int rffs1(unsigned int val)
+void glue(helper_pcmpestri, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
+                                    uint32_t ctrl)
 {
-    int ret = 1, hi;
+    unsigned int res = pcmpxstrx(env, d, s, ctrl,
+                                 pcmp_elen(env, R_EDX, ctrl),
+                                 pcmp_elen(env, R_EAX, ctrl));
 
-    for (hi = sizeof(val) * 4; hi; hi /= 2)
-        if (val >> hi) {
-            val >>= hi;
-            ret += hi;
-        }
-
-    return ret;
-}
-
-static inline int ffs1(unsigned int val)
-{
-    int ret = 1, hi;
-
-    for (hi = sizeof(val) * 4; hi; hi /= 2)
-        if (val << hi) {
-            val <<= hi;
-            ret += hi;
-        }
-
-    return ret;
-}
-
-void glue(helper_pcmpestri, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl)
-{
-    unsigned int res = pcmpxstrx(d, s, ctrl,
-                    pcmp_elen(R_EDX, ctrl),
-                    pcmp_elen(R_EAX, ctrl));
-
-    if (res)
-        env->regs[R_ECX] = ((ctrl & (1 << 6)) ? rffs1 : ffs1)(res) - 1;
-    else
-        env->regs[R_ECX] = 16 >> (ctrl & (1 << 0));
-}
-
-void glue(helper_pcmpestrm, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl)
-{
-    int i;
-    unsigned int res = pcmpxstrx(d, s, ctrl,
-                    pcmp_elen(R_EDX, ctrl),
-                    pcmp_elen(R_EAX, ctrl));
-
-    if ((ctrl >> 6) & 1) {
-        if (ctrl & 1)
-            for (i = 0; i <= 8; i--, res >>= 1)
-                d->W(i) = (res & 1) ? ~0 : 0;
-        else
-            for (i = 0; i <= 16; i--, res >>= 1)
-                d->B(i) = (res & 1) ? ~0 : 0;
+    if (res) {
+        env->regs[R_ECX] = (ctrl & (1 << 6)) ? 31 - clz32(res) : ctz32(res);
     } else {
-        d->Q(1) = 0;
-        d->Q(0) = res;
+        env->regs[R_ECX] = 16 >> (ctrl & (1 << 0));
     }
 }
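The index selection above replaces the hand-rolled ffs1()/rffs1() loops: ctz32(res) yields the position of the least-significant set bit and 31 - clz32(res) the most-significant, matching the immediate's least/most-significant-index choice. A standalone sketch of those two bit searches (illustration only; the real code uses QEMU's own clz32/ctz32 helpers):

#include <assert.h>

/* Both sketches assume v != 0, as guarded by the "if (res)" test above. */
static int ctz_sketch(unsigned v)
{
    int n = 0;

    while (!(v & 1)) {
        v >>= 1;
        n++;
    }
    return n;
}

static int msb_sketch(unsigned v)
{
    int n = 0;

    while (v >>= 1) {
        n++;
    }
    return n;
}

int main(void)
{
    assert(ctz_sketch(0x28) == 3);   /* lowest set bit of 0b101000 */
    assert(msb_sketch(0x28) == 5);   /* highest set bit, i.e. 31 - clz */
    return 0;
}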
 
-void glue(helper_pcmpistri, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl)
-{
-    unsigned int res = pcmpxstrx(d, s, ctrl,
-                    pcmp_ilen(s, ctrl),
-                    pcmp_ilen(d, ctrl));
-
-    if (res)
-        env->regs[R_ECX] = ((ctrl & (1 << 6)) ? rffs1 : ffs1)(res) - 1;
-    else
-        env->regs[R_ECX] = 16 >> (ctrl & (1 << 0));
-}
-
-void glue(helper_pcmpistrm, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl)
+void glue(helper_pcmpestrm, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
+                                    uint32_t ctrl)
 {
     int i;
-    unsigned int res = pcmpxstrx(d, s, ctrl,
-                    pcmp_ilen(s, ctrl),
-                    pcmp_ilen(d, ctrl));
+    unsigned int res = pcmpxstrx(env, d, s, ctrl,
+                                 pcmp_elen(env, R_EDX, ctrl),
+                                 pcmp_elen(env, R_EAX, ctrl));
 
     if ((ctrl >> 6) & 1) {
-        if (ctrl & 1)
-            for (i = 0; i <= 8; i--, res >>= 1)
-                d->W(i) = (res & 1) ? ~0 : 0;
-        else
-            for (i = 0; i <= 16; i--, res >>= 1)
-                d->B(i) = (res & 1) ? ~0 : 0;
+        if (ctrl & 1) {
+            for (i = 0; i < 8; i++, res >>= 1) {
+                env->xmm_regs[0].W(i) = (res & 1) ? ~0 : 0;
+            }
+        } else {
+            for (i = 0; i < 16; i++, res >>= 1) {
+                env->xmm_regs[0].B(i) = (res & 1) ? ~0 : 0;
+            }
+        }
     } else {
-        d->Q(1) = 0;
-        d->Q(0) = res;
+        env->xmm_regs[0].Q(1) = 0;
+        env->xmm_regs[0].Q(0) = res;
+    }
+}
+
+void glue(helper_pcmpistri, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
+                                    uint32_t ctrl)
+{
+    unsigned int res = pcmpxstrx(env, d, s, ctrl,
+                                 pcmp_ilen(s, ctrl),
+                                 pcmp_ilen(d, ctrl));
+
+    if (res) {
+        env->regs[R_ECX] = (ctrl & (1 << 6)) ? 31 - clz32(res) : ctz32(res);
+    } else {
+        env->regs[R_ECX] = 16 >> (ctrl & (1 << 0));
+    }
+}
+
+void glue(helper_pcmpistrm, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
+                                    uint32_t ctrl)
+{
+    int i;
+    unsigned int res = pcmpxstrx(env, d, s, ctrl,
+                                 pcmp_ilen(s, ctrl),
+                                 pcmp_ilen(d, ctrl));
+
+    if ((ctrl >> 6) & 1) {
+        if (ctrl & 1) {
+            for (i = 0; i < 8; i++, res >>= 1) {
+                env->xmm_regs[0].W(i) = (res & 1) ? ~0 : 0;
+            }
+        } else {
+            for (i = 0; i < 16; i++, res >>= 1) {
+                env->xmm_regs[0].B(i) = (res & 1) ? ~0 : 0;
+            }
+        }
+    } else {
+        env->xmm_regs[0].Q(1) = 0;
+        env->xmm_regs[0].Q(0) = res;
     }
 }
 
@@ -1989,17 +2145,18 @@
 target_ulong helper_crc32(uint32_t crc1, target_ulong msg, uint32_t len)
 {
     target_ulong crc = (msg & ((target_ulong) -1 >>
-                            (TARGET_LONG_BITS - len))) ^ crc1;
+                               (TARGET_LONG_BITS - len))) ^ crc1;
 
-    while (len--)
+    while (len--) {
         crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_BITREV : 0);
+    }
 
     return crc;
 }
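
helper_crc32 above is the plain bit-at-a-time reflected CRC loop; CRCPOLY_BITREV is presumably the bit-reversed CRC-32C (Castagnoli) polynomial used by the SSE4.2 CRC32 instruction, 0x82F63B78. A small self-contained sketch of the same shift-and-conditional-xor update, one input byte at a time:

/* Sketch: reflected CRC-32C update for one byte with the same per-bit
 * shift-and-xor loop; 0x82F63B78 (the bit-reversed Castagnoli polynomial)
 * is assumed to be what CRCPOLY_BITREV expands to. */
#include <assert.h>
#include <stdint.h>

static uint32_t crc32c_byte(uint32_t crc, uint8_t byte)
{
    int i;

    crc ^= byte;
    for (i = 0; i < 8; i++) {
        crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78u : 0);
    }
    return crc;
}

int main(void)
{
    assert(crc32c_byte(0, 0x01) == 0xF26B8303);   /* CRC-32C table[1] */
    return 0;
}

In the helper itself, len is the operand width in bits: the mask keeps the low len bits of msg before folding them into crc1, and the loop then runs len times.
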
 
 #define POPMASK(i)     ((target_ulong) -1 / ((1LL << (1 << i)) + 1))
-#define POPCOUNT(n, i) (n & POPMASK(i)) + ((n >> (1 << i)) & POPMASK(i))
-target_ulong helper_popcnt(target_ulong n, uint32_t type)
+#define POPCOUNT(n, i) ((n & POPMASK(i)) + ((n >> (1 << i)) & POPMASK(i)))
+target_ulong helper_popcnt(CPUX86State *env, target_ulong n, uint32_t type)
 {
     CC_SRC = n ? 0 : CC_Z;
 
@@ -2007,19 +2164,132 @@
     n = POPCOUNT(n, 1);
     n = POPCOUNT(n, 2);
     n = POPCOUNT(n, 3);
-    if (type == 1)
+    if (type == 1) {
         return n & 0xff;
+    }
 
     n = POPCOUNT(n, 4);
 #ifndef TARGET_X86_64
     return n;
 #else
-    if (type == 2)
+    if (type == 2) {
         return n & 0xff;
+    }
 
     return POPCOUNT(n, 5);
 #endif
 }
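
POPMASK/POPCOUNT above implement the usual SWAR population count: step i adds adjacent (1 << i)-bit fields in parallel, and the extra parentheses added to POPCOUNT keep the macro safe when its result is nested into the next step. The same reduction written out for a plain 32-bit value:

/* Sketch of the SWAR reduction used by helper_popcnt, on 32-bit values. */
#include <assert.h>
#include <stdint.h>

static uint32_t popcnt32(uint32_t n)
{
    n = (n & 0x55555555u) + ((n >> 1) & 0x55555555u);  /* pairs of bits */
    n = (n & 0x33333333u) + ((n >> 2) & 0x33333333u);  /* nibbles       */
    n = (n & 0x0f0f0f0fu) + ((n >> 4) & 0x0f0f0f0fu);  /* bytes         */
    n = (n & 0x00ff00ffu) + ((n >> 8) & 0x00ff00ffu);  /* 16-bit halves */
    return (n & 0x0000ffffu) + (n >> 16);
}

int main(void)
{
    assert(popcnt32(0xF0F0F0F0u) == 16);
    assert(popcnt32(0x80000001u) == 2);
    return 0;
}
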
+
+void glue(helper_pclmulqdq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
+                                    uint32_t ctrl)
+{
+    uint64_t ah, al, b, resh, resl;
+
+    ah = 0;
+    al = d->Q((ctrl & 1) != 0);
+    b = s->Q((ctrl & 16) != 0);
+    resh = resl = 0;
+
+    while (b) {
+        if (b & 1) {
+            resl ^= al;
+            resh ^= ah;
+        }
+        ah = (ah << 1) | (al >> 63);
+        al <<= 1;
+        b >>= 1;
+    }
+
+    d->Q(0) = resl;
+    d->Q(1) = resh;
+}
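
helper_pclmulqdq is a shift-and-xor ("carry-less") multiply over GF(2): every set bit of b xors a correspondingly shifted copy of the selected 64-bit half of d (tracked in ah:al) into the 128-bit result resh:resl. A tiny worked example with small operands, where (x^2 + x + 1) * (x^2 + x + 1) = x^4 + x^2 + 1:

/* Sketch: carry-less multiply of two small polynomials over GF(2),
 * i.e. 0b111 clmul 0b111 = 0b10101. */
#include <assert.h>
#include <stdint.h>

static uint64_t clmul64(uint64_t a, uint64_t b)
{
    uint64_t res = 0;

    while (b) {
        if (b & 1) {
            res ^= a;      /* xor in a shifted copy of a */
        }
        a <<= 1;
        b >>= 1;
    }
    return res;
}

int main(void)
{
    assert(clmul64(0x7, 0x7) == 0x15);
    return 0;
}
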
+
+/* AES-NI op helpers */
+static const uint8_t aes_shifts[16] = {
+    0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11
+};
+
+static const uint8_t aes_ishifts[16] = {
+    0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3
+};
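
The two tables are the ShiftRows and InvShiftRows byte permutations on the 16-byte xmm state: output byte i is taken from input byte (i + 4*(i & 3)) & 15, respectively (i - 4*(i & 3)) & 15, and the two permutations are inverses of each other. A quick self-check of the tables against that formula:

/* Sketch: verify the ShiftRows/InvShiftRows index tables above. */
#include <assert.h>
#include <stdint.h>

static const uint8_t aes_shifts[16] = {
    0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11
};
static const uint8_t aes_ishifts[16] = {
    0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3
};

int main(void)
{
    int i;

    for (i = 0; i < 16; i++) {
        assert(aes_shifts[i]  == ((i + 4 * (i & 3)) & 15));   /* ShiftRows    */
        assert(aes_ishifts[i] == ((i - 4 * (i & 3)) & 15));   /* InvShiftRows */
        assert(aes_ishifts[aes_shifts[i]] == i);              /* inverses     */
    }
    return 0;
}
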
+
+void glue(helper_aesdec, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    int i;
+    Reg st = *d;
+    Reg rk = *s;
+
+    for (i = 0 ; i < 4 ; i++) {
+        d->L(i) = rk.L(i) ^ bswap32(AES_Td0[st.B(aes_ishifts[4*i+0])] ^
+                                    AES_Td1[st.B(aes_ishifts[4*i+1])] ^
+                                    AES_Td2[st.B(aes_ishifts[4*i+2])] ^
+                                    AES_Td3[st.B(aes_ishifts[4*i+3])]);
+    }
+}
+
+void glue(helper_aesdeclast, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    int i;
+    Reg st = *d;
+    Reg rk = *s;
+
+    for (i = 0; i < 16; i++) {
+        d->B(i) = rk.B(i) ^ (AES_Td4[st.B(aes_ishifts[i])] & 0xff);
+    }
+}
+
+void glue(helper_aesenc, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    int i;
+    Reg st = *d;
+    Reg rk = *s;
+
+    for (i = 0 ; i < 4 ; i++) {
+        d->L(i) = rk.L(i) ^ bswap32(AES_Te0[st.B(aes_shifts[4*i+0])] ^
+                                    AES_Te1[st.B(aes_shifts[4*i+1])] ^
+                                    AES_Te2[st.B(aes_shifts[4*i+2])] ^
+                                    AES_Te3[st.B(aes_shifts[4*i+3])]);
+    }
+}
+
+void glue(helper_aesenclast, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    int i;
+    Reg st = *d;
+    Reg rk = *s;
+
+    for (i = 0; i < 16; i++) {
+        d->B(i) = rk.B(i) ^ (AES_Te4[st.B(aes_shifts[i])] & 0xff);
+    }
+
+}
+
+void glue(helper_aesimc, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    int i;
+    Reg tmp = *s;
+
+    for (i = 0 ; i < 4 ; i++) {
+        d->L(i) = bswap32(AES_Td0[AES_Te4[tmp.B(4*i+0)] & 0xff] ^
+                          AES_Td1[AES_Te4[tmp.B(4*i+1)] & 0xff] ^
+                          AES_Td2[AES_Te4[tmp.B(4*i+2)] & 0xff] ^
+                          AES_Td3[AES_Te4[tmp.B(4*i+3)] & 0xff]);
+    }
+}
+
+void glue(helper_aeskeygenassist, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
+                                          uint32_t ctrl)
+{
+    int i;
+    Reg tmp = *s;
+
+    for (i = 0 ; i < 4 ; i++) {
+        d->B(i) = AES_Te4[tmp.B(i + 4)] & 0xff;
+        d->B(i + 8) = AES_Te4[tmp.B(i + 12)] & 0xff;
+    }
+    d->L(1) = (d->L(0) << 24 | d->L(0) >> 8) ^ ctrl;
+    d->L(3) = (d->L(2) << 24 | d->L(2) >> 8) ^ ctrl;
+}
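
The (d->L(0) << 24 | d->L(0) >> 8) expression in helper_aeskeygenassist is a 32-bit rotate right by 8, which on this little-endian word layout amounts to the AES RotWord step applied to the already substituted word before the round-constant immediate is xored in. A tiny check of the rotation itself:

/* Sketch: (x << 24) | (x >> 8) rotates a 32-bit word right by 8 bits. */
#include <assert.h>
#include <stdint.h>

static uint32_t ror8(uint32_t x)
{
    return (x << 24) | (x >> 8);
}

int main(void)
{
    assert(ror8(0x0A0B0C0D) == 0x0D0A0B0C);
    return 0;
}
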
 #endif
 
 #undef SHIFT
diff --git a/target-i386/ops_sse_header.h b/target-i386/ops_sse_header.h
index 20d47bb..a68c7cc 100644
--- a/target-i386/ops_sse_header.h
+++ b/target-i386/ops_sse_header.h
@@ -14,8 +14,7 @@
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
 #if SHIFT == 0
 #define Reg MMXReg
@@ -35,31 +34,31 @@
 #define dh_is_signed_XMMReg dh_is_signed_ptr
 #define dh_is_signed_MMXReg dh_is_signed_ptr
 
-DEF_HELPER_2(glue(psrlw, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(psraw, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(psllw, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(psrld, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(psrad, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pslld, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(psrlq, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(psllq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_3(glue(psrlw, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(psraw, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(psllw, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(psrld, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(psrad, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pslld, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(psrlq, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(psllq, SUFFIX), void, env, Reg, Reg)
 
 #if SHIFT == 1
-DEF_HELPER_2(glue(psrldq, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pslldq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_3(glue(psrldq, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pslldq, SUFFIX), void, env, Reg, Reg)
 #endif
 
 #define SSE_HELPER_B(name, F)\
-    DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg)
+    DEF_HELPER_3(glue(name, SUFFIX), void, env, Reg, Reg)
 
 #define SSE_HELPER_W(name, F)\
-    DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg)
+    DEF_HELPER_3(glue(name, SUFFIX), void, env, Reg, Reg)
 
 #define SSE_HELPER_L(name, F)\
-    DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg)
+    DEF_HELPER_3(glue(name, SUFFIX), void, env, Reg, Reg)
 
 #define SSE_HELPER_Q(name, F)\
-    DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg)
+    DEF_HELPER_3(glue(name, SUFFIX), void, env, Reg, Reg)
 
 SSE_HELPER_B(paddb, FADD)
 SSE_HELPER_W(paddw, FADD)
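
The pattern running through this header is purely mechanical: each helper gains a leading env argument (DEF_HELPER_2 becomes DEF_HELPER_3 and so on), so the generated prototype receives the CPU state explicitly instead of the helper reaching for a global env. An illustrative sketch of what that amounts to for a helper body (simplified stand-in types and a placeholder name, not the literal def-helper.h expansion):

/* Illustrative only: 'Reg' and 'CPUX86State' below are trimmed stand-ins. */
#include <stdint.h>

typedef struct { uint64_t q[2]; } Reg;
typedef struct { Reg xmm_regs[16]; } CPUX86State;

/* Before: DEF_HELPER_2(foo, void, Reg, Reg)      -> the body used a global env. */
/* After:  DEF_HELPER_3(foo, void, env, Reg, Reg) -> env arrives as a parameter: */
void helper_foo_xmm(CPUX86State *env, Reg *d, Reg *s)
{
    env->xmm_regs[0] = *d;      /* CPU state is reached through the parameter */
    d->q[0] ^= s->q[0];
}
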
@@ -110,11 +109,11 @@
 SSE_HELPER_B(pavgb, FAVG)
 SSE_HELPER_W(pavgw, FAVG)
 
-DEF_HELPER_2(glue(pmuludq, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pmaddwd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_3(glue(pmuludq, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pmaddwd, SUFFIX), void, env, Reg, Reg)
 
-DEF_HELPER_2(glue(psadbw, SUFFIX), void, Reg, Reg)
-DEF_HELPER_3(glue(maskmov, SUFFIX), void, Reg, Reg, tl)
+DEF_HELPER_3(glue(psadbw, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_4(glue(maskmov, SUFFIX), void, env, Reg, Reg, tl)
 DEF_HELPER_2(glue(movl_mm_T0, SUFFIX), void, Reg, i32)
 #ifdef TARGET_X86_64
 DEF_HELPER_2(glue(movq_mm_T0, SUFFIX), void, Reg, i64)
@@ -134,11 +133,11 @@
 /* FPU ops */
 /* XXX: not accurate */
 
-#define SSE_HELPER_S(name, F)\
-    DEF_HELPER_2(name ## ps , void, Reg, Reg)        \
-    DEF_HELPER_2(name ## ss , void, Reg, Reg)        \
-    DEF_HELPER_2(name ## pd , void, Reg, Reg)        \
-    DEF_HELPER_2(name ## sd , void, Reg, Reg)
+#define SSE_HELPER_S(name, F)                            \
+    DEF_HELPER_3(name ## ps, void, env, Reg, Reg)        \
+    DEF_HELPER_3(name ## ss, void, env, Reg, Reg)        \
+    DEF_HELPER_3(name ## pd, void, env, Reg, Reg)        \
+    DEF_HELPER_3(name ## sd, void, env, Reg, Reg)
 
 SSE_HELPER_S(add, FPU_ADD)
 SSE_HELPER_S(sub, FPU_SUB)
@@ -149,60 +148,64 @@
 SSE_HELPER_S(sqrt, FPU_SQRT)
 
 
-DEF_HELPER_2(cvtps2pd, void, Reg, Reg)
-DEF_HELPER_2(cvtpd2ps, void, Reg, Reg)
-DEF_HELPER_2(cvtss2sd, void, Reg, Reg)
-DEF_HELPER_2(cvtsd2ss, void, Reg, Reg)
-DEF_HELPER_2(cvtdq2ps, void, Reg, Reg)
-DEF_HELPER_2(cvtdq2pd, void, Reg, Reg)
-DEF_HELPER_2(cvtpi2ps, void, XMMReg, MMXReg)
-DEF_HELPER_2(cvtpi2pd, void, XMMReg, MMXReg)
-DEF_HELPER_2(cvtsi2ss, void, XMMReg, i32)
-DEF_HELPER_2(cvtsi2sd, void, XMMReg, i32)
+DEF_HELPER_3(cvtps2pd, void, env, Reg, Reg)
+DEF_HELPER_3(cvtpd2ps, void, env, Reg, Reg)
+DEF_HELPER_3(cvtss2sd, void, env, Reg, Reg)
+DEF_HELPER_3(cvtsd2ss, void, env, Reg, Reg)
+DEF_HELPER_3(cvtdq2ps, void, env, Reg, Reg)
+DEF_HELPER_3(cvtdq2pd, void, env, Reg, Reg)
+DEF_HELPER_3(cvtpi2ps, void, env, XMMReg, MMXReg)
+DEF_HELPER_3(cvtpi2pd, void, env, XMMReg, MMXReg)
+DEF_HELPER_3(cvtsi2ss, void, env, XMMReg, i32)
+DEF_HELPER_3(cvtsi2sd, void, env, XMMReg, i32)
 
 #ifdef TARGET_X86_64
-DEF_HELPER_2(cvtsq2ss, void, XMMReg, i64)
-DEF_HELPER_2(cvtsq2sd, void, XMMReg, i64)
+DEF_HELPER_3(cvtsq2ss, void, env, XMMReg, i64)
+DEF_HELPER_3(cvtsq2sd, void, env, XMMReg, i64)
 #endif
 
-DEF_HELPER_2(cvtps2dq, void, XMMReg, XMMReg)
-DEF_HELPER_2(cvtpd2dq, void, XMMReg, XMMReg)
-DEF_HELPER_2(cvtps2pi, void, MMXReg, XMMReg)
-DEF_HELPER_2(cvtpd2pi, void, MMXReg, XMMReg)
-DEF_HELPER_1(cvtss2si, s32, XMMReg)
-DEF_HELPER_1(cvtsd2si, s32, XMMReg)
+DEF_HELPER_3(cvtps2dq, void, env, XMMReg, XMMReg)
+DEF_HELPER_3(cvtpd2dq, void, env, XMMReg, XMMReg)
+DEF_HELPER_3(cvtps2pi, void, env, MMXReg, XMMReg)
+DEF_HELPER_3(cvtpd2pi, void, env, MMXReg, XMMReg)
+DEF_HELPER_2(cvtss2si, s32, env, XMMReg)
+DEF_HELPER_2(cvtsd2si, s32, env, XMMReg)
 #ifdef TARGET_X86_64
-DEF_HELPER_1(cvtss2sq, s64, XMMReg)
-DEF_HELPER_1(cvtsd2sq, s64, XMMReg)
+DEF_HELPER_2(cvtss2sq, s64, env, XMMReg)
+DEF_HELPER_2(cvtsd2sq, s64, env, XMMReg)
 #endif
 
-DEF_HELPER_2(cvttps2dq, void, XMMReg, XMMReg)
-DEF_HELPER_2(cvttpd2dq, void, XMMReg, XMMReg)
-DEF_HELPER_2(cvttps2pi, void, MMXReg, XMMReg)
-DEF_HELPER_2(cvttpd2pi, void, MMXReg, XMMReg)
-DEF_HELPER_1(cvttss2si, s32, XMMReg)
-DEF_HELPER_1(cvttsd2si, s32, XMMReg)
+DEF_HELPER_3(cvttps2dq, void, env, XMMReg, XMMReg)
+DEF_HELPER_3(cvttpd2dq, void, env, XMMReg, XMMReg)
+DEF_HELPER_3(cvttps2pi, void, env, MMXReg, XMMReg)
+DEF_HELPER_3(cvttpd2pi, void, env, MMXReg, XMMReg)
+DEF_HELPER_2(cvttss2si, s32, env, XMMReg)
+DEF_HELPER_2(cvttsd2si, s32, env, XMMReg)
 #ifdef TARGET_X86_64
-DEF_HELPER_1(cvttss2sq, s64, XMMReg)
-DEF_HELPER_1(cvttsd2sq, s64, XMMReg)
+DEF_HELPER_2(cvttss2sq, s64, env, XMMReg)
+DEF_HELPER_2(cvttsd2sq, s64, env, XMMReg)
 #endif
 
-DEF_HELPER_2(rsqrtps, void, XMMReg, XMMReg)
-DEF_HELPER_2(rsqrtss, void, XMMReg, XMMReg)
-DEF_HELPER_2(rcpps, void, XMMReg, XMMReg)
-DEF_HELPER_2(rcpss, void, XMMReg, XMMReg)
-DEF_HELPER_2(haddps, void, XMMReg, XMMReg)
-DEF_HELPER_2(haddpd, void, XMMReg, XMMReg)
-DEF_HELPER_2(hsubps, void, XMMReg, XMMReg)
-DEF_HELPER_2(hsubpd, void, XMMReg, XMMReg)
-DEF_HELPER_2(addsubps, void, XMMReg, XMMReg)
-DEF_HELPER_2(addsubpd, void, XMMReg, XMMReg)
+DEF_HELPER_3(rsqrtps, void, env, XMMReg, XMMReg)
+DEF_HELPER_3(rsqrtss, void, env, XMMReg, XMMReg)
+DEF_HELPER_3(rcpps, void, env, XMMReg, XMMReg)
+DEF_HELPER_3(rcpss, void, env, XMMReg, XMMReg)
+DEF_HELPER_3(extrq_r, void, env, XMMReg, XMMReg)
+DEF_HELPER_4(extrq_i, void, env, XMMReg, int, int)
+DEF_HELPER_3(insertq_r, void, env, XMMReg, XMMReg)
+DEF_HELPER_4(insertq_i, void, env, XMMReg, int, int)
+DEF_HELPER_3(haddps, void, env, XMMReg, XMMReg)
+DEF_HELPER_3(haddpd, void, env, XMMReg, XMMReg)
+DEF_HELPER_3(hsubps, void, env, XMMReg, XMMReg)
+DEF_HELPER_3(hsubpd, void, env, XMMReg, XMMReg)
+DEF_HELPER_3(addsubps, void, env, XMMReg, XMMReg)
+DEF_HELPER_3(addsubpd, void, env, XMMReg, XMMReg)
 
-#define SSE_HELPER_CMP(name, F)\
-    DEF_HELPER_2( name ## ps , void, Reg, Reg)        \
-    DEF_HELPER_2( name ## ss , void, Reg, Reg)        \
-    DEF_HELPER_2( name ## pd , void, Reg, Reg)        \
-    DEF_HELPER_2( name ## sd , void, Reg, Reg)
+#define SSE_HELPER_CMP(name, F)                           \
+    DEF_HELPER_3(name ## ps, void, env, Reg, Reg)         \
+    DEF_HELPER_3(name ## ss, void, env, Reg, Reg)         \
+    DEF_HELPER_3(name ## pd, void, env, Reg, Reg)         \
+    DEF_HELPER_3(name ## sd, void, env, Reg, Reg)
 
 SSE_HELPER_CMP(cmpeq, FPU_CMPEQ)
 SSE_HELPER_CMP(cmplt, FPU_CMPLT)
@@ -213,124 +216,135 @@
 SSE_HELPER_CMP(cmpnle, FPU_CMPNLE)
 SSE_HELPER_CMP(cmpord, FPU_CMPORD)
 
-DEF_HELPER_2(ucomiss, void, Reg, Reg)
-DEF_HELPER_2(comiss, void, Reg, Reg)
-DEF_HELPER_2(ucomisd, void, Reg, Reg)
-DEF_HELPER_2(comisd, void, Reg, Reg)
-DEF_HELPER_1(movmskps, i32, Reg)
-DEF_HELPER_1(movmskpd, i32, Reg)
+DEF_HELPER_3(ucomiss, void, env, Reg, Reg)
+DEF_HELPER_3(comiss, void, env, Reg, Reg)
+DEF_HELPER_3(ucomisd, void, env, Reg, Reg)
+DEF_HELPER_3(comisd, void, env, Reg, Reg)
+DEF_HELPER_2(movmskps, i32, env, Reg)
+DEF_HELPER_2(movmskpd, i32, env, Reg)
 #endif
 
-DEF_HELPER_1(glue(pmovmskb, SUFFIX), i32, Reg)
-DEF_HELPER_2(glue(packsswb, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(packuswb, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(packssdw, SUFFIX), void, Reg, Reg)
-#define UNPCK_OP(base_name, base)                               \
-    DEF_HELPER_2(glue(punpck ## base_name ## bw, SUFFIX) , void, Reg, Reg) \
-    DEF_HELPER_2(glue(punpck ## base_name ## wd, SUFFIX) , void, Reg, Reg) \
-    DEF_HELPER_2(glue(punpck ## base_name ## dq, SUFFIX) , void, Reg, Reg)
+DEF_HELPER_2(glue(pmovmskb, SUFFIX), i32, env, Reg)
+DEF_HELPER_3(glue(packsswb, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(packuswb, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(packssdw, SUFFIX), void, env, Reg, Reg)
+#define UNPCK_OP(base_name, base)                                       \
+    DEF_HELPER_3(glue(punpck ## base_name ## bw, SUFFIX), void, env, Reg, Reg) \
+    DEF_HELPER_3(glue(punpck ## base_name ## wd, SUFFIX), void, env, Reg, Reg) \
+    DEF_HELPER_3(glue(punpck ## base_name ## dq, SUFFIX), void, env, Reg, Reg)
 
 UNPCK_OP(l, 0)
 UNPCK_OP(h, 1)
 
 #if SHIFT == 1
-DEF_HELPER_2(glue(punpcklqdq, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(punpckhqdq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_3(glue(punpcklqdq, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(punpckhqdq, SUFFIX), void, env, Reg, Reg)
 #endif
 
 /* 3DNow! float ops */
 #if SHIFT == 0
-DEF_HELPER_2(pi2fd, void, MMXReg, MMXReg)
-DEF_HELPER_2(pi2fw, void, MMXReg, MMXReg)
-DEF_HELPER_2(pf2id, void, MMXReg, MMXReg)
-DEF_HELPER_2(pf2iw, void, MMXReg, MMXReg)
-DEF_HELPER_2(pfacc, void, MMXReg, MMXReg)
-DEF_HELPER_2(pfadd, void, MMXReg, MMXReg)
-DEF_HELPER_2(pfcmpeq, void, MMXReg, MMXReg)
-DEF_HELPER_2(pfcmpge, void, MMXReg, MMXReg)
-DEF_HELPER_2(pfcmpgt, void, MMXReg, MMXReg)
-DEF_HELPER_2(pfmax, void, MMXReg, MMXReg)
-DEF_HELPER_2(pfmin, void, MMXReg, MMXReg)
-DEF_HELPER_2(pfmul, void, MMXReg, MMXReg)
-DEF_HELPER_2(pfnacc, void, MMXReg, MMXReg)
-DEF_HELPER_2(pfpnacc, void, MMXReg, MMXReg)
-DEF_HELPER_2(pfrcp, void, MMXReg, MMXReg)
-DEF_HELPER_2(pfrsqrt, void, MMXReg, MMXReg)
-DEF_HELPER_2(pfsub, void, MMXReg, MMXReg)
-DEF_HELPER_2(pfsubr, void, MMXReg, MMXReg)
-DEF_HELPER_2(pswapd, void, MMXReg, MMXReg)
+DEF_HELPER_3(pi2fd, void, env, MMXReg, MMXReg)
+DEF_HELPER_3(pi2fw, void, env, MMXReg, MMXReg)
+DEF_HELPER_3(pf2id, void, env, MMXReg, MMXReg)
+DEF_HELPER_3(pf2iw, void, env, MMXReg, MMXReg)
+DEF_HELPER_3(pfacc, void, env, MMXReg, MMXReg)
+DEF_HELPER_3(pfadd, void, env, MMXReg, MMXReg)
+DEF_HELPER_3(pfcmpeq, void, env, MMXReg, MMXReg)
+DEF_HELPER_3(pfcmpge, void, env, MMXReg, MMXReg)
+DEF_HELPER_3(pfcmpgt, void, env, MMXReg, MMXReg)
+DEF_HELPER_3(pfmax, void, env, MMXReg, MMXReg)
+DEF_HELPER_3(pfmin, void, env, MMXReg, MMXReg)
+DEF_HELPER_3(pfmul, void, env, MMXReg, MMXReg)
+DEF_HELPER_3(pfnacc, void, env, MMXReg, MMXReg)
+DEF_HELPER_3(pfpnacc, void, env, MMXReg, MMXReg)
+DEF_HELPER_3(pfrcp, void, env, MMXReg, MMXReg)
+DEF_HELPER_3(pfrsqrt, void, env, MMXReg, MMXReg)
+DEF_HELPER_3(pfsub, void, env, MMXReg, MMXReg)
+DEF_HELPER_3(pfsubr, void, env, MMXReg, MMXReg)
+DEF_HELPER_3(pswapd, void, env, MMXReg, MMXReg)
 #endif
 
 /* SSSE3 op helpers */
-DEF_HELPER_2(glue(phaddw, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(phaddd, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(phaddsw, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(phsubw, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(phsubd, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(phsubsw, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pabsb, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pabsw, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pabsd, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pmaddubsw, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pmulhrsw, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pshufb, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(psignb, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(psignw, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(psignd, SUFFIX), void, Reg, Reg)
-DEF_HELPER_3(glue(palignr, SUFFIX), void, Reg, Reg, s32)
+DEF_HELPER_3(glue(phaddw, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(phaddd, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(phaddsw, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(phsubw, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(phsubd, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(phsubsw, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pabsb, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pabsw, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pabsd, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pmaddubsw, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pmulhrsw, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pshufb, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(psignb, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(psignw, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(psignd, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_4(glue(palignr, SUFFIX), void, env, Reg, Reg, s32)
 
 /* SSE4.1 op helpers */
 #if SHIFT == 1
-DEF_HELPER_2(glue(pblendvb, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(blendvps, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(blendvpd, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(ptest, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pmovsxbw, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pmovsxbd, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pmovsxbq, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pmovsxwd, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pmovsxwq, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pmovsxdq, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pmovzxbw, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pmovzxbd, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pmovzxbq, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pmovzxwd, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pmovzxwq, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pmovzxdq, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pmuldq, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pcmpeqq, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(packusdw, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pminsb, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pminsd, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pminuw, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pminud, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pmaxsb, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pmaxsd, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pmaxuw, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pmaxud, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(pmulld, SUFFIX), void, Reg, Reg)
-DEF_HELPER_2(glue(phminposuw, SUFFIX), void, Reg, Reg)
-DEF_HELPER_3(glue(roundps, SUFFIX), void, Reg, Reg, i32)
-DEF_HELPER_3(glue(roundpd, SUFFIX), void, Reg, Reg, i32)
-DEF_HELPER_3(glue(roundss, SUFFIX), void, Reg, Reg, i32)
-DEF_HELPER_3(glue(roundsd, SUFFIX), void, Reg, Reg, i32)
-DEF_HELPER_3(glue(blendps, SUFFIX), void, Reg, Reg, i32)
-DEF_HELPER_3(glue(blendpd, SUFFIX), void, Reg, Reg, i32)
-DEF_HELPER_3(glue(pblendw, SUFFIX), void, Reg, Reg, i32)
-DEF_HELPER_3(glue(dpps, SUFFIX), void, Reg, Reg, i32)
-DEF_HELPER_3(glue(dppd, SUFFIX), void, Reg, Reg, i32)
-DEF_HELPER_3(glue(mpsadbw, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(pblendvb, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(blendvps, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(blendvpd, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(ptest, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pmovsxbw, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pmovsxbd, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pmovsxbq, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pmovsxwd, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pmovsxwq, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pmovsxdq, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pmovzxbw, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pmovzxbd, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pmovzxbq, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pmovzxwd, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pmovzxwq, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pmovzxdq, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pmuldq, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pcmpeqq, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(packusdw, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pminsb, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pminsd, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pminuw, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pminud, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pmaxsb, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pmaxsd, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pmaxuw, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pmaxud, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pmulld, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(phminposuw, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_4(glue(roundps, SUFFIX), void, env, Reg, Reg, i32)
+DEF_HELPER_4(glue(roundpd, SUFFIX), void, env, Reg, Reg, i32)
+DEF_HELPER_4(glue(roundss, SUFFIX), void, env, Reg, Reg, i32)
+DEF_HELPER_4(glue(roundsd, SUFFIX), void, env, Reg, Reg, i32)
+DEF_HELPER_4(glue(blendps, SUFFIX), void, env, Reg, Reg, i32)
+DEF_HELPER_4(glue(blendpd, SUFFIX), void, env, Reg, Reg, i32)
+DEF_HELPER_4(glue(pblendw, SUFFIX), void, env, Reg, Reg, i32)
+DEF_HELPER_4(glue(dpps, SUFFIX), void, env, Reg, Reg, i32)
+DEF_HELPER_4(glue(dppd, SUFFIX), void, env, Reg, Reg, i32)
+DEF_HELPER_4(glue(mpsadbw, SUFFIX), void, env, Reg, Reg, i32)
 #endif
 
 /* SSE4.2 op helpers */
 #if SHIFT == 1
-DEF_HELPER_2(glue(pcmpgtq, SUFFIX), void, Reg, Reg)
-DEF_HELPER_3(glue(pcmpestri, SUFFIX), void, Reg, Reg, i32)
-DEF_HELPER_3(glue(pcmpestrm, SUFFIX), void, Reg, Reg, i32)
-DEF_HELPER_3(glue(pcmpistri, SUFFIX), void, Reg, Reg, i32)
-DEF_HELPER_3(glue(pcmpistrm, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(pcmpgtq, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_4(glue(pcmpestri, SUFFIX), void, env, Reg, Reg, i32)
+DEF_HELPER_4(glue(pcmpestrm, SUFFIX), void, env, Reg, Reg, i32)
+DEF_HELPER_4(glue(pcmpistri, SUFFIX), void, env, Reg, Reg, i32)
+DEF_HELPER_4(glue(pcmpistrm, SUFFIX), void, env, Reg, Reg, i32)
 DEF_HELPER_3(crc32, tl, i32, tl, i32)
-DEF_HELPER_2(popcnt, tl, tl, i32)
+DEF_HELPER_3(popcnt, tl, env, tl, i32)
+#endif
+
+/* AES-NI op helpers */
+#if SHIFT == 1
+DEF_HELPER_3(glue(aesdec, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(aesdeclast, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(aesenc, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(aesenclast, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(aesimc, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_4(glue(aeskeygenassist, SUFFIX), void, env, Reg, Reg, i32)
+DEF_HELPER_4(glue(pclmulqdq, SUFFIX), void, env, Reg, Reg, i32)
 #endif
 
 #undef SHIFT
diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c
new file mode 100644
index 0000000..70ca829
--- /dev/null
+++ b/target-i386/seg_helper.c
@@ -0,0 +1,2233 @@
+/*
+ *  x86 segmentation related helpers:
+ *  TSS, interrupts, system calls, jumps and call/task gates, descriptors
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "cpu.h"
+#include "qemu/log.h"
+#include "helper.h"
+
+#if !defined(CONFIG_USER_ONLY)
+#include "exec/softmmu_exec.h"
+#endif /* !defined(CONFIG_USER_ONLY) */
+
+//#define DEBUG_PCALL
+
+
+#ifdef DEBUG_PCALL
+#  define LOG_PCALL(...) qemu_log_mask(CPU_LOG_PCALL, ## __VA_ARGS__)
+#  define LOG_PCALL_STATE(env) \
+          log_cpu_state_mask(CPU_LOG_PCALL, (env), X86_DUMP_CCOP)
+#else
+#  define LOG_PCALL(...) do { } while (0)
+#  define LOG_PCALL_STATE(env) do { } while (0)
+#endif
+
+/* return non-zero on error */
+static inline int load_segment(CPUX86State *env,
+                               uint32_t *e1_ptr, uint32_t *e2_ptr,
+                               int selector)
+{
+    SegmentCache *dt;
+    int index;
+    target_ulong ptr;
+
+    if (selector & 0x4)
+        dt = &env->ldt;
+    else
+        dt = &env->gdt;
+    index = selector & ~7;
+    if ((index + 7) > dt->limit)
+        return -1;
+    ptr = dt->base + index;
+    *e1_ptr = cpu_ldl_kernel(env, ptr);
+    *e2_ptr = cpu_ldl_kernel(env, ptr + 4);
+    return 0;
+}
+
+static inline unsigned int get_seg_limit(uint32_t e1, uint32_t e2)
+{
+    unsigned int limit;
+    limit = (e1 & 0xffff) | (e2 & 0x000f0000);
+    if (e2 & DESC_G_MASK)
+        limit = (limit << 12) | 0xfff;
+    return limit;
+}
+
+static inline uint32_t get_seg_base(uint32_t e1, uint32_t e2)
+{
+    return ((e1 >> 16) | ((e2 & 0xff) << 16) | (e2 & 0xff000000));
+}
+
+static inline void load_seg_cache_raw_dt(SegmentCache *sc, uint32_t e1, uint32_t e2)
+{
+    sc->base = get_seg_base(e1, e2);
+    sc->limit = get_seg_limit(e1, e2);
+    sc->flags = e2;
+}
+
+/* init the segment cache in vm86 mode. */
+static inline void load_seg_vm(CPUX86State *env, int seg, int selector)
+{
+    selector &= 0xffff;
+    cpu_x86_load_seg_cache(env, seg, selector,
+                           (selector << 4), 0xffff, 0);
+}
+
+static inline void get_ss_esp_from_tss(CPUX86State *env,
+                                       uint32_t *ss_ptr,
+                                       uint32_t *esp_ptr, int dpl)
+{
+    int type, index, shift;
+
+#if 0
+    {
+        int i;
+        printf("TR: base=%p limit=%x\n", env->tr.base, env->tr.limit);
+        for(i=0;i<env->tr.limit;i++) {
+            printf("%02x ", env->tr.base[i]);
+            if ((i & 7) == 7) printf("\n");
+        }
+        printf("\n");
+    }
+#endif
+
+    if (!(env->tr.flags & DESC_P_MASK))
+        cpu_abort(env, "invalid tss");
+    type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf;
+    if ((type & 7) != 1)
+        cpu_abort(env, "invalid tss type");
+    shift = type >> 3;
+    index = (dpl * 4 + 2) << shift;
+    if (index + (4 << shift) - 1 > env->tr.limit)
+        raise_exception_err(env, EXCP0A_TSS, env->tr.selector & 0xfffc);
+    if (shift == 0) {
+        *esp_ptr = cpu_lduw_kernel(env, env->tr.base + index);
+        *ss_ptr = cpu_lduw_kernel(env, env->tr.base + index + 2);
+    } else {
+        *esp_ptr = cpu_ldl_kernel(env, env->tr.base + index);
+        *ss_ptr = cpu_lduw_kernel(env, env->tr.base + index + 4);
+    }
+}
+
+/* XXX: merge with load_seg() */
+static void tss_load_seg(CPUX86State *env, int seg_reg, int selector)
+{
+    uint32_t e1, e2;
+    int rpl, dpl, cpl;
+
+    if ((selector & 0xfffc) != 0) {
+        if (load_segment(env, &e1, &e2, selector) != 0)
+            raise_exception_err(env, EXCP0A_TSS, selector & 0xfffc);
+        if (!(e2 & DESC_S_MASK))
+            raise_exception_err(env, EXCP0A_TSS, selector & 0xfffc);
+        rpl = selector & 3;
+        dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+        cpl = env->hflags & HF_CPL_MASK;
+        if (seg_reg == R_CS) {
+            if (!(e2 & DESC_CS_MASK))
+                raise_exception_err(env, EXCP0A_TSS, selector & 0xfffc);
+            /* XXX: is it correct ? */
+            if (dpl != rpl)
+                raise_exception_err(env, EXCP0A_TSS, selector & 0xfffc);
+            if ((e2 & DESC_C_MASK) && dpl > rpl)
+                raise_exception_err(env, EXCP0A_TSS, selector & 0xfffc);
+        } else if (seg_reg == R_SS) {
+            /* SS must be writable data */
+            if ((e2 & DESC_CS_MASK) || !(e2 & DESC_W_MASK))
+                raise_exception_err(env, EXCP0A_TSS, selector & 0xfffc);
+            if (dpl != cpl || dpl != rpl)
+                raise_exception_err(env, EXCP0A_TSS, selector & 0xfffc);
+        } else {
+            /* not readable code */
+            if ((e2 & DESC_CS_MASK) && !(e2 & DESC_R_MASK))
+                raise_exception_err(env, EXCP0A_TSS, selector & 0xfffc);
+            /* if data or non-conforming code, check the rights */
+            if (((e2 >> DESC_TYPE_SHIFT) & 0xf) < 12) {
+                if (dpl < cpl || dpl < rpl)
+                    raise_exception_err(env, EXCP0A_TSS, selector & 0xfffc);
+            }
+        }
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(env, EXCP0B_NOSEG, selector & 0xfffc);
+        cpu_x86_load_seg_cache(env, seg_reg, selector,
+                       get_seg_base(e1, e2),
+                       get_seg_limit(e1, e2),
+                       e2);
+    } else {
+        if (seg_reg == R_SS || seg_reg == R_CS)
+            raise_exception_err(env, EXCP0A_TSS, selector & 0xfffc);
+    }
+}
+
+#define SWITCH_TSS_JMP  0
+#define SWITCH_TSS_IRET 1
+#define SWITCH_TSS_CALL 2
+
+/* XXX: restore CPU state in registers (PowerPC case) */
+static void switch_tss(CPUX86State *env,
+                       int tss_selector,
+                       uint32_t e1, uint32_t e2, int source,
+                       uint32_t next_eip)
+{
+    int tss_limit, tss_limit_max, type, old_tss_limit_max, old_type, v1, v2, i;
+    target_ulong tss_base;
+    uint32_t new_regs[8], new_segs[6];
+    uint32_t new_eflags, new_eip, new_cr3, new_ldt;
+    uint32_t old_eflags, eflags_mask;
+    SegmentCache *dt;
+    int index;
+    target_ulong ptr;
+
+    type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+    LOG_PCALL("switch_tss: sel=0x%04x type=%d src=%d\n", tss_selector, type, source);
+
+    /* if task gate, we read the TSS segment and we load it */
+    if (type == 5) {
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(env, EXCP0B_NOSEG, tss_selector & 0xfffc);
+        tss_selector = e1 >> 16;
+        if (tss_selector & 4)
+            raise_exception_err(env, EXCP0A_TSS, tss_selector & 0xfffc);
+        if (load_segment(env, &e1, &e2, tss_selector) != 0)
+            raise_exception_err(env, EXCP0D_GPF, tss_selector & 0xfffc);
+        if (e2 & DESC_S_MASK)
+            raise_exception_err(env, EXCP0D_GPF, tss_selector & 0xfffc);
+        type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+        if ((type & 7) != 1)
+            raise_exception_err(env, EXCP0D_GPF, tss_selector & 0xfffc);
+    }
+
+    if (!(e2 & DESC_P_MASK))
+        raise_exception_err(env, EXCP0B_NOSEG, tss_selector & 0xfffc);
+
+    if (type & 8)
+        tss_limit_max = 103;
+    else
+        tss_limit_max = 43;
+    tss_limit = get_seg_limit(e1, e2);
+    tss_base = get_seg_base(e1, e2);
+    if ((tss_selector & 4) != 0 ||
+        tss_limit < tss_limit_max)
+        raise_exception_err(env, EXCP0A_TSS, tss_selector & 0xfffc);
+    old_type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf;
+    if (old_type & 8)
+        old_tss_limit_max = 103;
+    else
+        old_tss_limit_max = 43;
+
+    /* read all the registers from the new TSS */
+    if (type & 8) {
+        /* 32 bit */
+        new_cr3 = cpu_ldl_kernel(env, tss_base + 0x1c);
+        new_eip = cpu_ldl_kernel(env, tss_base + 0x20);
+        new_eflags = cpu_ldl_kernel(env, tss_base + 0x24);
+        for(i = 0; i < 8; i++)
+            new_regs[i] = cpu_ldl_kernel(env, tss_base + (0x28 + i * 4));
+        for(i = 0; i < 6; i++)
+            new_segs[i] = cpu_lduw_kernel(env, tss_base + (0x48 + i * 4));
+        new_ldt = cpu_lduw_kernel(env, tss_base + 0x60);
+        cpu_ldl_kernel(env, tss_base + 0x64);
+    } else {
+        /* 16 bit */
+        new_cr3 = 0;
+        new_eip = cpu_lduw_kernel(env, tss_base + 0x0e);
+        new_eflags = cpu_lduw_kernel(env, tss_base + 0x10);
+        for(i = 0; i < 8; i++)
+            new_regs[i] = cpu_lduw_kernel(env, tss_base + (0x12 + i * 2)) | 0xffff0000;
+        for(i = 0; i < 4; i++)
+            new_segs[i] = cpu_lduw_kernel(env, tss_base + (0x22 + i * 4));
+        new_ldt = cpu_lduw_kernel(env, tss_base + 0x2a);
+        new_segs[R_FS] = 0;
+        new_segs[R_GS] = 0;
+    }
+
+    /* NOTE: we must avoid memory exceptions during the task switch,
+       so we make dummy accesses before */
+    /* XXX: it can still fail in some cases, so a bigger hack is
+       necessary to validate the TLB after the accesses have been done */
+
+    v1 = cpu_ldub_kernel(env, env->tr.base);
+    v2 = cpu_ldub_kernel(env, env->tr.base + old_tss_limit_max);
+    cpu_stb_kernel(env, env->tr.base, v1);
+    cpu_stb_kernel(env, env->tr.base + old_tss_limit_max, v2);
+
+    /* clear busy bit (it is restartable) */
+    if (source == SWITCH_TSS_JMP || source == SWITCH_TSS_IRET) {
+        target_ulong ptr;
+        uint32_t e2;
+        ptr = env->gdt.base + (env->tr.selector & ~7);
+        e2 = cpu_ldl_kernel(env, ptr + 4);
+        e2 &= ~DESC_TSS_BUSY_MASK;
+        cpu_stl_kernel(env, ptr + 4, e2);
+    }
+    old_eflags = cpu_compute_eflags(env);
+    if (source == SWITCH_TSS_IRET)
+        old_eflags &= ~NT_MASK;
+
+    /* save the current state in the old TSS */
+    if (type & 8) {
+        /* 32 bit */
+        cpu_stl_kernel(env, env->tr.base + 0x20, next_eip);
+        cpu_stl_kernel(env, env->tr.base + 0x24, old_eflags);
+        cpu_stl_kernel(env, env->tr.base + (0x28 + 0 * 4), EAX);
+        cpu_stl_kernel(env, env->tr.base + (0x28 + 1 * 4), ECX);
+        cpu_stl_kernel(env, env->tr.base + (0x28 + 2 * 4), EDX);
+        cpu_stl_kernel(env, env->tr.base + (0x28 + 3 * 4), EBX);
+        cpu_stl_kernel(env, env->tr.base + (0x28 + 4 * 4), ESP);
+        cpu_stl_kernel(env, env->tr.base + (0x28 + 5 * 4), EBP);
+        cpu_stl_kernel(env, env->tr.base + (0x28 + 6 * 4), ESI);
+        cpu_stl_kernel(env, env->tr.base + (0x28 + 7 * 4), EDI);
+        for(i = 0; i < 6; i++)
+            cpu_stw_kernel(env, env->tr.base + (0x48 + i * 4), env->segs[i].selector);
+    } else {
+        /* 16 bit */
+        cpu_stw_kernel(env, env->tr.base + 0x0e, next_eip);
+        cpu_stw_kernel(env, env->tr.base + 0x10, old_eflags);
+        cpu_stw_kernel(env, env->tr.base + (0x12 + 0 * 2), EAX);
+        cpu_stw_kernel(env, env->tr.base + (0x12 + 1 * 2), ECX);
+        cpu_stw_kernel(env, env->tr.base + (0x12 + 2 * 2), EDX);
+        cpu_stw_kernel(env, env->tr.base + (0x12 + 3 * 2), EBX);
+        cpu_stw_kernel(env, env->tr.base + (0x12 + 4 * 2), ESP);
+        cpu_stw_kernel(env, env->tr.base + (0x12 + 5 * 2), EBP);
+        cpu_stw_kernel(env, env->tr.base + (0x12 + 6 * 2), ESI);
+        cpu_stw_kernel(env, env->tr.base + (0x12 + 7 * 2), EDI);
+        for(i = 0; i < 4; i++)
+            cpu_stw_kernel(env, env->tr.base + (0x22 + i * 4), env->segs[i].selector);
+    }
+
+    /* now if an exception occurs, it will occur in the next task
+       context */
+
+    if (source == SWITCH_TSS_CALL) {
+        cpu_stw_kernel(env, tss_base, env->tr.selector);
+        new_eflags |= NT_MASK;
+    }
+
+    /* set busy bit */
+    if (source == SWITCH_TSS_JMP || source == SWITCH_TSS_CALL) {
+        target_ulong ptr;
+        uint32_t e2;
+        ptr = env->gdt.base + (tss_selector & ~7);
+        e2 = cpu_ldl_kernel(env, ptr + 4);
+        e2 |= DESC_TSS_BUSY_MASK;
+        cpu_stl_kernel(env, ptr + 4, e2);
+    }
+
+    /* set the new CPU state */
+    /* from this point, any exception which occurs can give problems */
+    env->cr[0] |= CR0_TS_MASK;
+    env->hflags |= HF_TS_MASK;
+    env->tr.selector = tss_selector;
+    env->tr.base = tss_base;
+    env->tr.limit = tss_limit;
+    env->tr.flags = e2 & ~DESC_TSS_BUSY_MASK;
+
+    if ((type & 8) && (env->cr[0] & CR0_PG_MASK)) {
+        cpu_x86_update_cr3(env, new_cr3);
+    }
+
+    /* load all registers that cannot fault first, then reload those that
+       may raise exceptions */
+    env->eip = new_eip;
+    eflags_mask = TF_MASK | AC_MASK | ID_MASK |
+        IF_MASK | IOPL_MASK | VM_MASK | RF_MASK | NT_MASK;
+    if (!(type & 8))
+        eflags_mask &= 0xffff;
+    cpu_load_eflags(env, new_eflags, eflags_mask);
+    /* XXX: what to do in the 16-bit case? */
+    EAX = new_regs[0];
+    ECX = new_regs[1];
+    EDX = new_regs[2];
+    EBX = new_regs[3];
+    ESP = new_regs[4];
+    EBP = new_regs[5];
+    ESI = new_regs[6];
+    EDI = new_regs[7];
+    if (new_eflags & VM_MASK) {
+        for(i = 0; i < 6; i++)
+            load_seg_vm(env, i, new_segs[i]);
+        /* in vm86, CPL is always 3 */
+        cpu_x86_set_cpl(env, 3);
+    } else {
+        /* CPL is set to the RPL of CS */
+        cpu_x86_set_cpl(env, new_segs[R_CS] & 3);
+        /* first just selectors as the rest may trigger exceptions */
+        for(i = 0; i < 6; i++)
+            cpu_x86_load_seg_cache(env, i, new_segs[i], 0, 0, 0);
+    }
+
+    env->ldt.selector = new_ldt & ~4;
+    env->ldt.base = 0;
+    env->ldt.limit = 0;
+    env->ldt.flags = 0;
+
+    /* load the LDT */
+    if (new_ldt & 4)
+        raise_exception_err(env, EXCP0A_TSS, new_ldt & 0xfffc);
+
+    if ((new_ldt & 0xfffc) != 0) {
+        dt = &env->gdt;
+        index = new_ldt & ~7;
+        if ((index + 7) > dt->limit)
+            raise_exception_err(env, EXCP0A_TSS, new_ldt & 0xfffc);
+        ptr = dt->base + index;
+        e1 = cpu_ldl_kernel(env, ptr);
+        e2 = cpu_ldl_kernel(env, ptr + 4);
+        if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2)
+            raise_exception_err(env, EXCP0A_TSS, new_ldt & 0xfffc);
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(env, EXCP0A_TSS, new_ldt & 0xfffc);
+        load_seg_cache_raw_dt(&env->ldt, e1, e2);
+    }
+
+    /* load the segments */
+    if (!(new_eflags & VM_MASK)) {
+        tss_load_seg(env, R_CS, new_segs[R_CS]);
+        tss_load_seg(env, R_SS, new_segs[R_SS]);
+        tss_load_seg(env, R_ES, new_segs[R_ES]);
+        tss_load_seg(env, R_DS, new_segs[R_DS]);
+        tss_load_seg(env, R_FS, new_segs[R_FS]);
+        tss_load_seg(env, R_GS, new_segs[R_GS]);
+    }
+
+    /* check that EIP is in the CS segment limits */
+    if (new_eip > env->segs[R_CS].limit) {
+        /* XXX: different exception if CALL ? */
+        raise_exception_err(env, EXCP0D_GPF, 0);
+    }
+
+#ifndef CONFIG_USER_ONLY
+    /* reset local breakpoints */
+    if (env->dr[7] & 0x55) {
+        for (i = 0; i < 4; i++) {
+            if (hw_breakpoint_enabled(env->dr[7], i) == 0x1)
+                hw_breakpoint_remove(env, i);
+        }
+        env->dr[7] &= ~0x55;
+    }
+#endif
+}
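
The fixed offsets that switch_tss reads and writes correspond to the 32-bit TSS layout: CR3 at 0x1c, EIP at 0x20, EFLAGS at 0x24, the eight general registers from 0x28, the six segment selectors (low 16 bits of each 32-bit slot) from 0x48 and the LDT selector at 0x60; the 16-bit branch uses the 0x0e/0x10/0x12/0x22/0x2a offsets instead. A compact sketch of the 32-bit half, naming only the fields this function touches (an assumption about the layout, matching the offsets above):

/* Sketch: 32-bit TSS offsets as used by switch_tss. */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct tss32 {
    uint32_t prev_link;          /* 0x00 */
    uint32_t esp0, ss0;          /* 0x04, 0x08 */
    uint32_t esp1, ss1;          /* 0x0c, 0x10 */
    uint32_t esp2, ss2;          /* 0x14, 0x18 */
    uint32_t cr3;                /* 0x1c */
    uint32_t eip;                /* 0x20 */
    uint32_t eflags;             /* 0x24 */
    uint32_t regs[8];            /* 0x28: EAX..EDI in the order stored above  */
    uint32_t segs[6];            /* 0x48: ES, CS, SS, DS, FS, GS selectors    */
    uint32_t ldt;                /* 0x60 */
    uint32_t iomap;              /* 0x64: T bit and I/O map base              */
};

int main(void)
{
    assert(offsetof(struct tss32, cr3)  == 0x1c);
    assert(offsetof(struct tss32, eip)  == 0x20);
    assert(offsetof(struct tss32, regs) == 0x28);
    assert(offsetof(struct tss32, segs) == 0x48);
    assert(offsetof(struct tss32, ldt)  == 0x60);
    return 0;
}
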
+
+static inline unsigned int get_sp_mask(unsigned int e2)
+{
+    if (e2 & DESC_B_MASK)
+        return 0xffffffff;
+    else
+        return 0xffff;
+}
+
+static int exeption_has_error_code(int intno)
+{
+        switch(intno) {
+        case 8:
+        case 10:
+        case 11:
+        case 12:
+        case 13:
+        case 14:
+        case 17:
+            return 1;
+        }
+        return 0;
+}
+
+#ifdef TARGET_X86_64
+#define SET_ESP(val, sp_mask)\
+do {\
+    if ((sp_mask) == 0xffff)\
+        ESP = (ESP & ~0xffff) | ((val) & 0xffff);\
+    else if ((sp_mask) == 0xffffffffLL)\
+        ESP = (uint32_t)(val);\
+    else\
+        ESP = (val);\
+} while (0)
+#else
+#define SET_ESP(val, sp_mask) ESP = (ESP & ~(sp_mask)) | ((val) & (sp_mask))
+#endif
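
SET_ESP only rewrites the bits of ESP selected by sp_mask, so with a 16-bit stack segment (sp_mask == 0xffff) SP changes while the upper half of ESP is preserved; the TARGET_X86_64 variant just special-cases the two common masks. A small sketch of the masking behaviour of the generic form:

/* Sketch of the non-64-bit SET_ESP behaviour: only sp_mask bits change. */
#include <assert.h>
#include <stdint.h>

static uint32_t set_esp(uint32_t esp, uint32_t val, uint32_t sp_mask)
{
    return (esp & ~sp_mask) | (val & sp_mask);
}

int main(void)
{
    /* 16-bit stack: the high half of ESP is preserved */
    assert(set_esp(0x00120FF8, 0x0FF0, 0xffff) == 0x00120FF0);
    /* 32-bit stack: the full value is taken */
    assert(set_esp(0x00120FF8, 0x0FF0, 0xffffffff) == 0x00000FF0);
    return 0;
}
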
+
+/* On 64-bit machines, this can overflow, so this segment addition macro
+ * can be used to trim the value to 32 bits whenever needed. */
+#define SEG_ADDL(ssp, sp, sp_mask) ((uint32_t)((ssp) + (sp & (sp_mask))))
+
+/* XXX: add a is_user flag to have proper security support */
+#define PUSHW(ssp, sp, sp_mask, val)\
+{\
+    sp -= 2;\
+    cpu_stw_kernel(env, (ssp) + (sp & (sp_mask)), (val));\
+}
+
+#define PUSHL(ssp, sp, sp_mask, val)\
+{\
+    sp -= 4;\
+    cpu_stl_kernel(env, SEG_ADDL(ssp, sp, sp_mask), (uint32_t)(val));\
+}
+
+#define POPW(ssp, sp, sp_mask, val)\
+{\
+    val = cpu_lduw_kernel(env, (ssp) + (sp & (sp_mask)));\
+    sp += 2;\
+}
+
+#define POPL(ssp, sp, sp_mask, val)\
+{\
+    val = (uint32_t)cpu_ldl_kernel(env, SEG_ADDL(ssp, sp, sp_mask));\
+    sp += 4;\
+}
+
+/* protected mode interrupt */
+static void do_interrupt_protected(CPUX86State *env,
+                                   int intno, int is_int, int error_code,
+                                   unsigned int next_eip, int is_hw)
+{
+    SegmentCache *dt;
+    target_ulong ptr, ssp;
+    int type, dpl, selector, ss_dpl, cpl;
+    int has_error_code, new_stack, shift;
+    uint32_t e1, e2, offset, ss = 0, esp, ss_e1 = 0, ss_e2 = 0;
+    uint32_t old_eip, sp_mask;
+
+    has_error_code = 0;
+    if (!is_int && !is_hw)
+        has_error_code = exeption_has_error_code(intno);
+    if (is_int)
+        old_eip = next_eip;
+    else
+        old_eip = env->eip;
+
+    dt = &env->idt;
+    if (intno * 8 + 7 > dt->limit)
+        raise_exception_err(env, EXCP0D_GPF, intno * 8 + 2);
+    ptr = dt->base + intno * 8;
+    e1 = cpu_ldl_kernel(env, ptr);
+    e2 = cpu_ldl_kernel(env, ptr + 4);
+    /* check gate type */
+    type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
+    switch(type) {
+    case 5: /* task gate */
+        /* must do that check here to return the correct error code */
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(env, EXCP0B_NOSEG, intno * 8 + 2);
+        switch_tss(env, intno * 8, e1, e2, SWITCH_TSS_CALL, old_eip);
+        if (has_error_code) {
+            int type;
+            uint32_t mask;
+            /* push the error code */
+            type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf;
+            shift = type >> 3;
+            if (env->segs[R_SS].flags & DESC_B_MASK)
+                mask = 0xffffffff;
+            else
+                mask = 0xffff;
+            esp = (ESP - (2 << shift)) & mask;
+            ssp = env->segs[R_SS].base + esp;
+            if (shift)
+                cpu_stl_kernel(env, ssp, error_code);
+            else
+                cpu_stw_kernel(env, ssp, error_code);
+            SET_ESP(esp, mask);
+        }
+        return;
+    case 6: /* 286 interrupt gate */
+    case 7: /* 286 trap gate */
+    case 14: /* 386 interrupt gate */
+    case 15: /* 386 trap gate */
+        break;
+    default:
+        raise_exception_err(env, EXCP0D_GPF, intno * 8 + 2);
+        break;
+    }
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    cpl = env->hflags & HF_CPL_MASK;
+    /* check privilege if software int */
+    if (is_int && dpl < cpl)
+        raise_exception_err(env, EXCP0D_GPF, intno * 8 + 2);
+    /* check valid bit */
+    if (!(e2 & DESC_P_MASK))
+        raise_exception_err(env, EXCP0B_NOSEG, intno * 8 + 2);
+    selector = e1 >> 16;
+    offset = (e2 & 0xffff0000) | (e1 & 0x0000ffff);
+    if ((selector & 0xfffc) == 0)
+        raise_exception_err(env, EXCP0D_GPF, 0);
+
+    if (load_segment(env, &e1, &e2, selector) != 0)
+        raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
+    if (!(e2 & DESC_S_MASK) || !(e2 & (DESC_CS_MASK)))
+        raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    if (dpl > cpl)
+        raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
+    if (!(e2 & DESC_P_MASK))
+        raise_exception_err(env, EXCP0B_NOSEG, selector & 0xfffc);
+    if (!(e2 & DESC_C_MASK) && dpl < cpl) {
+        /* to inner privilege */
+        get_ss_esp_from_tss(env, &ss, &esp, dpl);
+        if ((ss & 0xfffc) == 0)
+            raise_exception_err(env, EXCP0A_TSS, ss & 0xfffc);
+        if ((ss & 3) != dpl)
+            raise_exception_err(env, EXCP0A_TSS, ss & 0xfffc);
+        if (load_segment(env, &ss_e1, &ss_e2, ss) != 0)
+            raise_exception_err(env, EXCP0A_TSS, ss & 0xfffc);
+        ss_dpl = (ss_e2 >> DESC_DPL_SHIFT) & 3;
+        if (ss_dpl != dpl)
+            raise_exception_err(env, EXCP0A_TSS, ss & 0xfffc);
+        if (!(ss_e2 & DESC_S_MASK) ||
+            (ss_e2 & DESC_CS_MASK) ||
+            !(ss_e2 & DESC_W_MASK))
+            raise_exception_err(env, EXCP0A_TSS, ss & 0xfffc);
+        if (!(ss_e2 & DESC_P_MASK))
+            raise_exception_err(env, EXCP0A_TSS, ss & 0xfffc);
+        new_stack = 1;
+        sp_mask = get_sp_mask(ss_e2);
+        ssp = get_seg_base(ss_e1, ss_e2);
+    } else if ((e2 & DESC_C_MASK) || dpl == cpl) {
+        /* to same privilege */
+        if (env->eflags & VM_MASK)
+            raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
+        new_stack = 0;
+        sp_mask = get_sp_mask(env->segs[R_SS].flags);
+        ssp = env->segs[R_SS].base;
+        esp = ESP;
+        dpl = cpl;
+    } else {
+        raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
+        new_stack = 0; /* avoid warning */
+        sp_mask = 0; /* avoid warning */
+        ssp = 0; /* avoid warning */
+        esp = 0; /* avoid warning */
+    }
+
+    shift = type >> 3;
+
+#if 0
+    /* XXX: check that enough room is available */
+    push_size = 6 + (new_stack << 2) + (has_error_code << 1);
+    if (env->eflags & VM_MASK)
+        push_size += 8;
+    push_size <<= shift;
+#endif
+    if (shift == 1) {
+        if (new_stack) {
+            if (env->eflags & VM_MASK) {
+                PUSHL(ssp, esp, sp_mask, env->segs[R_GS].selector);
+                PUSHL(ssp, esp, sp_mask, env->segs[R_FS].selector);
+                PUSHL(ssp, esp, sp_mask, env->segs[R_DS].selector);
+                PUSHL(ssp, esp, sp_mask, env->segs[R_ES].selector);
+            }
+            PUSHL(ssp, esp, sp_mask, env->segs[R_SS].selector);
+            PUSHL(ssp, esp, sp_mask, ESP);
+        }
+        PUSHL(ssp, esp, sp_mask, cpu_compute_eflags(env));
+        PUSHL(ssp, esp, sp_mask, env->segs[R_CS].selector);
+        PUSHL(ssp, esp, sp_mask, old_eip);
+        if (has_error_code) {
+            PUSHL(ssp, esp, sp_mask, error_code);
+        }
+    } else {
+        if (new_stack) {
+            if (env->eflags & VM_MASK) {
+                PUSHW(ssp, esp, sp_mask, env->segs[R_GS].selector);
+                PUSHW(ssp, esp, sp_mask, env->segs[R_FS].selector);
+                PUSHW(ssp, esp, sp_mask, env->segs[R_DS].selector);
+                PUSHW(ssp, esp, sp_mask, env->segs[R_ES].selector);
+            }
+            PUSHW(ssp, esp, sp_mask, env->segs[R_SS].selector);
+            PUSHW(ssp, esp, sp_mask, ESP);
+        }
+        PUSHW(ssp, esp, sp_mask, cpu_compute_eflags(env));
+        PUSHW(ssp, esp, sp_mask, env->segs[R_CS].selector);
+        PUSHW(ssp, esp, sp_mask, old_eip);
+        if (has_error_code) {
+            PUSHW(ssp, esp, sp_mask, error_code);
+        }
+    }
+
+    if (new_stack) {
+        if (env->eflags & VM_MASK) {
+            cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0, 0);
+            cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0, 0);
+            cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0, 0);
+            cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0, 0);
+        }
+        ss = (ss & ~3) | dpl;
+        cpu_x86_load_seg_cache(env, R_SS, ss,
+                               ssp, get_seg_limit(ss_e1, ss_e2), ss_e2);
+    }
+    SET_ESP(esp, sp_mask);
+
+    selector = (selector & ~3) | dpl;
+    cpu_x86_load_seg_cache(env, R_CS, selector,
+                   get_seg_base(e1, e2),
+                   get_seg_limit(e1, e2),
+                   e2);
+    cpu_x86_set_cpl(env, dpl);
+    env->eip = offset;
+
+    /* interrupt gates clear the IF mask */
+    if ((type & 1) == 0) {
+        env->eflags &= ~IF_MASK;
+    }
+    env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK);
+}
+
+#ifdef TARGET_X86_64
+
+#define PUSHQ(sp, val)\
+{\
+    sp -= 8;\
+    cpu_stq_kernel(env, sp, (val));\
+}
+
+#define POPQ(sp, val)\
+{\
+    val = cpu_ldq_kernel(env, sp);\
+    sp += 8;\
+}
+
+static inline target_ulong get_rsp_from_tss(CPUX86State *env, int level)
+{
+    int index;
+
+#if 0
+    printf("TR: base=" TARGET_FMT_lx " limit=%x\n",
+           env->tr.base, env->tr.limit);
+#endif
+
+    if (!(env->tr.flags & DESC_P_MASK))
+        cpu_abort(env, "invalid tss");
+    index = 8 * level + 4;
+    if ((index + 7) > env->tr.limit)
+        raise_exception_err(env, EXCP0A_TSS, env->tr.selector & 0xfffc);
+    return cpu_ldq_kernel(env, env->tr.base + index);
+}
+
+/* 64 bit interrupt */
+static void do_interrupt64(CPUX86State *env,
+                           int intno, int is_int, int error_code,
+                           target_ulong next_eip, int is_hw)
+{
+    SegmentCache *dt;
+    target_ulong ptr;
+    int type, dpl, selector, cpl, ist;
+    int has_error_code, new_stack;
+    uint32_t e1, e2, e3, ss;
+    target_ulong old_eip, esp, offset;
+
+    has_error_code = 0;
+    if (!is_int && !is_hw)
+        has_error_code = exeption_has_error_code(intno);
+    if (is_int)
+        old_eip = next_eip;
+    else
+        old_eip = env->eip;
+
+    dt = &env->idt;
+    if (intno * 16 + 15 > dt->limit)
+        raise_exception_err(env, EXCP0D_GPF, intno * 16 + 2);
+    ptr = dt->base + intno * 16;
+    e1 = cpu_ldl_kernel(env, ptr);
+    e2 = cpu_ldl_kernel(env, ptr + 4);
+    e3 = cpu_ldl_kernel(env, ptr + 8);
+    /* check gate type */
+    type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
+    switch(type) {
+    case 14: /* 386 interrupt gate */
+    case 15: /* 386 trap gate */
+        break;
+    default:
+        raise_exception_err(env, EXCP0D_GPF, intno * 16 + 2);
+        break;
+    }
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    cpl = env->hflags & HF_CPL_MASK;
+    /* check privilege if software int */
+    if (is_int && dpl < cpl)
+        raise_exception_err(env, EXCP0D_GPF, intno * 16 + 2);
+    /* check valid bit */
+    if (!(e2 & DESC_P_MASK))
+        raise_exception_err(env, EXCP0B_NOSEG, intno * 16 + 2);
+    selector = e1 >> 16;
+    offset = ((target_ulong)e3 << 32) | (e2 & 0xffff0000) | (e1 & 0x0000ffff);
+    ist = e2 & 7;
+    if ((selector & 0xfffc) == 0)
+        raise_exception_err(env, EXCP0D_GPF, 0);
+
+    if (load_segment(env, &e1, &e2, selector) != 0)
+        raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
+    if (!(e2 & DESC_S_MASK) || !(e2 & (DESC_CS_MASK)))
+        raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    if (dpl > cpl)
+        raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
+    if (!(e2 & DESC_P_MASK))
+        raise_exception_err(env, EXCP0B_NOSEG, selector & 0xfffc);
+    if (!(e2 & DESC_L_MASK) || (e2 & DESC_B_MASK))
+        raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
+    if ((!(e2 & DESC_C_MASK) && dpl < cpl) || ist != 0) {
+        /* to inner privilege */
+        if (ist != 0)
+            esp = get_rsp_from_tss(env, ist + 3);
+        else
+            esp = get_rsp_from_tss(env, dpl);
+        esp &= ~0xfLL; /* align stack */
+        ss = 0;
+        new_stack = 1;
+    } else if ((e2 & DESC_C_MASK) || dpl == cpl) {
+        /* to same privilege */
+        if (env->eflags & VM_MASK)
+            raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
+        new_stack = 0;
+        if (ist != 0)
+            esp = get_rsp_from_tss(env, ist + 3);
+        else
+            esp = ESP;
+        esp &= ~0xfLL; /* align stack */
+        dpl = cpl;
+    } else {
+        raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
+        new_stack = 0; /* avoid warning */
+        esp = 0; /* avoid warning */
+    }
+
+    PUSHQ(esp, env->segs[R_SS].selector);
+    PUSHQ(esp, ESP);
+    PUSHQ(esp, cpu_compute_eflags(env));
+    PUSHQ(esp, env->segs[R_CS].selector);
+    PUSHQ(esp, old_eip);
+    if (has_error_code) {
+        PUSHQ(esp, error_code);
+    }
+
+    if (new_stack) {
+        ss = 0 | dpl;
+        cpu_x86_load_seg_cache(env, R_SS, ss, 0, 0, 0);
+    }
+    ESP = esp;
+
+    selector = (selector & ~3) | dpl;
+    cpu_x86_load_seg_cache(env, R_CS, selector,
+                   get_seg_base(e1, e2),
+                   get_seg_limit(e1, e2),
+                   e2);
+    cpu_x86_set_cpl(env, dpl);
+    env->eip = offset;
+
+    /* interrupt gates clear the IF mask */
+    if ((type & 1) == 0) {
+        env->eflags &= ~IF_MASK;
+    }
+    env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK);
+}
+#endif
+
+#ifdef TARGET_X86_64
+#if defined(CONFIG_USER_ONLY)
+void helper_syscall(CPUX86State *env, int next_eip_addend)
+{
+    env->exception_index = EXCP_SYSCALL;
+    env->exception_next_eip = env->eip + next_eip_addend;
+    cpu_loop_exit(env);
+}
+#else
+void helper_syscall(CPUX86State *env, int next_eip_addend)
+{
+    int selector;
+
+    if (!(env->efer & MSR_EFER_SCE)) {
+        raise_exception_err(env, EXCP06_ILLOP, 0);
+    }
+    selector = (env->star >> 32) & 0xffff;
+    if (env->hflags & HF_LMA_MASK) {
+        int code64;
+
+        ECX = env->eip + next_eip_addend;
+        env->regs[11] = cpu_compute_eflags(env);
+
+        code64 = env->hflags & HF_CS64_MASK;
+
+        cpu_x86_set_cpl(env, 0);
+        cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
+                           0, 0xffffffff,
+                               DESC_G_MASK | DESC_P_MASK |
+                               DESC_S_MASK |
+                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
+        cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK |
+                               DESC_W_MASK | DESC_A_MASK);
+        env->eflags &= ~env->fmask;
+        cpu_load_eflags(env, env->eflags, 0);
+        if (code64)
+            env->eip = env->lstar;
+        else
+            env->eip = env->cstar;
+    } else {
+        ECX = (uint32_t)(env->eip + next_eip_addend);
+
+        cpu_x86_set_cpl(env, 0);
+        cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
+                           0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK |
+                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
+        cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK |
+                               DESC_W_MASK | DESC_A_MASK);
+        env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK);
+        env->eip = (uint32_t)env->star;
+    }
+}
+#endif
+#endif
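/* A minimal sketch of how helper_syscall() above derives its targets from
 * the MSRs. The struct and helper are hypothetical; the bit positions mirror
 * the code (STAR[47:32] selects the kernel CS, CS + 8 is the kernel SS).
 */
#include <stdint.h>

struct syscall_targets {
    uint16_t kernel_cs;   /* STAR[47:32] */
    uint16_t kernel_ss;   /* kernel_cs + 8 */
    uint64_t rip64;       /* LSTAR, used when the caller runs 64-bit code */
    uint64_t rip32;       /* CSTAR, used for compatibility-mode callers */
    uint32_t eip_legacy;  /* STAR[31:0], used when EFER.LMA is clear */
};

static struct syscall_targets syscall_targets_from_msrs(uint64_t star,
                                                        uint64_t lstar,
                                                        uint64_t cstar)
{
    struct syscall_targets t;
    t.kernel_cs = (uint16_t)((star >> 32) & 0xffff);
    t.kernel_ss = (uint16_t)(t.kernel_cs + 8);
    t.rip64 = lstar;
    t.rip32 = cstar;
    t.eip_legacy = (uint32_t)star;
    return t;
}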
+
+#ifdef TARGET_X86_64
+void helper_sysret(CPUX86State *env, int dflag)
+{
+    int cpl, selector;
+
+    if (!(env->efer & MSR_EFER_SCE)) {
+        raise_exception_err(env, EXCP06_ILLOP, 0);
+    }
+    cpl = env->hflags & HF_CPL_MASK;
+    if (!(env->cr[0] & CR0_PE_MASK) || cpl != 0) {
+        raise_exception_err(env, EXCP0D_GPF, 0);
+    }
+    selector = (env->star >> 48) & 0xffff;
+    if (env->hflags & HF_LMA_MASK) {
+        if (dflag == 2) {
+            cpu_x86_load_seg_cache(env, R_CS, (selector + 16) | 3,
+                                   0, 0xffffffff,
+                                   DESC_G_MASK | DESC_P_MASK |
+                                   DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+                                   DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK |
+                                   DESC_L_MASK);
+            env->eip = ECX;
+        } else {
+            cpu_x86_load_seg_cache(env, R_CS, selector | 3,
+                                   0, 0xffffffff,
+                                   DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                                   DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+                                   DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
+            env->eip = (uint32_t)ECX;
+        }
+        cpu_x86_load_seg_cache(env, R_SS, selector + 8,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+                               DESC_W_MASK | DESC_A_MASK);
+        cpu_load_eflags(env, (uint32_t)(env->regs[11]), TF_MASK | AC_MASK | ID_MASK |
+                    IF_MASK | IOPL_MASK | VM_MASK | RF_MASK | NT_MASK);
+        cpu_x86_set_cpl(env, 3);
+    } else {
+        cpu_x86_load_seg_cache(env, R_CS, selector | 3,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
+        env->eip = (uint32_t)ECX;
+        cpu_x86_load_seg_cache(env, R_SS, selector + 8,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+                               DESC_W_MASK | DESC_A_MASK);
+        env->eflags |= IF_MASK;
+        cpu_x86_set_cpl(env, 3);
+    }
+}
+#endif
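/* A minimal sketch of the SYSRET selector arithmetic used above, written as
 * a hypothetical helper; the offsets mirror the code (STAR[63:48] is the
 * user selector base, +16 for the 64-bit CS, +8 for SS, RPL forced to 3 on
 * the code selector).
 */
#include <stdint.h>

static void sysret_selectors(uint64_t star, int to_64bit,
                             uint16_t *user_cs, uint16_t *user_ss)
{
    uint16_t base = (uint16_t)((star >> 48) & 0xffff);
    *user_cs = (uint16_t)((to_64bit ? base + 16 : base) | 3);
    *user_ss = (uint16_t)(base + 8);
}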
+
+/* real mode interrupt */
+static void do_interrupt_real(CPUX86State *env,
+                              int intno, int is_int, int error_code,
+                              unsigned int next_eip)
+{
+    SegmentCache *dt;
+    target_ulong ptr, ssp;
+    int selector;
+    uint32_t offset, esp;
+    uint32_t old_cs, old_eip;
+
+    /* real mode (simpler!) */
+    dt = &env->idt;
+    if (intno * 4 + 3 > dt->limit)
+        raise_exception_err(env, EXCP0D_GPF, intno * 8 + 2);
+    ptr = dt->base + intno * 4;
+    offset = cpu_lduw_kernel(env, ptr);
+    selector = cpu_lduw_kernel(env, ptr + 2);
+    esp = ESP;
+    ssp = env->segs[R_SS].base;
+    if (is_int)
+        old_eip = next_eip;
+    else
+        old_eip = env->eip;
+    old_cs = env->segs[R_CS].selector;
+    /* XXX: use SS segment size ? */
+    PUSHW(ssp, esp, 0xffff, cpu_compute_eflags(env));
+    PUSHW(ssp, esp, 0xffff, old_cs);
+    PUSHW(ssp, esp, 0xffff, old_eip);
+
+    /* update processor state */
+    ESP = (ESP & ~0xffff) | (esp & 0xffff);
+    env->eip = offset;
+    env->segs[R_CS].selector = selector;
+    env->segs[R_CS].base = (selector << 4);
+    env->eflags &= ~(IF_MASK | TF_MASK | AC_MASK | RF_MASK);
+}
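/* A minimal sketch of the real-mode IVT lookup performed above: each vector
 * occupies 4 bytes at idt.base, offset word first, then segment word. The
 * read16 callback is a hypothetical stand-in for cpu_lduw_kernel().
 */
#include <stdint.h>

static void ivt_lookup(uint32_t idt_base, int vector,
                       uint16_t (*read16)(uint32_t addr),
                       uint16_t *cs, uint16_t *ip)
{
    uint32_t entry = idt_base + vector * 4;
    *ip = read16(entry);      /* new IP, low word of the entry */
    *cs = read16(entry + 2);  /* new CS, high word of the entry */
}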
+
+#if defined(CONFIG_USER_ONLY)
+/* fake user mode interrupt */
+static void do_interrupt_user(CPUX86State *env,
+                              int intno, int is_int, int error_code,
+                              target_ulong next_eip)
+{
+    SegmentCache *dt;
+    target_ulong ptr;
+    int dpl, cpl, shift;
+    uint32_t e2;
+
+    dt = &env->idt;
+    if (env->hflags & HF_LMA_MASK) {
+        shift = 4;
+    } else {
+        shift = 3;
+    }
+    ptr = dt->base + (intno << shift);
+    e2 = cpu_ldl_kernel(env, ptr + 4);
+
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    cpl = env->hflags & HF_CPL_MASK;
+    /* check privilege if software int */
+    if (is_int && dpl < cpl)
+        raise_exception_err(env, EXCP0D_GPF, (intno << shift) + 2);
+
+    /* Since we emulate only user space, we cannot do more than
+       exit the emulation with a suitable exception and error
+       code */
+    if (is_int)
+        EIP = next_eip;
+}
+
+#else
+
+static void handle_even_inj(CPUX86State *env,
+                            int intno, int is_int, int error_code,
+                            int is_hw, int rm)
+{
+    uint32_t event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
+    if (!(event_inj & SVM_EVTINJ_VALID)) {
+            int type;
+            if (is_int)
+                    type = SVM_EVTINJ_TYPE_SOFT;
+            else
+                    type = SVM_EVTINJ_TYPE_EXEPT;
+            event_inj = intno | type | SVM_EVTINJ_VALID;
+            if (!rm && exeption_has_error_code(intno)) {
+                    event_inj |= SVM_EVTINJ_VALID_ERR;
+                    stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err), error_code);
+            }
+            stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj);
+    }
+}
+#endif
+
+/*
+ * Begin execution of an interrupt. is_int is TRUE if coming from
+ * the int instruction. next_eip is the EIP value AFTER the interrupt
+ * instruction. It is only relevant if is_int is TRUE.
+ */
+static void do_interrupt_all(CPUX86State *env,
+                             int intno, int is_int, int error_code,
+                             target_ulong next_eip, int is_hw)
+{
+    if (qemu_loglevel_mask(CPU_LOG_INT)) {
+        if ((env->cr[0] & CR0_PE_MASK)) {
+            static int count;
+            qemu_log("%6d: v=%02x e=%04x i=%d cpl=%d IP=%04x:" TARGET_FMT_lx " pc=" TARGET_FMT_lx " SP=%04x:" TARGET_FMT_lx,
+                    count, intno, error_code, is_int,
+                    env->hflags & HF_CPL_MASK,
+                    env->segs[R_CS].selector, EIP,
+                    (int)env->segs[R_CS].base + EIP,
+                    env->segs[R_SS].selector, ESP);
+            if (intno == 0x0e) {
+                qemu_log(" CR2=" TARGET_FMT_lx, env->cr[2]);
+            } else {
+                qemu_log(" EAX=" TARGET_FMT_lx, EAX);
+            }
+            qemu_log("\n");
+            log_cpu_state(ENV_GET_CPU(env), X86_DUMP_CCOP);
+#if 0
+            {
+                int i;
+                uint8_t *ptr;
+                qemu_log("       code=");
+                ptr = env->segs[R_CS].base + env->eip;
+                for(i = 0; i < 16; i++) {
+                    qemu_log(" %02x", ldub(ptr + i));
+                }
+                qemu_log("\n");
+            }
+#endif
+            count++;
+        }
+    }
+    if (env->cr[0] & CR0_PE_MASK) {
+#if !defined(CONFIG_USER_ONLY)
+        if (env->hflags & HF_SVMI_MASK)
+            handle_even_inj(env, intno, is_int, error_code, is_hw, 0);
+#endif
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_LMA_MASK) {
+            do_interrupt64(env, intno, is_int, error_code, next_eip, is_hw);
+        } else
+#endif
+        {
+            do_interrupt_protected(env, intno, is_int, error_code, next_eip, is_hw);
+        }
+    } else {
+#if !defined(CONFIG_USER_ONLY)
+        if (env->hflags & HF_SVMI_MASK)
+            handle_even_inj(env, intno, is_int, error_code, is_hw, 1);
+#endif
+        do_interrupt_real(env, intno, is_int, error_code, next_eip);
+    }
+
+#if !defined(CONFIG_USER_ONLY)
+    if (env->hflags & HF_SVMI_MASK) {
+        uint32_t event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
+        stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj & ~SVM_EVTINJ_VALID);
+    }
+#endif
+}
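/* A minimal sketch of the delivery-mode choice made in do_interrupt_all()
 * above, written as a hypothetical classifier over the same CR0/hflags bits.
 */
static const char *interrupt_delivery_mode(uint32_t cr0, uint32_t hflags)
{
    if (!(cr0 & CR0_PE_MASK)) {
        return "real";       /* do_interrupt_real() */
    }
#ifdef TARGET_X86_64
    if (hflags & HF_LMA_MASK) {
        return "long";       /* do_interrupt64() */
    }
#endif
    return "protected";      /* do_interrupt_protected() */
}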
+
+void do_interrupt(CPUX86State *env)
+{
+#if defined(CONFIG_USER_ONLY)
+    /* if user mode only, we simulate a fake exception
+       which will be handled outside the cpu execution
+       loop */
+    do_interrupt_user(env,
+                      env->exception_index,
+                      env->exception_is_int,
+                      env->error_code,
+                      env->exception_next_eip);
+    /* successfully delivered */
+    env->old_exception = -1;
+#else
+    /* simulate a real cpu exception. On i386, it can
+       trigger new exceptions, but we do not handle
+       double or triple faults yet. */
+    do_interrupt_all(env,
+                     env->exception_index,
+                     env->exception_is_int,
+                     env->error_code,
+                     env->exception_next_eip, 0);
+    /* successfully delivered */
+    env->old_exception = -1;
+#endif
+}
+
+void do_interrupt_x86_hardirq(CPUX86State *env, int intno, int is_hw)
+{
+    do_interrupt_all(env, intno, 0, 0, 0, is_hw);
+}
+
+/* This should come from sysemu.h - if we could include it here... */
+void qemu_system_reset_request(void);
+
+void helper_enter_level(CPUX86State *env,
+                        int level, int data32, target_ulong t1)
+{
+    target_ulong ssp;
+    uint32_t esp_mask, esp, ebp;
+
+    esp_mask = get_sp_mask(env->segs[R_SS].flags);
+    ssp = env->segs[R_SS].base;
+    ebp = EBP;
+    esp = ESP;
+    if (data32) {
+        /* 32 bit */
+        esp -= 4;
+        while (--level) {
+            esp -= 4;
+            ebp -= 4;
+            cpu_stl_data(env, ssp + (esp & esp_mask),
+                         cpu_ldl_data(env, ssp + (ebp & esp_mask)));
+        }
+        esp -= 4;
+        cpu_stl_data(env, ssp + (esp & esp_mask), t1);
+    } else {
+        /* 16 bit */
+        esp -= 2;
+        while (--level) {
+            esp -= 2;
+            ebp -= 2;
+            cpu_stw_data(env, ssp + (esp & esp_mask),
+                         cpu_lduw_data(env, ssp + (ebp & esp_mask)));
+        }
+        esp -= 2;
+        cpu_stw_data(env, ssp + (esp & esp_mask), t1);
+    }
+}
+
+#ifdef TARGET_X86_64
+void helper_enter64_level(CPUX86State *env,
+                          int level, int data64, target_ulong t1)
+{
+    target_ulong esp, ebp;
+    ebp = EBP;
+    esp = ESP;
+
+    if (data64) {
+        /* 64 bit */
+        esp -= 8;
+        while (--level) {
+            esp -= 8;
+            ebp -= 8;
+            cpu_stq_data(env, esp, cpu_ldq_data(env, ebp));
+        }
+        esp -= 8;
+        cpu_stq_data(env, esp, t1);
+    } else {
+        /* 16 bit */
+        esp -= 2;
+        while (--level) {
+            esp -= 2;
+            ebp -= 2;
+            cpu_stw_data(env, esp, cpu_lduw_data(env, ebp));
+        }
+        esp -= 2;
+        cpu_stw_data(env, esp, t1);
+    }
+}
+#endif
+
+void helper_lldt(CPUX86State *env, int selector)
+{
+    SegmentCache *dt;
+    uint32_t e1, e2;
+    int index, entry_limit;
+    target_ulong ptr;
+
+    selector &= 0xffff;
+    if ((selector & 0xfffc) == 0) {
+        /* XXX: NULL selector case: invalid LDT */
+        env->ldt.base = 0;
+        env->ldt.limit = 0;
+    } else {
+        if (selector & 0x4)
+            raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
+        dt = &env->gdt;
+        index = selector & ~7;
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_LMA_MASK)
+            entry_limit = 15;
+        else
+#endif
+            entry_limit = 7;
+        if ((index + entry_limit) > dt->limit)
+            raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
+        ptr = dt->base + index;
+        e1 = cpu_ldl_kernel(env, ptr);
+        e2 = cpu_ldl_kernel(env, ptr + 4);
+        if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2)
+            raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(env, EXCP0B_NOSEG, selector & 0xfffc);
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_LMA_MASK) {
+            uint32_t e3;
+            e3 = cpu_ldl_kernel(env, ptr + 8);
+            load_seg_cache_raw_dt(&env->ldt, e1, e2);
+            env->ldt.base |= (target_ulong)e3 << 32;
+        } else
+#endif
+        {
+            load_seg_cache_raw_dt(&env->ldt, e1, e2);
+        }
+    }
+    env->ldt.selector = selector;
+}
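/* A minimal sketch of the long-mode descriptor handling above: legacy
 * LDT/TSS descriptors are 8 bytes (entry_limit 7), long-mode system
 * descriptors are 16 bytes (entry_limit 15), and the third dword e3 supplies
 * base bits 63:32. The helper below is hypothetical; the low 32 bits follow
 * the usual descriptor layout assembled by get_seg_base().
 */
#include <stdint.h>

static uint64_t system_desc_base_long_mode(uint32_t e1, uint32_t e2,
                                           uint32_t e3)
{
    uint64_t base = (e1 >> 16) | ((e2 & 0xff) << 16) | (e2 & 0xff000000u);
    return base | ((uint64_t)e3 << 32);
}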
+
+void helper_ltr(CPUX86State *env, int selector)
+{
+    SegmentCache *dt;
+    uint32_t e1, e2;
+    int index, type, entry_limit;
+    target_ulong ptr;
+
+    selector &= 0xffff;
+    if ((selector & 0xfffc) == 0) {
+        /* NULL selector case: invalid TR */
+        env->tr.base = 0;
+        env->tr.limit = 0;
+        env->tr.flags = 0;
+    } else {
+        if (selector & 0x4)
+            raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
+        dt = &env->gdt;
+        index = selector & ~7;
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_LMA_MASK)
+            entry_limit = 15;
+        else
+#endif
+            entry_limit = 7;
+        if ((index + entry_limit) > dt->limit)
+            raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
+        ptr = dt->base + index;
+        e1 = cpu_ldl_kernel(env, ptr);
+        e2 = cpu_ldl_kernel(env, ptr + 4);
+        type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+        if ((e2 & DESC_S_MASK) ||
+            (type != 1 && type != 9))
+            raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(env, EXCP0B_NOSEG, selector & 0xfffc);
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_LMA_MASK) {
+            uint32_t e3, e4;
+            e3 = cpu_ldl_kernel(env, ptr + 8);
+            e4 = cpu_ldl_kernel(env, ptr + 12);
+            if ((e4 >> DESC_TYPE_SHIFT) & 0xf)
+                raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
+            load_seg_cache_raw_dt(&env->tr, e1, e2);
+            env->tr.base |= (target_ulong)e3 << 32;
+        } else
+#endif
+        {
+            load_seg_cache_raw_dt(&env->tr, e1, e2);
+        }
+        e2 |= DESC_TSS_BUSY_MASK;
+        cpu_stl_kernel(env, ptr + 4, e2);
+    }
+    env->tr.selector = selector;
+}
+
+/* only works if protected mode and not VM86. seg_reg must be != R_CS */
+void helper_load_seg(CPUX86State *env, int seg_reg, int selector)
+{
+    uint32_t e1, e2;
+    int cpl, dpl, rpl;
+    SegmentCache *dt;
+    int index;
+    target_ulong ptr;
+
+    selector &= 0xffff;
+    cpl = env->hflags & HF_CPL_MASK;
+    if ((selector & 0xfffc) == 0) {
+        /* null selector case */
+        if (seg_reg == R_SS
+#ifdef TARGET_X86_64
+            && (!(env->hflags & HF_CS64_MASK) || cpl == 3)
+#endif
+            )
+            raise_exception_err(env, EXCP0D_GPF, 0);
+        cpu_x86_load_seg_cache(env, seg_reg, selector, 0, 0, 0);
+    } else {
+
+        if (selector & 0x4)
+            dt = &env->ldt;
+        else
+            dt = &env->gdt;
+        index = selector & ~7;
+        if ((index + 7) > dt->limit)
+            raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
+        ptr = dt->base + index;
+        e1 = cpu_ldl_kernel(env, ptr);
+        e2 = cpu_ldl_kernel(env, ptr + 4);
+
+        if (!(e2 & DESC_S_MASK))
+            raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
+        rpl = selector & 3;
+        dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+        if (seg_reg == R_SS) {
+            /* must be writable segment */
+            if ((e2 & DESC_CS_MASK) || !(e2 & DESC_W_MASK))
+                raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
+            if (rpl != cpl || dpl != cpl)
+                raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
+        } else {
+            /* must be readable segment */
+            if ((e2 & (DESC_CS_MASK | DESC_R_MASK)) == DESC_CS_MASK)
+                raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
+
+            if (!(e2 & DESC_CS_MASK) || !(e2 & DESC_C_MASK)) {
+                /* if not conforming code, test rights */
+                if (dpl < cpl || dpl < rpl)
+                    raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
+            }
+        }
+
+        if (!(e2 & DESC_P_MASK)) {
+            if (seg_reg == R_SS)
+                raise_exception_err(env, EXCP0C_STACK, selector & 0xfffc);
+            else
+                raise_exception_err(env, EXCP0B_NOSEG, selector & 0xfffc);
+        }
+
+        /* set the access bit if not already set */
+        if (!(e2 & DESC_A_MASK)) {
+            e2 |= DESC_A_MASK;
+            cpu_stl_kernel(env, ptr + 4, e2);
+        }
+
+        cpu_x86_load_seg_cache(env, seg_reg, selector,
+                       get_seg_base(e1, e2),
+                       get_seg_limit(e1, e2),
+                       e2);
+#if 0
+        qemu_log("load_seg: sel=0x%04x base=0x%08lx limit=0x%08lx flags=%08x\n",
+                selector, (unsigned long)sc->base, sc->limit, sc->flags);
+#endif
+    }
+}
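/* A minimal sketch of the privilege rule enforced above when loading a data
 * or non-conforming code segment into ES/DS/FS/GS: the descriptor DPL must
 * be numerically >= both CPL and the selector's RPL (SS is stricter and
 * requires RPL == DPL == CPL). Hypothetical helper, for illustration only.
 */
static int data_seg_access_ok(int dpl, int cpl, int rpl)
{
    return dpl >= cpl && dpl >= rpl;
}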
+
+/* protected mode jump */
+void helper_ljmp_protected(CPUX86State *env,
+                           int new_cs, target_ulong new_eip,
+                           int next_eip_addend)
+{
+    int gate_cs, type;
+    uint32_t e1, e2, cpl, dpl, rpl, limit;
+    target_ulong next_eip;
+
+    if ((new_cs & 0xfffc) == 0)
+        raise_exception_err(env, EXCP0D_GPF, 0);
+    if (load_segment(env, &e1, &e2, new_cs) != 0)
+        raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc);
+    cpl = env->hflags & HF_CPL_MASK;
+    if (e2 & DESC_S_MASK) {
+        if (!(e2 & DESC_CS_MASK))
+            raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc);
+        dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+        if (e2 & DESC_C_MASK) {
+            /* conforming code segment */
+            if (dpl > cpl)
+                raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc);
+        } else {
+            /* non conforming code segment */
+            rpl = new_cs & 3;
+            if (rpl > cpl)
+                raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc);
+            if (dpl != cpl)
+                raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc);
+        }
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(env, EXCP0B_NOSEG, new_cs & 0xfffc);
+        limit = get_seg_limit(e1, e2);
+        if (new_eip > limit &&
+            !(env->hflags & HF_LMA_MASK) && !(e2 & DESC_L_MASK))
+            raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc);
+        cpu_x86_load_seg_cache(env, R_CS, (new_cs & 0xfffc) | cpl,
+                       get_seg_base(e1, e2), limit, e2);
+        EIP = new_eip;
+    } else {
+        /* jump to call or task gate */
+        dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+        rpl = new_cs & 3;
+        cpl = env->hflags & HF_CPL_MASK;
+        type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+        switch(type) {
+        case 1: /* 286 TSS */
+        case 9: /* 386 TSS */
+        case 5: /* task gate */
+            if (dpl < cpl || dpl < rpl)
+                raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc);
+            next_eip = env->eip + next_eip_addend;
+            switch_tss(env, new_cs, e1, e2, SWITCH_TSS_JMP, next_eip);
+            CC_OP = CC_OP_EFLAGS;
+            break;
+        case 4: /* 286 call gate */
+        case 12: /* 386 call gate */
+            if ((dpl < cpl) || (dpl < rpl))
+                raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc);
+            if (!(e2 & DESC_P_MASK))
+                raise_exception_err(env, EXCP0B_NOSEG, new_cs & 0xfffc);
+            gate_cs = e1 >> 16;
+            new_eip = (e1 & 0xffff);
+            if (type == 12)
+                new_eip |= (e2 & 0xffff0000);
+            if (load_segment(env, &e1, &e2, gate_cs) != 0)
+                raise_exception_err(env, EXCP0D_GPF, gate_cs & 0xfffc);
+            dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+            /* must be code segment */
+            if (((e2 & (DESC_S_MASK | DESC_CS_MASK)) !=
+                 (DESC_S_MASK | DESC_CS_MASK)))
+                raise_exception_err(env, EXCP0D_GPF, gate_cs & 0xfffc);
+            if (((e2 & DESC_C_MASK) && (dpl > cpl)) ||
+                (!(e2 & DESC_C_MASK) && (dpl != cpl)))
+                raise_exception_err(env, EXCP0D_GPF, gate_cs & 0xfffc);
+            if (!(e2 & DESC_P_MASK))
+                raise_exception_err(env, EXCP0D_GPF, gate_cs & 0xfffc);
+            limit = get_seg_limit(e1, e2);
+            if (new_eip > limit)
+                raise_exception_err(env, EXCP0D_GPF, 0);
+            cpu_x86_load_seg_cache(env, R_CS, (gate_cs & 0xfffc) | cpl,
+                                   get_seg_base(e1, e2), limit, e2);
+            EIP = new_eip;
+            break;
+        default:
+            raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc);
+            break;
+        }
+    }
+}
+
+/* real mode call */
+void helper_lcall_real(CPUX86State *env,
+                       int new_cs, target_ulong new_eip1,
+                       int shift, int next_eip)
+{
+    int new_eip;
+    uint32_t esp, esp_mask;
+    target_ulong ssp;
+
+    new_eip = new_eip1;
+    esp = ESP;
+    esp_mask = get_sp_mask(env->segs[R_SS].flags);
+    ssp = env->segs[R_SS].base;
+    if (shift) {
+        PUSHL(ssp, esp, esp_mask, env->segs[R_CS].selector);
+        PUSHL(ssp, esp, esp_mask, next_eip);
+    } else {
+        PUSHW(ssp, esp, esp_mask, env->segs[R_CS].selector);
+        PUSHW(ssp, esp, esp_mask, next_eip);
+    }
+
+    SET_ESP(esp, esp_mask);
+    env->eip = new_eip;
+    env->segs[R_CS].selector = new_cs;
+    env->segs[R_CS].base = (new_cs << 4);
+}
+
+/* protected mode call */
+void helper_lcall_protected(CPUX86State *env,
+                            int new_cs, target_ulong new_eip,
+                            int shift, int next_eip_addend)
+{
+    int new_stack, i;
+    uint32_t e1, e2, cpl, dpl, rpl, selector, offset, param_count;
+    uint32_t ss = 0, ss_e1 = 0, ss_e2 = 0, sp, type, ss_dpl, sp_mask;
+    uint32_t val, limit, old_sp_mask;
+    target_ulong ssp, old_ssp, next_eip;
+
+    next_eip = env->eip + next_eip_addend;
+    LOG_PCALL("lcall %04x:%08x s=%d\n", new_cs, (uint32_t)new_eip, shift);
+    LOG_PCALL_STATE(env);
+    if ((new_cs & 0xfffc) == 0)
+        raise_exception_err(env, EXCP0D_GPF, 0);
+    if (load_segment(env, &e1, &e2, new_cs) != 0)
+        raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc);
+    cpl = env->hflags & HF_CPL_MASK;
+    LOG_PCALL("desc=%08x:%08x\n", e1, e2);
+    if (e2 & DESC_S_MASK) {
+        if (!(e2 & DESC_CS_MASK))
+            raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc);
+        dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+        if (e2 & DESC_C_MASK) {
+            /* conforming code segment */
+            if (dpl > cpl)
+                raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc);
+        } else {
+            /* non conforming code segment */
+            rpl = new_cs & 3;
+            if (rpl > cpl)
+                raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc);
+            if (dpl != cpl)
+                raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc);
+        }
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(env, EXCP0B_NOSEG, new_cs & 0xfffc);
+
+#ifdef TARGET_X86_64
+        /* XXX: check 16/32 bit cases in long mode */
+        if (shift == 2) {
+            target_ulong rsp;
+            /* 64 bit case */
+            rsp = ESP;
+            PUSHQ(rsp, env->segs[R_CS].selector);
+            PUSHQ(rsp, next_eip);
+            /* from this point, not restartable */
+            ESP = rsp;
+            cpu_x86_load_seg_cache(env, R_CS, (new_cs & 0xfffc) | cpl,
+                                   get_seg_base(e1, e2),
+                                   get_seg_limit(e1, e2), e2);
+            EIP = new_eip;
+        } else
+#endif
+        {
+            sp = ESP;
+            sp_mask = get_sp_mask(env->segs[R_SS].flags);
+            ssp = env->segs[R_SS].base;
+            if (shift) {
+                PUSHL(ssp, sp, sp_mask, env->segs[R_CS].selector);
+                PUSHL(ssp, sp, sp_mask, next_eip);
+            } else {
+                PUSHW(ssp, sp, sp_mask, env->segs[R_CS].selector);
+                PUSHW(ssp, sp, sp_mask, next_eip);
+            }
+
+            limit = get_seg_limit(e1, e2);
+            if (new_eip > limit)
+                raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc);
+            /* from this point, not restartable */
+            SET_ESP(sp, sp_mask);
+            cpu_x86_load_seg_cache(env, R_CS, (new_cs & 0xfffc) | cpl,
+                                   get_seg_base(e1, e2), limit, e2);
+            EIP = new_eip;
+        }
+    } else {
+        /* check gate type */
+        type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
+        dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+        rpl = new_cs & 3;
+        switch(type) {
+        case 1: /* available 286 TSS */
+        case 9: /* available 386 TSS */
+        case 5: /* task gate */
+            if (dpl < cpl || dpl < rpl)
+                raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc);
+            switch_tss(env, new_cs, e1, e2, SWITCH_TSS_CALL, next_eip);
+            CC_OP = CC_OP_EFLAGS;
+            return;
+        case 4: /* 286 call gate */
+        case 12: /* 386 call gate */
+            break;
+        default:
+            raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc);
+            break;
+        }
+        shift = type >> 3;
+
+        if (dpl < cpl || dpl < rpl)
+            raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc);
+        /* check valid bit */
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(env, EXCP0B_NOSEG, new_cs & 0xfffc);
+        selector = e1 >> 16;
+        offset = (e2 & 0xffff0000) | (e1 & 0x0000ffff);
+        param_count = e2 & 0x1f;
+        if ((selector & 0xfffc) == 0)
+            raise_exception_err(env, EXCP0D_GPF, 0);
+
+        if (load_segment(env, &e1, &e2, selector) != 0)
+            raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
+        if (!(e2 & DESC_S_MASK) || !(e2 & (DESC_CS_MASK)))
+            raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
+        dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+        if (dpl > cpl)
+            raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(env, EXCP0B_NOSEG, selector & 0xfffc);
+
+        if (!(e2 & DESC_C_MASK) && dpl < cpl) {
+            /* to inner privilege */
+            get_ss_esp_from_tss(env, &ss, &sp, dpl);
+            LOG_PCALL("new ss:esp=%04x:%08x param_count=%d ESP=" TARGET_FMT_lx "\n",
+                        ss, sp, param_count, ESP);
+            if ((ss & 0xfffc) == 0)
+                raise_exception_err(env, EXCP0A_TSS, ss & 0xfffc);
+            if ((ss & 3) != dpl)
+                raise_exception_err(env, EXCP0A_TSS, ss & 0xfffc);
+            if (load_segment(env, &ss_e1, &ss_e2, ss) != 0)
+                raise_exception_err(env, EXCP0A_TSS, ss & 0xfffc);
+            ss_dpl = (ss_e2 >> DESC_DPL_SHIFT) & 3;
+            if (ss_dpl != dpl)
+                raise_exception_err(env, EXCP0A_TSS, ss & 0xfffc);
+            if (!(ss_e2 & DESC_S_MASK) ||
+                (ss_e2 & DESC_CS_MASK) ||
+                !(ss_e2 & DESC_W_MASK))
+                raise_exception_err(env, EXCP0A_TSS, ss & 0xfffc);
+            if (!(ss_e2 & DESC_P_MASK))
+                raise_exception_err(env, EXCP0A_TSS, ss & 0xfffc);
+
+            //            push_size = ((param_count * 2) + 8) << shift;
+
+            old_sp_mask = get_sp_mask(env->segs[R_SS].flags);
+            old_ssp = env->segs[R_SS].base;
+
+            sp_mask = get_sp_mask(ss_e2);
+            ssp = get_seg_base(ss_e1, ss_e2);
+            if (shift) {
+                PUSHL(ssp, sp, sp_mask, env->segs[R_SS].selector);
+                PUSHL(ssp, sp, sp_mask, ESP);
+                for(i = param_count - 1; i >= 0; i--) {
+                    val = cpu_ldl_kernel(env, old_ssp + ((ESP + i * 4) & old_sp_mask));
+                    PUSHL(ssp, sp, sp_mask, val);
+                }
+            } else {
+                PUSHW(ssp, sp, sp_mask, env->segs[R_SS].selector);
+                PUSHW(ssp, sp, sp_mask, ESP);
+                for(i = param_count - 1; i >= 0; i--) {
+                    val = cpu_lduw_kernel(env, old_ssp + ((ESP + i * 2) & old_sp_mask));
+                    PUSHW(ssp, sp, sp_mask, val);
+                }
+            }
+            new_stack = 1;
+        } else {
+            /* to same privilege */
+            sp = ESP;
+            sp_mask = get_sp_mask(env->segs[R_SS].flags);
+            ssp = env->segs[R_SS].base;
+            //            push_size = (4 << shift);
+            new_stack = 0;
+        }
+
+        if (shift) {
+            PUSHL(ssp, sp, sp_mask, env->segs[R_CS].selector);
+            PUSHL(ssp, sp, sp_mask, next_eip);
+        } else {
+            PUSHW(ssp, sp, sp_mask, env->segs[R_CS].selector);
+            PUSHW(ssp, sp, sp_mask, next_eip);
+        }
+
+        /* from this point, not restartable */
+
+        if (new_stack) {
+            ss = (ss & ~3) | dpl;
+            cpu_x86_load_seg_cache(env, R_SS, ss,
+                                   ssp,
+                                   get_seg_limit(ss_e1, ss_e2),
+                                   ss_e2);
+        }
+
+        selector = (selector & ~3) | dpl;
+        cpu_x86_load_seg_cache(env, R_CS, selector,
+                       get_seg_base(e1, e2),
+                       get_seg_limit(e1, e2),
+                       e2);
+        cpu_x86_set_cpl(env, dpl);
+        SET_ESP(sp, sp_mask);
+        EIP = offset;
+    }
+}
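/* A minimal sketch of how the call-gate descriptor words are unpacked in
 * helper_lcall_protected() above. The struct is hypothetical; the field
 * extraction mirrors the code (target selector from e1[31:16], offset from
 * e1[15:0] plus e2[31:16], parameter count from e2[4:0]).
 */
#include <stdint.h>

struct call_gate {
    uint16_t selector;
    uint32_t offset;
    int param_count;
};

static struct call_gate unpack_call_gate(uint32_t e1, uint32_t e2)
{
    struct call_gate g;
    g.selector = (uint16_t)(e1 >> 16);
    g.offset = (e2 & 0xffff0000u) | (e1 & 0x0000ffffu);
    g.param_count = (int)(e2 & 0x1f);
    return g;
}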
+
+/* real and vm86 mode iret */
+void helper_iret_real(CPUX86State *env, int shift)
+{
+    uint32_t sp, new_cs, new_eip, new_eflags, sp_mask;
+    target_ulong ssp;
+    int eflags_mask;
+
+    sp_mask = 0xffff; /* XXXX: use SS segment size ? */
+    sp = ESP;
+    ssp = env->segs[R_SS].base;
+    if (shift == 1) {
+        /* 32 bits */
+        POPL(ssp, sp, sp_mask, new_eip);
+        POPL(ssp, sp, sp_mask, new_cs);
+        new_cs &= 0xffff;
+        POPL(ssp, sp, sp_mask, new_eflags);
+    } else {
+        /* 16 bits */
+        POPW(ssp, sp, sp_mask, new_eip);
+        POPW(ssp, sp, sp_mask, new_cs);
+        POPW(ssp, sp, sp_mask, new_eflags);
+    }
+    ESP = (ESP & ~sp_mask) | (sp & sp_mask);
+    env->segs[R_CS].selector = new_cs;
+    env->segs[R_CS].base = (new_cs << 4);
+    env->eip = new_eip;
+    if (env->eflags & VM_MASK)
+        eflags_mask = TF_MASK | AC_MASK | ID_MASK | IF_MASK | RF_MASK | NT_MASK;
+    else
+        eflags_mask = TF_MASK | AC_MASK | ID_MASK | IF_MASK | IOPL_MASK | RF_MASK | NT_MASK;
+    if (shift == 0)
+        eflags_mask &= 0xffff;
+    cpu_load_eflags(env, new_eflags, eflags_mask);
+    env->hflags2 &= ~HF2_NMI_MASK;
+}
+
+static inline void validate_seg(CPUX86State *env, int seg_reg, int cpl)
+{
+    int dpl;
+    uint32_t e2;
+
+    /* XXX: on x86_64, we do not want to nullify FS and GS because
+       they may still contain a valid base. I would be interested to
+       know how a real x86_64 CPU behaves */
+    if ((seg_reg == R_FS || seg_reg == R_GS) &&
+        (env->segs[seg_reg].selector & 0xfffc) == 0)
+        return;
+
+    e2 = env->segs[seg_reg].flags;
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    if (!(e2 & DESC_CS_MASK) || !(e2 & DESC_C_MASK)) {
+        /* data or non conforming code segment */
+        if (dpl < cpl) {
+            cpu_x86_load_seg_cache(env, seg_reg, 0, 0, 0, 0);
+        }
+    }
+}
+
+/* protected mode iret */
+static inline void helper_ret_protected(CPUX86State *env,
+                                        int shift, int is_iret, int addend)
+{
+    uint32_t new_cs, new_eflags, new_ss;
+    uint32_t new_es, new_ds, new_fs, new_gs;
+    uint32_t e1, e2, ss_e1, ss_e2;
+    int cpl, dpl, rpl, eflags_mask, iopl;
+    target_ulong ssp, sp, new_eip, new_esp, sp_mask;
+
+#ifdef TARGET_X86_64
+    if (shift == 2)
+        sp_mask = -1;
+    else
+#endif
+        sp_mask = get_sp_mask(env->segs[R_SS].flags);
+    sp = ESP;
+    ssp = env->segs[R_SS].base;
+    new_eflags = 0; /* avoid warning */
+#ifdef TARGET_X86_64
+    if (shift == 2) {
+        POPQ(sp, new_eip);
+        POPQ(sp, new_cs);
+        new_cs &= 0xffff;
+        if (is_iret) {
+            POPQ(sp, new_eflags);
+        }
+    } else
+#endif
+    if (shift == 1) {
+        /* 32 bits */
+        POPL(ssp, sp, sp_mask, new_eip);
+        POPL(ssp, sp, sp_mask, new_cs);
+        new_cs &= 0xffff;
+        if (is_iret) {
+            POPL(ssp, sp, sp_mask, new_eflags);
+            if (new_eflags & VM_MASK)
+                goto return_to_vm86;
+        }
+    } else {
+        /* 16 bits */
+        POPW(ssp, sp, sp_mask, new_eip);
+        POPW(ssp, sp, sp_mask, new_cs);
+        if (is_iret)
+            POPW(ssp, sp, sp_mask, new_eflags);
+    }
+    LOG_PCALL("lret new %04x:" TARGET_FMT_lx " s=%d addend=0x%x\n",
+              new_cs, new_eip, shift, addend);
+    LOG_PCALL_STATE(env);
+    if ((new_cs & 0xfffc) == 0)
+        raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc);
+    if (load_segment(env, &e1, &e2, new_cs) != 0)
+        raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc);
+    if (!(e2 & DESC_S_MASK) ||
+        !(e2 & DESC_CS_MASK))
+        raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc);
+    cpl = env->hflags & HF_CPL_MASK;
+    rpl = new_cs & 3;
+    if (rpl < cpl)
+        raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc);
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    if (e2 & DESC_C_MASK) {
+        if (dpl > rpl)
+            raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc);
+    } else {
+        if (dpl != rpl)
+            raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc);
+    }
+    if (!(e2 & DESC_P_MASK))
+        raise_exception_err(env, EXCP0B_NOSEG, new_cs & 0xfffc);
+
+    sp += addend;
+    if (rpl == cpl && (!(env->hflags & HF_CS64_MASK) ||
+                       ((env->hflags & HF_CS64_MASK) && !is_iret))) {
+        /* return to same privilege level */
+        cpu_x86_load_seg_cache(env, R_CS, new_cs,
+                       get_seg_base(e1, e2),
+                       get_seg_limit(e1, e2),
+                       e2);
+    } else {
+        /* return to different privilege level */
+#ifdef TARGET_X86_64
+        if (shift == 2) {
+            POPQ(sp, new_esp);
+            POPQ(sp, new_ss);
+            new_ss &= 0xffff;
+        } else
+#endif
+        if (shift == 1) {
+            /* 32 bits */
+            POPL(ssp, sp, sp_mask, new_esp);
+            POPL(ssp, sp, sp_mask, new_ss);
+            new_ss &= 0xffff;
+        } else {
+            /* 16 bits */
+            POPW(ssp, sp, sp_mask, new_esp);
+            POPW(ssp, sp, sp_mask, new_ss);
+        }
+        LOG_PCALL("new ss:esp=%04x:" TARGET_FMT_lx "\n",
+                    new_ss, new_esp);
+        if ((new_ss & 0xfffc) == 0) {
+#ifdef TARGET_X86_64
+            /* NULL ss is allowed in long mode if cpl != 3 */
+            /* XXX: test CS64 ? */
+            if ((env->hflags & HF_LMA_MASK) && rpl != 3) {
+                cpu_x86_load_seg_cache(env, R_SS, new_ss,
+                                       0, 0xffffffff,
+                                       DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                                       DESC_S_MASK | (rpl << DESC_DPL_SHIFT) |
+                                       DESC_W_MASK | DESC_A_MASK);
+                ss_e2 = DESC_B_MASK; /* XXX: should not be needed ? */
+            } else
+#endif
+            {
+                raise_exception_err(env, EXCP0D_GPF, 0);
+            }
+        } else {
+            if ((new_ss & 3) != rpl)
+                raise_exception_err(env, EXCP0D_GPF, new_ss & 0xfffc);
+            if (load_segment(env, &ss_e1, &ss_e2, new_ss) != 0)
+                raise_exception_err(env, EXCP0D_GPF, new_ss & 0xfffc);
+            if (!(ss_e2 & DESC_S_MASK) ||
+                (ss_e2 & DESC_CS_MASK) ||
+                !(ss_e2 & DESC_W_MASK))
+                raise_exception_err(env, EXCP0D_GPF, new_ss & 0xfffc);
+            dpl = (ss_e2 >> DESC_DPL_SHIFT) & 3;
+            if (dpl != rpl)
+                raise_exception_err(env, EXCP0D_GPF, new_ss & 0xfffc);
+            if (!(ss_e2 & DESC_P_MASK))
+                raise_exception_err(env, EXCP0B_NOSEG, new_ss & 0xfffc);
+            cpu_x86_load_seg_cache(env, R_SS, new_ss,
+                                   get_seg_base(ss_e1, ss_e2),
+                                   get_seg_limit(ss_e1, ss_e2),
+                                   ss_e2);
+        }
+
+        cpu_x86_load_seg_cache(env, R_CS, new_cs,
+                       get_seg_base(e1, e2),
+                       get_seg_limit(e1, e2),
+                       e2);
+        cpu_x86_set_cpl(env, rpl);
+        sp = new_esp;
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_CS64_MASK)
+            sp_mask = -1;
+        else
+#endif
+            sp_mask = get_sp_mask(ss_e2);
+
+        /* validate data segments */
+        validate_seg(env, R_ES, rpl);
+        validate_seg(env, R_DS, rpl);
+        validate_seg(env, R_FS, rpl);
+        validate_seg(env, R_GS, rpl);
+
+        sp += addend;
+    }
+    SET_ESP(sp, sp_mask);
+    env->eip = new_eip;
+    if (is_iret) {
+        /* NOTE: 'cpl' is the _old_ CPL */
+        eflags_mask = TF_MASK | AC_MASK | ID_MASK | RF_MASK | NT_MASK;
+        if (cpl == 0)
+            eflags_mask |= IOPL_MASK;
+        iopl = (env->eflags >> IOPL_SHIFT) & 3;
+        if (cpl <= iopl)
+            eflags_mask |= IF_MASK;
+        if (shift == 0)
+            eflags_mask &= 0xffff;
+        cpu_load_eflags(env, new_eflags, eflags_mask);
+    }
+    return;
+
+ return_to_vm86:
+    POPL(ssp, sp, sp_mask, new_esp);
+    POPL(ssp, sp, sp_mask, new_ss);
+    POPL(ssp, sp, sp_mask, new_es);
+    POPL(ssp, sp, sp_mask, new_ds);
+    POPL(ssp, sp, sp_mask, new_fs);
+    POPL(ssp, sp, sp_mask, new_gs);
+
+    /* modify processor state */
+    cpu_load_eflags(env, new_eflags, TF_MASK | AC_MASK | ID_MASK |
+                IF_MASK | IOPL_MASK | VM_MASK | NT_MASK | VIF_MASK | VIP_MASK);
+    load_seg_vm(env, R_CS, new_cs & 0xffff);
+    cpu_x86_set_cpl(env, 3);
+    load_seg_vm(env, R_SS, new_ss & 0xffff);
+    load_seg_vm(env, R_ES, new_es & 0xffff);
+    load_seg_vm(env, R_DS, new_ds & 0xffff);
+    load_seg_vm(env, R_FS, new_fs & 0xffff);
+    load_seg_vm(env, R_GS, new_gs & 0xffff);
+
+    env->eip = new_eip & 0xffff;
+    ESP = new_esp;
+}
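/* A minimal sketch of the EFLAGS write mask applied at the end of a
 * protected-mode IRET above, as a hypothetical helper: IOPL may change only
 * at CPL 0, IF only when CPL <= IOPL (cpl is the old CPL), and 16-bit frames
 * restrict the mask to the low word.
 */
static int iret_eflags_mask(int cpl, int iopl, int shift)
{
    int mask = TF_MASK | AC_MASK | ID_MASK | RF_MASK | NT_MASK;
    if (cpl == 0) {
        mask |= IOPL_MASK;
    }
    if (cpl <= iopl) {
        mask |= IF_MASK;
    }
    if (shift == 0) {
        mask &= 0xffff;
    }
    return mask;
}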
+
+void helper_iret_protected(CPUX86State *env, int shift, int next_eip)
+{
+    int tss_selector, type;
+    uint32_t e1, e2;
+
+    /* specific case for TSS */
+    if (env->eflags & NT_MASK) {
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_LMA_MASK)
+            raise_exception_err(env, EXCP0D_GPF, 0);
+#endif
+        tss_selector = cpu_lduw_kernel(env, env->tr.base + 0);
+        if (tss_selector & 4)
+            raise_exception_err(env, EXCP0A_TSS, tss_selector & 0xfffc);
+        if (load_segment(env, &e1, &e2, tss_selector) != 0)
+            raise_exception_err(env, EXCP0A_TSS, tss_selector & 0xfffc);
+        type = (e2 >> DESC_TYPE_SHIFT) & 0x17;
+        /* NOTE: we check both segment and busy TSS */
+        if (type != 3)
+            raise_exception_err(env, EXCP0A_TSS, tss_selector & 0xfffc);
+        switch_tss(env, tss_selector, e1, e2, SWITCH_TSS_IRET, next_eip);
+    } else {
+        helper_ret_protected(env, shift, 1, 0);
+    }
+    env->hflags2 &= ~HF2_NMI_MASK;
+}
+
+void helper_lret_protected(CPUX86State *env, int shift, int addend)
+{
+    helper_ret_protected(env, shift, 0, addend);
+}
+
+void helper_sysenter(CPUX86State *env)
+{
+    if (env->sysenter_cs == 0) {
+        raise_exception_err(env, EXCP0D_GPF, 0);
+    }
+    env->eflags &= ~(VM_MASK | IF_MASK | RF_MASK);
+    cpu_x86_set_cpl(env, 0);
+
+#ifdef TARGET_X86_64
+    if (env->hflags & HF_LMA_MASK) {
+        cpu_x86_load_seg_cache(env, R_CS, env->sysenter_cs & 0xfffc,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK |
+                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
+    } else
+#endif
+    {
+        cpu_x86_load_seg_cache(env, R_CS, env->sysenter_cs & 0xfffc,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK |
+                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
+    }
+    cpu_x86_load_seg_cache(env, R_SS, (env->sysenter_cs + 8) & 0xfffc,
+                           0, 0xffffffff,
+                           DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                           DESC_S_MASK |
+                           DESC_W_MASK | DESC_A_MASK);
+    ESP = env->sysenter_esp;
+    EIP = env->sysenter_eip;
+}
+
+void helper_sysexit(CPUX86State *env, int dflag)
+{
+    int cpl;
+
+    cpl = env->hflags & HF_CPL_MASK;
+    if (env->sysenter_cs == 0 || cpl != 0) {
+        raise_exception_err(env, EXCP0D_GPF, 0);
+    }
+    cpu_x86_set_cpl(env, 3);
+#ifdef TARGET_X86_64
+    if (dflag == 2) {
+        cpu_x86_load_seg_cache(env, R_CS, ((env->sysenter_cs + 32) & 0xfffc) | 3,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
+        cpu_x86_load_seg_cache(env, R_SS, ((env->sysenter_cs + 40) & 0xfffc) | 3,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+                               DESC_W_MASK | DESC_A_MASK);
+    } else
+#endif
+    {
+        cpu_x86_load_seg_cache(env, R_CS, ((env->sysenter_cs + 16) & 0xfffc) | 3,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
+        cpu_x86_load_seg_cache(env, R_SS, ((env->sysenter_cs + 24) & 0xfffc) | 3,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+                               DESC_W_MASK | DESC_A_MASK);
+    }
+    ESP = ECX;
+    EIP = EDX;
+}
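/* A minimal sketch of the selector arithmetic shared by helper_sysenter()
 * and helper_sysexit() above: every code and stack selector is an offset
 * from IA32_SYSENTER_CS. Hypothetical helper; the offsets mirror the code.
 */
#include <stdint.h>

static uint16_t sysenter_family_selector(uint16_t sysenter_cs, int offset,
                                         int to_user)
{
    /* offset: 0 = kernel CS, 8 = kernel SS, 16/24 = user CS/SS for the
     * 32-bit SYSEXIT path, 32/40 for the 64-bit SYSEXIT path. */
    uint16_t sel = (uint16_t)((sysenter_cs + offset) & 0xfffc);
    return to_user ? (uint16_t)(sel | 3) : sel;
}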
+
+target_ulong helper_lsl(CPUX86State *env, target_ulong selector1)
+{
+    unsigned int limit;
+    uint32_t e1, e2, eflags, selector;
+    int rpl, dpl, cpl, type;
+
+    selector = selector1 & 0xffff;
+    eflags = helper_cc_compute_all(env, CC_OP);
+    if ((selector & 0xfffc) == 0)
+        goto fail;
+    if (load_segment(env, &e1, &e2, selector) != 0)
+        goto fail;
+    rpl = selector & 3;
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    cpl = env->hflags & HF_CPL_MASK;
+    if (e2 & DESC_S_MASK) {
+        if ((e2 & DESC_CS_MASK) && (e2 & DESC_C_MASK)) {
+            /* conforming */
+        } else {
+            if (dpl < cpl || dpl < rpl)
+                goto fail;
+        }
+    } else {
+        type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+        switch(type) {
+        case 1:
+        case 2:
+        case 3:
+        case 9:
+        case 11:
+            break;
+        default:
+            goto fail;
+        }
+        if (dpl < cpl || dpl < rpl) {
+        fail:
+            CC_SRC = eflags & ~CC_Z;
+            return 0;
+        }
+    }
+    limit = get_seg_limit(e1, e2);
+    CC_SRC = eflags | CC_Z;
+    return limit;
+}
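/* A minimal sketch of the limit expansion behind the value that helper_lsl()
 * above returns; get_seg_limit() itself is defined in cpu.h, outside this
 * hunk. The helper is hypothetical and follows the architectural descriptor
 * format: a 20-bit raw limit from e1/e2, scaled to 4K pages when the
 * granularity bit is set.
 */
#include <stdint.h>

static uint32_t descriptor_limit(uint32_t e1, uint32_t e2, uint32_t g_mask)
{
    uint32_t limit = (e1 & 0xffff) | (e2 & 0x000f0000);
    if (e2 & g_mask) {
        limit = (limit << 12) | 0xfff;  /* page granularity */
    }
    return limit;
}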
+
+target_ulong helper_lar(CPUX86State *env, target_ulong selector1)
+{
+    uint32_t e1, e2, eflags, selector;
+    int rpl, dpl, cpl, type;
+
+    selector = selector1 & 0xffff;
+    eflags = helper_cc_compute_all(env, CC_OP);
+    if ((selector & 0xfffc) == 0)
+        goto fail;
+    if (load_segment(env, &e1, &e2, selector) != 0)
+        goto fail;
+    rpl = selector & 3;
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    cpl = env->hflags & HF_CPL_MASK;
+    if (e2 & DESC_S_MASK) {
+        if ((e2 & DESC_CS_MASK) && (e2 & DESC_C_MASK)) {
+            /* conforming */
+        } else {
+            if (dpl < cpl || dpl < rpl)
+                goto fail;
+        }
+    } else {
+        type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+        switch(type) {
+        case 1:
+        case 2:
+        case 3:
+        case 4:
+        case 5:
+        case 9:
+        case 11:
+        case 12:
+            break;
+        default:
+            goto fail;
+        }
+        if (dpl < cpl || dpl < rpl) {
+        fail:
+            CC_SRC = eflags & ~CC_Z;
+            return 0;
+        }
+    }
+    CC_SRC = eflags | CC_Z;
+    return e2 & 0x00f0ff00;
+}
+
+void helper_verr(CPUX86State *env, target_ulong selector1)
+{
+    uint32_t e1, e2, eflags, selector;
+    int rpl, dpl, cpl;
+
+    selector = selector1 & 0xffff;
+    eflags = helper_cc_compute_all(env, CC_OP);
+    if ((selector & 0xfffc) == 0)
+        goto fail;
+    if (load_segment(env, &e1, &e2, selector) != 0)
+        goto fail;
+    if (!(e2 & DESC_S_MASK))
+        goto fail;
+    rpl = selector & 3;
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    cpl = env->hflags & HF_CPL_MASK;
+    if (e2 & DESC_CS_MASK) {
+        if (!(e2 & DESC_R_MASK))
+            goto fail;
+        if (!(e2 & DESC_C_MASK)) {
+            if (dpl < cpl || dpl < rpl)
+                goto fail;
+        }
+    } else {
+        if (dpl < cpl || dpl < rpl) {
+        fail:
+            CC_SRC = eflags & ~CC_Z;
+            return;
+        }
+    }
+    CC_SRC = eflags | CC_Z;
+}
+
+void helper_verw(CPUX86State *env, target_ulong selector1)
+{
+    uint32_t e1, e2, eflags, selector;
+    int rpl, dpl, cpl;
+
+    selector = selector1 & 0xffff;
+    eflags = helper_cc_compute_all(env, CC_OP);
+    if ((selector & 0xfffc) == 0)
+        goto fail;
+    if (load_segment(env, &e1, &e2, selector) != 0)
+        goto fail;
+    if (!(e2 & DESC_S_MASK))
+        goto fail;
+    rpl = selector & 3;
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    cpl = env->hflags & HF_CPL_MASK;
+    if (e2 & DESC_CS_MASK) {
+        goto fail;
+    } else {
+        if (dpl < cpl || dpl < rpl)
+            goto fail;
+        if (!(e2 & DESC_W_MASK)) {
+        fail:
+            CC_SRC = eflags & ~CC_Z;
+            return;
+        }
+    }
+    CC_SRC = eflags | CC_Z;
+}
diff --git a/target-i386/shift_helper_template.h b/target-i386/shift_helper_template.h
new file mode 100644
index 0000000..cd06ab1
--- /dev/null
+++ b/target-i386/shift_helper_template.h
@@ -0,0 +1,118 @@
+/*
+ *  i386 helpers
+ *
+ *  Copyright (c) 2008 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
+ */
+#define DATA_BITS (1 << (3 + SHIFT))
+#define SHIFT_MASK (DATA_BITS - 1)
+#define SIGN_MASK (((target_ulong)1) << (DATA_BITS - 1))
+#if DATA_BITS <= 32
+#define SHIFT1_MASK 0x1f
+#else
+#define SHIFT1_MASK 0x3f
+#endif
+
+#if DATA_BITS == 8
+#define SUFFIX b
+#define DATA_TYPE uint8_t
+#define DATA_STYPE int8_t
+#define DATA_MASK 0xff
+#elif DATA_BITS == 16
+#define SUFFIX w
+#define DATA_TYPE uint16_t
+#define DATA_STYPE int16_t
+#define DATA_MASK 0xffff
+#elif DATA_BITS == 32
+#define SUFFIX l
+#define DATA_TYPE uint32_t
+#define DATA_STYPE int32_t
+#define DATA_MASK 0xffffffff
+#elif DATA_BITS == 64
+#define SUFFIX q
+#define DATA_TYPE uint64_t
+#define DATA_STYPE int64_t
+#define DATA_MASK 0xffffffffffffffffULL
+#else
+#error unhandled operand size
+#endif
+
+/* shifts */
+
+target_ulong glue(helper_rcl, SUFFIX)(CPUX86State *env,
+                                      target_ulong t0, target_ulong t1)
+{
+    int count, eflags;
+    target_ulong src;
+    target_long res;
+
+    count = t1 & SHIFT1_MASK;
+#if DATA_BITS == 16
+    count = rclw_table[count];
+#elif DATA_BITS == 8
+    count = rclb_table[count];
+#endif
+    if (count) {
+        eflags = env->cc_src;
+        t0 &= DATA_MASK;
+        src = t0;
+        res = (t0 << count) | ((target_ulong)(eflags & CC_C) << (count - 1));
+        if (count > 1)
+            res |= t0 >> (DATA_BITS + 1 - count);
+        t0 = res;
+        env->cc_src = (eflags & ~(CC_C | CC_O)) |
+            (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) |
+            ((src >> (DATA_BITS - count)) & CC_C);
+    }
+    return t0;
+}
+
+target_ulong glue(helper_rcr, SUFFIX)(CPUX86State *env,
+                                      target_ulong t0, target_ulong t1)
+{
+    int count, eflags;
+    target_ulong src;
+    target_long res;
+
+    count = t1 & SHIFT1_MASK;
+#if DATA_BITS == 16
+    count = rclw_table[count];
+#elif DATA_BITS == 8
+    count = rclb_table[count];
+#endif
+    if (count) {
+        eflags = env->cc_src;
+        t0 &= DATA_MASK;
+        src = t0;
+        res = (t0 >> count) | ((target_ulong)(eflags & CC_C) << (DATA_BITS - count));
+        if (count > 1)
+            res |= t0 << (DATA_BITS + 1 - count);
+        t0 = res;
+        env->cc_src = (eflags & ~(CC_C | CC_O)) |
+            (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) |
+            ((src >> (count - 1)) & CC_C);
+    }
+    return t0;
+}
+
+#undef DATA_BITS
+#undef SHIFT_MASK
+#undef SHIFT1_MASK
+#undef SIGN_MASK
+#undef DATA_TYPE
+#undef DATA_STYPE
+#undef DATA_MASK
+#undef SUFFIX
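/* The template above expects its includer to define SHIFT before each
 * inclusion; DATA_BITS then becomes 8 << SHIFT and glue() pastes the b/w/l/q
 * suffix onto helper_rcl/helper_rcr. A consumer .c file (not shown in this
 * hunk) would instantiate all operand sizes roughly like this sketch:
 */
#define SHIFT 0 /* helper_rclb, helper_rcrb */
#include "shift_helper_template.h"
#undef SHIFT

#define SHIFT 1 /* helper_rclw, helper_rcrw */
#include "shift_helper_template.h"
#undef SHIFT

#define SHIFT 2 /* helper_rcll, helper_rcrl */
#include "shift_helper_template.h"
#undef SHIFT

#ifdef TARGET_X86_64
#define SHIFT 3 /* helper_rclq, helper_rcrq */
#include "shift_helper_template.h"
#undef SHIFT
#endif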
diff --git a/target-i386/smm_helper.c b/target-i386/smm_helper.c
new file mode 100644
index 0000000..c432277
--- /dev/null
+++ b/target-i386/smm_helper.c
@@ -0,0 +1,294 @@
+/*
+ *  x86 SMM helpers
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "cpu.h"
+#include "helper.h"
+
+/* SMM support */
+
+#if defined(CONFIG_USER_ONLY)
+
+void do_smm_enter(CPUArchState *env1)
+{
+}
+
+void helper_rsm(CPUX86State *env)
+{
+}
+
+#else
+
+#ifdef TARGET_X86_64
+#define SMM_REVISION_ID 0x00020064
+#else
+#define SMM_REVISION_ID 0x00020000
+#endif
+
+void do_smm_enter(CPUArchState *env)
+{
+    target_ulong sm_state;
+    SegmentCache *dt;
+    int i, offset;
+
+    qemu_log_mask(CPU_LOG_INT, "SMM: enter\n");
+    log_cpu_state_mask(CPU_LOG_INT, ENV_GET_CPU(env), X86_DUMP_CCOP);
+
+    env->hflags |= HF_SMM_MASK;
+    cpu_smm_update(env);
+
+    sm_state = env->smbase + 0x8000;
+
+#ifdef TARGET_X86_64
+    for(i = 0; i < 6; i++) {
+        dt = &env->segs[i];
+        offset = 0x7e00 + i * 16;
+        stw_phys(sm_state + offset, dt->selector);
+        stw_phys(sm_state + offset + 2, (dt->flags >> 8) & 0xf0ff);
+        stl_phys(sm_state + offset + 4, dt->limit);
+        stq_phys(sm_state + offset + 8, dt->base);
+    }
+
+    stq_phys(sm_state + 0x7e68, env->gdt.base);
+    stl_phys(sm_state + 0x7e64, env->gdt.limit);
+
+    stw_phys(sm_state + 0x7e70, env->ldt.selector);
+    stq_phys(sm_state + 0x7e78, env->ldt.base);
+    stl_phys(sm_state + 0x7e74, env->ldt.limit);
+    stw_phys(sm_state + 0x7e72, (env->ldt.flags >> 8) & 0xf0ff);
+
+    stq_phys(sm_state + 0x7e88, env->idt.base);
+    stl_phys(sm_state + 0x7e84, env->idt.limit);
+
+    stw_phys(sm_state + 0x7e90, env->tr.selector);
+    stq_phys(sm_state + 0x7e98, env->tr.base);
+    stl_phys(sm_state + 0x7e94, env->tr.limit);
+    stw_phys(sm_state + 0x7e92, (env->tr.flags >> 8) & 0xf0ff);
+
+    stq_phys(sm_state + 0x7ed0, env->efer);
+
+    stq_phys(sm_state + 0x7ff8, EAX);
+    stq_phys(sm_state + 0x7ff0, ECX);
+    stq_phys(sm_state + 0x7fe8, EDX);
+    stq_phys(sm_state + 0x7fe0, EBX);
+    stq_phys(sm_state + 0x7fd8, ESP);
+    stq_phys(sm_state + 0x7fd0, EBP);
+    stq_phys(sm_state + 0x7fc8, ESI);
+    stq_phys(sm_state + 0x7fc0, EDI);
+    for(i = 8; i < 16; i++)
+        stq_phys(sm_state + 0x7ff8 - i * 8, env->regs[i]);
+    stq_phys(sm_state + 0x7f78, env->eip);
+    stl_phys(sm_state + 0x7f70, cpu_compute_eflags(env));
+    stl_phys(sm_state + 0x7f68, env->dr[6]);
+    stl_phys(sm_state + 0x7f60, env->dr[7]);
+
+    stl_phys(sm_state + 0x7f48, env->cr[4]);
+    stl_phys(sm_state + 0x7f50, env->cr[3]);
+    stl_phys(sm_state + 0x7f58, env->cr[0]);
+
+    stl_phys(sm_state + 0x7efc, SMM_REVISION_ID);
+    stl_phys(sm_state + 0x7f00, env->smbase);
+#else
+    stl_phys(sm_state + 0x7ffc, env->cr[0]);
+    stl_phys(sm_state + 0x7ff8, env->cr[3]);
+    stl_phys(sm_state + 0x7ff4, cpu_compute_eflags(env));
+    stl_phys(sm_state + 0x7ff0, env->eip);
+    stl_phys(sm_state + 0x7fec, EDI);
+    stl_phys(sm_state + 0x7fe8, ESI);
+    stl_phys(sm_state + 0x7fe4, EBP);
+    stl_phys(sm_state + 0x7fe0, ESP);
+    stl_phys(sm_state + 0x7fdc, EBX);
+    stl_phys(sm_state + 0x7fd8, EDX);
+    stl_phys(sm_state + 0x7fd4, ECX);
+    stl_phys(sm_state + 0x7fd0, EAX);
+    stl_phys(sm_state + 0x7fcc, env->dr[6]);
+    stl_phys(sm_state + 0x7fc8, env->dr[7]);
+
+    stl_phys(sm_state + 0x7fc4, env->tr.selector);
+    stl_phys(sm_state + 0x7f64, env->tr.base);
+    stl_phys(sm_state + 0x7f60, env->tr.limit);
+    stl_phys(sm_state + 0x7f5c, (env->tr.flags >> 8) & 0xf0ff);
+
+    stl_phys(sm_state + 0x7fc0, env->ldt.selector);
+    stl_phys(sm_state + 0x7f80, env->ldt.base);
+    stl_phys(sm_state + 0x7f7c, env->ldt.limit);
+    stl_phys(sm_state + 0x7f78, (env->ldt.flags >> 8) & 0xf0ff);
+
+    stl_phys(sm_state + 0x7f74, env->gdt.base);
+    stl_phys(sm_state + 0x7f70, env->gdt.limit);
+
+    stl_phys(sm_state + 0x7f58, env->idt.base);
+    stl_phys(sm_state + 0x7f54, env->idt.limit);
+
+    for(i = 0; i < 6; i++) {
+        dt = &env->segs[i];
+        if (i < 3)
+            offset = 0x7f84 + i * 12;
+        else
+            offset = 0x7f2c + (i - 3) * 12;
+        stl_phys(sm_state + 0x7fa8 + i * 4, dt->selector);
+        stl_phys(sm_state + offset + 8, dt->base);
+        stl_phys(sm_state + offset + 4, dt->limit);
+        stl_phys(sm_state + offset, (dt->flags >> 8) & 0xf0ff);
+    }
+    stl_phys(sm_state + 0x7f14, env->cr[4]);
+
+    stl_phys(sm_state + 0x7efc, SMM_REVISION_ID);
+    stl_phys(sm_state + 0x7ef8, env->smbase);
+#endif
+    /* init SMM cpu state */
+
+#ifdef TARGET_X86_64
+    cpu_load_efer(env, 0);
+#endif
+    cpu_load_eflags(env, 0, ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
+    env->eip = 0x00008000;
+    cpu_x86_load_seg_cache(env, R_CS, (env->smbase >> 4) & 0xffff, env->smbase,
+                           0xffffffff, 0);
+    cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffffffff, 0);
+    cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0xffffffff, 0);
+    cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffffffff, 0);
+    cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffffffff, 0);
+    cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffffffff, 0);
+
+    cpu_x86_update_cr0(env,
+                       env->cr[0] & ~(CR0_PE_MASK | CR0_EM_MASK | CR0_TS_MASK | CR0_PG_MASK));
+    cpu_x86_update_cr4(env, 0);
+    env->dr[7] = 0x00000400;
+    CC_OP = CC_OP_EFLAGS;
+}
+
+void helper_rsm(CPUX86State *env)
+{
+    target_ulong sm_state;
+    int i, offset;
+    uint32_t val;
+
+    sm_state = env->smbase + 0x8000;
+#ifdef TARGET_X86_64
+    cpu_load_efer(env, ldq_phys(sm_state + 0x7ed0));
+
+    for(i = 0; i < 6; i++) {
+        offset = 0x7e00 + i * 16;
+        cpu_x86_load_seg_cache(env, i,
+                               lduw_phys(sm_state + offset),
+                               ldq_phys(sm_state + offset + 8),
+                               ldl_phys(sm_state + offset + 4),
+                               (lduw_phys(sm_state + offset + 2) & 0xf0ff) << 8);
+    }
+
+    env->gdt.base = ldq_phys(sm_state + 0x7e68);
+    env->gdt.limit = ldl_phys(sm_state + 0x7e64);
+
+    env->ldt.selector = lduw_phys(sm_state + 0x7e70);
+    env->ldt.base = ldq_phys(sm_state + 0x7e78);
+    env->ldt.limit = ldl_phys(sm_state + 0x7e74);
+    env->ldt.flags = (lduw_phys(sm_state + 0x7e72) & 0xf0ff) << 8;
+
+    env->idt.base = ldq_phys(sm_state + 0x7e88);
+    env->idt.limit = ldl_phys(sm_state + 0x7e84);
+
+    env->tr.selector = lduw_phys(sm_state + 0x7e90);
+    env->tr.base = ldq_phys(sm_state + 0x7e98);
+    env->tr.limit = ldl_phys(sm_state + 0x7e94);
+    env->tr.flags = (lduw_phys(sm_state + 0x7e92) & 0xf0ff) << 8;
+
+    EAX = ldq_phys(sm_state + 0x7ff8);
+    ECX = ldq_phys(sm_state + 0x7ff0);
+    EDX = ldq_phys(sm_state + 0x7fe8);
+    EBX = ldq_phys(sm_state + 0x7fe0);
+    ESP = ldq_phys(sm_state + 0x7fd8);
+    EBP = ldq_phys(sm_state + 0x7fd0);
+    ESI = ldq_phys(sm_state + 0x7fc8);
+    EDI = ldq_phys(sm_state + 0x7fc0);
+    for(i = 8; i < 16; i++)
+        env->regs[i] = ldq_phys(sm_state + 0x7ff8 - i * 8);
+    env->eip = ldq_phys(sm_state + 0x7f78);
+    cpu_load_eflags(env, ldl_phys(sm_state + 0x7f70),
+                ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
+    env->dr[6] = ldl_phys(sm_state + 0x7f68);
+    env->dr[7] = ldl_phys(sm_state + 0x7f60);
+
+    cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f48));
+    cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7f50));
+    cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7f58));
+
+    val = ldl_phys(sm_state + 0x7efc); /* revision ID */
+    if (val & 0x20000) {
+        env->smbase = ldl_phys(sm_state + 0x7f00) & ~0x7fff;
+    }
+#else
+    cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7ffc));
+    cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7ff8));
+    cpu_load_eflags(env, ldl_phys(sm_state + 0x7ff4),
+                ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
+    env->eip = ldl_phys(sm_state + 0x7ff0);
+    EDI = ldl_phys(sm_state + 0x7fec);
+    ESI = ldl_phys(sm_state + 0x7fe8);
+    EBP = ldl_phys(sm_state + 0x7fe4);
+    ESP = ldl_phys(sm_state + 0x7fe0);
+    EBX = ldl_phys(sm_state + 0x7fdc);
+    EDX = ldl_phys(sm_state + 0x7fd8);
+    ECX = ldl_phys(sm_state + 0x7fd4);
+    EAX = ldl_phys(sm_state + 0x7fd0);
+    env->dr[6] = ldl_phys(sm_state + 0x7fcc);
+    env->dr[7] = ldl_phys(sm_state + 0x7fc8);
+
+    env->tr.selector = ldl_phys(sm_state + 0x7fc4) & 0xffff;
+    env->tr.base = ldl_phys(sm_state + 0x7f64);
+    env->tr.limit = ldl_phys(sm_state + 0x7f60);
+    env->tr.flags = (ldl_phys(sm_state + 0x7f5c) & 0xf0ff) << 8;
+
+    env->ldt.selector = ldl_phys(sm_state + 0x7fc0) & 0xffff;
+    env->ldt.base = ldl_phys(sm_state + 0x7f80);
+    env->ldt.limit = ldl_phys(sm_state + 0x7f7c);
+    env->ldt.flags = (ldl_phys(sm_state + 0x7f78) & 0xf0ff) << 8;
+
+    env->gdt.base = ldl_phys(sm_state + 0x7f74);
+    env->gdt.limit = ldl_phys(sm_state + 0x7f70);
+
+    env->idt.base = ldl_phys(sm_state + 0x7f58);
+    env->idt.limit = ldl_phys(sm_state + 0x7f54);
+
+    for(i = 0; i < 6; i++) {
+        if (i < 3)
+            offset = 0x7f84 + i * 12;
+        else
+            offset = 0x7f2c + (i - 3) * 12;
+        cpu_x86_load_seg_cache(env, i,
+                               ldl_phys(sm_state + 0x7fa8 + i * 4) & 0xffff,
+                               ldl_phys(sm_state + offset + 8),
+                               ldl_phys(sm_state + offset + 4),
+                               (ldl_phys(sm_state + offset) & 0xf0ff) << 8);
+    }
+    cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f14));
+
+    val = ldl_phys(sm_state + 0x7efc); /* revision ID */
+    if (val & 0x20000) {
+        env->smbase = ldl_phys(sm_state + 0x7ef8) & ~0x7fff;
+    }
+#endif
+    CC_OP = CC_OP_EFLAGS;
+    env->hflags &= ~HF_SMM_MASK;
+    cpu_smm_update(env);
+
+    qemu_log_mask(CPU_LOG_INT, "SMM: after RSM\n");
+    log_cpu_state_mask(CPU_LOG_INT, ENV_GET_CPU(env), X86_DUMP_CCOP);
+}
+
+#endif /* !CONFIG_USER_ONLY */
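
The SMM image code above packs each segment descriptor's hidden flags into a
16-bit field: the save path stores (flags >> 8) & 0xf0ff and helper_rsm()
restores them with (value & 0xf0ff) << 8. A minimal standalone sketch (not
part of the patch; the helper names and the example value are illustrative
only) showing that this is a lossless round trip for the attribute bits QEMU
keeps:

    #include <assert.h>
    #include <stdint.h>

    /* Pack the hidden descriptor flags the way the SMM save code stores them. */
    static uint16_t smm_pack_seg_flags(uint32_t flags)
    {
        return (flags >> 8) & 0xf0ff;
    }

    /* Unpack them the way helper_rsm() restores them. */
    static uint32_t smm_unpack_seg_flags(uint16_t packed)
    {
        return (uint32_t)(packed & 0xf0ff) << 8;
    }

    int main(void)
    {
        uint32_t flags = 0x00cff900;              /* arbitrary example value */
        uint32_t round = smm_unpack_seg_flags(smm_pack_seg_flags(flags));
        /* Bits 8..15 and 20..23 of the original survive; the rest are dropped. */
        assert(round == (flags & 0x00f0ff00));
        return 0;
    }
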
diff --git a/target-i386/svm.h b/target-i386/svm.h
index a224aea..04193ed 100644
--- a/target-i386/svm.h
+++ b/target-i386/svm.h
@@ -130,7 +130,7 @@
 
 #define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) /* TS and MP */
 
-struct __attribute__ ((__packed__)) vmcb_control_area {
+struct QEMU_PACKED vmcb_control_area {
 	uint16_t intercept_cr_read;
 	uint16_t intercept_cr_write;
 	uint16_t intercept_dr_read;
@@ -162,14 +162,14 @@
 	uint8_t reserved_5[832];
 };
 
-struct __attribute__ ((__packed__)) vmcb_seg {
+struct QEMU_PACKED vmcb_seg {
 	uint16_t selector;
 	uint16_t attrib;
 	uint32_t limit;
 	uint64_t base;
 };
 
-struct __attribute__ ((__packed__)) vmcb_save_area {
+struct QEMU_PACKED vmcb_save_area {
 	struct vmcb_seg es;
 	struct vmcb_seg cs;
 	struct vmcb_seg ss;
@@ -214,7 +214,7 @@
 	uint64_t last_excp_to;
 };
 
-struct __attribute__ ((__packed__)) vmcb {
+struct QEMU_PACKED vmcb {
 	struct vmcb_control_area control;
 	struct vmcb_save_area save;
 };
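
The QEMU_PACKED rename above is purely cosmetic: the diff shows it standing in
for the same __attribute__((__packed__)) as before, and packing is what makes
the offsetof(struct vmcb, ...) arithmetic in svm_helper.c valid against the
architectural VMCB layout. A small standalone sketch (demo_seg is a local copy
of vmcb_seg, not QEMU code) illustrating the point:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    struct __attribute__((__packed__)) demo_seg {
        uint16_t selector;
        uint16_t attrib;
        uint32_t limit;
        uint64_t base;
    };

    int main(void)
    {
        /* With packing the field offsets match the hardware layout exactly,
         * so offsetof() can be added to a guest-physical base address the
         * same way svm_helper.c does with stq_phys()/ldq_phys(). */
        printf("selector=%zu attrib=%zu limit=%zu base=%zu size=%zu\n",
               offsetof(struct demo_seg, selector),
               offsetof(struct demo_seg, attrib),
               offsetof(struct demo_seg, limit),
               offsetof(struct demo_seg, base),
               sizeof(struct demo_seg));          /* 0 2 4 8 16 */
        return 0;
    }
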
diff --git a/target-i386/svm_helper.c b/target-i386/svm_helper.c
new file mode 100644
index 0000000..e3b2501
--- /dev/null
+++ b/target-i386/svm_helper.c
@@ -0,0 +1,621 @@
+/*
+ *  x86 SVM helpers
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "cpu.h"
+#include "exec/cpu-all.h"
+#include "helper.h"
+
+#if !defined(CONFIG_USER_ONLY)
+#include "exec/softmmu_exec.h"
+#endif /* !defined(CONFIG_USER_ONLY) */
+
+/* Secure Virtual Machine helpers */
+
+#if defined(CONFIG_USER_ONLY)
+
+void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend)
+{
+}
+void helper_vmmcall(CPUX86State *env)
+{
+}
+void helper_vmload(CPUX86State *env, int aflag)
+{
+}
+void helper_vmsave(CPUX86State *env, int aflag)
+{
+}
+void helper_stgi(CPUX86State *env)
+{
+}
+void helper_clgi(CPUX86State *env)
+{
+}
+void helper_skinit(CPUX86State *env)
+{
+}
+void helper_invlpga(CPUX86State *env, int aflag)
+{
+}
+void helper_vmexit(CPUX86State *env,
+                   uint32_t exit_code, uint64_t exit_info_1)
+{
+}
+void helper_svm_check_intercept_param(CPUX86State *env,
+                                      uint32_t type, uint64_t param)
+{
+}
+
+void svm_check_intercept(CPUX86State *env, uint32_t type)
+{
+}
+
+void helper_svm_check_io(CPUX86State *env,
+                         uint32_t port, uint32_t param,
+                         uint32_t next_eip_addend)
+{
+}
+#else
+
+static inline void svm_save_seg(hwaddr addr,
+                                const SegmentCache *sc)
+{
+    stw_phys(addr + offsetof(struct vmcb_seg, selector),
+             sc->selector);
+    stq_phys(addr + offsetof(struct vmcb_seg, base),
+             sc->base);
+    stl_phys(addr + offsetof(struct vmcb_seg, limit),
+             sc->limit);
+    stw_phys(addr + offsetof(struct vmcb_seg, attrib),
+             ((sc->flags >> 8) & 0xff) | ((sc->flags >> 12) & 0x0f00));
+}
+
+static inline void svm_load_seg(hwaddr addr, SegmentCache *sc)
+{
+    unsigned int flags;
+
+    sc->selector = lduw_phys(addr + offsetof(struct vmcb_seg, selector));
+    sc->base = ldq_phys(addr + offsetof(struct vmcb_seg, base));
+    sc->limit = ldl_phys(addr + offsetof(struct vmcb_seg, limit));
+    flags = lduw_phys(addr + offsetof(struct vmcb_seg, attrib));
+    sc->flags = ((flags & 0xff) << 8) | ((flags & 0x0f00) << 12);
+}
+
+static inline void svm_load_seg_cache(hwaddr addr,
+                                      CPUX86State *env, int seg_reg)
+{
+    SegmentCache sc1, *sc = &sc1;
+    svm_load_seg(addr, sc);
+    cpu_x86_load_seg_cache(env, seg_reg, sc->selector,
+                           sc->base, sc->limit, sc->flags);
+}
+
+void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend)
+{
+    target_ulong addr;
+    uint32_t event_inj;
+    uint32_t int_ctl;
+
+    helper_svm_check_intercept_param(env, SVM_EXIT_VMRUN, 0);
+
+    if (aflag == 2)
+        addr = EAX;
+    else
+        addr = (uint32_t)EAX;
+
+    qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmrun! " TARGET_FMT_lx "\n", addr);
+
+    env->vm_vmcb = addr;
+
+    /* save the current CPU state in the hsave page */
+    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.base), env->gdt.base);
+    stl_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit), env->gdt.limit);
+
+    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.base), env->idt.base);
+    stl_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.limit), env->idt.limit);
+
+    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr0), env->cr[0]);
+    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr2), env->cr[2]);
+    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr3), env->cr[3]);
+    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr4), env->cr[4]);
+    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr6), env->dr[6]);
+    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr7), env->dr[7]);
+
+    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.efer), env->efer);
+    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rflags), cpu_compute_eflags(env));
+
+    svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.es),
+                  &env->segs[R_ES]);
+    svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.cs),
+                 &env->segs[R_CS]);
+    svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.ss),
+                 &env->segs[R_SS]);
+    svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.ds),
+                 &env->segs[R_DS]);
+
+    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rip),
+             EIP + next_eip_addend);
+    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rsp), ESP);
+    stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rax), EAX);
+
+    /* load the interception bitmaps so we do not need to access the
+       vmcb in svm mode */
+    env->intercept            = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept));
+    env->intercept_cr_read    = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_cr_read));
+    env->intercept_cr_write   = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_cr_write));
+    env->intercept_dr_read    = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_read));
+    env->intercept_dr_write   = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_write));
+    env->intercept_exceptions = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_exceptions));
+
+    /* enable intercepts */
+    env->hflags |= HF_SVMI_MASK;
+
+    env->tsc_offset = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.tsc_offset));
+
+    env->gdt.base  = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base));
+    env->gdt.limit = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit));
+
+    env->idt.base  = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.base));
+    env->idt.limit = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit));
+
+    /* clear exit_info_2 so we behave like the real hardware */
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), 0);
+
+    cpu_x86_update_cr0(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr0)));
+    cpu_x86_update_cr4(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4)));
+    cpu_x86_update_cr3(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3)));
+    env->cr[2] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr2));
+    int_ctl = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl));
+    env->hflags2 &= ~(HF2_HIF_MASK | HF2_VINTR_MASK);
+    if (int_ctl & V_INTR_MASKING_MASK) {
+        env->v_tpr = int_ctl & V_TPR_MASK;
+        env->hflags2 |= HF2_VINTR_MASK;
+        if (env->eflags & IF_MASK)
+            env->hflags2 |= HF2_HIF_MASK;
+    }
+
+    cpu_load_efer(env,
+                  ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.efer)));
+    env->eflags = 0;
+    cpu_load_eflags(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rflags)),
+                ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
+    CC_OP = CC_OP_EFLAGS;
+
+    svm_load_seg_cache(env->vm_vmcb + offsetof(struct vmcb, save.es),
+                       env, R_ES);
+    svm_load_seg_cache(env->vm_vmcb + offsetof(struct vmcb, save.cs),
+                       env, R_CS);
+    svm_load_seg_cache(env->vm_vmcb + offsetof(struct vmcb, save.ss),
+                       env, R_SS);
+    svm_load_seg_cache(env->vm_vmcb + offsetof(struct vmcb, save.ds),
+                       env, R_DS);
+
+    EIP = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rip));
+    env->eip = EIP;
+    ESP = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rsp));
+    EAX = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rax));
+    env->dr[7] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr7));
+    env->dr[6] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr6));
+    cpu_x86_set_cpl(env, ldub_phys(env->vm_vmcb + offsetof(struct vmcb, save.cpl)));
+
+    /* FIXME: guest state consistency checks */
+
+    switch(ldub_phys(env->vm_vmcb + offsetof(struct vmcb, control.tlb_ctl))) {
+        case TLB_CONTROL_DO_NOTHING:
+            break;
+        case TLB_CONTROL_FLUSH_ALL_ASID:
+            /* FIXME: this is not 100% correct but should work for now */
+            tlb_flush(env, 1);
+        break;
+    }
+
+    env->hflags2 |= HF2_GIF_MASK;
+
+    if (int_ctl & V_IRQ_MASK) {
+        ENV_GET_CPU(env)->interrupt_request |= CPU_INTERRUPT_VIRQ;
+    }
+
+    /* maybe we need to inject an event */
+    event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
+    if (event_inj & SVM_EVTINJ_VALID) {
+        uint8_t vector = event_inj & SVM_EVTINJ_VEC_MASK;
+        uint16_t valid_err = event_inj & SVM_EVTINJ_VALID_ERR;
+        uint32_t event_inj_err = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err));
+
+        qemu_log_mask(CPU_LOG_TB_IN_ASM, "Injecting(%#hx): ", valid_err);
+        /* FIXME: need to implement valid_err */
+        switch (event_inj & SVM_EVTINJ_TYPE_MASK) {
+        case SVM_EVTINJ_TYPE_INTR:
+                env->exception_index = vector;
+                env->error_code = event_inj_err;
+                env->exception_is_int = 0;
+                env->exception_next_eip = -1;
+                qemu_log_mask(CPU_LOG_TB_IN_ASM, "INTR");
+                /* XXX: is it always correct? */
+                do_interrupt_x86_hardirq(env, vector, 1);
+                break;
+        case SVM_EVTINJ_TYPE_NMI:
+                env->exception_index = EXCP02_NMI;
+                env->error_code = event_inj_err;
+                env->exception_is_int = 0;
+                env->exception_next_eip = EIP;
+                qemu_log_mask(CPU_LOG_TB_IN_ASM, "NMI");
+                cpu_loop_exit(env);
+                break;
+        case SVM_EVTINJ_TYPE_EXEPT:
+                env->exception_index = vector;
+                env->error_code = event_inj_err;
+                env->exception_is_int = 0;
+                env->exception_next_eip = -1;
+                qemu_log_mask(CPU_LOG_TB_IN_ASM, "EXEPT");
+                cpu_loop_exit(env);
+                break;
+        case SVM_EVTINJ_TYPE_SOFT:
+                env->exception_index = vector;
+                env->error_code = event_inj_err;
+                env->exception_is_int = 1;
+                env->exception_next_eip = EIP;
+                qemu_log_mask(CPU_LOG_TB_IN_ASM, "SOFT");
+                cpu_loop_exit(env);
+                break;
+        }
+        qemu_log_mask(CPU_LOG_TB_IN_ASM, " %#x %#x\n", env->exception_index, env->error_code);
+    }
+}
+
+void helper_vmmcall(CPUX86State *env)
+{
+    helper_svm_check_intercept_param(env, SVM_EXIT_VMMCALL, 0);
+    raise_exception(env, EXCP06_ILLOP);
+}
+
+void helper_vmload(CPUX86State *env, int aflag)
+{
+    target_ulong addr;
+    helper_svm_check_intercept_param(env, SVM_EXIT_VMLOAD, 0);
+
+    if (aflag == 2)
+        addr = EAX;
+    else
+        addr = (uint32_t)EAX;
+
+    qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmload! " TARGET_FMT_lx "\nFS: %016" PRIx64 " | " TARGET_FMT_lx "\n",
+                addr, ldq_phys(addr + offsetof(struct vmcb, save.fs.base)),
+                env->segs[R_FS].base);
+
+    svm_load_seg_cache(addr + offsetof(struct vmcb, save.fs),
+                       env, R_FS);
+    svm_load_seg_cache(addr + offsetof(struct vmcb, save.gs),
+                       env, R_GS);
+    svm_load_seg(addr + offsetof(struct vmcb, save.tr),
+                 &env->tr);
+    svm_load_seg(addr + offsetof(struct vmcb, save.ldtr),
+                 &env->ldt);
+
+#ifdef TARGET_X86_64
+    env->kernelgsbase = ldq_phys(addr + offsetof(struct vmcb, save.kernel_gs_base));
+    env->lstar = ldq_phys(addr + offsetof(struct vmcb, save.lstar));
+    env->cstar = ldq_phys(addr + offsetof(struct vmcb, save.cstar));
+    env->fmask = ldq_phys(addr + offsetof(struct vmcb, save.sfmask));
+#endif
+    env->star = ldq_phys(addr + offsetof(struct vmcb, save.star));
+    env->sysenter_cs = ldq_phys(addr + offsetof(struct vmcb, save.sysenter_cs));
+    env->sysenter_esp = ldq_phys(addr + offsetof(struct vmcb, save.sysenter_esp));
+    env->sysenter_eip = ldq_phys(addr + offsetof(struct vmcb, save.sysenter_eip));
+}
+
+void helper_vmsave(CPUX86State *env, int aflag)
+{
+    target_ulong addr;
+    helper_svm_check_intercept_param(env, SVM_EXIT_VMSAVE, 0);
+
+    if (aflag == 2)
+        addr = EAX;
+    else
+        addr = (uint32_t)EAX;
+
+    qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmsave! " TARGET_FMT_lx "\nFS: %016" PRIx64 " | " TARGET_FMT_lx "\n",
+                addr, ldq_phys(addr + offsetof(struct vmcb, save.fs.base)),
+                env->segs[R_FS].base);
+
+    svm_save_seg(addr + offsetof(struct vmcb, save.fs),
+                 &env->segs[R_FS]);
+    svm_save_seg(addr + offsetof(struct vmcb, save.gs),
+                 &env->segs[R_GS]);
+    svm_save_seg(addr + offsetof(struct vmcb, save.tr),
+                 &env->tr);
+    svm_save_seg(addr + offsetof(struct vmcb, save.ldtr),
+                 &env->ldt);
+
+#ifdef TARGET_X86_64
+    stq_phys(addr + offsetof(struct vmcb, save.kernel_gs_base), env->kernelgsbase);
+    stq_phys(addr + offsetof(struct vmcb, save.lstar), env->lstar);
+    stq_phys(addr + offsetof(struct vmcb, save.cstar), env->cstar);
+    stq_phys(addr + offsetof(struct vmcb, save.sfmask), env->fmask);
+#endif
+    stq_phys(addr + offsetof(struct vmcb, save.star), env->star);
+    stq_phys(addr + offsetof(struct vmcb, save.sysenter_cs), env->sysenter_cs);
+    stq_phys(addr + offsetof(struct vmcb, save.sysenter_esp), env->sysenter_esp);
+    stq_phys(addr + offsetof(struct vmcb, save.sysenter_eip), env->sysenter_eip);
+}
+
+void helper_stgi(CPUX86State *env)
+{
+    helper_svm_check_intercept_param(env, SVM_EXIT_STGI, 0);
+    env->hflags2 |= HF2_GIF_MASK;
+}
+
+void helper_clgi(CPUX86State *env)
+{
+    helper_svm_check_intercept_param(env, SVM_EXIT_CLGI, 0);
+    env->hflags2 &= ~HF2_GIF_MASK;
+}
+
+void helper_skinit(CPUX86State *env)
+{
+    helper_svm_check_intercept_param(env, SVM_EXIT_SKINIT, 0);
+    /* XXX: not implemented */
+    raise_exception(env, EXCP06_ILLOP);
+}
+
+void helper_invlpga(CPUX86State *env, int aflag)
+{
+    target_ulong addr;
+    helper_svm_check_intercept_param(env, SVM_EXIT_INVLPGA, 0);
+
+    if (aflag == 2)
+        addr = EAX;
+    else
+        addr = (uint32_t)EAX;
+
+    /* XXX: could use the ASID to see whether the flush is
+       actually needed */
+    tlb_flush_page(env, addr);
+}
+
+void helper_svm_check_intercept_param(CPUX86State *env,
+                                      uint32_t type, uint64_t param)
+{
+    if (likely(!(env->hflags & HF_SVMI_MASK)))
+        return;
+    switch(type) {
+    case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR0 + 8:
+        if (env->intercept_cr_read & (1 << (type - SVM_EXIT_READ_CR0))) {
+            helper_vmexit(env, type, param);
+        }
+        break;
+    case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR0 + 8:
+        if (env->intercept_cr_write & (1 << (type - SVM_EXIT_WRITE_CR0))) {
+            helper_vmexit(env, type, param);
+        }
+        break;
+    case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR0 + 7:
+        if (env->intercept_dr_read & (1 << (type - SVM_EXIT_READ_DR0))) {
+            helper_vmexit(env, type, param);
+        }
+        break;
+    case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR0 + 7:
+        if (env->intercept_dr_write & (1 << (type - SVM_EXIT_WRITE_DR0))) {
+            helper_vmexit(env, type, param);
+        }
+        break;
+    case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 31:
+        if (env->intercept_exceptions & (1 << (type - SVM_EXIT_EXCP_BASE))) {
+            helper_vmexit(env, type, param);
+        }
+        break;
+    case SVM_EXIT_MSR:
+        if (env->intercept & (1ULL << (SVM_EXIT_MSR - SVM_EXIT_INTR))) {
+            /* FIXME: this should be read in at vmrun (faster this way?) */
+            uint64_t addr = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.msrpm_base_pa));
+            uint32_t t0, t1;
+            switch((uint32_t)ECX) {
+            case 0 ... 0x1fff:
+                t0 = (ECX * 2) % 8;
+                t1 = ECX / 8;
+                break;
+            case 0xc0000000 ... 0xc0001fff:
+                t0 = (8192 + ECX - 0xc0000000) * 2;
+                t1 = (t0 / 8);
+                t0 %= 8;
+                break;
+            case 0xc0010000 ... 0xc0011fff:
+                t0 = (16384 + ECX - 0xc0010000) * 2;
+                t1 = (t0 / 8);
+                t0 %= 8;
+                break;
+            default:
+                helper_vmexit(env, type, param);
+                t0 = 0;
+                t1 = 0;
+                break;
+            }
+            if (ldub_phys(addr + t1) & ((1 << param) << t0))
+                helper_vmexit(env, type, param);
+        }
+        break;
+    default:
+        if (env->intercept & (1ULL << (type - SVM_EXIT_INTR))) {
+            helper_vmexit(env, type, param);
+        }
+        break;
+    }
+}
+
+void svm_check_intercept(CPUArchState *env, uint32_t type)
+{
+    helper_svm_check_intercept_param(env, type, 0);
+}
+
+void helper_svm_check_io(CPUX86State *env,
+                         uint32_t port, uint32_t param,
+                         uint32_t next_eip_addend)
+{
+    if (env->intercept & (1ULL << (SVM_EXIT_IOIO - SVM_EXIT_INTR))) {
+        /* FIXME: this should be read in at vmrun (faster this way?) */
+        uint64_t addr = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.iopm_base_pa));
+        uint16_t mask = (1 << ((param >> 4) & 7)) - 1;
+        if(lduw_phys(addr + port / 8) & (mask << (port & 7))) {
+            /* next EIP */
+            stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2),
+                     env->eip + next_eip_addend);
+            helper_vmexit(env, SVM_EXIT_IOIO, param | (port << 16));
+        }
+    }
+}
+
+/* Note: currently only 32 bits of exit_code are used */
+void helper_vmexit(CPUX86State *env,
+                   uint32_t exit_code, uint64_t exit_info_1)
+{
+    uint32_t int_ctl;
+
+    qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmexit(%08x, %016" PRIx64 ", %016" PRIx64 ", " TARGET_FMT_lx ")!\n",
+                exit_code, exit_info_1,
+                ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2)),
+                EIP);
+
+    if(env->hflags & HF_INHIBIT_IRQ_MASK) {
+        stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_state), SVM_INTERRUPT_SHADOW_MASK);
+        env->hflags &= ~HF_INHIBIT_IRQ_MASK;
+    } else {
+        stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_state), 0);
+    }
+
+    /* Save the VM state in the vmcb */
+    svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.es),
+                 &env->segs[R_ES]);
+    svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.cs),
+                 &env->segs[R_CS]);
+    svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.ss),
+                 &env->segs[R_SS]);
+    svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.ds),
+                 &env->segs[R_DS]);
+
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base), env->gdt.base);
+    stl_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit), env->gdt.limit);
+
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.base), env->idt.base);
+    stl_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit), env->idt.limit);
+
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.efer), env->efer);
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr0), env->cr[0]);
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr2), env->cr[2]);
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3), env->cr[3]);
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4), env->cr[4]);
+
+    int_ctl = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl));
+    int_ctl &= ~(V_TPR_MASK | V_IRQ_MASK);
+    int_ctl |= env->v_tpr & V_TPR_MASK;
+    if (ENV_GET_CPU(env)->interrupt_request & CPU_INTERRUPT_VIRQ)
+        int_ctl |= V_IRQ_MASK;
+    stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl), int_ctl);
+
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rflags), cpu_compute_eflags(env));
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rip), env->eip);
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rsp), ESP);
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rax), EAX);
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr7), env->dr[7]);
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr6), env->dr[6]);
+    stb_phys(env->vm_vmcb + offsetof(struct vmcb, save.cpl), env->hflags & HF_CPL_MASK);
+
+    /* Reload the host state from vm_hsave */
+    env->hflags2 &= ~(HF2_HIF_MASK | HF2_VINTR_MASK);
+    env->hflags &= ~HF_SVMI_MASK;
+    env->intercept = 0;
+    env->intercept_exceptions = 0;
+    ENV_GET_CPU(env)->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
+    env->tsc_offset = 0;
+
+    env->gdt.base  = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.base));
+    env->gdt.limit = ldl_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit));
+
+    env->idt.base  = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.base));
+    env->idt.limit = ldl_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.limit));
+
+    cpu_x86_update_cr0(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr0)) | CR0_PE_MASK);
+    cpu_x86_update_cr4(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr4)));
+    cpu_x86_update_cr3(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr3)));
+    /* we need to set the efer after the crs so the hidden flags get
+       set properly */
+    cpu_load_efer(env,
+                  ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.efer)));
+    env->eflags = 0;
+    cpu_load_eflags(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rflags)),
+                ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
+    CC_OP = CC_OP_EFLAGS;
+
+    svm_load_seg_cache(env->vm_hsave + offsetof(struct vmcb, save.es),
+                       env, R_ES);
+    svm_load_seg_cache(env->vm_hsave + offsetof(struct vmcb, save.cs),
+                       env, R_CS);
+    svm_load_seg_cache(env->vm_hsave + offsetof(struct vmcb, save.ss),
+                       env, R_SS);
+    svm_load_seg_cache(env->vm_hsave + offsetof(struct vmcb, save.ds),
+                       env, R_DS);
+
+    EIP = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rip));
+    ESP = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rsp));
+    EAX = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rax));
+
+    env->dr[6] = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr6));
+    env->dr[7] = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr7));
+
+    /* other setups */
+    cpu_x86_set_cpl(env, 0);
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_code), exit_code);
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_1), exit_info_1);
+
+    stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_int_info),
+             ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj)));
+    stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_int_info_err),
+             ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err)));
+
+    env->hflags2 &= ~HF2_GIF_MASK;
+    /* FIXME: Resets the current ASID register to zero (host ASID). */
+
+    /* Clears the V_IRQ and V_INTR_MASKING bits inside the processor. */
+
+    /* Clears the TSC_OFFSET inside the processor. */
+
+    /* If the host is in PAE mode, the processor reloads the host's PDPEs
+       from the page table indicated by the host's CR3. If the PDPEs contain
+       illegal state, the processor causes a shutdown. */
+
+    /* Forces CR0.PE = 1, RFLAGS.VM = 0. */
+    env->cr[0] |= CR0_PE_MASK;
+    env->eflags &= ~VM_MASK;
+
+    /* Disables all breakpoints in the host DR7 register. */
+
+    /* Checks the reloaded host state for consistency. */
+
+    /* If the host's rIP reloaded by #VMEXIT is outside the limit of the
+       host's code segment or non-canonical (in the case of long mode), a
+       #GP fault is delivered inside the host. */
+
+    /* remove any pending exception */
+    env->exception_index = -1;
+    env->error_code = 0;
+    env->old_exception = -1;
+
+    cpu_loop_exit(env);
+}
+
+#endif
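
helper_svm_check_io() above derives a per-byte bit mask from bits 4..6 of the
IOIO exit information ((1 << ((param >> 4) & 7)) - 1) and tests it against a
16-bit load from the I/O permission map, so an access that straddles a byte
boundary still sees every port bit it touches. A standalone sketch of the same
check (the iopm array, io_intercepted() and size_bits are illustrative
stand-ins, not QEMU API):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical permission map: one bit per I/O port, 64K ports. */
    static uint8_t iopm[65536 / 8 + 2];

    /* size_bits is the value taken from bits 4..6 of the exit info:
     * 1, 2 or 4 for 8-, 16- and 32-bit accesses respectively. */
    static bool io_intercepted(unsigned port, unsigned size_bits)
    {
        uint16_t mask = (1 << size_bits) - 1;     /* one bit per accessed byte */
        uint16_t row = iopm[port / 8] | (iopm[port / 8 + 1] << 8);
        return (row & (mask << (port % 8))) != 0;
    }

    int main(void)
    {
        iopm[0x3f8 / 8] |= 1 << (0x3f8 % 8);      /* intercept port 0x3f8 */
        printf("%d\n", io_intercepted(0x3f8, 1)); /* 1: byte access hits */
        printf("%d\n", io_intercepted(0x3f9, 1)); /* 0: next port is free */
        printf("%d\n", io_intercepted(0x3f7, 2)); /* 1: word access spans 0x3f8 */
        return 0;
    }
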
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 78552ec..9ae45a5 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -14,8 +14,7 @@
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
 #include <stdarg.h>
 #include <stdlib.h>
@@ -24,8 +23,8 @@
 #include <inttypes.h>
 #include <signal.h>
 
+#include "qemu/host-utils.h"
 #include "cpu.h"
-#include "exec/exec-all.h"
 #include "disas/disas.h"
 #include "tcg-op.h"
 
@@ -34,11 +33,16 @@
 #include "helper.h"
 #include "exec/hax.h"
 
+#ifdef _WIN32
+#undef grp2
+#endif
+
 #define PREFIX_REPZ   0x01
 #define PREFIX_REPNZ  0x02
 #define PREFIX_LOCK   0x04
 #define PREFIX_DATA   0x08
 #define PREFIX_ADR    0x10
+#define PREFIX_VEX    0x20
 
 #ifdef TARGET_X86_64
 #define X86_64_ONLY(x) x
@@ -58,11 +62,19 @@
 #define REX_B(s) 0
 #endif
 
+#ifdef TARGET_X86_64
+# define ctztl  ctz64
+# define clztl  clz64
+#else
+# define ctztl  ctz32
+# define clztl  clz32
+#endif
+
 //#define MACRO_TEST   1
 
 /* global register indexes */
 static TCGv_ptr cpu_env;
-static TCGv cpu_A0, cpu_cc_src, cpu_cc_dst, cpu_cc_tmp;
+static TCGv cpu_A0, cpu_cc_src, cpu_cc_dst;
 static TCGv_i32 cpu_cc_op;
 /* local temps */
 static TCGv cpu_T[2], cpu_T3;
@@ -73,6 +85,8 @@
 static TCGv_i64 cpu_tmp1_i64;
 static TCGv cpu_tmp5, cpu_tmp6;
 
+static uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
+
 #include "exec/gen-icount.h"
 
 #ifdef TARGET_X86_64
@@ -97,7 +111,8 @@
     int rex_x, rex_b;
 #endif
     int ss32;   /* 32 bit stack segment */
-    int cc_op;  /* current CC operation */
+    CCOp cc_op;  /* current CC operation */
+    bool cc_op_dirty;
     int addseg; /* non zero if either DS/ES/SS have a non zero base */
     int f_st;   /* currently unused */
     int vm86;   /* vm86 mode */
@@ -180,6 +195,58 @@
     OR_A0, /* temporary register used when doing address evaluation */
 };
 
+enum {
+    USES_CC_DST = 1,
+    USES_CC_SRC = 2,
+};
+
+/* Bit set if the global variable is live after setting CC_OP to X.  */
+static const uint8_t cc_op_live[CC_OP_NB] = {
+    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_EFLAGS] = USES_CC_SRC,
+    [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
+    [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
+};
+
+static void set_cc_op(DisasContext *s, CCOp op)
+{
+    int dead;
+
+    if (s->cc_op == op) {
+        return;
+    }
+
+    /* Discard CC computation that will no longer be used.  */
+    dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
+    if (dead & USES_CC_DST) {
+        tcg_gen_discard_tl(cpu_cc_dst);
+    }
+    if (dead & USES_CC_SRC) {
+        tcg_gen_discard_tl(cpu_cc_src);
+    }
+
+    s->cc_op = op;
+    /* The DYNAMIC setting is translator only, and should never be
+       stored.  Thus we always consider it clean.  */
+    s->cc_op_dirty = (op != CC_OP_DYNAMIC);
+}
+
+static void gen_update_cc_op(DisasContext *s)
+{
+    if (s->cc_op_dirty) {
+        tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
+        s->cc_op_dirty = false;
+    }
+}
+
 static inline void gen_op_movl_T0_0(void)
 {
     tcg_gen_movi_tl(cpu_T[0], 0);
@@ -271,11 +338,30 @@
 #define REG_LH_OFFSET 4
 #endif
 
+/* In instruction encodings for byte register accesses the
+ * register number usually indicates "low 8 bits of register N";
+ * however there are some special cases where N in 4..7 indicates
+ * [AH, CH, DH, BH], i.e. "bits 15..8 of register N-4". Return
+ * true for this special case, false otherwise.
+ */
+static inline bool byte_reg_is_xH(int reg)
+{
+    if (reg < 4) {
+        return false;
+    }
+#ifdef TARGET_X86_64
+    if (reg >= 8 || x86_64_hregs) {
+        return false;
+    }
+#endif
+    return true;
+}
+
 static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0)
 {
     switch(ot) {
     case OT_BYTE:
-        if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
+        if (!byte_reg_is_xH(reg)) {
             tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUX86State, regs[reg]) + REG_B_OFFSET);
         } else {
             tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUX86State, regs[reg - 4]) + REG_H_OFFSET);
@@ -317,23 +403,23 @@
 static inline void gen_op_mov_reg_A0(int size, int reg)
 {
     switch(size) {
-    case 0:
+    case OT_BYTE:
         tcg_gen_st16_tl(cpu_A0, cpu_env, offsetof(CPUX86State, regs[reg]) + REG_W_OFFSET);
         break;
 #ifdef TARGET_X86_64
-    case 1:
+    case OT_WORD:
         tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUX86State, regs[reg]) + REG_L_OFFSET);
         /* high part of register set to zero */
         tcg_gen_movi_tl(cpu_tmp0, 0);
         tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, regs[reg]) + REG_LH_OFFSET);
         break;
     default:
-    case 2:
+    case OT_LONG:
         tcg_gen_st_tl(cpu_A0, cpu_env, offsetof(CPUX86State, regs[reg]));
         break;
 #else
     default:
-    case 1:
+    case OT_WORD:
         tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUX86State, regs[reg]) + REG_L_OFFSET);
         break;
 #endif
@@ -342,18 +428,10 @@
 
 static inline void gen_op_mov_v_reg(int ot, TCGv t0, int reg)
 {
-    switch(ot) {
-    case OT_BYTE:
-        if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
-            goto std_case;
-        } else {
-            tcg_gen_ld8u_tl(t0, cpu_env, offsetof(CPUX86State, regs[reg - 4]) + REG_H_OFFSET);
-        }
-        break;
-    default:
-    std_case:
+    if (ot == OT_BYTE && byte_reg_is_xH(reg)) {
+        tcg_gen_ld8u_tl(t0, cpu_env, offsetof(CPUX86State, regs[reg - 4]) + REG_H_OFFSET);
+    } else {
         tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUX86State, regs[reg]));
-        break;
     }
 }
 
@@ -381,7 +459,7 @@
     tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
 }
 #endif
-
+    
 static void gen_add_A0_im(DisasContext *s, int val)
 {
 #ifdef TARGET_X86_64
@@ -405,12 +483,12 @@
 static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
 {
     switch(size) {
-    case 0:
+    case OT_BYTE:
         tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, regs[reg]));
         tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
         tcg_gen_st16_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, regs[reg]) + REG_W_OFFSET);
         break;
-    case 1:
+    case OT_WORD:
         tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, regs[reg]));
         tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
 #ifdef TARGET_X86_64
@@ -419,7 +497,7 @@
         tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, regs[reg]));
         break;
 #ifdef TARGET_X86_64
-    case 2:
+    case OT_LONG:
         tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, regs[reg]));
         tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
         tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, regs[reg]));
@@ -431,12 +509,12 @@
 static inline void gen_op_add_reg_T0(int size, int reg)
 {
     switch(size) {
-    case 0:
+    case OT_BYTE:
         tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, regs[reg]));
         tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
         tcg_gen_st16_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, regs[reg]) + REG_W_OFFSET);
         break;
-    case 1:
+    case OT_WORD:
         tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, regs[reg]));
         tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
 #ifdef TARGET_X86_64
@@ -445,7 +523,7 @@
         tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, regs[reg]));
         break;
 #ifdef TARGET_X86_64
-    case 2:
+    case OT_LONG:
         tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, regs[reg]));
         tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
         tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, regs[reg]));
@@ -454,11 +532,6 @@
     }
 }
 
-static inline void gen_op_set_cc_op(int32_t val)
-{
-    tcg_gen_movi_i32(cpu_cc_op, val);
-}
-
 static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
 {
     tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, regs[reg]));
@@ -475,7 +548,7 @@
     tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUX86State, segs[reg].base) + REG_L_OFFSET);
 }
 
-static inline void gen_op_addl_A0_seg(int reg)
+static inline void gen_op_addl_A0_seg(DisasContext *s, int reg)
 {
     tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, segs[reg].base));
     tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
@@ -514,14 +587,14 @@
 {
     int mem_index = (idx >> 2) - 1;
     switch(idx & 3) {
-    case 0:
+    case OT_BYTE:
         tcg_gen_qemu_ld8s(cpu_T[0], cpu_A0, mem_index);
         break;
-    case 1:
+    case OT_WORD:
         tcg_gen_qemu_ld16s(cpu_T[0], cpu_A0, mem_index);
         break;
     default:
-    case 2:
+    case OT_LONG:
         tcg_gen_qemu_ld32s(cpu_T[0], cpu_A0, mem_index);
         break;
     }
@@ -531,17 +604,17 @@
 {
     int mem_index = (idx >> 2) - 1;
     switch(idx & 3) {
-    case 0:
+    case OT_BYTE:
         tcg_gen_qemu_ld8u(t0, a0, mem_index);
         break;
-    case 1:
+    case OT_WORD:
         tcg_gen_qemu_ld16u(t0, a0, mem_index);
         break;
-    case 2:
+    case OT_LONG:
         tcg_gen_qemu_ld32u(t0, a0, mem_index);
         break;
     default:
-    case 3:
+    case OT_QUAD:
         /* Should never happen on 32-bit targets.  */
 #ifdef TARGET_X86_64
         tcg_gen_qemu_ld64(t0, a0, mem_index);
@@ -570,17 +643,17 @@
 {
     int mem_index = (idx >> 2) - 1;
     switch(idx & 3) {
-    case 0:
+    case OT_BYTE:
         tcg_gen_qemu_st8(t0, a0, mem_index);
         break;
-    case 1:
+    case OT_WORD:
         tcg_gen_qemu_st16(t0, a0, mem_index);
         break;
-    case 2:
+    case OT_LONG:
         tcg_gen_qemu_st32(t0, a0, mem_index);
         break;
     default:
-    case 3:
+    case OT_QUAD:
         /* Should never happen on 32-bit targets.  */
 #ifdef TARGET_X86_64
         tcg_gen_qemu_st64(t0, a0, mem_index);
@@ -636,7 +709,7 @@
             override = R_DS;
         gen_op_movl_A0_reg(R_ESI);
         gen_op_andl_A0_ffff();
-        gen_op_addl_A0_seg(override);
+        gen_op_addl_A0_seg(s, override);
     }
 }
 
@@ -657,48 +730,55 @@
     } else {
         gen_op_movl_A0_reg(R_EDI);
         gen_op_andl_A0_ffff();
-        gen_op_addl_A0_seg(R_ES);
+        gen_op_addl_A0_seg(s, R_ES);
     }
 }
 
-static inline void gen_op_movl_T0_Dshift(int ot)
+static inline void gen_op_movl_T0_Dshift(int ot) 
 {
     tcg_gen_ld32s_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, df));
     tcg_gen_shli_tl(cpu_T[0], cpu_T[0], ot);
 };
 
+static TCGv gen_ext_tl(TCGv dst, TCGv src, int size, bool sign)
+{
+    switch (size) {
+    case OT_BYTE:
+        if (sign) {
+            tcg_gen_ext8s_tl(dst, src);
+        } else {
+            tcg_gen_ext8u_tl(dst, src);
+        }
+        return dst;
+    case OT_WORD:
+        if (sign) {
+            tcg_gen_ext16s_tl(dst, src);
+        } else {
+            tcg_gen_ext16u_tl(dst, src);
+        }
+        return dst;
+#ifdef TARGET_X86_64
+    case OT_LONG:
+        if (sign) {
+            tcg_gen_ext32s_tl(dst, src);
+        } else {
+            tcg_gen_ext32u_tl(dst, src);
+        }
+        return dst;
+#endif
+    default:
+        return src;
+    }
+}
+
 static void gen_extu(int ot, TCGv reg)
 {
-    switch(ot) {
-    case OT_BYTE:
-        tcg_gen_ext8u_tl(reg, reg);
-        break;
-    case OT_WORD:
-        tcg_gen_ext16u_tl(reg, reg);
-        break;
-    case OT_LONG:
-        tcg_gen_ext32u_tl(reg, reg);
-        break;
-    default:
-        break;
-    }
+    gen_ext_tl(reg, reg, ot, false);
 }
 
 static void gen_exts(int ot, TCGv reg)
 {
-    switch(ot) {
-    case OT_BYTE:
-        tcg_gen_ext8s_tl(reg, reg);
-        break;
-    case OT_WORD:
-        tcg_gen_ext16s_tl(reg, reg);
-        break;
-    case OT_LONG:
-        tcg_gen_ext32s_tl(reg, reg);
-        break;
-    default:
-        break;
-    }
+    gen_ext_tl(reg, reg, ot, true);
 }
 
 static inline void gen_op_jnz_ecx(int size, int label1)
@@ -718,21 +798,31 @@
 static void gen_helper_in_func(int ot, TCGv v, TCGv_i32 n)
 {
     switch (ot) {
-    case 0: gen_helper_inb(v, n); break;
-    case 1: gen_helper_inw(v, n); break;
-    case 2: gen_helper_inl(v, n); break;
+    case OT_BYTE:
+        gen_helper_inb(v, n);
+        break;
+    case OT_WORD:
+        gen_helper_inw(v, n);
+        break;
+    case OT_LONG:
+        gen_helper_inl(v, n);
+        break;
     }
-
 }
 
 static void gen_helper_out_func(int ot, TCGv_i32 v, TCGv_i32 n)
 {
     switch (ot) {
-    case 0: gen_helper_outb(v, n); break;
-    case 1: gen_helper_outw(v, n); break;
-    case 2: gen_helper_outl(v, n); break;
+    case OT_BYTE:
+        gen_helper_outb(v, n);
+        break;
+    case OT_WORD:
+        gen_helper_outw(v, n);
+        break;
+    case OT_LONG:
+        gen_helper_outl(v, n);
+        break;
     }
-
 }
 
 static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip,
@@ -743,28 +833,33 @@
 
     state_saved = 0;
     if (s->pe && (s->cpl > s->iopl || s->vm86)) {
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
+        gen_update_cc_op(s);
         gen_jmp_im(cur_eip);
         state_saved = 1;
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         switch (ot) {
-        case 0: gen_helper_check_iob(cpu_tmp2_i32); break;
-        case 1: gen_helper_check_iow(cpu_tmp2_i32); break;
-        case 2: gen_helper_check_iol(cpu_tmp2_i32); break;
+        case OT_BYTE:
+            gen_helper_check_iob(cpu_env, cpu_tmp2_i32);
+            break;
+        case OT_WORD:
+            gen_helper_check_iow(cpu_env, cpu_tmp2_i32);
+            break;
+        case OT_LONG:
+            gen_helper_check_iol(cpu_env, cpu_tmp2_i32);
+            break;
         }
     }
     if(s->flags & HF_SVMI_MASK) {
         if (!state_saved) {
-            if (s->cc_op != CC_OP_DYNAMIC)
-                gen_op_set_cc_op(s->cc_op);
+            gen_update_cc_op(s);
             gen_jmp_im(cur_eip);
             state_saved = 1;
         }
         svm_flags |= (1 << (4 + ot));
         next_eip = s->pc - s->cs_base;
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-        gen_helper_svm_check_io(cpu_tmp2_i32, tcg_const_i32(svm_flags),
+        gen_helper_svm_check_io(cpu_env, cpu_tmp2_i32,
+                                tcg_const_i32(svm_flags),
                                 tcg_const_i32(next_eip - cur_eip));
     }
 }
@@ -780,17 +875,8 @@
     gen_op_add_reg_T0(s->aflag, R_EDI);
 }
 
-static inline void gen_update_cc_op(DisasContext *s)
-{
-    if (s->cc_op != CC_OP_DYNAMIC) {
-        gen_op_set_cc_op(s->cc_op);
-        s->cc_op = CC_OP_DYNAMIC;
-    }
-}
-
 static void gen_op_update1_cc(void)
 {
-    tcg_gen_discard_tl(cpu_cc_src);
     tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
 }
 
@@ -808,7 +894,6 @@
 
 static inline void gen_op_testl_T0_T1_cc(void)
 {
-    tcg_gen_discard_tl(cpu_cc_src);
     tcg_gen_and_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
 }
 
@@ -818,72 +903,227 @@
     tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
 }
 
-/* compute eflags.C to reg */
-static void gen_compute_eflags_c(TCGv reg)
-{
-    gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_cc_op);
-    tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
-}
-
 /* compute all eflags to cc_src */
-static void gen_compute_eflags(TCGv reg)
+static void gen_compute_eflags(DisasContext *s)
 {
-    gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_cc_op);
-    tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
+    if (s->cc_op == CC_OP_EFLAGS) {
+        return;
+    }
+    gen_update_cc_op(s);
+    gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_env, cpu_cc_op);
+    set_cc_op(s, CC_OP_EFLAGS);
+    tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
 }
 
-static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
+/* compute eflags.C to reg */
+static void gen_compute_eflags_c(DisasContext *s, TCGv reg, bool inv)
 {
-    if (s->cc_op != CC_OP_DYNAMIC)
-        gen_op_set_cc_op(s->cc_op);
+    TCGv t0, t1;
+    int size;
+
+    switch (s->cc_op) {
+    case CC_OP_SUBB ... CC_OP_SUBQ:
+        /* (DATA_TYPE)(CC_DST + CC_SRC) < (DATA_TYPE)CC_SRC */
+        size = s->cc_op - CC_OP_SUBB;
+        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
+        /* If no temporary was used, be careful not to alias t1 and t0.  */
+        t0 = TCGV_EQUAL(t1, cpu_cc_src) ? cpu_tmp0 : reg;
+        tcg_gen_add_tl(t0, cpu_cc_dst, cpu_cc_src);
+        gen_extu(size, t0);
+        goto add_sub;
+
+    case CC_OP_ADDB ... CC_OP_ADDQ:
+        /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
+        size = s->cc_op - CC_OP_ADDB;
+        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
+        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
+    add_sub:
+        tcg_gen_setcond_tl(inv ? TCG_COND_GEU : TCG_COND_LTU, reg, t0, t1);
+        inv = false;
+        break;
+
+    case CC_OP_SBBB ... CC_OP_SBBQ:
+        /* (DATA_TYPE)(CC_DST + CC_SRC + 1) <= (DATA_TYPE)CC_SRC */
+        size = s->cc_op - CC_OP_SBBB;
+        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
+        if (TCGV_EQUAL(t1, reg) && TCGV_EQUAL(reg, cpu_cc_src)) {
+            tcg_gen_mov_tl(cpu_tmp0, cpu_cc_src);
+            t1 = cpu_tmp0;
+        }
+
+        tcg_gen_add_tl(reg, cpu_cc_dst, cpu_cc_src);
+        tcg_gen_addi_tl(reg, reg, 1);
+        gen_extu(size, reg);
+        t0 = reg;
+        goto adc_sbb;
+
+    case CC_OP_ADCB ... CC_OP_ADCQ:
+        /* (DATA_TYPE)CC_DST <= (DATA_TYPE)CC_SRC */
+        size = s->cc_op - CC_OP_ADCB;
+        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
+        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
+    adc_sbb:
+        tcg_gen_setcond_tl(inv ? TCG_COND_GTU : TCG_COND_LEU, reg, t0, t1);
+        inv = false;
+        break;
+
+    case CC_OP_LOGICB ... CC_OP_LOGICQ:
+        tcg_gen_movi_tl(reg, 0);
+        break;
+
+    case CC_OP_INCB ... CC_OP_INCQ:
+    case CC_OP_DECB ... CC_OP_DECQ:
+        if (inv) {
+            tcg_gen_xori_tl(reg, cpu_cc_src, 1);
+        } else {
+            tcg_gen_mov_tl(reg, cpu_cc_src);
+        }
+        inv = false;
+        break;
+
+    case CC_OP_SHLB ... CC_OP_SHLQ:
+        /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
+        size = s->cc_op - CC_OP_SHLB;
+        tcg_gen_shri_tl(reg, cpu_cc_src, (8 << size) - 1);
+        tcg_gen_andi_tl(reg, reg, 1);
+        break;
+
+    case CC_OP_MULB ... CC_OP_MULQ:
+        tcg_gen_setcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE,
+                            reg, cpu_cc_src, 0);
+        inv = false;
+        break;
+
+    case CC_OP_EFLAGS:
+    case CC_OP_SARB ... CC_OP_SARQ:
+        /* CC_SRC & 1 */
+        tcg_gen_andi_tl(reg, cpu_cc_src, 1);
+        break;
+
+    default:
+       /* The need to compute only C from CC_OP_DYNAMIC is important
+          in efficiently implementing e.g. INC at the start of a TB.  */
+       gen_update_cc_op(s);
+       gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_env, cpu_cc_op);
+       tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
+       break;
+    }
+    if (inv) {
+        tcg_gen_xori_tl(reg, reg, 1);
+    }
+}
+
+/* compute eflags.P to reg */
+static void gen_compute_eflags_p(DisasContext *s, TCGv reg)
+{
+    gen_compute_eflags(s);
+    tcg_gen_shri_tl(reg, cpu_cc_src, 2);
+    tcg_gen_andi_tl(reg, reg, 1);
+}
+
+/* compute eflags.S to reg */
+static void gen_compute_eflags_s(DisasContext *s, TCGv reg, bool inv)
+{
+    switch (s->cc_op) {
+    case CC_OP_DYNAMIC:
+        gen_compute_eflags(s);
+        /* FALLTHRU */
+    case CC_OP_EFLAGS:
+        tcg_gen_shri_tl(reg, cpu_cc_src, 7);
+        tcg_gen_andi_tl(reg, reg, 1);
+        if (inv) {
+            tcg_gen_xori_tl(reg, reg, 1);
+        }
+        break;
+    default:
+        {
+            int size = (s->cc_op - CC_OP_ADDB) & 3;
+            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
+            tcg_gen_setcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, reg, t0, 0);
+        }
+        break;
+    }
+}
+
+/* compute eflags.O to reg */
+static void gen_compute_eflags_o(DisasContext *s, TCGv reg)
+{
+    gen_compute_eflags(s);
+    tcg_gen_shri_tl(reg, cpu_cc_src, 11);
+    tcg_gen_andi_tl(reg, reg, 1);
+}
+
+/* compute eflags.Z to reg */
+static void gen_compute_eflags_z(DisasContext *s, TCGv reg, bool inv)
+{
+    switch (s->cc_op) {
+    case CC_OP_DYNAMIC:
+        gen_compute_eflags(s);
+        /* FALLTHRU */
+    case CC_OP_EFLAGS:
+        tcg_gen_shri_tl(reg, cpu_cc_src, 6);
+        tcg_gen_andi_tl(reg, reg, 1);
+        if (inv) {
+            tcg_gen_xori_tl(reg, reg, 1);
+        }
+        break;
+    default:
+        {
+            int size = (s->cc_op - CC_OP_ADDB) & 3;
+            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
+            tcg_gen_setcondi_tl(inv ? TCG_COND_NE : TCG_COND_EQ, reg, t0, 0);
+        }
+    }
+}
+
+static void gen_setcc_slow(DisasContext *s, int jcc_op, TCGv reg, bool inv)
+{
+    assert(!TCGV_EQUAL(reg, cpu_cc_src));
     switch(jcc_op) {
     case JCC_O:
-        gen_compute_eflags(cpu_T[0]);
-        tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 11);
-        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+        gen_compute_eflags_o(s, reg);
         break;
     case JCC_B:
-        gen_compute_eflags_c(cpu_T[0]);
+        gen_compute_eflags_c(s, reg, inv);
+        inv = false;
         break;
     case JCC_Z:
-        gen_compute_eflags(cpu_T[0]);
-        tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 6);
-        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+        gen_compute_eflags_z(s, reg, inv);
+        inv = false;
         break;
     case JCC_BE:
-        gen_compute_eflags(cpu_tmp0);
-        tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 6);
-        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
-        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
-        break;
+        gen_compute_eflags(s);
+        tcg_gen_andi_tl(reg, cpu_cc_src, CC_Z | CC_C);
+        tcg_gen_setcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, reg, reg, 0);
+        return;
     case JCC_S:
-        gen_compute_eflags(cpu_T[0]);
-        tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 7);
-        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+        gen_compute_eflags_s(s, reg, inv);
+        inv = false;
         break;
     case JCC_P:
-        gen_compute_eflags(cpu_T[0]);
-        tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 2);
-        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+        gen_compute_eflags_p(s, reg);
         break;
     case JCC_L:
-        gen_compute_eflags(cpu_tmp0);
-        tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
-        tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 7); /* CC_S */
-        tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
-        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+        gen_compute_eflags(s);
+        tcg_gen_shri_tl(reg, cpu_cc_src, 11); /* CC_O */
+        tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 7); /* CC_S */
+        tcg_gen_xor_tl(reg, reg, cpu_tmp0);
+        tcg_gen_andi_tl(reg, reg, 1);
         break;
     default:
     case JCC_LE:
-        gen_compute_eflags(cpu_tmp0);
-        tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
-        tcg_gen_shri_tl(cpu_tmp4, cpu_tmp0, 7); /* CC_S */
-        tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 6); /* CC_Z */
-        tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
-        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
-        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+        gen_compute_eflags(s);
+        tcg_gen_shri_tl(reg, cpu_cc_src, 11); /* CC_O */
+        tcg_gen_shri_tl(cpu_tmp4, cpu_cc_src, 7); /* CC_S */
+        tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 6); /* CC_Z */
+        tcg_gen_xor_tl(reg, reg, cpu_tmp4);
+        tcg_gen_or_tl(reg, reg, cpu_tmp0);
+        tcg_gen_andi_tl(reg, reg, 1);
         break;
     }
+    if (inv) {
+        tcg_gen_xori_tl(reg, reg, 1);
+    }
 }
 
 /* return true if setcc_slow is not needed (WARNING: must be kept in
@@ -939,7 +1179,7 @@
 
 /* generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
-static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
+static inline void gen_jcc1(DisasContext *s, int b, int l1)
 {
     int inv, jcc_op, size, cond;
     TCGv t0;
@@ -947,63 +1187,24 @@
     inv = b & 1;
     jcc_op = (b >> 1) & 7;
 
-    switch(cc_op) {
+    switch(s->cc_op) {
         /* we optimize the cmp/jcc case */
     case CC_OP_SUBB:
     case CC_OP_SUBW:
     case CC_OP_SUBL:
     case CC_OP_SUBQ:
 
-        size = cc_op - CC_OP_SUBB;
+        size = s->cc_op - CC_OP_SUBB;
         switch(jcc_op) {
         case JCC_Z:
         fast_jcc_z:
-            switch(size) {
-            case 0:
-                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xff);
-                t0 = cpu_tmp0;
-                break;
-            case 1:
-                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffff);
-                t0 = cpu_tmp0;
-                break;
-#ifdef TARGET_X86_64
-            case 2:
-                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffffffff);
-                t0 = cpu_tmp0;
-                break;
-#endif
-            default:
-                t0 = cpu_cc_dst;
-                break;
-            }
+            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_dst, size, false);
             tcg_gen_brcondi_tl(inv ? TCG_COND_NE : TCG_COND_EQ, t0, 0, l1);
             break;
         case JCC_S:
         fast_jcc_s:
-            switch(size) {
-            case 0:
-                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80);
-                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
-                                   0, l1);
-                break;
-            case 1:
-                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x8000);
-                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
-                                   0, l1);
-                break;
-#ifdef TARGET_X86_64
-            case 2:
-                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80000000);
-                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
-                                   0, l1);
-                break;
-#endif
-            default:
-                tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, cpu_cc_dst,
-                                   0, l1);
-                break;
-            }
+            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_dst, size, true);
+            tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, t0, 0, l1);
             break;
 
         case JCC_B:
@@ -1013,28 +1214,8 @@
             cond = inv ? TCG_COND_GTU : TCG_COND_LEU;
         fast_jcc_b:
             tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
-            switch(size) {
-            case 0:
-                t0 = cpu_tmp0;
-                tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xff);
-                tcg_gen_andi_tl(t0, cpu_cc_src, 0xff);
-                break;
-            case 1:
-                t0 = cpu_tmp0;
-                tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffff);
-                tcg_gen_andi_tl(t0, cpu_cc_src, 0xffff);
-                break;
-#ifdef TARGET_X86_64
-            case 2:
-                t0 = cpu_tmp0;
-                tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffffffff);
-                tcg_gen_andi_tl(t0, cpu_cc_src, 0xffffffff);
-                break;
-#endif
-            default:
-                t0 = cpu_cc_src;
-                break;
-            }
+            gen_extu(size, cpu_tmp4);
+            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
             tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
             break;
 
@@ -1045,28 +1226,8 @@
             cond = inv ? TCG_COND_GT : TCG_COND_LE;
         fast_jcc_l:
             tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
-            switch(size) {
-            case 0:
-                t0 = cpu_tmp0;
-                tcg_gen_ext8s_tl(cpu_tmp4, cpu_tmp4);
-                tcg_gen_ext8s_tl(t0, cpu_cc_src);
-                break;
-            case 1:
-                t0 = cpu_tmp0;
-                tcg_gen_ext16s_tl(cpu_tmp4, cpu_tmp4);
-                tcg_gen_ext16s_tl(t0, cpu_cc_src);
-                break;
-#ifdef TARGET_X86_64
-            case 2:
-                t0 = cpu_tmp0;
-                tcg_gen_ext32s_tl(cpu_tmp4, cpu_tmp4);
-                tcg_gen_ext32s_tl(t0, cpu_cc_src);
-                break;
-#endif
-            default:
-                t0 = cpu_cc_src;
-                break;
-            }
+            gen_exts(size, cpu_tmp4);
+            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
             tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
             break;
 
@@ -1117,10 +1278,10 @@
     case CC_OP_SARQ:
         switch(jcc_op) {
         case JCC_Z:
-            size = (cc_op - CC_OP_ADDB) & 3;
+            size = (s->cc_op - CC_OP_ADDB) & 3;
             goto fast_jcc_z;
         case JCC_S:
-            size = (cc_op - CC_OP_ADDB) & 3;
+            size = (s->cc_op - CC_OP_ADDB) & 3;
             goto fast_jcc_s;
         default:
             goto slow_jcc;
@@ -1128,7 +1289,7 @@
         break;
     default:
     slow_jcc:
-        gen_setcc_slow_T0(s, jcc_op);
+        gen_setcc_slow(s, jcc_op, cpu_T[0], false);
         tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE,
                            cpu_T[0], 0, l1);
         break;
@@ -1176,6 +1337,7 @@
     gen_op_cmpl_T0_T1_cc();
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_EDI);
+    set_cc_op(s, CC_OP_SUBB + ot);
 }
 
 static inline void gen_cmps(DisasContext *s, int ot)
@@ -1188,6 +1350,7 @@
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_ESI);
     gen_op_add_reg_T0(s->aflag, R_EDI);
+    set_cc_op(s, CC_OP_SUBB + ot);
 }
 
 static inline void gen_ins(DisasContext *s, int ot)
@@ -1258,8 +1421,8 @@
     l2 = gen_jz_ecx_string(s, next_eip);                                      \
     gen_ ## op(s, ot);                                                        \
     gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
-    gen_op_set_cc_op(CC_OP_SUBB + ot);                                        \
-    gen_jcc1(s, CC_OP_SUBB + ot, (JCC_Z << 1) | (nz ^ 1), l2);                \
+    gen_update_cc_op(s);                                                      \
+    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
     if (!s->jmp_opt)                                                          \
         gen_op_jz_ecx(s->aflag, l2);                                          \
     gen_jmp(s, cur_eip);                                                      \
@@ -1276,14 +1439,30 @@
 static void gen_helper_fp_arith_ST0_FT0(int op)
 {
     switch (op) {
-    case 0: gen_helper_fadd_ST0_FT0(); break;
-    case 1: gen_helper_fmul_ST0_FT0(); break;
-    case 2: gen_helper_fcom_ST0_FT0(); break;
-    case 3: gen_helper_fcom_ST0_FT0(); break;
-    case 4: gen_helper_fsub_ST0_FT0(); break;
-    case 5: gen_helper_fsubr_ST0_FT0(); break;
-    case 6: gen_helper_fdiv_ST0_FT0(); break;
-    case 7: gen_helper_fdivr_ST0_FT0(); break;
+    case 0:
+        gen_helper_fadd_ST0_FT0(cpu_env);
+        break;
+    case 1:
+        gen_helper_fmul_ST0_FT0(cpu_env);
+        break;
+    case 2:
+        gen_helper_fcom_ST0_FT0(cpu_env);
+        break;
+    case 3:
+        gen_helper_fcom_ST0_FT0(cpu_env);
+        break;
+    case 4:
+        gen_helper_fsub_ST0_FT0(cpu_env);
+        break;
+    case 5:
+        gen_helper_fsubr_ST0_FT0(cpu_env);
+        break;
+    case 6:
+        gen_helper_fdiv_ST0_FT0(cpu_env);
+        break;
+    case 7:
+        gen_helper_fdivr_ST0_FT0(cpu_env);
+        break;
     }
 }
 
@@ -1292,12 +1471,24 @@
 {
     TCGv_i32 tmp = tcg_const_i32(opreg);
     switch (op) {
-    case 0: gen_helper_fadd_STN_ST0(tmp); break;
-    case 1: gen_helper_fmul_STN_ST0(tmp); break;
-    case 4: gen_helper_fsubr_STN_ST0(tmp); break;
-    case 5: gen_helper_fsub_STN_ST0(tmp); break;
-    case 6: gen_helper_fdivr_STN_ST0(tmp); break;
-    case 7: gen_helper_fdiv_STN_ST0(tmp); break;
+    case 0:
+        gen_helper_fadd_STN_ST0(cpu_env, tmp);
+        break;
+    case 1:
+        gen_helper_fmul_STN_ST0(cpu_env, tmp);
+        break;
+    case 4:
+        gen_helper_fsubr_STN_ST0(cpu_env, tmp);
+        break;
+    case 5:
+        gen_helper_fsub_STN_ST0(cpu_env, tmp);
+        break;
+    case 6:
+        gen_helper_fdivr_STN_ST0(cpu_env, tmp);
+        break;
+    case 7:
+        gen_helper_fdiv_STN_ST0(cpu_env, tmp);
+        break;
     }
 }
 
@@ -1311,9 +1502,7 @@
     }
     switch(op) {
     case OP_ADCL:
-        if (s1->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s1->cc_op);
-        gen_compute_eflags_c(cpu_tmp4);
+        gen_compute_eflags_c(s1, cpu_tmp4, false);
         tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
         tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
         if (d != OR_TMP0)
@@ -1325,12 +1514,10 @@
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4);
         tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2);
         tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_ADDB + ot);
-        s1->cc_op = CC_OP_DYNAMIC;
+        set_cc_op(s1, CC_OP_DYNAMIC);
         break;
     case OP_SBBL:
-        if (s1->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s1->cc_op);
-        gen_compute_eflags_c(cpu_tmp4);
+        gen_compute_eflags_c(s1, cpu_tmp4, false);
         tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
         tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
         if (d != OR_TMP0)
@@ -1342,7 +1529,7 @@
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4);
         tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2);
         tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_SUBB + ot);
-        s1->cc_op = CC_OP_DYNAMIC;
+        set_cc_op(s1, CC_OP_DYNAMIC);
         break;
     case OP_ADDL:
         gen_op_addl_T0_T1();
@@ -1351,7 +1538,7 @@
         else
             gen_op_st_T0_A0(ot + s1->mem_index);
         gen_op_update2_cc();
-        s1->cc_op = CC_OP_ADDB + ot;
+        set_cc_op(s1, CC_OP_ADDB + ot);
         break;
     case OP_SUBL:
         tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
@@ -1360,7 +1547,7 @@
         else
             gen_op_st_T0_A0(ot + s1->mem_index);
         gen_op_update2_cc();
-        s1->cc_op = CC_OP_SUBB + ot;
+        set_cc_op(s1, CC_OP_SUBB + ot);
         break;
     default:
     case OP_ANDL:
@@ -1370,7 +1557,7 @@
         else
             gen_op_st_T0_A0(ot + s1->mem_index);
         gen_op_update1_cc();
-        s1->cc_op = CC_OP_LOGICB + ot;
+        set_cc_op(s1, CC_OP_LOGICB + ot);
         break;
     case OP_ORL:
         tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
@@ -1379,7 +1566,7 @@
         else
             gen_op_st_T0_A0(ot + s1->mem_index);
         gen_op_update1_cc();
-        s1->cc_op = CC_OP_LOGICB + ot;
+        set_cc_op(s1, CC_OP_LOGICB + ot);
         break;
     case OP_XORL:
         tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
@@ -1388,11 +1575,11 @@
         else
             gen_op_st_T0_A0(ot + s1->mem_index);
         gen_op_update1_cc();
-        s1->cc_op = CC_OP_LOGICB + ot;
+        set_cc_op(s1, CC_OP_LOGICB + ot);
         break;
     case OP_CMPL:
         gen_op_cmpl_T0_T1_cc();
-        s1->cc_op = CC_OP_SUBB + ot;
+        set_cc_op(s1, CC_OP_SUBB + ot);
         break;
     }
 }
@@ -1404,20 +1591,18 @@
         gen_op_mov_TN_reg(ot, 0, d);
     else
         gen_op_ld_T0_A0(ot + s1->mem_index);
-    if (s1->cc_op != CC_OP_DYNAMIC)
-        gen_op_set_cc_op(s1->cc_op);
+    gen_compute_eflags_c(s1, cpu_cc_src, false);
     if (c > 0) {
         tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1);
-        s1->cc_op = CC_OP_INCB + ot;
+        set_cc_op(s1, CC_OP_INCB + ot);
     } else {
         tcg_gen_addi_tl(cpu_T[0], cpu_T[0], -1);
-        s1->cc_op = CC_OP_DECB + ot;
+        set_cc_op(s1, CC_OP_DECB + ot);
     }
     if (d != OR_TMP0)
         gen_op_mov_reg_T0(ot, d);
     else
         gen_op_st_T0_A0(ot + s1->mem_index);
-    gen_compute_eflags_c(cpu_cc_src);
     tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
 }
 
@@ -1434,10 +1619,11 @@
         mask = 0x1f;
 
     /* load */
-    if (op1 == OR_TMP0)
+    if (op1 == OR_TMP0) {
         gen_op_ld_T0_A0(ot + s->mem_index);
-    else
+    } else {
         gen_op_mov_TN_reg(ot, 0, op1);
+    }
 
     tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask);
 
@@ -1459,14 +1645,14 @@
     }
 
     /* store */
-    if (op1 == OR_TMP0)
+    if (op1 == OR_TMP0) {
         gen_op_st_T0_A0(ot + s->mem_index);
-    else
+    } else {
         gen_op_mov_reg_T0(ot, op1);
+    }
 
-    /* update eflags if non zero shift */
-    if (s->cc_op != CC_OP_DYNAMIC)
-        gen_op_set_cc_op(s->cc_op);
+    /* update eflags */
+    gen_update_cc_op(s);
 
     /* XXX: inefficient */
     t0 = tcg_temp_local_new();
@@ -1486,7 +1672,7 @@
         tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot);
 
     gen_set_label(shift_label);
-    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
+    set_cc_op(s, CC_OP_DYNAMIC); /* cannot predict flags after */
 
     tcg_temp_free(t0);
     tcg_temp_free(t1);
@@ -1495,12 +1681,7 @@
 static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2,
                             int is_right, int is_arith)
 {
-    int mask;
-
-    if (ot == OT_QUAD)
-        mask = 0x3f;
-    else
-        mask = 0x1f;
+    int mask = (ot == OT_QUAD ? 0x3f : 0x1f);
 
     /* load */
     if (op1 == OR_TMP0)
@@ -1531,15 +1712,15 @@
         gen_op_st_T0_A0(ot + s->mem_index);
     else
         gen_op_mov_reg_T0(ot, op1);
-
+        
     /* update eflags if non zero shift */
     if (op2 != 0) {
         tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
         tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
         if (is_right)
-            s->cc_op = CC_OP_SARB + ot;
+            set_cc_op(s, CC_OP_SARB + ot);
         else
-            s->cc_op = CC_OP_SHLB + ot;
+            set_cc_op(s, CC_OP_SHLB + ot);
     }
 }
 
@@ -1551,8 +1732,7 @@
         tcg_gen_shri_tl(ret, arg1, -arg2);
 }
 
-static void gen_rot_rm_T1(DisasContext *s, int ot, int op1,
-                          int is_right)
+static void gen_rot_rm_T1(DisasContext *s, int ot, int op1, int is_right)
 {
     target_ulong mask;
     int label1, label2, data_bits;
@@ -1616,14 +1796,13 @@
         gen_op_mov_reg_v(ot, op1, t0);
     }
 
-    /* update eflags */
-    if (s->cc_op != CC_OP_DYNAMIC)
-        gen_op_set_cc_op(s->cc_op);
+    /* update eflags.  It is needed anyway most of the time, do it always.  */
+    gen_compute_eflags(s);
+    assert(s->cc_op == CC_OP_EFLAGS);
 
     label2 = gen_new_label();
     tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label2);
 
-    gen_compute_eflags(cpu_cc_src);
     tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
     tcg_gen_xor_tl(cpu_tmp0, t2, t0);
     tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1));
@@ -1635,11 +1814,7 @@
     tcg_gen_andi_tl(t0, t0, CC_C);
     tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0);
 
-    tcg_gen_discard_tl(cpu_cc_dst);
-    tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
-
     gen_set_label(label2);
-    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
 
     tcg_temp_free(t0);
     tcg_temp_free(t1);
@@ -1699,10 +1874,9 @@
 
     if (op2 != 0) {
         /* update eflags */
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
+        gen_compute_eflags(s);
+        assert(s->cc_op == CC_OP_EFLAGS);
 
-        gen_compute_eflags(cpu_cc_src);
         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
         tcg_gen_xor_tl(cpu_tmp0, t1, t0);
         tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1));
@@ -1716,7 +1890,6 @@
 
         tcg_gen_discard_tl(cpu_cc_dst);
         tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
-        s->cc_op = CC_OP_EFLAGS;
     }
 
     tcg_temp_free(t0);
@@ -1728,33 +1901,49 @@
 static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
                            int is_right)
 {
-    int label1;
-
-    if (s->cc_op != CC_OP_DYNAMIC)
-        gen_op_set_cc_op(s->cc_op);
+    gen_compute_eflags(s);
+    assert(s->cc_op == CC_OP_EFLAGS);
+    tcg_gen_discard_tl(cpu_cc_dst);
+    set_cc_op(s, CC_OP_EFLAGS);
 
     /* load */
     if (op1 == OR_TMP0)
         gen_op_ld_T0_A0(ot + s->mem_index);
     else
         gen_op_mov_TN_reg(ot, 0, op1);
-
+    
     if (is_right) {
         switch (ot) {
-        case 0: gen_helper_rcrb(cpu_T[0], cpu_T[0], cpu_T[1]); break;
-        case 1: gen_helper_rcrw(cpu_T[0], cpu_T[0], cpu_T[1]); break;
-        case 2: gen_helper_rcrl(cpu_T[0], cpu_T[0], cpu_T[1]); break;
+        case OT_BYTE:
+            gen_helper_rcrb(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
+            break;
+        case OT_WORD:
+            gen_helper_rcrw(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
+            break;
+        case OT_LONG:
+            gen_helper_rcrl(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
+            break;
 #ifdef TARGET_X86_64
-        case 3: gen_helper_rcrq(cpu_T[0], cpu_T[0], cpu_T[1]); break;
+        case OT_QUAD:
+            gen_helper_rcrq(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
+            break;
 #endif
         }
     } else {
         switch (ot) {
-        case 0: gen_helper_rclb(cpu_T[0], cpu_T[0], cpu_T[1]); break;
-        case 1: gen_helper_rclw(cpu_T[0], cpu_T[0], cpu_T[1]); break;
-        case 2: gen_helper_rcll(cpu_T[0], cpu_T[0], cpu_T[1]); break;
+        case OT_BYTE:
+            gen_helper_rclb(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
+            break;
+        case OT_WORD:
+            gen_helper_rclw(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
+            break;
+        case OT_LONG:
+            gen_helper_rcll(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
+            break;
 #ifdef TARGET_X86_64
-        case 3: gen_helper_rclq(cpu_T[0], cpu_T[0], cpu_T[1]); break;
+        case OT_QUAD:
+            gen_helper_rclq(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
+            break;
 #endif
         }
     }
@@ -1763,17 +1952,6 @@
         gen_op_st_T0_A0(ot + s->mem_index);
     else
         gen_op_mov_reg_T0(ot, op1);
-
-    /* update eflags */
-    label1 = gen_new_label();
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_cc_tmp, -1, label1);
-
-    tcg_gen_mov_tl(cpu_cc_src, cpu_cc_tmp);
-    tcg_gen_discard_tl(cpu_cc_dst);
-    tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
-
-    gen_set_label(label1);
-    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
 }
 
 /* XXX: add faster immediate case */
@@ -1883,8 +2061,7 @@
     }
 
     /* update eflags */
-    if (s->cc_op != CC_OP_DYNAMIC)
-        gen_op_set_cc_op(s->cc_op);
+    gen_update_cc_op(s);
 
     label2 = gen_new_label();
     tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label2);
@@ -1897,7 +2074,7 @@
         tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot);
     }
     gen_set_label(label2);
-    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
+    set_cc_op(s, CC_OP_DYNAMIC); /* cannot predict flags after */
 
     tcg_temp_free(t0);
     tcg_temp_free(t1);
@@ -1962,7 +2139,8 @@
     }
 }
 
-static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_ptr)
+static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm,
+                          int *reg_ptr, int *offset_ptr)
 {
     target_long disp;
     int havesib;
@@ -1988,7 +2166,7 @@
 
         if (base == 4) {
             havesib = 1;
-            code = ldub_code(s->pc++);
+            code = cpu_ldub_code(env, s->pc++);
             scale = (code >> 6) & 3;
             index = ((code >> 3) & 7) | REX_X(s);
             base = (code & 7);
@@ -1999,7 +2177,7 @@
         case 0:
             if ((base & 7) == 5) {
                 base = -1;
-                disp = (int32_t)ldl_code(s->pc);
+                disp = (int32_t)cpu_ldl_code(env, s->pc);
                 s->pc += 4;
                 if (CODE64(s) && !havesib) {
                     disp += s->pc + s->rip_offset;
@@ -2009,11 +2187,11 @@
             }
             break;
         case 1:
-            disp = (int8_t)ldub_code(s->pc++);
+            disp = (int8_t)cpu_ldub_code(env, s->pc++);
             break;
         default:
         case 2:
-            disp = ldl_code(s->pc);
+            disp = cpu_ldl_code(env, s->pc);
             s->pc += 4;
             break;
         }
@@ -2069,14 +2247,14 @@
             } else
 #endif
             {
-                gen_op_addl_A0_seg(override);
+                gen_op_addl_A0_seg(s, override);
             }
         }
     } else {
         switch (mod) {
         case 0:
             if (rm == 6) {
-                disp = lduw_code(s->pc);
+                disp = cpu_lduw_code(env, s->pc);
                 s->pc += 2;
                 gen_op_movl_A0_im(disp);
                 rm = 0; /* avoid SS override */
@@ -2086,11 +2264,11 @@
             }
             break;
         case 1:
-            disp = (int8_t)ldub_code(s->pc++);
+            disp = (int8_t)cpu_ldub_code(env, s->pc++);
             break;
         default:
         case 2:
-            disp = lduw_code(s->pc);
+            disp = cpu_lduw_code(env, s->pc);
             s->pc += 2;
             break;
         }
@@ -2136,7 +2314,7 @@
                 else
                     override = R_DS;
             }
-            gen_op_addl_A0_seg(override);
+            gen_op_addl_A0_seg(s, override);
         }
     }
 
@@ -2146,7 +2324,7 @@
     *offset_ptr = disp;
 }
 
-static void gen_nop_modrm(DisasContext *s, int modrm)
+static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
 {
     int mod, rm, base, code;
 
@@ -2160,7 +2338,7 @@
         base = rm;
 
         if (base == 4) {
-            code = ldub_code(s->pc++);
+            code = cpu_ldub_code(env, s->pc++);
             base = (code & 7);
         }
 
@@ -2215,14 +2393,15 @@
         } else
 #endif
         {
-            gen_op_addl_A0_seg(override);
+            gen_op_addl_A0_seg(s, override);
         }
     }
 }
 
 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
    OR_TMP0 */
-static void gen_ldst_modrm(DisasContext *s, int modrm, int ot, int reg, int is_store)
+static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
+                           int ot, int reg, int is_store)
 {
     int mod, rm, opreg, disp;
 
@@ -2239,7 +2418,7 @@
                 gen_op_mov_reg_T0(ot, reg);
         }
     } else {
-        gen_lea_modrm(s, modrm, &opreg, &disp);
+        gen_lea_modrm(env, s, modrm, &opreg, &disp);
         if (is_store) {
             if (reg != OR_TMP0)
                 gen_op_mov_TN_reg(ot, 0, reg);
@@ -2252,22 +2431,22 @@
     }
 }
 
-static inline uint32_t insn_get(DisasContext *s, int ot)
+static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, int ot)
 {
     uint32_t ret;
 
     switch(ot) {
     case OT_BYTE:
-        ret = ldub_code(s->pc);
+        ret = cpu_ldub_code(env, s->pc);
         s->pc++;
         break;
     case OT_WORD:
-        ret = lduw_code(s->pc);
+        ret = cpu_lduw_code(env, s->pc);
         s->pc += 2;
         break;
     default:
     case OT_LONG:
-        ret = ldl_code(s->pc);
+        ret = cpu_ldl_code(env, s->pc);
         s->pc += 4;
         break;
     }
@@ -2295,7 +2474,7 @@
         /* jump to same page: we can use a direct jump */
         tcg_gen_goto_tb(tb_num);
         gen_jmp_im(eip);
-        tcg_gen_exit_tb((long)tb + tb_num);
+        tcg_gen_exit_tb((uintptr_t)tb + tb_num);
     } else {
         /* jump to another page: currently not optimized */
         gen_jmp_im(eip);
@@ -2306,16 +2485,13 @@
 static inline void gen_jcc(DisasContext *s, int b,
                            target_ulong val, target_ulong next_eip)
 {
-    int l1, l2, cc_op;
+    int l1, l2;
 
-    cc_op = s->cc_op;
-    if (s->cc_op != CC_OP_DYNAMIC) {
-        gen_op_set_cc_op(s->cc_op);
-        s->cc_op = CC_OP_DYNAMIC;
-    }
     if (s->jmp_opt) {
+        gen_update_cc_op(s);
         l1 = gen_new_label();
-        gen_jcc1(s, cc_op, b, l1);
+        gen_jcc1(s, b, l1);
+        set_cc_op(s, CC_OP_DYNAMIC);
 
         gen_goto_tb(s, 0, next_eip);
 
@@ -2323,10 +2499,9 @@
         gen_goto_tb(s, 1, val);
         s->is_jmp = 3;
     } else {
-
         l1 = gen_new_label();
         l2 = gen_new_label();
-        gen_jcc1(s, cc_op, b, l1);
+        gen_jcc1(s, b, l1);
 
         gen_jmp_im(next_eip);
         tcg_gen_br(l2);
@@ -2349,7 +2524,7 @@
         t0 = tcg_temp_local_new();
         tcg_gen_movi_tl(t0, 0);
         l1 = gen_new_label();
-        gen_jcc1(s, s->cc_op, b ^ 1, l1);
+        gen_jcc1(s, b ^ 1, l1);
         tcg_gen_movi_tl(t0, 1);
         gen_set_label(l1);
         tcg_gen_mov_tl(cpu_T[0], t0);
@@ -2360,26 +2535,23 @@
            worth to */
         inv = b & 1;
         jcc_op = (b >> 1) & 7;
-        gen_setcc_slow_T0(s, jcc_op);
-        if (inv) {
-            tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1);
-        }
+        gen_setcc_slow(s, jcc_op, cpu_T[0], inv);
     }
 }
 
 static inline void gen_op_movl_T0_seg(int seg_reg)
 {
-    tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
+    tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
                      offsetof(CPUX86State,segs[seg_reg].selector));
 }
 
 static inline void gen_op_movl_seg_T0_vm(int seg_reg)
 {
     tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
-    tcg_gen_st32_tl(cpu_T[0], cpu_env,
+    tcg_gen_st32_tl(cpu_T[0], cpu_env, 
                     offsetof(CPUX86State,segs[seg_reg].selector));
     tcg_gen_shli_tl(cpu_T[0], cpu_T[0], 4);
-    tcg_gen_st_tl(cpu_T[0], cpu_env,
+    tcg_gen_st_tl(cpu_T[0], cpu_env, 
                   offsetof(CPUX86State,segs[seg_reg].base));
 }
 
@@ -2389,11 +2561,10 @@
 {
     if (s->pe && !s->vm86) {
         /* XXX: optimize by finding processor state dynamically */
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
+        gen_update_cc_op(s);
         gen_jmp_im(cur_eip);
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-        gen_helper_load_seg(tcg_const_i32(seg_reg), cpu_tmp2_i32);
+        gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
         /* abort translation because the addseg value may change or
            because ss32 may change. For R_SS, translation must always
            stop as a special handling must be done to disable hardware
@@ -2419,10 +2590,9 @@
     /* no SVM activated; fast case */
     if (likely(!(s->flags & HF_SVMI_MASK)))
         return;
-    if (s->cc_op != CC_OP_DYNAMIC)
-        gen_op_set_cc_op(s->cc_op);
+    gen_update_cc_op(s);
     gen_jmp_im(pc_start - s->cs_base);
-    gen_helper_svm_check_intercept_param(tcg_const_i32(type),
+    gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
                                          tcg_const_i64(param));
 }
 
@@ -2471,12 +2641,12 @@
         if (s->ss32) {
             if (s->addseg) {
                 tcg_gen_mov_tl(cpu_T[1], cpu_A0);
-                gen_op_addl_A0_seg(R_SS);
+                gen_op_addl_A0_seg(s, R_SS);
             }
         } else {
             gen_op_andl_A0_ffff();
             tcg_gen_mov_tl(cpu_T[1], cpu_A0);
-            gen_op_addl_A0_seg(R_SS);
+            gen_op_addl_A0_seg(s, R_SS);
         }
         gen_op_st_T0_A0(s->dflag + 1 + s->mem_index);
         if (s->ss32 && !s->addseg)
@@ -2511,11 +2681,11 @@
             gen_op_addl_A0_im(-4);
         if (s->ss32) {
             if (s->addseg) {
-                gen_op_addl_A0_seg(R_SS);
+                gen_op_addl_A0_seg(s, R_SS);
             }
         } else {
             gen_op_andl_A0_ffff();
-            gen_op_addl_A0_seg(R_SS);
+            gen_op_addl_A0_seg(s, R_SS);
         }
         gen_op_st_T1_A0(s->dflag + 1 + s->mem_index);
 
@@ -2539,10 +2709,10 @@
         gen_op_movl_A0_reg(R_ESP);
         if (s->ss32) {
             if (s->addseg)
-                gen_op_addl_A0_seg(R_SS);
+                gen_op_addl_A0_seg(s, R_SS);
         } else {
             gen_op_andl_A0_ffff();
-            gen_op_addl_A0_seg(R_SS);
+            gen_op_addl_A0_seg(s, R_SS);
         }
         gen_op_ld_T0_A0(s->dflag + 1 + s->mem_index);
     }
@@ -2567,7 +2737,7 @@
         gen_op_andl_A0_ffff();
     tcg_gen_mov_tl(cpu_T[1], cpu_A0);
     if (s->addseg)
-        gen_op_addl_A0_seg(R_SS);
+        gen_op_addl_A0_seg(s, R_SS);
 }
 
 /* NOTE: wrap around in 16 bit not fully handled */
@@ -2580,7 +2750,7 @@
         gen_op_andl_A0_ffff();
     tcg_gen_mov_tl(cpu_T[1], cpu_A0);
     if (s->addseg)
-        gen_op_addl_A0_seg(R_SS);
+        gen_op_addl_A0_seg(s, R_SS);
     for(i = 0;i < 8; i++) {
         gen_op_mov_TN_reg(OT_LONG, 0, 7 - i);
         gen_op_st_T0_A0(OT_WORD + s->dflag + s->mem_index);
@@ -2599,7 +2769,7 @@
     tcg_gen_mov_tl(cpu_T[1], cpu_A0);
     tcg_gen_addi_tl(cpu_T[1], cpu_T[1], 16 <<  s->dflag);
     if (s->addseg)
-        gen_op_addl_A0_seg(R_SS);
+        gen_op_addl_A0_seg(s, R_SS);
     for(i = 0;i < 8; i++) {
         /* ESP is not reloaded */
         if (i != 3) {
@@ -2630,7 +2800,7 @@
         gen_op_st_T0_A0(ot + s->mem_index);
         if (level) {
             /* XXX: must save state */
-            gen_helper_enter64_level(tcg_const_i32(level),
+            gen_helper_enter64_level(cpu_env, tcg_const_i32(level),
                                      tcg_const_i32((ot == OT_QUAD)),
                                      cpu_T[1]);
         }
@@ -2649,13 +2819,13 @@
             gen_op_andl_A0_ffff();
         tcg_gen_mov_tl(cpu_T[1], cpu_A0);
         if (s->addseg)
-            gen_op_addl_A0_seg(R_SS);
+            gen_op_addl_A0_seg(s, R_SS);
         /* push bp */
         gen_op_mov_TN_reg(OT_LONG, 0, R_EBP);
         gen_op_st_T0_A0(ot + s->mem_index);
         if (level) {
             /* XXX: must save state */
-            gen_helper_enter_level(tcg_const_i32(level),
+            gen_helper_enter_level(cpu_env, tcg_const_i32(level),
                                    tcg_const_i32(s->dflag),
                                    cpu_T[1]);
         }
@@ -2667,10 +2837,9 @@
 
 static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
 {
-    if (s->cc_op != CC_OP_DYNAMIC)
-        gen_op_set_cc_op(s->cc_op);
+    gen_update_cc_op(s);
     gen_jmp_im(cur_eip);
-    gen_helper_raise_exception(tcg_const_i32(trapno));
+    gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
     s->is_jmp = 3;
 }
 
@@ -2679,20 +2848,18 @@
 static void gen_interrupt(DisasContext *s, int intno,
                           target_ulong cur_eip, target_ulong next_eip)
 {
-    if (s->cc_op != CC_OP_DYNAMIC)
-        gen_op_set_cc_op(s->cc_op);
+    gen_update_cc_op(s);
     gen_jmp_im(cur_eip);
-    gen_helper_raise_interrupt(tcg_const_i32(intno),
+    gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
                                tcg_const_i32(next_eip - cur_eip));
     s->is_jmp = 3;
 }
 
 static void gen_debug(DisasContext *s, target_ulong cur_eip)
 {
-    if (s->cc_op != CC_OP_DYNAMIC)
-        gen_op_set_cc_op(s->cc_op);
+    gen_update_cc_op(s);
     gen_jmp_im(cur_eip);
-    gen_helper_debug();
+    gen_helper_debug(cpu_env);
     s->is_jmp = 3;
 }
 
@@ -2700,18 +2867,17 @@
    if needed */
 static void gen_eob(DisasContext *s)
 {
-    if (s->cc_op != CC_OP_DYNAMIC)
-        gen_op_set_cc_op(s->cc_op);
+    gen_update_cc_op(s);
     if (s->tb->flags & HF_INHIBIT_IRQ_MASK) {
-        gen_helper_reset_inhibit_irq();
+        gen_helper_reset_inhibit_irq(cpu_env);
     }
     if (s->tb->flags & HF_RF_MASK) {
-        gen_helper_reset_rf();
+        gen_helper_reset_rf(cpu_env);
     }
     if (s->singlestep_enabled) {
-        gen_helper_debug();
+        gen_helper_debug(cpu_env);
     } else if (s->tf) {
-	gen_helper_single_step();
+	gen_helper_single_step(cpu_env);
     } else {
         tcg_gen_exit_tb(0);
     }
@@ -2723,10 +2889,7 @@
 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
 {
     if (s->jmp_opt) {
-        if (s->cc_op != CC_OP_DYNAMIC) {
-            gen_op_set_cc_op(s->cc_op);
-            s->cc_op = CC_OP_DYNAMIC;
-        }
+        gen_update_cc_op(s);
         gen_goto_tb(s, tb_num, eip);
         s->is_jmp = 3;
     } else {
@@ -2800,6 +2963,17 @@
     tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
 }
 
+typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
+typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
+typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
+typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
+typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
+typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
+                               TCGv_i32 val);
+typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
+typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
+                               TCGv val);
+
 #define SSE_SPECIAL ((void *)1)
 #define SSE_DUMMY ((void *)2)
 
@@ -2807,7 +2981,7 @@
 #define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
                      gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
 
-static void *sse_op_table1[256][4] = {
+static const SSEFunc_0_epp sse_op_table1[256][4] = {
     /* 3DNow! extensions */
     [0x0e] = { SSE_DUMMY }, /* femms */
     [0x0f] = { SSE_DUMMY }, /* pf... */
@@ -2824,7 +2998,7 @@
     [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
     [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
     [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
-    [0x2b] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntps, movntpd */
+    [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
     [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
     [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
     [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
@@ -2848,10 +3022,12 @@
     [0x5f] = SSE_FOP(max),
 
     [0xc2] = SSE_FOP(cmpeq),
-    [0xc6] = { gen_helper_shufps, gen_helper_shufpd },
+    [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
+               (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
 
-    [0x38] = { SSE_SPECIAL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* SSSE3/SSE4 */
-    [0x3a] = { SSE_SPECIAL, SSE_SPECIAL }, /* SSSE3/SSE4 */
+    /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
+    [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
+    [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
 
     /* MMX ops and their SSE extensions */
     [0x60] = MMX_OP2(punpcklbw),
@@ -2870,10 +3046,10 @@
     [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
     [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
     [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, , movdqu */
-    [0x70] = { gen_helper_pshufw_mmx,
-               gen_helper_pshufd_xmm,
-               gen_helper_pshufhw_xmm,
-               gen_helper_pshuflw_xmm },
+    [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
+               (SSEFunc_0_epp)gen_helper_pshufd_xmm,
+               (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
+               (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
     [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
     [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
     [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
@@ -2926,7 +3102,8 @@
     [0xf4] = MMX_OP2(pmuludq),
     [0xf5] = MMX_OP2(pmaddwd),
     [0xf6] = MMX_OP2(psadbw),
-    [0xf7] = MMX_OP2(maskmov),
+    [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
+               (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
     [0xf8] = MMX_OP2(psubb),
     [0xf9] = MMX_OP2(psubw),
     [0xfa] = MMX_OP2(psubl),
@@ -2936,7 +3113,7 @@
     [0xfe] = MMX_OP2(paddl),
 };
 
-static void *sse_op_table2[3 * 8][2] = {
+static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
     [0 + 2] = MMX_OP2(psrlw),
     [0 + 4] = MMX_OP2(psraw),
     [0 + 6] = MMX_OP2(psllw),
@@ -2949,24 +3126,35 @@
     [16 + 7] = { NULL, gen_helper_pslldq_xmm },
 };
 
-static void *sse_op_table3[4 * 3] = {
+static const SSEFunc_0_epi sse_op_table3ai[] = {
     gen_helper_cvtsi2ss,
-    gen_helper_cvtsi2sd,
-    X86_64_ONLY(gen_helper_cvtsq2ss),
-    X86_64_ONLY(gen_helper_cvtsq2sd),
-
-    gen_helper_cvttss2si,
-    gen_helper_cvttsd2si,
-    X86_64_ONLY(gen_helper_cvttss2sq),
-    X86_64_ONLY(gen_helper_cvttsd2sq),
-
-    gen_helper_cvtss2si,
-    gen_helper_cvtsd2si,
-    X86_64_ONLY(gen_helper_cvtss2sq),
-    X86_64_ONLY(gen_helper_cvtsd2sq),
+    gen_helper_cvtsi2sd
 };
 
-static void *sse_op_table4[8][4] = {
+#ifdef TARGET_X86_64
+static const SSEFunc_0_epl sse_op_table3aq[] = {
+    gen_helper_cvtsq2ss,
+    gen_helper_cvtsq2sd
+};
+#endif
+
+static const SSEFunc_i_ep sse_op_table3bi[] = {
+    gen_helper_cvttss2si,
+    gen_helper_cvtss2si,
+    gen_helper_cvttsd2si,
+    gen_helper_cvtsd2si
+};
+
+#ifdef TARGET_X86_64
+static const SSEFunc_l_ep sse_op_table3bq[] = {
+    gen_helper_cvttss2sq,
+    gen_helper_cvtss2sq,
+    gen_helper_cvttsd2sq,
+    gen_helper_cvtsd2sq
+};
+#endif
+
+static const SSEFunc_0_epp sse_op_table4[8][4] = {
     SSE_FOP(cmpeq),
     SSE_FOP(cmplt),
     SSE_FOP(cmple),
@@ -2977,7 +3165,7 @@
     SSE_FOP(cmpord),
 };
 
-static void *sse_op_table5[256] = {
+static const SSEFunc_0_epp sse_op_table5[256] = {
     [0x0c] = gen_helper_pi2fw,
     [0x0d] = gen_helper_pi2fd,
     [0x1c] = gen_helper_pf2iw,
@@ -3004,14 +3192,21 @@
     [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
 };
 
-struct sse_op_helper_s {
-    void *op[2]; uint32_t ext_mask;
+struct SSEOpHelper_epp {
+    SSEFunc_0_epp op[2];
+    uint32_t ext_mask;
 };
+
+struct SSEOpHelper_eppi {
+    SSEFunc_0_eppi op[2];
+    uint32_t ext_mask;
+};
+
 #define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
 #define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
 #define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
 #define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
-static struct sse_op_helper_s sse_op_table6[256] = {
+static const struct SSEOpHelper_epp sse_op_table6[256] = {
     [0x00] = SSSE3_OP(pshufb),
     [0x01] = SSSE3_OP(phaddw),
     [0x02] = SSSE3_OP(phaddd),
@@ -3060,7 +3255,7 @@
     [0x41] = SSE41_OP(phminposuw),
 };
 
-static struct sse_op_helper_s sse_op_table7[256] = {
+static const struct SSEOpHelper_eppi sse_op_table7[256] = {
     [0x08] = SSE41_OP(roundps),
     [0x09] = SSE41_OP(roundpd),
     [0x0a] = SSE41_OP(roundss),
@@ -3085,11 +3280,15 @@
     [0x63] = SSE42_OP(pcmpistri),
 };
 
-static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
+static void gen_sse(CPUX86State *env, DisasContext *s, int b,
+                    target_ulong pc_start, int rex_r)
 {
     int b1, op1_offset, op2_offset, is_xmm, val, ot;
     int modrm, mod, rm, reg, reg_addr, offset_addr;
-    void *sse_op2;
+    SSEFunc_0_epp sse_fn_epp;
+    SSEFunc_0_eppi sse_fn_eppi;
+    SSEFunc_0_ppi sse_fn_ppi;
+    SSEFunc_0_eppt sse_fn_eppt;
 
     b &= 0xff;
     if (s->prefix & PREFIX_DATA)
@@ -3100,9 +3299,10 @@
         b1 = 3;
     else
         b1 = 0;
-    sse_op2 = sse_op_table1[b][b1];
-    if (!sse_op2)
+    sse_fn_epp = sse_op_table1[b][b1];
+    if (!sse_fn_epp) {
         goto illegal_op;
+    }
     if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
         is_xmm = 1;
     } else {
@@ -3130,32 +3330,32 @@
         if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
             goto illegal_op;
         /* femms */
-        gen_helper_emms();
+        gen_helper_emms(cpu_env);
         return;
     }
     if (b == 0x77) {
         /* emms */
-        gen_helper_emms();
+        gen_helper_emms(cpu_env);
         return;
     }
     /* prepare MMX state (XXX: optimize by storing fptt and fptags in
        the static cpu state) */
     if (!is_xmm) {
-        gen_helper_enter_mmx();
+        gen_helper_enter_mmx(cpu_env);
     }
 
-    modrm = ldub_code(s->pc++);
+    modrm = cpu_ldub_code(env, s->pc++);
     reg = ((modrm >> 3) & 7);
     if (is_xmm)
         reg |= rex_r;
     mod = (modrm >> 6) & 3;
-    if (sse_op2 == SSE_SPECIAL) {
+    if (sse_fn_epp == SSE_SPECIAL) {
         b |= (b1 << 8);
         switch(b) {
         case 0x0e7: /* movntq */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
             break;
         case 0x1e7: /* movntdq */
@@ -3164,19 +3364,19 @@
         case 0x3f0: /* lddqu */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             gen_sto_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
             break;
         case 0x6e: /* movd mm, ea */
 #ifdef TARGET_X86_64
             if (s->dflag == 2) {
-                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, OT_QUAD, OR_TMP0, 0);
                 tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
             } else
 #endif
             {
-                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+                gen_ldst_modrm(env, s, modrm, OT_LONG, OR_TMP0, 0);
+                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
                                  offsetof(CPUX86State,fpregs[reg].mmx));
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
@@ -3185,15 +3385,15 @@
         case 0x16e: /* movd xmm, ea */
 #ifdef TARGET_X86_64
             if (s->dflag == 2) {
-                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+                gen_ldst_modrm(env, s, modrm, OT_QUAD, OR_TMP0, 0);
+                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
                                  offsetof(CPUX86State,xmm_regs[reg]));
                 gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T[0]);
             } else
 #endif
             {
-                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+                gen_ldst_modrm(env, s, modrm, OT_LONG, OR_TMP0, 0);
+                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
                                  offsetof(CPUX86State,xmm_regs[reg]));
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
@@ -3201,7 +3401,7 @@
             break;
         case 0x6f: /* movq mm, ea */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
             } else {
                 rm = (modrm & 7);
@@ -3218,7 +3418,7 @@
         case 0x16f: /* movdqa xmm, ea */
         case 0x26f: /* movdqu xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3228,7 +3428,7 @@
             break;
         case 0x210: /* movss xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_op_ld_T0_A0(OT_LONG + s->mem_index);
                 tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
                 gen_op_movl_T0_0();
@@ -3243,7 +3443,7 @@
             break;
         case 0x310: /* movsd xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
                 gen_op_movl_T0_0();
                 tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
@@ -3257,7 +3457,7 @@
         case 0x012: /* movlps */
         case 0x112: /* movlpd */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
             } else {
                 /* movhlps */
@@ -3268,7 +3468,7 @@
             break;
         case 0x212: /* movsldup */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3284,7 +3484,7 @@
             break;
         case 0x312: /* movddup */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3297,7 +3497,7 @@
         case 0x016: /* movhps */
         case 0x116: /* movhpd */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
             } else {
                 /* movlhps */
@@ -3308,7 +3508,7 @@
             break;
         case 0x216: /* movshdup */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3325,34 +3525,34 @@
         case 0x7e: /* movd ea, mm */
 #ifdef TARGET_X86_64
             if (s->dflag == 2) {
-                tcg_gen_ld_i64(cpu_T[0], cpu_env,
+                tcg_gen_ld_i64(cpu_T[0], cpu_env, 
                                offsetof(CPUX86State,fpregs[reg].mmx));
-                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, OT_QUAD, OR_TMP0, 1);
             } else
 #endif
             {
-                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
+                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
                                  offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
-                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, OT_LONG, OR_TMP0, 1);
             }
             break;
         case 0x17e: /* movd ea, xmm */
 #ifdef TARGET_X86_64
             if (s->dflag == 2) {
-                tcg_gen_ld_i64(cpu_T[0], cpu_env,
+                tcg_gen_ld_i64(cpu_T[0], cpu_env, 
                                offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
-                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, OT_QUAD, OR_TMP0, 1);
             } else
 #endif
             {
-                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
+                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
                                  offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
-                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, OT_LONG, OR_TMP0, 1);
             }
             break;
         case 0x27e: /* movq xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3363,7 +3563,7 @@
             break;
         case 0x7f: /* movq ea, mm */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
             } else {
                 rm = (modrm & 7);
@@ -3378,7 +3578,7 @@
         case 0x17f: /* movdqa ea, xmm */
         case 0x27f: /* movdqu ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_sto_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3388,7 +3588,7 @@
             break;
         case 0x211: /* movss ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
                 gen_op_st_T0_A0(OT_LONG + s->mem_index);
             } else {
@@ -3399,7 +3599,7 @@
             break;
         case 0x311: /* movsd ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3410,7 +3610,7 @@
         case 0x013: /* movlps */
         case 0x113: /* movlpd */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
             } else {
                 goto illegal_op;
@@ -3419,7 +3619,7 @@
         case 0x017: /* movhps */
         case 0x117: /* movhpd */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
             } else {
                 goto illegal_op;
@@ -3431,7 +3631,10 @@
         case 0x171: /* shift xmm, im */
         case 0x172:
         case 0x173:
-            val = ldub_code(s->pc++);
+            if (b1 >= 2) {
+	        goto illegal_op;
+            }
+            val = cpu_ldub_code(env, s->pc++);
             if (is_xmm) {
                 gen_op_movl_T0_im(val);
                 tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
@@ -3445,9 +3648,11 @@
                 tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1)));
                 op1_offset = offsetof(CPUX86State,mmx_t0);
             }
-            sse_op2 = sse_op_table2[((b - 1) & 3) * 8 + (((modrm >> 3)) & 7)][b1];
-            if (!sse_op2)
+            sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
+                                       (((modrm >> 3)) & 7)][b1];
+            if (!sse_fn_epp) {
                 goto illegal_op;
+            }
             if (is_xmm) {
                 rm = (modrm & 7) | REX_B(s);
                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
@@ -3457,29 +3662,29 @@
             }
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset);
-            ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
+            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
             break;
         case 0x050: /* movmskps */
             rm = (modrm & 7) | REX_B(s);
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
                              offsetof(CPUX86State,xmm_regs[rm]));
-            gen_helper_movmskps(cpu_tmp2_i32, cpu_ptr0);
+            gen_helper_movmskps(cpu_tmp2_i32, cpu_env, cpu_ptr0);
             tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
             gen_op_mov_reg_T0(OT_LONG, reg);
             break;
         case 0x150: /* movmskpd */
             rm = (modrm & 7) | REX_B(s);
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
                              offsetof(CPUX86State,xmm_regs[rm]));
-            gen_helper_movmskpd(cpu_tmp2_i32, cpu_ptr0);
+            gen_helper_movmskpd(cpu_tmp2_i32, cpu_env, cpu_ptr0);
             tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
             gen_op_mov_reg_T0(OT_LONG, reg);
             break;
         case 0x02a: /* cvtpi2ps */
         case 0x12a: /* cvtpi2pd */
-            gen_helper_enter_mmx();
+            gen_helper_enter_mmx(cpu_env);
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 op2_offset = offsetof(CPUX86State,mmx_t0);
                 gen_ldq_env_A0(s->mem_index, op2_offset);
             } else {
@@ -3491,35 +3696,40 @@
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
             switch(b >> 8) {
             case 0x0:
-                gen_helper_cvtpi2ps(cpu_ptr0, cpu_ptr1);
+                gen_helper_cvtpi2ps(cpu_env, cpu_ptr0, cpu_ptr1);
                 break;
             default:
             case 0x1:
-                gen_helper_cvtpi2pd(cpu_ptr0, cpu_ptr1);
+                gen_helper_cvtpi2pd(cpu_env, cpu_ptr0, cpu_ptr1);
                 break;
             }
             break;
         case 0x22a: /* cvtsi2ss */
         case 0x32a: /* cvtsi2sd */
             ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
-            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
-            sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2)];
             if (ot == OT_LONG) {
+                SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-                ((void (*)(TCGv_ptr, TCGv_i32))sse_op2)(cpu_ptr0, cpu_tmp2_i32);
+                sse_fn_epi(cpu_env, cpu_ptr0, cpu_tmp2_i32);
             } else {
-                ((void (*)(TCGv_ptr, TCGv))sse_op2)(cpu_ptr0, cpu_T[0]);
+#ifdef TARGET_X86_64
+                SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
+                sse_fn_epl(cpu_env, cpu_ptr0, cpu_T[0]);
+#else
+                goto illegal_op;
+#endif
             }
             break;
         case 0x02c: /* cvttps2pi */
         case 0x12c: /* cvttpd2pi */
         case 0x02d: /* cvtps2pi */
         case 0x12d: /* cvtpd2pi */
-            gen_helper_enter_mmx();
+            gen_helper_enter_mmx(cpu_env);
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 op2_offset = offsetof(CPUX86State,xmm_t0);
                 gen_ldo_env_A0(s->mem_index, op2_offset);
             } else {
@@ -3531,16 +3741,16 @@
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
             switch(b) {
             case 0x02c:
-                gen_helper_cvttps2pi(cpu_ptr0, cpu_ptr1);
+                gen_helper_cvttps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
                 break;
             case 0x12c:
-                gen_helper_cvttpd2pi(cpu_ptr0, cpu_ptr1);
+                gen_helper_cvttpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
                 break;
             case 0x02d:
-                gen_helper_cvtps2pi(cpu_ptr0, cpu_ptr1);
+                gen_helper_cvtps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
                 break;
             case 0x12d:
-                gen_helper_cvtpd2pi(cpu_ptr0, cpu_ptr1);
+                gen_helper_cvtpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
                 break;
             }
             break;
@@ -3550,7 +3760,7 @@
         case 0x32d: /* cvtsd2si */
             ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 if ((b >> 8) & 1) {
                     gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_t0.XMM_Q(0)));
                 } else {
@@ -3562,22 +3772,28 @@
                 rm = (modrm & 7) | REX_B(s);
                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
             }
-            sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2) + 4 +
-                                    (b & 1) * 4];
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
             if (ot == OT_LONG) {
-                ((void (*)(TCGv_i32, TCGv_ptr))sse_op2)(cpu_tmp2_i32, cpu_ptr0);
+                SSEFunc_i_ep sse_fn_i_ep =
+                    sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
+                sse_fn_i_ep(cpu_tmp2_i32, cpu_env, cpu_ptr0);
                 tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
             } else {
-                ((void (*)(TCGv, TCGv_ptr))sse_op2)(cpu_T[0], cpu_ptr0);
+#ifdef TARGET_X86_64
+                SSEFunc_l_ep sse_fn_l_ep =
+                    sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
+                sse_fn_l_ep(cpu_T[0], cpu_env, cpu_ptr0);
+#else
+                goto illegal_op;
+#endif
             }
             gen_op_mov_reg_T0(ot, reg);
             break;
         case 0xc4: /* pinsrw */
         case 0x1c4:
             s->rip_offset = 1;
-            gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
-            val = ldub_code(s->pc++);
+            gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 0);
+            val = cpu_ldub_code(env, s->pc++);
             if (b1) {
                 val &= 7;
                 tcg_gen_st16_tl(cpu_T[0], cpu_env,
@@ -3593,7 +3809,7 @@
             if (mod != 3)
                 goto illegal_op;
             ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
-            val = ldub_code(s->pc++);
+            val = cpu_ldub_code(env, s->pc++);
             if (b1) {
                 val &= 7;
                 rm = (modrm & 7) | REX_B(s);
@@ -3610,7 +3826,7 @@
             break;
         case 0x1d6: /* movq ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3620,14 +3836,14 @@
             }
             break;
         case 0x2d6: /* movq2dq */
-            gen_helper_enter_mmx();
+            gen_helper_enter_mmx(cpu_env);
             rm = (modrm & 7);
             gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
                         offsetof(CPUX86State,fpregs[rm].mmx));
             gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
             break;
         case 0x3d6: /* movdq2q */
-            gen_helper_enter_mmx();
+            gen_helper_enter_mmx(cpu_env);
             rm = (modrm & 7) | REX_B(s);
             gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx),
                         offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
@@ -3639,29 +3855,31 @@
             if (b1) {
                 rm = (modrm & 7) | REX_B(s);
                 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm]));
-                gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_ptr0);
+                gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_env, cpu_ptr0);
             } else {
                 rm = (modrm & 7);
                 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx));
-                gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_ptr0);
+                gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_env, cpu_ptr0);
             }
             tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
             reg = ((modrm >> 3) & 7) | rex_r;
             gen_op_mov_reg_T0(OT_LONG, reg);
             break;
+
         case 0x138:
             if (s->prefix & PREFIX_REPNZ)
                 goto crc32;
         case 0x038:
             b = modrm;
-            modrm = ldub_code(s->pc++);
+            modrm = cpu_ldub_code(env, s->pc++);
             rm = modrm & 7;
             reg = ((modrm >> 3) & 7) | rex_r;
             mod = (modrm >> 6) & 3;
 
-            sse_op2 = sse_op_table6[b].op[b1];
-            if (!sse_op2)
+            sse_fn_epp = sse_op_table6[b].op[b1];
+            if (!sse_fn_epp) {
                 goto illegal_op;
+            }
             if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
                 goto illegal_op;
 
@@ -3671,7 +3889,7 @@
                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                 } else {
                     op2_offset = offsetof(CPUX86State,xmm_t0);
-                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                    gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                     switch (b) {
                     case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
                     case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
@@ -3706,24 +3924,26 @@
                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                 } else {
                     op2_offset = offsetof(CPUX86State,mmx_t0);
-                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                    gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                     gen_ldq_env_A0(s->mem_index, op2_offset);
                 }
             }
-            if (sse_op2 == SSE_SPECIAL)
+            if (sse_fn_epp == SSE_SPECIAL) {
                 goto illegal_op;
+            }
 
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
+            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
 
-            if (b == 0x17)
-                s->cc_op = CC_OP_EFLAGS;
+            if (b == 0x17) {
+                set_cc_op(s, CC_OP_EFLAGS);
+            }
             break;
         case 0x338: /* crc32 */
         crc32:
             b = modrm;
-            modrm = ldub_code(s->pc++);
+            modrm = cpu_ldub_code(env, s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
 
             if (b != 0xf0 && b != 0xf1)
@@ -3743,34 +3963,36 @@
 
             gen_op_mov_TN_reg(OT_LONG, 0, reg);
             tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
             gen_helper_crc32(cpu_T[0], cpu_tmp2_i32,
                              cpu_T[0], tcg_const_i32(8 << ot));
 
             ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
             gen_op_mov_reg_T0(ot, reg);
             break;
+
         case 0x03a:
         case 0x13a:
             b = modrm;
-            modrm = ldub_code(s->pc++);
+            modrm = cpu_ldub_code(env, s->pc++);
             rm = modrm & 7;
             reg = ((modrm >> 3) & 7) | rex_r;
             mod = (modrm >> 6) & 3;
 
-            sse_op2 = sse_op_table7[b].op[b1];
-            if (!sse_op2)
+            sse_fn_eppi = sse_op_table7[b].op[b1];
+            if (!sse_fn_eppi) {
                 goto illegal_op;
+            }
             if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
                 goto illegal_op;
 
-            if (sse_op2 == SSE_SPECIAL) {
+            if (sse_fn_eppi == SSE_SPECIAL) {
                 ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3)
-                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                    gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 reg = ((modrm >> 3) & 7) | rex_r;
-                val = ldub_code(s->pc++);
+                val = cpu_ldub_code(env, s->pc++);
                 switch (b) {
                 case 0x14: /* pextrb */
                     tcg_gen_ld8u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
@@ -3900,7 +4122,7 @@
                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                 } else {
                     op2_offset = offsetof(CPUX86State,xmm_t0);
-                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                    gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                     gen_ldo_env_A0(s->mem_index, op2_offset);
                 }
             } else {
@@ -3909,14 +4131,14 @@
                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                 } else {
                     op2_offset = offsetof(CPUX86State,mmx_t0);
-                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                    gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                     gen_ldq_env_A0(s->mem_index, op2_offset);
                 }
             }
-            val = ldub_code(s->pc++);
+            val = cpu_ldub_code(env, s->pc++);
 
             if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
-                s->cc_op = CC_OP_EFLAGS;
+                set_cc_op(s, CC_OP_EFLAGS);
 
                 if (s->dflag == 2)
                     /* The helper must use entire 64-bit gp registers */
@@ -3925,7 +4147,7 @@
 
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            ((void (*)(TCGv_ptr, TCGv_ptr, TCGv_i32))sse_op2)(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
+            sse_fn_eppi(cpu_env, cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
             break;
         default:
             goto illegal_op;
@@ -3944,7 +4166,7 @@
         if (is_xmm) {
             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 op2_offset = offsetof(CPUX86State,xmm_t0);
                 if (b1 >= 2 && ((b >= 0x50 && b <= 0x5f && b != 0x5b) ||
                                 b == 0xc2)) {
@@ -3967,7 +4189,7 @@
         } else {
             op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 op2_offset = offsetof(CPUX86State,mmx_t0);
                 gen_ldq_env_A0(s->mem_index, op2_offset);
             } else {
@@ -3979,30 +4201,34 @@
         case 0x0f: /* 3DNow! data insns */
             if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
                 goto illegal_op;
-            val = ldub_code(s->pc++);
-            sse_op2 = sse_op_table5[val];
-            if (!sse_op2)
+            val = cpu_ldub_code(env, s->pc++);
+            sse_fn_epp = sse_op_table5[val];
+            if (!sse_fn_epp) {
                 goto illegal_op;
+            }
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
+            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
             break;
         case 0x70: /* pshufx insn */
         case 0xc6: /* pshufx insn */
-            val = ldub_code(s->pc++);
+            val = cpu_ldub_code(env, s->pc++);
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            ((void (*)(TCGv_ptr, TCGv_ptr, TCGv_i32))sse_op2)(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
+            /* XXX: introduce a new table? */
+            sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
+            sse_fn_ppi(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
             break;
         case 0xc2:
             /* compare insns */
-            val = ldub_code(s->pc++);
+            val = cpu_ldub_code(env, s->pc++);
             if (val >= 8)
                 goto illegal_op;
-            sse_op2 = sse_op_table4[val][b1];
+            sse_fn_epp = sse_op_table4[val][b1];
+
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
+            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
             break;
         case 0xf7:
             /* maskmov : we must prepare A0 */
@@ -4022,23 +4248,25 @@
 
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            ((void (*)(TCGv_ptr, TCGv_ptr, TCGv))sse_op2)(cpu_ptr0, cpu_ptr1, cpu_A0);
+            /* XXX: introduce a new table? */
+            sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
+            sse_fn_eppt(cpu_env, cpu_ptr0, cpu_ptr1, cpu_A0);
             break;
         default:
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
+            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
             break;
         }
         if (b == 0x2e || b == 0x2f) {
-            s->cc_op = CC_OP_EFLAGS;
+            set_cc_op(s, CC_OP_EFLAGS);
         }
     }
 }
 
 /* convert one instruction. s->is_jmp is set if the translation must
    be stopped. Return the next pc value */
-static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
+static target_ulong disas_insn(CPUX86State *env, DisasContext *s, target_ulong pc_start)
 {
     int b, prefixes, aflag, dflag;
     int shift, ot;
@@ -4049,8 +4277,9 @@
 #endif
     int rex_r;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
         tcg_gen_debug_insn_start(pc_start);
+    }
     s->pc = pc_start;
     prefixes = 0;
     aflag = s->code32;
@@ -4064,7 +4293,7 @@
 #endif
     s->rip_offset = 0; /* for relative ip address */
  next_byte:
-    b = ldub_code(s->pc);
+    b = cpu_ldub_code(env, s->pc);
     s->pc++;
     /* check prefixes */
 #ifdef TARGET_X86_64
@@ -4159,10 +4388,12 @@
             prefixes |= PREFIX_ADR;
             goto next_byte;
         }
-        if (prefixes & PREFIX_DATA)
+        if (prefixes & PREFIX_DATA) {
             dflag ^= 1;
-        if (prefixes & PREFIX_ADR)
+        }
+        if (prefixes & PREFIX_ADR) {
             aflag ^= 1;
+        }
     }
 
     s->prefix = prefixes;
@@ -4179,7 +4410,7 @@
     case 0x0f:
         /**************************/
         /* extended op code */
-        b = ldub_code(s->pc++) | 0x100;
+        b = cpu_ldub_code(env, s->pc++) | 0x100;
         goto reswitch;
 
         /**************************/
@@ -4204,18 +4435,18 @@
 
             switch(f) {
             case 0: /* OP Ev, Gv */
-                modrm = ldub_code(s->pc++);
+                modrm = cpu_ldub_code(env, s->pc++);
                 reg = ((modrm >> 3) & 7) | rex_r;
                 mod = (modrm >> 6) & 3;
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3) {
-                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                    gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                     opreg = OR_TMP0;
                 } else if (op == OP_XORL && rm == reg) {
                 xor_zero:
                     /* xor reg, reg optimisation */
                     gen_op_movl_T0_0();
-                    s->cc_op = CC_OP_LOGICB + ot;
+                    set_cc_op(s, CC_OP_LOGICB + ot);
                     gen_op_mov_reg_T0(ot, reg);
                     gen_op_update1_cc();
                     break;
@@ -4226,12 +4457,12 @@
                 gen_op(s, op, ot, opreg);
                 break;
             case 1: /* OP Gv, Ev */
-                modrm = ldub_code(s->pc++);
+                modrm = cpu_ldub_code(env, s->pc++);
                 mod = (modrm >> 6) & 3;
                 reg = ((modrm >> 3) & 7) | rex_r;
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3) {
-                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                    gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                     gen_op_ld_T1_A0(ot + s->mem_index);
                 } else if (op == OP_XORL && rm == reg) {
                     goto xor_zero;
@@ -4241,7 +4472,7 @@
                 gen_op(s, op, ot, reg);
                 break;
             case 2: /* OP A, Iv */
-                val = insn_get(s, ot);
+                val = insn_get(env, s, ot);
                 gen_op_movl_T1_im(val);
                 gen_op(s, op, ot, OR_EAX);
                 break;
@@ -4263,7 +4494,7 @@
             else
                 ot = dflag + OT_WORD;
 
-            modrm = ldub_code(s->pc++);
+            modrm = cpu_ldub_code(env, s->pc++);
             mod = (modrm >> 6) & 3;
             rm = (modrm & 7) | REX_B(s);
             op = (modrm >> 3) & 7;
@@ -4273,7 +4504,7 @@
                     s->rip_offset = 1;
                 else
                     s->rip_offset = insn_const_size(ot);
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 opreg = OR_TMP0;
             } else {
                 opreg = rm;
@@ -4284,10 +4515,10 @@
             case 0x80:
             case 0x81:
             case 0x82:
-                val = insn_get(s, ot);
+                val = insn_get(env, s, ot);
                 break;
             case 0x83:
-                val = (int8_t)insn_get(s, OT_BYTE);
+                val = (int8_t)insn_get(env, s, OT_BYTE);
                 break;
             }
             gen_op_movl_T1_im(val);
@@ -4312,14 +4543,14 @@
         else
             ot = dflag + OT_WORD;
 
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         rm = (modrm & 7) | REX_B(s);
         op = (modrm >> 3) & 7;
         if (mod != 3) {
             if (op == 0)
                 s->rip_offset = insn_const_size(ot);
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             gen_op_ld_T0_A0(ot + s->mem_index);
         } else {
             gen_op_mov_TN_reg(ot, 0, rm);
@@ -4327,10 +4558,10 @@
 
         switch(op) {
         case 0: /* test */
-            val = insn_get(s, ot);
+            val = insn_get(env, s, ot);
             gen_op_movl_T1_im(val);
             gen_op_testl_T0_T1_cc();
-            s->cc_op = CC_OP_LOGICB + ot;
+            set_cc_op(s, CC_OP_LOGICB + ot);
             break;
         case 2: /* not */
             tcg_gen_not_tl(cpu_T[0], cpu_T[0]);
@@ -4348,7 +4579,7 @@
                 gen_op_mov_reg_T0(ot, rm);
             }
             gen_op_update_neg_cc();
-            s->cc_op = CC_OP_SUBB + ot;
+            set_cc_op(s, CC_OP_SUBB + ot);
             break;
         case 4: /* mul */
             switch(ot) {
@@ -4361,7 +4592,7 @@
                 gen_op_mov_reg_T0(OT_WORD, R_EAX);
                 tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                 tcg_gen_andi_tl(cpu_cc_src, cpu_T[0], 0xff00);
-                s->cc_op = CC_OP_MULB;
+                set_cc_op(s, CC_OP_MULB);
                 break;
             case OT_WORD:
                 gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
@@ -4374,7 +4605,7 @@
                 tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
                 gen_op_mov_reg_T0(OT_WORD, R_EDX);
                 tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
-                s->cc_op = CC_OP_MULW;
+                set_cc_op(s, CC_OP_MULW);
                 break;
             default:
             case OT_LONG:
@@ -4406,12 +4637,12 @@
                     tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
                 }
 #endif
-                s->cc_op = CC_OP_MULL;
+                set_cc_op(s, CC_OP_MULL);
                 break;
 #ifdef TARGET_X86_64
             case OT_QUAD:
-                gen_helper_mulq_EAX_T0(cpu_T[0]);
-                s->cc_op = CC_OP_MULQ;
+                gen_helper_mulq_EAX_T0(cpu_env, cpu_T[0]);
+                set_cc_op(s, CC_OP_MULQ);
                 break;
 #endif
             }
@@ -4428,7 +4659,7 @@
                 tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                 tcg_gen_ext8s_tl(cpu_tmp0, cpu_T[0]);
                 tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
-                s->cc_op = CC_OP_MULB;
+                set_cc_op(s, CC_OP_MULB);
                 break;
             case OT_WORD:
                 gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
@@ -4442,7 +4673,7 @@
                 tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
                 tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
                 gen_op_mov_reg_T0(OT_WORD, R_EDX);
-                s->cc_op = CC_OP_MULW;
+                set_cc_op(s, CC_OP_MULW);
                 break;
             default:
             case OT_LONG:
@@ -4476,12 +4707,12 @@
                     tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
                 }
 #endif
-                s->cc_op = CC_OP_MULL;
+                set_cc_op(s, CC_OP_MULL);
                 break;
 #ifdef TARGET_X86_64
             case OT_QUAD:
-                gen_helper_imulq_EAX_T0(cpu_T[0]);
-                s->cc_op = CC_OP_MULQ;
+                gen_helper_imulq_EAX_T0(cpu_env, cpu_T[0]);
+                set_cc_op(s, CC_OP_MULQ);
                 break;
 #endif
             }
@@ -4490,21 +4721,21 @@
             switch(ot) {
             case OT_BYTE:
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_divb_AL(cpu_T[0]);
+                gen_helper_divb_AL(cpu_env, cpu_T[0]);
                 break;
             case OT_WORD:
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_divw_AX(cpu_T[0]);
+                gen_helper_divw_AX(cpu_env, cpu_T[0]);
                 break;
             default:
             case OT_LONG:
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_divl_EAX(cpu_T[0]);
+                gen_helper_divl_EAX(cpu_env, cpu_T[0]);
                 break;
 #ifdef TARGET_X86_64
             case OT_QUAD:
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_divq_EAX(cpu_T[0]);
+                gen_helper_divq_EAX(cpu_env, cpu_T[0]);
                 break;
 #endif
             }
@@ -4513,21 +4744,21 @@
             switch(ot) {
             case OT_BYTE:
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_idivb_AL(cpu_T[0]);
+                gen_helper_idivb_AL(cpu_env, cpu_T[0]);
                 break;
             case OT_WORD:
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_idivw_AX(cpu_T[0]);
+                gen_helper_idivw_AX(cpu_env, cpu_T[0]);
                 break;
             default:
             case OT_LONG:
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_idivl_EAX(cpu_T[0]);
+                gen_helper_idivl_EAX(cpu_env, cpu_T[0]);
                 break;
 #ifdef TARGET_X86_64
             case OT_QUAD:
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_idivq_EAX(cpu_T[0]);
+                gen_helper_idivq_EAX(cpu_env, cpu_T[0]);
                 break;
 #endif
             }
@@ -4544,7 +4775,7 @@
         else
             ot = dflag + OT_WORD;
 
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         rm = (modrm & 7) | REX_B(s);
         op = (modrm >> 3) & 7;
@@ -4565,7 +4796,7 @@
             }
         }
         if (mod != 3) {
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             if (op >= 2 && op != 3 && op != 5)
                 gen_op_ld_T0_A0(ot + s->mem_index);
         } else {
@@ -4603,16 +4834,15 @@
             gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
         do_lcall:
             if (s->pe && !s->vm86) {
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
+                gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-                gen_helper_lcall_protected(cpu_tmp2_i32, cpu_T[1],
+                gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T[1],
                                            tcg_const_i32(dflag),
                                            tcg_const_i32(s->pc - pc_start));
             } else {
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-                gen_helper_lcall_real(cpu_tmp2_i32, cpu_T[1],
+                gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, cpu_T[1],
                                       tcg_const_i32(dflag),
                                       tcg_const_i32(s->pc - s->cs_base));
             }
@@ -4630,11 +4860,10 @@
             gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
         do_ljmp:
             if (s->pe && !s->vm86) {
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
+                gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-                gen_helper_ljmp_protected(cpu_tmp2_i32, cpu_T[1],
+                gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T[1],
                                           tcg_const_i32(s->pc - pc_start));
             } else {
                 gen_op_movl_seg_T0_vm(R_CS);
@@ -4658,15 +4887,15 @@
         else
             ot = dflag + OT_WORD;
 
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         rm = (modrm & 7) | REX_B(s);
         reg = ((modrm >> 3) & 7) | rex_r;
 
-        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
         gen_op_mov_TN_reg(ot, 1, reg);
         gen_op_testl_T0_T1_cc();
-        s->cc_op = CC_OP_LOGICB + ot;
+        set_cc_op(s, CC_OP_LOGICB + ot);
         break;
 
     case 0xa8: /* test eAX, Iv */
@@ -4675,12 +4904,12 @@
             ot = OT_BYTE;
         else
             ot = dflag + OT_WORD;
-        val = insn_get(s, ot);
+        val = insn_get(env, s, ot);
 
         gen_op_mov_TN_reg(ot, 0, OR_EAX);
         gen_op_movl_T1_im(val);
         gen_op_testl_T0_T1_cc();
-        s->cc_op = CC_OP_LOGICB + ot;
+        set_cc_op(s, CC_OP_LOGICB + ot);
         break;
 
     case 0x98: /* CWDE/CBW */
@@ -4725,18 +4954,18 @@
     case 0x69: /* imul Gv, Ev, I */
     case 0x6b:
         ot = dflag + OT_WORD;
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
         if (b == 0x69)
             s->rip_offset = insn_const_size(ot);
         else if (b == 0x6b)
             s->rip_offset = 1;
-        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
         if (b == 0x69) {
-            val = insn_get(s, ot);
+            val = insn_get(env, s, ot);
             gen_op_movl_T1_im(val);
         } else if (b == 0x6b) {
-            val = (int8_t)insn_get(s, OT_BYTE);
+            val = (int8_t)insn_get(env, s, OT_BYTE);
             gen_op_movl_T1_im(val);
         } else {
             gen_op_mov_TN_reg(ot, 1, reg);
@@ -4744,7 +4973,7 @@
 
 #ifdef TARGET_X86_64
         if (ot == OT_QUAD) {
-            gen_helper_imulq_T0_T1(cpu_T[0], cpu_T[0], cpu_T[1]);
+            gen_helper_imulq_T0_T1(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
         } else
 #endif
         if (ot == OT_LONG) {
@@ -4781,7 +5010,7 @@
             tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
         }
         gen_op_mov_reg_T0(ot, reg);
-        s->cc_op = CC_OP_MULB + ot;
+        set_cc_op(s, CC_OP_MULB + ot);
         break;
     case 0x1c0:
     case 0x1c1: /* xadd Ev, Gv */
@@ -4789,7 +5018,7 @@
             ot = OT_BYTE;
         else
             ot = dflag + OT_WORD;
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
         mod = (modrm >> 6) & 3;
         if (mod == 3) {
@@ -4800,7 +5029,7 @@
             gen_op_mov_reg_T1(ot, reg);
             gen_op_mov_reg_T0(ot, rm);
         } else {
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             gen_op_mov_TN_reg(ot, 0, reg);
             gen_op_ld_T1_A0(ot + s->mem_index);
             gen_op_addl_T0_T1();
@@ -4808,7 +5037,7 @@
             gen_op_mov_reg_T1(ot, reg);
         }
         gen_op_update2_cc();
-        s->cc_op = CC_OP_ADDB + ot;
+        set_cc_op(s, CC_OP_ADDB + ot);
         break;
     case 0x1b0:
     case 0x1b1: /* cmpxchg Ev, Gv */
@@ -4820,7 +5049,7 @@
                 ot = OT_BYTE;
             else
                 ot = dflag + OT_WORD;
-            modrm = ldub_code(s->pc++);
+            modrm = cpu_ldub_code(env, s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
             mod = (modrm >> 6) & 3;
             t0 = tcg_temp_local_new();
@@ -4832,7 +5061,7 @@
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_mov_v_reg(ot, t0, rm);
             } else {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 tcg_gen_mov_tl(a0, cpu_A0);
                 gen_op_ld_v(ot + s->mem_index, t0, a0);
                 rm = 0; /* avoid warning */
@@ -4858,7 +5087,7 @@
             }
             tcg_gen_mov_tl(cpu_cc_src, t0);
             tcg_gen_mov_tl(cpu_cc_dst, t2);
-            s->cc_op = CC_OP_SUBB + ot;
+            set_cc_op(s, CC_OP_SUBB + ot);
             tcg_temp_free(t0);
             tcg_temp_free(t1);
             tcg_temp_free(t2);
@@ -4866,7 +5095,7 @@
         }
         break;
     case 0x1c7: /* cmpxchg8b */
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         if ((mod == 3) || ((modrm & 0x38) != 0x8))
             goto illegal_op;
@@ -4875,22 +5104,20 @@
             if (!(s->cpuid_ext_features & CPUID_EXT_CX16))
                 goto illegal_op;
             gen_jmp_im(pc_start - s->cs_base);
-            if (s->cc_op != CC_OP_DYNAMIC)
-                gen_op_set_cc_op(s->cc_op);
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
-            gen_helper_cmpxchg16b(cpu_A0);
+            gen_update_cc_op(s);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+            gen_helper_cmpxchg16b(cpu_env, cpu_A0);
         } else
 #endif
         {
             if (!(s->cpuid_features & CPUID_CX8))
                 goto illegal_op;
             gen_jmp_im(pc_start - s->cs_base);
-            if (s->cc_op != CC_OP_DYNAMIC)
-                gen_op_set_cc_op(s->cc_op);
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
-            gen_helper_cmpxchg8b(cpu_A0);
+            gen_update_cc_op(s);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+            gen_helper_cmpxchg8b(cpu_env, cpu_A0);
         }
-        s->cc_op = CC_OP_EFLAGS;
+        set_cc_op(s, CC_OP_EFLAGS);
         break;
 
         /**************************/
@@ -4928,9 +5155,9 @@
             ot = dflag + OT_WORD;
         }
         if (b == 0x68)
-            val = insn_get(s, ot);
+            val = insn_get(env, s, ot);
         else
-            val = (int8_t)insn_get(s, OT_BYTE);
+            val = (int8_t)insn_get(env, s, OT_BYTE);
         gen_op_movl_T0_im(val);
         gen_push_T0(s);
         break;
@@ -4940,7 +5167,7 @@
         } else {
             ot = dflag + OT_WORD;
         }
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         gen_pop_T0(s);
         if (mod == 3) {
@@ -4951,7 +5178,7 @@
         } else {
             /* NOTE: order is important too for MMU exceptions */
             s->popl_esp_hack = 1 << ot;
-            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
             s->popl_esp_hack = 0;
             gen_pop_update(s);
         }
@@ -4959,9 +5186,9 @@
     case 0xc8: /* enter */
         {
             int level;
-            val = lduw_code(s->pc);
+            val = cpu_lduw_code(env, s->pc);
             s->pc += 2;
-            level = ldub_code(s->pc++);
+            level = cpu_ldub_code(env, s->pc++);
             gen_enter(s, val, level);
         }
         break;
@@ -5014,7 +5241,7 @@
             /* If several instructions disable interrupts, only the
                _first_ does it */
             if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
-                gen_helper_set_inhibit_irq();
+                gen_helper_set_inhibit_irq(cpu_env);
             s->tf = 0;
         }
         if (s->is_jmp) {
@@ -5041,11 +5268,11 @@
             ot = OT_BYTE;
         else
             ot = dflag + OT_WORD;
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
 
         /* generate a generic store */
-        gen_ldst_modrm(s, modrm, ot, reg, 1);
+        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
         break;
     case 0xc6:
     case 0xc7: /* mov Ev, Iv */
@@ -5053,13 +5280,13 @@
             ot = OT_BYTE;
         else
             ot = dflag + OT_WORD;
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         if (mod != 3) {
             s->rip_offset = insn_const_size(ot);
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
         }
-        val = insn_get(s, ot);
+        val = insn_get(env, s, ot);
         gen_op_movl_T0_im(val);
         if (mod != 3)
             gen_op_st_T0_A0(ot + s->mem_index);
@@ -5072,25 +5299,25 @@
             ot = OT_BYTE;
         else
             ot = OT_WORD + dflag;
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
 
-        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
         gen_op_mov_reg_T0(ot, reg);
         break;
     case 0x8e: /* mov seg, Gv */
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         reg = (modrm >> 3) & 7;
         if (reg >= 6 || reg == R_CS)
             goto illegal_op;
-        gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 0);
         gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
         if (reg == R_SS) {
             /* if reg == SS, inhibit interrupts/trace */
             /* If several instructions disable interrupts, only the
                _first_ does it */
             if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
-                gen_helper_set_inhibit_irq();
+                gen_helper_set_inhibit_irq(cpu_env);
             s->tf = 0;
         }
         if (s->is_jmp) {
@@ -5099,7 +5326,7 @@
         }
         break;
     case 0x8c: /* mov Gv, seg */
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         reg = (modrm >> 3) & 7;
         mod = (modrm >> 6) & 3;
         if (reg >= 6)
@@ -5109,7 +5336,7 @@
             ot = OT_WORD + dflag;
         else
             ot = OT_WORD;
-        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
         break;
 
     case 0x1b6: /* movzbS Gv, Eb */
@@ -5122,7 +5349,7 @@
             d_ot = dflag + OT_WORD;
             /* ot is the size of source */
             ot = (b & 1) + OT_BYTE;
-            modrm = ldub_code(s->pc++);
+            modrm = cpu_ldub_code(env, s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
             mod = (modrm >> 6) & 3;
             rm = (modrm & 7) | REX_B(s);
@@ -5146,7 +5373,7 @@
                 }
                 gen_op_mov_reg_T0(d_ot, reg);
             } else {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 if (b & 8) {
                     gen_op_lds_T0_A0(ot + s->mem_index);
                 } else {
@@ -5159,7 +5386,7 @@
 
     case 0x8d: /* lea */
         ot = dflag + OT_WORD;
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
@@ -5168,7 +5395,7 @@
         s->override = -1;
         val = s->addseg;
         s->addseg = 0;
-        gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+        gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
         s->addseg = val;
         gen_op_mov_reg_A0(ot - OT_WORD, reg);
         break;
@@ -5186,16 +5413,16 @@
                 ot = dflag + OT_WORD;
 #ifdef TARGET_X86_64
             if (s->aflag == 2) {
-                offset_addr = ldq_code(s->pc);
+                offset_addr = cpu_ldq_code(env, s->pc);
                 s->pc += 8;
                 gen_op_movq_A0_im(offset_addr);
             } else
 #endif
             {
                 if (s->aflag) {
-                    offset_addr = insn_get(s, OT_LONG);
+                    offset_addr = insn_get(env, s, OT_LONG);
                 } else {
-                    offset_addr = insn_get(s, OT_WORD);
+                    offset_addr = insn_get(env, s, OT_WORD);
                 }
                 gen_op_movl_A0_im(offset_addr);
             }
@@ -5233,7 +5460,7 @@
         gen_op_mov_reg_T0(OT_BYTE, R_EAX);
         break;
     case 0xb0 ... 0xb7: /* mov R, Ib */
-        val = insn_get(s, OT_BYTE);
+        val = insn_get(env, s, OT_BYTE);
         gen_op_movl_T0_im(val);
         gen_op_mov_reg_T0(OT_BYTE, (b & 7) | REX_B(s));
         break;
@@ -5242,7 +5469,7 @@
         if (dflag == 2) {
             uint64_t tmp;
             /* 64 bit case */
-            tmp = ldq_code(s->pc);
+            tmp = cpu_ldq_code(env, s->pc);
             s->pc += 8;
             reg = (b & 7) | REX_B(s);
             gen_movtl_T0_im(tmp);
@@ -5251,7 +5478,7 @@
 #endif
         {
             ot = dflag ? OT_LONG : OT_WORD;
-            val = insn_get(s, ot);
+            val = insn_get(env, s, ot);
             reg = (b & 7) | REX_B(s);
             gen_op_movl_T0_im(val);
             gen_op_mov_reg_T0(ot, reg);
@@ -5269,7 +5496,7 @@
             ot = OT_BYTE;
         else
             ot = dflag + OT_WORD;
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
         mod = (modrm >> 6) & 3;
         if (mod == 3) {
@@ -5280,7 +5507,7 @@
             gen_op_mov_reg_T0(ot, rm);
             gen_op_mov_reg_T1(ot, reg);
         } else {
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             gen_op_mov_TN_reg(ot, 0, reg);
             /* for xchg, lock is implicit */
             if (!(prefixes & PREFIX_LOCK))
@@ -5312,12 +5539,12 @@
         op = R_GS;
     do_lxx:
         ot = dflag ? OT_LONG : OT_WORD;
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+        gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
         gen_op_ld_T1_A0(ot + s->mem_index);
         gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
         /* load the segment first to handle exceptions properly */
@@ -5337,14 +5564,14 @@
     case 0xc1:
         /* shift Ev,Ib */
         shift = 2;
-    GRP2:
+    grp2:
         {
             if ((b & 1) == 0)
                 ot = OT_BYTE;
             else
                 ot = dflag + OT_WORD;
 
-            modrm = ldub_code(s->pc++);
+            modrm = cpu_ldub_code(env, s->pc++);
             mod = (modrm >> 6) & 3;
             op = (modrm >> 3) & 7;
 
@@ -5352,7 +5579,7 @@
                 if (shift == 2) {
                     s->rip_offset = 1;
                 }
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 opreg = OR_TMP0;
             } else {
                 opreg = (modrm & 7) | REX_B(s);
@@ -5363,7 +5590,7 @@
                 gen_shift(s, op, ot, opreg, OR_ECX);
             } else {
                 if (shift == 2) {
-                    shift = ldub_code(s->pc++);
+                    shift = cpu_ldub_code(env, s->pc++);
                 }
                 gen_shifti(s, op, ot, opreg, shift);
             }
@@ -5373,12 +5600,12 @@
     case 0xd1:
         /* shift Ev,1 */
         shift = 1;
-        goto GRP2;
+        goto grp2;
     case 0xd2:
     case 0xd3:
         /* shift Ev,cl */
         shift = 0;
-        goto GRP2;
+        goto grp2;
 
     case 0x1a4: /* shld imm */
         op = 0;
@@ -5397,12 +5624,12 @@
         shift = 0;
     do_shiftd:
         ot = dflag + OT_WORD;
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         rm = (modrm & 7) | REX_B(s);
         reg = ((modrm >> 3) & 7) | rex_r;
         if (mod != 3) {
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             opreg = OR_TMP0;
         } else {
             opreg = rm;
@@ -5410,7 +5637,7 @@
         gen_op_mov_TN_reg(ot, 1, reg);
 
         if (shift) {
-            val = ldub_code(s->pc++);
+            val = cpu_ldub_code(env, s->pc++);
             tcg_gen_movi_tl(cpu_T3, val);
         } else {
             tcg_gen_ld_tl(cpu_T3, cpu_env, offsetof(CPUX86State, regs[R_ECX]));
@@ -5427,13 +5654,13 @@
             gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
             break;
         }
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         rm = modrm & 7;
         op = ((b & 7) << 3) | ((modrm >> 3) & 7);
         if (mod != 3) {
             /* memory op */
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             switch(op) {
             case 0x00 ... 0x07: /* fxxxs */
             case 0x10 ... 0x17: /* fixxxl */
@@ -5447,30 +5674,30 @@
                     case 0:
                         gen_op_ld_T0_A0(OT_LONG + s->mem_index);
                         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-                        gen_helper_flds_FT0(cpu_tmp2_i32);
+                        gen_helper_flds_FT0(cpu_env, cpu_tmp2_i32);
                         break;
                     case 1:
                         gen_op_ld_T0_A0(OT_LONG + s->mem_index);
                         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-                        gen_helper_fildl_FT0(cpu_tmp2_i32);
+                        gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
                         break;
                     case 2:
-                        tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
+                        tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
                                           (s->mem_index >> 2) - 1);
-                        gen_helper_fldl_FT0(cpu_tmp1_i64);
+                        gen_helper_fldl_FT0(cpu_env, cpu_tmp1_i64);
                         break;
                     case 3:
                     default:
                         gen_op_lds_T0_A0(OT_WORD + s->mem_index);
                         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-                        gen_helper_fildl_FT0(cpu_tmp2_i32);
+                        gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
                         break;
                     }
 
                     gen_helper_fp_arith_ST0_FT0(op1);
                     if (op1 == 3) {
                         /* fcomp needs pop */
-                        gen_helper_fpop();
+                        gen_helper_fpop(cpu_env);
                     }
                 }
                 break;
@@ -5486,23 +5713,23 @@
                     case 0:
                         gen_op_ld_T0_A0(OT_LONG + s->mem_index);
                         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-                        gen_helper_flds_ST0(cpu_tmp2_i32);
+                        gen_helper_flds_ST0(cpu_env, cpu_tmp2_i32);
                         break;
                     case 1:
                         gen_op_ld_T0_A0(OT_LONG + s->mem_index);
                         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-                        gen_helper_fildl_ST0(cpu_tmp2_i32);
+                        gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
                         break;
                     case 2:
-                        tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
+                        tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
                                           (s->mem_index >> 2) - 1);
-                        gen_helper_fldl_ST0(cpu_tmp1_i64);
+                        gen_helper_fldl_ST0(cpu_env, cpu_tmp1_i64);
                         break;
                     case 3:
                     default:
                         gen_op_lds_T0_A0(OT_WORD + s->mem_index);
                         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-                        gen_helper_fildl_ST0(cpu_tmp2_i32);
+                        gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
                         break;
                     }
                     break;
@@ -5510,129 +5737,121 @@
                     /* XXX: the corresponding CPUID bit must be tested ! */
                     switch(op >> 4) {
                     case 1:
-                        gen_helper_fisttl_ST0(cpu_tmp2_i32);
+                        gen_helper_fisttl_ST0(cpu_tmp2_i32, cpu_env);
                         tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
                         gen_op_st_T0_A0(OT_LONG + s->mem_index);
                         break;
                     case 2:
-                        gen_helper_fisttll_ST0(cpu_tmp1_i64);
-                        tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
+                        gen_helper_fisttll_ST0(cpu_tmp1_i64, cpu_env);
+                        tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
                                           (s->mem_index >> 2) - 1);
                         break;
                     case 3:
                     default:
-                        gen_helper_fistt_ST0(cpu_tmp2_i32);
+                        gen_helper_fistt_ST0(cpu_tmp2_i32, cpu_env);
                         tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
                         gen_op_st_T0_A0(OT_WORD + s->mem_index);
                         break;
                     }
-                    gen_helper_fpop();
+                    gen_helper_fpop(cpu_env);
                     break;
                 default:
                     switch(op >> 4) {
                     case 0:
-                        gen_helper_fsts_ST0(cpu_tmp2_i32);
+                        gen_helper_fsts_ST0(cpu_tmp2_i32, cpu_env);
                         tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
                         gen_op_st_T0_A0(OT_LONG + s->mem_index);
                         break;
                     case 1:
-                        gen_helper_fistl_ST0(cpu_tmp2_i32);
+                        gen_helper_fistl_ST0(cpu_tmp2_i32, cpu_env);
                         tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
                         gen_op_st_T0_A0(OT_LONG + s->mem_index);
                         break;
                     case 2:
-                        gen_helper_fstl_ST0(cpu_tmp1_i64);
-                        tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
+                        gen_helper_fstl_ST0(cpu_tmp1_i64, cpu_env);
+                        tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
                                           (s->mem_index >> 2) - 1);
                         break;
                     case 3:
                     default:
-                        gen_helper_fist_ST0(cpu_tmp2_i32);
+                        gen_helper_fist_ST0(cpu_tmp2_i32, cpu_env);
                         tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
                         gen_op_st_T0_A0(OT_WORD + s->mem_index);
                         break;
                     }
                     if ((op & 7) == 3)
-                        gen_helper_fpop();
+                        gen_helper_fpop(cpu_env);
                     break;
                 }
                 break;
             case 0x0c: /* fldenv mem */
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
+                gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fldenv(
+                gen_helper_fldenv(cpu_env,
                                    cpu_A0, tcg_const_i32(s->dflag));
                 break;
             case 0x0d: /* fldcw mem */
                 gen_op_ld_T0_A0(OT_WORD + s->mem_index);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-                gen_helper_fldcw(cpu_tmp2_i32);
+                gen_helper_fldcw(cpu_env, cpu_tmp2_i32);
                 break;
             case 0x0e: /* fnstenv mem */
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
+                gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fstenv(cpu_A0, tcg_const_i32(s->dflag));
+                gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(s->dflag));
                 break;
             case 0x0f: /* fnstcw mem */
-                gen_helper_fnstcw(cpu_tmp2_i32);
+                gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
                 tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
                 gen_op_st_T0_A0(OT_WORD + s->mem_index);
                 break;
             case 0x1d: /* fldt mem */
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
+                gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fldt_ST0(cpu_A0);
+                gen_helper_fldt_ST0(cpu_env, cpu_A0);
                 break;
             case 0x1f: /* fstpt mem */
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
+                gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fstt_ST0(cpu_A0);
-                gen_helper_fpop();
+                gen_helper_fstt_ST0(cpu_env, cpu_A0);
+                gen_helper_fpop(cpu_env);
                 break;
             case 0x2c: /* frstor mem */
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
+                gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_frstor(cpu_A0, tcg_const_i32(s->dflag));
+                gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(s->dflag));
                 break;
             case 0x2e: /* fnsave mem */
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
+                gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fsave(cpu_A0, tcg_const_i32(s->dflag));
+                gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(s->dflag));
                 break;
             case 0x2f: /* fnstsw mem */
-                gen_helper_fnstsw(cpu_tmp2_i32);
+                gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
                 tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
                 gen_op_st_T0_A0(OT_WORD + s->mem_index);
                 break;
             case 0x3c: /* fbld */
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
+                gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fbld_ST0(cpu_A0);
+                gen_helper_fbld_ST0(cpu_env, cpu_A0);
                 break;
             case 0x3e: /* fbstp */
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
+                gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fbst_ST0(cpu_A0);
-                gen_helper_fpop();
+                gen_helper_fbst_ST0(cpu_env, cpu_A0);
+                gen_helper_fpop(cpu_env);
                 break;
             case 0x3d: /* fildll */
-                tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
+                tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
                                   (s->mem_index >> 2) - 1);
-                gen_helper_fildll_ST0(cpu_tmp1_i64);
+                gen_helper_fildll_ST0(cpu_env, cpu_tmp1_i64);
                 break;
             case 0x3f: /* fistpll */
-                gen_helper_fistll_ST0(cpu_tmp1_i64);
-                tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
+                gen_helper_fistll_ST0(cpu_tmp1_i64, cpu_env);
+                tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
                                   (s->mem_index >> 2) - 1);
-                gen_helper_fpop();
+                gen_helper_fpop(cpu_env);
                 break;
             default:
                 goto illegal_op;
@@ -5643,22 +5862,22 @@
 
             switch(op) {
             case 0x08: /* fld sti */
-                gen_helper_fpush();
-                gen_helper_fmov_ST0_STN(tcg_const_i32((opreg + 1) & 7));
+                gen_helper_fpush(cpu_env);
+                gen_helper_fmov_ST0_STN(cpu_env,
+                                        tcg_const_i32((opreg + 1) & 7));
                 break;
             case 0x09: /* fxchg sti */
             case 0x29: /* fxchg4 sti, undocumented op */
             case 0x39: /* fxchg7 sti, undocumented op */
-                gen_helper_fxchg_ST0_STN(tcg_const_i32(opreg));
+                gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
                 break;
             case 0x0a: /* grp d9/2 */
                 switch(rm) {
                 case 0: /* fnop */
                     /* check exceptions (FreeBSD FPU probe) */
-                    if (s->cc_op != CC_OP_DYNAMIC)
-                        gen_op_set_cc_op(s->cc_op);
+                    gen_update_cc_op(s);
                     gen_jmp_im(pc_start - s->cs_base);
-                    gen_helper_fwait();
+                    gen_helper_fwait(cpu_env);
                     break;
                 default:
                     goto illegal_op;
@@ -5667,17 +5886,17 @@
             case 0x0c: /* grp d9/4 */
                 switch(rm) {
                 case 0: /* fchs */
-                    gen_helper_fchs_ST0();
+                    gen_helper_fchs_ST0(cpu_env);
                     break;
                 case 1: /* fabs */
-                    gen_helper_fabs_ST0();
+                    gen_helper_fabs_ST0(cpu_env);
                     break;
                 case 4: /* ftst */
-                    gen_helper_fldz_FT0();
-                    gen_helper_fcom_ST0_FT0();
+                    gen_helper_fldz_FT0(cpu_env);
+                    gen_helper_fcom_ST0_FT0(cpu_env);
                     break;
                 case 5: /* fxam */
-                    gen_helper_fxam_ST0();
+                    gen_helper_fxam_ST0(cpu_env);
                     break;
                 default:
                     goto illegal_op;
@@ -5687,32 +5906,32 @@
                 {
                     switch(rm) {
                     case 0:
-                        gen_helper_fpush();
-                        gen_helper_fld1_ST0();
+                        gen_helper_fpush(cpu_env);
+                        gen_helper_fld1_ST0(cpu_env);
                         break;
                     case 1:
-                        gen_helper_fpush();
-                        gen_helper_fldl2t_ST0();
+                        gen_helper_fpush(cpu_env);
+                        gen_helper_fldl2t_ST0(cpu_env);
                         break;
                     case 2:
-                        gen_helper_fpush();
-                        gen_helper_fldl2e_ST0();
+                        gen_helper_fpush(cpu_env);
+                        gen_helper_fldl2e_ST0(cpu_env);
                         break;
                     case 3:
-                        gen_helper_fpush();
-                        gen_helper_fldpi_ST0();
+                        gen_helper_fpush(cpu_env);
+                        gen_helper_fldpi_ST0(cpu_env);
                         break;
                     case 4:
-                        gen_helper_fpush();
-                        gen_helper_fldlg2_ST0();
+                        gen_helper_fpush(cpu_env);
+                        gen_helper_fldlg2_ST0(cpu_env);
                         break;
                     case 5:
-                        gen_helper_fpush();
-                        gen_helper_fldln2_ST0();
+                        gen_helper_fpush(cpu_env);
+                        gen_helper_fldln2_ST0(cpu_env);
                         break;
                     case 6:
-                        gen_helper_fpush();
-                        gen_helper_fldz_ST0();
+                        gen_helper_fpush(cpu_env);
+                        gen_helper_fldz_ST0(cpu_env);
                         break;
                     default:
                         goto illegal_op;
@@ -5722,58 +5941,58 @@
             case 0x0e: /* grp d9/6 */
                 switch(rm) {
                 case 0: /* f2xm1 */
-                    gen_helper_f2xm1();
+                    gen_helper_f2xm1(cpu_env);
                     break;
                 case 1: /* fyl2x */
-                    gen_helper_fyl2x();
+                    gen_helper_fyl2x(cpu_env);
                     break;
                 case 2: /* fptan */
-                    gen_helper_fptan();
+                    gen_helper_fptan(cpu_env);
                     break;
                 case 3: /* fpatan */
-                    gen_helper_fpatan();
+                    gen_helper_fpatan(cpu_env);
                     break;
                 case 4: /* fxtract */
-                    gen_helper_fxtract();
+                    gen_helper_fxtract(cpu_env);
                     break;
                 case 5: /* fprem1 */
-                    gen_helper_fprem1();
+                    gen_helper_fprem1(cpu_env);
                     break;
                 case 6: /* fdecstp */
-                    gen_helper_fdecstp();
+                    gen_helper_fdecstp(cpu_env);
                     break;
                 default:
                 case 7: /* fincstp */
-                    gen_helper_fincstp();
+                    gen_helper_fincstp(cpu_env);
                     break;
                 }
                 break;
             case 0x0f: /* grp d9/7 */
                 switch(rm) {
                 case 0: /* fprem */
-                    gen_helper_fprem();
+                    gen_helper_fprem(cpu_env);
                     break;
                 case 1: /* fyl2xp1 */
-                    gen_helper_fyl2xp1();
+                    gen_helper_fyl2xp1(cpu_env);
                     break;
                 case 2: /* fsqrt */
-                    gen_helper_fsqrt();
+                    gen_helper_fsqrt(cpu_env);
                     break;
                 case 3: /* fsincos */
-                    gen_helper_fsincos();
+                    gen_helper_fsincos(cpu_env);
                     break;
                 case 5: /* fscale */
-                    gen_helper_fscale();
+                    gen_helper_fscale(cpu_env);
                     break;
                 case 4: /* frndint */
-                    gen_helper_frndint();
+                    gen_helper_frndint(cpu_env);
                     break;
                 case 6: /* fsin */
-                    gen_helper_fsin();
+                    gen_helper_fsin(cpu_env);
                     break;
                 default:
                 case 7: /* fcos */
-                    gen_helper_fcos();
+                    gen_helper_fcos(cpu_env);
                     break;
                 }
                 break;
@@ -5787,32 +6006,32 @@
                     if (op >= 0x20) {
                         gen_helper_fp_arith_STN_ST0(op1, opreg);
                         if (op >= 0x30)
-                            gen_helper_fpop();
+                            gen_helper_fpop(cpu_env);
                     } else {
-                        gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
+                        gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                         gen_helper_fp_arith_ST0_FT0(op1);
                     }
                 }
                 break;
             case 0x02: /* fcom */
             case 0x22: /* fcom2, undocumented op */
-                gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
-                gen_helper_fcom_ST0_FT0();
+                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
+                gen_helper_fcom_ST0_FT0(cpu_env);
                 break;
             case 0x03: /* fcomp */
             case 0x23: /* fcomp3, undocumented op */
             case 0x32: /* fcomp5, undocumented op */
-                gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
-                gen_helper_fcom_ST0_FT0();
-                gen_helper_fpop();
+                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
+                gen_helper_fcom_ST0_FT0(cpu_env);
+                gen_helper_fpop(cpu_env);
                 break;
             case 0x15: /* da/5 */
                 switch(rm) {
                 case 1: /* fucompp */
-                    gen_helper_fmov_FT0_STN(tcg_const_i32(1));
-                    gen_helper_fucom_ST0_FT0();
-                    gen_helper_fpop();
-                    gen_helper_fpop();
+                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
+                    gen_helper_fucom_ST0_FT0(cpu_env);
+                    gen_helper_fpop(cpu_env);
+                    gen_helper_fpop(cpu_env);
                     break;
                 default:
                     goto illegal_op;
@@ -5825,10 +6044,10 @@
                 case 1: /* fdisi (287 only, just do nop here) */
                     break;
                 case 2: /* fclex */
-                    gen_helper_fclex();
+                    gen_helper_fclex(cpu_env);
                     break;
                 case 3: /* fninit */
-                    gen_helper_fninit();
+                    gen_helper_fninit(cpu_env);
                     break;
                 case 4: /* fsetpm (287 only, just do nop here) */
                     break;
@@ -5837,61 +6056,59 @@
                 }
                 break;
             case 0x1d: /* fucomi */
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
-                gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
-                gen_helper_fucomi_ST0_FT0();
-                s->cc_op = CC_OP_EFLAGS;
+                gen_update_cc_op(s);
+                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
+                gen_helper_fucomi_ST0_FT0(cpu_env);
+                set_cc_op(s, CC_OP_EFLAGS);
                 break;
             case 0x1e: /* fcomi */
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
-                gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
-                gen_helper_fcomi_ST0_FT0();
-                s->cc_op = CC_OP_EFLAGS;
+                gen_update_cc_op(s);
+                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
+                gen_helper_fcomi_ST0_FT0(cpu_env);
+                set_cc_op(s, CC_OP_EFLAGS);
                 break;
             case 0x28: /* ffree sti */
-                gen_helper_ffree_STN(tcg_const_i32(opreg));
+                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
                 break;
             case 0x2a: /* fst sti */
-                gen_helper_fmov_STN_ST0(tcg_const_i32(opreg));
+                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
                 break;
             case 0x2b: /* fstp sti */
             case 0x0b: /* fstp1 sti, undocumented op */
             case 0x3a: /* fstp8 sti, undocumented op */
             case 0x3b: /* fstp9 sti, undocumented op */
-                gen_helper_fmov_STN_ST0(tcg_const_i32(opreg));
-                gen_helper_fpop();
+                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
+                gen_helper_fpop(cpu_env);
                 break;
             case 0x2c: /* fucom st(i) */
-                gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
-                gen_helper_fucom_ST0_FT0();
+                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
+                gen_helper_fucom_ST0_FT0(cpu_env);
                 break;
             case 0x2d: /* fucomp st(i) */
-                gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
-                gen_helper_fucom_ST0_FT0();
-                gen_helper_fpop();
+                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
+                gen_helper_fucom_ST0_FT0(cpu_env);
+                gen_helper_fpop(cpu_env);
                 break;
             case 0x33: /* de/3 */
                 switch(rm) {
                 case 1: /* fcompp */
-                    gen_helper_fmov_FT0_STN(tcg_const_i32(1));
-                    gen_helper_fcom_ST0_FT0();
-                    gen_helper_fpop();
-                    gen_helper_fpop();
+                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
+                    gen_helper_fcom_ST0_FT0(cpu_env);
+                    gen_helper_fpop(cpu_env);
+                    gen_helper_fpop(cpu_env);
                     break;
                 default:
                     goto illegal_op;
                 }
                 break;
             case 0x38: /* ffreep sti, undocumented op */
-                gen_helper_ffree_STN(tcg_const_i32(opreg));
-                gen_helper_fpop();
+                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
+                gen_helper_fpop(cpu_env);
                 break;
             case 0x3c: /* df/4 */
                 switch(rm) {
                 case 0:
-                    gen_helper_fnstsw(cpu_tmp2_i32);
+                    gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
                     tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
                     gen_op_mov_reg_T0(OT_WORD, R_EAX);
                     break;
@@ -5900,20 +6117,18 @@
                 }
                 break;
             case 0x3d: /* fucomip */
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
-                gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
-                gen_helper_fucomi_ST0_FT0();
-                gen_helper_fpop();
-                s->cc_op = CC_OP_EFLAGS;
+                gen_update_cc_op(s);
+                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
+                gen_helper_fucomi_ST0_FT0(cpu_env);
+                gen_helper_fpop(cpu_env);
+                set_cc_op(s, CC_OP_EFLAGS);
                 break;
             case 0x3e: /* fcomip */
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
-                gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
-                gen_helper_fcomi_ST0_FT0();
-                gen_helper_fpop();
-                s->cc_op = CC_OP_EFLAGS;
+                gen_update_cc_op(s);
+                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
+                gen_helper_fcomi_ST0_FT0(cpu_env);
+                gen_helper_fpop(cpu_env);
+                set_cc_op(s, CC_OP_EFLAGS);
                 break;
             case 0x10 ... 0x13: /* fcmovxx */
             case 0x18 ... 0x1b:
@@ -5927,8 +6142,8 @@
                     };
                     op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
                     l1 = gen_new_label();
-                    gen_jcc1(s, s->cc_op, op1, l1);
-                    gen_helper_fmov_ST0_STN(tcg_const_i32(opreg));
+                    gen_jcc1(s, op1, l1);
+                    gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
                     gen_set_label(l1);
                 }
                 break;
@@ -5991,7 +6206,7 @@
             gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
         } else {
             gen_scas(s, ot);
-            s->cc_op = CC_OP_SUBB + ot;
+            set_cc_op(s, CC_OP_SUBB + ot);
         }
         break;
 
@@ -6007,7 +6222,7 @@
             gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
         } else {
             gen_cmps(s, ot);
-            s->cc_op = CC_OP_SUBB + ot;
+            set_cc_op(s, CC_OP_SUBB + ot);
         }
         break;
     case 0x6c: /* insS */
@@ -6018,7 +6233,7 @@
             ot = dflag ? OT_LONG : OT_WORD;
         gen_op_mov_TN_reg(OT_WORD, 0, R_EDX);
         gen_op_andl_T0_ffff();
-        gen_check_io(s, ot, pc_start - s->cs_base,
+        gen_check_io(s, ot, pc_start - s->cs_base,
                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
             gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
@@ -6058,7 +6273,7 @@
             ot = OT_BYTE;
         else
             ot = dflag ? OT_LONG : OT_WORD;
-        val = ldub_code(s->pc++);
+        val = cpu_ldub_code(env, s->pc++);
         gen_op_movl_T0_im(val);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
@@ -6078,7 +6293,7 @@
             ot = OT_BYTE;
         else
             ot = dflag ? OT_LONG : OT_WORD;
-        val = ldub_code(s->pc++);
+        val = cpu_ldub_code(env, s->pc++);
         gen_op_movl_T0_im(val);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      svm_is_rep(prefixes));
@@ -6142,7 +6357,7 @@
         /************************/
         /* control */
     case 0xc2: /* ret im */
-        val = ldsw_code(s->pc);
+        val = cpu_ldsw_code(env, s->pc);
         s->pc += 2;
         gen_pop_T0(s);
         if (CODE64(s) && s->dflag)
@@ -6162,14 +6377,13 @@
         gen_eob(s);
         break;
     case 0xca: /* lret im */
-        val = ldsw_code(s->pc);
+        val = cpu_ldsw_code(env, s->pc);
         s->pc += 2;
     do_lret:
         if (s->pe && !s->vm86) {
-            if (s->cc_op != CC_OP_DYNAMIC)
-                gen_op_set_cc_op(s->cc_op);
+            gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_lret_protected(tcg_const_i32(s->dflag),
+            gen_helper_lret_protected(cpu_env, tcg_const_i32(s->dflag),
                                       tcg_const_i32(val));
         } else {
             gen_stack_A0(s);
@@ -6196,31 +6410,30 @@
         gen_svm_check_intercept(s, pc_start, SVM_EXIT_IRET);
         if (!s->pe) {
             /* real mode */
-            gen_helper_iret_real(tcg_const_i32(s->dflag));
-            s->cc_op = CC_OP_EFLAGS;
+            gen_helper_iret_real(cpu_env, tcg_const_i32(s->dflag));
+            set_cc_op(s, CC_OP_EFLAGS);
         } else if (s->vm86) {
             if (s->iopl != 3) {
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
-                gen_helper_iret_real(tcg_const_i32(s->dflag));
-                s->cc_op = CC_OP_EFLAGS;
+                gen_helper_iret_real(cpu_env, tcg_const_i32(s->dflag));
+                set_cc_op(s, CC_OP_EFLAGS);
             }
         } else {
-            if (s->cc_op != CC_OP_DYNAMIC)
-                gen_op_set_cc_op(s->cc_op);
+            gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_iret_protected(tcg_const_i32(s->dflag),
+            gen_helper_iret_protected(cpu_env, tcg_const_i32(s->dflag),
                                       tcg_const_i32(s->pc - s->cs_base));
-            s->cc_op = CC_OP_EFLAGS;
+            set_cc_op(s, CC_OP_EFLAGS);
         }
         gen_eob(s);
         break;
     case 0xe8: /* call im */
         {
             if (dflag)
-                tval = (int32_t)insn_get(s, OT_LONG);
+                tval = (int32_t)insn_get(env, s, OT_LONG);
             else
-                tval = (int16_t)insn_get(s, OT_WORD);
+                tval = (int16_t)insn_get(env, s, OT_WORD);
             next_eip = s->pc - s->cs_base;
             tval += next_eip;
             if (s->dflag == 0)
@@ -6237,8 +6450,8 @@
             if (CODE64(s))
                 goto illegal_op;
             ot = dflag ? OT_LONG : OT_WORD;
-            offset = insn_get(s, ot);
-            selector = insn_get(s, OT_WORD);
+            offset = insn_get(env, s, ot);
+            selector = insn_get(env, s, OT_WORD);
 
             gen_op_movl_T0_im(selector);
             gen_op_movl_T1_imu(offset);
@@ -6246,9 +6459,9 @@
         goto do_lcall;
     case 0xe9: /* jmp im */
         if (dflag)
-            tval = (int32_t)insn_get(s, OT_LONG);
+            tval = (int32_t)insn_get(env, s, OT_LONG);
         else
-            tval = (int16_t)insn_get(s, OT_WORD);
+            tval = (int16_t)insn_get(env, s, OT_WORD);
         tval += s->pc - s->cs_base;
         if (s->dflag == 0)
             tval &= 0xffff;
@@ -6263,28 +6476,28 @@
             if (CODE64(s))
                 goto illegal_op;
             ot = dflag ? OT_LONG : OT_WORD;
-            offset = insn_get(s, ot);
-            selector = insn_get(s, OT_WORD);
+            offset = insn_get(env, s, ot);
+            selector = insn_get(env, s, OT_WORD);
 
             gen_op_movl_T0_im(selector);
             gen_op_movl_T1_imu(offset);
         }
         goto do_ljmp;
     case 0xeb: /* jmp Jb */
-        tval = (int8_t)insn_get(s, OT_BYTE);
+        tval = (int8_t)insn_get(env, s, OT_BYTE);
         tval += s->pc - s->cs_base;
         if (s->dflag == 0)
             tval &= 0xffff;
         gen_jmp(s, tval);
         break;
     case 0x70 ... 0x7f: /* jcc Jb */
-        tval = (int8_t)insn_get(s, OT_BYTE);
+        tval = (int8_t)insn_get(env, s, OT_BYTE);
         goto do_jcc;
     case 0x180 ... 0x18f: /* jcc Jv */
         if (dflag) {
-            tval = (int32_t)insn_get(s, OT_LONG);
+            tval = (int32_t)insn_get(env, s, OT_LONG);
         } else {
-            tval = (int16_t)insn_get(s, OT_WORD);
+            tval = (int16_t)insn_get(env, s, OT_WORD);
         }
     do_jcc:
         next_eip = s->pc - s->cs_base;
@@ -6295,9 +6508,9 @@
         break;
 
     case 0x190 ... 0x19f: /* setcc Gv */
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         gen_setcc(s, b);
-        gen_ldst_modrm(s, modrm, OT_BYTE, OR_TMP0, 1);
+        gen_ldst_modrm(env, s, modrm, OT_BYTE, OR_TMP0, 1);
         break;
     case 0x140 ... 0x14f: /* cmov Gv, Ev */
         {
@@ -6305,12 +6518,12 @@
             TCGv t0;
 
             ot = dflag + OT_WORD;
-            modrm = ldub_code(s->pc++);
+            modrm = cpu_ldub_code(env, s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
             mod = (modrm >> 6) & 3;
             t0 = tcg_temp_local_new();
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_op_ld_v(ot + s->mem_index, t0, cpu_A0);
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -6320,7 +6533,7 @@
             if (ot == OT_LONG) {
                 /* XXX: specific Intel behaviour ? */
                 l1 = gen_new_label();
-                gen_jcc1(s, s->cc_op, b ^ 1, l1);
+                gen_jcc1(s, b ^ 1, l1);
                 tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUX86State, regs[reg]) + REG_L_OFFSET);
                 gen_set_label(l1);
                 tcg_gen_movi_tl(cpu_tmp0, 0);
@@ -6329,7 +6542,7 @@
 #endif
             {
                 l1 = gen_new_label();
-                gen_jcc1(s, s->cc_op, b ^ 1, l1);
+                gen_jcc1(s, b ^ 1, l1);
                 gen_op_mov_reg_v(ot, reg, t0);
                 gen_set_label(l1);
             }
@@ -6344,9 +6557,8 @@
         if (s->vm86 && s->iopl != 3) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
-            if (s->cc_op != CC_OP_DYNAMIC)
-                gen_op_set_cc_op(s->cc_op);
-            gen_helper_read_eflags(cpu_T[0]);
+            gen_update_cc_op(s);
+            gen_helper_read_eflags(cpu_T[0], cpu_env);
             gen_push_T0(s);
         }
         break;
@@ -6358,33 +6570,51 @@
             gen_pop_T0(s);
             if (s->cpl == 0) {
                 if (s->dflag) {
-                    gen_helper_write_eflags(cpu_T[0],
-                                       tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK | IOPL_MASK)));
+                    gen_helper_write_eflags(cpu_env, cpu_T[0],
+                                            tcg_const_i32((TF_MASK | AC_MASK |
+                                                           ID_MASK | NT_MASK |
+                                                           IF_MASK |
+                                                           IOPL_MASK)));
                 } else {
-                    gen_helper_write_eflags(cpu_T[0],
-                                       tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK | IOPL_MASK) & 0xffff));
+                    gen_helper_write_eflags(cpu_env, cpu_T[0],
+                                            tcg_const_i32((TF_MASK | AC_MASK |
+                                                           ID_MASK | NT_MASK |
+                                                           IF_MASK | IOPL_MASK)
+                                                          & 0xffff));
                 }
             } else {
                 if (s->cpl <= s->iopl) {
                     if (s->dflag) {
-                        gen_helper_write_eflags(cpu_T[0],
-                                           tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK)));
+                        gen_helper_write_eflags(cpu_env, cpu_T[0],
+                                                tcg_const_i32((TF_MASK |
+                                                               AC_MASK |
+                                                               ID_MASK |
+                                                               NT_MASK |
+                                                               IF_MASK)));
                     } else {
-                        gen_helper_write_eflags(cpu_T[0],
-                                           tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK) & 0xffff));
+                        gen_helper_write_eflags(cpu_env, cpu_T[0],
+                                                tcg_const_i32((TF_MASK |
+                                                               AC_MASK |
+                                                               ID_MASK |
+                                                               NT_MASK |
+                                                               IF_MASK)
+                                                              & 0xffff));
                     }
                 } else {
                     if (s->dflag) {
-                        gen_helper_write_eflags(cpu_T[0],
-                                           tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK)));
+                        gen_helper_write_eflags(cpu_env, cpu_T[0],
+                                           tcg_const_i32((TF_MASK | AC_MASK |
+                                                          ID_MASK | NT_MASK)));
                     } else {
-                        gen_helper_write_eflags(cpu_T[0],
-                                           tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK) & 0xffff));
+                        gen_helper_write_eflags(cpu_env, cpu_T[0],
+                                           tcg_const_i32((TF_MASK | AC_MASK |
+                                                          ID_MASK | NT_MASK)
+                                                         & 0xffff));
                     }
                 }
             }
             gen_pop_update(s);
-            s->cc_op = CC_OP_EFLAGS;
+            set_cc_op(s, CC_OP_EFLAGS);
             /* abort translation because TF flag may change */
             gen_jmp_im(s->pc - s->cs_base);
             gen_eob(s);
@@ -6394,44 +6624,30 @@
         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
             goto illegal_op;
         gen_op_mov_TN_reg(OT_BYTE, 0, R_AH);
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
-        gen_compute_eflags(cpu_cc_src);
+        gen_compute_eflags(s);
         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
         tcg_gen_andi_tl(cpu_T[0], cpu_T[0], CC_S | CC_Z | CC_A | CC_P | CC_C);
         tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T[0]);
-        s->cc_op = CC_OP_EFLAGS;
         break;
     case 0x9f: /* lahf */
         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
             goto illegal_op;
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
-        gen_compute_eflags(cpu_T[0]);
+        gen_compute_eflags(s);
         /* Note: gen_compute_eflags() only gives the condition codes */
-        tcg_gen_ori_tl(cpu_T[0], cpu_T[0], 0x02);
+        tcg_gen_ori_tl(cpu_T[0], cpu_cc_src, 0x02);
         gen_op_mov_reg_T0(OT_BYTE, R_AH);
         break;
     case 0xf5: /* cmc */
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
-        gen_compute_eflags(cpu_cc_src);
+        gen_compute_eflags(s);
         tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
-        s->cc_op = CC_OP_EFLAGS;
         break;
     case 0xf8: /* clc */
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
-        gen_compute_eflags(cpu_cc_src);
+        gen_compute_eflags(s);
         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
-        s->cc_op = CC_OP_EFLAGS;
         break;
     case 0xf9: /* stc */
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
-        gen_compute_eflags(cpu_cc_src);
+        gen_compute_eflags(s);
         tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
-        s->cc_op = CC_OP_EFLAGS;
         break;
     case 0xfc: /* cld */
         tcg_gen_movi_i32(cpu_tmp2_i32, 1);
@@ -6446,19 +6662,19 @@
         /* bit operations */
     case 0x1ba: /* bt/bts/btr/btc Gv, im */
         ot = dflag + OT_WORD;
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         op = (modrm >> 3) & 7;
         mod = (modrm >> 6) & 3;
         rm = (modrm & 7) | REX_B(s);
         if (mod != 3) {
             s->rip_offset = 1;
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             gen_op_ld_T0_A0(ot + s->mem_index);
         } else {
             gen_op_mov_TN_reg(ot, 0, rm);
         }
         /* load shift */
-        val = ldub_code(s->pc++);
+        val = cpu_ldub_code(env, s->pc++);
         gen_op_movl_T1_im(val);
         if (op < 4)
             goto illegal_op;
@@ -6477,13 +6693,13 @@
         op = 3;
     do_btx:
         ot = dflag + OT_WORD;
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
         mod = (modrm >> 6) & 3;
         rm = (modrm & 7) | REX_B(s);
         gen_op_mov_TN_reg(OT_LONG, 1, reg);
         if (mod != 3) {
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             /* specific case: we need to add a displacement */
             gen_exts(ot, cpu_T[1]);
             tcg_gen_sari_tl(cpu_tmp0, cpu_T[1], 3 + ot);
@@ -6521,7 +6737,7 @@
             tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
             break;
         }
-        s->cc_op = CC_OP_SARB + ot;
+        set_cc_op(s, CC_OP_SARB + ot);
         if (op != 0) {
             if (mod != 3)
                 gen_op_st_T0_A0(ot + s->mem_index);
@@ -6538,9 +6754,9 @@
             TCGv t0;
 
             ot = dflag + OT_WORD;
-            modrm = ldub_code(s->pc++);
+            modrm = cpu_ldub_code(env, s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
-            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
             gen_extu(ot, cpu_T[0]);
             label1 = gen_new_label();
             tcg_gen_movi_tl(cpu_cc_dst, 0);
@@ -6555,8 +6771,7 @@
             gen_op_mov_reg_T0(ot, reg);
             tcg_gen_movi_tl(cpu_cc_dst, 1);
             gen_set_label(label1);
-            tcg_gen_discard_tl(cpu_cc_src);
-            s->cc_op = CC_OP_LOGICB + ot;
+            set_cc_op(s, CC_OP_LOGICB + ot);
             tcg_temp_free(t0);
         }
         break;
@@ -6565,60 +6780,57 @@
     case 0x27: /* daa */
         if (CODE64(s))
             goto illegal_op;
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
-        gen_helper_daa();
-        s->cc_op = CC_OP_EFLAGS;
+        gen_update_cc_op(s);
+        gen_helper_daa(cpu_env);
+        set_cc_op(s, CC_OP_EFLAGS);
         break;
     case 0x2f: /* das */
         if (CODE64(s))
             goto illegal_op;
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
-        gen_helper_das();
-        s->cc_op = CC_OP_EFLAGS;
+        gen_update_cc_op(s);
+        gen_helper_das(cpu_env);
+        set_cc_op(s, CC_OP_EFLAGS);
         break;
     case 0x37: /* aaa */
         if (CODE64(s))
             goto illegal_op;
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
-        gen_helper_aaa();
-        s->cc_op = CC_OP_EFLAGS;
+        gen_update_cc_op(s);
+        gen_helper_aaa(cpu_env);
+        set_cc_op(s, CC_OP_EFLAGS);
         break;
     case 0x3f: /* aas */
         if (CODE64(s))
             goto illegal_op;
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
-        gen_helper_aas();
-        s->cc_op = CC_OP_EFLAGS;
+        gen_update_cc_op(s);
+        gen_helper_aas(cpu_env);
+        set_cc_op(s, CC_OP_EFLAGS);
         break;
     case 0xd4: /* aam */
         if (CODE64(s))
             goto illegal_op;
-        val = ldub_code(s->pc++);
+        val = cpu_ldub_code(env, s->pc++);
         if (val == 0) {
             gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
         } else {
-            gen_helper_aam(tcg_const_i32(val));
-            s->cc_op = CC_OP_LOGICB;
+            gen_helper_aam(cpu_env, tcg_const_i32(val));
+            set_cc_op(s, CC_OP_LOGICB);
         }
         break;
     case 0xd5: /* aad */
         if (CODE64(s))
             goto illegal_op;
-        val = ldub_code(s->pc++);
-        gen_helper_aad(tcg_const_i32(val));
-        s->cc_op = CC_OP_LOGICB;
+        val = cpu_ldub_code(env, s->pc++);
+        gen_helper_aad(cpu_env, tcg_const_i32(val));
+        set_cc_op(s, CC_OP_LOGICB);
         break;
         /************************/
         /* misc */
     case 0x90: /* nop */
         /* XXX: xchg + rex handling */
         /* XXX: correct lock test for all insn */
-        if (prefixes & PREFIX_LOCK)
+        if (prefixes & PREFIX_LOCK) {
             goto illegal_op;
+        }
         if (prefixes & PREFIX_REPZ) {
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_PAUSE);
         }
@@ -6628,17 +6840,16 @@
             (HF_MP_MASK | HF_TS_MASK)) {
             gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
         } else {
-            if (s->cc_op != CC_OP_DYNAMIC)
-                gen_op_set_cc_op(s->cc_op);
+            gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_fwait();
+            gen_helper_fwait(cpu_env);
         }
         break;
     case 0xcc: /* int3 */
         gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
         break;
     case 0xcd: /* int N */
-        val = ldub_code(s->pc++);
+        val = cpu_ldub_code(env, s->pc++);
         if (s->vm86 && s->iopl != 3) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
@@ -6648,10 +6859,9 @@
     case 0xce: /* into */
         if (CODE64(s))
             goto illegal_op;
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
+        gen_update_cc_op(s);
         gen_jmp_im(pc_start - s->cs_base);
-        gen_helper_into(tcg_const_i32(s->pc - pc_start));
+        gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
         break;
 #ifdef WANT_ICEBP
     case 0xf1: /* icebp (undocumented, exits to external debugger) */
@@ -6661,20 +6871,20 @@
 #else
         /* start debug */
         tb_flush(cpu_single_env);
-        cpu_set_log(CPU_LOG_INT | CPU_LOG_TB_IN_ASM);
+        qemu_set_log(CPU_LOG_INT | CPU_LOG_TB_IN_ASM);
 #endif
         break;
 #endif
     case 0xfa: /* cli */
         if (!s->vm86) {
             if (s->cpl <= s->iopl) {
-                gen_helper_cli();
+                gen_helper_cli(cpu_env);
             } else {
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             }
         } else {
             if (s->iopl == 3) {
-                gen_helper_cli();
+                gen_helper_cli(cpu_env);
             } else {
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             }
@@ -6684,12 +6894,12 @@
         if (!s->vm86) {
             if (s->cpl <= s->iopl) {
             gen_sti:
-                gen_helper_sti();
+                gen_helper_sti(cpu_env);
                 /* interruptions are enabled only the first insn after sti */
                 /* If several instructions disable interrupts, only the
                    _first_ does it */
                 if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
-                    gen_helper_set_inhibit_irq();
+                    gen_helper_set_inhibit_irq(cpu_env);
                 /* give a chance to handle pending irqs */
                 gen_jmp_im(s->pc - s->cs_base);
                 gen_eob(s);
@@ -6708,19 +6918,20 @@
         if (CODE64(s))
             goto illegal_op;
         ot = dflag ? OT_LONG : OT_WORD;
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         reg = (modrm >> 3) & 7;
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
         gen_op_mov_TN_reg(ot, 0, reg);
-        gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+        gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
         gen_jmp_im(pc_start - s->cs_base);
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-        if (ot == OT_WORD)
-            gen_helper_boundw(cpu_A0, cpu_tmp2_i32);
-        else
-            gen_helper_boundl(cpu_A0, cpu_tmp2_i32);
+        if (ot == OT_WORD) {
+            gen_helper_boundw(cpu_env, cpu_A0, cpu_tmp2_i32);
+        } else {
+            gen_helper_boundl(cpu_env, cpu_A0, cpu_tmp2_i32);
+        }
         break;
     case 0x1c8 ... 0x1cf: /* bswap reg */
         reg = (b & 7) | REX_B(s);
@@ -6741,9 +6952,7 @@
     case 0xd6: /* salc */
         if (CODE64(s))
             goto illegal_op;
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
-        gen_compute_eflags_c(cpu_T[0]);
+        gen_compute_eflags_c(s, cpu_T[0], false);
         tcg_gen_neg_tl(cpu_T[0], cpu_T[0]);
         gen_op_mov_reg_T0(OT_BYTE, R_EAX);
         break;
@@ -6754,7 +6963,7 @@
         {
             int l1, l2, l3;
 
-            tval = (int8_t)insn_get(s, OT_BYTE);
+            tval = (int8_t)insn_get(env, s, OT_BYTE);
             next_eip = s->pc - s->cs_base;
             tval += next_eip;
             if (s->dflag == 0)
@@ -6767,17 +6976,9 @@
             switch(b) {
             case 0: /* loopnz */
             case 1: /* loopz */
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
                 gen_op_add_reg_im(s->aflag, R_ECX, -1);
                 gen_op_jz_ecx(s->aflag, l3);
-                gen_compute_eflags(cpu_tmp0);
-                tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_Z);
-                if (b == 0) {
-                    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, l1);
-                } else {
-                    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, l1);
-                }
+                gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
                 break;
             case 2: /* loop */
                 gen_op_add_reg_im(s->aflag, R_ECX, -1);
@@ -6804,33 +7005,30 @@
         if (s->cpl != 0) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
-            if (s->cc_op != CC_OP_DYNAMIC)
-                gen_op_set_cc_op(s->cc_op);
+            gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
             if (b & 2) {
-                gen_helper_rdmsr();
+                gen_helper_rdmsr(cpu_env);
             } else {
-                gen_helper_wrmsr();
+                gen_helper_wrmsr(cpu_env);
             }
         }
         break;
     case 0x131: /* rdtsc */
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
+        gen_update_cc_op(s);
         gen_jmp_im(pc_start - s->cs_base);
         if (use_icount)
             gen_io_start();
-        gen_helper_rdtsc();
+        gen_helper_rdtsc(cpu_env);
         if (use_icount) {
             gen_io_end();
             gen_jmp(s, s->pc - s->cs_base);
         }
         break;
     case 0x133: /* rdpmc */
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
+        gen_update_cc_op(s);
         gen_jmp_im(pc_start - s->cs_base);
-        gen_helper_rdpmc();
+        gen_helper_rdpmc(cpu_env);
         break;
     case 0x134: /* sysenter */
         /* For Intel SYSENTER is valid on 64-bit */
@@ -6839,12 +7037,9 @@
         if (!s->pe) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
-            if (s->cc_op != CC_OP_DYNAMIC) {
-                gen_op_set_cc_op(s->cc_op);
-                s->cc_op = CC_OP_DYNAMIC;
-            }
+            gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_sysenter();
+            gen_helper_sysenter(cpu_env);
             gen_eob(s);
         }
         break;
@@ -6855,62 +7050,51 @@
         if (!s->pe) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
-            if (s->cc_op != CC_OP_DYNAMIC) {
-                gen_op_set_cc_op(s->cc_op);
-                s->cc_op = CC_OP_DYNAMIC;
-            }
+            gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_sysexit(tcg_const_i32(dflag));
+            gen_helper_sysexit(cpu_env, tcg_const_i32(dflag));
             gen_eob(s);
         }
         break;
 #ifdef TARGET_X86_64
     case 0x105: /* syscall */
         /* XXX: is it usable in real mode ? */
-        if (s->cc_op != CC_OP_DYNAMIC) {
-            gen_op_set_cc_op(s->cc_op);
-            s->cc_op = CC_OP_DYNAMIC;
-        }
+        gen_update_cc_op(s);
         gen_jmp_im(pc_start - s->cs_base);
-        gen_helper_syscall(tcg_const_i32(s->pc - pc_start));
+        gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
         gen_eob(s);
         break;
     case 0x107: /* sysret */
         if (!s->pe) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
-            if (s->cc_op != CC_OP_DYNAMIC) {
-                gen_op_set_cc_op(s->cc_op);
-                s->cc_op = CC_OP_DYNAMIC;
-            }
+            gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_sysret(tcg_const_i32(s->dflag));
+            gen_helper_sysret(cpu_env, tcg_const_i32(s->dflag));
             /* condition codes are modified only in long mode */
             if (s->lma)
-                s->cc_op = CC_OP_EFLAGS;
+                set_cc_op(s, CC_OP_EFLAGS);
             gen_eob(s);
         }
         break;
 #endif
     case 0x1a2: /* cpuid */
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
+        gen_update_cc_op(s);
         gen_jmp_im(pc_start - s->cs_base);
-        gen_helper_cpuid();
+        gen_helper_cpuid(cpu_env);
         break;
     case 0xf4: /* hlt */
         if (s->cpl != 0) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
-            if (s->cc_op != CC_OP_DYNAMIC)
-                gen_op_set_cc_op(s->cc_op);
+            gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_hlt(tcg_const_i32(s->pc - pc_start));
+            gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
             s->is_jmp = 3;
         }
         break;
     case 0x100:
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         op = (modrm >> 3) & 7;
         switch(op) {
@@ -6922,7 +7106,7 @@
             ot = OT_WORD;
             if (mod == 3)
                 ot += s->dflag;
-            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
             break;
         case 2: /* lldt */
             if (!s->pe || s->vm86)
@@ -6931,10 +7115,10 @@
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
-                gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 0);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-                gen_helper_lldt(cpu_tmp2_i32);
+                gen_helper_lldt(cpu_env, cpu_tmp2_i32);
             }
             break;
         case 1: /* str */
@@ -6945,7 +7129,7 @@
             ot = OT_WORD;
             if (mod == 3)
                 ot += s->dflag;
-            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
             break;
         case 3: /* ltr */
             if (!s->pe || s->vm86)
@@ -6954,31 +7138,31 @@
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
-                gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 0);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-                gen_helper_ltr(cpu_tmp2_i32);
+                gen_helper_ltr(cpu_env, cpu_tmp2_i32);
             }
             break;
         case 4: /* verr */
         case 5: /* verw */
             if (!s->pe || s->vm86)
                 goto illegal_op;
-            gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
-            if (s->cc_op != CC_OP_DYNAMIC)
-                gen_op_set_cc_op(s->cc_op);
-            if (op == 4)
-                gen_helper_verr(cpu_T[0]);
-            else
-                gen_helper_verw(cpu_T[0]);
-            s->cc_op = CC_OP_EFLAGS;
+            gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 0);
+            gen_update_cc_op(s);
+            if (op == 4) {
+                gen_helper_verr(cpu_env, cpu_T[0]);
+            } else {
+                gen_helper_verw(cpu_env, cpu_T[0]);
+            }
+            set_cc_op(s, CC_OP_EFLAGS);
             break;
         default:
             goto illegal_op;
         }
         break;
     case 0x101:
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         op = (modrm >> 3) & 7;
         rm = modrm & 7;
@@ -6987,7 +7171,7 @@
             if (mod == 3)
                 goto illegal_op;
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, gdt.limit));
             gen_op_st_T0_A0(OT_WORD + s->mem_index);
             gen_add_A0_im(s, 2);
@@ -7003,8 +7187,7 @@
                     if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) ||
                         s->cpl != 0)
                         goto illegal_op;
-                    if (s->cc_op != CC_OP_DYNAMIC)
-                        gen_op_set_cc_op(s->cc_op);
+                    gen_update_cc_op(s);
                     gen_jmp_im(pc_start - s->cs_base);
 #ifdef TARGET_X86_64
                     if (s->aflag == 2) {
@@ -7017,18 +7200,16 @@
                             gen_op_andl_A0_ffff();
                     }
                     gen_add_A0_ds_seg(s);
-                    gen_helper_monitor(cpu_A0);
+                    gen_helper_monitor(cpu_env, cpu_A0);
                     break;
                 case 1: /* mwait */
                     if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) ||
                         s->cpl != 0)
                         goto illegal_op;
-                    if (s->cc_op != CC_OP_DYNAMIC) {
-                        gen_op_set_cc_op(s->cc_op);
-                        s->cc_op = CC_OP_DYNAMIC;
-                    }
+                    gen_update_cc_op(s);
                     gen_jmp_im(pc_start - s->cs_base);
-                    gen_helper_mwait(tcg_const_i32(s->pc - pc_start));
+                    gen_helper_mwait(cpu_env,
+                                     tcg_const_i32(s->pc - pc_start));
                     gen_eob(s);
                     break;
                 default:
@@ -7036,7 +7217,7 @@
                 }
             } else { /* sidt */
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, idt.limit));
                 gen_op_st_T0_A0(OT_WORD + s->mem_index);
                 gen_add_A0_im(s, 2);
@@ -7049,8 +7230,7 @@
         case 2: /* lgdt */
         case 3: /* lidt */
             if (mod == 3) {
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
+                gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 switch(rm) {
                 case 0: /* VMRUN */
@@ -7060,7 +7240,7 @@
                         gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                         break;
                     } else {
-                        gen_helper_vmrun(tcg_const_i32(s->aflag),
+                        gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag),
                                          tcg_const_i32(s->pc - pc_start));
                         tcg_gen_exit_tb(0);
                         s->is_jmp = 3;
@@ -7069,7 +7249,7 @@
                 case 1: /* VMMCALL */
                     if (!(s->flags & HF_SVME_MASK))
                         goto illegal_op;
-                    gen_helper_vmmcall();
+                    gen_helper_vmmcall(cpu_env);
                     break;
                 case 2: /* VMLOAD */
                     if (!(s->flags & HF_SVME_MASK) || !s->pe)
@@ -7078,7 +7258,7 @@
                         gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                         break;
                     } else {
-                        gen_helper_vmload(tcg_const_i32(s->aflag));
+                        gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag));
                     }
                     break;
                 case 3: /* VMSAVE */
@@ -7088,19 +7268,19 @@
                         gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                         break;
                     } else {
-                        gen_helper_vmsave(tcg_const_i32(s->aflag));
+                        gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag));
                     }
                     break;
                 case 4: /* STGI */
                     if ((!(s->flags & HF_SVME_MASK) &&
-                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) ||
+                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) ||
                         !s->pe)
                         goto illegal_op;
                     if (s->cpl != 0) {
                         gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                         break;
                     } else {
-                        gen_helper_stgi();
+                        gen_helper_stgi(cpu_env);
                     }
                     break;
                 case 5: /* CLGI */
@@ -7110,15 +7290,15 @@
                         gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                         break;
                     } else {
-                        gen_helper_clgi();
+                        gen_helper_clgi(cpu_env);
                     }
                     break;
                 case 6: /* SKINIT */
-                    if ((!(s->flags & HF_SVME_MASK) &&
-                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) ||
+                    if ((!(s->flags & HF_SVME_MASK) &&
+                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) ||
                         !s->pe)
                         goto illegal_op;
-                    gen_helper_skinit();
+                    gen_helper_skinit(cpu_env);
                     break;
                 case 7: /* INVLPGA */
                     if (!(s->flags & HF_SVME_MASK) || !s->pe)
@@ -7127,7 +7307,7 @@
                         gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                         break;
                     } else {
-                        gen_helper_invlpga(tcg_const_i32(s->aflag));
+                        gen_helper_invlpga(cpu_env, tcg_const_i32(s->aflag));
                     }
                     break;
                 default:
@@ -7138,7 +7318,7 @@
             } else {
                 gen_svm_check_intercept(s, pc_start,
                                         op==2 ? SVM_EXIT_GDTR_WRITE : SVM_EXIT_IDTR_WRITE);
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_op_ld_T1_A0(OT_WORD + s->mem_index);
                 gen_add_A0_im(s, 2);
                 gen_op_ld_T0_A0(CODE64(s) + OT_LONG + s->mem_index);
@@ -7160,15 +7340,15 @@
 #else
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,cr[0]));
 #endif
-            gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 1);
             break;
         case 6: /* lmsw */
             if (s->cpl != 0) {
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
-                gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
-                gen_helper_lmsw(cpu_T[0]);
+                gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 0);
+                gen_helper_lmsw(cpu_env, cpu_T[0]);
                 gen_jmp_im(s->pc - s->cs_base);
                 gen_eob(s);
             }
@@ -7191,11 +7371,10 @@
                         goto illegal_op;
                     }
                 } else {
-                    if (s->cc_op != CC_OP_DYNAMIC)
-                        gen_op_set_cc_op(s->cc_op);
+                    gen_update_cc_op(s);
                     gen_jmp_im(pc_start - s->cs_base);
-                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
-                    gen_helper_invlpg(cpu_A0);
+                    gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+                    gen_helper_invlpg(cpu_env, cpu_A0);
                     gen_jmp_im(s->pc - s->cs_base);
                     gen_eob(s);
                 }
@@ -7221,7 +7400,7 @@
             /* d_ot is the size of destination */
             d_ot = dflag + OT_WORD;
 
-            modrm = ldub_code(s->pc++);
+            modrm = cpu_ldub_code(env, s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
             mod = (modrm >> 6) & 3;
             rm = (modrm & 7) | REX_B(s);
@@ -7233,7 +7412,7 @@
                     tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
                 gen_op_mov_reg_T0(d_ot, reg);
             } else {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 if (d_ot == OT_QUAD) {
                     gen_op_lds_T0_A0(OT_LONG + s->mem_index);
                 } else {
@@ -7253,12 +7432,12 @@
             t1 = tcg_temp_local_new();
             t2 = tcg_temp_local_new();
             ot = OT_WORD;
-            modrm = ldub_code(s->pc++);
+            modrm = cpu_ldub_code(env, s->pc++);
             reg = (modrm >> 3) & 7;
             mod = (modrm >> 6) & 3;
             rm = modrm & 7;
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_op_ld_v(ot + s->mem_index, t0, cpu_A0);
             } else {
                 gen_op_mov_v_reg(ot, t0, rm);
@@ -7278,12 +7457,9 @@
             } else {
                 gen_op_mov_reg_v(ot, rm, t0);
             }
-            if (s->cc_op != CC_OP_DYNAMIC)
-                gen_op_set_cc_op(s->cc_op);
-            gen_compute_eflags(cpu_cc_src);
+            gen_compute_eflags(s);
             tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
             tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
-            s->cc_op = CC_OP_EFLAGS;
             tcg_temp_free(t0);
             tcg_temp_free(t1);
             tcg_temp_free(t2);
@@ -7297,27 +7473,27 @@
             if (!s->pe || s->vm86)
                 goto illegal_op;
             ot = dflag ? OT_LONG : OT_WORD;
-            modrm = ldub_code(s->pc++);
+            modrm = cpu_ldub_code(env, s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
-            gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 0);
             t0 = tcg_temp_local_new();
-            if (s->cc_op != CC_OP_DYNAMIC)
-                gen_op_set_cc_op(s->cc_op);
-            if (b == 0x102)
-                gen_helper_lar(t0, cpu_T[0]);
-            else
-                gen_helper_lsl(t0, cpu_T[0]);
+            gen_update_cc_op(s);
+            if (b == 0x102) {
+                gen_helper_lar(t0, cpu_env, cpu_T[0]);
+            } else {
+                gen_helper_lsl(t0, cpu_env, cpu_T[0]);
+            }
             tcg_gen_andi_tl(cpu_tmp0, cpu_cc_src, CC_Z);
             label1 = gen_new_label();
             tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
             gen_op_mov_reg_v(ot, reg, t0);
             gen_set_label(label1);
-            s->cc_op = CC_OP_EFLAGS;
+            set_cc_op(s, CC_OP_EFLAGS);
             tcg_temp_free(t0);
         }
         break;
     case 0x118:
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         op = (modrm >> 3) & 7;
         switch(op) {
@@ -7327,24 +7503,24 @@
         case 3: /* prefetchnt0 */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             /* nothing more to do */
             break;
         default: /* nop (multi byte) */
-            gen_nop_modrm(s, modrm);
+            gen_nop_modrm(env, s, modrm);
             break;
         }
         break;
     case 0x119 ... 0x11f: /* nop (multi byte) */
-        modrm = ldub_code(s->pc++);
-        gen_nop_modrm(s, modrm);
+        modrm = cpu_ldub_code(env, s->pc++);
+        gen_nop_modrm(env, s, modrm);
         break;
     case 0x120: /* mov reg, crN */
     case 0x122: /* mov crN, reg */
         if (s->cpl != 0) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
-            modrm = ldub_code(s->pc++);
+            modrm = cpu_ldub_code(env, s->pc++);
             if ((modrm & 0xc0) != 0xc0)
                 goto illegal_op;
             rm = (modrm & 7) | REX_B(s);
@@ -7359,16 +7535,15 @@
             case 3:
             case 4:
             case 8:
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
+                gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 if (b & 2) {
                     gen_op_mov_TN_reg(ot, 0, rm);
-                    gen_helper_write_crN(tcg_const_i32(reg), cpu_T[0]);
+                    gen_helper_write_crN(cpu_env, tcg_const_i32(reg), cpu_T[0]);
                     gen_jmp_im(s->pc - s->cs_base);
                     gen_eob(s);
                 } else {
-                    gen_helper_read_crN(cpu_T[0], tcg_const_i32(reg));
+                    gen_helper_read_crN(cpu_T[0], cpu_env, tcg_const_i32(reg));
                     gen_op_mov_reg_T0(ot, rm);
                 }
                 break;
@@ -7382,7 +7557,7 @@
         if (s->cpl != 0) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
-            modrm = ldub_code(s->pc++);
+            modrm = cpu_ldub_code(env, s->pc++);
             if ((modrm & 0xc0) != 0xc0)
                 goto illegal_op;
             rm = (modrm & 7) | REX_B(s);
@@ -7397,7 +7572,7 @@
             if (b & 2) {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
                 gen_op_mov_TN_reg(ot, 0, rm);
-                gen_helper_movl_drN_T0(tcg_const_i32(reg), cpu_T[0]);
+                gen_helper_movl_drN_T0(cpu_env, tcg_const_i32(reg), cpu_T[0]);
                 gen_jmp_im(s->pc - s->cs_base);
                 gen_eob(s);
             } else {
@@ -7412,7 +7587,7 @@
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
-            gen_helper_clts();
+            gen_helper_clts(cpu_env);
             /* abort block because static cpu state changed */
             gen_jmp_im(s->pc - s->cs_base);
             gen_eob(s);
@@ -7423,16 +7598,16 @@
         if (!(s->cpuid_features & CPUID_SSE2))
             goto illegal_op;
         ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
         reg = ((modrm >> 3) & 7) | rex_r;
         /* generate a generic store */
-        gen_ldst_modrm(s, modrm, ot, reg, 1);
+        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
         break;
     case 0x1ae:
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         op = (modrm >> 3) & 7;
         switch(op) {
@@ -7444,11 +7619,10 @@
                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
                 break;
             }
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
-            if (s->cc_op != CC_OP_DYNAMIC)
-                gen_op_set_cc_op(s->cc_op);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+            gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_fxsave(cpu_A0, tcg_const_i32((s->dflag == 2)));
+            gen_helper_fxsave(cpu_env, cpu_A0, tcg_const_i32((s->dflag == 2)));
             break;
         case 1: /* fxrstor */
             if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
@@ -7458,11 +7632,10 @@
                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
                 break;
             }
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
-            if (s->cc_op != CC_OP_DYNAMIC)
-                gen_op_set_cc_op(s->cc_op);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+            gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_fxrstor(cpu_A0, tcg_const_i32((s->dflag == 2)));
+            gen_helper_fxrstor(cpu_env, cpu_A0, tcg_const_i32((s->dflag == 2)));
             break;
         case 2: /* ldmxcsr */
         case 3: /* stmxcsr */
@@ -7473,7 +7646,7 @@
             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK) ||
                 mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             if (op == 2) {
                 gen_op_ld_T0_A0(OT_LONG + s->mem_index);
                 tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, mxcsr));
@@ -7497,7 +7670,7 @@
                 /* clflush */
                 if (!(s->cpuid_features & CPUID_CLFLUSH))
                     goto illegal_op;
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             }
             break;
         default:
@@ -7505,23 +7678,20 @@
         }
         break;
     case 0x10d: /* 3DNow! prefetch(w) */
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+        gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
         /* ignore for now */
         break;
     case 0x1aa: /* rsm */
         gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM);
         if (!(s->flags & HF_SMM_MASK))
             goto illegal_op;
-        if (s->cc_op != CC_OP_DYNAMIC) {
-            gen_op_set_cc_op(s->cc_op);
-            s->cc_op = CC_OP_DYNAMIC;
-        }
+        gen_update_cc_op(s);
         gen_jmp_im(s->pc - s->cs_base);
-        gen_helper_rsm();
+        gen_helper_rsm(cpu_env);
         gen_eob(s);
         break;
     case 0x1b8: /* SSE4.2 popcnt */
@@ -7531,7 +7701,7 @@
         if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
             goto illegal_op;
 
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7);
 
         if (s->prefix & PREFIX_DATA)
@@ -7541,11 +7711,11 @@
         else
             ot = OT_QUAD;
 
-        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
-        gen_helper_popcnt(cpu_T[0], cpu_T[0], tcg_const_i32(ot));
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_helper_popcnt(cpu_T[0], cpu_env, cpu_T[0], tcg_const_i32(ot));
         gen_op_mov_reg_T0(ot, reg);
 
-        s->cc_op = CC_OP_EFLAGS;
+        set_cc_op(s, CC_OP_EFLAGS);
         break;
     case 0x10e ... 0x10f:
         /* 3DNow! instructions, ignore prefixes */
@@ -7558,7 +7728,7 @@
     case 0x1c2:
     case 0x1c4 ... 0x1c6:
     case 0x1d0 ... 0x1fe:
-        gen_sse(s, b, pc_start, rex_r);
+        gen_sse(env, s, b, pc_start, rex_r);
         break;
     default:
         goto illegal_op;
@@ -7589,15 +7759,9 @@
                                     "cc_src");
     cpu_cc_dst = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_dst),
                                     "cc_dst");
-    cpu_cc_tmp = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_tmp),
-                                    "cc_tmp");
-
-    /* register helpers */
-#define GEN_HELPER 2
-#include "helper.h"
 }
 
-/* generate intermediate code in gen_opc_buf and gen_opparam_buf for
+/* generate intermediate code in tcg_ctx.gen_opc_buf and gen_opparam_buf for
    basic block 'tb'. If search_pc is TRUE, also generate PC
    information for each intermediate instruction. */
 static inline void gen_intermediate_code_internal(CPUX86State *env,
@@ -7629,8 +7793,9 @@
     dc->cpl = (flags >> HF_CPL_SHIFT) & 3;
     dc->iopl = (flags >> IOPL_SHIFT) & 3;
     dc->tf = (flags >> TF_SHIFT) & 1;
-    dc->singlestep_enabled = env->singlestep_enabled;
+    dc->singlestep_enabled = ENV_GET_CPU(env)->singlestep_enabled;
     dc->cc_op = CC_OP_DYNAMIC;
+    dc->cc_op_dirty = false;
     dc->cs_base = cs_base;
     dc->tb = tb;
     dc->popl_esp_hack = 0;
@@ -7651,7 +7816,7 @@
     dc->code64 = (flags >> HF_CS64_SHIFT) & 1;
 #endif
     dc->flags = flags;
-    dc->jmp_opt = !(dc->tf || env->singlestep_enabled ||
+    dc->jmp_opt = !(dc->tf || ENV_GET_CPU(env)->singlestep_enabled ||
                     (flags & HF_INHIBIT_IRQ_MASK)
 #ifndef CONFIG_SOFTMMU
                     || (flags & HF_SOFTMMU_MASK)
@@ -7678,7 +7843,7 @@
     cpu_ptr0 = tcg_temp_new_ptr();
     cpu_ptr1 = tcg_temp_new_ptr();
 
-    gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
+    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
 
     dc->is_jmp = DISAS_NEXT;
     pc_ptr = pc_start;
@@ -7700,24 +7865,24 @@
             }
         }
         if (search_pc) {
-            j = gen_opc_ptr - gen_opc_buf;
+            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
             if (lj < j) {
                 lj++;
                 while (lj < j)
-                    gen_opc_instr_start[lj++] = 0;
+                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
             }
-            gen_opc_pc[lj] = pc_ptr;
+            tcg_ctx.gen_opc_pc[lj] = pc_ptr;
             gen_opc_cc_op[lj] = dc->cc_op;
-            gen_opc_instr_start[lj] = 1;
-            gen_opc_icount[lj] = num_insns;
+            tcg_ctx.gen_opc_instr_start[lj] = 1;
+            tcg_ctx.gen_opc_icount[lj] = num_insns;
         }
         if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
             gen_io_start();
 
-        pc_ptr = disas_insn(dc, pc_ptr);
+        pc_ptr = disas_insn(env, dc, pc_ptr);
         num_insns++;
 #ifdef CONFIG_HAX
-        if (hax_enabled() && hax_stop_translate(env))
+        if (hax_enabled() && hax_stop_translate(ENV_GET_CPU(env)))
         {
             gen_jmp_im(pc_ptr - dc->cs_base);
             gen_eob(dc);
@@ -7739,7 +7904,7 @@
             break;
         }
         /* if too long translation, stop generation too */
-        if (gen_opc_ptr >= gen_opc_end ||
+        if (tcg_ctx.gen_opc_ptr >= gen_opc_end ||
             (pc_ptr - pc_start) >= (TARGET_PAGE_SIZE - 32) ||
             num_insns >= max_insns) {
             gen_jmp_im(pc_ptr - dc->cs_base);
@@ -7755,17 +7920,17 @@
     if (tb->cflags & CF_LAST_IO)
         gen_io_end();
     gen_icount_end(tb, num_insns);
-    *gen_opc_ptr = INDEX_op_end;
+    *tcg_ctx.gen_opc_ptr = INDEX_op_end;
     /* we don't forget to fill the last values */
     if (search_pc) {
-        j = gen_opc_ptr - gen_opc_buf;
+        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
         lj++;
         while (lj <= j)
-            gen_opc_instr_start[lj++] = 0;
+            tcg_ctx.gen_opc_instr_start[lj++] = 0;
     }
 
 #ifdef DEBUG_DISAS
-    log_cpu_state_mask(CPU_LOG_TB_CPU, env, X86_DUMP_CCOP);
+    log_cpu_state_mask(CPU_LOG_TB_CPU, ENV_GET_CPU(env), X86_DUMP_CCOP);
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
         int disas_flags;
         qemu_log("----------------\n");
@@ -7805,16 +7970,17 @@
         int i;
         qemu_log("RESTORE:\n");
         for(i = 0;i <= pc_pos; i++) {
-            if (gen_opc_instr_start[i]) {
-                qemu_log("0x%04x: " TARGET_FMT_lx "\n", i, gen_opc_pc[i]);
+            if (tcg_ctx.gen_opc_instr_start[i]) {
+                qemu_log("0x%04x: " TARGET_FMT_lx "\n", i,
+                        tcg_ctx.gen_opc_pc[i]);
             }
         }
         qemu_log("pc_pos=0x%x eip=" TARGET_FMT_lx " cs_base=%x\n",
-                pc_pos, gen_opc_pc[pc_pos] - tb->cs_base,
+                pc_pos, tcg_ctx.gen_opc_pc[pc_pos] - tb->cs_base,
                 (uint32_t)tb->cs_base);
     }
 #endif
-    env->eip = gen_opc_pc[pc_pos] - tb->cs_base;
+    env->eip = tcg_ctx.gen_opc_pc[pc_pos] - tb->cs_base;
     cc_op = gen_opc_cc_op[pc_pos];
     if (cc_op != CC_OP_DYNAMIC)
         env->cc_op = cc_op;
diff --git a/target-mips/cpu-qom.h b/target-mips/cpu-qom.h
new file mode 100644
index 0000000..c25e41b
--- /dev/null
+++ b/target-mips/cpu-qom.h
@@ -0,0 +1,22 @@
+#ifndef QEMU_MIPS_CPU_QOM_H
+#define QEMU_MIPS_CPU_QOM_H
+
+#include "qemu/osdep.h"
+#include "qom/cpu.h"
+
+typedef struct MIPSCPU {
+    CPUState parent_obj;
+
+    CPUMIPSState env;
+} MIPSCPU;
+
+
+static inline MIPSCPU *mips_env_get_cpu(CPUMIPSState *env)
+{
+    return container_of(env, MIPSCPU, env);
+}
+
+#define ENV_GET_CPU(e)  CPU(mips_env_get_cpu(e))
+#define ENV_OFFSET offsetof(MIPSCPU, env)
+
+#endif  // QEMU_MIPS_CPU_QOM_H
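
For illustration, a minimal sketch of how the new QOM wrapper above is used: code that only holds a CPUMIPSState pointer recovers the generic CPUState via container_of(), which is what ENV_GET_CPU() does at the call sites elsewhere in this patch (e.g. for singlestep_enabled). The function name here is hypothetical:

static int mips_singlestep_enabled(CPUMIPSState *env)
{
    /* container_of() walks back from the embedded 'env' member to the
       enclosing MIPSCPU; CPU() then yields its generic CPUState parent. */
    MIPSCPU *cpu = mips_env_get_cpu(env);
    CPUState *cs = CPU(cpu);   /* same pointer ENV_GET_CPU(env) produces */

    return cs->singlestep_enabled;
}
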
diff --git a/target-mips/cpu.h b/target-mips/cpu.h
index 2107c54..7ecfa23 100644
--- a/target-mips/cpu.h
+++ b/target-mips/cpu.h
@@ -47,10 +47,10 @@
 struct CPUMIPSTLBContext {
     uint32_t nb_tlb;
     int (*map_address) (struct CPUMIPSState *env, hwaddr *physical, int *prot, target_ulong address, int rw, int access_type);
-    void (*helper_tlbwi) (void);
-    void (*helper_tlbwr) (void);
-    void (*helper_tlbp) (void);
-    void (*helper_tlbr) (void);
+    void (*helper_tlbwi)(struct CPUMIPSState *env);
+    void (*helper_tlbwr)(struct CPUMIPSState *env);
+    void (*helper_tlbp)(struct CPUMIPSState *env);
+    void (*helper_tlbr)(struct CPUMIPSState *env);
     union {
         struct {
             r4k_tlb_t tlb[MIPS_TLB_MAX];
@@ -476,20 +476,22 @@
     struct QEMUTimer *timer; /* Internal timer */
 };
 
+#include "cpu-qom.h"
+
 int no_mmu_map_address (CPUMIPSState *env, hwaddr *physical, int *prot,
                         target_ulong address, int rw, int access_type);
 int fixed_mmu_map_address (CPUMIPSState *env, hwaddr *physical, int *prot,
                            target_ulong address, int rw, int access_type);
 int r4k_map_address (CPUMIPSState *env, hwaddr *physical, int *prot,
                      target_ulong address, int rw, int access_type);
-void r4k_helper_tlbwi (void);
-void r4k_helper_tlbwr (void);
-void r4k_helper_tlbp (void);
-void r4k_helper_tlbr (void);
+void r4k_helper_tlbwi(CPUMIPSState *env);
+void r4k_helper_tlbwr(CPUMIPSState *env);
+void r4k_helper_tlbp(CPUMIPSState *env);
+void r4k_helper_tlbr(CPUMIPSState *env);
 void mips_cpu_list (FILE *f, int (*cpu_fprintf)(FILE *f, const char *fmt, ...));
 
-void do_unassigned_access(hwaddr addr, int is_write, int is_exec,
-                          int unused, int size);
+void cpu_unassigned_access(CPUArchState* env, hwaddr addr,
+                           int is_write, int is_exec, int unused, int size);
 
 #define cpu_init cpu_mips_init
 #define cpu_exec cpu_mips_exec
@@ -648,19 +650,12 @@
 
 /* helper.c */
 int cpu_mips_handle_mmu_fault (CPUMIPSState *env, target_ulong address, int rw,
-                               int mmu_idx, int is_softmmu);
+                               int mmu_idx);
 #define cpu_handle_mmu_fault cpu_mips_handle_mmu_fault
 void do_interrupt (CPUMIPSState *env);
 hwaddr cpu_mips_translate_address (CPUMIPSState *env, target_ulong address,
 		                               int rw);
 
-static inline void cpu_pc_from_tb(CPUMIPSState *env, TranslationBlock *tb)
-{
-    env->active_tc.PC = tb->pc;
-    env->hflags &= ~MIPS_HFLAG_BMASK;
-    env->hflags |= tb->flags & MIPS_HFLAG_BMASK;
-}
-
 static inline void cpu_get_tb_cpu_state(CPUMIPSState *env, target_ulong *pc,
                                         target_ulong *cs_base, int *flags)
 {
@@ -674,4 +669,30 @@
     env->tls_value = newtls;
 }
 
+static inline bool cpu_has_work(CPUState *cpu)
+{
+    int has_work = 0;
+
+    /* It is implementation dependent if non-enabled interrupts
+       wake-up the CPU, however most of the implementations only
+       check for interrupts that can be taken. */
+    if ((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
+        cpu_mips_hw_interrupts_pending(cpu->env_ptr)) {
+        has_work = 1;
+    }
+
+    if (cpu->interrupt_request & CPU_INTERRUPT_TIMER) {
+        has_work = 1;
+    }
+
+    return has_work;
+}
+
+static inline void cpu_pc_from_tb(CPUMIPSState *env, TranslationBlock *tb)
+{
+    env->active_tc.PC = tb->pc;
+    env->hflags &= ~MIPS_HFLAG_BMASK;
+    env->hflags |= tb->flags & MIPS_HFLAG_BMASK;
+}
+
 #endif /* !defined (__MIPS_CPU_H__) */
diff --git a/target-mips/exec.h b/target-mips/exec.h
deleted file mode 100644
index a64fe65..0000000
--- a/target-mips/exec.h
+++ /dev/null
@@ -1,107 +0,0 @@
-#if !defined(__QEMU_MIPS_EXEC_H__)
-#define __QEMU_MIPS_EXEC_H__
-
-//#define DEBUG_OP
-
-#include "config.h"
-#include "mips-defs.h"
-#include "dyngen-exec.h"
-#include "exec/cpu-defs.h"
-
-GLOBAL_REGISTER_VARIABLE_DECL struct CPUMIPSState *env asm(AREG0);
-
-#include "cpu.h"
-#include "exec/exec-all.h"
-
-#if !defined(CONFIG_USER_ONLY)
-#include "exec/softmmu_exec.h"
-#endif /* !defined(CONFIG_USER_ONLY) */
-
-void dump_fpu(CPUMIPSState *env);
-void fpu_dump_state(CPUMIPSState *env, FILE *f,
-                    int (*fpu_fprintf)(FILE *f, const char *fmt, ...),
-                    int flags);
-
-void cpu_mips_clock_init (CPUMIPSState *env);
-void cpu_mips_tlb_flush (CPUMIPSState *env, int flush_global);
-
-static inline void env_to_regs(void)
-{
-}
-
-static inline void regs_to_env(void)
-{
-}
-
-static inline int cpu_has_work(CPUMIPSState *env)
-{
-    int has_work = 0;
-
-    /* It is implementation dependent if non-enabled interrupts
-       wake-up the CPU, however most of the implementations only
-       check for interrupts that can be taken. */
-    if ((env->interrupt_request & CPU_INTERRUPT_HARD) &&
-        cpu_mips_hw_interrupts_pending(env)) {
-        has_work = 1;
-    }
-
-    if (env->interrupt_request & CPU_INTERRUPT_TIMER) {
-        has_work = 1;
-    }
-
-    return has_work;
-}
-
-static inline int cpu_halted(CPUMIPSState *env)
-{
-    if (!env->halted)
-        return 0;
-    if (cpu_has_work(env)) {
-        env->halted = 0;
-        return 0;
-    }
-    return EXCP_HALTED;
-}
-
-static inline void compute_hflags(CPUMIPSState *env)
-{
-    env->hflags &= ~(MIPS_HFLAG_COP1X | MIPS_HFLAG_64 | MIPS_HFLAG_CP0 |
-                     MIPS_HFLAG_F64 | MIPS_HFLAG_FPU | MIPS_HFLAG_KSU |
-                     MIPS_HFLAG_UX);
-    if (!(env->CP0_Status & (1 << CP0St_EXL)) &&
-        !(env->CP0_Status & (1 << CP0St_ERL)) &&
-        !(env->hflags & MIPS_HFLAG_DM)) {
-        env->hflags |= (env->CP0_Status >> CP0St_KSU) & MIPS_HFLAG_KSU;
-    }
-#if defined(TARGET_MIPS64)
-    if (((env->hflags & MIPS_HFLAG_KSU) != MIPS_HFLAG_UM) ||
-        (env->CP0_Status & (1 << CP0St_PX)) ||
-        (env->CP0_Status & (1 << CP0St_UX)))
-        env->hflags |= MIPS_HFLAG_64;
-    if (env->CP0_Status & (1 << CP0St_UX))
-        env->hflags |= MIPS_HFLAG_UX;
-#endif
-    if ((env->CP0_Status & (1 << CP0St_CU0)) ||
-        !(env->hflags & MIPS_HFLAG_KSU))
-        env->hflags |= MIPS_HFLAG_CP0;
-    if (env->CP0_Status & (1 << CP0St_CU1))
-        env->hflags |= MIPS_HFLAG_FPU;
-    if (env->CP0_Status & (1 << CP0St_FR))
-        env->hflags |= MIPS_HFLAG_F64;
-    if (env->insn_flags & ISA_MIPS32R2) {
-        if (env->active_fpu.fcr0 & (1 << FCR0_F64))
-            env->hflags |= MIPS_HFLAG_COP1X;
-    } else if (env->insn_flags & ISA_MIPS32) {
-        if (env->hflags & MIPS_HFLAG_64)
-            env->hflags |= MIPS_HFLAG_COP1X;
-    } else if (env->insn_flags & ISA_MIPS4) {
-        /* All supported MIPS IV CPUs use the XX (CU3) to enable
-           and disable the MIPS IV extensions to the MIPS III ISA.
-           Some other MIPS IV CPUs ignore the bit, so the check here
-           would be too restrictive for them.  */
-        if (env->CP0_Status & (1 << CP0St_CU3))
-            env->hflags |= MIPS_HFLAG_COP1X;
-    }
-}
-
-#endif /* !defined(__QEMU_MIPS_EXEC_H__) */
diff --git a/target-mips/helper.c b/target-mips/helper.c
index d4e3083..b9ad090 100644
--- a/target-mips/helper.c
+++ b/target-mips/helper.c
@@ -24,7 +24,6 @@
 #include <signal.h>
 
 #include "cpu.h"
-#include "exec/exec-all.h"
 
 enum {
     TLBRET_DIRTY = -4,
@@ -421,8 +420,8 @@
             prot |= PAGE_WRITE;
 
         tlb_set_page(env, address & TARGET_PAGE_MASK,
-                        physical & TARGET_PAGE_MASK, prot,
-                        mmu_idx, is_softmmu);
+                        physical & TARGET_PAGE_MASK, prot | PAGE_EXEC,
+                        mmu_idx, TARGET_PAGE_SIZE);
         ret = TLBRET_MATCH;
         goto out;
     }
@@ -437,7 +436,7 @@
 }
 
 int cpu_mips_handle_mmu_fault (CPUMIPSState *env, target_ulong address, int rw,
-                               int mmu_idx, int is_softmmu)
+                               int mmu_idx)
 {
 #if !defined(CONFIG_USER_ONLY)
     hwaddr physical;
@@ -450,8 +449,8 @@
 #if 0
     log_cpu_state(env, 0);
 #endif
-    qemu_log("%s pc " TARGET_FMT_lx " ad " TARGET_FMT_lx " rw %d mmu_idx %d smmu %d\n",
-              __func__, env->active_tc.PC, address, rw, mmu_idx, is_softmmu);
+    qemu_log("%s pc " TARGET_FMT_lx " ad " TARGET_FMT_lx " rw %d mmu_idx %d\n",
+              __func__, env->active_tc.PC, address, rw, mmu_idx);
 
     rw &= 1;
 
@@ -467,12 +466,13 @@
     qemu_log("%s address=" TARGET_FMT_lx " ret %d physical " TARGET_FMT_plx " prot %d\n",
               __func__, address, ret, physical, prot);
     if (ret == TLBRET_MATCH) {
-       ret = tlb_set_page(env, address & TARGET_PAGE_MASK,
-                          physical & TARGET_PAGE_MASK, prot,
-                          mmu_idx, is_softmmu);
+       tlb_set_page(env, address & TARGET_PAGE_MASK,
+                    physical & TARGET_PAGE_MASK, prot | PAGE_EXEC,
+                    mmu_idx, TARGET_PAGE_SIZE);
+       ret = 0;
     }
     else if (ret == TLBRET_NOMATCH)
-        ret = cpu_mips_tlb_refill(env,address,rw,mmu_idx,is_softmmu);
+        ret = cpu_mips_tlb_refill(env,address,rw,mmu_idx,1);
     if (ret < 0)
 #endif
     {
@@ -645,7 +645,7 @@
         env->active_tc.PC = (int32_t)0xBFC00480;
         break;
     case EXCP_RESET:
-        cpu_reset(env);
+        cpu_reset(ENV_GET_CPU(env));
         break;
     case EXCP_SRESET:
         env->CP0_Status |= (1 << CP0St_SR);
diff --git a/target-mips/helper.h b/target-mips/helper.h
index d0041d9..3e6557a 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -1,204 +1,204 @@
 #include "exec/def-helper.h"
 
-DEF_HELPER_2(raise_exception_err, void, i32, int)
-DEF_HELPER_1(raise_exception, void, i32)
-DEF_HELPER_0(interrupt_restart, void)
+DEF_HELPER_3(raise_exception_err, void, env, i32, int)
+DEF_HELPER_2(raise_exception, void, env, i32)
+DEF_HELPER_1(interrupt_restart, void, env)
 
 #ifdef TARGET_MIPS64
-DEF_HELPER_3(ldl, tl, tl, tl, int)
-DEF_HELPER_3(ldr, tl, tl, tl, int)
-DEF_HELPER_3(sdl, void, tl, tl, int)
-DEF_HELPER_3(sdr, void, tl, tl, int)
+DEF_HELPER_4(ldl, tl, env, tl, tl, int)
+DEF_HELPER_4(ldr, tl, env, tl, tl, int)
+DEF_HELPER_4(sdl, void, env, tl, tl, int)
+DEF_HELPER_4(sdr, void, env, tl, tl, int)
 #endif
-DEF_HELPER_3(lwl, tl, tl, tl, int)
-DEF_HELPER_3(lwr, tl, tl, tl, int)
-DEF_HELPER_3(swl, void, tl, tl, int)
-DEF_HELPER_3(swr, void, tl, tl, int)
+DEF_HELPER_4(lwl, tl, env, tl, tl, int)
+DEF_HELPER_4(lwr, tl, env, tl, tl, int)
+DEF_HELPER_4(swl, void, env, tl, tl, int)
+DEF_HELPER_4(swr, void, env, tl, tl, int)
 
 #ifndef CONFIG_USER_ONLY
-DEF_HELPER_2(ll, tl, tl, int)
-DEF_HELPER_3(sc, tl, tl, tl, int)
+DEF_HELPER_3(ll, tl, env, tl, int)
+DEF_HELPER_4(sc, tl, env, tl, tl, int)
 #ifdef TARGET_MIPS64
-DEF_HELPER_2(lld, tl, tl, int)
-DEF_HELPER_3(scd, tl, tl, tl, int)
+DEF_HELPER_3(lld, tl, env, tl, int)
+DEF_HELPER_4(scd, tl, env, tl, tl, int)
 #endif
 #endif
 
-DEF_HELPER_FLAGS_1(clo, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl)
-DEF_HELPER_FLAGS_1(clz, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl)
+DEF_HELPER_FLAGS_1(clo, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl)
 #ifdef TARGET_MIPS64
-DEF_HELPER_FLAGS_1(dclo, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl)
-DEF_HELPER_FLAGS_1(dclz, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl)
+DEF_HELPER_FLAGS_1(dclo, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(dclz, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_2(dmult, void, tl, tl)
 DEF_HELPER_2(dmultu, void, tl, tl)
 #endif
 
-DEF_HELPER_2(muls, tl, tl, tl)
-DEF_HELPER_2(mulsu, tl, tl, tl)
-DEF_HELPER_2(macc, tl, tl, tl)
-DEF_HELPER_2(maccu, tl, tl, tl)
-DEF_HELPER_2(msac, tl, tl, tl)
-DEF_HELPER_2(msacu, tl, tl, tl)
-DEF_HELPER_2(mulhi, tl, tl, tl)
-DEF_HELPER_2(mulhiu, tl, tl, tl)
-DEF_HELPER_2(mulshi, tl, tl, tl)
-DEF_HELPER_2(mulshiu, tl, tl, tl)
-DEF_HELPER_2(macchi, tl, tl, tl)
-DEF_HELPER_2(macchiu, tl, tl, tl)
-DEF_HELPER_2(msachi, tl, tl, tl)
-DEF_HELPER_2(msachiu, tl, tl, tl)
+DEF_HELPER_3(muls, tl, env, tl, tl)
+DEF_HELPER_3(mulsu, tl, env, tl, tl)
+DEF_HELPER_3(macc, tl, env, tl, tl)
+DEF_HELPER_3(maccu, tl, env, tl, tl)
+DEF_HELPER_3(msac, tl, env, tl, tl)
+DEF_HELPER_3(msacu, tl, env, tl, tl)
+DEF_HELPER_3(mulhi, tl, env, tl, tl)
+DEF_HELPER_3(mulhiu, tl, env, tl, tl)
+DEF_HELPER_3(mulshi, tl, env, tl, tl)
+DEF_HELPER_3(mulshiu, tl, env, tl, tl)
+DEF_HELPER_3(macchi, tl, env, tl, tl)
+DEF_HELPER_3(macchiu, tl, env, tl, tl)
+DEF_HELPER_3(msachi, tl, env, tl, tl)
+DEF_HELPER_3(msachiu, tl, env, tl, tl)
 
 #ifndef CONFIG_USER_ONLY
 /* CP0 helpers */
-DEF_HELPER_0(mfc0_mvpcontrol, tl)
-DEF_HELPER_0(mfc0_mvpconf0, tl)
-DEF_HELPER_0(mfc0_mvpconf1, tl)
-DEF_HELPER_0(mfc0_random, tl)
-DEF_HELPER_0(mfc0_tcstatus, tl)
-DEF_HELPER_0(mftc0_tcstatus, tl)
-DEF_HELPER_0(mfc0_tcbind, tl)
-DEF_HELPER_0(mftc0_tcbind, tl)
-DEF_HELPER_0(mfc0_tcrestart, tl)
-DEF_HELPER_0(mftc0_tcrestart, tl)
-DEF_HELPER_0(mfc0_tchalt, tl)
-DEF_HELPER_0(mftc0_tchalt, tl)
-DEF_HELPER_0(mfc0_tccontext, tl)
-DEF_HELPER_0(mftc0_tccontext, tl)
-DEF_HELPER_0(mfc0_tcschedule, tl)
-DEF_HELPER_0(mftc0_tcschedule, tl)
-DEF_HELPER_0(mfc0_tcschefback, tl)
-DEF_HELPER_0(mftc0_tcschefback, tl)
-DEF_HELPER_0(mfc0_count, tl)
-DEF_HELPER_0(mftc0_entryhi, tl)
-DEF_HELPER_0(mftc0_status, tl)
-DEF_HELPER_0(mfc0_lladdr, tl)
-DEF_HELPER_1(mfc0_watchlo, tl, i32)
-DEF_HELPER_1(mfc0_watchhi, tl, i32)
-DEF_HELPER_0(mfc0_debug, tl)
-DEF_HELPER_0(mftc0_debug, tl)
+DEF_HELPER_1(mfc0_mvpcontrol, tl, env)
+DEF_HELPER_1(mfc0_mvpconf0, tl, env)
+DEF_HELPER_1(mfc0_mvpconf1, tl, env)
+DEF_HELPER_1(mfc0_random, tl, env)
+DEF_HELPER_1(mfc0_tcstatus, tl, env)
+DEF_HELPER_1(mftc0_tcstatus, tl, env)
+DEF_HELPER_1(mfc0_tcbind, tl, env)
+DEF_HELPER_1(mftc0_tcbind, tl, env)
+DEF_HELPER_1(mfc0_tcrestart, tl, env)
+DEF_HELPER_1(mftc0_tcrestart, tl, env)
+DEF_HELPER_1(mfc0_tchalt, tl, env)
+DEF_HELPER_1(mftc0_tchalt, tl, env)
+DEF_HELPER_1(mfc0_tccontext, tl, env)
+DEF_HELPER_1(mftc0_tccontext, tl, env)
+DEF_HELPER_1(mfc0_tcschedule, tl, env)
+DEF_HELPER_1(mftc0_tcschedule, tl, env)
+DEF_HELPER_1(mfc0_tcschefback, tl, env)
+DEF_HELPER_1(mftc0_tcschefback, tl, env)
+DEF_HELPER_1(mfc0_count, tl, env)
+DEF_HELPER_1(mftc0_entryhi, tl, env)
+DEF_HELPER_1(mftc0_status, tl, env)
+DEF_HELPER_1(mfc0_lladdr, tl, env)
+DEF_HELPER_2(mfc0_watchlo, tl, env, i32)
+DEF_HELPER_2(mfc0_watchhi, tl, env, i32)
+DEF_HELPER_1(mfc0_debug, tl, env)
+DEF_HELPER_1(mftc0_debug, tl, env)
 #ifdef TARGET_MIPS64
-DEF_HELPER_0(dmfc0_tcrestart, tl)
-DEF_HELPER_0(dmfc0_tchalt, tl)
-DEF_HELPER_0(dmfc0_tccontext, tl)
-DEF_HELPER_0(dmfc0_tcschedule, tl)
-DEF_HELPER_0(dmfc0_tcschefback, tl)
-DEF_HELPER_0(dmfc0_lladdr, tl)
-DEF_HELPER_1(dmfc0_watchlo, tl, i32)
+DEF_HELPER_1(dmfc0_tcrestart, tl, env)
+DEF_HELPER_1(dmfc0_tchalt, tl, env)
+DEF_HELPER_1(dmfc0_tccontext, tl, env)
+DEF_HELPER_1(dmfc0_tcschedule, tl, env)
+DEF_HELPER_1(dmfc0_tcschefback, tl, env)
+DEF_HELPER_1(dmfc0_lladdr, tl, env)
+DEF_HELPER_2(dmfc0_watchlo, tl, env, i32)
 #endif /* TARGET_MIPS64 */
 
-DEF_HELPER_1(mtc0_index, void, tl)
-DEF_HELPER_1(mtc0_mvpcontrol, void, tl)
-DEF_HELPER_1(mtc0_vpecontrol, void, tl)
-DEF_HELPER_1(mtc0_vpeconf0, void, tl)
-DEF_HELPER_1(mtc0_vpeconf1, void, tl)
-DEF_HELPER_1(mtc0_yqmask, void, tl)
-DEF_HELPER_1(mtc0_vpeopt, void, tl)
-DEF_HELPER_1(mtc0_entrylo0, void, tl)
-DEF_HELPER_1(mtc0_tcstatus, void, tl)
-DEF_HELPER_1(mttc0_tcstatus, void, tl)
-DEF_HELPER_1(mtc0_tcbind, void, tl)
-DEF_HELPER_1(mttc0_tcbind, void, tl)
-DEF_HELPER_1(mtc0_tcrestart, void, tl)
-DEF_HELPER_1(mttc0_tcrestart, void, tl)
-DEF_HELPER_1(mtc0_tchalt, void, tl)
-DEF_HELPER_1(mttc0_tchalt, void, tl)
-DEF_HELPER_1(mtc0_tccontext, void, tl)
-DEF_HELPER_1(mttc0_tccontext, void, tl)
-DEF_HELPER_1(mtc0_tcschedule, void, tl)
-DEF_HELPER_1(mttc0_tcschedule, void, tl)
-DEF_HELPER_1(mtc0_tcschefback, void, tl)
-DEF_HELPER_1(mttc0_tcschefback, void, tl)
-DEF_HELPER_1(mtc0_entrylo1, void, tl)
-DEF_HELPER_1(mtc0_context, void, tl)
-DEF_HELPER_1(mtc0_pagemask, void, tl)
-DEF_HELPER_1(mtc0_pagegrain, void, tl)
-DEF_HELPER_1(mtc0_wired, void, tl)
-DEF_HELPER_1(mtc0_srsconf0, void, tl)
-DEF_HELPER_1(mtc0_srsconf1, void, tl)
-DEF_HELPER_1(mtc0_srsconf2, void, tl)
-DEF_HELPER_1(mtc0_srsconf3, void, tl)
-DEF_HELPER_1(mtc0_srsconf4, void, tl)
-DEF_HELPER_1(mtc0_hwrena, void, tl)
-DEF_HELPER_1(mtc0_count, void, tl)
-DEF_HELPER_1(mtc0_entryhi, void, tl)
-DEF_HELPER_1(mttc0_entryhi, void, tl)
-DEF_HELPER_1(mtc0_compare, void, tl)
-DEF_HELPER_1(mtc0_status, void, tl)
-DEF_HELPER_1(mttc0_status, void, tl)
-DEF_HELPER_1(mtc0_intctl, void, tl)
-DEF_HELPER_1(mtc0_srsctl, void, tl)
-DEF_HELPER_1(mtc0_cause, void, tl)
-DEF_HELPER_1(mtc0_ebase, void, tl)
-DEF_HELPER_1(mtc0_config0, void, tl)
-DEF_HELPER_1(mtc0_config2, void, tl)
-DEF_HELPER_1(mtc0_lladdr, void, tl)
-DEF_HELPER_2(mtc0_watchlo, void, tl, i32)
-DEF_HELPER_2(mtc0_watchhi, void, tl, i32)
-DEF_HELPER_1(mtc0_xcontext, void, tl)
-DEF_HELPER_1(mtc0_framemask, void, tl)
-DEF_HELPER_1(mtc0_debug, void, tl)
-DEF_HELPER_1(mttc0_debug, void, tl)
-DEF_HELPER_1(mtc0_performance0, void, tl)
-DEF_HELPER_1(mtc0_taglo, void, tl)
-DEF_HELPER_1(mtc0_datalo, void, tl)
-DEF_HELPER_1(mtc0_taghi, void, tl)
-DEF_HELPER_1(mtc0_datahi, void, tl)
+DEF_HELPER_2(mtc0_index, void, env, tl)
+DEF_HELPER_2(mtc0_mvpcontrol, void, env, tl)
+DEF_HELPER_2(mtc0_vpecontrol, void, env, tl)
+DEF_HELPER_2(mtc0_vpeconf0, void, env, tl)
+DEF_HELPER_2(mtc0_vpeconf1, void, env, tl)
+DEF_HELPER_2(mtc0_yqmask, void, env, tl)
+DEF_HELPER_2(mtc0_vpeopt, void, env, tl)
+DEF_HELPER_2(mtc0_entrylo0, void, env, tl)
+DEF_HELPER_2(mtc0_tcstatus, void, env, tl)
+DEF_HELPER_2(mttc0_tcstatus, void, env, tl)
+DEF_HELPER_2(mtc0_tcbind, void, env, tl)
+DEF_HELPER_2(mttc0_tcbind, void, env, tl)
+DEF_HELPER_2(mtc0_tcrestart, void, env, tl)
+DEF_HELPER_2(mttc0_tcrestart, void, env, tl)
+DEF_HELPER_2(mtc0_tchalt, void, env, tl)
+DEF_HELPER_2(mttc0_tchalt, void, env, tl)
+DEF_HELPER_2(mtc0_tccontext, void, env, tl)
+DEF_HELPER_2(mttc0_tccontext, void, env, tl)
+DEF_HELPER_2(mtc0_tcschedule, void, env, tl)
+DEF_HELPER_2(mttc0_tcschedule, void, env, tl)
+DEF_HELPER_2(mtc0_tcschefback, void, env, tl)
+DEF_HELPER_2(mttc0_tcschefback, void, env, tl)
+DEF_HELPER_2(mtc0_entrylo1, void, env, tl)
+DEF_HELPER_2(mtc0_context, void, env, tl)
+DEF_HELPER_2(mtc0_pagemask, void, env, tl)
+DEF_HELPER_2(mtc0_pagegrain, void, env, tl)
+DEF_HELPER_2(mtc0_wired, void, env, tl)
+DEF_HELPER_2(mtc0_srsconf0, void, env, tl)
+DEF_HELPER_2(mtc0_srsconf1, void, env, tl)
+DEF_HELPER_2(mtc0_srsconf2, void, env, tl)
+DEF_HELPER_2(mtc0_srsconf3, void, env, tl)
+DEF_HELPER_2(mtc0_srsconf4, void, env, tl)
+DEF_HELPER_2(mtc0_hwrena, void, env, tl)
+DEF_HELPER_2(mtc0_count, void, env, tl)
+DEF_HELPER_2(mtc0_entryhi, void, env, tl)
+DEF_HELPER_2(mttc0_entryhi, void, env, tl)
+DEF_HELPER_2(mtc0_compare, void, env, tl)
+DEF_HELPER_2(mtc0_status, void, env, tl)
+DEF_HELPER_2(mttc0_status, void, env, tl)
+DEF_HELPER_2(mtc0_intctl, void, env, tl)
+DEF_HELPER_2(mtc0_srsctl, void, env, tl)
+DEF_HELPER_2(mtc0_cause, void, env, tl)
+DEF_HELPER_2(mtc0_ebase, void, env, tl)
+DEF_HELPER_2(mtc0_config0, void, env, tl)
+DEF_HELPER_2(mtc0_config2, void, env, tl)
+DEF_HELPER_2(mtc0_lladdr, void, env, tl)
+DEF_HELPER_3(mtc0_watchlo, void, env, tl, i32)
+DEF_HELPER_3(mtc0_watchhi, void, env, tl, i32)
+DEF_HELPER_2(mtc0_xcontext, void, env, tl)
+DEF_HELPER_2(mtc0_framemask, void, env, tl)
+DEF_HELPER_2(mtc0_debug, void, env, tl)
+DEF_HELPER_2(mttc0_debug, void, env, tl)
+DEF_HELPER_2(mtc0_performance0, void, env, tl)
+DEF_HELPER_2(mtc0_taglo, void, env, tl)
+DEF_HELPER_2(mtc0_datalo, void, env, tl)
+DEF_HELPER_2(mtc0_taghi, void, env, tl)
+DEF_HELPER_2(mtc0_datahi, void, env, tl)
 
 /* MIPS MT functions */
-DEF_HELPER_1(mftgpr, tl, i32);
-DEF_HELPER_1(mftlo, tl, i32)
-DEF_HELPER_1(mfthi, tl, i32)
-DEF_HELPER_1(mftacx, tl, i32)
-DEF_HELPER_0(mftdsp, tl)
-DEF_HELPER_2(mttgpr, void, tl, i32)
-DEF_HELPER_2(mttlo, void, tl, i32)
-DEF_HELPER_2(mtthi, void, tl, i32)
-DEF_HELPER_2(mttacx, void, tl, i32)
-DEF_HELPER_1(mttdsp, void, tl)
-DEF_HELPER_1(dmt, tl, tl)
-DEF_HELPER_1(emt, tl, tl)
-DEF_HELPER_1(dvpe, tl, tl)
-DEF_HELPER_1(evpe, tl, tl)
+DEF_HELPER_2(mftgpr, tl, env, i32)
+DEF_HELPER_2(mftlo, tl, env, i32)
+DEF_HELPER_2(mfthi, tl, env, i32)
+DEF_HELPER_2(mftacx, tl, env, i32)
+DEF_HELPER_1(mftdsp, tl, env)
+DEF_HELPER_3(mttgpr, void, env, tl, i32)
+DEF_HELPER_3(mttlo, void, env, tl, i32)
+DEF_HELPER_3(mtthi, void, env, tl, i32)
+DEF_HELPER_3(mttacx, void, env, tl, i32)
+DEF_HELPER_2(mttdsp, void, env, tl)
+DEF_HELPER_0(dmt, tl)
+DEF_HELPER_0(emt, tl)
+DEF_HELPER_1(dvpe, tl, env)
+DEF_HELPER_1(evpe, tl, env)
 #endif /* !CONFIG_USER_ONLY */
 DEF_HELPER_2(fork, void, tl, tl)
-DEF_HELPER_1(yield, tl, tl)
+DEF_HELPER_2(yield, tl, env, tl)
 
 /* CP1 functions */
-DEF_HELPER_1(cfc1, tl, i32)
-DEF_HELPER_2(ctc1, void, tl, i32)
+DEF_HELPER_2(cfc1, tl, env, i32)
+DEF_HELPER_3(ctc1, void, env, tl, i32)
 
-DEF_HELPER_1(float_cvtd_s, i64, i32)
-DEF_HELPER_1(float_cvtd_w, i64, i32)
-DEF_HELPER_1(float_cvtd_l, i64, i64)
-DEF_HELPER_1(float_cvtl_d, i64, i64)
-DEF_HELPER_1(float_cvtl_s, i64, i32)
-DEF_HELPER_1(float_cvtps_pw, i64, i64)
-DEF_HELPER_1(float_cvtpw_ps, i64, i64)
-DEF_HELPER_1(float_cvts_d, i32, i64)
-DEF_HELPER_1(float_cvts_w, i32, i32)
-DEF_HELPER_1(float_cvts_l, i32, i64)
-DEF_HELPER_1(float_cvts_pl, i32, i32)
-DEF_HELPER_1(float_cvts_pu, i32, i32)
-DEF_HELPER_1(float_cvtw_s, i32, i32)
-DEF_HELPER_1(float_cvtw_d, i32, i64)
+DEF_HELPER_2(float_cvtd_s, i64, env, i32)
+DEF_HELPER_2(float_cvtd_w, i64, env, i32)
+DEF_HELPER_2(float_cvtd_l, i64, env, i64)
+DEF_HELPER_2(float_cvtl_d, i64, env, i64)
+DEF_HELPER_2(float_cvtl_s, i64, env, i32)
+DEF_HELPER_2(float_cvtps_pw, i64, env, i64)
+DEF_HELPER_2(float_cvtpw_ps, i64, env, i64)
+DEF_HELPER_2(float_cvts_d, i32, env, i64)
+DEF_HELPER_2(float_cvts_w, i32, env, i32)
+DEF_HELPER_2(float_cvts_l, i32, env, i64)
+DEF_HELPER_2(float_cvts_pl, i32, env, i32)
+DEF_HELPER_2(float_cvts_pu, i32, env, i32)
+DEF_HELPER_2(float_cvtw_s, i32, env, i32)
+DEF_HELPER_2(float_cvtw_d, i32, env, i64)
 
-DEF_HELPER_2(float_addr_ps, i64, i64, i64)
-DEF_HELPER_2(float_mulr_ps, i64, i64, i64)
+DEF_HELPER_3(float_addr_ps, i64, env, i64, i64)
+DEF_HELPER_3(float_mulr_ps, i64, env, i64, i64)
 
-#define FOP_PROTO(op)                       \
-DEF_HELPER_1(float_ ## op ## l_s, i64, i32) \
-DEF_HELPER_1(float_ ## op ## l_d, i64, i64) \
-DEF_HELPER_1(float_ ## op ## w_s, i32, i32) \
-DEF_HELPER_1(float_ ## op ## w_d, i32, i64)
+#define FOP_PROTO(op)                            \
+DEF_HELPER_2(float_ ## op ## l_s, i64, env, i32) \
+DEF_HELPER_2(float_ ## op ## l_d, i64, env, i64) \
+DEF_HELPER_2(float_ ## op ## w_s, i32, env, i32) \
+DEF_HELPER_2(float_ ## op ## w_d, i32, env, i64)
 FOP_PROTO(round)
 FOP_PROTO(trunc)
 FOP_PROTO(ceil)
 FOP_PROTO(floor)
 #undef FOP_PROTO
 
-#define FOP_PROTO(op)                       \
-DEF_HELPER_1(float_ ## op ## _s, i32, i32)  \
-DEF_HELPER_1(float_ ## op ## _d, i64, i64)
+#define FOP_PROTO(op)                            \
+DEF_HELPER_2(float_ ## op ## _s, i32, env, i32)  \
+DEF_HELPER_2(float_ ## op ## _d, i64, env, i64)
 FOP_PROTO(sqrt)
 FOP_PROTO(rsqrt)
 FOP_PROTO(recip)
@@ -210,14 +210,20 @@
 DEF_HELPER_1(float_ ## op ## _ps, i64, i64)
 FOP_PROTO(abs)
 FOP_PROTO(chs)
+#undef FOP_PROTO
+
+#define FOP_PROTO(op)                       \
+DEF_HELPER_2(float_ ## op ## _s, i32, env, i32)  \
+DEF_HELPER_2(float_ ## op ## _d, i64, env, i64)  \
+DEF_HELPER_2(float_ ## op ## _ps, i64, env, i64)
 FOP_PROTO(recip1)
 FOP_PROTO(rsqrt1)
 #undef FOP_PROTO
 
-#define FOP_PROTO(op)                             \
-DEF_HELPER_2(float_ ## op ## _s, i32, i32, i32)   \
-DEF_HELPER_2(float_ ## op ## _d, i64, i64, i64)   \
-DEF_HELPER_2(float_ ## op ## _ps, i64, i64, i64)
+#define FOP_PROTO(op)                                  \
+DEF_HELPER_3(float_ ## op ## _s, i32, env, i32, i32)   \
+DEF_HELPER_3(float_ ## op ## _d, i64, env, i64, i64)   \
+DEF_HELPER_3(float_ ## op ## _ps, i64, env, i64, i64)
 FOP_PROTO(add)
 FOP_PROTO(sub)
 FOP_PROTO(mul)
@@ -226,23 +232,23 @@
 FOP_PROTO(rsqrt2)
 #undef FOP_PROTO
 
-#define FOP_PROTO(op)                                 \
-DEF_HELPER_3(float_ ## op ## _s, i32, i32, i32, i32)  \
-DEF_HELPER_3(float_ ## op ## _d, i64, i64, i64, i64)  \
-DEF_HELPER_3(float_ ## op ## _ps, i64, i64, i64, i64)
+#define FOP_PROTO(op)                                      \
+DEF_HELPER_4(float_ ## op ## _s, i32, env, i32, i32, i32)  \
+DEF_HELPER_4(float_ ## op ## _d, i64, env, i64, i64, i64)  \
+DEF_HELPER_4(float_ ## op ## _ps, i64, env, i64, i64, i64)
 FOP_PROTO(muladd)
 FOP_PROTO(mulsub)
 FOP_PROTO(nmuladd)
 FOP_PROTO(nmulsub)
 #undef FOP_PROTO
 
-#define FOP_PROTO(op)                               \
-DEF_HELPER_3(cmp_d_ ## op, void, i64, i64, int)     \
-DEF_HELPER_3(cmpabs_d_ ## op, void, i64, i64, int)  \
-DEF_HELPER_3(cmp_s_ ## op, void, i32, i32, int)     \
-DEF_HELPER_3(cmpabs_s_ ## op, void, i32, i32, int)  \
-DEF_HELPER_3(cmp_ps_ ## op, void, i64, i64, int)    \
-DEF_HELPER_3(cmpabs_ps_ ## op, void, i64, i64, int)
+#define FOP_PROTO(op)                                    \
+DEF_HELPER_4(cmp_d_ ## op, void, env, i64, i64, int)     \
+DEF_HELPER_4(cmpabs_d_ ## op, void, env, i64, i64, int)  \
+DEF_HELPER_4(cmp_s_ ## op, void, env, i32, i32, int)     \
+DEF_HELPER_4(cmpabs_s_ ## op, void, env, i32, i32, int)  \
+DEF_HELPER_4(cmp_ps_ ## op, void, env, i64, i64, int)    \
+DEF_HELPER_4(cmpabs_ps_ ## op, void, env, i64, i64, int)
 FOP_PROTO(f)
 FOP_PROTO(un)
 FOP_PROTO(eq)
@@ -263,20 +269,20 @@
 
 /* Special functions */
 #ifndef CONFIG_USER_ONLY
-DEF_HELPER_0(tlbwi, void)
-DEF_HELPER_0(tlbwr, void)
-DEF_HELPER_0(tlbp, void)
-DEF_HELPER_0(tlbr, void)
-DEF_HELPER_0(di, tl)
-DEF_HELPER_0(ei, tl)
-DEF_HELPER_0(eret, void)
-DEF_HELPER_0(deret, void)
+DEF_HELPER_1(tlbwi, void, env)
+DEF_HELPER_1(tlbwr, void, env)
+DEF_HELPER_1(tlbp, void, env)
+DEF_HELPER_1(tlbr, void, env)
+DEF_HELPER_1(di, tl, env)
+DEF_HELPER_1(ei, tl, env)
+DEF_HELPER_1(eret, void, env)
+DEF_HELPER_1(deret, void, env)
 #endif /* !CONFIG_USER_ONLY */
-DEF_HELPER_0(rdhwr_cpunum, tl)
-DEF_HELPER_0(rdhwr_synci_step, tl)
-DEF_HELPER_0(rdhwr_cc, tl)
-DEF_HELPER_0(rdhwr_ccres, tl)
-DEF_HELPER_1(pmon, void, int)
-DEF_HELPER_0(wait, void)
+DEF_HELPER_1(rdhwr_cpunum, tl, env)
+DEF_HELPER_1(rdhwr_synci_step, tl, env)
+DEF_HELPER_1(rdhwr_cc, tl, env)
+DEF_HELPER_1(rdhwr_ccres, tl, env)
+DEF_HELPER_2(pmon, void, env, int)
+DEF_HELPER_1(wait, void, env)
 
 #include "exec/def-helper.h"
diff --git a/target-mips/machine.c b/target-mips/machine.c
index 55dc74b..1c56890 100644
--- a/target-mips/machine.c
+++ b/target-mips/machine.c
@@ -1,7 +1,7 @@
 #include "hw/hw.h"
 #include "hw/boards.h"
 
-#include "exec/exec-all.h"
+#include "cpu.h"
 
 static void save_tc(QEMUFile *f, TCState *tc)
 {
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index 2936a84..9f8adfd 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -17,15 +17,73 @@
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
 #include <stdlib.h>
-#include "exec.h"
-
+#include "cpu.h"
 #include "qemu/host-utils.h"
-
+#include "tcg.h"
 #include "helper.h"
+
+#if !defined(CONFIG_USER_ONLY)
+#include "exec/softmmu_exec.h"
+#endif /* !defined(CONFIG_USER_ONLY) */
+
+#ifndef CONFIG_USER_ONLY
+static inline void cpu_mips_tlb_flush (CPUMIPSState *env, int flush_global);
+#endif
+
+static inline void compute_hflags(CPUMIPSState *env)
+{
+    env->hflags &= ~(MIPS_HFLAG_COP1X | MIPS_HFLAG_64 | MIPS_HFLAG_CP0 |
+                     MIPS_HFLAG_F64 | MIPS_HFLAG_FPU | MIPS_HFLAG_KSU |
+                     MIPS_HFLAG_UX);
+    if (!(env->CP0_Status & (1 << CP0St_EXL)) &&
+        !(env->CP0_Status & (1 << CP0St_ERL)) &&
+        !(env->hflags & MIPS_HFLAG_DM)) {
+        env->hflags |= (env->CP0_Status >> CP0St_KSU) & MIPS_HFLAG_KSU;
+    }
+#if defined(TARGET_MIPS64)
+    if (((env->hflags & MIPS_HFLAG_KSU) != MIPS_HFLAG_UM) ||
+        (env->CP0_Status & (1 << CP0St_PX)) ||
+        (env->CP0_Status & (1 << CP0St_UX))) {
+        env->hflags |= MIPS_HFLAG_64;
+    }
+    if (env->CP0_Status & (1 << CP0St_UX)) {
+        env->hflags |= MIPS_HFLAG_UX;
+    }
+#endif
+    if ((env->CP0_Status & (1 << CP0St_CU0)) ||
+        !(env->hflags & MIPS_HFLAG_KSU)) {
+        env->hflags |= MIPS_HFLAG_CP0;
+    }
+    if (env->CP0_Status & (1 << CP0St_CU1)) {
+        env->hflags |= MIPS_HFLAG_FPU;
+    }
+    if (env->CP0_Status & (1 << CP0St_FR)) {
+        env->hflags |= MIPS_HFLAG_F64;
+    }
+    if (env->insn_flags & ISA_MIPS32R2) {
+        if (env->active_fpu.fcr0 & (1 << FCR0_F64)) {
+            env->hflags |= MIPS_HFLAG_COP1X;
+        }
+    } else if (env->insn_flags & ISA_MIPS32) {
+        if (env->hflags & MIPS_HFLAG_64) {
+            env->hflags |= MIPS_HFLAG_COP1X;
+        }
+    } else if (env->insn_flags & ISA_MIPS4) {
+        /* All supported MIPS IV CPUs use the XX (CU3) to enable
+           and disable the MIPS IV extensions to the MIPS III ISA.
+           Some other MIPS IV CPUs ignore the bit, so the check here
+           would be too restrictive for them.  */
+        if (env->CP0_Status & (1 << CP0St_CU3)) {
+            env->hflags |= MIPS_HFLAG_COP1X;
+        }
+    }
+}
+
 /*****************************************************************************/
 /* Exceptions processing helpers */
 
-void helper_raise_exception_err (uint32_t exception, int error_code)
+void helper_raise_exception_err (CPUMIPSState *env,
+                                 uint32_t exception, int error_code)
 {
 #if 1
     if (exception < 0x100)
@@ -36,12 +94,12 @@
     cpu_loop_exit(env);
 }
 
-void helper_raise_exception (uint32_t exception)
+void helper_raise_exception (CPUMIPSState *env, uint32_t exception)
 {
-    helper_raise_exception_err(exception, 0);
+    helper_raise_exception_err(env, exception, 0);
 }
 
-void helper_interrupt_restart (void)
+void helper_interrupt_restart (CPUMIPSState *env)
 {
     if (!(env->CP0_Status & (1 << CP0St_EXL)) &&
         !(env->CP0_Status & (1 << CP0St_ERL)) &&
@@ -49,39 +107,40 @@
         (env->CP0_Status & (1 << CP0St_IE)) &&
         (env->CP0_Status & env->CP0_Cause & CP0Ca_IP_mask)) {
         env->CP0_Cause &= ~(0x1f << CP0Ca_EC);
-        helper_raise_exception(EXCP_EXT_INTERRUPT);
+        helper_raise_exception(env, EXCP_EXT_INTERRUPT);
     }
 }
 
 #if !defined(CONFIG_USER_ONLY)
-static void do_restore_state (void *pc_ptr)
+static void do_restore_state (CPUMIPSState *env, uintptr_t pc)
 {
     TranslationBlock *tb;
-    unsigned long pc = (unsigned long) pc_ptr;
 
     tb = tb_find_pc (pc);
     if (tb) {
-        cpu_restore_state (tb, env, pc);
+        cpu_restore_state (env, pc);
     }
 }
 #endif
 
 #if defined(CONFIG_USER_ONLY)
 #define HELPER_LD(name, insn, type)                                     \
-static inline type do_##name(target_ulong addr, int mem_idx)            \
+static inline type do_##name(CPUMIPSState *env, target_ulong addr,      \
+                             int mem_idx)                               \
 {                                                                       \
-    return (type) insn##_raw(addr);                                     \
+    return (type) cpu_##insn##_raw(env, addr);                                     \
 }
 #else
 #define HELPER_LD(name, insn, type)                                     \
-static inline type do_##name(target_ulong addr, int mem_idx)            \
+static inline type do_##name(CPUMIPSState *env, target_ulong addr,      \
+                             int mem_idx)                               \
 {                                                                       \
     switch (mem_idx)                                                    \
     {                                                                   \
-    case 0: return (type) insn##_kernel(addr); break;                   \
-    case 1: return (type) insn##_super(addr); break;                    \
+    case 0: return (type) cpu_##insn##_kernel(env, addr); break;        \
+    case 1: return (type) cpu_##insn##_super(env, addr); break;         \
     default:                                                            \
-    case 2: return (type) insn##_user(addr); break;                     \
+    case 2: return (type) cpu_##insn##_user(env, addr); break;          \
     }                                                                   \
 }
 #endif
@@ -94,20 +153,22 @@
 
 #if defined(CONFIG_USER_ONLY)
 #define HELPER_ST(name, insn, type)                                     \
-static inline void do_##name(target_ulong addr, type val, int mem_idx)  \
+static inline void do_##name(CPUMIPSState *env, target_ulong addr,      \
+                             type val, int mem_idx)                     \
 {                                                                       \
-    insn##_raw(addr, val);                                              \
+    cpu_##insn##_raw(env, addr, val);                                              \
 }
 #else
 #define HELPER_ST(name, insn, type)                                     \
-static inline void do_##name(target_ulong addr, type val, int mem_idx)  \
+static inline void do_##name(CPUMIPSState *env, target_ulong addr,      \
+                             type val, int mem_idx)                     \
 {                                                                       \
     switch (mem_idx)                                                    \
     {                                                                   \
-    case 0: insn##_kernel(addr, val); break;                            \
-    case 1: insn##_super(addr, val); break;                             \
+    case 0: cpu_##insn##_kernel(env, addr, val); break;                 \
+    case 1: cpu_##insn##_super(env, addr, val); break;                  \
     default:                                                            \
-    case 2: insn##_user(addr, val); break;                              \
+    case 2: cpu_##insn##_user(env, addr, val); break;                   \
     }                                                                   \
 }
 #endif
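
To make the macro change concrete: with the explicit `env` parameter, an instantiation such as `HELPER_ST(sb, stb, uint8_t)` (the exact instantiation is assumed here, but it matches the do_sb() calls further down) expands in the softmmu case to roughly:

static inline void do_sb(CPUMIPSState *env, target_ulong addr,
                         uint8_t val, int mem_idx)
{
    /* mem_idx selects the MMU mode; each cpu_stb_* variant now receives
       the CPU state instead of reading a global 'env' register variable. */
    switch (mem_idx) {
    case 0: cpu_stb_kernel(env, addr, val); break;
    case 1: cpu_stb_super(env, addr, val);  break;
    default:
    case 2: cpu_stb_user(env, addr, val);   break;
    }
}
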
@@ -141,135 +202,149 @@
 #endif /* TARGET_MIPS64 */
 
 /* 64 bits arithmetic for 32 bits hosts */
-static inline uint64_t get_HILO (void)
+static inline uint64_t get_HILO(CPUMIPSState *env)
 {
     return ((uint64_t)(env->active_tc.HI[0]) << 32) | (uint32_t)env->active_tc.LO[0];
 }
 
-static inline void set_HILO (uint64_t HILO)
+static inline void set_HILO (CPUMIPSState *env, uint64_t HILO)
 {
     env->active_tc.LO[0] = (int32_t)HILO;
     env->active_tc.HI[0] = (int32_t)(HILO >> 32);
 }
 
-static inline void set_HIT0_LO (target_ulong arg1, uint64_t HILO)
+static inline void set_HIT0_LO (CPUMIPSState *env, target_ulong arg1, uint64_t HILO)
 {
     env->active_tc.LO[0] = (int32_t)(HILO & 0xFFFFFFFF);
     arg1 = env->active_tc.HI[0] = (int32_t)(HILO >> 32);
 }
 
-static inline void set_HI_LOT0 (target_ulong arg1, uint64_t HILO)
+static inline void set_HI_LOT0 (CPUMIPSState *env, target_ulong arg1, uint64_t HILO)
 {
     arg1 = env->active_tc.LO[0] = (int32_t)(HILO & 0xFFFFFFFF);
     env->active_tc.HI[0] = (int32_t)(HILO >> 32);
 }
 
 /* Multiplication variants of the vr54xx. */
-target_ulong helper_muls (target_ulong arg1, target_ulong arg2)
+target_ulong helper_muls(CPUMIPSState *env, target_ulong arg1,
+                         target_ulong arg2)
 {
-    set_HI_LOT0(arg1, 0 - ((int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2));
+    set_HI_LOT0(env, arg1, 0 - ((int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2));
 
     return arg1;
 }
 
-target_ulong helper_mulsu (target_ulong arg1, target_ulong arg2)
+target_ulong helper_mulsu(CPUMIPSState *env, target_ulong arg1,
+                          target_ulong arg2)
 {
-    set_HI_LOT0(arg1, 0 - ((uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2));
+    set_HI_LOT0(env, arg1, 0 - ((uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2));
 
     return arg1;
 }
 
-target_ulong helper_macc (target_ulong arg1, target_ulong arg2)
+target_ulong helper_macc(CPUMIPSState *env, target_ulong arg1,
+                         target_ulong arg2)
 {
-    set_HI_LOT0(arg1, ((int64_t)get_HILO()) + ((int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2));
+    set_HI_LOT0(env, arg1, ((int64_t)get_HILO(env)) + ((int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2));
 
     return arg1;
 }
 
-target_ulong helper_macchi (target_ulong arg1, target_ulong arg2)
+target_ulong helper_macchi(CPUMIPSState *env, target_ulong arg1,
+                           target_ulong arg2)
 {
-    set_HIT0_LO(arg1, ((int64_t)get_HILO()) + ((int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2));
+    set_HIT0_LO(env, arg1, ((int64_t)get_HILO(env)) + ((int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2));
 
     return arg1;
 }
 
-target_ulong helper_maccu (target_ulong arg1, target_ulong arg2)
+target_ulong helper_maccu(CPUMIPSState *env, target_ulong arg1,
+                          target_ulong arg2)
 {
-    set_HI_LOT0(arg1, ((uint64_t)get_HILO()) + ((uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2));
+    set_HI_LOT0(env, arg1, ((uint64_t)get_HILO(env)) + ((uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2));
 
     return arg1;
 }
 
-target_ulong helper_macchiu (target_ulong arg1, target_ulong arg2)
+target_ulong helper_macchiu(CPUMIPSState *env, target_ulong arg1,
+                            target_ulong arg2)
 {
-    set_HIT0_LO(arg1, ((uint64_t)get_HILO()) + ((uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2));
+    set_HIT0_LO(env, arg1, ((uint64_t)get_HILO(env)) + ((uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2));
 
     return arg1;
 }
 
-target_ulong helper_msac (target_ulong arg1, target_ulong arg2)
+target_ulong helper_msac(CPUMIPSState *env, target_ulong arg1,
+                         target_ulong arg2)
 {
-    set_HI_LOT0(arg1, ((int64_t)get_HILO()) - ((int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2));
+    set_HI_LOT0(env, arg1, ((int64_t)get_HILO(env)) - ((int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2));
 
     return arg1;
 }
 
-target_ulong helper_msachi (target_ulong arg1, target_ulong arg2)
+target_ulong helper_msachi(CPUMIPSState *env, target_ulong arg1,
+                           target_ulong arg2)
 {
-    set_HIT0_LO(arg1, ((int64_t)get_HILO()) - ((int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2));
+    set_HIT0_LO(env, arg1, ((int64_t)get_HILO(env)) - ((int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2));
 
     return arg1;
 }
 
-target_ulong helper_msacu (target_ulong arg1, target_ulong arg2)
+target_ulong helper_msacu(CPUMIPSState *env, target_ulong arg1,
+                          target_ulong arg2)
 {
-    set_HI_LOT0(arg1, ((uint64_t)get_HILO()) - ((uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2));
+    set_HI_LOT0(env, arg1, ((uint64_t)get_HILO(env)) - ((uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2));
 
     return arg1;
 }
 
-target_ulong helper_msachiu (target_ulong arg1, target_ulong arg2)
+target_ulong helper_msachiu(CPUMIPSState *env, target_ulong arg1,
+                            target_ulong arg2)
 {
-    set_HIT0_LO(arg1, ((uint64_t)get_HILO()) - ((uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2));
+    set_HIT0_LO(env, arg1, ((uint64_t)get_HILO(env)) - ((uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2));
 
     return arg1;
 }
 
-target_ulong helper_mulhi (target_ulong arg1, target_ulong arg2)
+target_ulong helper_mulhi(CPUMIPSState *env, target_ulong arg1,
+                          target_ulong arg2)
 {
-    set_HIT0_LO(arg1, (int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2);
+    set_HIT0_LO(env, arg1, (int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2);
 
     return arg1;
 }
 
-target_ulong helper_mulhiu (target_ulong arg1, target_ulong arg2)
+target_ulong helper_mulhiu(CPUMIPSState *env, target_ulong arg1,
+                           target_ulong arg2)
 {
-    set_HIT0_LO(arg1, (uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2);
+    set_HIT0_LO(env, arg1, (uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2);
 
     return arg1;
 }
 
-target_ulong helper_mulshi (target_ulong arg1, target_ulong arg2)
+target_ulong helper_mulshi(CPUMIPSState *env, target_ulong arg1,
+                           target_ulong arg2)
 {
-    set_HIT0_LO(arg1, 0 - ((int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2));
+    set_HIT0_LO(env, arg1, 0 - ((int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2));
 
     return arg1;
 }
 
-target_ulong helper_mulshiu (target_ulong arg1, target_ulong arg2)
+target_ulong helper_mulshiu(CPUMIPSState *env, target_ulong arg1,
+                            target_ulong arg2)
 {
-    set_HIT0_LO(arg1, 0 - ((uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2));
+    set_HIT0_LO(env, arg1, 0 - ((uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2));
 
     return arg1;
 }
 
 #ifdef TARGET_MIPS64
-void helper_dmult (target_ulong arg1, target_ulong arg2)
+void helper_dmult (CPUMIPSState *env, target_ulong arg1, target_ulong arg2)
 {
     muls64(&(env->active_tc.LO[0]), &(env->active_tc.HI[0]), arg1, arg2);
 }
 
-void helper_dmultu (target_ulong arg1, target_ulong arg2)
+void helper_dmultu (CPUMIPSState *env, target_ulong arg1, target_ulong arg2)
 {
     mulu64(&(env->active_tc.LO[0]), &(env->active_tc.HI[0]), arg1, arg2);
 }
@@ -277,7 +352,9 @@
 
 #ifndef CONFIG_USER_ONLY
 
-static inline hwaddr do_translate_address(target_ulong address, int rw)
+static inline hwaddr do_translate_address(CPUMIPSState *env,
+                                                      target_ulong address,
+                                                      int rw)
 {
     hwaddr lladdr;
 
@@ -291,11 +368,12 @@
 }
 
 #define HELPER_LD_ATOMIC(name, insn)                                          \
-target_ulong helper_##name(target_ulong arg, int mem_idx)                     \
+target_ulong helper_##name(CPUMIPSState *env, target_ulong arg, int mem_idx)  \
 {                                                                             \
-    env->lladdr = do_translate_address(arg, 0);                               \
-    env->llval = do_##insn(arg, mem_idx);                                     \
-    return env->llval;                                                        \
+    env->lladdr = do_translate_address(env, arg, 0);                        \
+    /* NOTE(digit): Use of 'cpu_single_env' works around compiler bug! */     \
+    cpu_single_env->llval = do_##insn(env, arg, mem_idx);                    \
+    return env->llval;                                                       \
 }
 HELPER_LD_ATOMIC(ll, lw)
 #ifdef TARGET_MIPS64
@@ -304,18 +382,19 @@
 #undef HELPER_LD_ATOMIC
 
 #define HELPER_ST_ATOMIC(name, ld_insn, st_insn, almask)                      \
-target_ulong helper_##name(target_ulong arg1, target_ulong arg2, int mem_idx) \
+target_ulong helper_##name(CPUMIPSState *env, target_ulong arg1,              \
+                           target_ulong arg2, int mem_idx)                    \
 {                                                                             \
     target_long tmp;                                                          \
                                                                               \
     if (arg2 & almask) {                                                      \
         env->CP0_BadVAddr = arg2;                                             \
-        helper_raise_exception(EXCP_AdES);                                    \
+        helper_raise_exception(env, EXCP_AdES);                               \
     }                                                                         \
-    if (do_translate_address(arg2, 1) == env->lladdr) {                       \
-        tmp = do_##ld_insn(arg2, mem_idx);                                    \
+    if (do_translate_address(env, arg2, 1) == env->lladdr) {                  \
+        tmp = do_##ld_insn(env, arg2, mem_idx);                               \
         if (tmp == env->llval) {                                              \
-            do_##st_insn(arg2, arg1, mem_idx);                                \
+            do_##st_insn(env, arg2, arg1, mem_idx);                           \
             return 1;                                                         \
         }                                                                     \
     }                                                                         \
@@ -336,80 +415,84 @@
 #define GET_OFFSET(addr, offset) (addr - (offset))
 #endif
 
-target_ulong helper_lwl(target_ulong arg1, target_ulong arg2, int mem_idx)
+target_ulong helper_lwl(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
+                        int mem_idx)
 {
     target_ulong tmp;
 
-    tmp = do_lbu(arg2, mem_idx);
+    tmp = do_lbu(env, arg2, mem_idx);
     arg1 = (arg1 & 0x00FFFFFF) | (tmp << 24);
 
     if (GET_LMASK(arg2) <= 2) {
-        tmp = do_lbu(GET_OFFSET(arg2, 1), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, 1), mem_idx);
         arg1 = (arg1 & 0xFF00FFFF) | (tmp << 16);
     }
 
     if (GET_LMASK(arg2) <= 1) {
-        tmp = do_lbu(GET_OFFSET(arg2, 2), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, 2), mem_idx);
         arg1 = (arg1 & 0xFFFF00FF) | (tmp << 8);
     }
 
     if (GET_LMASK(arg2) == 0) {
-        tmp = do_lbu(GET_OFFSET(arg2, 3), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, 3), mem_idx);
         arg1 = (arg1 & 0xFFFFFF00) | tmp;
     }
     return (int32_t)arg1;
 }
 
-target_ulong helper_lwr(target_ulong arg1, target_ulong arg2, int mem_idx)
+target_ulong helper_lwr(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
+                        int mem_idx)
 {
     target_ulong tmp;
 
-    tmp = do_lbu(arg2, mem_idx);
+    tmp = do_lbu(env, arg2, mem_idx);
     arg1 = (arg1 & 0xFFFFFF00) | tmp;
 
     if (GET_LMASK(arg2) >= 1) {
-        tmp = do_lbu(GET_OFFSET(arg2, -1), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -1), mem_idx);
         arg1 = (arg1 & 0xFFFF00FF) | (tmp << 8);
     }
 
     if (GET_LMASK(arg2) >= 2) {
-        tmp = do_lbu(GET_OFFSET(arg2, -2), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -2), mem_idx);
         arg1 = (arg1 & 0xFF00FFFF) | (tmp << 16);
     }
 
     if (GET_LMASK(arg2) == 3) {
-        tmp = do_lbu(GET_OFFSET(arg2, -3), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -3), mem_idx);
         arg1 = (arg1 & 0x00FFFFFF) | (tmp << 24);
     }
     return (int32_t)arg1;
 }
 
-void helper_swl(target_ulong arg1, target_ulong arg2, int mem_idx)
+void helper_swl(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
+                int mem_idx)
 {
-    do_sb(arg2, (uint8_t)(arg1 >> 24), mem_idx);
+    do_sb(env, arg2, (uint8_t)(arg1 >> 24), mem_idx);
 
     if (GET_LMASK(arg2) <= 2)
-        do_sb(GET_OFFSET(arg2, 1), (uint8_t)(arg1 >> 16), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 1), (uint8_t)(arg1 >> 16), mem_idx);
 
     if (GET_LMASK(arg2) <= 1)
-        do_sb(GET_OFFSET(arg2, 2), (uint8_t)(arg1 >> 8), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 2), (uint8_t)(arg1 >> 8), mem_idx);
 
     if (GET_LMASK(arg2) == 0)
-        do_sb(GET_OFFSET(arg2, 3), (uint8_t)arg1, mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 3), (uint8_t)arg1, mem_idx);
 }
 
-void helper_swr(target_ulong arg1, target_ulong arg2, int mem_idx)
+void helper_swr(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
+                int mem_idx)
 {
-    do_sb(arg2, (uint8_t)arg1, mem_idx);
+    do_sb(env, arg2, (uint8_t)arg1, mem_idx);
 
     if (GET_LMASK(arg2) >= 1)
-        do_sb(GET_OFFSET(arg2, -1), (uint8_t)(arg1 >> 8), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -1), (uint8_t)(arg1 >> 8), mem_idx);
 
     if (GET_LMASK(arg2) >= 2)
-        do_sb(GET_OFFSET(arg2, -2), (uint8_t)(arg1 >> 16), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -2), (uint8_t)(arg1 >> 16), mem_idx);
 
     if (GET_LMASK(arg2) == 3)
-        do_sb(GET_OFFSET(arg2, -3), (uint8_t)(arg1 >> 24), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -3), (uint8_t)(arg1 >> 24), mem_idx);
 }
 
 #if defined(TARGET_MIPS64)
@@ -422,25 +505,26 @@
 #define GET_LMASK64(v) (((v) & 7) ^ 7)
 #endif
 
-target_ulong helper_ldl(target_ulong arg1, target_ulong arg2, int mem_idx)
+target_ulong helper_ldl(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
+                        int mem_idx)
 {
     uint64_t tmp;
 
-    tmp = do_lbu(arg2, mem_idx);
+    tmp = do_lbu(env, arg2, mem_idx);
     arg1 = (arg1 & 0x00FFFFFFFFFFFFFFULL) | (tmp << 56);
 
     if (GET_LMASK64(arg2) <= 6) {
-        tmp = do_lbu(GET_OFFSET(arg2, 1), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, 1), mem_idx);
         arg1 = (arg1 & 0xFF00FFFFFFFFFFFFULL) | (tmp << 48);
     }
 
     if (GET_LMASK64(arg2) <= 5) {
-        tmp = do_lbu(GET_OFFSET(arg2, 2), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, 2), mem_idx);
         arg1 = (arg1 & 0xFFFF00FFFFFFFFFFULL) | (tmp << 40);
     }
 
     if (GET_LMASK64(arg2) <= 4) {
-        tmp = do_lbu(GET_OFFSET(arg2, 3), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, 3), mem_idx);
         arg1 = (arg1 & 0xFFFFFF00FFFFFFFFULL) | (tmp << 32);
     }
 
@@ -450,301 +534,412 @@
     }
 
     if (GET_LMASK64(arg2) <= 2) {
-        tmp = do_lbu(GET_OFFSET(arg2, 5), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, 5), mem_idx);
         arg1 = (arg1 & 0xFFFFFFFFFF00FFFFULL) | (tmp << 16);
     }
 
     if (GET_LMASK64(arg2) <= 1) {
-        tmp = do_lbu(GET_OFFSET(arg2, 6), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, 6), mem_idx);
         arg1 = (arg1 & 0xFFFFFFFFFFFF00FFULL) | (tmp << 8);
     }
 
     if (GET_LMASK64(arg2) == 0) {
-        tmp = do_lbu(GET_OFFSET(arg2, 7), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, 7), mem_idx);
         arg1 = (arg1 & 0xFFFFFFFFFFFFFF00ULL) | tmp;
     }
 
     return arg1;
 }
 
-target_ulong helper_ldr(target_ulong arg1, target_ulong arg2, int mem_idx)
+target_ulong helper_ldr(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
+                        int mem_idx)
 {
     uint64_t tmp;
 
-    tmp = do_lbu(arg2, mem_idx);
+    tmp = do_lbu(env, arg2, mem_idx);
     arg1 = (arg1 & 0xFFFFFFFFFFFFFF00ULL) | tmp;
 
     if (GET_LMASK64(arg2) >= 1) {
-        tmp = do_lbu(GET_OFFSET(arg2, -1), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -1), mem_idx);
         arg1 = (arg1 & 0xFFFFFFFFFFFF00FFULL) | (tmp  << 8);
     }
 
     if (GET_LMASK64(arg2) >= 2) {
-        tmp = do_lbu(GET_OFFSET(arg2, -2), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -2), mem_idx);
         arg1 = (arg1 & 0xFFFFFFFFFF00FFFFULL) | (tmp << 16);
     }
 
     if (GET_LMASK64(arg2) >= 3) {
-        tmp = do_lbu(GET_OFFSET(arg2, -3), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -3), mem_idx);
         arg1 = (arg1 & 0xFFFFFFFF00FFFFFFULL) | (tmp << 24);
     }
 
     if (GET_LMASK64(arg2) >= 4) {
-        tmp = do_lbu(GET_OFFSET(arg2, -4), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -4), mem_idx);
         arg1 = (arg1 & 0xFFFFFF00FFFFFFFFULL) | (tmp << 32);
     }
 
     if (GET_LMASK64(arg2) >= 5) {
-        tmp = do_lbu(GET_OFFSET(arg2, -5), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -5), mem_idx);
         arg1 = (arg1 & 0xFFFF00FFFFFFFFFFULL) | (tmp << 40);
     }
 
     if (GET_LMASK64(arg2) >= 6) {
-        tmp = do_lbu(GET_OFFSET(arg2, -6), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -6), mem_idx);
         arg1 = (arg1 & 0xFF00FFFFFFFFFFFFULL) | (tmp << 48);
     }
 
     if (GET_LMASK64(arg2) == 7) {
-        tmp = do_lbu(GET_OFFSET(arg2, -7), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -7), mem_idx);
         arg1 = (arg1 & 0x00FFFFFFFFFFFFFFULL) | (tmp << 56);
     }
 
     return arg1;
 }
 
-void helper_sdl(target_ulong arg1, target_ulong arg2, int mem_idx)
+void helper_sdl(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
+                int mem_idx)
 {
-    do_sb(arg2, (uint8_t)(arg1 >> 56), mem_idx);
+    do_sb(env, arg2, (uint8_t)(arg1 >> 56), mem_idx);
 
     if (GET_LMASK64(arg2) <= 6)
-        do_sb(GET_OFFSET(arg2, 1), (uint8_t)(arg1 >> 48), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 1), (uint8_t)(arg1 >> 48), mem_idx);
 
     if (GET_LMASK64(arg2) <= 5)
-        do_sb(GET_OFFSET(arg2, 2), (uint8_t)(arg1 >> 40), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 2), (uint8_t)(arg1 >> 40), mem_idx);
 
     if (GET_LMASK64(arg2) <= 4)
-        do_sb(GET_OFFSET(arg2, 3), (uint8_t)(arg1 >> 32), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 3), (uint8_t)(arg1 >> 32), mem_idx);
 
     if (GET_LMASK64(arg2) <= 3)
-        do_sb(GET_OFFSET(arg2, 4), (uint8_t)(arg1 >> 24), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 4), (uint8_t)(arg1 >> 24), mem_idx);
 
     if (GET_LMASK64(arg2) <= 2)
-        do_sb(GET_OFFSET(arg2, 5), (uint8_t)(arg1 >> 16), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 5), (uint8_t)(arg1 >> 16), mem_idx);
 
     if (GET_LMASK64(arg2) <= 1)
-        do_sb(GET_OFFSET(arg2, 6), (uint8_t)(arg1 >> 8), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 6), (uint8_t)(arg1 >> 8), mem_idx);
 
     if (GET_LMASK64(arg2) <= 0)
-        do_sb(GET_OFFSET(arg2, 7), (uint8_t)arg1, mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 7), (uint8_t)arg1, mem_idx);
 }
 
-void helper_sdr(target_ulong arg1, target_ulong arg2, int mem_idx)
+void helper_sdr(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
+                int mem_idx)
 {
-    do_sb(arg2, (uint8_t)arg1, mem_idx);
+    do_sb(env, arg2, (uint8_t)arg1, mem_idx);
 
     if (GET_LMASK64(arg2) >= 1)
-        do_sb(GET_OFFSET(arg2, -1), (uint8_t)(arg1 >> 8), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -1), (uint8_t)(arg1 >> 8), mem_idx);
 
     if (GET_LMASK64(arg2) >= 2)
-        do_sb(GET_OFFSET(arg2, -2), (uint8_t)(arg1 >> 16), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -2), (uint8_t)(arg1 >> 16), mem_idx);
 
     if (GET_LMASK64(arg2) >= 3)
-        do_sb(GET_OFFSET(arg2, -3), (uint8_t)(arg1 >> 24), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -3), (uint8_t)(arg1 >> 24), mem_idx);
 
     if (GET_LMASK64(arg2) >= 4)
-        do_sb(GET_OFFSET(arg2, -4), (uint8_t)(arg1 >> 32), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -4), (uint8_t)(arg1 >> 32), mem_idx);
 
     if (GET_LMASK64(arg2) >= 5)
-        do_sb(GET_OFFSET(arg2, -5), (uint8_t)(arg1 >> 40), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -5), (uint8_t)(arg1 >> 40), mem_idx);
 
     if (GET_LMASK64(arg2) >= 6)
-        do_sb(GET_OFFSET(arg2, -6), (uint8_t)(arg1 >> 48), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -6), (uint8_t)(arg1 >> 48), mem_idx);
 
     if (GET_LMASK64(arg2) == 7)
-        do_sb(GET_OFFSET(arg2, -7), (uint8_t)(arg1 >> 56), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -7), (uint8_t)(arg1 >> 56), mem_idx);
 }
 #endif /* TARGET_MIPS64 */
 
 #ifndef CONFIG_USER_ONLY
+/* tc should point to an int with the value of the global TC index.
+   This function will transform it into a local index within the
+   returned CPUMIPSState.
+
+   FIXME: This code assumes that all VPEs have the same number of TCs,
+          which depends on runtime setup. Can probably be fixed by
+          walking the list of CPUMIPSStates.  */
+static CPUMIPSState *mips_cpu_map_tc(CPUMIPSState *env, int *tc)
+{
+    int vpe_idx, nr_threads = ENV_GET_CPU(env)->nr_threads;
+    int tc_idx = *tc;
+
+    if (!(env->CP0_VPEConf0 & (1 << CP0VPEC0_MVP))) {
+        /* Not allowed to address other CPUs.  */
+        *tc = env->current_tc;
+        return env;
+    }
+
+    vpe_idx = tc_idx / nr_threads;
+    *tc = tc_idx % nr_threads;
+    CPUState *other = qemu_get_cpu(vpe_idx);
+    return other ? other->env_ptr : env;
+}
+
+/* The per-VPE CP0_Status register shares some fields with the per-TC
+   CP0_TCStatus registers. These fields are wired to the same registers,
+   so changes to either of them should be reflected in both registers.
+
+   Also, EntryHi shares its bottom 8-bit ASID field with TCStatus.
+
+   These helpers synchronize the registers for a given CPU.  */
+
+/* Called for updates to CP0_Status.  */
+static void sync_c0_status(CPUMIPSState *env, CPUMIPSState *cpu, int tc)
+{
+    int32_t tcstatus, *tcst;
+    uint32_t v = cpu->CP0_Status;
+    uint32_t cu, mx, asid, ksu;
+    uint32_t mask = ((1 << CP0TCSt_TCU3)
+                       | (1 << CP0TCSt_TCU2)
+                       | (1 << CP0TCSt_TCU1)
+                       | (1 << CP0TCSt_TCU0)
+                       | (1 << CP0TCSt_TMX)
+                       | (3 << CP0TCSt_TKSU)
+                       | (0xff << CP0TCSt_TASID));
+
+    cu = (v >> CP0St_CU0) & 0xf;
+    mx = (v >> CP0St_MX) & 0x1;
+    ksu = (v >> CP0St_KSU) & 0x3;
+    asid = env->CP0_EntryHi & 0xff;
+
+    tcstatus = cu << CP0TCSt_TCU0;
+    tcstatus |= mx << CP0TCSt_TMX;
+    tcstatus |= ksu << CP0TCSt_TKSU;
+    tcstatus |= asid;
+
+    if (tc == cpu->current_tc) {
+        tcst = &cpu->active_tc.CP0_TCStatus;
+    } else {
+        tcst = &cpu->tcs[tc].CP0_TCStatus;
+    }
+
+    *tcst &= ~mask;
+    *tcst |= tcstatus;
+    compute_hflags(cpu);
+}
+
+/* Called for updates to CP0_TCStatus.  */
+static void sync_c0_tcstatus(CPUMIPSState *cpu, int tc,
+                             target_ulong v)
+{
+    uint32_t status;
+    uint32_t tcu, tmx, tasid, tksu;
+    uint32_t mask = ((1 << CP0St_CU3)
+                       | (1 << CP0St_CU2)
+                       | (1 << CP0St_CU1)
+                       | (1 << CP0St_CU0)
+                       | (1 << CP0St_MX)
+                       | (3 << CP0St_KSU));
+
+    tcu = (v >> CP0TCSt_TCU0) & 0xf;
+    tmx = (v >> CP0TCSt_TMX) & 0x1;
+    tasid = v & 0xff;
+    tksu = (v >> CP0TCSt_TKSU) & 0x3;
+
+    status = tcu << CP0St_CU0;
+    status |= tmx << CP0St_MX;
+    status |= tksu << CP0St_KSU;
+
+    cpu->CP0_Status &= ~mask;
+    cpu->CP0_Status |= status;
+
+    /* Sync the TASID with EntryHi.  */
+    cpu->CP0_EntryHi &= ~0xff;
+    cpu->CP0_EntryHi |= tasid;
+
+    compute_hflags(cpu);
+}
+
+/* Called for updates to CP0_EntryHi.  */
+static void sync_c0_entryhi(CPUMIPSState *cpu, int tc)
+{
+    int32_t *tcst;
+    uint32_t asid, v = cpu->CP0_EntryHi;
+
+    asid = v & 0xff;
+
+    if (tc == cpu->current_tc) {
+        tcst = &cpu->active_tc.CP0_TCStatus;
+    } else {
+        tcst = &cpu->tcs[tc].CP0_TCStatus;
+    }
+
+    *tcst &= ~0xff;
+    *tcst |= asid;
+}
+
 /* CP0 helpers */
-target_ulong helper_mfc0_mvpcontrol (void)
+target_ulong helper_mfc0_mvpcontrol(CPUMIPSState *env)
 {
     return env->mvp->CP0_MVPControl;
 }
 
-target_ulong helper_mfc0_mvpconf0 (void)
+target_ulong helper_mfc0_mvpconf0(CPUMIPSState *env)
 {
     return env->mvp->CP0_MVPConf0;
 }
 
-target_ulong helper_mfc0_mvpconf1 (void)
+target_ulong helper_mfc0_mvpconf1(CPUMIPSState *env)
 {
     return env->mvp->CP0_MVPConf1;
 }
 
-target_ulong helper_mfc0_random (void)
+target_ulong helper_mfc0_random(CPUMIPSState *env)
 {
     return (int32_t)cpu_mips_get_random(env);
 }
 
-target_ulong helper_mfc0_tcstatus (void)
+target_ulong helper_mfc0_tcstatus(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCStatus;
 }
 
-target_ulong helper_mftc0_tcstatus(void)
+target_ulong helper_mftc0_tcstatus(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    if (other_tc == env->current_tc)
-        return env->active_tc.CP0_TCStatus;
+    if (other_tc == other->current_tc)
+        return other->active_tc.CP0_TCStatus;
     else
-        return env->tcs[other_tc].CP0_TCStatus;
+        return other->tcs[other_tc].CP0_TCStatus;
 }
 
-target_ulong helper_mfc0_tcbind (void)
+target_ulong helper_mfc0_tcbind(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCBind;
 }
 
-target_ulong helper_mftc0_tcbind(void)
+target_ulong helper_mftc0_tcbind(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    if (other_tc == env->current_tc)
-        return env->active_tc.CP0_TCBind;
+    if (other_tc == other->current_tc)
+        return other->active_tc.CP0_TCBind;
     else
-        return env->tcs[other_tc].CP0_TCBind;
+        return other->tcs[other_tc].CP0_TCBind;
 }
 
-target_ulong helper_mfc0_tcrestart (void)
+target_ulong helper_mfc0_tcrestart(CPUMIPSState *env)
 {
     return env->active_tc.PC;
 }
 
-target_ulong helper_mftc0_tcrestart(void)
+target_ulong helper_mftc0_tcrestart(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    if (other_tc == env->current_tc)
-        return env->active_tc.PC;
+    if (other_tc == other->current_tc)
+        return other->active_tc.PC;
     else
-        return env->tcs[other_tc].PC;
+        return other->tcs[other_tc].PC;
 }
 
-target_ulong helper_mfc0_tchalt (void)
+target_ulong helper_mfc0_tchalt(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCHalt;
 }
 
-target_ulong helper_mftc0_tchalt(void)
+target_ulong helper_mftc0_tchalt(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    if (other_tc == env->current_tc)
-        return env->active_tc.CP0_TCHalt;
+    if (other_tc == other->current_tc)
+        return other->active_tc.CP0_TCHalt;
     else
-        return env->tcs[other_tc].CP0_TCHalt;
+        return other->tcs[other_tc].CP0_TCHalt;
 }
 
-target_ulong helper_mfc0_tccontext (void)
+target_ulong helper_mfc0_tccontext(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCContext;
 }
 
-target_ulong helper_mftc0_tccontext(void)
+target_ulong helper_mftc0_tccontext(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    if (other_tc == env->current_tc)
-        return env->active_tc.CP0_TCContext;
+    if (other_tc == other->current_tc)
+        return other->active_tc.CP0_TCContext;
     else
-        return env->tcs[other_tc].CP0_TCContext;
+        return other->tcs[other_tc].CP0_TCContext;
 }
 
-target_ulong helper_mfc0_tcschedule (void)
+target_ulong helper_mfc0_tcschedule(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCSchedule;
 }
 
-target_ulong helper_mftc0_tcschedule(void)
+target_ulong helper_mftc0_tcschedule(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    if (other_tc == env->current_tc)
-        return env->active_tc.CP0_TCSchedule;
+    if (other_tc == other->current_tc)
+        return other->active_tc.CP0_TCSchedule;
     else
-        return env->tcs[other_tc].CP0_TCSchedule;
+        return other->tcs[other_tc].CP0_TCSchedule;
 }
 
-target_ulong helper_mfc0_tcschefback (void)
+target_ulong helper_mfc0_tcschefback(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCScheFBack;
 }
 
-target_ulong helper_mftc0_tcschefback(void)
+target_ulong helper_mftc0_tcschefback(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    if (other_tc == env->current_tc)
-        return env->active_tc.CP0_TCScheFBack;
+    if (other_tc == other->current_tc)
+        return other->active_tc.CP0_TCScheFBack;
     else
-        return env->tcs[other_tc].CP0_TCScheFBack;
+        return other->tcs[other_tc].CP0_TCScheFBack;
 }
 
-target_ulong helper_mfc0_count (void)
+target_ulong helper_mfc0_count(CPUMIPSState *env)
 {
     return (int32_t)cpu_mips_get_count(env);
 }
 
-target_ulong helper_mftc0_entryhi(void)
+target_ulong helper_mftc0_entryhi(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    int32_t tcstatus;
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    if (other_tc == env->current_tc)
-        tcstatus = env->active_tc.CP0_TCStatus;
-    else
-        tcstatus = env->tcs[other_tc].CP0_TCStatus;
-
-    return (env->CP0_EntryHi & ~0xff) | (tcstatus & 0xff);
+    return other->CP0_EntryHi;
 }
 
-target_ulong helper_mftc0_status(void)
+target_ulong helper_mftc0_status(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    target_ulong t0;
-    int32_t tcstatus;
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    if (other_tc == env->current_tc)
-        tcstatus = env->active_tc.CP0_TCStatus;
-    else
-        tcstatus = env->tcs[other_tc].CP0_TCStatus;
-
-    t0 = env->CP0_Status & ~0xf1000018;
-    t0 |= tcstatus & (0xf << CP0TCSt_TCU0);
-    t0 |= (tcstatus & (1 << CP0TCSt_TMX)) >> (CP0TCSt_TMX - CP0St_MX);
-    t0 |= (tcstatus & (0x3 << CP0TCSt_TKSU)) >> (CP0TCSt_TKSU - CP0St_KSU);
-
-    return t0;
+    return other->CP0_Status;
 }
 
-target_ulong helper_mfc0_lladdr (void)
+target_ulong helper_mfc0_lladdr(CPUMIPSState *env)
 {
     return (int32_t)(env->lladdr >> env->CP0_LLAddr_shift);
 }
 
-target_ulong helper_mfc0_watchlo (uint32_t sel)
+target_ulong helper_mfc0_watchlo(CPUMIPSState *env, uint32_t sel)
 {
     return (int32_t)env->CP0_WatchLo[sel];
 }
 
-target_ulong helper_mfc0_watchhi (uint32_t sel)
+target_ulong helper_mfc0_watchhi(CPUMIPSState *env, uint32_t sel)
 {
     return env->CP0_WatchHi[sel];
 }
 
-target_ulong helper_mfc0_debug (void)
+target_ulong helper_mfc0_debug(CPUMIPSState *env)
 {
     target_ulong t0 = env->CP0_Debug;
     if (env->hflags & MIPS_HFLAG_DM)
@@ -753,59 +948,60 @@
     return t0;
 }
 
-target_ulong helper_mftc0_debug(void)
+target_ulong helper_mftc0_debug(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
     int32_t tcstatus;
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    if (other_tc == env->current_tc)
-        tcstatus = env->active_tc.CP0_Debug_tcstatus;
+    if (other_tc == other->current_tc)
+        tcstatus = other->active_tc.CP0_Debug_tcstatus;
     else
-        tcstatus = env->tcs[other_tc].CP0_Debug_tcstatus;
+        tcstatus = other->tcs[other_tc].CP0_Debug_tcstatus;
 
     /* XXX: Might be wrong, check with EJTAG spec. */
-    return (env->CP0_Debug & ~((1 << CP0DB_SSt) | (1 << CP0DB_Halt))) |
+    return (other->CP0_Debug & ~((1 << CP0DB_SSt) | (1 << CP0DB_Halt))) |
             (tcstatus & ((1 << CP0DB_SSt) | (1 << CP0DB_Halt)));
 }
 
 #if defined(TARGET_MIPS64)
-target_ulong helper_dmfc0_tcrestart (void)
+target_ulong helper_dmfc0_tcrestart(CPUMIPSState *env)
 {
     return env->active_tc.PC;
 }
 
-target_ulong helper_dmfc0_tchalt (void)
+target_ulong helper_dmfc0_tchalt(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCHalt;
 }
 
-target_ulong helper_dmfc0_tccontext (void)
+target_ulong helper_dmfc0_tccontext(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCContext;
 }
 
-target_ulong helper_dmfc0_tcschedule (void)
+target_ulong helper_dmfc0_tcschedule(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCSchedule;
 }
 
-target_ulong helper_dmfc0_tcschefback (void)
+target_ulong helper_dmfc0_tcschefback(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCScheFBack;
 }
 
-target_ulong helper_dmfc0_lladdr (void)
+target_ulong helper_dmfc0_lladdr(CPUMIPSState *env)
 {
     return env->lladdr >> env->CP0_LLAddr_shift;
 }
 
-target_ulong helper_dmfc0_watchlo (uint32_t sel)
+target_ulong helper_dmfc0_watchlo(CPUMIPSState *env, uint32_t sel)
 {
     return env->CP0_WatchLo[sel];
 }
 #endif /* TARGET_MIPS64 */
 
-void helper_mtc0_index (target_ulong arg1)
+void helper_mtc0_index(CPUMIPSState *env, target_ulong arg1)
 {
     int num = 1;
     unsigned int tmp = env->tlb->nb_tlb;
@@ -817,7 +1013,7 @@
     env->CP0_Index = (env->CP0_Index & 0x80000000) | (arg1 & (num - 1));
 }
 
-void helper_mtc0_mvpcontrol (target_ulong arg1)
+void helper_mtc0_mvpcontrol(CPUMIPSState *env, target_ulong arg1)
 {
     uint32_t mask = 0;
     uint32_t newval;
@@ -834,7 +1030,7 @@
     env->mvp->CP0_MVPControl = newval;
 }
 
-void helper_mtc0_vpecontrol (target_ulong arg1)
+void helper_mtc0_vpecontrol(CPUMIPSState *env, target_ulong arg1)
 {
     uint32_t mask;
     uint32_t newval;
@@ -851,7 +1047,7 @@
     env->CP0_VPEControl = newval;
 }
 
-void helper_mtc0_vpeconf0 (target_ulong arg1)
+void helper_mtc0_vpeconf0(CPUMIPSState *env, target_ulong arg1)
 {
     uint32_t mask = 0;
     uint32_t newval;
@@ -868,7 +1064,7 @@
     env->CP0_VPEConf0 = newval;
 }
 
-void helper_mtc0_vpeconf1 (target_ulong arg1)
+void helper_mtc0_vpeconf1(CPUMIPSState *env, target_ulong arg1)
 {
     uint32_t mask = 0;
     uint32_t newval;
@@ -886,49 +1082,48 @@
     env->CP0_VPEConf1 = newval;
 }
 
-void helper_mtc0_yqmask (target_ulong arg1)
+void helper_mtc0_yqmask(CPUMIPSState *env, target_ulong arg1)
 {
     /* Yield qualifier inputs not implemented. */
     env->CP0_YQMask = 0x00000000;
 }
 
-void helper_mtc0_vpeopt (target_ulong arg1)
+void helper_mtc0_vpeopt(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_VPEOpt = arg1 & 0x0000ffff;
 }
 
-void helper_mtc0_entrylo0 (target_ulong arg1)
+void helper_mtc0_entrylo0(CPUMIPSState *env, target_ulong arg1)
 {
     /* Large physaddr (PABITS) not implemented */
     /* 1k pages not implemented */
     env->CP0_EntryLo0 = arg1 & 0x3FFFFFFF;
 }
 
-void helper_mtc0_tcstatus (target_ulong arg1)
+void helper_mtc0_tcstatus(CPUMIPSState *env, target_ulong arg1)
 {
     uint32_t mask = env->CP0_TCStatus_rw_bitmask;
     uint32_t newval;
 
     newval = (env->active_tc.CP0_TCStatus & ~mask) | (arg1 & mask);
 
-    // TODO: Sync with CP0_Status.
-
     env->active_tc.CP0_TCStatus = newval;
+    sync_c0_tcstatus(env, env->current_tc, newval);
 }
 
-void helper_mttc0_tcstatus (target_ulong arg1)
+void helper_mttc0_tcstatus(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    // TODO: Sync with CP0_Status.
-
-    if (other_tc == env->current_tc)
-        env->active_tc.CP0_TCStatus = arg1;
+    if (other_tc == other->current_tc)
+        other->active_tc.CP0_TCStatus = arg1;
     else
-        env->tcs[other_tc].CP0_TCStatus = arg1;
+        other->tcs[other_tc].CP0_TCStatus = arg1;
+    sync_c0_tcstatus(other, other_tc, arg1);
 }
 
-void helper_mtc0_tcbind (target_ulong arg1)
+void helper_mtc0_tcbind(CPUMIPSState *env, target_ulong arg1)
 {
     uint32_t mask = (1 << CP0TCBd_TBE);
     uint32_t newval;
@@ -939,24 +1134,25 @@
     env->active_tc.CP0_TCBind = newval;
 }
 
-void helper_mttc0_tcbind (target_ulong arg1)
+void helper_mttc0_tcbind(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
     uint32_t mask = (1 << CP0TCBd_TBE);
     uint32_t newval;
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    if (env->mvp->CP0_MVPControl & (1 << CP0MVPCo_VPC))
+    if (other->mvp->CP0_MVPControl & (1 << CP0MVPCo_VPC))
         mask |= (1 << CP0TCBd_CurVPE);
-    if (other_tc == env->current_tc) {
-        newval = (env->active_tc.CP0_TCBind & ~mask) | (arg1 & mask);
-        env->active_tc.CP0_TCBind = newval;
+    if (other_tc == other->current_tc) {
+        newval = (other->active_tc.CP0_TCBind & ~mask) | (arg1 & mask);
+        other->active_tc.CP0_TCBind = newval;
     } else {
-        newval = (env->tcs[other_tc].CP0_TCBind & ~mask) | (arg1 & mask);
-        env->tcs[other_tc].CP0_TCBind = newval;
+        newval = (other->tcs[other_tc].CP0_TCBind & ~mask) | (arg1 & mask);
+        other->tcs[other_tc].CP0_TCBind = newval;
     }
 }
 
-void helper_mtc0_tcrestart (target_ulong arg1)
+void helper_mtc0_tcrestart(CPUMIPSState *env, target_ulong arg1)
 {
     env->active_tc.PC = arg1;
     env->active_tc.CP0_TCStatus &= ~(1 << CP0TCSt_TDS);
@@ -964,106 +1160,111 @@
     /* MIPS16 not implemented. */
 }
 
-void helper_mttc0_tcrestart (target_ulong arg1)
+void helper_mttc0_tcrestart(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    if (other_tc == env->current_tc) {
-        env->active_tc.PC = arg1;
-        env->active_tc.CP0_TCStatus &= ~(1 << CP0TCSt_TDS);
-        env->lladdr = 0ULL;
+    if (other_tc == other->current_tc) {
+        other->active_tc.PC = arg1;
+        other->active_tc.CP0_TCStatus &= ~(1 << CP0TCSt_TDS);
+        other->lladdr = 0ULL;
         /* MIPS16 not implemented. */
     } else {
-        env->tcs[other_tc].PC = arg1;
-        env->tcs[other_tc].CP0_TCStatus &= ~(1 << CP0TCSt_TDS);
-        env->lladdr = 0ULL;
+        other->tcs[other_tc].PC = arg1;
+        other->tcs[other_tc].CP0_TCStatus &= ~(1 << CP0TCSt_TDS);
+        other->lladdr = 0ULL;
         /* MIPS16 not implemented. */
     }
 }
 
-void helper_mtc0_tchalt (target_ulong arg1)
+void helper_mtc0_tchalt(CPUMIPSState *env, target_ulong arg1)
 {
     env->active_tc.CP0_TCHalt = arg1 & 0x1;
 
     // TODO: Halt TC / Restart (if allocated+active) TC.
 }
 
-void helper_mttc0_tchalt (target_ulong arg1)
+void helper_mttc0_tchalt(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     // TODO: Halt TC / Restart (if allocated+active) TC.
 
-    if (other_tc == env->current_tc)
-        env->active_tc.CP0_TCHalt = arg1;
+    if (other_tc == other->current_tc)
+        other->active_tc.CP0_TCHalt = arg1;
     else
-        env->tcs[other_tc].CP0_TCHalt = arg1;
+        other->tcs[other_tc].CP0_TCHalt = arg1;
 }
 
-void helper_mtc0_tccontext (target_ulong arg1)
+void helper_mtc0_tccontext(CPUMIPSState *env, target_ulong arg1)
 {
     env->active_tc.CP0_TCContext = arg1;
 }
 
-void helper_mttc0_tccontext (target_ulong arg1)
+void helper_mttc0_tccontext(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    if (other_tc == env->current_tc)
-        env->active_tc.CP0_TCContext = arg1;
+    if (other_tc == other->current_tc)
+        other->active_tc.CP0_TCContext = arg1;
     else
-        env->tcs[other_tc].CP0_TCContext = arg1;
+        other->tcs[other_tc].CP0_TCContext = arg1;
 }
 
-void helper_mtc0_tcschedule (target_ulong arg1)
+void helper_mtc0_tcschedule(CPUMIPSState *env, target_ulong arg1)
 {
     env->active_tc.CP0_TCSchedule = arg1;
 }
 
-void helper_mttc0_tcschedule (target_ulong arg1)
+void helper_mttc0_tcschedule(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    if (other_tc == env->current_tc)
-        env->active_tc.CP0_TCSchedule = arg1;
+    if (other_tc == other->current_tc)
+        other->active_tc.CP0_TCSchedule = arg1;
     else
-        env->tcs[other_tc].CP0_TCSchedule = arg1;
+        other->tcs[other_tc].CP0_TCSchedule = arg1;
 }
 
-void helper_mtc0_tcschefback (target_ulong arg1)
+void helper_mtc0_tcschefback(CPUMIPSState *env, target_ulong arg1)
 {
     env->active_tc.CP0_TCScheFBack = arg1;
 }
 
-void helper_mttc0_tcschefback (target_ulong arg1)
+void helper_mttc0_tcschefback(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    if (other_tc == env->current_tc)
-        env->active_tc.CP0_TCScheFBack = arg1;
+    if (other_tc == other->current_tc)
+        other->active_tc.CP0_TCScheFBack = arg1;
     else
-        env->tcs[other_tc].CP0_TCScheFBack = arg1;
+        other->tcs[other_tc].CP0_TCScheFBack = arg1;
 }
 
-void helper_mtc0_entrylo1 (target_ulong arg1)
+void helper_mtc0_entrylo1(CPUMIPSState *env, target_ulong arg1)
 {
     /* Large physaddr (PABITS) not implemented */
     /* 1k pages not implemented */
     env->CP0_EntryLo1 = arg1 & 0x3FFFFFFF;
 }
 
-void helper_mtc0_context (target_ulong arg1)
+void helper_mtc0_context(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_Context = (env->CP0_Context & 0x007FFFFF) | (arg1 & ~0x007FFFFF);
 }
 
-void helper_mtc0_pagemask (target_ulong arg1)
+void helper_mtc0_pagemask(CPUMIPSState *env, target_ulong arg1)
 {
     /* 1k pages not implemented */
     env->CP0_PageMask = arg1 & (0x1FFFFFFF & (TARGET_PAGE_MASK << 1));
 }
 
-void helper_mtc0_pagegrain (target_ulong arg1)
+void helper_mtc0_pagegrain(CPUMIPSState *env, target_ulong arg1)
 {
     /* SmartMIPS not implemented */
     /* Large physaddr (PABITS) not implemented */
@@ -1071,47 +1272,47 @@
     env->CP0_PageGrain = 0;
 }
 
-void helper_mtc0_wired (target_ulong arg1)
+void helper_mtc0_wired(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_Wired = arg1 % env->tlb->nb_tlb;
 }
 
-void helper_mtc0_srsconf0 (target_ulong arg1)
+void helper_mtc0_srsconf0(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_SRSConf0 |= arg1 & env->CP0_SRSConf0_rw_bitmask;
 }
 
-void helper_mtc0_srsconf1 (target_ulong arg1)
+void helper_mtc0_srsconf1(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_SRSConf1 |= arg1 & env->CP0_SRSConf1_rw_bitmask;
 }
 
-void helper_mtc0_srsconf2 (target_ulong arg1)
+void helper_mtc0_srsconf2(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_SRSConf2 |= arg1 & env->CP0_SRSConf2_rw_bitmask;
 }
 
-void helper_mtc0_srsconf3 (target_ulong arg1)
+void helper_mtc0_srsconf3(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_SRSConf3 |= arg1 & env->CP0_SRSConf3_rw_bitmask;
 }
 
-void helper_mtc0_srsconf4 (target_ulong arg1)
+void helper_mtc0_srsconf4(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_SRSConf4 |= arg1 & env->CP0_SRSConf4_rw_bitmask;
 }
 
-void helper_mtc0_hwrena (target_ulong arg1)
+void helper_mtc0_hwrena(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_HWREna = arg1 & 0x0000000F;
 }
 
-void helper_mtc0_count (target_ulong arg1)
+void helper_mtc0_count(CPUMIPSState *env, target_ulong arg1)
 {
     cpu_mips_store_count(env, arg1);
 }
 
-void helper_mtc0_entryhi (target_ulong arg1)
+void helper_mtc0_entryhi(CPUMIPSState *env, target_ulong arg1)
 {
     target_ulong old, val;
 
@@ -1123,35 +1324,28 @@
     old = env->CP0_EntryHi;
     env->CP0_EntryHi = val;
     if (env->CP0_Config3 & (1 << CP0C3_MT)) {
-        uint32_t tcst = env->active_tc.CP0_TCStatus & ~0xff;
-        env->active_tc.CP0_TCStatus = tcst | (val & 0xff);
+        sync_c0_entryhi(env, env->current_tc);
     }
     /* If the ASID changes, flush qemu's TLB.  */
     if ((old & 0xFF) != (val & 0xFF))
         cpu_mips_tlb_flush(env, 1);
 }
 
-void helper_mttc0_entryhi(target_ulong arg1)
+void helper_mttc0_entryhi(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    int32_t tcstatus;
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    env->CP0_EntryHi = (env->CP0_EntryHi & 0xff) | (arg1 & ~0xff);
-    if (other_tc == env->current_tc) {
-        tcstatus = (env->active_tc.CP0_TCStatus & ~0xff) | (arg1 & 0xff);
-        env->active_tc.CP0_TCStatus = tcstatus;
-    } else {
-        tcstatus = (env->tcs[other_tc].CP0_TCStatus & ~0xff) | (arg1 & 0xff);
-        env->tcs[other_tc].CP0_TCStatus = tcstatus;
-    }
+    other->CP0_EntryHi = arg1;
+    sync_c0_entryhi(other, other_tc);
 }
 
-void helper_mtc0_compare (target_ulong arg1)
+void helper_mtc0_compare(CPUMIPSState *env, target_ulong arg1)
 {
     cpu_mips_store_compare(env, arg1);
 }
 
-void helper_mtc0_status (target_ulong arg1)
+void helper_mtc0_status(CPUMIPSState *env, target_ulong arg1)
 {
     uint32_t val, old;
     uint32_t mask = env->CP0_Status_rw_bitmask;
@@ -1159,7 +1353,12 @@
     val = arg1 & mask;
     old = env->CP0_Status;
     env->CP0_Status = (env->CP0_Status & ~mask) | val;
-    compute_hflags(env);
+    if (env->CP0_Config3 & (1 << CP0C3_MT)) {
+        sync_c0_status(env, env, env->current_tc);
+    } else {
+        compute_hflags(env);
+    }
+
     if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
         qemu_log("Status %08x (%08x) => %08x (%08x) Cause %08x",
                 old, old & env->CP0_Cause & CP0Ca_IP_mask,
@@ -1175,34 +1374,28 @@
     cpu_mips_update_irq(env);
 }
 
-void helper_mttc0_status(target_ulong arg1)
+void helper_mttc0_status(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    int32_t tcstatus = env->tcs[other_tc].CP0_TCStatus;
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    env->CP0_Status = arg1 & ~0xf1000018;
-    tcstatus = (tcstatus & ~(0xf << CP0TCSt_TCU0)) | (arg1 & (0xf << CP0St_CU0));
-    tcstatus = (tcstatus & ~(1 << CP0TCSt_TMX)) | ((arg1 & (1 << CP0St_MX)) << (CP0TCSt_TMX - CP0St_MX));
-    tcstatus = (tcstatus & ~(0x3 << CP0TCSt_TKSU)) | ((arg1 & (0x3 << CP0St_KSU)) << (CP0TCSt_TKSU - CP0St_KSU));
-    if (other_tc == env->current_tc)
-        env->active_tc.CP0_TCStatus = tcstatus;
-    else
-        env->tcs[other_tc].CP0_TCStatus = tcstatus;
+    other->CP0_Status = arg1 & ~0xf1000018;
+    sync_c0_status(env, other, other_tc);
 }
 
-void helper_mtc0_intctl (target_ulong arg1)
+void helper_mtc0_intctl(CPUMIPSState *env, target_ulong arg1)
 {
     /* vectored interrupts not implemented, no performance counters. */
     env->CP0_IntCtl = (env->CP0_IntCtl & ~0x000002e0) | (arg1 & 0x000002e0);
 }
 
-void helper_mtc0_srsctl (target_ulong arg1)
+void helper_mtc0_srsctl(CPUMIPSState *env, target_ulong arg1)
 {
     uint32_t mask = (0xf << CP0SRSCtl_ESS) | (0xf << CP0SRSCtl_PSS);
     env->CP0_SRSCtl = (env->CP0_SRSCtl & ~mask) | (arg1 & mask);
 }
 
-void helper_mtc0_cause (target_ulong arg1)
+void helper_mtc0_cause(CPUMIPSState *env, target_ulong arg1)
 {
     uint32_t mask = 0x00C00300;
     uint32_t old = env->CP0_Cause;
@@ -1226,56 +1419,56 @@
     }
 }
 
-void helper_mtc0_ebase (target_ulong arg1)
+void helper_mtc0_ebase(CPUMIPSState *env, target_ulong arg1)
 {
     /* vectored interrupts not implemented */
     /* Multi-CPU not implemented */
     env->CP0_EBase = 0x80000000 | (arg1 & 0x3FFFF000);
 }
 
-void helper_mtc0_config0 (target_ulong arg1)
+void helper_mtc0_config0(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_Config0 = (env->CP0_Config0 & 0x81FFFFF8) | (arg1 & 0x00000007);
 }
 
-void helper_mtc0_config2 (target_ulong arg1)
+void helper_mtc0_config2(CPUMIPSState *env, target_ulong arg1)
 {
     /* tertiary/secondary caches not implemented */
     env->CP0_Config2 = (env->CP0_Config2 & 0x8FFF0FFF);
 }
 
-void helper_mtc0_lladdr (target_ulong arg1)
+void helper_mtc0_lladdr(CPUMIPSState *env, target_ulong arg1)
 {
     target_long mask = env->CP0_LLAddr_rw_bitmask;
     arg1 = arg1 << env->CP0_LLAddr_shift;
     env->lladdr = (env->lladdr & ~mask) | (arg1 & mask);
 }
 
-void helper_mtc0_watchlo (target_ulong arg1, uint32_t sel)
+void helper_mtc0_watchlo(CPUMIPSState *env, target_ulong arg1, uint32_t sel)
 {
     /* Watch exceptions for instructions, data loads, data stores
        not implemented. */
     env->CP0_WatchLo[sel] = (arg1 & ~0x7);
 }
 
-void helper_mtc0_watchhi (target_ulong arg1, uint32_t sel)
+void helper_mtc0_watchhi(CPUMIPSState *env, target_ulong arg1, uint32_t sel)
 {
     env->CP0_WatchHi[sel] = (arg1 & 0x40FF0FF8);
     env->CP0_WatchHi[sel] &= ~(env->CP0_WatchHi[sel] & arg1 & 0x7);
 }
 
-void helper_mtc0_xcontext (target_ulong arg1)
+void helper_mtc0_xcontext(CPUMIPSState *env, target_ulong arg1)
 {
     target_ulong mask = (1ULL << (env->SEGBITS - 7)) - 1;
     env->CP0_XContext = (env->CP0_XContext & mask) | (arg1 & ~mask);
 }
 
-void helper_mtc0_framemask (target_ulong arg1)
+void helper_mtc0_framemask(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_Framemask = arg1; /* XXX */
 }
 
-void helper_mtc0_debug (target_ulong arg1)
+void helper_mtc0_debug(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_Debug = (env->CP0_Debug & 0x8C03FC1F) | (arg1 & 0x13300120);
     if (arg1 & (1 << CP0DB_DM))
@@ -1284,181 +1477,181 @@
         env->hflags &= ~MIPS_HFLAG_DM;
 }
 
-void helper_mttc0_debug(target_ulong arg1)
+void helper_mttc0_debug(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
     uint32_t val = arg1 & ((1 << CP0DB_SSt) | (1 << CP0DB_Halt));
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     /* XXX: Might be wrong, check with EJTAG spec. */
-    if (other_tc == env->current_tc)
-        env->active_tc.CP0_Debug_tcstatus = val;
+    if (other_tc == other->current_tc)
+        other->active_tc.CP0_Debug_tcstatus = val;
     else
-        env->tcs[other_tc].CP0_Debug_tcstatus = val;
-    env->CP0_Debug = (env->CP0_Debug & ((1 << CP0DB_SSt) | (1 << CP0DB_Halt))) |
+        other->tcs[other_tc].CP0_Debug_tcstatus = val;
+    other->CP0_Debug = (other->CP0_Debug &
+                     ((1 << CP0DB_SSt) | (1 << CP0DB_Halt))) |
                      (arg1 & ~((1 << CP0DB_SSt) | (1 << CP0DB_Halt)));
 }
 
-void helper_mtc0_performance0 (target_ulong arg1)
+void helper_mtc0_performance0(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_Performance0 = arg1 & 0x000007ff;
 }
 
-void helper_mtc0_taglo (target_ulong arg1)
+void helper_mtc0_taglo(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_TagLo = arg1 & 0xFFFFFCF6;
 }
 
-void helper_mtc0_datalo (target_ulong arg1)
+void helper_mtc0_datalo(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_DataLo = arg1; /* XXX */
 }
 
-void helper_mtc0_taghi (target_ulong arg1)
+void helper_mtc0_taghi(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_TagHi = arg1; /* XXX */
 }
 
-void helper_mtc0_datahi (target_ulong arg1)
+void helper_mtc0_datahi(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_DataHi = arg1; /* XXX */
 }
 
 /* MIPS MT functions */
-target_ulong helper_mftgpr(uint32_t sel)
+target_ulong helper_mftgpr(CPUMIPSState *env, uint32_t sel)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    if (other_tc == env->current_tc)
-        return env->active_tc.gpr[sel];
+    if (other_tc == other->current_tc)
+        return other->active_tc.gpr[sel];
     else
-        return env->tcs[other_tc].gpr[sel];
+        return other->tcs[other_tc].gpr[sel];
 }
 
-target_ulong helper_mftlo(uint32_t sel)
+target_ulong helper_mftlo(CPUMIPSState *env, uint32_t sel)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    if (other_tc == env->current_tc)
-        return env->active_tc.LO[sel];
+    if (other_tc == other->current_tc)
+        return other->active_tc.LO[sel];
     else
-        return env->tcs[other_tc].LO[sel];
+        return other->tcs[other_tc].LO[sel];
 }
 
-target_ulong helper_mfthi(uint32_t sel)
+target_ulong helper_mfthi(CPUMIPSState *env, uint32_t sel)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    if (other_tc == env->current_tc)
-        return env->active_tc.HI[sel];
+    if (other_tc == other->current_tc)
+        return other->active_tc.HI[sel];
     else
-        return env->tcs[other_tc].HI[sel];
+        return other->tcs[other_tc].HI[sel];
 }
 
-target_ulong helper_mftacx(uint32_t sel)
+target_ulong helper_mftacx(CPUMIPSState *env, uint32_t sel)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    if (other_tc == env->current_tc)
-        return env->active_tc.ACX[sel];
+    if (other_tc == other->current_tc)
+        return other->active_tc.ACX[sel];
     else
-        return env->tcs[other_tc].ACX[sel];
+        return other->tcs[other_tc].ACX[sel];
 }
 
-target_ulong helper_mftdsp(void)
+target_ulong helper_mftdsp(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    if (other_tc == env->current_tc)
-        return env->active_tc.DSPControl;
+    if (other_tc == other->current_tc)
+        return other->active_tc.DSPControl;
     else
-        return env->tcs[other_tc].DSPControl;
+        return other->tcs[other_tc].DSPControl;
 }
 
-void helper_mttgpr(target_ulong arg1, uint32_t sel)
+void helper_mttgpr(CPUMIPSState *env, target_ulong arg1, uint32_t sel)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    if (other_tc == env->current_tc)
-        env->active_tc.gpr[sel] = arg1;
+    if (other_tc == other->current_tc)
+        other->active_tc.gpr[sel] = arg1;
     else
-        env->tcs[other_tc].gpr[sel] = arg1;
+        other->tcs[other_tc].gpr[sel] = arg1;
 }
 
-void helper_mttlo(target_ulong arg1, uint32_t sel)
+void helper_mttlo(CPUMIPSState *env, target_ulong arg1, uint32_t sel)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    if (other_tc == env->current_tc)
-        env->active_tc.LO[sel] = arg1;
+    if (other_tc == other->current_tc)
+        other->active_tc.LO[sel] = arg1;
     else
-        env->tcs[other_tc].LO[sel] = arg1;
+        other->tcs[other_tc].LO[sel] = arg1;
 }
 
-void helper_mtthi(target_ulong arg1, uint32_t sel)
+void helper_mtthi(CPUMIPSState *env, target_ulong arg1, uint32_t sel)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    if (other_tc == env->current_tc)
-        env->active_tc.HI[sel] = arg1;
+    if (other_tc == other->current_tc)
+        other->active_tc.HI[sel] = arg1;
     else
-        env->tcs[other_tc].HI[sel] = arg1;
+        other->tcs[other_tc].HI[sel] = arg1;
 }
 
-void helper_mttacx(target_ulong arg1, uint32_t sel)
+void helper_mttacx(CPUMIPSState *env, target_ulong arg1, uint32_t sel)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    if (other_tc == env->current_tc)
-        env->active_tc.ACX[sel] = arg1;
+    if (other_tc == other->current_tc)
+        other->active_tc.ACX[sel] = arg1;
     else
-        env->tcs[other_tc].ACX[sel] = arg1;
+        other->tcs[other_tc].ACX[sel] = arg1;
 }
 
-void helper_mttdsp(target_ulong arg1)
+void helper_mttdsp(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    if (other_tc == env->current_tc)
-        env->active_tc.DSPControl = arg1;
+    if (other_tc == other->current_tc)
+        other->active_tc.DSPControl = arg1;
     else
-        env->tcs[other_tc].DSPControl = arg1;
+        other->tcs[other_tc].DSPControl = arg1;
 }
 
 /* MIPS MT functions */
-target_ulong helper_dmt(target_ulong arg1)
+target_ulong helper_dmt(void)
 {
     // TODO
-    arg1 = 0;
-    // rt = arg1
-
-    return arg1;
+    return 0;
 }
 
-target_ulong helper_emt(target_ulong arg1)
+target_ulong helper_emt(void)
 {
     // TODO
-    arg1 = 0;
-    // rt = arg1
-
-    return arg1;
+    return 0;
 }
 
-target_ulong helper_dvpe(target_ulong arg1)
+target_ulong helper_dvpe(CPUMIPSState *env)
 {
     // TODO
-    arg1 = 0;
-    // rt = arg1
-
-    return arg1;
+    return 0;
 }
 
-target_ulong helper_evpe(target_ulong arg1)
+target_ulong helper_evpe(CPUMIPSState *env)
 {
     // TODO
-    arg1 = 0;
-    // rt = arg1
-
-    return arg1;
+    return 0;
 }
 #endif /* !CONFIG_USER_ONLY */
 
@@ -1469,8 +1662,10 @@
     // TODO: store to TC register
 }
 
-target_ulong helper_yield(target_ulong arg1)
+target_ulong helper_yield(CPUMIPSState *env, target_ulong arg)
 {
+    target_long arg1 = arg;
+
     if (arg1 < 0) {
         /* No scheduling policy implemented. */
         if (arg1 != -2) {
@@ -1478,13 +1673,13 @@
                 env->active_tc.CP0_TCStatus & (1 << CP0TCSt_DT)) {
                 env->CP0_VPEControl &= ~(0x7 << CP0VPECo_EXCPT);
                 env->CP0_VPEControl |= 4 << CP0VPECo_EXCPT;
-                helper_raise_exception(EXCP_THREAD);
+                helper_raise_exception(env, EXCP_THREAD);
             }
         }
     } else if (arg1 == 0) {
         if (0 /* TODO: TC underflow */) {
             env->CP0_VPEControl &= ~(0x7 << CP0VPECo_EXCPT);
-            helper_raise_exception(EXCP_THREAD);
+            helper_raise_exception(env, EXCP_THREAD);
         } else {
             // TODO: Deallocate TC
         }
@@ -1492,7 +1687,7 @@
         /* Yield qualifier inputs not implemented. */
         env->CP0_VPEControl &= ~(0x7 << CP0VPECo_EXCPT);
         env->CP0_VPEControl |= 2 << CP0VPECo_EXCPT;
-        helper_raise_exception(EXCP_THREAD);
+        helper_raise_exception(env, EXCP_THREAD);
     }
     return env->CP0_YQMask;
 }
@@ -1563,7 +1758,7 @@
     tlb_flush (env, flush_global);
 }
 
-static void r4k_fill_tlb (int idx)
+static void r4k_fill_tlb(CPUMIPSState *env, int idx)
 {
     r4k_tlb_t *tlb;
 
@@ -1586,24 +1781,15 @@
     tlb->PFN[1] = (env->CP0_EntryLo1 >> 6) << 12;
 }
 
-void r4k_helper_ptw_tlbrefill(CPUMIPSState *target_env)
+void r4k_helper_ptw_tlbrefill(CPUMIPSState *env)
 {
-   CPUMIPSState *saved_env;
-
-   /* Save current 'env' value */
-   saved_env = env;
-   env = target_env;
-
    /* Do TLB load on behalf of Page Table Walk */
     int r = cpu_mips_get_random(env);
     r4k_invalidate_tlb_shadow(env, r);
-    r4k_fill_tlb(r);
-
-   /* Restore 'env' value */
-   env = saved_env;
+    r4k_fill_tlb(env, r);
 }
 
-void r4k_helper_tlbwi (void)
+void r4k_helper_tlbwi (CPUMIPSState *env)
 {
     r4k_tlb_t *tlb;
     target_ulong tag;
@@ -1637,18 +1823,18 @@
     cpu_mips_tlb_flush (env, 1);
 
     r4k_invalidate_tlb(env, env->CP0_Index % env->tlb->nb_tlb);
-    r4k_fill_tlb(env->CP0_Index % env->tlb->nb_tlb);
+    r4k_fill_tlb(env, env->CP0_Index % env->tlb->nb_tlb);
 }
 
-void r4k_helper_tlbwr (void)
+void r4k_helper_tlbwr (CPUMIPSState *env)
 {
     int r = cpu_mips_get_random(env);
 
     r4k_invalidate_tlb_shadow(env, r);
-    r4k_fill_tlb(r);
+    r4k_fill_tlb(env, r);
 }
 
-void r4k_helper_tlbp (void)
+void r4k_helper_tlbp(CPUMIPSState *env)
 {
     r4k_tlb_t *tlb;
     target_ulong mask;
@@ -1679,7 +1865,7 @@
     }
 }
 
-void r4k_helper_tlbr (void)
+void r4k_helper_tlbr(CPUMIPSState *env)
 {
     r4k_tlb_t *tlb;
     uint8_t ASID;
@@ -1702,28 +1888,28 @@
                         (tlb->C1 << 3) | (tlb->PFN[1] >> 6);
 }
 
-void helper_tlbwi(void)
+void helper_tlbwi(CPUMIPSState *env)
 {
-    env->tlb->helper_tlbwi();
+    env->tlb->helper_tlbwi(env);
 }
 
-void helper_tlbwr(void)
+void helper_tlbwr(CPUMIPSState *env)
 {
-    env->tlb->helper_tlbwr();
+    env->tlb->helper_tlbwr(env);
 }
 
-void helper_tlbp(void)
+void helper_tlbp(CPUMIPSState *env)
 {
-    env->tlb->helper_tlbp();
+    env->tlb->helper_tlbp(env);
 }
 
-void helper_tlbr(void)
+void helper_tlbr(CPUMIPSState *env)
 {
-    env->tlb->helper_tlbr();
+    env->tlb->helper_tlbr(env);
 }
 
 /* Specials */
-target_ulong helper_di (void)
+target_ulong helper_di(CPUMIPSState *env)
 {
     target_ulong t0 = env->CP0_Status;
 
@@ -1733,7 +1919,7 @@
     return t0;
 }
 
-target_ulong helper_ei (void)
+target_ulong helper_ei(CPUMIPSState *env)
 {
     target_ulong t0 = env->CP0_Status;
 
@@ -1743,7 +1929,7 @@
     return t0;
 }
 
-static void debug_pre_eret (void)
+static void debug_pre_eret(CPUMIPSState *env)
 {
     if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
         qemu_log("ERET: PC " TARGET_FMT_lx " EPC " TARGET_FMT_lx,
@@ -1756,7 +1942,7 @@
     }
 }
 
-static void debug_post_eret (void)
+static void debug_post_eret(CPUMIPSState *env)
 {
     if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
         qemu_log("  =>  PC " TARGET_FMT_lx " EPC " TARGET_FMT_lx,
@@ -1774,9 +1960,9 @@
     }
 }
 
-void helper_eret (void)
+void helper_eret (CPUMIPSState *env)
 {
-    debug_pre_eret();
+    debug_pre_eret(env);
     if (env->CP0_Status & (1 << CP0St_ERL)) {
         env->active_tc.PC = env->CP0_ErrorEPC;
         env->CP0_Status &= ~(1 << CP0St_ERL);
@@ -1785,66 +1971,66 @@
         env->CP0_Status &= ~(1 << CP0St_EXL);
     }
     compute_hflags(env);
-    debug_post_eret();
+    debug_post_eret(env);
     env->lladdr = 1;
 }
 
-void helper_deret (void)
+void helper_deret (CPUMIPSState *env)
 {
-    debug_pre_eret();
+    debug_pre_eret(env);
     env->active_tc.PC = env->CP0_DEPC;
     env->hflags &= MIPS_HFLAG_DM;
     compute_hflags(env);
-    debug_post_eret();
+    debug_post_eret(env);
     env->lladdr = 1;
 }
 #endif /* !CONFIG_USER_ONLY */
 
-target_ulong helper_rdhwr_cpunum(void)
+target_ulong helper_rdhwr_cpunum(CPUMIPSState *env)
 {
     if ((env->hflags & MIPS_HFLAG_CP0) ||
         (env->CP0_HWREna & (1 << 0)))
         return env->CP0_EBase & 0x3ff;
     else
-        helper_raise_exception(EXCP_RI);
+        helper_raise_exception(env, EXCP_RI);
 
     return 0;
 }
 
-target_ulong helper_rdhwr_synci_step(void)
+target_ulong helper_rdhwr_synci_step(CPUMIPSState *env)
 {
     if ((env->hflags & MIPS_HFLAG_CP0) ||
         (env->CP0_HWREna & (1 << 1)))
         return env->SYNCI_Step;
     else
-        helper_raise_exception(EXCP_RI);
+        helper_raise_exception(env, EXCP_RI);
 
     return 0;
 }
 
-target_ulong helper_rdhwr_cc(void)
+target_ulong helper_rdhwr_cc(CPUMIPSState *env)
 {
     if ((env->hflags & MIPS_HFLAG_CP0) ||
         (env->CP0_HWREna & (1 << 2)))
         return env->CP0_Count;
     else
-        helper_raise_exception(EXCP_RI);
+        helper_raise_exception(env, EXCP_RI);
 
     return 0;
 }
 
-target_ulong helper_rdhwr_ccres(void)
+target_ulong helper_rdhwr_ccres(CPUMIPSState *env)
 {
     if ((env->hflags & MIPS_HFLAG_CP0) ||
         (env->CP0_HWREna & (1 << 3)))
         return env->CCRes;
     else
-        helper_raise_exception(EXCP_RI);
+        helper_raise_exception(env, EXCP_RI);
 
     return 0;
 }
 
-void helper_pmon (int function)
+void helper_pmon(CPUMIPSState *env, int function)
 {
     function /= 2;
     switch (function) {
@@ -1870,15 +2056,17 @@
     }
 }
 
-void helper_wait (void)
+void helper_wait(CPUMIPSState *env)
 {
-    env->halted = 1;
-    helper_raise_exception(EXCP_HLT);
+    ENV_GET_CPU(env)->halted = 1;
+    helper_raise_exception(env, EXCP_HLT);
 }
 
 #if !defined(CONFIG_USER_ONLY)
 
-static void do_unaligned_access (target_ulong addr, int is_write, int is_user, void *retaddr);
+static void do_unaligned_access (CPUMIPSState *env,
+                                 target_ulong addr, int is_write,
+                                 int is_user, uintptr_t retaddr);
 
 #define MMUSUFFIX _mmu
 #define ALIGNED_ONLY
@@ -1895,59 +2083,53 @@
 #define SHIFT 3
 #include "exec/softmmu_template.h"
 
-static void do_unaligned_access (target_ulong addr, int is_write, int is_user, void *retaddr)
+static void do_unaligned_access(CPUMIPSState *env, target_ulong addr,
+                                int is_write, int is_user, uintptr_t retaddr)
 {
     env->CP0_BadVAddr = addr;
-    do_restore_state (retaddr);
-    helper_raise_exception ((is_write == 1) ? EXCP_AdES : EXCP_AdEL);
+    do_restore_state (env, retaddr);
+    helper_raise_exception(env, (is_write == 1) ? EXCP_AdES : EXCP_AdEL);
 }
 
-void tlb_fill (target_ulong addr, int is_write, int mmu_idx, void *retaddr)
+void tlb_fill (CPUMIPSState* env, target_ulong addr, int is_write, int mmu_idx,
+               uintptr_t retaddr)
 {
     TranslationBlock *tb;
-    CPUMIPSState *saved_env;
-    unsigned long pc;
     int ret;
 
-    /* XXX: hack to restore env in all cases, even if not called from
-       generated code */
-    saved_env = env;
-    env = cpu_single_env;
-    ret = cpu_mips_handle_mmu_fault(env, addr, is_write, mmu_idx, 1);
+    ret = cpu_mips_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (ret) {
         if (retaddr) {
             /* now we have a real cpu fault */
-            pc = (unsigned long)retaddr;
-            tb = tb_find_pc(pc);
+            tb = tb_find_pc(retaddr);
             if (tb) {
                 /* the PC is inside the translated code. It means that we have
                    a virtual CPU fault */
-                cpu_restore_state(tb, env, pc);
+                cpu_restore_state(env, retaddr);
             }
         }
-        helper_raise_exception_err(env->exception_index, env->error_code);
+        helper_raise_exception_err(env, env->exception_index, env->error_code);
     }
-    env = saved_env;
 }
 
-void do_unassigned_access(hwaddr addr, int is_write, int is_exec,
-                          int unused, int size)
+void cpu_unassigned_access(CPUMIPSState* env, hwaddr addr,
+                           int is_write, int is_exec, int unused, int size)
 {
     if (is_exec)
-        helper_raise_exception(EXCP_IBE);
+        helper_raise_exception(env, EXCP_IBE);
     else
-        helper_raise_exception(EXCP_DBE);
+        helper_raise_exception(env, EXCP_DBE);
 }
 /*
  * The following functions are address translation helper functions
  * for fast memory access in QEMU.
  */
-static unsigned long v2p_mmu(target_ulong addr, int is_user)
+static unsigned long v2p_mmu(CPUMIPSState *env, target_ulong addr, int is_user)
 {
     int index;
     target_ulong tlb_addr;
-    unsigned long physaddr;
-    void *retaddr;
+    hwaddr physaddr;
+    uintptr_t retaddr;
 
     index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
 redo:
@@ -1957,7 +2139,7 @@
     } else {
         /* the page is not in the TLB : fill it */
         retaddr = GETPC();
-        tlb_fill(addr, 0, is_user, retaddr);
+        tlb_fill(env, addr, 0, is_user, retaddr);
         goto redo;
     }
     return physaddr;
@@ -1970,22 +2152,20 @@
  */
 unsigned long v2p(target_ulong ptr, int is_user)
 {
-    CPUMIPSState *saved_env;
+    CPUMIPSState *env;
     int index;
     target_ulong addr;
-    unsigned long physaddr;
+    hwaddr physaddr;
 
-    saved_env = env;
     env = cpu_single_env;
     addr = ptr;
     index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
     if (__builtin_expect(env->tlb_table[is_user][index].addr_read !=
                 (addr & TARGET_PAGE_MASK), 0)) {
-        physaddr = v2p_mmu(addr, is_user);
+        physaddr = v2p_mmu(env, addr, is_user);
     } else {
         physaddr = addr + env->tlb_table[is_user][index].addend;
     }
-    env = saved_env;
     return physaddr;
 }
 
@@ -1993,7 +2173,7 @@
 void vstrcpy(target_ulong ptr, char *buf, int max)
 {
     char *phys = 0;
-    unsigned long page = 0;
+    target_ulong page = 0;
 
     if (buf == NULL) return;
 
@@ -2039,7 +2219,7 @@
 #define RESTORE_FLUSH_MODE \
     set_flush_to_zero((env->active_fpu.fcr31 & (1 << 24)) != 0, &env->active_fpu.fp_status);
 
-target_ulong helper_cfc1 (uint32_t reg)
+target_ulong helper_cfc1 (CPUMIPSState *env, uint32_t reg)
 {
     target_ulong arg1;
 
@@ -2064,7 +2244,7 @@
     return arg1;
 }
 
-void helper_ctc1 (target_ulong arg1, uint32_t reg)
+void helper_ctc1(CPUMIPSState *env, target_ulong arg1, uint32_t reg)
 {
     switch(reg) {
     case 25:
@@ -2098,7 +2278,7 @@
     RESTORE_FLUSH_MODE;
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     if ((GET_FP_ENABLE(env->active_fpu.fcr31) | 0x20) & GET_FP_CAUSE(env->active_fpu.fcr31))
-        helper_raise_exception(EXCP_FPE);
+        helper_raise_exception(env, EXCP_FPE);
 }
 
 static inline char ieee_ex_to_mips(char xcpt)
@@ -2119,13 +2299,13 @@
            (xcpt & FP_INVALID) >> 4;
 }
 
-static inline void update_fcr31(void)
+static inline void update_fcr31(CPUMIPSState *env)
 {
     int tmp = ieee_ex_to_mips(get_float_exception_flags(&env->active_fpu.fp_status));
 
     SET_FP_CAUSE(env->active_fpu.fcr31, tmp);
     if (GET_FP_ENABLE(env->active_fpu.fcr31) & tmp)
-        helper_raise_exception(EXCP_FPE);
+        helper_raise_exception(env, EXCP_FPE);
     else
         UPDATE_FP_FLAGS(env->active_fpu.fcr31, tmp);
 }
@@ -2136,71 +2316,71 @@
    paired single lower "pl", paired single upper "pu".  */
 
 /* unary operations, modifying fp status  */
-uint64_t helper_float_sqrt_d(uint64_t fdt0)
+uint64_t helper_float_sqrt_d(CPUMIPSState *env, uint64_t fdt0)
 {
     return float64_sqrt(fdt0, &env->active_fpu.fp_status);
 }
 
-uint32_t helper_float_sqrt_s(uint32_t fst0)
+uint32_t helper_float_sqrt_s(CPUMIPSState *env, uint32_t fst0)
 {
     return float32_sqrt(fst0, &env->active_fpu.fp_status);
 }
 
-uint64_t helper_float_cvtd_s(uint32_t fst0)
+uint64_t helper_float_cvtd_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint64_t fdt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = float32_to_float64(fst0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fdt2;
 }
 
-uint64_t helper_float_cvtd_w(uint32_t wt0)
+uint64_t helper_float_cvtd_w(CPUMIPSState *env, uint32_t wt0)
 {
     uint64_t fdt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = int32_to_float64(wt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fdt2;
 }
 
-uint64_t helper_float_cvtd_l(uint64_t dt0)
+uint64_t helper_float_cvtd_l(CPUMIPSState *env, uint64_t dt0)
 {
     uint64_t fdt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = int64_to_float64(dt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fdt2;
 }
 
-uint64_t helper_float_cvtl_d(uint64_t fdt0)
+uint64_t helper_float_cvtl_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t dt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     dt2 = float64_to_int64(fdt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint64_t helper_float_cvtl_s(uint32_t fst0)
+uint64_t helper_float_cvtl_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint64_t dt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     dt2 = float32_to_int64(fst0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint64_t helper_float_cvtps_pw(uint64_t dt0)
+uint64_t helper_float_cvtps_pw(CPUMIPSState *env, uint64_t dt0)
 {
     uint32_t fst2;
     uint32_t fsth2;
@@ -2208,11 +2388,11 @@
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = int32_to_float32(dt0 & 0XFFFFFFFF, &env->active_fpu.fp_status);
     fsth2 = int32_to_float32(dt0 >> 32, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
-uint64_t helper_float_cvtpw_ps(uint64_t fdt0)
+uint64_t helper_float_cvtpw_ps(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t wt2;
     uint32_t wth2;
@@ -2220,7 +2400,7 @@
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     wt2 = float32_to_int32(fdt0 & 0XFFFFFFFF, &env->active_fpu.fp_status);
     wth2 = float32_to_int32(fdt0 >> 32, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID)) {
         wt2 = FLOAT_SNAN32;
         wth2 = FLOAT_SNAN32;
@@ -2228,81 +2408,81 @@
     return ((uint64_t)wth2 << 32) | wt2;
 }
 
-uint32_t helper_float_cvts_d(uint64_t fdt0)
+uint32_t helper_float_cvts_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t fst2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float64_to_float32(fdt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fst2;
 }
 
-uint32_t helper_float_cvts_w(uint32_t wt0)
+uint32_t helper_float_cvts_w(CPUMIPSState *env, uint32_t wt0)
 {
     uint32_t fst2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = int32_to_float32(wt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fst2;
 }
 
-uint32_t helper_float_cvts_l(uint64_t dt0)
+uint32_t helper_float_cvts_l(CPUMIPSState *env, uint64_t dt0)
 {
     uint32_t fst2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = int64_to_float32(dt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fst2;
 }
 
-uint32_t helper_float_cvts_pl(uint32_t wt0)
+uint32_t helper_float_cvts_pl(CPUMIPSState *env, uint32_t wt0)
 {
     uint32_t wt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     wt2 = wt0;
-    update_fcr31();
+    update_fcr31(env);
     return wt2;
 }
 
-uint32_t helper_float_cvts_pu(uint32_t wth0)
+uint32_t helper_float_cvts_pu(CPUMIPSState *env, uint32_t wth0)
 {
     uint32_t wt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     wt2 = wth0;
-    update_fcr31();
+    update_fcr31(env);
     return wt2;
 }
 
-uint32_t helper_float_cvtw_s(uint32_t fst0)
+uint32_t helper_float_cvtw_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t wt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     wt2 = float32_to_int32(fst0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
 }
 
-uint32_t helper_float_cvtw_d(uint64_t fdt0)
+uint32_t helper_float_cvtw_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t wt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     wt2 = float64_to_int32(fdt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
 }
 
-uint64_t helper_float_roundl_d(uint64_t fdt0)
+uint64_t helper_float_roundl_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t dt2;
 
@@ -2310,13 +2490,13 @@
     set_float_rounding_mode(float_round_nearest_even, &env->active_fpu.fp_status);
     dt2 = float64_to_int64(fdt0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint64_t helper_float_roundl_s(uint32_t fst0)
+uint64_t helper_float_roundl_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint64_t dt2;
 
@@ -2324,13 +2504,13 @@
     set_float_rounding_mode(float_round_nearest_even, &env->active_fpu.fp_status);
     dt2 = float32_to_int64(fst0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint32_t helper_float_roundw_d(uint64_t fdt0)
+uint32_t helper_float_roundw_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t wt2;
 
@@ -2338,13 +2518,13 @@
     set_float_rounding_mode(float_round_nearest_even, &env->active_fpu.fp_status);
     wt2 = float64_to_int32(fdt0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
 }
 
-uint32_t helper_float_roundw_s(uint32_t fst0)
+uint32_t helper_float_roundw_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t wt2;
 
@@ -2352,61 +2532,61 @@
     set_float_rounding_mode(float_round_nearest_even, &env->active_fpu.fp_status);
     wt2 = float32_to_int32(fst0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
 }
 
-uint64_t helper_float_truncl_d(uint64_t fdt0)
+uint64_t helper_float_truncl_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t dt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     dt2 = float64_to_int64_round_to_zero(fdt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint64_t helper_float_truncl_s(uint32_t fst0)
+uint64_t helper_float_truncl_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint64_t dt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     dt2 = float32_to_int64_round_to_zero(fst0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint32_t helper_float_truncw_d(uint64_t fdt0)
+uint32_t helper_float_truncw_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t wt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     wt2 = float64_to_int32_round_to_zero(fdt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
 }
 
-uint32_t helper_float_truncw_s(uint32_t fst0)
+uint32_t helper_float_truncw_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t wt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     wt2 = float32_to_int32_round_to_zero(fst0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
 }
 
-uint64_t helper_float_ceill_d(uint64_t fdt0)
+uint64_t helper_float_ceill_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t dt2;
 
@@ -2414,13 +2594,13 @@
     set_float_rounding_mode(float_round_up, &env->active_fpu.fp_status);
     dt2 = float64_to_int64(fdt0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint64_t helper_float_ceill_s(uint32_t fst0)
+uint64_t helper_float_ceill_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint64_t dt2;
 
@@ -2428,13 +2608,13 @@
     set_float_rounding_mode(float_round_up, &env->active_fpu.fp_status);
     dt2 = float32_to_int64(fst0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint32_t helper_float_ceilw_d(uint64_t fdt0)
+uint32_t helper_float_ceilw_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t wt2;
 
@@ -2442,13 +2622,13 @@
     set_float_rounding_mode(float_round_up, &env->active_fpu.fp_status);
     wt2 = float64_to_int32(fdt0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
 }
 
-uint32_t helper_float_ceilw_s(uint32_t fst0)
+uint32_t helper_float_ceilw_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t wt2;
 
@@ -2456,13 +2636,13 @@
     set_float_rounding_mode(float_round_up, &env->active_fpu.fp_status);
     wt2 = float32_to_int32(fst0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
 }
 
-uint64_t helper_float_floorl_d(uint64_t fdt0)
+uint64_t helper_float_floorl_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t dt2;
 
@@ -2470,13 +2650,13 @@
     set_float_rounding_mode(float_round_down, &env->active_fpu.fp_status);
     dt2 = float64_to_int64(fdt0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint64_t helper_float_floorl_s(uint32_t fst0)
+uint64_t helper_float_floorl_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint64_t dt2;
 
@@ -2484,13 +2664,13 @@
     set_float_rounding_mode(float_round_down, &env->active_fpu.fp_status);
     dt2 = float32_to_int64(fst0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint32_t helper_float_floorw_d(uint64_t fdt0)
+uint32_t helper_float_floorw_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t wt2;
 
@@ -2498,13 +2678,13 @@
     set_float_rounding_mode(float_round_down, &env->active_fpu.fp_status);
     wt2 = float64_to_int32(fdt0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
 }
 
-uint32_t helper_float_floorw_s(uint32_t fst0)
+uint32_t helper_float_floorw_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t wt2;
 
@@ -2512,7 +2692,7 @@
     set_float_rounding_mode(float_round_down, &env->active_fpu.fp_status);
     wt2 = float32_to_int32(fst0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
@@ -2542,69 +2722,69 @@
 #undef FLOAT_UNOP
 
 /* MIPS specific unary operations */
-uint64_t helper_float_recip_d(uint64_t fdt0)
+uint64_t helper_float_recip_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t fdt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = float64_div(FLOAT_ONE64, fdt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fdt2;
 }
 
-uint32_t helper_float_recip_s(uint32_t fst0)
+uint32_t helper_float_recip_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t fst2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_div(FLOAT_ONE32, fst0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fst2;
 }
 
-uint64_t helper_float_rsqrt_d(uint64_t fdt0)
+uint64_t helper_float_rsqrt_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t fdt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = float64_sqrt(fdt0, &env->active_fpu.fp_status);
     fdt2 = float64_div(FLOAT_ONE64, fdt2, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fdt2;
 }
 
-uint32_t helper_float_rsqrt_s(uint32_t fst0)
+uint32_t helper_float_rsqrt_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t fst2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_sqrt(fst0, &env->active_fpu.fp_status);
     fst2 = float32_div(FLOAT_ONE32, fst2, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fst2;
 }
 
-uint64_t helper_float_recip1_d(uint64_t fdt0)
+uint64_t helper_float_recip1_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t fdt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = float64_div(FLOAT_ONE64, fdt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fdt2;
 }
 
-uint32_t helper_float_recip1_s(uint32_t fst0)
+uint32_t helper_float_recip1_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t fst2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_div(FLOAT_ONE32, fst0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fst2;
 }
 
-uint64_t helper_float_recip1_ps(uint64_t fdt0)
+uint64_t helper_float_recip1_ps(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t fst2;
     uint32_t fsth2;
@@ -2612,33 +2792,33 @@
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_div(FLOAT_ONE32, fdt0 & 0XFFFFFFFF, &env->active_fpu.fp_status);
     fsth2 = float32_div(FLOAT_ONE32, fdt0 >> 32, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
-uint64_t helper_float_rsqrt1_d(uint64_t fdt0)
+uint64_t helper_float_rsqrt1_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t fdt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = float64_sqrt(fdt0, &env->active_fpu.fp_status);
     fdt2 = float64_div(FLOAT_ONE64, fdt2, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fdt2;
 }
 
-uint32_t helper_float_rsqrt1_s(uint32_t fst0)
+uint32_t helper_float_rsqrt1_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t fst2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_sqrt(fst0, &env->active_fpu.fp_status);
     fst2 = float32_div(FLOAT_ONE32, fst2, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fst2;
 }
 
-uint64_t helper_float_rsqrt1_ps(uint64_t fdt0)
+uint64_t helper_float_rsqrt1_ps(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t fst2;
     uint32_t fsth2;
@@ -2648,39 +2828,43 @@
     fsth2 = float32_sqrt(fdt0 >> 32, &env->active_fpu.fp_status);
     fst2 = float32_div(FLOAT_ONE32, fst2, &env->active_fpu.fp_status);
     fsth2 = float32_div(FLOAT_ONE32, fsth2, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
-#define FLOAT_OP(name, p) void helper_float_##name##_##p(void)
+#define FLOAT_OP(name, p) void helper_float_##name##_##p(CPUMIPSState *env)
 
 /* binary operations */
 #define FLOAT_BINOP(name)                                          \
-uint64_t helper_float_ ## name ## _d(uint64_t fdt0, uint64_t fdt1)     \
+uint64_t helper_float_ ## name ## _d(CPUMIPSState *env,            \
+                                     uint64_t fdt0, uint64_t fdt1) \
 {                                                                  \
     uint64_t dt2;                                                  \
                                                                    \
     set_float_exception_flags(0, &env->active_fpu.fp_status);            \
     dt2 = float64_ ## name (fdt0, fdt1, &env->active_fpu.fp_status);     \
-    update_fcr31();                                                \
+    update_fcr31(env);                                                \
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & FP_INVALID)                \
         dt2 = FLOAT_QNAN64;                                        \
     return dt2;                                                    \
 }                                                                  \
                                                                    \
-uint32_t helper_float_ ## name ## _s(uint32_t fst0, uint32_t fst1)     \
+uint32_t helper_float_ ## name ## _s(CPUMIPSState *env,            \
+                                     uint32_t fst0, uint32_t fst1) \
 {                                                                  \
     uint32_t wt2;                                                  \
                                                                    \
     set_float_exception_flags(0, &env->active_fpu.fp_status);            \
     wt2 = float32_ ## name (fst0, fst1, &env->active_fpu.fp_status);     \
-    update_fcr31();                                                \
+    update_fcr31(env);                                                \
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & FP_INVALID)                \
         wt2 = FLOAT_QNAN32;                                        \
     return wt2;                                                    \
 }                                                                  \
                                                                    \
-uint64_t helper_float_ ## name ## _ps(uint64_t fdt0, uint64_t fdt1)    \
+uint64_t helper_float_ ## name ## _ps(CPUMIPSState *env,           \
+                                      uint64_t fdt0,               \
+                                      uint64_t fdt1)               \
 {                                                                  \
     uint32_t fst0 = fdt0 & 0XFFFFFFFF;                             \
     uint32_t fsth0 = fdt0 >> 32;                                   \
@@ -2692,7 +2876,7 @@
     set_float_exception_flags(0, &env->active_fpu.fp_status);            \
     wt2 = float32_ ## name (fst0, fst1, &env->active_fpu.fp_status);     \
     wth2 = float32_ ## name (fsth0, fsth1, &env->active_fpu.fp_status);  \
-    update_fcr31();                                                \
+    update_fcr31(env);                                                \
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & FP_INVALID) {              \
         wt2 = FLOAT_QNAN32;                                        \
         wth2 = FLOAT_QNAN32;                                       \
@@ -2708,21 +2892,24 @@
 
 /* ternary operations */
 #define FLOAT_TERNOP(name1, name2)                                        \
-uint64_t helper_float_ ## name1 ## name2 ## _d(uint64_t fdt0, uint64_t fdt1,  \
+uint64_t helper_float_ ## name1 ## name2 ## _d(CPUMIPSState *env,         \
+                                           uint64_t fdt0, uint64_t fdt1,  \
                                            uint64_t fdt2)                 \
 {                                                                         \
     fdt0 = float64_ ## name1 (fdt0, fdt1, &env->active_fpu.fp_status);          \
     return float64_ ## name2 (fdt0, fdt2, &env->active_fpu.fp_status);          \
 }                                                                         \
                                                                           \
-uint32_t helper_float_ ## name1 ## name2 ## _s(uint32_t fst0, uint32_t fst1,  \
+uint32_t helper_float_ ## name1 ## name2 ## _s(CPUMIPSState *env,         \
+                                           uint32_t fst0, uint32_t fst1,  \
                                            uint32_t fst2)                 \
 {                                                                         \
     fst0 = float32_ ## name1 (fst0, fst1, &env->active_fpu.fp_status);          \
     return float32_ ## name2 (fst0, fst2, &env->active_fpu.fp_status);          \
 }                                                                         \
                                                                           \
-uint64_t helper_float_ ## name1 ## name2 ## _ps(uint64_t fdt0, uint64_t fdt1, \
+uint64_t helper_float_ ## name1 ## name2 ## _ps(CPUMIPSState *env,        \
+                                            uint64_t fdt0, uint64_t fdt1, \
                                             uint64_t fdt2)                \
 {                                                                         \
     uint32_t fst0 = fdt0 & 0XFFFFFFFF;                                    \
@@ -2745,7 +2932,8 @@
 
 /* negated ternary operations */
 #define FLOAT_NTERNOP(name1, name2)                                       \
-uint64_t helper_float_n ## name1 ## name2 ## _d(uint64_t fdt0, uint64_t fdt1, \
+uint64_t helper_float_n ## name1 ## name2 ## _d(CPUMIPSState *env,        \
+                                           uint64_t fdt0, uint64_t fdt1, \
                                            uint64_t fdt2)                 \
 {                                                                         \
     fdt0 = float64_ ## name1 (fdt0, fdt1, &env->active_fpu.fp_status);          \
@@ -2753,7 +2941,8 @@
     return float64_chs(fdt2);                                             \
 }                                                                         \
                                                                           \
-uint32_t helper_float_n ## name1 ## name2 ## _s(uint32_t fst0, uint32_t fst1, \
+uint32_t helper_float_n ## name1 ## name2 ## _s(CPUMIPSState *env,        \
+                                           uint32_t fst0, uint32_t fst1, \
                                            uint32_t fst2)                 \
 {                                                                         \
     fst0 = float32_ ## name1 (fst0, fst1, &env->active_fpu.fp_status);          \
@@ -2761,7 +2950,8 @@
     return float32_chs(fst2);                                             \
 }                                                                         \
                                                                           \
-uint64_t helper_float_n ## name1 ## name2 ## _ps(uint64_t fdt0, uint64_t fdt1,\
+uint64_t helper_float_n ## name1 ## name2 ## _ps(CPUMIPSState *env,       \
+                                           uint64_t fdt0, uint64_t fdt1,\
                                            uint64_t fdt2)                 \
 {                                                                         \
     uint32_t fst0 = fdt0 & 0XFFFFFFFF;                                    \
@@ -2785,25 +2975,25 @@
 #undef FLOAT_NTERNOP
 
 /* MIPS specific binary operations */
-uint64_t helper_float_recip2_d(uint64_t fdt0, uint64_t fdt2)
+uint64_t helper_float_recip2_d(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2)
 {
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = float64_mul(fdt0, fdt2, &env->active_fpu.fp_status);
     fdt2 = float64_chs(float64_sub(fdt2, FLOAT_ONE64, &env->active_fpu.fp_status));
-    update_fcr31();
+    update_fcr31(env);
     return fdt2;
 }
 
-uint32_t helper_float_recip2_s(uint32_t fst0, uint32_t fst2)
+uint32_t helper_float_recip2_s(CPUMIPSState *env, uint32_t fst0, uint32_t fst2)
 {
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_mul(fst0, fst2, &env->active_fpu.fp_status);
     fst2 = float32_chs(float32_sub(fst2, FLOAT_ONE32, &env->active_fpu.fp_status));
-    update_fcr31();
+    update_fcr31(env);
     return fst2;
 }
 
-uint64_t helper_float_recip2_ps(uint64_t fdt0, uint64_t fdt2)
+uint64_t helper_float_recip2_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2)
 {
     uint32_t fst0 = fdt0 & 0XFFFFFFFF;
     uint32_t fsth0 = fdt0 >> 32;
@@ -2815,31 +3005,31 @@
     fsth2 = float32_mul(fsth0, fsth2, &env->active_fpu.fp_status);
     fst2 = float32_chs(float32_sub(fst2, FLOAT_ONE32, &env->active_fpu.fp_status));
     fsth2 = float32_chs(float32_sub(fsth2, FLOAT_ONE32, &env->active_fpu.fp_status));
-    update_fcr31();
+    update_fcr31(env);
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
-uint64_t helper_float_rsqrt2_d(uint64_t fdt0, uint64_t fdt2)
+uint64_t helper_float_rsqrt2_d(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2)
 {
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = float64_mul(fdt0, fdt2, &env->active_fpu.fp_status);
     fdt2 = float64_sub(fdt2, FLOAT_ONE64, &env->active_fpu.fp_status);
     fdt2 = float64_chs(float64_div(fdt2, FLOAT_TWO64, &env->active_fpu.fp_status));
-    update_fcr31();
+    update_fcr31(env);
     return fdt2;
 }
 
-uint32_t helper_float_rsqrt2_s(uint32_t fst0, uint32_t fst2)
+uint32_t helper_float_rsqrt2_s(CPUMIPSState *env, uint32_t fst0, uint32_t fst2)
 {
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_mul(fst0, fst2, &env->active_fpu.fp_status);
     fst2 = float32_sub(fst2, FLOAT_ONE32, &env->active_fpu.fp_status);
     fst2 = float32_chs(float32_div(fst2, FLOAT_TWO32, &env->active_fpu.fp_status));
-    update_fcr31();
+    update_fcr31(env);
     return fst2;
 }
 
-uint64_t helper_float_rsqrt2_ps(uint64_t fdt0, uint64_t fdt2)
+uint64_t helper_float_rsqrt2_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2)
 {
     uint32_t fst0 = fdt0 & 0XFFFFFFFF;
     uint32_t fsth0 = fdt0 >> 32;
@@ -2853,11 +3043,11 @@
     fsth2 = float32_sub(fsth2, FLOAT_ONE32, &env->active_fpu.fp_status);
     fst2 = float32_chs(float32_div(fst2, FLOAT_TWO32, &env->active_fpu.fp_status));
     fsth2 = float32_chs(float32_div(fsth2, FLOAT_TWO32, &env->active_fpu.fp_status));
-    update_fcr31();
+    update_fcr31(env);
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
-uint64_t helper_float_addr_ps(uint64_t fdt0, uint64_t fdt1)
+uint64_t helper_float_addr_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt1)
 {
     uint32_t fst0 = fdt0 & 0XFFFFFFFF;
     uint32_t fsth0 = fdt0 >> 32;
@@ -2869,11 +3059,11 @@
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_add (fst0, fsth0, &env->active_fpu.fp_status);
     fsth2 = float32_add (fst1, fsth1, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
-uint64_t helper_float_mulr_ps(uint64_t fdt0, uint64_t fdt1)
+uint64_t helper_float_mulr_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt1)
 {
     uint32_t fst0 = fdt0 & 0XFFFFFFFF;
     uint32_t fsth0 = fdt0 >> 32;
@@ -2885,28 +3075,30 @@
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_mul (fst0, fsth0, &env->active_fpu.fp_status);
     fsth2 = float32_mul (fst1, fsth1, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
 /* compare operations */
 #define FOP_COND_D(op, cond)                                   \
-void helper_cmp_d_ ## op (uint64_t fdt0, uint64_t fdt1, int cc)    \
+void helper_cmp_d_ ## op(CPUMIPSState *env, uint64_t fdt0,     \
+                         uint64_t fdt1, int cc)                \
 {                                                              \
     int c = cond;                                              \
-    update_fcr31();                                            \
+    update_fcr31(env);                                            \
     if (c)                                                     \
         SET_FP_COND(cc, env->active_fpu);                      \
     else                                                       \
         CLEAR_FP_COND(cc, env->active_fpu);                    \
 }                                                              \
-void helper_cmpabs_d_ ## op (uint64_t fdt0, uint64_t fdt1, int cc) \
+void helper_cmpabs_d_ ## op(CPUMIPSState *env, uint64_t fdt0,  \
+                            uint64_t fdt1, int cc)             \
 {                                                              \
     int c;                                                     \
     fdt0 = float64_abs(fdt0);                                  \
     fdt1 = float64_abs(fdt1);                                  \
     c = cond;                                                  \
-    update_fcr31();                                            \
+    update_fcr31(env);                                            \
     if (c)                                                     \
         SET_FP_COND(cc, env->active_fpu);                      \
     else                                                       \
@@ -2949,22 +3141,24 @@
 FOP_COND_D(ngt, float64_is_unordered(1, fdt1, fdt0, &env->active_fpu.fp_status)  || float64_le(fdt0, fdt1, &env->active_fpu.fp_status))
 
 #define FOP_COND_S(op, cond)                                   \
-void helper_cmp_s_ ## op (uint32_t fst0, uint32_t fst1, int cc)    \
+void helper_cmp_s_ ## op(CPUMIPSState *env, uint32_t fst0,     \
+                         uint32_t fst1, int cc)                \
 {                                                              \
     int c = cond;                                              \
-    update_fcr31();                                            \
+    update_fcr31(env);                                            \
     if (c)                                                     \
         SET_FP_COND(cc, env->active_fpu);                      \
     else                                                       \
         CLEAR_FP_COND(cc, env->active_fpu);                    \
 }                                                              \
-void helper_cmpabs_s_ ## op (uint32_t fst0, uint32_t fst1, int cc) \
+void helper_cmpabs_s_ ## op(CPUMIPSState *env, uint32_t fst0,  \
+                            uint32_t fst1, int cc)             \
 {                                                              \
     int c;                                                     \
     fst0 = float32_abs(fst0);                                  \
     fst1 = float32_abs(fst1);                                  \
     c = cond;                                                  \
-    update_fcr31();                                            \
+    update_fcr31(env);                                            \
     if (c)                                                     \
         SET_FP_COND(cc, env->active_fpu);                      \
     else                                                       \
@@ -3007,7 +3201,8 @@
 FOP_COND_S(ngt, float32_is_unordered(1, fst1, fst0, &env->active_fpu.fp_status)  || float32_le(fst0, fst1, &env->active_fpu.fp_status))
 
 #define FOP_COND_PS(op, condl, condh)                           \
-void helper_cmp_ps_ ## op (uint64_t fdt0, uint64_t fdt1, int cc)    \
+void helper_cmp_ps_ ## op(CPUMIPSState *env, uint64_t fdt0,     \
+                          uint64_t fdt1, int cc)                \
 {                                                               \
     uint32_t fst0 = float32_abs(fdt0 & 0XFFFFFFFF);             \
     uint32_t fsth0 = float32_abs(fdt0 >> 32);                   \
@@ -3016,7 +3211,7 @@
     int cl = condl;                                             \
     int ch = condh;                                             \
                                                                 \
-    update_fcr31();                                             \
+    update_fcr31(env);                                             \
     if (cl)                                                     \
         SET_FP_COND(cc, env->active_fpu);                       \
     else                                                        \
@@ -3026,7 +3221,8 @@
     else                                                        \
         CLEAR_FP_COND(cc + 1, env->active_fpu);                 \
 }                                                               \
-void helper_cmpabs_ps_ ## op (uint64_t fdt0, uint64_t fdt1, int cc) \
+void helper_cmpabs_ps_ ## op(CPUMIPSState *env, uint64_t fdt0,  \
+                             uint64_t fdt1, int cc)             \
 {                                                               \
     uint32_t fst0 = float32_abs(fdt0 & 0XFFFFFFFF);             \
     uint32_t fsth0 = float32_abs(fdt0 >> 32);                   \
@@ -3035,7 +3231,7 @@
     int cl = condl;                                             \
     int ch = condh;                                             \
                                                                 \
-    update_fcr31();                                             \
+    update_fcr31(env);                                             \
     if (cl)                                                     \
         SET_FP_COND(cc, env->active_fpu);                       \
     else                                                        \
diff --git a/target-mips/translate.c b/target-mips/translate.c
index 87b2cba..683c936 100755
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -26,7 +26,6 @@
 #include <inttypes.h>
 
 #include "cpu.h"
-#include "exec/exec-all.h"
 #include "disas/disas.h"
 #include "tcg-op.h"
 #include "qemu-common.h"
@@ -433,6 +432,8 @@
 static TCGv_i32 hflags;
 static TCGv_i32 fpu_fcr0, fpu_fcr31;
 
+static uint32_t gen_opc_hflags[OPC_BUF_SIZE];
+
 #include "exec/gen-icount.h"
 
 #define gen_helper_0i(name, arg) do {                             \
@@ -459,6 +460,12 @@
     tcg_temp_free_i32(helper_tmp);                                \
     } while(0)
 
+#define gen_helper_4i(name, arg1, arg2, arg3, arg4, arg5) do {    \
+    TCGv_i32 helper_tmp = tcg_const_i32(arg5);                    \
+    gen_helper_##name(arg1, arg2, arg3, arg4, helper_tmp);        \
+    tcg_temp_free_i32(helper_tmp);                                \
+    } while(0)
+
 typedef struct DisasContext {
     struct TranslationBlock *tb;
     target_ulong pc, saved_pc;
@@ -657,22 +664,22 @@
                                                TCGv_i##bits b, int cc)        \
 {                                                                             \
     switch (n) {                                                              \
-    case  0: gen_helper_2i(cmp ## type ## _ ## fmt ## _f, a, b, cc);    break;\
-    case  1: gen_helper_2i(cmp ## type ## _ ## fmt ## _un, a, b, cc);   break;\
-    case  2: gen_helper_2i(cmp ## type ## _ ## fmt ## _eq, a, b, cc);   break;\
-    case  3: gen_helper_2i(cmp ## type ## _ ## fmt ## _ueq, a, b, cc);  break;\
-    case  4: gen_helper_2i(cmp ## type ## _ ## fmt ## _olt, a, b, cc);  break;\
-    case  5: gen_helper_2i(cmp ## type ## _ ## fmt ## _ult, a, b, cc);  break;\
-    case  6: gen_helper_2i(cmp ## type ## _ ## fmt ## _ole, a, b, cc);  break;\
-    case  7: gen_helper_2i(cmp ## type ## _ ## fmt ## _ule, a, b, cc);  break;\
-    case  8: gen_helper_2i(cmp ## type ## _ ## fmt ## _sf, a, b, cc);   break;\
-    case  9: gen_helper_2i(cmp ## type ## _ ## fmt ## _ngle, a, b, cc); break;\
-    case 10: gen_helper_2i(cmp ## type ## _ ## fmt ## _seq, a, b, cc);  break;\
-    case 11: gen_helper_2i(cmp ## type ## _ ## fmt ## _ngl, a, b, cc);  break;\
-    case 12: gen_helper_2i(cmp ## type ## _ ## fmt ## _lt, a, b, cc);   break;\
-    case 13: gen_helper_2i(cmp ## type ## _ ## fmt ## _nge, a, b, cc);  break;\
-    case 14: gen_helper_2i(cmp ## type ## _ ## fmt ## _le, a, b, cc);   break;\
-    case 15: gen_helper_2i(cmp ## type ## _ ## fmt ## _ngt, a, b, cc);  break;\
+    case  0: gen_helper_3i(cmp ## type ## _ ## fmt ## _f, cpu_env, a, b, cc);    break;\
+    case  1: gen_helper_3i(cmp ## type ## _ ## fmt ## _un, cpu_env, a, b, cc);   break;\
+    case  2: gen_helper_3i(cmp ## type ## _ ## fmt ## _eq, cpu_env, a, b, cc);   break;\
+    case  3: gen_helper_3i(cmp ## type ## _ ## fmt ## _ueq, cpu_env, a, b, cc);  break;\
+    case  4: gen_helper_3i(cmp ## type ## _ ## fmt ## _olt, cpu_env, a, b, cc);  break;\
+    case  5: gen_helper_3i(cmp ## type ## _ ## fmt ## _ult, cpu_env, a, b, cc);  break;\
+    case  6: gen_helper_3i(cmp ## type ## _ ## fmt ## _ole, cpu_env, a, b, cc);  break;\
+    case  7: gen_helper_3i(cmp ## type ## _ ## fmt ## _ule, cpu_env, a, b, cc);  break;\
+    case  8: gen_helper_3i(cmp ## type ## _ ## fmt ## _sf, cpu_env, a, b, cc);   break;\
+    case  9: gen_helper_3i(cmp ## type ## _ ## fmt ## _ngle, cpu_env, a, b, cc); break;\
+    case 10: gen_helper_3i(cmp ## type ## _ ## fmt ## _seq, cpu_env, a, b, cc);  break;\
+    case 11: gen_helper_3i(cmp ## type ## _ ## fmt ## _ngl, cpu_env, a, b, cc);  break;\
+    case 12: gen_helper_3i(cmp ## type ## _ ## fmt ## _lt, cpu_env, a, b, cc);   break;\
+    case 13: gen_helper_3i(cmp ## type ## _ ## fmt ## _nge, cpu_env, a, b, cc);  break;\
+    case 14: gen_helper_3i(cmp ## type ## _ ## fmt ## _le, cpu_env, a, b, cc);   break;\
+    case 15: gen_helper_3i(cmp ## type ## _ ## fmt ## _ngt, cpu_env, a, b, cc);  break;\
     default: abort();                                                         \
     }                                                                         \
 }
@@ -790,7 +797,7 @@
     TCGv_i32 texcp = tcg_const_i32(excp);
     TCGv_i32 terr = tcg_const_i32(err);
     save_cpu_state(ctx, 1);
-    gen_helper_raise_exception_err(texcp, terr);
+    gen_helper_raise_exception_err(cpu_env, texcp, terr);
     tcg_temp_free_i32(terr);
     tcg_temp_free_i32(texcp);
 }
@@ -799,7 +806,7 @@
 generate_exception (DisasContext *ctx, int excp)
 {
     save_cpu_state(ctx, 1);
-    gen_helper_0i(raise_exception, excp);
+    gen_helper_1i(raise_exception, cpu_env, excp);
 }
 
 /* Addresses computation */
@@ -927,7 +934,7 @@
 #define OP_LD_ATOMIC(insn,fname)                                           \
 static inline void op_ldst_##insn(TCGv ret, TCGv arg1, DisasContext *ctx)  \
 {                                                                          \
-    gen_helper_2i(insn, ret, arg1, ctx->mem_idx);                          \
+    gen_helper_3i(insn, ret, cpu_env, arg1, ctx->mem_idx);                 \
 }
 #endif
 OP_LD_ATOMIC(ll,ld32s);
@@ -954,7 +961,7 @@
     tcg_gen_movi_tl(t0, rt | ((almask << 3) & 0x20));                        \
     tcg_gen_st_tl(t0, cpu_env, offsetof(CPUMIPSState, llreg));                   \
     tcg_gen_st_tl(arg1, cpu_env, offsetof(CPUMIPSState, llnewval));              \
-    gen_helper_0i(raise_exception, EXCP_SC);                                 \
+    gen_helper_1i(raise_exception, cpu_env, EXCP_SC);                            \
     gen_set_label(l2);                                                       \
     tcg_gen_movi_tl(t0, 0);                                                  \
     gen_store_gpr(t0, rt);                                                   \
@@ -965,7 +972,7 @@
 static inline void op_ldst_##insn(TCGv arg1, TCGv arg2, int rt, DisasContext *ctx) \
 {                                                                            \
     TCGv t0 = tcg_temp_new();                                                \
-    gen_helper_3i(insn, t0, arg1, arg2, ctx->mem_idx);                       \
+    gen_helper_4i(insn, t0, cpu_env, arg1, arg2, ctx->mem_idx);              \
     gen_store_gpr(t0, rt);                                                   \
     tcg_temp_free(t0);                                                       \
 }
@@ -1023,27 +1030,27 @@
     case OPC_LDL:
         save_cpu_state(ctx, 1);
         gen_load_gpr(t1, rt);
-        gen_helper_3i(ldl, t1, t1, t0, ctx->mem_idx);
+        gen_helper_4i(ldl, t1, cpu_env, t1, t0, ctx->mem_idx);
         gen_store_gpr(t1, rt);
         opn = "ldl";
         break;
     case OPC_SDL:
         save_cpu_state(ctx, 1);
         gen_load_gpr(t1, rt);
-        gen_helper_2i(sdl, t1, t0, ctx->mem_idx);
+        gen_helper_3i(sdl, cpu_env, t1, t0, ctx->mem_idx);
         opn = "sdl";
         break;
     case OPC_LDR:
         save_cpu_state(ctx, 1);
         gen_load_gpr(t1, rt);
-        gen_helper_3i(ldr, t1, t1, t0, ctx->mem_idx);
+        gen_helper_4i(ldr, t1, cpu_env, t1, t0, ctx->mem_idx);
         gen_store_gpr(t1, rt);
         opn = "ldr";
         break;
     case OPC_SDR:
         save_cpu_state(ctx, 1);
         gen_load_gpr(t1, rt);
-        gen_helper_2i(sdr, t1, t0, ctx->mem_idx);
+        gen_helper_3i(sdr, cpu_env, t1, t0, ctx->mem_idx);
         opn = "sdr";
         break;
 #endif
@@ -1098,27 +1105,27 @@
     case OPC_LWL:
         save_cpu_state(ctx, 1);
         gen_load_gpr(t1, rt);
-        gen_helper_3i(lwl, t1, t1, t0, ctx->mem_idx);
+        gen_helper_4i(lwl, t1, cpu_env, t1, t0, ctx->mem_idx);
         gen_store_gpr(t1, rt);
         opn = "lwl";
         break;
     case OPC_SWL:
         save_cpu_state(ctx, 1);
         gen_load_gpr(t1, rt);
-        gen_helper_2i(swl, t1, t0, ctx->mem_idx);
-        opn = "swr";
+        gen_helper_3i(swl, cpu_env, t1, t0, ctx->mem_idx);
+        opn = "swl";
         break;
     case OPC_LWR:
         save_cpu_state(ctx, 1);
         gen_load_gpr(t1, rt);
-        gen_helper_3i(lwr, t1, t1, t0, ctx->mem_idx);
+        gen_helper_4i(lwr, t1, cpu_env, t1, t0, ctx->mem_idx);
         gen_store_gpr(t1, rt);
         opn = "lwr";
         break;
     case OPC_SWR:
         save_cpu_state(ctx, 1);
         gen_load_gpr(t1, rt);
-        gen_helper_2i(swr, t1, t0, ctx->mem_idx);
+        gen_helper_3i(swr, cpu_env, t1, t0, ctx->mem_idx);
         opn = "swr";
         break;
     case OPC_LL:
@@ -2246,59 +2253,59 @@
 
     switch (opc) {
     case OPC_VR54XX_MULS:
-        gen_helper_muls(t0, t0, t1);
+        gen_helper_muls(t0, cpu_env, t0, t1);
         opn = "muls";
         break;
     case OPC_VR54XX_MULSU:
-        gen_helper_mulsu(t0, t0, t1);
+        gen_helper_mulsu(t0, cpu_env, t0, t1);
         opn = "mulsu";
         break;
     case OPC_VR54XX_MACC:
-        gen_helper_macc(t0, t0, t1);
+        gen_helper_macc(t0, cpu_env, t0, t1);
         opn = "macc";
         break;
     case OPC_VR54XX_MACCU:
-        gen_helper_maccu(t0, t0, t1);
+        gen_helper_maccu(t0, cpu_env, t0, t1);
         opn = "maccu";
         break;
     case OPC_VR54XX_MSAC:
-        gen_helper_msac(t0, t0, t1);
+        gen_helper_msac(t0, cpu_env, t0, t1);
         opn = "msac";
         break;
     case OPC_VR54XX_MSACU:
-        gen_helper_msacu(t0, t0, t1);
+        gen_helper_msacu(t0, cpu_env, t0, t1);
         opn = "msacu";
         break;
     case OPC_VR54XX_MULHI:
-        gen_helper_mulhi(t0, t0, t1);
+        gen_helper_mulhi(t0, cpu_env, t0, t1);
         opn = "mulhi";
         break;
     case OPC_VR54XX_MULHIU:
-        gen_helper_mulhiu(t0, t0, t1);
+        gen_helper_mulhiu(t0, cpu_env, t0, t1);
         opn = "mulhiu";
         break;
     case OPC_VR54XX_MULSHI:
-        gen_helper_mulshi(t0, t0, t1);
+        gen_helper_mulshi(t0, cpu_env, t0, t1);
         opn = "mulshi";
         break;
     case OPC_VR54XX_MULSHIU:
-        gen_helper_mulshiu(t0, t0, t1);
+        gen_helper_mulshiu(t0, cpu_env, t0, t1);
         opn = "mulshiu";
         break;
     case OPC_VR54XX_MACCHI:
-        gen_helper_macchi(t0, t0, t1);
+        gen_helper_macchi(t0, cpu_env, t0, t1);
         opn = "macchi";
         break;
     case OPC_VR54XX_MACCHIU:
-        gen_helper_macchiu(t0, t0, t1);
+        gen_helper_macchiu(t0, cpu_env, t0, t1);
         opn = "macchiu";
         break;
     case OPC_VR54XX_MSACHI:
-        gen_helper_msachi(t0, t0, t1);
+        gen_helper_msachi(t0, cpu_env, t0, t1);
         opn = "msachi";
         break;
     case OPC_VR54XX_MSACHIU:
-        gen_helper_msachiu(t0, t0, t1);
+        gen_helper_msachiu(t0, cpu_env, t0, t1);
         opn = "msachiu";
         break;
     default:
@@ -2458,7 +2465,7 @@
         gen_save_pc(dest);
         if (ctx->singlestep_enabled) {
             save_cpu_state(ctx, 0);
-            gen_helper_0i(raise_exception, EXCP_DEBUG);
+            gen_helper_1i(raise_exception, cpu_env, EXCP_DEBUG);
         }
         tcg_gen_exit_tb(0);
     }
@@ -2911,17 +2918,17 @@
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_mvpcontrol(arg);
+            gen_helper_mfc0_mvpcontrol(arg, cpu_env);
             rn = "MVPControl";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_mvpconf0(arg);
+            gen_helper_mfc0_mvpconf0(arg, cpu_env);
             rn = "MVPConf0";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_mvpconf1(arg);
+            gen_helper_mfc0_mvpconf1(arg, cpu_env);
             rn = "MVPConf1";
             break;
         default:
@@ -2931,7 +2938,7 @@
     case 1:
         switch (sel) {
         case 0:
-            gen_helper_mfc0_random(arg);
+            gen_helper_mfc0_random(arg, cpu_env);
             rn = "Random";
             break;
         case 1:
@@ -2982,37 +2989,37 @@
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tcstatus(arg);
+            gen_helper_mfc0_tcstatus(arg, cpu_env);
             rn = "TCStatus";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tcbind(arg);
+            gen_helper_mfc0_tcbind(arg, cpu_env);
             rn = "TCBind";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tcrestart(arg);
+            gen_helper_mfc0_tcrestart(arg, cpu_env);
             rn = "TCRestart";
             break;
         case 4:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tchalt(arg);
+            gen_helper_mfc0_tchalt(arg, cpu_env);
             rn = "TCHalt";
             break;
         case 5:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tccontext(arg);
+            gen_helper_mfc0_tccontext(arg, cpu_env);
             rn = "TCContext";
             break;
         case 6:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tcschedule(arg);
+            gen_helper_mfc0_tcschedule(arg, cpu_env);
             rn = "TCSchedule";
             break;
         case 7:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tcschefback(arg);
+            gen_helper_mfc0_tcschefback(arg, cpu_env);
             rn = "TCScheFBack";
             break;
         default:
@@ -3123,7 +3130,7 @@
             /* Mark as an IO operation because we read the time.  */
             if (use_icount)
                 gen_io_start();
-            gen_helper_mfc0_count(arg);
+            gen_helper_mfc0_count(arg, cpu_env);
             if (use_icount) {
                 gen_io_end();
                 ctx->bstate = BS_STOP;
@@ -3253,7 +3260,7 @@
     case 17:
         switch (sel) {
         case 0:
-            gen_helper_mfc0_lladdr(arg);
+            gen_helper_mfc0_lladdr(arg, cpu_env);
             rn = "LLAddr";
             break;
         default:
@@ -3263,7 +3270,7 @@
     case 18:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_1i(mfc0_watchlo, arg, sel);
+            gen_helper_2i(mfc0_watchlo, arg, cpu_env, sel);
             rn = "WatchLo";
             break;
         default:
@@ -3273,7 +3280,7 @@
     case 19:
         switch (sel) {
         case 0 ...7:
-            gen_helper_1i(mfc0_watchhi, arg, sel);
+            gen_helper_2i(mfc0_watchhi, arg, cpu_env, sel);
             rn = "WatchHi";
             break;
         default:
@@ -3312,7 +3319,7 @@
     case 23:
         switch (sel) {
         case 0:
-            gen_helper_mfc0_debug(arg); /* EJTAG support */
+            gen_helper_mfc0_debug(arg, cpu_env); /* EJTAG support */
             rn = "Debug";
             break;
         case 1:
@@ -3486,12 +3493,12 @@
     case 0:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_index(arg);
+            gen_helper_mtc0_index(cpu_env, arg);
             rn = "Index";
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_mvpcontrol(arg);
+            gen_helper_mtc0_mvpcontrol(cpu_env, arg);
             rn = "MVPControl";
             break;
         case 2:
@@ -3516,22 +3523,22 @@
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpecontrol(arg);
+            gen_helper_mtc0_vpecontrol(cpu_env, arg);
             rn = "VPEControl";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpeconf0(arg);
+            gen_helper_mtc0_vpeconf0(cpu_env, arg);
             rn = "VPEConf0";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpeconf1(arg);
+            gen_helper_mtc0_vpeconf1(cpu_env, arg);
             rn = "VPEConf1";
             break;
         case 4:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_yqmask(arg);
+            gen_helper_mtc0_yqmask(cpu_env, arg);
             rn = "YQMask";
             break;
         case 5:
@@ -3546,7 +3553,7 @@
             break;
         case 7:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpeopt(arg);
+            gen_helper_mtc0_vpeopt(cpu_env, arg);
             rn = "VPEOpt";
             break;
         default:
@@ -3556,42 +3563,42 @@
     case 2:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_entrylo0(arg);
+            gen_helper_mtc0_entrylo0(cpu_env, arg);
             rn = "EntryLo0";
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcstatus(arg);
+            gen_helper_mtc0_tcstatus(cpu_env, arg);
             rn = "TCStatus";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcbind(arg);
+            gen_helper_mtc0_tcbind(cpu_env, arg);
             rn = "TCBind";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcrestart(arg);
+            gen_helper_mtc0_tcrestart(cpu_env, arg);
             rn = "TCRestart";
             break;
         case 4:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tchalt(arg);
+            gen_helper_mtc0_tchalt(cpu_env, arg);
             rn = "TCHalt";
             break;
         case 5:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tccontext(arg);
+            gen_helper_mtc0_tccontext(cpu_env, arg);
             rn = "TCContext";
             break;
         case 6:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcschedule(arg);
+            gen_helper_mtc0_tcschedule(cpu_env, arg);
             rn = "TCSchedule";
             break;
         case 7:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcschefback(arg);
+            gen_helper_mtc0_tcschefback(cpu_env, arg);
             rn = "TCScheFBack";
             break;
         default:
@@ -3601,7 +3608,7 @@
     case 3:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_entrylo1(arg);
+            gen_helper_mtc0_entrylo1(cpu_env, arg);
             rn = "EntryLo1";
             break;
         default:
@@ -3611,7 +3618,7 @@
     case 4:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_context(arg);
+            gen_helper_mtc0_context(cpu_env, arg);
             rn = "Context";
             break;
         case 1:
@@ -3625,12 +3632,12 @@
     case 5:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_pagemask(arg);
+            gen_helper_mtc0_pagemask(cpu_env, arg);
             rn = "PageMask";
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_pagegrain(arg);
+            gen_helper_mtc0_pagegrain(cpu_env, arg);
             rn = "PageGrain";
             break;
         default:
@@ -3640,32 +3647,32 @@
     case 6:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_wired(arg);
+            gen_helper_mtc0_wired(cpu_env, arg);
             rn = "Wired";
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf0(arg);
+            gen_helper_mtc0_srsconf0(cpu_env, arg);
             rn = "SRSConf0";
             break;
         case 2:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf1(arg);
+            gen_helper_mtc0_srsconf1(cpu_env, arg);
             rn = "SRSConf1";
             break;
         case 3:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf2(arg);
+            gen_helper_mtc0_srsconf2(cpu_env, arg);
             rn = "SRSConf2";
             break;
         case 4:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf3(arg);
+            gen_helper_mtc0_srsconf3(cpu_env, arg);
             rn = "SRSConf3";
             break;
         case 5:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf4(arg);
+            gen_helper_mtc0_srsconf4(cpu_env, arg);
             rn = "SRSConf4";
             break;
         default:
@@ -3676,7 +3683,7 @@
         switch (sel) {
         case 0:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_hwrena(arg);
+            gen_helper_mtc0_hwrena(cpu_env, arg);
             rn = "HWREna";
             break;
         default:
@@ -3690,7 +3697,7 @@
     case 9:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_count(arg);
+            gen_helper_mtc0_count(cpu_env, arg);
             rn = "Count";
             break;
         /* 6,7 are implementation dependent */
@@ -3701,7 +3708,7 @@
     case 10:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_entryhi(arg);
+            gen_helper_mtc0_entryhi(cpu_env, arg);
             rn = "EntryHi";
             break;
         default:
@@ -3711,7 +3718,7 @@
     case 11:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_compare(arg);
+            gen_helper_mtc0_compare(cpu_env, arg);
             rn = "Compare";
             break;
         /* 6,7 are implementation dependent */
@@ -3723,7 +3730,7 @@
         switch (sel) {
         case 0:
             save_cpu_state(ctx, 1);
-            gen_helper_mtc0_status(arg);
+            gen_helper_mtc0_status(cpu_env, arg);
             /* BS_STOP isn't good enough here, hflags may have changed. */
             gen_save_pc(ctx->pc + 4);
             ctx->bstate = BS_EXCP;
@@ -3731,14 +3738,14 @@
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_intctl(arg);
+            gen_helper_mtc0_intctl(cpu_env, arg);
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "IntCtl";
             break;
         case 2:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsctl(arg);
+            gen_helper_mtc0_srsctl(cpu_env, arg);
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "SRSCtl";
@@ -3758,7 +3765,7 @@
         switch (sel) {
         case 0:
             save_cpu_state(ctx, 1);
-            gen_helper_mtc0_cause(arg);
+            gen_helper_mtc0_cause(cpu_env, arg);
             rn = "Cause";
             break;
         default:
@@ -3783,7 +3790,7 @@
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_ebase(arg);
+            gen_helper_mtc0_ebase(cpu_env, arg);
             rn = "EBase";
             break;
         default:
@@ -3793,7 +3800,7 @@
     case 16:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_config0(arg);
+            gen_helper_mtc0_config0(cpu_env, arg);
             rn = "Config";
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
@@ -3803,7 +3810,7 @@
             rn = "Config1";
             break;
         case 2:
-            gen_helper_mtc0_config2(arg);
+            gen_helper_mtc0_config2(cpu_env, arg);
             rn = "Config2";
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
@@ -3830,7 +3837,7 @@
     case 17:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_lladdr(arg);
+            gen_helper_mtc0_lladdr(cpu_env, arg);
             rn = "LLAddr";
             break;
         default:
@@ -3840,7 +3847,7 @@
     case 18:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_1i(mtc0_watchlo, arg, sel);
+            gen_helper_2i(mtc0_watchlo, cpu_env, arg, sel);
             rn = "WatchLo";
             break;
         default:
@@ -3850,7 +3857,7 @@
     case 19:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_1i(mtc0_watchhi, arg, sel);
+            gen_helper_2i(mtc0_watchhi, cpu_env, arg, sel);
             rn = "WatchHi";
             break;
         default:
@@ -3862,7 +3869,7 @@
         case 0:
 #if defined(TARGET_MIPS64)
             check_insn(env, ctx, ISA_MIPS3);
-            gen_helper_mtc0_xcontext(arg);
+            gen_helper_mtc0_xcontext(cpu_env, arg);
             rn = "XContext";
             break;
 #endif
@@ -3874,7 +3881,7 @@
        /* Officially reserved, but sel 0 is used for R1x000 framemask */
         switch (sel) {
         case 0:
-            gen_helper_mtc0_framemask(arg);
+            gen_helper_mtc0_framemask(cpu_env, arg);
             rn = "Framemask";
             break;
         default:
@@ -3888,7 +3895,7 @@
     case 23:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_debug(arg); /* EJTAG support */
+            gen_helper_mtc0_debug(cpu_env, arg); /* EJTAG support */
             /* BS_STOP isn't good enough here, hflags may have changed. */
             gen_save_pc(ctx->pc + 4);
             ctx->bstate = BS_EXCP;
@@ -3938,35 +3945,35 @@
     case 25:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_performance0(arg);
+            gen_helper_mtc0_performance0(cpu_env, arg);
             rn = "Performance0";
             break;
         case 1:
-//            gen_helper_mtc0_performance1(arg);
+//            gen_helper_mtc0_performance1(cpu_env, arg);
             rn = "Performance1";
 //            break;
         case 2:
-//            gen_helper_mtc0_performance2(arg);
+//            gen_helper_mtc0_performance2(cpu_env, arg);
             rn = "Performance2";
 //            break;
         case 3:
-//            gen_helper_mtc0_performance3(arg);
+//            gen_helper_mtc0_performance3(cpu_env, arg);
             rn = "Performance3";
 //            break;
         case 4:
-//            gen_helper_mtc0_performance4(arg);
+//            gen_helper_mtc0_performance4(cpu_env, arg);
             rn = "Performance4";
 //            break;
         case 5:
-//            gen_helper_mtc0_performance5(arg);
+//            gen_helper_mtc0_performance5(cpu_env, arg);
             rn = "Performance5";
 //            break;
         case 6:
-//            gen_helper_mtc0_performance6(arg);
+//            gen_helper_mtc0_performance6(cpu_env, arg);
             rn = "Performance6";
 //            break;
         case 7:
-//            gen_helper_mtc0_performance7(arg);
+//            gen_helper_mtc0_performance7(cpu_env, arg);
             rn = "Performance7";
 //            break;
         default:
@@ -3993,14 +4000,14 @@
         case 2:
         case 4:
         case 6:
-            gen_helper_mtc0_taglo(arg);
+            gen_helper_mtc0_taglo(cpu_env, arg);
             rn = "TagLo";
             break;
         case 1:
         case 3:
         case 5:
         case 7:
-            gen_helper_mtc0_datalo(arg);
+            gen_helper_mtc0_datalo(cpu_env, arg);
             rn = "DataLo";
             break;
         default:
@@ -4013,14 +4020,14 @@
         case 2:
         case 4:
         case 6:
-            gen_helper_mtc0_taghi(arg);
+            gen_helper_mtc0_taghi(cpu_env, arg);
             rn = "TagHi";
             break;
         case 1:
         case 3:
         case 5:
         case 7:
-            gen_helper_mtc0_datahi(arg);
+            gen_helper_mtc0_datahi(cpu_env, arg);
             rn = "DataHi";
             break;
         default:
@@ -4164,27 +4171,27 @@
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_dmfc0_tcrestart(arg);
+            gen_helper_dmfc0_tcrestart(arg, cpu_env);
             rn = "TCRestart";
             break;
         case 4:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_dmfc0_tchalt(arg);
+            gen_helper_dmfc0_tchalt(arg, cpu_env);
             rn = "TCHalt";
             break;
         case 5:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_dmfc0_tccontext(arg);
+            gen_helper_dmfc0_tccontext(arg, cpu_env);
             rn = "TCContext";
             break;
         case 6:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_dmfc0_tcschedule(arg);
+            gen_helper_dmfc0_tcschedule(arg, cpu_env);
             rn = "TCSchedule";
             break;
         case 7:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_dmfc0_tcschefback(arg);
+            gen_helper_dmfc0_tcschefback(arg, cpu_env);
             rn = "TCScheFBack";
             break;
         default:
@@ -4419,7 +4426,7 @@
     case 17:
         switch (sel) {
         case 0:
-            gen_helper_dmfc0_lladdr(arg);
+            gen_helper_dmfc0_lladdr(arg, cpu_env);
             rn = "LLAddr";
             break;
         default:
@@ -4475,7 +4482,7 @@
     case 23:
         switch (sel) {
         case 0:
-            gen_helper_mfc0_debug(arg); /* EJTAG support */
+            gen_helper_mfc0_debug(arg, cpu_env); /* EJTAG support */
             rn = "Debug";
             break;
         case 1:
@@ -4648,12 +4655,12 @@
     case 0:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_index(arg);
+            gen_helper_mtc0_index(cpu_env, arg);
             rn = "Index";
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_mvpcontrol(arg);
+            gen_helper_mtc0_mvpcontrol(cpu_env, arg);
             rn = "MVPControl";
             break;
         case 2:
@@ -4678,22 +4685,22 @@
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpecontrol(arg);
+            gen_helper_mtc0_vpecontrol(cpu_env, arg);
             rn = "VPEControl";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpeconf0(arg);
+            gen_helper_mtc0_vpeconf0(cpu_env, arg);
             rn = "VPEConf0";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpeconf1(arg);
+            gen_helper_mtc0_vpeconf1(cpu_env, arg);
             rn = "VPEConf1";
             break;
         case 4:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_yqmask(arg);
+            gen_helper_mtc0_yqmask(cpu_env, arg);
             rn = "YQMask";
             break;
         case 5:
@@ -4708,7 +4715,7 @@
             break;
         case 7:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpeopt(arg);
+            gen_helper_mtc0_vpeopt(cpu_env, arg);
             rn = "VPEOpt";
             break;
         default:
@@ -4718,42 +4725,42 @@
     case 2:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_entrylo0(arg);
+            gen_helper_mtc0_entrylo0(cpu_env, arg);
             rn = "EntryLo0";
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcstatus(arg);
+            gen_helper_mtc0_tcstatus(cpu_env, arg);
             rn = "TCStatus";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcbind(arg);
+            gen_helper_mtc0_tcbind(cpu_env, arg);
             rn = "TCBind";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcrestart(arg);
+            gen_helper_mtc0_tcrestart(cpu_env, arg);
             rn = "TCRestart";
             break;
         case 4:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tchalt(arg);
+            gen_helper_mtc0_tchalt(cpu_env, arg);
             rn = "TCHalt";
             break;
         case 5:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tccontext(arg);
+            gen_helper_mtc0_tccontext(cpu_env, arg);
             rn = "TCContext";
             break;
         case 6:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcschedule(arg);
+            gen_helper_mtc0_tcschedule(cpu_env, arg);
             rn = "TCSchedule";
             break;
         case 7:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcschefback(arg);
+            gen_helper_mtc0_tcschefback(cpu_env, arg);
             rn = "TCScheFBack";
             break;
         default:
@@ -4763,7 +4770,7 @@
     case 3:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_entrylo1(arg);
+            gen_helper_mtc0_entrylo1(cpu_env, arg);
             rn = "EntryLo1";
             break;
         default:
@@ -4773,7 +4780,7 @@
     case 4:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_context(arg);
+            gen_helper_mtc0_context(cpu_env, arg);
             rn = "Context";
             break;
         case 1:
@@ -4787,12 +4794,12 @@
     case 5:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_pagemask(arg);
+            gen_helper_mtc0_pagemask(cpu_env, arg);
             rn = "PageMask";
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_pagegrain(arg);
+            gen_helper_mtc0_pagegrain(cpu_env, arg);
             rn = "PageGrain";
             break;
         default:
@@ -4802,32 +4809,32 @@
     case 6:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_wired(arg);
+            gen_helper_mtc0_wired(cpu_env, arg);
             rn = "Wired";
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf0(arg);
+            gen_helper_mtc0_srsconf0(cpu_env, arg);
             rn = "SRSConf0";
             break;
         case 2:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf1(arg);
+            gen_helper_mtc0_srsconf1(cpu_env, arg);
             rn = "SRSConf1";
             break;
         case 3:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf2(arg);
+            gen_helper_mtc0_srsconf2(cpu_env, arg);
             rn = "SRSConf2";
             break;
         case 4:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf3(arg);
+            gen_helper_mtc0_srsconf3(cpu_env, arg);
             rn = "SRSConf3";
             break;
         case 5:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf4(arg);
+            gen_helper_mtc0_srsconf4(cpu_env, arg);
             rn = "SRSConf4";
             break;
         default:
@@ -4838,7 +4845,7 @@
         switch (sel) {
         case 0:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_hwrena(arg);
+            gen_helper_mtc0_hwrena(cpu_env, arg);
             rn = "HWREna";
             break;
         default:
@@ -4852,7 +4859,7 @@
     case 9:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_count(arg);
+            gen_helper_mtc0_count(cpu_env, arg);
             rn = "Count";
             break;
         /* 6,7 are implementation dependent */
@@ -4865,7 +4872,7 @@
     case 10:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_entryhi(arg);
+            gen_helper_mtc0_entryhi(cpu_env, arg);
             rn = "EntryHi";
             break;
         default:
@@ -4875,7 +4882,7 @@
     case 11:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_compare(arg);
+            gen_helper_mtc0_compare(cpu_env, arg);
             rn = "Compare";
             break;
         /* 6,7 are implementation dependent */
@@ -4889,7 +4896,7 @@
         switch (sel) {
         case 0:
             save_cpu_state(ctx, 1);
-            gen_helper_mtc0_status(arg);
+            gen_helper_mtc0_status(cpu_env, arg);
             /* BS_STOP isn't good enough here, hflags may have changed. */
             gen_save_pc(ctx->pc + 4);
             ctx->bstate = BS_EXCP;
@@ -4897,14 +4904,14 @@
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_intctl(arg);
+            gen_helper_mtc0_intctl(cpu_env, arg);
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "IntCtl";
             break;
         case 2:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsctl(arg);
+            gen_helper_mtc0_srsctl(cpu_env, arg);
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "SRSCtl";
@@ -4924,7 +4931,7 @@
         switch (sel) {
         case 0:
             save_cpu_state(ctx, 1);
-            gen_helper_mtc0_cause(arg);
+            gen_helper_mtc0_cause(cpu_env, arg);
             rn = "Cause";
             break;
         default:
@@ -4949,7 +4956,7 @@
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_ebase(arg);
+            gen_helper_mtc0_ebase(cpu_env, arg);
             rn = "EBase";
             break;
         default:
@@ -4959,7 +4966,7 @@
     case 16:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_config0(arg);
+            gen_helper_mtc0_config0(cpu_env, arg);
             rn = "Config";
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
@@ -4969,7 +4976,7 @@
             rn = "Config1";
             break;
         case 2:
-            gen_helper_mtc0_config2(arg);
+            gen_helper_mtc0_config2(cpu_env, arg);
             rn = "Config2";
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
@@ -4987,7 +4994,7 @@
     case 17:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_lladdr(arg);
+            gen_helper_mtc0_lladdr(cpu_env, arg);
             rn = "LLAddr";
             break;
         default:
@@ -4997,7 +5004,7 @@
     case 18:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_1i(mtc0_watchlo, arg, sel);
+            gen_helper_2i(mtc0_watchlo, cpu_env, arg, sel);
             rn = "WatchLo";
             break;
         default:
@@ -5007,7 +5014,7 @@
     case 19:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_1i(mtc0_watchhi, arg, sel);
+            gen_helper_2i(mtc0_watchhi, cpu_env, arg, sel);
             rn = "WatchHi";
             break;
         default:
@@ -5018,7 +5025,7 @@
         switch (sel) {
         case 0:
             check_insn(env, ctx, ISA_MIPS3);
-            gen_helper_mtc0_xcontext(arg);
+            gen_helper_mtc0_xcontext(cpu_env, arg);
             rn = "XContext";
             break;
         default:
@@ -5029,7 +5036,7 @@
        /* Officially reserved, but sel 0 is used for R1x000 framemask */
         switch (sel) {
         case 0:
-            gen_helper_mtc0_framemask(arg);
+            gen_helper_mtc0_framemask(cpu_env, arg);
             rn = "Framemask";
             break;
         default:
@@ -5043,7 +5050,7 @@
     case 23:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_debug(arg); /* EJTAG support */
+            gen_helper_mtc0_debug(cpu_env, arg); /* EJTAG support */
             /* BS_STOP isn't good enough here, hflags may have changed. */
             gen_save_pc(ctx->pc + 4);
             ctx->bstate = BS_EXCP;
@@ -5091,35 +5098,35 @@
     case 25:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_performance0(arg);
+            gen_helper_mtc0_performance0(cpu_env, arg);
             rn = "Performance0";
             break;
         case 1:
-//            gen_helper_mtc0_performance1(arg);
+//            gen_helper_mtc0_performance1(cpu_env, arg);
             rn = "Performance1";
 //            break;
         case 2:
-//            gen_helper_mtc0_performance2(arg);
+//            gen_helper_mtc0_performance2(cpu_env, arg);
             rn = "Performance2";
 //            break;
         case 3:
-//            gen_helper_mtc0_performance3(arg);
+//            gen_helper_mtc0_performance3(cpu_env, arg);
             rn = "Performance3";
 //            break;
         case 4:
-//            gen_helper_mtc0_performance4(arg);
+//            gen_helper_mtc0_performance4(cpu_env, arg);
             rn = "Performance4";
 //            break;
         case 5:
-//            gen_helper_mtc0_performance5(arg);
+//            gen_helper_mtc0_performance5(cpu_env, arg);
             rn = "Performance5";
 //            break;
         case 6:
-//            gen_helper_mtc0_performance6(arg);
+//            gen_helper_mtc0_performance6(cpu_env, arg);
             rn = "Performance6";
 //            break;
         case 7:
-//            gen_helper_mtc0_performance7(arg);
+//            gen_helper_mtc0_performance7(cpu_env, arg);
             rn = "Performance7";
 //            break;
         default:
@@ -5146,14 +5153,14 @@
         case 2:
         case 4:
         case 6:
-            gen_helper_mtc0_taglo(arg);
+            gen_helper_mtc0_taglo(cpu_env, arg);
             rn = "TagLo";
             break;
         case 1:
         case 3:
         case 5:
         case 7:
-            gen_helper_mtc0_datalo(arg);
+            gen_helper_mtc0_datalo(cpu_env, arg);
             rn = "DataLo";
             break;
         default:
@@ -5166,14 +5173,14 @@
         case 2:
         case 4:
         case 6:
-            gen_helper_mtc0_taghi(arg);
+            gen_helper_mtc0_taghi(cpu_env, arg);
             rn = "TagHi";
             break;
         case 1:
         case 3:
         case 5:
         case 7:
-            gen_helper_mtc0_datahi(arg);
+            gen_helper_mtc0_datahi(cpu_env, arg);
             rn = "DataHi";
             break;
         default:
@@ -5239,25 +5246,25 @@
         case 2:
             switch (sel) {
             case 1:
-                gen_helper_mftc0_tcstatus(t0);
+                gen_helper_mftc0_tcstatus(t0, cpu_env);
                 break;
             case 2:
-                gen_helper_mftc0_tcbind(t0);
+                gen_helper_mftc0_tcbind(t0, cpu_env);
                 break;
             case 3:
-                gen_helper_mftc0_tcrestart(t0);
+                gen_helper_mftc0_tcrestart(t0, cpu_env);
                 break;
             case 4:
-                gen_helper_mftc0_tchalt(t0);
+                gen_helper_mftc0_tchalt(t0, cpu_env);
                 break;
             case 5:
-                gen_helper_mftc0_tccontext(t0);
+                gen_helper_mftc0_tccontext(t0, cpu_env);
                 break;
             case 6:
-                gen_helper_mftc0_tcschedule(t0);
+                gen_helper_mftc0_tcschedule(t0, cpu_env);
                 break;
             case 7:
-                gen_helper_mftc0_tcschefback(t0);
+                gen_helper_mftc0_tcschefback(t0, cpu_env);
                 break;
             default:
                 gen_mfc0(env, ctx, t0, rt, sel);
@@ -5267,7 +5274,7 @@
         case 10:
             switch (sel) {
             case 0:
-                gen_helper_mftc0_entryhi(t0);
+                gen_helper_mftc0_entryhi(t0, cpu_env);
                 break;
             default:
                 gen_mfc0(env, ctx, t0, rt, sel);
@@ -5276,7 +5283,7 @@
         case 12:
             switch (sel) {
             case 0:
-                gen_helper_mftc0_status(t0);
+                gen_helper_mftc0_status(t0, cpu_env);
                 break;
             default:
                 gen_mfc0(env, ctx, t0, rt, sel);
@@ -5285,7 +5292,7 @@
         case 23:
             switch (sel) {
             case 0:
-                gen_helper_mftc0_debug(t0);
+                gen_helper_mftc0_debug(t0, cpu_env);
                 break;
             default:
                 gen_mfc0(env, ctx, t0, rt, sel);
@@ -5298,49 +5305,49 @@
     } else switch (sel) {
     /* GPR registers. */
     case 0:
-        gen_helper_1i(mftgpr, t0, rt);
+        gen_helper_2i(mftgpr, t0, cpu_env, rt);
         break;
     /* Auxiliary CPU registers */
     case 1:
         switch (rt) {
         case 0:
-            gen_helper_1i(mftlo, t0, 0);
+            gen_helper_2i(mftlo, t0, cpu_env, 0);
             break;
         case 1:
-            gen_helper_1i(mfthi, t0, 0);
+            gen_helper_2i(mfthi, t0, cpu_env, 0);
             break;
         case 2:
-            gen_helper_1i(mftacx, t0, 0);
+            gen_helper_2i(mftacx, t0, cpu_env, 0);
             break;
         case 4:
-            gen_helper_1i(mftlo, t0, 1);
+            gen_helper_2i(mftlo, t0, cpu_env, 1);
             break;
         case 5:
-            gen_helper_1i(mfthi, t0, 1);
+            gen_helper_2i(mfthi, t0, cpu_env, 1);
             break;
         case 6:
-            gen_helper_1i(mftacx, t0, 1);
+            gen_helper_2i(mftacx, t0, cpu_env, 1);
             break;
         case 8:
-            gen_helper_1i(mftlo, t0, 2);
+            gen_helper_2i(mftlo, t0, cpu_env, 2);
             break;
         case 9:
-            gen_helper_1i(mfthi, t0, 2);
+            gen_helper_2i(mfthi, t0, cpu_env, 2);
             break;
         case 10:
-            gen_helper_1i(mftacx, t0, 2);
+            gen_helper_2i(mftacx, t0, cpu_env, 2);
             break;
         case 12:
-            gen_helper_1i(mftlo, t0, 3);
+            gen_helper_2i(mftlo, t0, cpu_env, 3);
             break;
         case 13:
-            gen_helper_1i(mfthi, t0, 3);
+            gen_helper_2i(mfthi, t0, cpu_env, 3);
             break;
         case 14:
-            gen_helper_1i(mftacx, t0, 3);
+            gen_helper_2i(mftacx, t0, cpu_env, 3);
             break;
         case 16:
-            gen_helper_mftdsp(t0);
+            gen_helper_mftdsp(t0, cpu_env);
             break;
         default:
             goto die;
@@ -5365,7 +5372,7 @@
         break;
     case 3:
         /* XXX: For now we support only a single FPU context. */
-        gen_helper_1i(cfc1, t0, rt);
+        gen_helper_2i(cfc1, t0, cpu_env, rt);
         break;
     /* COP2: Not implemented. */
     case 4:
@@ -5404,25 +5411,25 @@
         case 2:
             switch (sel) {
             case 1:
-                gen_helper_mttc0_tcstatus(t0);
+                gen_helper_mttc0_tcstatus(cpu_env, t0);
                 break;
             case 2:
-                gen_helper_mttc0_tcbind(t0);
+                gen_helper_mttc0_tcbind(cpu_env, t0);
                 break;
             case 3:
-                gen_helper_mttc0_tcrestart(t0);
+                gen_helper_mttc0_tcrestart(cpu_env, t0);
                 break;
             case 4:
-                gen_helper_mttc0_tchalt(t0);
+                gen_helper_mttc0_tchalt(cpu_env, t0);
                 break;
             case 5:
-                gen_helper_mttc0_tccontext(t0);
+                gen_helper_mttc0_tccontext(cpu_env, t0);
                 break;
             case 6:
-                gen_helper_mttc0_tcschedule(t0);
+                gen_helper_mttc0_tcschedule(cpu_env, t0);
                 break;
             case 7:
-                gen_helper_mttc0_tcschefback(t0);
+                gen_helper_mttc0_tcschefback(cpu_env, t0);
                 break;
             default:
                 gen_mtc0(env, ctx, t0, rd, sel);
@@ -5432,7 +5439,7 @@
         case 10:
             switch (sel) {
             case 0:
-                gen_helper_mttc0_entryhi(t0);
+                gen_helper_mttc0_entryhi(cpu_env, t0);
                 break;
             default:
                 gen_mtc0(env, ctx, t0, rd, sel);
@@ -5441,7 +5448,7 @@
         case 12:
             switch (sel) {
             case 0:
-                gen_helper_mttc0_status(t0);
+                gen_helper_mttc0_status(cpu_env, t0);
                 break;
             default:
                 gen_mtc0(env, ctx, t0, rd, sel);
@@ -5450,7 +5457,7 @@
         case 23:
             switch (sel) {
             case 0:
-                gen_helper_mttc0_debug(t0);
+                gen_helper_mttc0_debug(cpu_env, t0);
                 break;
             default:
                 gen_mtc0(env, ctx, t0, rd, sel);
@@ -5463,49 +5470,49 @@
     } else switch (sel) {
     /* GPR registers. */
     case 0:
-        gen_helper_1i(mttgpr, t0, rd);
+        gen_helper_2i(mttgpr, cpu_env, t0, rd);
         break;
     /* Auxiliary CPU registers */
     case 1:
         switch (rd) {
         case 0:
-            gen_helper_1i(mttlo, t0, 0);
+            gen_helper_2i(mttlo, cpu_env, t0, 0);
             break;
         case 1:
-            gen_helper_1i(mtthi, t0, 0);
+            gen_helper_2i(mtthi, cpu_env, t0, 0);
             break;
         case 2:
-            gen_helper_1i(mttacx, t0, 0);
+            gen_helper_2i(mttacx, cpu_env, t0, 0);
             break;
         case 4:
-            gen_helper_1i(mttlo, t0, 1);
+            gen_helper_2i(mttlo, cpu_env, t0, 1);
             break;
         case 5:
-            gen_helper_1i(mtthi, t0, 1);
+            gen_helper_2i(mtthi, cpu_env, t0, 1);
             break;
         case 6:
-            gen_helper_1i(mttacx, t0, 1);
+            gen_helper_2i(mttacx, cpu_env, t0, 1);
             break;
         case 8:
-            gen_helper_1i(mttlo, t0, 2);
+            gen_helper_2i(mttlo, cpu_env, t0, 2);
             break;
         case 9:
-            gen_helper_1i(mtthi, t0, 2);
+            gen_helper_2i(mtthi, cpu_env, t0, 2);
             break;
         case 10:
-            gen_helper_1i(mttacx, t0, 2);
+            gen_helper_2i(mttacx, cpu_env, t0, 2);
             break;
         case 12:
-            gen_helper_1i(mttlo, t0, 3);
+            gen_helper_2i(mttlo, cpu_env, t0, 3);
             break;
         case 13:
-            gen_helper_1i(mtthi, t0, 3);
+            gen_helper_2i(mtthi, cpu_env, t0, 3);
             break;
         case 14:
-            gen_helper_1i(mttacx, t0, 3);
+            gen_helper_2i(mttacx, cpu_env, t0, 3);
             break;
         case 16:
-            gen_helper_mttdsp(t0);
+            gen_helper_mttdsp(cpu_env, t0);
             break;
         default:
             goto die;
@@ -5530,7 +5537,7 @@
         break;
     case 3:
         /* XXX: For now we support only a single FPU context. */
-        gen_helper_1i(ctc1, t0, rd);
+        gen_helper_2i(ctc1, cpu_env, t0, rd);
         break;
     /* COP2: Not implemented. */
     case 4:
@@ -5614,30 +5621,30 @@
         opn = "tlbwi";
         if (!env->tlb->helper_tlbwi)
             goto die;
-        gen_helper_tlbwi();
+        gen_helper_tlbwi(cpu_env);
         break;
     case OPC_TLBWR:
         opn = "tlbwr";
         if (!env->tlb->helper_tlbwr)
             goto die;
-        gen_helper_tlbwr();
+        gen_helper_tlbwr(cpu_env);
         break;
     case OPC_TLBP:
         opn = "tlbp";
         if (!env->tlb->helper_tlbp)
             goto die;
-        gen_helper_tlbp();
+        gen_helper_tlbp(cpu_env);
         break;
     case OPC_TLBR:
         opn = "tlbr";
         if (!env->tlb->helper_tlbr)
             goto die;
-        gen_helper_tlbr();
+        gen_helper_tlbr(cpu_env);
         break;
     case OPC_ERET:
         opn = "eret";
         check_insn(env, ctx, ISA_MIPS2);
-        gen_helper_eret();
+        gen_helper_eret(cpu_env);
         ctx->bstate = BS_EXCP;
         break;
     case OPC_DERET:
@@ -5647,7 +5654,7 @@
             MIPS_INVAL(opn);
             generate_exception(ctx, EXCP_RI);
         } else {
-            gen_helper_deret();
+            gen_helper_deret(cpu_env);
             ctx->bstate = BS_EXCP;
         }
         break;
@@ -5658,7 +5665,7 @@
         ctx->pc += 4;
         save_cpu_state(ctx, 1);
         ctx->pc -= 4;
-        gen_helper_wait();
+        gen_helper_wait(cpu_env);
         ctx->bstate = BS_EXCP;
         break;
     default:
@@ -5819,13 +5826,13 @@
         opn = "mtc1";
         break;
     case OPC_CFC1:
-        gen_helper_1i(cfc1, t0, fs);
+        gen_helper_2i(cfc1, t0, cpu_env, fs);
         gen_store_gpr(t0, rt);
         opn = "cfc1";
         break;
     case OPC_CTC1:
         gen_load_gpr(t0, rt);
-        gen_helper_1i(ctc1, t0, fs);
+        gen_helper_2i(ctc1, cpu_env, t0, fs);
         opn = "ctc1";
         break;
 #if defined(TARGET_MIPS64)
@@ -6021,7 +6028,7 @@
 
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
-            gen_helper_float_add_s(fp0, fp0, fp1);
+            gen_helper_float_add_s(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
@@ -6036,7 +6043,7 @@
 
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
-            gen_helper_float_sub_s(fp0, fp0, fp1);
+            gen_helper_float_sub_s(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
@@ -6051,7 +6058,7 @@
 
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
-            gen_helper_float_mul_s(fp0, fp0, fp1);
+            gen_helper_float_mul_s(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
@@ -6066,7 +6073,7 @@
 
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
-            gen_helper_float_div_s(fp0, fp0, fp1);
+            gen_helper_float_div_s(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
@@ -6079,7 +6086,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_sqrt_s(fp0, fp0);
+            gen_helper_float_sqrt_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6124,7 +6131,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr32(fp32, fs);
-            gen_helper_float_roundl_s(fp64, fp32);
+            gen_helper_float_roundl_s(fp64, cpu_env, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
             tcg_temp_free_i64(fp64);
@@ -6138,7 +6145,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr32(fp32, fs);
-            gen_helper_float_truncl_s(fp64, fp32);
+            gen_helper_float_truncl_s(fp64, cpu_env, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
             tcg_temp_free_i64(fp64);
@@ -6152,7 +6159,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr32(fp32, fs);
-            gen_helper_float_ceill_s(fp64, fp32);
+            gen_helper_float_ceill_s(fp64, cpu_env, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
             tcg_temp_free_i64(fp64);
@@ -6166,7 +6173,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr32(fp32, fs);
-            gen_helper_float_floorl_s(fp64, fp32);
+            gen_helper_float_floorl_s(fp64, cpu_env, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
             tcg_temp_free_i64(fp64);
@@ -6178,7 +6185,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_roundw_s(fp0, fp0);
+            gen_helper_float_roundw_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6189,7 +6196,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_truncw_s(fp0, fp0);
+            gen_helper_float_truncw_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6200,7 +6207,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_ceilw_s(fp0, fp0);
+            gen_helper_float_ceilw_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6211,7 +6218,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_floorw_s(fp0, fp0);
+            gen_helper_float_floorw_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6259,7 +6266,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_recip_s(fp0, fp0);
+            gen_helper_float_recip_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6271,7 +6278,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_rsqrt_s(fp0, fp0);
+            gen_helper_float_rsqrt_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6285,7 +6292,7 @@
 
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, fd);
-            gen_helper_float_recip2_s(fp0, fp0, fp1);
+            gen_helper_float_recip2_s(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
@@ -6298,7 +6305,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_recip1_s(fp0, fp0);
+            gen_helper_float_recip1_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6310,7 +6317,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_rsqrt1_s(fp0, fp0);
+            gen_helper_float_rsqrt1_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6324,7 +6331,7 @@
 
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
-            gen_helper_float_rsqrt2_s(fp0, fp0, fp1);
+            gen_helper_float_rsqrt2_s(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
@@ -6338,7 +6345,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr32(fp32, fs);
-            gen_helper_float_cvtd_s(fp64, fp32);
+            gen_helper_float_cvtd_s(fp64, cpu_env, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
             tcg_temp_free_i64(fp64);
@@ -6350,7 +6357,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_cvtw_s(fp0, fp0);
+            gen_helper_float_cvtw_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6363,7 +6370,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr32(fp32, fs);
-            gen_helper_float_cvtl_s(fp64, fp32);
+            gen_helper_float_cvtl_s(fp64, cpu_env, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
             tcg_temp_free_i64(fp64);
@@ -6429,7 +6436,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_add_d(fp0, fp0, fp1);
+            gen_helper_float_add_d(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -6445,7 +6452,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_sub_d(fp0, fp0, fp1);
+            gen_helper_float_sub_d(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -6461,7 +6468,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_mul_d(fp0, fp0, fp1);
+            gen_helper_float_mul_d(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -6477,7 +6484,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_div_d(fp0, fp0, fp1);
+            gen_helper_float_div_d(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -6491,7 +6498,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_sqrt_d(fp0, fp0);
+            gen_helper_float_sqrt_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -6538,7 +6545,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_roundl_d(fp0, fp0);
+            gen_helper_float_roundl_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -6550,7 +6557,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_truncl_d(fp0, fp0);
+            gen_helper_float_truncl_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -6562,7 +6569,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_ceill_d(fp0, fp0);
+            gen_helper_float_ceill_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -6574,7 +6581,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_floorl_d(fp0, fp0);
+            gen_helper_float_floorl_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -6587,7 +6594,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp64, fs);
-            gen_helper_float_roundw_d(fp32, fp64);
+            gen_helper_float_roundw_d(fp32, cpu_env, fp64);
             tcg_temp_free_i64(fp64);
             gen_store_fpr32(fp32, fd);
             tcg_temp_free_i32(fp32);
@@ -6601,7 +6608,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp64, fs);
-            gen_helper_float_truncw_d(fp32, fp64);
+            gen_helper_float_truncw_d(fp32, cpu_env, fp64);
             tcg_temp_free_i64(fp64);
             gen_store_fpr32(fp32, fd);
             tcg_temp_free_i32(fp32);
@@ -6615,7 +6622,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp64, fs);
-            gen_helper_float_ceilw_d(fp32, fp64);
+            gen_helper_float_ceilw_d(fp32, cpu_env, fp64);
             tcg_temp_free_i64(fp64);
             gen_store_fpr32(fp32, fd);
             tcg_temp_free_i32(fp32);
@@ -6629,7 +6636,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp64, fs);
-            gen_helper_float_floorw_d(fp32, fp64);
+            gen_helper_float_floorw_d(fp32, cpu_env, fp64);
             tcg_temp_free_i64(fp64);
             gen_store_fpr32(fp32, fd);
             tcg_temp_free_i32(fp32);
@@ -6678,7 +6685,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_recip_d(fp0, fp0);
+            gen_helper_float_recip_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -6690,7 +6697,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_rsqrt_d(fp0, fp0);
+            gen_helper_float_rsqrt_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -6704,7 +6711,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_recip2_d(fp0, fp0, fp1);
+            gen_helper_float_recip2_d(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -6717,7 +6724,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_recip1_d(fp0, fp0);
+            gen_helper_float_recip1_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -6729,7 +6736,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_rsqrt1_d(fp0, fp0);
+            gen_helper_float_rsqrt1_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -6743,7 +6750,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_rsqrt2_d(fp0, fp0, fp1);
+            gen_helper_float_rsqrt2_d(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -6793,7 +6800,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp64, fs);
-            gen_helper_float_cvts_d(fp32, fp64);
+            gen_helper_float_cvts_d(fp32, cpu_env, fp64);
             tcg_temp_free_i64(fp64);
             gen_store_fpr32(fp32, fd);
             tcg_temp_free_i32(fp32);
@@ -6807,7 +6814,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp64, fs);
-            gen_helper_float_cvtw_d(fp32, fp64);
+            gen_helper_float_cvtw_d(fp32, cpu_env, fp64);
             tcg_temp_free_i64(fp64);
             gen_store_fpr32(fp32, fd);
             tcg_temp_free_i32(fp32);
@@ -6820,7 +6827,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_cvtl_d(fp0, fp0);
+            gen_helper_float_cvtl_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -6831,7 +6838,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_cvts_w(fp0, fp0);
+            gen_helper_float_cvts_w(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6844,7 +6851,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr32(fp32, fs);
-            gen_helper_float_cvtd_w(fp64, fp32);
+            gen_helper_float_cvtd_w(fp64, cpu_env, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
             tcg_temp_free_i64(fp64);
@@ -6858,7 +6865,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp64, fs);
-            gen_helper_float_cvts_l(fp32, fp64);
+            gen_helper_float_cvts_l(fp32, cpu_env, fp64);
             tcg_temp_free_i64(fp64);
             gen_store_fpr32(fp32, fd);
             tcg_temp_free_i32(fp32);
@@ -6871,7 +6878,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_cvtd_l(fp0, fp0);
+            gen_helper_float_cvtd_l(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -6883,7 +6890,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_cvtps_pw(fp0, fp0);
+            gen_helper_float_cvtps_pw(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -6897,7 +6904,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_add_ps(fp0, fp0, fp1);
+            gen_helper_float_add_ps(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -6912,7 +6919,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_sub_ps(fp0, fp0, fp1);
+            gen_helper_float_sub_ps(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -6927,7 +6934,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_mul_ps(fp0, fp0, fp1);
+            gen_helper_float_mul_ps(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -7015,7 +7022,7 @@
 
             gen_load_fpr64(ctx, fp0, ft);
             gen_load_fpr64(ctx, fp1, fs);
-            gen_helper_float_addr_ps(fp0, fp0, fp1);
+            gen_helper_float_addr_ps(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -7030,7 +7037,7 @@
 
             gen_load_fpr64(ctx, fp0, ft);
             gen_load_fpr64(ctx, fp1, fs);
-            gen_helper_float_mulr_ps(fp0, fp0, fp1);
+            gen_helper_float_mulr_ps(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -7045,7 +7052,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, fd);
-            gen_helper_float_recip2_ps(fp0, fp0, fp1);
+            gen_helper_float_recip2_ps(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -7058,7 +7065,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_recip1_ps(fp0, fp0);
+            gen_helper_float_recip1_ps(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7070,7 +7077,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_rsqrt1_ps(fp0, fp0);
+            gen_helper_float_rsqrt1_ps(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7084,7 +7091,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_rsqrt2_ps(fp0, fp0, fp1);
+            gen_helper_float_rsqrt2_ps(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -7097,7 +7104,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32h(fp0, fs);
-            gen_helper_float_cvts_pu(fp0, fp0);
+            gen_helper_float_cvts_pu(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -7109,7 +7116,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_cvtpw_ps(fp0, fp0);
+            gen_helper_float_cvtpw_ps(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7121,7 +7128,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_cvts_pl(fp0, fp0);
+            gen_helper_float_cvts_pl(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -7396,7 +7403,7 @@
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
             gen_load_fpr32(fp2, fr);
-            gen_helper_float_muladd_s(fp2, fp0, fp1, fp2);
+            gen_helper_float_muladd_s(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp2, fd);
@@ -7415,7 +7422,7 @@
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_muladd_d(fp2, fp0, fp1, fp2);
+            gen_helper_float_muladd_d(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -7433,7 +7440,7 @@
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_muladd_ps(fp2, fp0, fp1, fp2);
+            gen_helper_float_muladd_ps(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -7451,7 +7458,7 @@
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
             gen_load_fpr32(fp2, fr);
-            gen_helper_float_mulsub_s(fp2, fp0, fp1, fp2);
+            gen_helper_float_mulsub_s(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp2, fd);
@@ -7470,7 +7477,7 @@
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_mulsub_d(fp2, fp0, fp1, fp2);
+            gen_helper_float_mulsub_d(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -7488,7 +7495,7 @@
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_mulsub_ps(fp2, fp0, fp1, fp2);
+            gen_helper_float_mulsub_ps(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -7506,7 +7513,7 @@
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
             gen_load_fpr32(fp2, fr);
-            gen_helper_float_nmuladd_s(fp2, fp0, fp1, fp2);
+            gen_helper_float_nmuladd_s(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp2, fd);
@@ -7525,7 +7532,7 @@
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_nmuladd_d(fp2, fp0, fp1, fp2);
+            gen_helper_float_nmuladd_d(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -7543,7 +7550,7 @@
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_nmuladd_ps(fp2, fp0, fp1, fp2);
+            gen_helper_float_nmuladd_ps(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -7561,7 +7568,7 @@
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
             gen_load_fpr32(fp2, fr);
-            gen_helper_float_nmulsub_s(fp2, fp0, fp1, fp2);
+            gen_helper_float_nmulsub_s(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp2, fd);
@@ -7580,7 +7587,7 @@
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_nmulsub_d(fp2, fp0, fp1, fp2);
+            gen_helper_float_nmulsub_d(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -7598,7 +7605,7 @@
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_nmulsub_ps(fp2, fp0, fp1, fp2);
+            gen_helper_float_nmulsub_ps(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -7719,7 +7726,7 @@
             MIPS_INVAL("PMON / selsl");
             generate_exception(ctx, EXCP_RI);
 #else
-            gen_helper_0i(pmon, sa);
+            gen_helper_1i(pmon, cpu_env, sa);
 #endif
             break;
         case OPC_SYSCALL:
@@ -7853,22 +7860,22 @@
                 switch (rd) {
                 case 0:
                     save_cpu_state(ctx, 1);
-                    gen_helper_rdhwr_cpunum(t0);
+                    gen_helper_rdhwr_cpunum(t0, cpu_env);
                     gen_store_gpr(t0, rt);
                     break;
                 case 1:
                     save_cpu_state(ctx, 1);
-                    gen_helper_rdhwr_synci_step(t0);
+                    gen_helper_rdhwr_synci_step(t0, cpu_env);
                     gen_store_gpr(t0, rt);
                     break;
                 case 2:
                     save_cpu_state(ctx, 1);
-                    gen_helper_rdhwr_cc(t0);
+                    gen_helper_rdhwr_cc(t0, cpu_env);
                     gen_store_gpr(t0, rt);
                     break;
                 case 3:
                     save_cpu_state(ctx, 1);
-                    gen_helper_rdhwr_ccres(t0);
+                    gen_helper_rdhwr_ccres(t0, cpu_env);
                     gen_store_gpr(t0, rt);
                     break;
                 case 29:
@@ -7908,7 +7915,7 @@
 
                 save_cpu_state(ctx, 1);
                 gen_load_gpr(t0, rs);
-                gen_helper_yield(t0, t0);
+                gen_helper_yield(t0, cpu_env, t0);
                 gen_store_gpr(t0, rd);
                 tcg_temp_free(t0);
             }
@@ -7984,28 +7991,28 @@
                 switch (op2) {
                 case OPC_DMT:
                     check_insn(env, ctx, ASE_MT);
-                    gen_helper_dmt(t0, t0);
+                    gen_helper_dmt(t0);
                     gen_store_gpr(t0, rt);
                     break;
                 case OPC_EMT:
                     check_insn(env, ctx, ASE_MT);
-                    gen_helper_emt(t0, t0);
+                    gen_helper_emt(t0);
                     gen_store_gpr(t0, rt);
                     break;
                 case OPC_DVPE:
                     check_insn(env, ctx, ASE_MT);
-                    gen_helper_dvpe(t0, t0);
+                    gen_helper_dvpe(t0, cpu_env);
                     gen_store_gpr(t0, rt);
                     break;
                 case OPC_EVPE:
                     check_insn(env, ctx, ASE_MT);
-                    gen_helper_evpe(t0, t0);
+                    gen_helper_evpe(t0, cpu_env);
                     gen_store_gpr(t0, rt);
                     break;
                 case OPC_DI:
                     check_insn(env, ctx, ISA_MIPS32R2);
                     save_cpu_state(ctx, 1);
-                    gen_helper_di(t0);
+                    gen_helper_di(t0, cpu_env);
                     gen_store_gpr(t0, rt);
                     /* Stop translation as we may have switched the execution mode */
                     ctx->bstate = BS_STOP;
@@ -8013,7 +8020,7 @@
                 case OPC_EI:
                     check_insn(env, ctx, ISA_MIPS32R2);
                     save_cpu_state(ctx, 1);
-                    gen_helper_ei(t0);
+                    gen_helper_ei(t0, cpu_env);
                     gen_store_gpr(t0, rt);
                     /* Stop translation as we may have switched the execution mode */
                     ctx->bstate = BS_STOP;
@@ -8264,7 +8271,7 @@
             tcg_gen_mov_tl(cpu_PC, btarget);
             if (ctx->singlestep_enabled) {
                 save_cpu_state(ctx, 0);
-                gen_helper_0i(raise_exception, EXCP_DEBUG);
+                gen_helper_1i(raise_exception, cpu_env, EXCP_DEBUG);
             }
             tcg_gen_exit_tb(0);
             break;
@@ -8291,10 +8298,10 @@
         qemu_log("search pc %d\n", search_pc);
 
     pc_start = tb->pc;
-    gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
+    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
     ctx.pc = pc_start;
     ctx.saved_pc = -1;
-    ctx.singlestep_enabled = env->singlestep_enabled;
+    ctx.singlestep_enabled = ENV_GET_CPU(env)->singlestep_enabled;
     ctx.tb = tb;
     ctx.bstate = BS_NONE;
     /* Restore delay slot state from the tb context.  */
@@ -8312,7 +8319,7 @@
 #ifdef DEBUG_DISAS
     qemu_log_mask(CPU_LOG_TB_CPU, "------------------------------------------------\n");
     /* FIXME: This may print out stale hflags from env... */
-    log_cpu_state_mask(CPU_LOG_TB_CPU, env, 0);
+    log_cpu_state_mask(CPU_LOG_TB_CPU, ENV_GET_CPU(env), 0);
 #endif
     LOG_DISAS("\ntb %p idx %d hflags %04x\n", tb, ctx.mem_idx, ctx.hflags);
     gen_icount_start();
@@ -8322,7 +8329,7 @@
                 if (bp->pc == ctx.pc) {
                     save_cpu_state(&ctx, 1);
                     ctx.bstate = BS_BRANCH;
-                    gen_helper_0i(raise_exception, EXCP_DEBUG);
+                    gen_helper_1i(raise_exception, cpu_env, EXCP_DEBUG);
                     /* Include the breakpoint location or the tb won't
                      * be flushed when it must be.  */
                     ctx.pc += 4;
@@ -8332,20 +8339,20 @@
         }
 
         if (search_pc) {
-            j = gen_opc_ptr - gen_opc_buf;
+            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
             if (lj < j) {
                 lj++;
                 while (lj < j)
-                    gen_opc_instr_start[lj++] = 0;
+                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
             }
-            gen_opc_pc[lj] = ctx.pc;
+            tcg_ctx.gen_opc_pc[lj] = ctx.pc;
             gen_opc_hflags[lj] = ctx.hflags & MIPS_HFLAG_BMASK;
-            gen_opc_instr_start[lj] = 1;
-            gen_opc_icount[lj] = num_insns;
+            tcg_ctx.gen_opc_instr_start[lj] = 1;
+            tcg_ctx.gen_opc_icount[lj] = num_insns;
         }
         if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
             gen_io_start();
-        ctx.opcode = ldl_code(ctx.pc);
+        ctx.opcode = cpu_ldl_code(env, ctx.pc);
         decode_opc(env, &ctx);
         ctx.pc += 4;
         num_insns++;
@@ -8354,7 +8361,7 @@
            This is what GDB expects and is consistent with what the
            hardware does (e.g. if a delay slot instruction faults, the
            reported PC is the PC of the branch).  */
-        if (env->singlestep_enabled && (ctx.hflags & MIPS_HFLAG_BMASK) == 0)
+        if (ENV_GET_CPU(env)->singlestep_enabled && (ctx.hflags & MIPS_HFLAG_BMASK) == 0)
             break;
 
         /* Do not split a branch instruction and its delay slot into two
@@ -8363,7 +8370,7 @@
         if ((ctx.pc & (TARGET_PAGE_SIZE - 1)) == 0 && (ctx.hflags & MIPS_HFLAG_BMASK) == 0)
             break;
 
-        if (gen_opc_ptr >= gen_opc_end)
+        if (tcg_ctx.gen_opc_ptr >= gen_opc_end)
             break;
 
         if (num_insns >= max_insns)
@@ -8374,13 +8381,13 @@
     }
     if (tb->cflags & CF_LAST_IO)
         gen_io_end();
-    if (env->singlestep_enabled && ctx.bstate != BS_BRANCH) {
+    if (ENV_GET_CPU(env)->singlestep_enabled && ctx.bstate != BS_BRANCH) {
         save_cpu_state(&ctx, ctx.bstate == BS_NONE);
-        gen_helper_0i(raise_exception, EXCP_DEBUG);
+        gen_helper_1i(raise_exception, cpu_env, EXCP_DEBUG);
     } else {
         switch (ctx.bstate) {
         case BS_STOP:
-            gen_helper_interrupt_restart();
+            gen_helper_interrupt_restart(cpu_env);
             gen_goto_tb(&ctx, 0, ctx.pc);
             break;
         case BS_NONE:
@@ -8388,7 +8395,7 @@
             gen_goto_tb(&ctx, 0, ctx.pc);
             break;
         case BS_EXCP:
-            gen_helper_interrupt_restart();
+            gen_helper_interrupt_restart(cpu_env);
             tcg_gen_exit_tb(0);
             break;
         case BS_BRANCH:
@@ -8398,12 +8405,12 @@
     }
 done_generating:
     gen_icount_end(tb, num_insns);
-    *gen_opc_ptr = INDEX_op_end;
+    *tcg_ctx.gen_opc_ptr = INDEX_op_end;
     if (search_pc) {
-        j = gen_opc_ptr - gen_opc_buf;
+        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
         lj++;
         while (lj <= j)
-            gen_opc_instr_start[lj++] = 0;
+            tcg_ctx.gen_opc_instr_start[lj++] = 0;
     } else {
         tb->size = ctx.pc - pc_start;
         tb->icount = num_insns;
@@ -8498,10 +8505,11 @@
 }
 #endif
 
-void cpu_dump_state (CPUMIPSState *env, FILE *f,
+void cpu_dump_state (CPUState *cpu, FILE *f,
                      int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
                      int flags)
 {
+    CPUMIPSState *env = cpu->env_ptr;
     int i;
 
     cpu_fprintf(f, "pc=0x" TARGET_FMT_lx " HI=0x" TARGET_FMT_lx " LO=0x" TARGET_FMT_lx " ds %04x " TARGET_FMT_lx " %d\n",
@@ -8570,11 +8578,6 @@
     fpu_fcr31 = tcg_global_mem_new_i32(TCG_AREG0,
                                        offsetof(CPUMIPSState, active_fpu.fcr31),
                                        "fcr31");
-
-    /* register helpers */
-#define GEN_HELPER 2
-#include "helper.h"
-
     inited = 1;
 }
 
@@ -8582,15 +8585,21 @@
 
 CPUMIPSState *cpu_mips_init (const char *cpu_model)
 {
+    MIPSCPU *mips_cpu;
     CPUMIPSState *env;
+    CPUState *cpu;
     const mips_def_t *def;
 
     def = cpu_mips_find_by_name(cpu_model);
     if (!def)
         return NULL;
-    env = g_malloc0(sizeof(CPUMIPSState));
+    mips_cpu = g_malloc0(sizeof(MIPSCPU));
+    env = &mips_cpu->env;
+    ENV_GET_CPU(env)->env_ptr = env;
+
+    cpu = ENV_GET_CPU(env);
     env->cpu_model = def;
-    env->cpu_model_str = cpu_model;
+    cpu->cpu_model_str = cpu_model;
 
     cpu_exec_init(env);
 #ifndef CONFIG_USER_ONLY
@@ -8598,16 +8607,18 @@
 #endif
     mvp_init(env, def);
     mips_tcg_init();
-    cpu_reset(env);
-    qemu_init_vcpu(env);
+    cpu_reset(cpu);
+    qemu_init_vcpu(cpu);
     return env;
 }
 
-void cpu_reset (CPUMIPSState *env)
+void cpu_reset(CPUState *cpu)
 {
+    CPUMIPSState *env = cpu->env_ptr;
+
     if (qemu_loglevel_mask(CPU_LOG_RESET)) {
-        qemu_log("CPU Reset (CPU %d)\n", env->cpu_index);
-        log_cpu_state(env, 0);
+        qemu_log("CPU Reset (CPU %d)\n", cpu->cpu_index);
+        log_cpu_state(cpu, 0);
     }
 
     memset(env, 0, offsetof(CPUMIPSState, breakpoints));
@@ -8709,7 +8720,7 @@
 
 void restore_state_to_opc(CPUMIPSState *env, TranslationBlock *tb, int pc_pos)
 {
-    env->active_tc.PC = gen_opc_pc[pc_pos];
+    env->active_tc.PC = tcg_ctx.gen_opc_pc[pc_pos];
     env->hflags &= ~MIPS_HFLAG_BMASK;
     env->hflags |= gen_opc_hflags[pc_pos];
 }
diff --git a/tcg-runtime.c b/tcg-runtime.c
new file mode 100644
index 0000000..5340e51
--- /dev/null
+++ b/tcg-runtime.c
@@ -0,0 +1,103 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include "qemu/host-utils.h"
+
+/* 32-bit helpers */
+
+int32_t tcg_helper_div_i32(int32_t arg1, int32_t arg2)
+{
+    return arg1 / arg2;
+}
+
+int32_t tcg_helper_rem_i32(int32_t arg1, int32_t arg2)
+{
+    return arg1 % arg2;
+}
+
+uint32_t tcg_helper_divu_i32(uint32_t arg1, uint32_t arg2)
+{
+    return arg1 / arg2;
+}
+
+uint32_t tcg_helper_remu_i32(uint32_t arg1, uint32_t arg2)
+{
+    return arg1 % arg2;
+}
+
+/* 64-bit helpers */
+
+int64_t tcg_helper_shl_i64(int64_t arg1, int64_t arg2)
+{
+    return arg1 << arg2;
+}
+
+int64_t tcg_helper_shr_i64(int64_t arg1, int64_t arg2)
+{
+    return (uint64_t)arg1 >> arg2;
+}
+
+int64_t tcg_helper_sar_i64(int64_t arg1, int64_t arg2)
+{
+    return arg1 >> arg2;
+}
+
+int64_t tcg_helper_div_i64(int64_t arg1, int64_t arg2)
+{
+    return arg1 / arg2;
+}
+
+int64_t tcg_helper_rem_i64(int64_t arg1, int64_t arg2)
+{
+    return arg1 % arg2;
+}
+
+uint64_t tcg_helper_divu_i64(uint64_t arg1, uint64_t arg2)
+{
+    return arg1 / arg2;
+}
+
+uint64_t tcg_helper_remu_i64(uint64_t arg1, uint64_t arg2)
+{
+    return arg1 % arg2;
+}
+
+uint64_t tcg_helper_muluh_i64(uint64_t arg1, uint64_t arg2)
+{
+    uint64_t l, h;
+    mulu64(&l, &h, arg1, arg2);
+    return h;
+}
+
+int64_t tcg_helper_mulsh_i64(int64_t arg1, int64_t arg2)
+{
+    uint64_t l, h;
+    muls64(&l, &h, arg1, arg2);
+    return h;
+}
diff --git a/tcg/README b/tcg/README
index 6600122..f178212 100644
--- a/tcg/README
+++ b/tcg/README
@@ -14,6 +14,10 @@
 for cross compiling, it is assumed that the TCG target is different
 from the host, although it is never the case for QEMU.
 
+In this document, we use "guest" to specify what architecture we are
+emulating; "target" always means the TCG target, the machine on which
+we are running QEMU.
+
 A TCG "function" corresponds to a QEMU Translated Block (TB).
 
 A TCG "temporary" is a variable only live in a basic
@@ -77,19 +81,27 @@
 Using the tcg_gen_helper_x_y it is possible to call any function
 taking i32, i64 or pointer types. By default, before calling a helper,
 all globals are stored at their canonical location and it is assumed
-that the function can modify them. This can be overridden by the
-TCG_CALL_CONST function modifier. By default, the helper is allowed to
-modify the CPU state or raise an exception. This can be overridden by
-the TCG_CALL_PURE function modifier, in which case the call to the
-function is removed if the return value is not used.
+that the function can modify them. By default, the helper is allowed to
+modify the CPU state or raise an exception.
+
+This can be overridden using the following function modifiers:
+- TCG_CALL_NO_READ_GLOBALS means that the helper does not read globals,
+  either directly or via an exception. They will not be saved to their
+  canonical locations before calling the helper.
+- TCG_CALL_NO_WRITE_GLOBALS means that the helper does not modify any globals.
+  They will only be saved to their canonical location before calling helpers,
+  but they won't be reloaded afterwards.
+- TCG_CALL_NO_SIDE_EFFECTS means that the call to the function is removed if
+  the return value is not used.
+
+Note that TCG_CALL_NO_READ_GLOBALS implies TCG_CALL_NO_WRITE_GLOBALS.
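As a hedged illustration of these flags (the helper name, its arguments and the DEF_HELPER_FLAGS_* declaration style are assumed from the front-end helper machinery, not taken from this patch), a pure helper could be declared and used roughly like this:

    /* helper.h fragment: a side-effect-free helper that touches no globals,
       so TCG may delete the call when its result is unused. */
    DEF_HELPER_FLAGS_2(sample_clz,
                       TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_SIDE_EFFECTS,
                       i32, i32, i32)

    /* translate.c fragment: called like any other generated helper. */
    gen_helper_sample_clz(dest, src, width);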
 
 On some TCG targets (e.g. x86), several calling conventions are
 supported.
 
 * Branches:
 
-Use the instruction 'br' to jump to a label. Use 'jmp' to jump to an
-explicit address. Conditional branches can only jump to labels.
+Use the instruction 'br' to jump to a label.
 
 3.3) Code Optimizations
 
@@ -129,10 +141,6 @@
 
 ********* Jumps/Labels
 
-* jmp t0
-
-Absolute jump to address t0 (pointer type).
-
 * set_label $label
 
 Define label 'label' at the current program point.
@@ -141,7 +149,7 @@
 
 Jump to label.
 
-* brcond_i32/i64 cond, t0, t1, label
+* brcond_i32/i64 t0, t1, cond, label
 
 Conditional jump if t0 cond t1 is true. cond can be:
     TCG_COND_EQ
@@ -301,12 +309,18 @@
 
 ********* Conditional moves
 
-* setcond_i32/i64 cond, dest, t1, t2
+* setcond_i32/i64 dest, t1, t2, cond
 
 dest = (t1 cond t2)
 
 Set DEST to 1 if (T1 cond T2) is true, otherwise set to 0.
 
+* movcond_i32/i64 dest, c1, c2, v1, v2, cond
+
+dest = (c1 cond c2 ? v1 : v2)
+
+Set DEST to V1 if (C1 cond C2) is true, otherwise set to V2.
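For example (a minimal sketch using the i32 generator; the temporaries are illustrative):

    /* dest = (c1 < c2) ? v1 : v2, all operands TCGv_i32 */
    tcg_gen_movcond_i32(TCG_COND_LT, dest, c1, c2, v1, v2);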
+
 ********* Type conversions
 
 * ext_i32_i64 t0, t1
@@ -348,30 +362,39 @@
 write(t0, t1 + offset)
 Write 8, 16, 32 or 64 bits to host memory.
 
-********* 64-bit target on 32-bit host support
+All these opcodes assume that the addressed host memory doesn't correspond
+to a global. If it does, the behaviour is unpredictable.
+
+********* Multiword arithmetic support
+
+* add2_i32/i64 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high
+* sub2_i32/i64 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high
+
+Similar to add/sub, except that the double-word inputs T1 and T2 are
+formed from two single-word arguments, and the double-word output T0
+is returned in two single-word outputs.
+
+* mulu2_i32/i64 t0_low, t0_high, t1, t2
+
+Similar to mul, except two unsigned inputs T1 and T2 yielding the full
+double-word product T0.  The latter is returned in two single-word outputs.
+
+* muls2_i32/i64 t0_low, t0_high, t1, t2
+
+Similar to mulu2, except the two inputs T1 and T2 are signed.
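In plain C, the semantics of add2_i32 (and, analogously, mulu2_i32) can be sketched as:

    uint64_t a = ((uint64_t)t1_high << 32) | t1_low;
    uint64_t b = ((uint64_t)t2_high << 32) | t2_low;
    uint64_t r = a + b;               /* for mulu2_i32: r = (uint64_t)t1 * t2 */
    t0_low  = (uint32_t)r;
    t0_high = (uint32_t)(r >> 32);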
+
+********* 64-bit guest on 32-bit host support
 
 The following opcodes are internal to TCG.  Thus they are to be implemented by
 32-bit host code generators, but are not to be emitted by guest translators.
 They are emitted as needed by inline functions within "tcg-op.h".
 
-* brcond2_i32 cond, t0_low, t0_high, t1_low, t1_high, label
+* brcond2_i32 t0_low, t0_high, t1_low, t1_high, cond, label
 
 Similar to brcond, except that the 64-bit values T0 and T1
 are formed from two 32-bit arguments.
 
-* add2_i32 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high
-* sub2_i32 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high
-
-Similar to add/sub, except that the 64-bit inputs T1 and T2 are
-formed from two 32-bit arguments, and the 64-bit output T0
-is returned in two 32-bit outputs.
-
-* mulu2_i32 t0_low, t0_high, t1, t2
-
-Similar to mul, except two 32-bit (unsigned) inputs T1 and T2 yielding
-the full 64-bit product T0.  The later is returned in two 32-bit outputs.
-
-* setcond2_i32 cond, dest, t1_low, t1_high, t2_low, t2_high
+* setcond2_i32 dest, t1_low, t1_high, t2_low, t2_high, cond
 
 Similar to setcond, except that the 64-bit values T1 and T2 are
 formed from two 32-bit arguments.  The result is a 32-bit value.
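As a rough C sketch of what a 32-bit backend must implement, an unsigned less-than over register pairs decomposes as:

    /* setcond2_i32 dest, t1_low, t1_high, t2_low, t2_high, TCG_COND_LTU
       (all values treated as uint32_t) */
    dest = (t1_high < t2_high) ||
           (t1_high == t2_high && t1_low < t2_low);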
@@ -386,32 +409,28 @@
 
 Exit the current TB and jump to the TB index 'index' (constant) if the
 current TB was linked to this TB. Otherwise execute the next
-instructions.
+instructions. Only indices 0 and 1 are valid and tcg_gen_goto_tb may be issued
+at most once with each slot index per TB.
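The usual front-end pattern, sketched here after the MIPS gen_goto_tb code touched earlier in this patch (the PC global and PC-update step differ per target), pairs each slot with a matching exit_tb:

    tcg_gen_goto_tb(n);                        /* n is 0 or 1, used once per TB */
    tcg_gen_movi_tl(cpu_PC, dest);             /* write back the guest PC */
    tcg_gen_exit_tb((tcg_target_long)tb + n);  /* encode the slot in the return value */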
 
-* qemu_ld8u t0, t1, flags
-qemu_ld8s t0, t1, flags
-qemu_ld16u t0, t1, flags
-qemu_ld16s t0, t1, flags
-qemu_ld32 t0, t1, flags
-qemu_ld32u t0, t1, flags
-qemu_ld32s t0, t1, flags
-qemu_ld64 t0, t1, flags
+* qemu_ld_i32/i64 t0, t1, flags, memidx
+* qemu_st_i32/i64 t0, t1, flags, memidx
 
-Load data at the QEMU CPU address t1 into t0. t1 has the QEMU CPU address
-type. 'flags' contains the QEMU memory index (selects user or kernel access)
-for example.
+Load data at the guest address t1 into t0, or store data in t0 at guest
+address t1.  The _i32/_i64 size applies to the size of the input/output
+register t0 only.  The address t1 is always sized according to the guest,
+and the width of the memory operation is controlled by flags.
 
-Note that "qemu_ld32" implies a 32-bit result, while "qemu_ld32u" and
-"qemu_ld32s" imply a 64-bit result appropriately extended from 32 bits.
+Both t0 and t1 may be split into little-endian ordered pairs of registers
+if dealing with 64-bit quantities on a 32-bit host.
 
-* qemu_st8 t0, t1, flags
-qemu_st16 t0, t1, flags
-qemu_st32 t0, t1, flags
-qemu_st64 t0, t1, flags
+The memidx selects the QEMU TLB index to use (e.g. user or kernel access).
+The flags are the TCGMemOp bits, selecting the sign, width, and endianness
+of the memory access.
 
-Store the data t0 at the QEMU CPU Address t1. t1 has the QEMU CPU
-address type. 'flags' contains the QEMU memory index (selects user or
-kernel access) for example.
+For a 32-bit host, qemu_ld/st_i64 is guaranteed to only be used with a
+64-bit memory access specified in flags.
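At the front-end level this corresponds to calls such as the following (a sketch; the MO_* name is the assumed TCGMemOp constant for a guest-endian 32-bit access):

    /* guest-endian 32-bit load from guest address 'addr', TLB index 'mem_idx' */
    tcg_gen_qemu_ld_i32(val, addr, mem_idx, MO_TEUL);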
+
+*********
 
 Note 1: Some shortcuts are defined when the last operand is known to be
 a constant (e.g. addi for add, movi for mov).
@@ -501,11 +520,19 @@
   a better generated code, but it reduces the memory usage of TCG and
   the speed of the translation.
 
-- Don't hesitate to use helpers for complicated or seldom used target
+- Don't hesitate to use helpers for complicated or seldom used guest
   instructions. There is little performance advantage in using TCG to
-  implement target instructions taking more than about twenty TCG
-  instructions.
+  implement guest instructions taking more than about twenty TCG
+  instructions. Note that this rule of thumb is more applicable to
+  helpers doing complex logic or arithmetic, where the C compiler has
+  scope to do a good job of optimisation; it is less relevant where
+  the instruction is mostly doing loads and stores, and in those cases
+  inline TCG may still be faster for longer sequences.
+
+- The hard limit on the number of TCG instructions you can generate
+  per guest instruction is set by MAX_OP_PER_INSTR in exec-all.h --
+  you cannot exceed this without risking a buffer overrun.
 
 - Use the 'discard' instruction if you know that TCG won't be able to
   prove that a given global is "dead" at a given program point. The
-  x86 target uses it to improve the condition codes optimisation.
+  x86 guest uses it to improve the condition codes optimisation.
diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c
deleted file mode 100644
index 2ecb683..0000000
--- a/tcg/hppa/tcg-target.c
+++ /dev/null
@@ -1,1683 +0,0 @@
-/*
- * Tiny Code Generator for QEMU
- *
- * Copyright (c) 2008 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#ifndef NDEBUG
-static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
-    "%r0", "%r1", "%rp", "%r3", "%r4", "%r5", "%r6", "%r7",
-    "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
-    "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
-    "%r24", "%r25", "%r26", "%dp", "%ret0", "%ret1", "%sp", "%r31",
-};
-#endif
-
-/* This is an 8 byte temp slot in the stack frame.  */
-#define STACK_TEMP_OFS -16
-
-#ifdef CONFIG_USE_GUEST_BASE
-#define TCG_GUEST_BASE_REG TCG_REG_R16
-#else
-#define TCG_GUEST_BASE_REG TCG_REG_R0
-#endif
-
-static const int tcg_target_reg_alloc_order[] = {
-    TCG_REG_R4,
-    TCG_REG_R5,
-    TCG_REG_R6,
-    TCG_REG_R7,
-    TCG_REG_R8,
-    TCG_REG_R9,
-    TCG_REG_R10,
-    TCG_REG_R11,
-    TCG_REG_R12,
-    TCG_REG_R13,
-
-    TCG_REG_R17,
-    TCG_REG_R14,
-    TCG_REG_R15,
-    TCG_REG_R16,
-
-    TCG_REG_R26,
-    TCG_REG_R25,
-    TCG_REG_R24,
-    TCG_REG_R23,
-
-    TCG_REG_RET0,
-    TCG_REG_RET1,
-};
-
-static const int tcg_target_call_iarg_regs[4] = {
-    TCG_REG_R26,
-    TCG_REG_R25,
-    TCG_REG_R24,
-    TCG_REG_R23,
-};
-
-static const int tcg_target_call_oarg_regs[2] = {
-    TCG_REG_RET0,
-    TCG_REG_RET1,
-};
-
-/* True iff val fits a signed field of width BITS.  */
-static inline int check_fit_tl(tcg_target_long val, unsigned int bits)
-{
-    return (val << ((sizeof(tcg_target_long) * 8 - bits))
-            >> (sizeof(tcg_target_long) * 8 - bits)) == val;
-}
-
-/* True iff depi can be used to compute (reg | MASK).
-   Accept a bit pattern like:
-      0....01....1
-      1....10....0
-      0..01..10..0
-   Copied from gcc sources.  */
-static inline int or_mask_p(tcg_target_ulong mask)
-{
-    if (mask == 0 || mask == -1) {
-        return 0;
-    }
-    mask += mask & -mask;
-    return (mask & (mask - 1)) == 0;
-}
-
-/* True iff depi or extru can be used to compute (reg & mask).
-   Accept a bit pattern like these:
-      0....01....1
-      1....10....0
-      1..10..01..1
-   Copied from gcc sources.  */
-static inline int and_mask_p(tcg_target_ulong mask)
-{
-    return or_mask_p(~mask);
-}
-
-static int low_sign_ext(int val, int len)
-{
-    return (((val << 1) & ~(-1u << len)) | ((val >> (len - 1)) & 1));
-}
-
-static int reassemble_12(int as12)
-{
-    return (((as12 & 0x800) >> 11) |
-            ((as12 & 0x400) >> 8) |
-            ((as12 & 0x3ff) << 3));
-}
-
-static int reassemble_17(int as17)
-{
-    return (((as17 & 0x10000) >> 16) |
-            ((as17 & 0x0f800) << 5) |
-            ((as17 & 0x00400) >> 8) |
-            ((as17 & 0x003ff) << 3));
-}
-
-static int reassemble_21(int as21)
-{
-    return (((as21 & 0x100000) >> 20) |
-            ((as21 & 0x0ffe00) >> 8) |
-            ((as21 & 0x000180) << 7) |
-            ((as21 & 0x00007c) << 14) |
-            ((as21 & 0x000003) << 12));
-}
-
-/* ??? Bizzarely, there is no PCREL12F relocation type.  I guess all
-   such relocations are simply fully handled by the assembler.  */
-#define R_PARISC_PCREL12F  R_PARISC_NONE
-
-static void patch_reloc(uint8_t *code_ptr, int type,
-                        tcg_target_long value, tcg_target_long addend)
-{
-    uint32_t *insn_ptr = (uint32_t *)code_ptr;
-    uint32_t insn = *insn_ptr;
-    tcg_target_long pcrel;
-
-    value += addend;
-    pcrel = (value - ((tcg_target_long)code_ptr + 8)) >> 2;
-
-    switch (type) {
-    case R_PARISC_PCREL12F:
-        assert(check_fit_tl(pcrel, 12));
-        /* ??? We assume all patches are forward.  See tcg_out_brcond
-           re setting the NUL bit on the branch and eliding the nop.  */
-        assert(pcrel >= 0);
-        insn &= ~0x1ffdu;
-        insn |= reassemble_12(pcrel);
-        break;
-    case R_PARISC_PCREL17F:
-        assert(check_fit_tl(pcrel, 17));
-        insn &= ~0x1f1ffdu;
-        insn |= reassemble_17(pcrel);
-        break;
-    default:
-        tcg_abort();
-    }
-
-    *insn_ptr = insn;
-}
-
-/* maximum number of register used for input function arguments */
-static inline int tcg_target_get_call_iarg_regs_count(int flags)
-{
-    return 4;
-}
-
-/* parse target specific constraints */
-static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
-{
-    const char *ct_str;
-
-    ct_str = *pct_str;
-    switch (ct_str[0]) {
-    case 'r':
-        ct->ct |= TCG_CT_REG;
-        tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
-        break;
-    case 'L': /* qemu_ld/st constraint */
-        ct->ct |= TCG_CT_REG;
-        tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R26);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R25);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R24);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R23);
-        break;
-    case 'Z':
-        ct->ct |= TCG_CT_CONST_0;
-        break;
-    case 'I':
-        ct->ct |= TCG_CT_CONST_S11;
-        break;
-    case 'J':
-        ct->ct |= TCG_CT_CONST_S5;
-	break;
-    case 'K':
-        ct->ct |= TCG_CT_CONST_MS11;
-        break;
-    case 'M':
-        ct->ct |= TCG_CT_CONST_AND;
-        break;
-    case 'O':
-        ct->ct |= TCG_CT_CONST_OR;
-        break;
-    default:
-        return -1;
-    }
-    ct_str++;
-    *pct_str = ct_str;
-    return 0;
-}
-
-/* test if a constant matches the constraint */
-static int tcg_target_const_match(tcg_target_long val,
-                                  const TCGArgConstraint *arg_ct)
-{
-    int ct = arg_ct->ct;
-    if (ct & TCG_CT_CONST) {
-        return 1;
-    } else if (ct & TCG_CT_CONST_0) {
-        return val == 0;
-    } else if (ct & TCG_CT_CONST_S5) {
-        return check_fit_tl(val, 5);
-    } else if (ct & TCG_CT_CONST_S11) {
-        return check_fit_tl(val, 11);
-    } else if (ct & TCG_CT_CONST_MS11) {
-        return check_fit_tl(-val, 11);
-    } else if (ct & TCG_CT_CONST_AND) {
-        return and_mask_p(val);
-    } else if (ct & TCG_CT_CONST_OR) {
-        return or_mask_p(val);
-    }
-    return 0;
-}
-
-#define INSN_OP(x)       ((x) << 26)
-#define INSN_EXT3BR(x)   ((x) << 13)
-#define INSN_EXT3SH(x)   ((x) << 10)
-#define INSN_EXT4(x)     ((x) << 6)
-#define INSN_EXT5(x)     (x)
-#define INSN_EXT6(x)     ((x) << 6)
-#define INSN_EXT7(x)     ((x) << 6)
-#define INSN_EXT8A(x)    ((x) << 6)
-#define INSN_EXT8B(x)    ((x) << 5)
-#define INSN_T(x)        (x)
-#define INSN_R1(x)       ((x) << 16)
-#define INSN_R2(x)       ((x) << 21)
-#define INSN_DEP_LEN(x)  (32 - (x))
-#define INSN_SHDEP_CP(x) ((31 - (x)) << 5)
-#define INSN_SHDEP_P(x)  ((x) << 5)
-#define INSN_COND(x)     ((x) << 13)
-#define INSN_IM11(x)     low_sign_ext(x, 11)
-#define INSN_IM14(x)     low_sign_ext(x, 14)
-#define INSN_IM5(x)      (low_sign_ext(x, 5) << 16)
-
-#define COND_NEVER   0
-#define COND_EQ      1
-#define COND_LT      2
-#define COND_LE      3
-#define COND_LTU     4
-#define COND_LEU     5
-#define COND_SV      6
-#define COND_OD      7
-#define COND_FALSE   8
-
-#define INSN_ADD	(INSN_OP(0x02) | INSN_EXT6(0x18))
-#define INSN_ADDC	(INSN_OP(0x02) | INSN_EXT6(0x1c))
-#define INSN_ADDI	(INSN_OP(0x2d))
-#define INSN_ADDIL	(INSN_OP(0x0a))
-#define INSN_ADDL	(INSN_OP(0x02) | INSN_EXT6(0x28))
-#define INSN_AND	(INSN_OP(0x02) | INSN_EXT6(0x08))
-#define INSN_ANDCM	(INSN_OP(0x02) | INSN_EXT6(0x00))
-#define INSN_COMCLR	(INSN_OP(0x02) | INSN_EXT6(0x22))
-#define INSN_COMICLR	(INSN_OP(0x24))
-#define INSN_DEP	(INSN_OP(0x35) | INSN_EXT3SH(3))
-#define INSN_DEPI	(INSN_OP(0x35) | INSN_EXT3SH(7))
-#define INSN_EXTRS	(INSN_OP(0x34) | INSN_EXT3SH(7))
-#define INSN_EXTRU	(INSN_OP(0x34) | INSN_EXT3SH(6))
-#define INSN_LDIL	(INSN_OP(0x08))
-#define INSN_LDO	(INSN_OP(0x0d))
-#define INSN_MTCTL	(INSN_OP(0x00) | INSN_EXT8B(0xc2))
-#define INSN_OR		(INSN_OP(0x02) | INSN_EXT6(0x09))
-#define INSN_SHD	(INSN_OP(0x34) | INSN_EXT3SH(2))
-#define INSN_SUB	(INSN_OP(0x02) | INSN_EXT6(0x10))
-#define INSN_SUBB	(INSN_OP(0x02) | INSN_EXT6(0x14))
-#define INSN_SUBI	(INSN_OP(0x25))
-#define INSN_VEXTRS	(INSN_OP(0x34) | INSN_EXT3SH(5))
-#define INSN_VEXTRU	(INSN_OP(0x34) | INSN_EXT3SH(4))
-#define INSN_VSHD	(INSN_OP(0x34) | INSN_EXT3SH(0))
-#define INSN_XOR	(INSN_OP(0x02) | INSN_EXT6(0x0a))
-#define INSN_ZDEP	(INSN_OP(0x35) | INSN_EXT3SH(2))
-#define INSN_ZVDEP	(INSN_OP(0x35) | INSN_EXT3SH(0))
-
-#define INSN_BL         (INSN_OP(0x3a) | INSN_EXT3BR(0))
-#define INSN_BL_N       (INSN_OP(0x3a) | INSN_EXT3BR(0) | 2)
-#define INSN_BLR        (INSN_OP(0x3a) | INSN_EXT3BR(2))
-#define INSN_BV         (INSN_OP(0x3a) | INSN_EXT3BR(6))
-#define INSN_BV_N       (INSN_OP(0x3a) | INSN_EXT3BR(6) | 2)
-#define INSN_BLE_SR4    (INSN_OP(0x39) | (1 << 13))
-
-#define INSN_LDB        (INSN_OP(0x10))
-#define INSN_LDH        (INSN_OP(0x11))
-#define INSN_LDW        (INSN_OP(0x12))
-#define INSN_LDWM       (INSN_OP(0x13))
-#define INSN_FLDDS      (INSN_OP(0x0b) | INSN_EXT4(0) | (1 << 12))
-
-#define INSN_LDBX	(INSN_OP(0x03) | INSN_EXT4(0))
-#define INSN_LDHX	(INSN_OP(0x03) | INSN_EXT4(1))
-#define INSN_LDWX       (INSN_OP(0x03) | INSN_EXT4(2))
-
-#define INSN_STB        (INSN_OP(0x18))
-#define INSN_STH        (INSN_OP(0x19))
-#define INSN_STW        (INSN_OP(0x1a))
-#define INSN_STWM       (INSN_OP(0x1b))
-#define INSN_FSTDS      (INSN_OP(0x0b) | INSN_EXT4(8) | (1 << 12))
-
-#define INSN_COMBT      (INSN_OP(0x20))
-#define INSN_COMBF      (INSN_OP(0x22))
-#define INSN_COMIBT     (INSN_OP(0x21))
-#define INSN_COMIBF     (INSN_OP(0x23))
-
-/* supplied by libgcc */
-extern void *__canonicalize_funcptr_for_compare(void *);
-
-static void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg)
-{
-    /* PA1.1 defines COPY as OR r,0,t; PA2.0 defines COPY as LDO 0(r),t
-       but hppa-dis.c is unaware of this definition */
-    if (ret != arg) {
-        tcg_out32(s, INSN_OR | INSN_T(ret) | INSN_R1(arg)
-                  | INSN_R2(TCG_REG_R0));
-    }
-}
-
-static void tcg_out_movi(TCGContext *s, TCGType type,
-                         int ret, tcg_target_long arg)
-{
-    if (check_fit_tl(arg, 14)) {
-        tcg_out32(s, INSN_LDO | INSN_R1(ret)
-                  | INSN_R2(TCG_REG_R0) | INSN_IM14(arg));
-    } else {
-        uint32_t hi, lo;
-        hi = arg >> 11;
-        lo = arg & 0x7ff;
-
-        tcg_out32(s, INSN_LDIL | INSN_R2(ret) | reassemble_21(hi));
-        if (lo) {
-            tcg_out32(s, INSN_LDO | INSN_R1(ret)
-                      | INSN_R2(ret) | INSN_IM14(lo));
-        }
-    }
-}
-
-static void tcg_out_ldst(TCGContext *s, int ret, int addr,
-                         tcg_target_long offset, int op)
-{
-    if (!check_fit_tl(offset, 14)) {
-        uint32_t hi, lo, op;
-
-        hi = offset >> 11;
-        lo = offset & 0x7ff;
-
-        if (addr == TCG_REG_R0) {
-            op = INSN_LDIL | INSN_R2(TCG_REG_R1);
-        } else {
-            op = INSN_ADDIL | INSN_R2(addr);
-        }
-        tcg_out32(s, op | reassemble_21(hi));
-
-        addr = TCG_REG_R1;
-	offset = lo;
-    }
-
-    if (ret != addr || offset != 0 || op != INSN_LDO) {
-        tcg_out32(s, op | INSN_R1(ret) | INSN_R2(addr) | INSN_IM14(offset));
-    }
-}
-
-/* This function is required by tcg.c.  */
-static inline void tcg_out_ld(TCGContext *s, TCGType type, int ret,
-                              int arg1, tcg_target_long arg2)
-{
-    tcg_out_ldst(s, ret, arg1, arg2, INSN_LDW);
-}
-
-/* This function is required by tcg.c.  */
-static inline void tcg_out_st(TCGContext *s, TCGType type, int ret,
-                              int arg1, tcg_target_long arg2)
-{
-    tcg_out_ldst(s, ret, arg1, arg2, INSN_STW);
-}
-
-static void tcg_out_ldst_index(TCGContext *s, int data,
-                               int base, int index, int op)
-{
-    tcg_out32(s, op | INSN_T(data) | INSN_R1(index) | INSN_R2(base));
-}
-
-static inline void tcg_out_addi2(TCGContext *s, int ret, int arg1,
-                                 tcg_target_long val)
-{
-    tcg_out_ldst(s, ret, arg1, val, INSN_LDO);
-}
-
-/* This function is required by tcg.c.  */
-static inline void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
-{
-    tcg_out_addi2(s, reg, reg, val);
-}
-
-static inline void tcg_out_arith(TCGContext *s, int t, int r1, int r2, int op)
-{
-    tcg_out32(s, op | INSN_T(t) | INSN_R1(r1) | INSN_R2(r2));
-}
-
-static inline void tcg_out_arithi(TCGContext *s, int t, int r1,
-                                  tcg_target_long val, int op)
-{
-    assert(check_fit_tl(val, 11));
-    tcg_out32(s, op | INSN_R1(t) | INSN_R2(r1) | INSN_IM11(val));
-}
-
-static inline void tcg_out_nop(TCGContext *s)
-{
-    tcg_out_arith(s, TCG_REG_R0, TCG_REG_R0, TCG_REG_R0, INSN_OR);
-}
-
-static inline void tcg_out_mtctl_sar(TCGContext *s, int arg)
-{
-    tcg_out32(s, INSN_MTCTL | INSN_R2(11) | INSN_R1(arg));
-}
-
-/* Extract LEN bits at position OFS from ARG and place in RET.
-   Note that here the bit ordering is reversed from the PA-RISC
-   standard, such that the right-most bit is 0.  */
-static inline void tcg_out_extr(TCGContext *s, int ret, int arg,
-                                unsigned ofs, unsigned len, int sign)
-{
-    assert(ofs < 32 && len <= 32 - ofs);
-    tcg_out32(s, (sign ? INSN_EXTRS : INSN_EXTRU)
-              | INSN_R1(ret) | INSN_R2(arg)
-              | INSN_SHDEP_P(31 - ofs) | INSN_DEP_LEN(len));
-}
-
-/* Likewise with OFS interpreted little-endian.  */
-static inline void tcg_out_dep(TCGContext *s, int ret, int arg,
-                               unsigned ofs, unsigned len)
-{
-    assert(ofs < 32 && len <= 32 - ofs);
-    tcg_out32(s, INSN_DEP | INSN_R2(ret) | INSN_R1(arg)
-              | INSN_SHDEP_CP(31 - ofs) | INSN_DEP_LEN(len));
-}
-
-static inline void tcg_out_shd(TCGContext *s, int ret, int hi, int lo,
-                               unsigned count)
-{
-    assert(count < 32);
-    tcg_out32(s, INSN_SHD | INSN_R1(hi) | INSN_R2(lo) | INSN_T(ret)
-              | INSN_SHDEP_CP(count));
-}
-
-static void tcg_out_vshd(TCGContext *s, int ret, int hi, int lo, int creg)
-{
-    tcg_out_mtctl_sar(s, creg);
-    tcg_out32(s, INSN_VSHD | INSN_T(ret) | INSN_R1(hi) | INSN_R2(lo));
-}
-
-static void tcg_out_ori(TCGContext *s, int ret, int arg, tcg_target_ulong m)
-{
-    int bs0, bs1;
-
-    /* Note that the argument is constrained to match or_mask_p.  */
-    for (bs0 = 0; bs0 < 32; bs0++) {
-        if ((m & (1u << bs0)) != 0) {
-            break;
-        }
-    }
-    for (bs1 = bs0; bs1 < 32; bs1++) {
-        if ((m & (1u << bs1)) == 0) {
-            break;
-        }
-    }
-    assert(bs1 == 32 || (1ul << bs1) > m);
-
-    tcg_out_mov(s, TCG_TYPE_I32, ret, arg);
-    tcg_out32(s, INSN_DEPI | INSN_R2(ret) | INSN_IM5(-1)
-              | INSN_SHDEP_CP(31 - bs0) | INSN_DEP_LEN(bs1 - bs0));
-}
-
-static void tcg_out_andi(TCGContext *s, int ret, int arg, tcg_target_ulong m)
-{
-    int ls0, ls1, ms0;
-
-    /* Note that the argument is constrained to match and_mask_p.  */
-    for (ls0 = 0; ls0 < 32; ls0++) {
-        if ((m & (1u << ls0)) == 0) {
-            break;
-        }
-    }
-    for (ls1 = ls0; ls1 < 32; ls1++) {
-        if ((m & (1u << ls1)) != 0) {
-            break;
-        }
-    }
-    for (ms0 = ls1; ms0 < 32; ms0++) {
-        if ((m & (1u << ms0)) == 0) {
-            break;
-        }
-    }
-    assert (ms0 == 32);
-
-    if (ls1 == 32) {
-        tcg_out_extr(s, ret, arg, 0, ls0, 0);
-    } else {
-        tcg_out_mov(s, TCG_TYPE_I32, ret, arg);
-        tcg_out32(s, INSN_DEPI | INSN_R2(ret) | INSN_IM5(0)
-                  | INSN_SHDEP_CP(31 - ls0) | INSN_DEP_LEN(ls1 - ls0));
-    }
-}
-
-static inline void tcg_out_ext8s(TCGContext *s, int ret, int arg)
-{
-    tcg_out_extr(s, ret, arg, 0, 8, 1);
-}
-
-static inline void tcg_out_ext16s(TCGContext *s, int ret, int arg)
-{
-    tcg_out_extr(s, ret, arg, 0, 16, 1);
-}
-
-static void tcg_out_shli(TCGContext *s, int ret, int arg, int count)
-{
-    count &= 31;
-    tcg_out32(s, INSN_ZDEP | INSN_R2(ret) | INSN_R1(arg)
-              | INSN_SHDEP_CP(31 - count) | INSN_DEP_LEN(32 - count));
-}
-
-static void tcg_out_shl(TCGContext *s, int ret, int arg, int creg)
-{
-    tcg_out_arithi(s, TCG_REG_R20, creg, 31, INSN_SUBI);
-    tcg_out_mtctl_sar(s, TCG_REG_R20);
-    tcg_out32(s, INSN_ZVDEP | INSN_R2(ret) | INSN_R1(arg) | INSN_DEP_LEN(32));
-}
-
-static void tcg_out_shri(TCGContext *s, int ret, int arg, int count)
-{
-    count &= 31;
-    tcg_out_extr(s, ret, arg, count, 32 - count, 0);
-}
-
-static void tcg_out_shr(TCGContext *s, int ret, int arg, int creg)
-{
-    tcg_out_vshd(s, ret, TCG_REG_R0, arg, creg);
-}
-
-static void tcg_out_sari(TCGContext *s, int ret, int arg, int count)
-{
-    count &= 31;
-    tcg_out_extr(s, ret, arg, count, 32 - count, 1);
-}
-
-static void tcg_out_sar(TCGContext *s, int ret, int arg, int creg)
-{
-    tcg_out_arithi(s, TCG_REG_R20, creg, 31, INSN_SUBI);
-    tcg_out_mtctl_sar(s, TCG_REG_R20);
-    tcg_out32(s, INSN_VEXTRS | INSN_R1(ret) | INSN_R2(arg) | INSN_DEP_LEN(32));
-}
-
-static void tcg_out_rotli(TCGContext *s, int ret, int arg, int count)
-{
-    count &= 31;
-    tcg_out_shd(s, ret, arg, arg, 32 - count);
-}
-
-static void tcg_out_rotl(TCGContext *s, int ret, int arg, int creg)
-{
-    tcg_out_arithi(s, TCG_REG_R20, creg, 32, INSN_SUBI);
-    tcg_out_vshd(s, ret, arg, arg, TCG_REG_R20);
-}
-
-static void tcg_out_rotri(TCGContext *s, int ret, int arg, int count)
-{
-    count &= 31;
-    tcg_out_shd(s, ret, arg, arg, count);
-}
-
-static void tcg_out_rotr(TCGContext *s, int ret, int arg, int creg)
-{
-    tcg_out_vshd(s, ret, arg, arg, creg);
-}
-
-static void tcg_out_bswap16(TCGContext *s, int ret, int arg, int sign)
-{
-    if (ret != arg) {
-        tcg_out_mov(s, TCG_TYPE_I32, ret, arg); /* arg =  xxAB */
-    }
-    tcg_out_dep(s, ret, ret, 16, 8);          /* ret =  xBAB */
-    tcg_out_extr(s, ret, ret, 8, 16, sign);   /* ret =  ..BA */
-}
-
-static void tcg_out_bswap32(TCGContext *s, int ret, int arg, int temp)
-{
-                                          /* arg =  ABCD */
-    tcg_out_rotri(s, temp, arg, 16);      /* temp = CDAB */
-    tcg_out_dep(s, temp, temp, 16, 8);    /* temp = CBAB */
-    tcg_out_shd(s, ret, arg, temp, 8);    /* ret =  DCBA */
-}
-
-static void tcg_out_call(TCGContext *s, void *func)
-{
-    tcg_target_long val, hi, lo, disp;
-
-    val = (uint32_t)__canonicalize_funcptr_for_compare(func);
-    disp = (val - ((tcg_target_long)s->code_ptr + 8)) >> 2;
-
-    if (check_fit_tl(disp, 17)) {
-        tcg_out32(s, INSN_BL_N | INSN_R2(TCG_REG_RP) | reassemble_17(disp));
-    } else {
-        hi = val >> 11;
-        lo = val & 0x7ff;
-
-        tcg_out32(s, INSN_LDIL | INSN_R2(TCG_REG_R20) | reassemble_21(hi));
-        tcg_out32(s, INSN_BLE_SR4 | INSN_R2(TCG_REG_R20)
-                  | reassemble_17(lo >> 2));
-        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_RP, TCG_REG_R31);
-    }
-}
-
-static void tcg_out_xmpyu(TCGContext *s, int retl, int reth,
-                          int arg1, int arg2)
-{
-    /* Store both words into the stack for copy to the FPU.  */
-    tcg_out_ldst(s, arg1, TCG_REG_SP, STACK_TEMP_OFS, INSN_STW);
-    tcg_out_ldst(s, arg2, TCG_REG_SP, STACK_TEMP_OFS + 4, INSN_STW);
-
-    /* Load both words into the FPU at the same time.  We get away
-       with this because we can address the left and right half of the
-       FPU registers individually once loaded.  */
-    /* fldds stack_temp(sp),fr22 */
-    tcg_out32(s, INSN_FLDDS | INSN_R2(TCG_REG_SP)
-              | INSN_IM5(STACK_TEMP_OFS) | INSN_T(22));
-
-    /* xmpyu fr22r,fr22,fr22 */
-    tcg_out32(s, 0x3ad64796);
-
-    /* Store the 64-bit result back into the stack.  */
-    /* fstds stack_temp(sp),fr22 */
-    tcg_out32(s, INSN_FSTDS | INSN_R2(TCG_REG_SP)
-              | INSN_IM5(STACK_TEMP_OFS) | INSN_T(22));
-
-    /* Load the pieces of the result that the caller requested.  */
-    if (reth) {
-        tcg_out_ldst(s, reth, TCG_REG_SP, STACK_TEMP_OFS, INSN_LDW);
-    }
-    if (retl) {
-        tcg_out_ldst(s, retl, TCG_REG_SP, STACK_TEMP_OFS + 4, INSN_LDW);
-    }
-}
-
-static void tcg_out_add2(TCGContext *s, int destl, int desth,
-                         int al, int ah, int bl, int bh, int blconst)
-{
-    int tmp = (destl == ah || destl == bh ? TCG_REG_R20 : destl);
-
-    if (blconst) {
-        tcg_out_arithi(s, tmp, al, bl, INSN_ADDI);
-    } else {
-        tcg_out_arith(s, tmp, al, bl, INSN_ADD);
-    }
-    tcg_out_arith(s, desth, ah, bh, INSN_ADDC);
-
-    tcg_out_mov(s, TCG_TYPE_I32, destl, tmp);
-}
-
-static void tcg_out_sub2(TCGContext *s, int destl, int desth, int al, int ah,
-                         int bl, int bh, int alconst, int blconst)
-{
-    int tmp = (destl == ah || destl == bh ? TCG_REG_R20 : destl);
-
-    if (alconst) {
-        if (blconst) {
-            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R20, bl);
-            bl = TCG_REG_R20;
-        }
-        tcg_out_arithi(s, tmp, bl, al, INSN_SUBI);
-    } else if (blconst) {
-        tcg_out_arithi(s, tmp, al, -bl, INSN_ADDI);
-    } else {
-        tcg_out_arith(s, tmp, al, bl, INSN_SUB);
-    }
-    tcg_out_arith(s, desth, ah, bh, INSN_SUBB);
-
-    tcg_out_mov(s, TCG_TYPE_I32, destl, tmp);
-}
-
-static void tcg_out_branch(TCGContext *s, int label_index, int nul)
-{
-    TCGLabel *l = &s->labels[label_index];
-    uint32_t op = nul ? INSN_BL_N : INSN_BL;
-
-    if (l->has_value) {
-        tcg_target_long val = l->u.value;
-
-        val -= (tcg_target_long)s->code_ptr + 8;
-        val >>= 2;
-        assert(check_fit_tl(val, 17));
-
-        tcg_out32(s, op | reassemble_17(val));
-    } else {
-        /* We need to keep the offset unchanged for retranslation.  */
-        uint32_t old_insn = *(uint32_t *)s->code_ptr;
-
-        tcg_out_reloc(s, s->code_ptr, R_PARISC_PCREL17F, label_index, 0);
-        tcg_out32(s, op | (old_insn & 0x1f1ffdu));
-    }
-}
-
-static const uint8_t tcg_cond_to_cmp_cond[10] =
-{
-    [TCG_COND_EQ] = COND_EQ,
-    [TCG_COND_NE] = COND_EQ | COND_FALSE,
-    [TCG_COND_LT] = COND_LT,
-    [TCG_COND_GE] = COND_LT | COND_FALSE,
-    [TCG_COND_LE] = COND_LE,
-    [TCG_COND_GT] = COND_LE | COND_FALSE,
-    [TCG_COND_LTU] = COND_LTU,
-    [TCG_COND_GEU] = COND_LTU | COND_FALSE,
-    [TCG_COND_LEU] = COND_LEU,
-    [TCG_COND_GTU] = COND_LEU | COND_FALSE,
-};
-
-static void tcg_out_brcond(TCGContext *s, int cond, TCGArg c1,
-                           TCGArg c2, int c2const, int label_index)
-{
-    TCGLabel *l = &s->labels[label_index];
-    int op, pacond;
-
-    /* Note that COMIB operates as if the immediate is the first
-       operand.  We model brcond with the immediate in the second
-       to better match what targets are likely to give us.  For
-       consistency, model COMB with reversed operands as well.  */
-    pacond = tcg_cond_to_cmp_cond[tcg_swap_cond(cond)];
-
-    if (c2const) {
-        op = (pacond & COND_FALSE ? INSN_COMIBF : INSN_COMIBT);
-        op |= INSN_IM5(c2);
-    } else {
-        op = (pacond & COND_FALSE ? INSN_COMBF : INSN_COMBT);
-        op |= INSN_R1(c2);
-    }
-    op |= INSN_R2(c1);
-    op |= INSN_COND(pacond & 7);
-
-    if (l->has_value) {
-        tcg_target_long val = l->u.value;
-
-        val -= (tcg_target_long)s->code_ptr + 8;
-        val >>= 2;
-        assert(check_fit_tl(val, 12));
-
-        /* ??? Assume that all branches to defined labels are backward.
-           Which means that if the nul bit is set, the delay slot is
-           executed if the branch is taken, and not executed in fallthru.  */
-        tcg_out32(s, op | reassemble_12(val));
-        tcg_out_nop(s);
-    } else {
-        /* We need to keep the offset unchanged for retranslation.  */
-        uint32_t old_insn = *(uint32_t *)s->code_ptr;
-
-        tcg_out_reloc(s, s->code_ptr, R_PARISC_PCREL12F, label_index, 0);
-        /* ??? Assume that all branches to undefined labels are forward.
-           Which means that if the nul bit is set, the delay slot is
-           not executed if the branch is taken, which is what we want.  */
-        tcg_out32(s, op | 2 | (old_insn & 0x1ffdu));
-    }
-}
-
-static void tcg_out_comclr(TCGContext *s, int cond, TCGArg ret,
-                           TCGArg c1, TCGArg c2, int c2const)
-{
-    int op, pacond;
-
-    /* Note that COMICLR operates as if the immediate is the first
-       operand.  We model setcond with the immediate in the second
-       to better match what targets are likely to give us.  For
-       consistency, model COMCLR with reversed operands as well.  */
-    pacond = tcg_cond_to_cmp_cond[tcg_swap_cond(cond)];
-
-    if (c2const) {
-        op = INSN_COMICLR | INSN_R2(c1) | INSN_R1(ret) | INSN_IM11(c2);
-    } else {
-        op = INSN_COMCLR | INSN_R2(c1) | INSN_R1(c2) | INSN_T(ret);
-    }
-    op |= INSN_COND(pacond & 7);
-    op |= pacond & COND_FALSE ? 1 << 12 : 0;
-
-    tcg_out32(s, op);
-}
-
-static void tcg_out_brcond2(TCGContext *s, int cond, TCGArg al, TCGArg ah,
-                            TCGArg bl, int blconst, TCGArg bh, int bhconst,
-                            int label_index)
-{
-    switch (cond) {
-    case TCG_COND_EQ:
-    case TCG_COND_NE:
-        tcg_out_comclr(s, tcg_invert_cond(cond), TCG_REG_R0, al, bl, blconst);
-        tcg_out_brcond(s, cond, ah, bh, bhconst, label_index);
-        break;
-
-    default:
-        tcg_out_brcond(s, cond, ah, bh, bhconst, label_index);
-        tcg_out_comclr(s, TCG_COND_NE, TCG_REG_R0, ah, bh, bhconst);
-        tcg_out_brcond(s, tcg_unsigned_cond(cond),
-                       al, bl, blconst, label_index);
-        break;
-    }
-}
-
-static void tcg_out_setcond(TCGContext *s, int cond, TCGArg ret,
-                            TCGArg c1, TCGArg c2, int c2const)
-{
-    tcg_out_comclr(s, tcg_invert_cond(cond), ret, c1, c2, c2const);
-    tcg_out_movi(s, TCG_TYPE_I32, ret, 1);
-}
-
-static void tcg_out_setcond2(TCGContext *s, int cond, TCGArg ret,
-                             TCGArg al, TCGArg ah, TCGArg bl, int blconst,
-                             TCGArg bh, int bhconst)
-{
-    int scratch = TCG_REG_R20;
-
-    if (ret != al && ret != ah
-        && (blconst || ret != bl)
-        && (bhconst || ret != bh)) {
-        scratch = ret;
-    }
-
-    switch (cond) {
-    case TCG_COND_EQ:
-    case TCG_COND_NE:
-        tcg_out_setcond(s, cond, scratch, al, bl, blconst);
-        tcg_out_comclr(s, TCG_COND_EQ, TCG_REG_R0, ah, bh, bhconst);
-        tcg_out_movi(s, TCG_TYPE_I32, scratch, cond == TCG_COND_NE);
-        break;
-
-    default:
-        tcg_out_setcond(s, tcg_unsigned_cond(cond), scratch, al, bl, blconst);
-        tcg_out_comclr(s, TCG_COND_EQ, TCG_REG_R0, ah, bh, bhconst);
-        tcg_out_movi(s, TCG_TYPE_I32, scratch, 0);
-        tcg_out_comclr(s, cond, TCG_REG_R0, ah, bh, bhconst);
-        tcg_out_movi(s, TCG_TYPE_I32, scratch, 1);
-        break;
-    }
-
-    tcg_out_mov(s, TCG_TYPE_I32, ret, scratch);
-}
-
-#if defined(CONFIG_SOFTMMU)
-#include "../../softmmu_defs.h"
-
-static void *qemu_ld_helpers[4] = {
-    __ldb_mmu,
-    __ldw_mmu,
-    __ldl_mmu,
-    __ldq_mmu,
-};
-
-static void *qemu_st_helpers[4] = {
-    __stb_mmu,
-    __stw_mmu,
-    __stl_mmu,
-    __stq_mmu,
-};
-
-/* Load and compare a TLB entry, and branch if TLB miss.  OFFSET is set to
-   the offset of the first ADDR_READ or ADDR_WRITE member of the appropriate
-   TLB for the memory index.  The return value is the offset from ENV
-   contained in R1 afterward (to be used when loading ADDEND); if the
-   return value is 0, R1 is not used.  */
-
-static int tcg_out_tlb_read(TCGContext *s, int r0, int r1, int addrlo,
-                            int addrhi, int s_bits, int lab_miss, int offset)
-{
-    int ret;
-
-    /* Extracting the index into the TLB.  The "normal C operation" is
-          r1 = addr_reg >> TARGET_PAGE_BITS;
-          r1 &= CPU_TLB_SIZE - 1;
-          r1 <<= CPU_TLB_ENTRY_BITS;
-       What this does is extract CPU_TLB_BITS beginning at TARGET_PAGE_BITS
-       and place them at CPU_TLB_ENTRY_BITS.  We can combine the first two
-       operations with an EXTRU.  Unfortunately, the current value of
-       CPU_TLB_ENTRY_BITS is > 3, so we can't merge that shift with the
-       add that follows.  */
-    tcg_out_extr(s, r1, addrlo, TARGET_PAGE_BITS, CPU_TLB_BITS, 0);
-    tcg_out_shli(s, r1, r1, CPU_TLB_ENTRY_BITS);
-    tcg_out_arith(s, r1, r1, TCG_AREG0, INSN_ADDL);
-
-    /* Make sure that both the addr_{read,write} and addend can be
-       read with a 14-bit offset from the same base register.  */
-    if (check_fit_tl(offset + CPU_TLB_SIZE, 14)) {
-        ret = 0;
-    } else {
-        ret = (offset + 0x400) & ~0x7ff;
-        offset = ret - offset;
-        tcg_out_addi2(s, TCG_REG_R1, r1, ret);
-        r1 = TCG_REG_R1;
-    }
-
-    /* Load the entry from the computed slot.  */
-    if (TARGET_LONG_BITS == 64) {
-        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R23, r1, offset);
-        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, r1, offset + 4);
-    } else {
-        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, r1, offset);
-    }
-
-    /* Compute the value that ought to appear in the TLB for a hit, namely, the page
-       of the address.  We include the low N bits of the address to catch unaligned
-       accesses and force them onto the slow path.  Do this computation after having
-       issued the load from the TLB slot to give the load time to complete.  */
-    tcg_out_andi(s, r0, addrlo, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
-
-    /* If not equal, jump to lab_miss. */
-    if (TARGET_LONG_BITS == 64) {
-        tcg_out_brcond2(s, TCG_COND_NE, TCG_REG_R20, TCG_REG_R23,
-                        r0, 0, addrhi, 0, lab_miss);
-    } else {
-        tcg_out_brcond(s, TCG_COND_NE, TCG_REG_R20, r0, 0, lab_miss);
-    }
-
-    return ret;
-}
-#endif
-
-static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo_reg, int datahi_reg,
-                                   int addr_reg, int addend_reg, int opc)
-{
-#ifdef TARGET_WORDS_BIGENDIAN
-    const int bswap = 0;
-#else
-    const int bswap = 1;
-#endif
-
-    switch (opc) {
-    case 0:
-        tcg_out_ldst_index(s, datalo_reg, addr_reg, addend_reg, INSN_LDBX);
-        break;
-    case 0 | 4:
-        tcg_out_ldst_index(s, datalo_reg, addr_reg, addend_reg, INSN_LDBX);
-        tcg_out_ext8s(s, datalo_reg, datalo_reg);
-        break;
-    case 1:
-        tcg_out_ldst_index(s, datalo_reg, addr_reg, addend_reg, INSN_LDHX);
-        if (bswap) {
-            tcg_out_bswap16(s, datalo_reg, datalo_reg, 0);
-        }
-        break;
-    case 1 | 4:
-        tcg_out_ldst_index(s, datalo_reg, addr_reg, addend_reg, INSN_LDHX);
-        if (bswap) {
-            tcg_out_bswap16(s, datalo_reg, datalo_reg, 1);
-        } else {
-            tcg_out_ext16s(s, datalo_reg, datalo_reg);
-        }
-        break;
-    case 2:
-        tcg_out_ldst_index(s, datalo_reg, addr_reg, addend_reg, INSN_LDWX);
-        if (bswap) {
-            tcg_out_bswap32(s, datalo_reg, datalo_reg, TCG_REG_R20);
-        }
-        break;
-    case 3:
-        if (bswap) {
-            int t = datahi_reg;
-            datahi_reg = datalo_reg;
-            datalo_reg = t;
-        }
-        /* We can't access the low-part with a reg+reg addressing mode,
-           so perform the addition now and use reg_ofs addressing mode.  */
-        if (addend_reg != TCG_REG_R0) {
-            tcg_out_arith(s, TCG_REG_R20, addr_reg, addend_reg, INSN_ADD);
-            addr_reg = TCG_REG_R20;
-	}
-        /* Make sure not to clobber the base register.  */
-        if (datahi_reg == addr_reg) {
-            tcg_out_ldst(s, datalo_reg, addr_reg, 4, INSN_LDW);
-            tcg_out_ldst(s, datahi_reg, addr_reg, 0, INSN_LDW);
-        } else {
-            tcg_out_ldst(s, datahi_reg, addr_reg, 0, INSN_LDW);
-            tcg_out_ldst(s, datalo_reg, addr_reg, 4, INSN_LDW);
-        }
-        if (bswap) {
-            tcg_out_bswap32(s, datalo_reg, datalo_reg, TCG_REG_R20);
-            tcg_out_bswap32(s, datahi_reg, datahi_reg, TCG_REG_R20);
-        }
-        break;
-    default:
-        tcg_abort();
-    }
-}
-
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
-{
-    int datalo_reg = *args++;
-    /* Note that datahi_reg is only used for 64-bit loads.  */
-    int datahi_reg = (opc == 3 ? *args++ : TCG_REG_R0);
-    int addrlo_reg = *args++;
-
-#if defined(CONFIG_SOFTMMU)
-    /* Note that addrhi_reg is only used for 64-bit guests.  */
-    int addrhi_reg = (TARGET_LONG_BITS == 64 ? *args++ : TCG_REG_R0);
-    int mem_index = *args;
-    int lab1, lab2, argreg, offset;
-
-    lab1 = gen_new_label();
-    lab2 = gen_new_label();
-
-    offset = offsetof(CPUOldState, tlb_table[mem_index][0].addr_read);
-    offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, addrhi_reg,
-                              opc & 3, lab1, offset);
-
-    /* TLB Hit.  */
-    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : TCG_REG_R25),
-               offsetof(CPUOldState, tlb_table[mem_index][0].addend) - offset);
-    tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg, TCG_REG_R20, opc);
-    tcg_out_branch(s, lab2, 1);
-
-    /* TLB Miss.  */
-    /* label1: */
-    tcg_out_label(s, lab1, (tcg_target_long)s->code_ptr);
-
-    argreg = TCG_REG_R26;
-    tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrlo_reg);
-    if (TARGET_LONG_BITS == 64) {
-        tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrhi_reg);
-    }
-    tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
-
-    tcg_out_call(s, qemu_ld_helpers[opc & 3]);
-
-    switch (opc) {
-    case 0:
-        tcg_out_andi(s, datalo_reg, TCG_REG_RET0, 0xff);
-        break;
-    case 0 | 4:
-        tcg_out_ext8s(s, datalo_reg, TCG_REG_RET0);
-        break;
-    case 1:
-        tcg_out_andi(s, datalo_reg, TCG_REG_RET0, 0xffff);
-        break;
-    case 1 | 4:
-        tcg_out_ext16s(s, datalo_reg, TCG_REG_RET0);
-        break;
-    case 2:
-    case 2 | 4:
-        tcg_out_mov(s, TCG_TYPE_I32, datalo_reg, TCG_REG_RET0);
-        break;
-    case 3:
-        tcg_out_mov(s, TCG_TYPE_I32, datahi_reg, TCG_REG_RET0);
-        tcg_out_mov(s, TCG_TYPE_I32, datalo_reg, TCG_REG_RET1);
-        break;
-    default:
-        tcg_abort();
-    }
-
-    /* label2: */
-    tcg_out_label(s, lab2, (tcg_target_long)s->code_ptr);
-#else
-    tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg,
-                           (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_R0), opc);
-#endif
-}
-
-static void tcg_out_qemu_st_direct(TCGContext *s, int datalo_reg, int datahi_reg,
-                                   int addr_reg, int opc)
-{
-#ifdef TARGET_WORDS_BIGENDIAN
-    const int bswap = 0;
-#else
-    const int bswap = 1;
-#endif
-
-    switch (opc) {
-    case 0:
-        tcg_out_ldst(s, datalo_reg, addr_reg, 0, INSN_STB);
-        break;
-    case 1:
-        if (bswap) {
-            tcg_out_bswap16(s, TCG_REG_R20, datalo_reg, 0);
-            datalo_reg = TCG_REG_R20;
-        }
-        tcg_out_ldst(s, datalo_reg, addr_reg, 0, INSN_STH);
-        break;
-    case 2:
-        if (bswap) {
-            tcg_out_bswap32(s, TCG_REG_R20, datalo_reg, TCG_REG_R20);
-            datalo_reg = TCG_REG_R20;
-        }
-        tcg_out_ldst(s, datalo_reg, addr_reg, 0, INSN_STW);
-        break;
-    case 3:
-        if (bswap) {
-            tcg_out_bswap32(s, TCG_REG_R20, datalo_reg, TCG_REG_R20);
-            tcg_out_bswap32(s, TCG_REG_R23, datahi_reg, TCG_REG_R23);
-            datahi_reg = TCG_REG_R20;
-            datalo_reg = TCG_REG_R23;
-        }
-        tcg_out_ldst(s, datahi_reg, addr_reg, 0, INSN_STW);
-        tcg_out_ldst(s, datalo_reg, addr_reg, 4, INSN_STW);
-        break;
-    default:
-        tcg_abort();
-    }
-
-}
-
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
-{
-    int datalo_reg = *args++;
-    /* Note that datahi_reg is only used for 64-bit stores.  */
-    int datahi_reg = (opc == 3 ? *args++ : TCG_REG_R0);
-    int addrlo_reg = *args++;
-
-#if defined(CONFIG_SOFTMMU)
-    /* Note that addrhi_reg is only used for 64-bit guests.  */
-    int addrhi_reg = (TARGET_LONG_BITS == 64 ? *args++ : TCG_REG_R0);
-    int mem_index = *args;
-    int lab1, lab2, argreg, offset;
-
-    lab1 = gen_new_label();
-    lab2 = gen_new_label();
-
-    offset = offsetof(CPUOldState, tlb_table[mem_index][0].addr_write);
-    offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, addrhi_reg,
-                              opc, lab1, offset);
-
-    /* TLB Hit.  */
-    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : TCG_REG_R25),
-               offsetof(CPUOldState, tlb_table[mem_index][0].addend) - offset);
-
-    /* There are no indexed stores, so we must do this addition explicitly.
-       Careful to avoid R20, which is used for the bswaps to follow.  */
-    tcg_out_arith(s, TCG_REG_R31, addrlo_reg, TCG_REG_R20, INSN_ADDL);
-    tcg_out_qemu_st_direct(s, datalo_reg, datahi_reg, TCG_REG_R31, opc);
-    tcg_out_branch(s, lab2, 1);
-
-    /* TLB Miss.  */
-    /* label1: */
-    tcg_out_label(s, lab1, (tcg_target_long)s->code_ptr);
-
-    argreg = TCG_REG_R26;
-    tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrlo_reg);
-    if (TARGET_LONG_BITS == 64) {
-        tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrhi_reg);
-    }
-
-    switch(opc) {
-    case 0:
-        tcg_out_andi(s, argreg--, datalo_reg, 0xff);
-        tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
-        break;
-    case 1:
-        tcg_out_andi(s, argreg--, datalo_reg, 0xffff);
-        tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
-        break;
-    case 2:
-        tcg_out_mov(s, TCG_TYPE_I32, argreg--, datalo_reg);
-        tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
-        break;
-    case 3:
-        /* Because of the alignment required by the 64-bit data argument,
-           we will always use R23/R24.  Also, we will always run out of
-           argument registers for storing mem_index, so that will have
-           to go on the stack.  */
-        if (mem_index == 0) {
-            argreg = TCG_REG_R0;
-        } else {
-            argreg = TCG_REG_R20;
-            tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
-        }
-        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R23, datahi_reg);
-        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R24, datalo_reg);
-        tcg_out_st(s, TCG_TYPE_I32, argreg, TCG_REG_SP,
-                   TCG_TARGET_CALL_STACK_OFFSET - 4);
-        break;
-    default:
-        tcg_abort();
-    }
-
-    tcg_out_call(s, qemu_st_helpers[opc]);
-
-    /* label2: */
-    tcg_out_label(s, lab2, (tcg_target_long)s->code_ptr);
-#else
-    /* There are no indexed stores, so if GUEST_BASE is set we must do the add
-       explicitly.  Careful to avoid R20, which is used for the bswaps to follow.  */
-    if (GUEST_BASE != 0) {
-        tcg_out_arith(s, TCG_REG_R31, addrlo_reg, TCG_GUEST_BASE_REG, INSN_ADDL);
-        addrlo_reg = TCG_REG_R31;
-    }
-    tcg_out_qemu_st_direct(s, datalo_reg, datahi_reg, addrlo_reg, opc);
-#endif
-}
-
-static void tcg_out_exit_tb(TCGContext *s, TCGArg arg)
-{
-    if (!check_fit_tl(arg, 14)) {
-        uint32_t hi, lo;
-        hi = arg & ~0x7ff;
-        lo = arg & 0x7ff;
-        if (lo) {
-            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RET0, hi);
-            tcg_out32(s, INSN_BV | INSN_R2(TCG_REG_R18));
-            tcg_out_addi(s, TCG_REG_RET0, lo);
-            return;
-        }
-        arg = hi;
-    }
-    tcg_out32(s, INSN_BV | INSN_R2(TCG_REG_R18));
-    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RET0, arg);
-}
-
-static void tcg_out_goto_tb(TCGContext *s, TCGArg arg)
-{
-    if (s->tb_jmp_offset) {
-        /* direct jump method */
-        fprintf(stderr, "goto_tb direct\n");
-        tcg_abort();
-    } else {
-        /* indirect jump method */
-        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, TCG_REG_R0,
-                   (tcg_target_long)(s->tb_next + arg));
-        tcg_out32(s, INSN_BV_N | INSN_R2(TCG_REG_R20));
-    }
-    s->tb_next_offset[arg] = s->code_ptr - s->code_buf;
-}
-
-static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
-                              const int *const_args)
-{
-    switch (opc) {
-    case INDEX_op_exit_tb:
-        tcg_out_exit_tb(s, args[0]);
-        break;
-    case INDEX_op_goto_tb:
-        tcg_out_goto_tb(s, args[0]);
-        break;
-
-    case INDEX_op_call:
-        if (const_args[0]) {
-            tcg_out_call(s, (void *)args[0]);
-        } else {
-            /* ??? FIXME: the value in the register in args[0] is almost
-               certainly a procedure descriptor, not a code address.  We
-               probably need to use the millicode $$dyncall routine.  */
-            tcg_abort();
-        }
-        break;
-
-    case INDEX_op_jmp:
-        fprintf(stderr, "unimplemented jmp\n");
-        tcg_abort();
-        break;
-
-    case INDEX_op_br:
-        tcg_out_branch(s, args[0], 1);
-        break;
-
-    case INDEX_op_movi_i32:
-        tcg_out_movi(s, TCG_TYPE_I32, args[0], (uint32_t)args[1]);
-        break;
-
-    case INDEX_op_ld8u_i32:
-        tcg_out_ldst(s, args[0], args[1], args[2], INSN_LDB);
-        break;
-    case INDEX_op_ld8s_i32:
-        tcg_out_ldst(s, args[0], args[1], args[2], INSN_LDB);
-        tcg_out_ext8s(s, args[0], args[0]);
-        break;
-    case INDEX_op_ld16u_i32:
-        tcg_out_ldst(s, args[0], args[1], args[2], INSN_LDH);
-        break;
-    case INDEX_op_ld16s_i32:
-        tcg_out_ldst(s, args[0], args[1], args[2], INSN_LDH);
-        tcg_out_ext16s(s, args[0], args[0]);
-        break;
-    case INDEX_op_ld_i32:
-        tcg_out_ldst(s, args[0], args[1], args[2], INSN_LDW);
-        break;
-
-    case INDEX_op_st8_i32:
-        tcg_out_ldst(s, args[0], args[1], args[2], INSN_STB);
-        break;
-    case INDEX_op_st16_i32:
-        tcg_out_ldst(s, args[0], args[1], args[2], INSN_STH);
-        break;
-    case INDEX_op_st_i32:
-        tcg_out_ldst(s, args[0], args[1], args[2], INSN_STW);
-        break;
-
-    case INDEX_op_add_i32:
-        if (const_args[2]) {
-            tcg_out_addi2(s, args[0], args[1], args[2]);
-        } else {
-            tcg_out_arith(s, args[0], args[1], args[2], INSN_ADDL);
-        }
-        break;
-
-    case INDEX_op_sub_i32:
-        if (const_args[1]) {
-            if (const_args[2]) {
-                tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1] - args[2]);
-            } else {
-                /* Recall that SUBI is a reversed subtract.  */
-                tcg_out_arithi(s, args[0], args[2], args[1], INSN_SUBI);
-            }
-        } else if (const_args[2]) {
-            tcg_out_addi2(s, args[0], args[1], -args[2]);
-        } else {
-            tcg_out_arith(s, args[0], args[1], args[2], INSN_SUB);
-        }
-        break;
-
-    case INDEX_op_and_i32:
-        if (const_args[2]) {
-            tcg_out_andi(s, args[0], args[1], args[2]);
-        } else {
-            tcg_out_arith(s, args[0], args[1], args[2], INSN_AND);
-        }
-        break;
-
-    case INDEX_op_or_i32:
-        if (const_args[2]) {
-            tcg_out_ori(s, args[0], args[1], args[2]);
-        } else {
-            tcg_out_arith(s, args[0], args[1], args[2], INSN_OR);
-        }
-        break;
-
-    case INDEX_op_xor_i32:
-        tcg_out_arith(s, args[0], args[1], args[2], INSN_XOR);
-        break;
-
-    case INDEX_op_andc_i32:
-        if (const_args[2]) {
-            tcg_out_andi(s, args[0], args[1], ~args[2]);
-        } else {
-            tcg_out_arith(s, args[0], args[1], args[2], INSN_ANDCM);
-        }
-        break;
-
-    case INDEX_op_shl_i32:
-        if (const_args[2]) {
-            tcg_out_shli(s, args[0], args[1], args[2]);
-        } else {
-            tcg_out_shl(s, args[0], args[1], args[2]);
-        }
-        break;
-
-    case INDEX_op_shr_i32:
-        if (const_args[2]) {
-            tcg_out_shri(s, args[0], args[1], args[2]);
-        } else {
-            tcg_out_shr(s, args[0], args[1], args[2]);
-        }
-        break;
-
-    case INDEX_op_sar_i32:
-        if (const_args[2]) {
-            tcg_out_sari(s, args[0], args[1], args[2]);
-        } else {
-            tcg_out_sar(s, args[0], args[1], args[2]);
-        }
-        break;
-
-    case INDEX_op_rotl_i32:
-        if (const_args[2]) {
-            tcg_out_rotli(s, args[0], args[1], args[2]);
-        } else {
-            tcg_out_rotl(s, args[0], args[1], args[2]);
-        }
-        break;
-
-    case INDEX_op_rotr_i32:
-        if (const_args[2]) {
-            tcg_out_rotri(s, args[0], args[1], args[2]);
-        } else {
-            tcg_out_rotr(s, args[0], args[1], args[2]);
-        }
-        break;
-
-    case INDEX_op_mul_i32:
-        tcg_out_xmpyu(s, args[0], TCG_REG_R0, args[1], args[2]);
-        break;
-    case INDEX_op_mulu2_i32:
-        tcg_out_xmpyu(s, args[0], args[1], args[2], args[3]);
-        break;
-
-    case INDEX_op_bswap16_i32:
-        tcg_out_bswap16(s, args[0], args[1], 0);
-        break;
-    case INDEX_op_bswap32_i32:
-        tcg_out_bswap32(s, args[0], args[1], TCG_REG_R20);
-        break;
-
-    case INDEX_op_not_i32:
-        tcg_out_arithi(s, args[0], args[1], -1, INSN_SUBI);
-        break;
-    case INDEX_op_ext8s_i32:
-        tcg_out_ext8s(s, args[0], args[1]);
-        break;
-    case INDEX_op_ext16s_i32:
-        tcg_out_ext16s(s, args[0], args[1]);
-        break;
-
-    case INDEX_op_brcond_i32:
-        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]);
-        break;
-    case INDEX_op_brcond2_i32:
-        tcg_out_brcond2(s, args[4], args[0], args[1],
-                        args[2], const_args[2],
-                        args[3], const_args[3], args[5]);
-        break;
-
-    case INDEX_op_setcond_i32:
-        tcg_out_setcond(s, args[3], args[0], args[1], args[2], const_args[2]);
-        break;
-    case INDEX_op_setcond2_i32:
-        tcg_out_setcond2(s, args[5], args[0], args[1], args[2],
-                         args[3], const_args[3], args[4], const_args[4]);
-        break;
-
-    case INDEX_op_add2_i32:
-        tcg_out_add2(s, args[0], args[1], args[2], args[3],
-                     args[4], args[5], const_args[4]);
-        break;
-
-    case INDEX_op_sub2_i32:
-        tcg_out_sub2(s, args[0], args[1], args[2], args[3],
-                     args[4], args[5], const_args[2], const_args[4]);
-        break;
-
-    case INDEX_op_qemu_ld8u:
-        tcg_out_qemu_ld(s, args, 0);
-        break;
-    case INDEX_op_qemu_ld8s:
-        tcg_out_qemu_ld(s, args, 0 | 4);
-        break;
-    case INDEX_op_qemu_ld16u:
-        tcg_out_qemu_ld(s, args, 1);
-        break;
-    case INDEX_op_qemu_ld16s:
-        tcg_out_qemu_ld(s, args, 1 | 4);
-        break;
-    case INDEX_op_qemu_ld32:
-        tcg_out_qemu_ld(s, args, 2);
-        break;
-    case INDEX_op_qemu_ld64:
-        tcg_out_qemu_ld(s, args, 3);
-        break;
-
-    case INDEX_op_qemu_st8:
-        tcg_out_qemu_st(s, args, 0);
-        break;
-    case INDEX_op_qemu_st16:
-        tcg_out_qemu_st(s, args, 1);
-        break;
-    case INDEX_op_qemu_st32:
-        tcg_out_qemu_st(s, args, 2);
-        break;
-    case INDEX_op_qemu_st64:
-        tcg_out_qemu_st(s, args, 3);
-        break;
-
-    default:
-        fprintf(stderr, "unknown opcode 0x%x\n", opc);
-        tcg_abort();
-    }
-}
-
-static const TCGTargetOpDef hppa_op_defs[] = {
-    { INDEX_op_exit_tb, { } },
-    { INDEX_op_goto_tb, { } },
-
-    { INDEX_op_call, { "ri" } },
-    { INDEX_op_jmp, { "r" } },
-    { INDEX_op_br, { } },
-
-    { INDEX_op_mov_i32, { "r", "r" } },
-    { INDEX_op_movi_i32, { "r" } },
-
-    { INDEX_op_ld8u_i32, { "r", "r" } },
-    { INDEX_op_ld8s_i32, { "r", "r" } },
-    { INDEX_op_ld16u_i32, { "r", "r" } },
-    { INDEX_op_ld16s_i32, { "r", "r" } },
-    { INDEX_op_ld_i32, { "r", "r" } },
-    { INDEX_op_st8_i32, { "rZ", "r" } },
-    { INDEX_op_st16_i32, { "rZ", "r" } },
-    { INDEX_op_st_i32, { "rZ", "r" } },
-
-    { INDEX_op_add_i32, { "r", "rZ", "ri" } },
-    { INDEX_op_sub_i32, { "r", "rI", "ri" } },
-    { INDEX_op_and_i32, { "r", "rZ", "rM" } },
-    { INDEX_op_or_i32, { "r", "rZ", "rO" } },
-    { INDEX_op_xor_i32, { "r", "rZ", "rZ" } },
-    /* Note that the second argument will be inverted, which means
-       we want a constant whose inversion matches M, and that O = ~M.
-       See the implementation of and_mask_p.  */
-    { INDEX_op_andc_i32, { "r", "rZ", "rO" } },
-
-    { INDEX_op_mul_i32, { "r", "r", "r" } },
-    { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } },
-
-    { INDEX_op_shl_i32, { "r", "r", "ri" } },
-    { INDEX_op_shr_i32, { "r", "r", "ri" } },
-    { INDEX_op_sar_i32, { "r", "r", "ri" } },
-    { INDEX_op_rotl_i32, { "r", "r", "ri" } },
-    { INDEX_op_rotr_i32, { "r", "r", "ri" } },
-
-    { INDEX_op_bswap16_i32, { "r", "r" } },
-    { INDEX_op_bswap32_i32, { "r", "r" } },
-    { INDEX_op_not_i32, { "r", "r" } },
-
-    { INDEX_op_ext8s_i32, { "r", "r" } },
-    { INDEX_op_ext16s_i32, { "r", "r" } },
-
-    { INDEX_op_brcond_i32, { "rZ", "rJ" } },
-    { INDEX_op_brcond2_i32,  { "rZ", "rZ", "rJ", "rJ" } },
-
-    { INDEX_op_setcond_i32, { "r", "rZ", "rI" } },
-    { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rI", "rI" } },
-
-    { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rI", "rZ" } },
-    { INDEX_op_sub2_i32, { "r", "r", "rI", "rZ", "rK", "rZ" } },
-
-#if TARGET_LONG_BITS == 32
-    { INDEX_op_qemu_ld8u, { "r", "L" } },
-    { INDEX_op_qemu_ld8s, { "r", "L" } },
-    { INDEX_op_qemu_ld16u, { "r", "L" } },
-    { INDEX_op_qemu_ld16s, { "r", "L" } },
-    { INDEX_op_qemu_ld32, { "r", "L" } },
-    { INDEX_op_qemu_ld64, { "r", "r", "L" } },
-
-    { INDEX_op_qemu_st8, { "LZ", "L" } },
-    { INDEX_op_qemu_st16, { "LZ", "L" } },
-    { INDEX_op_qemu_st32, { "LZ", "L" } },
-    { INDEX_op_qemu_st64, { "LZ", "LZ", "L" } },
-#else
-    { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
-    { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
-    { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
-    { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
-    { INDEX_op_qemu_ld32, { "r", "L", "L" } },
-    { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } },
-
-    { INDEX_op_qemu_st8, { "LZ", "L", "L" } },
-    { INDEX_op_qemu_st16, { "LZ", "L", "L" } },
-    { INDEX_op_qemu_st32, { "LZ", "L", "L" } },
-    { INDEX_op_qemu_st64, { "LZ", "LZ", "L", "L" } },
-#endif
-    { -1 },
-};
-
-static int tcg_target_callee_save_regs[] = {
-    /* R2, the return address register, is saved specially
-       in the caller's frame.  */
-    /* R3, the frame pointer, is not currently modified.  */
-    TCG_REG_R4,
-    TCG_REG_R5,
-    TCG_REG_R6,
-    TCG_REG_R7,
-    TCG_REG_R8,
-    TCG_REG_R9,
-    TCG_REG_R10,
-    TCG_REG_R11,
-    TCG_REG_R12,
-    TCG_REG_R13,
-    TCG_REG_R14,
-    TCG_REG_R15,
-    TCG_REG_R16,
-    /* R17 is the global env, so no need to save.  */
-    TCG_REG_R18
-};
-
-static void tcg_target_qemu_prologue(TCGContext *s)
-{
-    int frame_size, i;
-
-    /* Allocate space for the fixed frame marker.  */
-    frame_size = -TCG_TARGET_CALL_STACK_OFFSET;
-    frame_size += TCG_TARGET_STATIC_CALL_ARGS_SIZE;
-
-    /* Allocate space for the saved registers.  */
-    frame_size += ARRAY_SIZE(tcg_target_callee_save_regs) * 4;
-
-    /* Align the allocated space.  */
-    frame_size = ((frame_size + TCG_TARGET_STACK_ALIGN - 1)
-                  & -TCG_TARGET_STACK_ALIGN);
-
-    /* The return address is stored in the caller's frame.  */
-    tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_RP, TCG_REG_SP, -20);
-
-    /* Allocate stack frame, saving the first register at the same time.  */
-    tcg_out_ldst(s, tcg_target_callee_save_regs[0],
-                 TCG_REG_SP, frame_size, INSN_STWM);
-
-    /* Save all callee saved registers.  */
-    for (i = 1; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
-        tcg_out_st(s, TCG_TYPE_PTR, tcg_target_callee_save_regs[i],
-                   TCG_REG_SP, -frame_size + i * 4);
-    }
-
-#ifdef CONFIG_USE_GUEST_BASE
-    if (GUEST_BASE != 0) {
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE);
-        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
-    }
-#endif
-
-    /* Jump to TB, and adjust R18 to be the return address.  */
-    tcg_out32(s, INSN_BLE_SR4 | INSN_R2(TCG_REG_R26));
-    tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R18, TCG_REG_R31);
-
-    /* Restore callee saved registers.  */
-    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_RP, TCG_REG_SP, -frame_size - 20);
-    for (i = 1; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
-        tcg_out_ld(s, TCG_TYPE_PTR, tcg_target_callee_save_regs[i],
-                   TCG_REG_SP, -frame_size + i * 4);
-    }
-
-    /* Deallocate stack frame and return.  */
-    tcg_out32(s, INSN_BV | INSN_R2(TCG_REG_RP));
-    tcg_out_ldst(s, tcg_target_callee_save_regs[0],
-                 TCG_REG_SP, -frame_size, INSN_LDWM);
-}
-
-static void tcg_target_init(TCGContext *s)
-{
-    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
-
-    tcg_regset_clear(tcg_target_call_clobber_regs);
-    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R20);
-    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R21);
-    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R22);
-    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R23);
-    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R24);
-    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R25);
-    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R26);
-    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RET0);
-    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RET1);
-
-    tcg_regset_clear(s->reserved_regs);
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);  /* hardwired to zero */
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1);  /* addil target */
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_RP);  /* link register */
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R3);  /* frame pointer */
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R18); /* return pointer */
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R19); /* clobbered w/o pic */
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R20); /* reserved */
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_DP);  /* data pointer */
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);  /* stack pointer */
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R31); /* ble link reg */
-
-    tcg_add_target_add_op_defs(hppa_op_defs);
-}
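
For reference, the TLB fast path being deleted above (tcg_out_tlb_read) performs the
standard QEMU softmmu lookup: extract the page index from the guest address, scale it
to a tlb_table entry, and compare the stored tag against the page-aligned address, with
the low access-size bits folded in so unaligned accesses fall through to the slow path.
A minimal C sketch of that check, assuming the CPUTLBEntry/tlb_table layout used
elsewhere in this tree (illustration only, not part of this patch):

    /* Sketch: the hit test the removed HPPA fast path emits inline. */
    static inline int softmmu_read_hit(CPUArchState *env, int mmu_idx,
                                       target_ulong addr, int s_bits)
    {
        unsigned idx = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
        /* Including the low s_bits forces unaligned accesses to miss. */
        target_ulong tag = addr & (TARGET_PAGE_MASK | ((1 << s_bits) - 1));
        return env->tlb_table[mmu_idx][idx].addr_read == tag;
    }
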
diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h
deleted file mode 100644
index a5cc440..0000000
--- a/tcg/hppa/tcg-target.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Tiny Code Generator for QEMU
- *
- * Copyright (c) 2008 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#define TCG_TARGET_HPPA 1
-
-#if defined(_PA_RISC1_1)
-#define TCG_TARGET_REG_BITS 32
-#else
-#error unsupported
-#endif
-
-#define TCG_TARGET_WORDS_BIGENDIAN
-
-#define TCG_TARGET_NB_REGS 32
-
-enum {
-    TCG_REG_R0 = 0,
-    TCG_REG_R1,
-    TCG_REG_RP,
-    TCG_REG_R3,
-    TCG_REG_R4,
-    TCG_REG_R5,
-    TCG_REG_R6,
-    TCG_REG_R7,
-    TCG_REG_R8,
-    TCG_REG_R9,
-    TCG_REG_R10,
-    TCG_REG_R11,
-    TCG_REG_R12,
-    TCG_REG_R13,
-    TCG_REG_R14,
-    TCG_REG_R15,
-    TCG_REG_R16,
-    TCG_REG_R17,
-    TCG_REG_R18,
-    TCG_REG_R19,
-    TCG_REG_R20,
-    TCG_REG_R21,
-    TCG_REG_R22,
-    TCG_REG_R23,
-    TCG_REG_R24,
-    TCG_REG_R25,
-    TCG_REG_R26,
-    TCG_REG_DP,
-    TCG_REG_RET0,
-    TCG_REG_RET1,
-    TCG_REG_SP,
-    TCG_REG_R31,
-};
-
-#define TCG_CT_CONST_0    0x0100
-#define TCG_CT_CONST_S5   0x0200
-#define TCG_CT_CONST_S11  0x0400
-#define TCG_CT_CONST_MS11 0x0800
-#define TCG_CT_CONST_AND  0x1000
-#define TCG_CT_CONST_OR   0x2000
-
-/* used for function call generation */
-#define TCG_REG_CALL_STACK TCG_REG_SP
-#define TCG_TARGET_STACK_ALIGN 64
-#define TCG_TARGET_CALL_STACK_OFFSET -48
-#define TCG_TARGET_STATIC_CALL_ARGS_SIZE 8*4
-#define TCG_TARGET_CALL_ALIGN_ARGS 1
-#define TCG_TARGET_STACK_GROWSUP
-
-/* optional instructions */
-// #define TCG_TARGET_HAS_div_i32
-#define TCG_TARGET_HAS_rot_i32
-#define TCG_TARGET_HAS_ext8s_i32
-#define TCG_TARGET_HAS_ext16s_i32
-#define TCG_TARGET_HAS_bswap16_i32
-#define TCG_TARGET_HAS_bswap32_i32
-#define TCG_TARGET_HAS_not_i32
-#define TCG_TARGET_HAS_andc_i32
-// #define TCG_TARGET_HAS_orc_i32
-
-/* optional instructions automatically implemented */
-#undef TCG_TARGET_HAS_neg_i32           /* sub rd, 0, rs */
-#undef TCG_TARGET_HAS_ext8u_i32         /* and rd, rs, 0xff */
-#undef TCG_TARGET_HAS_ext16u_i32        /* and rd, rs, 0xffff */
-
-#define TCG_TARGET_HAS_GUEST_BASE
-
-/* Note: must be synced with dyngen-exec.h */
-#define TCG_AREG0 TCG_REG_R17
-
-static inline void flush_icache_range(unsigned long start, unsigned long stop)
-{
-    start &= ~31;
-    while (start <= stop) {
-        asm volatile ("fdc 0(%0)\n\t"
-                      "sync\n\t"
-                      "fic 0(%%sr4, %0)\n\t"
-                      "sync"
-                      : : "r"(start) : "memory");
-        start += 32;
-    }
-}
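
The header removed above notes that neg_i32, ext8u_i32 and ext16u_i32 need no dedicated
HPPA patterns because TCG can synthesize them with a subtract from zero and AND masks.
A self-contained illustration of those expansions (hypothetical helper names, shown only
to make the removed comment concrete):

    #include <stdint.h>

    /* neg rd, rs    ==  sub rd, 0, rs */
    static inline uint32_t neg_i32(uint32_t x)    { return 0u - x; }
    /* ext8u rd, rs  ==  and rd, rs, 0xff */
    static inline uint32_t ext8u_i32(uint32_t x)  { return x & 0xffu; }
    /* ext16u rd, rs ==  and rd, rs, 0xffff */
    static inline uint32_t ext16u_i32(uint32_t x) { return x & 0xffffu; }
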
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 28ae003..4268345 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -22,6 +22,14 @@
  * THE SOFTWARE.
  */
 
+#include "tcg-be-ldst.h"
+
+#ifdef _WIN32
+// For some reason, the Mingw32 headers define the 'small' macro which
+// prevents this source from compiling.
+#undef small
+#endif
+
 #ifndef NDEBUG
 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
 #if TCG_TARGET_REG_BITS == 64
@@ -51,40 +59,68 @@
     TCG_REG_RDI,
     TCG_REG_RAX,
 #else
-    TCG_REG_EAX,
-    TCG_REG_EDX,
-    TCG_REG_ECX,
     TCG_REG_EBX,
     TCG_REG_ESI,
     TCG_REG_EDI,
     TCG_REG_EBP,
+    TCG_REG_ECX,
+    TCG_REG_EDX,
+    TCG_REG_EAX,
 #endif
 };
 
 static const int tcg_target_call_iarg_regs[] = {
 #if TCG_TARGET_REG_BITS == 64
+#if defined(_WIN64)
+    TCG_REG_RCX,
+    TCG_REG_RDX,
+#else
     TCG_REG_RDI,
     TCG_REG_RSI,
     TCG_REG_RDX,
     TCG_REG_RCX,
+#endif
     TCG_REG_R8,
     TCG_REG_R9,
 #else
-    TCG_REG_EAX,
-    TCG_REG_EDX,
-    TCG_REG_ECX
+    /* 32 bit mode uses stack based calling convention (GCC default). */
 #endif
 };
 
-static const int tcg_target_call_oarg_regs[2] = {
+static const int tcg_target_call_oarg_regs[] = {
     TCG_REG_EAX,
+#if TCG_TARGET_REG_BITS == 32
     TCG_REG_EDX
+#endif
 };
 
+/* Registers used with the L constraint, which are the first argument
+   registers on x86_64, and two random call-clobbered registers on
+   i386. */
+#if TCG_TARGET_REG_BITS == 64
+# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
+# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
+#else
+# define TCG_REG_L0 TCG_REG_EAX
+# define TCG_REG_L1 TCG_REG_EDX
+#endif
+
+/* For 32-bit, we are going to attempt to determine at runtime whether cmov
+   is available.  However, the host compiler must supply <cpuid.h>, as we're
+   not going to go so far as our own inline assembly.  */
+#if TCG_TARGET_REG_BITS == 64
+# define have_cmov 1
+#elif defined(CONFIG_CPUID_H)
+#include <cpuid.h>
+static bool have_cmov;
+#else
+# define have_cmov 0
+#endif
+
 static uint8_t *tb_ret_addr;
 
 static void patch_reloc(uint8_t *code_ptr, int type,
-                        tcg_target_long value, tcg_target_long addend)
+                        intptr_t value, intptr_t addend)
 {
     value += addend;
     switch(type) {
@@ -107,26 +143,6 @@
     }
 }
 
-/* maximum number of register used for input function arguments */
-static inline int tcg_target_get_call_iarg_regs_count(int flags)
-{
-    if (TCG_TARGET_REG_BITS == 64) {
-        return 6;
-    }
-
-    flags &= TCG_CALL_TYPE_MASK;
-    switch(flags) {
-    case TCG_CALL_TYPE_STD:
-        return 0;
-    case TCG_CALL_TYPE_REGPARM_1:
-    case TCG_CALL_TYPE_REGPARM_2:
-    case TCG_CALL_TYPE_REGPARM:
-        return flags - TCG_CALL_TYPE_REGPARM_1 + 1;
-    default:
-        tcg_abort();
-    }
-}
-
 /* parse target specific constraints */
 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
 {
@@ -166,6 +182,10 @@
             tcg_regset_set32(ct->u.regs, 0, 0xf);
         }
         break;
+    case 'Q':
+        ct->ct |= TCG_CT_REG;
+        tcg_regset_set32(ct->u.regs, 0, 0xf);
+        break;
     case 'r':
         ct->ct |= TCG_CT_REG;
         if (TCG_TARGET_REG_BITS == 64) {
@@ -180,13 +200,11 @@
         ct->ct |= TCG_CT_REG;
         if (TCG_TARGET_REG_BITS == 64) {
             tcg_regset_set32(ct->u.regs, 0, 0xffff);
-            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RSI);
-            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDI);
         } else {
             tcg_regset_set32(ct->u.regs, 0, 0xff);
-            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
-            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX);
         }
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
         break;
 
     case 'e':
@@ -234,11 +252,13 @@
 # define P_REXW		0x800		/* Set REX.W = 1 */
 # define P_REXB_R	0x1000		/* REG field as byte register */
 # define P_REXB_RM	0x2000		/* R/M field as byte register */
+# define P_GS           0x4000          /* gs segment override */
 #else
 # define P_ADDR32	0
 # define P_REXW		0
 # define P_REXB_R	0
 # define P_REXB_RM	0
+# define P_GS           0
 #endif
 
 #define OPC_ARITH_EvIz	(0x81)
@@ -247,6 +267,7 @@
 #define OPC_ADD_GvEv	(OPC_ARITH_GvEv | (ARITH_ADD << 3))
 #define OPC_BSWAP	(0xc8 | P_EXT)
 #define OPC_CALL_Jz	(0xe8)
+#define OPC_CMOVCC      (0x40 | P_EXT)  /* ... plus condition code */
 #define OPC_CMP_GvEv	(OPC_ARITH_GvEv | (ARITH_CMP << 3))
 #define OPC_DEC_r32	(0x48)
 #define OPC_IMUL_GvEv	(0xaf | P_EXT)
@@ -261,6 +282,7 @@
 #define OPC_MOVB_EvGv	(0x88)		/* stores, more or less */
 #define OPC_MOVL_EvGv	(0x89)		/* stores, more or less */
 #define OPC_MOVL_GvEv	(0x8b)		/* loads, more or less */
+#define OPC_MOVB_EvIz   (0xc6)
 #define OPC_MOVL_EvIz	(0xc7)
 #define OPC_MOVL_Iv     (0xb8)
 #define OPC_MOVSBL	(0xbe | P_EXT)
@@ -334,7 +356,7 @@
 #define JCC_JLE 0xe
 #define JCC_JG  0xf
 
-static const uint8_t tcg_cond_to_jcc[10] = {
+static const uint8_t tcg_cond_to_jcc[] = {
     [TCG_COND_EQ] = JCC_JE,
     [TCG_COND_NE] = JCC_JNE,
     [TCG_COND_LT] = JCC_JL,
@@ -352,6 +374,9 @@
 {
     int rex;
 
+    if (opc & P_GS) {
+        tcg_out8(s, 0x65);
+    }
     if (opc & P_DATA16) {
         /* We should never be asking for both 16 and 64-bit operation.  */
         assert((opc & P_REXW) == 0);
@@ -413,8 +438,7 @@
    that will follow the instruction.  */
 
 static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
-                                     int index, int shift,
-                                     tcg_target_long offset)
+                                     int index, int shift, intptr_t offset)
 {
     int mod, len;
 
@@ -422,8 +446,8 @@
         if (TCG_TARGET_REG_BITS == 64) {
             /* Try for a rip-relative addressing mode.  This has replaced
                the 32-bit-mode absolute addressing encoding.  */
-            tcg_target_long pc = (tcg_target_long)s->code_ptr + 5 + ~rm;
-            tcg_target_long disp = offset - pc;
+            intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
+            intptr_t disp = offset - pc;
             if (disp == (int32_t)disp) {
                 tcg_out_opc(s, opc, r, 0, 0);
                 tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
@@ -497,7 +521,7 @@
 
 /* A simplification of the above with no index or shift.  */
 static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
-                                        int rm, tcg_target_long offset)
+                                        int rm, intptr_t offset)
 {
     tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
 }
@@ -512,7 +536,8 @@
     tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
 }
 
-static inline void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg)
+static inline void tcg_out_mov(TCGContext *s, TCGType type,
+                               TCGReg ret, TCGReg arg)
 {
     if (arg != ret) {
         int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
@@ -521,22 +546,36 @@
 }
 
 static void tcg_out_movi(TCGContext *s, TCGType type,
-                         int ret, tcg_target_long arg)
+                         TCGReg ret, tcg_target_long arg)
 {
+    tcg_target_long diff;
+
     if (arg == 0) {
         tgen_arithr(s, ARITH_XOR, ret, ret);
         return;
-    } else if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
+    }
+    if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
         tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
         tcg_out32(s, arg);
-    } else if (arg == (int32_t)arg) {
+        return;
+    }
+    if (arg == (int32_t)arg) {
         tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
         tcg_out32(s, arg);
-    } else {
-        tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
-        tcg_out32(s, arg);
-        tcg_out32(s, arg >> 31 >> 1);
+        return;
     }
+
+    /* Try a 7 byte pc-relative lea before the 10 byte movq.  */
+    diff = arg - ((uintptr_t)s->code_ptr + 7);
+    if (diff == (int32_t)diff) {
+        tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
+        tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
+        tcg_out32(s, diff);
+        return;
+    }
+
+    tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
+    tcg_out64(s, arg);
 }
 
 static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
@@ -562,20 +601,28 @@
     tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
 }
 
-static inline void tcg_out_ld(TCGContext *s, TCGType type, int ret,
-                              int arg1, tcg_target_long arg2)
+static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
+                              TCGReg arg1, intptr_t arg2)
 {
     int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
     tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
 }
 
-static inline void tcg_out_st(TCGContext *s, TCGType type, int arg,
-                              int arg1, tcg_target_long arg2)
+static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
+                              TCGReg arg1, intptr_t arg2)
 {
     int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
     tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
 }
 
+static inline void tcg_out_sti(TCGContext *s, TCGType type, TCGReg base,
+                               tcg_target_long ofs, tcg_target_long val)
+{
+    int opc = OPC_MOVL_EvIz + (type == TCG_TYPE_I64 ? P_REXW : 0);
+    tcg_out_modrm_offset(s, opc, 0, base, ofs);
+    tcg_out32(s, val);
+}
+
 static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
 {
     /* Propagate an opcode prefix, such as P_DATA16.  */
@@ -710,8 +757,6 @@
     }
 }
 
-#undef small  /* for mingw build */
-
 /* Use SMALL != 0 to force a short forward branch.  */
 static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
 {
@@ -719,7 +764,7 @@
     TCGLabel *l = &s->labels[label_index];
 
     if (l->has_value) {
-        val = l->u.value - (tcg_target_long)s->code_ptr;
+        val = l->u.value - (intptr_t)s->code_ptr;
         val1 = val - 2;
         if ((int8_t)val1 == val1) {
             if (opc == -1) {
@@ -870,7 +915,7 @@
     default:
         tcg_abort();
     }
-    tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
+    tcg_out_label(s, label_next, s->code_ptr);
 }
 #endif
 
@@ -912,10 +957,10 @@
 
         tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
         tcg_out_jxx(s, JCC_JMP, label_over, 1);
-        tcg_out_label(s, label_true, (tcg_target_long)s->code_ptr);
+        tcg_out_label(s, label_true, s->code_ptr);
 
         tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
-        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
+        tcg_out_label(s, label_over, s->code_ptr);
     } else {
         /* When the destination does not overlap one of the arguments,
            clear the destination first, jump if cond false, and emit an
@@ -929,14 +974,39 @@
         tcg_out_brcond2(s, new_args, const_args+1, 1);
 
         tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
-        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
+        tcg_out_label(s, label_over, s->code_ptr);
     }
 }
 #endif
 
-static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest)
+static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
+                              TCGArg c1, TCGArg c2, int const_c2,
+                              TCGArg v1)
 {
-    tcg_target_long disp = dest - (tcg_target_long)s->code_ptr - 5;
+    tcg_out_cmp(s, c1, c2, const_c2, 0);
+    if (have_cmov) {
+        tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
+    } else {
+        int over = gen_new_label();
+        tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
+        tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
+        tcg_out_label(s, over, s->code_ptr);
+    }
+}
+
+#if TCG_TARGET_REG_BITS == 64
+static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
+                              TCGArg c1, TCGArg c2, int const_c2,
+                              TCGArg v1)
+{
+    tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
+    tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
+}
+#endif
+
+static void tcg_out_branch(TCGContext *s, int call, uintptr_t dest)
+{
+    intptr_t disp = dest - (intptr_t)s->code_ptr - 5;
 
     if (disp == (int32_t)disp) {
         tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
@@ -948,39 +1018,47 @@
     }
 }
 
-static inline void tcg_out_calli(TCGContext *s, tcg_target_long dest)
+static inline void tcg_out_calli(TCGContext *s, uintptr_t dest)
 {
     tcg_out_branch(s, 1, dest);
 }
 
-static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
+static void tcg_out_jmp(TCGContext *s, uintptr_t dest)
 {
     tcg_out_branch(s, 0, dest);
 }
 
 #if defined(CONFIG_SOFTMMU)
-
-#include "exec/softmmu_defs.h"
-
-static void *qemu_ld_helpers[4] = {
-    __ldb_mmu,
-    __ldw_mmu,
-    __ldl_mmu,
-    __ldq_mmu,
+/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
+ *                                     int mmu_idx, uintptr_t ra)
+ */
+static const void * const qemu_ld_helpers[16] = {
+    [MO_UB]   = helper_ret_ldub_mmu,
+    [MO_LEUW] = helper_le_lduw_mmu,
+    [MO_LEUL] = helper_le_ldul_mmu,
+    [MO_LEQ]  = helper_le_ldq_mmu,
+    [MO_BEUW] = helper_be_lduw_mmu,
+    [MO_BEUL] = helper_be_ldul_mmu,
+    [MO_BEQ]  = helper_be_ldq_mmu,
 };
 
-static void *qemu_st_helpers[4] = {
-    __stb_mmu,
-    __stw_mmu,
-    __stl_mmu,
-    __stq_mmu,
+/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
+ *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
+ */
+static const void * const qemu_st_helpers[16] = {
+    [MO_UB]   = helper_ret_stb_mmu,
+    [MO_LEUW] = helper_le_stw_mmu,
+    [MO_LEUL] = helper_le_stl_mmu,
+    [MO_LEQ]  = helper_le_stq_mmu,
+    [MO_BEUW] = helper_be_stw_mmu,
+    [MO_BEUL] = helper_be_stl_mmu,
+    [MO_BEQ]  = helper_be_stq_mmu,
 };
 
 /* Perform the TLB load and compare.
 
    Inputs:
-   ADDRLO_IDX contains the index into ARGS of the low part of the
-   address; the high part of the address is at ADDR_LOW_IDX+1.
+   ADDRLO and ADDRHI contain the low and high part of the address.
 
    MEM_INDEX and S_BITS are the memory context and log2 size of the load.
 
@@ -991,122 +1069,325 @@
    LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
    positions of the displacements of forward jumps to the TLB miss case.
 
-   First argument register is loaded with the low part of the address.
+   Second argument register is loaded with the low part of the address.
    In the TLB hit case, it has been adjusted as indicated by the TLB
    and so is a host address.  In the TLB miss case, it continues to
    hold a guest address.
 
-   Second argument register is clobbered.  */
+   First argument register is clobbered.  */
 
-static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
-                                    int mem_index, int s_bits,
-                                    const TCGArg *args,
+static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
+                                    int mem_index, TCGMemOp s_bits,
                                     uint8_t **label_ptr, int which)
 {
-    const int addrlo = args[addrlo_idx];
-    const int r0 = tcg_target_call_iarg_regs[0];
-    const int r1 = tcg_target_call_iarg_regs[1];
-    TCGType type = TCG_TYPE_I32;
-    int rexw = 0;
+    const TCGReg r0 = TCG_REG_L0;
+    const TCGReg r1 = TCG_REG_L1;
+    TCGType ttype = TCG_TYPE_I32;
+    TCGType htype = TCG_TYPE_I32;
+    int trexw = 0, hrexw = 0;
 
-    if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) {
-        type = TCG_TYPE_I64;
-        rexw = P_REXW;
+    if (TCG_TARGET_REG_BITS == 64) {
+        if (TARGET_LONG_BITS == 64) {
+            ttype = TCG_TYPE_I64;
+            trexw = P_REXW;
+        }
+        if (TCG_TYPE_PTR == TCG_TYPE_I64) {
+            htype = TCG_TYPE_I64;
+            hrexw = P_REXW;
+        }
     }
 
-    tcg_out_mov(s, type, r1, addrlo);
-    tcg_out_mov(s, type, r0, addrlo);
+    tcg_out_mov(s, htype, r0, addrlo);
+    tcg_out_mov(s, ttype, r1, addrlo);
 
-    tcg_out_shifti(s, SHIFT_SHR + rexw, r1,
+    tcg_out_shifti(s, SHIFT_SHR + hrexw, r0,
                    TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
 
-    tgen_arithi(s, ARITH_AND + rexw, r0,
+    tgen_arithi(s, ARITH_AND + trexw, r1,
                 TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
-    tgen_arithi(s, ARITH_AND + rexw, r1,
+    tgen_arithi(s, ARITH_AND + hrexw, r0,
                 (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
 
-    tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0,
-                             offsetof(CPUOldState, tlb_table[mem_index][0])
+    tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0,
+                             offsetof(CPUArchState, tlb_table[mem_index][0])
                              + which);
 
-    /* cmp 0(r1), r0 */
-    tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0);
+    /* cmp 0(r0), r1 */
+    tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);
 
-    tcg_out_mov(s, type, r0, addrlo);
+    /* Prepare for both the fast path add of the tlb addend, and the slow
+       path function argument setup.  There are two cases worth noting:
+       For 32-bit guest and x86_64 host, MOVL zero-extends the guest address
+       before the fastpath ADDQ below.  For 64-bit guest and x32 host, MOVQ
+       copies the entire guest address for the slow path, while truncation
+       for the 32-bit host happens with the fastpath ADDL below.  */
+    tcg_out_mov(s, ttype, r1, addrlo);
 
-    /* jne label1 */
-    tcg_out8(s, OPC_JCC_short + JCC_JNE);
+    /* jne slow_path */
+    tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
     label_ptr[0] = s->code_ptr;
-    s->code_ptr++;
+    s->code_ptr += 4;
 
     if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
-        /* cmp 4(r1), addrhi */
-        tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4);
+        /* cmp 4(r0), addrhi */
+        tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4);
 
-        /* jne label1 */
-        tcg_out8(s, OPC_JCC_short + JCC_JNE);
+        /* jne slow_path */
+        tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
         label_ptr[1] = s->code_ptr;
-        s->code_ptr++;
+        s->code_ptr += 4;
     }
 
     /* TLB Hit.  */
 
-    /* add addend(r1), r0 */
-    tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
+    /* add addend(r0), r1 */
+    tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
                          offsetof(CPUTLBEntry, addend) - which);
 }
-#endif
 
-static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
-                                   int base, tcg_target_long ofs, int sizeop)
+/*
+ * Record the context of a call to the out-of-line helper code for the slow path
+ * of a load or store, so that we can later generate the correct helper code.
+ */
+static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOp opc,
+                                TCGReg datalo, TCGReg datahi,
+                                TCGReg addrlo, TCGReg addrhi,
+                                int mem_index, uint8_t *raddr,
+                                uint8_t **label_ptr)
 {
-#ifdef TARGET_WORDS_BIGENDIAN
-    const int bswap = 1;
-#else
-    const int bswap = 0;
+    TCGLabelQemuLdst *label = new_ldst_label(s);
+
+    label->is_ld = is_ld;
+    label->opc = opc;
+    label->datalo_reg = datalo;
+    label->datahi_reg = datahi;
+    label->addrlo_reg = addrlo;
+    label->addrhi_reg = addrhi;
+    label->mem_index = mem_index;
+    label->raddr = raddr;
+    label->label_ptr[0] = label_ptr[0];
+    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+        label->label_ptr[1] = label_ptr[1];
+    }
+}
+
+/*
+ * Generate code for the slow path of a load at the end of the block
+ */
+static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+    TCGMemOp opc = l->opc;
+    TCGReg data_reg;
+    uint8_t **label_ptr = &l->label_ptr[0];
+
+    /* resolve label address */
+    *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
+    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+        *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
+    }
+
+    if (TCG_TARGET_REG_BITS == 32) {
+        int ofs = 0;
+
+        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
+        ofs += 4;
+
+        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
+        ofs += 4;
+
+        if (TARGET_LONG_BITS == 64) {
+            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
+            ofs += 4;
+        }
+
+        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);
+        ofs += 4;
+
+        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, (uintptr_t)l->raddr);
+    } else {
+        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
+        /* The second argument is already loaded with addrlo.  */
+        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                     l->mem_index);
+        tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
+                     (uintptr_t)l->raddr);
+    }
+
+    tcg_out_calli(s, (uintptr_t)qemu_ld_helpers[opc & ~MO_SIGN]);
+
+    data_reg = l->datalo_reg;
+    switch (opc & MO_SSIZE) {
+    case MO_SB:
+        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
+        break;
+    case MO_SW:
+        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
+        break;
+#if TCG_TARGET_REG_BITS == 64
+    case MO_SL:
+        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
+        break;
 #endif
-    switch (sizeop) {
-    case 0:
-        tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs);
+    case MO_UB:
+    case MO_UW:
+        /* Note that the helpers have zero-extended to tcg_target_long.  */
+    case MO_UL:
+        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
         break;
-    case 0 | 4:
-        tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs);
+    case MO_Q:
+        if (TCG_TARGET_REG_BITS == 64) {
+            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
+        } else if (data_reg == TCG_REG_EDX) {
+            /* xchg %edx, %eax */
+            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
+            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
+        } else {
+            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
+            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
+        }
         break;
-    case 1:
-        tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
+    default:
+        tcg_abort();
+    }
+
+    /* Jump to the code corresponding to the next IR of qemu_ld */
+    tcg_out_jmp(s, (uintptr_t)l->raddr);
+}
+
+/*
+ * Generate code for the slow path of a store at the end of the block
+ */
+static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+    TCGMemOp opc = l->opc;
+    TCGMemOp s_bits = opc & MO_SIZE;
+    uint8_t **label_ptr = &l->label_ptr[0];
+    TCGReg retaddr;
+
+    /* resolve label address */
+    *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
+    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+        *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
+    }
+
+    if (TCG_TARGET_REG_BITS == 32) {
+        int ofs = 0;
+
+        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
+        ofs += 4;
+
+        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
+        ofs += 4;
+
+        if (TARGET_LONG_BITS == 64) {
+            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
+            ofs += 4;
+        }
+
+        tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
+        ofs += 4;
+
+        if (s_bits == MO_64) {
+            tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
+            ofs += 4;
+        }
+
+        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);
+        ofs += 4;
+
+        retaddr = TCG_REG_EAX;
+        tcg_out_movi(s, TCG_TYPE_I32, retaddr, (uintptr_t)l->raddr);
+        tcg_out_st(s, TCG_TYPE_I32, retaddr, TCG_REG_ESP, ofs);
+    } else {
+        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
+        /* The second argument is already loaded with addrlo.  */
+        tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
+                    tcg_target_call_iarg_regs[2], l->datalo_reg);
+        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                     l->mem_index);
+
+        if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
+            retaddr = tcg_target_call_iarg_regs[4];
+            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
+        } else {
+            retaddr = TCG_REG_RAX;
+            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
+            tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, 0);
+        }
+    }
+
+    /* "Tail call" to the helper, with the return address back inline.  */
+    tcg_out_push(s, retaddr);
+    tcg_out_jmp(s, (uintptr_t)qemu_st_helpers[opc]);
+}
+#elif defined(__x86_64__) && defined(__linux__)
+# include <asm/prctl.h>
+# include <sys/prctl.h>
+
+int arch_prctl(int code, unsigned long addr);
+
+static int guest_base_flags;
+static inline void setup_guest_base_seg(void)
+{
+    if (arch_prctl(ARCH_SET_GS, GUEST_BASE) == 0) {
+        guest_base_flags = P_GS;
+    }
+}
+#else
+# define guest_base_flags 0
+static inline void setup_guest_base_seg(void) { }
+#endif /* SOFTMMU */
+
+static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
+                                   TCGReg base, intptr_t ofs, int seg,
+                                   TCGMemOp memop)
+{
+    const TCGMemOp bswap = memop & MO_BSWAP;
+
+    switch (memop & MO_SSIZE) {
+    case MO_UB:
+        tcg_out_modrm_offset(s, OPC_MOVZBL + seg, datalo, base, ofs);
+        break;
+    case MO_SB:
+        tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, base, ofs);
+        break;
+    case MO_UW:
+        tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
         if (bswap) {
             tcg_out_rolw_8(s, datalo);
         }
         break;
-    case 1 | 4:
+    case MO_SW:
         if (bswap) {
-            tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
+            tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
             tcg_out_rolw_8(s, datalo);
             tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
         } else {
-            tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs);
+            tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg,
+                                 datalo, base, ofs);
         }
         break;
-    case 2:
-        tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
+    case MO_UL:
+        tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
         if (bswap) {
             tcg_out_bswap32(s, datalo);
         }
         break;
 #if TCG_TARGET_REG_BITS == 64
-    case 2 | 4:
+    case MO_SL:
         if (bswap) {
-            tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
+            tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
             tcg_out_bswap32(s, datalo);
             tcg_out_ext32s(s, datalo, datalo);
         } else {
-            tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs);
+            tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs);
         }
         break;
 #endif
-    case 3:
+    case MO_Q:
         if (TCG_TARGET_REG_BITS == 64) {
-            tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
+            tcg_out_modrm_offset(s, OPC_MOVL_GvEv + P_REXW + seg,
+                                 datalo, base, ofs);
             if (bswap) {
                 tcg_out_bswap64(s, datalo);
             }
@@ -1117,11 +1398,15 @@
                 datahi = t;
             }
             if (base != datalo) {
-                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
-                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
+                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
+                                     datalo, base, ofs);
+                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
+                                     datahi, base, ofs + 4);
             } else {
-                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
-                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
+                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
+                                     datahi, base, ofs + 4);
+                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
+                                     datalo, base, ofs);
             }
             if (bswap) {
                 tcg_out_bswap32(s, datalo);
@@ -1137,173 +1422,121 @@
 /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
    EAX. It will be useful once fixed registers globals are less
    common. */
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
-                            int opc)
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
 {
-    int data_reg, data_reg2 = 0;
-    int addrlo_idx;
+    TCGReg datalo, datahi, addrlo;
+    TCGReg addrhi __attribute__((unused));
+    TCGMemOp opc;
 #if defined(CONFIG_SOFTMMU)
-    int mem_index, s_bits, arg_idx;
-    uint8_t *label_ptr[3];
+    int mem_index;
+    TCGMemOp s_bits;
+    uint8_t *label_ptr[2];
 #endif
 
-    data_reg = args[0];
-    addrlo_idx = 1;
-    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
-        data_reg2 = args[1];
-        addrlo_idx = 2;
-    }
+    datalo = *args++;
+    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
+    addrlo = *args++;
+    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
+    opc = *args++;
 
 #if defined(CONFIG_SOFTMMU)
-    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
-    s_bits = opc & 3;
+    mem_index = *args++;
+    s_bits = opc & MO_SIZE;
 
-    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
+    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
                      label_ptr, offsetof(CPUTLBEntry, addr_read));
 
     /* TLB Hit.  */
-    tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
-                           tcg_target_call_iarg_regs[0], 0, opc);
+    tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
 
-    /* jmp label2 */
-    tcg_out8(s, OPC_JMP_short);
-    label_ptr[2] = s->code_ptr;
-    s->code_ptr++;
-
-    /* TLB Miss.  */
-
-    /* label1: */
-    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
-    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
-        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
-    }
-
-    /* XXX: move that code at the end of the TB */
-    /* The first argument is already loaded with addrlo.  */
-    arg_idx = 1;
-    if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
-        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++],
-                    args[addrlo_idx + 1]);
-    }
-    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
-                 mem_index);
-    tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
-
-    switch(opc) {
-    case 0 | 4:
-        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
-        break;
-    case 1 | 4:
-        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
-        break;
-    case 0:
-        tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
-        break;
-    case 1:
-        tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
-        break;
-    case 2:
-        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
-        break;
-#if TCG_TARGET_REG_BITS == 64
-    case 2 | 4:
-        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
-        break;
-#endif
-    case 3:
-        if (TCG_TARGET_REG_BITS == 64) {
-            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
-        } else if (data_reg == TCG_REG_EDX) {
-            /* xchg %edx, %eax */
-            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
-            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX);
-        } else {
-            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
-            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
-        }
-        break;
-    default:
-        tcg_abort();
-    }
-
-    /* label2: */
-    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
+    /* Record the current context of a load into ldst label */
+    add_qemu_ldst_label(s, 1, opc, datalo, datahi, addrlo, addrhi,
+                        mem_index, s->code_ptr, label_ptr);
 #else
     {
         int32_t offset = GUEST_BASE;
-        int base = args[addrlo_idx];
+        TCGReg base = addrlo;
+        int seg = 0;
 
-        if (TCG_TARGET_REG_BITS == 64) {
-            /* ??? We assume all operations have left us with register
-               contents that are zero extended.  So far this appears to
-               be true.  If we want to enforce this, we can either do
-               an explicit zero-extension here, or (if GUEST_BASE == 0)
-               use the ADDR32 prefix.  For now, do nothing.  */
-
-            if (offset != GUEST_BASE) {
-                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
-                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
-                base = TCG_REG_RDI, offset = 0;
-            }
+        /* ??? We assume all operations have left us with register contents
+           that are zero extended.  So far this appears to be true.  If we
+           want to enforce this, we can either do an explicit zero-extension
+           here, or (if GUEST_BASE == 0, or a segment register is in use)
+           use the ADDR32 prefix.  For now, do nothing.  */
+        if (GUEST_BASE && guest_base_flags) {
+            seg = guest_base_flags;
+            offset = 0;
+        } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
+            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
+            tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
+            base = TCG_REG_L1;
+            offset = 0;
         }
 
-        tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc);
+        tcg_out_qemu_ld_direct(s, datalo, datahi, base, offset, seg, opc);
     }
 #endif
 }
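[Editor's note — illustrative sketch, not part of the patch.] The user-mode branch above picks between two ways of applying GUEST_BASE: keep the offset at 0 and rely on a segment-override prefix when guest_base_flags is set, or materialize GUEST_BASE into the scratch register TCG_REG_L1. The standalone C sketch below mirrors that decision; sketch_guest_base and sketch_seg_prefix are assumed placeholder values, not names from the patch.

    #include <stdint.h>

    static const uint64_t sketch_guest_base = 0x7f0000000000ull; /* assumed */
    static const int      sketch_seg_prefix = 0x4000;            /* assumed */

    /* 1 = use a segment override (offset stays 0),
       2 = add GUEST_BASE into a scratch base register (offset becomes 0),
       0 = GUEST_BASE fits the 32-bit displacement as-is. */
    static int pick_user_addressing(void)
    {
        if (sketch_guest_base && sketch_seg_prefix) {
            return 1;
        }
        if (sizeof(void *) == 8
            && (uint64_t)(int32_t)sketch_guest_base != sketch_guest_base) {
            return 2;   /* movi L1, GUEST_BASE; add L1, base */
        }
        return 0;
    }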
 
-static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
-                                   int base, tcg_target_long ofs, int sizeop)
+static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
+                                   TCGReg base, intptr_t ofs, int seg,
+                                   TCGMemOp memop)
 {
-#ifdef TARGET_WORDS_BIGENDIAN
-    const int bswap = 1;
-#else
-    const int bswap = 0;
-#endif
+    const TCGMemOp bswap = memop & MO_BSWAP;
+
     /* ??? Ideally we wouldn't need a scratch register.  For user-only,
        we could perform the bswap twice to restore the original value
        instead of moving to the scratch.  But as it is, the L constraint
-       means that the second argument reg is definitely free here.  */
-    int scratch = tcg_target_call_iarg_regs[1];
+       means that TCG_REG_L0 is definitely free here.  */
+    const TCGReg scratch = TCG_REG_L0;
 
-    switch (sizeop) {
-    case 0:
-        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
+    switch (memop & MO_SIZE) {
+    case MO_8:
+        /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
+           Use the scratch register if necessary.  */
+        if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
+            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
+            datalo = scratch;
+        }
+        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
+                             datalo, base, ofs);
         break;
-    case 1:
+    case MO_16:
         if (bswap) {
             tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
             tcg_out_rolw_8(s, scratch);
             datalo = scratch;
         }
-        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs);
+        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16 + seg,
+                             datalo, base, ofs);
         break;
-    case 2:
+    case MO_32:
         if (bswap) {
             tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
             tcg_out_bswap32(s, scratch);
             datalo = scratch;
         }
-        tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
+        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
         break;
-    case 3:
+    case MO_64:
         if (TCG_TARGET_REG_BITS == 64) {
             if (bswap) {
                 tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
                 tcg_out_bswap64(s, scratch);
                 datalo = scratch;
             }
-            tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
+            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_REXW + seg,
+                                 datalo, base, ofs);
         } else if (bswap) {
             tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
             tcg_out_bswap32(s, scratch);
-            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs);
+            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
             tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
             tcg_out_bswap32(s, scratch);
-            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4);
+            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
         } else {
-            tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
-            tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4);
+            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
+            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datahi, base, ofs+4);
         }
         break;
     default:
@@ -1311,120 +1544,58 @@
     }
 }
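[Editor's note — illustrative sketch, not part of the patch.] For a big-endian guest, the MO_32 store case above byte-swaps the value in a scratch register before the MOVL, leaving the original TCG register untouched. A minimal standalone equivalent:

    #include <stdint.h>
    #include <string.h>

    static uint32_t bswap32_sketch(uint32_t v)
    {
        return (v >> 24) | ((v >> 8) & 0x0000ff00u)
             | ((v << 8) & 0x00ff0000u) | (v << 24);
    }

    /* mov scratch, datalo; bswap scratch; movl scratch -> [base + ofs] */
    static void store32_bswapped(uint8_t *host_base, intptr_t ofs, uint32_t datalo)
    {
        uint32_t scratch = bswap32_sketch(datalo);  /* datalo itself is preserved */
        memcpy(host_base + ofs, &scratch, sizeof(scratch));
    }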
 
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
-                            int opc)
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
 {
-    int data_reg, data_reg2 = 0;
-    int addrlo_idx;
+    TCGReg datalo, datahi, addrlo;
+    TCGReg addrhi __attribute__((unused));
+    TCGMemOp opc;
 #if defined(CONFIG_SOFTMMU)
-    int mem_index, s_bits;
-    int stack_adjust;
-    uint8_t *label_ptr[3];
+    int mem_index;
+    TCGMemOp s_bits;
+    uint8_t *label_ptr[2];
 #endif
 
-    data_reg = args[0];
-    addrlo_idx = 1;
-    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
-        data_reg2 = args[1];
-        addrlo_idx = 2;
-    }
+    datalo = *args++;
+    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
+    addrlo = *args++;
+    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
+    opc = *args++;
 
 #if defined(CONFIG_SOFTMMU)
-    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
-    s_bits = opc;
+    mem_index = *args++;
+    s_bits = opc & MO_SIZE;
 
-    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
+    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
                      label_ptr, offsetof(CPUTLBEntry, addr_write));
 
     /* TLB Hit.  */
-    tcg_out_qemu_st_direct(s, data_reg, data_reg2,
-                           tcg_target_call_iarg_regs[0], 0, opc);
+    tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
 
-    /* jmp label2 */
-    tcg_out8(s, OPC_JMP_short);
-    label_ptr[2] = s->code_ptr;
-    s->code_ptr++;
-
-    /* TLB Miss.  */
-
-    /* label1: */
-    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
-    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
-        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
-    }
-
-    /* XXX: move that code at the end of the TB */
-    if (TCG_TARGET_REG_BITS == 64) {
-        tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
-                    TCG_REG_RSI, data_reg);
-        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index);
-        stack_adjust = 0;
-    } else if (TARGET_LONG_BITS == 32) {
-        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, data_reg);
-        if (opc == 3) {
-            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg2);
-            tcg_out_pushi(s, mem_index);
-            stack_adjust = 4;
-        } else {
-            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, mem_index);
-            stack_adjust = 0;
-        }
-    } else {
-        if (opc == 3) {
-            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
-            tcg_out_pushi(s, mem_index);
-            tcg_out_push(s, data_reg2);
-            tcg_out_push(s, data_reg);
-            stack_adjust = 12;
-        } else {
-            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
-            switch(opc) {
-            case 0:
-                tcg_out_ext8u(s, TCG_REG_ECX, data_reg);
-                break;
-            case 1:
-                tcg_out_ext16u(s, TCG_REG_ECX, data_reg);
-                break;
-            case 2:
-                tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg);
-                break;
-            }
-            tcg_out_pushi(s, mem_index);
-            stack_adjust = 4;
-        }
-    }
-
-    tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);
-
-    if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
-        /* Pop and discard.  This is 2 bytes smaller than the add.  */
-        tcg_out_pop(s, TCG_REG_ECX);
-    } else if (stack_adjust != 0) {
-        tcg_out_addi(s, TCG_REG_ESP, stack_adjust);
-    }
-
-    /* label2: */
-    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
+    /* Record the current context of a store into ldst label */
+    add_qemu_ldst_label(s, 0, opc, datalo, datahi, addrlo, addrhi,
+                        mem_index, s->code_ptr, label_ptr);
 #else
     {
         int32_t offset = GUEST_BASE;
-        int base = args[addrlo_idx];
+        TCGReg base = addrlo;
+        int seg = 0;
 
-        if (TCG_TARGET_REG_BITS == 64) {
-            /* ??? We assume all operations have left us with register
-               contents that are zero extended.  So far this appears to
-               be true.  If we want to enforce this, we can either do
-               an explicit zero-extension here, or (if GUEST_BASE == 0)
-               use the ADDR32 prefix.  For now, do nothing.  */
-
-            if (offset != GUEST_BASE) {
-                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
-                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
-                base = TCG_REG_RDI, offset = 0;
-            }
+        /* ??? We assume all operations have left us with register contents
+           that are zero extended.  So far this appears to be true.  If we
+           want to enforce this, we can either do an explicit zero-extension
+           here, or (if GUEST_BASE == 0, or a segment register is in use)
+           use the ADDR32 prefix.  For now, do nothing.  */
+        if (GUEST_BASE && guest_base_flags) {
+            seg = guest_base_flags;
+            offset = 0;
+        } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
+            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
+            tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
+            base = TCG_REG_L1;
+            offset = 0;
         }
 
-        tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc);
+        tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc);
     }
 #endif
 }
@@ -1447,7 +1618,7 @@
     switch(opc) {
     case INDEX_op_exit_tb:
         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
-        tcg_out_jmp(s, (tcg_target_long) tb_ret_addr);
+        tcg_out_jmp(s, (uintptr_t)tb_ret_addr);
         break;
     case INDEX_op_goto_tb:
         if (s->tb_jmp_offset) {
@@ -1458,7 +1629,7 @@
         } else {
             /* indirect jump method */
             tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
-                                 (tcg_target_long)(s->tb_next + args[0]));
+                                 (intptr_t)(s->tb_next + args[0]));
         }
         s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
         break;
@@ -1470,14 +1641,6 @@
             tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
         }
         break;
-    case INDEX_op_jmp:
-        if (const_args[0]) {
-            tcg_out_jmp(s, args[0]);
-        } else {
-            /* jmp *reg */
-            tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]);
-        }
-        break;
     case INDEX_op_br:
         tcg_out_jxx(s, JCC_JMP, args[0], 0);
         break;
@@ -1506,18 +1669,35 @@
         break;
 
     OP_32_64(st8):
-        tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
-                             args[0], args[1], args[2]);
+        if (const_args[0]) {
+            tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
+                                 0, args[1], args[2]);
+            tcg_out8(s, args[0]);
+        } else {
+            tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
+                                 args[0], args[1], args[2]);
+        }
         break;
     OP_32_64(st16):
-        tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
-                             args[0], args[1], args[2]);
+        if (const_args[0]) {
+            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
+                                 0, args[1], args[2]);
+            tcg_out16(s, args[0]);
+        } else {
+            tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
+                                 args[0], args[1], args[2]);
+        }
         break;
 #if TCG_TARGET_REG_BITS == 64
     case INDEX_op_st32_i64:
 #endif
     case INDEX_op_st_i32:
-        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
+        if (const_args[0]) {
+            tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
+            tcg_out32(s, args[0]);
+        } else {
+            tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
+        }
         break;
 
     OP_32_64(add):
@@ -1613,6 +1793,10 @@
         tcg_out_setcond32(s, args[3], args[0], args[1],
                           args[2], const_args[2]);
         break;
+    case INDEX_op_movcond_i32:
+        tcg_out_movcond32(s, args[5], args[0], args[1],
+                          args[2], const_args[2], args[3]);
+        break;
 
     OP_32_64(bswap16):
         tcg_out_rolw_8(s, args[0]);
@@ -1641,39 +1825,48 @@
         tcg_out_ext16u(s, args[0], args[1]);
         break;
 
-    case INDEX_op_qemu_ld8u:
+    case INDEX_op_qemu_ld_i32:
         tcg_out_qemu_ld(s, args, 0);
         break;
-    case INDEX_op_qemu_ld8s:
-        tcg_out_qemu_ld(s, args, 0 | 4);
-        break;
-    case INDEX_op_qemu_ld16u:
+    case INDEX_op_qemu_ld_i64:
         tcg_out_qemu_ld(s, args, 1);
         break;
-    case INDEX_op_qemu_ld16s:
-        tcg_out_qemu_ld(s, args, 1 | 4);
-        break;
-#if TCG_TARGET_REG_BITS == 64
-    case INDEX_op_qemu_ld32u:
-#endif
-    case INDEX_op_qemu_ld32:
-        tcg_out_qemu_ld(s, args, 2);
-        break;
-    case INDEX_op_qemu_ld64:
-        tcg_out_qemu_ld(s, args, 3);
-        break;
-
-    case INDEX_op_qemu_st8:
+    case INDEX_op_qemu_st_i32:
         tcg_out_qemu_st(s, args, 0);
         break;
-    case INDEX_op_qemu_st16:
+    case INDEX_op_qemu_st_i64:
         tcg_out_qemu_st(s, args, 1);
         break;
-    case INDEX_op_qemu_st32:
-        tcg_out_qemu_st(s, args, 2);
+
+    OP_32_64(mulu2):
+        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
         break;
-    case INDEX_op_qemu_st64:
-        tcg_out_qemu_st(s, args, 3);
+    OP_32_64(muls2):
+        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
+        break;
+    OP_32_64(add2):
+        if (const_args[4]) {
+            tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1);
+        } else {
+            tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]);
+        }
+        if (const_args[5]) {
+            tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1);
+        } else {
+            tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]);
+        }
+        break;
+    OP_32_64(sub2):
+        if (const_args[4]) {
+            tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1);
+        } else {
+            tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]);
+        }
+        if (const_args[5]) {
+            tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1);
+        } else {
+            tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]);
+        }
         break;
 
 #if TCG_TARGET_REG_BITS == 32
@@ -1683,33 +1876,6 @@
     case INDEX_op_setcond2_i32:
         tcg_out_setcond2(s, args, const_args);
         break;
-    case INDEX_op_mulu2_i32:
-        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]);
-        break;
-    case INDEX_op_add2_i32:
-        if (const_args[4]) {
-            tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
-        } else {
-            tgen_arithr(s, ARITH_ADD, args[0], args[4]);
-        }
-        if (const_args[5]) {
-            tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
-        } else {
-            tgen_arithr(s, ARITH_ADC, args[1], args[5]);
-        }
-        break;
-    case INDEX_op_sub2_i32:
-        if (const_args[4]) {
-            tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
-        } else {
-            tgen_arithr(s, ARITH_SUB, args[0], args[4]);
-        }
-        if (const_args[5]) {
-            tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
-        } else {
-            tgen_arithr(s, ARITH_SBB, args[1], args[5]);
-        }
-        break;
 #else /* TCG_TARGET_REG_BITS == 64 */
     case INDEX_op_movi_i64:
         tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
@@ -1721,10 +1887,13 @@
         tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
         break;
     case INDEX_op_st_i64:
-        tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
-        break;
-    case INDEX_op_qemu_ld32s:
-        tcg_out_qemu_ld(s, args, 2 | 4);
+        if (const_args[0]) {
+            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
+                                 0, args[1], args[2]);
+            tcg_out32(s, args[0]);
+        } else {
+            tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
+        }
         break;
 
     case INDEX_op_brcond_i64:
@@ -1735,6 +1904,10 @@
         tcg_out_setcond64(s, args[3], args[0], args[1],
                           args[2], const_args[2]);
         break;
+    case INDEX_op_movcond_i64:
+        tcg_out_movcond64(s, args[5], args[0], args[1],
+                          args[2], const_args[2], args[3]);
+        break;
 
     case INDEX_op_bswap64_i64:
         tcg_out_bswap64(s, args[0]);
@@ -1747,6 +1920,22 @@
         break;
 #endif
 
+    OP_32_64(deposit):
+        if (args[3] == 0 && args[4] == 8) {
+            /* load bits 0..7 */
+            tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
+                          args[2], args[0]);
+        } else if (args[3] == 8 && args[4] == 8) {
+            /* load bits 8..15 */
+            tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
+        } else if (args[3] == 0 && args[4] == 16) {
+            /* load bits 0..15 */
+            tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
+        } else {
+            tcg_abort();
+        }
+        break;
+
     default:
         tcg_abort();
     }
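[Editor's note — illustrative sketch, not part of the patch.] The add2/sub2 cases above emit an ADD (or SUB) on the low halves followed by ADC (or SBB) on the high halves; the sketch below spells out the double-word addition that pair computes.

    #include <stdint.h>

    static void add2_sketch(uint32_t *rl, uint32_t *rh,
                            uint32_t al, uint32_t ah,
                            uint32_t bl, uint32_t bh)
    {
        uint32_t lo = al + bl;
        uint32_t carry = lo < al;   /* carry out of the low-word ADD */
        *rl = lo;
        *rh = ah + bh + carry;      /* ADC folds the carry into the high word */
    }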
@@ -1758,7 +1947,6 @@
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
     { INDEX_op_call, { "ri" } },
-    { INDEX_op_jmp, { "ri" } },
     { INDEX_op_br, { } },
     { INDEX_op_mov_i32, { "r", "r" } },
     { INDEX_op_movi_i32, { "r" } },
@@ -1767,9 +1955,9 @@
     { INDEX_op_ld16u_i32, { "r", "r" } },
     { INDEX_op_ld16s_i32, { "r", "r" } },
     { INDEX_op_ld_i32, { "r", "r" } },
-    { INDEX_op_st8_i32, { "q", "r" } },
-    { INDEX_op_st16_i32, { "r", "r" } },
-    { INDEX_op_st_i32, { "r", "r" } },
+    { INDEX_op_st8_i32, { "qi", "r" } },
+    { INDEX_op_st16_i32, { "ri", "r" } },
+    { INDEX_op_st_i32, { "ri", "r" } },
 
     { INDEX_op_add_i32, { "r", "r", "ri" } },
     { INDEX_op_sub_i32, { "r", "0", "ri" } },
@@ -1802,10 +1990,17 @@
 
     { INDEX_op_setcond_i32, { "q", "r", "ri" } },
 
-#if TCG_TARGET_REG_BITS == 32
+    { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
+#if TCG_TARGET_HAS_movcond_i32
+    { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
+#endif
+
     { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
+    { INDEX_op_muls2_i32, { "a", "d", "a", "r" } },
     { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
     { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
+
+#if TCG_TARGET_REG_BITS == 32
     { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
     { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
 #else
@@ -1818,12 +2013,12 @@
     { INDEX_op_ld32u_i64, { "r", "r" } },
     { INDEX_op_ld32s_i64, { "r", "r" } },
     { INDEX_op_ld_i64, { "r", "r" } },
-    { INDEX_op_st8_i64, { "r", "r" } },
-    { INDEX_op_st16_i64, { "r", "r" } },
-    { INDEX_op_st32_i64, { "r", "r" } },
-    { INDEX_op_st_i64, { "r", "r" } },
+    { INDEX_op_st8_i64, { "ri", "r" } },
+    { INDEX_op_st16_i64, { "ri", "r" } },
+    { INDEX_op_st32_i64, { "ri", "r" } },
+    { INDEX_op_st_i64, { "re", "r" } },
 
-    { INDEX_op_add_i64, { "r", "0", "re" } },
+    { INDEX_op_add_i64, { "r", "r", "re" } },
     { INDEX_op_mul_i64, { "r", "0", "re" } },
     { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
     { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
@@ -1853,46 +2048,31 @@
     { INDEX_op_ext8u_i64, { "r", "r" } },
     { INDEX_op_ext16u_i64, { "r", "r" } },
     { INDEX_op_ext32u_i64, { "r", "r" } },
+
+    { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
+    { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },
+
+    { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } },
+    { INDEX_op_muls2_i64, { "a", "d", "a", "r" } },
+    { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } },
+    { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } },
 #endif
 
 #if TCG_TARGET_REG_BITS == 64
-    { INDEX_op_qemu_ld8u, { "r", "L" } },
-    { INDEX_op_qemu_ld8s, { "r", "L" } },
-    { INDEX_op_qemu_ld16u, { "r", "L" } },
-    { INDEX_op_qemu_ld16s, { "r", "L" } },
-    { INDEX_op_qemu_ld32, { "r", "L" } },
-    { INDEX_op_qemu_ld32u, { "r", "L" } },
-    { INDEX_op_qemu_ld32s, { "r", "L" } },
-    { INDEX_op_qemu_ld64, { "r", "L" } },
-
-    { INDEX_op_qemu_st8, { "L", "L" } },
-    { INDEX_op_qemu_st16, { "L", "L" } },
-    { INDEX_op_qemu_st32, { "L", "L" } },
-    { INDEX_op_qemu_st64, { "L", "L" } },
+    { INDEX_op_qemu_ld_i32, { "r", "L" } },
+    { INDEX_op_qemu_st_i32, { "L", "L" } },
+    { INDEX_op_qemu_ld_i64, { "r", "L" } },
+    { INDEX_op_qemu_st_i64, { "L", "L" } },
 #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
-    { INDEX_op_qemu_ld8u, { "r", "L" } },
-    { INDEX_op_qemu_ld8s, { "r", "L" } },
-    { INDEX_op_qemu_ld16u, { "r", "L" } },
-    { INDEX_op_qemu_ld16s, { "r", "L" } },
-    { INDEX_op_qemu_ld32, { "r", "L" } },
-    { INDEX_op_qemu_ld64, { "r", "r", "L" } },
-
-    { INDEX_op_qemu_st8, { "cb", "L" } },
-    { INDEX_op_qemu_st16, { "L", "L" } },
-    { INDEX_op_qemu_st32, { "L", "L" } },
-    { INDEX_op_qemu_st64, { "L", "L", "L" } },
+    { INDEX_op_qemu_ld_i32, { "r", "L" } },
+    { INDEX_op_qemu_st_i32, { "L", "L" } },
+    { INDEX_op_qemu_ld_i64, { "r", "r", "L" } },
+    { INDEX_op_qemu_st_i64, { "L", "L", "L" } },
 #else
-    { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
-    { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
-    { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
-    { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
-    { INDEX_op_qemu_ld32, { "r", "L", "L" } },
-    { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } },
-
-    { INDEX_op_qemu_st8, { "cb", "L", "L" } },
-    { INDEX_op_qemu_st16, { "L", "L", "L" } },
-    { INDEX_op_qemu_st32, { "L", "L", "L" } },
-    { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },
+    { INDEX_op_qemu_ld_i32, { "r", "L", "L" } },
+    { INDEX_op_qemu_st_i32, { "L", "L", "L" } },
+    { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } },
+    { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } },
 #endif
     { -1 },
 };
@@ -1901,60 +2081,96 @@
 #if TCG_TARGET_REG_BITS == 64
     TCG_REG_RBP,
     TCG_REG_RBX,
+#if defined(_WIN64)
+    TCG_REG_RDI,
+    TCG_REG_RSI,
+#endif
     TCG_REG_R12,
     TCG_REG_R13,
-    /* TCG_REG_R14, */ /* Currently used for the global env. */
+    TCG_REG_R14, /* Currently used for the global env. */
     TCG_REG_R15,
 #else
-    /* TCG_REG_EBP, */ /* Currently used for the global env. */
+    TCG_REG_EBP, /* Currently used for the global env. */
     TCG_REG_EBX,
     TCG_REG_ESI,
     TCG_REG_EDI,
 #endif
 };
 
+/* Compute frame size via macros, to share between tcg_target_qemu_prologue
+   and tcg_register_jit.  */
+
+#define PUSH_SIZE \
+    ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
+     * (TCG_TARGET_REG_BITS / 8))
+
+#define FRAME_SIZE \
+    ((PUSH_SIZE \
+      + TCG_STATIC_CALL_ARGS_SIZE \
+      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
+      + TCG_TARGET_STACK_ALIGN - 1) \
+     & ~(TCG_TARGET_STACK_ALIGN - 1))
+
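[Editor's note — illustrative sketch, not part of the patch.] The two macros above add up the return address plus the pushed callee-saved registers, the static call-argument area, and the TCG temp buffer, then round up to the stack alignment. Written out as a plain function, with assumed sizes (128-byte call-arg area, 128 temp longs — the real values come from tcg.h):

    #include <stddef.h>

    static size_t frame_size_sketch(size_t nb_callee_saved, size_t reg_bytes,
                                    size_t stack_align)
    {
        size_t push  = (1 + nb_callee_saved) * reg_bytes;   /* ret addr + pushes */
        size_t frame = push + 128 + 128 * sizeof(long);     /* assumed tunables */
        return (frame + stack_align - 1) & ~(stack_align - 1);
    }
    /* e.g. frame_size_sketch(6, 8, 16) for the non-Win64 x86-64 list above */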
 /* Generate global QEMU prologue and epilogue code */
 static void tcg_target_qemu_prologue(TCGContext *s)
 {
-    int i, frame_size, push_size, stack_addend;
+    int i, stack_addend;
 
     /* TB prologue */
 
+    /* Reserve some stack space, also for TCG temps.  */
+    stack_addend = FRAME_SIZE - PUSH_SIZE;
+    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
+                  CPU_TEMP_BUF_NLONGS * sizeof(long));
+
     /* Save all callee saved registers.  */
     for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
         tcg_out_push(s, tcg_target_callee_save_regs[i]);
     }
 
-    /* Reserve some stack space.  */
-    push_size = 1 + ARRAY_SIZE(tcg_target_callee_save_regs);
-    push_size *= TCG_TARGET_REG_BITS / 8;
-
-    frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE;
-    frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
-        ~(TCG_TARGET_STACK_ALIGN - 1);
-    stack_addend = frame_size - push_size;
+#if TCG_TARGET_REG_BITS == 32
+    tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
+               (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
     tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
-
     /* jmp *tb.  */
-    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[0]);
+    tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
+                         (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
+                         + stack_addend);
+#else
+    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
+    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
+    /* jmp *tb.  */
+    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
+#endif
 
     /* TB epilogue */
     tb_ret_addr = s->code_ptr;
 
-    tcg_out_addi(s, TCG_REG_ESP, stack_addend);
+    tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);
 
     for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
         tcg_out_pop(s, tcg_target_callee_save_regs[i]);
     }
     tcg_out_opc(s, OPC_RET, 0, 0, 0);
+
+#if !defined(CONFIG_SOFTMMU)
+    /* Try to set up a segment register to point to GUEST_BASE.  */
+    if (GUEST_BASE) {
+        setup_guest_base_seg();
+    }
+#endif
 }
 
 static void tcg_target_init(TCGContext *s)
 {
-#if !defined(CONFIG_USER_ONLY)
-    /* fail safe */
-    if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry))
-        tcg_abort();
+    /* For 32-bit, 99% certainty that we're running on hardware that supports
+       cmov, but we still need to check.  In case cmov is not available, we'll
+       use a small forward branch.  */
+#ifndef have_cmov
+    {
+        unsigned a, b, c, d;
+        have_cmov = (__get_cpuid(1, &a, &b, &c, &d) && (d & bit_CMOV));
+    }
 #endif
 
     if (TCG_TARGET_REG_BITS == 64) {
@@ -1969,8 +2185,10 @@
     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
     if (TCG_TARGET_REG_BITS == 64) {
+#if !defined(_WIN64)
         tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
         tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
+#endif
         tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
         tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
         tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
@@ -1978,7 +2196,87 @@
     }
 
     tcg_regset_clear(s->reserved_regs);
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_ESP);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
 
     tcg_add_target_add_op_defs(x86_op_defs);
 }
+
+typedef struct {
+    DebugFrameCIE cie;
+    DebugFrameFDEHeader fde;
+    uint8_t fde_def_cfa[4];
+    uint8_t fde_reg_ofs[14];
+} DebugFrame;
+
+/* We're expecting a 2 byte uleb128 encoded value.  */
+QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
+
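[Editor's note — illustrative sketch, not part of the patch.] FRAME_SIZE is emitted below as a ULEB128 value, where each byte carries 7 payload bits and the high bit marks continuation; the build-time assertion above therefore guarantees the two bytes reserved in fde_def_cfa are enough. A standalone encoder:

    #include <stddef.h>
    #include <stdint.h>

    static size_t uleb128_encode(uint32_t value, uint8_t *out)
    {
        size_t n = 0;
        do {
            uint8_t byte = value & 0x7f;
            value >>= 7;
            if (value) {
                byte |= 0x80;       /* more bytes follow */
            }
            out[n++] = byte;
        } while (value);
        return n;                   /* at most 2 for any value < (1 << 14) */
    }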
+#if !defined(__ELF__)
+    /* Host machine without ELF. */
+#elif TCG_TARGET_REG_BITS == 64
+#define ELF_HOST_MACHINE EM_X86_64
+static DebugFrame debug_frame = {
+    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
+    .cie.id = -1,
+    .cie.version = 1,
+    .cie.code_align = 1,
+    .cie.data_align = 0x78,             /* sleb128 -8 */
+    .cie.return_column = 16,
+
+    /* Total FDE size does not include the "len" member.  */
+    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
+
+    .fde_def_cfa = {
+        12, 7,                          /* DW_CFA_def_cfa %rsp, ... */
+        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
+        (FRAME_SIZE >> 7)
+    },
+    .fde_reg_ofs = {
+        0x90, 1,                        /* DW_CFA_offset, %rip, -8 */
+        /* The following ordering must match tcg_target_callee_save_regs.  */
+        0x86, 2,                        /* DW_CFA_offset, %rbp, -16 */
+        0x83, 3,                        /* DW_CFA_offset, %rbx, -24 */
+        0x8c, 4,                        /* DW_CFA_offset, %r12, -32 */
+        0x8d, 5,                        /* DW_CFA_offset, %r13, -40 */
+        0x8e, 6,                        /* DW_CFA_offset, %r14, -48 */
+        0x8f, 7,                        /* DW_CFA_offset, %r15, -56 */
+    }
+};
+#else
+#define ELF_HOST_MACHINE EM_386
+static DebugFrame debug_frame = {
+    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
+    .cie.id = -1,
+    .cie.version = 1,
+    .cie.code_align = 1,
+    .cie.data_align = 0x7c,             /* sleb128 -4 */
+    .cie.return_column = 8,
+
+    /* Total FDE size does not include the "len" member.  */
+    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
+
+    .fde_def_cfa = {
+        12, 4,                          /* DW_CFA_def_cfa %esp, ... */
+        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
+        (FRAME_SIZE >> 7)
+    },
+    .fde_reg_ofs = {
+        0x88, 1,                        /* DW_CFA_offset, %eip, -4 */
+        /* The following ordering must match tcg_target_callee_save_regs.  */
+        0x85, 2,                        /* DW_CFA_offset, %ebp, -8 */
+        0x83, 3,                        /* DW_CFA_offset, %ebx, -12 */
+        0x86, 4,                        /* DW_CFA_offset, %esi, -16 */
+        0x87, 5,                        /* DW_CFA_offset, %edi, -20 */
+    }
+};
+#endif
+
+#if defined(ELF_HOST_MACHINE)
+void tcg_register_jit(void *buf, size_t buf_size)
+{
+    debug_frame.fde.func_start = (uintptr_t)buf;
+    debug_frame.fde.func_len = buf_size;
+
+    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
+}
+#endif
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index ec85c51..92c0fcd 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -21,22 +21,20 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
+#ifndef TCG_TARGET_I386
 #define TCG_TARGET_I386 1
 
-#if defined(__x86_64__)
-# define TCG_TARGET_REG_BITS 64
-#else
-#define TCG_TARGET_REG_BITS 32
-#endif
-//#define TCG_TARGET_WORDS_BIGENDIAN
+#undef TCG_TARGET_WORDS_BIGENDIAN
 
-#if TCG_TARGET_REG_BITS == 64
-# define TCG_TARGET_NB_REGS 16
+#ifdef __x86_64__
+# define TCG_TARGET_REG_BITS  64
+# define TCG_TARGET_NB_REGS   16
 #else
-#define TCG_TARGET_NB_REGS 8
+# define TCG_TARGET_REG_BITS  32
+# define TCG_TARGET_NB_REGS    8
 #endif
 
-enum {
+typedef enum {
     TCG_REG_EAX = 0,
     TCG_REG_ECX,
     TCG_REG_EDX,
@@ -64,67 +62,89 @@
     TCG_REG_RBP = TCG_REG_EBP,
     TCG_REG_RSI = TCG_REG_ESI,
     TCG_REG_RDI = TCG_REG_EDI,
-};
+} TCGReg;
 
 #define TCG_CT_CONST_S32 0x100
 #define TCG_CT_CONST_U32 0x200
 
 /* used for function call generation */
-#define TCG_REG_CALL_STACK TCG_REG_ESP
+#define TCG_REG_CALL_STACK TCG_REG_ESP
 #define TCG_TARGET_STACK_ALIGN 16
+#if defined(_WIN64)
+#define TCG_TARGET_CALL_STACK_OFFSET 32
+#else
 #define TCG_TARGET_CALL_STACK_OFFSET 0
+#endif
 
 /* optional instructions */
-#define TCG_TARGET_HAS_div2_i32
-#define TCG_TARGET_HAS_rot_i32
-#define TCG_TARGET_HAS_ext8s_i32
-#define TCG_TARGET_HAS_ext16s_i32
-#define TCG_TARGET_HAS_ext8u_i32
-#define TCG_TARGET_HAS_ext16u_i32
-#define TCG_TARGET_HAS_bswap16_i32
-#define TCG_TARGET_HAS_bswap32_i32
-#define TCG_TARGET_HAS_neg_i32
-#define TCG_TARGET_HAS_not_i32
-// #define TCG_TARGET_HAS_andc_i32
-// #define TCG_TARGET_HAS_orc_i32
-// #define TCG_TARGET_HAS_eqv_i32
-// #define TCG_TARGET_HAS_nand_i32
-// #define TCG_TARGET_HAS_nor_i32
+#define TCG_TARGET_HAS_div2_i32         1
+#define TCG_TARGET_HAS_rot_i32          1
+#define TCG_TARGET_HAS_ext8s_i32        1
+#define TCG_TARGET_HAS_ext16s_i32       1
+#define TCG_TARGET_HAS_ext8u_i32        1
+#define TCG_TARGET_HAS_ext16u_i32       1
+#define TCG_TARGET_HAS_bswap16_i32      1
+#define TCG_TARGET_HAS_bswap32_i32      1
+#define TCG_TARGET_HAS_neg_i32          1
+#define TCG_TARGET_HAS_not_i32          1
+#define TCG_TARGET_HAS_andc_i32         0
+#define TCG_TARGET_HAS_orc_i32          0
+#define TCG_TARGET_HAS_eqv_i32          0
+#define TCG_TARGET_HAS_nand_i32         0
+#define TCG_TARGET_HAS_nor_i32          0
+#define TCG_TARGET_HAS_deposit_i32      1
+#define TCG_TARGET_HAS_movcond_i32      1
+#define TCG_TARGET_HAS_add2_i32         1
+#define TCG_TARGET_HAS_sub2_i32         1
+#define TCG_TARGET_HAS_mulu2_i32        1
+#define TCG_TARGET_HAS_muls2_i32        1
+#define TCG_TARGET_HAS_muluh_i32        0
+#define TCG_TARGET_HAS_mulsh_i32        0
 
 #if TCG_TARGET_REG_BITS == 64
-#define TCG_TARGET_HAS_div2_i64
-#define TCG_TARGET_HAS_rot_i64
-#define TCG_TARGET_HAS_ext8s_i64
-#define TCG_TARGET_HAS_ext16s_i64
-#define TCG_TARGET_HAS_ext32s_i64
-#define TCG_TARGET_HAS_ext8u_i64
-#define TCG_TARGET_HAS_ext16u_i64
-#define TCG_TARGET_HAS_ext32u_i64
-#define TCG_TARGET_HAS_bswap16_i64
-#define TCG_TARGET_HAS_bswap32_i64
-#define TCG_TARGET_HAS_bswap64_i64
-#define TCG_TARGET_HAS_neg_i64
-#define TCG_TARGET_HAS_not_i64
-// #define TCG_TARGET_HAS_andc_i64
-// #define TCG_TARGET_HAS_orc_i64
-// #define TCG_TARGET_HAS_eqv_i64
-// #define TCG_TARGET_HAS_nand_i64
-// #define TCG_TARGET_HAS_nor_i64
+#define TCG_TARGET_HAS_div2_i64         1
+#define TCG_TARGET_HAS_rot_i64          1
+#define TCG_TARGET_HAS_ext8s_i64        1
+#define TCG_TARGET_HAS_ext16s_i64       1
+#define TCG_TARGET_HAS_ext32s_i64       1
+#define TCG_TARGET_HAS_ext8u_i64        1
+#define TCG_TARGET_HAS_ext16u_i64       1
+#define TCG_TARGET_HAS_ext32u_i64       1
+#define TCG_TARGET_HAS_bswap16_i64      1
+#define TCG_TARGET_HAS_bswap32_i64      1
+#define TCG_TARGET_HAS_bswap64_i64      1
+#define TCG_TARGET_HAS_neg_i64          1
+#define TCG_TARGET_HAS_not_i64          1
+#define TCG_TARGET_HAS_andc_i64         0
+#define TCG_TARGET_HAS_orc_i64          0
+#define TCG_TARGET_HAS_eqv_i64          0
+#define TCG_TARGET_HAS_nand_i64         0
+#define TCG_TARGET_HAS_nor_i64          0
+#define TCG_TARGET_HAS_deposit_i64      1
+#define TCG_TARGET_HAS_movcond_i64      1
+#define TCG_TARGET_HAS_add2_i64         1
+#define TCG_TARGET_HAS_sub2_i64         1
+#define TCG_TARGET_HAS_mulu2_i64        1
+#define TCG_TARGET_HAS_muls2_i64        1
+#define TCG_TARGET_HAS_muluh_i64        0
+#define TCG_TARGET_HAS_mulsh_i64        0
 #endif
 
-#define TCG_TARGET_HAS_GUEST_BASE
+#define TCG_TARGET_HAS_new_ldst         1
 
-/* Note: must be synced with dyngen-exec.h */
+#define TCG_TARGET_deposit_i32_valid(ofs, len) \
+    (((ofs) == 0 && (len) == 8) || ((ofs) == 8 && (len) == 8) || \
+     ((ofs) == 0 && (len) == 16))
+#define TCG_TARGET_deposit_i64_valid    TCG_TARGET_deposit_i32_valid
+
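[Editor's note — illustrative sketch, not part of the patch.] The deposit operation restricted by the macro above inserts the low len bits of the source at bit offset ofs of the destination; the backend then implements the three accepted (ofs, len) pairs with byte/word partial-register moves.

    #include <stdint.h>

    static uint32_t deposit32_sketch(uint32_t dst, uint32_t src,
                                     unsigned ofs, unsigned len)
    {
        uint32_t mask = (len < 32 ? (1u << len) - 1 : ~0u) << ofs;
        return (dst & ~mask) | ((src << ofs) & mask);
    }
    /* pairs accepted here: (0, 8), (8, 8) and (0, 16) */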
 #if TCG_TARGET_REG_BITS == 64
-#define TCG_AREG0 TCG_REG_R14
-#define TCG_AREG1 TCG_REG_R15
-#define TCG_AREG2 TCG_REG_R12
+# define TCG_AREG0 TCG_REG_R14
 #else
-#define TCG_AREG0 TCG_REG_EBP
-#define TCG_AREG1 TCG_REG_EBX
-#define TCG_AREG2 TCG_REG_ESI
+# define TCG_AREG0 TCG_REG_EBP
 #endif
 
-static inline void flush_icache_range(unsigned long start, unsigned long stop)
+static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
 {
 }
+
+#endif
diff --git a/tcg/optimize.c b/tcg/optimize.c
new file mode 100644
index 0000000..ef83229
--- /dev/null
+++ b/tcg/optimize.c
@@ -0,0 +1,1165 @@
+/*
+ * Optimizations for Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2010 Samsung Electronics.
+ * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "config.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "qemu-common.h"
+#include "cpu.h"
+#include "exec/exec-all.h"
+#include "tcg-op.h"
+
+#define CASE_OP_32_64(x)                        \
+        glue(glue(case INDEX_op_, x), _i32):    \
+        glue(glue(case INDEX_op_, x), _i64)
+
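[Editor's note: CASE_OP_32_64(add), for example, expands to the pair of labels "case INDEX_op_add_i32:" and "case INDEX_op_add_i64:", so a single switch arm below handles both operand widths.]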
+typedef enum {
+    TCG_TEMP_UNDEF = 0,
+    TCG_TEMP_CONST,
+    TCG_TEMP_COPY,
+} tcg_temp_state;
+
+struct tcg_temp_info {
+    tcg_temp_state state;
+    uint16_t prev_copy;
+    uint16_t next_copy;
+    tcg_target_ulong val;
+    tcg_target_ulong mask;
+};
+
+static struct tcg_temp_info temps[TCG_MAX_TEMPS];
+
+/* Reset TEMP's state to TCG_TEMP_UNDEF.  If TEMP only had one copy, remove
+   the copy flag from the left temp.  */
+static void reset_temp(TCGArg temp)
+{
+    if (temps[temp].state == TCG_TEMP_COPY) {
+        if (temps[temp].prev_copy == temps[temp].next_copy) {
+            temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF;
+        } else {
+            temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
+            temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
+        }
+    }
+    temps[temp].state = TCG_TEMP_UNDEF;
+    temps[temp].mask = -1;
+}
+
+/* Reset all temporaries, given that there are NB_TEMPS of them.  */
+static void reset_all_temps(int nb_temps)
+{
+    int i;
+    for (i = 0; i < nb_temps; i++) {
+        temps[i].state = TCG_TEMP_UNDEF;
+        temps[i].mask = -1;
+    }
+}
+
+static int op_bits(TCGOpcode op)
+{
+    const TCGOpDef *def = &tcg_op_defs[op];
+    return def->flags & TCG_OPF_64BIT ? 64 : 32;
+}
+
+static TCGOpcode op_to_movi(TCGOpcode op)
+{
+    switch (op_bits(op)) {
+    case 32:
+        return INDEX_op_movi_i32;
+    case 64:
+        return INDEX_op_movi_i64;
+    default:
+        fprintf(stderr, "op_to_movi: unexpected return value of "
+                "function op_bits.\n");
+        tcg_abort();
+    }
+}
+
+static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
+{
+    TCGArg i;
+
+    /* If this is already a global, we can't do better. */
+    if (temp < s->nb_globals) {
+        return temp;
+    }
+
+    /* Search for a global first. */
+    for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
+        if (i < s->nb_globals) {
+            return i;
+        }
+    }
+
+    /* If it is a temp, search for a temp local. */
+    if (!s->temps[temp].temp_local) {
+        for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
+            if (s->temps[i].temp_local) {
+                return i;
+            }
+        }
+    }
+
+    /* If no better representation was found, return the same temp. */
+    return temp;
+}
+
+static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
+{
+    TCGArg i;
+
+    if (arg1 == arg2) {
+        return true;
+    }
+
+    if (temps[arg1].state != TCG_TEMP_COPY
+        || temps[arg2].state != TCG_TEMP_COPY) {
+        return false;
+    }
+
+    for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
+        if (i == arg2) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
+                            TCGArg dst, TCGArg src)
+{
+    reset_temp(dst);
+    temps[dst].mask = temps[src].mask;
+    assert(temps[src].state != TCG_TEMP_CONST);
+
+    if (s->temps[src].type == s->temps[dst].type) {
+        if (temps[src].state != TCG_TEMP_COPY) {
+            temps[src].state = TCG_TEMP_COPY;
+            temps[src].next_copy = src;
+            temps[src].prev_copy = src;
+        }
+        temps[dst].state = TCG_TEMP_COPY;
+        temps[dst].next_copy = temps[src].next_copy;
+        temps[dst].prev_copy = src;
+        temps[temps[dst].next_copy].prev_copy = dst;
+        temps[src].next_copy = dst;
+    }
+
+    gen_args[0] = dst;
+    gen_args[1] = src;
+}
+
+static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val)
+{
+    reset_temp(dst);
+    temps[dst].state = TCG_TEMP_CONST;
+    temps[dst].val = val;
+    temps[dst].mask = val;
+    gen_args[0] = dst;
+    gen_args[1] = val;
+}
+
+static TCGOpcode op_to_mov(TCGOpcode op)
+{
+    switch (op_bits(op)) {
+    case 32:
+        return INDEX_op_mov_i32;
+    case 64:
+        return INDEX_op_mov_i64;
+    default:
+        fprintf(stderr, "op_to_mov: unexpected return value of "
+                "function op_bits.\n");
+        tcg_abort();
+    }
+}
+
+static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
+{
+    uint64_t l64, h64;
+
+    switch (op) {
+    CASE_OP_32_64(add):
+        return x + y;
+
+    CASE_OP_32_64(sub):
+        return x - y;
+
+    CASE_OP_32_64(mul):
+        return x * y;
+
+    CASE_OP_32_64(and):
+        return x & y;
+
+    CASE_OP_32_64(or):
+        return x | y;
+
+    CASE_OP_32_64(xor):
+        return x ^ y;
+
+    case INDEX_op_shl_i32:
+        return (uint32_t)x << (uint32_t)y;
+
+    case INDEX_op_shl_i64:
+        return (uint64_t)x << (uint64_t)y;
+
+    case INDEX_op_shr_i32:
+        return (uint32_t)x >> (uint32_t)y;
+
+    case INDEX_op_shr_i64:
+        return (uint64_t)x >> (uint64_t)y;
+
+    case INDEX_op_sar_i32:
+        return (int32_t)x >> (int32_t)y;
+
+    case INDEX_op_sar_i64:
+        return (int64_t)x >> (int64_t)y;
+
+    case INDEX_op_rotr_i32:
+        return ror32(x, y);
+
+    case INDEX_op_rotr_i64:
+        return ror64(x, y);
+
+    case INDEX_op_rotl_i32:
+        return rol32(x, y);
+
+    case INDEX_op_rotl_i64:
+        return rol64(x, y);
+
+    CASE_OP_32_64(not):
+        return ~x;
+
+    CASE_OP_32_64(neg):
+        return -x;
+
+    CASE_OP_32_64(andc):
+        return x & ~y;
+
+    CASE_OP_32_64(orc):
+        return x | ~y;
+
+    CASE_OP_32_64(eqv):
+        return ~(x ^ y);
+
+    CASE_OP_32_64(nand):
+        return ~(x & y);
+
+    CASE_OP_32_64(nor):
+        return ~(x | y);
+
+    CASE_OP_32_64(ext8s):
+        return (int8_t)x;
+
+    CASE_OP_32_64(ext16s):
+        return (int16_t)x;
+
+    CASE_OP_32_64(ext8u):
+        return (uint8_t)x;
+
+    CASE_OP_32_64(ext16u):
+        return (uint16_t)x;
+
+    case INDEX_op_ext32s_i64:
+        return (int32_t)x;
+
+    case INDEX_op_ext32u_i64:
+        return (uint32_t)x;
+
+    case INDEX_op_muluh_i32:
+        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
+    case INDEX_op_mulsh_i32:
+        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
+
+    case INDEX_op_muluh_i64:
+        mulu64(&l64, &h64, x, y);
+        return h64;
+    case INDEX_op_mulsh_i64:
+        muls64(&l64, &h64, x, y);
+        return h64;
+
+    case INDEX_op_div_i32:
+        /* Avoid crashing on divide by zero, otherwise undefined.  */
+        return (int32_t)x / ((int32_t)y ? : 1);
+    case INDEX_op_divu_i32:
+        return (uint32_t)x / ((uint32_t)y ? : 1);
+    case INDEX_op_div_i64:
+        return (int64_t)x / ((int64_t)y ? : 1);
+    case INDEX_op_divu_i64:
+        return (uint64_t)x / ((uint64_t)y ? : 1);
+
+    case INDEX_op_rem_i32:
+        return (int32_t)x % ((int32_t)y ? : 1);
+    case INDEX_op_remu_i32:
+        return (uint32_t)x % ((uint32_t)y ? : 1);
+    case INDEX_op_rem_i64:
+        return (int64_t)x % ((int64_t)y ? : 1);
+    case INDEX_op_remu_i64:
+        return (uint64_t)x % ((uint64_t)y ? : 1);
+
+    default:
+        fprintf(stderr,
+                "Unrecognized operation %d in do_constant_folding.\n", op);
+        tcg_abort();
+    }
+}
+
+static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
+{
+    TCGArg res = do_constant_folding_2(op, x, y);
+    if (op_bits(op) == 32) {
+        res &= 0xffffffff;
+    }
+    return res;
+}
+
+static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
+{
+    switch (c) {
+    case TCG_COND_EQ:
+        return x == y;
+    case TCG_COND_NE:
+        return x != y;
+    case TCG_COND_LT:
+        return (int32_t)x < (int32_t)y;
+    case TCG_COND_GE:
+        return (int32_t)x >= (int32_t)y;
+    case TCG_COND_LE:
+        return (int32_t)x <= (int32_t)y;
+    case TCG_COND_GT:
+        return (int32_t)x > (int32_t)y;
+    case TCG_COND_LTU:
+        return x < y;
+    case TCG_COND_GEU:
+        return x >= y;
+    case TCG_COND_LEU:
+        return x <= y;
+    case TCG_COND_GTU:
+        return x > y;
+    default:
+        tcg_abort();
+    }
+}
+
+static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
+{
+    switch (c) {
+    case TCG_COND_EQ:
+        return x == y;
+    case TCG_COND_NE:
+        return x != y;
+    case TCG_COND_LT:
+        return (int64_t)x < (int64_t)y;
+    case TCG_COND_GE:
+        return (int64_t)x >= (int64_t)y;
+    case TCG_COND_LE:
+        return (int64_t)x <= (int64_t)y;
+    case TCG_COND_GT:
+        return (int64_t)x > (int64_t)y;
+    case TCG_COND_LTU:
+        return x < y;
+    case TCG_COND_GEU:
+        return x >= y;
+    case TCG_COND_LEU:
+        return x <= y;
+    case TCG_COND_GTU:
+        return x > y;
+    default:
+        tcg_abort();
+    }
+}
+
+static bool do_constant_folding_cond_eq(TCGCond c)
+{
+    switch (c) {
+    case TCG_COND_GT:
+    case TCG_COND_LTU:
+    case TCG_COND_LT:
+    case TCG_COND_GTU:
+    case TCG_COND_NE:
+        return 0;
+    case TCG_COND_GE:
+    case TCG_COND_GEU:
+    case TCG_COND_LE:
+    case TCG_COND_LEU:
+    case TCG_COND_EQ:
+        return 1;
+    default:
+        tcg_abort();
+    }
+}
+
+/* Return 2 if the condition can't be simplified, and the result
+   of the condition (0 or 1) if it can */
+static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
+                                       TCGArg y, TCGCond c)
+{
+    if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) {
+        switch (op_bits(op)) {
+        case 32:
+            return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
+        case 64:
+            return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
+        default:
+            tcg_abort();
+        }
+    } else if (temps_are_copies(x, y)) {
+        return do_constant_folding_cond_eq(c);
+    } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
+        switch (c) {
+        case TCG_COND_LTU:
+            return 0;
+        case TCG_COND_GEU:
+            return 1;
+        default:
+            return 2;
+        }
+    } else {
+        return 2;
+    }
+}
+
+/* Return 2 if the condition can't be simplified, and the result
+   of the condition (0 or 1) if it can */
+static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
+{
+    TCGArg al = p1[0], ah = p1[1];
+    TCGArg bl = p2[0], bh = p2[1];
+
+    if (temps[bl].state == TCG_TEMP_CONST
+        && temps[bh].state == TCG_TEMP_CONST) {
+        uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;
+
+        if (temps[al].state == TCG_TEMP_CONST
+            && temps[ah].state == TCG_TEMP_CONST) {
+            uint64_t a;
+            a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
+            return do_constant_folding_cond_64(a, b, c);
+        }
+        if (b == 0) {
+            switch (c) {
+            case TCG_COND_LTU:
+                return 0;
+            case TCG_COND_GEU:
+                return 1;
+            default:
+                break;
+            }
+        }
+    }
+    if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
+        return do_constant_folding_cond_eq(c);
+    }
+    return 2;
+}
+
+static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
+{
+    TCGArg a1 = *p1, a2 = *p2;
+    int sum = 0;
+    sum += temps[a1].state == TCG_TEMP_CONST;
+    sum -= temps[a2].state == TCG_TEMP_CONST;
+
+    /* Prefer the constant in second argument, and then the form
+       op a, a, b, which is better handled on non-RISC hosts. */
+    if (sum > 0 || (sum == 0 && dest == a2)) {
+        *p1 = a2;
+        *p2 = a1;
+        return true;
+    }
+    return false;
+}
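[Editor's note — illustrative only.] With this rule, "add_i32 t0, $5, t1" is canonicalized to "add_i32 t0, t1, $5" (constant last), and "add_i32 t0, t1, t0" to "add_i32 t0, t0, t1" (destination aliased to the first source), the forms the comment above says are preferred.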
+
+static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
+{
+    int sum = 0;
+    sum += temps[p1[0]].state == TCG_TEMP_CONST;
+    sum += temps[p1[1]].state == TCG_TEMP_CONST;
+    sum -= temps[p2[0]].state == TCG_TEMP_CONST;
+    sum -= temps[p2[1]].state == TCG_TEMP_CONST;
+    if (sum > 0) {
+        TCGArg t;
+        t = p1[0], p1[0] = p2[0], p2[0] = t;
+        t = p1[1], p1[1] = p2[1], p2[1] = t;
+        return true;
+    }
+    return false;
+}
+
+/* Propagate constants and copies, fold constant expressions. */
+static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
+                                    TCGArg *args, TCGOpDef *tcg_op_defs)
+{
+    int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args;
+    tcg_target_ulong mask, affected;
+    TCGOpcode op;
+    const TCGOpDef *def;
+    TCGArg *gen_args;
+    TCGArg tmp;
+
+    /* Array VALS has an element for each temp.
+       If this temp holds a constant then its value is kept in VALS' element.
+       If this temp is a copy of other ones then the other copies are
+       available through the doubly linked circular list. */
+
+    nb_temps = s->nb_temps;
+    nb_globals = s->nb_globals;
+    reset_all_temps(nb_temps);
+
+    nb_ops = tcg_opc_ptr - s->gen_opc_buf;
+    gen_args = args;
+    for (op_index = 0; op_index < nb_ops; op_index++) {
+        op = s->gen_opc_buf[op_index];
+        def = &tcg_op_defs[op];
+        /* Do copy propagation */
+        if (op == INDEX_op_call) {
+            int nb_oargs = args[0] >> 16;
+            int nb_iargs = args[0] & 0xffff;
+            for (i = nb_oargs + 1; i < nb_oargs + nb_iargs + 1; i++) {
+                if (temps[args[i]].state == TCG_TEMP_COPY) {
+                    args[i] = find_better_copy(s, args[i]);
+                }
+            }
+        } else {
+            for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
+                if (temps[args[i]].state == TCG_TEMP_COPY) {
+                    args[i] = find_better_copy(s, args[i]);
+                }
+            }
+        }
+
+        /* For commutative operations make constant second argument */
+        switch (op) {
+        CASE_OP_32_64(add):
+        CASE_OP_32_64(mul):
+        CASE_OP_32_64(and):
+        CASE_OP_32_64(or):
+        CASE_OP_32_64(xor):
+        CASE_OP_32_64(eqv):
+        CASE_OP_32_64(nand):
+        CASE_OP_32_64(nor):
+        CASE_OP_32_64(muluh):
+        CASE_OP_32_64(mulsh):
+            swap_commutative(args[0], &args[1], &args[2]);
+            break;
+        CASE_OP_32_64(brcond):
+            if (swap_commutative(-1, &args[0], &args[1])) {
+                args[2] = tcg_swap_cond(args[2]);
+            }
+            break;
+        CASE_OP_32_64(setcond):
+            if (swap_commutative(args[0], &args[1], &args[2])) {
+                args[3] = tcg_swap_cond(args[3]);
+            }
+            break;
+        CASE_OP_32_64(movcond):
+            if (swap_commutative(-1, &args[1], &args[2])) {
+                args[5] = tcg_swap_cond(args[5]);
+            }
+            /* For movcond, we canonicalize the "false" input reg to match
+               the destination reg so that the tcg backend can implement
+               a "move if true" operation.  */
+            if (swap_commutative(args[0], &args[4], &args[3])) {
+                args[5] = tcg_invert_cond(args[5]);
+            }
+            break;
+        CASE_OP_32_64(add2):
+            swap_commutative(args[0], &args[2], &args[4]);
+            swap_commutative(args[1], &args[3], &args[5]);
+            break;
+        CASE_OP_32_64(mulu2):
+        CASE_OP_32_64(muls2):
+            swap_commutative(args[0], &args[2], &args[3]);
+            break;
+        case INDEX_op_brcond2_i32:
+            if (swap_commutative2(&args[0], &args[2])) {
+                args[4] = tcg_swap_cond(args[4]);
+            }
+            break;
+        case INDEX_op_setcond2_i32:
+            if (swap_commutative2(&args[1], &args[3])) {
+                args[5] = tcg_swap_cond(args[5]);
+            }
+            break;
+        default:
+            break;
+        }
+
+        /* Simplify expressions for the "shift/rot r, 0, a => movi r, 0"
+           and "sub r, 0, a => neg r, a" cases.  */
+        switch (op) {
+        CASE_OP_32_64(shl):
+        CASE_OP_32_64(shr):
+        CASE_OP_32_64(sar):
+        CASE_OP_32_64(rotl):
+        CASE_OP_32_64(rotr):
+            if (temps[args[1]].state == TCG_TEMP_CONST
+                && temps[args[1]].val == 0) {
+                s->gen_opc_buf[op_index] = op_to_movi(op);
+                tcg_opt_gen_movi(gen_args, args[0], 0);
+                args += 3;
+                gen_args += 2;
+                continue;
+            }
+            break;
+        CASE_OP_32_64(sub):
+            {
+                TCGOpcode neg_op;
+                bool have_neg;
+
+                if (temps[args[2]].state == TCG_TEMP_CONST) {
+                    /* Proceed with possible constant folding. */
+                    break;
+                }
+                if (op == INDEX_op_sub_i32) {
+                    neg_op = INDEX_op_neg_i32;
+                    have_neg = TCG_TARGET_HAS_neg_i32;
+                } else {
+                    neg_op = INDEX_op_neg_i64;
+                    have_neg = TCG_TARGET_HAS_neg_i64;
+                }
+                if (!have_neg) {
+                    break;
+                }
+                if (temps[args[1]].state == TCG_TEMP_CONST
+                    && temps[args[1]].val == 0) {
+                    s->gen_opc_buf[op_index] = neg_op;
+                    reset_temp(args[0]);
+                    gen_args[0] = args[0];
+                    gen_args[1] = args[2];
+                    args += 3;
+                    gen_args += 2;
+                    continue;
+                }
+            }
+            break;
+        default:
+            break;
+        }
+
+        /* Simplify expressions for the "op r, a, 0 => mov r, a" cases. */
+        switch (op) {
+        CASE_OP_32_64(add):
+        CASE_OP_32_64(sub):
+        CASE_OP_32_64(shl):
+        CASE_OP_32_64(shr):
+        CASE_OP_32_64(sar):
+        CASE_OP_32_64(rotl):
+        CASE_OP_32_64(rotr):
+        CASE_OP_32_64(or):
+        CASE_OP_32_64(xor):
+            if (temps[args[1]].state == TCG_TEMP_CONST) {
+                /* Proceed with possible constant folding. */
+                break;
+            }
+            if (temps[args[2]].state == TCG_TEMP_CONST
+                && temps[args[2]].val == 0) {
+                if (temps_are_copies(args[0], args[1])) {
+                    s->gen_opc_buf[op_index] = INDEX_op_nop;
+                } else {
+                    s->gen_opc_buf[op_index] = op_to_mov(op);
+                    tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
+                    gen_args += 2;
+                }
+                args += 3;
+                continue;
+            }
+            break;
+        default:
+            break;
+        }
+
+        /* Simplify using known-zero bits */
+        mask = -1;
+        affected = -1;
+        switch (op) {
+        CASE_OP_32_64(ext8s):
+            if ((temps[args[1]].mask & 0x80) != 0) {
+                break;
+            }
+        CASE_OP_32_64(ext8u):
+            mask = 0xff;
+            goto and_const;
+        CASE_OP_32_64(ext16s):
+            if ((temps[args[1]].mask & 0x8000) != 0) {
+                break;
+            }
+        CASE_OP_32_64(ext16u):
+            mask = 0xffff;
+            goto and_const;
+        case INDEX_op_ext32s_i64:
+            if ((temps[args[1]].mask & 0x80000000) != 0) {
+                break;
+            }
+        case INDEX_op_ext32u_i64:
+            mask = 0xffffffffU;
+            goto and_const;
+
+        CASE_OP_32_64(and):
+            mask = temps[args[2]].mask;
+            if (temps[args[2]].state == TCG_TEMP_CONST) {
+        and_const:
+                affected = temps[args[1]].mask & ~mask;
+            }
+            mask = temps[args[1]].mask & mask;
+            break;
+
+        CASE_OP_32_64(sar):
+            if (temps[args[2]].state == TCG_TEMP_CONST) {
+                mask = ((tcg_target_long)temps[args[1]].mask
+                        >> temps[args[2]].val);
+            }
+            break;
+
+        CASE_OP_32_64(shr):
+            if (temps[args[2]].state == TCG_TEMP_CONST) {
+                mask = temps[args[1]].mask >> temps[args[2]].val;
+            }
+            break;
+
+        CASE_OP_32_64(shl):
+            if (temps[args[2]].state == TCG_TEMP_CONST) {
+                mask = temps[args[1]].mask << temps[args[2]].val;
+            }
+            break;
+
+        CASE_OP_32_64(neg):
+            /* Set to 1 the rightmost set bit and all bits to its left.  */
+            mask = -(temps[args[1]].mask & -temps[args[1]].mask);
+            break;
+
+        CASE_OP_32_64(deposit):
+            tmp = ((1ull << args[4]) - 1);
+            mask = ((temps[args[1]].mask & ~(tmp << args[3]))
+                    | ((temps[args[2]].mask & tmp) << args[3]));
+            break;
+
+        CASE_OP_32_64(or):
+        CASE_OP_32_64(xor):
+            mask = temps[args[1]].mask | temps[args[2]].mask;
+            break;
+
+        CASE_OP_32_64(setcond):
+            mask = 1;
+            break;
+
+        CASE_OP_32_64(movcond):
+            mask = temps[args[3]].mask | temps[args[4]].mask;
+            break;
+
+        default:
+            break;
+        }
+
+        if (mask == 0) {
+            assert(def->nb_oargs == 1);
+            s->gen_opc_buf[op_index] = op_to_movi(op);
+            tcg_opt_gen_movi(gen_args, args[0], 0);
+            args += def->nb_oargs + def->nb_iargs + def->nb_cargs;
+            gen_args += 2;
+            continue;
+        }
+        if (affected == 0) {
+            assert(def->nb_oargs == 1);
+            if (temps_are_copies(args[0], args[1])) {
+                s->gen_opc_buf[op_index] = INDEX_op_nop;
+            } else if (temps[args[1]].state != TCG_TEMP_CONST) {
+                s->gen_opc_buf[op_index] = op_to_mov(op);
+                tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
+                gen_args += 2;
+            } else {
+                s->gen_opc_buf[op_index] = op_to_movi(op);
+                tcg_opt_gen_movi(gen_args, args[0], temps[args[1]].val);
+                gen_args += 2;
+            }
+            args += def->nb_iargs + 1;
+            continue;
+        }
+
+        /* Simplify expressions for the "op r, a, 0 => movi r, 0" cases. */
+        switch (op) {
+        CASE_OP_32_64(and):
+        CASE_OP_32_64(mul):
+        CASE_OP_32_64(muluh):
+        CASE_OP_32_64(mulsh):
+            if ((temps[args[2]].state == TCG_TEMP_CONST
+                && temps[args[2]].val == 0)) {
+                s->gen_opc_buf[op_index] = op_to_movi(op);
+                tcg_opt_gen_movi(gen_args, args[0], 0);
+                args += 3;
+                gen_args += 2;
+                continue;
+            }
+            break;
+        default:
+            break;
+        }
+
+        /* Simplify expressions for the "op r, a, a => mov r, a" cases. */
+        switch (op) {
+        CASE_OP_32_64(or):
+        CASE_OP_32_64(and):
+            if (temps_are_copies(args[1], args[2])) {
+                if (temps_are_copies(args[0], args[1])) {
+                    s->gen_opc_buf[op_index] = INDEX_op_nop;
+                } else {
+                    s->gen_opc_buf[op_index] = op_to_mov(op);
+                    tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
+                    gen_args += 2;
+                }
+                args += 3;
+                continue;
+            }
+            break;
+        default:
+            break;
+        }
+
+        /* Simplify expressions for the "op r, a, a => movi r, 0" cases. */
+        switch (op) {
+        CASE_OP_32_64(sub):
+        CASE_OP_32_64(xor):
+            if (temps_are_copies(args[1], args[2])) {
+                s->gen_opc_buf[op_index] = op_to_movi(op);
+                tcg_opt_gen_movi(gen_args, args[0], 0);
+                gen_args += 2;
+                args += 3;
+                continue;
+            }
+            break;
+        default:
+            break;
+        }
+
+        /* Propagate constants through copy operations and do constant
+           folding.  Constants will be substituted for arguments by the
+           register allocator where needed and possible.  Also detect copies. */
+        switch (op) {
+        CASE_OP_32_64(mov):
+            if (temps_are_copies(args[0], args[1])) {
+                args += 2;
+                s->gen_opc_buf[op_index] = INDEX_op_nop;
+                break;
+            }
+            if (temps[args[1]].state != TCG_TEMP_CONST) {
+                tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
+                gen_args += 2;
+                args += 2;
+                break;
+            }
+            /* The source argument is constant.  Rewrite the operation and
+               let the movi case below handle it. */
+            op = op_to_movi(op);
+            s->gen_opc_buf[op_index] = op;
+            args[1] = temps[args[1]].val;
+            /* fallthrough */
+        CASE_OP_32_64(movi):
+            tcg_opt_gen_movi(gen_args, args[0], args[1]);
+            gen_args += 2;
+            args += 2;
+            break;
+
+        CASE_OP_32_64(not):
+        CASE_OP_32_64(neg):
+        CASE_OP_32_64(ext8s):
+        CASE_OP_32_64(ext8u):
+        CASE_OP_32_64(ext16s):
+        CASE_OP_32_64(ext16u):
+        case INDEX_op_ext32s_i64:
+        case INDEX_op_ext32u_i64:
+            if (temps[args[1]].state == TCG_TEMP_CONST) {
+                s->gen_opc_buf[op_index] = op_to_movi(op);
+                tmp = do_constant_folding(op, temps[args[1]].val, 0);
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
+                gen_args += 2;
+                args += 2;
+                break;
+            }
+            goto do_default;
+
+        CASE_OP_32_64(add):
+        CASE_OP_32_64(sub):
+        CASE_OP_32_64(mul):
+        CASE_OP_32_64(or):
+        CASE_OP_32_64(and):
+        CASE_OP_32_64(xor):
+        CASE_OP_32_64(shl):
+        CASE_OP_32_64(shr):
+        CASE_OP_32_64(sar):
+        CASE_OP_32_64(rotl):
+        CASE_OP_32_64(rotr):
+        CASE_OP_32_64(andc):
+        CASE_OP_32_64(orc):
+        CASE_OP_32_64(eqv):
+        CASE_OP_32_64(nand):
+        CASE_OP_32_64(nor):
+        CASE_OP_32_64(muluh):
+        CASE_OP_32_64(mulsh):
+        CASE_OP_32_64(div):
+        CASE_OP_32_64(divu):
+        CASE_OP_32_64(rem):
+        CASE_OP_32_64(remu):
+            if (temps[args[1]].state == TCG_TEMP_CONST
+                && temps[args[2]].state == TCG_TEMP_CONST) {
+                s->gen_opc_buf[op_index] = op_to_movi(op);
+                tmp = do_constant_folding(op, temps[args[1]].val,
+                                          temps[args[2]].val);
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
+                gen_args += 2;
+                args += 3;
+                break;
+            }
+            goto do_default;
+
+        CASE_OP_32_64(deposit):
+            if (temps[args[1]].state == TCG_TEMP_CONST
+                && temps[args[2]].state == TCG_TEMP_CONST) {
+                s->gen_opc_buf[op_index] = op_to_movi(op);
+                tmp = ((1ull << args[4]) - 1);
+                tmp = (temps[args[1]].val & ~(tmp << args[3]))
+                      | ((temps[args[2]].val & tmp) << args[3]);
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
+                gen_args += 2;
+                args += 5;
+                break;
+            }
+            goto do_default;
+
+        CASE_OP_32_64(setcond):
+            tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
+            if (tmp != 2) {
+                s->gen_opc_buf[op_index] = op_to_movi(op);
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
+                gen_args += 2;
+                args += 4;
+                break;
+            }
+            goto do_default;
+
+        CASE_OP_32_64(brcond):
+            tmp = do_constant_folding_cond(op, args[0], args[1], args[2]);
+            if (tmp != 2) {
+                if (tmp) {
+                    reset_all_temps(nb_temps);
+                    s->gen_opc_buf[op_index] = INDEX_op_br;
+                    gen_args[0] = args[3];
+                    gen_args += 1;
+                } else {
+                    s->gen_opc_buf[op_index] = INDEX_op_nop;
+                }
+                args += 4;
+                break;
+            }
+            goto do_default;
+
+        CASE_OP_32_64(movcond):
+            tmp = do_constant_folding_cond(op, args[1], args[2], args[5]);
+            if (tmp != 2) {
+                if (temps_are_copies(args[0], args[4-tmp])) {
+                    s->gen_opc_buf[op_index] = INDEX_op_nop;
+                } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
+                    s->gen_opc_buf[op_index] = op_to_movi(op);
+                    tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val);
+                    gen_args += 2;
+                } else {
+                    s->gen_opc_buf[op_index] = op_to_mov(op);
+                    tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
+                    gen_args += 2;
+                }
+                args += 6;
+                break;
+            }
+            goto do_default;
+
+        case INDEX_op_add2_i32:
+        case INDEX_op_sub2_i32:
+            if (temps[args[2]].state == TCG_TEMP_CONST
+                && temps[args[3]].state == TCG_TEMP_CONST
+                && temps[args[4]].state == TCG_TEMP_CONST
+                && temps[args[5]].state == TCG_TEMP_CONST) {
+                uint32_t al = temps[args[2]].val;
+                uint32_t ah = temps[args[3]].val;
+                uint32_t bl = temps[args[4]].val;
+                uint32_t bh = temps[args[5]].val;
+                uint64_t a = ((uint64_t)ah << 32) | al;
+                uint64_t b = ((uint64_t)bh << 32) | bl;
+                TCGArg rl, rh;
+
+                if (op == INDEX_op_add2_i32) {
+                    a += b;
+                } else {
+                    a -= b;
+                }
+
+                /* We emit the extra nop when we emit the add2/sub2.  */
+                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
+
+                rl = args[0];
+                rh = args[1];
+                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
+                s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
+                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a);
+                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32));
+                gen_args += 4;
+                args += 6;
+                break;
+            }
+            goto do_default;
+
+        case INDEX_op_mulu2_i32:
+            if (temps[args[2]].state == TCG_TEMP_CONST
+                && temps[args[3]].state == TCG_TEMP_CONST) {
+                uint32_t a = temps[args[2]].val;
+                uint32_t b = temps[args[3]].val;
+                uint64_t r = (uint64_t)a * b;
+                TCGArg rl, rh;
+
+                /* We emit the extra nop when we emit the mulu2.  */
+                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
+
+                rl = args[0];
+                rh = args[1];
+                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
+                s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
+                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r);
+                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32));
+                gen_args += 4;
+                args += 4;
+                break;
+            }
+            goto do_default;
+
+        case INDEX_op_brcond2_i32:
+            tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
+            if (tmp != 2) {
+                if (tmp) {
+                    reset_all_temps(nb_temps);
+                    s->gen_opc_buf[op_index] = INDEX_op_br;
+                    gen_args[0] = args[5];
+                    gen_args += 1;
+                } else {
+                    s->gen_opc_buf[op_index] = INDEX_op_nop;
+                }
+            } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
+                       && temps[args[2]].state == TCG_TEMP_CONST
+                       && temps[args[3]].state == TCG_TEMP_CONST
+                       && temps[args[2]].val == 0
+                       && temps[args[3]].val == 0) {
+                /* Simplify LT/GE comparisons vs zero to a single compare
+                   vs the high word of the input.  */
+                reset_all_temps(nb_temps);
+                s->gen_opc_buf[op_index] = INDEX_op_brcond_i32;
+                gen_args[0] = args[1];
+                gen_args[1] = args[3];
+                gen_args[2] = args[4];
+                gen_args[3] = args[5];
+                gen_args += 4;
+            } else {
+                goto do_default;
+            }
+            args += 6;
+            break;
+
+        case INDEX_op_setcond2_i32:
+            tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
+            if (tmp != 2) {
+                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
+                gen_args += 2;
+            } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
+                       && temps[args[3]].state == TCG_TEMP_CONST
+                       && temps[args[4]].state == TCG_TEMP_CONST
+                       && temps[args[3]].val == 0
+                       && temps[args[4]].val == 0) {
+                /* Simplify LT/GE comparisons vs zero to a single compare
+                   vs the high word of the input.  */
+                s->gen_opc_buf[op_index] = INDEX_op_setcond_i32;
+                reset_temp(args[0]);
+                gen_args[0] = args[0];
+                gen_args[1] = args[2];
+                gen_args[2] = args[4];
+                gen_args[3] = args[5];
+                gen_args += 4;
+            } else {
+                goto do_default;
+            }
+            args += 6;
+            break;
+
+        case INDEX_op_call:
+            nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
+            if (!(args[nb_call_args + 1] & (TCG_CALL_NO_READ_GLOBALS |
+                                            TCG_CALL_NO_WRITE_GLOBALS))) {
+                for (i = 0; i < nb_globals; i++) {
+                    reset_temp(i);
+                }
+            }
+            for (i = 0; i < (args[0] >> 16); i++) {
+                reset_temp(args[i + 1]);
+            }
+            i = nb_call_args + 3;
+            while (i) {
+                *gen_args = *args;
+                args++;
+                gen_args++;
+                i--;
+            }
+            break;
+
+        default:
+        do_default:
+            /* Default case: we know nothing about the operation (or were
+               unable to compute its result), so no propagation is done.
+               We trash everything if the operation ends a basic block,
+               otherwise we only trash the output args.  "mask" is the
+               non-zero bits mask for the first output arg.  */
+            if (def->flags & TCG_OPF_BB_END) {
+                reset_all_temps(nb_temps);
+            } else {
+                for (i = 0; i < def->nb_oargs; i++) {
+                    reset_temp(args[i]);
+                }
+            }
+            for (i = 0; i < def->nb_args; i++) {
+                gen_args[i] = args[i];
+            }
+            args += def->nb_args;
+            gen_args += def->nb_args;
+            break;
+        }
+    }
+
+    return gen_args;
+}
+
+TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr,
+        TCGArg *args, TCGOpDef *tcg_op_defs)
+{
+    TCGArg *res;
+    res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs);
+    return res;
+}
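
For readers who have not seen this pass before, the standalone sketch below (plain C, not part of the patch; ToyOp, ToyTemp and fold_constants are hypothetical names) shows the core idea of tcg_constant_folding() in miniature: walk the opcode stream, remember which temps currently hold known constants, and rewrite any op whose inputs are all constant into a plain "movi" of the precomputed value. The real pass above additionally tracks copies and known-zero bits, and canonicalizes commutative operands first.

    /* Standalone illustration only -- not part of the patch above. */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    enum { OP_MOVI, OP_ADD, OP_MUL };

    typedef struct {
        int op;         /* OP_MOVI, OP_ADD or OP_MUL */
        int dst, a, b;  /* temp indices; for OP_MOVI, 'a' holds the immediate */
    } ToyOp;

    typedef struct {
        bool is_const;
        int64_t val;
    } ToyTemp;

    static void fold_constants(ToyOp *ops, int n_ops, ToyTemp *temps)
    {
        for (int i = 0; i < n_ops; i++) {
            ToyOp *o = &ops[i];
            if (o->op == OP_MOVI) {
                temps[o->dst].is_const = true;
                temps[o->dst].val = o->a;
            } else if (temps[o->a].is_const && temps[o->b].is_const) {
                int64_t v = (o->op == OP_ADD)
                          ? temps[o->a].val + temps[o->b].val
                          : temps[o->a].val * temps[o->b].val;
                o->op = OP_MOVI;          /* rewrite in place, like op_to_movi() */
                o->a = (int)v;
                temps[o->dst].is_const = true;
                temps[o->dst].val = v;
            } else {
                temps[o->dst].is_const = false;  /* result no longer known */
            }
        }
    }

    int main(void)
    {
        ToyTemp temps[4] = { { false, 0 } };
        ToyOp ops[] = {
            { OP_MOVI, 0, 2, 0 },   /* t0 = 2 */
            { OP_MOVI, 1, 3, 0 },   /* t1 = 3 */
            { OP_ADD,  2, 0, 1 },   /* t2 = t0 + t1  ->  folded to: movi t2, 5 */
        };
        fold_constants(ops, 3, temps);
        printf("op 2 is now %s %d\n",
               ops[2].op == OP_MOVI ? "movi with value" : "still add,", ops[2].a);
        return 0;
    }
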
diff --git a/tcg/tcg-be-ldst.h b/tcg/tcg-be-ldst.h
new file mode 100644
index 0000000..284db0c
--- /dev/null
+++ b/tcg/tcg-be-ldst.h
@@ -0,0 +1,90 @@
+/*
+ * TCG Backend Data: load-store optimization only.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifdef CONFIG_SOFTMMU
+#define TCG_MAX_QEMU_LDST       640
+
+typedef struct TCGLabelQemuLdst {
+    int is_ld:1;            /* qemu_ld: 1, qemu_st: 0 */
+    TCGMemOp opc:4;
+    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
+    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
+    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
+    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
+    int mem_index;          /* soft MMU memory index */
+    uint8_t *raddr;         /* return address in generated code, just after the qemu_ld/st */
+    uint8_t *label_ptr[2];  /* label pointers to be updated */
+} TCGLabelQemuLdst;
+
+typedef struct TCGBackendData {
+    int nb_ldst_labels;
+    TCGLabelQemuLdst ldst_labels[TCG_MAX_QEMU_LDST];
+} TCGBackendData;
+
+
+/*
+ * Initialize TB backend data at the beginning of the TB.
+ */
+
+static inline void tcg_out_tb_init(TCGContext *s)
+{
+    s->be->nb_ldst_labels = 0;
+}
+
+/*
+ * Generate TB finalization at the end of the block.
+ */
+
+static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
+static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
+
+static void tcg_out_tb_finalize(TCGContext *s)
+{
+    TCGLabelQemuLdst *lb = s->be->ldst_labels;
+    int i, n = s->be->nb_ldst_labels;
+
+    /* qemu_ld/st slow paths */
+    for (i = 0; i < n; i++) {
+        if (lb[i].is_ld) {
+            tcg_out_qemu_ld_slow_path(s, lb + i);
+        } else {
+            tcg_out_qemu_st_slow_path(s, lb + i);
+        }
+    }
+}
+
+/*
+ * Allocate a new TCGLabelQemuLdst entry.
+ */
+
+static inline TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
+{
+    TCGBackendData *be = s->be;
+    int n = be->nb_ldst_labels;
+
+    assert(n < TCG_MAX_QEMU_LDST);
+    be->nb_ldst_labels = n + 1;
+    return &be->ldst_labels[n];
+}
+#else
+#include "tcg-be-null.h"
+#endif /* CONFIG_SOFTMMU */
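
Before moving on, here is a standalone sketch (hypothetical Toy* names, not taken from any real tcg/<target> backend) of the bookkeeping pattern this header supports: every qemu_ld/st fast path appends one label record, and the finalize step at the end of the TB walks the list to generate all slow paths. The real TCGLabelQemuLdst above additionally records the data/address registers, the return address raddr, and the branch sites in label_ptr[] that the slow path must patch.

    /* Standalone illustration only -- hypothetical names throughout. */
    #include <assert.h>
    #include <stdio.h>

    #define TOY_MAX_LDST 8

    typedef struct {
        int is_ld;        /* load: 1, store: 0 */
        int mem_index;    /* which soft-MMU index the access used */
    } ToyLdstLabel;

    typedef struct {
        int nb_ldst_labels;
        ToyLdstLabel ldst_labels[TOY_MAX_LDST];
    } ToyBackendData;

    static ToyLdstLabel *toy_new_ldst_label(ToyBackendData *be)
    {
        assert(be->nb_ldst_labels < TOY_MAX_LDST);
        return &be->ldst_labels[be->nb_ldst_labels++];
    }

    static void toy_tb_finalize(ToyBackendData *be)
    {
        for (int i = 0; i < be->nb_ldst_labels; i++) {
            printf("emit %s slow path (mem_index=%d)\n",
                   be->ldst_labels[i].is_ld ? "load" : "store",
                   be->ldst_labels[i].mem_index);
        }
    }

    int main(void)
    {
        ToyBackendData be = { 0 };

        /* The fast path of a qemu_ld records its slow-path info ... */
        ToyLdstLabel *l = toy_new_ldst_label(&be);
        l->is_ld = 1;
        l->mem_index = 0;

        /* ... and all slow paths are generated at the end of the TB. */
        toy_tb_finalize(&be);
        return 0;
    }
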
diff --git a/hw/usb/usb-dummy-android.c b/tcg/tcg-be-null.h
old mode 100755
new mode 100644
similarity index 73%
rename from hw/usb/usb-dummy-android.c
rename to tcg/tcg-be-null.h
index 3aeb8cb..74c57d5
--- a/hw/usb/usb-dummy-android.c
+++ b/tcg/tcg-be-null.h
@@ -1,7 +1,5 @@
 /*
- * Fake Win32 USB redirector
- *
- * Copyright (c) 2009 The Android Open Source Project
+ * TCG Backend Data: No backend data
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -22,22 +20,24 @@
  * THE SOFTWARE.
  */
 
-#include "qemu-common.h"
-#include "qemu/timer.h"
-#include "monitor/monitor.h"
-#include "hw/usb.h"
+typedef struct TCGBackendData {
+    /* Empty */
+    char dummy;
+} TCGBackendData;
 
-USBDevice *usb_host_device_open(const char *devname)
+
+/*
+ * Initialize TB backend data at the beginning of the TB.
+ */
+
+static inline void tcg_out_tb_init(TCGContext *s)
 {
-	return NULL;
 }
 
-int usb_host_device_close(const char *devname)
-{
-	return 0;
-}
+/*
+ * Generate TB finalization at the end of the block.
+ */
 
-void usb_host_info(Monitor *mon)
+static inline void tcg_out_tb_finalize(TCGContext *s)
 {
-    monitor_printf(mon, "   No devices\n");
 }
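
One small aside on the char dummy member kept in the otherwise empty TCGBackendData above: a struct with no members is not valid ISO C (GCC accepts it only as an extension, with size zero), so the placeholder keeps sizeof(TCGBackendData) well defined for backends that carry no data. A minimal standalone illustration, using a hypothetical type name:

    #include <stdio.h>

    /* A memberless struct is ill-formed in strict ISO C and has size 0 as a
     * GNU extension; a placeholder member keeps the size well defined. */
    typedef struct {
        char dummy;
    } EmptyBackendData;

    int main(void)
    {
        printf("sizeof(EmptyBackendData) = %zu\n", sizeof(EmptyBackendData));
        return 0;
    }
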
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 71f6e8a..7eabf22 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -25,335 +25,337 @@
 
 int gen_new_label(void);
 
+static inline void tcg_gen_op0(TCGOpcode opc)
+{
+    *tcg_ctx.gen_opc_ptr++ = opc;
+}
+
 static inline void tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 arg1)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg1);
 }
 
 static inline void tcg_gen_op1_i64(TCGOpcode opc, TCGv_i64 arg1)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg1);
 }
 
 static inline void tcg_gen_op1i(TCGOpcode opc, TCGArg arg1)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = arg1;
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = arg1;
 }
 
 static inline void tcg_gen_op2_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg2);
 }
 
 static inline void tcg_gen_op2_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg2);
 }
 
 static inline void tcg_gen_op2i_i32(TCGOpcode opc, TCGv_i32 arg1, TCGArg arg2)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
-    *gen_opparam_ptr++ = arg2;
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+    *tcg_ctx.gen_opparam_ptr++ = arg2;
 }
 
 static inline void tcg_gen_op2i_i64(TCGOpcode opc, TCGv_i64 arg1, TCGArg arg2)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
-    *gen_opparam_ptr++ = arg2;
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+    *tcg_ctx.gen_opparam_ptr++ = arg2;
 }
 
 static inline void tcg_gen_op2ii(TCGOpcode opc, TCGArg arg1, TCGArg arg2)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = arg1;
-    *gen_opparam_ptr++ = arg2;
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = arg1;
+    *tcg_ctx.gen_opparam_ptr++ = arg2;
 }
 
 static inline void tcg_gen_op3_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
                                    TCGv_i32 arg3)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg3);
 }
 
 static inline void tcg_gen_op3_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
                                    TCGv_i64 arg3)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg3);
 }
 
 static inline void tcg_gen_op3i_i32(TCGOpcode opc, TCGv_i32 arg1,
                                     TCGv_i32 arg2, TCGArg arg3)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
-    *gen_opparam_ptr++ = arg3;
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+    *tcg_ctx.gen_opparam_ptr++ = arg3;
 }
 
 static inline void tcg_gen_op3i_i64(TCGOpcode opc, TCGv_i64 arg1,
                                     TCGv_i64 arg2, TCGArg arg3)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
-    *gen_opparam_ptr++ = arg3;
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+    *tcg_ctx.gen_opparam_ptr++ = arg3;
 }
 
 static inline void tcg_gen_ldst_op_i32(TCGOpcode opc, TCGv_i32 val,
                                        TCGv_ptr base, TCGArg offset)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I32(val);
-    *gen_opparam_ptr++ = GET_TCGV_PTR(base);
-    *gen_opparam_ptr++ = offset;
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(val);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_PTR(base);
+    *tcg_ctx.gen_opparam_ptr++ = offset;
 }
 
 static inline void tcg_gen_ldst_op_i64(TCGOpcode opc, TCGv_i64 val,
                                        TCGv_ptr base, TCGArg offset)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I64(val);
-    *gen_opparam_ptr++ = GET_TCGV_PTR(base);
-    *gen_opparam_ptr++ = offset;
-}
-
-static inline void tcg_gen_qemu_ldst_op_i64_i32(TCGOpcode opc, TCGv_i64 val,
-                                                TCGv_i32 addr, TCGArg mem_index)
-{
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I64(val);
-    *gen_opparam_ptr++ = GET_TCGV_I32(addr);
-    *gen_opparam_ptr++ = mem_index;
-}
-
-static inline void tcg_gen_qemu_ldst_op_i64_i64(TCGOpcode opc, TCGv_i64 val,
-                                                TCGv_i64 addr, TCGArg mem_index)
-{
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I64(val);
-    *gen_opparam_ptr++ = GET_TCGV_I64(addr);
-    *gen_opparam_ptr++ = mem_index;
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(val);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_PTR(base);
+    *tcg_ctx.gen_opparam_ptr++ = offset;
 }
 
 static inline void tcg_gen_op4_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
                                    TCGv_i32 arg3, TCGv_i32 arg4)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg4);
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg4);
 }
 
 static inline void tcg_gen_op4_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
                                    TCGv_i64 arg3, TCGv_i64 arg4)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg4);
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg4);
 }
 
 static inline void tcg_gen_op4i_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
                                     TCGv_i32 arg3, TCGArg arg4)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
-    *gen_opparam_ptr++ = arg4;
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+    *tcg_ctx.gen_opparam_ptr++ = arg4;
 }
 
 static inline void tcg_gen_op4i_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
                                     TCGv_i64 arg3, TCGArg arg4)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
-    *gen_opparam_ptr++ = arg4;
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+    *tcg_ctx.gen_opparam_ptr++ = arg4;
 }
 
 static inline void tcg_gen_op4ii_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
                                      TCGArg arg3, TCGArg arg4)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
-    *gen_opparam_ptr++ = arg3;
-    *gen_opparam_ptr++ = arg4;
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+    *tcg_ctx.gen_opparam_ptr++ = arg3;
+    *tcg_ctx.gen_opparam_ptr++ = arg4;
 }
 
 static inline void tcg_gen_op4ii_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
                                      TCGArg arg3, TCGArg arg4)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
-    *gen_opparam_ptr++ = arg3;
-    *gen_opparam_ptr++ = arg4;
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+    *tcg_ctx.gen_opparam_ptr++ = arg3;
+    *tcg_ctx.gen_opparam_ptr++ = arg4;
 }
 
 static inline void tcg_gen_op5_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
                                    TCGv_i32 arg3, TCGv_i32 arg4, TCGv_i32 arg5)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg4);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg5);
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg4);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg5);
 }
 
 static inline void tcg_gen_op5_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
                                    TCGv_i64 arg3, TCGv_i64 arg4, TCGv_i64 arg5)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg4);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg5);
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg4);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg5);
 }
 
 static inline void tcg_gen_op5i_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
                                     TCGv_i32 arg3, TCGv_i32 arg4, TCGArg arg5)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg4);
-    *gen_opparam_ptr++ = arg5;
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg4);
+    *tcg_ctx.gen_opparam_ptr++ = arg5;
 }
 
 static inline void tcg_gen_op5i_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
                                     TCGv_i64 arg3, TCGv_i64 arg4, TCGArg arg5)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg4);
-    *gen_opparam_ptr++ = arg5;
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg4);
+    *tcg_ctx.gen_opparam_ptr++ = arg5;
 }
 
 static inline void tcg_gen_op5ii_i32(TCGOpcode opc, TCGv_i32 arg1,
                                      TCGv_i32 arg2, TCGv_i32 arg3,
                                      TCGArg arg4, TCGArg arg5)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
-    *gen_opparam_ptr++ = arg4;
-    *gen_opparam_ptr++ = arg5;
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+    *tcg_ctx.gen_opparam_ptr++ = arg4;
+    *tcg_ctx.gen_opparam_ptr++ = arg5;
 }
 
 static inline void tcg_gen_op5ii_i64(TCGOpcode opc, TCGv_i64 arg1,
                                      TCGv_i64 arg2, TCGv_i64 arg3,
                                      TCGArg arg4, TCGArg arg5)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
-    *gen_opparam_ptr++ = arg4;
-    *gen_opparam_ptr++ = arg5;
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+    *tcg_ctx.gen_opparam_ptr++ = arg4;
+    *tcg_ctx.gen_opparam_ptr++ = arg5;
 }
 
 static inline void tcg_gen_op6_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
                                    TCGv_i32 arg3, TCGv_i32 arg4, TCGv_i32 arg5,
                                    TCGv_i32 arg6)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg4);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg5);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg6);
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg4);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg5);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg6);
 }
 
 static inline void tcg_gen_op6_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
                                    TCGv_i64 arg3, TCGv_i64 arg4, TCGv_i64 arg5,
                                    TCGv_i64 arg6)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg4);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg5);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg6);
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg4);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg5);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg6);
 }
 
 static inline void tcg_gen_op6i_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
                                     TCGv_i32 arg3, TCGv_i32 arg4,
                                     TCGv_i32 arg5, TCGArg arg6)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg4);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg5);
-    *gen_opparam_ptr++ = arg6;
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg4);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg5);
+    *tcg_ctx.gen_opparam_ptr++ = arg6;
 }
 
 static inline void tcg_gen_op6i_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
                                     TCGv_i64 arg3, TCGv_i64 arg4,
                                     TCGv_i64 arg5, TCGArg arg6)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg4);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg5);
-    *gen_opparam_ptr++ = arg6;
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg4);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg5);
+    *tcg_ctx.gen_opparam_ptr++ = arg6;
 }
 
 static inline void tcg_gen_op6ii_i32(TCGOpcode opc, TCGv_i32 arg1,
                                      TCGv_i32 arg2, TCGv_i32 arg3,
                                      TCGv_i32 arg4, TCGArg arg5, TCGArg arg6)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
-    *gen_opparam_ptr++ = GET_TCGV_I32(arg4);
-    *gen_opparam_ptr++ = arg5;
-    *gen_opparam_ptr++ = arg6;
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg4);
+    *tcg_ctx.gen_opparam_ptr++ = arg5;
+    *tcg_ctx.gen_opparam_ptr++ = arg6;
 }
 
 static inline void tcg_gen_op6ii_i64(TCGOpcode opc, TCGv_i64 arg1,
                                      TCGv_i64 arg2, TCGv_i64 arg3,
                                      TCGv_i64 arg4, TCGArg arg5, TCGArg arg6)
 {
-    *gen_opc_ptr++ = opc;
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
-    *gen_opparam_ptr++ = GET_TCGV_I64(arg4);
-    *gen_opparam_ptr++ = arg5;
-    *gen_opparam_ptr++ = arg6;
+    *tcg_ctx.gen_opc_ptr++ = opc;
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg4);
+    *tcg_ctx.gen_opparam_ptr++ = arg5;
+    *tcg_ctx.gen_opparam_ptr++ = arg6;
+}
+
+static inline void tcg_add_param_i32(TCGv_i32 val)
+{
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(val);
+}
+
+static inline void tcg_add_param_i64(TCGv_i64 val)
+{
+#if TCG_TARGET_REG_BITS == 32
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(TCGV_LOW(val));
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(TCGV_HIGH(val));
+#else
+    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(val);
+#endif
 }
 
 static inline void gen_set_label(int n)
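
Every tcg_gen_opN helper in the hunk above follows the same pattern: the opcode is appended to one buffer and its operands to a parallel buffer, both now reached through the global tcg_ctx rather than the old bare gen_opc_ptr/gen_opparam_ptr globals. The toy sketch below (hypothetical ToyCtx type, not the real TCGContext) shows that two-stream encoding in isolation.

    /* Standalone illustration only -- toy version of the two parallel
     * streams: one buffer of opcodes, one of operands, advanced in lockstep. */
    #include <stdint.h>
    #include <stdio.h>

    typedef struct {
        uint16_t opc_buf[64];
        uint16_t *opc_ptr;
        intptr_t param_buf[256];
        intptr_t *param_ptr;
    } ToyCtx;

    static void toy_gen_op2(ToyCtx *ctx, uint16_t opc, intptr_t a1, intptr_t a2)
    {
        *ctx->opc_ptr++ = opc;       /* opcode stream */
        *ctx->param_ptr++ = a1;      /* operand stream */
        *ctx->param_ptr++ = a2;
    }

    int main(void)
    {
        ToyCtx ctx;
        ctx.opc_ptr = ctx.opc_buf;
        ctx.param_ptr = ctx.param_buf;

        toy_gen_op2(&ctx, /* hypothetical opcode id */ 7, 0, 1);
        printf("emitted %td opcode(s), %td operand(s)\n",
               ctx.opc_ptr - ctx.opc_buf, ctx.param_ptr - ctx.param_buf);
        return 0;
    }
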
@@ -389,27 +391,28 @@
                                    TCGArg ret, int nargs, TCGArg *args)
 {
     TCGv_ptr fn;
-    fn = tcg_const_ptr((tcg_target_long)func);
+    fn = tcg_const_ptr(func);
     tcg_gen_callN(&tcg_ctx, fn, flags, sizemask, ret,
                   nargs, args);
     tcg_temp_free_ptr(fn);
 }
 
 /* Note: Both tcg_gen_helper32() and tcg_gen_helper64() are currently
-   reserved for helpers in tcg-runtime.c. These helpers are all const
-   and pure, hence the call to tcg_gen_callN() with TCG_CALL_CONST |
-   TCG_CALL_PURE. This may need to be adjusted if these functions
-   start to be used with other helpers. */
+   reserved for helpers in tcg-runtime.c. None of these helpers reads
+   globals or has side effects, hence the call to tcg_gen_callN()
+   with TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_SIDE_EFFECTS. This may need
+   to be adjusted if these functions start to be used with other helpers. */
 static inline void tcg_gen_helper32(void *func, int sizemask, TCGv_i32 ret,
                                     TCGv_i32 a, TCGv_i32 b)
 {
     TCGv_ptr fn;
     TCGArg args[2];
-    fn = tcg_const_ptr((tcg_target_long)func);
+    fn = tcg_const_ptr(func);
     args[0] = GET_TCGV_I32(a);
     args[1] = GET_TCGV_I32(b);
-    tcg_gen_callN(&tcg_ctx, fn, TCG_CALL_CONST | TCG_CALL_PURE, sizemask,
-                  GET_TCGV_I32(ret), 2, args);
+    tcg_gen_callN(&tcg_ctx, fn,
+                  TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_SIDE_EFFECTS,
+                  sizemask, GET_TCGV_I32(ret), 2, args);
     tcg_temp_free_ptr(fn);
 }
 
@@ -418,11 +421,12 @@
 {
     TCGv_ptr fn;
     TCGArg args[2];
-    fn = tcg_const_ptr((tcg_target_long)func);
+    fn = tcg_const_ptr(func);
     args[0] = GET_TCGV_I64(a);
     args[1] = GET_TCGV_I64(b);
-    tcg_gen_callN(&tcg_ctx, fn, TCG_CALL_CONST | TCG_CALL_PURE, sizemask,
-                  GET_TCGV_I64(ret), 2, args);
+    tcg_gen_callN(&tcg_ctx, fn,
+                  TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_SIDE_EFFECTS,
+                  sizemask, GET_TCGV_I64(ret), 2, args);
     tcg_temp_free_ptr(fn);
 }
 
@@ -518,18 +522,34 @@
     }
 }
 
-static inline void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+static inline void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
 {
-    /* some cases can be optimized here */
-    if (arg2 == 0) {
+    TCGv_i32 t0;
+    /* Some cases can be optimized here.  */
+    switch (arg2) {
+    case 0:
         tcg_gen_movi_i32(ret, 0);
-    } else if (arg2 == 0xffffffff) {
+        return;
+    case 0xffffffffu:
         tcg_gen_mov_i32(ret, arg1);
-    } else {
-        TCGv_i32 t0 = tcg_const_i32(arg2);
-        tcg_gen_and_i32(ret, arg1, t0);
-        tcg_temp_free_i32(t0);
+        return;
+    case 0xffu:
+        /* Don't recurse with tcg_gen_ext8u_i32.  */
+        if (TCG_TARGET_HAS_ext8u_i32) {
+            tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg1);
+            return;
+        }
+        break;
+    case 0xffffu:
+        if (TCG_TARGET_HAS_ext16u_i32) {
+            tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg1);
+            return;
+        }
+        break;
     }
+    t0 = tcg_const_i32(arg2);
+    tcg_gen_and_i32(ret, arg1, t0);
+    tcg_temp_free_i32(t0);
 }
 
 static inline void tcg_gen_or_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
@@ -543,9 +563,9 @@
 
 static inline void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
 {
-    /* some cases can be optimized here */
-    if (arg2 == 0xffffffff) {
-        tcg_gen_movi_i32(ret, 0xffffffff);
+    /* Some cases can be optimized here.  */
+    if (arg2 == -1) {
+        tcg_gen_movi_i32(ret, -1);
     } else if (arg2 == 0) {
         tcg_gen_mov_i32(ret, arg1);
     } else {
@@ -566,9 +586,12 @@
 
 static inline void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
 {
-    /* some cases can be optimized here */
+    /* Some cases can be optimized here.  */
     if (arg2 == 0) {
         tcg_gen_mov_i32(ret, arg1);
+    } else if (arg2 == -1 && TCG_TARGET_HAS_not_i32) {
+        /* Don't recurse with tcg_gen_not_i32.  */
+        tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg1);
     } else {
         TCGv_i32 t0 = tcg_const_i32(arg2);
         tcg_gen_xor_i32(ret, arg1, t0);
@@ -627,29 +650,49 @@
 static inline void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1,
                                       TCGv_i32 arg2, int label_index)
 {
-    tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_index);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_br(label_index);
+    } else if (cond != TCG_COND_NEVER) {
+        tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_index);
+    }
 }
 
 static inline void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1,
                                        int32_t arg2, int label_index)
 {
-    TCGv_i32 t0 = tcg_const_i32(arg2);
-    tcg_gen_brcond_i32(cond, arg1, t0, label_index);
-    tcg_temp_free_i32(t0);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_br(label_index);
+    } else if (cond != TCG_COND_NEVER) {
+        TCGv_i32 t0 = tcg_const_i32(arg2);
+        tcg_gen_brcond_i32(cond, arg1, t0, label_index);
+        tcg_temp_free_i32(t0);
+    }
 }
 
 static inline void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret,
                                        TCGv_i32 arg1, TCGv_i32 arg2)
 {
-    tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_movi_i32(ret, 1);
+    } else if (cond == TCG_COND_NEVER) {
+        tcg_gen_movi_i32(ret, 0);
+    } else {
+        tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
+    }
 }
 
 static inline void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret,
                                         TCGv_i32 arg1, int32_t arg2)
 {
-    TCGv_i32 t0 = tcg_const_i32(arg2);
-    tcg_gen_setcond_i32(cond, ret, arg1, t0);
-    tcg_temp_free_i32(t0);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_movi_i32(ret, 1);
+    } else if (cond == TCG_COND_NEVER) {
+        tcg_gen_movi_i32(ret, 0);
+    } else {
+        TCGv_i32 t0 = tcg_const_i32(arg2);
+        tcg_gen_setcond_i32(cond, ret, arg1, t0);
+        tcg_temp_free_i32(t0);
+    }
 }
 
 static inline void tcg_gen_mul_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
@@ -664,107 +707,93 @@
     tcg_temp_free_i32(t0);
 }
 
-#ifdef TCG_TARGET_HAS_div_i32
 static inline void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
 {
-    tcg_gen_op3_i32(INDEX_op_div_i32, ret, arg1, arg2);
+    if (TCG_TARGET_HAS_div_i32) {
+        tcg_gen_op3_i32(INDEX_op_div_i32, ret, arg1, arg2);
+    } else if (TCG_TARGET_HAS_div2_i32) {
+        TCGv_i32 t0 = tcg_temp_new_i32();
+        tcg_gen_sari_i32(t0, arg1, 31);
+        tcg_gen_op5_i32(INDEX_op_div2_i32, ret, t0, arg1, t0, arg2);
+        tcg_temp_free_i32(t0);
+    } else {
+        int sizemask = 0;
+        /* Return value and both arguments are 32-bit and signed.  */
+        sizemask |= tcg_gen_sizemask(0, 0, 1);
+        sizemask |= tcg_gen_sizemask(1, 0, 1);
+        sizemask |= tcg_gen_sizemask(2, 0, 1);
+        tcg_gen_helper32(tcg_helper_div_i32, sizemask, ret, arg1, arg2);
+    }
 }
 
 static inline void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
 {
-    tcg_gen_op3_i32(INDEX_op_rem_i32, ret, arg1, arg2);
+    if (TCG_TARGET_HAS_rem_i32) {
+        tcg_gen_op3_i32(INDEX_op_rem_i32, ret, arg1, arg2);
+    } else if (TCG_TARGET_HAS_div_i32) {
+        TCGv_i32 t0 = tcg_temp_new_i32();
+        tcg_gen_op3_i32(INDEX_op_div_i32, t0, arg1, arg2);
+        tcg_gen_mul_i32(t0, t0, arg2);
+        tcg_gen_sub_i32(ret, arg1, t0);
+        tcg_temp_free_i32(t0);
+    } else if (TCG_TARGET_HAS_div2_i32) {
+        TCGv_i32 t0 = tcg_temp_new_i32();
+        tcg_gen_sari_i32(t0, arg1, 31);
+        tcg_gen_op5_i32(INDEX_op_div2_i32, t0, ret, arg1, t0, arg2);
+        tcg_temp_free_i32(t0);
+    } else {
+        int sizemask = 0;
+        /* Return value and both arguments are 32-bit and signed.  */
+        sizemask |= tcg_gen_sizemask(0, 0, 1);
+        sizemask |= tcg_gen_sizemask(1, 0, 1);
+        sizemask |= tcg_gen_sizemask(2, 0, 1);
+        tcg_gen_helper32(tcg_helper_rem_i32, sizemask, ret, arg1, arg2);
+    }
 }
 
 static inline void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
 {
-    tcg_gen_op3_i32(INDEX_op_divu_i32, ret, arg1, arg2);
+    if (TCG_TARGET_HAS_div_i32) {
+        tcg_gen_op3_i32(INDEX_op_divu_i32, ret, arg1, arg2);
+    } else if (TCG_TARGET_HAS_div2_i32) {
+        TCGv_i32 t0 = tcg_temp_new_i32();
+        tcg_gen_movi_i32(t0, 0);
+        tcg_gen_op5_i32(INDEX_op_divu2_i32, ret, t0, arg1, t0, arg2);
+        tcg_temp_free_i32(t0);
+    } else {
+        int sizemask = 0;
+        /* Return value and both arguments are 32-bit and unsigned.  */
+        sizemask |= tcg_gen_sizemask(0, 0, 0);
+        sizemask |= tcg_gen_sizemask(1, 0, 0);
+        sizemask |= tcg_gen_sizemask(2, 0, 0);
+        tcg_gen_helper32(tcg_helper_divu_i32, sizemask, ret, arg1, arg2);
+    }
 }
 
 static inline void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
 {
-    tcg_gen_op3_i32(INDEX_op_remu_i32, ret, arg1, arg2);
+    if (TCG_TARGET_HAS_rem_i32) {
+        tcg_gen_op3_i32(INDEX_op_remu_i32, ret, arg1, arg2);
+    } else if (TCG_TARGET_HAS_div_i32) {
+        TCGv_i32 t0 = tcg_temp_new_i32();
+        tcg_gen_op3_i32(INDEX_op_divu_i32, t0, arg1, arg2);
+        tcg_gen_mul_i32(t0, t0, arg2);
+        tcg_gen_sub_i32(ret, arg1, t0);
+        tcg_temp_free_i32(t0);
+    } else if (TCG_TARGET_HAS_div2_i32) {
+        TCGv_i32 t0 = tcg_temp_new_i32();
+        tcg_gen_movi_i32(t0, 0);
+        tcg_gen_op5_i32(INDEX_op_divu2_i32, t0, ret, arg1, t0, arg2);
+        tcg_temp_free_i32(t0);
+    } else {
+        int sizemask = 0;
+        /* Return value and both arguments are 32-bit and unsigned.  */
+        sizemask |= tcg_gen_sizemask(0, 0, 0);
+        sizemask |= tcg_gen_sizemask(1, 0, 0);
+        sizemask |= tcg_gen_sizemask(2, 0, 0);
+        tcg_gen_helper32(tcg_helper_remu_i32, sizemask, ret, arg1, arg2);
+    }
 }
-#elif defined(TCG_TARGET_HAS_div2_i32)
-static inline void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
-{
-    TCGv_i32 t0;
-    t0 = tcg_temp_new_i32();
-    tcg_gen_sari_i32(t0, arg1, 31);
-    tcg_gen_op5_i32(INDEX_op_div2_i32, ret, t0, arg1, t0, arg2);
-    tcg_temp_free_i32(t0);
-}
-
-static inline void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
-{
-    TCGv_i32 t0;
-    t0 = tcg_temp_new_i32();
-    tcg_gen_sari_i32(t0, arg1, 31);
-    tcg_gen_op5_i32(INDEX_op_div2_i32, t0, ret, arg1, t0, arg2);
-    tcg_temp_free_i32(t0);
-}
-
-static inline void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
-{
-    TCGv_i32 t0;
-    t0 = tcg_temp_new_i32();
-    tcg_gen_movi_i32(t0, 0);
-    tcg_gen_op5_i32(INDEX_op_divu2_i32, ret, t0, arg1, t0, arg2);
-    tcg_temp_free_i32(t0);
-}
-
-static inline void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
-{
-    TCGv_i32 t0;
-    t0 = tcg_temp_new_i32();
-    tcg_gen_movi_i32(t0, 0);
-    tcg_gen_op5_i32(INDEX_op_divu2_i32, t0, ret, arg1, t0, arg2);
-    tcg_temp_free_i32(t0);
-}
-#else
-static inline void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
-{
-    int sizemask = 0;
-    /* Return value and both arguments are 32-bit and signed.  */
-    sizemask |= tcg_gen_sizemask(0, 0, 1);
-    sizemask |= tcg_gen_sizemask(1, 0, 1);
-    sizemask |= tcg_gen_sizemask(2, 0, 1);
-
-    tcg_gen_helper32(tcg_helper_div_i32, sizemask, ret, arg1, arg2);
-}
-
-static inline void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
-{
-    int sizemask = 0;
-    /* Return value and both arguments are 32-bit and signed.  */
-    sizemask |= tcg_gen_sizemask(0, 0, 1);
-    sizemask |= tcg_gen_sizemask(1, 0, 1);
-    sizemask |= tcg_gen_sizemask(2, 0, 1);
-
-    tcg_gen_helper32(tcg_helper_rem_i32, sizemask, ret, arg1, arg2);
-}
-
-static inline void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
-{
-    int sizemask = 0;
-    /* Return value and both arguments are 32-bit and unsigned.  */
-    sizemask |= tcg_gen_sizemask(0, 0, 0);
-    sizemask |= tcg_gen_sizemask(1, 0, 0);
-    sizemask |= tcg_gen_sizemask(2, 0, 0);
-
-    tcg_gen_helper32(tcg_helper_divu_i32, sizemask, ret, arg1, arg2);
-}
-
-static inline void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
-{
-    int sizemask = 0;
-    /* Return value and both arguments are 32-bit and unsigned.  */
-    sizemask |= tcg_gen_sizemask(0, 0, 0);
-    sizemask |= tcg_gen_sizemask(1, 0, 0);
-    sizemask |= tcg_gen_sizemask(2, 0, 0);
-
-    tcg_gen_helper32(tcg_helper_remu_i32, sizemask, ret, arg1, arg2);
-}
-#endif
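The i32 division expansions above now pick a strategy with ordinary if/else on the TCG_TARGET_HAS_* flags rather than via #ifdef: a native div/rem opcode, a remainder rebuilt from the quotient, an x86-style div2/divu2 opcode that consumes a double-width dividend (high half seeded with the sign via sari for the signed forms, with zero for the unsigned forms), or a call to a runtime helper. A minimal C sketch of the two synthesized cases, for reference only:

#include <stdint.h>

/* Remainder rebuilt from the quotient, as in the div-only fallback:
   rem = arg1 - (arg1 / arg2) * arg2.  */
static int32_t rem_from_div(int32_t arg1, int32_t arg2)
{
    return arg1 - (arg1 / arg2) * arg2;
}

/* div2-style lowering: the double-width dividend is the sign extension
   of arg1, i.e. what "sari t0, arg1, 31" builds for the high half
   (much like x86 cdq before idiv).  */
static int32_t div_via_div2(int32_t arg1, int32_t arg2)
{
    int64_t dividend = (int64_t)arg1;
    return (int32_t)(dividend / arg2);
}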
 
 #if TCG_TARGET_REG_BITS == 32
 
@@ -873,6 +902,8 @@
     tcg_gen_op6_i32(INDEX_op_add2_i32, TCGV_LOW(ret), TCGV_HIGH(ret),
                     TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
                     TCGV_HIGH(arg2));
+    /* Allow the optimizer room to replace add2 with two moves.  */
+    tcg_gen_op0(INDEX_op_nop);
 }
 
 static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -880,6 +911,8 @@
     tcg_gen_op6_i32(INDEX_op_sub2_i32, TCGV_LOW(ret), TCGV_HIGH(ret),
                     TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
                     TCGV_HIGH(arg2));
+    /* Allow the optimizer room to replace sub2 with two moves.  */
+    tcg_gen_op0(INDEX_op_nop);
 }
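The nop emitted after add2/sub2 here (and after mulu2/muls2 further down) reserves a slot in the op stream: if the optimizer can later prove the result, it may rewrite the single double-word op into two plain moves in place, and the second move needs somewhere to go. A purely hypothetical sketch of that kind of in-place rewrite (the struct and opcode names below are illustrative only, not QEMU's):

/* Illustrative only; not QEMU data structures. */
enum { OP_ADD2, OP_MOV_I32, OP_NOP };
struct op { int opc; int args[6]; };

/* If both addend words are known to be zero, add2 + nop collapses to
   two moves; the second mov lands in the slot the nop reserved. */
static void fold_add2_zero(struct op *add2, struct op *nop)
{
    struct op lo = { OP_MOV_I32, { add2->args[0], add2->args[2] } };
    struct op hi = { OP_MOV_I32, { add2->args[1], add2->args[3] } };
    *add2 = lo;
    *nop = hi;
}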
 
 static inline void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -971,17 +1004,27 @@
 static inline void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1,
                                       TCGv_i64 arg2, int label_index)
 {
-    tcg_gen_op6ii_i32(INDEX_op_brcond2_i32,
-                      TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
-                      TCGV_HIGH(arg2), cond, label_index);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_br(label_index);
+    } else if (cond != TCG_COND_NEVER) {
+        tcg_gen_op6ii_i32(INDEX_op_brcond2_i32,
+                          TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
+                          TCGV_HIGH(arg2), cond, label_index);
+    }
 }
 
 static inline void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
                                        TCGv_i64 arg1, TCGv_i64 arg2)
 {
-    tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
-                     TCGV_LOW(arg1), TCGV_HIGH(arg1),
-                     TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_movi_i32(TCGV_LOW(ret), 1);
+    } else if (cond == TCG_COND_NEVER) {
+        tcg_gen_movi_i32(TCGV_LOW(ret), 0);
+    } else {
+        tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
+                         TCGV_LOW(arg1), TCGV_HIGH(arg1),
+                         TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
+    }
     tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
 }
 
@@ -993,8 +1036,18 @@
     t0 = tcg_temp_new_i64();
     t1 = tcg_temp_new_i32();
 
-    tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0),
-                    TCGV_LOW(arg1), TCGV_LOW(arg2));
+    if (TCG_TARGET_HAS_mulu2_i32) {
+        tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0),
+                        TCGV_LOW(arg1), TCGV_LOW(arg2));
+        /* Allow the optimizer room to replace mulu2 with two moves.  */
+        tcg_gen_op0(INDEX_op_nop);
+    } else {
+        tcg_debug_assert(TCG_TARGET_HAS_muluh_i32);
+        tcg_gen_op3_i32(INDEX_op_mul_i32, TCGV_LOW(t0),
+                        TCGV_LOW(arg1), TCGV_LOW(arg2));
+        tcg_gen_op3_i32(INDEX_op_muluh_i32, TCGV_HIGH(t0),
+                        TCGV_LOW(arg1), TCGV_LOW(arg2));
+    }
 
     tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2));
     tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
@@ -1063,66 +1116,66 @@
     tcg_gen_op2i_i64(INDEX_op_movi_i64, ret, arg);
 }
 
-static inline void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_i64 arg2,
+static inline void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2,
                                     tcg_target_long offset)
 {
     tcg_gen_ldst_op_i64(INDEX_op_ld8u_i64, ret, arg2, offset);
 }
 
-static inline void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_i64 arg2,
+static inline void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2,
                                     tcg_target_long offset)
 {
     tcg_gen_ldst_op_i64(INDEX_op_ld8s_i64, ret, arg2, offset);
 }
 
-static inline void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_i64 arg2,
+static inline void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2,
                                      tcg_target_long offset)
 {
     tcg_gen_ldst_op_i64(INDEX_op_ld16u_i64, ret, arg2, offset);
 }
 
-static inline void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_i64 arg2,
+static inline void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2,
                                      tcg_target_long offset)
 {
     tcg_gen_ldst_op_i64(INDEX_op_ld16s_i64, ret, arg2, offset);
 }
 
-static inline void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_i64 arg2,
+static inline void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2,
                                      tcg_target_long offset)
 {
     tcg_gen_ldst_op_i64(INDEX_op_ld32u_i64, ret, arg2, offset);
 }
 
-static inline void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_i64 arg2,
+static inline void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2,
                                      tcg_target_long offset)
 {
     tcg_gen_ldst_op_i64(INDEX_op_ld32s_i64, ret, arg2, offset);
 }
 
-static inline void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_i64 arg2, tcg_target_long offset)
+static inline void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
 {
     tcg_gen_ldst_op_i64(INDEX_op_ld_i64, ret, arg2, offset);
 }
 
-static inline void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_i64 arg2,
+static inline void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2,
                                    tcg_target_long offset)
 {
     tcg_gen_ldst_op_i64(INDEX_op_st8_i64, arg1, arg2, offset);
 }
 
-static inline void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_i64 arg2,
+static inline void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2,
                                     tcg_target_long offset)
 {
     tcg_gen_ldst_op_i64(INDEX_op_st16_i64, arg1, arg2, offset);
 }
 
-static inline void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_i64 arg2,
+static inline void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2,
                                     tcg_target_long offset)
 {
     tcg_gen_ldst_op_i64(INDEX_op_st32_i64, arg1, arg2, offset);
 }
 
-static inline void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_i64 arg2, tcg_target_long offset)
+static inline void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset)
 {
     tcg_gen_ldst_op_i64(INDEX_op_st_i64, arg1, arg2, offset);
 }
@@ -1146,9 +1199,38 @@
     }
 }
 
-static inline void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+static inline void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
 {
-    TCGv_i64 t0 = tcg_const_i64(arg2);
+    TCGv_i64 t0;
+    /* Some cases can be optimized here.  */
+    switch (arg2) {
+    case 0:
+        tcg_gen_movi_i64(ret, 0);
+        return;
+    case 0xffffffffffffffffull:
+        tcg_gen_mov_i64(ret, arg1);
+        return;
+    case 0xffull:
+        /* Don't recurse with tcg_gen_ext8u_i32.  */
+        if (TCG_TARGET_HAS_ext8u_i64) {
+            tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg1);
+            return;
+        }
+        break;
+    case 0xffffu:
+        if (TCG_TARGET_HAS_ext16u_i64) {
+            tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg1);
+            return;
+        }
+        break;
+    case 0xffffffffull:
+        if (TCG_TARGET_HAS_ext32u_i64) {
+            tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg1);
+            return;
+        }
+        break;
+    }
+    t0 = tcg_const_i64(arg2);
     tcg_gen_and_i64(ret, arg1, t0);
     tcg_temp_free_i64(t0);
 }
@@ -1164,9 +1246,16 @@
 
 static inline void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
 {
-    TCGv_i64 t0 = tcg_const_i64(arg2);
-    tcg_gen_or_i64(ret, arg1, t0);
-    tcg_temp_free_i64(t0);
+    /* Some cases can be optimized here.  */
+    if (arg2 == -1) {
+        tcg_gen_movi_i64(ret, -1);
+    } else if (arg2 == 0) {
+        tcg_gen_mov_i64(ret, arg1);
+    } else {
+        TCGv_i64 t0 = tcg_const_i64(arg2);
+        tcg_gen_or_i64(ret, arg1, t0);
+        tcg_temp_free_i64(t0);
+    }
 }
 
 static inline void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -1180,9 +1269,17 @@
 
 static inline void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
 {
-    TCGv_i64 t0 = tcg_const_i64(arg2);
-    tcg_gen_xor_i64(ret, arg1, t0);
-    tcg_temp_free_i64(t0);
+    /* Some cases can be optimized here.  */
+    if (arg2 == 0) {
+        tcg_gen_mov_i64(ret, arg1);
+    } else if (arg2 == -1 && TCG_TARGET_HAS_not_i64) {
+        /* Don't recurse with tcg_gen_not_i64.  */
+        tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg1);
+    } else {
+        TCGv_i64 t0 = tcg_const_i64(arg2);
+        tcg_gen_xor_i64(ret, arg1, t0);
+        tcg_temp_free_i64(t0);
+    }
 }
 
 static inline void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -1236,13 +1333,23 @@
 static inline void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1,
                                       TCGv_i64 arg2, int label_index)
 {
-    tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond, label_index);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_br(label_index);
+    } else if (cond != TCG_COND_NEVER) {
+        tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond, label_index);
+    }
 }
 
 static inline void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
                                        TCGv_i64 arg1, TCGv_i64 arg2)
 {
-    tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_movi_i64(ret, 1);
+    } else if (cond == TCG_COND_NEVER) {
+        tcg_gen_movi_i64(ret, 0);
+    } else {
+        tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
+    }
 }
 
 static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -1250,109 +1357,94 @@
     tcg_gen_op3_i64(INDEX_op_mul_i64, ret, arg1, arg2);
 }
 
-#ifdef TCG_TARGET_HAS_div_i64
 static inline void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
-    tcg_gen_op3_i64(INDEX_op_div_i64, ret, arg1, arg2);
+    if (TCG_TARGET_HAS_div_i64) {
+        tcg_gen_op3_i64(INDEX_op_div_i64, ret, arg1, arg2);
+    } else if (TCG_TARGET_HAS_div2_i64) {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        tcg_gen_sari_i64(t0, arg1, 63);
+        tcg_gen_op5_i64(INDEX_op_div2_i64, ret, t0, arg1, t0, arg2);
+        tcg_temp_free_i64(t0);
+    } else {
+        int sizemask = 0;
+        /* Return value and both arguments are 64-bit and signed.  */
+        sizemask |= tcg_gen_sizemask(0, 1, 1);
+        sizemask |= tcg_gen_sizemask(1, 1, 1);
+        sizemask |= tcg_gen_sizemask(2, 1, 1);
+        tcg_gen_helper64(tcg_helper_div_i64, sizemask, ret, arg1, arg2);
+    }
 }
 
 static inline void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
-    tcg_gen_op3_i64(INDEX_op_rem_i64, ret, arg1, arg2);
+    if (TCG_TARGET_HAS_rem_i64) {
+        tcg_gen_op3_i64(INDEX_op_rem_i64, ret, arg1, arg2);
+    } else if (TCG_TARGET_HAS_div_i64) {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        tcg_gen_op3_i64(INDEX_op_div_i64, t0, arg1, arg2);
+        tcg_gen_mul_i64(t0, t0, arg2);
+        tcg_gen_sub_i64(ret, arg1, t0);
+        tcg_temp_free_i64(t0);
+    } else if (TCG_TARGET_HAS_div2_i64) {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        tcg_gen_sari_i64(t0, arg1, 63);
+        tcg_gen_op5_i64(INDEX_op_div2_i64, t0, ret, arg1, t0, arg2);
+        tcg_temp_free_i64(t0);
+    } else {
+        int sizemask = 0;
+        /* Return value and both arguments are 64-bit and signed.  */
+        sizemask |= tcg_gen_sizemask(0, 1, 1);
+        sizemask |= tcg_gen_sizemask(1, 1, 1);
+        sizemask |= tcg_gen_sizemask(2, 1, 1);
+        tcg_gen_helper64(tcg_helper_rem_i64, sizemask, ret, arg1, arg2);
+    }
 }
 
 static inline void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
-    tcg_gen_op3_i64(INDEX_op_divu_i64, ret, arg1, arg2);
+    if (TCG_TARGET_HAS_div_i64) {
+        tcg_gen_op3_i64(INDEX_op_divu_i64, ret, arg1, arg2);
+    } else if (TCG_TARGET_HAS_div2_i64) {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        tcg_gen_movi_i64(t0, 0);
+        tcg_gen_op5_i64(INDEX_op_divu2_i64, ret, t0, arg1, t0, arg2);
+        tcg_temp_free_i64(t0);
+    } else {
+        int sizemask = 0;
+        /* Return value and both arguments are 64-bit and unsigned.  */
+        sizemask |= tcg_gen_sizemask(0, 1, 0);
+        sizemask |= tcg_gen_sizemask(1, 1, 0);
+        sizemask |= tcg_gen_sizemask(2, 1, 0);
+        tcg_gen_helper64(tcg_helper_divu_i64, sizemask, ret, arg1, arg2);
+    }
 }
 
 static inline void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
-    tcg_gen_op3_i64(INDEX_op_remu_i64, ret, arg1, arg2);
+    if (TCG_TARGET_HAS_rem_i64) {
+        tcg_gen_op3_i64(INDEX_op_remu_i64, ret, arg1, arg2);
+    } else if (TCG_TARGET_HAS_div_i64) {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        tcg_gen_op3_i64(INDEX_op_divu_i64, t0, arg1, arg2);
+        tcg_gen_mul_i64(t0, t0, arg2);
+        tcg_gen_sub_i64(ret, arg1, t0);
+        tcg_temp_free_i64(t0);
+    } else if (TCG_TARGET_HAS_div2_i64) {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        tcg_gen_movi_i64(t0, 0);
+        tcg_gen_op5_i64(INDEX_op_divu2_i64, t0, ret, arg1, t0, arg2);
+        tcg_temp_free_i64(t0);
+    } else {
+        int sizemask = 0;
+        /* Return value and both arguments are 64-bit and unsigned.  */
+        sizemask |= tcg_gen_sizemask(0, 1, 0);
+        sizemask |= tcg_gen_sizemask(1, 1, 0);
+        sizemask |= tcg_gen_sizemask(2, 1, 0);
+        tcg_gen_helper64(tcg_helper_remu_i64, sizemask, ret, arg1, arg2);
+    }
 }
-#elif defined(TCG_TARGET_HAS_div2_i64)
-static inline void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
-{
-    TCGv_i64 t0;
-    t0 = tcg_temp_new_i64();
-    tcg_gen_sari_i64(t0, arg1, 63);
-    tcg_gen_op5_i64(INDEX_op_div2_i64, ret, t0, arg1, t0, arg2);
-    tcg_temp_free_i64(t0);
-}
-
-static inline void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
-{
-    TCGv_i64 t0;
-    t0 = tcg_temp_new_i64();
-    tcg_gen_sari_i64(t0, arg1, 63);
-    tcg_gen_op5_i64(INDEX_op_div2_i64, t0, ret, arg1, t0, arg2);
-    tcg_temp_free_i64(t0);
-}
-
-static inline void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
-{
-    TCGv_i64 t0;
-    t0 = tcg_temp_new_i64();
-    tcg_gen_movi_i64(t0, 0);
-    tcg_gen_op5_i64(INDEX_op_divu2_i64, ret, t0, arg1, t0, arg2);
-    tcg_temp_free_i64(t0);
-}
-
-static inline void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
-{
-    TCGv_i64 t0;
-    t0 = tcg_temp_new_i64();
-    tcg_gen_movi_i64(t0, 0);
-    tcg_gen_op5_i64(INDEX_op_divu2_i64, t0, ret, arg1, t0, arg2);
-    tcg_temp_free_i64(t0);
-}
-#else
-static inline void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
-{
-    int sizemask = 0;
-    /* Return value and both arguments are 64-bit and signed.  */
-    sizemask |= tcg_gen_sizemask(0, 1, 1);
-    sizemask |= tcg_gen_sizemask(1, 1, 1);
-    sizemask |= tcg_gen_sizemask(2, 1, 1);
-
-    tcg_gen_helper64(tcg_helper_div_i64, sizemask, ret, arg1, arg2);
-}
-
-static inline void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
-{
-    int sizemask = 0;
-    /* Return value and both arguments are 64-bit and signed.  */
-    sizemask |= tcg_gen_sizemask(0, 1, 1);
-    sizemask |= tcg_gen_sizemask(1, 1, 1);
-    sizemask |= tcg_gen_sizemask(2, 1, 1);
-
-    tcg_gen_helper64(tcg_helper_rem_i64, sizemask, ret, arg1, arg2);
-}
-
-static inline void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
-{
-    int sizemask = 0;
-    /* Return value and both arguments are 64-bit and unsigned.  */
-    sizemask |= tcg_gen_sizemask(0, 1, 0);
-    sizemask |= tcg_gen_sizemask(1, 1, 0);
-    sizemask |= tcg_gen_sizemask(2, 1, 0);
-
-    tcg_gen_helper64(tcg_helper_divu_i64, sizemask, ret, arg1, arg2);
-}
-
-static inline void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
-{
-    int sizemask = 0;
-    /* Return value and both arguments are 64-bit and unsigned.  */
-    sizemask |= tcg_gen_sizemask(0, 1, 0);
-    sizemask |= tcg_gen_sizemask(1, 1, 0);
-    sizemask |= tcg_gen_sizemask(2, 1, 0);
-
-    tcg_gen_helper64(tcg_helper_remu_i64, sizemask, ret, arg1, arg2);
-}
-#endif
-
-#endif
+#endif /* TCG_TARGET_REG_BITS == 32 */
 
 static inline void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
 {
@@ -1387,9 +1479,13 @@
 static inline void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1,
                                        int64_t arg2, int label_index)
 {
-    TCGv_i64 t0 = tcg_const_i64(arg2);
-    tcg_gen_brcond_i64(cond, arg1, t0, label_index);
-    tcg_temp_free_i64(t0);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_br(label_index);
+    } else if (cond != TCG_COND_NEVER) {
+        TCGv_i64 t0 = tcg_const_i64(arg2);
+        tcg_gen_brcond_i64(cond, arg1, t0, label_index);
+        tcg_temp_free_i64(t0);
+    }
 }
 
 static inline void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret,
@@ -1413,82 +1509,82 @@
 
 static inline void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg)
 {
-#ifdef TCG_TARGET_HAS_ext8s_i32
-    tcg_gen_op2_i32(INDEX_op_ext8s_i32, ret, arg);
-#else
-    tcg_gen_shli_i32(ret, arg, 24);
-    tcg_gen_sari_i32(ret, ret, 24);
-#endif
+    if (TCG_TARGET_HAS_ext8s_i32) {
+        tcg_gen_op2_i32(INDEX_op_ext8s_i32, ret, arg);
+    } else {
+        tcg_gen_shli_i32(ret, arg, 24);
+        tcg_gen_sari_i32(ret, ret, 24);
+    }
 }
 
 static inline void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg)
 {
-#ifdef TCG_TARGET_HAS_ext16s_i32
-    tcg_gen_op2_i32(INDEX_op_ext16s_i32, ret, arg);
-#else
-    tcg_gen_shli_i32(ret, arg, 16);
-    tcg_gen_sari_i32(ret, ret, 16);
-#endif
+    if (TCG_TARGET_HAS_ext16s_i32) {
+        tcg_gen_op2_i32(INDEX_op_ext16s_i32, ret, arg);
+    } else {
+        tcg_gen_shli_i32(ret, arg, 16);
+        tcg_gen_sari_i32(ret, ret, 16);
+    }
 }
 
 static inline void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg)
 {
-#ifdef TCG_TARGET_HAS_ext8u_i32
-    tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg);
-#else
-    tcg_gen_andi_i32(ret, arg, 0xffu);
-#endif
+    if (TCG_TARGET_HAS_ext8u_i32) {
+        tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg);
+    } else {
+        tcg_gen_andi_i32(ret, arg, 0xffu);
+    }
 }
 
 static inline void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg)
 {
-#ifdef TCG_TARGET_HAS_ext16u_i32
-    tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg);
-#else
-    tcg_gen_andi_i32(ret, arg, 0xffffu);
-#endif
+    if (TCG_TARGET_HAS_ext16u_i32) {
+        tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg);
+    } else {
+        tcg_gen_andi_i32(ret, arg, 0xffffu);
+    }
 }
 
 /* Note: we assume the two high bytes are set to zero */
 static inline void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg)
 {
-#ifdef TCG_TARGET_HAS_bswap16_i32
-    tcg_gen_op2_i32(INDEX_op_bswap16_i32, ret, arg);
-#else
-    TCGv_i32 t0 = tcg_temp_new_i32();
-
-    tcg_gen_ext8u_i32(t0, arg);
-    tcg_gen_shli_i32(t0, t0, 8);
-    tcg_gen_shri_i32(ret, arg, 8);
-    tcg_gen_or_i32(ret, ret, t0);
-    tcg_temp_free_i32(t0);
-#endif
+    if (TCG_TARGET_HAS_bswap16_i32) {
+        tcg_gen_op2_i32(INDEX_op_bswap16_i32, ret, arg);
+    } else {
+        TCGv_i32 t0 = tcg_temp_new_i32();
+
+        tcg_gen_ext8u_i32(t0, arg);
+        tcg_gen_shli_i32(t0, t0, 8);
+        tcg_gen_shri_i32(ret, arg, 8);
+        tcg_gen_or_i32(ret, ret, t0);
+        tcg_temp_free_i32(t0);
+    }
 }
 
 static inline void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg)
 {
-#ifdef TCG_TARGET_HAS_bswap32_i32
-    tcg_gen_op2_i32(INDEX_op_bswap32_i32, ret, arg);
-#else
-    TCGv_i32 t0, t1;
-    t0 = tcg_temp_new_i32();
-    t1 = tcg_temp_new_i32();
-
-    tcg_gen_shli_i32(t0, arg, 24);
-
-    tcg_gen_andi_i32(t1, arg, 0x0000ff00);
-    tcg_gen_shli_i32(t1, t1, 8);
-    tcg_gen_or_i32(t0, t0, t1);
-
-    tcg_gen_shri_i32(t1, arg, 8);
-    tcg_gen_andi_i32(t1, t1, 0x0000ff00);
-    tcg_gen_or_i32(t0, t0, t1);
-
-    tcg_gen_shri_i32(t1, arg, 24);
-    tcg_gen_or_i32(ret, t0, t1);
-    tcg_temp_free_i32(t0);
-    tcg_temp_free_i32(t1);
-#endif
+    if (TCG_TARGET_HAS_bswap32_i32) {
+        tcg_gen_op2_i32(INDEX_op_bswap32_i32, ret, arg);
+    } else {
+        TCGv_i32 t0, t1;
+        t0 = tcg_temp_new_i32();
+        t1 = tcg_temp_new_i32();
+
+        tcg_gen_shli_i32(t0, arg, 24);
+
+        tcg_gen_andi_i32(t1, arg, 0x0000ff00);
+        tcg_gen_shli_i32(t1, t1, 8);
+        tcg_gen_or_i32(t0, t0, t1);
+
+        tcg_gen_shri_i32(t1, arg, 8);
+        tcg_gen_andi_i32(t1, t1, 0x0000ff00);
+        tcg_gen_or_i32(t0, t0, t1);
+
+        tcg_gen_shri_i32(t1, arg, 24);
+        tcg_gen_or_i32(ret, t0, t1);
+        tcg_temp_free_i32(t0);
+        tcg_temp_free_i32(t1);
+    }
 }
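The open-coded bswap32 fallback assembles the result with shifts and masks; the same sequence in plain C, for reference:

#include <stdint.h>

/* Mirrors the shli/andi/shri/or sequence emitted above. */
static uint32_t bswap32_ref(uint32_t x)
{
    return (x << 24)
         | ((x & 0x0000ff00u) << 8)
         | ((x >> 8) & 0x0000ff00u)
         | (x >> 24);
}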
 
 #if TCG_TARGET_REG_BITS == 32
@@ -1576,59 +1672,59 @@
 
 static inline void tcg_gen_ext8s_i64(TCGv_i64 ret, TCGv_i64 arg)
 {
-#ifdef TCG_TARGET_HAS_ext8s_i64
-    tcg_gen_op2_i64(INDEX_op_ext8s_i64, ret, arg);
-#else
-    tcg_gen_shli_i64(ret, arg, 56);
-    tcg_gen_sari_i64(ret, ret, 56);
-#endif
+    if (TCG_TARGET_HAS_ext8s_i64) {
+        tcg_gen_op2_i64(INDEX_op_ext8s_i64, ret, arg);
+    } else {
+        tcg_gen_shli_i64(ret, arg, 56);
+        tcg_gen_sari_i64(ret, ret, 56);
+    }
 }
 
 static inline void tcg_gen_ext16s_i64(TCGv_i64 ret, TCGv_i64 arg)
 {
-#ifdef TCG_TARGET_HAS_ext16s_i64
-    tcg_gen_op2_i64(INDEX_op_ext16s_i64, ret, arg);
-#else
-    tcg_gen_shli_i64(ret, arg, 48);
-    tcg_gen_sari_i64(ret, ret, 48);
-#endif
+    if (TCG_TARGET_HAS_ext16s_i64) {
+        tcg_gen_op2_i64(INDEX_op_ext16s_i64, ret, arg);
+    } else {
+        tcg_gen_shli_i64(ret, arg, 48);
+        tcg_gen_sari_i64(ret, ret, 48);
+    }
 }
 
 static inline void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg)
 {
-#ifdef TCG_TARGET_HAS_ext32s_i64
-    tcg_gen_op2_i64(INDEX_op_ext32s_i64, ret, arg);
-#else
-    tcg_gen_shli_i64(ret, arg, 32);
-    tcg_gen_sari_i64(ret, ret, 32);
-#endif
+    if (TCG_TARGET_HAS_ext32s_i64) {
+        tcg_gen_op2_i64(INDEX_op_ext32s_i64, ret, arg);
+    } else {
+        tcg_gen_shli_i64(ret, arg, 32);
+        tcg_gen_sari_i64(ret, ret, 32);
+    }
 }
 
 static inline void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg)
 {
-#ifdef TCG_TARGET_HAS_ext8u_i64
-    tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg);
-#else
-    tcg_gen_andi_i64(ret, arg, 0xffu);
-#endif
+    if (TCG_TARGET_HAS_ext8u_i64) {
+        tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg);
+    } else {
+        tcg_gen_andi_i64(ret, arg, 0xffu);
+    }
 }
 
 static inline void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg)
 {
-#ifdef TCG_TARGET_HAS_ext16u_i64
-    tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg);
-#else
-    tcg_gen_andi_i64(ret, arg, 0xffffu);
-#endif
+    if (TCG_TARGET_HAS_ext16u_i64) {
+        tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg);
+    } else {
+        tcg_gen_andi_i64(ret, arg, 0xffffu);
+    }
 }
 
 static inline void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg)
 {
-#ifdef TCG_TARGET_HAS_ext32u_i64
-    tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg);
-#else
-    tcg_gen_andi_i64(ret, arg, 0xffffffffu);
-#endif
+    if (TCG_TARGET_HAS_ext32u_i64) {
+        tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg);
+    } else {
+        tcg_gen_andi_i64(ret, arg, 0xffffffffu);
+    }
 }
 
 /* Note: we assume the target supports move between 32 and 64 bit
@@ -1655,130 +1751,132 @@
 /* Note: we assume the six high bytes are set to zero */
 static inline void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg)
 {
-#ifdef TCG_TARGET_HAS_bswap16_i64
-    tcg_gen_op2_i64(INDEX_op_bswap16_i64, ret, arg);
-#else
-    TCGv_i64 t0 = tcg_temp_new_i64();
+    if (TCG_TARGET_HAS_bswap16_i64) {
+        tcg_gen_op2_i64(INDEX_op_bswap16_i64, ret, arg);
+    } else {
+        TCGv_i64 t0 = tcg_temp_new_i64();
 
-    tcg_gen_ext8u_i64(t0, arg);
-    tcg_gen_shli_i64(t0, t0, 8);
-    tcg_gen_shri_i64(ret, arg, 8);
-    tcg_gen_or_i64(ret, ret, t0);
-    tcg_temp_free_i64(t0);
-#endif
+        tcg_gen_ext8u_i64(t0, arg);
+        tcg_gen_shli_i64(t0, t0, 8);
+        tcg_gen_shri_i64(ret, arg, 8);
+        tcg_gen_or_i64(ret, ret, t0);
+        tcg_temp_free_i64(t0);
+    }
 }
 
 /* Note: we assume the four high bytes are set to zero */
 static inline void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg)
 {
-#ifdef TCG_TARGET_HAS_bswap32_i64
-    tcg_gen_op2_i64(INDEX_op_bswap32_i64, ret, arg);
-#else
-    TCGv_i64 t0, t1;
-    t0 = tcg_temp_new_i64();
-    t1 = tcg_temp_new_i64();
+    if (TCG_TARGET_HAS_bswap32_i64) {
+        tcg_gen_op2_i64(INDEX_op_bswap32_i64, ret, arg);
+    } else {
+        TCGv_i64 t0, t1;
+        t0 = tcg_temp_new_i64();
+        t1 = tcg_temp_new_i64();
 
-    tcg_gen_shli_i64(t0, arg, 24);
-    tcg_gen_ext32u_i64(t0, t0);
+        tcg_gen_shli_i64(t0, arg, 24);
+        tcg_gen_ext32u_i64(t0, t0);
 
-    tcg_gen_andi_i64(t1, arg, 0x0000ff00);
-    tcg_gen_shli_i64(t1, t1, 8);
-    tcg_gen_or_i64(t0, t0, t1);
+        tcg_gen_andi_i64(t1, arg, 0x0000ff00);
+        tcg_gen_shli_i64(t1, t1, 8);
+        tcg_gen_or_i64(t0, t0, t1);
 
-    tcg_gen_shri_i64(t1, arg, 8);
-    tcg_gen_andi_i64(t1, t1, 0x0000ff00);
-    tcg_gen_or_i64(t0, t0, t1);
+        tcg_gen_shri_i64(t1, arg, 8);
+        tcg_gen_andi_i64(t1, t1, 0x0000ff00);
+        tcg_gen_or_i64(t0, t0, t1);
 
-    tcg_gen_shri_i64(t1, arg, 24);
-    tcg_gen_or_i64(ret, t0, t1);
-    tcg_temp_free_i64(t0);
-    tcg_temp_free_i64(t1);
-#endif
+        tcg_gen_shri_i64(t1, arg, 24);
+        tcg_gen_or_i64(ret, t0, t1);
+        tcg_temp_free_i64(t0);
+        tcg_temp_free_i64(t1);
+    }
 }
 
 static inline void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg)
 {
-#ifdef TCG_TARGET_HAS_bswap64_i64
-    tcg_gen_op2_i64(INDEX_op_bswap64_i64, ret, arg);
-#else
-    TCGv_i64 t0 = tcg_temp_new_i64();
-    TCGv_i64 t1 = tcg_temp_new_i64();
+    if (TCG_TARGET_HAS_bswap64_i64) {
+        tcg_gen_op2_i64(INDEX_op_bswap64_i64, ret, arg);
+    } else {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        TCGv_i64 t1 = tcg_temp_new_i64();
+
+        tcg_gen_shli_i64(t0, arg, 56);
+
+        tcg_gen_andi_i64(t1, arg, 0x0000ff00);
+        tcg_gen_shli_i64(t1, t1, 40);
+        tcg_gen_or_i64(t0, t0, t1);
+
+        tcg_gen_andi_i64(t1, arg, 0x00ff0000);
+        tcg_gen_shli_i64(t1, t1, 24);
+        tcg_gen_or_i64(t0, t0, t1);
 
-    tcg_gen_shli_i64(t0, arg, 56);
+        tcg_gen_andi_i64(t1, arg, 0xff000000);
+        tcg_gen_shli_i64(t1, t1, 8);
+        tcg_gen_or_i64(t0, t0, t1);
 
-    tcg_gen_andi_i64(t1, arg, 0x0000ff00);
-    tcg_gen_shli_i64(t1, t1, 40);
-    tcg_gen_or_i64(t0, t0, t1);
+        tcg_gen_shri_i64(t1, arg, 8);
+        tcg_gen_andi_i64(t1, t1, 0xff000000);
+        tcg_gen_or_i64(t0, t0, t1);
+
+        tcg_gen_shri_i64(t1, arg, 24);
+        tcg_gen_andi_i64(t1, t1, 0x00ff0000);
+        tcg_gen_or_i64(t0, t0, t1);
 
-    tcg_gen_andi_i64(t1, arg, 0x00ff0000);
-    tcg_gen_shli_i64(t1, t1, 24);
-    tcg_gen_or_i64(t0, t0, t1);
+        tcg_gen_shri_i64(t1, arg, 40);
+        tcg_gen_andi_i64(t1, t1, 0x0000ff00);
+        tcg_gen_or_i64(t0, t0, t1);
 
-    tcg_gen_andi_i64(t1, arg, 0xff000000);
-    tcg_gen_shli_i64(t1, t1, 8);
-    tcg_gen_or_i64(t0, t0, t1);
-
-    tcg_gen_shri_i64(t1, arg, 8);
-    tcg_gen_andi_i64(t1, t1, 0xff000000);
-    tcg_gen_or_i64(t0, t0, t1);
-
-    tcg_gen_shri_i64(t1, arg, 24);
-    tcg_gen_andi_i64(t1, t1, 0x00ff0000);
-    tcg_gen_or_i64(t0, t0, t1);
-
-    tcg_gen_shri_i64(t1, arg, 40);
-    tcg_gen_andi_i64(t1, t1, 0x0000ff00);
-    tcg_gen_or_i64(t0, t0, t1);
-
-    tcg_gen_shri_i64(t1, arg, 56);
-    tcg_gen_or_i64(ret, t0, t1);
-    tcg_temp_free_i64(t0);
-    tcg_temp_free_i64(t1);
-#endif
+        tcg_gen_shri_i64(t1, arg, 56);
+        tcg_gen_or_i64(ret, t0, t1);
+        tcg_temp_free_i64(t0);
+        tcg_temp_free_i64(t1);
+    }
 }
 
 #endif
 
 static inline void tcg_gen_neg_i32(TCGv_i32 ret, TCGv_i32 arg)
 {
-#ifdef TCG_TARGET_HAS_neg_i32
-    tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg);
-#else
-    TCGv_i32 t0 = tcg_const_i32(0);
-    tcg_gen_sub_i32(ret, t0, arg);
-    tcg_temp_free_i32(t0);
-#endif
+    if (TCG_TARGET_HAS_neg_i32) {
+        tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg);
+    } else {
+        TCGv_i32 t0 = tcg_const_i32(0);
+        tcg_gen_sub_i32(ret, t0, arg);
+        tcg_temp_free_i32(t0);
+    }
 }
 
 static inline void tcg_gen_neg_i64(TCGv_i64 ret, TCGv_i64 arg)
 {
-#ifdef TCG_TARGET_HAS_neg_i64
-    tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg);
-#else
-    TCGv_i64 t0 = tcg_const_i64(0);
-    tcg_gen_sub_i64(ret, t0, arg);
-    tcg_temp_free_i64(t0);
-#endif
+    if (TCG_TARGET_HAS_neg_i64) {
+        tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg);
+    } else {
+        TCGv_i64 t0 = tcg_const_i64(0);
+        tcg_gen_sub_i64(ret, t0, arg);
+        tcg_temp_free_i64(t0);
+    }
 }
 
 static inline void tcg_gen_not_i32(TCGv_i32 ret, TCGv_i32 arg)
 {
-#ifdef TCG_TARGET_HAS_not_i32
-    tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg);
-#else
-    tcg_gen_xori_i32(ret, arg, -1);
-#endif
+    if (TCG_TARGET_HAS_not_i32) {
+        tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg);
+    } else {
+        tcg_gen_xori_i32(ret, arg, -1);
+    }
 }
 
 static inline void tcg_gen_not_i64(TCGv_i64 ret, TCGv_i64 arg)
 {
-#ifdef TCG_TARGET_HAS_not_i64
-    tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg);
-#elif defined(TCG_TARGET_HAS_not_i32) && TCG_TARGET_REG_BITS == 32
+#if TCG_TARGET_REG_BITS == 64
+    if (TCG_TARGET_HAS_not_i64) {
+        tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg);
+    } else {
+        tcg_gen_xori_i64(ret, arg, -1);
+    }
+#else
     tcg_gen_not_i32(TCGV_LOW(ret), TCGV_LOW(arg));
     tcg_gen_not_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
-#else
-    tcg_gen_xori_i64(ret, arg, -1);
 #endif
 }
 
@@ -1787,210 +1885,182 @@
     tcg_gen_op1_i32(INDEX_op_discard, arg);
 }
 
-#if TCG_TARGET_REG_BITS == 32
 static inline void tcg_gen_discard_i64(TCGv_i64 arg)
 {
+#if TCG_TARGET_REG_BITS == 32
     tcg_gen_discard_i32(TCGV_LOW(arg));
     tcg_gen_discard_i32(TCGV_HIGH(arg));
-}
 #else
-static inline void tcg_gen_discard_i64(TCGv_i64 arg)
-{
     tcg_gen_op1_i64(INDEX_op_discard, arg);
-}
-#endif
-
-static inline void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high)
-{
-#if TCG_TARGET_REG_BITS == 32
-    tcg_gen_mov_i32(TCGV_LOW(dest), low);
-    tcg_gen_mov_i32(TCGV_HIGH(dest), high);
-#else
-    TCGv_i64 tmp = tcg_temp_new_i64();
-    /* This extension is only needed for type correctness.
-       We may be able to do better given target specific information.  */
-    tcg_gen_extu_i32_i64(tmp, high);
-    tcg_gen_shli_i64(tmp, tmp, 32);
-    tcg_gen_extu_i32_i64(dest, low);
-    tcg_gen_or_i64(dest, dest, tmp);
-    tcg_temp_free_i64(tmp);
-#endif
-}
-
-static inline void tcg_gen_concat32_i64(TCGv_i64 dest, TCGv_i64 low, TCGv_i64 high)
-{
-#if TCG_TARGET_REG_BITS == 32
-    tcg_gen_concat_i32_i64(dest, TCGV_LOW(low), TCGV_LOW(high));
-#else
-    TCGv_i64 tmp = tcg_temp_new_i64();
-    tcg_gen_ext32u_i64(dest, low);
-    tcg_gen_shli_i64(tmp, high, 32);
-    tcg_gen_or_i64(dest, dest, tmp);
-    tcg_temp_free_i64(tmp);
 #endif
 }
 
 static inline void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
 {
-#ifdef TCG_TARGET_HAS_andc_i32
-    tcg_gen_op3_i32(INDEX_op_andc_i32, ret, arg1, arg2);
-#else
-    TCGv_i32 t0;
-    t0 = tcg_temp_new_i32();
-    tcg_gen_not_i32(t0, arg2);
-    tcg_gen_and_i32(ret, arg1, t0);
-    tcg_temp_free_i32(t0);
-#endif
+    if (TCG_TARGET_HAS_andc_i32) {
+        tcg_gen_op3_i32(INDEX_op_andc_i32, ret, arg1, arg2);
+    } else {
+        TCGv_i32 t0 = tcg_temp_new_i32();
+        tcg_gen_not_i32(t0, arg2);
+        tcg_gen_and_i32(ret, arg1, t0);
+        tcg_temp_free_i32(t0);
+    }
 }
 
 static inline void tcg_gen_andc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
-#ifdef TCG_TARGET_HAS_andc_i64
-    tcg_gen_op3_i64(INDEX_op_andc_i64, ret, arg1, arg2);
-#elif defined(TCG_TARGET_HAS_andc_i32) && TCG_TARGET_REG_BITS == 32
+#if TCG_TARGET_REG_BITS == 64
+    if (TCG_TARGET_HAS_andc_i64) {
+        tcg_gen_op3_i64(INDEX_op_andc_i64, ret, arg1, arg2);
+    } else {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        tcg_gen_not_i64(t0, arg2);
+        tcg_gen_and_i64(ret, arg1, t0);
+        tcg_temp_free_i64(t0);
+    }
+#else
     tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
     tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
-#else
-    TCGv_i64 t0;
-    t0 = tcg_temp_new_i64();
-    tcg_gen_not_i64(t0, arg2);
-    tcg_gen_and_i64(ret, arg1, t0);
-    tcg_temp_free_i64(t0);
 #endif
 }
 
 static inline void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
 {
-#ifdef TCG_TARGET_HAS_eqv_i32
-    tcg_gen_op3_i32(INDEX_op_eqv_i32, ret, arg1, arg2);
-#else
-    tcg_gen_xor_i32(ret, arg1, arg2);
-    tcg_gen_not_i32(ret, ret);
-#endif
+    if (TCG_TARGET_HAS_eqv_i32) {
+        tcg_gen_op3_i32(INDEX_op_eqv_i32, ret, arg1, arg2);
+    } else {
+        tcg_gen_xor_i32(ret, arg1, arg2);
+        tcg_gen_not_i32(ret, ret);
+    }
 }
 
 static inline void tcg_gen_eqv_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
-#ifdef TCG_TARGET_HAS_eqv_i64
-    tcg_gen_op3_i64(INDEX_op_eqv_i64, ret, arg1, arg2);
-#elif defined(TCG_TARGET_HAS_eqv_i32) && TCG_TARGET_REG_BITS == 32
+#if TCG_TARGET_REG_BITS == 64
+    if (TCG_TARGET_HAS_eqv_i64) {
+        tcg_gen_op3_i64(INDEX_op_eqv_i64, ret, arg1, arg2);
+    } else {
+        tcg_gen_xor_i64(ret, arg1, arg2);
+        tcg_gen_not_i64(ret, ret);
+    }
+#else
     tcg_gen_eqv_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
     tcg_gen_eqv_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
-#else
-    tcg_gen_xor_i64(ret, arg1, arg2);
-    tcg_gen_not_i64(ret, ret);
 #endif
 }
 
 static inline void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
 {
-#ifdef TCG_TARGET_HAS_nand_i32
-    tcg_gen_op3_i32(INDEX_op_nand_i32, ret, arg1, arg2);
-#else
-    tcg_gen_and_i32(ret, arg1, arg2);
-    tcg_gen_not_i32(ret, ret);
-#endif
+    if (TCG_TARGET_HAS_nand_i32) {
+        tcg_gen_op3_i32(INDEX_op_nand_i32, ret, arg1, arg2);
+    } else {
+        tcg_gen_and_i32(ret, arg1, arg2);
+        tcg_gen_not_i32(ret, ret);
+    }
 }
 
 static inline void tcg_gen_nand_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
-#ifdef TCG_TARGET_HAS_nand_i64
-    tcg_gen_op3_i64(INDEX_op_nand_i64, ret, arg1, arg2);
-#elif defined(TCG_TARGET_HAS_nand_i32) && TCG_TARGET_REG_BITS == 32
+#if TCG_TARGET_REG_BITS == 64
+    if (TCG_TARGET_HAS_nand_i64) {
+        tcg_gen_op3_i64(INDEX_op_nand_i64, ret, arg1, arg2);
+    } else {
+        tcg_gen_and_i64(ret, arg1, arg2);
+        tcg_gen_not_i64(ret, ret);
+    }
+#else
     tcg_gen_nand_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
     tcg_gen_nand_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
-#else
-    tcg_gen_and_i64(ret, arg1, arg2);
-    tcg_gen_not_i64(ret, ret);
 #endif
 }
 
 static inline void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
 {
-#ifdef TCG_TARGET_HAS_nor_i32
-    tcg_gen_op3_i32(INDEX_op_nor_i32, ret, arg1, arg2);
-#else
-    tcg_gen_or_i32(ret, arg1, arg2);
-    tcg_gen_not_i32(ret, ret);
-#endif
+    if (TCG_TARGET_HAS_nor_i32) {
+        tcg_gen_op3_i32(INDEX_op_nor_i32, ret, arg1, arg2);
+    } else {
+        tcg_gen_or_i32(ret, arg1, arg2);
+        tcg_gen_not_i32(ret, ret);
+    }
 }
 
 static inline void tcg_gen_nor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
-#ifdef TCG_TARGET_HAS_nor_i64
-    tcg_gen_op3_i64(INDEX_op_nor_i64, ret, arg1, arg2);
-#elif defined(TCG_TARGET_HAS_nor_i32) && TCG_TARGET_REG_BITS == 32
+#if TCG_TARGET_REG_BITS == 64
+    if (TCG_TARGET_HAS_nor_i64) {
+        tcg_gen_op3_i64(INDEX_op_nor_i64, ret, arg1, arg2);
+    } else {
+        tcg_gen_or_i64(ret, arg1, arg2);
+        tcg_gen_not_i64(ret, ret);
+    }
+#else
     tcg_gen_nor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
     tcg_gen_nor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
-#else
-    tcg_gen_or_i64(ret, arg1, arg2);
-    tcg_gen_not_i64(ret, ret);
 #endif
 }
 
 static inline void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
 {
-#ifdef TCG_TARGET_HAS_orc_i32
-    tcg_gen_op3_i32(INDEX_op_orc_i32, ret, arg1, arg2);
-#else
-    TCGv_i32 t0;
-    t0 = tcg_temp_new_i32();
-    tcg_gen_not_i32(t0, arg2);
-    tcg_gen_or_i32(ret, arg1, t0);
-    tcg_temp_free_i32(t0);
-#endif
+    if (TCG_TARGET_HAS_orc_i32) {
+        tcg_gen_op3_i32(INDEX_op_orc_i32, ret, arg1, arg2);
+    } else {
+        TCGv_i32 t0 = tcg_temp_new_i32();
+        tcg_gen_not_i32(t0, arg2);
+        tcg_gen_or_i32(ret, arg1, t0);
+        tcg_temp_free_i32(t0);
+    }
 }
 
 static inline void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
-#ifdef TCG_TARGET_HAS_orc_i64
-    tcg_gen_op3_i64(INDEX_op_orc_i64, ret, arg1, arg2);
-#elif defined(TCG_TARGET_HAS_orc_i32) && TCG_TARGET_REG_BITS == 32
+#if TCG_TARGET_REG_BITS == 64
+    if (TCG_TARGET_HAS_orc_i64) {
+        tcg_gen_op3_i64(INDEX_op_orc_i64, ret, arg1, arg2);
+    } else {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        tcg_gen_not_i64(t0, arg2);
+        tcg_gen_or_i64(ret, arg1, t0);
+        tcg_temp_free_i64(t0);
+    }
+#else
     tcg_gen_orc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
     tcg_gen_orc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
-#else
-    TCGv_i64 t0;
-    t0 = tcg_temp_new_i64();
-    tcg_gen_not_i64(t0, arg2);
-    tcg_gen_or_i64(ret, arg1, t0);
-    tcg_temp_free_i64(t0);
 #endif
 }
 
 static inline void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
 {
-#ifdef TCG_TARGET_HAS_rot_i32
-    tcg_gen_op3_i32(INDEX_op_rotl_i32, ret, arg1, arg2);
-#else
-    TCGv_i32 t0, t1;
+    if (TCG_TARGET_HAS_rot_i32) {
+        tcg_gen_op3_i32(INDEX_op_rotl_i32, ret, arg1, arg2);
+    } else {
+        TCGv_i32 t0, t1;
 
-    t0 = tcg_temp_new_i32();
-    t1 = tcg_temp_new_i32();
-    tcg_gen_shl_i32(t0, arg1, arg2);
-    tcg_gen_subfi_i32(t1, 32, arg2);
-    tcg_gen_shr_i32(t1, arg1, t1);
-    tcg_gen_or_i32(ret, t0, t1);
-    tcg_temp_free_i32(t0);
-    tcg_temp_free_i32(t1);
-#endif
+        t0 = tcg_temp_new_i32();
+        t1 = tcg_temp_new_i32();
+        tcg_gen_shl_i32(t0, arg1, arg2);
+        tcg_gen_subfi_i32(t1, 32, arg2);
+        tcg_gen_shr_i32(t1, arg1, t1);
+        tcg_gen_or_i32(ret, t0, t1);
+        tcg_temp_free_i32(t0);
+        tcg_temp_free_i32(t1);
+    }
 }
 
 static inline void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
-#ifdef TCG_TARGET_HAS_rot_i64
-    tcg_gen_op3_i64(INDEX_op_rotl_i64, ret, arg1, arg2);
-#else
-    TCGv_i64 t0, t1;
-
-    t0 = tcg_temp_new_i64();
-    t1 = tcg_temp_new_i64();
-    tcg_gen_shl_i64(t0, arg1, arg2);
-    tcg_gen_subfi_i64(t1, 64, arg2);
-    tcg_gen_shr_i64(t1, arg1, t1);
-    tcg_gen_or_i64(ret, t0, t1);
-    tcg_temp_free_i64(t0);
-    tcg_temp_free_i64(t1);
-#endif
+    if (TCG_TARGET_HAS_rot_i64) {
+        tcg_gen_op3_i64(INDEX_op_rotl_i64, ret, arg1, arg2);
+    } else {
+        TCGv_i64 t0, t1;
+        t0 = tcg_temp_new_i64();
+        t1 = tcg_temp_new_i64();
+        tcg_gen_shl_i64(t0, arg1, arg2);
+        tcg_gen_subfi_i64(t1, 64, arg2);
+        tcg_gen_shr_i64(t1, arg1, t1);
+        tcg_gen_or_i64(ret, t0, t1);
+        tcg_temp_free_i64(t0);
+        tcg_temp_free_i64(t1);
+    }
 }
 
 static inline void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
@@ -1998,12 +2068,11 @@
     /* some cases can be optimized here */
     if (arg2 == 0) {
         tcg_gen_mov_i32(ret, arg1);
-    } else {
-#ifdef TCG_TARGET_HAS_rot_i32
+    } else if (TCG_TARGET_HAS_rot_i32) {
         TCGv_i32 t0 = tcg_const_i32(arg2);
         tcg_gen_rotl_i32(ret, arg1, t0);
         tcg_temp_free_i32(t0);
-#else
+    } else {
         TCGv_i32 t0, t1;
         t0 = tcg_temp_new_i32();
         t1 = tcg_temp_new_i32();
@@ -2012,7 +2081,6 @@
         tcg_gen_or_i32(ret, t0, t1);
         tcg_temp_free_i32(t0);
         tcg_temp_free_i32(t1);
-#endif
     }
 }
 
@@ -2021,12 +2089,11 @@
     /* some cases can be optimized here */
     if (arg2 == 0) {
         tcg_gen_mov_i64(ret, arg1);
-    } else {
-#ifdef TCG_TARGET_HAS_rot_i64
+    } else if (TCG_TARGET_HAS_rot_i64) {
         TCGv_i64 t0 = tcg_const_i64(arg2);
         tcg_gen_rotl_i64(ret, arg1, t0);
         tcg_temp_free_i64(t0);
-#else
+    } else {
         TCGv_i64 t0, t1;
         t0 = tcg_temp_new_i64();
         t1 = tcg_temp_new_i64();
@@ -2035,44 +2102,42 @@
         tcg_gen_or_i64(ret, t0, t1);
         tcg_temp_free_i64(t0);
         tcg_temp_free_i64(t1);
-#endif
     }
 }
 
 static inline void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
 {
-#ifdef TCG_TARGET_HAS_rot_i32
-    tcg_gen_op3_i32(INDEX_op_rotr_i32, ret, arg1, arg2);
-#else
-    TCGv_i32 t0, t1;
+    if (TCG_TARGET_HAS_rot_i32) {
+        tcg_gen_op3_i32(INDEX_op_rotr_i32, ret, arg1, arg2);
+    } else {
+        TCGv_i32 t0, t1;
 
-    t0 = tcg_temp_new_i32();
-    t1 = tcg_temp_new_i32();
-    tcg_gen_shr_i32(t0, arg1, arg2);
-    tcg_gen_subfi_i32(t1, 32, arg2);
-    tcg_gen_shl_i32(t1, arg1, t1);
-    tcg_gen_or_i32(ret, t0, t1);
-    tcg_temp_free_i32(t0);
-    tcg_temp_free_i32(t1);
-#endif
+        t0 = tcg_temp_new_i32();
+        t1 = tcg_temp_new_i32();
+        tcg_gen_shr_i32(t0, arg1, arg2);
+        tcg_gen_subfi_i32(t1, 32, arg2);
+        tcg_gen_shl_i32(t1, arg1, t1);
+        tcg_gen_or_i32(ret, t0, t1);
+        tcg_temp_free_i32(t0);
+        tcg_temp_free_i32(t1);
+    }
 }
 
 static inline void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
-#ifdef TCG_TARGET_HAS_rot_i64
-    tcg_gen_op3_i64(INDEX_op_rotr_i64, ret, arg1, arg2);
-#else
-    TCGv_i64 t0, t1;
-
-    t0 = tcg_temp_new_i64();
-    t1 = tcg_temp_new_i64();
-    tcg_gen_shr_i64(t0, arg1, arg2);
-    tcg_gen_subfi_i64(t1, 64, arg2);
-    tcg_gen_shl_i64(t1, arg1, t1);
-    tcg_gen_or_i64(ret, t0, t1);
-    tcg_temp_free_i64(t0);
-    tcg_temp_free_i64(t1);
-#endif
+    if (TCG_TARGET_HAS_rot_i64) {
+        tcg_gen_op3_i64(INDEX_op_rotr_i64, ret, arg1, arg2);
+    } else {
+        TCGv_i64 t0, t1;
+        t0 = tcg_temp_new_i64();
+        t1 = tcg_temp_new_i64();
+        tcg_gen_shr_i64(t0, arg1, arg2);
+        tcg_gen_subfi_i64(t1, 64, arg2);
+        tcg_gen_shl_i64(t1, arg1, t1);
+        tcg_gen_or_i64(ret, t0, t1);
+        tcg_temp_free_i64(t0);
+        tcg_temp_free_i64(t1);
+    }
 }
 
 static inline void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
@@ -2096,41 +2161,426 @@
 }
 
 static inline void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1,
-				       TCGv_i32 arg2, unsigned int ofs,
-				       unsigned int len)
+                                       TCGv_i32 arg2, unsigned int ofs,
+                                       unsigned int len)
 {
-#ifdef TCG_TARGET_HAS_deposit_i32
-  tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, arg1, arg2, ofs, len);
-#else
-  uint32_t mask = (1u << len) - 1;
-  TCGv_i32 t1 = tcg_temp_new_i32 ();
+    uint32_t mask;
+    TCGv_i32 t1;
 
-  tcg_gen_andi_i32(t1, arg2, mask);
-  tcg_gen_shli_i32(t1, t1, ofs);
-  tcg_gen_andi_i32(ret, arg1, ~(mask << ofs));
-  tcg_gen_or_i32(ret, ret, t1);
+    tcg_debug_assert(ofs < 32);
+    tcg_debug_assert(len <= 32);
+    tcg_debug_assert(ofs + len <= 32);
 
-  tcg_temp_free_i32(t1);
-#endif
+    if (ofs == 0 && len == 32) {
+        tcg_gen_mov_i32(ret, arg2);
+        return;
+    }
+    if (TCG_TARGET_HAS_deposit_i32 && TCG_TARGET_deposit_i32_valid(ofs, len)) {
+        tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, arg1, arg2, ofs, len);
+        return;
+    }
+
+    mask = (1u << len) - 1;
+    t1 = tcg_temp_new_i32();
+
+    if (ofs + len < 32) {
+        tcg_gen_andi_i32(t1, arg2, mask);
+        tcg_gen_shli_i32(t1, t1, ofs);
+    } else {
+        tcg_gen_shli_i32(t1, arg2, ofs);
+    }
+    tcg_gen_andi_i32(ret, arg1, ~(mask << ofs));
+    tcg_gen_or_i32(ret, ret, t1);
+
+    tcg_temp_free_i32(t1);
 }
 
 static inline void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1,
-				       TCGv_i64 arg2, unsigned int ofs,
-				       unsigned int len)
+                                       TCGv_i64 arg2, unsigned int ofs,
+                                       unsigned int len)
 {
-#ifdef TCG_TARGET_HAS_deposit_i64
-  tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len);
-#else
-  uint64_t mask = (1ull << len) - 1;
-  TCGv_i64 t1 = tcg_temp_new_i64 ();
+    uint64_t mask;
+    TCGv_i64 t1;
 
-  tcg_gen_andi_i64(t1, arg2, mask);
-  tcg_gen_shli_i64(t1, t1, ofs);
-  tcg_gen_andi_i64(ret, arg1, ~(mask << ofs));
-  tcg_gen_or_i64(ret, ret, t1);
+    tcg_debug_assert(ofs < 64);
+    tcg_debug_assert(len <= 64);
+    tcg_debug_assert(ofs + len <= 64);
 
-  tcg_temp_free_i64(t1);
+    if (ofs == 0 && len == 64) {
+        tcg_gen_mov_i64(ret, arg2);
+        return;
+    }
+    if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(ofs, len)) {
+        tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len);
+        return;
+    }
+
+#if TCG_TARGET_REG_BITS == 32
+    if (ofs >= 32) {
+        tcg_gen_deposit_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1),
+                            TCGV_LOW(arg2), ofs - 32, len);
+        tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
+        return;
+    }
+    if (ofs + len <= 32) {
+        tcg_gen_deposit_i32(TCGV_LOW(ret), TCGV_LOW(arg1),
+                            TCGV_LOW(arg2), ofs, len);
+        tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
+        return;
+    }
 #endif
+
+    mask = (1ull << len) - 1;
+    t1 = tcg_temp_new_i64();
+
+    if (ofs + len < 64) {
+        tcg_gen_andi_i64(t1, arg2, mask);
+        tcg_gen_shli_i64(t1, t1, ofs);
+    } else {
+        tcg_gen_shli_i64(t1, arg2, ofs);
+    }
+    tcg_gen_andi_i64(ret, arg1, ~(mask << ofs));
+    tcg_gen_or_i64(ret, ret, t1);
+
+    tcg_temp_free_i64(t1);
+}
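For reference, deposit(arg1, arg2, ofs, len) replaces len bits of arg1 at bit offset ofs with the low len bits of arg2. A plain-C equivalent of the mask/shift/or fallback (len <= 63 here; the ofs == 0, len == 64 case is already handled by the early mov above):

#include <stdint.h>

static uint64_t deposit64_ref(uint64_t arg1, uint64_t arg2,
                              unsigned ofs, unsigned len)
{
    uint64_t mask = (UINT64_C(1) << len) - 1;
    return (arg1 & ~(mask << ofs)) | ((arg2 & mask) << ofs);
}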
+
+static inline void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low,
+                                          TCGv_i32 high)
+{
+#if TCG_TARGET_REG_BITS == 32
+    tcg_gen_mov_i32(TCGV_LOW(dest), low);
+    tcg_gen_mov_i32(TCGV_HIGH(dest), high);
+#else
+    TCGv_i64 tmp = tcg_temp_new_i64();
+    /* These extensions are only needed for type correctness.
+       We may be able to do better given target specific information.  */
+    tcg_gen_extu_i32_i64(tmp, high);
+    tcg_gen_extu_i32_i64(dest, low);
+    /* If deposit is available, use it.  Otherwise use the extra
+       knowledge that we have of the zero-extensions above.  */
+    if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(32, 32)) {
+        tcg_gen_deposit_i64(dest, dest, tmp, 32, 32);
+    } else {
+        tcg_gen_shli_i64(tmp, tmp, 32);
+        tcg_gen_or_i64(dest, dest, tmp);
+    }
+    tcg_temp_free_i64(tmp);
+#endif
+}
+
+static inline void tcg_gen_concat32_i64(TCGv_i64 dest, TCGv_i64 low,
+                                        TCGv_i64 high)
+{
+    tcg_gen_deposit_i64(dest, low, high, 32, 32);
+}
+
+static inline void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg)
+{
+#if TCG_TARGET_REG_BITS == 32
+    tcg_gen_mov_i32(lo, TCGV_LOW(arg));
+    tcg_gen_mov_i32(hi, TCGV_HIGH(arg));
+#else
+    TCGv_i64 t0 = tcg_temp_new_i64();
+    tcg_gen_trunc_i64_i32(lo, arg);
+    tcg_gen_shri_i64(t0, arg, 32);
+    tcg_gen_trunc_i64_i32(hi, t0);
+    tcg_temp_free_i64(t0);
+#endif
+}
+
+static inline void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg)
+{
+    tcg_gen_ext32u_i64(lo, arg);
+    tcg_gen_shri_i64(hi, arg, 32);
+}
+
+static inline void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret,
+                                       TCGv_i32 c1, TCGv_i32 c2,
+                                       TCGv_i32 v1, TCGv_i32 v2)
+{
+    if (TCG_TARGET_HAS_movcond_i32) {
+        tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, c1, c2, v1, v2, cond);
+    } else {
+        TCGv_i32 t0 = tcg_temp_new_i32();
+        TCGv_i32 t1 = tcg_temp_new_i32();
+        tcg_gen_setcond_i32(cond, t0, c1, c2);
+        tcg_gen_neg_i32(t0, t0);
+        tcg_gen_and_i32(t1, v1, t0);
+        tcg_gen_andc_i32(ret, v2, t0);
+        tcg_gen_or_i32(ret, ret, t1);
+        tcg_temp_free_i32(t0);
+        tcg_temp_free_i32(t1);
+    }
+}
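When the backend lacks movcond, the fallback turns the 0/1 setcond result into an all-ones/all-zeros mask and selects bitwise. The equivalent branchless C:

#include <stdint.h>

/* mask = -(cond ? 1 : 0): all ones selects v1, zero selects v2. */
static uint32_t movcond32_ref(int cond_is_true, uint32_t v1, uint32_t v2)
{
    uint32_t mask = -(uint32_t)(cond_is_true != 0);
    return (v1 & mask) | (v2 & ~mask);
}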
+
+static inline void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret,
+                                       TCGv_i64 c1, TCGv_i64 c2,
+                                       TCGv_i64 v1, TCGv_i64 v2)
+{
+#if TCG_TARGET_REG_BITS == 32
+    TCGv_i32 t0 = tcg_temp_new_i32();
+    TCGv_i32 t1 = tcg_temp_new_i32();
+    tcg_gen_op6i_i32(INDEX_op_setcond2_i32, t0,
+                     TCGV_LOW(c1), TCGV_HIGH(c1),
+                     TCGV_LOW(c2), TCGV_HIGH(c2), cond);
+
+    if (TCG_TARGET_HAS_movcond_i32) {
+        tcg_gen_movi_i32(t1, 0);
+        tcg_gen_movcond_i32(TCG_COND_NE, TCGV_LOW(ret), t0, t1,
+                            TCGV_LOW(v1), TCGV_LOW(v2));
+        tcg_gen_movcond_i32(TCG_COND_NE, TCGV_HIGH(ret), t0, t1,
+                            TCGV_HIGH(v1), TCGV_HIGH(v2));
+    } else {
+        tcg_gen_neg_i32(t0, t0);
+
+        tcg_gen_and_i32(t1, TCGV_LOW(v1), t0);
+        tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(v2), t0);
+        tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t1);
+
+        tcg_gen_and_i32(t1, TCGV_HIGH(v1), t0);
+        tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(v2), t0);
+        tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t1);
+    }
+    tcg_temp_free_i32(t0);
+    tcg_temp_free_i32(t1);
+#else
+    if (TCG_TARGET_HAS_movcond_i64) {
+        tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, c1, c2, v1, v2, cond);
+    } else {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        TCGv_i64 t1 = tcg_temp_new_i64();
+        tcg_gen_setcond_i64(cond, t0, c1, c2);
+        tcg_gen_neg_i64(t0, t0);
+        tcg_gen_and_i64(t1, v1, t0);
+        tcg_gen_andc_i64(ret, v2, t0);
+        tcg_gen_or_i64(ret, ret, t1);
+        tcg_temp_free_i64(t0);
+        tcg_temp_free_i64(t1);
+    }
+#endif
+}
+
+static inline void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
+                                    TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh)
+{
+    if (TCG_TARGET_HAS_add2_i32) {
+        tcg_gen_op6_i32(INDEX_op_add2_i32, rl, rh, al, ah, bl, bh);
+        /* Allow the optimizer room to replace add2 with two moves.  */
+        tcg_gen_op0(INDEX_op_nop);
+    } else {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        TCGv_i64 t1 = tcg_temp_new_i64();
+        tcg_gen_concat_i32_i64(t0, al, ah);
+        tcg_gen_concat_i32_i64(t1, bl, bh);
+        tcg_gen_add_i64(t0, t0, t1);
+        tcg_gen_extr_i64_i32(rl, rh, t0);
+        tcg_temp_free_i64(t0);
+        tcg_temp_free_i64(t1);
+    }
+}
+
+static inline void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
+                                    TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh)
+{
+    if (TCG_TARGET_HAS_sub2_i32) {
+        tcg_gen_op6_i32(INDEX_op_sub2_i32, rl, rh, al, ah, bl, bh);
+        /* Allow the optimizer room to replace sub2 with two moves.  */
+        tcg_gen_op0(INDEX_op_nop);
+    } else {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        TCGv_i64 t1 = tcg_temp_new_i64();
+        tcg_gen_concat_i32_i64(t0, al, ah);
+        tcg_gen_concat_i32_i64(t1, bl, bh);
+        tcg_gen_sub_i64(t0, t0, t1);
+        tcg_gen_extr_i64_i32(rl, rh, t0);
+        tcg_temp_free_i64(t0);
+        tcg_temp_free_i64(t1);
+    }
+}
+
+static inline void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh,
+                                     TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    if (TCG_TARGET_HAS_mulu2_i32) {
+        tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2);
+        /* Allow the optimizer room to replace mulu2 with two moves.  */
+        tcg_gen_op0(INDEX_op_nop);
+    } else if (TCG_TARGET_HAS_muluh_i32) {
+        TCGv_i32 t = tcg_temp_new_i32();
+        tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
+        tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2);
+        tcg_gen_mov_i32(rl, t);
+        tcg_temp_free_i32(t);
+    } else {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        TCGv_i64 t1 = tcg_temp_new_i64();
+        tcg_gen_extu_i32_i64(t0, arg1);
+        tcg_gen_extu_i32_i64(t1, arg2);
+        tcg_gen_mul_i64(t0, t0, t1);
+        tcg_gen_extr_i64_i32(rl, rh, t0);
+        tcg_temp_free_i64(t0);
+        tcg_temp_free_i64(t1);
+    }
+}
+
+static inline void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh,
+                                     TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    if (TCG_TARGET_HAS_muls2_i32) {
+        tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2);
+        /* Allow the optimizer room to replace muls2 with two moves.  */
+        tcg_gen_op0(INDEX_op_nop);
+    } else if (TCG_TARGET_HAS_mulsh_i32) {
+        TCGv_i32 t = tcg_temp_new_i32();
+        tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
+        tcg_gen_op3_i32(INDEX_op_mulsh_i32, rh, arg1, arg2);
+        tcg_gen_mov_i32(rl, t);
+        tcg_temp_free_i32(t);
+    } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_mulu2_i32) {
+        TCGv_i32 t0 = tcg_temp_new_i32();
+        TCGv_i32 t1 = tcg_temp_new_i32();
+        TCGv_i32 t2 = tcg_temp_new_i32();
+        TCGv_i32 t3 = tcg_temp_new_i32();
+        tcg_gen_op4_i32(INDEX_op_mulu2_i32, t0, t1, arg1, arg2);
+        /* Allow the optimizer room to replace mulu2 with two moves.  */
+        tcg_gen_op0(INDEX_op_nop);
+        /* Adjust for negative inputs.  */
+        tcg_gen_sari_i32(t2, arg1, 31);
+        tcg_gen_sari_i32(t3, arg2, 31);
+        tcg_gen_and_i32(t2, t2, arg2);
+        tcg_gen_and_i32(t3, t3, arg1);
+        tcg_gen_sub_i32(rh, t1, t2);
+        tcg_gen_sub_i32(rh, rh, t3);
+        tcg_gen_mov_i32(rl, t0);
+        tcg_temp_free_i32(t0);
+        tcg_temp_free_i32(t1);
+        tcg_temp_free_i32(t2);
+        tcg_temp_free_i32(t3);
+    } else {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        TCGv_i64 t1 = tcg_temp_new_i64();
+        tcg_gen_ext_i32_i64(t0, arg1);
+        tcg_gen_ext_i32_i64(t1, arg2);
+        tcg_gen_mul_i64(t0, t0, t1);
+        tcg_gen_extr_i64_i32(rl, rh, t0);
+        tcg_temp_free_i64(t0);
+        tcg_temp_free_i64(t1);
+    }
+}
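The mulu2-based muls2 path relies on the identity that, modulo 2^32, the signed high word equals the unsigned high word minus arg2 when arg1 is negative and minus arg1 when arg2 is negative; that is exactly what the sari/and/sub sequence computes. A checkable C version of the 32-bit case:

#include <stdint.h>

/* High 32 bits of the signed product, derived from the unsigned one. */
static int32_t mulsh32_ref(int32_t a, int32_t b)
{
    uint32_t hi = (uint32_t)(((uint64_t)(uint32_t)a * (uint32_t)b) >> 32);
    hi -= (a < 0) ? (uint32_t)b : 0;
    hi -= (b < 0) ? (uint32_t)a : 0;
    return (int32_t)hi;
}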
+
+static inline void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
+                                    TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
+{
+    if (TCG_TARGET_HAS_add2_i64) {
+        tcg_gen_op6_i64(INDEX_op_add2_i64, rl, rh, al, ah, bl, bh);
+        /* Allow the optimizer room to replace add2 with two moves.  */
+        tcg_gen_op0(INDEX_op_nop);
+    } else {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        TCGv_i64 t1 = tcg_temp_new_i64();
+        tcg_gen_add_i64(t0, al, bl);
+        tcg_gen_setcond_i64(TCG_COND_LTU, t1, t0, al);
+        tcg_gen_add_i64(rh, ah, bh);
+        tcg_gen_add_i64(rh, rh, t1);
+        tcg_gen_mov_i64(rl, t0);
+        tcg_temp_free_i64(t0);
+        tcg_temp_free_i64(t1);
+    }
+}
+
+static inline void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
+                                    TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
+{
+    if (TCG_TARGET_HAS_sub2_i64) {
+        tcg_gen_op6_i64(INDEX_op_sub2_i64, rl, rh, al, ah, bl, bh);
+        /* Allow the optimizer room to replace sub2 with two moves.  */
+        tcg_gen_op0(INDEX_op_nop);
+    } else {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        TCGv_i64 t1 = tcg_temp_new_i64();
+        tcg_gen_sub_i64(t0, al, bl);
+        tcg_gen_setcond_i64(TCG_COND_LTU, t1, al, bl);
+        tcg_gen_sub_i64(rh, ah, bh);
+        tcg_gen_sub_i64(rh, rh, t1);
+        tcg_gen_mov_i64(rl, t0);
+        tcg_temp_free_i64(t0);
+        tcg_temp_free_i64(t1);
+    }
+}
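In the add2/sub2 fallbacks the carry out of (or borrow from) the low word is recovered with an unsigned compare (TCG_COND_LTU), the usual multi-precision trick. In plain C:

#include <stdint.h>

/* 128-bit add/sub from 64-bit halves, as in the fallbacks above. */
static void add128_ref(uint64_t *rl, uint64_t *rh,
                       uint64_t al, uint64_t ah, uint64_t bl, uint64_t bh)
{
    uint64_t lo = al + bl;
    uint64_t carry = lo < al;          /* setcond LTU t1, t0, al */
    *rh = ah + bh + carry;
    *rl = lo;
}

static void sub128_ref(uint64_t *rl, uint64_t *rh,
                       uint64_t al, uint64_t ah, uint64_t bl, uint64_t bh)
{
    uint64_t borrow = al < bl;         /* setcond LTU t1, al, bl */
    *rh = ah - bh - borrow;
    *rl = al - bl;
}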
+
+static inline void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh,
+                                     TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    if (TCG_TARGET_HAS_mulu2_i64) {
+        tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2);
+        /* Allow the optimizer room to replace mulu2 with two moves.  */
+        tcg_gen_op0(INDEX_op_nop);
+    } else if (TCG_TARGET_HAS_muluh_i64) {
+        TCGv_i64 t = tcg_temp_new_i64();
+        tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
+        tcg_gen_op3_i64(INDEX_op_muluh_i64, rh, arg1, arg2);
+        tcg_gen_mov_i64(rl, t);
+        tcg_temp_free_i64(t);
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        int sizemask = 0;
+        /* Return value and both arguments are 64-bit and unsigned.  */
+        sizemask |= tcg_gen_sizemask(0, 1, 0);
+        sizemask |= tcg_gen_sizemask(1, 1, 0);
+        sizemask |= tcg_gen_sizemask(2, 1, 0);
+        tcg_gen_mul_i64(t0, arg1, arg2);
+        tcg_gen_helper64(tcg_helper_muluh_i64, sizemask, rh, arg1, arg2);
+        tcg_gen_mov_i64(rl, t0);
+        tcg_temp_free_i64(t0);
+    }
+}
+
+static inline void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh,
+                                     TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    if (TCG_TARGET_HAS_muls2_i64) {
+        tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2);
+        /* Allow the optimizer room to replace muls2 with two moves.  */
+        tcg_gen_op0(INDEX_op_nop);
+    } else if (TCG_TARGET_HAS_mulsh_i64) {
+        TCGv_i64 t = tcg_temp_new_i64();
+        tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
+        tcg_gen_op3_i64(INDEX_op_mulsh_i64, rh, arg1, arg2);
+        tcg_gen_mov_i64(rl, t);
+        tcg_temp_free_i64(t);
+    } else {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        int sizemask = 0;
+        /* Return value and both arguments are 64-bit and signed.  */
+        sizemask |= tcg_gen_sizemask(0, 1, 1);
+        sizemask |= tcg_gen_sizemask(1, 1, 1);
+        sizemask |= tcg_gen_sizemask(2, 1, 1);
+        tcg_gen_mul_i64(t0, arg1, arg2);
+        tcg_gen_helper64(tcg_helper_mulsh_i64, sizemask, rh, arg1, arg2);
+        tcg_gen_mov_i64(rl, t0);
+        tcg_temp_free_i64(t0);
+    }
 }
 
 /***************************************/
@@ -2147,10 +2597,12 @@
 #define tcg_global_mem_new tcg_global_mem_new_i32
 #define tcg_temp_local_new() tcg_temp_local_new_i32()
 #define tcg_temp_free tcg_temp_free_i32
-#define tcg_gen_qemu_ldst_op tcg_gen_op3i_i32
-#define tcg_gen_qemu_ldst_op_i64 tcg_gen_qemu_ldst_op_i64_i32
 #define TCGV_UNUSED(x) TCGV_UNUSED_I32(x)
+#define TCGV_IS_UNUSED(x) TCGV_IS_UNUSED_I32(x)
 #define TCGV_EQUAL(a, b) TCGV_EQUAL_I32(a, b)
+#define tcg_add_param_tl tcg_add_param_i32
+#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i32
+#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i32
 #else
 #define TCGv TCGv_i64
 #define tcg_temp_new() tcg_temp_new_i64()
@@ -2158,10 +2610,12 @@
 #define tcg_global_mem_new tcg_global_mem_new_i64
 #define tcg_temp_local_new() tcg_temp_local_new_i64()
 #define tcg_temp_free tcg_temp_free_i64
-#define tcg_gen_qemu_ldst_op tcg_gen_op3i_i64
-#define tcg_gen_qemu_ldst_op_i64 tcg_gen_qemu_ldst_op_i64_i64
 #define TCGV_UNUSED(x) TCGV_UNUSED_I64(x)
+#define TCGV_IS_UNUSED(x) TCGV_IS_UNUSED_I64(x)
 #define TCGV_EQUAL(a, b) TCGV_EQUAL_I64(a, b)
+#define tcg_add_param_tl tcg_add_param_i64
+#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i64
+#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i64
 #endif
 
 /* debug info: write the PC of the corresponding QEMU CPU instruction */
@@ -2169,214 +2623,91 @@
 {
     /* XXX: must really use a 32 bit size for TCGArg in all cases */
 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-    tcg_gen_op2ii(INDEX_op_debug_insn_start,
+    tcg_gen_op2ii(INDEX_op_debug_insn_start, 
                   (uint32_t)(pc), (uint32_t)(pc >> 32));
 #else
     tcg_gen_op1i(INDEX_op_debug_insn_start, pc);
 #endif
 }
 
-static inline void tcg_gen_exit_tb(tcg_target_long val)
+static inline void tcg_gen_exit_tb(uintptr_t val)
 {
     tcg_gen_op1i(INDEX_op_exit_tb, val);
 }
 
-static inline void tcg_gen_goto_tb(int idx)
+static inline void tcg_gen_goto_tb(unsigned idx)
 {
+    /* We only support two chained exits.  */
+    tcg_debug_assert(idx <= 1);
+#ifdef CONFIG_DEBUG_TCG
+    /* Verify that we haven't seen this numbered exit before.  */
+    tcg_debug_assert((tcg_ctx.goto_tb_issue_mask & (1 << idx)) == 0);
+    tcg_ctx.goto_tb_issue_mask |= 1 << idx;
+#endif
     tcg_gen_op1i(INDEX_op_goto_tb, idx);
 }
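The new assertions in tcg_gen_goto_tb encode the contract that a translation block may chain through at most two exits, slot 0 and slot 1, and may issue each slot only once; tcg_func_start() (further down in this patch) clears goto_tb_issue_mask so the check restarts per block. A standalone sketch of that bookkeeping, with illustrative names:

    #include <assert.h>

    /* Models the CONFIG_DEBUG_TCG bookkeeping: two chain slots per block,
       each may be issued at most once. */
    static unsigned goto_tb_issue_mask;

    static void issue_goto_tb(unsigned idx)
    {
        assert(idx <= 1);                                 /* slots 0 and 1 only */
        assert((goto_tb_issue_mask & (1u << idx)) == 0);  /* not yet issued */
        goto_tb_issue_mask |= 1u << idx;
    }

    int main(void)
    {
        goto_tb_issue_mask = 0;   /* what tcg_func_start() does per block */
        issue_goto_tb(0);
        issue_goto_tb(1);         /* a repeat, or a third slot, would assert */
        return 0;
    }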
 
-#if TCG_TARGET_REG_BITS == 32
+
+void tcg_gen_qemu_ld_i32(TCGv_i32, TCGv, TCGArg, TCGMemOp);
+void tcg_gen_qemu_st_i32(TCGv_i32, TCGv, TCGArg, TCGMemOp);
+void tcg_gen_qemu_ld_i64(TCGv_i64, TCGv, TCGArg, TCGMemOp);
+void tcg_gen_qemu_st_i64(TCGv_i64, TCGv, TCGArg, TCGMemOp);
+
 static inline void tcg_gen_qemu_ld8u(TCGv ret, TCGv addr, int mem_index)
 {
-#if TARGET_LONG_BITS == 32
-    tcg_gen_op3i_i32(INDEX_op_qemu_ld8u, ret, addr, mem_index);
-#else
-    tcg_gen_op4i_i32(INDEX_op_qemu_ld8u, TCGV_LOW(ret), TCGV_LOW(addr),
-                     TCGV_HIGH(addr), mem_index);
-    tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
-#endif
+    tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_UB);
 }
 
 static inline void tcg_gen_qemu_ld8s(TCGv ret, TCGv addr, int mem_index)
 {
-#if TARGET_LONG_BITS == 32
-    tcg_gen_op3i_i32(INDEX_op_qemu_ld8s, ret, addr, mem_index);
-#else
-    tcg_gen_op4i_i32(INDEX_op_qemu_ld8s, TCGV_LOW(ret), TCGV_LOW(addr),
-                     TCGV_HIGH(addr), mem_index);
-    tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
-#endif
+    tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_SB);
 }
 
 static inline void tcg_gen_qemu_ld16u(TCGv ret, TCGv addr, int mem_index)
 {
-#if TARGET_LONG_BITS == 32
-    tcg_gen_op3i_i32(INDEX_op_qemu_ld16u, ret, addr, mem_index);
-#else
-    tcg_gen_op4i_i32(INDEX_op_qemu_ld16u, TCGV_LOW(ret), TCGV_LOW(addr),
-                     TCGV_HIGH(addr), mem_index);
-    tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
-#endif
+    tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_TEUW);
 }
 
 static inline void tcg_gen_qemu_ld16s(TCGv ret, TCGv addr, int mem_index)
 {
-#if TARGET_LONG_BITS == 32
-    tcg_gen_op3i_i32(INDEX_op_qemu_ld16s, ret, addr, mem_index);
-#else
-    tcg_gen_op4i_i32(INDEX_op_qemu_ld16s, TCGV_LOW(ret), TCGV_LOW(addr),
-                     TCGV_HIGH(addr), mem_index);
-    tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
-#endif
+    tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_TESW);
 }
 
 static inline void tcg_gen_qemu_ld32u(TCGv ret, TCGv addr, int mem_index)
 {
-#if TARGET_LONG_BITS == 32
-    tcg_gen_op3i_i32(INDEX_op_qemu_ld32, ret, addr, mem_index);
-#else
-    tcg_gen_op4i_i32(INDEX_op_qemu_ld32, TCGV_LOW(ret), TCGV_LOW(addr),
-                     TCGV_HIGH(addr), mem_index);
-    tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
-#endif
+    tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_TEUL);
 }
 
 static inline void tcg_gen_qemu_ld32s(TCGv ret, TCGv addr, int mem_index)
 {
-#if TARGET_LONG_BITS == 32
-    tcg_gen_op3i_i32(INDEX_op_qemu_ld32, ret, addr, mem_index);
-#else
-    tcg_gen_op4i_i32(INDEX_op_qemu_ld32, TCGV_LOW(ret), TCGV_LOW(addr),
-                     TCGV_HIGH(addr), mem_index);
-    tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
-#endif
+    tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_TESL);
 }
 
 static inline void tcg_gen_qemu_ld64(TCGv_i64 ret, TCGv addr, int mem_index)
 {
-#if TARGET_LONG_BITS == 32
-    tcg_gen_op4i_i32(INDEX_op_qemu_ld64, TCGV_LOW(ret), TCGV_HIGH(ret), addr, mem_index);
-#else
-    tcg_gen_op5i_i32(INDEX_op_qemu_ld64, TCGV_LOW(ret), TCGV_HIGH(ret),
-                     TCGV_LOW(addr), TCGV_HIGH(addr), mem_index);
-#endif
+    tcg_gen_qemu_ld_i64(ret, addr, mem_index, MO_TEQ);
 }
 
 static inline void tcg_gen_qemu_st8(TCGv arg, TCGv addr, int mem_index)
 {
-#if TARGET_LONG_BITS == 32
-    tcg_gen_op3i_i32(INDEX_op_qemu_st8, arg, addr, mem_index);
-#else
-    tcg_gen_op4i_i32(INDEX_op_qemu_st8, TCGV_LOW(arg), TCGV_LOW(addr),
-                     TCGV_HIGH(addr), mem_index);
-#endif
+    tcg_gen_qemu_st_tl(arg, addr, mem_index, MO_UB);
 }
 
 static inline void tcg_gen_qemu_st16(TCGv arg, TCGv addr, int mem_index)
 {
-#if TARGET_LONG_BITS == 32
-    tcg_gen_op3i_i32(INDEX_op_qemu_st16, arg, addr, mem_index);
-#else
-    tcg_gen_op4i_i32(INDEX_op_qemu_st16, TCGV_LOW(arg), TCGV_LOW(addr),
-                     TCGV_HIGH(addr), mem_index);
-#endif
+    tcg_gen_qemu_st_tl(arg, addr, mem_index, MO_TEUW);
 }
 
 static inline void tcg_gen_qemu_st32(TCGv arg, TCGv addr, int mem_index)
 {
-#if TARGET_LONG_BITS == 32
-    tcg_gen_op3i_i32(INDEX_op_qemu_st32, arg, addr, mem_index);
-#else
-    tcg_gen_op4i_i32(INDEX_op_qemu_st32, TCGV_LOW(arg), TCGV_LOW(addr),
-                     TCGV_HIGH(addr), mem_index);
-#endif
+    tcg_gen_qemu_st_tl(arg, addr, mem_index, MO_TEUL);
 }
 
 static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
 {
-#if TARGET_LONG_BITS == 32
-    tcg_gen_op4i_i32(INDEX_op_qemu_st64, TCGV_LOW(arg), TCGV_HIGH(arg), addr,
-                     mem_index);
-#else
-    tcg_gen_op5i_i32(INDEX_op_qemu_st64, TCGV_LOW(arg), TCGV_HIGH(arg),
-                     TCGV_LOW(addr), TCGV_HIGH(addr), mem_index);
-#endif
+    tcg_gen_qemu_st_i64(arg, addr, mem_index, MO_TEQ);
 }
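From here on the legacy ld/st helpers are one-line wrappers that pass a TCGMemOp describing the access: MO_UB/MO_SB are byte loads, MO_TEUW/MO_TESW 16-bit target-endian accesses, MO_TEUL/MO_TESL 32-bit, MO_TEQ 64-bit, where the TE prefix resolves to MO_LE or MO_BE according to guest byte order. A sketch of how such names compose; the XMO_* values are illustrative stand-ins, the real constants live in tcg/tcg.h:

    /* Illustrative composition of MemOp-style names (sketch only).
       XMO_TE would equal XMO_BSWAP when guest and host byte order differ;
       a matching byte order is assumed here. */
    enum {
        XMO_8 = 0, XMO_16 = 1, XMO_32 = 2, XMO_64 = 3,  /* log2(size)  */
        XMO_SIGN  = 1 << 2,                             /* sign-extend */
        XMO_BSWAP = 1 << 3,                             /* byte swap   */
        XMO_TE    = 0,                                  /* assumption  */
        XMO_TEUW  = XMO_TE | XMO_16,
        XMO_TESW  = XMO_TE | XMO_16 | XMO_SIGN,
        XMO_TEUL  = XMO_TE | XMO_32,
        XMO_TEQ   = XMO_TE | XMO_64,
    };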
 
-#define tcg_gen_ld_ptr tcg_gen_ld_i32
-#define tcg_gen_discard_ptr tcg_gen_discard_i32
-
-#else /* TCG_TARGET_REG_BITS == 32 */
-
-static inline void tcg_gen_qemu_ld8u(TCGv ret, TCGv addr, int mem_index)
-{
-    tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld8u, ret, addr, mem_index);
-}
-
-static inline void tcg_gen_qemu_ld8s(TCGv ret, TCGv addr, int mem_index)
-{
-    tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld8s, ret, addr, mem_index);
-}
-
-static inline void tcg_gen_qemu_ld16u(TCGv ret, TCGv addr, int mem_index)
-{
-    tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld16u, ret, addr, mem_index);
-}
-
-static inline void tcg_gen_qemu_ld16s(TCGv ret, TCGv addr, int mem_index)
-{
-    tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld16s, ret, addr, mem_index);
-}
-
-static inline void tcg_gen_qemu_ld32u(TCGv ret, TCGv addr, int mem_index)
-{
-#if TARGET_LONG_BITS == 32
-    tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld32, ret, addr, mem_index);
-#else
-    tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld32u, ret, addr, mem_index);
-#endif
-}
-
-static inline void tcg_gen_qemu_ld32s(TCGv ret, TCGv addr, int mem_index)
-{
-#if TARGET_LONG_BITS == 32
-    tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld32, ret, addr, mem_index);
-#else
-    tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld32s, ret, addr, mem_index);
-#endif
-}
-
-static inline void tcg_gen_qemu_ld64(TCGv_i64 ret, TCGv addr, int mem_index)
-{
-    tcg_gen_qemu_ldst_op_i64(INDEX_op_qemu_ld64, ret, addr, mem_index);
-}
-
-static inline void tcg_gen_qemu_st8(TCGv arg, TCGv addr, int mem_index)
-{
-    tcg_gen_qemu_ldst_op(INDEX_op_qemu_st8, arg, addr, mem_index);
-}
-
-static inline void tcg_gen_qemu_st16(TCGv arg, TCGv addr, int mem_index)
-{
-    tcg_gen_qemu_ldst_op(INDEX_op_qemu_st16, arg, addr, mem_index);
-}
-
-static inline void tcg_gen_qemu_st32(TCGv arg, TCGv addr, int mem_index)
-{
-    tcg_gen_qemu_ldst_op(INDEX_op_qemu_st32, arg, addr, mem_index);
-}
-
-static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
-{
-    tcg_gen_qemu_ldst_op_i64(INDEX_op_qemu_st64, arg, addr, mem_index);
-}
-
-#define tcg_gen_ld_ptr tcg_gen_ld_i64
-#define tcg_gen_discard_ptr tcg_gen_discard_i64
-
-#endif /* TCG_TARGET_REG_BITS != 32 */
-
 #if TARGET_LONG_BITS == 64
 #define tcg_gen_movi_tl tcg_gen_movi_i64
 #define tcg_gen_mov_tl tcg_gen_mov_i64
@@ -2437,6 +2768,7 @@
 #define tcg_gen_bswap32_tl tcg_gen_bswap32_i64
 #define tcg_gen_bswap64_tl tcg_gen_bswap64_i64
 #define tcg_gen_concat_tl_i64 tcg_gen_concat32_i64
+#define tcg_gen_extr_i64_tl tcg_gen_extr32_i64
 #define tcg_gen_andc_tl tcg_gen_andc_i64
 #define tcg_gen_eqv_tl tcg_gen_eqv_i64
 #define tcg_gen_nand_tl tcg_gen_nand_i64
@@ -2449,6 +2781,11 @@
 #define tcg_gen_deposit_tl tcg_gen_deposit_i64
 #define tcg_const_tl tcg_const_i64
 #define tcg_const_local_tl tcg_const_local_i64
+#define tcg_gen_movcond_tl tcg_gen_movcond_i64
+#define tcg_gen_add2_tl tcg_gen_add2_i64
+#define tcg_gen_sub2_tl tcg_gen_sub2_i64
+#define tcg_gen_mulu2_tl tcg_gen_mulu2_i64
+#define tcg_gen_muls2_tl tcg_gen_muls2_i64
 #else
 #define tcg_gen_movi_tl tcg_gen_movi_i32
 #define tcg_gen_mov_tl tcg_gen_mov_i32
@@ -2508,6 +2845,7 @@
 #define tcg_gen_bswap16_tl tcg_gen_bswap16_i32
 #define tcg_gen_bswap32_tl tcg_gen_bswap32_i32
 #define tcg_gen_concat_tl_i64 tcg_gen_concat_i32_i64
+#define tcg_gen_extr_tl_i64 tcg_gen_extr_i32_i64
 #define tcg_gen_andc_tl tcg_gen_andc_i32
 #define tcg_gen_eqv_tl tcg_gen_eqv_i32
 #define tcg_gen_nand_tl tcg_gen_nand_i32
@@ -2520,14 +2858,33 @@
 #define tcg_gen_deposit_tl tcg_gen_deposit_i32
 #define tcg_const_tl tcg_const_i32
 #define tcg_const_local_tl tcg_const_local_i32
+#define tcg_gen_movcond_tl tcg_gen_movcond_i32
+#define tcg_gen_add2_tl tcg_gen_add2_i32
+#define tcg_gen_sub2_tl tcg_gen_sub2_i32
+#define tcg_gen_mulu2_tl tcg_gen_mulu2_i32
+#define tcg_gen_muls2_tl tcg_gen_muls2_i32
 #endif
 
 #if TCG_TARGET_REG_BITS == 32
-#define tcg_gen_add_ptr tcg_gen_add_i32
-#define tcg_gen_addi_ptr tcg_gen_addi_i32
-#define tcg_gen_ext_i32_ptr tcg_gen_mov_i32
-#else /* TCG_TARGET_REG_BITS == 32 */
-#define tcg_gen_add_ptr tcg_gen_add_i64
-#define tcg_gen_addi_ptr tcg_gen_addi_i64
-#define tcg_gen_ext_i32_ptr tcg_gen_ext_i32_i64
-#endif /* TCG_TARGET_REG_BITS != 32 */
+# define tcg_gen_ld_ptr(R, A, O) \
+    tcg_gen_ld_i32(TCGV_PTR_TO_NAT(R), (A), (O))
+# define tcg_gen_discard_ptr(A) \
+    tcg_gen_discard_i32(TCGV_PTR_TO_NAT(A))
+# define tcg_gen_add_ptr(R, A, B) \
+    tcg_gen_add_i32(TCGV_PTR_TO_NAT(R), TCGV_PTR_TO_NAT(A), TCGV_PTR_TO_NAT(B))
+# define tcg_gen_addi_ptr(R, A, B) \
+    tcg_gen_addi_i32(TCGV_PTR_TO_NAT(R), TCGV_PTR_TO_NAT(A), (B))
+# define tcg_gen_ext_i32_ptr(R, A) \
+    tcg_gen_mov_i32(TCGV_PTR_TO_NAT(R), (A))
+#else
+# define tcg_gen_ld_ptr(R, A, O) \
+    tcg_gen_ld_i64(TCGV_PTR_TO_NAT(R), (A), (O))
+# define tcg_gen_discard_ptr(A) \
+    tcg_gen_discard_i64(TCGV_PTR_TO_NAT(A))
+# define tcg_gen_add_ptr(R, A, B) \
+    tcg_gen_add_i64(TCGV_PTR_TO_NAT(R), TCGV_PTR_TO_NAT(A), TCGV_PTR_TO_NAT(B))
+# define tcg_gen_addi_ptr(R, A, B) \
+    tcg_gen_addi_i64(TCGV_PTR_TO_NAT(R), TCGV_PTR_TO_NAT(A), (B))
+# define tcg_gen_ext_i32_ptr(R, A) \
+    tcg_gen_ext_i32_i64(TCGV_PTR_TO_NAT(R), (A))
+#endif /* TCG_TARGET_REG_BITS == 32 */
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index 1b8a6e4..d71707d 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -27,46 +27,53 @@
  */
 
 /* predefined ops */
-DEF(end, 0, 0, 0, 0) /* must be kept first */
-DEF(nop, 0, 0, 0, 0)
-DEF(nop1, 0, 0, 1, 0)
-DEF(nop2, 0, 0, 2, 0)
-DEF(nop3, 0, 0, 3, 0)
-DEF(nopn, 0, 0, 1, 0) /* variable number of parameters */
+DEF(end, 0, 0, 0, TCG_OPF_NOT_PRESENT) /* must be kept first */
+DEF(nop, 0, 0, 0, TCG_OPF_NOT_PRESENT)
+DEF(nop1, 0, 0, 1, TCG_OPF_NOT_PRESENT)
+DEF(nop2, 0, 0, 2, TCG_OPF_NOT_PRESENT)
+DEF(nop3, 0, 0, 3, TCG_OPF_NOT_PRESENT)
 
-DEF(discard, 1, 0, 0, 0)
+/* variable number of parameters */
+DEF(nopn, 0, 0, 1, TCG_OPF_NOT_PRESENT)
 
-DEF(set_label, 0, 0, 1, 0)
-DEF(call, 0, 1, 2, TCG_OPF_SIDE_EFFECTS) /* variable number of parameters */
-DEF(jmp, 0, 1, 0, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
-DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
+DEF(discard, 1, 0, 0, TCG_OPF_NOT_PRESENT)
+DEF(set_label, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT)
+
+/* variable number of parameters */
+DEF(call, 0, 1, 2, TCG_OPF_CALL_CLOBBER)
+
+DEF(br, 0, 0, 1, TCG_OPF_BB_END)
+
+#define IMPL(X) (__builtin_constant_p(X) && !(X) ? TCG_OPF_NOT_PRESENT : 0)
+#if TCG_TARGET_REG_BITS == 32
+# define IMPL64  TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT
+#else
+# define IMPL64  TCG_OPF_64BIT
+#endif
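The IMPL()/IMPL64 helpers replace the per-opcode #ifdef blocks below: every opcode now keeps its slot in the table, and an opcode the target cannot emit is merely flagged TCG_OPF_NOT_PRESENT when its TCG_TARGET_HAS_* constant folds to zero. A standalone sketch of the __builtin_constant_p idiom (GCC/Clang builtin; NOT_PRESENT and the HAS_* macros are illustrative):

    #include <assert.h>

    #define NOT_PRESENT  0x80   /* stand-in for TCG_OPF_NOT_PRESENT */
    /* Folds to NOT_PRESENT when X is a compile-time zero, to 0 otherwise. */
    #define IMPL(X)  (__builtin_constant_p(X) && !(X) ? NOT_PRESENT : 0)

    #define HAS_FEATURE_A 1     /* illustrative TCG_TARGET_HAS_* stand-ins */
    #define HAS_FEATURE_B 0

    int main(void)
    {
        assert(IMPL(HAS_FEATURE_A) == 0);            /* op stays available */
        assert(IMPL(HAS_FEATURE_B) == NOT_PRESENT);  /* op marked missing  */
        return 0;
    }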
 
 DEF(mov_i32, 1, 1, 0, 0)
 DEF(movi_i32, 1, 0, 1, 0)
 DEF(setcond_i32, 1, 2, 1, 0)
+DEF(movcond_i32, 1, 4, 1, IMPL(TCG_TARGET_HAS_movcond_i32))
 /* load/store */
 DEF(ld8u_i32, 1, 1, 1, 0)
 DEF(ld8s_i32, 1, 1, 1, 0)
 DEF(ld16u_i32, 1, 1, 1, 0)
 DEF(ld16s_i32, 1, 1, 1, 0)
 DEF(ld_i32, 1, 1, 1, 0)
-DEF(st8_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
-DEF(st16_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
-DEF(st_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
+DEF(st8_i32, 0, 2, 1, 0)
+DEF(st16_i32, 0, 2, 1, 0)
+DEF(st_i32, 0, 2, 1, 0)
 /* arith */
 DEF(add_i32, 1, 2, 0, 0)
 DEF(sub_i32, 1, 2, 0, 0)
 DEF(mul_i32, 1, 2, 0, 0)
-#ifdef TCG_TARGET_HAS_div_i32
-DEF(div_i32, 1, 2, 0, 0)
-DEF(divu_i32, 1, 2, 0, 0)
-DEF(rem_i32, 1, 2, 0, 0)
-DEF(remu_i32, 1, 2, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_div2_i32
-DEF(div2_i32, 2, 3, 0, 0)
-DEF(divu2_i32, 2, 3, 0, 0)
-#endif
+DEF(div_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_div_i32))
+DEF(divu_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_div_i32))
+DEF(rem_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rem_i32))
+DEF(remu_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rem_i32))
+DEF(div2_i32, 2, 3, 0, IMPL(TCG_TARGET_HAS_div2_i32))
+DEF(divu2_i32, 2, 3, 0, IMPL(TCG_TARGET_HAS_div2_i32))
 DEF(and_i32, 1, 2, 0, 0)
 DEF(or_i32, 1, 2, 0, 0)
 DEF(xor_i32, 1, 2, 0, 0)
@@ -74,242 +81,206 @@
 DEF(shl_i32, 1, 2, 0, 0)
 DEF(shr_i32, 1, 2, 0, 0)
 DEF(sar_i32, 1, 2, 0, 0)
-#ifdef TCG_TARGET_HAS_rot_i32
-DEF(rotl_i32, 1, 2, 0, 0)
-DEF(rotr_i32, 1, 2, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_deposit_i32
-DEF(deposit_i32, 1, 2, 2, 0)
-#endif
+DEF(rotl_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32))
+DEF(rotr_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32))
+DEF(deposit_i32, 1, 2, 2, IMPL(TCG_TARGET_HAS_deposit_i32))
 
-DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
-#if TCG_TARGET_REG_BITS == 32
-DEF(add2_i32, 2, 4, 0, 0)
-DEF(sub2_i32, 2, 4, 0, 0)
-DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
-DEF(mulu2_i32, 2, 2, 0, 0)
-DEF(setcond2_i32, 1, 4, 1, 0)
-#endif
-#ifdef TCG_TARGET_HAS_ext8s_i32
-DEF(ext8s_i32, 1, 1, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_ext16s_i32
-DEF(ext16s_i32, 1, 1, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_ext8u_i32
-DEF(ext8u_i32, 1, 1, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_ext16u_i32
-DEF(ext16u_i32, 1, 1, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_bswap16_i32
-DEF(bswap16_i32, 1, 1, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_bswap32_i32
-DEF(bswap32_i32, 1, 1, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_not_i32
-DEF(not_i32, 1, 1, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_neg_i32
-DEF(neg_i32, 1, 1, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_andc_i32
-DEF(andc_i32, 1, 2, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_orc_i32
-DEF(orc_i32, 1, 2, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_eqv_i32
-DEF(eqv_i32, 1, 2, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_nand_i32
-DEF(nand_i32, 1, 2, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_nor_i32
-DEF(nor_i32, 1, 2, 0, 0)
-#endif
+DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END)
 
-#if TCG_TARGET_REG_BITS == 64
-DEF(mov_i64, 1, 1, 0, 0)
-DEF(movi_i64, 1, 0, 1, 0)
-DEF(setcond_i64, 1, 2, 1, 0)
+DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_add2_i32))
+DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_sub2_i32))
+DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_mulu2_i32))
+DEF(muls2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_muls2_i32))
+DEF(muluh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i32))
+DEF(mulsh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i32))
+DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | IMPL(TCG_TARGET_REG_BITS == 32))
+DEF(setcond2_i32, 1, 4, 1, IMPL(TCG_TARGET_REG_BITS == 32))
+
+DEF(ext8s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8s_i32))
+DEF(ext16s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16s_i32))
+DEF(ext8u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8u_i32))
+DEF(ext16u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16u_i32))
+DEF(bswap16_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_bswap16_i32))
+DEF(bswap32_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_bswap32_i32))
+DEF(not_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_not_i32))
+DEF(neg_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_neg_i32))
+DEF(andc_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_andc_i32))
+DEF(orc_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_orc_i32))
+DEF(eqv_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_eqv_i32))
+DEF(nand_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nand_i32))
+DEF(nor_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nor_i32))
+
+DEF(mov_i64, 1, 1, 0, IMPL64)
+DEF(movi_i64, 1, 0, 1, IMPL64)
+DEF(setcond_i64, 1, 2, 1, IMPL64)
+DEF(movcond_i64, 1, 4, 1, IMPL64 | IMPL(TCG_TARGET_HAS_movcond_i64))
 /* load/store */
-DEF(ld8u_i64, 1, 1, 1, 0)
-DEF(ld8s_i64, 1, 1, 1, 0)
-DEF(ld16u_i64, 1, 1, 1, 0)
-DEF(ld16s_i64, 1, 1, 1, 0)
-DEF(ld32u_i64, 1, 1, 1, 0)
-DEF(ld32s_i64, 1, 1, 1, 0)
-DEF(ld_i64, 1, 1, 1, 0)
-DEF(st8_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
-DEF(st16_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
-DEF(st32_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
-DEF(st_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
+DEF(ld8u_i64, 1, 1, 1, IMPL64)
+DEF(ld8s_i64, 1, 1, 1, IMPL64)
+DEF(ld16u_i64, 1, 1, 1, IMPL64)
+DEF(ld16s_i64, 1, 1, 1, IMPL64)
+DEF(ld32u_i64, 1, 1, 1, IMPL64)
+DEF(ld32s_i64, 1, 1, 1, IMPL64)
+DEF(ld_i64, 1, 1, 1, IMPL64)
+DEF(st8_i64, 0, 2, 1, IMPL64)
+DEF(st16_i64, 0, 2, 1, IMPL64)
+DEF(st32_i64, 0, 2, 1, IMPL64)
+DEF(st_i64, 0, 2, 1, IMPL64)
 /* arith */
-DEF(add_i64, 1, 2, 0, 0)
-DEF(sub_i64, 1, 2, 0, 0)
-DEF(mul_i64, 1, 2, 0, 0)
-#ifdef TCG_TARGET_HAS_div_i64
-DEF(div_i64, 1, 2, 0, 0)
-DEF(divu_i64, 1, 2, 0, 0)
-DEF(rem_i64, 1, 2, 0, 0)
-DEF(remu_i64, 1, 2, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_div2_i64
-DEF(div2_i64, 2, 3, 0, 0)
-DEF(divu2_i64, 2, 3, 0, 0)
-#endif
-DEF(and_i64, 1, 2, 0, 0)
-DEF(or_i64, 1, 2, 0, 0)
-DEF(xor_i64, 1, 2, 0, 0)
+DEF(add_i64, 1, 2, 0, IMPL64)
+DEF(sub_i64, 1, 2, 0, IMPL64)
+DEF(mul_i64, 1, 2, 0, IMPL64)
+DEF(div_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div_i64))
+DEF(divu_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div_i64))
+DEF(rem_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rem_i64))
+DEF(remu_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rem_i64))
+DEF(div2_i64, 2, 3, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div2_i64))
+DEF(divu2_i64, 2, 3, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div2_i64))
+DEF(and_i64, 1, 2, 0, IMPL64)
+DEF(or_i64, 1, 2, 0, IMPL64)
+DEF(xor_i64, 1, 2, 0, IMPL64)
 /* shifts/rotates */
-DEF(shl_i64, 1, 2, 0, 0)
-DEF(shr_i64, 1, 2, 0, 0)
-DEF(sar_i64, 1, 2, 0, 0)
-#ifdef TCG_TARGET_HAS_rot_i64
-DEF(rotl_i64, 1, 2, 0, 0)
-DEF(rotr_i64, 1, 2, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_deposit_i64
-DEF(deposit_i64, 1, 2, 2, 0)
-#endif
+DEF(shl_i64, 1, 2, 0, IMPL64)
+DEF(shr_i64, 1, 2, 0, IMPL64)
+DEF(sar_i64, 1, 2, 0, IMPL64)
+DEF(rotl_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64))
+DEF(rotr_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64))
+DEF(deposit_i64, 1, 2, 2, IMPL64 | IMPL(TCG_TARGET_HAS_deposit_i64))
 
-DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
-#ifdef TCG_TARGET_HAS_ext8s_i64
-DEF(ext8s_i64, 1, 1, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_ext16s_i64
-DEF(ext16s_i64, 1, 1, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_ext32s_i64
-DEF(ext32s_i64, 1, 1, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_ext8u_i64
-DEF(ext8u_i64, 1, 1, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_ext16u_i64
-DEF(ext16u_i64, 1, 1, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_ext32u_i64
-DEF(ext32u_i64, 1, 1, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_bswap16_i64
-DEF(bswap16_i64, 1, 1, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_bswap32_i64
-DEF(bswap32_i64, 1, 1, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_bswap64_i64
-DEF(bswap64_i64, 1, 1, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_not_i64
-DEF(not_i64, 1, 1, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_neg_i64
-DEF(neg_i64, 1, 1, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_andc_i64
-DEF(andc_i64, 1, 2, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_orc_i64
-DEF(orc_i64, 1, 2, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_eqv_i64
-DEF(eqv_i64, 1, 2, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_nand_i64
-DEF(nand_i64, 1, 2, 0, 0)
-#endif
-#ifdef TCG_TARGET_HAS_nor_i64
-DEF(nor_i64, 1, 2, 0, 0)
-#endif
-#endif
+DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | IMPL64)
+DEF(ext8s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8s_i64))
+DEF(ext16s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16s_i64))
+DEF(ext32s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32s_i64))
+DEF(ext8u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8u_i64))
+DEF(ext16u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16u_i64))
+DEF(ext32u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32u_i64))
+DEF(bswap16_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap16_i64))
+DEF(bswap32_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap32_i64))
+DEF(bswap64_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap64_i64))
+DEF(not_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_not_i64))
+DEF(neg_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_neg_i64))
+DEF(andc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_andc_i64))
+DEF(orc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_orc_i64))
+DEF(eqv_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_eqv_i64))
+DEF(nand_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nand_i64))
+DEF(nor_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nor_i64))
+
+DEF(add2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_add2_i64))
+DEF(sub2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_sub2_i64))
+DEF(mulu2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulu2_i64))
+DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64))
+DEF(muluh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i64))
+DEF(mulsh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i64))
 
 /* QEMU specific */
 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-DEF(debug_insn_start, 0, 0, 2, 0)
+DEF(debug_insn_start, 0, 0, 2, TCG_OPF_NOT_PRESENT)
 #else
-DEF(debug_insn_start, 0, 0, 1, 0)
+DEF(debug_insn_start, 0, 0, 1, TCG_OPF_NOT_PRESENT)
 #endif
-DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
-DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
-/* Note: even if TARGET_LONG_BITS is not defined, the INDEX_op
-   constants must be defined */
+DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END)
+DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END)
+
+#define IMPL_NEW_LDST \
+    (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS \
+     | IMPL(TCG_TARGET_HAS_new_ldst))
+
+#if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
+DEF(qemu_ld_i32, 1, 1, 2, IMPL_NEW_LDST)
+DEF(qemu_st_i32, 0, 2, 2, IMPL_NEW_LDST)
+# if TCG_TARGET_REG_BITS == 64
+DEF(qemu_ld_i64, 1, 1, 2, IMPL_NEW_LDST | TCG_OPF_64BIT)
+DEF(qemu_st_i64, 0, 2, 2, IMPL_NEW_LDST | TCG_OPF_64BIT)
+# else
+DEF(qemu_ld_i64, 2, 1, 2, IMPL_NEW_LDST | TCG_OPF_64BIT)
+DEF(qemu_st_i64, 0, 3, 2, IMPL_NEW_LDST | TCG_OPF_64BIT)
+# endif
+#else
+DEF(qemu_ld_i32, 1, 2, 2, IMPL_NEW_LDST)
+DEF(qemu_st_i32, 0, 3, 2, IMPL_NEW_LDST)
+DEF(qemu_ld_i64, 2, 2, 2, IMPL_NEW_LDST | TCG_OPF_64BIT)
+DEF(qemu_st_i64, 0, 4, 2, IMPL_NEW_LDST | TCG_OPF_64BIT)
+#endif
+
+#undef IMPL_NEW_LDST
+
+#define IMPL_OLD_LDST \
+    (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS \
+     | IMPL(!TCG_TARGET_HAS_new_ldst))
+
 #if TCG_TARGET_REG_BITS == 32
 #if TARGET_LONG_BITS == 32
-DEF(qemu_ld8u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld8u, 1, 1, 1, IMPL_OLD_LDST)
 #else
-DEF(qemu_ld8u, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld8u, 1, 2, 1, IMPL_OLD_LDST)
 #endif
 #if TARGET_LONG_BITS == 32
-DEF(qemu_ld8s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld8s, 1, 1, 1, IMPL_OLD_LDST)
 #else
-DEF(qemu_ld8s, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld8s, 1, 2, 1, IMPL_OLD_LDST)
 #endif
 #if TARGET_LONG_BITS == 32
-DEF(qemu_ld16u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld16u, 1, 1, 1, IMPL_OLD_LDST)
 #else
-DEF(qemu_ld16u, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld16u, 1, 2, 1, IMPL_OLD_LDST)
 #endif
 #if TARGET_LONG_BITS == 32
-DEF(qemu_ld16s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld16s, 1, 1, 1, IMPL_OLD_LDST)
 #else
-DEF(qemu_ld16s, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld16s, 1, 2, 1, IMPL_OLD_LDST)
 #endif
 #if TARGET_LONG_BITS == 32
-DEF(qemu_ld32, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld32, 1, 1, 1, IMPL_OLD_LDST)
 #else
-DEF(qemu_ld32, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld32, 1, 2, 1, IMPL_OLD_LDST)
 #endif
 #if TARGET_LONG_BITS == 32
-DEF(qemu_ld32s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld64, 2, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
 #else
-DEF(qemu_ld32s, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-#endif
-#if TARGET_LONG_BITS == 32
-DEF(qemu_ld64, 2, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-#else
-DEF(qemu_ld64, 2, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld64, 2, 2, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
 #endif
 
 #if TARGET_LONG_BITS == 32
-DEF(qemu_st8, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st8, 0, 2, 1, IMPL_OLD_LDST)
 #else
-DEF(qemu_st8, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st8, 0, 3, 1, IMPL_OLD_LDST)
 #endif
 #if TARGET_LONG_BITS == 32
-DEF(qemu_st16, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st16, 0, 2, 1, IMPL_OLD_LDST)
 #else
-DEF(qemu_st16, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st16, 0, 3, 1, IMPL_OLD_LDST)
 #endif
 #if TARGET_LONG_BITS == 32
-DEF(qemu_st32, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st32, 0, 2, 1, IMPL_OLD_LDST)
 #else
-DEF(qemu_st32, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st32, 0, 3, 1, IMPL_OLD_LDST)
 #endif
 #if TARGET_LONG_BITS == 32
-DEF(qemu_st64, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st64, 0, 3, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
 #else
-DEF(qemu_st64, 0, 4, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st64, 0, 4, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
 #endif
 
 #else /* TCG_TARGET_REG_BITS == 32 */
 
-DEF(qemu_ld8u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_ld8s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_ld16u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_ld16s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_ld32, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_ld32u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_ld32s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_ld64, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld8u, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
+DEF(qemu_ld8s, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
+DEF(qemu_ld16u, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
+DEF(qemu_ld16s, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
+DEF(qemu_ld32, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
+DEF(qemu_ld32u, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
+DEF(qemu_ld32s, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
+DEF(qemu_ld64, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
 
-DEF(qemu_st8, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_st16, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_st32, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF(qemu_st64, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st8, 0, 2, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
+DEF(qemu_st16, 0, 2, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
+DEF(qemu_st32, 0, 2, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
+DEF(qemu_st64, 0, 2, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
 
 #endif /* TCG_TARGET_REG_BITS != 32 */
 
+#undef IMPL_OLD_LDST
+
+#undef IMPL
+#undef IMPL64
 #undef DEF
diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h
index 5615b13..a1ebef9 100644
--- a/tcg/tcg-runtime.h
+++ b/tcg/tcg-runtime.h
@@ -12,7 +12,9 @@
 int64_t tcg_helper_sar_i64(int64_t arg1, int64_t arg2);
 int64_t tcg_helper_div_i64(int64_t arg1, int64_t arg2);
 int64_t tcg_helper_rem_i64(int64_t arg1, int64_t arg2);
+int64_t tcg_helper_mulsh_i64(int64_t arg1, int64_t arg2);
 uint64_t tcg_helper_divu_i64(uint64_t arg1, uint64_t arg2);
 uint64_t tcg_helper_remu_i64(uint64_t arg1, uint64_t arg2);
+uint64_t tcg_helper_muluh_i64(uint64_t arg1, uint64_t arg2);
 
 #endif
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 921e772..712438c 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -24,26 +24,18 @@
 
 /* define it to use liveness analysis (better code) */
 #define USE_LIVENESS_ANALYSIS
+#define USE_TCG_OPTIMIZATIONS
 
 #include "config.h"
 
+/* Define to dump the ELF file used to communicate with GDB.  */
+#undef DEBUG_JIT
+
 #if !defined(CONFIG_DEBUG_TCG) && !defined(NDEBUG)
 /* define it to suppress various consistency checks (faster) */
 #define NDEBUG
 #endif
 
-#include <stdarg.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <inttypes.h>
-#ifdef _WIN32
-#include <malloc.h>
-#endif
-#ifdef _AIX
-#include <alloca.h>
-#endif
-
 #include "qemu-common.h"
 #include "qemu/cache-utils.h"
 #include "qemu/host-utils.h"
@@ -54,43 +46,77 @@
    instructions */
 #define NO_CPU_IO_DEFS
 #include "cpu.h"
-#include "exec/exec-all.h"
 
 #include "tcg-op.h"
-#include "elf.h"
 
-#if defined(CONFIG_USE_GUEST_BASE) && !defined(TCG_TARGET_HAS_GUEST_BASE)
-#error GUEST_BASE not supported on this host.
+#if UINTPTR_MAX == UINT32_MAX
+# define ELF_CLASS  ELFCLASS32
+#else
+# define ELF_CLASS  ELFCLASS64
+#endif
+#ifdef HOST_WORDS_BIGENDIAN
+# define ELF_DATA   ELFDATA2MSB
+#else
+# define ELF_DATA   ELFDATA2LSB
 #endif
 
+#include "elf.h"
+
+/* Forward declarations for functions declared in tcg-target.c and used here. */
 static void tcg_target_init(TCGContext *s);
 static void tcg_target_qemu_prologue(TCGContext *s);
-static void patch_reloc(uint8_t *code_ptr, int type,
-                        tcg_target_long value, tcg_target_long addend);
+static void patch_reloc(uint8_t *code_ptr, int type, 
+                        intptr_t value, intptr_t addend);
 
-static TCGOpDef tcg_op_defs[] = {
+/* The CIE and FDE header definitions will be common to all hosts.  */
+typedef struct {
+    uint32_t len __attribute__((aligned((sizeof(void *)))));
+    uint32_t id;
+    uint8_t version;
+    char augmentation[1];
+    uint8_t code_align;
+    uint8_t data_align;
+    uint8_t return_column;
+} DebugFrameCIE;
+
+typedef struct QEMU_PACKED {
+    uint32_t len __attribute__((aligned((sizeof(void *)))));
+    uint32_t cie_offset;
+    uintptr_t func_start;
+    uintptr_t func_len;
+} DebugFrameFDEHeader;
+
+static void tcg_register_jit_int(void *buf, size_t size,
+                                 void *debug_frame, size_t debug_frame_size)
+    __attribute__((unused));
+
+/* Forward declarations for functions declared and used in tcg-target.c. */
+static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str);
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
+                       intptr_t arg2);
+static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
+static void tcg_out_movi(TCGContext *s, TCGType type,
+                         TCGReg ret, tcg_target_long arg);
+static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
+                       const int *const_args);
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
+                       intptr_t arg2);
+static int tcg_target_const_match(tcg_target_long val,
+                                  const TCGArgConstraint *arg_ct);
+static void tcg_out_tb_init(TCGContext *s);
+static void tcg_out_tb_finalize(TCGContext *s);
+
+
+TCGOpDef tcg_op_defs[] = {
 #define DEF(s, oargs, iargs, cargs, flags) { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags },
 #include "tcg-opc.h"
 #undef DEF
 };
+const size_t tcg_op_defs_max = ARRAY_SIZE(tcg_op_defs);
 
 static TCGRegSet tcg_target_available_regs[2];
 static TCGRegSet tcg_target_call_clobber_regs;
 
-/* XXX: move that inside the context */
-uint16_t *gen_opc_ptr;
-TCGArg *gen_opparam_ptr;
-
-#ifdef CONFIG_MEMCHECK
-/*
- * Memchecker addition in this module is intended to build a map that matches
- * translated PC to a guest PC. Map is built in tcg_gen_code_common routine,
- * and is saved into temporary gen_opc_tpc2gpc_ptr array, that later will be
- * copied into the TranslationBlock that represents the translated code.
- */
-#include "memcheck/memcheck_api.h"
-#endif  // CONFIG_MEMCHECK
-
 static inline void tcg_out8(TCGContext *s, uint8_t v)
 {
     *s->code_ptr++ = v;
@@ -98,20 +124,29 @@
 
 static inline void tcg_out16(TCGContext *s, uint16_t v)
 {
-    *(uint16_t *)s->code_ptr = v;
-    s->code_ptr += 2;
+    uint8_t *p = s->code_ptr;
+    *(uint16_t *)p = v;
+    s->code_ptr = p + 2;
 }
 
 static inline void tcg_out32(TCGContext *s, uint32_t v)
 {
-    *(uint32_t *)s->code_ptr = v;
-    s->code_ptr += 4;
+    uint8_t *p = s->code_ptr;
+    *(uint32_t *)p = v;
+    s->code_ptr = p + 4;
+}
+
+static inline void tcg_out64(TCGContext *s, uint64_t v)
+{
+    uint8_t *p = s->code_ptr;
+    *(uint64_t *)p = v;
+    s->code_ptr = p + 8;
 }
 
 /* label relocation processing */
 
 static void tcg_out_reloc(TCGContext *s, uint8_t *code_ptr, int type,
-                   int label_index, long addend)
+                          int label_index, intptr_t addend)
 {
     TCGLabel *l;
     TCGRelocation *r;
@@ -119,7 +154,7 @@
     l = &s->labels[label_index];
     if (l->has_value) {
         /* FIXME: This may break relocations on RISC targets that
-           modify instruction fields in place.  The caller may not have
+           modify instruction fields in place.  The caller may not have 
            written the initial value.  */
         patch_reloc(code_ptr, type, l->u.value, addend);
     } else {
@@ -133,15 +168,16 @@
     }
 }
 
-static void tcg_out_label(TCGContext *s, int label_index,
-                          tcg_target_long value)
+static void tcg_out_label(TCGContext *s, int label_index, void *ptr)
 {
     TCGLabel *l;
     TCGRelocation *r;
+    intptr_t value = (intptr_t)ptr;
 
     l = &s->labels[label_index];
-    if (l->has_value)
+    if (l->has_value) {
         tcg_abort();
+    }
     r = l->u.first_reloc;
     while (r != NULL) {
         patch_reloc(r->ptr, r->type, value, r->addend);
@@ -173,16 +209,14 @@
 {
     TCGPool *p;
     int pool_size;
-
+    
     if (size > TCG_POOL_CHUNK_SIZE) {
         /* big malloc: insert a new pool (XXX: could optimize) */
         p = g_malloc(sizeof(TCGPool) + size);
         p->size = size;
-        if (s->pool_current)
-            s->pool_current->next = p;
-        else
-            s->pool_first = p;
-        p->next = s->pool_current;
+        p->next = s->pool_first_large;
+        s->pool_first_large = p;
+        return p->data;
     } else {
         p = s->pool_current;
         if (!p) {
@@ -196,7 +230,7 @@
                 p = g_malloc(sizeof(TCGPool) + pool_size);
                 p->size = pool_size;
                 p->next = NULL;
-                if (s->pool_current)
+                if (s->pool_current) 
                     s->pool_current->next = p;
                 else
                     s->pool_first = p;
@@ -213,21 +247,55 @@
 
 void tcg_pool_reset(TCGContext *s)
 {
+    TCGPool *p, *t;
+    for (p = s->pool_first_large; p; p = t) {
+        t = p->next;
+        g_free(p);
+    }
+    s->pool_first_large = NULL;
     s->pool_cur = s->pool_end = NULL;
     s->pool_current = NULL;
 }
 
+#include "helper.h"
+
+typedef struct TCGHelperInfo {
+    void *func;
+    const char *name;
+} TCGHelperInfo;
+
+static const TCGHelperInfo all_helpers[] = {
+#define GEN_HELPER 2
+#include "helper.h"
+
+    /* Include tcg-runtime.c functions.  */
+    { tcg_helper_div_i32, "div_i32" },
+    { tcg_helper_rem_i32, "rem_i32" },
+    { tcg_helper_divu_i32, "divu_i32" },
+    { tcg_helper_remu_i32, "remu_i32" },
+
+    { tcg_helper_shl_i64, "shl_i64" },
+    { tcg_helper_shr_i64, "shr_i64" },
+    { tcg_helper_sar_i64, "sar_i64" },
+    { tcg_helper_div_i64, "div_i64" },
+    { tcg_helper_rem_i64, "rem_i64" },
+    { tcg_helper_divu_i64, "divu_i64" },
+    { tcg_helper_remu_i64, "remu_i64" },
+    { tcg_helper_mulsh_i64, "mulsh_i64" },
+    { tcg_helper_muluh_i64, "muluh_i64" },
+};
+
 void tcg_context_init(TCGContext *s)
 {
-    int op, total_args, n;
+    int op, total_args, n, i;
     TCGOpDef *def;
     TCGArgConstraint *args_ct;
     int *sorted_args;
+    GHashTable *helper_table;
 
     memset(s, 0, sizeof(*s));
-    s->temps = s->static_temps;
     s->nb_globals = 0;
-
+    
     /* Count total number of arguments and allocate the corresponding
        space */
     total_args = 0;
@@ -249,21 +317,38 @@
         args_ct += n;
     }
 
+    /* Register helpers.  */
+    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
+    s->helpers = helper_table = g_hash_table_new(NULL, NULL);
+
+    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
+        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
+                            (gpointer)all_helpers[i].name);
+    }
+
     tcg_target_init(s);
 }
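tcg_register_helper() and its lazily sorted array with binary search are gone (see the deletions further down); helpers are registered once here, from the static all_helpers[] table into a GLib hash table keyed by the function pointer itself, so tcg_find_helper() becomes a single hash lookup. A minimal sketch of that pattern; dummy_helper is a placeholder:

    /* Build with: cc sketch.c $(pkg-config --cflags --libs glib-2.0) */
    #include <assert.h>
    #include <glib.h>
    #include <string.h>

    static int dummy_helper(int x) { return x; }

    int main(void)
    {
        /* NULL, NULL selects g_direct_hash/g_direct_equal, i.e. keys are
           compared as raw pointers, which is what we want for function
           addresses. */
        GHashTable *helpers = g_hash_table_new(NULL, NULL);
        g_hash_table_insert(helpers, (gpointer)dummy_helper,
                            (gpointer)"dummy_helper");

        const char *name = g_hash_table_lookup(helpers, (gpointer)dummy_helper);
        assert(name != NULL && strcmp(name, "dummy_helper") == 0);
        g_hash_table_destroy(helpers);
        return 0;
    }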
 
 void tcg_prologue_init(TCGContext *s)
 {
     /* init global prologue and epilogue */
-    s->code_buf = code_gen_prologue;
+    s->code_buf = s->code_gen_prologue;
     s->code_ptr = s->code_buf;
     tcg_target_qemu_prologue(s);
-    flush_icache_range((unsigned long)s->code_buf,
-                       (unsigned long)s->code_ptr);
+    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
+
+#ifdef DEBUG_DISAS
+    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
+        size_t size = s->code_ptr - s->code_buf;
+        qemu_log("PROLOGUE: [size=%zu]\n", size);
+        log_disas(s->code_buf, size);
+        qemu_log("\n");
+        qemu_log_flush();
+    }
+#endif
 }
 
-void tcg_set_frame(TCGContext *s, int reg,
-                   tcg_target_long start, tcg_target_long size)
+void tcg_set_frame(TCGContext *s, int reg, intptr_t start, intptr_t size)
 {
     s->frame_start = start;
     s->frame_end = start + size;
@@ -272,17 +357,24 @@
 
 void tcg_func_start(TCGContext *s)
 {
-    int i;
     tcg_pool_reset(s);
     s->nb_temps = s->nb_globals;
-    for(i = 0; i < (TCG_TYPE_COUNT * 2); i++)
-        s->first_free_temp[i] = -1;
+
+    /* No temps have been previously allocated for size or locality.  */
+    memset(s->free_temps, 0, sizeof(s->free_temps));
+
     s->labels = tcg_malloc(sizeof(TCGLabel) * TCG_MAX_LABELS);
     s->nb_labels = 0;
     s->current_frame_offset = s->frame_start;
 
-    gen_opc_ptr = gen_opc_buf;
-    gen_opparam_ptr = gen_opparam_buf;
+#ifdef CONFIG_DEBUG_TCG
+    s->goto_tb_issue_mask = 0;
+#endif
+
+    s->gen_opc_ptr = s->gen_opc_buf;
+    s->gen_opparam_ptr = s->gen_opparam_buf;
+
+    s->be = tcg_malloc(sizeof(TCGBackendData));
 }
 
 static inline void tcg_temp_alloc(TCGContext *s, int n)
@@ -334,7 +426,7 @@
 }
 
 static inline int tcg_global_mem_new_internal(TCGType type, int reg,
-                                              tcg_target_long offset,
+                                              intptr_t offset,
                                               const char *name)
 {
     TCGContext *s = &tcg_ctx;
@@ -394,21 +486,15 @@
     return idx;
 }
 
-TCGv_i32 tcg_global_mem_new_i32(int reg, tcg_target_long offset,
-                                const char *name)
+TCGv_i32 tcg_global_mem_new_i32(int reg, intptr_t offset, const char *name)
 {
-    int idx;
-
-    idx = tcg_global_mem_new_internal(TCG_TYPE_I32, reg, offset, name);
+    int idx = tcg_global_mem_new_internal(TCG_TYPE_I32, reg, offset, name);
     return MAKE_TCGV_I32(idx);
 }
 
-TCGv_i64 tcg_global_mem_new_i64(int reg, tcg_target_long offset,
-                                const char *name)
+TCGv_i64 tcg_global_mem_new_i64(int reg, intptr_t offset, const char *name)
 {
-    int idx;
-
-    idx = tcg_global_mem_new_internal(TCG_TYPE_I64, reg, offset, name);
+    int idx = tcg_global_mem_new_internal(TCG_TYPE_I64, reg, offset, name);
     return MAKE_TCGV_I64(idx);
 }
 
@@ -418,16 +504,15 @@
     TCGTemp *ts;
     int idx, k;
 
-    k = type;
-    if (temp_local)
-        k += TCG_TYPE_COUNT;
-    idx = s->first_free_temp[k];
-    if (idx != -1) {
-        /* There is already an available temp with the
-           right type */
+    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
+    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
+    if (idx < TCG_MAX_TEMPS) {
+        /* There is already an available temp with the right type.  */
+        clear_bit(idx, s->free_temps[k].l);
+
         ts = &s->temps[idx];
-        s->first_free_temp[k] = ts->next_free_temp;
         ts->temp_allocated = 1;
+        assert(ts->base_type == type);
         assert(ts->temp_local == temp_local);
     } else {
         idx = s->nb_temps;
@@ -483,7 +568,7 @@
     return MAKE_TCGV_I64(idx);
 }
 
-static inline void tcg_temp_free_internal(int idx)
+static void tcg_temp_free_internal(int idx)
 {
     TCGContext *s = &tcg_ctx;
     TCGTemp *ts;
@@ -500,11 +585,9 @@
     ts = &s->temps[idx];
     assert(ts->temp_allocated != 0);
     ts->temp_allocated = 0;
-    k = ts->base_type;
-    if (ts->temp_local)
-        k += TCG_TYPE_COUNT;
-    ts->next_free_temp = s->first_free_temp[k];
-    s->first_free_temp[k] = idx;
+
+    k = ts->type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
+    set_bit(idx, s->free_temps[k].l);
 }
 
 void tcg_temp_free_i32(TCGv_i32 arg)
@@ -570,34 +653,12 @@
 }
 #endif
 
-void tcg_register_helper(void *func, const char *name)
-{
-    TCGContext *s = &tcg_ctx;
-    int n;
-    if ((s->nb_helpers + 1) > s->allocated_helpers) {
-        n = s->allocated_helpers;
-        if (n == 0) {
-            n = 4;
-        } else {
-            n *= 2;
-        }
-        s->helpers = realloc(s->helpers, n * sizeof(TCGHelperInfo));
-        s->allocated_helpers = n;
-    }
-    s->helpers[s->nb_helpers].func = (tcg_target_ulong)func;
-    s->helpers[s->nb_helpers].name = name;
-    s->nb_helpers++;
-}
-
 /* Note: we convert the 64 bit args to 32 bit and do some alignment
    and endian swap. Maybe it would be better to do the alignment
    and endian swap in tcg_reg_alloc_call(). */
 void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags,
                    int sizemask, TCGArg ret, int nargs, TCGArg *args)
 {
-#ifdef TCG_TARGET_I386
-    int __attribute__((unused)) call_type;
-#endif
     int i;
     int real_args;
     int nb_rets;
@@ -620,26 +681,23 @@
     }
 #endif /* TCG_TARGET_EXTEND_ARGS */
 
-    *gen_opc_ptr++ = INDEX_op_call;
-    nparam = gen_opparam_ptr++;
-#ifdef TCG_TARGET_I386
-    call_type = (flags & TCG_CALL_TYPE_MASK);
-#endif
+    *s->gen_opc_ptr++ = INDEX_op_call;
+    nparam = s->gen_opparam_ptr++;
     if (ret != TCG_CALL_DUMMY_ARG) {
 #if TCG_TARGET_REG_BITS < 64
         if (sizemask & 1) {
 #ifdef TCG_TARGET_WORDS_BIGENDIAN
-            *gen_opparam_ptr++ = ret + 1;
-            *gen_opparam_ptr++ = ret;
+            *s->gen_opparam_ptr++ = ret + 1;
+            *s->gen_opparam_ptr++ = ret;
 #else
-            *gen_opparam_ptr++ = ret;
-            *gen_opparam_ptr++ = ret + 1;
+            *s->gen_opparam_ptr++ = ret;
+            *s->gen_opparam_ptr++ = ret + 1;
 #endif
             nb_rets = 2;
         } else
 #endif
         {
-            *gen_opparam_ptr++ = ret;
+            *s->gen_opparam_ptr++ = ret;
             nb_rets = 1;
         }
     } else {
@@ -650,18 +708,10 @@
 #if TCG_TARGET_REG_BITS < 64
         int is_64bit = sizemask & (1 << (i+1)*2);
         if (is_64bit) {
-#ifdef TCG_TARGET_I386
-            /* REGPARM case: if the third parameter is 64 bit, it is
-               allocated on the stack */
-            if (i == 2 && call_type == TCG_CALL_TYPE_REGPARM) {
-                call_type = TCG_CALL_TYPE_REGPARM_2;
-                flags = (flags & ~TCG_CALL_TYPE_MASK) | call_type;
-            }
-#endif
 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
             /* some targets want aligned 64 bit args */
             if (real_args & 1) {
-                *gen_opparam_ptr++ = TCG_CALL_DUMMY_ARG;
+                *s->gen_opparam_ptr++ = TCG_CALL_DUMMY_ARG;
                 real_args++;
             }
 #endif
@@ -676,28 +726,28 @@
 	       have to get more complicated to differentiate between
 	       stack arguments and register arguments.  */
 #if defined(TCG_TARGET_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
-            *gen_opparam_ptr++ = args[i] + 1;
-            *gen_opparam_ptr++ = args[i];
+            *s->gen_opparam_ptr++ = args[i] + 1;
+            *s->gen_opparam_ptr++ = args[i];
 #else
-            *gen_opparam_ptr++ = args[i];
-            *gen_opparam_ptr++ = args[i] + 1;
+            *s->gen_opparam_ptr++ = args[i];
+            *s->gen_opparam_ptr++ = args[i] + 1;
 #endif
             real_args += 2;
             continue;
         }
 #endif /* TCG_TARGET_REG_BITS < 64 */
 
-            *gen_opparam_ptr++ = args[i];
-            real_args++;
-        }
-    *gen_opparam_ptr++ = GET_TCGV_PTR(func);
+        *s->gen_opparam_ptr++ = args[i];
+        real_args++;
+    }
+    *s->gen_opparam_ptr++ = GET_TCGV_PTR(func);
 
-    *gen_opparam_ptr++ = flags;
+    *s->gen_opparam_ptr++ = flags;
 
     *nparam = (nb_rets << 16) | (real_args + 1);
 
     /* total parameters, needed to go backward in the instruction stream */
-    *gen_opparam_ptr++ = 1 + nb_rets + real_args + 3;
+    *s->gen_opparam_ptr++ = 1 + nb_rets + real_args + 3;
 
 #if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
     for (i = 0; i < nargs; ++i) {
@@ -759,6 +809,188 @@
 }
 #endif
 
+static inline TCGMemOp tcg_canonicalize_memop(TCGMemOp op, bool is64, bool st)
+{
+    switch (op & MO_SIZE) {
+    case MO_8:
+        op &= ~MO_BSWAP;
+        break;
+    case MO_16:
+        break;
+    case MO_32:
+        if (!is64) {
+            op &= ~MO_SIGN;
+        }
+        break;
+    case MO_64:
+        if (!is64) {
+            tcg_abort();
+        }
+        break;
+    }
+    if (st) {
+        op &= ~MO_SIGN;
+    }
+    return op;
+}
+
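tcg_canonicalize_memop() strips flag bits that cannot matter for the requested access: a byte access has no endianness, a 32-bit value loaded into a 32-bit temp needs no sign flag, and stores never sign-extend. A standalone model of those three rules (the struct is an illustrative stand-in for the packed TCGMemOp bits):

    #include <assert.h>
    #include <stdbool.h>

    /* Stand-in for the packed TCGMemOp bits: log2(size) plus two flags. */
    struct memop { int size; bool sign; bool bswap; };

    static struct memop canonicalize(struct memop op, bool is64, bool st)
    {
        if (op.size == 0)          op.bswap = false;  /* bytes: no byte order */
        if (op.size == 2 && !is64) op.sign  = false;  /* value fills the temp */
        if (st)                    op.sign  = false;  /* stores never extend  */
        return op;
    }

    int main(void)
    {
        struct memop st16s = { 1, true, false };   /* like MO_TESW on a store */
        assert(!canonicalize(st16s, false, true).sign);
        struct memop ld8swap = { 0, false, true }; /* byte load, swap flag set */
        assert(!canonicalize(ld8swap, true, false).bswap);
        return 0;
    }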
+static const TCGOpcode old_ld_opc[8] = {
+    [MO_UB] = INDEX_op_qemu_ld8u,
+    [MO_SB] = INDEX_op_qemu_ld8s,
+    [MO_UW] = INDEX_op_qemu_ld16u,
+    [MO_SW] = INDEX_op_qemu_ld16s,
+#if TCG_TARGET_REG_BITS == 32
+    [MO_UL] = INDEX_op_qemu_ld32,
+    [MO_SL] = INDEX_op_qemu_ld32,
+#else
+    [MO_UL] = INDEX_op_qemu_ld32u,
+    [MO_SL] = INDEX_op_qemu_ld32s,
+#endif
+    [MO_Q]  = INDEX_op_qemu_ld64,
+};
+
+static const TCGOpcode old_st_opc[4] = {
+    [MO_UB] = INDEX_op_qemu_st8,
+    [MO_UW] = INDEX_op_qemu_st16,
+    [MO_UL] = INDEX_op_qemu_st32,
+    [MO_Q]  = INDEX_op_qemu_st64,
+};
+
+void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
+{
+    memop = tcg_canonicalize_memop(memop, 0, 0);
+
+    if (TCG_TARGET_HAS_new_ldst) {
+        *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i32;
+        tcg_add_param_i32(val);
+        tcg_add_param_tl(addr);
+        *tcg_ctx.gen_opparam_ptr++ = memop;
+        *tcg_ctx.gen_opparam_ptr++ = idx;
+        return;
+    }
+
+    /* The old opcodes only support target-endian memory operations.  */
+    assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8);
+    assert(old_ld_opc[memop & MO_SSIZE] != 0);
+
+    if (TCG_TARGET_REG_BITS == 32) {
+        *tcg_ctx.gen_opc_ptr++ = old_ld_opc[memop & MO_SSIZE];
+        tcg_add_param_i32(val);
+        tcg_add_param_tl(addr);
+        *tcg_ctx.gen_opparam_ptr++ = idx;
+    } else {
+        TCGv_i64 val64 = tcg_temp_new_i64();
+
+        *tcg_ctx.gen_opc_ptr++ = old_ld_opc[memop & MO_SSIZE];
+        tcg_add_param_i64(val64);
+        tcg_add_param_tl(addr);
+        *tcg_ctx.gen_opparam_ptr++ = idx;
+
+        tcg_gen_trunc_i64_i32(val, val64);
+        tcg_temp_free_i64(val64);
+    }
+}
+
+void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
+{
+    memop = tcg_canonicalize_memop(memop, 0, 1);
+
+    if (TCG_TARGET_HAS_new_ldst) {
+        *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i32;
+        tcg_add_param_i32(val);
+        tcg_add_param_tl(addr);
+        *tcg_ctx.gen_opparam_ptr++ = memop;
+        *tcg_ctx.gen_opparam_ptr++ = idx;
+        return;
+    }
+
+    /* The old opcodes only support target-endian memory operations.  */
+    assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8);
+    assert(old_st_opc[memop & MO_SIZE] != 0);
+
+    if (TCG_TARGET_REG_BITS == 32) {
+        *tcg_ctx.gen_opc_ptr++ = old_st_opc[memop & MO_SIZE];
+        tcg_add_param_i32(val);
+        tcg_add_param_tl(addr);
+        *tcg_ctx.gen_opparam_ptr++ = idx;
+    } else {
+        TCGv_i64 val64 = tcg_temp_new_i64();
+
+        tcg_gen_extu_i32_i64(val64, val);
+
+        *tcg_ctx.gen_opc_ptr++ = old_st_opc[memop & MO_SIZE];
+        tcg_add_param_i64(val64);
+        tcg_add_param_tl(addr);
+        *tcg_ctx.gen_opparam_ptr++ = idx;
+
+        tcg_temp_free_i64(val64);
+    }
+}
+
+void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
+{
+    memop = tcg_canonicalize_memop(memop, 1, 0);
+
+#if TCG_TARGET_REG_BITS == 32
+    if ((memop & MO_SIZE) < MO_64) {
+        tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
+        if (memop & MO_SIGN) {
+            tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
+        } else {
+            tcg_gen_movi_i32(TCGV_HIGH(val), 0);
+        }
+        return;
+    }
+#endif
+
+    if (TCG_TARGET_HAS_new_ldst) {
+        *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i64;
+        tcg_add_param_i64(val);
+        tcg_add_param_tl(addr);
+        *tcg_ctx.gen_opparam_ptr++ = memop;
+        *tcg_ctx.gen_opparam_ptr++ = idx;
+        return;
+    }
+
+    /* The old opcodes only support target-endian memory operations.  */
+    assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8);
+    assert(old_ld_opc[memop & MO_SSIZE] != 0);
+
+    *tcg_ctx.gen_opc_ptr++ = old_ld_opc[memop & MO_SSIZE];
+    tcg_add_param_i64(val);
+    tcg_add_param_tl(addr);
+    *tcg_ctx.gen_opparam_ptr++ = idx;
+}
+
+void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
+{
+    memop = tcg_canonicalize_memop(memop, 1, 1);
+
+#if TCG_TARGET_REG_BITS == 32
+    if ((memop & MO_SIZE) < MO_64) {
+        tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
+        return;
+    }
+#endif
+
+    if (TCG_TARGET_HAS_new_ldst) {
+        *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i64;
+        tcg_add_param_i64(val);
+        tcg_add_param_tl(addr);
+        *tcg_ctx.gen_opparam_ptr++ = memop;
+        *tcg_ctx.gen_opparam_ptr++ = idx;
+        return;
+    }
+
+    /* The old opcodes only support target-endian memory operations.  */
+    assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8);
+    assert(old_st_opc[memop & MO_SIZE] != 0);
+
+    *tcg_ctx.gen_opc_ptr++ = old_st_opc[memop & MO_SIZE];
+    tcg_add_param_i64(val);
+    tcg_add_param_tl(addr);
+    *tcg_ctx.gen_opparam_ptr++ = idx;
+}
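Each emitter above follows the same two-stream pattern already used by tcg_gen_callN(): the opcode index goes into gen_opc_buf, and the value temp, address temp, memop and mem_index go into gen_opparam_buf. A toy model of that emission layout; the buffer sizes, opcode value and argument encodings are illustrative:

    #include <assert.h>
    #include <stdint.h>

    enum { OP_QEMU_LD_I32 = 1 };          /* illustrative opcode value */

    static uint16_t opc_buf[8],  *opc_ptr  = opc_buf;
    static intptr_t  parm_buf[8], *parm_ptr = parm_buf;

    /* One opcode word in the opcode stream; value temp, address temp,
       memop and mem_index in the parameter stream, in that order. */
    static void emit_qemu_ld_i32(intptr_t val, intptr_t addr,
                                 intptr_t memop, intptr_t idx)
    {
        *opc_ptr++  = OP_QEMU_LD_I32;
        *parm_ptr++ = val;
        *parm_ptr++ = addr;
        *parm_ptr++ = memop;
        *parm_ptr++ = idx;
    }

    int main(void)
    {
        emit_qemu_ld_i32(/*val*/3, /*addr*/4, /*memop*/2, /*mem_index*/0);
        assert(opc_buf[0] == OP_QEMU_LD_I32);
        assert(parm_buf[2] == 2 && parm_buf[3] == 0);
        return 0;
    }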
 
 static void tcg_reg_alloc_start(TCGContext *s)
 {
@@ -774,7 +1006,11 @@
     }
     for(i = s->nb_globals; i < s->nb_temps; i++) {
         ts = &s->temps[i];
-        ts->val_type = TEMP_VAL_DEAD;
+        if (ts->temp_local) {
+            ts->val_type = TEMP_VAL_MEM;
+        } else {
+            ts->val_type = TEMP_VAL_DEAD;
+        }
         ts->mem_allocated = 0;
         ts->fixed_reg = 0;
     }
@@ -788,11 +1024,12 @@
 {
     TCGTemp *ts;
 
+    assert(idx >= 0 && idx < s->nb_temps);
     ts = &s->temps[idx];
     if (idx < s->nb_globals) {
         pstrcpy(buf, buf_size, ts->name);
     } else {
-        if (ts->temp_local)
+        if (ts->temp_local) 
             snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
         else
             snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
@@ -810,51 +1047,20 @@
     return tcg_get_arg_str_idx(s, buf, buf_size, GET_TCGV_I64(arg));
 }
 
-static int helper_cmp(const void *p1, const void *p2)
+/* Find helper name.  */
+static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
 {
-    const TCGHelperInfo *th1 = p1;
-    const TCGHelperInfo *th2 = p2;
-    if (th1->func < th2->func)
-        return -1;
-    else if (th1->func == th2->func)
-        return 0;
-    else
-        return 1;
-}
-
-/* find helper definition (Note: A hash table would be better) */
-static TCGHelperInfo *tcg_find_helper(TCGContext *s, tcg_target_ulong val)
-{
-    int m, m_min, m_max;
-    TCGHelperInfo *th;
-    tcg_target_ulong v;
-
-    if (unlikely(!s->helpers_sorted)) {
-        qsort(s->helpers, s->nb_helpers, sizeof(TCGHelperInfo),
-              helper_cmp);
-        s->helpers_sorted = 1;
+    const char *ret = NULL;
+    if (s->helpers) {
+        ret = g_hash_table_lookup(s->helpers, (gpointer)val);
     }
-
-    /* binary search */
-    m_min = 0;
-    m_max = s->nb_helpers - 1;
-    while (m_min <= m_max) {
-        m = (m_min + m_max) >> 1;
-        th = &s->helpers[m];
-        v = th->func;
-        if (v == val)
-            return th;
-        else if (val < v) {
-            m_max = m - 1;
-        } else {
-            m_min = m + 1;
-        }
-    }
-    return NULL;
+    return ret;
 }
 
 static const char * const cond_name[] =
 {
+    [TCG_COND_NEVER] = "never",
+    [TCG_COND_ALWAYS] = "always",
     [TCG_COND_EQ] = "eq",
     [TCG_COND_NE] = "ne",
     [TCG_COND_LT] = "lt",
@@ -867,7 +1073,23 @@
     [TCG_COND_GTU] = "gtu"
 };
 
-void tcg_dump_ops(TCGContext *s, FILE *outfile)
+static const char * const ldst_name[] =
+{
+    [MO_UB]   = "ub",
+    [MO_SB]   = "sb",
+    [MO_LEUW] = "leuw",
+    [MO_LESW] = "lesw",
+    [MO_LEUL] = "leul",
+    [MO_LESL] = "lesl",
+    [MO_LEQ]  = "leq",
+    [MO_BEUW] = "beuw",
+    [MO_BESW] = "besw",
+    [MO_BEUL] = "beul",
+    [MO_BESL] = "besl",
+    [MO_BEQ]  = "beq",
+};
+
+void tcg_dump_ops(TCGContext *s)
 {
     const uint16_t *opc_ptr;
     const TCGArg *args;
@@ -878,9 +1100,9 @@
     char buf[128];
 
     first_insn = 1;
-    opc_ptr = gen_opc_buf;
-    args = gen_opparam_buf;
-    while (opc_ptr < gen_opc_ptr) {
+    opc_ptr = s->gen_opc_buf;
+    args = s->gen_opparam_buf;
+    while (opc_ptr < s->gen_opc_ptr) {
         c = *opc_ptr++;
         def = &tcg_op_defs[c];
         if (c == INDEX_op_debug_insn_start) {
@@ -890,9 +1112,10 @@
 #else
             pc = args[0];
 #endif
-            if (!first_insn)
-                fprintf(outfile, "\n");
-            fprintf(outfile, " ---- 0x%" PRIx64, pc);
+            if (!first_insn) {
+                qemu_log("\n");
+            }
+            qemu_log(" ---- 0x%" PRIx64, pc);
             first_insn = 0;
             nb_oargs = def->nb_oargs;
             nb_iargs = def->nb_iargs;
@@ -906,55 +1129,52 @@
             nb_iargs = arg & 0xffff;
             nb_cargs = def->nb_cargs;
 
-            fprintf(outfile, " %s ", def->name);
+            qemu_log(" %s ", def->name);
 
             /* function name */
-            fprintf(outfile, "%s",
-                    tcg_get_arg_str_idx(s, buf, sizeof(buf), args[nb_oargs + nb_iargs - 1]));
+            qemu_log("%s",
+                     tcg_get_arg_str_idx(s, buf, sizeof(buf),
+                                         args[nb_oargs + nb_iargs - 1]));
             /* flags */
-            fprintf(outfile, ",$0x%" TCG_PRIlx,
-                    args[nb_oargs + nb_iargs]);
+            qemu_log(",$0x%" TCG_PRIlx, args[nb_oargs + nb_iargs]);
             /* nb out args */
-            fprintf(outfile, ",$%d", nb_oargs);
+            qemu_log(",$%d", nb_oargs);
             for(i = 0; i < nb_oargs; i++) {
-                fprintf(outfile, ",");
-                fprintf(outfile, "%s",
-                        tcg_get_arg_str_idx(s, buf, sizeof(buf), args[i]));
+                qemu_log(",");
+                qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
+                                                   args[i]));
             }
             for(i = 0; i < (nb_iargs - 1); i++) {
-                fprintf(outfile, ",");
+                qemu_log(",");
                 if (args[nb_oargs + i] == TCG_CALL_DUMMY_ARG) {
-                    fprintf(outfile, "<dummy>");
+                    qemu_log("<dummy>");
                 } else {
-                    fprintf(outfile, "%s",
-                            tcg_get_arg_str_idx(s, buf, sizeof(buf), args[nb_oargs + i]));
+                    qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
+                                                       args[nb_oargs + i]));
                 }
             }
-        } else if (c == INDEX_op_movi_i32
-#if TCG_TARGET_REG_BITS == 64
-                   || c == INDEX_op_movi_i64
-#endif
-                   ) {
+        } else if (c == INDEX_op_movi_i32 || c == INDEX_op_movi_i64) {
             tcg_target_ulong val;
-            TCGHelperInfo *th;
+            const char *name;
 
             nb_oargs = def->nb_oargs;
             nb_iargs = def->nb_iargs;
             nb_cargs = def->nb_cargs;
-            fprintf(outfile, " %s %s,$", def->name,
-                    tcg_get_arg_str_idx(s, buf, sizeof(buf), args[0]));
+            qemu_log(" %s %s,$", def->name,
+                     tcg_get_arg_str_idx(s, buf, sizeof(buf), args[0]));
             val = args[1];
-            th = tcg_find_helper(s, val);
-            if (th) {
-                fprintf(outfile, "%s", th->name);
+            name = tcg_find_helper(s, val);
+            if (name) {
+                qemu_log("%s", name);
             } else {
-                if (c == INDEX_op_movi_i32)
-                    fprintf(outfile, "0x%x", (uint32_t)val);
-                else
-                    fprintf(outfile, "0x%" PRIx64 , (uint64_t)val);
+                if (c == INDEX_op_movi_i32) {
+                    qemu_log("0x%x", (uint32_t)val);
+                } else {
+                    qemu_log("0x%" PRIx64 , (uint64_t)val);
+                }
             }
         } else {
-            fprintf(outfile, " %s ", def->name);
+            qemu_log(" %s ", def->name);
             if (c == INDEX_op_nopn) {
                 /* variable number of arguments */
                 nb_cargs = *args;
@@ -965,37 +1185,47 @@
                 nb_iargs = def->nb_iargs;
                 nb_cargs = def->nb_cargs;
             }
-
+            
             k = 0;
             for(i = 0; i < nb_oargs; i++) {
-                if (k != 0)
-                    fprintf(outfile, ",");
-                fprintf(outfile, "%s",
-                        tcg_get_arg_str_idx(s, buf, sizeof(buf), args[k++]));
+                if (k != 0) {
+                    qemu_log(",");
+                }
+                qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
+                                                   args[k++]));
             }
             for(i = 0; i < nb_iargs; i++) {
-                if (k != 0)
-                    fprintf(outfile, ",");
-                fprintf(outfile, "%s",
-                        tcg_get_arg_str_idx(s, buf, sizeof(buf), args[k++]));
+                if (k != 0) {
+                    qemu_log(",");
+                }
+                qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
+                                                   args[k++]));
             }
             switch (c) {
             case INDEX_op_brcond_i32:
-#if TCG_TARGET_REG_BITS == 32
-            case INDEX_op_brcond2_i32:
-#elif TCG_TARGET_REG_BITS == 64
-            case INDEX_op_brcond_i64:
-#endif
             case INDEX_op_setcond_i32:
-#if TCG_TARGET_REG_BITS == 32
+            case INDEX_op_movcond_i32:
+            case INDEX_op_brcond2_i32:
             case INDEX_op_setcond2_i32:
-#elif TCG_TARGET_REG_BITS == 64
+            case INDEX_op_brcond_i64:
             case INDEX_op_setcond_i64:
-#endif
-                if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]])
-                    fprintf(outfile, ",%s", cond_name[args[k++]]);
-                else
-                    fprintf(outfile, ",$0x%" TCG_PRIlx, args[k++]);
+            case INDEX_op_movcond_i64:
+                if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) {
+                    qemu_log(",%s", cond_name[args[k++]]);
+                } else {
+                    qemu_log(",$0x%" TCG_PRIlx, args[k++]);
+                }
+                i = 1;
+                break;
+            case INDEX_op_qemu_ld_i32:
+            case INDEX_op_qemu_st_i32:
+            case INDEX_op_qemu_ld_i64:
+            case INDEX_op_qemu_st_i64:
+                if (args[k] < ARRAY_SIZE(ldst_name) && ldst_name[args[k]]) {
+                    qemu_log(",%s", ldst_name[args[k++]]);
+                } else {
+                    qemu_log(",$0x%" TCG_PRIlx, args[k++]);
+                }
                 i = 1;
                 break;
             default:
@@ -1003,13 +1233,14 @@
                 break;
             }
             for(; i < nb_cargs; i++) {
-                if (k != 0)
-                    fprintf(outfile, ",");
+                if (k != 0) {
+                    qemu_log(",");
+                }
                 arg = args[k++];
-                fprintf(outfile, "$0x%" TCG_PRIlx, arg);
+                qemu_log("$0x%" TCG_PRIlx, arg);
             }
         }
-        fprintf(outfile, "\n");
+        qemu_log("\n");
         args += nb_iargs + nb_oargs + nb_cargs;
     }
 }
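
tcg_dump_ops() no longer takes a FILE * argument; it writes through qemu_log(), so the dump honours the globally selected log file and log mask instead of an explicit output stream. A hedged sketch of that mask-gated vararg logging pattern (LOG_TB_OP, log_printf and log_enabled are illustrative names, not QEMU's):

    #include <stdarg.h>
    #include <stdio.h>

    enum { LOG_TB_OP = 1 << 0, LOG_TB_OUT_ASM = 1 << 1 };

    static unsigned log_mask = LOG_TB_OP;
    static FILE *log_file;               /* NULL until logging is enabled */

    static int log_enabled(unsigned mask) { return (log_mask & mask) != 0; }

    static void log_printf(const char *fmt, ...)
    {
        va_list ap;
        if (!log_file) {
            return;                      /* logging disabled: cheap no-op */
        }
        va_start(ap, fmt);
        vfprintf(log_file, fmt, ap);
        va_end(ap);
    }

    int main(void)
    {
        log_file = stderr;
        if (log_enabled(LOG_TB_OP)) {
            log_printf(" ---- 0x%llx\n", 0x1000ULL);
        }
        return 0;
    }
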
@@ -1138,20 +1369,19 @@
 #if defined(CONFIG_DEBUG_TCG)
     i = 0;
     for (op = 0; op < ARRAY_SIZE(tcg_op_defs); op++) {
-        if (op < INDEX_op_call || op == INDEX_op_debug_insn_start) {
+        const TCGOpDef *def = &tcg_op_defs[op];
+        if (def->flags & TCG_OPF_NOT_PRESENT) {
             /* Wrong entry in op definitions? */
-            if (tcg_op_defs[op].used) {
-                fprintf(stderr, "Invalid op definition for %s\n",
-                        tcg_op_defs[op].name);
+            if (def->used) {
+                fprintf(stderr, "Invalid op definition for %s\n", def->name);
                 i = 1;
             }
         } else {
             /* Missing entry in op definitions? */
-            if (!tcg_op_defs[op].used) {
-                fprintf(stderr, "Missing op definition for %s\n",
-                        tcg_op_defs[op].name);
+            if (!def->used) {
+                fprintf(stderr, "Missing op definition for %s\n", def->name);
                 i = 1;
-        }
+            }
         }
     }
     if (i == 1) {
@@ -1163,7 +1393,7 @@
 #ifdef USE_LIVENESS_ANALYSIS
 
 /* set a nop for an operation using 'nb_args' */
-static inline void tcg_set_nop(TCGContext *s, uint16_t *opc_ptr,
+static inline void tcg_set_nop(TCGContext *s, uint16_t *opc_ptr, 
                                TCGArg *args, int nb_args)
 {
     if (nb_args == 0) {
@@ -1175,66 +1405,59 @@
     }
 }
 
-/* liveness analysis: end of function: globals are live, temps are
-   dead. */
-/* XXX: at this stage, not used as there would be little gains because
-   most TBs end with a conditional jump. */
-static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps)
+/* liveness analysis: end of function: all temps are dead, and globals
+   should be in memory. */
+static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps,
+                                   uint8_t *mem_temps)
 {
-    memset(dead_temps, 0, s->nb_globals);
-    memset(dead_temps + s->nb_globals, 1, s->nb_temps - s->nb_globals);
+    memset(dead_temps, 1, s->nb_temps);
+    memset(mem_temps, 1, s->nb_globals);
+    memset(mem_temps + s->nb_globals, 0, s->nb_temps - s->nb_globals);
 }
 
-/* liveness analysis: end of basic block: globals are live, temps are
-   dead, local temps are live. */
-static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps)
+/* liveness analysis: end of basic block: all temps are dead, globals
+   and local temps should be in memory. */
+static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
+                                 uint8_t *mem_temps)
 {
     int i;
-    TCGTemp *ts;
 
-    memset(dead_temps, 0, s->nb_globals);
-    ts = &s->temps[s->nb_globals];
+    memset(dead_temps, 1, s->nb_temps);
+    memset(mem_temps, 1, s->nb_globals);
     for(i = s->nb_globals; i < s->nb_temps; i++) {
-        if (ts->temp_local)
-            dead_temps[i] = 0;
-        else
-            dead_temps[i] = 1;
-        ts++;
+        mem_temps[i] = s->temps[i].temp_local;
     }
 }
 
-/* Liveness analysis : update the opc_dead_iargs array to tell if a
+/* Liveness analysis : update the opc_dead_args array to tell if a
    given input argument is dead. Instructions updating dead
    temporaries are removed. */
 static void tcg_liveness_analysis(TCGContext *s)
 {
     int i, op_index, nb_args, nb_iargs, nb_oargs, arg, nb_ops;
-    TCGOpcode op;
+    TCGOpcode op, op_new, op_new2;
     TCGArg *args;
     const TCGOpDef *def;
-    uint8_t *dead_temps;
-    unsigned int dead_iargs;
+    uint8_t *dead_temps, *mem_temps;
+    uint16_t dead_args;
+    uint8_t sync_args;
+    bool have_op_new2;
+    
+    s->gen_opc_ptr++; /* skip end */
 
-    /* sanity check */
-    if (gen_opc_ptr - gen_opc_buf > OPC_BUF_SIZE) {
-        fprintf(stderr, "PANIC: too many opcodes generated (%d > %d)\n",
-                (int)(gen_opc_ptr - gen_opc_buf), OPC_BUF_SIZE);
-        tcg_abort();
-    }
+    nb_ops = s->gen_opc_ptr - s->gen_opc_buf;
 
-    gen_opc_ptr++; /* skip end */
-
-    nb_ops = gen_opc_ptr - gen_opc_buf;
-
-    s->op_dead_iargs = tcg_malloc(nb_ops * sizeof(uint16_t));
-
+    s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
+    s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
+    
     dead_temps = tcg_malloc(s->nb_temps);
-    memset(dead_temps, 1, s->nb_temps);
+    mem_temps = tcg_malloc(s->nb_temps);
+    tcg_la_func_end(s, dead_temps, mem_temps);
 
-    args = gen_opparam_ptr;
+    args = s->gen_opparam_ptr;
     op_index = nb_ops - 1;
     while (op_index >= 0) {
-        op = gen_opc_buf[op_index];
+        op = s->gen_opc_buf[op_index];
         def = &tcg_op_defs[op];
         switch(op) {
         case INDEX_op_call:
@@ -1250,49 +1473,59 @@
 
                 /* pure functions can be removed if their result is not
                    used */
-                if (call_flags & TCG_CALL_PURE) {
+                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                     for(i = 0; i < nb_oargs; i++) {
                         arg = args[i];
-                        if (!dead_temps[arg])
+                        if (!dead_temps[arg] || mem_temps[arg]) {
                             goto do_not_remove_call;
+                        }
                     }
-                    tcg_set_nop(s, gen_opc_buf + op_index,
+                    tcg_set_nop(s, s->gen_opc_buf + op_index,
                                 args - 1, nb_args);
                 } else {
                 do_not_remove_call:
 
                     /* output args are dead */
+                    dead_args = 0;
+                    sync_args = 0;
                     for(i = 0; i < nb_oargs; i++) {
                         arg = args[i];
+                        if (dead_temps[arg]) {
+                            dead_args |= (1 << i);
+                        }
+                        if (mem_temps[arg]) {
+                            sync_args |= (1 << i);
+                        }
                         dead_temps[arg] = 1;
+                        mem_temps[arg] = 0;
                     }
 
-                    if (!(call_flags & TCG_CALL_CONST)) {
-                        /* globals are live (they may be used by the call) */
-                        memset(dead_temps, 0, s->nb_globals);
+                    if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
+                        /* globals should be synced to memory */
+                        memset(mem_temps, 1, s->nb_globals);
+                    }
+                    if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
+                                        TCG_CALL_NO_READ_GLOBALS))) {
+                        /* globals should go back to memory */
+                        memset(dead_temps, 1, s->nb_globals);
                     }
 
                     /* input args are live */
-                    dead_iargs = 0;
-                    for(i = 0; i < nb_iargs; i++) {
-                        arg = args[i + nb_oargs];
+                    for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
+                        arg = args[i];
                         if (arg != TCG_CALL_DUMMY_ARG) {
                             if (dead_temps[arg]) {
-                                dead_iargs |= (1 << i);
+                                dead_args |= (1 << i);
                             }
                             dead_temps[arg] = 0;
                         }
                     }
-                    s->op_dead_iargs[op_index] = dead_iargs;
+                    s->op_dead_args[op_index] = dead_args;
+                    s->op_sync_args[op_index] = sync_args;
                 }
                 args--;
             }
             break;
-        case INDEX_op_set_label:
-            args--;
-            /* mark end of basic block */
-            tcg_la_bb_end(s, dead_temps);
-            break;
         case INDEX_op_debug_insn_start:
             args -= def->nb_args;
             break;
@@ -1304,11 +1537,97 @@
             args--;
             /* mark the temporary as dead */
             dead_temps[args[0]] = 1;
+            mem_temps[args[0]] = 0;
             break;
         case INDEX_op_end:
             break;
-            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
+
+        case INDEX_op_add2_i32:
+            op_new = INDEX_op_add_i32;
+            goto do_addsub2;
+        case INDEX_op_sub2_i32:
+            op_new = INDEX_op_sub_i32;
+            goto do_addsub2;
+        case INDEX_op_add2_i64:
+            op_new = INDEX_op_add_i64;
+            goto do_addsub2;
+        case INDEX_op_sub2_i64:
+            op_new = INDEX_op_sub_i64;
+        do_addsub2:
+            args -= 6;
+            nb_iargs = 4;
+            nb_oargs = 2;
+            /* Test if the high part of the operation is dead, but not
+               the low part.  The result can be optimized to a simple
+               add or sub.  This often happens for an x86_64 guest when the
+               CPU mode is set to 32-bit.  */
+            if (dead_temps[args[1]] && !mem_temps[args[1]]) {
+                if (dead_temps[args[0]] && !mem_temps[args[0]]) {
+                    goto do_remove;
+                }
+                /* Create the single operation plus nop.  */
+                s->gen_opc_buf[op_index] = op = op_new;
+                args[1] = args[2];
+                args[2] = args[4];
+                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
+                tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 3);
+                /* Fall through and mark the single-word operation live.  */
+                nb_iargs = 2;
+                nb_oargs = 1;
+            }
+            goto do_not_remove;
+
+        case INDEX_op_mulu2_i32:
+            op_new = INDEX_op_mul_i32;
+            op_new2 = INDEX_op_muluh_i32;
+            have_op_new2 = TCG_TARGET_HAS_muluh_i32;
+            goto do_mul2;
+        case INDEX_op_muls2_i32:
+            op_new = INDEX_op_mul_i32;
+            op_new2 = INDEX_op_mulsh_i32;
+            have_op_new2 = TCG_TARGET_HAS_mulsh_i32;
+            goto do_mul2;
+        case INDEX_op_mulu2_i64:
+            op_new = INDEX_op_mul_i64;
+            op_new2 = INDEX_op_muluh_i64;
+            have_op_new2 = TCG_TARGET_HAS_muluh_i64;
+            goto do_mul2;
+        case INDEX_op_muls2_i64:
+            op_new = INDEX_op_mul_i64;
+            op_new2 = INDEX_op_mulsh_i64;
+            have_op_new2 = TCG_TARGET_HAS_mulsh_i64;
+            goto do_mul2;
+        do_mul2:
+            args -= 4;
+            nb_iargs = 2;
+            nb_oargs = 2;
+            if (dead_temps[args[1]] && !mem_temps[args[1]]) {
+                if (dead_temps[args[0]] && !mem_temps[args[0]]) {
+                    /* Both parts of the operation are dead.  */
+                    goto do_remove;
+                }
+                /* The high part of the operation is dead; generate the low. */
+                s->gen_opc_buf[op_index] = op = op_new;
+                args[1] = args[2];
+                args[2] = args[3];
+            } else if (have_op_new2 && dead_temps[args[0]]
+                       && !mem_temps[args[0]]) {
+                /* The low part of the operation is dead; generate the high.  */
+                s->gen_opc_buf[op_index] = op = op_new2;
+                args[0] = args[1];
+                args[1] = args[2];
+                args[2] = args[3];
+            } else {
+                goto do_not_remove;
+            }
+            assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
+            tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 1);
+            /* Mark the single-word operation live.  */
+            nb_oargs = 1;
+            goto do_not_remove;
+
         default:
+            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
             args -= def->nb_args;
             nb_iargs = def->nb_iargs;
             nb_oargs = def->nb_oargs;
@@ -1319,10 +1638,12 @@
             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                 for(i = 0; i < nb_oargs; i++) {
                     arg = args[i];
-                    if (!dead_temps[arg])
+                    if (!dead_temps[arg] || mem_temps[arg]) {
                         goto do_not_remove;
+                    }
                 }
-                tcg_set_nop(s, gen_opc_buf + op_index, args, def->nb_args);
+            do_remove:
+                tcg_set_nop(s, s->gen_opc_buf + op_index, args, def->nb_args);
 #ifdef CONFIG_PROFILER
                 s->del_op_count++;
 #endif
@@ -1330,47 +1651,59 @@
             do_not_remove:
 
                 /* output args are dead */
+                dead_args = 0;
+                sync_args = 0;
                 for(i = 0; i < nb_oargs; i++) {
                     arg = args[i];
+                    if (dead_temps[arg]) {
+                        dead_args |= (1 << i);
+                    }
+                    if (mem_temps[arg]) {
+                        sync_args |= (1 << i);
+                    }
                     dead_temps[arg] = 1;
+                    mem_temps[arg] = 0;
                 }
 
                 /* if end of basic block, update */
                 if (def->flags & TCG_OPF_BB_END) {
-                    tcg_la_bb_end(s, dead_temps);
-                } else if (def->flags & TCG_OPF_CALL_CLOBBER) {
-                    /* globals are live */
-                    memset(dead_temps, 0, s->nb_globals);
+                    tcg_la_bb_end(s, dead_temps, mem_temps);
+                } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
+                    /* globals should be synced to memory */
+                    memset(mem_temps, 1, s->nb_globals);
                 }
 
                 /* input args are live */
-                dead_iargs = 0;
-                for(i = 0; i < nb_iargs; i++) {
-                    arg = args[i + nb_oargs];
+                for(i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
+                    arg = args[i];
                     if (dead_temps[arg]) {
-                        dead_iargs |= (1 << i);
+                        dead_args |= (1 << i);
                     }
                     dead_temps[arg] = 0;
                 }
-                s->op_dead_iargs[op_index] = dead_iargs;
+                s->op_dead_args[op_index] = dead_args;
+                s->op_sync_args[op_index] = sync_args;
             }
             break;
         }
         op_index--;
     }
 
-    if (args != gen_opparam_buf)
+    if (args != s->gen_opparam_buf) {
         tcg_abort();
+    }
 }
 #else
 /* dummy liveness analysis */
 static void tcg_liveness_analysis(TCGContext *s)
 {
     int nb_ops;
-    nb_ops = gen_opc_ptr - gen_opc_buf;
+    nb_ops = s->gen_opc_ptr - s->gen_opc_buf;
 
-    s->op_dead_iargs = tcg_malloc(nb_ops * sizeof(uint16_t));
-    memset(s->op_dead_iargs, 0, nb_ops * sizeof(uint16_t));
+    s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
+    memset(s->op_dead_args, 0, nb_ops * sizeof(uint16_t));
+    s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
+    memset(s->op_sync_args, 0, nb_ops * sizeof(uint8_t));
 }
 #endif
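
The liveness pass above now records two results per op: op_dead_args, a bitmask of arguments whose values die at that op, and op_sync_args, a bitmask of outputs that must additionally be written back to their memory slots. A small sketch of how those masks are encoded and later tested; the real IS_DEAD_ARG()/NEED_SYNC_ARG() macros read dead_args/sync_args from the enclosing function, so the parameterized mirrors below are an adaptation to keep the example self-contained:

    #include <stdint.h>
    #include <stdio.h>

    /* Parameterized mirrors of IS_DEAD_ARG()/NEED_SYNC_ARG(). */
    #define IS_DEAD(dead_args, n)    (((dead_args) >> (n)) & 1)
    #define NEEDS_SYNC(sync_args, n) (((sync_args) >> (n)) & 1)

    int main(void)
    {
        uint16_t dead_args = 0;   /* one bit per argument index */
        uint8_t  sync_args = 0;   /* one bit per output index */
        int i;

        /* Suppose an op with one output (arg 0) and two inputs (args 1, 2):
           the output dies here but must be synced to memory (say it is a
           global at the end of a TB), and input 2 is used for the last time. */
        dead_args |= 1 << 0;
        sync_args |= 1 << 0;
        dead_args |= 1 << 2;

        for (i = 0; i < 3; i++) {
            printf("arg %d: dead=%d sync=%d\n", i,
                   (int)IS_DEAD(dead_args, i),
                   (int)NEEDS_SYNC(sync_args, i));
        }
        return 0;
    }
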
 
@@ -1406,8 +1739,8 @@
 
     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
         if (s->reg_to_temp[i] >= 0) {
-            printf("%s: %s\n",
-                   tcg_target_reg_names[i],
+            printf("%s: %s\n", 
+                   tcg_target_reg_names[i], 
                    tcg_get_arg_str_idx(s, buf, sizeof(buf), s->reg_to_temp[i]));
         }
     }
@@ -1425,7 +1758,7 @@
             ts = &s->temps[k];
             if (ts->val_type != TEMP_VAL_REG ||
                 ts->reg != reg) {
-                printf("Inconsistency for register %s:\n",
+                printf("Inconsistency for register %s:\n", 
                        tcg_target_reg_names[reg]);
                 goto fail;
             }
@@ -1436,7 +1769,7 @@
         if (ts->val_type == TEMP_VAL_REG &&
             !ts->fixed_reg &&
             s->reg_to_temp[ts->reg] != k) {
-                printf("Inconsistency for temp %s:\n",
+                printf("Inconsistency for temp %s:\n", 
                        tcg_get_arg_str_idx(s, buf, sizeof(buf), k));
         fail:
                 printf("reg state:\n");
@@ -1451,31 +1784,49 @@
 {
     TCGTemp *ts;
     ts = &s->temps[temp];
-    s->current_frame_offset = (s->current_frame_offset + sizeof(tcg_target_long) - 1) & ~(sizeof(tcg_target_long) - 1);
-    if (s->current_frame_offset + sizeof(tcg_target_long) > s->frame_end)
+#if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
+    /* Sparc64 stack is accessed with offset of 2047 */
+    s->current_frame_offset = (s->current_frame_offset +
+                               (tcg_target_long)sizeof(tcg_target_long) - 1) &
+        ~(sizeof(tcg_target_long) - 1);
+#endif
+    if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
+        s->frame_end) {
         tcg_abort();
+    }
     ts->mem_offset = s->current_frame_offset;
     ts->mem_reg = s->frame_reg;
     ts->mem_allocated = 1;
     s->current_frame_offset += sizeof(tcg_target_long);
 }
 
-/* free register 'reg' by spilling the corresponding temporary if necessary */
-static void tcg_reg_free(TCGContext *s, int reg)
+/* sync register 'reg' by saving it to the corresponding temporary */
+static inline void tcg_reg_sync(TCGContext *s, int reg)
 {
     TCGTemp *ts;
     int temp;
 
     temp = s->reg_to_temp[reg];
-    if (temp != -1) {
-        ts = &s->temps[temp];
-        assert(ts->val_type == TEMP_VAL_REG);
-        if (!ts->mem_coherent) {
-            if (!ts->mem_allocated)
-                temp_allocate_frame(s, temp);
-            tcg_out_st(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
+    ts = &s->temps[temp];
+    assert(ts->val_type == TEMP_VAL_REG);
+    if (!ts->mem_coherent && !ts->fixed_reg) {
+        if (!ts->mem_allocated) {
+            temp_allocate_frame(s, temp);
         }
-        ts->val_type = TEMP_VAL_MEM;
+        tcg_out_st(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
+    }
+    ts->mem_coherent = 1;
+}
+
+/* free register 'reg' by spilling the corresponding temporary if necessary */
+static void tcg_reg_free(TCGContext *s, int reg)
+{
+    int temp;
+
+    temp = s->reg_to_temp[reg];
+    if (temp != -1) {
+        tcg_reg_sync(s, reg);
+        s->temps[temp].val_type = TEMP_VAL_MEM;
         s->reg_to_temp[reg] = -1;
     }
 }
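
Spilling is now split in two: tcg_reg_sync() writes a dirty register back to its memory slot but keeps the register allocated to the temp, while tcg_reg_free() syncs and then drops the register-to-temp mapping as well. A toy model of that split, using illustrative field names rather than TCG's:

    #include <stdbool.h>
    #include <stdio.h>

    struct temp {
        long mem_slot;       /* index of the backing memory location */
        int  reg;            /* register currently holding the value, or -1 */
        bool mem_coherent;   /* memory copy is up to date */
    };

    static long memory[16];

    static void reg_sync(struct temp *t, long reg_value)
    {
        if (!t->mem_coherent) {
            memory[t->mem_slot] = reg_value;   /* spill store */
            t->mem_coherent = true;
        }
        /* the register mapping stays intact */
    }

    static void reg_free(struct temp *t, long reg_value)
    {
        reg_sync(t, reg_value);
        t->reg = -1;                           /* value now lives only in memory */
    }

    int main(void)
    {
        struct temp t = { .mem_slot = 3, .reg = 5, .mem_coherent = false };
        reg_free(&t, 42);
        printf("mem[3]=%ld reg=%d\n", memory[3], t.reg);
        return 0;
    }
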
@@ -1507,31 +1858,45 @@
     tcg_abort();
 }
 
-/* save a temporary to memory. 'allocated_regs' is used in case a
-   temporary registers needs to be allocated to store a constant. */
-static void temp_save(TCGContext *s, int temp, TCGRegSet allocated_regs)
+/* mark a temporary as dead. */
+static inline void temp_dead(TCGContext *s, int temp)
 {
     TCGTemp *ts;
-    int reg;
+
+    ts = &s->temps[temp];
+    if (!ts->fixed_reg) {
+        if (ts->val_type == TEMP_VAL_REG) {
+            s->reg_to_temp[ts->reg] = -1;
+        }
+        if (temp < s->nb_globals || ts->temp_local) {
+            ts->val_type = TEMP_VAL_MEM;
+        } else {
+            ts->val_type = TEMP_VAL_DEAD;
+        }
+    }
+}
+
+/* sync a temporary to memory. 'allocated_regs' is used in case a
+   temporary registers needs to be allocated to store a constant. */
+static inline void temp_sync(TCGContext *s, int temp, TCGRegSet allocated_regs)
+{
+    TCGTemp *ts;
 
     ts = &s->temps[temp];
     if (!ts->fixed_reg) {
         switch(ts->val_type) {
+        case TEMP_VAL_CONST:
+            ts->reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
+                                    allocated_regs);
+            ts->val_type = TEMP_VAL_REG;
+            s->reg_to_temp[ts->reg] = temp;
+            ts->mem_coherent = 0;
+            tcg_out_movi(s, ts->type, ts->reg, ts->val);
+            /* fallthrough*/
         case TEMP_VAL_REG:
-            tcg_reg_free(s, ts->reg);
+            tcg_reg_sync(s, ts->reg);
             break;
         case TEMP_VAL_DEAD:
-            ts->val_type = TEMP_VAL_MEM;
-            break;
-        case TEMP_VAL_CONST:
-            reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
-                                allocated_regs);
-            if (!ts->mem_allocated)
-                temp_allocate_frame(s, temp);
-            tcg_out_movi(s, ts->type, reg, ts->val);
-            tcg_out_st(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
-            ts->val_type = TEMP_VAL_MEM;
-            break;
         case TEMP_VAL_MEM:
             break;
         default:
@@ -1540,7 +1905,21 @@
     }
 }
 
-/* save globals to their cannonical location and assume they can be
+/* save a temporary to memory. 'allocated_regs' is used in case a
+   temporary register needs to be allocated to store a constant. */
+static inline void temp_save(TCGContext *s, int temp, TCGRegSet allocated_regs)
+{
+#ifdef USE_LIVENESS_ANALYSIS
+    /* The liveness analysis already ensures that globals are back
+       in memory. Keep an assert for safety. */
+    assert(s->temps[temp].val_type == TEMP_VAL_MEM || s->temps[temp].fixed_reg);
+#else
+    temp_sync(s, temp, allocated_regs);
+    temp_dead(s, temp);
+#endif
+}
+
+/* save globals to their canonical location and assume they can be
    modified by the following code. 'allocated_regs' is used in case a
    temporary register needs to be allocated to store a constant. */
 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
@@ -1552,6 +1931,23 @@
     }
 }
 
+/* sync globals to their canonical location and assume they can be
+   read by the following code. 'allocated_regs' is used in case a
+   temporary register needs to be allocated to store a constant. */
+static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
+{
+    int i;
+
+    for (i = 0; i < s->nb_globals; i++) {
+#ifdef USE_LIVENESS_ANALYSIS
+        assert(s->temps[i].val_type != TEMP_VAL_REG || s->temps[i].fixed_reg ||
+               s->temps[i].mem_coherent);
+#else
+        temp_sync(s, i, allocated_regs);
+#endif
+    }
+}
+
 /* at the end of a basic block, we assume all temporaries are dead and
    all globals are stored at their canonical location. */
 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
@@ -1564,19 +1960,24 @@
         if (ts->temp_local) {
             temp_save(s, i, allocated_regs);
         } else {
-            if (ts->val_type == TEMP_VAL_REG) {
-                s->reg_to_temp[ts->reg] = -1;
-            }
-            ts->val_type = TEMP_VAL_DEAD;
+#ifdef USE_LIVENESS_ANALYSIS
+            /* The liveness analysis already ensures that temps are dead.
+               Keep an assert for safety. */
+            assert(ts->val_type == TEMP_VAL_DEAD);
+#else
+            temp_dead(s, i);
+#endif
         }
     }
 
     save_globals(s, allocated_regs);
 }
 
-#define IS_DEAD_IARG(n) ((dead_iargs >> (n)) & 1)
+#define IS_DEAD_ARG(n) ((dead_args >> (n)) & 1)
+#define NEED_SYNC_ARG(n) ((sync_args >> (n)) & 1)
 
-static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args)
+static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
+                               uint16_t dead_args, uint8_t sync_args)
 {
     TCGTemp *ots;
     tcg_target_ulong val;
@@ -1595,71 +1996,99 @@
         ots->val_type = TEMP_VAL_CONST;
         ots->val = val;
     }
+    if (NEED_SYNC_ARG(0)) {
+        temp_sync(s, args[0], s->reserved_regs);
+    }
+    if (IS_DEAD_ARG(0)) {
+        temp_dead(s, args[0]);
+    }
 }
 
 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
-                              const TCGArg *args,
-                              unsigned int dead_iargs)
+                              const TCGArg *args, uint16_t dead_args,
+                              uint8_t sync_args)
 {
+    TCGRegSet allocated_regs;
     TCGTemp *ts, *ots;
-    int reg;
-    const TCGArgConstraint *arg_ct;
+    const TCGArgConstraint *arg_ct, *oarg_ct;
 
+    tcg_regset_set(allocated_regs, s->reserved_regs);
     ots = &s->temps[args[0]];
     ts = &s->temps[args[1]];
-    arg_ct = &def->args_ct[0];
+    oarg_ct = &def->args_ct[0];
+    arg_ct = &def->args_ct[1];
 
-    /* XXX: always mark arg dead if IS_DEAD_IARG(0) */
-    if (ts->val_type == TEMP_VAL_REG) {
-        if (IS_DEAD_IARG(0) && !ts->fixed_reg && !ots->fixed_reg) {
-            /* the mov can be suppressed */
-            if (ots->val_type == TEMP_VAL_REG)
-                s->reg_to_temp[ots->reg] = -1;
-            reg = ts->reg;
-            s->reg_to_temp[reg] = -1;
-            ts->val_type = TEMP_VAL_DEAD;
-        } else {
-            if (ots->val_type == TEMP_VAL_REG) {
-                reg = ots->reg;
-            } else {
-                reg = tcg_reg_alloc(s, arg_ct->u.regs, s->reserved_regs);
-            }
-            if (ts->reg != reg) {
-                tcg_out_mov(s, ots->type, reg, ts->reg);
-            }
+    /* If the source value is not in a register, and we're going to be
+       forced to have it in a register in order to perform the copy,
+       then copy the SOURCE value into its own register first.  That way
+       we don't have to reload SOURCE the next time it is used. */
+    if (((NEED_SYNC_ARG(0) || ots->fixed_reg) && ts->val_type != TEMP_VAL_REG)
+        || ts->val_type == TEMP_VAL_MEM) {
+        ts->reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
+        if (ts->val_type == TEMP_VAL_MEM) {
+            tcg_out_ld(s, ts->type, ts->reg, ts->mem_reg, ts->mem_offset);
+            ts->mem_coherent = 1;
+        } else if (ts->val_type == TEMP_VAL_CONST) {
+            tcg_out_movi(s, ts->type, ts->reg, ts->val);
         }
-    } else if (ts->val_type == TEMP_VAL_MEM) {
-        if (ots->val_type == TEMP_VAL_REG) {
-            reg = ots->reg;
-        } else {
-            reg = tcg_reg_alloc(s, arg_ct->u.regs, s->reserved_regs);
-        }
-        tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
-    } else if (ts->val_type == TEMP_VAL_CONST) {
-        if (ots->fixed_reg) {
-            reg = ots->reg;
-            tcg_out_movi(s, ots->type, reg, ts->val);
-        } else {
-            /* propagate constant */
-            if (ots->val_type == TEMP_VAL_REG)
-                s->reg_to_temp[ots->reg] = -1;
-            ots->val_type = TEMP_VAL_CONST;
-            ots->val = ts->val;
-            return;
-        }
-    } else {
-        tcg_abort();
+        s->reg_to_temp[ts->reg] = args[1];
+        ts->val_type = TEMP_VAL_REG;
     }
-    s->reg_to_temp[reg] = args[0];
-    ots->reg = reg;
-    ots->val_type = TEMP_VAL_REG;
-    ots->mem_coherent = 0;
+
+    if (IS_DEAD_ARG(0) && !ots->fixed_reg) {
+        /* mov to a non-saved dead register makes no sense (even with
+           liveness analysis disabled). */
+        assert(NEED_SYNC_ARG(0));
+        /* The code above should have moved the temp to a register. */
+        assert(ts->val_type == TEMP_VAL_REG);
+        if (!ots->mem_allocated) {
+            temp_allocate_frame(s, args[0]);
+        }
+        tcg_out_st(s, ots->type, ts->reg, ots->mem_reg, ots->mem_offset);
+        if (IS_DEAD_ARG(1)) {
+            temp_dead(s, args[1]);
+        }
+        temp_dead(s, args[0]);
+    } else if (ts->val_type == TEMP_VAL_CONST) {
+        /* propagate constant */
+        if (ots->val_type == TEMP_VAL_REG) {
+            s->reg_to_temp[ots->reg] = -1;
+        }
+        ots->val_type = TEMP_VAL_CONST;
+        ots->val = ts->val;
+    } else {
+        /* The code in the first if block should have moved the
+           temp to a register. */
+        assert(ts->val_type == TEMP_VAL_REG);
+        if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
+            /* the mov can be suppressed */
+            if (ots->val_type == TEMP_VAL_REG) {
+                s->reg_to_temp[ots->reg] = -1;
+            }
+            ots->reg = ts->reg;
+            temp_dead(s, args[1]);
+        } else {
+            if (ots->val_type != TEMP_VAL_REG) {
+                /* When allocating a new register, make sure to not spill the
+                   input one. */
+                tcg_regset_set_reg(allocated_regs, ts->reg);
+                ots->reg = tcg_reg_alloc(s, oarg_ct->u.regs, allocated_regs);
+            }
+            tcg_out_mov(s, ots->type, ots->reg, ts->reg);
+        }
+        ots->val_type = TEMP_VAL_REG;
+        ots->mem_coherent = 0;
+        s->reg_to_temp[ots->reg] = args[0];
+        if (NEED_SYNC_ARG(0)) {
+            tcg_reg_sync(s, ots->reg);
+        }
+    }
 }
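
The rewritten tcg_reg_alloc_mov() above picks between three main outcomes: propagate a constant source into the output temp without emitting code, let the output simply take over the source register when the source dies, or materialize the source in a register and emit a host move. (A fourth path, not modeled below, stores straight to the output's memory slot when the output is dead but still needs a sync.) A simplified decision sketch under those assumptions:

    #include <stdbool.h>
    #include <stdio.h>

    typedef enum { SRC_CONST, SRC_REG, SRC_MEM } src_kind;

    static const char *mov_strategy(src_kind src, bool src_dies, bool out_fixed)
    {
        if (src == SRC_CONST && !out_fixed) {
            return "propagate the constant into the output temp (no code)";
        }
        if (src == SRC_REG && src_dies && !out_fixed) {
            return "rename: the output takes over the source register (no code)";
        }
        return "materialize the source in a register, then emit a host move";
    }

    int main(void)
    {
        printf("%s\n", mov_strategy(SRC_CONST, false, false));
        printf("%s\n", mov_strategy(SRC_REG, true, false));
        printf("%s\n", mov_strategy(SRC_MEM, false, false));
        return 0;
    }
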
 
-static void tcg_reg_alloc_op(TCGContext *s,
+static void tcg_reg_alloc_op(TCGContext *s, 
                              const TCGOpDef *def, TCGOpcode opc,
-                             const TCGArg *args,
-                             unsigned int dead_iargs)
+                             const TCGArg *args, uint16_t dead_args,
+                             uint8_t sync_args)
 {
     TCGRegSet allocated_regs;
     int i, k, nb_iargs, nb_oargs, reg;
@@ -1673,11 +2102,11 @@
     nb_iargs = def->nb_iargs;
 
     /* copy constants */
-    memcpy(new_args + nb_oargs + nb_iargs,
-           args + nb_oargs + nb_iargs,
+    memcpy(new_args + nb_oargs + nb_iargs, 
+           args + nb_oargs + nb_iargs, 
            sizeof(TCGArg) * def->nb_cargs);
 
-    /* satisfy input constraints */
+    /* satisfy input constraints */ 
     tcg_regset_set(allocated_regs, s->reserved_regs);
     for(k = 0; k < nb_iargs; k++) {
         i = def->sorted_args[nb_oargs + k];
@@ -1718,8 +2147,9 @@
                 /* if the input is aliased to an output and if it is
                    not dead after the instruction, we must allocate
                    a new register and move it */
-                if (!IS_DEAD_IARG(i - nb_oargs))
+                if (!IS_DEAD_ARG(i)) {
                     goto allocate_in_reg;
+                }
             }
         }
         reg = ts->reg;
@@ -1727,7 +2157,7 @@
             /* nothing to do : the constraint is satisfied */
         } else {
         allocate_in_reg:
-            /* allocate a new register matching the constraint
+            /* allocate a new register matching the constraint 
                and move the temporary register into it */
             reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
             tcg_out_mov(s, ts->type, reg, ts->reg);
@@ -1737,38 +2167,31 @@
         tcg_regset_set_reg(allocated_regs, reg);
     iarg_end: ;
     }
+    
+    /* mark dead temporaries and free the associated registers */
+    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
+        if (IS_DEAD_ARG(i)) {
+            temp_dead(s, args[i]);
+        }
+    }
 
     if (def->flags & TCG_OPF_BB_END) {
         tcg_reg_alloc_bb_end(s, allocated_regs);
     } else {
-        /* mark dead temporaries and free the associated registers */
-        for(i = 0; i < nb_iargs; i++) {
-            arg = args[nb_oargs + i];
-            if (IS_DEAD_IARG(i)) {
-                ts = &s->temps[arg];
-                if (!ts->fixed_reg) {
-                    if (ts->val_type == TEMP_VAL_REG)
-                        s->reg_to_temp[ts->reg] = -1;
-                    ts->val_type = TEMP_VAL_DEAD;
-                }
-            }
-        }
-
         if (def->flags & TCG_OPF_CALL_CLOBBER) {
-            /* XXX: permit generic clobber register list ? */
+            /* XXX: permit generic clobber register list ? */ 
             for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, reg)) {
                     tcg_reg_free(s, reg);
                 }
             }
-            /* XXX: for load/store we could do that only for the slow path
-               (i.e. when a memory callback is called) */
-
-            /* store globals and free associated registers (we assume the insn
-               can modify any global. */
-            save_globals(s, allocated_regs);
         }
-
+        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
+            /* sync globals if the op has side effects and might trigger
+               an exception. */
+            sync_globals(s, allocated_regs);
+        }
+        
         /* satisfy the output constraints */
         tcg_regset_set(allocated_regs, s->reserved_regs);
         for(k = 0; k < nb_oargs; k++) {
@@ -1790,8 +2213,9 @@
             tcg_regset_set_reg(allocated_regs, reg);
             /* if a fixed register is used, then a move will be done afterwards */
             if (!ts->fixed_reg) {
-                if (ts->val_type == TEMP_VAL_REG)
+                if (ts->val_type == TEMP_VAL_REG) {
                     s->reg_to_temp[ts->reg] = -1;
+                }
                 ts->val_type = TEMP_VAL_REG;
                 ts->reg = reg;
                 /* temp value is modified, so the value kept in memory is
@@ -1806,7 +2230,7 @@
 
     /* emit instruction */
     tcg_out_op(s, opc, new_args, const_args);
-
+    
     /* move the outputs in the correct register if needed */
     for(i = 0; i < nb_oargs; i++) {
         ts = &s->temps[args[i]];
@@ -1814,6 +2238,12 @@
         if (ts->fixed_reg && ts->reg != reg) {
             tcg_out_mov(s, ts->type, ts->reg, reg);
         }
+        if (NEED_SYNC_ARG(i)) {
+            tcg_reg_sync(s, reg);
+        }
+        if (IS_DEAD_ARG(i)) {
+            temp_dead(s, args[i]);
+        }
     }
 }
 
@@ -1825,12 +2255,14 @@
 
 static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
                               TCGOpcode opc, const TCGArg *args,
-                              unsigned int dead_iargs)
+                              uint16_t dead_args, uint8_t sync_args)
 {
     int nb_iargs, nb_oargs, flags, nb_regs, i, reg, nb_params;
     TCGArg arg, func_arg;
     TCGTemp *ts;
-    tcg_target_long stack_offset, call_stack_size, func_addr;
+    intptr_t stack_offset;
+    size_t call_stack_size;
+    uintptr_t func_addr;
     int const_func_arg, allocate_args;
     TCGRegSet allocated_regs;
     const TCGArgConstraint *arg_ct;
@@ -1843,18 +2275,19 @@
 
     flags = args[nb_oargs + nb_iargs];
 
-    nb_regs = tcg_target_get_call_iarg_regs_count(flags);
+    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
     if (nb_regs > nb_params)
         nb_regs = nb_params;
 
     /* assign stack slots first */
-    /* XXX: preallocate call stack */
     call_stack_size = (nb_params - nb_regs) * sizeof(tcg_target_long);
-    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
+    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & 
         ~(TCG_TARGET_STACK_ALIGN - 1);
     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
     if (allocate_args) {
-        tcg_out_addi(s, TCG_REG_CALL_STACK, -STACK_DIR(call_stack_size));
+        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
+           preallocate call stack */
+        tcg_abort();
     }
 
     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
@@ -1868,13 +2301,13 @@
             if (ts->val_type == TEMP_VAL_REG) {
                 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
             } else if (ts->val_type == TEMP_VAL_MEM) {
-                reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
+                reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], 
                                     s->reserved_regs);
                 /* XXX: not correct if reading values from the stack */
                 tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
                 tcg_out_st(s, ts->type, reg, TCG_REG_CALL_STACK, stack_offset);
             } else if (ts->val_type == TEMP_VAL_CONST) {
-                reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
+                reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], 
                                     s->reserved_regs);
                 /* XXX: sign extend may be needed on some targets */
                 tcg_out_movi(s, ts->type, reg, ts->val);
@@ -1887,7 +2320,7 @@
         stack_offset += sizeof(tcg_target_long);
 #endif
     }
-
+    
     /* assign input registers */
     tcg_regset_set(allocated_regs, s->reserved_regs);
     for(i = 0; i < nb_regs; i++) {
@@ -1911,7 +2344,7 @@
             tcg_regset_set_reg(allocated_regs, reg);
         }
     }
-
+    
     /* assign function address */
     func_arg = args[nb_oargs + nb_iargs - 1];
     arg_ct = &def->args_ct[0];
@@ -1944,21 +2377,15 @@
     } else {
         tcg_abort();
     }
-
-
+        
+    
     /* mark dead temporaries and free the associated registers */
-    for(i = 0; i < nb_iargs; i++) {
-        arg = args[nb_oargs + i];
-        if (IS_DEAD_IARG(i)) {
-            ts = &s->temps[arg];
-            if (!ts->fixed_reg) {
-                if (ts->val_type == TEMP_VAL_REG)
-                    s->reg_to_temp[ts->reg] = -1;
-                ts->val_type = TEMP_VAL_DEAD;
-            }
+    for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
+        if (IS_DEAD_ARG(i)) {
+            temp_dead(s, args[i]);
         }
     }
-
+    
     /* clobber call registers */
     for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, reg)) {
@@ -1966,18 +2393,18 @@
         }
     }
 
-    /* store globals and free associated registers (we assume the call
-       can modify any global. */
-    if (!(flags & TCG_CALL_CONST)) {
+    /* Save globals if they might be written by the helper, sync them if
+       they might be read. */
+    if (flags & TCG_CALL_NO_READ_GLOBALS) {
+        /* Nothing to do */
+    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
+        sync_globals(s, allocated_regs);
+    } else {
         save_globals(s, allocated_regs);
     }
 
     tcg_out_op(s, opc, &func_arg, &const_func_arg);
 
-    if (allocate_args) {
-        tcg_out_addi(s, TCG_REG_CALL_STACK, STACK_DIR(call_stack_size));
-    }
-
     /* assign output registers and emit moves if needed */
     for(i = 0; i < nb_oargs; i++) {
         arg = args[i];
@@ -1989,15 +2416,22 @@
                 tcg_out_mov(s, ts->type, ts->reg, reg);
             }
         } else {
-            if (ts->val_type == TEMP_VAL_REG)
+            if (ts->val_type == TEMP_VAL_REG) {
                 s->reg_to_temp[ts->reg] = -1;
+            }
             ts->val_type = TEMP_VAL_REG;
             ts->reg = reg;
             ts->mem_coherent = 0;
             s->reg_to_temp[reg] = arg;
+            if (NEED_SYNC_ARG(i)) {
+                tcg_reg_sync(s, reg);
+            }
+            if (IS_DEAD_ARG(i)) {
+                temp_dead(s, args[i]);
+            }
         }
     }
-
+    
     return nb_iargs + nb_oargs + def->nb_cargs + 1;
 }
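
Helper-call flags now distinguish three policies for globals around a call: TCG_CALL_NO_READ_GLOBALS leaves them untouched, TCG_CALL_NO_WRITE_GLOBALS only syncs them to memory (their register copies stay valid afterwards), and the default saves them entirely. A sketch of that decision table, using illustrative flag names with the same meaning:

    #include <stdio.h>

    enum {
        CALL_NO_READ_GLOBALS  = 1 << 0,  /* helper never reads guest globals */
        CALL_NO_WRITE_GLOBALS = 1 << 1,  /* helper never writes guest globals */
    };

    static const char *globals_policy(unsigned flags)
    {
        if (flags & CALL_NO_READ_GLOBALS) {
            return "leave globals in registers";           /* nothing to do */
        }
        if (flags & CALL_NO_WRITE_GLOBALS) {
            return "sync globals (register copies stay valid)"; /* sync_globals() */
        }
        return "save globals (register copies invalidated)";    /* save_globals() */
    }

    int main(void)
    {
        printf("%s\n", globals_policy(0));
        printf("%s\n", globals_policy(CALL_NO_WRITE_GLOBALS));
        printf("%s\n", globals_policy(CALL_NO_READ_GLOBALS));
        return 0;
    }
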
 
@@ -2024,41 +2458,40 @@
     TCGOpcode opc;
     int op_index;
     const TCGOpDef *def;
-    unsigned int dead_iargs;
     const TCGArg *args;
-#ifdef CONFIG_MEMCHECK
-    unsigned int tpc2gpc_index = 0;
-#endif  // CONFIG_MEMCHECK
-
-#if !SUPPORT_GLOBAL_REGISTER_VARIABLE
-    printf("ERROR: This emulator is built by compiler without global register variable\n"
-           "support!  Emulator reserves a register to point to target architecture state\n"
-           "for better code generation.  LLVM-based compilers such as clang and llvm-gcc\n"
-           "currently don't support global register variable.  Please see\n"
-           "http://source.android.com/source/initializing.html for detail.\n\n");
-    tcg_abort();
-#endif
 
 #ifdef DEBUG_DISAS
     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
         qemu_log("OP:\n");
-        tcg_dump_ops(s, qemu_logfile);
+        tcg_dump_ops(s);
         qemu_log("\n");
     }
 #endif
 
 #ifdef CONFIG_PROFILER
+    s->opt_time -= profile_getclock();
+#endif
+
+#ifdef USE_TCG_OPTIMIZATIONS
+    s->gen_opparam_ptr =
+        tcg_optimize(s, s->gen_opc_ptr, s->gen_opparam_buf, tcg_op_defs);
+#endif
+
+#ifdef CONFIG_PROFILER
+    s->opt_time += profile_getclock();
     s->la_time -= profile_getclock();
 #endif
+
     tcg_liveness_analysis(s);
+
 #ifdef CONFIG_PROFILER
     s->la_time += profile_getclock();
 #endif
 
 #ifdef DEBUG_DISAS
     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT))) {
-        qemu_log("OP after liveness analysis:\n");
-        tcg_dump_ops(s, qemu_logfile);
+        qemu_log("OP after optimization and liveness analysis:\n");
+        tcg_dump_ops(s);
         qemu_log("\n");
     }
 #endif
@@ -2068,31 +2501,13 @@
     s->code_buf = gen_code_buf;
     s->code_ptr = gen_code_buf;
 
-    args = gen_opparam_buf;
+    tcg_out_tb_init(s);
+
+    args = s->gen_opparam_buf;
     op_index = 0;
 
-#ifdef CONFIG_MEMCHECK
-    gen_opc_tpc2gpc_pairs = 0;
-#endif  // CONFIG_MEMCHECK
-
     for(;;) {
-#ifdef CONFIG_MEMCHECK
-        /* On condition that memcheck is enabled, and operation index reached
-         * new operation in the guest code, save (pc_tb, pc_guest) pair into
-         * gen_opc_tpc2gpc array. Note that we do that only on condition that
-         * search_pc is < 0. This way we make sure that this is "normal"
-         * translation, called from tcg_gen_code, and not from
-         * tcg_gen_code_search_pc. */
-        if (memcheck_enabled && search_pc < 0 &&
-            gen_opc_instr_start[op_index]) {
-            gen_opc_tpc2gpc_ptr[tpc2gpc_index] = s->code_ptr;
-            tpc2gpc_index++;
-            gen_opc_tpc2gpc_ptr[tpc2gpc_index] = (void*)(ptrdiff_t)gen_opc_pc[op_index];
-            tpc2gpc_index++;
-            gen_opc_tpc2gpc_pairs++;
-        }
-#endif  // CONFIG_MEMCHECK
-        opc = gen_opc_buf[op_index];
+        opc = s->gen_opc_buf[op_index];
 #ifdef CONFIG_PROFILER
         tcg_table_op_count[opc]++;
 #endif
@@ -2104,17 +2519,14 @@
 #endif
         switch(opc) {
         case INDEX_op_mov_i32:
-#if TCG_TARGET_REG_BITS == 64
         case INDEX_op_mov_i64:
-#endif
-            dead_iargs = s->op_dead_iargs[op_index];
-            tcg_reg_alloc_mov(s, def, args, dead_iargs);
+            tcg_reg_alloc_mov(s, def, args, s->op_dead_args[op_index],
+                              s->op_sync_args[op_index]);
             break;
         case INDEX_op_movi_i32:
-#if TCG_TARGET_REG_BITS == 64
         case INDEX_op_movi_i64:
-#endif
-            tcg_reg_alloc_movi(s, args);
+            tcg_reg_alloc_movi(s, args, s->op_dead_args[op_index],
+                               s->op_sync_args[op_index]);
             break;
         case INDEX_op_debug_insn_start:
             /* debug instruction */
@@ -2128,33 +2540,29 @@
             args += args[0];
             goto next;
         case INDEX_op_discard:
-            {
-                TCGTemp *ts;
-                ts = &s->temps[args[0]];
-                /* mark the temporary as dead */
-                if (!ts->fixed_reg) {
-                    if (ts->val_type == TEMP_VAL_REG)
-                        s->reg_to_temp[ts->reg] = -1;
-                    ts->val_type = TEMP_VAL_DEAD;
-                }
-            }
+            temp_dead(s, args[0]);
             break;
         case INDEX_op_set_label:
             tcg_reg_alloc_bb_end(s, s->reserved_regs);
-            tcg_out_label(s, args[0], (long)s->code_ptr);
+            tcg_out_label(s, args[0], s->code_ptr);
             break;
         case INDEX_op_call:
-            dead_iargs = s->op_dead_iargs[op_index];
-            args += tcg_reg_alloc_call(s, def, opc, args, dead_iargs);
+            args += tcg_reg_alloc_call(s, def, opc, args,
+                                       s->op_dead_args[op_index],
+                                       s->op_sync_args[op_index]);
             goto next;
         case INDEX_op_end:
             goto the_end;
         default:
+            /* Sanity check that we've not introduced any unhandled opcodes. */
+            if (def->flags & TCG_OPF_NOT_PRESENT) {
+                tcg_abort();
+            }
             /* Note: in order to speed up the code, it would be much
                faster to have specialized register allocator functions for
                some common argument patterns */
-            dead_iargs = s->op_dead_iargs[op_index];
-            tcg_reg_alloc_op(s, def, opc, args, dead_iargs);
+            tcg_reg_alloc_op(s, def, opc, args, s->op_dead_args[op_index],
+                             s->op_sync_args[op_index]);
             break;
         }
         args += def->nb_args;
@@ -2168,6 +2576,8 @@
 #endif
     }
  the_end:
+    /* Generate TB finalization at the end of block */
+    tcg_out_tb_finalize(s);
     return -1;
 }
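
Code generation now runs tcg_optimize() before the liveness pass, and with CONFIG_PROFILER each phase is bracketed by clock deltas accumulated into opt_time and la_time. A minimal sketch of that bracketing pattern, using ISO C clock() as a stand-in for QEMU's profile_getclock() and empty stand-ins for the passes:

    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    static int64_t now(void)
    {
        return (int64_t)clock();   /* stand-in for profile_getclock() */
    }

    static void optimize_pass(void) { /* stand-in for tcg_optimize() */ }
    static void liveness_pass(void) { /* stand-in for tcg_liveness_analysis() */ }

    int main(void)
    {
        int64_t opt_time = 0, la_time = 0;

        opt_time -= now();
        optimize_pass();
        opt_time += now();

        la_time -= now();
        liveness_pass();
        la_time += now();

        printf("opt %lld ticks, liveness %lld ticks\n",
               (long long)opt_time, (long long)la_time);
        return 0;
    }
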
 
@@ -2176,7 +2586,7 @@
 #ifdef CONFIG_PROFILER
     {
         int n;
-        n = (gen_opc_ptr - gen_opc_buf);
+        n = (s->gen_opc_ptr - s->gen_opc_buf);
         s->op_count += n;
         if (n > s->op_count_max)
             s->op_count_max = n;
@@ -2187,18 +2597,11 @@
     }
 #endif
 
-    /* sanity check */
-    if (gen_opc_ptr - gen_opc_buf > OPC_BUF_SIZE) {
-        fprintf(stderr, "PANIC: too many opcodes generated (%d > %d)\n",
-                (int)(gen_opc_ptr - gen_opc_buf), OPC_BUF_SIZE);
-        tcg_abort();
-    }
-
     tcg_gen_code_common(s, gen_code_buf, -1);
 
     /* flush instruction cache */
-    flush_icache_range((unsigned long)gen_code_buf,
-                       (unsigned long)s->code_ptr);
+    flush_icache_range((uintptr_t)gen_code_buf, (uintptr_t)s->code_ptr);
+
     return s->code_ptr -  gen_code_buf;
 }
 
@@ -2220,33 +2623,36 @@
     tot = s->interm_time + s->code_time;
     cpu_fprintf(f, "JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
                 tot, tot / 2.4e9);
-    cpu_fprintf(f, "translated TBs      %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
-                s->tb_count,
+    cpu_fprintf(f, "translated TBs      %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n", 
+                s->tb_count, 
                 s->tb_count1 - s->tb_count,
                 s->tb_count1 ? (double)(s->tb_count1 - s->tb_count) / s->tb_count1 * 100.0 : 0);
-    cpu_fprintf(f, "avg ops/TB          %0.1f max=%d\n",
+    cpu_fprintf(f, "avg ops/TB          %0.1f max=%d\n", 
                 s->tb_count ? (double)s->op_count / s->tb_count : 0, s->op_count_max);
     cpu_fprintf(f, "deleted ops/TB      %0.2f\n",
-                s->tb_count ?
+                s->tb_count ? 
                 (double)s->del_op_count / s->tb_count : 0);
     cpu_fprintf(f, "avg temps/TB        %0.2f max=%d\n",
-                s->tb_count ?
+                s->tb_count ? 
                 (double)s->temp_count / s->tb_count : 0,
                 s->temp_count_max);
-
-    cpu_fprintf(f, "cycles/op           %0.1f\n",
+    
+    cpu_fprintf(f, "cycles/op           %0.1f\n", 
                 s->op_count ? (double)tot / s->op_count : 0);
-    cpu_fprintf(f, "cycles/in byte      %0.1f\n",
+    cpu_fprintf(f, "cycles/in byte      %0.1f\n", 
                 s->code_in_len ? (double)tot / s->code_in_len : 0);
-    cpu_fprintf(f, "cycles/out byte     %0.1f\n",
+    cpu_fprintf(f, "cycles/out byte     %0.1f\n", 
                 s->code_out_len ? (double)tot / s->code_out_len : 0);
     if (tot == 0)
         tot = 1;
-    cpu_fprintf(f, "  gen_interm time   %0.1f%%\n",
+    cpu_fprintf(f, "  gen_interm time   %0.1f%%\n", 
                 (double)s->interm_time / tot * 100.0);
-    cpu_fprintf(f, "  gen_code time     %0.1f%%\n",
+    cpu_fprintf(f, "  gen_code time     %0.1f%%\n", 
                 (double)s->code_time / tot * 100.0);
-    cpu_fprintf(f, "liveness/code time  %0.1f%%\n",
+    cpu_fprintf(f, "optim./code time    %0.1f%%\n",
+                (double)s->opt_time / (s->code_time ? s->code_time : 1)
+                * 100.0);
+    cpu_fprintf(f, "liveness/code time  %0.1f%%\n", 
                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
     cpu_fprintf(f, "cpu_restore count   %" PRId64 "\n",
                 s->restore_count);
@@ -2261,3 +2667,267 @@
     cpu_fprintf(f, "[TCG profiler not compiled]\n");
 }
 #endif
+
+#ifdef ELF_HOST_MACHINE
+/* In order to use this feature, the backend needs to do three things:
+
+   (1) Define ELF_HOST_MACHINE to indicate both what value to
+       put into the ELF image and to indicate support for the feature.
+
+   (2) Define tcg_register_jit.  This should create a buffer containing
+       the contents of a .debug_frame section that describes the post-
+       prologue unwind info for the tcg machine.
+
+   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
+*/
+
+/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
+typedef enum {
+    JIT_NOACTION = 0,
+    JIT_REGISTER_FN,
+    JIT_UNREGISTER_FN
+} jit_actions_t;
+
+struct jit_code_entry {
+    struct jit_code_entry *next_entry;
+    struct jit_code_entry *prev_entry;
+    const void *symfile_addr;
+    uint64_t symfile_size;
+};
+
+struct jit_descriptor {
+    uint32_t version;
+    uint32_t action_flag;
+    struct jit_code_entry *relevant_entry;
+    struct jit_code_entry *first_entry;
+};
+
+void __jit_debug_register_code(void) __attribute__((noinline));
+void __jit_debug_register_code(void)
+{
+    asm("");
+}
+
+/* Must statically initialize the version, because GDB may check
+   the version before we can set it.  */
+struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
+
+/* End GDB interface.  */
+
+static int find_string(const char *strtab, const char *str)
+{
+    const char *p = strtab + 1;
+
+    while (1) {
+        if (strcmp(p, str) == 0) {
+            return p - strtab;
+        }
+        p += strlen(p) + 1;
+    }
+}
+
+static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
+                                 void *debug_frame, size_t debug_frame_size)
+{
+    struct __attribute__((packed)) DebugInfo {
+        uint32_t  len;
+        uint16_t  version;
+        uint32_t  abbrev;
+        uint8_t   ptr_size;
+        uint8_t   cu_die;
+        uint16_t  cu_lang;
+        uintptr_t cu_low_pc;
+        uintptr_t cu_high_pc;
+        uint8_t   fn_die;
+        char      fn_name[16];
+        uintptr_t fn_low_pc;
+        uintptr_t fn_high_pc;
+        uint8_t   cu_eoc;
+    };
+
+    struct ElfImage {
+        ElfW(Ehdr) ehdr;
+        ElfW(Phdr) phdr;
+        ElfW(Shdr) shdr[7];
+        ElfW(Sym)  sym[2];
+        struct DebugInfo di;
+        uint8_t    da[24];
+        char       str[80];
+    };
+
+    struct ElfImage *img;
+
+    static const struct ElfImage img_template = {
+        .ehdr = {
+            .e_ident[EI_MAG0] = ELFMAG0,
+            .e_ident[EI_MAG1] = ELFMAG1,
+            .e_ident[EI_MAG2] = ELFMAG2,
+            .e_ident[EI_MAG3] = ELFMAG3,
+            .e_ident[EI_CLASS] = ELF_CLASS,
+            .e_ident[EI_DATA] = ELF_DATA,
+            .e_ident[EI_VERSION] = EV_CURRENT,
+            .e_type = ET_EXEC,
+            .e_machine = ELF_HOST_MACHINE,
+            .e_version = EV_CURRENT,
+            .e_phoff = offsetof(struct ElfImage, phdr),
+            .e_shoff = offsetof(struct ElfImage, shdr),
+            .e_ehsize = sizeof(ElfW(Shdr)),
+            .e_phentsize = sizeof(ElfW(Phdr)),
+            .e_phnum = 1,
+            .e_shentsize = sizeof(ElfW(Shdr)),
+            .e_shnum = ARRAY_SIZE(img->shdr),
+            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
+#ifdef ELF_HOST_FLAGS
+            .e_flags = ELF_HOST_FLAGS,
+#endif
+#ifdef ELF_OSABI
+            .e_ident[EI_OSABI] = ELF_OSABI,
+#endif
+        },
+        .phdr = {
+            .p_type = PT_LOAD,
+            .p_flags = PF_X,
+        },
+        .shdr = {
+            [0] = { .sh_type = SHT_NULL },
+            /* Trick: The contents of code_gen_buffer are not present in
+               this fake ELF file; that got allocated elsewhere.  Therefore
+               we mark .text as SHT_NOBITS (similar to .bss) so that readers
+               will not look for contents.  We can record any address.  */
+            [1] = { /* .text */
+                .sh_type = SHT_NOBITS,
+                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
+            },
+            [2] = { /* .debug_info */
+                .sh_type = SHT_PROGBITS,
+                .sh_offset = offsetof(struct ElfImage, di),
+                .sh_size = sizeof(struct DebugInfo),
+            },
+            [3] = { /* .debug_abbrev */
+                .sh_type = SHT_PROGBITS,
+                .sh_offset = offsetof(struct ElfImage, da),
+                .sh_size = sizeof(img->da),
+            },
+            [4] = { /* .debug_frame */
+                .sh_type = SHT_PROGBITS,
+                .sh_offset = sizeof(struct ElfImage),
+            },
+            [5] = { /* .symtab */
+                .sh_type = SHT_SYMTAB,
+                .sh_offset = offsetof(struct ElfImage, sym),
+                .sh_size = sizeof(img->sym),
+                .sh_info = 1,
+                .sh_link = ARRAY_SIZE(img->shdr) - 1,
+                .sh_entsize = sizeof(ElfW(Sym)),
+            },
+            [6] = { /* .strtab */
+                .sh_type = SHT_STRTAB,
+                .sh_offset = offsetof(struct ElfImage, str),
+                .sh_size = sizeof(img->str),
+            }
+        },
+        .sym = {
+            [1] = { /* code_gen_buffer */
+                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
+                .st_shndx = 1,
+            }
+        },
+        .di = {
+            .len = sizeof(struct DebugInfo) - 4,
+            .version = 2,
+            .ptr_size = sizeof(void *),
+            .cu_die = 1,
+            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
+            .fn_die = 2,
+            .fn_name = "code_gen_buffer"
+        },
+        .da = {
+            1,          /* abbrev number (the cu) */
+            0x11, 1,    /* DW_TAG_compile_unit, has children */
+            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
+            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
+            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
+            0, 0,       /* end of abbrev */
+            2,          /* abbrev number (the fn) */
+            0x2e, 0,    /* DW_TAG_subprogram, no children */
+            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
+            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
+            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
+            0, 0,       /* end of abbrev */
+            0           /* no more abbrev */
+        },
+        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
+               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
+    };
+
+    /* We only need a single jit entry; statically allocate it.  */
+    static struct jit_code_entry one_entry;
+
+    uintptr_t buf = (uintptr_t)buf_ptr;
+    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
+
+    img = g_malloc(img_size);
+    *img = img_template;
+    memcpy(img + 1, debug_frame, debug_frame_size);
+
+    img->phdr.p_vaddr = buf;
+    img->phdr.p_paddr = buf;
+    img->phdr.p_memsz = buf_size;
+
+    img->shdr[1].sh_name = find_string(img->str, ".text");
+    img->shdr[1].sh_addr = buf;
+    img->shdr[1].sh_size = buf_size;
+
+    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
+    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
+
+    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
+    img->shdr[4].sh_size = debug_frame_size;
+
+    img->shdr[5].sh_name = find_string(img->str, ".symtab");
+    img->shdr[6].sh_name = find_string(img->str, ".strtab");
+
+    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
+    img->sym[1].st_value = buf;
+    img->sym[1].st_size = buf_size;
+
+    img->di.cu_low_pc = buf;
+    img->di.cu_high_pc = buf + buf_size;
+    img->di.fn_low_pc = buf;
+    img->di.fn_high_pc = buf + buf_size;
+
+#ifdef DEBUG_JIT
+    /* Enable this block to be able to debug the ELF image file creation.
+       One can use readelf, objdump, or other inspection utilities.  */
+    {
+        FILE *f = fopen("/tmp/qemu.jit", "w+b");
+        if (f) {
+            if (fwrite(img, img_size, 1, f) != img_size) {
+                /* Avoid stupid unused return value warning for fwrite.  */
+            }
+            fclose(f);
+        }
+    }
+#endif
+
+    one_entry.symfile_addr = img;
+    one_entry.symfile_size = img_size;
+
+    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
+    __jit_debug_descriptor.relevant_entry = &one_entry;
+    __jit_debug_descriptor.first_entry = &one_entry;
+    __jit_debug_register_code();
+}
+#else
+/* No support for the feature.  Provide the entry point expected by exec.c,
+   and implement the internal function we declared earlier.  */
+
+static void tcg_register_jit_int(void *buf, size_t size,
+                                 void *debug_frame, size_t debug_frame_size)
+{
+}
+
+void tcg_register_jit(void *buf, size_t buf_size)
+{
+}
+#endif /* ELF_HOST_MACHINE */
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 3598e96..c72af6c 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -21,9 +21,24 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
+
+#ifndef TCG_H
+#define TCG_H
+
 #include "qemu-common.h"
+#include "qemu/bitops.h"
 #include "tcg-target.h"
-#include "tcg-runtime.h"
+
+/* Default target word size to pointer size.  */
+#ifndef TCG_TARGET_REG_BITS
+# if UINTPTR_MAX == UINT32_MAX
+#  define TCG_TARGET_REG_BITS 32
+# elif UINTPTR_MAX == UINT64_MAX
+#  define TCG_TARGET_REG_BITS 64
+# else
+#  error Unknown pointer size for tcg target
+# endif
+#endif
 
 #if TCG_TARGET_REG_BITS == 32
 typedef int32_t tcg_target_long;
@@ -39,6 +54,8 @@
 #error unsupported
 #endif
 
+#include "tcg-runtime.h"
+
 #if TCG_TARGET_NB_REGS <= 32
 typedef uint32_t TCGRegSet;
 #elif TCG_TARGET_NB_REGS <= 64
@@ -47,6 +64,63 @@
 #error unsupported
 #endif
 
+#if TCG_TARGET_REG_BITS == 32
+/* Turn some undef macros into false macros.  */
+#define TCG_TARGET_HAS_div_i64          0
+#define TCG_TARGET_HAS_rem_i64          0
+#define TCG_TARGET_HAS_div2_i64         0
+#define TCG_TARGET_HAS_rot_i64          0
+#define TCG_TARGET_HAS_ext8s_i64        0
+#define TCG_TARGET_HAS_ext16s_i64       0
+#define TCG_TARGET_HAS_ext32s_i64       0
+#define TCG_TARGET_HAS_ext8u_i64        0
+#define TCG_TARGET_HAS_ext16u_i64       0
+#define TCG_TARGET_HAS_ext32u_i64       0
+#define TCG_TARGET_HAS_bswap16_i64      0
+#define TCG_TARGET_HAS_bswap32_i64      0
+#define TCG_TARGET_HAS_bswap64_i64      0
+#define TCG_TARGET_HAS_neg_i64          0
+#define TCG_TARGET_HAS_not_i64          0
+#define TCG_TARGET_HAS_andc_i64         0
+#define TCG_TARGET_HAS_orc_i64          0
+#define TCG_TARGET_HAS_eqv_i64          0
+#define TCG_TARGET_HAS_nand_i64         0
+#define TCG_TARGET_HAS_nor_i64          0
+#define TCG_TARGET_HAS_deposit_i64      0
+#define TCG_TARGET_HAS_movcond_i64      0
+#define TCG_TARGET_HAS_add2_i64         0
+#define TCG_TARGET_HAS_sub2_i64         0
+#define TCG_TARGET_HAS_mulu2_i64        0
+#define TCG_TARGET_HAS_muls2_i64        0
+#define TCG_TARGET_HAS_muluh_i64        0
+#define TCG_TARGET_HAS_mulsh_i64        0
+/* Turn some undef macros into true macros.  */
+#define TCG_TARGET_HAS_add2_i32         1
+#define TCG_TARGET_HAS_sub2_i32         1
+#define TCG_TARGET_HAS_mulu2_i32        1
+#endif
+
+#ifndef TCG_TARGET_deposit_i32_valid
+#define TCG_TARGET_deposit_i32_valid(ofs, len) 1
+#endif
+#ifndef TCG_TARGET_deposit_i64_valid
+#define TCG_TARGET_deposit_i64_valid(ofs, len) 1
+#endif
+
+/* Only one of DIV or DIV2 should be defined.  */
+#if defined(TCG_TARGET_HAS_div_i32)
+#define TCG_TARGET_HAS_div2_i32         0
+#elif defined(TCG_TARGET_HAS_div2_i32)
+#define TCG_TARGET_HAS_div_i32          0
+#define TCG_TARGET_HAS_rem_i32          0
+#endif
+#if defined(TCG_TARGET_HAS_div_i64)
+#define TCG_TARGET_HAS_div2_i64         0
+#elif defined(TCG_TARGET_HAS_div2_i64)
+#define TCG_TARGET_HAS_div_i64          0
+#define TCG_TARGET_HAS_rem_i64          0
+#endif
+
 typedef enum TCGOpcode {
 #define DEF(name, oargs, iargs, cargs, flags) INDEX_op_ ## name,
 #include "tcg-opc.h"
@@ -69,13 +143,13 @@
     struct TCGRelocation *next;
     int type;
     uint8_t *ptr;
-    tcg_target_long addend;
-} TCGRelocation;
+    intptr_t addend;
+} TCGRelocation; 
 
 typedef struct TCGLabel {
     int has_value;
     union {
-        tcg_target_ulong value;
+        uintptr_t value;
         TCGRelocation *first_reloc;
     } u;
 } TCGLabel;
@@ -108,9 +182,12 @@
     TCG_TYPE_REG = TCG_TYPE_I64,
 #endif
 
-    /* An alias for the size of the native pointer.  We don't currently
-       support any hosts with 64-bit registers and 32-bit pointers.  */
-    TCG_TYPE_PTR = TCG_TYPE_REG,
+    /* An alias for the size of the native pointer.  */
+#if UINTPTR_MAX == UINT32_MAX
+    TCG_TYPE_PTR = TCG_TYPE_I32,
+#else
+    TCG_TYPE_PTR = TCG_TYPE_I64,
+#endif
 
     /* An alias for the size of the target "long", aka register.  */
 #if TARGET_LONG_BITS == 64
@@ -120,16 +197,70 @@
 #endif
 } TCGType;
 
+/* Constants for qemu_ld and qemu_st for the Memory Operation field.  */
+typedef enum TCGMemOp {
+    MO_8     = 0,
+    MO_16    = 1,
+    MO_32    = 2,
+    MO_64    = 3,
+    MO_SIZE  = 3,   /* Mask for the above.  */
+
+    MO_SIGN  = 4,   /* Sign-extended, otherwise zero-extended.  */
+
+    MO_BSWAP = 8,   /* Host reverse endian.  */
+#ifdef HOST_WORDS_BIGENDIAN
+    MO_LE    = MO_BSWAP,
+    MO_BE    = 0,
+#else
+    MO_LE    = 0,
+    MO_BE    = MO_BSWAP,
+#endif
+#ifdef TARGET_WORDS_BIGENDIAN
+    MO_TE    = MO_BE,
+#else
+    MO_TE    = MO_LE,
+#endif
+
+    /* Combinations of the above, for ease of use.  */
+    MO_UB    = MO_8,
+    MO_UW    = MO_16,
+    MO_UL    = MO_32,
+    MO_SB    = MO_SIGN | MO_8,
+    MO_SW    = MO_SIGN | MO_16,
+    MO_SL    = MO_SIGN | MO_32,
+    MO_Q     = MO_64,
+
+    MO_LEUW  = MO_LE | MO_UW,
+    MO_LEUL  = MO_LE | MO_UL,
+    MO_LESW  = MO_LE | MO_SW,
+    MO_LESL  = MO_LE | MO_SL,
+    MO_LEQ   = MO_LE | MO_Q,
+
+    MO_BEUW  = MO_BE | MO_UW,
+    MO_BEUL  = MO_BE | MO_UL,
+    MO_BESW  = MO_BE | MO_SW,
+    MO_BESL  = MO_BE | MO_SL,
+    MO_BEQ   = MO_BE | MO_Q,
+
+    MO_TEUW  = MO_TE | MO_UW,
+    MO_TEUL  = MO_TE | MO_UL,
+    MO_TESW  = MO_TE | MO_SW,
+    MO_TESL  = MO_TE | MO_SL,
+    MO_TEQ   = MO_TE | MO_Q,
+
+    MO_SSIZE = MO_SIZE | MO_SIGN,
+} TCGMemOp;
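To make the field layout concrete, here is a small sketch (illustration only, not part of this patch; the helper names are invented) showing how a consumer could pick a TCGMemOp apart:

/* Illustrative decomposition of a memory operation such as MO_TEUL. */
static inline int memop_size_bytes(TCGMemOp op)
{
    return 1 << (op & MO_SIZE);      /* MO_8..MO_64 -> 1, 2, 4, 8 bytes */
}

static inline bool memop_is_signed(TCGMemOp op)
{
    return (op & MO_SIGN) != 0;      /* sign- vs. zero-extension */
}

static inline bool memop_needs_bswap(TCGMemOp op)
{
    return (op & MO_BSWAP) != 0;     /* guest and host endianness differ */
}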
+
 typedef tcg_target_ulong TCGArg;
 
-/* Define a type and accessor macros for varables.  Using a struct is
+/* Define a type and accessor macros for variables.  Using a struct is
    nice because it gives some level of type safety.  Ideally the compiler
    should be able to see through all this.  However in practice this is not true,
-   expecially on targets with braindamaged ABIs (e.g. i386).
+   especially on targets with braindamaged ABIs (e.g. i386).
    We use plain int by default to avoid this runtime overhead.
    Users of tcg_gen_* don't need to know about any of this, and should
    treat TCGv as an opaque type.
-   In additon we do typechecking for different types of variables.  TCGv_i32
+   In addition we do typechecking for different types of variables.  TCGv_i32
    and TCGv_i64 are 32/64-bit variables respectively.  TCGv and TCGv_ptr
    are aliases for target_ulong and host pointer sized values respectively.
  */
@@ -150,12 +281,19 @@
     int i64;
 } TCGv_i64;
 
+typedef struct {
+    int iptr;
+} TCGv_ptr;
+
 #define MAKE_TCGV_I32(i) __extension__                  \
     ({ TCGv_i32 make_tcgv_tmp = {i}; make_tcgv_tmp;})
 #define MAKE_TCGV_I64(i) __extension__                  \
     ({ TCGv_i64 make_tcgv_tmp = {i}; make_tcgv_tmp;})
+#define MAKE_TCGV_PTR(i) __extension__                  \
+    ({ TCGv_ptr make_tcgv_tmp = {i}; make_tcgv_tmp; })
 #define GET_TCGV_I32(t) ((t).i32)
 #define GET_TCGV_I64(t) ((t).i64)
+#define GET_TCGV_PTR(t) ((t).iptr)
 #if TCG_TARGET_REG_BITS == 32
 #define TCGV_LOW(t) MAKE_TCGV_I32(GET_TCGV_I64(t))
 #define TCGV_HIGH(t) MAKE_TCGV_I32(GET_TCGV_I64(t) + 1)
@@ -165,10 +303,17 @@
 
 typedef int TCGv_i32;
 typedef int TCGv_i64;
+#if TCG_TARGET_REG_BITS == 32
+#define TCGv_ptr TCGv_i32
+#else
+#define TCGv_ptr TCGv_i64
+#endif
 #define MAKE_TCGV_I32(x) (x)
 #define MAKE_TCGV_I64(x) (x)
+#define MAKE_TCGV_PTR(x) (x)
 #define GET_TCGV_I32(t) (t)
 #define GET_TCGV_I64(t) (t)
+#define GET_TCGV_PTR(t) (t)
 
 #if TCG_TARGET_REG_BITS == 32
 #define TCGV_LOW(t) (t)
@@ -184,37 +329,51 @@
 #define TCGV_UNUSED_I32(x) x = MAKE_TCGV_I32(-1)
 #define TCGV_UNUSED_I64(x) x = MAKE_TCGV_I64(-1)
 
+#define TCGV_IS_UNUSED_I32(x) (GET_TCGV_I32(x) == -1)
+#define TCGV_IS_UNUSED_I64(x) (GET_TCGV_I64(x) == -1)
+
 /* call flags */
-#define TCG_CALL_TYPE_MASK      0x000f
-#define TCG_CALL_TYPE_STD       0x0000 /* standard C call */
-#define TCG_CALL_TYPE_REGPARM_1 0x0001 /* i386 style regparm call (1 reg) */
-#define TCG_CALL_TYPE_REGPARM_2 0x0002 /* i386 style regparm call (2 regs) */
-#define TCG_CALL_TYPE_REGPARM   0x0003 /* i386 style regparm call (3 regs) */
-/* A pure function only reads its arguments and TCG global variables
-   and cannot raise exceptions. Hence a call to a pure function can be
-   safely suppressed if the return value is not used. */
-#define TCG_CALL_PURE           0x0010
-/* A const function only reads its arguments and does not use TCG
-   global variables. Hence a call to such a function does not
-   save TCG global variables back to their canonical location. */
-#define TCG_CALL_CONST          0x0020
+/* Helper does not read globals (either directly or through an exception). It
+   implies TCG_CALL_NO_WRITE_GLOBALS. */
+#define TCG_CALL_NO_READ_GLOBALS    0x0010
+/* Helper does not write globals */
+#define TCG_CALL_NO_WRITE_GLOBALS   0x0020
+/* Helper can be safely suppressed if the return value is not used. */
+#define TCG_CALL_NO_SIDE_EFFECTS    0x0040
+
+/* convenience version of most used call flags */
+#define TCG_CALL_NO_RWG         TCG_CALL_NO_READ_GLOBALS
+#define TCG_CALL_NO_WG          TCG_CALL_NO_WRITE_GLOBALS
+#define TCG_CALL_NO_SE          TCG_CALL_NO_SIDE_EFFECTS
+#define TCG_CALL_NO_RWG_SE      (TCG_CALL_NO_RWG | TCG_CALL_NO_SE)
+#define TCG_CALL_NO_WG_SE       (TCG_CALL_NO_WG | TCG_CALL_NO_SE)
 
 /* used to align parameters */
 #define TCG_CALL_DUMMY_TCGV     MAKE_TCGV_I32(-1)
 #define TCG_CALL_DUMMY_ARG      ((TCGArg)(-1))
 
+/* Conditions.  Note that these are laid out for easy manipulation by
+   the functions below:
+     bit 0 is used for inverting;
+     bit 1 is signed,
+     bit 2 is unsigned,
+     bit 3 is used with bit 0 for swapping signed/unsigned.  */
 typedef enum {
-    TCG_COND_EQ,
-    TCG_COND_NE,
-    TCG_COND_LT,
-    TCG_COND_GE,
-    TCG_COND_LE,
-    TCG_COND_GT,
+    /* non-signed */
+    TCG_COND_NEVER  = 0 | 0 | 0 | 0,
+    TCG_COND_ALWAYS = 0 | 0 | 0 | 1,
+    TCG_COND_EQ     = 8 | 0 | 0 | 0,
+    TCG_COND_NE     = 8 | 0 | 0 | 1,
+    /* signed */
+    TCG_COND_LT     = 0 | 0 | 2 | 0,
+    TCG_COND_GE     = 0 | 0 | 2 | 1,
+    TCG_COND_LE     = 8 | 0 | 2 | 0,
+    TCG_COND_GT     = 8 | 0 | 2 | 1,
     /* unsigned */
-    TCG_COND_LTU,
-    TCG_COND_GEU,
-    TCG_COND_LEU,
-    TCG_COND_GTU,
+    TCG_COND_LTU    = 0 | 4 | 0 | 0,
+    TCG_COND_GEU    = 0 | 4 | 0 | 1,
+    TCG_COND_LEU    = 8 | 4 | 0 | 0,
+    TCG_COND_GTU    = 8 | 4 | 0 | 1,
 } TCGCond;
 
 /* Invert the sense of the comparison.  */
@@ -226,13 +385,34 @@
 /* Swap the operands in a comparison.  */
 static inline TCGCond tcg_swap_cond(TCGCond c)
 {
-    int mask = (c < TCG_COND_LT ? 0 : c < TCG_COND_LTU ? 7 : 15);
-    return (TCGCond)(c ^ mask);
+    return c & 6 ? (TCGCond)(c ^ 9) : c;
 }
 
+/* Create an "unsigned" version of a "signed" comparison.  */
 static inline TCGCond tcg_unsigned_cond(TCGCond c)
 {
-    return (c >= TCG_COND_LT && c <= TCG_COND_GT ? c + 4 : c);
+    return c & 2 ? (TCGCond)(c ^ 6) : c;
+}
+
+/* Must a comparison be considered unsigned?  */
+static inline bool is_unsigned_cond(TCGCond c)
+{
+    return (c & 4) != 0;
+}
+
+/* Create a "high" version of a double-word comparison.
+   This removes equality from a LTE or GTE comparison.  */
+static inline TCGCond tcg_high_cond(TCGCond c)
+{
+    switch (c) {
+    case TCG_COND_GE:
+    case TCG_COND_LE:
+    case TCG_COND_GEU:
+    case TCG_COND_LEU:
+        return (TCGCond)(c ^ 8);
+    default:
+        return c;
+    }
 }
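A worked example of the bit encoding (illustration only, not taken from the patch):

/* TCG_COND_LT = 0|0|2|0 = 2, so with the helpers above:
 *   tcg_swap_cond(TCG_COND_LT)     = 2 ^ 9 = 11 = 8|0|2|1 = TCG_COND_GT
 *   tcg_unsigned_cond(TCG_COND_LT) = 2 ^ 6 =  4 = 0|4|0|0 = TCG_COND_LTU
 *   tcg_high_cond(TCG_COND_GE)     = 3 ^ 8 = 11 =           TCG_COND_GT
 * That is, "a < b" becomes "b > a" when the operands are swapped, LTU when
 * made unsigned, and GE drops its equality part for the high word of a
 * double-word comparison.
 */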
 
 #define TEMP_VAL_DEAD  0
@@ -248,63 +428,58 @@
     int reg;
     tcg_target_long val;
     int mem_reg;
-    tcg_target_long mem_offset;
+    intptr_t mem_offset;
     unsigned int fixed_reg:1;
     unsigned int mem_coherent:1;
     unsigned int mem_allocated:1;
-    unsigned int temp_local:1; /* If true, the temp is saved accross
+    unsigned int temp_local:1; /* If true, the temp is saved across
                                   basic blocks. Otherwise, it is not
-                                  preserved accross basic blocks. */
+                                  preserved across basic blocks. */
     unsigned int temp_allocated:1; /* never used for code gen */
-    /* index of next free temp of same base type, -1 if end */
-    int next_free_temp;
     const char *name;
 } TCGTemp;
 
-typedef struct TCGHelperInfo {
-    tcg_target_ulong func;
-    const char *name;
-} TCGHelperInfo;
-
 typedef struct TCGContext TCGContext;
 
+typedef struct TCGTempSet {
+    unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)];
+} TCGTempSet;
+
 struct TCGContext {
     uint8_t *pool_cur, *pool_end;
-    TCGPool *pool_first, *pool_current;
+    TCGPool *pool_first, *pool_current, *pool_first_large;
     TCGLabel *labels;
     int nb_labels;
-    TCGTemp *temps; /* globals first, temps after */
     int nb_globals;
     int nb_temps;
-    /* index of free temps, -1 if none */
-    int first_free_temp[TCG_TYPE_COUNT * 2];
 
     /* goto_tb support */
     uint8_t *code_buf;
-    unsigned long *tb_next;
+    uintptr_t *tb_next;
     uint16_t *tb_next_offset;
     uint16_t *tb_jmp_offset; /* != NULL if USE_DIRECT_JUMP */
 
     /* liveness analysis */
-    uint16_t *op_dead_iargs; /* for each operation, each bit tells if the
-                                corresponding input argument is dead */
-
+    uint16_t *op_dead_args; /* for each operation, each bit tells if the
+                               corresponding argument is dead */
+    uint8_t *op_sync_args;  /* for each operation, each bit tells if the
+                               corresponding output argument needs to be
+                               synced to memory. */
+    
     /* tells in which temporary a given register is. It does not take
        into account fixed registers */
     int reg_to_temp[TCG_TARGET_NB_REGS];
     TCGRegSet reserved_regs;
-    tcg_target_long current_frame_offset;
-    tcg_target_long frame_start;
-    tcg_target_long frame_end;
+    intptr_t current_frame_offset;
+    intptr_t frame_start;
+    intptr_t frame_end;
     int frame_reg;
 
     uint8_t *code_ptr;
-    TCGTemp static_temps[TCG_MAX_TEMPS];
+    TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */
+    TCGTempSet free_temps[TCG_TYPE_COUNT * 2];
 
-    TCGHelperInfo *helpers;
-    int nb_helpers;
-    int allocated_helpers;
-    int helpers_sorted;
+    GHashTable *helpers;
 
 #ifdef CONFIG_PROFILER
     /* profiling info */
@@ -320,20 +495,41 @@
     int64_t interm_time;
     int64_t code_time;
     int64_t la_time;
+    int64_t opt_time;
     int64_t restore_count;
     int64_t restore_time;
 #endif
 
 #ifdef CONFIG_DEBUG_TCG
     int temps_in_use;
+    int goto_tb_issue_mask;
 #endif
+
+    uint16_t gen_opc_buf[OPC_BUF_SIZE];
+    TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE];
+
+    uint16_t *gen_opc_ptr;
+    TCGArg *gen_opparam_ptr;
+    target_ulong gen_opc_pc[OPC_BUF_SIZE];
+    uint16_t gen_opc_icount[OPC_BUF_SIZE];
+    uint8_t gen_opc_instr_start[OPC_BUF_SIZE];
+
+    /* Code generation */
+    int code_gen_max_blocks;
+    uint8_t *code_gen_prologue;
+    uint8_t *code_gen_buffer;
+    size_t code_gen_buffer_size;
+    /* threshold to flush the translated code buffer */
+    size_t code_gen_buffer_max_size;
+    uint8_t *code_gen_ptr;
+
+    TBContext tb_ctx;
+
+    /* The TCGBackendData structure is private to tcg-target.c.  */
+    struct TCGBackendData *be;
 };
 
 extern TCGContext tcg_ctx;
-extern uint16_t *gen_opc_ptr;
-extern TCGArg *gen_opparam_ptr;
-extern uint16_t gen_opc_buf[];
-extern TCGArg gen_opparam_buf[];
 
 /* pool based memory allocation */
 
@@ -363,12 +559,10 @@
 int tcg_gen_code(TCGContext *s, uint8_t *gen_code_buf);
 int tcg_gen_code_search_pc(TCGContext *s, uint8_t *gen_code_buf, long offset);
 
-void tcg_set_frame(TCGContext *s, int reg,
-                   tcg_target_long start, tcg_target_long size);
+void tcg_set_frame(TCGContext *s, int reg, intptr_t start, intptr_t size);
 
 TCGv_i32 tcg_global_reg_new_i32(int reg, const char *name);
-TCGv_i32 tcg_global_mem_new_i32(int reg, tcg_target_long offset,
-                                const char *name);
+TCGv_i32 tcg_global_mem_new_i32(int reg, intptr_t offset, const char *name);
 TCGv_i32 tcg_temp_new_internal_i32(int temp_local);
 static inline TCGv_i32 tcg_temp_new_i32(void)
 {
@@ -382,8 +576,7 @@
 char *tcg_get_arg_str_i32(TCGContext *s, char *buf, int buf_size, TCGv_i32 arg);
 
 TCGv_i64 tcg_global_reg_new_i64(int reg, const char *name);
-TCGv_i64 tcg_global_mem_new_i64(int reg, tcg_target_long offset,
-                                const char *name);
+TCGv_i64 tcg_global_mem_new_i64(int reg, intptr_t offset, const char *name);
 TCGv_i64 tcg_temp_new_internal_i64(int temp_local);
 static inline TCGv_i64 tcg_temp_new_i64(void)
 {
@@ -426,13 +619,21 @@
 
 #define TCG_MAX_OP_ARGS 16
 
-#define TCG_OPF_BB_END     0x01 /* instruction defines the end of a basic
-                                   block */
-#define TCG_OPF_CALL_CLOBBER 0x02 /* instruction clobbers call registers
-                                   and potentially update globals. */
-#define TCG_OPF_SIDE_EFFECTS 0x04 /* instruction has side effects : it
-                                     cannot be removed if its output
-                                     are not used */
+/* Bits for TCGOpDef->flags, 8 bits available.  */
+enum {
+    /* Instruction defines the end of a basic block.  */
+    TCG_OPF_BB_END       = 0x01,
+    /* Instruction clobbers call registers and potentially updates globals.  */
+    TCG_OPF_CALL_CLOBBER = 0x02,
+    /* Instruction has side effects: it cannot be removed if its outputs
+       are not used, and might trigger exceptions.  */
+    TCG_OPF_SIDE_EFFECTS = 0x04,
+    /* Instruction operands are 64-bits (otherwise 32-bits).  */
+    TCG_OPF_64BIT        = 0x08,
+    /* Instruction is optional and not implemented by the host, or insn
+       is generic and should not be implemented by the host.  */
+    TCG_OPF_NOT_PRESENT  = 0x10,
+};
 
 typedef struct TCGOpDef {
     const char *name;
@@ -445,6 +646,9 @@
 #endif
 } TCGOpDef;
 
+extern TCGOpDef tcg_op_defs[];
+extern const size_t tcg_op_defs_max;
+
 typedef struct TCGTargetOpDef {
     TCGOpcode op;
     const char *args_ct_str[TCG_MAX_OP_ARGS];
@@ -456,28 +660,39 @@
     abort();\
 } while (0)
 
+#ifdef CONFIG_DEBUG_TCG
+# define tcg_debug_assert(X) do { assert(X); } while (0)
+#elif QEMU_GNUC_PREREQ(4, 5)
+# define tcg_debug_assert(X) \
+    do { if (!(X)) { __builtin_unreachable(); } } while (0)
+#else
+# define tcg_debug_assert(X) do { (void)(X); } while (0)
+#endif
+
 void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs);
 
-#if TCG_TARGET_REG_BITS == 32
-#define tcg_const_ptr tcg_const_i32
-#define tcg_add_ptr tcg_add_i32
-#define tcg_sub_ptr tcg_sub_i32
-#define TCGv_ptr TCGv_i32
-#define GET_TCGV_PTR GET_TCGV_I32
-#define tcg_global_reg_new_ptr tcg_global_reg_new_i32
-#define tcg_global_mem_new_ptr tcg_global_mem_new_i32
-#define tcg_temp_new_ptr tcg_temp_new_i32
-#define tcg_temp_free_ptr tcg_temp_free_i32
+#if UINTPTR_MAX == UINT32_MAX
+#define TCGV_NAT_TO_PTR(n) MAKE_TCGV_PTR(GET_TCGV_I32(n))
+#define TCGV_PTR_TO_NAT(n) MAKE_TCGV_I32(GET_TCGV_PTR(n))
+
+#define tcg_const_ptr(V) TCGV_NAT_TO_PTR(tcg_const_i32((intptr_t)(V)))
+#define tcg_global_reg_new_ptr(R, N) \
+    TCGV_NAT_TO_PTR(tcg_global_reg_new_i32((R), (N)))
+#define tcg_global_mem_new_ptr(R, O, N) \
+    TCGV_NAT_TO_PTR(tcg_global_mem_new_i32((R), (O), (N)))
+#define tcg_temp_new_ptr() TCGV_NAT_TO_PTR(tcg_temp_new_i32())
+#define tcg_temp_free_ptr(T) tcg_temp_free_i32(TCGV_PTR_TO_NAT(T))
 #else
-#define tcg_const_ptr tcg_const_i64
-#define tcg_add_ptr tcg_add_i64
-#define tcg_sub_ptr tcg_sub_i64
-#define TCGv_ptr TCGv_i64
-#define GET_TCGV_PTR GET_TCGV_I64
-#define tcg_global_reg_new_ptr tcg_global_reg_new_i64
-#define tcg_global_mem_new_ptr tcg_global_mem_new_i64
-#define tcg_temp_new_ptr tcg_temp_new_i64
-#define tcg_temp_free_ptr tcg_temp_free_i64
+#define TCGV_NAT_TO_PTR(n) MAKE_TCGV_PTR(GET_TCGV_I64(n))
+#define TCGV_PTR_TO_NAT(n) MAKE_TCGV_I64(GET_TCGV_PTR(n))
+
+#define tcg_const_ptr(V) TCGV_NAT_TO_PTR(tcg_const_i64((intptr_t)(V)))
+#define tcg_global_reg_new_ptr(R, N) \
+    TCGV_NAT_TO_PTR(tcg_global_reg_new_i64((R), (N)))
+#define tcg_global_mem_new_ptr(R, O, N) \
+    TCGV_NAT_TO_PTR(tcg_global_mem_new_i64((R), (O), (N)))
+#define tcg_temp_new_ptr() TCGV_NAT_TO_PTR(tcg_temp_new_i64())
+#define tcg_temp_free_ptr(T) tcg_temp_free_i64(TCGV_PTR_TO_NAT(T))
 #endif
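A minimal usage sketch of the pointer-sized wrappers above (written for illustration; not taken from this patch):

/* Allocate a pointer-sized temporary, use it, and release it again. */
static inline void tcg_ptr_temp_example(void)
{
    TCGv_ptr tmp = tcg_temp_new_ptr();
    /* ... pass 'tmp' to pointer-typed ops here ... */
    tcg_temp_free_ptr(tmp);
}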
 
 void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags,
@@ -486,10 +701,11 @@
 void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
                         int c, int right, int arith);
 
+TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr, TCGArg *args,
+                     TCGOpDef *tcg_op_def);
+
 /* only used for debugging purposes */
-void tcg_register_helper(void *func, const char *name);
-const char *tcg_helper_get_name(TCGContext *s, void *func);
-void tcg_dump_ops(TCGContext *s, FILE *outfile);
+void tcg_dump_ops(TCGContext *s);
 
 void dump_ops(const uint16_t *opc_buf, const TCGArg *opparam_buf);
 TCGv_i32 tcg_const_i32(int32_t val);
@@ -497,10 +713,142 @@
 TCGv_i32 tcg_const_local_i32(int32_t val);
 TCGv_i64 tcg_const_local_i64(int64_t val);
 
-extern uint8_t code_gen_prologue[];
-#if defined(_ARCH_PPC) && !defined(_ARCH_PPC64)
-#define tcg_qemu_tb_exec(tb_ptr) \
-    ((long REGPARM __attribute__ ((longcall)) (*)(void *))code_gen_prologue)(tb_ptr)
-#else
-#define tcg_qemu_tb_exec(tb_ptr) ((long REGPARM (*)(void *))code_gen_prologue)(tb_ptr)
+/**
+ * tcg_qemu_tb_exec:
+ * @env: CPUArchState * for the CPU
+ * @tb_ptr: address of generated code for the TB to execute
+ *
+ * Start executing code from a given translation block.
+ * Where translation blocks have been linked, execution
+ * may proceed from the given TB into successive ones.
+ * Control eventually returns only when some action is needed
+ * from the top-level loop: either control must pass to a TB
+ * which has not yet been directly linked, or an asynchronous
+ * event such as an interrupt needs handling.
+ *
+ * The return value is a pointer to the next TB to execute
+ * (if known; otherwise zero). This pointer is assumed to be
+ * 4-aligned, and the bottom two bits are used to return further
+ * information:
+ *  0, 1: the link between this TB and the next is via the specified
+ *        TB index (0 or 1). That is, we left the TB via (the equivalent
+ *        of) "goto_tb <index>". The main loop uses this to determine
+ *        how to link the TB just executed to the next.
+ *  2:    we are using instruction counting code generation, and we
+ *        did not start executing this TB because the instruction counter
+ *        would hit zero midway through it. In this case the next-TB pointer
+ *        returned is the TB we were about to execute, and the caller must
+ *        arrange to execute the remaining count of instructions.
+ *  3:    we stopped because the CPU's exit_request flag was set
+ *        (usually meaning that there is an interrupt that needs to be
+ *        handled). The next-TB pointer returned is the TB we were
+ *        about to execute when we noticed the pending exit request.
+ *
+ * If the bottom two bits indicate an exit-via-index then the CPU
+ * state is correctly synchronised and ready for execution of the next
+ * TB (and in particular the guest PC is the address to execute next).
+ * Otherwise, we gave up on execution of this TB before it started, and
+ * the caller must fix up the CPU state by calling cpu_pc_from_tb()
+ * with the next-TB pointer we return.
+ *
+ * Note that TCG targets may use a different definition of tcg_qemu_tb_exec
+ * to this default (which just calls the prologue.code emitted by
+ * tcg_target_qemu_prologue()).
+ */
+#define TB_EXIT_MASK 3
+#define TB_EXIT_IDX0 0
+#define TB_EXIT_IDX1 1
+#define TB_EXIT_ICOUNT_EXPIRED 2
+#define TB_EXIT_REQUESTED 3
+
+#if !defined(tcg_qemu_tb_exec)
+# define tcg_qemu_tb_exec(env, tb_ptr) \
+    ((uintptr_t (*)(void *, void *))tcg_ctx.code_gen_prologue)(env, tb_ptr)
 #endif
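The following sketch (not part of the patch; env and tc_ptr stand in for the caller's own state) shows how a top-level loop might interpret the return value and its bottom two bits:

/* Hypothetical caller of tcg_qemu_tb_exec(), for illustration only. */
static inline void run_one_tb_example(CPUArchState *env, void *tc_ptr)
{
    uintptr_t ret = tcg_qemu_tb_exec(env, tc_ptr);
    void *next_tb = (void *)(ret & ~(uintptr_t)TB_EXIT_MASK); /* 4-aligned */

    switch (ret & TB_EXIT_MASK) {
    case TB_EXIT_IDX0:
    case TB_EXIT_IDX1:
        /* Left via goto_tb slot 0 or 1: link the TB just run to next_tb. */
        break;
    case TB_EXIT_ICOUNT_EXPIRED:
        /* The TB was not started; the instruction budget would expire
           midway through it, so the caller executes the remainder itself. */
        break;
    case TB_EXIT_REQUESTED:
        /* exit_request was set (e.g. a pending interrupt); handle it. */
        break;
    }
    (void)next_tb;
}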
+
+void tcg_register_jit(void *buf, size_t buf_size);
+
+/*
+ * Memory helpers that will be used by TCG generated code.
+ */
+#ifdef CONFIG_SOFTMMU
+/* Value zero-extended to tcg register size.  */
+tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
+                                     int mmu_idx, uintptr_t retaddr);
+tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
+                                    int mmu_idx, uintptr_t retaddr);
+tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
+                                    int mmu_idx, uintptr_t retaddr);
+uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
+                           int mmu_idx, uintptr_t retaddr);
+tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
+                                    int mmu_idx, uintptr_t retaddr);
+tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
+                                    int mmu_idx, uintptr_t retaddr);
+uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
+                           int mmu_idx, uintptr_t retaddr);
+
+/* Value sign-extended to tcg register size.  */
+tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
+                                     int mmu_idx, uintptr_t retaddr);
+tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
+                                    int mmu_idx, uintptr_t retaddr);
+tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
+                                    int mmu_idx, uintptr_t retaddr);
+tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
+                                    int mmu_idx, uintptr_t retaddr);
+tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
+                                    int mmu_idx, uintptr_t retaddr);
+
+void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
+                        int mmu_idx, uintptr_t retaddr);
+void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
+                       int mmu_idx, uintptr_t retaddr);
+void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
+                       int mmu_idx, uintptr_t retaddr);
+void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
+                       int mmu_idx, uintptr_t retaddr);
+void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
+                       int mmu_idx, uintptr_t retaddr);
+void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
+                       int mmu_idx, uintptr_t retaddr);
+void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
+                       int mmu_idx, uintptr_t retaddr);
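For illustration only (not from this patch), the softmmu slow path for a little-endian 32-bit guest load boils down to a call like the one below; the argument names are placeholders:

static inline tcg_target_ulong softmmu_ldul_example(CPUArchState *env,
                                                    target_ulong addr,
                                                    int mmu_idx,
                                                    uintptr_t retaddr)
{
    /* retaddr points back into the generated code so the helper can
       restore CPU state if the access faults. */
    return helper_le_ldul_mmu(env, addr, mmu_idx, retaddr);
}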
+
+/* Temporary aliases until backends are converted.  */
+#ifdef TARGET_WORDS_BIGENDIAN
+# define helper_ret_ldsw_mmu  helper_be_ldsw_mmu
+# define helper_ret_lduw_mmu  helper_be_lduw_mmu
+# define helper_ret_ldsl_mmu  helper_be_ldsl_mmu
+# define helper_ret_ldul_mmu  helper_be_ldul_mmu
+# define helper_ret_ldq_mmu   helper_be_ldq_mmu
+# define helper_ret_stw_mmu   helper_be_stw_mmu
+# define helper_ret_stl_mmu   helper_be_stl_mmu
+# define helper_ret_stq_mmu   helper_be_stq_mmu
+#else
+# define helper_ret_ldsw_mmu  helper_le_ldsw_mmu
+# define helper_ret_lduw_mmu  helper_le_lduw_mmu
+# define helper_ret_ldsl_mmu  helper_le_ldsl_mmu
+# define helper_ret_ldul_mmu  helper_le_ldul_mmu
+# define helper_ret_ldq_mmu   helper_le_ldq_mmu
+# define helper_ret_stw_mmu   helper_le_stw_mmu
+# define helper_ret_stl_mmu   helper_le_stl_mmu
+# define helper_ret_stq_mmu   helper_le_stq_mmu
+#endif
+
+uint8_t helper_ldb_mmu(CPUArchState *env, target_ulong addr, int mmu_idx);
+uint16_t helper_ldw_mmu(CPUArchState *env, target_ulong addr, int mmu_idx);
+uint32_t helper_ldl_mmu(CPUArchState *env, target_ulong addr, int mmu_idx);
+uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr, int mmu_idx);
+
+void helper_stb_mmu(CPUArchState *env, target_ulong addr,
+                    uint8_t val, int mmu_idx);
+void helper_stw_mmu(CPUArchState *env, target_ulong addr,
+                    uint16_t val, int mmu_idx);
+void helper_stl_mmu(CPUArchState *env, target_ulong addr,
+                    uint32_t val, int mmu_idx);
+void helper_stq_mmu(CPUArchState *env, target_ulong addr,
+                    uint64_t val, int mmu_idx);
+#endif /* CONFIG_SOFTMMU */
+
+#endif /* TCG_H */
diff --git a/tcg/x86_64/tcg-target.c b/tcg/x86_64/tcg-target.c
deleted file mode 100644
index bef3858..0000000
--- a/tcg/x86_64/tcg-target.c
+++ /dev/null
@@ -1,1450 +0,0 @@
-/*
- * Tiny Code Generator for QEMU
- *
- * Copyright (c) 2008 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#ifndef NDEBUG
-static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
-    "%rax",
-    "%rcx",
-    "%rdx",
-    "%rbx",
-    "%rsp",
-    "%rbp",
-    "%rsi",
-    "%rdi",
-    "%r8",
-    "%r9",
-    "%r10",
-    "%r11",
-    "%r12",
-    "%r13",
-    "%r14",
-    "%r15",
-};
-#endif
-
-static const int tcg_target_reg_alloc_order[] = {
-    TCG_REG_RBP,
-    TCG_REG_RBX,
-    TCG_REG_R12,
-    TCG_REG_R13,
-    TCG_REG_R14,
-    TCG_REG_R15,
-    TCG_REG_R10,
-    TCG_REG_R11,
-    TCG_REG_R9,
-    TCG_REG_R8,
-    TCG_REG_RCX,
-    TCG_REG_RDX,
-    TCG_REG_RSI,
-    TCG_REG_RDI,
-    TCG_REG_RAX,
-};
-
-static const int tcg_target_call_iarg_regs[6] = {
-    TCG_REG_RDI,
-    TCG_REG_RSI,
-    TCG_REG_RDX,
-    TCG_REG_RCX,
-    TCG_REG_R8,
-    TCG_REG_R9,
-};
-
-static const int tcg_target_call_oarg_regs[2] = {
-    TCG_REG_RAX,
-    TCG_REG_RDX
-};
-
-static uint8_t *tb_ret_addr;
-
-static void patch_reloc(uint8_t *code_ptr, int type,
-                        tcg_target_long value, tcg_target_long addend)
-{
-    value += addend;
-    switch(type) {
-    case R_X86_64_32:
-        if (value != (uint32_t)value)
-            tcg_abort();
-        *(uint32_t *)code_ptr = value;
-        break;
-    case R_X86_64_32S:
-        if (value != (int32_t)value)
-            tcg_abort();
-        *(uint32_t *)code_ptr = value;
-        break;
-    case R_386_PC32:
-        value -= (long)code_ptr;
-        if (value != (int32_t)value)
-            tcg_abort();
-        *(uint32_t *)code_ptr = value;
-        break;
-    default:
-        tcg_abort();
-    }
-}
-
-/* maximum number of register used for input function arguments */
-static inline int tcg_target_get_call_iarg_regs_count(int flags)
-{
-    return 6;
-}
-
-/* parse target specific constraints */
-static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
-{
-    const char *ct_str;
-
-    ct_str = *pct_str;
-    switch(ct_str[0]) {
-    case 'a':
-        ct->ct |= TCG_CT_REG;
-        tcg_regset_set_reg(ct->u.regs, TCG_REG_RAX);
-        break;
-    case 'b':
-        ct->ct |= TCG_CT_REG;
-        tcg_regset_set_reg(ct->u.regs, TCG_REG_RBX);
-        break;
-    case 'c':
-        ct->ct |= TCG_CT_REG;
-        tcg_regset_set_reg(ct->u.regs, TCG_REG_RCX);
-        break;
-    case 'd':
-        ct->ct |= TCG_CT_REG;
-        tcg_regset_set_reg(ct->u.regs, TCG_REG_RDX);
-        break;
-    case 'S':
-        ct->ct |= TCG_CT_REG;
-        tcg_regset_set_reg(ct->u.regs, TCG_REG_RSI);
-        break;
-    case 'D':
-        ct->ct |= TCG_CT_REG;
-        tcg_regset_set_reg(ct->u.regs, TCG_REG_RDI);
-        break;
-    case 'q':
-        ct->ct |= TCG_CT_REG;
-        tcg_regset_set32(ct->u.regs, 0, 0xf);
-        break;
-    case 'r':
-        ct->ct |= TCG_CT_REG;
-        tcg_regset_set32(ct->u.regs, 0, 0xffff);
-        break;
-    case 'L': /* qemu_ld/st constraint */
-        ct->ct |= TCG_CT_REG;
-        tcg_regset_set32(ct->u.regs, 0, 0xffff);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_RSI);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDI);
-        break;
-    case 'e':
-        ct->ct |= TCG_CT_CONST_S32;
-        break;
-    case 'Z':
-        ct->ct |= TCG_CT_CONST_U32;
-        break;
-    default:
-        return -1;
-    }
-    ct_str++;
-    *pct_str = ct_str;
-    return 0;
-}
-
-/* test if a constant matches the constraint */
-static inline int tcg_target_const_match(tcg_target_long val,
-                                         const TCGArgConstraint *arg_ct)
-{
-    int ct;
-    ct = arg_ct->ct;
-    if (ct & TCG_CT_CONST)
-        return 1;
-    else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val)
-        return 1;
-    else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val)
-        return 1;
-    else
-        return 0;
-}
-
-#define ARITH_ADD 0
-#define ARITH_OR  1
-#define ARITH_ADC 2
-#define ARITH_SBB 3
-#define ARITH_AND 4
-#define ARITH_SUB 5
-#define ARITH_XOR 6
-#define ARITH_CMP 7
-
-#define SHIFT_ROL 0
-#define SHIFT_ROR 1
-#define SHIFT_SHL 4
-#define SHIFT_SHR 5
-#define SHIFT_SAR 7
-
-#define JCC_JMP (-1)
-#define JCC_JO  0x0
-#define JCC_JNO 0x1
-#define JCC_JB  0x2
-#define JCC_JAE 0x3
-#define JCC_JE  0x4
-#define JCC_JNE 0x5
-#define JCC_JBE 0x6
-#define JCC_JA  0x7
-#define JCC_JS  0x8
-#define JCC_JNS 0x9
-#define JCC_JP  0xa
-#define JCC_JNP 0xb
-#define JCC_JL  0xc
-#define JCC_JGE 0xd
-#define JCC_JLE 0xe
-#define JCC_JG  0xf
-
-#define P_EXT		0x100		/* 0x0f opcode prefix */
-#define P_REXW		0x200		/* set rex.w = 1 */
-#define P_REXB_R	0x400		/* REG field as byte register */
-#define P_REXB_RM	0x800		/* R/M field as byte register */
-
-static const uint8_t tcg_cond_to_jcc[10] = {
-    [TCG_COND_EQ] = JCC_JE,
-    [TCG_COND_NE] = JCC_JNE,
-    [TCG_COND_LT] = JCC_JL,
-    [TCG_COND_GE] = JCC_JGE,
-    [TCG_COND_LE] = JCC_JLE,
-    [TCG_COND_GT] = JCC_JG,
-    [TCG_COND_LTU] = JCC_JB,
-    [TCG_COND_GEU] = JCC_JAE,
-    [TCG_COND_LEU] = JCC_JBE,
-    [TCG_COND_GTU] = JCC_JA,
-};
-
-static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
-{
-    int rex = 0;
-
-    rex |= (opc & P_REXW) >> 6;		/* REX.W */
-    rex |= (r & 8) >> 1;		/* REX.R */
-    rex |= (x & 8) >> 2;		/* REX.X */
-    rex |= (rm & 8) >> 3;		/* REX.B */
-
-    /* P_REXB_{R,RM} indicates that the given register is the low byte.
-       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
-       as otherwise the encoding indicates %[abcd]h.  Note that the values
-       that are ORed in merely indicate that the REX byte must be present;
-       those bits get discarded in output.  */
-    rex |= opc & (r >= 4 ? P_REXB_R : 0);
-    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);
-
-    if (rex) {
-        tcg_out8(s, (uint8_t)(rex | 0x40));
-    }
-    if (opc & P_EXT) {
-        tcg_out8(s, 0x0f);
-    }
-    tcg_out8(s, opc & 0xff);
-}
-
-static inline void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
-{
-    tcg_out_opc(s, opc, r, rm, 0);
-    tcg_out8(s, 0xc0 | ((r & 7) << 3) | (rm & 7));
-}
-
-/* rm < 0 means no register index plus (-rm - 1 immediate bytes) */
-static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r, int rm,
-                                        tcg_target_long offset)
-{
-    if (rm < 0) {
-        tcg_target_long val;
-        tcg_out_opc(s, opc, r, 0, 0);
-        val = offset - ((tcg_target_long)s->code_ptr + 5 + (-rm - 1));
-        if (val == (int32_t)val) {
-            /* eip relative */
-            tcg_out8(s, 0x05 | ((r & 7) << 3));
-            tcg_out32(s, val);
-        } else if (offset == (int32_t)offset) {
-            tcg_out8(s, 0x04 | ((r & 7) << 3));
-            tcg_out8(s, 0x25); /* sib */
-            tcg_out32(s, offset);
-        } else {
-            tcg_abort();
-        }
-    } else if (offset == 0 && (rm & 7) != TCG_REG_RBP) {
-        tcg_out_opc(s, opc, r, rm, 0);
-        if ((rm & 7) == TCG_REG_RSP) {
-            tcg_out8(s, 0x04 | ((r & 7) << 3));
-            tcg_out8(s, 0x24);
-        } else {
-            tcg_out8(s, 0x00 | ((r & 7) << 3) | (rm & 7));
-        }
-    } else if ((int8_t)offset == offset) {
-        tcg_out_opc(s, opc, r, rm, 0);
-        if ((rm & 7) == TCG_REG_RSP) {
-            tcg_out8(s, 0x44 | ((r & 7) << 3));
-            tcg_out8(s, 0x24);
-        } else {
-            tcg_out8(s, 0x40 | ((r & 7) << 3) | (rm & 7));
-        }
-        tcg_out8(s, offset);
-    } else {
-        tcg_out_opc(s, opc, r, rm, 0);
-        if ((rm & 7) == TCG_REG_RSP) {
-            tcg_out8(s, 0x84 | ((r & 7) << 3));
-            tcg_out8(s, 0x24);
-        } else {
-            tcg_out8(s, 0x80 | ((r & 7) << 3) | (rm & 7));
-        }
-        tcg_out32(s, offset);
-    }
-}
-
-#if defined(CONFIG_SOFTMMU)
-/* XXX: incomplete. index must be different from ESP */
-static void tcg_out_modrm_offset2(TCGContext *s, int opc, int r, int rm,
-                                  int index, int shift,
-                                  tcg_target_long offset)
-{
-    int mod;
-    if (rm == -1)
-        tcg_abort();
-    if (offset == 0 && (rm & 7) != TCG_REG_RBP) {
-        mod = 0;
-    } else if (offset == (int8_t)offset) {
-        mod = 0x40;
-    } else if (offset == (int32_t)offset) {
-        mod = 0x80;
-    } else {
-        tcg_abort();
-    }
-    if (index == -1) {
-        tcg_out_opc(s, opc, r, rm, 0);
-        if ((rm & 7) == TCG_REG_RSP) {
-            tcg_out8(s, mod | ((r & 7) << 3) | 0x04);
-            tcg_out8(s, 0x04 | (rm & 7));
-        } else {
-            tcg_out8(s, mod | ((r & 7) << 3) | (rm & 7));
-        }
-    } else {
-        tcg_out_opc(s, opc, r, rm, index);
-        tcg_out8(s, mod | ((r & 7) << 3) | 0x04);
-        tcg_out8(s, (shift << 6) | ((index & 7) << 3) | (rm & 7));
-    }
-    if (mod == 0x40) {
-        tcg_out8(s, offset);
-    } else if (mod == 0x80) {
-        tcg_out32(s, offset);
-    }
-}
-#endif
-
-static inline void tcg_out_mov(TCGContext *s, int ret, int arg)
-{
-    tcg_out_modrm(s, 0x8b | P_REXW, ret, arg);
-}
-
-static inline void tcg_out_movi(TCGContext *s, TCGType type,
-                                int ret, tcg_target_long arg)
-{
-    if (arg == 0) {
-        tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3), ret, ret); /* xor r0,r0 */
-    } else if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
-        tcg_out_opc(s, 0xb8 + (ret & 7), 0, ret, 0);
-        tcg_out32(s, arg);
-    } else if (arg == (int32_t)arg) {
-        tcg_out_modrm(s, 0xc7 | P_REXW, 0, ret);
-        tcg_out32(s, arg);
-    } else {
-        tcg_out_opc(s, (0xb8 + (ret & 7)) | P_REXW, 0, ret, 0);
-        tcg_out32(s, arg);
-        tcg_out32(s, arg >> 32);
-    }
-}
-
-static void tcg_out_goto(TCGContext *s, int call, uint8_t *target)
-{
-    int32_t disp;
-
-    disp = target - s->code_ptr - 5;
-    if (disp == (target - s->code_ptr - 5)) {
-        tcg_out8(s, call ? 0xe8 : 0xe9);
-        tcg_out32(s, disp);
-    } else {
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, (tcg_target_long) target);
-        tcg_out_modrm(s, 0xff, call ? 2 : 4, TCG_REG_R10);
-    }
-}
-
-static inline void tcg_out_ld(TCGContext *s, TCGType type, int ret,
-                              int arg1, tcg_target_long arg2)
-{
-    if (type == TCG_TYPE_I32)
-        tcg_out_modrm_offset(s, 0x8b, ret, arg1, arg2); /* movl */
-    else
-        tcg_out_modrm_offset(s, 0x8b | P_REXW, ret, arg1, arg2); /* movq */
-}
-
-static inline void tcg_out_st(TCGContext *s, TCGType type, int arg,
-                              int arg1, tcg_target_long arg2)
-{
-    if (type == TCG_TYPE_I32)
-        tcg_out_modrm_offset(s, 0x89, arg, arg1, arg2); /* movl */
-    else
-        tcg_out_modrm_offset(s, 0x89 | P_REXW, arg, arg1, arg2); /* movq */
-}
-
-static inline void tgen_arithi32(TCGContext *s, int c, int r0, int32_t val)
-{
-    if ((c == ARITH_ADD && val == 1) || (c == ARITH_SUB && val == -1)) {
-        /* inc */
-        tcg_out_modrm(s, 0xff, 0, r0);
-    } else if ((c == ARITH_ADD && val == -1) || (c == ARITH_SUB && val == 1)) {
-        /* dec */
-        tcg_out_modrm(s, 0xff, 1, r0);
-    } else if (val == (int8_t)val) {
-        tcg_out_modrm(s, 0x83, c, r0);
-        tcg_out8(s, val);
-    } else if (c == ARITH_AND && val == 0xffu) {
-        /* movzbl */
-        tcg_out_modrm(s, 0xb6 | P_EXT | P_REXB_RM, r0, r0);
-    } else if (c == ARITH_AND && val == 0xffffu) {
-        /* movzwl */
-        tcg_out_modrm(s, 0xb7 | P_EXT, r0, r0);
-    } else {
-        tcg_out_modrm(s, 0x81, c, r0);
-        tcg_out32(s, val);
-    }
-}
-
-static inline void tgen_arithi64(TCGContext *s, int c, int r0, int64_t val)
-{
-    if ((c == ARITH_ADD && val == 1) || (c == ARITH_SUB && val == -1)) {
-        /* inc */
-        tcg_out_modrm(s, 0xff | P_REXW, 0, r0);
-    } else if ((c == ARITH_ADD && val == -1) || (c == ARITH_SUB && val == 1)) {
-        /* dec */
-        tcg_out_modrm(s, 0xff | P_REXW, 1, r0);
-    } else if (c == ARITH_AND && val == 0xffffffffu) {
-        /* 32-bit mov zero extends */
-        tcg_out_modrm(s, 0x8b, r0, r0);
-    } else if (c == ARITH_AND && val == (uint32_t)val) {
-        /* AND with no high bits set can use a 32-bit operation.  */
-        tgen_arithi32(s, c, r0, (uint32_t)val);
-    } else if (val == (int8_t)val) {
-        tcg_out_modrm(s, 0x83 | P_REXW, c, r0);
-        tcg_out8(s, val);
-    } else if (val == (int32_t)val) {
-        tcg_out_modrm(s, 0x81 | P_REXW, c, r0);
-        tcg_out32(s, val);
-    } else {
-        tcg_abort();
-    }
-}
-
-static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
-{
-    if (val != 0)
-        tgen_arithi64(s, ARITH_ADD, reg, val);
-}
-
-static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
-{
-    int32_t val, val1;
-    TCGLabel *l = &s->labels[label_index];
-
-    if (l->has_value) {
-        val = l->u.value - (tcg_target_long)s->code_ptr;
-        val1 = val - 2;
-        if ((int8_t)val1 == val1) {
-            if (opc == -1)
-                tcg_out8(s, 0xeb);
-            else
-                tcg_out8(s, 0x70 + opc);
-            tcg_out8(s, val1);
-        } else {
-            if (opc == -1) {
-                tcg_out8(s, 0xe9);
-                tcg_out32(s, val - 5);
-            } else {
-                tcg_out8(s, 0x0f);
-                tcg_out8(s, 0x80 + opc);
-                tcg_out32(s, val - 6);
-            }
-        }
-    } else {
-        if (opc == -1) {
-            tcg_out8(s, 0xe9);
-        } else {
-            tcg_out8(s, 0x0f);
-            tcg_out8(s, 0x80 + opc);
-        }
-        tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
-        s->code_ptr += 4;
-    }
-}
-
-static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
-                        int const_arg2, int rexw)
-{
-    if (const_arg2) {
-        if (arg2 == 0) {
-            /* test r, r */
-            tcg_out_modrm(s, 0x85 | rexw, arg1, arg1);
-        } else {
-            if (rexw) {
-                tgen_arithi64(s, ARITH_CMP, arg1, arg2);
-            } else {
-                tgen_arithi32(s, ARITH_CMP, arg1, arg2);
-            }
-        }
-    } else {
-        tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3) | rexw, arg2, arg1);
-    }
-}
-
-static void tcg_out_brcond(TCGContext *s, int cond,
-                           TCGArg arg1, TCGArg arg2, int const_arg2,
-                           int label_index, int rexw)
-{
-    tcg_out_cmp(s, arg1, arg2, const_arg2, rexw);
-    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index);
-}
-
-static void tcg_out_setcond(TCGContext *s, int cond, TCGArg dest,
-                            TCGArg arg1, TCGArg arg2, int const_arg2, int rexw)
-{
-    tcg_out_cmp(s, arg1, arg2, const_arg2, rexw);
-    /* setcc */
-    tcg_out_modrm(s, 0x90 | tcg_cond_to_jcc[cond] | P_EXT | P_REXB_RM, 0, dest);
-    tgen_arithi32(s, ARITH_AND, dest, 0xff);
-}
-
-#if defined(CONFIG_SOFTMMU)
-
-#include "../../softmmu_defs.h"
-
-static void *qemu_ld_helpers[4] = {
-    __ldb_mmu,
-    __ldw_mmu,
-    __ldl_mmu,
-    __ldq_mmu,
-};
-
-static void *qemu_st_helpers[4] = {
-    __stb_mmu,
-    __stw_mmu,
-    __stl_mmu,
-    __stq_mmu,
-};
-#endif
-
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
-                            int opc)
-{
-    int addr_reg, data_reg, r0, r1, mem_index, s_bits, bswap, rexw;
-    int32_t offset;
-#if defined(CONFIG_SOFTMMU)
-    uint8_t *label1_ptr, *label2_ptr;
-#endif
-
-    data_reg = *args++;
-    addr_reg = *args++;
-    mem_index = *args;
-    s_bits = opc & 3;
-
-    r0 = TCG_REG_RDI;
-    r1 = TCG_REG_RSI;
-
-#if TARGET_LONG_BITS == 32
-    rexw = 0;
-#else
-    rexw = P_REXW;
-#endif
-#if defined(CONFIG_SOFTMMU)
-    /* mov */
-    tcg_out_modrm(s, 0x8b | rexw, r1, addr_reg);
-
-    /* mov */
-    tcg_out_modrm(s, 0x8b | rexw, r0, addr_reg);
-
-    tcg_out_modrm(s, 0xc1 | rexw, 5, r1); /* shr $x, r1 */
-    tcg_out8(s, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
-
-    tcg_out_modrm(s, 0x81 | rexw, 4, r0); /* andl $x, r0 */
-    tcg_out32(s, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
-
-    tcg_out_modrm(s, 0x81, 4, r1); /* andl $x, r1 */
-    tcg_out32(s, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
-
-    /* lea offset(r1, env), r1 */
-    tcg_out_modrm_offset2(s, 0x8d | P_REXW, r1, r1, TCG_AREG0, 0,
-                          offsetof(CPUOldState, tlb_table[mem_index][0].addr_read));
-
-    /* cmp 0(r1), r0 */
-    tcg_out_modrm_offset(s, 0x3b | rexw, r0, r1, 0);
-
-    /* mov */
-    tcg_out_modrm(s, 0x8b | rexw, r0, addr_reg);
-
-    /* je label1 */
-    tcg_out8(s, 0x70 + JCC_JE);
-    label1_ptr = s->code_ptr;
-    s->code_ptr++;
-
-    /* XXX: move that code at the end of the TB */
-    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RSI, mem_index);
-    tcg_out_goto(s, 1, qemu_ld_helpers[s_bits]);
-
-    switch(opc) {
-    case 0 | 4:
-        /* movsbq */
-        tcg_out_modrm(s, 0xbe | P_EXT | P_REXW, data_reg, TCG_REG_RAX);
-        break;
-    case 1 | 4:
-        /* movswq */
-        tcg_out_modrm(s, 0xbf | P_EXT | P_REXW, data_reg, TCG_REG_RAX);
-        break;
-    case 2 | 4:
-        /* movslq */
-        tcg_out_modrm(s, 0x63 | P_REXW, data_reg, TCG_REG_RAX);
-        break;
-    case 0:
-        /* movzbq */
-        tcg_out_modrm(s, 0xb6 | P_EXT | P_REXW, data_reg, TCG_REG_RAX);
-        break;
-    case 1:
-        /* movzwq */
-        tcg_out_modrm(s, 0xb7 | P_EXT | P_REXW, data_reg, TCG_REG_RAX);
-        break;
-    case 2:
-    default:
-        /* movl */
-        tcg_out_modrm(s, 0x8b, data_reg, TCG_REG_RAX);
-        break;
-    case 3:
-        tcg_out_mov(s, data_reg, TCG_REG_RAX);
-        break;
-    }
-
-    /* jmp label2 */
-    tcg_out8(s, 0xeb);
-    label2_ptr = s->code_ptr;
-    s->code_ptr++;
-
-    /* label1: */
-    *label1_ptr = s->code_ptr - label1_ptr - 1;
-
-    /* add x(r1), r0 */
-    tcg_out_modrm_offset(s, 0x03 | P_REXW, r0, r1, offsetof(CPUTLBEntry, addend) -
-                         offsetof(CPUTLBEntry, addr_read));
-    offset = 0;
-#else
-    if (GUEST_BASE == (int32_t)GUEST_BASE) {
-        r0 = addr_reg;
-        offset = GUEST_BASE;
-    } else {
-        offset = 0;
-        /* movq $GUEST_BASE, r0 */
-        tcg_out_opc(s, (0xb8 + (r0 & 7)) | P_REXW, 0, r0, 0);
-        tcg_out32(s, GUEST_BASE);
-        tcg_out32(s, GUEST_BASE >> 32);
-        /* addq addr_reg, r0 */
-        tcg_out_modrm(s, 0x01 | P_REXW, addr_reg, r0);
-    }
-#endif
-
-#ifdef TARGET_WORDS_BIGENDIAN
-    bswap = 1;
-#else
-    bswap = 0;
-#endif
-    switch(opc) {
-    case 0:
-        /* movzbl */
-        tcg_out_modrm_offset(s, 0xb6 | P_EXT, data_reg, r0, offset);
-        break;
-    case 0 | 4:
-        /* movsbX */
-        tcg_out_modrm_offset(s, 0xbe | P_EXT | rexw, data_reg, r0, offset);
-        break;
-    case 1:
-        /* movzwl */
-        tcg_out_modrm_offset(s, 0xb7 | P_EXT, data_reg, r0, offset);
-        if (bswap) {
-            /* rolw $8, data_reg */
-            tcg_out8(s, 0x66);
-            tcg_out_modrm(s, 0xc1, 0, data_reg);
-            tcg_out8(s, 8);
-        }
-        break;
-    case 1 | 4:
-        if (bswap) {
-            /* movzwl */
-            tcg_out_modrm_offset(s, 0xb7 | P_EXT, data_reg, r0, offset);
-            /* rolw $8, data_reg */
-            tcg_out8(s, 0x66);
-            tcg_out_modrm(s, 0xc1, 0, data_reg);
-            tcg_out8(s, 8);
-
-            /* movswX data_reg, data_reg */
-            tcg_out_modrm(s, 0xbf | P_EXT | rexw, data_reg, data_reg);
-        } else {
-            /* movswX */
-            tcg_out_modrm_offset(s, 0xbf | P_EXT | rexw, data_reg, r0, offset);
-        }
-        break;
-    case 2:
-        /* movl (r0), data_reg */
-        tcg_out_modrm_offset(s, 0x8b, data_reg, r0, offset);
-        if (bswap) {
-            /* bswap */
-            tcg_out_opc(s, (0xc8 + (data_reg & 7)) | P_EXT, 0, data_reg, 0);
-        }
-        break;
-    case 2 | 4:
-        if (bswap) {
-            /* movl (r0), data_reg */
-            tcg_out_modrm_offset(s, 0x8b, data_reg, r0, offset);
-            /* bswap */
-            tcg_out_opc(s, (0xc8 + (data_reg & 7)) | P_EXT, 0, data_reg, 0);
-            /* movslq */
-            tcg_out_modrm(s, 0x63 | P_REXW, data_reg, data_reg);
-        } else {
-            /* movslq */
-            tcg_out_modrm_offset(s, 0x63 | P_REXW, data_reg, r0, offset);
-        }
-        break;
-    case 3:
-        /* movq (r0), data_reg */
-        tcg_out_modrm_offset(s, 0x8b | P_REXW, data_reg, r0, offset);
-        if (bswap) {
-            /* bswap */
-            tcg_out_opc(s, (0xc8 + (data_reg & 7)) | P_EXT | P_REXW, 0, data_reg, 0);
-        }
-        break;
-    default:
-        tcg_abort();
-    }
-
-#if defined(CONFIG_SOFTMMU)
-    /* label2: */
-    *label2_ptr = s->code_ptr - label2_ptr - 1;
-#endif
-}
-
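/*
 * Illustrative sketch (not part of this diff): the CONFIG_SOFTMMU fast path
 * that tcg_out_qemu_ld() emits above corresponds roughly to the C below.
 * CPU_TLB_SIZE, CPUTLBEntry, addr_read, addend and qemu_ld_helpers are the
 * names used by the emitted code; the function itself is invented for
 * illustration and assumes the surrounding QEMU softmmu definitions.
 */
static inline uintptr_t tlb_probe_read_sketch(CPUOldState *env, target_ulong addr,
                                              int mem_index, int s_bits)
{
    /* the shr/and pair above: index = page number modulo the TLB size */
    unsigned idx = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
    CPUTLBEntry *e = &env->tlb_table[mem_index][idx];

    /* The cmp checks the page tag plus the low (1 << s_bits) - 1 bits, so
     * accesses that straddle the access-size mask also take the slow path. */
    if ((addr & (TARGET_PAGE_MASK | ((1 << s_bits) - 1))) == e->addr_read) {
        return (uintptr_t)addr + e->addend;   /* hit: host address */
    }
    /* miss: the generated code branches to qemu_ld_helpers[s_bits] instead */
    return 0;
}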
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
-                            int opc)
-{
-    int addr_reg, data_reg, r0, r1, mem_index, s_bits, bswap, rexw;
-    int32_t offset;
-#if defined(CONFIG_SOFTMMU)
-    uint8_t *label1_ptr, *label2_ptr;
-#endif
-
-    data_reg = *args++;
-    addr_reg = *args++;
-    mem_index = *args;
-
-    s_bits = opc;
-
-    r0 = TCG_REG_RDI;
-    r1 = TCG_REG_RSI;
-
-#if TARGET_LONG_BITS == 32
-    rexw = 0;
-#else
-    rexw = P_REXW;
-#endif
-#if defined(CONFIG_SOFTMMU)
-    /* mov */
-    tcg_out_modrm(s, 0x8b | rexw, r1, addr_reg);
-
-    /* mov */
-    tcg_out_modrm(s, 0x8b | rexw, r0, addr_reg);
-
-    tcg_out_modrm(s, 0xc1 | rexw, 5, r1); /* shr $x, r1 */
-    tcg_out8(s, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
-
-    tcg_out_modrm(s, 0x81 | rexw, 4, r0); /* andl $x, r0 */
-    tcg_out32(s, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
-
-    tcg_out_modrm(s, 0x81, 4, r1); /* andl $x, r1 */
-    tcg_out32(s, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
-
-    /* lea offset(r1, env), r1 */
-    tcg_out_modrm_offset2(s, 0x8d | P_REXW, r1, r1, TCG_AREG0, 0,
-                          offsetof(CPUOldState, tlb_table[mem_index][0].addr_write));
-
-    /* cmp 0(r1), r0 */
-    tcg_out_modrm_offset(s, 0x3b | rexw, r0, r1, 0);
-
-    /* mov */
-    tcg_out_modrm(s, 0x8b | rexw, r0, addr_reg);
-
-    /* je label1 */
-    tcg_out8(s, 0x70 + JCC_JE);
-    label1_ptr = s->code_ptr;
-    s->code_ptr++;
-
-    /* XXX: move that code at the end of the TB */
-    switch(opc) {
-    case 0:
-        /* movzbl */
-        tcg_out_modrm(s, 0xb6 | P_EXT | P_REXB_RM, TCG_REG_RSI, data_reg);
-        break;
-    case 1:
-        /* movzwl */
-        tcg_out_modrm(s, 0xb7 | P_EXT, TCG_REG_RSI, data_reg);
-        break;
-    case 2:
-        /* movl */
-        tcg_out_modrm(s, 0x8b, TCG_REG_RSI, data_reg);
-        break;
-    default:
-    case 3:
-        tcg_out_mov(s, TCG_REG_RSI, data_reg);
-        break;
-    }
-    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index);
-    tcg_out_goto(s, 1, qemu_st_helpers[s_bits]);
-
-    /* jmp label2 */
-    tcg_out8(s, 0xeb);
-    label2_ptr = s->code_ptr;
-    s->code_ptr++;
-
-    /* label1: */
-    *label1_ptr = s->code_ptr - label1_ptr - 1;
-
-    /* add x(r1), r0 */
-    tcg_out_modrm_offset(s, 0x03 | P_REXW, r0, r1, offsetof(CPUTLBEntry, addend) -
-                         offsetof(CPUTLBEntry, addr_write));
-    offset = 0;
-#else
-    if (GUEST_BASE == (int32_t)GUEST_BASE) {
-        r0 = addr_reg;
-        offset = GUEST_BASE;
-    } else {
-        offset = 0;
-        /* movq $GUEST_BASE, r0 */
-        tcg_out_opc(s, (0xb8 + (r0 & 7)) | P_REXW, 0, r0, 0);
-        tcg_out32(s, GUEST_BASE);
-        tcg_out32(s, GUEST_BASE >> 32);
-        /* addq addr_reg, r0 */
-        tcg_out_modrm(s, 0x01 | P_REXW, addr_reg, r0);
-    }
-#endif
-
-#ifdef TARGET_WORDS_BIGENDIAN
-    bswap = 1;
-#else
-    bswap = 0;
-#endif
-    switch(opc) {
-    case 0:
-        /* movb */
-        tcg_out_modrm_offset(s, 0x88 | P_REXB_R, data_reg, r0, offset);
-        break;
-    case 1:
-        if (bswap) {
-            tcg_out_modrm(s, 0x8b, r1, data_reg); /* movl */
-            tcg_out8(s, 0x66); /* rolw $8, %ecx */
-            tcg_out_modrm(s, 0xc1, 0, r1);
-            tcg_out8(s, 8);
-            data_reg = r1;
-        }
-        /* movw */
-        tcg_out8(s, 0x66);
-        tcg_out_modrm_offset(s, 0x89, data_reg, r0, offset);
-        break;
-    case 2:
-        if (bswap) {
-            tcg_out_modrm(s, 0x8b, r1, data_reg); /* movl */
-            /* bswap data_reg */
-            tcg_out_opc(s, (0xc8 + r1) | P_EXT, 0, r1, 0);
-            data_reg = r1;
-        }
-        /* movl */
-        tcg_out_modrm_offset(s, 0x89, data_reg, r0, offset);
-        break;
-    case 3:
-        if (bswap) {
-            tcg_out_mov(s, r1, data_reg);
-            /* bswap data_reg */
-            tcg_out_opc(s, (0xc8 + r1) | P_EXT | P_REXW, 0, r1, 0);
-            data_reg = r1;
-        }
-        /* movq */
-        tcg_out_modrm_offset(s, 0x89 | P_REXW, data_reg, r0, offset);
-        break;
-    default:
-        tcg_abort();
-    }
-
-#if defined(CONFIG_SOFTMMU)
-    /* label2: */
-    *label2_ptr = s->code_ptr - label2_ptr - 1;
-#endif
-}
-
-static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
-                              const int *const_args)
-{
-    int c;
-
-    switch(opc) {
-    case INDEX_op_exit_tb:
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RAX, args[0]);
-        tcg_out_goto(s, 0, tb_ret_addr);
-        break;
-    case INDEX_op_goto_tb:
-        if (s->tb_jmp_offset) {
-            /* direct jump method */
-            tcg_out8(s, 0xe9); /* jmp im */
-            s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
-            tcg_out32(s, 0);
-        } else {
-            /* indirect jump method */
-            /* jmp Ev */
-            tcg_out_modrm_offset(s, 0xff, 4, -1,
-                                 (tcg_target_long)(s->tb_next +
-                                                   args[0]));
-        }
-        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
-        break;
-    case INDEX_op_call:
-        if (const_args[0]) {
-            tcg_out_goto(s, 1, (void *) args[0]);
-        } else {
-            tcg_out_modrm(s, 0xff, 2, args[0]);
-        }
-        break;
-    case INDEX_op_jmp:
-        if (const_args[0]) {
-            tcg_out_goto(s, 0, (void *) args[0]);
-        } else {
-            tcg_out_modrm(s, 0xff, 4, args[0]);
-        }
-        break;
-    case INDEX_op_br:
-        tcg_out_jxx(s, JCC_JMP, args[0]);
-        break;
-    case INDEX_op_movi_i32:
-        tcg_out_movi(s, TCG_TYPE_I32, args[0], (uint32_t)args[1]);
-        break;
-    case INDEX_op_movi_i64:
-        tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
-        break;
-    case INDEX_op_ld8u_i32:
-    case INDEX_op_ld8u_i64:
-        /* movzbl */
-        tcg_out_modrm_offset(s, 0xb6 | P_EXT, args[0], args[1], args[2]);
-        break;
-    case INDEX_op_ld8s_i32:
-        /* movsbl */
-        tcg_out_modrm_offset(s, 0xbe | P_EXT, args[0], args[1], args[2]);
-        break;
-    case INDEX_op_ld8s_i64:
-        /* movsbq */
-        tcg_out_modrm_offset(s, 0xbe | P_EXT | P_REXW, args[0], args[1], args[2]);
-        break;
-    case INDEX_op_ld16u_i32:
-    case INDEX_op_ld16u_i64:
-        /* movzwl */
-        tcg_out_modrm_offset(s, 0xb7 | P_EXT, args[0], args[1], args[2]);
-        break;
-    case INDEX_op_ld16s_i32:
-        /* movswl */
-        tcg_out_modrm_offset(s, 0xbf | P_EXT, args[0], args[1], args[2]);
-        break;
-    case INDEX_op_ld16s_i64:
-        /* movswq */
-        tcg_out_modrm_offset(s, 0xbf | P_EXT | P_REXW, args[0], args[1], args[2]);
-        break;
-    case INDEX_op_ld_i32:
-    case INDEX_op_ld32u_i64:
-        /* movl */
-        tcg_out_modrm_offset(s, 0x8b, args[0], args[1], args[2]);
-        break;
-    case INDEX_op_ld32s_i64:
-        /* movslq */
-        tcg_out_modrm_offset(s, 0x63 | P_REXW, args[0], args[1], args[2]);
-        break;
-    case INDEX_op_ld_i64:
-        /* movq */
-        tcg_out_modrm_offset(s, 0x8b | P_REXW, args[0], args[1], args[2]);
-        break;
-
-    case INDEX_op_st8_i32:
-    case INDEX_op_st8_i64:
-        /* movb */
-        tcg_out_modrm_offset(s, 0x88 | P_REXB_R, args[0], args[1], args[2]);
-        break;
-    case INDEX_op_st16_i32:
-    case INDEX_op_st16_i64:
-        /* movw */
-        tcg_out8(s, 0x66);
-        tcg_out_modrm_offset(s, 0x89, args[0], args[1], args[2]);
-        break;
-    case INDEX_op_st_i32:
-    case INDEX_op_st32_i64:
-        /* movl */
-        tcg_out_modrm_offset(s, 0x89, args[0], args[1], args[2]);
-        break;
-    case INDEX_op_st_i64:
-        /* movq */
-        tcg_out_modrm_offset(s, 0x89 | P_REXW, args[0], args[1], args[2]);
-        break;
-
-    case INDEX_op_sub_i32:
-        c = ARITH_SUB;
-        goto gen_arith32;
-    case INDEX_op_and_i32:
-        c = ARITH_AND;
-        goto gen_arith32;
-    case INDEX_op_or_i32:
-        c = ARITH_OR;
-        goto gen_arith32;
-    case INDEX_op_xor_i32:
-        c = ARITH_XOR;
-        goto gen_arith32;
-    case INDEX_op_add_i32:
-        c = ARITH_ADD;
-    gen_arith32:
-        if (const_args[2]) {
-            tgen_arithi32(s, c, args[0], args[2]);
-        } else {
-            tcg_out_modrm(s, 0x01 | (c << 3), args[2], args[0]);
-        }
-        break;
-
-    case INDEX_op_sub_i64:
-        c = ARITH_SUB;
-        goto gen_arith64;
-    case INDEX_op_and_i64:
-        c = ARITH_AND;
-        goto gen_arith64;
-    case INDEX_op_or_i64:
-        c = ARITH_OR;
-        goto gen_arith64;
-    case INDEX_op_xor_i64:
-        c = ARITH_XOR;
-        goto gen_arith64;
-    case INDEX_op_add_i64:
-        c = ARITH_ADD;
-    gen_arith64:
-        if (const_args[2]) {
-            tgen_arithi64(s, c, args[0], args[2]);
-        } else {
-            tcg_out_modrm(s, 0x01 | (c << 3) | P_REXW, args[2], args[0]);
-        }
-        break;
-
-    case INDEX_op_mul_i32:
-        if (const_args[2]) {
-            int32_t val;
-            val = args[2];
-            if (val == (int8_t)val) {
-                tcg_out_modrm(s, 0x6b, args[0], args[0]);
-                tcg_out8(s, val);
-            } else {
-                tcg_out_modrm(s, 0x69, args[0], args[0]);
-                tcg_out32(s, val);
-            }
-        } else {
-            tcg_out_modrm(s, 0xaf | P_EXT, args[0], args[2]);
-        }
-        break;
-    case INDEX_op_mul_i64:
-        if (const_args[2]) {
-            int32_t val;
-            val = args[2];
-            if (val == (int8_t)val) {
-                tcg_out_modrm(s, 0x6b | P_REXW, args[0], args[0]);
-                tcg_out8(s, val);
-            } else {
-                tcg_out_modrm(s, 0x69 | P_REXW, args[0], args[0]);
-                tcg_out32(s, val);
-            }
-        } else {
-            tcg_out_modrm(s, 0xaf | P_EXT | P_REXW, args[0], args[2]);
-        }
-        break;
-    case INDEX_op_div2_i32:
-        tcg_out_modrm(s, 0xf7, 7, args[4]);
-        break;
-    case INDEX_op_divu2_i32:
-        tcg_out_modrm(s, 0xf7, 6, args[4]);
-        break;
-    case INDEX_op_div2_i64:
-        tcg_out_modrm(s, 0xf7 | P_REXW, 7, args[4]);
-        break;
-    case INDEX_op_divu2_i64:
-        tcg_out_modrm(s, 0xf7 | P_REXW, 6, args[4]);
-        break;
-
-    case INDEX_op_shl_i32:
-        c = SHIFT_SHL;
-    gen_shift32:
-        if (const_args[2]) {
-            if (args[2] == 1) {
-                tcg_out_modrm(s, 0xd1, c, args[0]);
-            } else {
-                tcg_out_modrm(s, 0xc1, c, args[0]);
-                tcg_out8(s, args[2]);
-            }
-        } else {
-            tcg_out_modrm(s, 0xd3, c, args[0]);
-        }
-        break;
-    case INDEX_op_shr_i32:
-        c = SHIFT_SHR;
-        goto gen_shift32;
-    case INDEX_op_sar_i32:
-        c = SHIFT_SAR;
-        goto gen_shift32;
-    case INDEX_op_rotl_i32:
-        c = SHIFT_ROL;
-        goto gen_shift32;
-    case INDEX_op_rotr_i32:
-        c = SHIFT_ROR;
-        goto gen_shift32;
-
-    case INDEX_op_shl_i64:
-        c = SHIFT_SHL;
-    gen_shift64:
-        if (const_args[2]) {
-            if (args[2] == 1) {
-                tcg_out_modrm(s, 0xd1 | P_REXW, c, args[0]);
-            } else {
-                tcg_out_modrm(s, 0xc1 | P_REXW, c, args[0]);
-                tcg_out8(s, args[2]);
-            }
-        } else {
-            tcg_out_modrm(s, 0xd3 | P_REXW, c, args[0]);
-        }
-        break;
-    case INDEX_op_shr_i64:
-        c = SHIFT_SHR;
-        goto gen_shift64;
-    case INDEX_op_sar_i64:
-        c = SHIFT_SAR;
-        goto gen_shift64;
-    case INDEX_op_rotl_i64:
-        c = SHIFT_ROL;
-        goto gen_shift64;
-    case INDEX_op_rotr_i64:
-        c = SHIFT_ROR;
-        goto gen_shift64;
-
-    case INDEX_op_brcond_i32:
-        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
-                       args[3], 0);
-        break;
-    case INDEX_op_brcond_i64:
-        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
-                       args[3], P_REXW);
-        break;
-
-    case INDEX_op_bswap16_i32:
-    case INDEX_op_bswap16_i64:
-        tcg_out8(s, 0x66);
-        tcg_out_modrm(s, 0xc1, SHIFT_ROL, args[0]);
-        tcg_out8(s, 8);
-        break;
-    case INDEX_op_bswap32_i32:
-    case INDEX_op_bswap32_i64:
-        tcg_out_opc(s, (0xc8 + (args[0] & 7)) | P_EXT, 0, args[0], 0);
-        break;
-    case INDEX_op_bswap64_i64:
-        tcg_out_opc(s, (0xc8 + (args[0] & 7)) | P_EXT | P_REXW, 0, args[0], 0);
-        break;
-
-    case INDEX_op_neg_i32:
-        tcg_out_modrm(s, 0xf7, 3, args[0]);
-        break;
-    case INDEX_op_neg_i64:
-        tcg_out_modrm(s, 0xf7 | P_REXW, 3, args[0]);
-        break;
-
-    case INDEX_op_not_i32:
-        tcg_out_modrm(s, 0xf7, 2, args[0]);
-        break;
-    case INDEX_op_not_i64:
-        tcg_out_modrm(s, 0xf7 | P_REXW, 2, args[0]);
-        break;
-
-    case INDEX_op_ext8s_i32:
-        tcg_out_modrm(s, 0xbe | P_EXT | P_REXB_RM, args[0], args[1]);
-        break;
-    case INDEX_op_ext16s_i32:
-        tcg_out_modrm(s, 0xbf | P_EXT, args[0], args[1]);
-        break;
-    case INDEX_op_ext8s_i64:
-        tcg_out_modrm(s, 0xbe | P_EXT | P_REXW, args[0], args[1]);
-        break;
-    case INDEX_op_ext16s_i64:
-        tcg_out_modrm(s, 0xbf | P_EXT | P_REXW, args[0], args[1]);
-        break;
-    case INDEX_op_ext32s_i64:
-        tcg_out_modrm(s, 0x63 | P_REXW, args[0], args[1]);
-        break;
-    case INDEX_op_ext8u_i32:
-    case INDEX_op_ext8u_i64:
-        tcg_out_modrm(s, 0xb6 | P_EXT | P_REXB_RM, args[0], args[1]);
-        break;
-    case INDEX_op_ext16u_i32:
-    case INDEX_op_ext16u_i64:
-        tcg_out_modrm(s, 0xb7 | P_EXT, args[0], args[1]);
-        break;
-    case INDEX_op_ext32u_i64:
-        tcg_out_modrm(s, 0x8b, args[0], args[1]);
-        break;
-
-    case INDEX_op_setcond_i32:
-        tcg_out_setcond(s, args[3], args[0], args[1], args[2],
-                        const_args[2], 0);
-        break;
-    case INDEX_op_setcond_i64:
-        tcg_out_setcond(s, args[3], args[0], args[1], args[2],
-                        const_args[2], P_REXW);
-        break;
-
-    case INDEX_op_qemu_ld8u:
-        tcg_out_qemu_ld(s, args, 0);
-        break;
-    case INDEX_op_qemu_ld8s:
-        tcg_out_qemu_ld(s, args, 0 | 4);
-        break;
-    case INDEX_op_qemu_ld16u:
-        tcg_out_qemu_ld(s, args, 1);
-        break;
-    case INDEX_op_qemu_ld16s:
-        tcg_out_qemu_ld(s, args, 1 | 4);
-        break;
-    case INDEX_op_qemu_ld32u:
-        tcg_out_qemu_ld(s, args, 2);
-        break;
-    case INDEX_op_qemu_ld32s:
-        tcg_out_qemu_ld(s, args, 2 | 4);
-        break;
-    case INDEX_op_qemu_ld64:
-        tcg_out_qemu_ld(s, args, 3);
-        break;
-
-    case INDEX_op_qemu_st8:
-        tcg_out_qemu_st(s, args, 0);
-        break;
-    case INDEX_op_qemu_st16:
-        tcg_out_qemu_st(s, args, 1);
-        break;
-    case INDEX_op_qemu_st32:
-        tcg_out_qemu_st(s, args, 2);
-        break;
-    case INDEX_op_qemu_st64:
-        tcg_out_qemu_st(s, args, 3);
-        break;
-
-    default:
-        tcg_abort();
-    }
-}
-
-static int tcg_target_callee_save_regs[] = {
-    TCG_REG_RBP,
-    TCG_REG_RBX,
-    TCG_REG_R12,
-    TCG_REG_R13,
-    /*    TCG_REG_R14, */ /* currently used for the global env, so no
-                             need to save */
-    TCG_REG_R15,
-};
-
-static inline void tcg_out_push(TCGContext *s, int reg)
-{
-    tcg_out_opc(s, (0x50 + (reg & 7)), 0, reg, 0);
-}
-
-static inline void tcg_out_pop(TCGContext *s, int reg)
-{
-    tcg_out_opc(s, (0x58 + (reg & 7)), 0, reg, 0);
-}
-
-/* Generate global QEMU prologue and epilogue code */
-void tcg_target_qemu_prologue(TCGContext *s)
-{
-    int i, frame_size, push_size, stack_addend;
-
-    /* TB prologue */
-    /* save all callee saved registers */
-    for(i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
-        tcg_out_push(s, tcg_target_callee_save_regs[i]);
-
-    }
-    /* reserve some stack space */
-    push_size = 8 + ARRAY_SIZE(tcg_target_callee_save_regs) * 8;
-    frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE;
-    frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
-        ~(TCG_TARGET_STACK_ALIGN - 1);
-    stack_addend = frame_size - push_size;
-    tcg_out_addi(s, TCG_REG_RSP, -stack_addend);
-
-    tcg_out_modrm(s, 0xff, 4, TCG_REG_RDI); /* jmp *%rdi */
-
-    /* TB epilogue */
-    tb_ret_addr = s->code_ptr;
-    tcg_out_addi(s, TCG_REG_RSP, stack_addend);
-    for(i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
-        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
-    }
-    tcg_out8(s, 0xc3); /* ret */
-}
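/*
 * Worked example (illustrative, not part of this diff): with the five
 * callee-saved pushes above plus the return address, push_size is
 * 8 + 5 * 8 = 48 bytes.  Assuming the usual TCG_STATIC_CALL_ARGS_SIZE of
 * 128 bytes, frame_size is 176, already a multiple of the 16-byte
 * TCG_TARGET_STACK_ALIGN, so stack_addend is 128 and %rsp stays 16-byte
 * aligned when the prologue jumps into the generated code via *%rdi.
 */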
-
-static const TCGTargetOpDef x86_64_op_defs[] = {
-    { INDEX_op_exit_tb, { } },
-    { INDEX_op_goto_tb, { } },
-    { INDEX_op_call, { "ri" } }, /* XXX: might need a specific constant constraint */
-    { INDEX_op_jmp, { "ri" } }, /* XXX: might need a specific constant constraint */
-    { INDEX_op_br, { } },
-
-    { INDEX_op_mov_i32, { "r", "r" } },
-    { INDEX_op_movi_i32, { "r" } },
-    { INDEX_op_ld8u_i32, { "r", "r" } },
-    { INDEX_op_ld8s_i32, { "r", "r" } },
-    { INDEX_op_ld16u_i32, { "r", "r" } },
-    { INDEX_op_ld16s_i32, { "r", "r" } },
-    { INDEX_op_ld_i32, { "r", "r" } },
-    { INDEX_op_st8_i32, { "r", "r" } },
-    { INDEX_op_st16_i32, { "r", "r" } },
-    { INDEX_op_st_i32, { "r", "r" } },
-
-    { INDEX_op_add_i32, { "r", "0", "ri" } },
-    { INDEX_op_mul_i32, { "r", "0", "ri" } },
-    { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
-    { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
-    { INDEX_op_sub_i32, { "r", "0", "ri" } },
-    { INDEX_op_and_i32, { "r", "0", "ri" } },
-    { INDEX_op_or_i32, { "r", "0", "ri" } },
-    { INDEX_op_xor_i32, { "r", "0", "ri" } },
-
-    { INDEX_op_shl_i32, { "r", "0", "ci" } },
-    { INDEX_op_shr_i32, { "r", "0", "ci" } },
-    { INDEX_op_sar_i32, { "r", "0", "ci" } },
-    { INDEX_op_rotl_i32, { "r", "0", "ci" } },
-    { INDEX_op_rotr_i32, { "r", "0", "ci" } },
-
-    { INDEX_op_brcond_i32, { "r", "ri" } },
-
-    { INDEX_op_mov_i64, { "r", "r" } },
-    { INDEX_op_movi_i64, { "r" } },
-    { INDEX_op_ld8u_i64, { "r", "r" } },
-    { INDEX_op_ld8s_i64, { "r", "r" } },
-    { INDEX_op_ld16u_i64, { "r", "r" } },
-    { INDEX_op_ld16s_i64, { "r", "r" } },
-    { INDEX_op_ld32u_i64, { "r", "r" } },
-    { INDEX_op_ld32s_i64, { "r", "r" } },
-    { INDEX_op_ld_i64, { "r", "r" } },
-    { INDEX_op_st8_i64, { "r", "r" } },
-    { INDEX_op_st16_i64, { "r", "r" } },
-    { INDEX_op_st32_i64, { "r", "r" } },
-    { INDEX_op_st_i64, { "r", "r" } },
-
-    { INDEX_op_add_i64, { "r", "0", "re" } },
-    { INDEX_op_mul_i64, { "r", "0", "re" } },
-    { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
-    { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
-    { INDEX_op_sub_i64, { "r", "0", "re" } },
-    { INDEX_op_and_i64, { "r", "0", "reZ" } },
-    { INDEX_op_or_i64, { "r", "0", "re" } },
-    { INDEX_op_xor_i64, { "r", "0", "re" } },
-
-    { INDEX_op_shl_i64, { "r", "0", "ci" } },
-    { INDEX_op_shr_i64, { "r", "0", "ci" } },
-    { INDEX_op_sar_i64, { "r", "0", "ci" } },
-    { INDEX_op_rotl_i64, { "r", "0", "ci" } },
-    { INDEX_op_rotr_i64, { "r", "0", "ci" } },
-
-    { INDEX_op_brcond_i64, { "r", "re" } },
-
-    { INDEX_op_bswap16_i32, { "r", "0" } },
-    { INDEX_op_bswap16_i64, { "r", "0" } },
-    { INDEX_op_bswap32_i32, { "r", "0" } },
-    { INDEX_op_bswap32_i64, { "r", "0" } },
-    { INDEX_op_bswap64_i64, { "r", "0" } },
-
-    { INDEX_op_neg_i32, { "r", "0" } },
-    { INDEX_op_neg_i64, { "r", "0" } },
-
-    { INDEX_op_not_i32, { "r", "0" } },
-    { INDEX_op_not_i64, { "r", "0" } },
-
-    { INDEX_op_ext8s_i32, { "r", "r"} },
-    { INDEX_op_ext16s_i32, { "r", "r"} },
-    { INDEX_op_ext8s_i64, { "r", "r"} },
-    { INDEX_op_ext16s_i64, { "r", "r"} },
-    { INDEX_op_ext32s_i64, { "r", "r"} },
-    { INDEX_op_ext8u_i32, { "r", "r"} },
-    { INDEX_op_ext16u_i32, { "r", "r"} },
-    { INDEX_op_ext8u_i64, { "r", "r"} },
-    { INDEX_op_ext16u_i64, { "r", "r"} },
-    { INDEX_op_ext32u_i64, { "r", "r"} },
-
-    { INDEX_op_setcond_i32, { "r", "r", "ri" } },
-    { INDEX_op_setcond_i64, { "r", "r", "re" } },
-
-    { INDEX_op_qemu_ld8u, { "r", "L" } },
-    { INDEX_op_qemu_ld8s, { "r", "L" } },
-    { INDEX_op_qemu_ld16u, { "r", "L" } },
-    { INDEX_op_qemu_ld16s, { "r", "L" } },
-    { INDEX_op_qemu_ld32u, { "r", "L" } },
-    { INDEX_op_qemu_ld32s, { "r", "L" } },
-    { INDEX_op_qemu_ld64, { "r", "L" } },
-
-    { INDEX_op_qemu_st8, { "L", "L" } },
-    { INDEX_op_qemu_st16, { "L", "L" } },
-    { INDEX_op_qemu_st32, { "L", "L" } },
-    { INDEX_op_qemu_st64, { "L", "L" } },
-
-    { -1 },
-};
-
-void tcg_target_init(TCGContext *s)
-{
-    /* fail safe */
-    if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry))
-        tcg_abort();
-
-    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
-    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
-    tcg_regset_set32(tcg_target_call_clobber_regs, 0,
-                     (1 << TCG_REG_RDI) |
-                     (1 << TCG_REG_RSI) |
-                     (1 << TCG_REG_RDX) |
-                     (1 << TCG_REG_RCX) |
-                     (1 << TCG_REG_R8) |
-                     (1 << TCG_REG_R9) |
-                     (1 << TCG_REG_RAX) |
-                     (1 << TCG_REG_R10) |
-                     (1 << TCG_REG_R11));
-
-    tcg_regset_clear(s->reserved_regs);
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_RSP);
-
-    tcg_add_target_add_op_defs(x86_64_op_defs);
-}
diff --git a/tcg/x86_64/tcg-target.h b/tcg/x86_64/tcg-target.h
deleted file mode 100644
index 0b13054..0000000
--- a/tcg/x86_64/tcg-target.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Tiny Code Generator for QEMU
- *
- * Copyright (c) 2008 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#define TCG_TARGET_X86_64 1
-
-#define TCG_TARGET_REG_BITS 64
-//#define TCG_TARGET_WORDS_BIGENDIAN
-
-#define TCG_TARGET_NB_REGS 16
-
-enum {
-    TCG_REG_RAX = 0,
-    TCG_REG_RCX,
-    TCG_REG_RDX,
-    TCG_REG_RBX,
-    TCG_REG_RSP,
-    TCG_REG_RBP,
-    TCG_REG_RSI,
-    TCG_REG_RDI,
-    TCG_REG_R8,
-    TCG_REG_R9,
-    TCG_REG_R10,
-    TCG_REG_R11,
-    TCG_REG_R12,
-    TCG_REG_R13,
-    TCG_REG_R14,
-    TCG_REG_R15,
-};
-
-#define TCG_CT_CONST_S32 0x100
-#define TCG_CT_CONST_U32 0x200
-
-/* used for function call generation */
-#define TCG_REG_CALL_STACK TCG_REG_RSP
-#define TCG_TARGET_STACK_ALIGN 16
-#define TCG_TARGET_CALL_STACK_OFFSET 0
-
-/* optional instructions */
-#define TCG_TARGET_HAS_bswap16_i32
-#define TCG_TARGET_HAS_bswap16_i64
-#define TCG_TARGET_HAS_bswap32_i32
-#define TCG_TARGET_HAS_bswap32_i64
-#define TCG_TARGET_HAS_bswap64_i64
-#define TCG_TARGET_HAS_neg_i32
-#define TCG_TARGET_HAS_neg_i64
-#define TCG_TARGET_HAS_not_i32
-#define TCG_TARGET_HAS_not_i64
-#define TCG_TARGET_HAS_ext8s_i32
-#define TCG_TARGET_HAS_ext16s_i32
-#define TCG_TARGET_HAS_ext8s_i64
-#define TCG_TARGET_HAS_ext16s_i64
-#define TCG_TARGET_HAS_ext32s_i64
-#define TCG_TARGET_HAS_ext8u_i32
-#define TCG_TARGET_HAS_ext16u_i32
-#define TCG_TARGET_HAS_ext8u_i64
-#define TCG_TARGET_HAS_ext16u_i64
-#define TCG_TARGET_HAS_ext32u_i64
-#define TCG_TARGET_HAS_rot_i32
-#define TCG_TARGET_HAS_rot_i64
-
-// #define TCG_TARGET_HAS_andc_i32
-// #define TCG_TARGET_HAS_andc_i64
-// #define TCG_TARGET_HAS_orc_i32
-// #define TCG_TARGET_HAS_orc_i64
-
-#define TCG_TARGET_HAS_GUEST_BASE
-
-/* Note: must be synced with dyngen-exec.h */
-#define TCG_AREG0 TCG_REG_R14
-#define TCG_AREG1 TCG_REG_R15
-#define TCG_AREG2 TCG_REG_R12
-
-static inline void flush_icache_range(unsigned long start, unsigned long stop)
-{
-}
diff --git a/telephony/android_modem.c b/telephony/android_modem.c
index 4b06d69..f9730a3 100644
--- a/telephony/android_modem.c
+++ b/telephony/android_modem.c
@@ -596,9 +596,13 @@
     modem->sim = asimcard_create(base_port);
 
     sys_main_init();
-    register_savevm( "android_modem", 0, MODEM_DEV_STATE_SAVE_VERSION,
-                      android_modem_state_save,
-                      android_modem_state_load, modem);
+    register_savevm(NULL,
+                    "android_modem",
+                    0,
+                    MODEM_DEV_STATE_SAVE_VERSION,
+                    android_modem_state_save,
+                    android_modem_state_load,
+                    modem);
 
     aconfig_save_file( modem->nvram_config, modem->nvram_config_filename );
     return  modem;
diff --git a/telephony/sms.c b/telephony/sms.c
index 50ef715..812ac7c 100644
--- a/telephony/sms.c
+++ b/telephony/sms.c
@@ -13,6 +13,7 @@
 #include "gsm.h"
 #include <memory.h>
 #include <stdlib.h>
+#include <string.h>
 #include <assert.h>
 
 #define  DEBUG  1
diff --git a/translate-all.c b/translate-all.c
index d284062..80581bf 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -16,6 +16,12 @@
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <sys/types.h>
+#include <sys/mman.h>
+#endif
 #include <stdarg.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -24,37 +30,84 @@
 
 #include "config.h"
 
+#include "qemu-common.h"
 #define NO_CPU_IO_DEFS
 #include "cpu.h"
 #include "exec/exec-all.h"
 #include "disas/disas.h"
 #include "tcg.h"
+#include "exec/cputlb.h"
+#include "translate-all.h"
 #include "qemu/timer.h"
 
+//#define DEBUG_TB_INVALIDATE
+//#define DEBUG_FLUSH
+/* make various TB consistency checks */
+//#define DEBUG_TB_CHECK
+
+#if !defined(CONFIG_USER_ONLY)
+/* TB consistency checks only implemented for usermode emulation.  */
+#undef DEBUG_TB_CHECK
+#endif
+
+#define SMC_BITMAP_USE_THRESHOLD 10
+
+typedef struct PageDesc {
+    /* list of TBs intersecting this ram page */
+    TranslationBlock *first_tb;
+    /* in order to optimize self modifying code, we count the number
+       of lookups we do to a given page to use a bitmap */
+    unsigned int code_write_count;
+    uint8_t *code_bitmap;
+#if defined(CONFIG_USER_ONLY)
+    unsigned long flags;
+#endif
+} PageDesc;
+
+/* In system mode we want L1_MAP to be based on ram offsets,
+   while in user mode we want it to be based on virtual addresses.  */
+#if !defined(CONFIG_USER_ONLY)
+#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
+# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
+#else
+# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
+#endif
+#else
+# define L1_MAP_ADDR_SPACE_BITS  TARGET_VIRT_ADDR_SPACE_BITS
+#endif
+
+/* The bits remaining after N lower levels of page tables.  */
+#define V_L1_BITS_REM \
+    ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
+
+#if V_L1_BITS_REM < 4
+#define V_L1_BITS  (V_L1_BITS_REM + L2_BITS)
+#else
+#define V_L1_BITS  V_L1_BITS_REM
+#endif
+
+#define V_L1_SIZE  ((target_ulong)1 << V_L1_BITS)
+
+#define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
+
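/*
 * Worked example (illustrative, not part of this diff), assuming
 * TARGET_PAGE_BITS == 12 and L2_BITS == 10: a 32-bit address space leaves
 * 32 - 12 = 20 index bits, 20 % 10 == 0 < 4, so V_L1_BITS becomes 10 and
 * the map is l1_map[1024] -> PageDesc[1024].  A 36-bit physical space
 * leaves 24 bits, 24 % 10 == 4, so the top level keeps 4 bits and two
 * further 10-bit levels follow.
 */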
+uintptr_t qemu_real_host_page_size;
+uintptr_t qemu_host_page_size;
+uintptr_t qemu_host_page_mask;
+
+/* This is a multi-level map on the virtual address space.
+   The bottom level has pointers to PageDesc.  */
+static void *l1_map[V_L1_SIZE];
+static void* l1_phys_map[V_L1_SIZE];
+
 /* code generation context */
 TCGContext tcg_ctx;
 
-uint16_t gen_opc_buf[OPC_BUF_SIZE];
-TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE];
-
-target_ulong gen_opc_pc[OPC_BUF_SIZE];
-uint16_t gen_opc_icount[OPC_BUF_SIZE];
-uint8_t gen_opc_instr_start[OPC_BUF_SIZE];
-#if defined(TARGET_I386)
-uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
-#elif defined(TARGET_SPARC)
-target_ulong gen_opc_npc[OPC_BUF_SIZE];
-target_ulong gen_opc_jump_pc[2];
-#elif defined(TARGET_MIPS) || defined(TARGET_SH4)
-uint32_t gen_opc_hflags[OPC_BUF_SIZE];
-#endif
-
-#ifdef CONFIG_MEMCHECK
+#ifdef CONFIG_ANDROID_MEMCHECK
 /*
  * Memchecker code in this module copies TB PC <-> Guest PC map to the TB
  * descriptor after guest code has been translated in cpu_gen_init routine.
  */
-#include "memcheck/memcheck_api.h"
+#include "android/qemu/memcheck/memcheck_api.h"
 
 /* Array of (tb_pc, guest_pc) pairs, big enough for all translations. This
  * array is used to obtain guest PC address from a translated PC address.
@@ -63,7 +116,7 @@
 void** gen_opc_tpc2gpc_ptr = &gen_opc_tpc2gpc[0];
 /* Number of (tb_pc, guest_pc) pairs stored in gen_opc_tpc2gpc array. */
 unsigned int gen_opc_tpc2gpc_pairs;
-#endif  // CONFIG_MEMCHECK
+#endif  // CONFIG_ANDROID_MEMCHECK
 
 /* XXX: suppress that */
 unsigned long code_gen_max_block_size(void)
@@ -83,11 +136,12 @@
     return max;
 }
 
+static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
+                         tb_page_addr_t phys_page2);
+
 void cpu_gen_init(void)
 {
     tcg_context_init(&tcg_ctx);
-    tcg_set_frame(&tcg_ctx, TCG_AREG0, offsetof(CPUOldState, temp_buf),
-                  CPU_TEMP_BUF_NLONGS * sizeof(long));
 }
 
 /* return non zero if the very first instruction is invalid so that
@@ -144,7 +198,7 @@
     s->code_out_len += gen_code_size;
 #endif
 
-#ifdef CONFIG_MEMCHECK
+#ifdef CONFIG_ANDROID_MEMCHECK
     /* Save translated PC -> guest PC map into TB. */
     if (memcheck_enabled && gen_opc_tpc2gpc_pairs && is_cpu_user(env)) {
         tb->tpc2gpc =
@@ -156,7 +210,7 @@
         }
 
     }
-#endif  // CONFIG_MEMCHECK
+#endif  // CONFIG_ANDROID_MEMCHECK
 
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
@@ -171,8 +225,8 @@
 
 /* The cpu state corresponding to 'searched_pc' is restored.
  */
-bool cpu_restore_state(TranslationBlock *tb,
-                       CPUArchState *env, uintptr_t searched_pc)
+static int cpu_restore_state_from_tb(TranslationBlock *tb, CPUArchState *env,
+                                     uintptr_t searched_pc)
 {
     TCGContext *s = &tcg_ctx;
     int j;
@@ -210,11 +264,12 @@
 #endif
     j = tcg_gen_code_search_pc(s, (uint8_t *)tc_ptr, searched_pc - tc_ptr);
     if (j < 0)
-        return false;
+        return -1;
     /* now find start of instruction before */
-    while (gen_opc_instr_start[j] == 0)
+    while (s->gen_opc_instr_start[j] == 0) {
         j--;
-    env->icount_decr.u16.low -= gen_opc_icount[j];
+    }
+    env->icount_decr.u16.low -= s->gen_opc_icount[j];
 
     restore_state_to_opc(env, tb, j);
 
@@ -222,7 +277,1335 @@
     s->restore_time += profile_getclock() - ti;
     s->restore_count++;
 #endif
-    return true;
+    return 0;
+}
+
+bool cpu_restore_state(CPUArchState *env, uintptr_t retaddr)
+{
+    TranslationBlock *tb;
+
+    tb = tb_find_pc(retaddr);
+    if (tb) {
+        cpu_restore_state_from_tb(tb, env, retaddr);
+        return true;
+    }
+    return false;
+}
+
+#ifdef _WIN32
+static inline void map_exec(void *addr, long size)
+{
+    DWORD old_protect;
+    VirtualProtect(addr, size,
+                   PAGE_EXECUTE_READWRITE, &old_protect);
+}
+#else
+static inline void map_exec(void *addr, long size)
+{
+    unsigned long start, end, page_size;
+
+    page_size = getpagesize();
+    start = (unsigned long)addr;
+    start &= ~(page_size - 1);
+
+    end = (unsigned long)addr + size;
+    end += page_size - 1;
+    end &= ~(page_size - 1);
+
+    mprotect((void *)start, end - start,
+             PROT_READ | PROT_WRITE | PROT_EXEC);
+}
+#endif
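/*
 * Illustrative example (not part of this diff): mprotect() requires
 * page-aligned addresses, hence the rounding above.  With a 4 KiB page,
 * map_exec((void *)0x1234, 0x100) rounds start down to 0x1000 and end up
 * to 0x2000, so the whole containing page range becomes RWX.
 */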
+
+static void page_init(void)
+{
+    /* NOTE: we can always suppose that qemu_host_page_size >=
+       TARGET_PAGE_SIZE */
+#ifdef _WIN32
+    {
+        SYSTEM_INFO system_info;
+
+        GetSystemInfo(&system_info);
+        qemu_real_host_page_size = system_info.dwPageSize;
+    }
+#else
+    qemu_real_host_page_size = getpagesize();
+#endif
+    if (qemu_host_page_size == 0) {
+        qemu_host_page_size = qemu_real_host_page_size;
+    }
+    if (qemu_host_page_size < TARGET_PAGE_SIZE) {
+        qemu_host_page_size = TARGET_PAGE_SIZE;
+    }
+    qemu_host_page_mask = ~(qemu_host_page_size - 1);
+
+#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
+    {
+#ifdef HAVE_KINFO_GETVMMAP
+        struct kinfo_vmentry *freep;
+        int i, cnt;
+
+        freep = kinfo_getvmmap(getpid(), &cnt);
+        if (freep) {
+            mmap_lock();
+            for (i = 0; i < cnt; i++) {
+                unsigned long startaddr, endaddr;
+
+                startaddr = freep[i].kve_start;
+                endaddr = freep[i].kve_end;
+                if (h2g_valid(startaddr)) {
+                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
+
+                    if (h2g_valid(endaddr)) {
+                        endaddr = h2g(endaddr);
+                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
+                    } else {
+#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
+                        endaddr = ~0ul;
+                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
+#endif
+                    }
+                }
+            }
+            free(freep);
+            mmap_unlock();
+        }
+#else
+        FILE *f;
+
+        last_brk = (unsigned long)sbrk(0);
+
+        f = fopen("/compat/linux/proc/self/maps", "r");
+        if (f) {
+            mmap_lock();
+
+            do {
+                unsigned long startaddr, endaddr;
+                int n;
+
+                n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
+
+                if (n == 2 && h2g_valid(startaddr)) {
+                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
+
+                    if (h2g_valid(endaddr)) {
+                        endaddr = h2g(endaddr);
+                    } else {
+                        endaddr = ~0ul;
+                    }
+                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
+                }
+            } while (!feof(f));
+
+            fclose(f);
+            mmap_unlock();
+        }
+#endif
+    }
+#endif
+}
+
+static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
+{
+    PageDesc *pd;
+    void **lp;
+    int i;
+
+#if defined(CONFIG_USER_ONLY)
+    /* We can't use g_malloc because it may recurse into a locked mutex. */
+# define ALLOC(P, SIZE)                                 \
+    do {                                                \
+        P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,    \
+                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);   \
+    } while (0)
+#else
+# define ALLOC(P, SIZE) \
+    do { P = g_malloc0(SIZE); } while (0)
+#endif
+
+    /* Level 1.  Always allocated.  */
+    lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
+
+    /* Level 2..N-1.  */
+    for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
+        void **p = *lp;
+
+        if (p == NULL) {
+            if (!alloc) {
+                return NULL;
+            }
+            ALLOC(p, sizeof(void *) * L2_SIZE);
+            *lp = p;
+        }
+
+        lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
+    }
+
+    pd = *lp;
+    if (pd == NULL) {
+        if (!alloc) {
+            return NULL;
+        }
+        ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
+        *lp = pd;
+    }
+
+#undef ALLOC
+
+    return pd + (index & (L2_SIZE - 1));
+}
+
+static inline PageDesc *page_find(tb_page_addr_t index)
+{
+    return page_find_alloc(index, 0);
+}
+
+PhysPageDesc *phys_page_find_alloc(hwaddr index, int alloc)
+{
+    void **lp;
+    PhysPageDesc *pd;
+    int i;
+
+    /* Level 1. Always allocated. */
+    lp = l1_phys_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
+
+    /* Level 2..N-1 */
+    for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
+        void **p = *lp;
+
+        if (p == NULL) {
+            if (!alloc) {
+                return NULL;
+            }
+            p = g_malloc0(sizeof(void *) * L2_SIZE);
+            *lp = p;
+        }
+
+        lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
+    }
+
+    pd = *lp;
+    if (pd == NULL) {
+        if (!alloc) {
+            return NULL;
+        }
+        pd = g_malloc(sizeof(PhysPageDesc) * L2_SIZE);
+        *lp = pd;
+        for (i = 0; i < L2_SIZE; i++) {
+            pd[i].phys_offset = IO_MEM_UNASSIGNED;
+            pd[i].region_offset = (index + i) << TARGET_PAGE_BITS;
+        }
+    }
+    return ((PhysPageDesc *)pd) + (index & (L2_SIZE - 1));
+}
+
+PhysPageDesc *phys_page_find(hwaddr index)
+{
+    return phys_page_find_alloc(index, 0);
+}
+
+#if !defined(CONFIG_USER_ONLY)
+#define mmap_lock() do { } while (0)
+#define mmap_unlock() do { } while (0)
+#endif
+
+#if defined(CONFIG_USER_ONLY)
+/* Currently it is not recommended to allocate big chunks of data in
+   user mode. It will change when a dedicated libc will be used.  */
+/* ??? 64-bit hosts ought to have no problem mmaping data outside the
+   region in which the guest needs to run.  Revisit this.  */
+#define USE_STATIC_CODE_GEN_BUFFER
+#endif
+
+/* ??? Should configure for this, not list operating systems here.  */
+#if (defined(__linux__) \
+    || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
+    || defined(__DragonFly__) || defined(__OpenBSD__) \
+    || defined(__NetBSD__))
+# define USE_MMAP
+#endif
+
+/* Minimum size of the code gen buffer.  This number is randomly chosen,
+   but not so small that we can't have a fair number of TB's live.  */
+#define MIN_CODE_GEN_BUFFER_SIZE     (1024u * 1024)
+
+/* Maximum size of the code gen buffer we'd like to use.  Unless otherwise
+   indicated, this is constrained by the range of direct branches on the
+   host cpu, as used by the TCG implementation of goto_tb.  */
+#if defined(__x86_64__)
+# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
+#elif defined(__sparc__)
+# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
+#elif defined(__aarch64__)
+# define MAX_CODE_GEN_BUFFER_SIZE  (128ul * 1024 * 1024)
+#elif defined(__arm__)
+# define MAX_CODE_GEN_BUFFER_SIZE  (16u * 1024 * 1024)
+#elif defined(__s390x__)
+  /* We have a +- 4GB range on the branches; leave some slop.  */
+# define MAX_CODE_GEN_BUFFER_SIZE  (3ul * 1024 * 1024 * 1024)
+#else
+# define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
+#endif
+
+#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
+
+#define DEFAULT_CODE_GEN_BUFFER_SIZE \
+  (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
+   ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
+
+static inline size_t size_code_gen_buffer(size_t tb_size)
+{
+    /* Size the buffer.  */
+    if (tb_size == 0) {
+#ifdef USE_STATIC_CODE_GEN_BUFFER
+        tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
+#else
+        /* ??? Needs adjustments.  */
+        /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
+           static buffer, we could size this on RESERVED_VA, on the text
+           segment size of the executable, or continue to use the default.  */
+        tb_size = (unsigned long)(ram_size / 4);
+#endif
+    }
+    if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
+        tb_size = MIN_CODE_GEN_BUFFER_SIZE;
+    }
+    if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
+        tb_size = MAX_CODE_GEN_BUFFER_SIZE;
+    }
+    tcg_ctx.code_gen_buffer_size = tb_size;
+    return tb_size;
+}
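/*
 * Illustrative examples (not part of this diff), using the constants
 * defined above on an x86-64 host: a request of 0 with the static buffer
 * yields the 32 MB default; 512 KB is raised to the 1 MB minimum; and
 * 4 GB is clamped to the 2 GB MAX_CODE_GEN_BUFFER_SIZE so that direct
 * branches from TBs to the prologue stay in range.
 */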
+
+#ifdef USE_STATIC_CODE_GEN_BUFFER
+static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
+    __attribute__((aligned(CODE_GEN_ALIGN)));
+
+static inline void *alloc_code_gen_buffer(void)
+{
+    map_exec(static_code_gen_buffer, tcg_ctx.code_gen_buffer_size);
+    return static_code_gen_buffer;
+}
+#elif defined(USE_MMAP)
+static inline void *alloc_code_gen_buffer(void)
+{
+    int flags = MAP_PRIVATE | MAP_ANONYMOUS;
+    uintptr_t start = 0;
+    void *buf;
+
+    /* Constrain the position of the buffer based on the host cpu.
+       Note that these addresses are chosen in concert with the
+       addresses assigned in the relevant linker script file.  */
+# if defined(__PIE__) || defined(__PIC__)
+    /* Don't bother setting a preferred location if we're building
+       a position-independent executable.  We're more likely to get
+       an address near the main executable if we let the kernel
+       choose the address.  */
+# elif defined(__x86_64__) && defined(MAP_32BIT)
+    /* Force the memory down into low memory with the executable.
+       Leave the choice of exact location with the kernel.  */
+    flags |= MAP_32BIT;
+    /* Cannot expect to map more than 800MB in low memory.  */
+    if (tcg_ctx.code_gen_buffer_size > 800u * 1024 * 1024) {
+        tcg_ctx.code_gen_buffer_size = 800u * 1024 * 1024;
+    }
+# elif defined(__sparc__)
+    start = 0x40000000ul;
+# elif defined(__s390x__)
+    start = 0x90000000ul;
+# endif
+
+    buf = mmap((void *)start, tcg_ctx.code_gen_buffer_size,
+               PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
+    return buf == MAP_FAILED ? NULL : buf;
+}
+#else
+static inline void *alloc_code_gen_buffer(void)
+{
+    void *buf = g_malloc(tcg_ctx.code_gen_buffer_size);
+
+    if (buf) {
+        map_exec(buf, tcg_ctx.code_gen_buffer_size);
+    }
+    return buf;
+}
+#endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
+
+static inline void code_gen_alloc(size_t tb_size)
+{
+    tcg_ctx.code_gen_buffer_size = size_code_gen_buffer(tb_size);
+    tcg_ctx.code_gen_buffer = alloc_code_gen_buffer();
+    if (tcg_ctx.code_gen_buffer == NULL) {
+        fprintf(stderr, "Could not allocate dynamic translator buffer\n");
+        exit(1);
+    }
+
+    qemu_madvise(tcg_ctx.code_gen_buffer, tcg_ctx.code_gen_buffer_size,
+            QEMU_MADV_HUGEPAGE);
+
+    /* Steal room for the prologue at the end of the buffer.  This ensures
+       (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
+       from TB's to the prologue are going to be in range.  It also means
+       that we don't need to mark (additional) portions of the data segment
+       as executable.  */
+    tcg_ctx.code_gen_prologue = tcg_ctx.code_gen_buffer +
+            tcg_ctx.code_gen_buffer_size - 1024;
+    tcg_ctx.code_gen_buffer_size -= 1024;
+
+    tcg_ctx.code_gen_buffer_max_size = tcg_ctx.code_gen_buffer_size -
+        (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
+    tcg_ctx.code_gen_max_blocks = tcg_ctx.code_gen_buffer_size /
+            CODE_GEN_AVG_BLOCK_SIZE;
+    tcg_ctx.tb_ctx.tbs =
+            g_malloc(tcg_ctx.code_gen_max_blocks * sizeof(TranslationBlock));
+}
+
+/* Must be called before using the QEMU cpus. 'tb_size' is the size
+   (in bytes) allocated to the translation buffer. Zero means default
+   size. */
+void tcg_exec_init(unsigned long tb_size)
+{
+    cpu_gen_init();
+    code_gen_alloc(tb_size);
+    tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer;
+    page_init();
+#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
+    /* There's no guest base to take into account, so go ahead and
+       initialize the prologue now.  */
+    tcg_prologue_init(&tcg_ctx);
+#endif
+}
+
+bool tcg_enabled(void)
+{
+    return tcg_ctx.code_gen_buffer != NULL;
+}
+
+/* Allocate a new translation block. Flush the translation buffer if
+   too many translation blocks or too much generated code. */
+static TranslationBlock *tb_alloc(target_ulong pc)
+{
+    TranslationBlock *tb;
+
+    if (tcg_ctx.tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks ||
+        (tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) >=
+         tcg_ctx.code_gen_buffer_max_size) {
+        return NULL;
+    }
+    tb = &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs++];
+    tb->pc = pc;
+    tb->cflags = 0;
+#ifdef CONFIG_ANDROID_MEMCHECK
+    tb->tpc2gpc = NULL;
+    tb->tpc2gpc_pairs = 0;
+#endif  // CONFIG_ANDROID_MEMCHECK
+    return tb;
+}
+
+void tb_free(TranslationBlock *tb)
+{
+    /* In practice this is mostly used for single use temporary TB
+       Ignore the hard cases and just back up if this TB happens to
+       be the last one generated.  */
+    if (tcg_ctx.tb_ctx.nb_tbs > 0 &&
+            tb == &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs - 1]) {
+        tcg_ctx.code_gen_ptr = tb->tc_ptr;
+        tcg_ctx.tb_ctx.nb_tbs--;
+    }
+}
+
+static inline void invalidate_page_bitmap(PageDesc *p)
+{
+    if (p->code_bitmap) {
+        g_free(p->code_bitmap);
+        p->code_bitmap = NULL;
+    }
+    p->code_write_count = 0;
+}
+
+/* Set to NULL all the 'first_tb' fields in all PageDescs. */
+static void page_flush_tb_1(int level, void **lp)
+{
+    int i;
+
+    if (*lp == NULL) {
+        return;
+    }
+    if (level == 0) {
+        PageDesc *pd = *lp;
+
+        for (i = 0; i < L2_SIZE; ++i) {
+            pd[i].first_tb = NULL;
+            invalidate_page_bitmap(pd + i);
+        }
+    } else {
+        void **pp = *lp;
+
+        for (i = 0; i < L2_SIZE; ++i) {
+            page_flush_tb_1(level - 1, pp + i);
+        }
+    }
+}
+
+static void page_flush_tb(void)
+{
+    int i;
+
+    for (i = 0; i < V_L1_SIZE; i++) {
+        page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
+    }
+}
+
+/* flush all the translation blocks */
+/* XXX: tb_flush is currently not thread safe */
+void tb_flush(CPUArchState *env1)
+{
+    CPUState *cpu;
+#if defined(DEBUG_FLUSH)
+    printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
+           (unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer),
+           tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.tb_ctx.nb_tbs > 0 ?
+           ((unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer)) /
+           tcg_ctx.tb_ctx.nb_tbs : 0);
+#endif
+    if ((unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer)
+        > tcg_ctx.code_gen_buffer_size) {
+        cpu_abort(env1, "Internal error: code buffer overflow\n");
+    }
+    tcg_ctx.tb_ctx.nb_tbs = 0;
+
+    CPU_FOREACH(cpu) {
+        CPUArchState *env = cpu->env_ptr;
+#ifdef CONFIG_ANDROID_MEMCHECK
+        int tb_to_clean;
+        for (tb_to_clean = 0; tb_to_clean < TB_JMP_CACHE_SIZE; tb_to_clean++) {
+            if (env->tb_jmp_cache[tb_to_clean] != NULL &&
+                env->tb_jmp_cache[tb_to_clean]->tpc2gpc != NULL) {
+                g_free(env->tb_jmp_cache[tb_to_clean]->tpc2gpc);
+                env->tb_jmp_cache[tb_to_clean]->tpc2gpc = NULL;
+                env->tb_jmp_cache[tb_to_clean]->tpc2gpc_pairs = 0;
+            }
+        }
+#endif  // CONFIG_ANDROID_MEMCHECK
+        memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
+    }
+
+    memset(tcg_ctx.tb_ctx.tb_phys_hash, 0,
+            CODE_GEN_PHYS_HASH_SIZE * sizeof(void *));
+    page_flush_tb();
+
+    tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer;
+    /* XXX: flush processor icache at this point if cache flush is
+       expensive */
+    tcg_ctx.tb_ctx.tb_flush_count++;
+}
+
+#ifdef DEBUG_TB_CHECK
+
+static void tb_invalidate_check(target_ulong address)
+{
+    TranslationBlock *tb;
+    int i;
+
+    address &= TARGET_PAGE_MASK;
+    for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
+        for (tb = tb_ctx.tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
+            if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
+                  address >= tb->pc + tb->size)) {
+                printf("ERROR invalidate: address=" TARGET_FMT_lx
+                       " PC=%08lx size=%04x\n",
+                       address, (long)tb->pc, tb->size);
+            }
+        }
+    }
+}
+
+/* verify that all the pages have correct rights for code */
+static void tb_page_check(void)
+{
+    TranslationBlock *tb;
+    int i, flags1, flags2;
+
+    for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
+        for (tb = tcg_ctx.tb_ctx.tb_phys_hash[i]; tb != NULL;
+                tb = tb->phys_hash_next) {
+            flags1 = page_get_flags(tb->pc);
+            flags2 = page_get_flags(tb->pc + tb->size - 1);
+            if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
+                printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
+                       (long)tb->pc, tb->size, flags1, flags2);
+            }
+        }
+    }
+}
+
+#endif
+
+static inline void tb_hash_remove(TranslationBlock **ptb, TranslationBlock *tb)
+{
+    TranslationBlock *tb1;
+
+    for (;;) {
+        tb1 = *ptb;
+        if (tb1 == tb) {
+            *ptb = tb1->phys_hash_next;
+            break;
+        }
+        ptb = &tb1->phys_hash_next;
+    }
+}
+
+static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
+{
+    TranslationBlock *tb1;
+    unsigned int n1;
+
+    for (;;) {
+        tb1 = *ptb;
+        n1 = (uintptr_t)tb1 & 3;
+        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
+        if (tb1 == tb) {
+            *ptb = tb1->page_next[n1];
+            break;
+        }
+        ptb = &tb1->page_next[n1];
+    }
+}
+
+static inline void tb_jmp_remove(TranslationBlock *tb, int n)
+{
+    TranslationBlock *tb1, **ptb;
+    unsigned int n1;
+
+    ptb = &tb->jmp_next[n];
+    tb1 = *ptb;
+    if (tb1) {
+        /* find tb(n) in circular list */
+        for (;;) {
+            tb1 = *ptb;
+            n1 = (uintptr_t)tb1 & 3;
+            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
+            if (n1 == n && tb1 == tb) {
+                break;
+            }
+            if (n1 == 2) {
+                ptb = &tb1->jmp_first;
+            } else {
+                ptb = &tb1->jmp_next[n1];
+            }
+        }
+        /* now we can suppress tb(n) from the list */
+        *ptb = tb->jmp_next[n];
+
+        tb->jmp_next[n] = NULL;
+    }
+}
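/*
 * Illustrative note (not part of this diff): the jump lists walked above
 * store the edge number in the low two bits of each pointer.  A link
 * (tb | n) with n in {0, 1} continues through tb->jmp_next[n], while
 * (tb | 2) is the value kept in jmp_first and marks the end of the
 * circular list.  Hypothetical helpers for decoding such a link:
 */
#define TB_JMP_LINK_TB(link)   ((TranslationBlock *)((uintptr_t)(link) & ~3))
#define TB_JMP_LINK_EDGE(link) ((int)((uintptr_t)(link) & 3))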
+
+/* reset the jump entry 'n' of a TB so that it is not chained to
+   another TB */
+static inline void tb_reset_jump(TranslationBlock *tb, int n)
+{
+    tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
+}
+
+/* invalidate one TB */
+void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
+{
+    CPUState *cpu;
+    PageDesc *p;
+    unsigned int h, n1;
+    tb_page_addr_t phys_pc;
+    TranslationBlock *tb1, *tb2;
+
+    /* remove the TB from the hash list */
+    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
+    h = tb_phys_hash_func(phys_pc);
+    tb_hash_remove(&tcg_ctx.tb_ctx.tb_phys_hash[h], tb);
+
+    /* remove the TB from the page list */
+    if (tb->page_addr[0] != page_addr) {
+        p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
+        tb_page_remove(&p->first_tb, tb);
+        invalidate_page_bitmap(p);
+    }
+    if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
+        p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
+        tb_page_remove(&p->first_tb, tb);
+        invalidate_page_bitmap(p);
+    }
+
+    tcg_ctx.tb_ctx.tb_invalidated_flag = 1;
+
+    /* remove the TB from the hash list */
+    h = tb_jmp_cache_hash_func(tb->pc);
+    CPU_FOREACH(cpu) {
+        CPUArchState *env = cpu->env_ptr;
+        if (env->tb_jmp_cache[h] == tb) {
+            env->tb_jmp_cache[h] = NULL;
+        }
+    }
+
+    /* suppress this TB from the two jump lists */
+    tb_jmp_remove(tb, 0);
+    tb_jmp_remove(tb, 1);
+
+    /* suppress any remaining jumps to this TB */
+    tb1 = tb->jmp_first;
+    for (;;) {
+        n1 = (uintptr_t)tb1 & 3;
+        if (n1 == 2) {
+            break;
+        }
+        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
+        tb2 = tb1->jmp_next[n1];
+        tb_reset_jump(tb1, n1);
+        tb1->jmp_next[n1] = NULL;
+        tb1 = tb2;
+    }
+    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
+
+#ifdef CONFIG_ANDROID_MEMCHECK
+    if (tb->tpc2gpc != NULL) {
+        g_free(tb->tpc2gpc);
+        tb->tpc2gpc = NULL;
+        tb->tpc2gpc_pairs = 0;
+    }
+#endif  // CONFIG_ANDROID_MEMCHECK
+
+    tcg_ctx.tb_ctx.tb_phys_invalidate_count++;
+}
+
+static inline void set_bits(uint8_t *tab, int start, int len)
+{
+    int end, mask, end1;
+
+    end = start + len;
+    tab += start >> 3;
+    mask = 0xff << (start & 7);
+    if ((start & ~7) == (end & ~7)) {
+        if (start < end) {
+            mask &= ~(0xff << (end & 7));
+            *tab |= mask;
+        }
+    } else {
+        *tab++ |= mask;
+        start = (start + 8) & ~7;
+        end1 = end & ~7;
+        while (start < end1) {
+            *tab++ = 0xff;
+            start += 8;
+        }
+        if (start < end) {
+            mask = ~(0xff << (end & 7));
+            *tab |= mask;
+        }
+    }
+}
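/*
 * Worked example (illustrative, not part of this diff):
 * set_bits(tab, 5, 6) marks bits 5..10.  end == 11, so the first byte gets
 * mask 0xff << 5 == 0xe0 (bits 5-7), no full middle bytes are written, and
 * the second byte gets ~(0xff << (11 & 7)) == 0x07 (bits 8-10).
 */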
+
+static void build_page_bitmap(PageDesc *p)
+{
+    int n, tb_start, tb_end;
+    TranslationBlock *tb;
+
+    p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
+
+    tb = p->first_tb;
+    while (tb != NULL) {
+        n = (uintptr_t)tb & 3;
+        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
+        /* NOTE: this is subtle as a TB may span two physical pages */
+        if (n == 0) {
+            /* NOTE: tb_end may be after the end of the page, but
+               it is not a problem */
+            tb_start = tb->pc & ~TARGET_PAGE_MASK;
+            tb_end = tb_start + tb->size;
+            if (tb_end > TARGET_PAGE_SIZE) {
+                tb_end = TARGET_PAGE_SIZE;
+            }
+        } else {
+            tb_start = 0;
+            tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
+        }
+        set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
+        tb = tb->page_next[n];
+    }
+}
+
+TranslationBlock *tb_gen_code(CPUArchState *env,
+                              target_ulong pc, target_ulong cs_base,
+                              int flags, int cflags)
+{
+    TranslationBlock *tb;
+    uint8_t *tc_ptr;
+    tb_page_addr_t phys_pc, phys_page2;
+    target_ulong virt_page2;
+    int code_gen_size;
+
+    phys_pc = get_page_addr_code(env, pc);
+    tb = tb_alloc(pc);
+    if (!tb) {
+        /* flush must be done */
+        tb_flush(env);
+        /* cannot fail at this point */
+        tb = tb_alloc(pc);
+        /* Don't forget to invalidate previous TB info.  */
+        tcg_ctx.tb_ctx.tb_invalidated_flag = 1;
+    }
+    tc_ptr = tcg_ctx.code_gen_ptr;
+    tb->tc_ptr = tc_ptr;
+    tb->cs_base = cs_base;
+    tb->flags = flags;
+    tb->cflags = cflags;
+    cpu_gen_code(env, tb, &code_gen_size);
+    tcg_ctx.code_gen_ptr = (void *)(((uintptr_t)tcg_ctx.code_gen_ptr +
+            code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
+
+    /* check next page if needed */
+    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
+    phys_page2 = -1;
+    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
+        phys_page2 = get_page_addr_code(env, virt_page2);
+    }
+    tb_link_page(tb, phys_pc, phys_page2);
+    return tb;
+}
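/*
 * Illustrative example (not part of the original patch), assuming a 4 KiB
 * TARGET_PAGE_SIZE: for pc == 0x1ffa and a generated tb->size of 12 guest
 * bytes, virt_page2 == (0x1ffa + 11) & ~0xfff == 0x2000, which differs from
 * pc & TARGET_PAGE_MASK (0x1000). The block therefore spans two pages,
 * phys_page2 is resolved for 0x2000, and tb_link_page() registers the TB on
 * both pages' TB lists so that a write to either page invalidates it.
 */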
+
+/*
+ * Invalidate all TBs which intersect with the target physical address range
+ * [start;end[. NOTE: start and end may refer to *different* physical pages.
+ * 'is_cpu_write_access' should be true if called from a real cpu write
+ * access: the virtual CPU will exit the current TB if code is modified inside
+ * this TB.
+ */
+void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
+                              int is_cpu_write_access)
+{
+    while (start < end) {
+        tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
+        start &= TARGET_PAGE_MASK;
+        start += TARGET_PAGE_SIZE;
+    }
+}
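/*
 * Illustrative example (not part of the original patch), assuming a 4 KiB
 * TARGET_PAGE_SIZE: tb_invalidate_phys_range(0x1ff0, 0x2010, 0) performs
 * two iterations of the loop above, calling
 * tb_invalidate_phys_page_range(0x1ff0, 0x2010, 0) for the page at 0x1000
 * and then, after start is rounded down and advanced to 0x2000,
 * tb_invalidate_phys_page_range(0x2000, 0x2010, 0) for the page at 0x2000.
 */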
+
+/*
+ * Invalidate all TBs which intersect with the target physical address range
+ * [start;end[. NOTE: start and end must refer to the *same* physical page.
+ * 'is_cpu_write_access' should be true if called from a real cpu write
+ * access: the virtual CPU will exit the current TB if code is modified inside
+ * this TB.
+ */
+void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
+                                   int is_cpu_write_access)
+{
+    TranslationBlock *tb, *tb_next, *saved_tb;
+    CPUState *cpu = current_cpu;
+    CPUArchState *env = cpu ? cpu->env_ptr : NULL;
+    tb_page_addr_t tb_start, tb_end;
+    PageDesc *p;
+    int n;
+#ifdef TARGET_HAS_PRECISE_SMC
+    int current_tb_not_found = is_cpu_write_access;
+    TranslationBlock *current_tb = NULL;
+    int current_tb_modified = 0;
+    target_ulong current_pc = 0;
+    target_ulong current_cs_base = 0;
+    int current_flags = 0;
+#endif /* TARGET_HAS_PRECISE_SMC */
+
+    p = page_find(start >> TARGET_PAGE_BITS);
+    if (!p) {
+        return;
+    }
+    if (!p->code_bitmap &&
+        ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
+        is_cpu_write_access) {
+        /* build code bitmap */
+        build_page_bitmap(p);
+    }
+
+    /* we remove all the TBs in the range [start, end[ */
+    /* XXX: see if in some cases it could be faster to invalidate all
+       the code */
+    tb = p->first_tb;
+    while (tb != NULL) {
+        n = (uintptr_t)tb & 3;
+        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
+        tb_next = tb->page_next[n];
+        /* NOTE: this is subtle as a TB may span two physical pages */
+        if (n == 0) {
+            /* NOTE: tb_end may be after the end of the page, but
+               it is not a problem */
+            tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
+            tb_end = tb_start + tb->size;
+        } else {
+            tb_start = tb->page_addr[1];
+            tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
+        }
+        if (!(tb_end <= start || tb_start >= end)) {
+#ifdef TARGET_HAS_PRECISE_SMC
+            if (current_tb_not_found) {
+                current_tb_not_found = 0;
+                current_tb = NULL;
+                if (env->mem_io_pc) {
+                    /* now we have a real cpu fault */
+                    current_tb = tb_find_pc(env->mem_io_pc);
+                }
+            }
+            if (current_tb == tb &&
+                (current_tb->cflags & CF_COUNT_MASK) != 1) {
+                /* If we are modifying the current TB, we must stop
+                its execution. We could be more precise by checking
+                that the modification is after the current PC, but it
+                would require a specialized function to partially
+                restore the CPU state */
+
+                current_tb_modified = 1;
+                cpu_restore_state_from_tb(current_tb, env, env->mem_io_pc);
+                cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
+                                     &current_flags);
+            }
+#endif /* TARGET_HAS_PRECISE_SMC */
+            /* we need to do that to handle the case where a signal
+               occurs while doing tb_phys_invalidate() */
+            saved_tb = NULL;
+            if (env) {
+                saved_tb = env->current_tb;
+                env->current_tb = NULL;
+            }
+            tb_phys_invalidate(tb, -1);
+            if (env) {
+                env->current_tb = saved_tb;
+                if (cpu->interrupt_request && env->current_tb) {
+                    cpu_interrupt(cpu, cpu->interrupt_request);
+                }
+            }
+        }
+        tb = tb_next;
+    }
+#if !defined(CONFIG_USER_ONLY)
+    /* if no code remaining, no need to continue to use slow writes */
+    if (!p->first_tb) {
+        invalidate_page_bitmap(p);
+        if (is_cpu_write_access) {
+            tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
+        }
+    }
+#endif
+#ifdef TARGET_HAS_PRECISE_SMC
+    if (current_tb_modified) {
+        /* we generate a block containing just the instruction
+           modifying the memory. It will ensure that it cannot modify
+           itself */
+        env->current_tb = NULL;
+        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
+        cpu_resume_from_signal(env, NULL);
+    }
+#endif
+}
+
+/* len must be <= 8 and start must be a multiple of len */
+void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
+{
+    PageDesc *p;
+    int offset, b;
+
+#if 0
+    if (1) {
+        qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
+                  cpu_single_env->mem_io_vaddr, len,
+                  cpu_single_env->eip,
+                  cpu_single_env->eip +
+                  (intptr_t)cpu_single_env->segs[R_CS].base);
+    }
+#endif
+    p = page_find(start >> TARGET_PAGE_BITS);
+    if (!p) {
+        return;
+    }
+    if (p->code_bitmap) {
+        offset = start & ~TARGET_PAGE_MASK;
+        b = p->code_bitmap[offset >> 3] >> (offset & 7);
+        if (b & ((1 << len) - 1)) {
+            goto do_invalidate;
+        }
+    } else {
+    do_invalidate:
+        tb_invalidate_phys_page_range(start, start + len, 1);
+    }
+}
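/*
 * Illustrative note (not part of the original patch): the fast path above
 * uses the per-page code bitmap built by build_page_bitmap(). For a 4-byte
 * guest store at page offset 0x20, it tests bits 0x20..0x23 of
 * p->code_bitmap and falls through to the tb_invalidate_phys_page_range()
 * slow path only if at least one of those bytes is covered by translated
 * code.
 */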
+
+void tb_invalidate_phys_page_fast0(hwaddr start, int len)
+{
+    tb_invalidate_phys_page_fast(start, len);
+}
+
+#if !defined(CONFIG_SOFTMMU)
+static void tb_invalidate_phys_page(tb_page_addr_t addr,
+                                    uintptr_t pc, void *puc,
+                                    bool locked)
+{
+    TranslationBlock *tb;
+    PageDesc *p;
+    int n;
+#ifdef TARGET_HAS_PRECISE_SMC
+    TranslationBlock *current_tb = NULL;
+    CPUArchState *env = cpu_single_env;
+    int current_tb_modified = 0;
+    target_ulong current_pc = 0;
+    target_ulong current_cs_base = 0;
+    int current_flags = 0;
+#endif
+
+    addr &= TARGET_PAGE_MASK;
+    p = page_find(addr >> TARGET_PAGE_BITS);
+    if (!p) {
+        return;
+    }
+    tb = p->first_tb;
+#ifdef TARGET_HAS_PRECISE_SMC
+    if (tb && pc != 0) {
+        current_tb = tb_find_pc(pc);
+    }
+#endif
+    while (tb != NULL) {
+        n = (uintptr_t)tb & 3;
+        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
+#ifdef TARGET_HAS_PRECISE_SMC
+        if (current_tb == tb &&
+            (current_tb->cflags & CF_COUNT_MASK) != 1) {
+                /* If we are modifying the current TB, we must stop
+                   its execution. We could be more precise by checking
+                   that the modification is after the current PC, but it
+                   would require a specialized function to partially
+                   restore the CPU state */
+
+            current_tb_modified = 1;
+            cpu_restore_state_from_tb(current_tb, env, pc);
+            cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
+                                 &current_flags);
+        }
+#endif /* TARGET_HAS_PRECISE_SMC */
+        tb_phys_invalidate(tb, addr);
+        tb = tb->page_next[n];
+    }
+    p->first_tb = NULL;
+#ifdef TARGET_HAS_PRECISE_SMC
+    if (current_tb_modified) {
+        /* we generate a block containing just the instruction
+           modifying the memory. It will ensure that it cannot modify
+           itself */
+        env->current_tb = NULL;
+        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
+        if (locked) {
+            mmap_unlock();
+        }
+        cpu_resume_from_signal(env, puc);
+    }
+#endif
+}
+#endif
+
+/* add the tb in the target page and protect it if necessary */
+static inline void tb_alloc_page(TranslationBlock *tb,
+                                 unsigned int n, tb_page_addr_t page_addr)
+{
+    PageDesc *p;
+#ifndef CONFIG_USER_ONLY
+    bool page_already_protected;
+#endif
+
+    tb->page_addr[n] = page_addr;
+    p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
+    tb->page_next[n] = p->first_tb;
+#ifndef CONFIG_USER_ONLY
+    page_already_protected = p->first_tb != NULL;
+#endif
+    p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
+    invalidate_page_bitmap(p);
+
+#if defined(TARGET_HAS_SMC) || 1
+
+#if defined(CONFIG_USER_ONLY)
+    if (p->flags & PAGE_WRITE) {
+        target_ulong addr;
+        PageDesc *p2;
+        int prot;
+
+        /* force the host page as non writable (writes will have a
+           page fault + mprotect overhead) */
+        page_addr &= qemu_host_page_mask;
+        prot = 0;
+        for (addr = page_addr; addr < page_addr + qemu_host_page_size;
+            addr += TARGET_PAGE_SIZE) {
+
+            p2 = page_find(addr >> TARGET_PAGE_BITS);
+            if (!p2) {
+                continue;
+            }
+            prot |= p2->flags;
+            p2->flags &= ~PAGE_WRITE;
+        }
+        mprotect(g2h(page_addr), qemu_host_page_size,
+                 (prot & PAGE_BITS) & ~PAGE_WRITE);
+#ifdef DEBUG_TB_INVALIDATE
+        printf("protecting code page: 0x" TARGET_FMT_lx "\n",
+               page_addr);
+#endif
+    }
+#else
+    /* if some code is already present, then the pages are already
+       protected. So we handle the case where only the first TB is
+       allocated in a physical page */
+    if (!page_already_protected) {
+        tlb_protect_code(page_addr);
+    }
+#endif
+
+#endif /* TARGET_HAS_SMC */
+}
+
+/* add a new TB and link it to the physical page tables. phys_page2 is
+   (-1) to indicate that only one page contains the TB. */
+static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
+                         tb_page_addr_t phys_page2)
+{
+    unsigned int h;
+    TranslationBlock **ptb;
+
+    /* Grab the mmap lock to stop another thread invalidating this TB
+       before we are done.  */
+    mmap_lock();
+    /* add in the physical hash table */
+    h = tb_phys_hash_func(phys_pc);
+    ptb = &tcg_ctx.tb_ctx.tb_phys_hash[h];
+    tb->phys_hash_next = *ptb;
+    *ptb = tb;
+
+    /* add in the page list */
+    tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
+    if (phys_page2 != -1) {
+        tb_alloc_page(tb, 1, phys_page2);
+    } else {
+        tb->page_addr[1] = -1;
+    }
+
+    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
+    tb->jmp_next[0] = NULL;
+    tb->jmp_next[1] = NULL;
+
+    /* init original jump addresses */
+    if (tb->tb_next_offset[0] != 0xffff) {
+        tb_reset_jump(tb, 0);
+    }
+    if (tb->tb_next_offset[1] != 0xffff) {
+        tb_reset_jump(tb, 1);
+    }
+
+#ifdef DEBUG_TB_CHECK
+    tb_page_check();
+#endif
+    mmap_unlock();
+}
+
+/* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
+   tb[1].tc_ptr. Return NULL if not found */
+TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
+{
+    int m_min, m_max, m;
+    uintptr_t v;
+    TranslationBlock *tb;
+
+    if (tcg_ctx.tb_ctx.nb_tbs <= 0) {
+        return NULL;
+    }
+    if (tc_ptr < (uintptr_t)tcg_ctx.code_gen_buffer ||
+        tc_ptr >= (uintptr_t)tcg_ctx.code_gen_ptr) {
+        return NULL;
+    }
+    /* binary search (cf Knuth) */
+    m_min = 0;
+    m_max = tcg_ctx.tb_ctx.nb_tbs - 1;
+    while (m_min <= m_max) {
+        m = (m_min + m_max) >> 1;
+        tb = &tcg_ctx.tb_ctx.tbs[m];
+        v = (uintptr_t)tb->tc_ptr;
+        if (v == tc_ptr) {
+            return tb;
+        } else if (tc_ptr < v) {
+            m_max = m - 1;
+        } else {
+            m_min = m + 1;
+        }
+    }
+    return &tcg_ctx.tb_ctx.tbs[m_max];
+}
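/*
 * Illustrative note (not part of the original patch): tbs[] entries are
 * allocated in the same order as their generated code in code_gen_buffer,
 * so tc_ptr is non-decreasing and the binary search returns the last TB
 * whose tc_ptr is <= the given host address, i.e. the block whose
 * translated code contains it. For example, with tbs[0].tc_ptr == buf and
 * tbs[1].tc_ptr == buf + 0x100, tb_find_pc((uintptr_t)buf + 0x80) returns
 * &tbs[0].
 */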
+
+#ifndef CONFIG_ANDROID
+#if defined(TARGET_HAS_ICE) && !defined(CONFIG_USER_ONLY)
+void tb_invalidate_phys_addr(hwaddr addr)
+{
+    ram_addr_t ram_addr;
+    MemoryRegion *mr;
+    hwaddr l = 1;
+
+    mr = address_space_translate(&address_space_memory, addr, &addr, &l, false);
+    if (!(memory_region_is_ram(mr)
+          || memory_region_is_romd(mr))) {
+        return;
+    }
+    ram_addr = (memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK)
+        + addr;
+    tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
+}
+#endif /* TARGET_HAS_ICE && !defined(CONFIG_USER_ONLY) */
+
+void tb_check_watchpoint(CPUArchState *env)
+{
+    TranslationBlock *tb;
+
+    tb = tb_find_pc(env->mem_io_pc);
+    if (!tb) {
+        cpu_abort(env, "check_watchpoint: could not find TB for pc=%p",
+                  (void *)env->mem_io_pc);
+    }
+    cpu_restore_state_from_tb(tb, env, env->mem_io_pc);
+    tb_phys_invalidate(tb, -1);
+}
+#endif  // !CONFIG_ANDROID
+
+#ifndef CONFIG_USER_ONLY
+/* mask must never be zero, except for A20 change call */
+void cpu_interrupt(CPUState *cpu, int mask)
+{
+    CPUArchState *env = cpu->env_ptr;
+    int old_mask;
+
+    old_mask = cpu->interrupt_request;
+    cpu->interrupt_request |= mask;
+
+    /*
+     * If called from iothread context, wake the target cpu in
+     * case it's halted.
+     */
+    if (!qemu_cpu_is_self(cpu)) {
+        qemu_cpu_kick(cpu);
+        return;
+    }
+
+    if (use_icount) {
+        env->icount_decr.u16.high = 0xffff;
+        if (!can_do_io(env)
+            && (mask & ~old_mask) != 0) {
+            cpu_abort(env, "Raised interrupt while not in I/O function");
+        }
+    } else {
+        // cpu->tcg_exit_req = 1;
+        cpu_unlink_tb(env);
+    }
+}
+
+static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
+{
+    TranslationBlock *tb1, *tb_next, **ptb;
+    unsigned int n1;
+
+    tb1 = tb->jmp_next[n];
+    if (tb1 != NULL) {
+        /* find head of list */
+        for (;;) {
+            n1 = (uintptr_t)tb1 & 3;
+            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
+            if (n1 == 2) {
+                break;
+            }
+            tb1 = tb1->jmp_next[n1];
+        }
+        /* we are now sure that tb jumps to tb1 */
+        tb_next = tb1;
+
+        /* remove tb from the jmp_first list */
+        ptb = &tb_next->jmp_first;
+        for (;;) {
+            tb1 = *ptb;
+            n1 = (uintptr_t)tb1 & 3;
+            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
+            if (n1 == n && tb1 == tb) {
+                break;
+            }
+            ptb = &tb1->jmp_next[n1];
+        }
+        *ptb = tb->jmp_next[n];
+        tb->jmp_next[n] = NULL;
+
+        /* suppress the jump to next tb in generated code */
+        tb_reset_jump(tb, n);
+
+        /* suppress jumps in the tb on which we could have jumped */
+        tb_reset_jump_recursive(tb_next);
+    }
+}
+
+void tb_reset_jump_recursive(TranslationBlock *tb)
+{
+    tb_reset_jump_recursive2(tb, 0);
+    tb_reset_jump_recursive2(tb, 1);
+}
+
+/* in deterministic execution mode, instructions doing device I/Os
+   must be at the end of the TB */
+void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
+{
+    TranslationBlock *tb;
+    uint32_t n, cflags;
+    target_ulong pc, cs_base;
+    uint64_t flags;
+
+    tb = tb_find_pc(retaddr);
+    if (!tb) {
+        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
+                  (void *)retaddr);
+    }
+    n = env->icount_decr.u16.low + tb->icount;
+    cpu_restore_state_from_tb(tb, env, retaddr);
+    /* Calculate how many instructions had been executed before the fault
+       occurred.  */
+    n = n - env->icount_decr.u16.low;
+    /* Generate a new TB ending on the I/O insn.  */
+    n++;
+    /* On MIPS and SH, delay slot instructions can only be restarted if
+       they were already the first instruction in the TB.  If this is not
+       the first instruction in a TB then re-execute the preceding
+       branch.  */
+#if defined(TARGET_MIPS)
+    if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
+        env->active_tc.PC -= 4;
+        env->icount_decr.u16.low++;
+        env->hflags &= ~MIPS_HFLAG_BMASK;
+    }
+#elif defined(TARGET_SH4)
+    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
+            && n > 1) {
+        env->pc -= 2;
+        env->icount_decr.u16.low++;
+        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
+    }
+#endif
+    /* This should never happen.  */
+    if (n > CF_COUNT_MASK) {
+        cpu_abort(env, "TB too big during recompile");
+    }
+
+    cflags = n | CF_LAST_IO;
+    pc = tb->pc;
+    cs_base = tb->cs_base;
+    flags = tb->flags;
+    tb_phys_invalidate(tb, -1);
+    /* FIXME: In theory this could raise an exception.  In practice
+       we have already translated the block once so it's probably ok.  */
+    tb_gen_code(env, pc, cs_base, flags, cflags);
+    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
+       the first in the TB) then we end up generating a whole new TB and
+       repeating the fault, which is horribly inefficient.
+       Better would be to execute just this insn uncached, or generate a
+       second new TB.  */
+    cpu_resume_from_signal(env, NULL);
 }
 
 void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
@@ -232,10 +1615,348 @@
     /* Discard jump cache entries for any tb which might potentially
        overlap the flushed page.  */
     i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
-    memset (&env->tb_jmp_cache[i], 0,
-            TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
+    memset(&env->tb_jmp_cache[i], 0,
+           TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
 
     i = tb_jmp_cache_hash_page(addr);
-    memset (&env->tb_jmp_cache[i], 0,
-            TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
+    memset(&env->tb_jmp_cache[i], 0,
+           TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
 }
+
+void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
+{
+    int i, target_code_size, max_target_code_size;
+    int direct_jmp_count, direct_jmp2_count, cross_page;
+    TranslationBlock *tb;
+
+    target_code_size = 0;
+    max_target_code_size = 0;
+    cross_page = 0;
+    direct_jmp_count = 0;
+    direct_jmp2_count = 0;
+    for (i = 0; i < tcg_ctx.tb_ctx.nb_tbs; i++) {
+        tb = &tcg_ctx.tb_ctx.tbs[i];
+        target_code_size += tb->size;
+        if (tb->size > max_target_code_size) {
+            max_target_code_size = tb->size;
+        }
+        if (tb->page_addr[1] != -1) {
+            cross_page++;
+        }
+        if (tb->tb_next_offset[0] != 0xffff) {
+            direct_jmp_count++;
+            if (tb->tb_next_offset[1] != 0xffff) {
+                direct_jmp2_count++;
+            }
+        }
+    }
+    /* XXX: avoid using doubles ? */
+    cpu_fprintf(f, "Translation buffer state:\n");
+    cpu_fprintf(f, "gen code size       %td/%zd\n",
+                tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer,
+                tcg_ctx.code_gen_buffer_max_size);
+    cpu_fprintf(f, "TB count            %d/%d\n",
+            tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.code_gen_max_blocks);
+    cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
+            tcg_ctx.tb_ctx.nb_tbs ? target_code_size /
+                    tcg_ctx.tb_ctx.nb_tbs : 0,
+            max_target_code_size);
+    cpu_fprintf(f, "TB avg host size    %td bytes (expansion ratio: %0.1f)\n",
+            tcg_ctx.tb_ctx.nb_tbs ? (tcg_ctx.code_gen_ptr -
+                                     tcg_ctx.code_gen_buffer) /
+                                     tcg_ctx.tb_ctx.nb_tbs : 0,
+                target_code_size ? (double) (tcg_ctx.code_gen_ptr -
+                                             tcg_ctx.code_gen_buffer) /
+                                             target_code_size : 0);
+    cpu_fprintf(f, "cross page TB count %d (%d%%)\n", cross_page,
+            tcg_ctx.tb_ctx.nb_tbs ? (cross_page * 100) /
+                                    tcg_ctx.tb_ctx.nb_tbs : 0);
+    cpu_fprintf(f, "direct jump count   %d (%d%%) (2 jumps=%d %d%%)\n",
+                direct_jmp_count,
+                tcg_ctx.tb_ctx.nb_tbs ? (direct_jmp_count * 100) /
+                        tcg_ctx.tb_ctx.nb_tbs : 0,
+                direct_jmp2_count,
+                tcg_ctx.tb_ctx.nb_tbs ? (direct_jmp2_count * 100) /
+                        tcg_ctx.tb_ctx.nb_tbs : 0);
+    cpu_fprintf(f, "\nStatistics:\n");
+    cpu_fprintf(f, "TB flush count      %d\n", tcg_ctx.tb_ctx.tb_flush_count);
+    cpu_fprintf(f, "TB invalidate count %d\n",
+            tcg_ctx.tb_ctx.tb_phys_invalidate_count);
+    cpu_fprintf(f, "TLB flush count     %d\n", tlb_flush_count);
+    tcg_dump_info(f, cpu_fprintf);
+}
+
+#else /* CONFIG_USER_ONLY */
+
+void cpu_interrupt(CPUState *cpu, int mask)
+{
+    cpu->interrupt_request |= mask;
+    cpu->tcg_exit_req = 1;
+}
+
+/*
+ * Walks guest process memory "regions" one by one
+ * and calls callback function 'fn' for each region.
+ */
+struct walk_memory_regions_data {
+    walk_memory_regions_fn fn;
+    void *priv;
+    uintptr_t start;
+    int prot;
+};
+
+static int walk_memory_regions_end(struct walk_memory_regions_data *data,
+                                   abi_ulong end, int new_prot)
+{
+    if (data->start != -1ul) {
+        int rc = data->fn(data->priv, data->start, end, data->prot);
+        if (rc != 0) {
+            return rc;
+        }
+    }
+
+    data->start = (new_prot ? end : -1ul);
+    data->prot = new_prot;
+
+    return 0;
+}
+
+static int walk_memory_regions_1(struct walk_memory_regions_data *data,
+                                 abi_ulong base, int level, void **lp)
+{
+    abi_ulong pa;
+    int i, rc;
+
+    if (*lp == NULL) {
+        return walk_memory_regions_end(data, base, 0);
+    }
+
+    if (level == 0) {
+        PageDesc *pd = *lp;
+
+        for (i = 0; i < L2_SIZE; ++i) {
+            int prot = pd[i].flags;
+
+            pa = base | (i << TARGET_PAGE_BITS);
+            if (prot != data->prot) {
+                rc = walk_memory_regions_end(data, pa, prot);
+                if (rc != 0) {
+                    return rc;
+                }
+            }
+        }
+    } else {
+        void **pp = *lp;
+
+        for (i = 0; i < L2_SIZE; ++i) {
+            pa = base | ((abi_ulong)i <<
+                (TARGET_PAGE_BITS + L2_BITS * level));
+            rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
+            if (rc != 0) {
+                return rc;
+            }
+        }
+    }
+
+    return 0;
+}
+
+int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
+{
+    struct walk_memory_regions_data data;
+    uintptr_t i;
+
+    data.fn = fn;
+    data.priv = priv;
+    data.start = -1ul;
+    data.prot = 0;
+
+    for (i = 0; i < V_L1_SIZE; i++) {
+        int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
+                                       V_L1_SHIFT / L2_BITS - 1, l1_map + i);
+
+        if (rc != 0) {
+            return rc;
+        }
+    }
+
+    return walk_memory_regions_end(&data, 0, 0);
+}
+
+static int dump_region(void *priv, abi_ulong start,
+    abi_ulong end, unsigned long prot)
+{
+    FILE *f = (FILE *)priv;
+
+    (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
+        " "TARGET_ABI_FMT_lx" %c%c%c\n",
+        start, end, end - start,
+        ((prot & PAGE_READ) ? 'r' : '-'),
+        ((prot & PAGE_WRITE) ? 'w' : '-'),
+        ((prot & PAGE_EXEC) ? 'x' : '-'));
+
+    return 0;
+}
+
+/* dump memory mappings */
+void page_dump(FILE *f)
+{
+    const int length = sizeof(abi_ulong) * 2;
+    (void) fprintf(f, "%-*s %-*s %-*s %s\n",
+            length, "start", length, "end", length, "size", "prot");
+    walk_memory_regions(f, dump_region);
+}
+
+int page_get_flags(target_ulong address)
+{
+    PageDesc *p;
+
+    p = page_find(address >> TARGET_PAGE_BITS);
+    if (!p) {
+        return 0;
+    }
+    return p->flags;
+}
+
+/* Modify the flags of a page and invalidate the code if necessary.
+   The flag PAGE_WRITE_ORG is positioned automatically depending
+   on PAGE_WRITE.  The mmap_lock should already be held.  */
+void page_set_flags(target_ulong start, target_ulong end, int flags)
+{
+    target_ulong addr, len;
+
+    /* This function should never be called with addresses outside the
+       guest address space.  If this assert fires, it probably indicates
+       a missing call to h2g_valid.  */
+#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
+    assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
+#endif
+    assert(start < end);
+
+    start = start & TARGET_PAGE_MASK;
+    end = TARGET_PAGE_ALIGN(end);
+
+    if (flags & PAGE_WRITE) {
+        flags |= PAGE_WRITE_ORG;
+    }
+
+    for (addr = start, len = end - start;
+         len != 0;
+         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
+        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
+
+        /* If the write protection bit is set, then we invalidate
+           the code inside.  */
+        if (!(p->flags & PAGE_WRITE) &&
+            (flags & PAGE_WRITE) &&
+            p->first_tb) {
+            tb_invalidate_phys_page(addr, 0, NULL, false);
+        }
+        p->flags = flags;
+    }
+}
+
+int page_check_range(target_ulong start, target_ulong len, int flags)
+{
+    PageDesc *p;
+    target_ulong end;
+    target_ulong addr;
+
+    /* This function should never be called with addresses outside the
+       guest address space.  If this assert fires, it probably indicates
+       a missing call to h2g_valid.  */
+#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
+    assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
+#endif
+
+    if (len == 0) {
+        return 0;
+    }
+    if (start + len - 1 < start) {
+        /* We've wrapped around.  */
+        return -1;
+    }
+
+    /* must do this before we lose bits in the next step */
+    end = TARGET_PAGE_ALIGN(start + len);
+    start = start & TARGET_PAGE_MASK;
+
+    for (addr = start, len = end - start;
+         len != 0;
+         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
+        p = page_find(addr >> TARGET_PAGE_BITS);
+        if (!p) {
+            return -1;
+        }
+        if (!(p->flags & PAGE_VALID)) {
+            return -1;
+        }
+
+        if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) {
+            return -1;
+        }
+        if (flags & PAGE_WRITE) {
+            if (!(p->flags & PAGE_WRITE_ORG)) {
+                return -1;
+            }
+            /* unprotect the page if it was put read-only because it
+               contains translated code */
+            if (!(p->flags & PAGE_WRITE)) {
+                if (!page_unprotect(addr, 0, NULL)) {
+                    return -1;
+                }
+            }
+            return 0;
+        }
+    }
+    return 0;
+}
+
+/* called from signal handler: invalidate the code and unprotect the
+   page. Return TRUE if the fault was successfully handled. */
+int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
+{
+    unsigned int prot;
+    PageDesc *p;
+    target_ulong host_start, host_end, addr;
+
+    /* Technically this isn't safe inside a signal handler.  However we
+       know this only ever happens in a synchronous SEGV handler, so in
+       practice it seems to be ok.  */
+    mmap_lock();
+
+    p = page_find(address >> TARGET_PAGE_BITS);
+    if (!p) {
+        mmap_unlock();
+        return 0;
+    }
+
+    /* if the page was really writable, then we change its
+       protection back to writable */
+    if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
+        host_start = address & qemu_host_page_mask;
+        host_end = host_start + qemu_host_page_size;
+
+        prot = 0;
+        for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
+            p = page_find(addr >> TARGET_PAGE_BITS);
+            p->flags |= PAGE_WRITE;
+            prot |= p->flags;
+
+            /* and since the content will be modified, we must invalidate
+               the corresponding translated code. */
+            tb_invalidate_phys_page(addr, pc, puc, true);
+#ifdef DEBUG_TB_CHECK
+            tb_invalidate_check(addr);
+#endif
+        }
+        mprotect((void *)g2h(host_start), qemu_host_page_size,
+                 prot & PAGE_BITS);
+
+        mmap_unlock();
+        return 1;
+    }
+    mmap_unlock();
+    return 0;
+}
+#endif /* CONFIG_USER_ONLY */
diff --git a/translate-all.h b/translate-all.h
new file mode 100644
index 0000000..167eb58
--- /dev/null
+++ b/translate-all.h
@@ -0,0 +1,34 @@
+/*
+ *  Translated block handling
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef TRANSLATE_ALL_H
+#define TRANSLATE_ALL_H
+
+/* Size of the L2 (and L3, etc) page tables.  */
+#define L2_BITS 10
+#define L2_SIZE (1 << L2_BITS)
+
+#define P_L2_LEVELS \
+    (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
+
+/* translate-all.c */
+void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len);
+void cpu_unlink_tb(CPUOldState *cpu);
+void tb_check_watchpoint(CPUArchState *env);
+
+#endif /* TRANSLATE_ALL_H */
diff --git a/ui/console.c b/ui/console.c
index 8c51a2c..1b2b144 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -147,7 +147,7 @@
 
 /* ??? This is mis-named.
    It is used for both text and graphical consoles.  */
-struct TextConsole {
+struct QEMUConsole {
     console_type_t console_type;
     DisplayState *ds;
     /* Graphic console state.  */
@@ -189,8 +189,8 @@
 };
 
 static DisplayState *display_state;
-static TextConsole *active_console;
-static TextConsole *consoles[MAX_CONSOLES];
+static QEMUConsole *active_console;
+static QEMUConsole *consoles[MAX_CONSOLES];
 static int nb_consoles = 0;
 
 #ifdef CONFIG_ANDROID
@@ -216,7 +216,7 @@
 
 void vga_hw_screen_dump(const char *filename)
 {
-    TextConsole *previous_active_console;
+    QEMUConsole *previous_active_console;
 
     previous_active_console = active_console;
     active_console = consoles[0];
@@ -545,7 +545,7 @@
     }
 }
 
-static void text_console_resize(TextConsole *s)
+static void text_console_resize(QEMUConsole *s)
 {
     TextCell *cells, *c, *c1;
     int w1, x, y, last_width;
@@ -577,7 +577,7 @@
     s->cells = cells;
 }
 
-static inline void text_update_xy(TextConsole *s, int x, int y)
+static inline void text_update_xy(QEMUConsole *s, int x, int y)
 {
     s->text_x[0] = MIN(s->text_x[0], x);
     s->text_x[1] = MAX(s->text_x[1], x);
@@ -585,7 +585,7 @@
     s->text_y[1] = MAX(s->text_y[1], y);
 }
 
-static void invalidate_xy(TextConsole *s, int x, int y)
+static void invalidate_xy(QEMUConsole *s, int x, int y)
 {
     if (s->update_x0 > x * FONT_WIDTH)
         s->update_x0 = x * FONT_WIDTH;
@@ -597,7 +597,7 @@
         s->update_y1 = (y + 1) * FONT_HEIGHT;
 }
 
-static void update_xy(TextConsole *s, int x, int y)
+static void update_xy(QEMUConsole *s, int x, int y)
 {
     TextCell *c;
     int y1, y2;
@@ -621,7 +621,7 @@
     }
 }
 
-static void console_show_cursor(TextConsole *s, int show)
+static void console_show_cursor(QEMUConsole *s, int show)
 {
     TextCell *c;
     int y, y1;
@@ -655,7 +655,7 @@
     }
 }
 
-static void console_refresh(TextConsole *s)
+static void console_refresh(QEMUConsole *s)
 {
     TextCell *c;
     int x, y, y1;
@@ -690,7 +690,7 @@
 
 static void console_scroll(int ydelta)
 {
-    TextConsole *s;
+    QEMUConsole *s;
     int i, y1;
 
     s = active_console;
@@ -722,7 +722,7 @@
     console_refresh(s);
 }
 
-static void console_put_lf(TextConsole *s)
+static void console_put_lf(QEMUConsole *s)
 {
     TextCell *c;
     int x, y1;
@@ -773,7 +773,7 @@
  * NOTE: I know this code is not very efficient (checking every color for it
  * self) but it is more readable and better maintainable.
  */
-static void console_handle_escape(TextConsole *s)
+static void console_handle_escape(QEMUConsole *s)
 {
     int i;
 
@@ -866,7 +866,7 @@
     }
 }
 
-static void console_clear_xy(TextConsole *s, int x, int y)
+static void console_clear_xy(QEMUConsole *s, int x, int y)
 {
     int y1 = (s->y_base + y) % s->total_height;
     TextCell *c = &s->cells[y1 * s->width + x];
@@ -875,7 +875,7 @@
     update_xy(s, x, y);
 }
 
-static void console_putchar(TextConsole *s, int ch)
+static void console_putchar(QEMUConsole *s, int ch)
 {
     TextCell *c;
     int y1, i;
@@ -1099,7 +1099,7 @@
 
 void console_select(unsigned int index)
 {
-    TextConsole *s;
+    QEMUConsole *s;
 
     if (index >= MAX_CONSOLES)
         return;
@@ -1124,7 +1124,7 @@
 
 static int console_puts(CharDriverState *chr, const uint8_t *buf, int len)
 {
-    TextConsole *s = chr->opaque;
+    QEMUConsole *s = chr->opaque;
     int i;
 
     s->update_x0 = s->width * FONT_WIDTH;
@@ -1146,7 +1146,7 @@
 
 static void console_send_event(CharDriverState *chr, int event)
 {
-    TextConsole *s = chr->opaque;
+    QEMUConsole *s = chr->opaque;
     int i;
 
     if (event == CHR_EVENT_FOCUS) {
@@ -1161,7 +1161,7 @@
 
 static void kbd_send_chars(void *opaque)
 {
-    TextConsole *s = opaque;
+    QEMUConsole *s = opaque;
     int len;
     uint8_t buf[16];
 
@@ -1184,7 +1184,7 @@
 /* called when an ascii key is pressed */
 void kbd_put_keysym(int keysym)
 {
-    TextConsole *s;
+    QEMUConsole *s;
     uint8_t buf[16], *q;
     int c;
 
@@ -1239,7 +1239,7 @@
 
 static void text_console_invalidate(void *opaque)
 {
-    TextConsole *s = (TextConsole *) opaque;
+    QEMUConsole *s = (QEMUConsole *) opaque;
     if (!ds_get_bits_per_pixel(s->ds) && s->console_type == TEXT_CONSOLE) {
         s->g_width = ds_get_width(s->ds);
         s->g_height = ds_get_height(s->ds);
@@ -1250,7 +1250,7 @@
 
 static void text_console_update(void *opaque, console_ch_t *chardata)
 {
-    TextConsole *s = (TextConsole *) opaque;
+    QEMUConsole *s = (QEMUConsole *) opaque;
     int i, j, src;
 
     if (s->text_x[0] <= s->text_x[1]) {
@@ -1275,10 +1275,10 @@
     }
 }
 
-static TextConsole *get_graphic_console(DisplayState *ds)
+static QEMUConsole *get_graphic_console(DisplayState *ds)
 {
     int i;
-    TextConsole *s;
+    QEMUConsole *s;
     for (i = 0; i < nb_consoles; i++) {
         s = consoles[i];
         if (s->console_type == GRAPHIC_CONSOLE && s->ds == ds)
@@ -1287,14 +1287,14 @@
     return NULL;
 }
 
-static TextConsole *new_console(DisplayState *ds, console_type_t console_type)
+static QEMUConsole *new_console(DisplayState *ds, console_type_t console_type)
 {
-    TextConsole *s;
+    QEMUConsole *s;
     int i;
 
     if (nb_consoles >= MAX_CONSOLES)
         return NULL;
-    s = g_malloc0(sizeof(TextConsole));
+    s = g_malloc0(sizeof(QEMUConsole));
     if (!active_console || ((active_console->console_type != GRAPHIC_CONSOLE) &&
         (console_type == GRAPHIC_CONSOLE))) {
         active_console = s;
@@ -1435,7 +1435,7 @@
                                    vga_hw_text_update_ptr text_update,
                                    void *opaque)
 {
-    TextConsole *s;
+    QEMUConsole *s;
     DisplayState *ds;
 
     ds = (DisplayState *) g_malloc0(sizeof(DisplayState));
@@ -1488,14 +1488,14 @@
 
 static void text_console_set_echo(CharDriverState *chr, bool echo)
 {
-    TextConsole *s = chr->opaque;
+    QEMUConsole *s = chr->opaque;
 
     s->echo = echo;
 }
 
 static void text_console_do_init(CharDriverState *chr, DisplayState *ds)
 {
-    TextConsole *s;
+    QEMUConsole *s;
     static int color_inited;
 
     s = chr->opaque;
@@ -1556,7 +1556,7 @@
 CharDriverState *text_console_init(QemuOpts *opts)
 {
     CharDriverState *chr;
-    TextConsole *s;
+    QEMUConsole *s;
     unsigned width;
     unsigned height;
 
@@ -1641,7 +1641,7 @@
 
 void qemu_console_resize(DisplayState *ds, int width, int height)
 {
-    TextConsole *s = get_graphic_console(ds);
+    QEMUConsole *s = get_graphic_console(ds);
     if (!s) return;
 
     s->g_width = width;
diff --git a/util/bitops.c b/util/bitops.c
new file mode 100644
index 0000000..227c38b
--- /dev/null
+++ b/util/bitops.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ * Copyright (C) 2008 IBM Corporation
+ * Written by Rusty Russell <rusty@rustcorp.com.au>
+ * (Inspired by David Howell's find_next_bit implementation)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "qemu/bitops.h"
+
+#define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
+
+/*
+ * Find the next set bit in a memory region.
+ */
+unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+			    unsigned long offset)
+{
+    const unsigned long *p = addr + BITOP_WORD(offset);
+    unsigned long result = offset & ~(BITS_PER_LONG-1);
+    unsigned long tmp;
+
+    if (offset >= size) {
+        return size;
+    }
+    size -= result;
+    offset %= BITS_PER_LONG;
+    if (offset) {
+        tmp = *(p++);
+        tmp &= (~0UL << offset);
+        if (size < BITS_PER_LONG) {
+            goto found_first;
+        }
+        if (tmp) {
+            goto found_middle;
+        }
+        size -= BITS_PER_LONG;
+        result += BITS_PER_LONG;
+    }
+    while (size >= 4*BITS_PER_LONG) {
+        unsigned long d1, d2, d3;
+        tmp = *p;
+        d1 = *(p+1);
+        d2 = *(p+2);
+        d3 = *(p+3);
+        if (tmp) {
+            goto found_middle;
+        }
+        if (d1 | d2 | d3) {
+            break;
+        }
+        p += 4;
+        result += 4*BITS_PER_LONG;
+        size -= 4*BITS_PER_LONG;
+    }
+    while (size >= BITS_PER_LONG) {
+        if ((tmp = *(p++))) {
+            goto found_middle;
+        }
+        result += BITS_PER_LONG;
+        size -= BITS_PER_LONG;
+    }
+    if (!size) {
+        return result;
+    }
+    tmp = *p;
+
+found_first:
+    tmp &= (~0UL >> (BITS_PER_LONG - size));
+    if (tmp == 0UL) {		/* Are any bits set? */
+        return result + size;	/* Nope. */
+    }
+found_middle:
+    return result + ctzl(tmp);
+}
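/*
 * Worked example (illustrative, not part of the original patch): with a
 * single-word bitmap addr[0] = 0x14 (bits 2 and 4 set),
 *
 *     find_next_bit(addr, BITS_PER_LONG, 0) == 2
 *     find_next_bit(addr, BITS_PER_LONG, 3) == 4
 *     find_next_bit(addr, BITS_PER_LONG, 5) == BITS_PER_LONG
 *
 * The return value equals 'size' when no set bit exists at or after
 * 'offset'.
 */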
+
+/*
+ * This implementation of find_{first,next}_zero_bit was stolen from
+ * Linus' asm-alpha/bitops.h.
+ */
+unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
+				 unsigned long offset)
+{
+    const unsigned long *p = addr + BITOP_WORD(offset);
+    unsigned long result = offset & ~(BITS_PER_LONG-1);
+    unsigned long tmp;
+
+    if (offset >= size) {
+        return size;
+    }
+    size -= result;
+    offset %= BITS_PER_LONG;
+    if (offset) {
+        tmp = *(p++);
+        tmp |= ~0UL >> (BITS_PER_LONG - offset);
+        if (size < BITS_PER_LONG) {
+            goto found_first;
+        }
+        if (~tmp) {
+            goto found_middle;
+        }
+        size -= BITS_PER_LONG;
+        result += BITS_PER_LONG;
+    }
+    while (size & ~(BITS_PER_LONG-1)) {
+        if (~(tmp = *(p++))) {
+            goto found_middle;
+        }
+        result += BITS_PER_LONG;
+        size -= BITS_PER_LONG;
+    }
+    if (!size) {
+        return result;
+    }
+    tmp = *p;
+
+found_first:
+    tmp |= ~0UL << size;
+    if (tmp == ~0UL) {	/* Are any bits zero? */
+        return result + size;	/* Nope. */
+    }
+found_middle:
+    return result + ctzl(~tmp);
+}
+
+unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
+{
+    unsigned long words;
+    unsigned long tmp;
+
+    /* Start at final word. */
+    words = size / BITS_PER_LONG;
+
+    /* Partial final word? */
+    if (size & (BITS_PER_LONG-1)) {
+        tmp = (addr[words] & (~0UL >> (BITS_PER_LONG
+                                       - (size & (BITS_PER_LONG-1)))));
+        if (tmp) {
+            goto found;
+        }
+    }
+
+    while (words) {
+        tmp = addr[--words];
+        if (tmp) {
+        found:
+            return words * BITS_PER_LONG + BITS_PER_LONG - 1 - clzl(tmp);
+        }
+    }
+
+    /* Not found */
+    return size;
+}
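/*
 * Worked example (illustrative, not part of the original patch): for a
 * single-word bitmap addr[0] = 0x14 (bits 2 and 4 set),
 *
 *     find_last_bit(addr, 5) == 4    // highest set bit among bits [0, 5)
 *     find_last_bit(addr, 3) == 2
 *     find_last_bit(addr, 2) == 2    // == size: no set bit in [0, 2)
 */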
diff --git a/util/osdep.c b/util/osdep.c
index a02858a..f726b4e 100644
--- a/util/osdep.c
+++ b/util/osdep.c
@@ -30,6 +30,10 @@
 #include <unistd.h>
 #include <fcntl.h>
 
+#ifndef _WIN32
+#include <sys/socket.h>
+#endif
+
 /* Needed early for CONFIG_BSD etc. */
 #include "config-host.h"
 
@@ -490,7 +494,8 @@
 }
 #endif
 
-#ifdef WIN32
+#if defined(_WIN32) && !QEMU_GNUC_PREREQ(4,4)
+// Older Mingw32 didn't provide these.
 int asprintf( char **, const char *, ... );
 int vasprintf( char **, const char *, va_list );
 
@@ -512,4 +517,4 @@
     va_end( argv );
     return retval;
 }
-#endif
+#endif  // _WIN32 && !QEMU_GNUC_PREREQ(4,4)
diff --git a/vl-android-ui.c b/vl-android-ui.c
deleted file mode 100644
index 5e06159..0000000
--- a/vl-android-ui.c
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- * QEMU System Emulator
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-/* the following is needed on Linux to define ptsname() in stdlib.h */
-#if defined(__linux__)
-#define _GNU_SOURCE 1
-#endif
-
-#ifndef _WIN32
-#include <sys/wait.h>
-#endif  // _WIN32
-
-#ifdef _WIN32
-#include <windows.h>
-#include <sys/timeb.h>
-#endif
-
-#include "qemu-common.h"
-#include "net/net.h"
-#include "ui/console.h"
-#include "qemu/timer.h"
-#include "sysemu/char.h"
-#include "block/block.h"
-#include "android/sockets.h"
-#include "audio/audio.h"
-
-#include "android/android.h"
-#include "android/charpipe.h"
-#include "android/globals.h"
-#include "android/utils/bufprint.h"
-#include "android/utils/system.h"
-#include "android/protocol/core-connection.h"
-#include "android/protocol/attach-ui-impl.h"
-#include "android/protocol/fb-updates-impl.h"
-#include "android/protocol/user-events-proxy.h"
-#include "android/protocol/core-commands-proxy.h"
-#include "android/protocol/ui-commands-impl.h"
-#include "android/qemulator.h"
-
-static Looper*  mainLooper;
-
-/***********************************************************/
-/* I/O handling */
-
-typedef struct IOHandlerRecord {
-    LoopIo  io[1];
-    IOHandler* fd_read;
-    IOHandler* fd_write;
-    int        running;
-    int        deleted;
-    void*      opaque;
-    struct IOHandlerRecord *next;
-} IOHandlerRecord;
-
-static IOHandlerRecord *first_io_handler;
-
-static void ioh_callback(void* opaque, int fd, unsigned events)
-{
-    IOHandlerRecord* ioh = opaque;
-    ioh->running = 1;
-    if ((events & LOOP_IO_READ) != 0) {
-        ioh->fd_read(ioh->opaque);
-    }
-    if (!ioh->deleted && (events & LOOP_IO_WRITE) != 0) {
-        ioh->fd_write(ioh->opaque);
-    }
-    ioh->running = 0;
-    if (ioh->deleted) {
-        loopIo_done(ioh->io);
-        free(ioh);
-    }
-}
-
-int qemu_set_fd_handler(int fd,
-                        IOHandler *fd_read,
-                        IOHandler *fd_write,
-                        void *opaque)
-{
-    IOHandlerRecord **pioh, *ioh;
-
-    if (!fd_read && !fd_write) {
-        pioh = &first_io_handler;
-        for(;;) {
-            ioh = *pioh;
-            if (ioh == NULL)
-                return 0;
-            if (ioh->io->fd == fd) {
-                break;
-            }
-            pioh = &ioh->next;
-        }
-        if (ioh->running) {
-            ioh->deleted = 1;
-        } else {
-            *pioh = ioh->next;
-            loopIo_done(ioh->io);
-            free(ioh);
-        }
-    } else {
-        for(ioh = first_io_handler; ioh != NULL; ioh = ioh->next) {
-            if (ioh->io->fd == fd)
-                goto found;
-        }
-        ANEW0(ioh);
-        ioh->next = first_io_handler;
-        first_io_handler = ioh;
-        loopIo_init(ioh->io, mainLooper, fd, ioh_callback, ioh);
-    found:
-        ioh->fd_read  = fd_read;
-        ioh->fd_write = fd_write;
-        ioh->opaque   = opaque;
-
-        if (fd_read != NULL)
-            loopIo_wantRead(ioh->io);
-        else
-            loopIo_dontWantRead(ioh->io);
-
-        if (fd_write != NULL)
-            loopIo_wantWrite(ioh->io);
-        else
-            loopIo_dontWantWrite(ioh->io);
-    }
-    return 0;
-}
-
-/***********************************************************/
-/* main execution loop */
-
-static LoopTimer  gui_timer[1];
-
-static void gui_update(void *opaque)
-{
-    LoopTimer* timer = opaque;
-    qframebuffer_pulse();
-    loopTimer_startRelative(timer, GUI_REFRESH_INTERVAL);
-}
-
-static void init_gui_timer(Looper* looper)
-{
-    loopTimer_init(gui_timer, looper, gui_update, gui_timer);
-    loopTimer_startRelative(gui_timer, 0);
-    qframebuffer_invalidate_all();
-}
-
-/* Called from qemulator.c */
-void qemu_system_shutdown_request(void)
-{
-    looper_forceQuit(mainLooper);
-}
-
-#ifndef _WIN32
-
-static void termsig_handler(int signal)
-{
-    qemu_system_shutdown_request();
-}
-
-static void sigchld_handler(int signal)
-{
-    waitpid(-1, NULL, WNOHANG);
-}
-
-static void sighandler_setup(void)
-{
-    struct sigaction act;
-
-    memset(&act, 0, sizeof(act));
-    act.sa_handler = termsig_handler;
-    sigaction(SIGINT,  &act, NULL);
-    sigaction(SIGHUP,  &act, NULL);
-    sigaction(SIGTERM, &act, NULL);
-
-    act.sa_handler = sigchld_handler;
-    act.sa_flags = SA_NOCLDSTOP;
-    sigaction(SIGCHLD, &act, NULL);
-}
-
-#endif
-
-#ifdef _WIN32
-static BOOL WINAPI qemu_ctrl_handler(DWORD type)
-{
-    exit(STATUS_CONTROL_C_EXIT);
-    return TRUE;
-}
-#endif
-
-int qemu_main(int argc, char **argv, char **envp)
-{
-#ifndef _WIN32
-    {
-        struct sigaction act;
-        sigfillset(&act.sa_mask);
-        act.sa_flags = 0;
-        act.sa_handler = SIG_IGN;
-        sigaction(SIGPIPE, &act, NULL);
-    }
-#else
-    SetConsoleCtrlHandler(qemu_ctrl_handler, TRUE);
-    /* Note: cpu_interrupt() is currently not SMP safe, so we force
-       QEMU to run on a single CPU */
-    {
-        HANDLE h;
-        DWORD mask, smask;
-        int i;
-        h = GetCurrentProcess();
-        if (GetProcessAffinityMask(h, &mask, &smask)) {
-            for(i = 0; i < 32; i++) {
-                if (mask & (1 << i))
-                    break;
-            }
-            if (i != 32) {
-                mask = 1 << i;
-                SetProcessAffinityMask(h, mask);
-            }
-        }
-    }
-#endif
-
-#ifdef _WIN32
-    socket_init();
-#endif
-
-#ifndef _WIN32
-    /* must be after terminal init, SDL library changes signal handlers */
-    sighandler_setup();
-#endif
-
-    mainLooper = looper_newGeneric();
-
-    /* Register a timer to call qframebuffer_pulse periodically */
-    init_gui_timer(mainLooper);
-
-    // Connect to the core's framebuffer service
-    if (fbUpdatesImpl_create(attachUiImpl_get_console_socket(), "-raw",
-                             qemulator_get_first_framebuffer(qemulator_get()),
-                             mainLooper)) {
-        return -1;
-    }
-
-    // Attach the recepient of UI commands.
-    if (uiCmdImpl_create(attachUiImpl_get_console_socket(), mainLooper)) {
-        return -1;
-    }
-
-    looper_run(mainLooper);
-
-    fbUpdatesImpl_destroy();
-    userEventsProxy_destroy();
-    coreCmdProxy_destroy();
-    uiCmdImpl_destroy();
-    attachUiImpl_destroy();
-
-    return 0;
-}
diff --git a/vl-android.c b/vl-android.c
index be33559..cc1a4d8 100644
--- a/vl-android.c
+++ b/vl-android.c
@@ -76,9 +76,9 @@
 #include "exec/hwaddr.h"
 #include "android/tcpdump.h"
 
-#ifdef CONFIG_MEMCHECK
-#include "memcheck/memcheck.h"
-#endif  // CONFIG_MEMCHECK
+#ifdef CONFIG_ANDROID_MEMCHECK
+#include "android/qemu/memcheck/memcheck.h"
+#endif  // CONFIG_ANDROID_MEMCHECK
 
 #include <unistd.h>
 #include <fcntl.h>
@@ -261,6 +261,7 @@
 const char* keyboard_layout = NULL;
 int64_t ticks_per_sec;
 ram_addr_t ram_size;
+bool xen_allowed;
 const char *mem_path = NULL;
 #ifdef MAP_POPULATE
 int mem_prealloc = 0; /* force preallocation of physical target memory */
@@ -571,197 +572,6 @@
 }
 
 /***********************************************************/
-/* Bluetooth support */
-static int nb_hcis;
-static int cur_hci;
-static struct HCIInfo *hci_table[MAX_NICS];
-
-static struct bt_vlan_s {
-    struct bt_scatternet_s net;
-    int id;
-    struct bt_vlan_s *next;
-} *first_bt_vlan;
-
-/* find or alloc a new bluetooth "VLAN" */
-static struct bt_scatternet_s *qemu_find_bt_vlan(int id)
-{
-    struct bt_vlan_s **pvlan, *vlan;
-    for (vlan = first_bt_vlan; vlan != NULL; vlan = vlan->next) {
-        if (vlan->id == id)
-            return &vlan->net;
-    }
-    vlan = g_malloc0(sizeof(struct bt_vlan_s));
-    vlan->id = id;
-    pvlan = &first_bt_vlan;
-    while (*pvlan != NULL)
-        pvlan = &(*pvlan)->next;
-    *pvlan = vlan;
-    return &vlan->net;
-}
-
-static void null_hci_send(struct HCIInfo *hci, const uint8_t *data, int len)
-{
-}
-
-static int null_hci_addr_set(struct HCIInfo *hci, const uint8_t *bd_addr)
-{
-    return -ENOTSUP;
-}
-
-static struct HCIInfo null_hci = {
-    .cmd_send = null_hci_send,
-    .sco_send = null_hci_send,
-    .acl_send = null_hci_send,
-    .bdaddr_set = null_hci_addr_set,
-};
-
-struct HCIInfo *qemu_next_hci(void)
-{
-    if (cur_hci == nb_hcis)
-        return &null_hci;
-
-    return hci_table[cur_hci++];
-}
-
-static struct HCIInfo *hci_init(const char *str)
-{
-    char *endp;
-    struct bt_scatternet_s *vlan = 0;
-
-    if (!strcmp(str, "null"))
-        /* null */
-        return &null_hci;
-    else if (!strncmp(str, "host", 4) && (str[4] == '\0' || str[4] == ':'))
-        /* host[:hciN] */
-        return bt_host_hci(str[4] ? str + 5 : "hci0");
-    else if (!strncmp(str, "hci", 3)) {
-        /* hci[,vlan=n] */
-        if (str[3]) {
-            if (!strncmp(str + 3, ",vlan=", 6)) {
-                vlan = qemu_find_bt_vlan(strtol(str + 9, &endp, 0));
-                if (*endp)
-                    vlan = 0;
-            }
-        } else
-            vlan = qemu_find_bt_vlan(0);
-        if (vlan)
-           return bt_new_hci(vlan);
-    }
-
-    fprintf(stderr, "qemu: Unknown bluetooth HCI `%s'.\n", str);
-
-    return 0;
-}
-
-static int bt_hci_parse(const char *str)
-{
-    struct HCIInfo *hci;
-    bdaddr_t bdaddr;
-
-    if (nb_hcis >= MAX_NICS) {
-        fprintf(stderr, "qemu: Too many bluetooth HCIs (max %i).\n", MAX_NICS);
-        return -1;
-    }
-
-    hci = hci_init(str);
-    if (!hci)
-        return -1;
-
-    bdaddr.b[0] = 0x52;
-    bdaddr.b[1] = 0x54;
-    bdaddr.b[2] = 0x00;
-    bdaddr.b[3] = 0x12;
-    bdaddr.b[4] = 0x34;
-    bdaddr.b[5] = 0x56 + nb_hcis;
-    hci->bdaddr_set(hci, bdaddr.b);
-
-    hci_table[nb_hcis++] = hci;
-
-    return 0;
-}
-
-static void bt_vhci_add(int vlan_id)
-{
-    struct bt_scatternet_s *vlan = qemu_find_bt_vlan(vlan_id);
-
-    if (!vlan->slave)
-        fprintf(stderr, "qemu: warning: adding a VHCI to "
-                        "an empty scatternet %i\n", vlan_id);
-
-    bt_vhci_init(bt_new_hci(vlan));
-}
-
-static struct bt_device_s *bt_device_add(const char *opt)
-{
-    struct bt_scatternet_s *vlan;
-    int vlan_id = 0;
-    char *endp = strstr(opt, ",vlan=");
-    int len = (endp ? endp - opt : strlen(opt)) + 1;
-    char devname[10];
-
-    pstrcpy(devname, MIN(sizeof(devname), len), opt);
-
-    if (endp) {
-        vlan_id = strtol(endp + 6, &endp, 0);
-        if (*endp) {
-            fprintf(stderr, "qemu: unrecognised bluetooth vlan Id\n");
-            return 0;
-        }
-    }
-
-    vlan = qemu_find_bt_vlan(vlan_id);
-
-    if (!vlan->slave)
-        fprintf(stderr, "qemu: warning: adding a slave device to "
-                        "an empty scatternet %i\n", vlan_id);
-
-    if (!strcmp(devname, "keyboard"))
-        return bt_keyboard_init(vlan);
-
-    fprintf(stderr, "qemu: unsupported bluetooth device `%s'\n", devname);
-    return 0;
-}
-
-static int bt_parse(const char *opt)
-{
-    const char *endp, *p;
-    int vlan;
-
-    if (strstart(opt, "hci", &endp)) {
-        if (!*endp || *endp == ',') {
-            if (*endp)
-                if (!strstart(endp, ",vlan=", 0))
-                    opt = endp + 1;
-
-            return bt_hci_parse(opt);
-       }
-    } else if (strstart(opt, "vhci", &endp)) {
-        if (!*endp || *endp == ',') {
-            if (*endp) {
-                if (strstart(endp, ",vlan=", &p)) {
-                    vlan = strtol(p, (char **) &endp, 0);
-                    if (*endp) {
-                        fprintf(stderr, "qemu: bad scatternet '%s'\n", p);
-                        return 1;
-                    }
-                } else {
-                    fprintf(stderr, "qemu: bad parameter '%s'\n", endp + 1);
-                    return 1;
-                }
-            } else
-                vlan = 0;
-
-            bt_vhci_add(vlan);
-            return 0;
-        }
-    } else if (strstart(opt, "device:", &endp))
-        return !bt_device_add(endp);
-
-    fprintf(stderr, "qemu: bad bluetooth parameter '%s'\n", opt);
-    return 1;
-}
-
-/***********************************************************/
 /* QEMU Block devices */
 
 #define HD_ALIAS "index=%d,media=disk"
@@ -1332,219 +1142,6 @@
 }
 
 /***********************************************************/
-/* USB devices */
-
-static USBPort *used_usb_ports;
-static USBPort *free_usb_ports;
-
-/* ??? Maybe change this to register a hub to keep track of the topology.  */
-void qemu_register_usb_port(USBPort *port, void *opaque, int index,
-                            usb_attachfn attach)
-{
-    port->opaque = opaque;
-    port->index = index;
-    port->attach = attach;
-    port->next = free_usb_ports;
-    free_usb_ports = port;
-}
-
-int usb_device_add_dev(USBDevice *dev)
-{
-    USBPort *port;
-
-    /* Find a USB port to add the device to.  */
-    port = free_usb_ports;
-    if (!port){
-        USBDevice *hub = usb_hub_init(VM_USB_HUB_SIZE);
-        port = free_usb_ports;
-        usb_attach(port, hub);
-    }
-    else if (!port->next){
-        USBDevice *hub;
-
-        /* Create a new hub and chain it on.  */
-        free_usb_ports = NULL;
-        port->next = used_usb_ports;
-        used_usb_ports = port;
-
-        hub = usb_hub_init(VM_USB_HUB_SIZE);
-        usb_attach(port, hub);
-        port = free_usb_ports;
-    }
-
-    free_usb_ports = port->next;
-    port->next = used_usb_ports;
-    used_usb_ports = port;
-    usb_attach(port, dev);
-    return 0;
-}
-
-#if 0
-static void usb_msd_password_cb(void *opaque, int err)
-{
-    USBDevice *dev = opaque;
-
-    if (!err)
-        usb_device_add_dev(dev);
-    else
-        dev->handle_destroy(dev);
-}
-#endif
-
-static int usb_device_add(const char *devname, int is_hotplug)
-{
-    const char *p;
-    USBDevice *dev;
-
-    if (strstart(devname, "host:", &p)) {
-        dev = usb_host_device_open(p);
-    } else if (!strcmp(devname, "mouse")) {
-        dev = usb_mouse_init();
-    } else if (!strcmp(devname, "tablet")) {
-        dev = usb_tablet_init();
-    } else if (!strcmp(devname, "keyboard")) {
-        dev = usb_keyboard_init();
-    } else if (strstart(devname, "disk:", &p)) {
-#if 0
-        BlockDriverState *bs;
-#endif
-        dev = usb_msd_init(p);
-        if (!dev)
-            return -1;
-#if 0
-        bs = usb_msd_get_bdrv(dev);
-        if (bdrv_key_required(bs)) {
-            autostart = 0;
-            if (is_hotplug) {
-                monitor_read_bdrv_key_start(cur_mon, bs, usb_msd_password_cb,
-                                            dev);
-                return 0;
-            }
-        }
-    } else if (!strcmp(devname, "wacom-tablet")) {
-        dev = usb_wacom_init();
-    } else if (strstart(devname, "serial:", &p)) {
-        dev = usb_serial_init(p);
-#ifdef CONFIG_BRLAPI
-    } else if (!strcmp(devname, "braille")) {
-        dev = usb_baum_init();
-#endif
-    } else if (strstart(devname, "net:", &p)) {
-        int nic = nb_nics;
-
-        if (net_client_init("nic", p) < 0)
-            return -1;
-        nd_table[nic].model = "usb";
-        dev = usb_net_init(&nd_table[nic]);
-    } else if (!strcmp(devname, "bt") || strstart(devname, "bt:", &p)) {
-        dev = usb_bt_init(devname[2] ? hci_init(p) :
-                        bt_new_hci(qemu_find_bt_vlan(0)));
-#endif
-    } else {
-        return -1;
-    }
-    if (!dev)
-        return -1;
-
-    return usb_device_add_dev(dev);
-}
-
-int usb_device_del_addr(int bus_num, int addr)
-{
-    USBPort *port;
-    USBPort **lastp;
-    USBDevice *dev;
-
-    if (!used_usb_ports)
-        return -1;
-
-    if (bus_num != 0)
-        return -1;
-
-    lastp = &used_usb_ports;
-    port = used_usb_ports;
-    while (port && port->dev->addr != addr) {
-        lastp = &port->next;
-        port = port->next;
-    }
-
-    if (!port)
-        return -1;
-
-    dev = port->dev;
-    *lastp = port->next;
-    usb_attach(port, NULL);
-    dev->handle_destroy(dev);
-    port->next = free_usb_ports;
-    free_usb_ports = port;
-    return 0;
-}
-
-static int usb_device_del(const char *devname)
-{
-    int bus_num, addr;
-    const char *p;
-
-    if (strstart(devname, "host:", &p))
-        return usb_host_device_close(p);
-
-    if (!used_usb_ports)
-        return -1;
-
-    p = strchr(devname, '.');
-    if (!p)
-        return -1;
-    bus_num = strtoul(devname, NULL, 0);
-    addr = strtoul(p + 1, NULL, 0);
-
-    return usb_device_del_addr(bus_num, addr);
-}
-
-void do_usb_add(Monitor *mon, const char *devname)
-{
-    usb_device_add(devname, 1);
-}
-
-void do_usb_del(Monitor *mon, const char *devname)
-{
-    usb_device_del(devname);
-}
-
-void usb_info(Monitor *mon)
-{
-    USBDevice *dev;
-    USBPort *port;
-    const char *speed_str;
-
-    if (!usb_enabled) {
-        monitor_printf(mon, "USB support not enabled\n");
-        return;
-    }
-
-    for (port = used_usb_ports; port; port = port->next) {
-        dev = port->dev;
-        if (!dev)
-            continue;
-        switch(dev->speed) {
-        case USB_SPEED_LOW:
-            speed_str = "1.5";
-            break;
-        case USB_SPEED_FULL:
-            speed_str = "12";
-            break;
-        case USB_SPEED_HIGH:
-            speed_str = "480";
-            break;
-        default:
-            speed_str = "?";
-            break;
-        }
-        monitor_printf(mon, "  Device %d.%d, Speed %s Mb/s, Product %s\n",
-                       0, dev->addr, speed_str, dev->devname);
-    }
-}
-
-/***********************************************************/
 /* PCMCIA/Cardbus */
 
 static struct pcmcia_socket_entry_s {
@@ -2286,8 +1883,6 @@
     QemuOpts *hdb_opts = NULL;
     const char *net_clients[MAX_NET_CLIENTS];
     int nb_net_clients;
-    const char *bt_opts[MAX_BT_CMDLINE];
-    int nb_bt_opts;
     int optind;
     const char *r, *optarg;
     CharDriverState *monitor_hd = NULL;
@@ -2301,14 +1896,12 @@
     const char *loadvm = NULL;
     QEMUMachine *machine;
     const char *cpu_model;
-    const char *usb_devices[MAX_USB_CMDLINE];
-    int usb_devices_index;
     int tb_size;
     const char *pid_file = NULL;
     const char *incoming = NULL;
     const char* log_mask = NULL;
     const char* log_file = NULL;
-    CPUOldState *env;
+    CPUState *cpu;
     int show_vnc_port = 0;
     IniFile*  hw_ini = NULL;
     STRALLOC_DEFINE(kernel_params);
@@ -2360,10 +1953,7 @@
         node_cpumask[i] = 0;
     }
 
-    usb_devices_index = 0;
-
     nb_net_clients = 0;
-    nb_bt_opts = 0;
 #ifdef MAX_DRIVES
     nb_drives = 0;
     nb_drives_opt = 0;
@@ -2620,12 +2210,6 @@
                 net_slirp_redir(NULL, optarg, NULL);
                 break;
 #endif
-            case QEMU_OPTION_bt:
-                if (nb_bt_opts >= MAX_BT_CMDLINE) {
-                    PANIC("qemu: too many bluetooth options");
-                }
-                bt_opts[nb_bt_opts++] = optarg;
-                break;
 #ifdef HAS_AUDIO
             case QEMU_OPTION_audio_help:
                 AUD_help ();
@@ -2835,17 +2419,6 @@
                 kvm_allowed = 0;
                 break;
 #endif /* CONFIG_KVM */
-            case QEMU_OPTION_usb:
-                usb_enabled = 1;
-                break;
-            case QEMU_OPTION_usbdevice:
-                usb_enabled = 1;
-                if (usb_devices_index >= MAX_USB_CMDLINE) {
-                    PANIC("Too many USB devices");
-                }
-                usb_devices[usb_devices_index] = optarg;
-                usb_devices_index++;
-                break;
             case QEMU_OPTION_smp:
                 smp_cpus = atoi(optarg);
                 if (smp_cpus < 1) {
@@ -3177,14 +2750,14 @@
                 }
                 break;
 
-#ifdef CONFIG_MEMCHECK
+#ifdef CONFIG_ANDROID_MEMCHECK
             case QEMU_OPTION_android_memcheck:
                 android_op_memcheck = (char*)optarg;
                 /* This will set ro.kernel.memcheck system property
                  * to memcheck's tracing flags. */
                 stralloc_add_format(kernel_config, " memcheck=%s", android_op_memcheck);
                 break;
-#endif // CONFIG_MEMCHECK
+#endif // CONFIG_ANDROID_MEMCHECK
 
             case QEMU_OPTION_snapshot_no_time_update:
                 android_snapshot_update_time = 0;
@@ -3241,6 +2814,11 @@
 
     iniFile_free(hw_ini);
 
+    const char* kernelSerialDevicePrefix =
+            androidHwConfig_getKernelSerialPrefix(android_hw);
+    VERBOSE_PRINT(init, "Using kernel serial device prefix: %s",
+                  kernelSerialDevicePrefix);
+
     {
         int width  = android_hw->hw_lcd_width;
         int height = android_hw->hw_lcd_height;
@@ -3578,11 +3156,11 @@
         stralloc_add_format(kernel_config, " ndns=%d", dns_count);
     }
 
-#ifdef CONFIG_MEMCHECK
+#ifdef CONFIG_ANDROID_MEMCHECK
     if (android_op_memcheck) {
         memcheck_init(android_op_memcheck);
     }
-#endif  // CONFIG_MEMCHECK
+#endif  // CONFIG_ANDROID_MEMCHECK
 
     /* Initialize cache partition, if any */
     if (android_hw->disk_cachePartition != 0) {
@@ -3666,12 +3244,16 @@
     /* We always initialize the first serial port for the android-kmsg
      * character device (used to send kernel messages) */
     serial_hds_add_at(0, "android-kmsg");
-    stralloc_add_str(kernel_params, " console=ttyS0");
+    stralloc_add_format(kernel_params,
+                        " console=%s0",
+                        kernelSerialDevicePrefix);
 
     /* We always initialize the second serial port for the android-qemud
      * character device as well */
     serial_hds_add_at(1, "android-qemud");
-    stralloc_add_str(kernel_params, " android.qemud=ttyS1");
+    stralloc_add_format(kernel_params,
+                        " android.qemud=%s1",
+                        kernelSerialDevicePrefix);
 
     if (pid_file && qemu_create_pidfile(pid_file) != 0) {
         os_pidfile_error();
@@ -3800,12 +3382,6 @@
     }
 #endif
 
-    /* init the bluetooth world */
-    for (i = 0; i < nb_bt_opts; i++)
-        if (bt_parse(bt_opts[i])) {
-            PANIC("Unable to parse bluetooth options");
-        }
-
     /* init the memory */
     if (ram_size == 0) {
         ram_size = android_hw->hw_ramSize * 1024LL * 1024;
@@ -3871,8 +3447,18 @@
     if (qemu_opts_foreach(qemu_find_opts("drive"), drive_init_func, &machine->use_scsi, 1) != 0)
         exit(1);
 
-    //register_savevm("timer", 0, 2, timer_save, timer_load, &timers_state);
-    register_savevm_live("ram", 0, 3, ram_save_live, NULL, ram_load, NULL);
+    //register_savevm(NULL, "timer", 0, 2, timer_save, timer_load, &timers_state);
+
+    SaveVMHandlers* ops = g_malloc0(sizeof(*ops));
+    ops->save_live_state = ram_save_live;
+    ops->load_state = ram_load;
+
+    register_savevm_live(NULL,
+                         "ram",
+                         0,
+                         3,
+                         ops,
+                         NULL);
 
     /* must be after terminal init, SDL library changes signal handlers */
     os_setup_signal_handling();
@@ -4065,10 +3651,10 @@
         stralloc_reset(kernel_config);
     }
 
-    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+    CPU_FOREACH(cpu) {
         for (i = 0; i < nb_numa_nodes; i++) {
-            if (node_cpumask[i] & (1 << env->cpu_index)) {
-                env->numa_node = i;
+            if (node_cpumask[i] & (1 << cpu->cpu_index)) {
+                cpu->numa_node = i;
             }
         }
     }
@@ -4090,16 +3676,6 @@
         hax_sync_vcpus();
 #endif
 
-    /* init USB devices */
-    if (usb_enabled) {
-        for(i = 0; i < usb_devices_index; i++) {
-            if (usb_device_add(usb_devices[i], 0) < 0) {
-                fprintf(stderr, "Warning: could not add USB device %s\n",
-                        usb_devices[i]);
-            }
-        }
-    }
-
     /* just use the first displaystate for the moment */
     ds = get_displaystate();
 
diff --git a/vl.c b/vl.c
deleted file mode 100644
index 88933aa..0000000
--- a/vl.c
+++ /dev/null
@@ -1,3166 +0,0 @@
-/*
- * QEMU System Emulator
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include <unistd.h>
-#include <fcntl.h>
-#include <signal.h>
-#include <time.h>
-#include <errno.h>
-#include <sys/time.h>
-#include <zlib.h>
-
-/* Needed early for CONFIG_BSD etc. */
-#include "config-host.h"
-
-#ifndef _WIN32
-#include <libgen.h>
-#include <sys/times.h>
-#include <sys/wait.h>
-#include <termios.h>
-#include <sys/mman.h>
-#include <sys/ioctl.h>
-#include <sys/resource.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <net/if.h>
-#if defined(__NetBSD__)
-#include <net/if_tap.h>
-#endif
-#ifdef __linux__
-#include <linux/if_tun.h>
-#endif
-#include <arpa/inet.h>
-#include <dirent.h>
-#include <netdb.h>
-#include <sys/select.h>
-#ifdef CONFIG_BSD
-#include <sys/stat.h>
-#if defined(__FreeBSD__) || defined(__DragonFly__)
-#include <libutil.h>
-#else
-#include <util.h>
-#endif
-#elif defined (__GLIBC__) && defined (__FreeBSD_kernel__)
-#include <freebsd/stdlib.h>
-#else
-#ifdef __linux__
-#include <pty.h>
-#include <malloc.h>
-#include <linux/rtc.h>
-
-/* For the benefit of older linux systems which don't supply it,
-   we use a local copy of hpet.h. */
-/* #include <linux/hpet.h> */
-#include "hw/timer/hpet.h"
-
-#include <linux/ppdev.h>
-#include <linux/parport.h>
-#endif
-#ifdef __sun__
-#include <sys/stat.h>
-#include <sys/ethernet.h>
-#include <sys/sockio.h>
-#include <netinet/arp.h>
-#include <netinet/in.h>
-#include <netinet/in_systm.h>
-#include <netinet/ip.h>
-#include <netinet/ip_icmp.h> // must come after ip.h
-#include <netinet/udp.h>
-#include <netinet/tcp.h>
-#include <net/if.h>
-#include <syslog.h>
-#include <stropts.h>
-#endif
-#endif
-#endif
-
-#if defined(__OpenBSD__)
-#include <util.h>
-#endif
-
-#if defined(CONFIG_VDE)
-#include <libvdeplug.h>
-#endif
-
-#ifdef _WIN32
-#include <windows.h>
-#include <malloc.h>
-#include <sys/timeb.h>
-#include <mmsystem.h>
-#define getopt_long_only getopt_long
-#define memalign(align, size) malloc(size)
-#endif
-
-#include "sysemu/cpus.h"
-#include "sysemu/arch_init.h"
-
-#ifdef CONFIG_SDL
-#ifdef __APPLE__
-#include <SDL.h>
-int qemu_main(int argc, char **argv, char **envp);
-int main(int argc, char **argv)
-{
-    qemu_main(argc, argv, NULL);
-}
-#undef main
-#define main qemu_main
-#endif
-#endif /* CONFIG_SDL */
-
-#ifdef CONFIG_COCOA
-int qemu_main(int argc, char **argv, char **envp);
-#undef main
-#define main qemu_main
-#endif /* CONFIG_COCOA */
-
-#include "hw/hw.h"
-#include "hw/boards.h"
-#include "hw/usb.h"
-#include "hw/pcmcia.h"
-#include "hw/i386/pc.h"
-#include "hw/isa/isa.h"
-#include "hw/baum.h"
-#include "hw/bt.h"
-#include "sysemu/watchdog.h"
-#include "hw/i386/smbios.h"
-#include "hw/xen/xen.h"
-#include "sysemu/bt.h"
-#include "net/net.h"
-#include "monitor/monitor.h"
-#include "ui/console.h"
-#include "sysemu/sysemu.h"
-#include "exec/gdbstub.h"
-#include "qemu/timer.h"
-#include "sysemu/char.h"
-#include "qemu/cache-utils.h"
-#include "block/block.h"
-#include "sysemu/dma.h"
-#include "audio/audio.h"
-#include "migration/migration.h"
-#include "sysemu/kvm.h"
-#include "sysemu/balloon.h"
-#include "qemu/option.h"
-
-#include "disas/disas.h"
-
-#include "exec/exec-all.h"
-
-#include "qemu/sockets.h"
-
-#if defined(CONFIG_SLIRP)
-#include "libslirp.h"
-#endif
-
-
-
-#define DEFAULT_RAM_SIZE 128
-
-/* Max number of USB devices that can be specified on the commandline.  */
-#define MAX_USB_CMDLINE 8
-
-/* Max number of bluetooth switches on the commandline.  */
-#define MAX_BT_CMDLINE 10
-
-/* XXX: use a two level table to limit memory usage */
-#define MAX_IOPORTS 65536
-
-static const char *data_dir;
-const char *bios_name = NULL;
-static void *ioport_opaque[MAX_IOPORTS];
-static IOPortReadFunc *ioport_read_table[3][MAX_IOPORTS];
-static IOPortWriteFunc *ioport_write_table[3][MAX_IOPORTS];
-/* Note: drives_table[MAX_DRIVES] is a dummy block driver if none available
-   to store the VM snapshots */
-DriveInfo drives_table[MAX_DRIVES+1];
-int nb_drives;
-enum vga_retrace_method vga_retrace_method = VGA_RETRACE_DUMB;
-DisplayType display_type = DT_DEFAULT;
-const char* keyboard_layout = NULL;
-ram_addr_t ram_size;
-const char *mem_path = NULL;
-#ifdef MAP_POPULATE
-int mem_prealloc = 0; /* force preallocation of physical target memory */
-#endif
-int nb_nics;
-NICInfo nd_table[MAX_NICS];
-int vm_running;
-static int autostart;
-static int rtc_utc = 1;
-static int rtc_date_offset = -1; /* -1 means no change */
-int cirrus_vga_enabled = 1;
-int std_vga_enabled = 0;
-int vmsvga_enabled = 0;
-int xenfb_enabled = 0;
-QEMUClock *rtc_clock;
-static int full_screen = 0;
-#ifdef CONFIG_SDL
-static int no_frame = 0;
-#endif
-int no_quit = 0;
-CharDriverState *serial_hds[MAX_SERIAL_PORTS];
-CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
-CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES];
-#ifdef TARGET_I386
-int win2k_install_hack = 0;
-int rtc_td_hack = 0;
-#endif
-int usb_enabled = 0;
-int singlestep = 0;
-int smp_cpus = 1;
-const char *vnc_display;
-int acpi_enabled = 1;
-int no_hpet = 0;
-int no_virtio_balloon = 0;
-int fd_bootchk = 1;
-int no_reboot = 0;
-int no_shutdown = 0;
-int cursor_hide = 1;
-int graphic_rotate = 0;
-WatchdogTimerModel *watchdog = NULL;
-int watchdog_action = WDT_RESET;
-const char *option_rom[MAX_OPTION_ROMS];
-int nb_option_roms;
-int semihosting_enabled = 0;
-#ifdef TARGET_ARM
-int old_param = 0;
-#endif
-const char *qemu_name;
-int alt_grab = 0;
-#if defined(TARGET_SPARC) || defined(TARGET_PPC)
-unsigned int nb_prom_envs = 0;
-const char *prom_envs[MAX_PROM_ENVS];
-#endif
-int nb_drives_opt;
-struct drive_opt drives_opt[MAX_DRIVES];
-
-int nb_numa_nodes;
-uint64_t node_mem[MAX_NODES];
-uint64_t node_cpumask[MAX_NODES];
-
-static QEMUTimer *nographic_timer;
-
-uint8_t qemu_uuid[16];
-
-/***********************************************************/
-/* x86 ISA bus support */
-
-hwaddr isa_mem_base = 0;
-PicState2 *isa_pic;
-
-static IOPortReadFunc default_ioport_readb, default_ioport_readw, default_ioport_readl;
-static IOPortWriteFunc default_ioport_writeb, default_ioport_writew, default_ioport_writel;
-
-static uint32_t ioport_read(int index, uint32_t address)
-{
-    static IOPortReadFunc *default_func[3] = {
-        default_ioport_readb,
-        default_ioport_readw,
-        default_ioport_readl
-    };
-    IOPortReadFunc *func = ioport_read_table[index][address];
-    if (!func)
-        func = default_func[index];
-    return func(ioport_opaque[address], address);
-}
-
-static void ioport_write(int index, uint32_t address, uint32_t data)
-{
-    static IOPortWriteFunc *default_func[3] = {
-        default_ioport_writeb,
-        default_ioport_writew,
-        default_ioport_writel
-    };
-    IOPortWriteFunc *func = ioport_write_table[index][address];
-    if (!func)
-        func = default_func[index];
-    func(ioport_opaque[address], address, data);
-}
-
-static uint32_t default_ioport_readb(void *opaque, uint32_t address)
-{
-#ifdef DEBUG_UNUSED_IOPORT
-    fprintf(stderr, "unused inb: port=0x%04x\n", address);
-#endif
-    return 0xff;
-}
-
-static void default_ioport_writeb(void *opaque, uint32_t address, uint32_t data)
-{
-#ifdef DEBUG_UNUSED_IOPORT
-    fprintf(stderr, "unused outb: port=0x%04x data=0x%02x\n", address, data);
-#endif
-}
-
-/* default is to make two byte accesses */
-static uint32_t default_ioport_readw(void *opaque, uint32_t address)
-{
-    uint32_t data;
-    data = ioport_read(0, address);
-    address = (address + 1) & (MAX_IOPORTS - 1);
-    data |= ioport_read(0, address) << 8;
-    return data;
-}
-
-static void default_ioport_writew(void *opaque, uint32_t address, uint32_t data)
-{
-    ioport_write(0, address, data & 0xff);
-    address = (address + 1) & (MAX_IOPORTS - 1);
-    ioport_write(0, address, (data >> 8) & 0xff);
-}
-
-static uint32_t default_ioport_readl(void *opaque, uint32_t address)
-{
-#ifdef DEBUG_UNUSED_IOPORT
-    fprintf(stderr, "unused inl: port=0x%04x\n", address);
-#endif
-    return 0xffffffff;
-}
-
-static void default_ioport_writel(void *opaque, uint32_t address, uint32_t data)
-{
-#ifdef DEBUG_UNUSED_IOPORT
-    fprintf(stderr, "unused outl: port=0x%04x data=0x%02x\n", address, data);
-#endif
-}
-
-
-/***************/
-/* ballooning */
-
-static QEMUBalloonEvent *qemu_balloon_event;
-void *qemu_balloon_event_opaque;
-
-void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque)
-{
-    qemu_balloon_event = func;
-    qemu_balloon_event_opaque = opaque;
-}
-
-void qemu_balloon(ram_addr_t target)
-{
-    if (qemu_balloon_event)
-        qemu_balloon_event(qemu_balloon_event_opaque, target);
-}
-
-ram_addr_t qemu_balloon_status(void)
-{
-    if (qemu_balloon_event)
-        return qemu_balloon_event(qemu_balloon_event_opaque, 0);
-    return 0;
-}
-
-/***********************************************************/
-/* host time/date access */
-void qemu_get_timedate(struct tm *tm, int offset)
-{
-    time_t ti;
-    struct tm *ret;
-
-    time(&ti);
-    ti += offset;
-    if (rtc_date_offset == -1) {
-        if (rtc_utc)
-            ret = gmtime(&ti);
-        else
-            ret = localtime(&ti);
-    } else {
-        ti -= rtc_date_offset;
-        ret = gmtime(&ti);
-    }
-
-    memcpy(tm, ret, sizeof(struct tm));
-}
-
-int qemu_timedate_diff(struct tm *tm)
-{
-    time_t seconds;
-
-    if (rtc_date_offset == -1)
-        if (rtc_utc)
-            seconds = mktimegm(tm);
-        else
-            seconds = mktime(tm);
-    else
-        seconds = mktimegm(tm) + rtc_date_offset;
-
-    return seconds - time(NULL);
-}
-
-#ifdef _WIN32
-static void socket_cleanup(void)
-{
-    WSACleanup();
-}
-
-static int socket_init(void)
-{
-    WSADATA Data;
-    int ret, err;
-
-    ret = WSAStartup(MAKEWORD(2,2), &Data);
-    if (ret != 0) {
-        err = WSAGetLastError();
-        fprintf(stderr, "WSAStartup: %d\n", err);
-        return -1;
-    }
-    atexit(socket_cleanup);
-    return 0;
-}
-#endif
-
-int get_param_value(char *buf, int buf_size,
-                    const char *tag, const char *str)
-{
-    const char *p;
-    char option[128];
-
-    p = str;
-    for(;;) {
-        p = get_opt_name(option, sizeof(option), p, '=');
-        if (*p != '=')
-            break;
-        p++;
-        if (!strcmp(tag, option)) {
-            (void)get_opt_value(buf, buf_size, p);
-            return strlen(buf);
-        } else {
-            p = get_opt_value(NULL, 0, p);
-        }
-        if (*p != ',')
-            break;
-        p++;
-    }
-    return 0;
-}
-
-int check_params(char *buf, int buf_size,
-                 const char * const *params, const char *str)
-{
-    const char *p;
-    int i;
-
-    p = str;
-    while (*p != '\0') {
-        p = get_opt_name(buf, buf_size, p, '=');
-        if (*p != '=') {
-            return -1;
-        }
-        p++;
-        for (i = 0; params[i] != NULL; i++) {
-            if (!strcmp(params[i], buf)) {
-                break;
-            }
-        }
-        if (params[i] == NULL) {
-            return -1;
-        }
-        p = get_opt_value(NULL, 0, p);
-        if (*p != ',') {
-            break;
-        }
-        p++;
-    }
-    return 0;
-}
-
-/***********************************************************/
-/* Bluetooth support */
-static int nb_hcis;
-static int cur_hci;
-static struct HCIInfo *hci_table[MAX_NICS];
-
-static struct bt_vlan_s {
-    struct bt_scatternet_s net;
-    int id;
-    struct bt_vlan_s *next;
-} *first_bt_vlan;
-
-/* find or alloc a new bluetooth "VLAN" */
-static struct bt_scatternet_s *qemu_find_bt_vlan(int id)
-{
-    struct bt_vlan_s **pvlan, *vlan;
-    for (vlan = first_bt_vlan; vlan != NULL; vlan = vlan->next) {
-        if (vlan->id == id)
-            return &vlan->net;
-    }
-    vlan = g_malloc0(sizeof(struct bt_vlan_s));
-    vlan->id = id;
-    pvlan = &first_bt_vlan;
-    while (*pvlan != NULL)
-        pvlan = &(*pvlan)->next;
-    *pvlan = vlan;
-    return &vlan->net;
-}
-
-static void null_hci_send(struct HCIInfo *hci, const uint8_t *data, int len)
-{
-}
-
-static int null_hci_addr_set(struct HCIInfo *hci, const uint8_t *bd_addr)
-{
-    return -ENOTSUP;
-}
-
-static struct HCIInfo null_hci = {
-    .cmd_send = null_hci_send,
-    .sco_send = null_hci_send,
-    .acl_send = null_hci_send,
-    .bdaddr_set = null_hci_addr_set,
-};
-
-struct HCIInfo *qemu_next_hci(void)
-{
-    if (cur_hci == nb_hcis)
-        return &null_hci;
-
-    return hci_table[cur_hci++];
-}
-
-static struct HCIInfo *hci_init(const char *str)
-{
-    char *endp;
-    struct bt_scatternet_s *vlan = 0;
-
-    if (!strcmp(str, "null"))
-        /* null */
-        return &null_hci;
-    else if (!strncmp(str, "host", 4) && (str[4] == '\0' || str[4] == ':'))
-        /* host[:hciN] */
-        return bt_host_hci(str[4] ? str + 5 : "hci0");
-    else if (!strncmp(str, "hci", 3)) {
-        /* hci[,vlan=n] */
-        if (str[3]) {
-            if (!strncmp(str + 3, ",vlan=", 6)) {
-                vlan = qemu_find_bt_vlan(strtol(str + 9, &endp, 0));
-                if (*endp)
-                    vlan = 0;
-            }
-        } else
-            vlan = qemu_find_bt_vlan(0);
-        if (vlan)
-           return bt_new_hci(vlan);
-    }
-
-    fprintf(stderr, "qemu: Unknown bluetooth HCI `%s'.\n", str);
-
-    return 0;
-}
-
-static int bt_hci_parse(const char *str)
-{
-    struct HCIInfo *hci;
-    bdaddr_t bdaddr;
-
-    if (nb_hcis >= MAX_NICS) {
-        fprintf(stderr, "qemu: Too many bluetooth HCIs (max %i).\n", MAX_NICS);
-        return -1;
-    }
-
-    hci = hci_init(str);
-    if (!hci)
-        return -1;
-
-    bdaddr.b[0] = 0x52;
-    bdaddr.b[1] = 0x54;
-    bdaddr.b[2] = 0x00;
-    bdaddr.b[3] = 0x12;
-    bdaddr.b[4] = 0x34;
-    bdaddr.b[5] = 0x56 + nb_hcis;
-    hci->bdaddr_set(hci, bdaddr.b);
-
-    hci_table[nb_hcis++] = hci;
-
-    return 0;
-}
-
-static void bt_vhci_add(int vlan_id)
-{
-    struct bt_scatternet_s *vlan = qemu_find_bt_vlan(vlan_id);
-
-    if (!vlan->slave)
-        fprintf(stderr, "qemu: warning: adding a VHCI to "
-                        "an empty scatternet %i\n", vlan_id);
-
-    bt_vhci_init(bt_new_hci(vlan));
-}
-
-static struct bt_device_s *bt_device_add(const char *opt)
-{
-    struct bt_scatternet_s *vlan;
-    int vlan_id = 0;
-    char *endp = strstr(opt, ",vlan=");
-    int len = (endp ? endp - opt : strlen(opt)) + 1;
-    char devname[10];
-
-    pstrcpy(devname, MIN(sizeof(devname), len), opt);
-
-    if (endp) {
-        vlan_id = strtol(endp + 6, &endp, 0);
-        if (*endp) {
-            fprintf(stderr, "qemu: unrecognised bluetooth vlan Id\n");
-            return 0;
-        }
-    }
-
-    vlan = qemu_find_bt_vlan(vlan_id);
-
-    if (!vlan->slave)
-        fprintf(stderr, "qemu: warning: adding a slave device to "
-                        "an empty scatternet %i\n", vlan_id);
-
-    if (!strcmp(devname, "keyboard"))
-        return bt_keyboard_init(vlan);
-
-    fprintf(stderr, "qemu: unsupported bluetooth device `%s'\n", devname);
-    return 0;
-}
-
-static int bt_parse(const char *opt)
-{
-    const char *endp, *p;
-    int vlan;
-
-    if (strstart(opt, "hci", &endp)) {
-        if (!*endp || *endp == ',') {
-            if (*endp)
-                if (!strstart(endp, ",vlan=", 0))
-                    opt = endp + 1;
-
-            return bt_hci_parse(opt);
-       }
-    } else if (strstart(opt, "vhci", &endp)) {
-        if (!*endp || *endp == ',') {
-            if (*endp) {
-                if (strstart(endp, ",vlan=", &p)) {
-                    vlan = strtol(p, (char **) &endp, 0);
-                    if (*endp) {
-                        fprintf(stderr, "qemu: bad scatternet '%s'\n", p);
-                        return 1;
-                    }
-                } else {
-                    fprintf(stderr, "qemu: bad parameter '%s'\n", endp + 1);
-                    return 1;
-                }
-            } else
-                vlan = 0;
-
-            bt_vhci_add(vlan);
-            return 0;
-        }
-    } else if (strstart(opt, "device:", &endp))
-        return !bt_device_add(endp);
-
-    fprintf(stderr, "qemu: bad bluetooth parameter '%s'\n", opt);
-    return 1;
-}
-
-/***********************************************************/
-/* QEMU Block devices */
-
-#define HD_ALIAS "index=%d,media=disk"
-#define CDROM_ALIAS "index=2,media=cdrom"
-#define FD_ALIAS "index=%d,if=floppy"
-#define PFLASH_ALIAS "if=pflash"
-#define MTD_ALIAS "if=mtd"
-#define SD_ALIAS "index=0,if=sd"
-
-static int drive_init_func(QemuOpts *opts, void *opaque)
-{
-    int *use_scsi = opaque;
-    int fatal_error = 0;
-
-    if (drive_init(opts, *use_scsi, &fatal_error) == NULL) {
-        if (fatal_error)
-            return 1;
-    }
-    return 0;
-}
-
-static int drive_enable_snapshot(QemuOpts *opts, void *opaque)
-{
-    if (NULL == qemu_opt_get(opts, "snapshot")) {
-        qemu_opt_set(opts, "snapshot", "on");
-    }
-    return 0;
-}
-
-static int drive_opt_get_free_idx(void)
-{
-    int index;
-
-    for (index = 0; index < MAX_DRIVES; index++)
-        if (!drives_opt[index].used) {
-            drives_opt[index].used = 1;
-            return index;
-        }
-
-    return -1;
-}
-
-static int drive_get_free_idx(void)
-{
-    int index;
-
-    for (index = 0; index < MAX_DRIVES; index++)
-        if (!drives_table[index].used) {
-            drives_table[index].used = 1;
-            return index;
-        }
-
-    return -1;
-}
-
-int drive_add(const char *file, const char *fmt, ...)
-{
-    va_list ap;
-    int index = drive_opt_get_free_idx();
-
-    if (nb_drives_opt >= MAX_DRIVES || index == -1) {
-        fprintf(stderr, "qemu: too many drives\n");
-        return -1;
-    }
-
-    drives_opt[index].file = file;
-    va_start(ap, fmt);
-    vsnprintf(drives_opt[index].opt,
-              sizeof(drives_opt[0].opt), fmt, ap);
-    va_end(ap);
-
-    nb_drives_opt++;
-    return index;
-}
-
-void drive_remove(int index)
-{
-    drives_opt[index].used = 0;
-    nb_drives_opt--;
-}
-
-int drive_get_index(BlockInterfaceType type, int bus, int unit)
-{
-    int index;
-
-    /* seek interface, bus and unit */
-
-    for (index = 0; index < MAX_DRIVES; index++)
-        if (drives_table[index].type == type &&
-	    drives_table[index].bus == bus &&
-	    drives_table[index].unit == unit &&
-	    drives_table[index].used)
-        return index;
-
-    return -1;
-}
-
-int drive_get_max_bus(BlockInterfaceType type)
-{
-    int max_bus;
-    int index;
-
-    max_bus = -1;
-    for (index = 0; index < nb_drives; index++) {
-        if(drives_table[index].type == type &&
-           drives_table[index].bus > max_bus)
-            max_bus = drives_table[index].bus;
-    }
-    return max_bus;
-}
-
-const char *drive_get_serial(BlockDriverState *bdrv)
-{
-    int index;
-
-    for (index = 0; index < nb_drives; index++)
-        if (drives_table[index].bdrv == bdrv)
-            return drives_table[index].serial;
-
-    return "\0";
-}
-
-BlockInterfaceErrorAction drive_get_onerror(BlockDriverState *bdrv)
-{
-    int index;
-
-    for (index = 0; index < nb_drives; index++)
-        if (drives_table[index].bdrv == bdrv)
-            return drives_table[index].onerror;
-
-    return BLOCK_ERR_STOP_ENOSPC;
-}
-
-static void bdrv_format_print(void *opaque, const char *name)
-{
-    fprintf(stderr, " %s", name);
-}
-
-void drive_uninit(BlockDriverState *bdrv)
-{
-    int i;
-
-    for (i = 0; i < MAX_DRIVES; i++)
-        if (drives_table[i].bdrv == bdrv) {
-            drives_table[i].bdrv = NULL;
-            drives_table[i].used = 0;
-            drive_remove(drives_table[i].drive_opt_idx);
-            nb_drives--;
-            break;
-        }
-}
-
-int drive_init(struct drive_opt *arg, int snapshot, void *opaque)
-{
-    char buf[128];
-    char file[1024];
-    char devname[128];
-    char serial[21];
-    const char *mediastr = "";
-    BlockInterfaceType type;
-    enum { MEDIA_DISK, MEDIA_CDROM } media;
-    int bus_id, unit_id;
-    int cyls, heads, secs, translation;
-    BlockDriverState *bdrv;
-    BlockDriver *drv = NULL;
-    QEMUMachine *machine = opaque;
-    int max_devs;
-    int index;
-    int cache;
-    int bdrv_flags, onerror;
-    int drives_table_idx;
-    char *str = arg->opt;
-    static const char * const params[] = { "bus", "unit", "if", "index",
-                                           "cyls", "heads", "secs", "trans",
-                                           "media", "snapshot", "file",
-                                           "cache", "format", "serial", "werror",
-                                           NULL };
-
-    if (check_params(buf, sizeof(buf), params, str) < 0) {
-         fprintf(stderr, "qemu: unknown parameter '%s' in '%s'\n",
-                         buf, str);
-         return -1;
-    }
-
-    file[0] = 0;
-    cyls = heads = secs = 0;
-    bus_id = 0;
-    unit_id = -1;
-    translation = BIOS_ATA_TRANSLATION_AUTO;
-    index = -1;
-    cache = 3;
-
-    if (machine->use_scsi) {
-        type = IF_SCSI;
-        max_devs = MAX_SCSI_DEVS;
-        pstrcpy(devname, sizeof(devname), "scsi");
-    } else {
-        type = IF_IDE;
-        max_devs = MAX_IDE_DEVS;
-        pstrcpy(devname, sizeof(devname), "ide");
-    }
-    media = MEDIA_DISK;
-
-    /* extract parameters */
-
-    if (get_param_value(buf, sizeof(buf), "bus", str)) {
-        bus_id = strtol(buf, NULL, 0);
-	if (bus_id < 0) {
-	    fprintf(stderr, "qemu: '%s' invalid bus id\n", str);
-	    return -1;
-	}
-    }
-
-    if (get_param_value(buf, sizeof(buf), "unit", str)) {
-        unit_id = strtol(buf, NULL, 0);
-	if (unit_id < 0) {
-	    fprintf(stderr, "qemu: '%s' invalid unit id\n", str);
-	    return -1;
-	}
-    }
-
-    if (get_param_value(buf, sizeof(buf), "if", str)) {
-        pstrcpy(devname, sizeof(devname), buf);
-        if (!strcmp(buf, "ide")) {
-	    type = IF_IDE;
-            max_devs = MAX_IDE_DEVS;
-        } else if (!strcmp(buf, "scsi")) {
-	    type = IF_SCSI;
-            max_devs = MAX_SCSI_DEVS;
-        } else if (!strcmp(buf, "floppy")) {
-	    type = IF_FLOPPY;
-            max_devs = 0;
-        } else if (!strcmp(buf, "pflash")) {
-	    type = IF_PFLASH;
-            max_devs = 0;
-	} else if (!strcmp(buf, "mtd")) {
-	    type = IF_MTD;
-            max_devs = 0;
-	} else if (!strcmp(buf, "sd")) {
-	    type = IF_SD;
-            max_devs = 0;
-        } else if (!strcmp(buf, "virtio")) {
-            type = IF_VIRTIO;
-            max_devs = 0;
-	} else if (!strcmp(buf, "xen")) {
-	    type = IF_XEN;
-            max_devs = 0;
-	} else {
-            fprintf(stderr, "qemu: '%s' unsupported bus type '%s'\n", str, buf);
-            return -1;
-	}
-    }
-
-    if (get_param_value(buf, sizeof(buf), "index", str)) {
-        index = strtol(buf, NULL, 0);
-	if (index < 0) {
-	    fprintf(stderr, "qemu: '%s' invalid index\n", str);
-	    return -1;
-	}
-    }
-
-    if (get_param_value(buf, sizeof(buf), "cyls", str)) {
-        cyls = strtol(buf, NULL, 0);
-    }
-
-    if (get_param_value(buf, sizeof(buf), "heads", str)) {
-        heads = strtol(buf, NULL, 0);
-    }
-
-    if (get_param_value(buf, sizeof(buf), "secs", str)) {
-        secs = strtol(buf, NULL, 0);
-    }
-
-    if (cyls || heads || secs) {
-        if (cyls < 1 || cyls > 16383) {
-            fprintf(stderr, "qemu: '%s' invalid physical cyls number\n", str);
-	    return -1;
-	}
-        if (heads < 1 || heads > 16) {
-            fprintf(stderr, "qemu: '%s' invalid physical heads number\n", str);
-	    return -1;
-	}
-        if (secs < 1 || secs > 63) {
-            fprintf(stderr, "qemu: '%s' invalid physical secs number\n", str);
-	    return -1;
-	}
-    }
-
-    if (get_param_value(buf, sizeof(buf), "trans", str)) {
-        if (!cyls) {
-            fprintf(stderr,
-                    "qemu: '%s' trans must be used with cyls,heads and secs\n",
-                    str);
-            return -1;
-        }
-        if (!strcmp(buf, "none"))
-            translation = BIOS_ATA_TRANSLATION_NONE;
-        else if (!strcmp(buf, "lba"))
-            translation = BIOS_ATA_TRANSLATION_LBA;
-        else if (!strcmp(buf, "auto"))
-            translation = BIOS_ATA_TRANSLATION_AUTO;
-	else {
-            fprintf(stderr, "qemu: '%s' invalid translation type\n", str);
-	    return -1;
-	}
-    }
-
-    if (get_param_value(buf, sizeof(buf), "media", str)) {
-        if (!strcmp(buf, "disk")) {
-	    media = MEDIA_DISK;
-	} else if (!strcmp(buf, "cdrom")) {
-            if (cyls || secs || heads) {
-                fprintf(stderr,
-                        "qemu: '%s' invalid physical CHS format\n", str);
-	        return -1;
-            }
-	    media = MEDIA_CDROM;
-	} else {
-	    fprintf(stderr, "qemu: '%s' invalid media\n", str);
-	    return -1;
-	}
-    }
-
-    if (get_param_value(buf, sizeof(buf), "snapshot", str)) {
-        if (!strcmp(buf, "on"))
-	    snapshot = 1;
-        else if (!strcmp(buf, "off"))
-	    snapshot = 0;
-	else {
-	    fprintf(stderr, "qemu: '%s' invalid snapshot option\n", str);
-	    return -1;
-	}
-    }
-
-    if (get_param_value(buf, sizeof(buf), "cache", str)) {
-        if (!strcmp(buf, "off") || !strcmp(buf, "none"))
-            cache = 0;
-        else if (!strcmp(buf, "writethrough"))
-            cache = 1;
-        else if (!strcmp(buf, "writeback"))
-            cache = 2;
-        else {
-           fprintf(stderr, "qemu: invalid cache option\n");
-           return -1;
-        }
-    }
-
-    if (get_param_value(buf, sizeof(buf), "format", str)) {
-       if (strcmp(buf, "?") == 0) {
-            fprintf(stderr, "qemu: Supported formats:");
-            bdrv_iterate_format(bdrv_format_print, NULL);
-            fprintf(stderr, "\n");
-	    return -1;
-        }
-        drv = bdrv_find_format(buf);
-        if (!drv) {
-            fprintf(stderr, "qemu: '%s' invalid format\n", buf);
-            return -1;
-        }
-    }
-
-    if (arg->file == NULL)
-        get_param_value(file, sizeof(file), "file", str);
-    else
-        pstrcpy(file, sizeof(file), arg->file);
-
-    if (!get_param_value(serial, sizeof(serial), "serial", str))
-	    memset(serial, 0,  sizeof(serial));
-
-    onerror = BLOCK_ERR_STOP_ENOSPC;
-    if (get_param_value(buf, sizeof(buf), "werror", str)) {
-        if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO) {
-            fprintf(stderr, "werror is no supported by this format\n");
-            return -1;
-        }
-        if (!strcmp(buf, "ignore"))
-            onerror = BLOCK_ERR_IGNORE;
-        else if (!strcmp(buf, "enospc"))
-            onerror = BLOCK_ERR_STOP_ENOSPC;
-        else if (!strcmp(buf, "stop"))
-            onerror = BLOCK_ERR_STOP_ANY;
-        else if (!strcmp(buf, "report"))
-            onerror = BLOCK_ERR_REPORT;
-        else {
-            fprintf(stderr, "qemu: '%s' invalid write error action\n", buf);
-            return -1;
-        }
-    }
-
-    /* compute bus and unit according to index */
-
-    if (index != -1) {
-        if (bus_id != 0 || unit_id != -1) {
-            fprintf(stderr,
-                    "qemu: '%s' index cannot be used with bus and unit\n", str);
-            return -1;
-        }
-        if (max_devs == 0)
-        {
-            unit_id = index;
-            bus_id = 0;
-        } else {
-            unit_id = index % max_devs;
-            bus_id = index / max_devs;
-        }
-    }
-
-    /* if user doesn't specify a unit_id,
-     * try to find the first free
-     */
-
-    if (unit_id == -1) {
-       unit_id = 0;
-       while (drive_get_index(type, bus_id, unit_id) != -1) {
-           unit_id++;
-           if (max_devs && unit_id >= max_devs) {
-               unit_id -= max_devs;
-               bus_id++;
-           }
-       }
-    }
-
-    /* check unit id */
-
-    if (max_devs && unit_id >= max_devs) {
-        fprintf(stderr, "qemu: '%s' unit %d too big (max is %d)\n",
-                        str, unit_id, max_devs - 1);
-        return -1;
-    }
-
-    /*
-     * ignore multiple definitions
-     */
-
-    if (drive_get_index(type, bus_id, unit_id) != -1)
-        return -2;
-
-    /* init */
-
-    if (type == IF_IDE || type == IF_SCSI)
-        mediastr = (media == MEDIA_CDROM) ? "-cd" : "-hd";
-    if (max_devs)
-        snprintf(buf, sizeof(buf), "%s%i%s%i",
-                 devname, bus_id, mediastr, unit_id);
-    else
-        snprintf(buf, sizeof(buf), "%s%s%i",
-                 devname, mediastr, unit_id);
-    bdrv = bdrv_new(buf);
-    drives_table_idx = drive_get_free_idx();
-    drives_table[drives_table_idx].bdrv = bdrv;
-    drives_table[drives_table_idx].type = type;
-    drives_table[drives_table_idx].bus = bus_id;
-    drives_table[drives_table_idx].unit = unit_id;
-    drives_table[drives_table_idx].onerror = onerror;
-    drives_table[drives_table_idx].drive_opt_idx = arg - drives_opt;
-    strncpy(drives_table[drives_table_idx].serial, serial, sizeof(serial));
-    nb_drives++;
-
-    switch(type) {
-    case IF_IDE:
-    case IF_SCSI:
-    case IF_XEN:
-        switch(media) {
-	case MEDIA_DISK:
-            if (cyls != 0) {
-                bdrv_set_geometry_hint(bdrv, cyls, heads, secs);
-                bdrv_set_translation_hint(bdrv, translation);
-            }
-	    break;
-	case MEDIA_CDROM:
-            bdrv_set_type_hint(bdrv, BDRV_TYPE_CDROM);
-	    break;
-	}
-        break;
-    case IF_SD:
-        /* FIXME: This isn't really a floppy, but it's a reasonable
-           approximation.  */
-    case IF_FLOPPY:
-        bdrv_set_type_hint(bdrv, BDRV_TYPE_FLOPPY);
-        break;
-    case IF_PFLASH:
-    case IF_MTD:
-    case IF_VIRTIO:
-        break;
-    case IF_COUNT:
-        abort();
-    }
-    if (!file[0])
-        return -2;
-    bdrv_flags = 0;
-    if (snapshot) {
-        bdrv_flags |= BDRV_O_SNAPSHOT;
-        cache = 2; /* always use write-back with snapshot */
-    }
-    if (cache == 0) /* no caching */
-        bdrv_flags |= BDRV_O_NOCACHE;
-    else if (cache == 2) /* write-back */
-        bdrv_flags |= BDRV_O_CACHE_WB;
-    else if (cache == 3) /* not specified */
-        bdrv_flags |= BDRV_O_CACHE_DEF;
-    if (bdrv_open2(bdrv, file, bdrv_flags, drv) < 0) {
-        fprintf(stderr, "qemu: could not open disk image %s\n",
-                        file);
-        return -1;
-    }
-    if (bdrv_key_required(bdrv))
-        autostart = 0;
-    return drives_table_idx;
-}
-
-static void numa_add(const char *optarg)
-{
-    char option[128];
-    char *endptr;
-    unsigned long long value, endvalue;
-    int nodenr;
-
-    optarg = get_opt_name(option, 128, optarg, ',') + 1;
-    if (!strcmp(option, "node")) {
-        if (get_param_value(option, 128, "nodeid", optarg) == 0) {
-            nodenr = nb_numa_nodes;
-        } else {
-            nodenr = strtoull(option, NULL, 10);
-        }
-
-        if (get_param_value(option, 128, "mem", optarg) == 0) {
-            node_mem[nodenr] = 0;
-        } else {
-            value = strtoull(option, &endptr, 0);
-            switch (*endptr) {
-            case 0: case 'M': case 'm':
-                value <<= 20;
-                break;
-            case 'G': case 'g':
-                value <<= 30;
-                break;
-            }
-            node_mem[nodenr] = value;
-        }
-        if (get_param_value(option, 128, "cpus", optarg) == 0) {
-            node_cpumask[nodenr] = 0;
-        } else {
-            value = strtoull(option, &endptr, 10);
-            if (value >= 64) {
-                value = 63;
-                fprintf(stderr, "only 64 CPUs in NUMA mode supported.\n");
-            } else {
-                if (*endptr == '-') {
-                    endvalue = strtoull(endptr+1, &endptr, 10);
-                    if (endvalue >= 63) {
-                        endvalue = 62;
-                        fprintf(stderr,
-                            "only 63 CPUs in NUMA mode supported.\n");
-                    }
-                    value = (1 << (endvalue + 1)) - (1 << value);
-                } else {
-                    value = 1 << value;
-                }
-            }
-            node_cpumask[nodenr] = value;
-        }
-        nb_numa_nodes++;
-    }
-    return;
-}
-
-/***********************************************************/
-/* USB devices */
-
-static USBPort *used_usb_ports;
-static USBPort *free_usb_ports;
-
-/* ??? Maybe change this to register a hub to keep track of the topology.  */
-void qemu_register_usb_port(USBPort *port, void *opaque, int index,
-                            usb_attachfn attach)
-{
-    port->opaque = opaque;
-    port->index = index;
-    port->attach = attach;
-    port->next = free_usb_ports;
-    free_usb_ports = port;
-}
-
-int usb_device_add_dev(USBDevice *dev)
-{
-    USBPort *port;
-
-    /* Find a USB port to add the device to.  */
-    port = free_usb_ports;
-    if (!port->next) {
-        USBDevice *hub;
-
-        /* Create a new hub and chain it on.  */
-        free_usb_ports = NULL;
-        port->next = used_usb_ports;
-        used_usb_ports = port;
-
-        hub = usb_hub_init(VM_USB_HUB_SIZE);
-        usb_attach(port, hub);
-        port = free_usb_ports;
-    }
-
-    free_usb_ports = port->next;
-    port->next = used_usb_ports;
-    used_usb_ports = port;
-    usb_attach(port, dev);
-    return 0;
-}
-
-static void usb_msd_password_cb(void *opaque, int err)
-{
-    USBDevice *dev = opaque;
-
-    if (!err)
-        usb_device_add_dev(dev);
-    else
-        dev->handle_destroy(dev);
-}
-
-static int usb_device_add(const char *devname, int is_hotplug)
-{
-    const char *p;
-    USBDevice *dev;
-
-    if (!free_usb_ports)
-        return -1;
-
-    if (strstart(devname, "host:", &p)) {
-        dev = usb_host_device_open(p);
-    } else if (!strcmp(devname, "mouse")) {
-        dev = usb_mouse_init();
-    } else if (!strcmp(devname, "tablet")) {
-        dev = usb_tablet_init();
-    } else if (!strcmp(devname, "keyboard")) {
-        dev = usb_keyboard_init();
-    } else if (strstart(devname, "disk:", &p)) {
-        BlockDriverState *bs;
-
-        dev = usb_msd_init(p);
-        if (!dev)
-            return -1;
-        bs = usb_msd_get_bdrv(dev);
-        if (bdrv_key_required(bs)) {
-            autostart = 0;
-            if (is_hotplug) {
-                monitor_read_bdrv_key_start(cur_mon, bs, usb_msd_password_cb,
-                                            dev);
-                return 0;
-            }
-        }
-    } else if (!strcmp(devname, "wacom-tablet")) {
-        dev = usb_wacom_init();
-    } else if (strstart(devname, "serial:", &p)) {
-        dev = usb_serial_init(p);
-#ifdef CONFIG_BRLAPI
-    } else if (!strcmp(devname, "braille")) {
-        dev = usb_baum_init();
-#endif
-    } else if (strstart(devname, "net:", &p)) {
-        int nic = nb_nics;
-
-        if (net_client_init(NULL, "nic", p) < 0)
-            return -1;
-        nd_table[nic].model = "usb";
-        dev = usb_net_init(&nd_table[nic]);
-    } else if (!strcmp(devname, "bt") || strstart(devname, "bt:", &p)) {
-        dev = usb_bt_init(devname[2] ? hci_init(p) :
-                        bt_new_hci(qemu_find_bt_vlan(0)));
-    } else {
-        return -1;
-    }
-    if (!dev)
-        return -1;
-
-    return usb_device_add_dev(dev);
-}
-
-int usb_device_del_addr(int bus_num, int addr)
-{
-    USBPort *port;
-    USBPort **lastp;
-    USBDevice *dev;
-
-    if (!used_usb_ports)
-        return -1;
-
-    if (bus_num != 0)
-        return -1;
-
-    lastp = &used_usb_ports;
-    port = used_usb_ports;
-    while (port && port->dev->addr != addr) {
-        lastp = &port->next;
-        port = port->next;
-    }
-
-    if (!port)
-        return -1;
-
-    dev = port->dev;
-    *lastp = port->next;
-    usb_attach(port, NULL);
-    dev->handle_destroy(dev);
-    port->next = free_usb_ports;
-    free_usb_ports = port;
-    return 0;
-}
-
-static int usb_device_del(const char *devname)
-{
-    int bus_num, addr;
-    const char *p;
-
-    if (strstart(devname, "host:", &p))
-        return usb_host_device_close(p);
-
-    if (!used_usb_ports)
-        return -1;
-
-    p = strchr(devname, '.');
-    if (!p)
-        return -1;
-    bus_num = strtoul(devname, NULL, 0);
-    addr = strtoul(p + 1, NULL, 0);
-
-    return usb_device_del_addr(bus_num, addr);
-}
-
-void do_usb_add(Monitor *mon, const char *devname)
-{
-    usb_device_add(devname, 1);
-}
-
-void do_usb_del(Monitor *mon, const char *devname)
-{
-    usb_device_del(devname);
-}
-
-void usb_info(Monitor *mon)
-{
-    USBDevice *dev;
-    USBPort *port;
-    const char *speed_str;
-
-    if (!usb_enabled) {
-        monitor_printf(mon, "USB support not enabled\n");
-        return;
-    }
-
-    for (port = used_usb_ports; port; port = port->next) {
-        dev = port->dev;
-        if (!dev)
-            continue;
-        switch(dev->speed) {
-        case USB_SPEED_LOW:
-            speed_str = "1.5";
-            break;
-        case USB_SPEED_FULL:
-            speed_str = "12";
-            break;
-        case USB_SPEED_HIGH:
-            speed_str = "480";
-            break;
-        default:
-            speed_str = "?";
-            break;
-        }
-        monitor_printf(mon, "  Device %d.%d, Speed %s Mb/s, Product %s\n",
-                       0, dev->addr, speed_str, dev->devname);
-    }
-}
-
-/***********************************************************/
-/* PCMCIA/Cardbus */
-
-static struct pcmcia_socket_entry_s {
-    PCMCIASocket *socket;
-    struct pcmcia_socket_entry_s *next;
-} *pcmcia_sockets = 0;
-
-void pcmcia_socket_register(PCMCIASocket *socket)
-{
-    struct pcmcia_socket_entry_s *entry;
-
-    entry = g_malloc(sizeof(struct pcmcia_socket_entry_s));
-    entry->socket = socket;
-    entry->next = pcmcia_sockets;
-    pcmcia_sockets = entry;
-}
-
-void pcmcia_socket_unregister(PCMCIASocket *socket)
-{
-    struct pcmcia_socket_entry_s *entry, **ptr;
-
-    ptr = &pcmcia_sockets;
-    for (entry = *ptr; entry; ptr = &entry->next, entry = *ptr)
-        if (entry->socket == socket) {
-            *ptr = entry->next;
-            g_free(entry);
-        }
-}
-
-void pcmcia_info(Monitor *mon)
-{
-    struct pcmcia_socket_entry_s *iter;
-
-    if (!pcmcia_sockets)
-        monitor_printf(mon, "No PCMCIA sockets\n");
-
-    for (iter = pcmcia_sockets; iter; iter = iter->next)
-        monitor_printf(mon, "%s: %s\n", iter->socket->slot_string,
-                       iter->socket->attached ? iter->socket->card_string :
-                       "Empty");
-}
-
-/***********************************************************/
-/* machine registration */
-
-static QEMUMachine *first_machine = NULL;
-QEMUMachine *current_machine = NULL;
-
-int qemu_register_machine(QEMUMachine *m)
-{
-    QEMUMachine **pm;
-    pm = &first_machine;
-    while (*pm != NULL)
-        pm = &(*pm)->next;
-    m->next = NULL;
-    *pm = m;
-    return 0;
-}
-
-static QEMUMachine *find_machine(const char *name)
-{
-    QEMUMachine *m;
-
-    for(m = first_machine; m != NULL; m = m->next) {
-        if (!strcmp(m->name, name))
-            return m;
-    }
-    return NULL;
-}
-
-static QEMUMachine *find_default_machine(void)
-{
-    QEMUMachine *m;
-
-    for(m = first_machine; m != NULL; m = m->next) {
-        if (m->is_default) {
-            return m;
-        }
-    }
-    return NULL;
-}
-
-/***********************************************************/
-/* main execution loop */
-
-static void gui_update(void *opaque)
-{
-    uint64_t interval = GUI_REFRESH_INTERVAL;
-    DisplayState *ds = opaque;
-    DisplayChangeListener *dcl = ds->listeners;
-
-    dpy_refresh(ds);
-
-    while (dcl != NULL) {
-        if (dcl->gui_timer_interval &&
-            dcl->gui_timer_interval < interval)
-            interval = dcl->gui_timer_interval;
-        dcl = dcl->next;
-    }
-    timer_mod(ds->gui_timer, interval + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
-}
-
-static void nographic_update(void *opaque)
-{
-    uint64_t interval = GUI_REFRESH_INTERVAL;
-
-    timer_mod(nographic_timer, interval + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
-}
-
-struct vm_change_state_entry {
-    VMChangeStateHandler *cb;
-    void *opaque;
-    QLIST_ENTRY (vm_change_state_entry) entries;
-};
-
-static QLIST_HEAD(vm_change_state_head, vm_change_state_entry) vm_change_state_head;
-
-VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb,
-                                                     void *opaque)
-{
-    VMChangeStateEntry *e;
-
-    e = g_malloc0(sizeof (*e));
-
-    e->cb = cb;
-    e->opaque = opaque;
-    QLIST_INSERT_HEAD(&vm_change_state_head, e, entries);
-    return e;
-}
-
-void qemu_del_vm_change_state_handler(VMChangeStateEntry *e)
-{
-    QLIST_REMOVE (e, entries);
-    g_free (e);
-}
-
-void vm_state_notify(int running, int reason)
-{
-    VMChangeStateEntry *e;
-
-    for (e = vm_change_state_head.lh_first; e; e = e->entries.le_next) {
-        e->cb(e->opaque, running, reason);
-    }
-}
-
-void vm_start(void)
-{
-    if (!vm_running) {
-        cpu_enable_ticks();
-        vm_running = 1;
-        vm_state_notify(1, 0);
-        //qemu_rearm_alarm_timer(alarm_timer);
-        resume_all_vcpus();
-    }
-}
-
-/* reset/shutdown handler */
-
-typedef struct QEMUResetEntry {
-    QTAILQ_ENTRY(QEMUResetEntry) entry;
-    QEMUResetHandler *func;
-    void *opaque;
-} QEMUResetEntry;
-
-static QTAILQ_HEAD(reset_handlers, QEMUResetEntry) reset_handlers =
-    QTAILQ_HEAD_INITIALIZER(reset_handlers);
-static int reset_requested;
-static int shutdown_requested, shutdown_signal = -1;
-static pid_t shutdown_pid;
-static int powerdown_requested;
-int debug_requested;
-static int vmstop_requested;
-
-int qemu_shutdown_requested(void)
-{
-    int r = shutdown_requested;
-    shutdown_requested = 0;
-    return r;
-}
-
-int qemu_reset_requested(void)
-{
-    int r = reset_requested;
-    reset_requested = 0;
-    return r;
-}
-
-int qemu_powerdown_requested(void)
-{
-    int r = powerdown_requested;
-    powerdown_requested = 0;
-    return r;
-}
-
-static int qemu_debug_requested(void)
-{
-    int r = debug_requested;
-    debug_requested = 0;
-    return r;
-}
-
-static int qemu_vmstop_requested(void)
-{
-    int r = vmstop_requested;
-    vmstop_requested = 0;
-    return r;
-}
-
-void qemu_register_reset(QEMUResetHandler *func, int order, void *opaque)
-{
-    QEMUResetEntry **pre, *re;
-
-    pre = &first_reset_entry;
-    while (*pre != NULL && (*pre)->order >= order) {
-        pre = &(*pre)->next;
-    }
-    re = g_malloc0(sizeof(QEMUResetEntry));
-    re->func = func;
-    re->opaque = opaque;
-    re->order = order;
-    re->next = NULL;
-    *pre = re;
-}
-
-void qemu_system_reset(void)
-{
-    QEMUResetEntry *re;
-
-    /* reset all devices */
-    QTAILQ_FOREACH_SAFE(re, &reset_handlers, entry, nre) {
-        re->func(re->opaque);
-    }
-}
-
-void qemu_system_reset_request(void)
-{
-    if (no_reboot) {
-        shutdown_requested = 1;
-    } else {
-        reset_requested = 1;
-    }
-    qemu_notify_event();
-}
-
-void qemu_system_killed(int signal, pid_t pid)
-{
-    shutdown_signal = signal;
-    shutdown_pid = pid;
-    qemu_system_shutdown_request();
-}
-
-void qemu_system_shutdown_request(void)
-{
-    shutdown_requested = 1;
-    qemu_notify_event();
-}
-
-void qemu_system_powerdown_request(void)
-{
-    powerdown_requested = 1;
-    qemu_notify_event();
-}
-
-void main_loop_wait(int timeout)
-{
-    fd_set rfds, wfds, xfds;
-    int ret, nfds;
-    struct timeval tv;
-
-    qemu_bh_update_timeout(&timeout);
-
-    os_host_main_loop_wait(&timeout);
-
-
-    tv.tv_sec = timeout / 1000;
-    tv.tv_usec = (timeout % 1000) * 1000;
-
-    /* poll any events */
-
-    /* XXX: separate device handlers from system ones */
-    nfds = -1;
-    FD_ZERO(&rfds);
-    FD_ZERO(&wfds);
-    FD_ZERO(&xfds);
-    qemu_iohandler_fill(&nfds, &rfds, &wfds, &xfds);
-    if (slirp_is_inited()) {
-        slirp_select_fill(&nfds, &rfds, &wfds, &xfds);
-    }
-
-    qemu_mutex_unlock_iothread();
-    ret = select(nfds + 1, &rfds, &wfds, &xfds, &tv);
-    qemu_mutex_lock_iothread();
-    qemu_iohandler_poll(&rfds, &wfds, &xfds, ret);
-    if (slirp_is_inited()) {
-        if (ret < 0) {
-            FD_ZERO(&rfds);
-            FD_ZERO(&wfds);
-            FD_ZERO(&xfds);
-        }
-        slirp_select_poll(&rfds, &wfds, &xfds);
-    }
-
-    qemu_run_all_timers();
-
-    /* Check bottom-halves last in case any of the earlier events triggered
-       them.  */
-    qemu_bh_poll();
-
-}
-
-static int vm_can_run(void)
-{
-    if (powerdown_requested)
-        return 0;
-    if (reset_requested)
-        return 0;
-    if (shutdown_requested)
-        return 0;
-    if (debug_requested)
-        return 0;
-    return 1;
-}
-
-static void main_loop(void)
-{
-    int r;
-
-    for (;;) {
-        do {
-#ifdef CONFIG_PROFILER
-            int64_t ti;
-#endif
-            tcg_cpu_exec();
-#ifdef CONFIG_PROFILER
-            ti = profile_getclock();
-#endif
-            main_loop_wait(qemu_calculate_timeout());
-#ifdef CONFIG_PROFILER
-            dev_time += profile_getclock() - ti;
-#endif
-        } while (vm_can_run());
-
-        if (qemu_debug_requested())
-            vm_stop(EXCP_DEBUG);
-        if (qemu_shutdown_requested()) {
-            if (no_shutdown) {
-                vm_stop(0);
-                no_shutdown = 0;
-            } else
-                break;
-        }
-        if (qemu_reset_requested()) {
-            pause_all_vcpus();
-            qemu_system_reset();
-            resume_all_vcpus();
-        }
-        if (qemu_powerdown_requested())
-            qemu_system_powerdown();
-        if ((r = qemu_vmstop_requested()))
-            vm_stop(r);
-    }
-    pause_all_vcpus();
-}
-
-static void version(void)
-{
-    printf("QEMU PC emulator version " QEMU_VERSION QEMU_PKGVERSION ", Copyright (c) 2003-2008 Fabrice Bellard\n");
-}
-
-static void help(int exitcode)
-{
-    version();
-    printf("usage: %s [options] [disk_image]\n"
-           "\n"
-           "'disk_image' is a raw hard disk image for IDE hard disk 0\n"
-           "\n"
-#define DEF(option, opt_arg, opt_enum, opt_help)        \
-           opt_help
-#define DEFHEADING(text) stringify(text) "\n"
-#include "qemu-options.def"
-#undef DEF
-#undef DEFHEADING
-#undef GEN_DOCS
-           "\n"
-           "During emulation, the following keys are useful:\n"
-           "ctrl-alt-f      toggle full screen\n"
-           "ctrl-alt-n      switch to virtual console 'n'\n"
-           "ctrl-alt        toggle mouse and keyboard grab\n"
-           "\n"
-           "When using -nographic, press 'ctrl-a h' to get some help.\n"
-           ,
-           "qemu",
-           DEFAULT_RAM_SIZE,
-#ifndef _WIN32
-           DEFAULT_NETWORK_SCRIPT,
-           DEFAULT_NETWORK_DOWN_SCRIPT,
-#endif
-           DEFAULT_GDBSTUB_PORT,
-           "/tmp/qemu.log");
-    exit(exitcode);
-}
-
-#define HAS_ARG 0x0001
-
-enum {
-#define DEF(option, opt_arg, opt_enum, opt_help)        \
-    opt_enum,
-#define DEFHEADING(text)
-#include "qemu-options.def"
-#undef DEF
-#undef DEFHEADING
-#undef GEN_DOCS
-};
-
-typedef struct QEMUOption {
-    const char *name;
-    int flags;
-    int index;
-} QEMUOption;
-
-static const QEMUOption qemu_options[] = {
-    { "h", 0, QEMU_OPTION_h },
-#define DEF(option, opt_arg, opt_enum, opt_help)        \
-    { option, opt_arg, opt_enum },
-#define DEFHEADING(text)
-#include "qemu-options.def"
-#undef DEF
-#undef DEFHEADING
-#undef GEN_DOCS
-    { NULL, 0, 0 },
-};
-
-static void select_vgahw (const char *p)
-{
-    const char *opts;
-
-    cirrus_vga_enabled = 0;
-    std_vga_enabled = 0;
-    vmsvga_enabled = 0;
-    xenfb_enabled = 0;
-    if (strstart(p, "std", &opts)) {
-        std_vga_enabled = 1;
-    } else if (strstart(p, "cirrus", &opts)) {
-        cirrus_vga_enabled = 1;
-    } else if (strstart(p, "vmware", &opts)) {
-        vmsvga_enabled = 1;
-    } else if (strstart(p, "xenfb", &opts)) {
-        xenfb_enabled = 1;
-    } else if (!strstart(p, "none", &opts)) {
-    invalid_vga:
-        fprintf(stderr, "Unknown vga type: %s\n", p);
-        exit(1);
-    }
-    while (*opts) {
-        const char *nextopt;
-
-        if (strstart(opts, ",retrace=", &nextopt)) {
-            opts = nextopt;
-            if (strstart(opts, "dumb", &nextopt))
-                vga_retrace_method = VGA_RETRACE_DUMB;
-            else if (strstart(opts, "precise", &nextopt))
-                vga_retrace_method = VGA_RETRACE_PRECISE;
-            else goto invalid_vga;
-        } else goto invalid_vga;
-        opts = nextopt;
-    }
-}
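-
-/* Accepted -vga values, per the parser above: "std", "cirrus", "vmware",
- * "xenfb" or "none", optionally followed by ",retrace=dumb" or
- * ",retrace=precise", e.g. "-vga std,retrace=precise". */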
-
-#define MAX_NET_CLIENTS 32
-
-#ifdef _WIN32
-/* Look for support files in the same directory as the executable.  */
-static char *find_datadir(const char *argv0)
-{
-    char *p;
-    char buf[MAX_PATH];
-    DWORD len;
-
-    len = GetModuleFileName(NULL, buf, sizeof(buf) - 1);
-    if (len == 0) {
-        return NULL;
-    }
-
-    buf[len] = 0;
-    p = buf + len - 1;
-    while (p != buf && *p != '\\')
-        p--;
-    *p = 0;
-    if (access(buf, R_OK) == 0) {
-        return g_strdup(buf);
-    }
-    return NULL;
-}
-#else /* !_WIN32 */
-
-/* Find a likely location for support files using the location of the binary.
-   For installed binaries this will be "$bindir/../share/qemu".  When
-   running from the build tree this will be "$bindir/../pc-bios".  */
-#define SHARE_SUFFIX "/share/qemu"
-#define BUILD_SUFFIX "/pc-bios"
-static char *find_datadir(const char *argv0)
-{
-    char *dir;
-    char *p = NULL;
-    char *res;
-#ifdef PATH_MAX
-    char buf[PATH_MAX];
-#endif
-    size_t max_len;
-
-#if defined(__linux__)
-    {
-        int len;
-        len = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
-        if (len > 0) {
-            buf[len] = 0;
-            p = buf;
-        }
-    }
-#elif defined(__FreeBSD__)
-    {
-        int len;
-        len = readlink("/proc/curproc/file", buf, sizeof(buf) - 1);
-        if (len > 0) {
-            buf[len] = 0;
-            p = buf;
-        }
-    }
-#endif
-    /* If we don't have any way of figuring out the actual executable
-       location then try argv[0].  */
-    if (!p) {
-#ifdef PATH_MAX
-        p = buf;
-#endif
-        p = realpath(argv0, p);
-        if (!p) {
-            return NULL;
-        }
-    }
-    dir = dirname(p);
-    dir = dirname(dir);
-
-    max_len = strlen(dir) +
-        MAX(strlen(SHARE_SUFFIX), strlen(BUILD_SUFFIX)) + 1;
-    res = g_malloc0(max_len);
-    snprintf(res, max_len, "%s%s", dir, SHARE_SUFFIX);
-    if (access(res, R_OK)) {
-        snprintf(res, max_len, "%s%s", dir, BUILD_SUFFIX);
-        if (access(res, R_OK)) {
-            g_free(res);
-            res = NULL;
-        }
-    }
-#ifndef PATH_MAX
-    free(p);
-#endif
-    return res;
-}
-#undef SHARE_SUFFIX
-#undef BUILD_SUFFIX
-#endif
-
-char *qemu_find_file(int type, const char *name)
-{
-    int len;
-    const char *subdir;
-    char *buf;
-
-    /* If name contains path separators then try it as a straight path.  */
-    if ((strchr(name, '/') || strchr(name, '\\'))
-        && access(name, R_OK) == 0) {
-        return g_strdup(name);
-    }
-    switch (type) {
-    case QEMU_FILE_TYPE_BIOS:
-        subdir = "";
-        break;
-    case QEMU_FILE_TYPE_KEYMAP:
-        subdir = "keymaps/";
-        break;
-    default:
-        abort();
-    }
-    len = strlen(data_dir) + strlen(name) + strlen(subdir) + 2;
-    buf = g_malloc0(len);
-    snprintf(buf, len, "%s/%s%s", data_dir, subdir, name);
-    if (access(buf, R_OK)) {
-        g_free(buf);
-        return NULL;
-    }
-    return buf;
-}
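-
-/* Example: qemu_find_file(QEMU_FILE_TYPE_BIOS, "bios.bin") probes
- * "<data_dir>/bios.bin", while QEMU_FILE_TYPE_KEYMAP names are looked up
- * under "<data_dir>/keymaps/". A name that already contains a path
- * separator is tried verbatim first; NULL is returned if nothing readable
- * is found. */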
-
-int main(int argc, char **argv, char **envp)
-{
-    const char *gdbstub_dev = NULL;
-    uint32_t boot_devices_bitmap = 0;
-    int i;
-    int snapshot, linux_boot, net_boot;
-    const char *icount_option = NULL;
-    const char *initrd_filename;
-    const char *kernel_filename, *kernel_cmdline;
-    const char *boot_devices = "";
-    DisplayState *ds;
-    DisplayChangeListener *dcl;
-    int cyls, heads, secs, translation;
-    const char *net_clients[MAX_NET_CLIENTS];
-    int nb_net_clients;
-    const char *bt_opts[MAX_BT_CMDLINE];
-    int nb_bt_opts;
-    int hda_index;
-    int optind;
-    const char *r, *optarg;
-    CharDriverState *monitor_hd = NULL;
-    const char *monitor_device;
-    const char *serial_devices[MAX_SERIAL_PORTS];
-    int serial_device_index;
-    const char *parallel_devices[MAX_PARALLEL_PORTS];
-    int parallel_device_index;
-    const char *virtio_consoles[MAX_VIRTIO_CONSOLES];
-    int virtio_console_index;
-    const char *loadvm = NULL;
-    QEMUMachine *machine;
-    const char *cpu_model;
-    const char *usb_devices[MAX_USB_CMDLINE];
-    int usb_devices_index;
-    int tb_size;
-    const char *pid_file = NULL;
-    const char *incoming = NULL;
-    CPUOldState *env;
-    int show_vnc_port = 0;
-
-    init_clocks();
-
-    qemu_cache_utils_init(envp);
-
-    QLIST_INIT (&vm_change_state_head);
-    os_setup_early_signal_handling();
-
-    module_call_init(MODULE_INIT_MACHINE);
-    machine = find_default_machine();
-    cpu_model = NULL;
-    initrd_filename = NULL;
-    ram_size = 0;
-    snapshot = 0;
-    kernel_filename = NULL;
-    kernel_cmdline = "";
-    cyls = heads = secs = 0;
-    translation = BIOS_ATA_TRANSLATION_AUTO;
-    monitor_device = "vc:80Cx24C";
-
-    serial_devices[0] = "vc:80Cx24C";
-    for(i = 1; i < MAX_SERIAL_PORTS; i++)
-        serial_devices[i] = NULL;
-    serial_device_index = 0;
-
-    parallel_devices[0] = "vc:80Cx24C";
-    for(i = 1; i < MAX_PARALLEL_PORTS; i++)
-        parallel_devices[i] = NULL;
-    parallel_device_index = 0;
-
-    for(i = 0; i < MAX_VIRTIO_CONSOLES; i++)
-        virtio_consoles[i] = NULL;
-    virtio_console_index = 0;
-
-    for (i = 0; i < MAX_NODES; i++) {
-        node_mem[i] = 0;
-        node_cpumask[i] = 0;
-    }
-
-    usb_devices_index = 0;
-
-    nb_net_clients = 0;
-    nb_bt_opts = 0;
-    nb_drives = 0;
-    nb_drives_opt = 0;
-    nb_numa_nodes = 0;
-    hda_index = -1;
-
-    nb_nics = 0;
-
-    tb_size = 0;
-    autostart = 1;
-
-    register_watchdogs();
-
-    optind = 1;
-    for(;;) {
-        if (optind >= argc)
-            break;
-        r = argv[optind];
-        if (r[0] != '-') {
-	    hda_index = drive_add(argv[optind++], HD_ALIAS, 0);
-        } else {
-            const QEMUOption *popt;
-
-            optind++;
-            /* Treat --foo the same as -foo.  */
-            if (r[1] == '-')
-                r++;
-            popt = qemu_options;
-            for(;;) {
-                if (!popt->name) {
-                    fprintf(stderr, "%s: invalid option -- '%s'\n",
-                            argv[0], r);
-                    exit(1);
-                }
-                if (!strcmp(popt->name, r + 1))
-                    break;
-                popt++;
-            }
-            if (popt->flags & HAS_ARG) {
-                if (optind >= argc) {
-                    fprintf(stderr, "%s: option '%s' requires an argument\n",
-                            argv[0], r);
-                    exit(1);
-                }
-                optarg = argv[optind++];
-            } else {
-                optarg = NULL;
-            }
-
-            switch(popt->index) {
-            case QEMU_OPTION_M:
-                machine = find_machine(optarg);
-                if (!machine) {
-                    QEMUMachine *m;
-                    printf("Supported machines are:\n");
-                    for(m = first_machine; m != NULL; m = m->next) {
-                        printf("%-10s %s%s\n",
-                               m->name, m->desc,
-                               m->is_default ? " (default)" : "");
-                    }
-                    exit(*optarg != '?');
-                }
-                break;
-            case QEMU_OPTION_cpu:
-                /* hw initialization will check this */
-                if (*optarg == '?') {
-/* XXX: implement xxx_cpu_list for targets that still miss it */
-#if defined(cpu_list)
-                    cpu_list(stdout, &fprintf);
-#endif
-                    exit(0);
-                } else {
-                    cpu_model = optarg;
-                }
-                break;
-            case QEMU_OPTION_initrd:
-                initrd_filename = optarg;
-                break;
-            case QEMU_OPTION_hda:
-                if (cyls == 0)
-                    hda_index = drive_add(optarg, HD_ALIAS, 0);
-                else
-                    hda_index = drive_add(optarg, HD_ALIAS
-			     ",cyls=%d,heads=%d,secs=%d%s",
-                             0, cyls, heads, secs,
-                             translation == BIOS_ATA_TRANSLATION_LBA ?
-                                 ",trans=lba" :
-                             translation == BIOS_ATA_TRANSLATION_NONE ?
-                                 ",trans=none" : "");
-                 break;
-            case QEMU_OPTION_hdb:
-            case QEMU_OPTION_hdc:
-            case QEMU_OPTION_hdd:
-                drive_add(optarg, HD_ALIAS, popt->index - QEMU_OPTION_hda);
-                break;
-            case QEMU_OPTION_drive:
-                drive_add(NULL, "%s", optarg);
-	        break;
-            case QEMU_OPTION_mtdblock:
-                drive_add(optarg, MTD_ALIAS);
-                break;
-            case QEMU_OPTION_sd:
-                drive_add(optarg, SD_ALIAS);
-                break;
-            case QEMU_OPTION_pflash:
-                drive_add(optarg, PFLASH_ALIAS);
-                break;
-            case QEMU_OPTION_snapshot:
-                snapshot = 1;
-                break;
-            case QEMU_OPTION_hdachs:
-                {
-                    const char *p;
-                    p = optarg;
-                    cyls = strtol(p, (char **)&p, 0);
-                    if (cyls < 1 || cyls > 16383)
-                        goto chs_fail;
-                    if (*p != ',')
-                        goto chs_fail;
-                    p++;
-                    heads = strtol(p, (char **)&p, 0);
-                    if (heads < 1 || heads > 16)
-                        goto chs_fail;
-                    if (*p != ',')
-                        goto chs_fail;
-                    p++;
-                    secs = strtol(p, (char **)&p, 0);
-                    if (secs < 1 || secs > 63)
-                        goto chs_fail;
-                    if (*p == ',') {
-                        p++;
-                        if (!strcmp(p, "none"))
-                            translation = BIOS_ATA_TRANSLATION_NONE;
-                        else if (!strcmp(p, "lba"))
-                            translation = BIOS_ATA_TRANSLATION_LBA;
-                        else if (!strcmp(p, "auto"))
-                            translation = BIOS_ATA_TRANSLATION_AUTO;
-                        else
-                            goto chs_fail;
-                    } else if (*p != '\0') {
-                    chs_fail:
-                        fprintf(stderr, "qemu: invalid physical CHS format\n");
-                        exit(1);
-                    }
-		    if (hda_index != -1)
-                        snprintf(drives_opt[hda_index].opt,
-                                 sizeof(drives_opt[hda_index].opt),
-                                 HD_ALIAS ",cyls=%d,heads=%d,secs=%d%s",
-                                 0, cyls, heads, secs,
-			         translation == BIOS_ATA_TRANSLATION_LBA ?
-			     	    ",trans=lba" :
-			         translation == BIOS_ATA_TRANSLATION_NONE ?
-			             ",trans=none" : "");
-                }
-                break;
-            case QEMU_OPTION_numa:
-                if (nb_numa_nodes >= MAX_NODES) {
-                    fprintf(stderr, "qemu: too many NUMA nodes\n");
-                    exit(1);
-                }
-                numa_add(optarg);
-                break;
-            case QEMU_OPTION_nographic:
-                display_type = DT_NOGRAPHIC;
-                break;
-#ifdef CONFIG_CURSES
-            case QEMU_OPTION_curses:
-                display_type = DT_CURSES;
-                break;
-#endif
-            case QEMU_OPTION_portrait:
-                graphic_rotate = 1;
-                break;
-            case QEMU_OPTION_kernel:
-                kernel_filename = optarg;
-                break;
-            case QEMU_OPTION_append:
-                kernel_cmdline = optarg;
-                break;
-            case QEMU_OPTION_cdrom:
-                drive_add(optarg, CDROM_ALIAS);
-                break;
-            case QEMU_OPTION_boot:
-                boot_devices = optarg;
-                /* We just do some generic consistency checks */
-                {
-                    /* Could easily be extended to 64 devices if needed */
-                    const char *p;
-
-                    boot_devices_bitmap = 0;
-                    for (p = boot_devices; *p != '\0'; p++) {
-                        /* Allowed boot devices are:
-                         * a b     : floppy disk drives
-                         * c ... f : IDE disk drives
-                         * g ... m : machine implementation dependent drives
-                         * n ... p : network devices
-                         * It's up to each machine implementation to check
-                         * if the given boot devices match the actual hardware
-                         * implementation and firmware features.
-                         */
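-                        /* e.g. "-boot nc" is accepted and sets the bits for
-                         * 'n' and 'c'; "-boot cc" is rejected as a duplicate
-                         * and "-boot z" as out of range. */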
-                        if (*p < 'a' || *p > 'q') {
-                            fprintf(stderr, "Invalid boot device '%c'\n", *p);
-                            exit(1);
-                        }
-                        if (boot_devices_bitmap & (1 << (*p - 'a'))) {
-                            fprintf(stderr,
-                                    "Boot device '%c' was given twice\n",*p);
-                            exit(1);
-                        }
-                        boot_devices_bitmap |= 1 << (*p - 'a');
-                    }
-                }
-                break;
-            case QEMU_OPTION_fda:
-            case QEMU_OPTION_fdb:
-                drive_add(optarg, FD_ALIAS, popt->index - QEMU_OPTION_fda);
-                break;
-#ifdef TARGET_I386
-            case QEMU_OPTION_no_fd_bootchk:
-                fd_bootchk = 0;
-                break;
-#endif
-            case QEMU_OPTION_net:
-                if (nb_net_clients >= MAX_NET_CLIENTS) {
-                    fprintf(stderr, "qemu: too many network clients\n");
-                    exit(1);
-                }
-                net_clients[nb_net_clients] = optarg;
-                nb_net_clients++;
-                break;
-#ifdef CONFIG_SLIRP
-            case QEMU_OPTION_tftp:
-		tftp_prefix = optarg;
-                break;
-            case QEMU_OPTION_bootp:
-                bootp_filename = optarg;
-                break;
-            case QEMU_OPTION_redir:
-                net_slirp_redir(NULL, optarg, NULL);
-                break;
-#endif
-            case QEMU_OPTION_bt:
-                if (nb_bt_opts >= MAX_BT_CMDLINE) {
-                    fprintf(stderr, "qemu: too many bluetooth options\n");
-                    exit(1);
-                }
-                bt_opts[nb_bt_opts++] = optarg;
-                break;
-#ifdef HAS_AUDIO
-            case QEMU_OPTION_audio_help:
-                AUD_help ();
-                exit (0);
-                break;
-            case QEMU_OPTION_soundhw:
-                select_soundhw (optarg);
-                break;
-#endif
-            case QEMU_OPTION_h:
-                help(0);
-                break;
-            case QEMU_OPTION_version:
-                version();
-                exit(0);
-                break;
-            case QEMU_OPTION_m: {
-                uint64_t value;
-                char *ptr;
-
-                value = strtoul(optarg, &ptr, 10);
-                switch (*ptr) {
-                case 0: case 'M': case 'm':
-                    value <<= 20;
-                    break;
-                case 'G': case 'g':
-                    value <<= 30;
-                    break;
-                default:
-                    fprintf(stderr, "qemu: invalid ram size: %s\n", optarg);
-                    exit(1);
-                }
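-
-                /* e.g. "-m 512" and "-m 512M" both give 512 MiB (512 << 20),
-                 * while "-m 1G" gives 1 GiB (1 << 30). */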
-
-                /* On 32-bit hosts, QEMU is limited by virtual address space */
-                if (value > (2047 << 20) && HOST_LONG_BITS == 32) {
-                    fprintf(stderr, "qemu: at most 2047 MB RAM can be simulated\n");
-                    exit(1);
-                }
-                if (value != (uint64_t)(ram_addr_t)value) {
-                    fprintf(stderr, "qemu: ram size too large\n");
-                    exit(1);
-                }
-                ram_size = value;
-                break;
-            }
-            case QEMU_OPTION_d:
-                {
-                    int mask;
-                    const CPULogItem *item;
-
-                    mask = cpu_str_to_log_mask(optarg);
-                    if (!mask) {
-                        printf("Log items (comma separated):\n");
-                        for (item = cpu_log_items; item->mask != 0; item++) {
-                            printf("%-10s %s\n", item->name, item->help);
-                        }
-                        exit(1);
-                    }
-                    cpu_set_log(mask);
-                }
-                break;
-            case QEMU_OPTION_s:
-                gdbstub_dev = "tcp::" DEFAULT_GDBSTUB_PORT;
-                break;
-            case QEMU_OPTION_gdb:
-                gdbstub_dev = optarg;
-                break;
-            case QEMU_OPTION_L:
-                data_dir = optarg;
-                break;
-            case QEMU_OPTION_bios:
-                bios_name = optarg;
-                break;
-            case QEMU_OPTION_singlestep:
-                singlestep = 1;
-                break;
-            case QEMU_OPTION_S:
-                autostart = 0;
-                break;
-#ifndef _WIN32
-	    case QEMU_OPTION_k:
-		keyboard_layout = optarg;
-		break;
-#endif
-            case QEMU_OPTION_localtime:
-                rtc_utc = 0;
-                break;
-            case QEMU_OPTION_vga:
-                select_vgahw (optarg);
-                break;
-#if defined(TARGET_PPC) || defined(TARGET_SPARC)
-            case QEMU_OPTION_g:
-                {
-                    const char *p;
-                    int w, h, depth;
-                    p = optarg;
-                    w = strtol(p, (char **)&p, 10);
-                    if (w <= 0) {
-                    graphic_error:
-                        fprintf(stderr, "qemu: invalid resolution or depth\n");
-                        exit(1);
-                    }
-                    if (*p != 'x')
-                        goto graphic_error;
-                    p++;
-                    h = strtol(p, (char **)&p, 10);
-                    if (h <= 0)
-                        goto graphic_error;
-                    if (*p == 'x') {
-                        p++;
-                        depth = strtol(p, (char **)&p, 10);
-                        if (depth != 8 && depth != 15 && depth != 16 &&
-                            depth != 24 && depth != 32)
-                            goto graphic_error;
-                    } else if (*p == '\0') {
-                        depth = graphic_depth;
-                    } else {
-                        goto graphic_error;
-                    }
-
-                    graphic_width = w;
-                    graphic_height = h;
-                    graphic_depth = depth;
-                }
-                break;
-#endif
-            case QEMU_OPTION_echr:
-                {
-                    char *r;
-                    term_escape_char = strtol(optarg, &r, 0);
-                    if (r == optarg)
-                        printf("Bad argument to echr\n");
-                    break;
-                }
-            case QEMU_OPTION_monitor:
-                monitor_device = optarg;
-                break;
-            case QEMU_OPTION_serial:
-                if (serial_device_index >= MAX_SERIAL_PORTS) {
-                    fprintf(stderr, "qemu: too many serial ports\n");
-                    exit(1);
-                }
-                serial_devices[serial_device_index] = optarg;
-                serial_device_index++;
-                break;
-            case QEMU_OPTION_watchdog:
-                i = select_watchdog(optarg);
-                if (i > 0)
-                    exit (i == 1 ? 1 : 0);
-                break;
-            case QEMU_OPTION_watchdog_action:
-                if (select_watchdog_action(optarg) == -1) {
-                    fprintf(stderr, "Unknown -watchdog-action parameter\n");
-                    exit(1);
-                }
-                break;
-            case QEMU_OPTION_virtiocon:
-                if (virtio_console_index >= MAX_VIRTIO_CONSOLES) {
-                    fprintf(stderr, "qemu: too many virtio consoles\n");
-                    exit(1);
-                }
-                virtio_consoles[virtio_console_index] = optarg;
-                virtio_console_index++;
-                break;
-            case QEMU_OPTION_parallel:
-                if (parallel_device_index >= MAX_PARALLEL_PORTS) {
-                    fprintf(stderr, "qemu: too many parallel ports\n");
-                    exit(1);
-                }
-                parallel_devices[parallel_device_index] = optarg;
-                parallel_device_index++;
-                break;
-	    case QEMU_OPTION_loadvm:
-		loadvm = optarg;
-		break;
-            case QEMU_OPTION_full_screen:
-                full_screen = 1;
-                break;
-#ifdef CONFIG_SDL
-            case QEMU_OPTION_no_frame:
-                no_frame = 1;
-                break;
-            case QEMU_OPTION_alt_grab:
-                alt_grab = 1;
-                break;
-            case QEMU_OPTION_no_quit:
-                no_quit = 1;
-                break;
-            case QEMU_OPTION_sdl:
-                display_type = DT_SDL;
-                break;
-#endif
-            case QEMU_OPTION_pidfile:
-                pid_file = optarg;
-                break;
-#ifdef TARGET_I386
-            case QEMU_OPTION_win2k_hack:
-                win2k_install_hack = 1;
-                break;
-            case QEMU_OPTION_rtc_td_hack:
-                rtc_td_hack = 1;
-                break;
-            case QEMU_OPTION_acpitable:
-                if(acpi_table_add(optarg) < 0) {
-                    fprintf(stderr, "Wrong acpi table provided\n");
-                    exit(1);
-                }
-                break;
-            case QEMU_OPTION_smbios:
-                if(smbios_entry_add(optarg) < 0) {
-                    fprintf(stderr, "Wrong smbios provided\n");
-                    exit(1);
-                }
-                break;
-#endif
-#ifdef CONFIG_KVM
-            case QEMU_OPTION_enable_kvm:
-                kvm_allowed = 1;
-                break;
-#endif
-            case QEMU_OPTION_usb:
-                usb_enabled = 1;
-                break;
-            case QEMU_OPTION_usbdevice:
-                usb_enabled = 1;
-                if (usb_devices_index >= MAX_USB_CMDLINE) {
-                    fprintf(stderr, "Too many USB devices\n");
-                    exit(1);
-                }
-                usb_devices[usb_devices_index] = optarg;
-                usb_devices_index++;
-                break;
-            case QEMU_OPTION_smp:
-                smp_cpus = atoi(optarg);
-                if (smp_cpus < 1) {
-                    fprintf(stderr, "Invalid number of CPUs\n");
-                    exit(1);
-                }
-                break;
-	    case QEMU_OPTION_vnc:
-                display_type = DT_VNC;
-		vnc_display = optarg;
-		break;
-#ifdef TARGET_I386
-            case QEMU_OPTION_no_acpi:
-                acpi_enabled = 0;
-                break;
-            case QEMU_OPTION_no_hpet:
-                no_hpet = 1;
-                break;
-            case QEMU_OPTION_no_virtio_balloon:
-                no_virtio_balloon = 1;
-                break;
-#endif
-            case QEMU_OPTION_no_reboot:
-                no_reboot = 1;
-                break;
-            case QEMU_OPTION_no_shutdown:
-                no_shutdown = 1;
-                break;
-            case QEMU_OPTION_show_cursor:
-                cursor_hide = 0;
-                break;
-            case QEMU_OPTION_uuid:
-                if(qemu_uuid_parse(optarg, qemu_uuid) < 0) {
-                    fprintf(stderr, "Failed to parse UUID string."
-                            " Wrong format.\n");
-                    exit(1);
-                }
-                break;
-	    case QEMU_OPTION_option_rom:
-		if (nb_option_roms >= MAX_OPTION_ROMS) {
-		    fprintf(stderr, "Too many option ROMs\n");
-		    exit(1);
-		}
-		option_rom[nb_option_roms] = optarg;
-		nb_option_roms++;
-		break;
-#if defined(TARGET_ARM) || defined(TARGET_M68K)
-            case QEMU_OPTION_semihosting:
-                semihosting_enabled = 1;
-                break;
-#endif
-            case QEMU_OPTION_name:
-                qemu_name = optarg;
-                break;
-#if defined(TARGET_SPARC) || defined(TARGET_PPC)
-            case QEMU_OPTION_prom_env:
-                if (nb_prom_envs >= MAX_PROM_ENVS) {
-                    fprintf(stderr, "Too many prom variables\n");
-                    exit(1);
-                }
-                prom_envs[nb_prom_envs] = optarg;
-                nb_prom_envs++;
-                break;
-#endif
-#ifdef TARGET_ARM
-            case QEMU_OPTION_old_param:
-                old_param = 1;
-                break;
-#endif
-            case QEMU_OPTION_clock:
-                configure_alarms(optarg);
-                break;
-            case QEMU_OPTION_startdate:
-                {
-                    struct tm tm;
-                    time_t rtc_start_date = 0;
-                    if (!strcmp(optarg, "now")) {
-                        rtc_date_offset = -1;
-                    } else {
-                        if (sscanf(optarg, "%d-%d-%dT%d:%d:%d",
-                               &tm.tm_year,
-                               &tm.tm_mon,
-                               &tm.tm_mday,
-                               &tm.tm_hour,
-                               &tm.tm_min,
-                               &tm.tm_sec) == 6) {
-                            /* OK */
-                        } else if (sscanf(optarg, "%d-%d-%d",
-                                          &tm.tm_year,
-                                          &tm.tm_mon,
-                                          &tm.tm_mday) == 3) {
-                            tm.tm_hour = 0;
-                            tm.tm_min = 0;
-                            tm.tm_sec = 0;
-                        } else {
-                            goto date_fail;
-                        }
-                        tm.tm_year -= 1900;
-                        tm.tm_mon--;
-                        rtc_start_date = mktimegm(&tm);
-                        if (rtc_start_date == -1) {
-                        date_fail:
-                            fprintf(stderr, "Invalid date format. Valid formats are:\n"
-                                    "'now' or '2006-06-17T16:01:21' or '2006-06-17'\n");
-                            exit(1);
-                        }
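-                        /* e.g. if the host clock reads 16:01:31 on 2006-06-17
-                         * and "-startdate 2006-06-17T16:01:21" was given, the
-                         * stored offset is 10 seconds. */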
-                        rtc_date_offset = time(NULL) - rtc_start_date;
-                    }
-                }
-                break;
-            case QEMU_OPTION_tb_size:
-                tb_size = strtol(optarg, NULL, 0);
-                if (tb_size < 0)
-                    tb_size = 0;
-                break;
-            case QEMU_OPTION_icount:
-                icount_option = optarg;
-                break;
-            case QEMU_OPTION_incoming:
-                incoming = optarg;
-                break;
-#ifdef CONFIG_XEN
-            case QEMU_OPTION_xen_domid:
-                xen_domid = atoi(optarg);
-                break;
-            case QEMU_OPTION_xen_create:
-                xen_mode = XEN_CREATE;
-                break;
-            case QEMU_OPTION_xen_attach:
-                xen_mode = XEN_ATTACH;
-                break;
-#endif
-            }
-        }
-    }
-
-    /* If no data_dir is specified then try to find it relative to the
-       executable path.  */
-    if (!data_dir) {
-        data_dir = find_datadir(argv[0]);
-    }
-    /* If all else fails use the install path specified when building.  */
-    if (!data_dir) {
-        data_dir = CONFIG_QEMU_SHAREDIR;
-    }
-
-    if (pid_file && qemu_create_pidfile(pid_file) != 0) {
-        os_pidfile_error();
-        exit(1);
-    }
-
-    machine->max_cpus = machine->max_cpus ?: 1; /* Default to UP */
-    if (smp_cpus > machine->max_cpus) {
-        fprintf(stderr, "Number of SMP CPUs requested (%d) exceeds the maximum "
-                "supported by machine `%s' (%d)\n", smp_cpus, machine->name,
-                machine->max_cpus);
-        exit(1);
-    }
-
-    if (display_type == DT_NOGRAPHIC) {
-       if (serial_device_index == 0)
-           serial_devices[0] = "stdio";
-       if (parallel_device_index == 0)
-           parallel_devices[0] = "null";
-       if (strncmp(monitor_device, "vc", 2) == 0)
-           monitor_device = "stdio";
-    }
-
-    if (qemu_init_main_loop()) {
-        fprintf(stderr, "qemu_init_main_loop failed\n");
-        exit(1);
-    }
-    linux_boot = (kernel_filename != NULL);
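-    /* net_boot below extracts the four bitmap bits starting at 'n', so e.g.
-     * "-boot n" yields net_boot == 1. */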
-    net_boot = (boot_devices_bitmap >> ('n' - 'a')) & 0xF;
-
-    if (!linux_boot && *kernel_cmdline != '\0') {
-        fprintf(stderr, "-append only allowed with -kernel option\n");
-        exit(1);
-    }
-
-    if (!linux_boot && initrd_filename != NULL) {
-        fprintf(stderr, "-initrd only allowed with -kernel option\n");
-        exit(1);
-    }
-
-    /* boot to floppy or the default cd if no hard disk defined yet */
-    if (!boot_devices[0]) {
-        boot_devices = "cad";
-    }
-    os_set_line_buffering();
-
-    if (init_timer_alarm() < 0) {
-        fprintf(stderr, "could not initialize alarm timer\n");
-        exit(1);
-    }
-    configure_icount(icount_option);
-
-#ifdef _WIN32
-    socket_init();
-#endif
-
-    /* init network clients */
-    if (nb_net_clients == 0) {
-        /* if no clients, we use a default config */
-        net_clients[nb_net_clients++] = "nic";
-#ifdef CONFIG_SLIRP
-        net_clients[nb_net_clients++] = "user";
-#endif
-    }
-
-    for(i = 0;i < nb_net_clients; i++) {
-        if (net_client_parse(net_clients[i]) < 0)
-            exit(1);
-    }
-    net_client_check();
-
-#ifdef TARGET_I386
-    /* XXX: this should be moved into the PC machine instantiation code */
-    if (net_boot != 0) {
-        int netroms = 0;
-	for (i = 0; i < nb_nics && i < 4; i++) {
-	    const char *model = nd_table[i].model;
-	    char buf[1024];
-            char *filename;
-            if (net_boot & (1 << i)) {
-                if (model == NULL)
-                    model = "ne2k_pci";
-                snprintf(buf, sizeof(buf), "pxe-%s.bin", model);
-                filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, buf);
-                if (filename && get_image_size(filename) > 0) {
-                    if (nb_option_roms >= MAX_OPTION_ROMS) {
-                        fprintf(stderr, "Too many option ROMs\n");
-                        exit(1);
-                    }
-                    option_rom[nb_option_roms] = g_strdup(buf);
-                    nb_option_roms++;
-                    netroms++;
-                }
-                if (filename) {
-                    g_free(filename);
-                }
-            }
-	}
-	if (netroms == 0) {
-	    fprintf(stderr, "No valid PXE rom found for network device\n");
-	    exit(1);
-	}
-    }
-#endif
-
-    /* init the bluetooth world */
-    for (i = 0; i < nb_bt_opts; i++)
-        if (bt_parse(bt_opts[i]))
-            exit(1);
-
-    /* init the memory */
-    if (ram_size == 0)
-        ram_size = DEFAULT_RAM_SIZE * 1024 * 1024;
-
-    /* init the dynamic translator */
-    cpu_exec_init_all(tb_size * 1024 * 1024);
-
-    bdrv_init();
-
-    /* we always create the cdrom drive, even if no disk is there */
-
-    if (nb_drives_opt < MAX_DRIVES)
-        drive_add(NULL, CDROM_ALIAS);
-
-    /* we always create at least one floppy */
-
-    if (nb_drives_opt < MAX_DRIVES)
-        drive_add(NULL, FD_ALIAS, 0);
-
-    /* we always create one sd slot, even if no card is in it */
-
-    if (nb_drives_opt < MAX_DRIVES) {
-        drive_add(NULL, SD_ALIAS);
-    }
-
-    /* open the virtual block devices */
-    if (snapshot)
-        qemu_opts_foreach(qemu_find_opts("drive"), drive_enable_snapshot, NULL, 0);
-    if (qemu_opts_foreach(qemu_find_opts("drive"), drive_init_func, &machine->use_scsi, 1) != 0)
-        exit(1);
-
-    //register_savevm("timer", 0, 2, timer_save, timer_load, NULL);
-    register_savevm_live("ram", 0, 3, ram_save_live, NULL, ram_load, NULL);
-
-    /* must be after terminal init, since the SDL library changes signal handlers */
-    os_setup_signal_handling();
-
-    /* Maintain compatibility with multiple stdio monitors */
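-    /* i.e. when the monitor is "stdio" and one of the serial ports is already
-     * "stdio" or "mon:stdio", the monitor is multiplexed onto that serial
-     * port as "mon:stdio" instead of being opened separately. */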
-    if (!strcmp(monitor_device,"stdio")) {
-        for (i = 0; i < MAX_SERIAL_PORTS; i++) {
-            const char *devname = serial_devices[i];
-            if (devname && !strcmp(devname,"mon:stdio")) {
-                monitor_device = NULL;
-                break;
-            } else if (devname && !strcmp(devname,"stdio")) {
-                monitor_device = NULL;
-                serial_devices[i] = "mon:stdio";
-                break;
-            }
-        }
-    }
-
-    if (nb_numa_nodes > 0) {
-        int i;
-
-        if (nb_numa_nodes > smp_cpus) {
-            nb_numa_nodes = smp_cpus;
-        }
-
-        /* If no memory size is given for any node, assume the default case
-         * and distribute the available memory equally across all nodes
-         */
-        for (i = 0; i < nb_numa_nodes; i++) {
-            if (node_mem[i] != 0)
-                break;
-        }
-        if (i == nb_numa_nodes) {
-            uint64_t usedmem = 0;
-
-            /* On Linux, each node's boundary has to be 8MB aligned,
-             * the final node gets the rest.
-             */
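-            /* e.g. 1024 MB across 3 nodes: nodes 0 and 1 each get 336 MB
-             * (1024/3 rounded down to an 8 MB multiple) and node 2 gets the
-             * remaining 352 MB. */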
-            for (i = 0; i < nb_numa_nodes - 1; i++) {
-                node_mem[i] = (ram_size / nb_numa_nodes) & ~((1 << 23UL) - 1);
-                usedmem += node_mem[i];
-            }
-            node_mem[i] = ram_size - usedmem;
-        }
-
-        for (i = 0; i < nb_numa_nodes; i++) {
-            if (node_cpumask[i] != 0)
-                break;
-        }
-        /* Assigning the VCPUs round-robin is easier to implement; guest OSes
-         * must cope with this anyway, because there are BIOSes on real
-         * machines that also use this scheme.
-         */
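-        /* e.g. with 4 VCPUs and 2 nodes, CPUs 0 and 2 land on node 0 and
-         * CPUs 1 and 3 on node 1 (node_cpumask[0] == 0x5, [1] == 0xa). */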
-        if (i == nb_numa_nodes) {
-            for (i = 0; i < smp_cpus; i++) {
-                node_cpumask[i % nb_numa_nodes] |= 1 << i;
-            }
-        }
-    }
-
-    if (kvm_enabled()) {
-        int ret;
-
-        ret = kvm_init(smp_cpus);
-        if (ret < 0) {
-            fprintf(stderr, "failed to initialize KVM\n");
-            exit(1);
-        }
-    }
-
-    if (monitor_device) {
-        monitor_hd = qemu_chr_open("monitor", monitor_device, NULL);
-        if (!monitor_hd) {
-            fprintf(stderr, "qemu: could not open monitor device '%s'\n", monitor_device);
-            exit(1);
-        }
-    }
-
-    for(i = 0; i < MAX_SERIAL_PORTS; i++) {
-        const char *devname = serial_devices[i];
-        if (devname && strcmp(devname, "none")) {
-            char label[32];
-            snprintf(label, sizeof(label), "serial%d", i);
-            serial_hds[i] = qemu_chr_open(label, devname, NULL);
-            if (!serial_hds[i]) {
-                fprintf(stderr, "qemu: could not open serial device '%s'\n",
-                        devname);
-                exit(1);
-            }
-        }
-    }
-
-    for(i = 0; i < MAX_PARALLEL_PORTS; i++) {
-        const char *devname = parallel_devices[i];
-        if (devname && strcmp(devname, "none")) {
-            char label[32];
-            snprintf(label, sizeof(label), "parallel%d", i);
-            parallel_hds[i] = qemu_chr_open(label, devname, NULL);
-            if (!parallel_hds[i]) {
-                fprintf(stderr, "qemu: could not open parallel device '%s'\n",
-                        devname);
-                exit(1);
-            }
-        }
-    }
-
-    for(i = 0; i < MAX_VIRTIO_CONSOLES; i++) {
-        const char *devname = virtio_consoles[i];
-        if (devname && strcmp(devname, "none")) {
-            char label[32];
-            snprintf(label, sizeof(label), "virtcon%d", i);
-            virtcon_hds[i] = qemu_chr_open(label, devname, NULL);
-            if (!virtcon_hds[i]) {
-                fprintf(stderr, "qemu: could not open virtio console '%s'\n",
-                        devname);
-                exit(1);
-            }
-        }
-    }
-
-    module_call_init(MODULE_INIT_DEVICE);
-
-    machine->init(ram_size, boot_devices,
-                  kernel_filename, kernel_cmdline, initrd_filename, cpu_model);
-
-
-    for (env = first_cpu; env != NULL; env = env->next_cpu) {
-        for (i = 0; i < nb_numa_nodes; i++) {
-            if (node_cpumask[i] & (1 << env->cpu_index)) {
-                env->numa_node = i;
-            }
-        }
-    }
-
-    current_machine = machine;
-
-    /* Set KVM's vcpu state to qemu's initial CPUOldState. */
-    if (kvm_enabled()) {
-        int ret;
-
-        ret = kvm_sync_vcpus();
-        if (ret < 0) {
-            fprintf(stderr, "failed to initialize vcpus\n");
-            exit(1);
-        }
-    }
-
-    /* init USB devices */
-    if (usb_enabled) {
-        for(i = 0; i < usb_devices_index; i++) {
-            if (usb_device_add(usb_devices[i], 0) < 0) {
-                fprintf(stderr, "Warning: could not add USB device %s\n",
-                        usb_devices[i]);
-            }
-        }
-    }
-
-    if (!display_state)
-        dumb_display_init();
-    /* just use the first displaystate for the moment */
-    ds = display_state;
-
-    if (display_type == DT_DEFAULT) {
-#if defined(CONFIG_SDL) || defined(CONFIG_COCOA)
-        display_type = DT_SDL;
-#else
-        display_type = DT_VNC;
-        vnc_display = "localhost:0,to=99";
-        show_vnc_port = 1;
-#endif
-    }
-
-
-    switch (display_type) {
-    case DT_NOGRAPHIC:
-        break;
-#if defined(CONFIG_CURSES)
-    case DT_CURSES:
-        curses_display_init(ds, full_screen);
-        break;
-#endif
-#if defined(CONFIG_SDL)
-    case DT_SDL:
-        sdl_display_init(ds, full_screen, no_frame);
-        break;
-#elif defined(CONFIG_COCOA)
-    case DT_SDL:
-        cocoa_display_init(ds, full_screen);
-        break;
-#endif
-    case DT_VNC:
-        vnc_display_init(ds);
-        if (vnc_display_open(ds, vnc_display) < 0)
-            exit(1);
-
-        if (show_vnc_port) {
-            printf("VNC server running on `%s'\n", vnc_display_local_addr(ds));
-        }
-        break;
-    default:
-        break;
-    }
-    dpy_resize(ds);
-
-    dcl = ds->listeners;
-    while (dcl != NULL) {
-        if (dcl->dpy_refresh != NULL) {
-            ds->gui_timer = timer_new(QEMU_CLOCK_REALTIME, SCALE_MS, gui_update, ds);
-            timer_mod(ds->gui_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
-        }
-        dcl = dcl->next;
-    }
-
-    if (display_type == DT_NOGRAPHIC || display_type == DT_VNC) {
-        nographic_timer = timer_new(QEMU_CLOCK_REALTIME, SCALE_MS, nographic_update, NULL);
-        timer_mod(nographic_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
-    }
-
-    text_consoles_set_display(display_state);
-    qemu_chr_initial_reset();
-
-    if (monitor_device && monitor_hd)
-        monitor_init(monitor_hd, MONITOR_USE_READLINE | MONITOR_IS_DEFAULT);
-
-    for(i = 0; i < MAX_SERIAL_PORTS; i++) {
-        const char *devname = serial_devices[i];
-        if (devname && strcmp(devname, "none")) {
-            if (strstart(devname, "vc", 0))
-                qemu_chr_printf(serial_hds[i], "serial%d console\r\n", i);
-        }
-    }
-
-    for(i = 0; i < MAX_PARALLEL_PORTS; i++) {
-        const char *devname = parallel_devices[i];
-        if (devname && strcmp(devname, "none")) {
-            if (strstart(devname, "vc", 0))
-                qemu_chr_printf(parallel_hds[i], "parallel%d console\r\n", i);
-        }
-    }
-
-    for(i = 0; i < MAX_VIRTIO_CONSOLES; i++) {
-        const char *devname = virtio_consoles[i];
-        if (virtcon_hds[i] && devname) {
-            if (strstart(devname, "vc", 0))
-                qemu_chr_printf(virtcon_hds[i], "virtio console%d\r\n", i);
-        }
-    }
-
-    if (gdbstub_dev && gdbserver_start(gdbstub_dev) < 0) {
-        fprintf(stderr, "qemu: could not open gdbserver on device '%s'\n",
-                gdbstub_dev);
-        exit(1);
-    }
-
-    if (loadvm)
-        do_loadvm(cur_mon, loadvm);
-
-    if (incoming) {
-        autostart = 0; /* fixme how to deal with -daemonize */
-        qemu_start_incoming_migration(incoming);
-    }
-
-    if (autostart)
-        vm_start();
-
-    os_setup_post();
-
-    main_loop();
-    quit_timers();
-    net_cleanup();
-
-    return 0;
-}