Add HAX emulation support.

This patch adds support for full virtualization of x86 and x86_64
target platforms on Windows and OS X through Intel's HAXM.

To enable this when using qemu-upstream-i386 or qemu-upstream-x86_64,
use -enable-hax on the command-line.

TESTS=Works fine on Windows, OS X still needs testing.
diff --git a/Makefile.target b/Makefile.target
index a440bcb..9bd4c2e 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -149,6 +149,17 @@
 obj-$(call lnot,$(CONFIG_XEN)) += xen-common-stub.o
 obj-$(call lnot,$(CONFIG_XEN_I386)) += xen-hvm-stub.o
 
+# HAX support: real backend when targeting i386 or x86_64, stub otherwise
+ifeq (y,$(CONFIG_HAX))
+ifneq (,$(filter i386 x86_64,$(TARGET_NAME)))
+obj-y += target-i386/hax-all.o target-i386/hax-slot.o
+obj-$(CONFIG_WIN32) += target-i386/hax-windows.o
+obj-$(CONFIG_DARWIN) += target-i386/hax-darwin.o
+else
+obj-y += hax-stub.o
+endif
+endif # CONFIG_HAX
+
 # Hardware support
 ifeq ($(TARGET_NAME), sparc64)
 obj-y += hw/sparc64/
diff --git a/android-qemu2-glue/build/Makefile.qemu2-target.mk b/android-qemu2-glue/build/Makefile.qemu2-target.mk
index 2edc3aa..2c45668 100644
--- a/android-qemu2-glue/build/Makefile.qemu2-target.mk
+++ b/android-qemu2-glue/build/Makefile.qemu2-target.mk
@@ -124,6 +124,25 @@
         stubs/vhost.c \
         ) \
 
+# HAX support: real backend for x86/x86_64 targets on Windows and Darwin, hax-stub.c otherwise.
+HAX_COMMON_SOURCES := \
+    target-i386/hax-all.c \
+    target-i386/hax-slot.c \
+
+LOCAL_SRC_FILES += \
+    $(call qemu2-if-target,x86 x86_64, \
+        $(call qemu2-if-windows, \
+            $(HAX_COMMON_SOURCES) \
+            target-i386/hax-windows.c) \
+        $(call qemu2-if-darwin, \
+            $(HAX_COMMON_SOURCES) \
+            target-i386/hax-darwin.c) \
+	$(call qemu2-if-linux, \
+	    hax-stub.c) \
+    , \
+        hax-stub.c \
+    ) \
+
 LOCAL_PREBUILTS_OBJ_FILES += \
     $(call qemu2-if-windows,$(QEMU2_AUTO_GENERATED_DIR)/version.o)
 
diff --git a/arch_init.c b/arch_init.c
index 3ea51ab..1d09f32 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -285,6 +285,15 @@
 #endif
 }
 
+int hax_available(void)
+{
+#ifndef CONFIG_HAX
+    return 0; /* this target was built without HAX support */
+#else
+    return 1; /* this target was built with HAX support */
+#endif
+}
+
 int xen_available(void)
 {
 #ifdef CONFIG_XEN
@@ -294,7 +303,6 @@
 #endif
 }
 
-
 TargetInfo *qmp_query_target(Error **errp)
 {
     TargetInfo *info = g_malloc0(sizeof(*info));
diff --git a/configure b/configure
index e21943f..890c14c 100755
--- a/configure
+++ b/configure
@@ -230,6 +230,7 @@
 vhost_net="no"
 vhost_scsi="no"
 kvm="no"
+hax="no"
 rdma=""
 gprof="no"
 debug_tcg="no"
@@ -606,6 +607,7 @@
 Darwin)
   bsd="yes"
   darwin="yes"
+  hax="yes"
   LDFLAGS_SHARED="-bundle -undefined dynamic_lookup"
   if [ "$cpu" = "x86_64" ] ; then
     QEMU_CFLAGS="-arch x86_64 $QEMU_CFLAGS"
@@ -918,6 +920,10 @@
   ;;
   --enable-kvm) kvm="yes"
   ;;
+  --disable-hax) hax="no"
+  ;;
+  --enable-hax) hax="yes"
+  ;;
   --disable-tcg-interpreter) tcg_interpreter="no"
   ;;
   --enable-tcg-interpreter) tcg_interpreter="yes"
@@ -1351,6 +1357,7 @@
   fdt             fdt device tree
   bluez           bluez stack connectivity
   kvm             KVM acceleration support
+  hax             HAX acceleration support
   rdma            RDMA-based migration support
   uuid            uuid support
   vde             support for vde network
@@ -4864,6 +4871,7 @@
 echo "ATTR/XATTR support $attr"
 echo "Install blobs     $blobs"
 echo "KVM support       $kvm"
+echo "HAX support       $hax"
 echo "RDMA support      $rdma"
 echo "TCG interpreter   $tcg_interpreter"
 echo "fdt support       $fdt"
@@ -5811,6 +5819,15 @@
       fi
     fi
 esac
+# Emit CONFIG_HAX into the target config only for softmmu builds
+# of the i386 and x86_64 targets, and only when HAX was requested.
+if test "$hax" = "yes" && test "$target_softmmu" = "yes" ; then
+  case "$target_name" in
+    i386|x86_64)
+      echo "CONFIG_HAX=y" >> $config_target_mak
+      ;;
+  esac
+fi
 if test "$target_bigendian" = "yes" ; then
   echo "TARGET_WORDS_BIGENDIAN=y" >> $config_target_mak
 fi
diff --git a/cpu-exec.c b/cpu-exec.c
index 5d9710a..b211cd9 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -23,6 +23,9 @@
 #include "exec/exec-all.h"
 #include "tcg.h"
 #include "qemu/atomic.h"
+#ifdef CONFIG_HAX
+#include "sysemu/hax.h"
+#endif /* CONFIG_HAX */
 #include "sysemu/qtest.h"
 #include "qemu/timer.h"
 #include "exec/address-spaces.h"
@@ -447,11 +450,27 @@
     return false;
 }
 
+static inline int cpu_get_interrupt_request(CPUState *cpu)
+{
+#ifdef CONFIG_HAX
+    /* With HAX, TCG only runs for two reasons: the guest is in non-paged
+     * mode, or a single MMIO instruction is being emulated. In the MMIO case
+     * only one instruction goes through TCG before control returns to the
+     * HAX kernel, so interrupts must stay masked: report none pending.
+     */
+    const bool mmio_only = hax_enabled() && !hax_vcpu_emulation_mode(cpu);
+
+    return mmio_only ? 0 : cpu->interrupt_request;
+#else
+    return cpu->interrupt_request;
+#endif
+}
+
 static inline void cpu_handle_interrupt(CPUState *cpu,
                                         TranslationBlock **last_tb)
 {
     CPUClass *cc = CPU_GET_CLASS(cpu);
-    int interrupt_request = cpu->interrupt_request;
+    int interrupt_request = cpu_get_interrupt_request(cpu);
 
     if (unlikely(interrupt_request)) {
         if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
@@ -618,11 +637,35 @@
                 break;
             }
 
+#ifdef CONFIG_HAX
+            /* When HAX is enabled but VMX "unrestricted guest" mode is not
+             * supported, call hax_vcpu_exec() to run the current instructions.
+             * The code below breaks out immediately when it returns 0 (e.g.
+             * the vCPU is halted, or received an interrupt). A non-zero result
+             * falls through to the TCG loop, which is needed when the virtual
+             * CPU runs in "real mode", or to handle MMIO operations only.
+             * NOTE(review): confirm against hax_vcpu_exec()'s return values. */
+            if (hax_enabled() && !hax_vcpu_exec(cpu)) {
+                break;
+            }
+#endif /* CONFIG_HAX */
+
             cpu->tb_flushed = false; /* reset before first TB lookup */
             for(;;) {
                 cpu_handle_interrupt(cpu, &last_tb);
                 tb = tb_find_fast(cpu, &last_tb, tb_exit);
                 cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc);
+#ifdef CONFIG_HAX
+                if (hax_enabled() && hax_stop_emulation(cpu)) {
+                    /* This will end TCG emulation of instructions if the vCPU
+                     * just switched to paged-mode (which can be handled by
+                     * hax_vcpu_exec() in the next call to this function), or
+                     * if the single-instruction MMIO operation has completed.
+                     * (see target-i386/translate.c).
+                     */ 
+                    cpu_loop_exit(cpu);
+                }
+#endif /* CONFIG_HAX */
                 /* Try to align the host and virtual clocks
                    if the guest is in advance */
                 align_clocks(&sc, cpu);
diff --git a/cpus.c b/cpus.c
index 84c3520..a2be482 100644
--- a/cpus.c
+++ b/cpus.c
@@ -34,6 +34,7 @@
 #include "exec/gdbstub.h"
 #include "sysemu/dma.h"
 #include "sysemu/kvm.h"
+#include "sysemu/hax.h"
 #include "qmp-commands.h"
 #include "exec/exec-all.h"
 
@@ -711,6 +712,11 @@
 
     CPU_FOREACH(cpu) {
         cpu_synchronize_state(cpu);
+#ifdef CONFIG_HAX
+        if (hax_enabled() && hax_ug_platform()) {
+            hax_cpu_synchronize_state(cpu);
+        }
+#endif
     }
 }
 
@@ -720,6 +726,10 @@
 
     CPU_FOREACH(cpu) {
         cpu_synchronize_post_reset(cpu);
+#ifdef CONFIG_HAX
+        if (hax_enabled() && hax_ug_platform())
+            hax_cpu_synchronize_post_reset(cpu);
+#endif
     }
 }
 
@@ -729,6 +739,10 @@
 
     CPU_FOREACH(cpu) {
         cpu_synchronize_post_init(cpu);
+#ifdef CONFIG_HAX
+        if (hax_enabled() && hax_ug_platform())
+            hax_cpu_synchronize_post_init(cpu);
+#endif
     }
 }
 
@@ -1038,6 +1052,16 @@
     }
 }
 
+#ifdef CONFIG_HAX
+static void qemu_hax_wait_io_event(CPUState *cpu)
+{
+    while (cpu_thread_is_idle(cpu)) { /* idleness is re-checked after every wakeup */
+        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
+    }
+    qemu_wait_io_event_common(cpu); /* then run the common I/O event handling */
+}
+#endif /* CONFIG_HAX */
+
 static void qemu_kvm_wait_io_event(CPUState *cpu)
 {
     while (cpu_thread_is_idle(cpu)) {
@@ -1096,6 +1120,7 @@
     fprintf(stderr, "qtest is not supported under Windows\n");
     exit(1);
 #else
+
     CPUState *cpu = arg;
     sigset_t waitset;
     int r;
@@ -1195,6 +1220,51 @@
     return NULL;
 }
 
+#ifdef CONFIG_HAX
+/* The HAX-specific vCPU thread function. This one should only run when the host
+ * CPU supports the VMX "unrestricted guest" feature. */
+static void *qemu_hax_cpu_thread_fn(void *arg)
+{
+    CPUState *cpu = arg;
+    int r;
+
+    assert(hax_enabled() && hax_ug_platform());
+
+    rcu_register_thread();
+
+    qemu_mutex_lock(&qemu_global_mutex);
+    qemu_thread_get_self(cpu->thread);
+
+    cpu->thread_id = qemu_get_thread_id();
+    cpu->can_do_io = 1;
+    /* cpu->created is set only after hax_init_vcpu() below has run, just
+     * before qemu_cpu_cond is signalled; cpu->halted is left untouched. */
+    current_cpu = cpu;
+
+    hax_init_vcpu(cpu);
+
+    /* signal CPU creation */
+    cpu->created = true;
+    qemu_cond_signal(&qemu_cpu_cond);
+
+    do {
+        if (cpu_can_run(cpu)) {
+            r = hax_smp_cpu_exec(cpu);
+            if (r == EXCP_DEBUG) {
+                cpu_handle_guest_debug(cpu);
+            }
+        }
+        qemu_hax_wait_io_event(cpu);
+    } while (!cpu->unplug || cpu_can_run(cpu));
+
+    hax_vcpu_destroy(cpu);
+    cpu->created = false;
+    qemu_cond_signal(&qemu_cpu_cond);
+    qemu_mutex_unlock_iothread(); /* NOTE(review): taken above with qemu_mutex_lock(); confirm the pairing */
+    return NULL;
+}
+#endif /* CONFIG_HAX */
+
 static void qemu_cpu_kick_thread(CPUState *cpu)
 {
 #ifndef _WIN32
@@ -1209,9 +1279,49 @@
         fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
         exit(1);
     }
+#ifdef __APPLE__
+    // On OS X, the signal isn't caught reliably during shutdown.
+    if (!atomic_mb_read(&exit_request)) {
+        cpu_exit(cpu);
+        atomic_mb_set(&exit_request, 1);
+    }
+#endif /* __APPLE__ */
+#ifdef CONFIG_HAX
+    if (hax_enabled() && hax_ug_platform()) {
+        cpu_exit(cpu);
+    }
+#endif /* CONFIG_HAX */
 #else /* _WIN32 */
-    abort();
-#endif
+    if (cpu->thread_kicked) {
+        return;
+    }
+    cpu->thread_kicked = true;
+    if (!qemu_cpu_is_self(cpu)) {
+        CONTEXT tcgContext;
+
+        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
+            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
+                    GetLastError());
+            exit(1);
+        }
+
+        /* On multi-core systems, we are not sure that the thread is actually
+         * suspended until we can get the context. */
+        tcgContext.ContextFlags = CONTEXT_CONTROL;
+        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
+            continue;
+        }
+
+        cpu_exit(cpu);
+        atomic_mb_set(&exit_request, 1);
+
+        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
+            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
+                    GetLastError());
+            exit(1);
+        }
+    }
+#endif /* _WIN32 */
 }
 
 static void qemu_cpu_kick_no_halt(void)
@@ -1230,7 +1340,21 @@
 void qemu_cpu_kick(CPUState *cpu)
 {
     qemu_cond_broadcast(cpu->halt_cond);
+    /* There are three cases to consider here:
+     *
+     * - TCG is being used without HAX, then qemu_cpu_kick_no_halt() can be
+     *   called directly.
+     * 
+     * - TCG is being used with HAX, then kicking the thread with a signal (on Posix)
+     *   or with a thread suspend/resume (on Win32) is still needed.
+     * 
+     * - TCG is not being used, kick the thread with a signal or suspend/resume.
+     */
+#ifdef CONFIG_HAX
+    if (tcg_enabled() && !(hax_enabled() && hax_ug_platform())) {
+#else
     if (tcg_enabled()) {
+#endif
         qemu_cpu_kick_no_halt();
     } else {
         qemu_cpu_kick_thread(cpu);
@@ -1262,11 +1386,52 @@
 
 void qemu_mutex_lock_iothread(void)
 {
+    /* Technical note on what's going on here, because it's really subtle :-)
+     * 
+     * The single TCG vCPU thread always holds the global mutex when executing
+     * instructions, and only releases it very briefly in qemu_tcg_wait_io_event(),
+     * which gets called periodically to process interrupts.
+     *
+     * Under heavy guest CPU load, it will be hard for other threads to acquire
+     * the lock due to this. To counter that, several things are implemented here:
+     * 
+     * - First, |iothread_requesting_mutex| is used as a global atomic counter that
+     *   will be > 0 whenever other threads are trying to acquire the lock. It is
+     *   actually read by qemu_tcg_wait_io_event() to force the vCPU thread to
+     *   release the lock until its value reaches 0 again. The |qemu_io_proceeded_cond|
+     *   condition variable is used to do that.
+     * 
+     * - Second, if TCG is enabled, a trylock() is first attempted to acquire the lock.
+     *   If this fails, the TCG vCPU thread is kicked, which forces generated code
+     *   to exit to qemu_tcg_wait_io_event() as soon as possible.
+     * 
+     * NOTE: It looks like the use of |iothread_requesting_mutex| isn't needed at all
+     *       when KVM or HAX execution modes are being used, because the corresponding
+     *       vCPU threads actually _release_ the lock just before entering guest mode
+     *       (and re-acquire it just after exiting from it).
+     */
     atomic_inc(&iothread_requesting_mutex);
-    /* In the simple case there is no need to bump the VCPU thread out of
-     * TCG code execution.
+
+    /* A simple lock is sufficient in the following cases:
+     * 
+     * - TCG is not enabled (KVM execution mode).
+     *   [This is the !tcg_enabled() check]
+     * 
+     * - TCG is enabled, but this is called from the TCG vCPU thread directly.
+     *   [This is the qemu_in_vcpu_thread() check]
+     *
+     * - TCG is enabled, but so is HAX in "unrestricted guest" mode, which allows it
+     *   to execute all guest code directly (i.e. there is no TCG vCPU thread).
+     *   [This is the (hax_enabled() && hax_ug_platform()) check].
+     * 
+     * - TCG is enabled, but its thread has not started yet (e.g. when this
+     *   function is called during virtual device realization).
+     *   [This is (!first_cpu || !first_cpu->created)].
      */
     if (!tcg_enabled() || qemu_in_vcpu_thread() ||
+#ifdef CONFIG_HAX
+        (hax_enabled() && hax_ug_platform()) ||
+#endif
         !first_cpu || !first_cpu->created) {
         qemu_mutex_lock(&qemu_global_mutex);
         atomic_dec(&iothread_requesting_mutex);
@@ -1370,6 +1535,17 @@
     static QemuCond *tcg_halt_cond;
     static QemuThread *tcg_cpu_thread;
 
+#ifdef CONFIG_HAX
+    if (hax_enabled()) {
+        /* This code path should only be taken when HAX is enabled but the
+         * CPU doesn't support "unrestricted guest" mode. */
+        assert(!hax_ug_platform());
+        /* Initialize HAX-related state for the TCG thread. This is required for
+         * cpu_exec() to work correctly when HAX is enabled. */
+        hax_init_vcpu(cpu);
+    }
+#endif /* CONFIG_HAX */
+
     /* share a single thread for all cpus with TCG */
     if (!tcg_cpu_thread) {
         cpu->thread = g_malloc0(sizeof(QemuThread));
@@ -1393,6 +1569,35 @@
     }
 }
 
+#ifdef CONFIG_HAX
+static void qemu_hax_start_vcpu(CPUState *cpu)
+{
+    char thread_name[VCPU_THREAD_NAME_SIZE];
+
+    /* This function shall only be called when HAX is enabled, and the host CPU
+     * supports "unrestricted guest" mode. This allows emulation of "real mode"
+     * and completely avoids the use of TCG. It's also the only way to get
+     * multi-core accelerated emulation with HAX. */
+    assert(hax_enabled());
+    assert(hax_ug_platform());
+
+    cpu->thread = g_malloc0(sizeof(QemuThread)); /* one thread and halt condition per vCPU */
+    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
+    qemu_cond_init(cpu->halt_cond);
+
+    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
+             cpu->cpu_index);
+    qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
+                       cpu, QEMU_THREAD_JOINABLE);
+#ifdef _WIN32
+    cpu->hThread = qemu_thread_get_handle(cpu->thread); /* used by the Win32 suspend/resume kick */
+#endif
+    while (!cpu->created) { /* set by qemu_hax_cpu_thread_fn() after hax_init_vcpu() */
+        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
+    }
+}
+#endif /* CONFIG_HAX */
+
 static void qemu_kvm_start_vcpu(CPUState *cpu)
 {
     char thread_name[VCPU_THREAD_NAME_SIZE];
@@ -1443,6 +1648,10 @@
 
     if (kvm_enabled()) {
         qemu_kvm_start_vcpu(cpu);
+#ifdef CONFIG_HAX
+    } else if (hax_enabled() && hax_ug_platform()) {
+        qemu_hax_start_vcpu(cpu);
+#endif 
     } else if (tcg_enabled()) {
         qemu_tcg_init_vcpu(cpu);
     } else {
diff --git a/exec.c b/exec.c
index 8ffde75..8858d8b 100644
--- a/exec.c
+++ b/exec.c
@@ -31,6 +31,9 @@
 #include "hw/xen/xen.h"
 #endif
 #include "sysemu/kvm.h"
+#ifdef CONFIG_HAX
+#include "sysemu/hax.h"
+#endif /* CONFIG_HAX */
 #include "sysemu/sysemu.h"
 #include "qemu/timer.h"
 #include "qemu/config-file.h"
@@ -1574,6 +1577,25 @@
                 qemu_mutex_unlock_ramlist();
                 return;
             }
+#ifdef CONFIG_HAX
+            /*
+             * With HAX, QEMU allocates the virtual address range and the HAX
+             * kernel module populates it with physical memory. There is no
+             * paging yet, so enough free memory must be available in advance.
+             */
+            if (hax_enabled()) {
+                int ret;
+                ret = hax_populate_ram((uint64_t)(uintptr_t)new_block->host,
+                                       new_block->max_length);
+                if (ret < 0) {
+                    error_setg_errno(errp, errno,
+                                     "Hax failed to populate RAM for: '%s'",
+                                     memory_region_name(new_block->mr));
+                    qemu_mutex_unlock_ramlist();
+                    return;
+                }
+            }
+#endif
             memory_try_enable_merging(new_block->host, new_block->max_length);
         }
     }
diff --git a/hax-stub.c b/hax-stub.c
new file mode 100644
index 0000000..2e2b048
--- /dev/null
+++ b/hax-stub.c
@@ -0,0 +1,42 @@
+/*
+ * QEMU HAXM support
+ *
+ * Copyright (c) 2015, Intel Corporation
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "sysemu/hax.h"
+
+int hax_sync_vcpus(void)
+{
+    return 0;
+}
+
+void hax_disable(int disable)
+{
+}
+
+int hax_pre_init(uint64_t ram_size)
+{
+    return 0;
+}
+
+int hax_enabled(void)
+{
+    return 0; /* stub build: HAX is never enabled */
+}
+
+int hax_ug_platform(void)
+{
+    return 0;
+}
+
+int hax_get_max_ram(uint64_t *max_ram)
+{
+    return 0; /* *max_ram is not written by the stub */
+}
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 0458934..fc37220 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -46,6 +46,9 @@
 #include "sysemu/sysemu.h"
 #include "sysemu/numa.h"
 #include "sysemu/kvm.h"
+#ifdef CONFIG_HAX
+#include "sysemu/hax.h"
+#endif
 #include "sysemu/qtest.h"
 #include "kvm_i386.h"
 #include "hw/xen/xen.h"
@@ -2089,6 +2092,10 @@
         smm_available = true;
     } else if (kvm_enabled()) {
         smm_available = kvm_has_smm();
+#ifdef CONFIG_HAX
+    } else if (hax_enabled()) {
+        smm_available = false;
+#endif
     }
 
     if (smm_available) {
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index 14ac43c..cc77d80 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -306,11 +306,15 @@
     info = APIC_COMMON_GET_CLASS(s);
     info->realize(dev, errp);
 
+    /* NOTE: Why exactly does this need to be disabled for HAX? */
+#ifndef CONFIG_HAX
     /* Note: We need at least 1M to map the VAPIC option ROM */
     if (!vapic && s->vapic_control & VAPIC_ENABLE_MASK &&
         ram_size >= 1024 * 1024) {
         vapic = sysbus_create_simple("kvmvapic", -1, NULL);
     }
+#endif /* CONFIG_HAX */
+
     s->vapic = vapic;
     if (apic_report_tpr_access && info->enable_tpr_reporting) {
         info->enable_tpr_reporting(s, true);
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index b5238fb..7c1199c 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -229,6 +229,10 @@
 struct KVMState;
 struct kvm_run;
 
+#ifdef CONFIG_HAX
+struct hax_vcpu_state;
+#endif
+
 #define TB_JMP_CACHE_BITS 12
 #define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)
 
@@ -374,6 +378,11 @@
        (absolute value) offset as small as possible.  This reduces code
        size, especially for hosts without large memory offsets.  */
     uint32_t tcg_exit_req;
+    
+#ifdef CONFIG_HAX
+    bool hax_vcpu_dirty;
+    struct hax_vcpu_state *hax_vcpu;
+#endif
 };
 
 QTAILQ_HEAD(CPUTailQ, CPUState);
diff --git a/include/sysemu/arch_init.h b/include/sysemu/arch_init.h
index c5bbea8..0aba314 100644
--- a/include/sysemu/arch_init.h
+++ b/include/sysemu/arch_init.h
@@ -35,6 +35,7 @@
 bool audio_init(void);
 int kvm_available(void);
 int xen_available(void);
+int hax_available(void);
 
 CpuDefinitionInfoList *arch_query_cpu_definitions(Error **errp);
 
diff --git a/include/sysemu/hax.h b/include/sysemu/hax.h
new file mode 100644
index 0000000..d0ad6d9
--- /dev/null
+++ b/include/sysemu/hax.h
@@ -0,0 +1,73 @@
+/*
+ * QEMU HAXM support
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *
+ * Copyright (c) 2011 Intel Corporation
+ *  Written by:
+ *  Jiang Yunhong<yunhong.jiang@intel.com>
+ *  Xin Xiaohui<xiaohui.xin@intel.com>
+ *  Zhang Xiantao<xiantao.zhang@intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+/* header to be included in non-HAX-specific code */
+#ifndef QEMU_HAX_H
+#define QEMU_HAX_H
+
+#include "config-host.h"
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+/* Returns 1 if HAX is available and enabled, 0 otherwise. */
+int hax_enabled(void);
+
+/* Disable HAX if |disable| is 1, otherwise, enable it iff it is supported by the host CPU.
+ * Use hax_enabled() after this to get the result. */
+void hax_disable(int disable);
+
+/* Returns non-0 if the host CPU supports the VMX "unrestricted guest" feature which
+ * allows the virtual CPU to directly run in "real mode". If true, this allows QEMU to run
+ * several vCPU threads in parallel (see cpus.c). Otherwise, only a single TCG thread
+ * can run, and it will call HAX to run the current instructions, except in case of
+ * "real mode" (paging disabled, typically at boot time), or MMIO operations. */
+int hax_ug_platform(void);
+
+int hax_pre_init(uint64_t ram_size);
+
+int hax_sync_vcpus(void);
+
+/* get the max haxm ram even before haxm library is initialized */
+int hax_get_max_ram(uint64_t *max_ram);
+
+#ifdef CONFIG_HAX
+
+#include "hw/hw.h"
+#include "qemu/bitops.h"
+#include "exec/memory.h"
+
+int hax_init_vcpu(CPUState *cpu);
+int hax_vcpu_exec(CPUState *cpu);
+int hax_smp_cpu_exec(CPUState *cpu);
+void hax_cpu_synchronize_state(CPUState *cpu);
+void hax_cpu_synchronize_post_reset(CPUState *cpu);
+void hax_cpu_synchronize_post_init(CPUState *cpu);
+int hax_populate_ram(uint64_t va, uint32_t size); /* NOTE(review): 32-bit size; confirm callers never pass >= 4 GiB */
+int hax_vcpu_emulation_mode(CPUState *cpu);
+int hax_stop_emulation(CPUState *cpu);
+int hax_stop_translate(CPUState *cpu);
+int hax_vcpu_destroy(CPUState *cpu);
+void hax_raise_event(CPUState *cpu);
+void hax_reset_vcpu_state(void *opaque);
+/* HAX-internal types (see target-i386/hax-i386.h) are not pulled in here:
+ * this header is meant to be included from non-HAX-specific code. */
+
+#endif
+
+#endif /* QEMU_HAX_H */
diff --git a/qemu-options.def b/qemu-options.def
index 9546406..a117697 100644
--- a/qemu-options.def
+++ b/qemu-options.def
@@ -673,6 +673,9 @@
 DEF("enable-kvm", 0, QEMU_OPTION_enable_kvm, \
 "-enable-kvm     enable KVM full virtualization support\n", QEMU_ARCH_ALL)
 
+DEF("enable-hax", 0, QEMU_OPTION_enable_hax, \
+"-enable-hax     enable HAX virtualization support\n", QEMU_ARCH_I386)
+
 DEF("xen-domid", HAS_ARG, QEMU_OPTION_xen_domid,
 "-xen-domid id   specify xen guest domain id\n", QEMU_ARCH_ALL)
 DEF("xen-create", 0, QEMU_OPTION_xen_create,
diff --git a/qemu-options.hx b/qemu-options.hx
index 741814f..9360754 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -3260,6 +3260,16 @@
 if KVM support is enabled when compiling.
 ETEXI
 
+DEF("enable-hax", 0, QEMU_OPTION_enable_hax, \
+    "-enable-hax     enable HAX virtualization support\n", QEMU_ARCH_I386)
+STEXI
+@item -enable-hax
+@findex -enable-hax
+Enable HAX (Hardware-based Acceleration eXecution) support, used for full
+virtualization support on OS X and Windows.  This option
+is only available if HAX support is enabled when compiling.
+ETEXI
+
 DEF("xen-domid", HAS_ARG, QEMU_OPTION_xen_domid,
     "-xen-domid id   specify xen guest domain id\n", QEMU_ARCH_ALL)
 DEF("xen-create", 0, QEMU_OPTION_xen_create,
diff --git a/target-i386/Makefile.objs b/target-i386/Makefile.objs
index b223d79..2820b49 100644
--- a/target-i386/Makefile.objs
+++ b/target-i386/Makefile.objs
@@ -5,3 +5,8 @@
 obj-$(CONFIG_SOFTMMU) += machine.o arch_memory_mapping.o arch_dump.o monitor.o
 obj-$(CONFIG_KVM) += kvm.o hyperv.o
 obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
+ifeq (y,$(CONFIG_HAX))
+obj-y += hax-all.o hax-slot.o
+obj-$(CONFIG_WIN32) += hax-windows.o
+obj-$(CONFIG_DARWIN) += hax-darwin.o
+endif
diff --git a/target-i386/hax-all.c b/target-i386/hax-all.c
new file mode 100644
index 0000000..197e817
--- /dev/null
+++ b/target-i386/hax-all.c
@@ -0,0 +1,1470 @@
+/*
+ * QEMU HAX support
+ *
+ * Copyright IBM, Corp. 2008
+ *           Red Hat, Inc. 2008
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *  Glauber Costa     <gcosta@redhat.com>
+ *
+ * Copyright (c) 2011 Intel Corporation
+ *  Written by:
+ *  Jiang Yunhong<yunhong.jiang@intel.com>
+ *  Xin Xiaohui<xiaohui.xin@intel.com>
+ *  Zhang Xiantao<xiantao.zhang@intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+/*
+ * HAX common code for both windows and darwin
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+#include "hax-i386.h"
+#include "hax-slot.h"
+
+#include "exec/address-spaces.h"
+#include "exec/exec-all.h"
+#include "exec/ioport.h"
+#include "qemu/main-loop.h"
+#include "strings.h"
+#include "sysemu/accel.h"
+
+#ifdef _WIN32
+#include "sysemu/os-win32.h"
+#endif
+
+static const char kHaxVcpuSyncFailed[] = "Failed to sync HAX vcpu context";
+
+#define derror(msg) do { fprintf(stderr, "%s", (msg)); } while (0) /* msg is printed verbatim, never used as a format */
+
+/* #define DEBUG_HAX */
+
+#ifdef DEBUG_HAX
+#define DPRINTF(fmt, ...) \
+    do { fprintf(stdout, fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
+/* Current version */
+const uint32_t hax_cur_version = 0x3;    /* ver 2.0: support fast mmio */
+/* Minimum  HAX kernel version */
+const uint32_t hax_min_version = 0x3;
+
+#define TYPE_HAX_ACCEL ACCEL_CLASS_NAME("hax")
+
+#define HAX_EMUL_ONE    0x1
+#define HAX_EMUL_REAL   0x2
+#define HAX_EMUL_HLT    0x4
+#define HAX_EMUL_EXITLOOP    0x5
+
+#define HAX_EMULATE_STATE_MMIO  0x1
+#define HAX_EMULATE_STATE_REAL  0x2
+#define HAX_EMULATE_STATE_NONE  0x3
+#define HAX_EMULATE_STATE_INITIAL       0x4
+
+#define HAX_NON_UG_PLATFORM 0x0
+#define HAX_UG_PLATFORM     0x1
+
+bool hax_allowed;
+
+static void hax_vcpu_sync_state(CPUArchState * env, int modified);
+static int hax_arch_get_registers(CPUArchState * env);
+static int hax_handle_io(CPUArchState * env, uint32_t df, uint16_t port,
+                         int direction, int size, int count, void *buffer);
+static int hax_handle_fastmmio(CPUArchState * env, struct hax_fastmmio *hft);
+
+struct hax_state hax_global;
+int ret_hax_init = 0;
+static int hax_disabled = 1;
+
+int hax_support = -1;
+int ug_support = 0;
+
+/* Called after hax_init: non-zero iff hax_support is set and HAX was not disabled. */
+int hax_enabled(void)
+{
+    return hax_support && !hax_disabled;
+}
+
+void hax_disable(int disable)
+{
+    hax_disabled = disable; /* non-zero turns HAX off; read back by hax_enabled() */
+}
+
+/* Called after hax_init */
+int hax_ug_platform(void)
+{
+    return ug_support; /* set by hax_get_capability() when the driver reports HAX_CAP_UG */
+}
+
+/* Non-paged modes (CR0.PG clear) are currently emulated by QEMU: non-zero then. */
+int hax_vcpu_emulation_mode(CPUState * cpu)
+{
+    const CPUArchState *env = (CPUArchState *) (cpu->env_ptr);
+    return (env->cr[0] & CR0_PG_MASK) == 0;
+}
+
+static int hax_prepare_emulation(CPUArchState * env)
+{
+    CPUState *cpu = ENV_GET_CPU(env);
+    /* Flush all emulation state, then sync vcpu state from the hax kernel module. */
+    tlb_flush(cpu, 1);
+    tb_flush(cpu);
+    hax_vcpu_sync_state(env, 0);
+    return 0;
+}
+
+/*
+ * Decide whether the TCG translation-block loop must stop: returns 1 after a
+ * single MMIO emulation, or once the vcpu has left emulation (non-paged) mode.
+ */
+static int hax_stop_tbloop(CPUArchState * env)
+{
+    CPUState *cpu = ENV_GET_CPU(env);
+    switch (cpu->hax_vcpu->emulation_state) {
+    case HAX_EMULATE_STATE_MMIO:
+        if (cpu->hax_vcpu->resync) { /* resync requested: reload state, keep emulating */
+            hax_prepare_emulation(env);
+            cpu->hax_vcpu->resync = 0;
+            return 0;
+        }
+        return 1;
+    case HAX_EMULATE_STATE_INITIAL:
+    case HAX_EMULATE_STATE_REAL:
+        if (!hax_vcpu_emulation_mode(cpu)) {
+            return 1;
+        }
+        break;
+    default:
+        fprintf(stderr, "Invalid emulation state in hax_stop_tbloop state %x\n",
+                cpu->hax_vcpu->emulation_state);
+        break;
+    }
+
+    return 0;
+}
+
+int hax_stop_emulation(CPUState * cpu)
+{
+    CPUArchState *env = (CPUArchState *) (cpu->env_ptr);
+
+    if (!hax_stop_tbloop(env)) {
+        return 0;
+    }
+
+    /*
+     * Emulation is over: QEMU may have changed the vcpu state while
+     * emulating, so sync it back to the HAX kernel module.
+     */
+    cpu->hax_vcpu->emulation_state = HAX_EMULATE_STATE_NONE;
+    hax_vcpu_sync_state(env, 1);
+    return 1;
+}
+
+int hax_stop_translate(CPUState * cpu)
+{
+    const struct hax_vcpu_state *vstate = cpu->hax_vcpu;
+
+    /* A zero emulation state is never expected here. */
+    assert(vstate->emulation_state);
+
+    /* Report 1 only while a single MMIO access is being emulated. */
+    return vstate->emulation_state == HAX_EMULATE_STATE_MMIO;
+}
+
+int valid_hax_tunnel_size(uint16_t size)
+{
+    return sizeof(struct hax_tunnel) <= size; /* must be able to hold a whole hax_tunnel */
+}
+
+hax_fd hax_vcpu_get_fd(CPUArchState * env)
+{
+    struct hax_vcpu_state *vcpu = ENV_GET_CPU(env)->hax_vcpu;
+
+    /* Without HAX state attached to this CPU there is no fd to hand out. */
+    return vcpu ? vcpu->fd : HAX_INVALID_FD;
+}
+
+static int hax_get_capability(struct hax_state *hax)
+{
+    int ret;
+    struct hax_capabilityinfo capinfo, *cap = &capinfo;
+    /* Returns 0 if the driver is usable, else hax_capability()'s error, -ENXIO or -ENOSPC. */
+    ret = hax_capability(hax, cap);
+    if (ret)
+        return ret;
+
+    if ((cap->wstatus & HAX_CAP_WORKSTATUS_MASK) == HAX_CAP_STATUS_NOTWORKING) {
+        if (cap->winfo & HAX_CAP_FAILREASON_VT)
+            DPRINTF
+                ("VTX feature is not enabled, HAX driver will not work.\n");
+        else if (cap->winfo & HAX_CAP_FAILREASON_NX)
+            DPRINTF
+                ("NX feature is not enabled, HAX driver will not work.\n");
+        return -ENXIO; /* the reasons above are only printed in DEBUG_HAX builds */
+    }
+
+    if ((cap->winfo & HAX_CAP_UG))
+        ug_support = 1; /* read back through hax_ug_platform() */
+
+    if (cap->wstatus & HAX_CAP_MEMQUOTA) { /* the driver reports a memory quota */
+        if (cap->mem_quota < hax->mem_quota) {
+            fprintf(stderr, "The memory needed by this VM exceeds the driver limit.\n");
+            return -ENOSPC;
+        }
+    }
+    return 0;
+}
+
+static int hax_version_support(struct hax_state *hax)
+{
+    struct hax_module_version version;
+
+    if (hax_mod_version(hax, &version) < 0) {
+        return 0;
+    }
+    /* Reject a module whose current version is below hax_min_version, or
+     * whose compat version is above hax_cur_version. */
+    if (version.cur_version < hax_min_version ||
+        version.compat_version > hax_cur_version) {
+        return 0;
+    }
+    return 1;
+}
+
+/* Create and open HAX vcpu |id| and register it in hax_global.vm->vcpus[].
+ * Returns 0 on success (or if the vcpu already exists), -1 on error. */
+int hax_vcpu_create(int id)
+{
+    struct hax_vcpu_state *vcpu = NULL;
+    int ret;
+
+    if (!hax_global.vm) {
+        fprintf(stderr, "vcpu %x created failed, vm is null\n", id);
+        return -1;
+    }
+
+    if (hax_global.vm->vcpus[id]) {
+        fprintf(stderr, "vcpu %x allocated already\n", id);
+        return 0;
+    }
+
+    /*
+     * g_malloc0() returns zero-filled memory and aborts the process when
+     * the allocation fails, so no NULL check or memset() is needed.
+     */
+    vcpu = g_malloc0(sizeof(*vcpu));
+
+    ret = hax_host_create_vcpu(hax_global.vm->fd, id);
+    if (ret) {
+        fprintf(stderr, "Failed to create vcpu %x\n", id);
+        goto error;
+    }
+
+    vcpu->vcpu_id = id;
+    vcpu->fd = hax_host_open_vcpu(hax_global.vm->id, id);
+    if (hax_invalid_fd(vcpu->fd)) {
+        fprintf(stderr, "Failed to open the vcpu\n");
+        ret = -ENODEV;
+        goto error;
+    }
+
+    hax_global.vm->vcpus[id] = vcpu;
+
+    ret = hax_host_setup_vcpu_channel(vcpu);
+    if (ret) {
+        fprintf(stderr, "Invalid hax tunnel size \n");
+        ret = -EINVAL;
+        goto error;
+    }
+    return 0;
+
+  error:
+    /* vcpu and tunnel will be closed automatically */
+    if (vcpu && !hax_invalid_fd(vcpu->fd))
+        hax_close_fd(vcpu->fd);
+
+    hax_global.vm->vcpus[id] = NULL;
+    g_free(vcpu);
+    return -1;
+}
+
+/* Tears down the HAX vCPU attached to `cpu`.  Returns 0 on success or when
+ * there is nothing to destroy, -1 if the VM no longer exists. */
+int hax_vcpu_destroy(CPUState * cpu)
+{
+    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
+
+    if (!vcpu)      /* must come first: the message below reads vcpu->vcpu_id */
+        return 0;
+
+    if (!hax_global.vm) {
+        fprintf(stderr, "vcpu %x destroy failed, vm is null\n", vcpu->vcpu_id);
+        return -1;
+    }
+
+    /* Closing the fd makes the HAX module clean up the vcpu and its tunnel. */
+    hax_close_fd(vcpu->fd);
+    hax_global.vm->vcpus[vcpu->vcpu_id] = NULL;
+    cpu->hax_vcpu = NULL;   /* do not leave a dangling pointer behind */
+    g_free(vcpu);
+    return 0;
+}
+
+/* Creates and attaches the HAX vCPU for `cpu`; exits the process on failure. */
+int hax_init_vcpu(CPUState * cpu)
+{
+    struct hax_vcpu_state *state;
+    int rc = hax_vcpu_create(cpu->cpu_index);
+
+    if (rc < 0) {
+        fprintf(stderr, "Failed to create HAX vcpu\n");
+        exit(-1);
+    }
+    state = hax_global.vm->vcpus[cpu->cpu_index];
+    state->emulation_state = HAX_EMULATE_STATE_INITIAL;
+    cpu->hax_vcpu = state;
+    cpu->hax_vcpu_dirty = true;
+    qemu_register_reset(hax_reset_vcpu_state, (CPUArchState *) (cpu->env_ptr));
+    return rc;
+}
+
+/* Returns the VM attached to `hax`, creating and opening it on first use;
+ * NULL if the module fd is invalid or the VM cannot be set up. */
+struct hax_vm *hax_vm_create(struct hax_state *hax)
+{
+    struct hax_vm *new_vm;
+    int new_id = 0, rc;
+
+    if (hax_invalid_fd(hax->fd))
+        return NULL;
+    if (hax->vm)
+        return hax->vm;
+    new_vm = g_malloc(sizeof(*new_vm));
+    if (!new_vm)
+        return NULL;
+    memset(new_vm, 0, sizeof(*new_vm));
+    rc = hax_host_create_vm(hax, &new_id);
+    if (rc) {
+        fprintf(stderr, "Failed to create vm %x\n", rc);
+        goto fail;
+    }
+    new_vm->id = new_id;
+    new_vm->fd = hax_host_open_vm(hax, new_id);
+    if (hax_invalid_fd(new_vm->fd)) {
+        fprintf(stderr, "Failed to open vm %d\n", new_id);
+        goto fail;
+    }
+
+    hax->vm = new_vm;
+    hax_slot_init_registry();
+    return new_vm;
+
+  fail:
+    g_free(new_vm);
+    hax->vm = NULL;
+    return NULL;
+}
+
+/* Destroys `vm`; fails with -1 and no side effects if vCPUs remain. */
+int hax_vm_destroy(struct hax_vm *vm)
+{
+    int i;
+    for (i = 0; i < HAX_MAX_VCPU; i++)
+        if (vm->vcpus[i]) {
+            fprintf(stderr, "VCPU should be cleaned before vm clean\n");
+            return -1;
+        }
+    hax_slot_free_registry();   /* only once teardown is certain to proceed */
+    hax_close_fd(vm->fd);
+    g_free(vm);
+    hax_global.vm = NULL;
+    return 0;
+}
+
+static void hax_set_phys_mem(MemoryRegionSection *section)
+{
+    MemoryRegion *mr = section->mr;
+    hwaddr start_pa = section->offset_within_address_space;
+    ram_addr_t size = int128_get64(section->size);
+    unsigned int delta;
+    void *host_ptr;
+    int flags;
+
+    /* Registers the page-aligned part of a RAM/ROM section as a HAX slot. */
+    if (!memory_region_is_ram(mr)) {
+        return;
+    }
+
+    /* Adjust start_pa and size so that they are page-aligned. (Cf
+     * kvm_set_phys_mem() in kvm-all.c).
+     */
+    delta = TARGET_PAGE_SIZE - (start_pa & ~TARGET_PAGE_MASK);
+    delta &= ~TARGET_PAGE_MASK;     /* becomes 0 when start_pa is already aligned */
+    if (delta > size) {
+        return;
+    }
+    start_pa += delta;
+    size -= delta;
+    size &= TARGET_PAGE_MASK;       /* drop a trailing partial page */
+    if (!size || start_pa & ~TARGET_PAGE_MASK) {
+        return;
+    }
+
+    host_ptr = memory_region_get_ram_ptr(mr) + section->offset_within_region
+               + delta;
+    flags = memory_region_is_rom(mr) ? 1 : 0;   /* 1 for ROM, 0 for plain RAM */
+    hax_slot_register(start_pa, size, (uintptr_t) host_ptr, flags);
+}
+
+static void hax_region_add(MemoryListener * listener,
+                           MemoryRegionSection * section)
+{   /* MemoryListener hook: pass each added section to hax_set_phys_mem(). */
+    hax_set_phys_mem(section);
+}
+
+static void hax_region_del(MemoryListener * listener,
+                           MemoryRegionSection * section)
+{
+    /* Intentionally empty: memory mappings will be removed at VM close. */
+}
+
+/* avoid implicit declaration warning on Windows */
+int ffsl(long value);
+/*
+ * Dirty-log sync hook.
+ *
+ * HAX gives QEMU no information about which guest pages were written, so
+ * dirty tracking is faked: every sync reports the whole RAM section as
+ * dirty.
+ *
+ * A per-page bitmap is deliberately not built here.  Seeding each bitmap
+ * word with 1 flags only the first page of every HOST_LONG_BITS pages,
+ * which contradicts "always dirty", and a stack array sized by the section
+ * length grows with the size of the RAM region.
+ */
+static void hax_log_sync(MemoryListener * listener,
+                         MemoryRegionSection * section)
+{
+    MemoryRegion *mr = section->mr;
+    hwaddr start;
+    uint64_t length;
+
+    if (!memory_region_is_ram(mr)) {
+        /* Skip MMIO regions */
+        return;
+    }
+
+    /* memory_region_set_dirty() takes an offset relative to the region. */
+    start = section->offset_within_region;
+    length = int128_get64(section->size);
+
+    /* Mark every page of the section dirty in a single call. */
+    memory_region_set_dirty(mr, start, length);
+}
+
+static void hax_log_global_start(struct MemoryListener *listener)
+{   /* No-op: nothing is done when global dirty logging starts. */
+}
+
+static void hax_log_global_stop(struct MemoryListener *listener)
+{   /* No-op: nothing is done when global dirty logging stops. */
+}
+
+static void hax_log_start(MemoryListener * listener,
+                          MemoryRegionSection * section,
+                          int old, int new)
+{   /* No-op: per-section dirty logging needs no setup here. */
+}
+
+static void hax_log_stop(MemoryListener * listener,
+                         MemoryRegionSection * section,
+                         int old, int new)
+{   /* No-op: per-section dirty logging needs no teardown here. */
+}
+
+static void hax_begin(MemoryListener * listener)
+{   /* No-op: no work is done at the start of a memory update. */
+}
+
+static void hax_commit(MemoryListener * listener)
+{   /* No-op: no work is done when a memory update is committed. */
+}
+
+static void hax_region_nop(MemoryListener * listener,
+                           MemoryRegionSection * section)
+{   /* No-op: sections reported through this hook need no action. */
+}
+
+static MemoryListener hax_memory_listener = {   /* registered by hax_init() */
+    .begin = hax_begin,
+    .commit = hax_commit,
+    .region_add = hax_region_add,       /* registers RAM/ROM sections with HAX */
+    .region_del = hax_region_del,       /* empty: mappings go away at VM close */
+    .region_nop = hax_region_nop,
+    .log_start = hax_log_start,
+    .log_stop = hax_log_stop,
+    .log_sync = hax_log_sync,           /* dirty tracking is faked */
+    .log_global_start = hax_log_global_start,
+    .log_global_stop = hax_log_global_stop,
+};
+
+static void hax_handle_interrupt(CPUState * cpu, int mask)
+{   /* Latch the request bits, then kick the vCPU unless it is the caller. */
+    cpu->interrupt_request |= mask;
+    if (qemu_cpu_is_self(cpu)) {
+        return;
+    }
+    qemu_cpu_kick(cpu);
+}
+
+/* Clears hax_global and records ram_size as its memory quota; returns 0. */
+int hax_pre_init(uint64_t ram_size)
+{
+    struct hax_state *hax = &hax_global;
+
+    fprintf(stdout, "Hax is %s\n", hax_disabled ? "disabled" : "enabled");
+    if (hax_disabled)
+        return 0;
+    memset(hax, 0, sizeof(struct hax_state));
+    hax->mem_quota = ram_size;
+    /* uint64_t is not unsigned long long everywhere; cast to match %llx. */
+    fprintf(stdout, "Hax ram_size 0x%llx\n", (unsigned long long) ram_size);
+    return 0;
+}
+
+static int hax_init(void)
+{
+    struct hax_state *hax = NULL;
+    struct hax_qemu_version qversion;
+    int ret;
+    /* Bring-up: open module, check capability and version, create the VM. */
+    hax_support = 0;
+
+    hax = &hax_global;
+
+    /* Open the HAX module; every later step uses this descriptor. */
+    hax->fd = hax_mod_open();
+    if (hax_invalid_fd(hax->fd)) {
+        hax->fd = 0;    /* 0 tells the error path there is nothing to close */
+        ret = -ENODEV;
+        goto error;
+    }
+
+    ret = hax_get_capability(hax);
+
+    if (ret) {
+        if (ret != -ENOSPC)     /* only -ENOSPC is passed through unchanged */
+            ret = -EINVAL;
+        goto error;
+    }
+
+    if (!hax_version_support(hax)) {
+        fprintf(stderr, "Incompat Hax version. Qemu current version %x ",
+                hax_cur_version);
+        fprintf(stderr, "requires minimum HAX version %x\n", hax_min_version);
+        ret = -EINVAL;
+        goto error;
+    }
+
+    hax->vm = hax_vm_create(hax);
+    if (!hax->vm) {
+        fprintf(stderr, "Failed to create HAX VM\n");
+        ret = -EINVAL;
+        goto error;
+    }
+
+    memory_listener_register(&hax_memory_listener, &address_space_memory);
+
+    qversion.cur_version = hax_cur_version;
+    qversion.min_version = hax_min_version;
+    hax_notify_qemu_version(hax->vm->fd, &qversion);
+    cpu_interrupt_handler = hax_handle_interrupt;
+    hax_support = 1;
+
+    return ret;     /* still 0 from the successful capability check */
+  error:
+    if (hax->vm)
+        hax_vm_destroy(hax->vm);
+    if (hax->fd)
+        hax_mod_close(hax);
+
+    return ret;
+}
+
+/* Accelerator init hook: hax_init() failures other than -ENOSPC are returned. */
+static int hax_accel_init(MachineState *ms)
+{
+    ret_hax_init = hax_init();
+    if (ret_hax_init != 0 && ret_hax_init != -ENOSPC) {
+        fprintf(stderr, "No accelerator found.\n");
+        return ret_hax_init;
+    }
+
+    /* need tcg for non-UG platform in real mode */
+    if (!hax_ug_platform()) {
+        tcg_exec_init(tcg_tb_size * 1024 * 1024);
+    }
+    fprintf(stdout, "HAX is %s and emulator runs in %s mode.\n",
+            ret_hax_init ? "not working" : "working",
+            ret_hax_init ? "emulation" : "fast virt");
+    return 0;
+}
+
+/*
+ * Completes the fast-MMIO exit described by `hft` through QEMU's physical
+ * memory API and, for reads, stores the result back.  Always returns 0.
+ */
+static int hax_handle_fastmmio(CPUArchState * env, struct hax_fastmmio *hft)
+{
+    uint64_t data = hft->value;
+
+    /*
+     * Fast MMIO skips the full vCPU state sync with the HAXM driver, but
+     * some MMIO handlers (like qemu_pipe) use guest virtual addresses, so
+     * the control registers are copied over to let QEMU translate them.
+     */
+    env->cr[0] = hft->_cr0;
+    env->cr[2] = hft->_cr2;
+    env->cr[3] = hft->_cr3;
+    env->cr[4] = hft->_cr4;
+
+    cpu_physical_memory_rw(hft->gpa, (uint8_t *) &data, hft->size, hft->direction);
+    if (hft->direction == 0) {
+        hft->value = data;
+    }
+    return 0;
+}
+
+/*
+ * Replays a port I/O exit: `count` accesses of `size` bytes on `port`, using
+ * `buffer` for the data.  With `df` set the buffer is walked backwards from
+ * its last element.  Sizes other than 1, 2 and 4 are skipped.  Returns 0.
+ */
+static int hax_handle_io(CPUArchState * env, uint32_t df, uint16_t port,
+                         int direction, int size, int count, void *buffer)
+{
+    uint8_t *cursor = buffer;
+    const int step = df ? -size : size;
+    int n;
+
+    if (df) {
+        cursor += size * count - size;
+    }
+
+    for (n = 0; n < count; n++, cursor += step) {
+        const int is_in = (direction == HAX_EXIT_IO_IN);
+        switch (size) {
+        case 1:
+            if (is_in) {
+                stb_p(cursor, cpu_inb(port));
+            } else {
+                cpu_outb(port, ldub_p(cursor));
+            }
+            break;
+        case 2:
+            if (is_in) {
+                stw_p(cursor, cpu_inw(port));
+            } else {
+                cpu_outw(port, lduw_p(cursor));
+            }
+            break;
+        case 4:
+            if (is_in) {
+                stl_p(cursor, cpu_inl(port));
+            } else {
+                cpu_outl(port, ldl_p(cursor));
+            }
+            break;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Injects a pending PIC interrupt when the tunnel says the guest can take one,
+ * then sets or clears the interrupt-window request.  Always returns 0.
+ */
+static int hax_vcpu_interrupt(CPUArchState * env)
+{
+    CPUState *cpu = ENV_GET_CPU(env);
+    struct hax_tunnel *tunnel = cpu->hax_vcpu->tunnel;
+
+    /*
+     * Try to inject an interrupt if the guest can accept it.
+     * Unlike KVM, the HAX kernel module (not QEMU) checks eflags.
+     */
+    if (tunnel->ready_for_interrupt_injection &&
+        (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
+        int vector = cpu_get_pic_interrupt(env);
+
+        if (vector >= 0) {
+            hax_inject_interrupt(env, vector);
+            cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
+        }
+    }
+
+    /*
+     * If an interrupt is still pending, request an interrupt window exit so
+     * control returns to userspace as soon as the guest can receive it.
+     */
+    tunnel->request_interrupt_window =
+        (cpu->interrupt_request & CPU_INTERRUPT_HARD) ? 1 : 0;
+    return 0;
+}
+
+/* Flags a pending user event in the vCPU's tunnel; no-op without a vCPU. */
+void hax_raise_event(CPUState * cpu)
+{
+    struct hax_vcpu_state *state = cpu->hax_vcpu;
+    if (state) {
+        state->tunnel->user_event_pending = 1;
+    }
+}
+
+/*
+ * Asks the HAX kernel module to run the vCPU until one of these happens:
+ * 1. The guest crashes or shuts down
+ * 2. QEMU must emulate (guest MMIO instruction, or non-PG emulation mode)
+ * 3. The guest executes HLT
+ * 4. QEMU has a signal/event pending
+ * 5. An unknown VMX exit happens
+ * Returns the HAX_EMUL_* code that tells the caller what to do next.
+ */
+extern void qemu_system_reset_request(void);
+static int hax_vcpu_hax_exec(CPUArchState * env, int ug_platform)
+{
+    int ret = 0;
+    CPUState *cpu = ENV_GET_CPU(env);
+    X86CPU *x86_cpu = X86_CPU(cpu);
+    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
+    struct hax_tunnel *ht = vcpu->tunnel;
+
+    if (!ug_platform) {
+        if (hax_vcpu_emulation_mode(cpu)) {
+            DPRINTF("Trying to execute vcpu at eip:%lx\n", env->eip);
+            return HAX_EMUL_EXITLOOP;
+        }
+
+        cpu->halted = 0;
+
+        if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
+            cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
+            apic_poll_irq(x86_cpu->apic_state);
+        }
+    } else {                        /* UG platform */
+        if (!hax_enabled()) {
+            DPRINTF("Trying to vcpu execute at eip:%lx\n", env->eip);
+            return HAX_EMUL_EXITLOOP;
+        }
+
+        cpu->halted = 0;
+
+        if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
+            cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
+            apic_poll_irq(x86_cpu->apic_state);
+        }
+
+        if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
+            DPRINTF("\nUG hax_vcpu_hax_exec: handling INIT for %d \n",
+                    cpu->cpu_index);
+            do_cpu_init(x86_cpu);
+            hax_vcpu_sync_state(env, 1);    /* push the re-initialised state */
+        }
+
+        if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
+            DPRINTF("UG hax_vcpu_hax_exec: handling SIPI for %d \n",
+                    cpu->cpu_index);
+            hax_vcpu_sync_state(env, 0);    /* pull, apply SIPI, push back */
+            do_cpu_sipi(x86_cpu);
+            hax_vcpu_sync_state(env, 1);
+        }
+    }
+
+    do {
+        int hax_ret;
+
+        if (cpu->exit_request) {
+            ret = HAX_EMUL_EXITLOOP;
+            break;
+        }
+
+        hax_vcpu_interrupt(env);
+        if (!ug_platform) {
+            hax_ret = hax_vcpu_run(vcpu);
+        } else {                /* UG platform */
+            /* The iothread lock is released while the guest runs. */
+            qemu_mutex_unlock_iothread();
+            hax_ret = hax_vcpu_run(vcpu);
+            qemu_mutex_lock_iothread();
+            current_cpu = cpu;
+        }
+
+        /* Simply continue the vcpu_run if system call interrupted */
+        if (hax_ret == -EINTR || hax_ret == -EAGAIN) {
+            DPRINTF("io window interrupted\n");
+            continue;
+        }
+
+        if (hax_ret < 0) {
+            fprintf(stderr, "vcpu run failed for vcpu  %x\n", vcpu->vcpu_id);
+            abort();
+        }
+        switch (ht->_exit_status) {
+        case HAX_EXIT_IO:
+            ret = hax_handle_io(env, ht->pio._df, ht->pio._port,
+                            ht->pio._direction,
+                            ht->pio._size, ht->pio._count, vcpu->iobuf);
+            break;
+        case HAX_EXIT_MMIO:
+            ret = HAX_EMUL_ONE;
+            break;
+        case HAX_EXIT_FAST_MMIO:
+            ret = hax_handle_fastmmio(env, (struct hax_fastmmio *) vcpu->iobuf);
+            break;
+        case HAX_EXIT_REAL:
+            ret = HAX_EMUL_REAL;
+            break;
+        /* Guest state changed, currently only for shutdown */
+        case HAX_EXIT_STATECHANGE:
+            fprintf(stdout, "VCPU shutdown request\n");
+            qemu_system_reset_request();
+            hax_prepare_emulation(env);
+            cpu_dump_state(cpu, stderr, fprintf, 0);
+            ret = HAX_EMUL_EXITLOOP;
+            break;
+        case HAX_EXIT_UNKNOWN_VMEXIT:
+            fprintf(stderr, "Unknown VMX exit %x from guest\n",
+                    ht->_exit_reason);
+            qemu_system_reset_request();
+            hax_prepare_emulation(env);
+            cpu_dump_state(cpu, stderr, fprintf, 0);
+            ret = HAX_EMUL_EXITLOOP;
+            break;
+        case HAX_EXIT_HLT:
+            if (!(cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
+                !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
+                /* hlt instruction with interrupt disabled is shutdown */
+                env->eflags |= IF_MASK;
+                cpu->halted = 1;
+                cpu->exception_index = EXCP_HLT;
+                ret = HAX_EMUL_HLT;
+            }
+            break;
+        /* In these situations we simply re-enter the hax module. */
+        case HAX_EXIT_INTERRUPT:
+        case HAX_EXIT_PAUSED:
+            break;
+        default:
+            fprintf(stderr, "Unknow exit %x from hax\n", ht->_exit_status);
+            qemu_system_reset_request();
+            hax_prepare_emulation(env);
+            cpu_dump_state(cpu, stderr, fprintf, 0);
+            ret = HAX_EMUL_EXITLOOP;
+            break;
+        }
+    } while (!ret);     /* handlers that return 0 keep the guest running */
+
+    if (cpu->exit_request) {
+        cpu->exit_request = 0;
+        cpu->exception_index = EXCP_INTERRUPT;
+    }
+    return ret;
+}
+
+/* run_on_cpu() worker: reads the vCPU state from HAX into QEMU's env. */
+static void do_hax_cpu_synchronize_state(void *arg)
+{
+    CPUState *cs = arg;
+
+    hax_arch_get_registers(cs->env_ptr);
+    cs->hax_vcpu_dirty = true;
+}
+
+void hax_cpu_synchronize_state(CPUState *cpu)
+{
+    /* Runs do_hax_cpu_synchronize_state() for `cpu` via run_on_cpu().
+     * TODO: Skip the sync when cpu->hax_vcpu_dirty is already true (cf.
+     * kvm_cpu_synchronize_state() in kvm-all.c). That requires the flag to
+     * be updated consistently wherever vCPU state is synced between QEMU
+     * and HAX; until then the sync is performed unconditionally.
+     */
+    run_on_cpu(cpu, do_hax_cpu_synchronize_state, cpu);
+}
+
+/* run_on_cpu() worker: writes QEMU's vCPU state to HAX after a reset. */
+static void do_hax_cpu_synchronize_post_reset(void *arg)
+{
+    CPUState *cs = arg;
+
+    hax_vcpu_sync_state(cs->env_ptr, 1);
+    cs->hax_vcpu_dirty = false;
+}
+
+void hax_cpu_synchronize_post_reset(CPUState * cpu)
+{   /* Dispatches the post-reset state push through run_on_cpu(). */
+    run_on_cpu(cpu, do_hax_cpu_synchronize_post_reset, cpu);
+}
+
+/* run_on_cpu() worker: writes QEMU's vCPU state to HAX after init. */
+static void do_hax_cpu_synchronize_post_init(void *arg)
+{
+    CPUState *cs = arg;
+
+    hax_vcpu_sync_state(cs->env_ptr, 1);
+    cs->hax_vcpu_dirty = false;
+}
+
+void hax_cpu_synchronize_post_init(CPUState * cpu)
+{   /* Dispatches the post-init state push through run_on_cpu(). */
+    run_on_cpu(cpu, do_hax_cpu_synchronize_post_init, cpu);
+}
+
+/*
+ * Runs the vCPU in HAX using the non-UG code path.
+ * Returns 1 when QEMU needs to emulate, 0 when the caller should exit its loop.
+ */
+int hax_vcpu_exec(CPUState * cpu)
+{
+    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
+    CPUArchState *env = (CPUArchState *) (cpu->env_ptr);
+    int why;
+
+    /* An emulation phase is already in progress: let QEMU carry on. */
+    if (vcpu->emulation_state != HAX_EMULATE_STATE_NONE) {
+        return 1;
+    }
+
+    why = hax_vcpu_hax_exec(env, HAX_NON_UG_PLATFORM);
+
+    /* Halt and exit-loop results need no emulation. */
+    if (why == HAX_EMUL_HLT || why == HAX_EMUL_EXITLOOP) {
+        return 0;
+    }
+
+    if (why == HAX_EMUL_ONE) {
+        vcpu->emulation_state = HAX_EMULATE_STATE_MMIO;
+    } else if (why == HAX_EMUL_REAL) {
+        vcpu->emulation_state = HAX_EMULATE_STATE_REAL;
+    } else {
+        fprintf(stderr, "Unknown hax vcpu exec return %x\n", why);
+        abort();
+    }
+
+    /* Both emulation kinds hand over through hax_prepare_emulation(). */
+    hax_prepare_emulation(env);
+    return 1;
+}
+
+/*
+ * UG-platform run loop: keeps executing the vCPU in HAX until an exception
+ * index at or above EXCP_INTERRUPT is pending, then consumes and returns it.
+ */
+int hax_smp_cpu_exec(CPUState * cpu)
+{
+    CPUArchState *env = (CPUArchState *) (cpu->env_ptr);
+
+    for (;;) {
+        int why;
+
+        if (cpu->exception_index >= EXCP_INTERRUPT) {
+            int pending = cpu->exception_index;
+            cpu->exception_index = -1;
+            return pending;
+        }
+        why = hax_vcpu_hax_exec(env, HAX_UG_PLATFORM);
+        if (why != HAX_EMUL_HLT && why != HAX_EMUL_EXITLOOP) {
+            fprintf(stderr, "Unknown hax vcpu return %x\n", why);
+            abort();
+        }
+    }
+}
+
+#define HAX_RAM_INFO_ROM 0x1
+
+/* Fills `lhs` with the fixed virtual-8086 attributes for segment `rhs`. */
+static void set_v8086_seg(struct segment_desc_t *lhs, const SegmentCache * rhs)
+{
+    memset(lhs, 0, sizeof(*lhs));
+    /* Selector, base and limit come from QEMU's segment cache. */
+    lhs->selector = rhs->selector;
+    lhs->base = rhs->base;
+    lhs->limit = rhs->limit;
+    /* Fixed attributes; operand_size, long_mode, granularity and available
+     * stay 0 from the memset above. */
+    lhs->type = 3;
+    lhs->dpl = 3;
+    lhs->present = 1;
+    lhs->desc = 1;
+}
+
+/* Converts a HAX segment descriptor into QEMU's SegmentCache layout. */
+static void get_seg(SegmentCache * lhs, const struct segment_desc_t *rhs)
+{
+    lhs->selector = rhs->selector;
+    lhs->base = rhs->base;
+    lhs->limit = rhs->limit;
+
+    /* Each attribute lands on its DESC_* position, by shift or by mask. */
+    lhs->flags = (rhs->type << DESC_TYPE_SHIFT) | (rhs->dpl << DESC_DPL_SHIFT)
+        | (rhs->operand_size << DESC_B_SHIFT) | (rhs->long_mode << DESC_L_SHIFT)
+        | (rhs->present * DESC_P_MASK) | (rhs->desc * DESC_S_MASK)
+        | (rhs->granularity * DESC_G_MASK) | (rhs->available * DESC_AVL_MASK);
+}
+
+/* Converts a QEMU SegmentCache into the HAX segment descriptor layout. */
+static void set_seg(struct segment_desc_t *lhs, const SegmentCache * rhs)
+{
+    const unsigned attrs = rhs->flags;
+    memset(lhs, 0, sizeof(*lhs));
+    lhs->selector = rhs->selector;
+    lhs->base = rhs->base;
+    lhs->limit = rhs->limit;
+    lhs->dpl = rhs->selector & 3;   /* taken from the selector's low two bits */
+    lhs->type = (attrs >> DESC_TYPE_SHIFT) & 15;
+    lhs->operand_size = (attrs >> DESC_B_SHIFT) & 1;
+    lhs->long_mode = (attrs >> DESC_L_SHIFT) & 1;
+    lhs->present = !!(attrs & DESC_P_MASK);
+    lhs->desc = !!(attrs & DESC_S_MASK);
+    lhs->granularity = !!(attrs & DESC_G_MASK);
+    lhs->available = !!(attrs & DESC_AVL_MASK);
+}
+
+/* Copies one register between HAX and QEMU; non-zero `set` writes to HAX. */
+static void hax_getput_reg(uint64_t * hax_reg, target_ulong * qemu_reg, int set)
+{
+    if (!set) {
+        *qemu_reg = (target_ulong) *hax_reg;
+        return;
+    }
+    *hax_reg = *qemu_reg;
+}
+
+/* Copies segment and descriptor-table state from `sregs` into `env`; returns 0.
+ * `sregs` must already have been synced with the HAX kernel module. */
+static int hax_get_segments(CPUArchState * env, struct vcpu_state_t *sregs)
+{
+    get_seg(&env->segs[R_CS], &sregs->_cs);
+    get_seg(&env->segs[R_SS], &sregs->_ss);
+    get_seg(&env->segs[R_DS], &sregs->_ds);
+    get_seg(&env->segs[R_ES], &sregs->_es);
+    get_seg(&env->segs[R_FS], &sregs->_fs);
+    get_seg(&env->segs[R_GS], &sregs->_gs);
+    get_seg(&env->ldt, &sregs->_ldt);
+    get_seg(&env->tr, &sregs->_tr);
+    env->gdt.base = sregs->_gdt.base;
+    env->gdt.limit = sregs->_gdt.limit;
+    env->idt.base = sregs->_idt.base;
+    env->idt.limit = sregs->_idt.limit;
+    return 0;
+}
+
+/*
+ * Fills the segment and descriptor-table part of `sregs` from `env`.  With
+ * VM_MASK set in eflags the six segments get the fixed v8086 attributes;
+ * otherwise they are converted from the cached flags.  Always returns 0.
+ */
+static int hax_set_segments(CPUArchState * env, struct vcpu_state_t *sregs)
+{
+    const int v8086 = (env->eflags & VM_MASK) != 0;
+    void (*fill)(struct segment_desc_t *, const SegmentCache *) =
+        v8086 ? set_v8086_seg : set_seg;
+
+    fill(&sregs->_cs, &env->segs[R_CS]);
+    fill(&sregs->_ds, &env->segs[R_DS]);
+    fill(&sregs->_es, &env->segs[R_ES]);
+    fill(&sregs->_fs, &env->segs[R_FS]);
+    fill(&sregs->_gs, &env->segs[R_GS]);
+    fill(&sregs->_ss, &env->segs[R_SS]);
+
+    if (!v8086 && (env->cr[0] & CR0_PE_MASK)) {
+        /* force ss cpl to cs cpl */
+        sregs->_ss.selector = (sregs->_ss.selector & ~3) | (sregs->_cs.selector & 3);
+        sregs->_ss.dpl = sregs->_ss.selector & 3;
+    }
+
+    set_seg(&sregs->_tr, &env->tr);
+    set_seg(&sregs->_ldt, &env->ldt);
+    sregs->_gdt.base = env->gdt.base;
+    sregs->_gdt.limit = env->gdt.limit;
+    sregs->_idt.base = env->idt.base;
+    sregs->_idt.limit = env->idt.limit;
+    return 0;
+}
+
+/*
+ * Recomputes the derived bits of env->hflags from the CR0/CR4/EFER/EFLAGS
+ * and segment state fetched from the kernel module.  Always returns 0.
+ */
+static int hax_setup_qemu_emulator(CPUArchState * env)
+{
+    /* hflags bits listed in this mask are recomputed below; others are kept. */
+#define HFLAG_COPY_MASK ~( \
+  HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
+  HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
+  HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
+  HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)
+
+    uint32_t hflags;
+
+    hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
+    hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
+    hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
+        (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
+    hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
+    hflags |= (env->cr[4] & CR4_OSFXSR_MASK) << (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);
+
+    if (env->efer & MSR_EFER_LMA) {
+        hflags |= HF_LMA_MASK;
+    }
+
+    if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
+        hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
+    } else {
+        hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >>
+            (DESC_B_SHIFT - HF_CS32_SHIFT);
+        hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >>
+            (DESC_B_SHIFT - HF_SS32_SHIFT);
+        if (!(env->cr[0] & CR0_PE_MASK) ||
+            (env->eflags & VM_MASK) || !(hflags & HF_CS32_MASK)) {
+            hflags |= HF_ADDSEG_MASK;
+        } else {
+            hflags |= ((env->segs[R_DS].base |
+                        env->segs[R_ES].base |
+                        env->segs[R_SS].base) != 0) << HF_ADDSEG_SHIFT;
+        }
+    }
+    env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;
+    return 0;
+}
+
+static int hax_sync_vcpu_register(CPUArchState * env, int set)
+{
+    struct vcpu_state_t regs;
+    int ret;
+    memset(&regs, 0, sizeof(struct vcpu_state_t));
+    /* set == 0: fetch from HAX, then unpack into env.  set != 0: pack, push. */
+    if (!set) {
+        ret = hax_sync_vcpu_state(env, &regs, 0);
+        if (ret < 0)
+            return -1;
+    }
+
+    /* generic register */
+    hax_getput_reg(&regs._rax, &env->regs[R_EAX], set);
+    hax_getput_reg(&regs._rbx, &env->regs[R_EBX], set);
+    hax_getput_reg(&regs._rcx, &env->regs[R_ECX], set);
+    hax_getput_reg(&regs._rdx, &env->regs[R_EDX], set);
+    hax_getput_reg(&regs._rsi, &env->regs[R_ESI], set);
+    hax_getput_reg(&regs._rdi, &env->regs[R_EDI], set);
+    hax_getput_reg(&regs._rsp, &env->regs[R_ESP], set);
+    hax_getput_reg(&regs._rbp, &env->regs[R_EBP], set);
+#ifdef TARGET_X86_64
+    hax_getput_reg(&regs._r8, &env->regs[8], set);
+    hax_getput_reg(&regs._r9, &env->regs[9], set);
+    hax_getput_reg(&regs._r10, &env->regs[10], set);
+    hax_getput_reg(&regs._r11, &env->regs[11], set);
+    hax_getput_reg(&regs._r12, &env->regs[12], set);
+    hax_getput_reg(&regs._r13, &env->regs[13], set);
+    hax_getput_reg(&regs._r14, &env->regs[14], set);
+    hax_getput_reg(&regs._r15, &env->regs[15], set);
+#endif
+    hax_getput_reg(&regs._rflags, &env->eflags, set);
+    hax_getput_reg(&regs._rip, &env->eip, set);
+
+    if (set) {
+        regs._cr0 = env->cr[0];
+        regs._cr2 = env->cr[2];
+        regs._cr3 = env->cr[3];
+        regs._cr4 = env->cr[4];
+        hax_set_segments(env, &regs);
+    } else {
+        env->cr[0] = regs._cr0;
+        env->cr[2] = regs._cr2;
+        env->cr[3] = regs._cr3;
+        env->cr[4] = regs._cr4;
+        hax_get_segments(env, &regs);
+    }
+
+    if (set) {
+        ret = hax_sync_vcpu_state(env, &regs, 1);
+        if (ret < 0)
+            return -1;
+    }
+    if (!set)       /* after a fetch, refresh the hflags derived from it */
+        hax_setup_qemu_emulator(env);
+    return 0;
+}
+
+static void hax_msr_entry_set(struct vmx_msr *item, uint32_t index,
+                              uint64_t value)
+{   /* Stores one (MSR index, value) pair in `item`. */
+    item->value = value;
+    item->entry = index;
+}
+
+/* Reads the MSRs QEMU tracks from HAX into `env`; 0 on success, else negative. */
+static int hax_get_msrs(CPUArchState * env)
+{
+    struct hax_msr_data md;
+    struct vmx_msr *msrs = md.entries;
+    int ret, i, n = 0;
+
+    memset(&md, 0, sizeof(md));   /* as in hax_set_msrs(): no stale stack data */
+    msrs[n++].entry = MSR_IA32_SYSENTER_CS;
+    msrs[n++].entry = MSR_IA32_SYSENTER_ESP;
+    msrs[n++].entry = MSR_IA32_SYSENTER_EIP;
+    msrs[n++].entry = MSR_IA32_TSC;
+#ifdef TARGET_X86_64
+    msrs[n++].entry = MSR_EFER;
+    msrs[n++].entry = MSR_STAR;
+    msrs[n++].entry = MSR_LSTAR;
+    msrs[n++].entry = MSR_CSTAR;
+    msrs[n++].entry = MSR_FMASK;
+    msrs[n++].entry = MSR_KERNELGSBASE;
+#endif
+    md.nr_msr = n;
+    ret = hax_sync_msr(env, &md, 0);
+    if (ret < 0)
+        return ret;
+
+    for (i = 0; i < md.done; i++) {
+        switch (msrs[i].entry) {
+        case MSR_IA32_SYSENTER_CS:
+            env->sysenter_cs = msrs[i].value;
+            break;
+        case MSR_IA32_SYSENTER_ESP:
+            env->sysenter_esp = msrs[i].value;
+            break;
+        case MSR_IA32_SYSENTER_EIP:
+            env->sysenter_eip = msrs[i].value;
+            break;
+        case MSR_IA32_TSC:
+            env->tsc = msrs[i].value;
+            break;
+#ifdef TARGET_X86_64
+        case MSR_EFER:
+            env->efer = msrs[i].value;
+            break;
+        case MSR_STAR:
+            env->star = msrs[i].value;
+            break;
+        case MSR_LSTAR:
+            env->lstar = msrs[i].value;
+            break;
+        case MSR_CSTAR:
+            env->cstar = msrs[i].value;
+            break;
+        case MSR_FMASK:
+            env->fmask = msrs[i].value;
+            break;
+        case MSR_KERNELGSBASE:
+            env->kernelgsbase = msrs[i].value;
+            break;
+#endif
+        }
+    }
+    return 0;
+}
+
+/* Writes the MSRs QEMU tracks from `env` to HAX; returns hax_sync_msr()'s result. */
+static int hax_set_msrs(CPUArchState * env)
+{
+    struct hax_msr_data md;
+    struct vmx_msr *slot = md.entries;
+
+    memset(&md, 0, sizeof(md));
+    hax_msr_entry_set(slot++, MSR_IA32_SYSENTER_CS, env->sysenter_cs);
+    hax_msr_entry_set(slot++, MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
+    hax_msr_entry_set(slot++, MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
+    hax_msr_entry_set(slot++, MSR_IA32_TSC, env->tsc);
+#ifdef TARGET_X86_64
+    hax_msr_entry_set(slot++, MSR_EFER, env->efer);
+    hax_msr_entry_set(slot++, MSR_STAR, env->star);
+    hax_msr_entry_set(slot++, MSR_LSTAR, env->lstar);
+    hax_msr_entry_set(slot++, MSR_CSTAR, env->cstar);
+    hax_msr_entry_set(slot++, MSR_FMASK, env->fmask);
+    hax_msr_entry_set(slot++, MSR_KERNELGSBASE, env->kernelgsbase);
+#endif
+    /* `slot` has advanced once per entry filled in above. */
+    md.nr_msr = slot - md.entries;
+    md.done = 0;
+
+    return hax_sync_msr(env, &md, 1);
+}
+
+/* Loads x87/SSE state from HAX into `env`; 0 on success, else negative. */
+static int hax_get_fpu(CPUArchState * env)
+{
+    struct fx_layout fpu;
+    int i, ret;
+
+    ret = hax_sync_fpu(env, &fpu, 0);
+    if (ret < 0)
+        return ret;
+
+    env->fpstt = (fpu.fsw >> 11) & 7;
+    env->fpus = fpu.fsw;
+    env->fpuc = fpu.fcw;
+    for (i = 0; i < 8; ++i)
+        env->fptags[i] = !((fpu.ftw >> i) & 1);
+    memcpy(env->fpregs, fpu.st_mm, sizeof(env->fpregs));
+    for (i = 0; i < 8; ++i) {
+        memcpy(&env->xmm_regs[i], fpu.mmx_1[i], sizeof(fpu.mmx_1[i]));
+#ifdef TARGET_X86_64
+        /* XMM8-15: 64-bit targets only, like regs[8..15]; avoids overrun. */
+        memcpy(&env->xmm_regs[8 + i], fpu.mmx_2[i], sizeof(fpu.mmx_2[i]));
+#endif
+    }
+    env->mxcsr = fpu.mxcsr;
+    return 0;
+}
+
+/* Stores x87/SSE state from `env` to HAX; returns hax_sync_fpu()'s result. */
+static int hax_set_fpu(CPUArchState * env)
+{
+    struct fx_layout fpu;
+    int i;
+
+    memset(&fpu, 0, sizeof(fpu));
+    fpu.fsw = env->fpus & ~(7 << 11);
+    fpu.fsw |= (env->fpstt & 7) << 11;
+    fpu.fcw = env->fpuc;
+
+    for (i = 0; i < 8; ++i)
+        fpu.ftw |= (!env->fptags[i]) << i;
+
+    memcpy(fpu.st_mm, env->fpregs, sizeof(env->fpregs));
+    for (i = 0; i < 8; i++) {
+        memcpy(fpu.mmx_1[i], &env->xmm_regs[i], sizeof(fpu.mmx_1[i]));
+#ifdef TARGET_X86_64
+        /* XMM8-15: 64-bit targets only, like regs[8..15]; avoids overrun. */
+        memcpy(fpu.mmx_2[i], &env->xmm_regs[i + 8], sizeof(fpu.mmx_2[i]));
+#endif
+    }
+    fpu.mxcsr = env->mxcsr;
+
+    return hax_sync_fpu(env, &fpu, 1);
+}
+
+/*
+ * Pulls registers, FPU state and MSRs from HAX into `env`, in that order,
+ * stopping at the first failure.  Returns 0 on success, otherwise the
+ * negative code of the step that failed.
+ */
+static int hax_arch_get_registers(CPUArchState * env)
+{
+    int rc = hax_sync_vcpu_register(env, 0);
+
+    if (rc >= 0) {
+        rc = hax_get_fpu(env);
+    }
+    if (rc >= 0) {
+        rc = hax_get_msrs(env);
+    }
+
+    return rc < 0 ? rc : 0;
+}
+
+/* Pushes registers, FPU state and MSRs from `env` to HAX, in that order.
+ * The first failing step is logged and its negative code returned. */
+static int hax_arch_set_registers(CPUArchState * env)
+{
+    const char *what = "Failed to sync vcpu reg\n";
+    int rc = hax_sync_vcpu_register(env, 1);
+
+    if (rc >= 0) {
+        what = "FPU failed\n";
+        rc = hax_set_fpu(env);
+    }
+    if (rc >= 0) {
+        what = "MSR failed\n";
+        rc = hax_set_msrs(env);
+    }
+
+    if (rc < 0) {
+        fputs(what, stderr);
+        return rc;
+    }
+    return 0;
+}
+
+static void hax_vcpu_sync_state(CPUArchState * env, int modified)
+{   /* With HAX enabled: push env to HAX if `modified`, else pull from HAX. */
+    if (!hax_enabled())
+        return;
+    if (modified)
+        hax_arch_set_registers(env);
+    else
+        hax_arch_get_registers(env);
+}
+
+/*
+ * Pushes QEMU's register state of every vCPU to HAX; does nothing when HAX
+ * is disabled.  Returns 0, or the first negative error after reporting it.
+ *
+ * Much simpler than KVM, at least in the first stage: there is no device
+ * pass-through or framebuffer to consider, and the BIOS may even be removed
+ * entirely.
+ */
+int hax_sync_vcpus(void)
+{
+    CPUState *cs;
+
+    if (!hax_enabled()) {
+        return 0;
+    }
+
+    for (cs = first_cpu; cs != NULL; cs = CPU_NEXT(cs)) {
+        int rc = hax_arch_set_registers(cs->env_ptr);
+
+        if (rc < 0) {
+            derror(kHaxVcpuSyncFailed);
+            return rc;
+        }
+    }
+
+    return 0;
+}
+
+void hax_reset_vcpu_state(void *opaque)
+{   /* Reset hook: every vCPU goes back to the initial emulation state. */
+    CPUState *cs = first_cpu;
+    for (; cs != NULL; cs = CPU_NEXT(cs)) {
+        DPRINTF("*********ReSet hax_vcpu->emulation_state \n");
+        cs->hax_vcpu->emulation_state = HAX_EMULATE_STATE_INITIAL;
+        cs->hax_vcpu->tunnel->ready_for_interrupt_injection = 0;
+        cs->hax_vcpu->tunnel->user_event_pending = 0;
+    }
+}
+
+static void hax_accel_class_init(ObjectClass *oc, void *data)
+{   /* Fills in the AccelClass fields for the HAX accelerator. */
+    AccelClass *accel = ACCEL_CLASS(oc);
+    accel->init_machine = hax_accel_init;
+    accel->allowed = &hax_allowed;
+    accel->name = "HAX";
+}
+
+static const TypeInfo hax_accel_type = {    /* type record for the HAX accel */
+    .name = TYPE_HAX_ACCEL,
+    .parent = TYPE_ACCEL,
+    .class_init = hax_accel_class_init,     /* sets name, init hook, allowed */
+};
+
+static void hax_type_init(void)
+{   /* Registers hax_accel_type with the type system. */
+    type_register_static(&hax_accel_type);
+}
+
+type_init(hax_type_init);
+
diff --git a/target-i386/hax-darwin.c b/target-i386/hax-darwin.c
new file mode 100644
index 0000000..06c7a3d
--- /dev/null
+++ b/target-i386/hax-darwin.c
@@ -0,0 +1,315 @@
+/*
+ * QEMU HAXM support
+ *
+ * Copyright (c) 2011 Intel Corporation
+ *  Written by:
+ *  Jiang Yunhong<yunhong.jiang@intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+/* HAX module interface - darwin version */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+
+#include "target-i386/hax-i386.h"
+
+hax_fd hax_mod_open(void)
+{
+    int fd = open("/dev/HAX", O_RDWR);
+    if (fd == -1) {
+        fprintf(stderr, "Failed to open the hax module\n");
+        return fd;  /* skip fcntl(): it would fail with EBADF on -1 */
+    }
+    /* Mark the opened HAX device descriptor close-on-exec. */
+    fcntl(fd, F_SETFD, FD_CLOEXEC);
+    return fd;
+}
+
+int hax_populate_ram(uint64_t va, uint32_t size)
+{
+    struct hax_alloc_ram_info info;
+    int rc;
+
+    /* The ALLOC_RAM ioctl is issued on the VM descriptor, so one must exist. */
+    if (hax_global.vm == NULL || hax_global.vm->fd == 0) {
+        fprintf(stderr, "Allocate memory before vm create?\n");
+        return -EINVAL;
+    }
+
+    info.va = va;
+    info.size = size;
+    rc = ioctl(hax_global.vm->fd, HAX_VM_IOCTL_ALLOC_RAM, &info);
+    if (rc >= 0)
+        return 0;
+    fprintf(stderr, "Failed to allocate %x memory\n", size);
+    return rc;
+}
+
+int hax_set_ram(uint64_t start_pa, uint32_t size, uint64_t host_va, int flags)
+{
+    struct hax_set_ram_info req;
+
+    /* Describe the guest physical range and the host address backing it. */
+    req.pa_start = start_pa;
+    req.size = size;
+    req.va = host_va;
+    req.flags = (uint8_t) flags;  /* only the low 8 bits are passed on */
+
+    /* A failed ioctl is reported as a negative errno value. */
+    if (ioctl(hax_global.vm->fd, HAX_VM_IOCTL_SET_RAM, &req) < 0) {
+        return -errno;
+    }
+    return 0;
+}
+
+int hax_get_max_ram(uint64_t *max_ram) {
+    struct hax_capabilityinfo caps;
+    int rc, fd = hax_mod_open();
+
+    if (fd < 0)
+        return -1;
+    /* The module descriptor is only needed for this one capability query. */
+    rc = ioctl(fd, HAX_IOCTL_CAPABILITY, &caps);
+    close(fd);
+    if (rc == -1)
+        return -1;
+    *max_ram = caps.mem_quota;
+    return 0;
+}
+
+int hax_capability(struct hax_state *hax, struct hax_capabilityinfo *cap)
+{
+    /* Queries the HAX module into *cap; returns 0 or a negative errno. */
+    if (ioctl(hax->fd, HAX_IOCTL_CAPABILITY, cap) == -1) {
+        /* Capture errno first: fprintf() is allowed to overwrite it. */
+        int saved_errno = errno;
+
+        fprintf(stderr, "Failed to get HAX capability\n");
+        return -saved_errno;
+    }
+    return 0;
+}
+
+int hax_mod_version(struct hax_state *hax, struct hax_module_version *version)
+{
+    /* Queries the HAX module into *version; returns 0 or a negative errno. */
+    if (ioctl(hax->fd, HAX_IOCTL_VERSION, version) == -1) {
+        /* Capture errno first: fprintf() is allowed to overwrite it. */
+        int saved_errno = errno;
+
+        fprintf(stderr, "Failed to get HAX version\n");
+        return -saved_errno;
+    }
+    return 0;
+}
+
+static char *hax_vm_devfs_string(int vm_id)
+{
+    char *path;
+
+    if (vm_id > MAX_VM_ID) {
+        fprintf(stderr, "Too big VM id\n");
+        return NULL;
+    }
+
+    /* The template's trailing "xx" reserves room for the two-digit id. */
+#define HAX_VM_DEVFS "/dev/hax_vm/vmxx"
+    path = g_strdup(HAX_VM_DEVFS);
+    if (path != NULL)
+        snprintf(path, sizeof HAX_VM_DEVFS, "/dev/hax_vm/vm%02d", vm_id);
+    /* Heap string handed to the caller; NULL if the duplication failed. */
+    return path;
+}
+
+static char *hax_vcpu_devfs_string(int vm_id, int vcpu_id)
+{
+    char *path;
+
+    if (vcpu_id > MAX_VCPU_ID || vm_id > MAX_VM_ID) {
+        fprintf(stderr, "Too big vm id %x or vcpu id %x\n", vm_id, vcpu_id);
+        return NULL;
+    }
+
+    /* Each "xx" in the template reserves room for a two-digit id. */
+#define HAX_VCPU_DEVFS "/dev/hax_vmxx/vcpuxx"
+    path = g_strdup(HAX_VCPU_DEVFS);
+    if (path != NULL) {
+        snprintf(path, sizeof HAX_VCPU_DEVFS, "/dev/hax_vm%02d/vcpu%02d",
+                 vm_id, vcpu_id);
+    }
+    return path;
+}
+
+int hax_host_create_vm(struct hax_state *hax, int *vmid)
+{
+    int new_id = 0;
+    int rc;
+
+    if (hax_invalid_fd(hax->fd))
+        return -EINVAL;
+    /* A VM already attached to this state is reported as success. */
+    if (hax->vm != NULL) {
+        return 0;
+    }
+    rc = ioctl(hax->fd, HAX_IOCTL_CREATE_VM, &new_id);
+    *vmid = new_id;  /* stored regardless of the ioctl result */
+    return rc;
+}
+
+hax_fd hax_host_open_vm(struct hax_state * hax, int vm_id)
+{
+    hax_fd fd;
+    char *vm_name = hax_vm_devfs_string(vm_id);
+
+    if (!vm_name)
+        return -1;
+    fd = open(vm_name, O_RDWR);
+    /* The path came from g_strdup(), so it must be released by g_free(). */
+    g_free(vm_name);
+
+    /* Only a successfully opened descriptor can be marked close-on-exec. */
+    if (fd >= 0)
+        fcntl(fd, F_SETFD, FD_CLOEXEC);
+    return fd;
+}
+
+int hax_notify_qemu_version(hax_fd vm_fd, struct hax_qemu_version *qversion)
+{
+    int rc;
+
+    if (hax_invalid_fd(vm_fd)) {
+        return -EINVAL;
+    }
+
+    /* Pass *qversion to the driver through the VM descriptor. */
+    rc = ioctl(vm_fd, HAX_VM_IOCTL_NOTIFY_QEMU_VERSION, qversion);
+    if (rc >= 0)
+        return 0;
+    fprintf(stderr, "Failed to notify qemu API version\n");
+    return rc;
+}
+
+/* Simply assume the size should be bigger than the hax_tunnel,
+ * since the hax_tunnel can be extended later with compatibility considered
+ */
+int hax_host_create_vcpu(hax_fd vm_fd, int vcpuid)
+{
+    /* Issue VCPU_CREATE on the VM descriptor; its result is returned. */
+    int rc = ioctl(vm_fd, HAX_VM_IOCTL_VCPU_CREATE, &vcpuid);
+
+    if (rc < 0) {
+        fprintf(stderr, "Failed to create vcpu %x\n", vcpuid);
+    }
+    return rc;
+}
+
+hax_fd hax_host_open_vcpu(int vmid, int vcpuid)
+{
+    char *devfs_path = hax_vcpu_devfs_string(vmid, vcpuid);
+    hax_fd fd;
+
+    if (!devfs_path) {
+        fprintf(stderr, "Failed to get the devfs\n");
+        return -EINVAL;
+    }
+    fd = open(devfs_path, O_RDWR);
+    /* The path came from g_strdup(); release it with the matching g_free(). */
+    g_free(devfs_path);
+    if (fd < 0)
+        fprintf(stderr, "Failed to open the vcpu devfs\n");
+    else    /* only a valid descriptor can be marked close-on-exec */
+        fcntl(fd, F_SETFD, FD_CLOEXEC);
+    return fd;
+}
+
+int hax_host_setup_vcpu_channel(struct hax_vcpu_state *vcpu)
+{
+    struct hax_tunnel_info tunnel_info;
+    int rc = ioctl(vcpu->fd, HAX_VCPU_IOCTL_SETUP_TUNNEL, &tunnel_info);
+
+    if (rc != 0) {
+        fprintf(stderr, "Failed to setup the hax tunnel\n");
+        return rc;
+    }
+
+    /* Refuse a tunnel whose reported size fails valid_hax_tunnel_size(). */
+    if (!valid_hax_tunnel_size(tunnel_info.size)) {
+        fprintf(stderr, "Invalid hax tunnel size %x\n", tunnel_info.size);
+        return -EINVAL;
+    }
+
+    /* va and io_va arrive as 64-bit integers; keep them as pointers. */
+    vcpu->tunnel = (struct hax_tunnel *) (intptr_t) (tunnel_info.va);
+    vcpu->iobuf = (unsigned char *) (intptr_t) (tunnel_info.io_va);
+    return 0;
+}
+
+int hax_vcpu_run(struct hax_vcpu_state *vcpu)
+{
+    /*
+     * Issue the RUN ioctl on the vcpu descriptor; its result is returned.
+     */
+    return ioctl(vcpu->fd, HAX_VCPU_IOCTL_RUN, NULL);
+}
+
+int hax_sync_fpu(CPUArchState * env, struct fx_layout *fl, int set)
+{
+    int fd = hax_vcpu_get_fd(env);
+
+    if (fd <= 0) {
+        return -1;
+    }
+
+    /* set != 0 issues the SET_FPU ioctl with fl, set == 0 the GET_FPU one;
+     * the ioctl result is returned as-is. */
+    if (set)
+        return ioctl(fd, HAX_VCPU_IOCTL_SET_FPU, fl);
+    return ioctl(fd, HAX_VCPU_IOCTL_GET_FPU, fl);
+}
+
+int hax_sync_msr(CPUArchState * env, struct hax_msr_data *msrs, int set)
+{
+    int fd = hax_vcpu_get_fd(env);
+
+    if (fd <= 0) {
+        return -1;
+    }
+
+    /* set != 0 issues SET_MSRS, set == 0 GET_MSRS; result returned as-is. */
+    if (set)
+        return ioctl(fd, HAX_VCPU_IOCTL_SET_MSRS, msrs);
+    return ioctl(fd, HAX_VCPU_IOCTL_GET_MSRS, msrs);
+}
+
+int hax_sync_vcpu_state(CPUArchState * env, struct vcpu_state_t *state, int set)
+{
+    int fd = hax_vcpu_get_fd(env);
+
+    if (fd <= 0) {
+        return -1;
+    }
+
+    /* set != 0 issues the SET_REGS ioctl with state, set == 0 the GET_REGS
+     * one; the ioctl result is returned as-is. */
+    if (set)
+        return ioctl(fd, HAX_VCPU_SET_REGS, state);
+    return ioctl(fd, HAX_VCPU_GET_REGS, state);
+}
+
+int hax_inject_interrupt(CPUArchState * env, int vector)
+{
+    int fd = hax_vcpu_get_fd(env);
+
+    if (fd <= 0) {
+        return -1;
+    }
+
+    /* The vector is passed by address; the ioctl result is returned. */
+    return ioctl(fd, HAX_VCPU_IOCTL_INTERRUPT, &vector);
+}
diff --git a/target-i386/hax-darwin.h b/target-i386/hax-darwin.h
new file mode 100644
index 0000000..38361fc
--- /dev/null
+++ b/target-i386/hax-darwin.h
@@ -0,0 +1,63 @@
+/*
+ * QEMU HAXM support
+ *
+ * Copyright (c) 2011 Intel Corporation
+ *  Written by:
+ *  Jiang Yunhong<yunhong.jiang@intel.com>
+ *  Xin Xiaohui<xiaohui.xin@intel.com>
+ *  Zhang Xiantao<xiantao.zhang@intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef __HAX_UNIX_H
+#define __HAX_UNIX_H
+
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <stdarg.h>
+
+#define HAX_INVALID_FD  (-1)
+static inline int hax_invalid_fd(hax_fd fd)
+{
+    return fd <= 0;     /* rejects descriptor 0 as well as negative values */
+}
+
+static inline void hax_mod_close(struct hax_state *hax)
+{
+    close(hax->fd);
+}
+
+static inline void hax_close_fd(hax_fd fd)
+{
+    close(fd);
+}
+
+/* HAX module level ioctl */
+#define HAX_IOCTL_VERSION _IOWR(0, 0x20, struct hax_module_version)
+#define HAX_IOCTL_CREATE_VM _IOWR(0, 0x21, uint32_t)
+#define HAX_IOCTL_DESTROY_VM _IOW(0, 0x22, uint32_t)
+#define HAX_IOCTL_CAPABILITY _IOR(0, 0x23, struct hax_capabilityinfo)
+
+#define HAX_VM_IOCTL_VCPU_CREATE _IOWR(0, 0x80, uint32_t)
+#define HAX_VM_IOCTL_ALLOC_RAM _IOWR(0, 0x81, struct hax_alloc_ram_info)
+#define HAX_VM_IOCTL_SET_RAM _IOWR(0, 0x82, struct hax_set_ram_info)
+#define HAX_VM_IOCTL_VCPU_DESTROY _IOW(0, 0x83, uint32_t)
+#define HAX_VM_IOCTL_NOTIFY_QEMU_VERSION _IOW(0, 0x84, struct hax_qemu_version)
+
+#define HAX_VCPU_IOCTL_RUN  _IO(0, 0xc0)
+#define HAX_VCPU_IOCTL_SET_MSRS _IOWR(0, 0xc1, struct hax_msr_data)
+#define HAX_VCPU_IOCTL_GET_MSRS _IOWR(0, 0xc2, struct hax_msr_data)
+
+#define HAX_VCPU_IOCTL_SET_FPU  _IOW(0, 0xc3, struct fx_layout)
+#define HAX_VCPU_IOCTL_GET_FPU  _IOR(0, 0xc4, struct fx_layout)
+
+#define HAX_VCPU_IOCTL_SETUP_TUNNEL _IOWR(0, 0xc5, struct hax_tunnel_info)
+#define HAX_VCPU_IOCTL_INTERRUPT _IOWR(0, 0xc6, uint32_t)
+#define HAX_VCPU_SET_REGS       _IOWR(0, 0xc7, struct vcpu_state_t)
+#define HAX_VCPU_GET_REGS       _IOWR(0, 0xc8, struct vcpu_state_t)
+
+#endif /* __HAX_UNIX_H */
diff --git a/target-i386/hax-i386.h b/target-i386/hax-i386.h
new file mode 100644
index 0000000..3dc1981
--- /dev/null
+++ b/target-i386/hax-i386.h
@@ -0,0 +1,93 @@
+/*
+ * QEMU HAXM support
+ *
+ * Copyright (c) 2011 Intel Corporation
+ *  Written by:
+ *  Jiang Yunhong<yunhong.jiang@intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef _HAX_I386_H
+#define _HAX_I386_H
+
+#include "sysemu/hax.h"
+#include "target-i386/hax-interface.h"
+#include "cpu.h"
+
+#ifdef CONFIG_DARWIN
+typedef int hax_fd;
+#endif
+
+#ifdef CONFIG_WIN32
+typedef HANDLE hax_fd;
+#endif
+
+extern struct hax_state hax_global;
+struct hax_vcpu_state {
+    hax_fd fd;                  /* handle the vcpu-level requests are issued on */
+    int vcpu_id;
+    int resync;
+    int emulation_state;
+    struct hax_tunnel *tunnel;  /* filled in by hax_host_setup_vcpu_channel() */
+    unsigned char *iobuf;       /* filled in by hax_host_setup_vcpu_channel() */
+};
+
+struct hax_state {
+    hax_fd fd; /* the global hax device interface */
+    uint32_t version;
+    struct hax_vm *vm;  /* when non-NULL, hax_host_create_vm() returns early */
+    uint64_t mem_quota;
+};
+
+#define HAX_MAX_VCPU 0x10
+#define MAX_VM_ID 0x40
+#define MAX_VCPU_ID 0x40
+
+struct hax_vm {
+    hax_fd fd;  /* handle the VM-level requests (e.g. SET_RAM) are issued on */
+    int id;
+    struct hax_vcpu_state *vcpus[HAX_MAX_VCPU];
+};
+
+#ifdef NEED_CPU_H
+/* Functions exported to host specific mode */
+hax_fd hax_vcpu_get_fd(CPUArchState * env);
+int valid_hax_tunnel_size(uint16_t size);
+
+/* Host specific functions */
+int hax_mod_version(struct hax_state *hax, struct hax_module_version *version);
+int hax_inject_interrupt(CPUArchState * env, int vector);
+struct hax_vm *hax_vm_create(struct hax_state *hax);
+int hax_vcpu_run(struct hax_vcpu_state *vcpu);
+int hax_vcpu_create(int id);
+int hax_sync_vcpu_state(CPUArchState * env, struct vcpu_state_t *state, int set);
+int hax_sync_msr(CPUArchState * env, struct hax_msr_data *msrs, int set);
+int hax_sync_fpu(CPUArchState * env, struct fx_layout *fl, int set);
+#endif
+
+int hax_vm_destroy(struct hax_vm *vm);
+int hax_capability(struct hax_state *hax, struct hax_capabilityinfo *cap);
+int hax_notify_qemu_version(hax_fd vm_fd, struct hax_qemu_version *qversion);
+int hax_set_ram(uint64_t start_pa, uint32_t size, uint64_t host_va, int flags);
+
+/* Common host function */
+int hax_host_create_vm(struct hax_state *hax, int *vm_id);
+hax_fd hax_host_open_vm(struct hax_state *hax, int vm_id);
+int hax_host_create_vcpu(hax_fd vm_fd, int vcpuid);
+hax_fd hax_host_open_vcpu(int vmid, int vcpuid);
+int hax_host_setup_vcpu_channel(struct hax_vcpu_state *vcpu);
+hax_fd hax_mod_open(void);
+
+
+#ifdef CONFIG_DARWIN
+#include "target-i386/hax-darwin.h"
+#endif
+
+#ifdef CONFIG_WIN32
+#include "target-i386/hax-windows.h"
+#endif
+
+#endif
diff --git a/target-i386/hax-interface.h b/target-i386/hax-interface.h
new file mode 100644
index 0000000..2bc7f1a
--- /dev/null
+++ b/target-i386/hax-interface.h
@@ -0,0 +1,357 @@
+/*
+ * QEMU HAXM support
+ *
+ * Copyright (c) 2011 Intel Corporation
+ *  Written by:
+ *  Jiang Yunhong<yunhong.jiang@intel.com>
+ *  Xin Xiaohui<xiaohui.xin@intel.com>
+ *  Zhang Xiantao<xiantao.zhang@intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+/* Interface with HAX kernel module */
+
+#ifndef _HAX_INTERFACE_H
+#define _HAX_INTERFACE_H
+
+/* fx_layout has 3 formats table 3-56, 512bytes */
+struct fx_layout {
+    uint16_t fcw;
+    uint16_t fsw;
+    uint8_t ftw;
+    uint8_t res1;
+    uint16_t fop;
+    union {
+        struct {
+            uint32_t fip;
+            uint16_t fcs;
+            uint16_t res2;
+        };
+        uint64_t fpu_ip;
+    };
+    union {
+        struct {
+            uint32_t fdp;
+            uint16_t fds;
+            uint16_t res3;
+        };
+        uint64_t fpu_dp;
+    };
+    uint32_t mxcsr;
+    uint32_t mxcsr_mask;
+    uint8_t st_mm[8][16];
+    uint8_t mmx_1[8][16];
+    uint8_t mmx_2[8][16];
+    uint8_t pad[96];
+} __attribute__ ((aligned(8)));
+
+struct vmx_msr {
+    uint64_t entry;
+    uint64_t value;
+} __attribute__ ((__packed__));
+
+/*
+ * Fixed array is not good, but it makes Mac support a bit easier by avoiding
+ * memory map or copyin stuff.
+ */
+#define HAX_MAX_MSR_ARRAY 0x20
+struct hax_msr_data {
+    uint16_t nr_msr;
+    uint16_t done;
+    uint16_t pad[2];
+    struct vmx_msr entries[HAX_MAX_MSR_ARRAY];
+} __attribute__ ((__packed__));
+
+union interruptibility_state_t {
+    uint32_t raw;
+    struct {
+        uint32_t sti_blocking:1;
+        uint32_t movss_blocking:1;
+        uint32_t smi_blocking:1;
+        uint32_t nmi_blocking:1;
+        uint32_t reserved:28;
+    };
+    uint64_t pad;
+};
+
+typedef union interruptibility_state_t interruptibility_state_t;
+
+/* Segment descriptor */
+struct segment_desc_t {
+    uint16_t selector;
+    uint16_t _dummy;
+    uint32_t limit;
+    uint64_t base;
+    union {
+        struct {
+            uint32_t type:4;
+            uint32_t desc:1;
+            uint32_t dpl:2;
+            uint32_t present:1;
+            uint32_t:4;
+            uint32_t available:1;
+            uint32_t long_mode:1;
+            uint32_t operand_size:1;
+            uint32_t granularity:1;
+            uint32_t null:1;
+            uint32_t:15;
+        };
+        uint32_t ar;
+    };
+    uint32_t ipad;
+};
+
+typedef struct segment_desc_t segment_desc_t;
+
+struct vcpu_state_t {
+    union {
+        uint64_t _regs[16];
+        struct {
+            union {
+                struct {
+                    uint8_t _al, _ah;
+                };
+                uint16_t _ax;
+                uint32_t _eax;
+                uint64_t _rax;
+            };
+            union {
+                struct {
+                    uint8_t _cl, _ch;
+                };
+                uint16_t _cx;
+                uint32_t _ecx;
+                uint64_t _rcx;
+            };
+            union {
+                struct {
+                    uint8_t _dl, _dh;
+                };
+                uint16_t _dx;
+                uint32_t _edx;
+                uint64_t _rdx;
+            };
+            union {
+                struct {
+                    uint8_t _bl, _bh;
+                };
+                uint16_t _bx;
+                uint32_t _ebx;
+                uint64_t _rbx;
+            };
+            union {
+                uint16_t _sp;
+                uint32_t _esp;
+                uint64_t _rsp;
+            };
+            union {
+                uint16_t _bp;
+                uint32_t _ebp;
+                uint64_t _rbp;
+            };
+            union {
+                uint16_t _si;
+                uint32_t _esi;
+                uint64_t _rsi;
+            };
+            union {
+                uint16_t _di;
+                uint32_t _edi;
+                uint64_t _rdi;
+            };
+
+            uint64_t _r8;
+            uint64_t _r9;
+            uint64_t _r10;
+            uint64_t _r11;
+            uint64_t _r12;
+            uint64_t _r13;
+            uint64_t _r14;
+            uint64_t _r15;
+        };
+    };
+
+    union {
+        uint32_t _eip;
+        uint64_t _rip;
+    };
+
+    union {
+        uint32_t _eflags;
+        uint64_t _rflags;
+    };
+
+    segment_desc_t _cs;
+    segment_desc_t _ss;
+    segment_desc_t _ds;
+    segment_desc_t _es;
+    segment_desc_t _fs;
+    segment_desc_t _gs;
+    segment_desc_t _ldt;
+    segment_desc_t _tr;
+
+    segment_desc_t _gdt;
+    segment_desc_t _idt;
+
+    uint64_t _cr0;
+    uint64_t _cr2;
+    uint64_t _cr3;
+    uint64_t _cr4;
+
+    uint64_t _dr0;
+    uint64_t _dr1;
+    uint64_t _dr2;
+    uint64_t _dr3;
+    uint64_t _dr6;
+    uint64_t _dr7;
+    uint64_t _pde;
+
+    uint32_t _efer;
+
+    uint32_t _sysenter_cs;
+    uint64_t _sysenter_eip;
+    uint64_t _sysenter_esp;
+
+    uint32_t _activity_state;
+    uint32_t pad;
+    interruptibility_state_t _interruptibility_state;
+};
+
+/* HAX exit status */
+enum exit_status {
+    /* IO port request */
+    HAX_EXIT_IO = 1,
+    /* MMIO instruction emulation */
+    HAX_EXIT_MMIO,
+    /* QEMU emulation mode request; currently means guest entered non-PG mode */
+    HAX_EXIT_REAL,
+    /*
+     * Interrupt window open, QEMU can inject an interrupt now.
+     * Also used when a signal is pending, since at that time QEMU usually
+     * needs to check for interrupts.
+     */
+    HAX_EXIT_INTERRUPT,
+    /* Unknown vmexit, mostly triggers a reboot */
+    HAX_EXIT_UNKNOWN_VMEXIT,
+    /* HALT from guest */
+    HAX_EXIT_HLT,
+    /* Reboot request, e.g. because of a triple fault in the guest */
+    HAX_EXIT_STATECHANGE,
+    /* The vcpu is only paused when being destroyed, so simply return to hax */
+    HAX_EXIT_PAUSED,
+    HAX_EXIT_FAST_MMIO,
+};
+
+/*
+ * The interface definition:
+ * 1. vcpu_run returns 0 on success; any other value means failure
+ * 2. _exit_status holds the exit reason, as listed in enum exit_status
+ * 3. _exit_reason is the vmx exit reason
+ */
+struct hax_tunnel {
+    uint32_t _exit_reason;
+    uint32_t _exit_flag;
+    uint32_t _exit_status;
+    uint32_t user_event_pending;
+    int ready_for_interrupt_injection;
+    int request_interrupt_window;
+    union {
+        struct {
+            /* 0: read, 1: write (NOTE(review): vs. IN=1/OUT=0 below - confirm) */
+#define HAX_EXIT_IO_IN  1
+#define HAX_EXIT_IO_OUT 0
+            uint8_t _direction;
+            uint8_t _df;
+            uint16_t _size;
+            uint16_t _port;
+            uint16_t _count;
+            uint8_t _flags;
+            uint8_t _pad0;
+            uint16_t _pad1;
+            uint32_t _pad2;
+            uint64_t _vaddr;
+        } pio;
+        struct {
+            uint64_t gla;
+        } mmio;
+        struct {
+        } state;
+    };
+} __attribute__ ((__packed__));
+
+struct hax_module_version {
+    uint32_t compat_version;
+    uint32_t cur_version;
+} __attribute__ ((__packed__));
+
+/* This interface is supported only after API version 2 */
+struct hax_qemu_version {
+    /* Current API version in QEMU */
+    uint32_t cur_version;
+    /* The minimum API version supported by QEMU */
+    uint32_t min_version;
+} __attribute__ ((__packed__));
+
+/* The Mac-specific interface to QEMU; mostly ioctl related */
+struct hax_tunnel_info {
+    uint64_t va;
+    uint64_t io_va;
+    uint16_t size;
+    uint16_t pad[3];
+} __attribute__ ((__packed__));
+
+struct hax_alloc_ram_info {
+    uint32_t size;
+    uint32_t pad;
+    uint64_t va;
+} __attribute__ ((__packed__));
+#define HAX_RAM_INFO_ROM 0x1
+struct hax_set_ram_info {
+    uint64_t pa_start;
+    uint32_t size;
+    uint8_t flags;
+    uint8_t pad[3];
+    uint64_t va;
+} __attribute__ ((__packed__));
+
+#define HAX_CAP_STATUS_WORKING     0x1
+#define HAX_CAP_STATUS_NOTWORKING  0x0
+#define HAX_CAP_WORKSTATUS_MASK    0x1
+
+#define HAX_CAP_FAILREASON_VT      0x1
+#define HAX_CAP_FAILREASON_NX      0x2
+
+#define HAX_CAP_MEMQUOTA           0x2
+#define HAX_CAP_UG                 0x4
+
+struct hax_capabilityinfo {
+    /* bit 0: 1 - working
+     *        0 - not working, possibly because VT/NX disabled
+     * bit 1: 1 - memory limitation working
+     *        0 - no memory limitation
+     */
+    uint16_t wstatus;
+    /* valid when not working
+     * bit 0: VT not enabled
+     * bit 1: NX not enabled */
+    uint16_t winfo;
+    uint32_t pad;
+    uint64_t mem_quota;
+} __attribute__ ((__packed__));
+
+struct hax_fastmmio {
+    uint64_t gpa;
+    uint64_t value;
+    uint8_t size;
+    uint8_t direction;
+    uint16_t reg_index;
+    uint32_t pad0;
+    uint64_t _cr0;
+    uint64_t _cr2;
+    uint64_t _cr3;
+    uint64_t _cr4;
+} __attribute__ ((__packed__));
+#endif
diff --git a/target-i386/hax-slot.c b/target-i386/hax-slot.c
new file mode 100644
index 0000000..b0b3ed9
--- /dev/null
+++ b/target-i386/hax-slot.c
@@ -0,0 +1,328 @@
+/*
+** HAX memory slot operations
+**
+** Copyright (c) 2015-16 Intel Corporation
+**
+** This software is licensed under the terms of the GNU General Public
+** License version 2, as published by the Free Software Foundation, and
+** may be copied, distributed, and modified under those terms.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*/
+
+#include "target-i386/hax-slot.h"
+#include "target-i386/hax-i386.h"
+#include "qemu/queue.h"
+
+//#define DEBUG_HAX_SLOT
+
+#ifdef DEBUG_HAX_SLOT
+#define DPRINTF(fmt, ...) \
+    do { fprintf(stdout, fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
+/**
+ * HAXSlot: describes a guest physical memory region and its mapping
+ *
+ * @start_pa: a guest physical address marking the start of the region; must be
+ *            page-aligned
+ * @end_pa: a guest physical address marking the end of the region; must be
+ *          page-aligned
+ * @hva_pa_delta: the host virtual address to which guest physical address 0 is
+ *                mapped; in other words, for any guest physical address within
+ *                the region (start_pa <= pa < end_pa), the corresponding host
+ *                virtual address is calculated by host_va = pa + hva_pa_delta
+ * @flags: parameters for the mapping; must be non-negative
+ * @entry: additional fields for linking #HAXSlot instances together
+ */
+typedef struct HAXSlot {
+    uint64_t start_pa;
+    uint64_t end_pa;
+    uint64_t hva_pa_delta;
+    int flags;
+    QTAILQ_ENTRY(HAXSlot) entry;
+} HAXSlot;
+
+/* A doubly-linked list (actually a tail queue) of all registered slots */
+static QTAILQ_HEAD(HAXSlotListHead, HAXSlot) slot_list =
+    QTAILQ_HEAD_INITIALIZER(slot_list);
+
+void hax_slot_init_registry(void)
+{
+    HAXSlot *whole_space;
+
+    g_assert(QTAILQ_EMPTY(&slot_list));
+
+    /* Zero-filled allocation, so start_pa is implicitly 0. */
+    whole_space = g_malloc0(sizeof(*whole_space));
+    /* 2^64 would be the ideal end_pa, but it does not fit in uint64_t. Such
+     * a large guest physical address space is not needed anyway;
+     * (2^64 - TARGET_PAGE_SIZE) should be (more than) enough.
+     */
+    whole_space->end_pa = TARGET_PAGE_MASK;
+    /* flags and hva_pa_delta start out with invalid values */
+    whole_space->flags = -1;
+    whole_space->hva_pa_delta = ~TARGET_PAGE_MASK;
+    QTAILQ_INSERT_TAIL(&slot_list, whole_space, entry);
+}
+
+void hax_slot_free_registry(void)
+{
+    HAXSlot *head;  /* current first element of slot_list */
+    DPRINTF("%s: Deleting all registered slots\n", __func__);
+    while ((head = QTAILQ_FIRST(&slot_list)) != NULL) {
+        QTAILQ_REMOVE(&slot_list, head, entry);
+        g_free(head);
+    }
+}
+
+/**
+ * hax_slot_dump: dumps a slot to stdout (for debugging)
+ *
+ * @slot: the slot to dump
+ */
+static void hax_slot_dump(HAXSlot *slot)
+{
+    DPRINTF("[ start_pa=0x%016" PRIx64 ", end_pa=0x%016" PRIx64
+            ", hva_pa_delta=0x%016" PRIx64 ", flags=%d ]\n", slot->start_pa,
+            slot->end_pa, slot->hva_pa_delta, slot->flags);
+}
+
+/**
+ * hax_slot_dump_list: dumps @slot_list to stdout (for debugging)
+ */
+static void hax_slot_dump_list(void)
+{
+#ifdef DEBUG_HAX_SLOT
+    HAXSlot *slot;
+    int i = 0;
+
+    DPRINTF("**** BEGIN HAX SLOT LIST DUMP ****\n");
+    QTAILQ_FOREACH(slot, &slot_list, entry) {
+        DPRINTF("Slot %d:\n\t", i++);
+        hax_slot_dump(slot);
+    }
+    DPRINTF("**** END HAX SLOT LIST DUMP ****\n");
+#endif
+}
+
+/**
+ * hax_slot_find: locates the slot containing a guest physical address
+ *
+ * Traverses @slot_list, starting from @start_slot, and returns the slot which
+ * contains @pa. There should be one and only one such slot, because:
+ *
+ * 1) @slot_list is initialized with a slot which covers all valid @pa values.
+ *    This coverage stays unchanged as new slots are inserted into @slot_list.
+ * 2) @slot_list does not contain overlapping slots.
+ *
+ * @start_slot: the first slot from which @slot_list is traversed and searched;
+ *              must not be %NULL
+ * @pa: the guest physical address to locate; must not be less than the lower
+ *      bound of @start_slot
+ */
+static HAXSlot * hax_slot_find(HAXSlot *start_slot, uint64_t pa)
+{
+    HAXSlot *cur;
+
+    g_assert(start_slot);
+    g_assert(start_slot->start_pa <= pa);
+
+    /* Walk forward from start_slot and return the first slot whose upper
+     * bound lies above pa. */
+    for (cur = start_slot; cur != NULL; cur = QTAILQ_NEXT(cur, entry)) {
+        if (pa < cur->end_pa) {
+            return cur;
+        }
+    }
+
+    /* Should never reach here */
+    g_assert_not_reached();
+    return NULL;
+}
+
+/**
+ * hax_slot_split: splits a slot into two
+ *
+ * Shrinks @slot and creates a new slot from the vacated region. Returns the
+ * new slot.
+ *
+ * @slot: the slot to be split/shrunk
+ * @pa: the splitting point; must be page-aligned and within @slot
+ */
+static HAXSlot * hax_slot_split(HAXSlot *slot, uint64_t pa)
+{
+    HAXSlot *upper;
+
+    g_assert(slot);
+    g_assert(slot->start_pa < pa && pa < slot->end_pa);
+    g_assert((pa & ~TARGET_PAGE_MASK) == 0);
+
+    /* The new slot covers [pa, end_pa) and copies the mapping attributes. */
+    upper = g_malloc0(sizeof(*upper));
+    upper->flags = slot->flags;
+    upper->hva_pa_delta = slot->hva_pa_delta;
+    upper->end_pa = slot->end_pa;
+    upper->start_pa = pa;
+    slot->end_pa = pa;  /* the original slot keeps [start_pa, pa) */
+    QTAILQ_INSERT_AFTER(&slot_list, slot, upper, entry);
+    return upper;
+}
+
+/**
+ * hax_slot_can_merge: tests if two slots are compatible
+ *
+ * Two slots are considered compatible if they share the same memory mapping
+ * attributes. Compatible slots can be merged if they overlap or are adjacent.
+ *
+ * Returns %true if @slot1 and @slot2 are compatible.
+ *
+ * @slot1: one of the slots to be tested; must not be %NULL
+ * @slot2: the other slot to be tested; must not be %NULL
+ */
+static bool hax_slot_can_merge(HAXSlot *slot1, HAXSlot *slot2)
+{
+    g_assert(slot1 && slot2);
+    /* Compatible means: identical flags and identical hva_pa_delta. */
+    return !(slot1->flags != slot2->flags
+             || slot1->hva_pa_delta != slot2->hva_pa_delta);
+}
+
+/**
+ * hax_slot_insert: inserts a slot into @slot_list, with the potential side
+ *                  effect of creating/updating memory mappings
+ *
+ * Causes memory mapping attributes of @slot to override those of overlapping
+ * slots (including partial slots) in @slot_list. For any slot whose mapping
+ * attributes have changed, performs an ioctl to enforce the new mapping.
+ *
+ * Aborts QEMU on error.
+ *
+ * @slot: the slot to be inserted
+ */
+static void hax_slot_insert(HAXSlot *slot)
+{
+    HAXSlot *low_slot, *high_slot;
+    HAXSlot *low_slot_prev, *high_slot_next;
+    HAXSlot *old_slot, *old_slot_next;
+
+    g_assert(!QTAILQ_EMPTY(&slot_list));
+
+    low_slot = hax_slot_find(QTAILQ_FIRST(&slot_list), slot->start_pa);
+    g_assert(low_slot);
+    low_slot_prev = QTAILQ_PREV(low_slot, HAXSlotListHead, entry);
+
+    /* Adjust slot and/or low_slot such that their lower bounds (start_pa)
+     * align: grow slot downwards over a compatible neighbor, or split
+     * low_slot at slot->start_pa when it is incompatible. */
+    if (hax_slot_can_merge(low_slot, slot)) {
+        slot->start_pa = low_slot->start_pa;
+    } else if (slot->start_pa == low_slot->start_pa && low_slot_prev
+               && hax_slot_can_merge(low_slot_prev, slot)) {
+        low_slot = low_slot_prev;
+        slot->start_pa = low_slot->start_pa;
+    } else if (slot->start_pa != low_slot->start_pa) {
+        /* low_slot->start_pa < slot->start_pa < low_slot->end_pa */
+        low_slot = hax_slot_split(low_slot, slot->start_pa);
+        g_assert(low_slot);
+    }
+    /* Now we have slot->start_pa == low_slot->start_pa */
+
+    high_slot = hax_slot_find(low_slot, slot->end_pa - 1);
+    g_assert(high_slot);
+    high_slot_next = QTAILQ_NEXT(high_slot, entry);
+
+    /* Adjust slot and/or high_slot such that their upper bounds (end_pa)
+     * align: grow slot upwards over a compatible neighbor, or split
+     * high_slot at slot->end_pa when it is incompatible. */
+    if (hax_slot_can_merge(slot, high_slot)) {
+        slot->end_pa = high_slot->end_pa;
+    } else if (slot->end_pa == high_slot->end_pa && high_slot_next
+               && hax_slot_can_merge(slot, high_slot_next)) {
+        high_slot = high_slot_next;
+        slot->end_pa = high_slot->end_pa;
+    } else if (slot->end_pa != high_slot->end_pa) {
+        /* high_slot->start_pa < slot->end_pa < high_slot->end_pa */
+        high_slot_next = hax_slot_split(high_slot, slot->end_pa);
+        g_assert(high_slot_next);
+    }
+    /* Now we have slot->end_pa == high_slot->end_pa */
+
+    /* We are ready for substitution: replace all slots between low_slot and
+     * high_slot (inclusive) with slot. */
+
+    /* Step 1: insert slot into the list, before low_slot */
+    QTAILQ_INSERT_BEFORE(low_slot, slot, entry);
+
+    /* Step 2: remove low_slot..high_slot, one by one */
+    for (old_slot = low_slot;
+         /* This condition always evaluates to 1; it only exists to fetch the
+          * successor before old_slot is removed and freed in the body. See:
+          * https://en.wikipedia.org/wiki/Comma_operator */
+         old_slot_next = QTAILQ_NEXT(old_slot, entry), 1;
+         old_slot = old_slot_next) {
+        g_assert(old_slot);
+
+        QTAILQ_REMOVE(&slot_list, old_slot, entry);
+        if (!hax_slot_can_merge(slot, old_slot)) {
+            /* Mapping for guest memory region [old_slot->start_pa,
+             * old_slot->end_pa) has changed - must do ioctl. */
+            /* TODO: Further reduce the number of ioctl calls by preprocessing
+             * the low_slot..high_slot sublist and combining any two adjacent
+             * slots that are both incompatible with slot.
+             */
+            uint32_t size = old_slot->end_pa - old_slot->start_pa;
+            uint64_t host_va = old_slot->start_pa + slot->hva_pa_delta;
+            int err;
+
+            DPRINTF("%s: Doing ioctl (size=0x%08" PRIx32 ")\n", __func__, size);
+            /* Use the new host_va and flags */
+            err = hax_set_ram(old_slot->start_pa, size, host_va, slot->flags);
+            if (err) {
+                fprintf(stderr, "%s: Failed to set memory mapping (err=%d)\n",
+                        __func__, err);
+                abort();
+            }
+        }
+        g_free(old_slot);  /* only the pointer value is compared below */
+
+        /* Exit the infinite loop following the removal of high_slot */
+        if (old_slot == high_slot) {
+            break;
+        }
+    }
+}
+
+void hax_slot_register(uint64_t start_pa, uint32_t size, uint64_t host_va,
+                       int flags)
+{
+    HAXSlot *new_slot;
+    uint64_t end_pa = start_pa + size;
+
+    g_assert(!(start_pa & ~TARGET_PAGE_MASK));
+    g_assert(!(end_pa & ~TARGET_PAGE_MASK));
+    g_assert(start_pa < end_pa);
+    g_assert(host_va);
+    g_assert(flags >= 0);
+
+    /* hva_pa_delta is the offset that turns a pa of this slot into its hva. */
+    new_slot = g_malloc0(sizeof(*new_slot));
+    new_slot->flags = flags;
+    new_slot->hva_pa_delta = host_va - start_pa;
+    new_slot->end_pa = end_pa;
+    new_slot->start_pa = start_pa;
+
+    DPRINTF("%s: Inserting slot:\n\t", __func__);
+    hax_slot_dump(new_slot);
+    hax_slot_dump_list();
+    hax_slot_insert(new_slot);
+
+    DPRINTF("%s: Done\n", __func__);
+    hax_slot_dump_list();
+}
diff --git a/target-i386/hax-slot.h b/target-i386/hax-slot.h
new file mode 100644
index 0000000..d991c53
--- /dev/null
+++ b/target-i386/hax-slot.h
@@ -0,0 +1,58 @@
+/*
+** HAX memory slot operations
+**
+** Copyright (c) 2015-16 Intel Corporation
+**
+** This software is licensed under the terms of the GNU General Public
+** License version 2, as published by the Free Software Foundation, and
+** may be copied, distributed, and modified under those terms.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*/
+
+#ifndef _HAX_SLOT_H
+#define _HAX_SLOT_H
+
+#include <inttypes.h>
+
+/**
+ * hax_slot_init_registry: initializes the registry of memory slots.
+ *
+ * Should be called during HAX initialization, before any call to
+ * hax_slot_register().
+ */
+void hax_slot_init_registry(void);
+
+/**
+ * hax_slot_free_registry: destroys the registry of memory slots.
+ *
+ * Should be called during HAX cleanup to free up resources used by the
+ * registry of memory slots.
+ */
+void hax_slot_free_registry(void);
+
+/**
+ * hax_slot_register: registers a memory slot, updating HAX memory mappings if
+ * necessary.
+ *
+ * Must be called after hax_slot_init_registry(). Can be called multiple times
+ * to create new memory mappings or update existing ones. This function is smart
+ * enough to avoid asking the HAXM driver to do the same mapping twice for any
+ * guest physical page.
+ *
+ * Aborts QEMU on error.
+ *
+ * @start_pa: a guest physical address marking the start of the slot to
+ *            register; must be page-aligned
+ * @size: size of the slot to register; must be page-aligned and positive
+ * @host_va: a host virtual address to which @start_pa should be mapped
+ * @flags: parameters for the mapping, passed verbatim to the HAXM driver if
+ *         necessary; must be non-negative
+ */
+void hax_slot_register(uint64_t start_pa, uint32_t size, uint64_t host_va,
+                       int flags);
+
+#endif
diff --git a/target-i386/hax-windows.c b/target-i386/hax-windows.c
new file mode 100644
index 0000000..f2d72a3
--- /dev/null
+++ b/target-i386/hax-windows.c
@@ -0,0 +1,475 @@
+/*
+ * QEMU HAXM support
+ *
+ * Copyright (c) 2011 Intel Corporation
+ *  Written by:
+ *  Jiang Yunhong<yunhong.jiang@intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "hax-i386.h"
+
+/* #define DEBUG_HAX */
+
+#ifdef DEBUG_HAX
+#define DPRINTF(fmt, ...) \
+    do { fprintf(stdout, fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
+/*
+ * Open the HAX module device and store its handle in *fd.
+ * Returns 0 on success, -1 when the driver is not loaded (device not found),
+ * and -2 for a NULL fd or any other failure; *fd is left untouched on failure.
+ */
+static int hax_open_device(hax_fd * fd)
+{
+    uint32_t errNum = 0;
+    HANDLE hDevice;
+
+    if (!fd)
+        return -2;
+
+    hDevice = CreateFile("\\\\.\\HAX",
+                         GENERIC_READ | GENERIC_WRITE,
+                         0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
+
+    if (hDevice == INVALID_HANDLE_VALUE) {
+        /* Capture the error code first: fprintf() may overwrite it */
+        errNum = GetLastError();
+        fprintf(stderr, "Failed to open the HAX device!\n");
+        return (errNum == ERROR_FILE_NOT_FOUND) ? -1 : -2;
+    }
+    *fd = hDevice;
+    DPRINTF("HAX device fd:%p\n", (void *) *fd);
+    return 0;
+}
+
+/* Read mem_quota from the HAX module capability info into *max_ram.
+ * Returns 0 on success, -1 if the module cannot be opened, -2 if the
+ * capability ioctl fails. */
+int hax_get_max_ram(uint64_t *max_ram) {
+    struct hax_capabilityinfo caps;
+    DWORD returned = 0;
+    hax_fd module = hax_mod_open();
+    if (module == NULL) {
+        return -1;
+    }
+    int ok = DeviceIoControl(module, HAX_IOCTL_CAPABILITY, NULL, 0, &caps,
+                             sizeof(caps), &returned, (LPOVERLAPPED) NULL);
+    CloseHandle(module); /* temporary handle, only needed for this query */
+    if (!ok) {
+        return -2;
+    }
+    *max_ram = caps.mem_quota;
+    return 0;
+}
+
+/* Open the HAX module device.
+ * Returns its handle, or NULL (after logging to stderr) on failure. */
+hax_fd hax_mod_open(void)
+{
+    hax_fd module = NULL; /* stays NULL unless hax_open_device() succeeds */
+
+    if (hax_open_device(&module) != 0) {
+        fprintf(stderr, "Open HAX device failed\n");
+    }
+
+    return module;
+}
+
+/*
+ * Issue HAX_VM_IOCTL_ALLOC_RAM on the VM device for the region (va, size).
+ * Returns 0 on success, -EINVAL before VM creation, -EFAULT on ioctl failure.
+ */
+int hax_populate_ram(uint64_t va, uint32_t size)
+{
+    int ret;
+    struct hax_alloc_ram_info info;
+    HANDLE hDeviceVM;
+    DWORD dSize = 0;
+
+    if (!hax_global.vm || !hax_global.vm->fd) {
+        fprintf(stderr, "Allocate memory before vm create?\n");
+        return -EINVAL;
+    }
+
+    info.size = size;
+    info.va = va;
+    hDeviceVM = hax_global.vm->fd;
+    ret = DeviceIoControl(hDeviceVM, HAX_VM_IOCTL_ALLOC_RAM,
+                          &info, sizeof(info), NULL, 0, &dSize,
+                          (LPOVERLAPPED) NULL);
+    if (!ret) {
+        fprintf(stderr, "Failed to allocate %x memory\n", size);
+        /* ret is 0 (FALSE) here; returning it would report success */
+        return -EFAULT;
+    }
+    return 0;
+}
+
+/* Map guest physical range [start_pa, start_pa + size) to host_va through
+ * HAX_VM_IOCTL_SET_RAM; flags is truncated to 8 bits.
+ * Returns 0 on success, -EFAULT if the ioctl fails. */
+int hax_set_ram(uint64_t start_pa, uint32_t size, uint64_t host_va, int flags)
+{
+    struct hax_set_ram_info req;
+    DWORD returned = 0;
+    HANDLE vm_dev = hax_global.vm->fd;
+
+    req.pa_start = start_pa;
+    req.size = size;
+    req.va = host_va;
+    req.flags = (uint8_t) flags;
+
+    if (!DeviceIoControl(vm_dev, HAX_VM_IOCTL_SET_RAM,
+                         &req, sizeof(req), NULL, 0, &returned,
+                         (LPOVERLAPPED) NULL)) {
+        return -EFAULT;
+    }
+    return 0;
+}
+
+/* Fetch the HAX module capability info into *cap.
+ * Returns 0 on success, -ENODEV for an invalid module handle, or -EFAULT
+ * (after logging the Windows error code) if the ioctl fails. */
+int hax_capability(struct hax_state *hax, struct hax_capabilityinfo *cap)
+{
+    HANDLE module = hax->fd;
+    DWORD returned = 0;
+    DWORD win_err;
+
+    if (hax_invalid_fd(module)) {
+        fprintf(stderr, "Invalid fd for hax device!\n");
+        return -ENODEV;
+    }
+
+    if (DeviceIoControl(module, HAX_IOCTL_CAPABILITY, NULL, 0, cap,
+                        sizeof(*cap), &returned, (LPOVERLAPPED) NULL)) {
+        return 0;
+    }
+    win_err = GetLastError();
+    if (win_err == ERROR_INSUFFICIENT_BUFFER || win_err == ERROR_MORE_DATA) {
+        fprintf(stderr, "hax capability is too long to hold.\n");
+    }
+    fprintf(stderr, "Failed to get Hax capability:%lu\n", win_err);
+    return -EFAULT;
+}
+
+/* Fetch the HAX module version into *version.
+ * Returns 0 on success, -ENODEV for an invalid module handle, or -EFAULT
+ * (after logging the Windows error code) if the ioctl fails. */
+int hax_mod_version(struct hax_state *hax, struct hax_module_version *version)
+{
+    int ret;
+    HANDLE hDevice = hax->fd; /* handle to hax module */
+    DWORD dSize = 0;
+    DWORD err = 0;
+
+    if (hax_invalid_fd(hDevice)) {
+        fprintf(stderr, "Invalid fd for hax device!\n");
+        return -ENODEV;
+    }
+
+    ret = DeviceIoControl(hDevice, HAX_IOCTL_VERSION, NULL, 0,
+                          version, sizeof(*version), &dSize,
+                          (LPOVERLAPPED) NULL);
+    if (ret)
+        return 0;
+
+    err = GetLastError();
+    if (err == ERROR_INSUFFICIENT_BUFFER || err == ERROR_MORE_DATA)
+        fprintf(stderr, "hax module version is too long to hold.\n");
+    fprintf(stderr, "Failed to get Hax module version:%lu\n", err);
+    return -EFAULT;
+}
+
+/* Build the device path of VM vm_id (a "hax_vmNN" device name).
+ * Returns a g_free()-able string, or NULL if vm_id exceeds MAX_VM_ID. */
+static char *hax_vm_devfs_string(int vm_id)
+{
+    if (vm_id > MAX_VM_ID) {
+        fprintf(stderr, "Too big VM id\n");
+        return NULL;
+    }
+
+#define HAX_VM_DEVFS "\\\\.\\hax_vmxx"
+    char *path = g_strdup(HAX_VM_DEVFS); /* placeholder sizes the buffer */
+    if (path == NULL)
+        return NULL;
+
+    snprintf(path, sizeof HAX_VM_DEVFS, "\\\\.\\hax_vm%02d", vm_id);
+    return path;
+}
+
+/* Build the device path of vcpu vcpu_id in VM vm_id ("hax_vmNN_vcpuNN").
+ * Returns a g_free()-able string, or NULL if either id is over its maximum. */
+static char *hax_vcpu_devfs_string(int vm_id, int vcpu_id)
+{
+    if (vcpu_id > MAX_VCPU_ID || vm_id > MAX_VM_ID) {
+        fprintf(stderr, "Too big vm id %x or vcpu id %x\n", vm_id, vcpu_id);
+        return NULL;
+    }
+
+#define HAX_VCPU_DEVFS "\\\\.\\hax_vmxx_vcpuxx"
+    char *path = g_strdup(HAX_VCPU_DEVFS); /* placeholder sizes the buffer */
+    if (path == NULL)
+        return NULL;
+
+    snprintf(path, sizeof HAX_VCPU_DEVFS, "\\\\.\\hax_vm%02d_vcpu%02d",
+             vm_id, vcpu_id);
+    return path;
+}
+
+/* Ask the HAX module to create a VM and store the new id in *vmid.
+ * Returns 0 on success or if hax->vm already exists (*vmid is then left
+ * untouched), -EINVAL for an invalid module handle, -1 if the ioctl fails. */
+int hax_host_create_vm(struct hax_state *hax, int *vmid)
+{
+    DWORD returned = 0;
+    int new_id = 0;
+
+    if (hax_invalid_fd(hax->fd)) {
+        return -EINVAL;
+    }
+    if (hax->vm) {
+        return 0;
+    }
+    if (!DeviceIoControl(hax->fd, HAX_IOCTL_CREATE_VM,
+                         NULL, 0, &new_id, sizeof(new_id), &returned,
+                         (LPOVERLAPPED) NULL)) {
+        fprintf(stderr, "Failed to create VM. Error code: %lu\n",
+                GetLastError());
+        return -1;
+    }
+    *vmid = new_id;
+    return 0;
+}
+
+/* Open the device of VM vm_id (the hax argument is unused).
+ * Returns the handle, or INVALID_HANDLE_VALUE on failure. */
+hax_fd hax_host_open_vm(struct hax_state * hax, int vm_id)
+{
+    hax_fd vm_dev;
+    char *path = hax_vm_devfs_string(vm_id);
+
+    if (path == NULL) {
+        fprintf(stderr, "Failed to open VM. VM name is null\n");
+        return INVALID_HANDLE_VALUE;
+    }
+
+    vm_dev = CreateFile(path, GENERIC_READ | GENERIC_WRITE, 0, NULL,
+                        CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
+    if (vm_dev == INVALID_HANDLE_VALUE) {
+        fprintf(stderr, "Open the vm device error:%s, ec:%lu\n",
+                path, GetLastError());
+    }
+    g_free(path);
+    return vm_dev;
+}
+
+/* Pass *qversion to the VM device via HAX_VM_IOCTL_NOTIFY_QEMU_VERSION.
+ * Returns 0 on success, -EINVAL for a bad handle, -1 if the ioctl fails. */
+int hax_notify_qemu_version(hax_fd vm_fd, struct hax_qemu_version *qversion)
+{
+    DWORD returned = 0;
+
+    if (hax_invalid_fd(vm_fd))
+        return -EINVAL;
+    if (!DeviceIoControl(vm_fd, HAX_VM_IOCTL_NOTIFY_QEMU_VERSION,
+                         qversion, sizeof(struct hax_qemu_version),
+                         NULL, 0, &returned, (LPOVERLAPPED) NULL)) {
+        fprintf(stderr, "Failed to notify qemu API version\n");
+        return -1;
+    }
+    return 0;
+}
+
+/* Ask the VM device vm_fd to create the vcpu numbered vcpuid.
+ * Returns 0 on success, -1 if the ioctl fails. */
+int hax_host_create_vcpu(hax_fd vm_fd, int vcpuid)
+{
+    DWORD returned = 0;
+
+    if (!DeviceIoControl(vm_fd, HAX_VM_IOCTL_VCPU_CREATE,
+                         &vcpuid, sizeof(vcpuid), NULL, 0, &returned,
+                         (LPOVERLAPPED) NULL)) {
+        fprintf(stderr, "Failed to create vcpu %x\n", vcpuid);
+        return -1;
+    }
+
+    return 0;
+}
+
+/* Open the device of vcpu vcpuid belonging to VM vmid.
+ * Returns the handle, or INVALID_HANDLE_VALUE on failure. */
+hax_fd hax_host_open_vcpu(int vmid, int vcpuid)
+{
+    hax_fd vcpu_dev;
+    char *path = hax_vcpu_devfs_string(vmid, vcpuid);
+
+    if (path == NULL) {
+        fprintf(stderr, "Failed to get the devfs\n");
+        return INVALID_HANDLE_VALUE;
+    }
+
+    vcpu_dev = CreateFile(path, GENERIC_READ | GENERIC_WRITE, 0, NULL,
+                          CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
+    if (vcpu_dev == INVALID_HANDLE_VALUE) {
+        fprintf(stderr, "Failed to open the vcpu devfs\n");
+    }
+
+    g_free(path);
+    return vcpu_dev;
+}
+
+/*
+ * Set up the tunnel of a vcpu via HAX_VCPU_IOCTL_SETUP_TUNNEL and record the
+ * tunnel and I/O buffer addresses reported by the driver in *vcpu.
+ * Returns 0 on success, -1 if the ioctl fails, -EINVAL on a bad tunnel size.
+ */
+int hax_host_setup_vcpu_channel(struct hax_vcpu_state *vcpu)
+{
+    struct hax_tunnel_info tunnel;
+    DWORD returned = 0;
+
+    if (!DeviceIoControl(vcpu->fd, HAX_VCPU_IOCTL_SETUP_TUNNEL,
+                         NULL, 0, &tunnel, sizeof(tunnel), &returned,
+                         (LPOVERLAPPED) NULL)) {
+        fprintf(stderr, "Failed to setup the hax tunnel\n");
+        return -1;
+    }
+
+    if (!valid_hax_tunnel_size(tunnel.size)) {
+        fprintf(stderr, "Invalid hax tunnel size %x\n", tunnel.size);
+        return -EINVAL;
+    }
+    vcpu->tunnel = (struct hax_tunnel *) (intptr_t) (tunnel.va);
+    vcpu->iobuf = (unsigned char *) (intptr_t) (tunnel.io_va);
+    return 0;
+}
+
+/*
+ * Issue HAX_VCPU_IOCTL_RUN on the vcpu device.
+ * Returns 0 if the ioctl succeeds, -EFAULT if it fails.
+ */
+int hax_vcpu_run(struct hax_vcpu_state *vcpu)
+{
+    DWORD returned = 0;
+    int ok;
+
+    ok = DeviceIoControl(vcpu->fd, HAX_VCPU_IOCTL_RUN,
+                         NULL, 0, NULL, 0, &returned, (LPOVERLAPPED) NULL);
+
+    return ok ? 0 : -EFAULT;
+}
+
+/*
+ * Transfer FPU state between *fl and the vcpu of env: HAX_VCPU_IOCTL_SET_FPU
+ * when set is non-zero, HAX_VCPU_IOCTL_GET_FPU otherwise.
+ * Returns 0 on success, -1 for an invalid vcpu fd, -EFAULT if the ioctl fails.
+ */
+int hax_sync_fpu(CPUArchState * env, struct fx_layout *fl, int set)
+{
+    DWORD returned = 0;
+    int ok;
+    hax_fd vcpu_dev = hax_vcpu_get_fd(env);
+
+    if (hax_invalid_fd(vcpu_dev)) {
+        return -1;
+    }
+
+    if (set) {
+        ok = DeviceIoControl(vcpu_dev, HAX_VCPU_IOCTL_SET_FPU,
+                             fl, sizeof(*fl), NULL, 0,
+                             &returned, (LPOVERLAPPED) NULL);
+    } else {
+        ok = DeviceIoControl(vcpu_dev, HAX_VCPU_IOCTL_GET_FPU,
+                             NULL, 0, fl, sizeof(*fl),
+                             &returned, (LPOVERLAPPED) NULL);
+    }
+    return ok ? 0 : -EFAULT;
+}
+
+/*
+ * Transfer MSR data between *msrs and the vcpu of env, using
+ * HAX_VCPU_IOCTL_SET_MSRS when set is non-zero and HAX_VCPU_IOCTL_GET_MSRS
+ * otherwise. *msrs serves as both input and output buffer of either ioctl.
+ * Returns 0 on success, -1 for an invalid vcpu fd, -EFAULT if the ioctl fails.
+ */
+int hax_sync_msr(CPUArchState * env, struct hax_msr_data *msrs, int set)
+{
+    DWORD returned = 0;
+    DWORD request;
+    int ok;
+    hax_fd vcpu_dev;
+
+    vcpu_dev = hax_vcpu_get_fd(env);
+    if (hax_invalid_fd(vcpu_dev)) {
+        return -1;
+    }
+
+    /* Both directions share the same buffers; only the request differs */
+    request = set ? HAX_VCPU_IOCTL_SET_MSRS : HAX_VCPU_IOCTL_GET_MSRS;
+    ok = DeviceIoControl(vcpu_dev, request,
+                         msrs, sizeof(*msrs),
+                         msrs, sizeof(*msrs),
+                         &returned, (LPOVERLAPPED) NULL);
+
+    return ok ? 0 : -EFAULT;
+}
+
+/*
+ * Transfer register state between *state and the vcpu of env:
+ * HAX_VCPU_SET_REGS when set is non-zero, HAX_VCPU_GET_REGS otherwise.
+ * Returns 0 on success, -1 for an invalid vcpu fd, -EFAULT if the ioctl fails.
+ */
+int hax_sync_vcpu_state(CPUArchState * env, struct vcpu_state_t *state, int set)
+{
+    DWORD returned;
+    int ok;
+    hax_fd vcpu_dev = hax_vcpu_get_fd(env);
+
+    if (hax_invalid_fd(vcpu_dev)) {
+        return -1;
+    }
+
+    if (set) {
+        /* state is the input buffer; nothing is read back */
+        ok = DeviceIoControl(vcpu_dev, HAX_VCPU_SET_REGS,
+                             state, sizeof(*state), NULL, 0,
+                             &returned, (LPOVERLAPPED) NULL);
+    } else {
+        /* state is the output buffer; nothing is sent */
+        ok = DeviceIoControl(vcpu_dev, HAX_VCPU_GET_REGS,
+                             NULL, 0, state, sizeof(*state),
+                             &returned, (LPOVERLAPPED) NULL);
+    }
+    return ok ? 0 : -EFAULT;
+}
+
+/*
+ * Send interrupt vector `vector` to the vcpu of env via
+ * HAX_VCPU_IOCTL_INTERRUPT.
+ * Returns 0 on success, -1 for an invalid vcpu fd, -EFAULT if the ioctl fails.
+ */
+int hax_inject_interrupt(CPUArchState * env, int vector)
+{
+    DWORD returned;
+    int ok;
+    hax_fd vcpu_dev = hax_vcpu_get_fd(env);
+
+    if (hax_invalid_fd(vcpu_dev)) {
+        return -1;
+    }
+
+    ok = DeviceIoControl(vcpu_dev, HAX_VCPU_IOCTL_INTERRUPT,
+                         &vector, sizeof(vector), NULL, 0,
+                         &returned, (LPOVERLAPPED) NULL);
+
+    return ok ? 0 : -EFAULT;
+}
diff --git a/target-i386/hax-windows.h b/target-i386/hax-windows.h
new file mode 100644
index 0000000..3c7533f
--- /dev/null
+++ b/target-i386/hax-windows.h
@@ -0,0 +1,89 @@
+/*
+ * QEMU HAXM support
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *
+ * Copyright (c) 2011 Intel Corporation
+ *  Written by:
+ *  Jiang Yunhong<yunhong.jiang@intel.com>
+ *  Xin Xiaohui<xiaohui.xin@intel.com>
+ *  Zhang Xiantao<xiantao.zhang@intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef __HAX_WINDOWS_H
+#define __HAX_WINDOWS_H
+
+#include <windows.h>
+#include <memory.h>
+#include <malloc.h>
+#include <winioctl.h>
+#include <string.h>
+#include <stdio.h>
+#include <windef.h>
+
+#define HAX_INVALID_FD INVALID_HANDLE_VALUE
+
+static inline void hax_mod_close(struct hax_state *hax)
+{
+    CloseHandle(hax->fd); /* result ignored; hax->fd itself is not reset */
+}
+
+static inline void hax_close_fd(hax_fd fd)
+{
+    CloseHandle(fd); /* hax_fd is used as a Win32 HANDLE; result ignored */
+}
+
+static inline int hax_invalid_fd(hax_fd fd)
+{
+    return (fd == INVALID_HANDLE_VALUE); /* NULL is not treated as invalid */
+}
+
+#define HAX_DEVICE_TYPE 0x4000
+
+#define HAX_IOCTL_VERSION       CTL_CODE(HAX_DEVICE_TYPE, 0x900, \
+                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_IOCTL_CREATE_VM     CTL_CODE(HAX_DEVICE_TYPE, 0x901, \
+                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_IOCTL_CAPABILITY    CTL_CODE(HAX_DEVICE_TYPE, 0x910, \
+                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
+
+#define HAX_VM_IOCTL_VCPU_CREATE   CTL_CODE(HAX_DEVICE_TYPE, 0x902, \
+                                            METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VM_IOCTL_ALLOC_RAM     CTL_CODE(HAX_DEVICE_TYPE, 0x903, \
+                                            METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VM_IOCTL_SET_RAM       CTL_CODE(HAX_DEVICE_TYPE, 0x904, \
+                                            METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VM_IOCTL_VCPU_DESTROY  CTL_CODE(HAX_DEVICE_TYPE, 0x905, \
+                                            METHOD_BUFFERED, FILE_ANY_ACCESS)
+
+#define HAX_VCPU_IOCTL_RUN      CTL_CODE(HAX_DEVICE_TYPE, 0x906, \
+                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VCPU_IOCTL_SET_MSRS CTL_CODE(HAX_DEVICE_TYPE, 0x907, \
+                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VCPU_IOCTL_GET_MSRS CTL_CODE(HAX_DEVICE_TYPE, 0x908, \
+                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VCPU_IOCTL_SET_FPU  CTL_CODE(HAX_DEVICE_TYPE, 0x909, \
+                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VCPU_IOCTL_GET_FPU  CTL_CODE(HAX_DEVICE_TYPE, 0x90a, \
+                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
+
+#define HAX_VCPU_IOCTL_SETUP_TUNNEL  CTL_CODE(HAX_DEVICE_TYPE, 0x90b, \
+                                              METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VCPU_IOCTL_INTERRUPT     CTL_CODE(HAX_DEVICE_TYPE, 0x90c, \
+                                              METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VCPU_SET_REGS            CTL_CODE(HAX_DEVICE_TYPE, 0x90d, \
+                                              METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VCPU_GET_REGS            CTL_CODE(HAX_DEVICE_TYPE, 0x90e, \
+                                              METHOD_BUFFERED, FILE_ANY_ACCESS)
+
+#define HAX_VM_IOCTL_NOTIFY_QEMU_VERSION CTL_CODE(HAX_DEVICE_TYPE, 0x910, \
+                                                  METHOD_BUFFERED,        \
+                                                  FILE_ANY_ACCESS)
+#endif
diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c
index 6cbdf17..8b08393 100644
--- a/target-i386/seg_helper.c
+++ b/target-i386/seg_helper.c
@@ -25,6 +25,10 @@
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
 #include "exec/log.h"
+#ifdef CONFIG_HAX
+#include "target-i386/hax-i386.h"
+#include "sysemu/hax.h"
+#endif
 
 //#define DEBUG_PCALL
 
@@ -1334,6 +1338,10 @@
             !(env->hflags & HF_SMM_MASK)) {
             cpu_svm_check_intercept_param(env, SVM_EXIT_SMI, 0);
             cs->interrupt_request &= ~CPU_INTERRUPT_SMI;
+#ifdef CONFIG_HAX
+            if (hax_enabled())
+                cs->hax_vcpu->resync = 1;
+#endif
             do_smm_enter(cpu);
             ret = true;
         } else if ((interrupt_request & CPU_INTERRUPT_NMI) &&
diff --git a/target-i386/translate.c b/target-i386/translate.c
index fa2ac48..121527c 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -31,6 +31,9 @@
 #include "trace-tcg.h"
 #include "exec/log.h"
 
+#ifdef CONFIG_HAX
+#include "sysemu/hax.h"
+#endif
 
 #define PREFIX_REPZ   0x01
 #define PREFIX_REPNZ  0x02
@@ -8303,6 +8306,19 @@
         }
 
         pc_ptr = disas_insn(env, dc, pc_ptr);
+        
+#ifdef CONFIG_HAX
+        if (hax_enabled() && hax_stop_translate(cs)) {
+            /* When the host CPU doesn't support VMX "unrestricted guest" mode,
+             * TCG is used to execute MMIO instructions. This code path is used
+             * to limit the translation to a single machine instruction, in order
+             * to try to return to HAX execution as soon as possible. */
+            gen_jmp_im(pc_ptr - dc->cs_base);
+            gen_eob(dc);
+            break;
+        }
+#endif /* CONFIG_HAX */
+
         /* stop translation if indicated */
         if (dc->is_jmp)
             break;
diff --git a/vl.c b/vl.c
index 5f056a3..eb9370e 100755
--- a/vl.c
+++ b/vl.c
@@ -94,6 +94,7 @@
 #include "migration/migration.h"
 #include "sysemu/cpus.h"
 #include "sysemu/kvm.h"
+#include "sysemu/hax.h"
 #include "qapi/qmp/qjson.h"
 #include "qemu/option.h"
 #include "qemu/config-file.h"
@@ -2023,8 +2024,20 @@
 #ifdef CONFIG_PROFILER
     int64_t ti;
 #endif
+
+#ifdef CONFIG_HAX
+    if (hax_sync_vcpus() < 0) {
+        fprintf(stderr, "Internal error: hax sync failed\n");
+        return;
+    }
+#endif
+
     do {
+#ifdef CONFIG_HAX
+        nonblocking = !kvm_enabled() && !xen_enabled() && !hax_enabled() && last_io > 0;
+#else
         nonblocking = !kvm_enabled() && !xen_enabled() && last_io > 0;
+#endif
 #ifdef CONFIG_PROFILER
         ti = profile_getclock();
 #endif
@@ -2994,7 +3007,9 @@
         error_report("ram size too large");
         return false;
     }
-
+#ifdef CONFIG_HAX
+    hax_pre_init(ram_size);
+#endif
     /* store value for the future use */
     qemu_opt_set_number(opts, "size", ram_size, &error_abort);
     *maxram_size = ram_size;
@@ -3844,6 +3859,13 @@
                 olist = qemu_find_opts("machine");
                 qemu_opts_parse_noisily(olist, "accel=kvm", false);
                 break;
+#ifdef CONFIG_HAX
+            case QEMU_OPTION_enable_hax:
+                olist = qemu_find_opts("machine");
+                qemu_opts_parse_noisily(olist, "accel=hax", false);
+                hax_disable(0);
+                break;
+#endif /* CONFIG_HAX */
             case QEMU_OPTION_M:
             case QEMU_OPTION_machine:
                 olist = qemu_find_opts("machine");
@@ -4692,6 +4714,18 @@
         error_report("could not acquire pid file: %s", strerror(errno));
         return 1;
     }
+#ifdef CONFIG_HAX
+    uint64_t hax_max_ram = 0;
+    if (hax_get_max_ram(&hax_max_ram) == 0 && hax_max_ram > 0) {
+        if (ram_size > hax_max_ram) {
+            const int requested_meg = ram_size / (1024 * 1024);
+            const int actual_meg = hax_max_ram / (1024 * 1024);
+            fprintf(stderr, "Warning: requested ram_size %dM too big, reduced to %dM\n",
+                    requested_meg, actual_meg);
+            ram_size = hax_max_ram;
+        }
+    }
+#endif /* CONFIG_HAX */
 
     if (qemu_opts_foreach(qemu_find_opts("device"),
                           device_help_func, NULL, NULL)) {
@@ -4786,10 +4820,17 @@
 
     cpu_ticks_init();
     if (icount_opts) {
+#ifdef CONFIG_HAX
+        if (kvm_enabled() || xen_enabled() || hax_enabled()) {
+            error_report("-icount is not allowed with kvm, hax or xen");
+            return 1;
+        }
+#else
         if (kvm_enabled() || xen_enabled()) {
             error_report("-icount is not allowed with kvm or xen");
             return 1;
         }
+#endif
         configure_icount(icount_opts, &error_abort);
         qemu_opts_del(icount_opts);
     }
@@ -5029,6 +5070,15 @@
 
     numa_post_machine_init();
 
+#ifdef CONFIG_HAX
+    if (hax_enabled()) {
+        if (hax_sync_vcpus() < 0) {
+            fprintf(stderr, "Internal error: Initial hax sync failed\n");
+            return 1;
+        }
+    }
+#endif  /* CONFIG_HAX */
+
     if (qemu_opts_foreach(qemu_find_opts("fw_cfg"),
                           parse_fw_cfg, fw_cfg_find(), NULL) != 0) {
         return 1;