Add HAX emulation support. This patch adds support for full virtualization of x86 and x86_64 target platforms on Windows and OS X through Intel's HAXM. To enable this when using qemu-upstream-i386 or qemu-upstream-x86_64, use -enable-hax on the command-line. TESTS=Works fine on Windows, OS X still needs testing.
diff --git a/Makefile.target b/Makefile.target index a440bcb..9bd4c2e 100644 --- a/Makefile.target +++ b/Makefile.target
@@ -149,6 +149,17 @@ obj-$(call lnot,$(CONFIG_XEN)) += xen-common-stub.o obj-$(call lnot,$(CONFIG_XEN_I386)) += xen-hvm-stub.o +# HAX support, only when targeting i386 or x86_64 +ifeq (y,$(CONFIG_HAX)) +ifneq (,$(filter i386 x86_64,$(TARGET_NAME))) +obj-y += target-i386/hax-all.o target-i386/hax-slot.o +obj-$(CONFIG_WIN32) += target-i386/hax-windows.o +obj-$(CONFIG_DARWIN) += target-i386/hax-darwin.o +else +obj-y += hax-stub.o +endif +endif # CONFIG_HAX + # Hardware support ifeq ($(TARGET_NAME), sparc64) obj-y += hw/sparc64/
diff --git a/android-qemu2-glue/build/Makefile.qemu2-target.mk b/android-qemu2-glue/build/Makefile.qemu2-target.mk index 2edc3aa..2c45668 100644 --- a/android-qemu2-glue/build/Makefile.qemu2-target.mk +++ b/android-qemu2-glue/build/Makefile.qemu2-target.mk
@@ -124,6 +124,25 @@ stubs/vhost.c \ ) \ +# HAX support. +HAX_COMMON_SOURCES := \ + target-i386/hax-all.c \ + target-i386/hax-slot.c \ + +LOCAL_SRC_FILES += \ + $(call qemu2-if-target,x86 x86_64, \ + $(call qemu2-if-windows, \ + $(HAX_COMMON_SOURCES) \ + target-i386/hax-windows.c) \ + $(call qemu2-if-darwin, \ + $(HAX_COMMON_SOURCES) \ + target-i386/hax-darwin.c) \ + $(call qemu2-if-linux, \ + hax-stub.c) \ + , \ + hax-stub.c \ + ) \ + LOCAL_PREBUILTS_OBJ_FILES += \ $(call qemu2-if-windows,$(QEMU2_AUTO_GENERATED_DIR)/version.o)
diff --git a/arch_init.c b/arch_init.c index 3ea51ab..1d09f32 100644 --- a/arch_init.c +++ b/arch_init.c
@@ -285,6 +285,15 @@ #endif } +int hax_available(void) +{ +#ifdef CONFIG_HAX + return 1; +#else + return 0; +#endif +} + int xen_available(void) { #ifdef CONFIG_XEN @@ -294,7 +303,6 @@ #endif } - TargetInfo *qmp_query_target(Error **errp) { TargetInfo *info = g_malloc0(sizeof(*info));
diff --git a/configure b/configure index e21943f..890c14c 100755 --- a/configure +++ b/configure
@@ -230,6 +230,7 @@ vhost_net="no" vhost_scsi="no" kvm="no" +hax="no" rdma="" gprof="no" debug_tcg="no" @@ -606,6 +607,7 @@ Darwin) bsd="yes" darwin="yes" + hax="yes" LDFLAGS_SHARED="-bundle -undefined dynamic_lookup" if [ "$cpu" = "x86_64" ] ; then QEMU_CFLAGS="-arch x86_64 $QEMU_CFLAGS" @@ -918,6 +920,10 @@ ;; --enable-kvm) kvm="yes" ;; + --disable-hax) hax="no" + ;; + --enable-hax) hax="yes" + ;; --disable-tcg-interpreter) tcg_interpreter="no" ;; --enable-tcg-interpreter) tcg_interpreter="yes" @@ -1351,6 +1357,7 @@ fdt fdt device tree bluez bluez stack connectivity kvm KVM acceleration support + hax HAX acceleration support rdma RDMA-based migration support uuid uuid support vde support for vde network @@ -4864,6 +4871,7 @@ echo "ATTR/XATTR support $attr" echo "Install blobs $blobs" echo "KVM support $kvm" +echo "HAX support $hax" echo "RDMA support $rdma" echo "TCG interpreter $tcg_interpreter" echo "fdt support $fdt" @@ -5811,6 +5819,15 @@ fi fi esac +if test "$hax" = "yes" ; then + if test "$target_softmmu" = "yes" ; then + case "$target_name" in + i386|x86_64) + echo "CONFIG_HAX=y" >> $config_target_mak + ;; + esac + fi +fi if test "$target_bigendian" = "yes" ; then echo "TARGET_WORDS_BIGENDIAN=y" >> $config_target_mak fi
diff --git a/cpu-exec.c b/cpu-exec.c index 5d9710a..b211cd9 100644 --- a/cpu-exec.c +++ b/cpu-exec.c
@@ -23,6 +23,9 @@ #include "exec/exec-all.h" #include "tcg.h" #include "qemu/atomic.h" +#ifdef CONFIG_HAX +#include "sysemu/hax.h" +#endif /* CONFIG_HAX */ #include "sysemu/qtest.h" #include "qemu/timer.h" #include "exec/address-spaces.h" @@ -447,11 +450,27 @@ return false; } +static inline int cpu_get_interrupt_request(CPUState *cpu) +{ +#ifdef CONFIG_HAX + /* When HAX is enabled, there are two cases where TCG emulation might happen: + * MMIO instructions, or non-paged mode. When this is due to an MMIO, the interrupt + * should not be emulated because only one instruction will be translated and run + * through TCG before returning to the HAX kernel. + */ + if (hax_enabled() && !hax_vcpu_emulation_mode(cpu)) { + /* Mask interrupt during MMIO emulation. */ + return 0; + } +#endif + return cpu->interrupt_request; +} + static inline void cpu_handle_interrupt(CPUState *cpu, TranslationBlock **last_tb) { CPUClass *cc = CPU_GET_CLASS(cpu); - int interrupt_request = cpu->interrupt_request; + int interrupt_request = cpu_get_interrupt_request(cpu); if (unlikely(interrupt_request)) { if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) { @@ -618,11 +637,35 @@ break; } +#ifdef CONFIG_HAX + /* When HAX is enabled but VMX "unrestricted guest" mode is not + * supported, call hax_vcpu_exec() to run the current instructions. + * The function returns 0 when execution should stop immediately + * (e.g. if the vCPU is halted, or received an interrupt). However, + * it will return non-zero to indicate that the next instructions need + * to be handled through TCG. This happens when the virtual CPU runs + * in "real mode", or to handle MMIO operations only. 
*/ + if (hax_enabled() && !hax_vcpu_exec(cpu)) { + break; + } +#endif /* CONFIG_HAX */ + cpu->tb_flushed = false; /* reset before first TB lookup */ for(;;) { cpu_handle_interrupt(cpu, &last_tb); tb = tb_find_fast(cpu, &last_tb, tb_exit); cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc); +#ifdef CONFIG_HAX + if (hax_enabled() && hax_stop_emulation(cpu)) { + /* This will end TCG emulation of instructions if the vCPU + * just switched to paged-mode (which can be handled by + * hax_vcpu_exec() in the next call to this function), or + * if the single-instruction MMIO operation has completed. + * (see target-i386/translate.c). + */ + cpu_loop_exit(cpu); + } +#endif /* CONFIG_HAX */ /* Try to align the host and virtual clocks if the guest is in advance */ align_clocks(&sc, cpu);
diff --git a/cpus.c b/cpus.c index 84c3520..a2be482 100644 --- a/cpus.c +++ b/cpus.c
@@ -34,6 +34,7 @@ #include "exec/gdbstub.h" #include "sysemu/dma.h" #include "sysemu/kvm.h" +#include "sysemu/hax.h" #include "qmp-commands.h" #include "exec/exec-all.h" @@ -711,6 +712,11 @@ CPU_FOREACH(cpu) { cpu_synchronize_state(cpu); +#ifdef CONFIG_HAX + if (hax_enabled() && hax_ug_platform()) { + hax_cpu_synchronize_state(cpu); + } +#endif } } @@ -720,6 +726,10 @@ CPU_FOREACH(cpu) { cpu_synchronize_post_reset(cpu); +#ifdef CONFIG_HAX + if (hax_enabled() && hax_ug_platform()) + hax_cpu_synchronize_post_reset(cpu); +#endif } } @@ -729,6 +739,10 @@ CPU_FOREACH(cpu) { cpu_synchronize_post_init(cpu); +#ifdef CONFIG_HAX + if (hax_enabled() && hax_ug_platform()) + hax_cpu_synchronize_post_init(cpu); +#endif } } @@ -1038,6 +1052,16 @@ } } +#ifdef CONFIG_HAX +static void qemu_hax_wait_io_event(CPUState *cpu) +{ + while (cpu_thread_is_idle(cpu)) { + qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex); + } + qemu_wait_io_event_common(cpu); +} +#endif /* CONFIG_HAX */ + static void qemu_kvm_wait_io_event(CPUState *cpu) { while (cpu_thread_is_idle(cpu)) { @@ -1096,6 +1120,7 @@ fprintf(stderr, "qtest is not supported under Windows\n"); exit(1); #else + CPUState *cpu = arg; sigset_t waitset; int r; @@ -1195,6 +1220,51 @@ return NULL; } +#ifdef CONFIG_HAX +/* The HAX-specific vCPU thread function. This one should only run when the host + * CPU supports the VMX "unrestricted guest" feature. 
*/ +static void *qemu_hax_cpu_thread_fn(void *arg) +{ + CPUState *cpu = arg; + int r; + + assert(hax_enabled() && hax_ug_platform()); + + rcu_register_thread(); + + qemu_mutex_lock_iothread(); + qemu_thread_get_self(cpu->thread); + + cpu->thread_id = qemu_get_thread_id(); + cpu->can_do_io = 1; + /* cpu->created is only set once hax_init_vcpu() has run, because the + * thread that started this vCPU waits on that flag via qemu_cpu_cond. */ + current_cpu = cpu; + + hax_init_vcpu(cpu); + + /* signal CPU creation */ + cpu->created = true; + qemu_cond_signal(&qemu_cpu_cond); + + do { + if (cpu_can_run(cpu)) { + r = hax_smp_cpu_exec(cpu); + if (r == EXCP_DEBUG) { + cpu_handle_guest_debug(cpu); + } + } + qemu_hax_wait_io_event(cpu); + } while (!cpu->unplug || cpu_can_run(cpu)); + + hax_vcpu_destroy(cpu); + cpu->created = false; + qemu_cond_signal(&qemu_cpu_cond); + qemu_mutex_unlock_iothread(); + return NULL; +} +#endif /* CONFIG_HAX */ + static void qemu_cpu_kick_thread(CPUState *cpu) { #ifndef _WIN32 @@ -1209,9 +1279,49 @@ fprintf(stderr, "qemu:%s: %s", __func__, strerror(err)); exit(1); } +#ifdef __APPLE__ + // On OS X, the signal isn't caught reliably during shutdown. + if (!atomic_mb_read(&exit_request)) { + cpu_exit(cpu); + atomic_mb_set(&exit_request, 1); + } +#endif /* __APPLE__ */ +#ifdef CONFIG_HAX + if (hax_enabled() && hax_ug_platform()) { + cpu_exit(cpu); + } +#endif /* CONFIG_HAX */ #else /* _WIN32 */ - abort(); -#endif + if (cpu->thread_kicked) { + return; + } + cpu->thread_kicked = true; + if (!qemu_cpu_is_self(cpu)) { + CONTEXT tcgContext; + + if (SuspendThread(cpu->hThread) == (DWORD)-1) { + fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__, + GetLastError()); + exit(1); + } + + /* On multi-core systems, we are not sure that the thread is actually + * suspended until we can get the context. 
*/ + tcgContext.ContextFlags = CONTEXT_CONTROL; + while (GetThreadContext(cpu->hThread, &tcgContext) != 0) { + continue; + } + + cpu_exit(cpu); + atomic_mb_set(&exit_request, 1); + + if (ResumeThread(cpu->hThread) == (DWORD)-1) { + fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__, + GetLastError()); + exit(1); + } + } +#endif /* _WIN32 */ } static void qemu_cpu_kick_no_halt(void) @@ -1230,7 +1340,21 @@ void qemu_cpu_kick(CPUState *cpu) { qemu_cond_broadcast(cpu->halt_cond); + /* There are three cases to consider here: + * + * - TCG is being used without HAX, then qemu_cpu_kick_no_halt() can be + * called directly. + * + * - TCG is being used with HAX, then kicking the thread with a signal (on Posix) + * or with a thread suspend/resume (on Win32) is still needed. + * + * - TCG is not being used, kick the thread with a signal or suspend/resume. + */ +#ifdef CONFIG_HAX + if (tcg_enabled() && !(hax_enabled() && hax_ug_platform())) { +#else if (tcg_enabled()) { +#endif qemu_cpu_kick_no_halt(); } else { qemu_cpu_kick_thread(cpu); @@ -1262,11 +1386,52 @@ void qemu_mutex_lock_iothread(void) { + /* Technical note on what's going on here, because it's really subtle :-) + * + * The single TCG vCPU thread always holds the global mutex when executing + * instructions, and only releases it very briefly in qemu_tcg_wait_io_event(), + * which gets called periodically to process interrupts. + * + * Under heavy guest CPU load, it will be hard for other threads to acquire + * the lock due to this. To counter that, several things are implemented here: + * + * - First, |iothread_requesting_mutex| is used as a global atomic counter that + * will be > 0 whenever other threads are trying to acquire the lock. It is + * actually read by qemu_tcg_wait_io_event() to force the vCPU thread to + * release the lock until its value reaches 0 again. The |qemu_io_proceeded_cond| + * condition variable is used to do that. 
+ * + * - Second, if TCG is enabled, a trylock() is first tried to acquire the lock. + * If this fails, the TCG vCPU thread is kicked(), which forces generated code + * to exit to qemu_tcg_wait_io_event() as soon as possible. + * + * NOTE: It looks like the use of |iothread_requesting_mutex| isn't needed at all + * when KVM or HAX execution modes are being used, because the corresponding + * vCPU threads actually _release_ the lock just before entering guest mode + * (and re-acquire it just after exiting from it). + */ atomic_inc(&iothread_requesting_mutex); - /* In the simple case there is no need to bump the VCPU thread out of - * TCG code execution. + + /* A simple lock is sufficient in the following cases: + * + * - TCG is not enabled (KVM execution mode). + * [This is the !tcg_enabled() check] + * + * - TCG is enabled, but this is called from the TCG vCPU thread directly. + * [This is the qemu_in_vcpu_thread() check] + * + * - TCG is enabled, but so is HAX in "unrestricted guest" mode, which allows it + * to execute all guest code directly (i.e. there is no TCG vCPU thread). + * [This is the (hax_enabled() && hax_ug_platform()) check]. + * + * - TCG is enabled, but its thread has not started yet (e.g. when this + * function is called during virtual device realization). + * [This is (!first_cpu || !first_cpu->created)]. */ if (!tcg_enabled() || qemu_in_vcpu_thread() || +#ifdef CONFIG_HAX + (hax_enabled() && hax_ug_platform()) || +#endif !first_cpu || !first_cpu->created) { qemu_mutex_lock(&qemu_global_mutex); atomic_dec(&iothread_requesting_mutex); @@ -1370,6 +1535,17 @@ static QemuCond *tcg_halt_cond; static QemuThread *tcg_cpu_thread; +#ifdef CONFIG_HAX + if (hax_enabled()) { + /* This code path should only be taken when HAX is enabled but the + * CPU doesn't support "unrestricted guest" mode. */ + assert(!hax_ug_platform()); + /* Initialize HAX-related state for the TCG thread. This is required for + * cpu_exec() to work correctly when HAX is enabled. 
*/ + hax_init_vcpu(cpu); + } +#endif /* CONFIG_HAX */ + /* share a single thread for all cpus with TCG */ if (!tcg_cpu_thread) { cpu->thread = g_malloc0(sizeof(QemuThread)); @@ -1393,6 +1569,35 @@ } } +#ifdef CONFIG_HAX +static void qemu_hax_start_vcpu(CPUState *cpu) +{ + char thread_name[VCPU_THREAD_NAME_SIZE]; + + /* This function shall only be called when HAX is enabled, and the host CPU + * supports "unrestricted guest" mode. This allows emulation of "real mode" + * and completely avoids the use of TCG. It's also the only way to get + * multi-core accelerated emulation with HAX. */ + assert(hax_enabled()); + assert(hax_ug_platform()); + + cpu->thread = g_malloc0(sizeof(QemuThread)); + cpu->halt_cond = g_malloc0(sizeof(QemuCond)); + qemu_cond_init(cpu->halt_cond); + + snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX", + cpu->cpu_index); + qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn, + cpu, QEMU_THREAD_JOINABLE); +#ifdef _WIN32 + cpu->hThread = qemu_thread_get_handle(cpu->thread); +#endif + while (!cpu->created) { + qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex); + } +} +#endif /* CONFIG_HAX */ + static void qemu_kvm_start_vcpu(CPUState *cpu) { char thread_name[VCPU_THREAD_NAME_SIZE]; @@ -1443,6 +1648,10 @@ if (kvm_enabled()) { qemu_kvm_start_vcpu(cpu); +#ifdef CONFIG_HAX + } else if (hax_enabled() && hax_ug_platform()) { + qemu_hax_start_vcpu(cpu); +#endif } else if (tcg_enabled()) { qemu_tcg_init_vcpu(cpu); } else {
diff --git a/exec.c b/exec.c index 8ffde75..8858d8b 100644 --- a/exec.c +++ b/exec.c
@@ -31,6 +31,9 @@ #include "hw/xen/xen.h" #endif #include "sysemu/kvm.h" +#ifdef CONFIG_HAX +#include "sysemu/hax.h" +#endif /* CONFIG_HAX */ #include "sysemu/sysemu.h" #include "qemu/timer.h" #include "qemu/config-file.h" @@ -1574,6 +1577,25 @@ qemu_mutex_unlock_ramlist(); return; } +#ifdef CONFIG_HAX + /* + * With HAX, QEMU allocates the virtual address range and the HAX kernel + * module populates it with physical memory. There is currently no + * paging, so the user must make sure enough memory is free in advance. + */ + if (hax_enabled()) { + int ret; + ret = hax_populate_ram((uint64_t)(uintptr_t)new_block->host, + new_block->max_length); + if (ret < 0) { + error_setg_errno(errp, errno, + "Hax failed to populate RAM for: '%s'", + memory_region_name(new_block->mr)); + qemu_mutex_unlock_ramlist(); + return; + } + } +#endif memory_try_enable_merging(new_block->host, new_block->max_length); } }
diff --git a/hax-stub.c b/hax-stub.c new file mode 100644 index 0000000..2e2b048 --- /dev/null +++ b/hax-stub.c
@@ -0,0 +1,42 @@ +/* + * QEMU HAXM support + * + * Copyright (c) 2015, Intel Corporation + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * See the COPYING file in the top-level directory. + * + */ + +#include "sysemu/hax.h" + +int hax_sync_vcpus(void) +{ + return 0; +} + +void hax_disable(int disable) +{ + return; +} + +int hax_pre_init(uint64_t ram_size) +{ + return 0; +} + +int hax_enabled(void) +{ + return 0; +} + +int hax_ug_platform(void) +{ + return 0; +} + +int hax_get_max_ram(uint64_t *max_ram) { + return 0; +}
diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 0458934..fc37220 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c
@@ -46,6 +46,9 @@ #include "sysemu/sysemu.h" #include "sysemu/numa.h" #include "sysemu/kvm.h" +#ifdef CONFIG_HAX +#include "sysemu/hax.h" +#endif #include "sysemu/qtest.h" #include "kvm_i386.h" #include "hw/xen/xen.h" @@ -2089,6 +2092,10 @@ smm_available = true; } else if (kvm_enabled()) { smm_available = kvm_has_smm(); +#ifdef CONFIG_HAX + } else if (hax_enabled()) { + smm_available = false; +#endif } if (smm_available) {
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c index 14ac43c..cc77d80 100644 --- a/hw/intc/apic_common.c +++ b/hw/intc/apic_common.c
@@ -306,11 +306,15 @@ info = APIC_COMMON_GET_CLASS(s); info->realize(dev, errp); + /* NOTE: Why exactly does this need to be disabled for HAX? */ +#ifndef CONFIG_HAX /* Note: We need at least 1M to map the VAPIC option ROM */ if (!vapic && s->vapic_control & VAPIC_ENABLE_MASK && ram_size >= 1024 * 1024) { vapic = sysbus_create_simple("kvmvapic", -1, NULL); } +#endif /* !CONFIG_HAX */ + s->vapic = vapic; if (apic_report_tpr_access && info->enable_tpr_reporting) { info->enable_tpr_reporting(s, true);
diff --git a/include/qom/cpu.h b/include/qom/cpu.h index b5238fb..7c1199c 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h
@@ -229,6 +229,10 @@ struct KVMState; struct kvm_run; +/* Forward declaration of the HAX per-vCPU state that CPUState points to. + * Not guarded by CONFIG_HAX so every compilation unit sees the same types. */ +struct hax_vcpu_state; + #define TB_JMP_CACHE_BITS 12 #define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS) @@ -374,6 +378,11 @@ (absolute value) offset as small as possible. This reduces code size, especially for hosts without large memory offsets. */ uint32_t tcg_exit_req; + + /* HAX accelerator state. Deliberately not guarded by CONFIG_HAX: that macro + * is only defined for some targets, and CPUState needs a single layout. */ + bool hax_vcpu_dirty; + struct hax_vcpu_state *hax_vcpu; }; QTAILQ_HEAD(CPUTailQ, CPUState);
diff --git a/include/sysemu/arch_init.h b/include/sysemu/arch_init.h index c5bbea8..0aba314 100644 --- a/include/sysemu/arch_init.h +++ b/include/sysemu/arch_init.h
@@ -35,6 +35,7 @@ bool audio_init(void); int kvm_available(void); int xen_available(void); +int hax_available(void); CpuDefinitionInfoList *arch_query_cpu_definitions(Error **errp);
diff --git a/include/sysemu/hax.h b/include/sysemu/hax.h new file mode 100644 index 0000000..d0ad6d9 --- /dev/null +++ b/include/sysemu/hax.h
@@ -0,0 +1,73 @@ +/* + * QEMU HAXM support + * + * Copyright IBM, Corp. 2008 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * Copyright (c) 2011 Intel Corporation + * Written by: + * Jiang Yunhong<yunhong.jiang@intel.com> + * Xin Xiaohui<xiaohui.xin@intel.com> + * Zhang Xiantao<xiantao.zhang@intel.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +/* header to be included in non-HAX-specific code */ +#ifndef QEMU_HAX_H +#define QEMU_HAX_H + +#include "config-host.h" +#include "qemu/osdep.h" +#include "qemu-common.h" + +/* Returns 1 if HAX is available and enabled, 0 otherwise. */ +int hax_enabled(void); + +/* Disable HAX if |disable| is 1, otherwise, enable it iff it is supported by the host CPU. + * Use hax_enabled() after this to get the result. */ +void hax_disable(int disable); + +/* Returns non-0 if the host CPU supports the VMX "unrestricted guest" feature which + * allows the virtual CPU to directly run in "real mode". If true, this allows QEMU to run + * several vCPU threads in parallel (see cpus.c). Otherwise, only a single TCG thread + * can run, and it will call HAX to run the current instructions, except in case of + * "real mode" (paging disabled, typically at boot time), or MMIO operations. 
*/ +int hax_ug_platform(void); + +int hax_pre_init(uint64_t ram_size); + +int hax_sync_vcpus(void); + +/* get the max haxm ram even before haxm library is initialized */ +int hax_get_max_ram(uint64_t *max_ram); + +#ifdef CONFIG_HAX + +#include "hw/hw.h" +#include "qemu/bitops.h" +#include "exec/memory.h" + +int hax_init_vcpu(CPUState *cpu); +int hax_vcpu_exec(CPUState *cpu); +int hax_smp_cpu_exec(CPUState *cpu); +void hax_cpu_synchronize_state(CPUState *cpu); +void hax_cpu_synchronize_post_reset(CPUState *cpu); +void hax_cpu_synchronize_post_init(CPUState *cpu); +int hax_populate_ram(uint64_t va, uint32_t size); +int hax_vcpu_emulation_mode(CPUState *cpu); +int hax_stop_emulation(CPUState *cpu); +int hax_stop_translate(CPUState *cpu); +int hax_vcpu_destroy(CPUState *cpu); +void hax_raise_event(CPUState *cpu); +void hax_reset_vcpu_state(void *opaque); +// #include "target-i386/hax-interface.h" +// #include "target-i386/hax-i386.h" + +#endif + +#endif /* QEMU_HAX_H */
diff --git a/qemu-options.def b/qemu-options.def index 9546406..a117697 100644 --- a/qemu-options.def +++ b/qemu-options.def
@@ -673,6 +673,9 @@ DEF("enable-kvm", 0, QEMU_OPTION_enable_kvm, \ "-enable-kvm enable KVM full virtualization support\n", QEMU_ARCH_ALL) +DEF("enable-hax", 0, QEMU_OPTION_enable_hax, \ +"-enable-hax enable HAX virtualization support\n", QEMU_ARCH_I386) + DEF("xen-domid", HAS_ARG, QEMU_OPTION_xen_domid, "-xen-domid id specify xen guest domain id\n", QEMU_ARCH_ALL) DEF("xen-create", 0, QEMU_OPTION_xen_create,
diff --git a/qemu-options.hx b/qemu-options.hx index 741814f..9360754 100644 --- a/qemu-options.hx +++ b/qemu-options.hx
@@ -3260,6 +3260,16 @@ if KVM support is enabled when compiling. ETEXI +DEF("enable-hax", 0, QEMU_OPTION_enable_hax, \ + "-enable-hax enable HAX virtualization support\n", QEMU_ARCH_I386) +STEXI +@item -enable-hax +@findex -enable-hax +Enable HAX (Intel Hardware Accelerated Execution) support, used for full +virtualization on OS X and Windows hosts. This option +is only available if HAX support is enabled when compiling. +ETEXI + DEF("xen-domid", HAS_ARG, QEMU_OPTION_xen_domid, "-xen-domid id specify xen guest domain id\n", QEMU_ARCH_ALL) DEF("xen-create", 0, QEMU_OPTION_xen_create,
diff --git a/target-i386/Makefile.objs b/target-i386/Makefile.objs index b223d79..2820b49 100644 --- a/target-i386/Makefile.objs +++ b/target-i386/Makefile.objs
@@ -5,3 +5,8 @@ obj-$(CONFIG_SOFTMMU) += machine.o arch_memory_mapping.o arch_dump.o monitor.o obj-$(CONFIG_KVM) += kvm.o hyperv.o obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o +ifeq (y,$(CONFIG_HAX)) +obj-y += hax-all.o hax-slot.o +obj-$(CONFIG_WIN32) += hax-windows.o +obj-$(CONFIG_DARWIN) += hax-darwin.o +endif
diff --git a/target-i386/hax-all.c b/target-i386/hax-all.c new file mode 100644 index 0000000..197e817 --- /dev/null +++ b/target-i386/hax-all.c
@@ -0,0 +1,1470 @@ +/* + * QEMU HAX support + * + * Copyright IBM, Corp. 2008 + * Red Hat, Inc. 2008 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * Glauber Costa <gcosta@redhat.com> + * + * Copyright (c) 2011 Intel Corporation + * Written by: + * Jiang Yunhong<yunhong.jiang@intel.com> + * Xin Xiaohui<xiaohui.xin@intel.com> + * Zhang Xiantao<xiantao.zhang@intel.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +/* + * HAX common code for both windows and darwin + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" + +#include "hax-i386.h" +#include "hax-slot.h" + +#include "exec/address-spaces.h" +#include "exec/exec-all.h" +#include "exec/ioport.h" +#include "qemu/main-loop.h" +#include "strings.h" +#include "sysemu/accel.h" + +#ifdef _WIN32 +#include "sysemu/os-win32.h" +#endif + +static const char kHaxVcpuSyncFailed[] = "Failed to sync HAX vcpu context"; + +#define derror(msg) do { fprintf(stderr, (msg)); } while (0) + +/* #define DEBUG_HAX */ + +#ifdef DEBUG_HAX +#define DPRINTF(fmt, ...) \ + do { fprintf(stdout, fmt, ## __VA_ARGS__); } while (0) +#else +#define DPRINTF(fmt, ...) 
\ + do { } while (0) +#endif + +/* Current version */ +const uint32_t hax_cur_version = 0x3; /* ver 2.0: support fast mmio */ +/* Minimum HAX kernel version */ +const uint32_t hax_min_version = 0x3; + +#define TYPE_HAX_ACCEL ACCEL_CLASS_NAME("hax") + +#define HAX_EMUL_ONE 0x1 +#define HAX_EMUL_REAL 0x2 +#define HAX_EMUL_HLT 0x4 +#define HAX_EMUL_EXITLOOP 0x5 + +#define HAX_EMULATE_STATE_MMIO 0x1 +#define HAX_EMULATE_STATE_REAL 0x2 +#define HAX_EMULATE_STATE_NONE 0x3 +#define HAX_EMULATE_STATE_INITIAL 0x4 + +#define HAX_NON_UG_PLATFORM 0x0 +#define HAX_UG_PLATFORM 0x1 + +bool hax_allowed; + +static void hax_vcpu_sync_state(CPUArchState * env, int modified); +static int hax_arch_get_registers(CPUArchState * env); +static int hax_handle_io(CPUArchState * env, uint32_t df, uint16_t port, + int direction, int size, int count, void *buffer); +static int hax_handle_fastmmio(CPUArchState * env, struct hax_fastmmio *hft); + +struct hax_state hax_global; +int ret_hax_init = 0; +static int hax_disabled = 1; + +int hax_support = -1; +int ug_support = 0; + +/* Called after hax_init */ +int hax_enabled(void) +{ + return (!hax_disabled && hax_support); +} + +void hax_disable(int disable) +{ + hax_disabled = disable; +} + +/* Called after hax_init */ +int hax_ug_platform(void) +{ + return ug_support; +} + +/* Currently non-PG modes are emulated by QEMU */ +int hax_vcpu_emulation_mode(CPUState * cpu) +{ + CPUArchState *env = (CPUArchState *) (cpu->env_ptr); + return !(env->cr[0] & CR0_PG_MASK); +} + +static int hax_prepare_emulation(CPUArchState * env) +{ + /* Flush all emulation states */ + tlb_flush(ENV_GET_CPU(env), 1); + tb_flush(ENV_GET_CPU(env)); + /* Sync the vcpu state from hax kernel module */ + hax_vcpu_sync_state(env, 0); + return 0; +} + +/* + * Check whether to break the translation block loop + * break tbloop after one MMIO emulation, or after finish emulation mode + */ +static int hax_stop_tbloop(CPUArchState * env) +{ + CPUState *cpu = ENV_GET_CPU(env); + switch 
(cpu->hax_vcpu->emulation_state) { + case HAX_EMULATE_STATE_MMIO: + if (cpu->hax_vcpu->resync) { + hax_prepare_emulation(env); + cpu->hax_vcpu->resync = 0; + return 0; + } + return 1; + break; + case HAX_EMULATE_STATE_INITIAL: + case HAX_EMULATE_STATE_REAL: + if (!hax_vcpu_emulation_mode(cpu)) + return 1; + break; + default: + fprintf(stderr, "Invalid emulation state in hax_sto_tbloop state %x\n", + cpu->hax_vcpu->emulation_state); + break; + } + + return 0; +} + +int hax_stop_emulation(CPUState * cpu) +{ + CPUArchState *env = (CPUArchState *) (cpu->env_ptr); + + if (hax_stop_tbloop(env)) { + cpu->hax_vcpu->emulation_state = HAX_EMULATE_STATE_NONE; + /* + * QEMU emulation changes vcpu state, + * Sync the vcpu state to HAX kernel module + */ + hax_vcpu_sync_state(env, 1); + return 1; + } + + return 0; +} + +int hax_stop_translate(CPUState * cpu) +{ + struct hax_vcpu_state *vstate = cpu->hax_vcpu; + + assert(vstate->emulation_state); + if (vstate->emulation_state == HAX_EMULATE_STATE_MMIO) + return 1; + + return 0; +} + +int valid_hax_tunnel_size(uint16_t size) +{ + return size >= sizeof(struct hax_tunnel); +} + +hax_fd hax_vcpu_get_fd(CPUArchState * env) +{ + struct hax_vcpu_state *vcpu = ENV_GET_CPU(env)->hax_vcpu; + if (!vcpu) + return HAX_INVALID_FD; + return vcpu->fd; +} + +static int hax_get_capability(struct hax_state *hax) +{ + int ret; + struct hax_capabilityinfo capinfo, *cap = &capinfo; + + ret = hax_capability(hax, cap); + if (ret) + return ret; + + if ((cap->wstatus & HAX_CAP_WORKSTATUS_MASK) == HAX_CAP_STATUS_NOTWORKING) { + if (cap->winfo & HAX_CAP_FAILREASON_VT) + DPRINTF + ("VTX feature is not enabled, HAX driver will not work.\n"); + else if (cap->winfo & HAX_CAP_FAILREASON_NX) + DPRINTF + ("NX feature is not enabled, HAX driver will not work.\n"); + return -ENXIO; + } + + if ((cap->winfo & HAX_CAP_UG)) + ug_support = 1; + + if (cap->wstatus & HAX_CAP_MEMQUOTA) { + if (cap->mem_quota < hax->mem_quota) { + fprintf(stderr, "The memory needed by this 
VM exceeds the driver limit.\n"); + return -ENOSPC; + } + } + return 0; +} + +static int hax_version_support(struct hax_state *hax) +{ + int ret; + struct hax_module_version version; + + ret = hax_mod_version(hax, &version); + if (ret < 0) + return 0; + + if ((hax_min_version > version.cur_version) || + (hax_cur_version < version.compat_version)) + return 0; + + return 1; +} + +int hax_vcpu_create(int id) +{ + struct hax_vcpu_state *vcpu = NULL; + int ret; + + if (!hax_global.vm) { + fprintf(stderr, "vcpu %x created failed, vm is null\n", id); + return -1; + } + + if (hax_global.vm->vcpus[id]) { + fprintf(stderr, "vcpu %x allocated already\n", id); + return 0; + } + + vcpu = g_malloc(sizeof(struct hax_vcpu_state)); + if (!vcpu) { + fprintf(stderr, "Failed to alloc vcpu state\n"); + return -ENOMEM; + } + + memset(vcpu, 0, sizeof(struct hax_vcpu_state)); + + ret = hax_host_create_vcpu(hax_global.vm->fd, id); + if (ret) { + fprintf(stderr, "Failed to create vcpu %x\n", id); + goto error; + } + + vcpu->vcpu_id = id; + vcpu->fd = hax_host_open_vcpu(hax_global.vm->id, id); + if (hax_invalid_fd(vcpu->fd)) { + fprintf(stderr, "Failed to open the vcpu\n"); + ret = -ENODEV; + goto error; + } + + hax_global.vm->vcpus[id] = vcpu; + + ret = hax_host_setup_vcpu_channel(vcpu); + if (ret) { + fprintf(stderr, "Invalid hax tunnel size \n"); + ret = -EINVAL; + goto error; + } + return 0; + + error: + /* vcpu and tunnel will be closed automatically */ + if (vcpu && !hax_invalid_fd(vcpu->fd)) + hax_close_fd(vcpu->fd); + + hax_global.vm->vcpus[id] = NULL; + g_free(vcpu); + return -1; +} + +int hax_vcpu_destroy(CPUState * cpu) +{ + struct hax_vcpu_state *vcpu = cpu->hax_vcpu; + + if (!hax_global.vm) { + fprintf(stderr, "vcpu %x destroy failed, vm is null\n", vcpu->vcpu_id); + return -1; + } + + if (!vcpu) + return 0; + + /* + * 1. The hax_tunnel is also destroied when vcpu destroy + * 2. 
close fd will cause hax module vcpu be cleaned + */ + hax_close_fd(vcpu->fd); + hax_global.vm->vcpus[vcpu->vcpu_id] = NULL; + g_free(vcpu); + return 0; +} + +int hax_init_vcpu(CPUState * cpu) +{ + int ret; + + ret = hax_vcpu_create(cpu->cpu_index); + if (ret < 0) { + fprintf(stderr, "Failed to create HAX vcpu\n"); + exit(-1); + } + + cpu->hax_vcpu = hax_global.vm->vcpus[cpu->cpu_index]; + cpu->hax_vcpu->emulation_state = HAX_EMULATE_STATE_INITIAL; + cpu->hax_vcpu_dirty = true; + qemu_register_reset(hax_reset_vcpu_state, (CPUArchState *) (cpu->env_ptr)); + + return ret; +} + +struct hax_vm *hax_vm_create(struct hax_state *hax) +{ + struct hax_vm *vm; + int vm_id = 0, ret; + + if (hax_invalid_fd(hax->fd)) + return NULL; + + if (hax->vm) + return hax->vm; + + vm = g_malloc(sizeof(struct hax_vm)); + if (!vm) + return NULL; + memset(vm, 0, sizeof(struct hax_vm)); + ret = hax_host_create_vm(hax, &vm_id); + if (ret) { + fprintf(stderr, "Failed to create vm %x\n", ret); + goto error; + } + vm->id = vm_id; + vm->fd = hax_host_open_vm(hax, vm_id); + if (hax_invalid_fd(vm->fd)) { + fprintf(stderr, "Failed to open vm %d\n", vm_id); + goto error; + } + + hax->vm = vm; + hax_slot_init_registry(); + return vm; + + error: + g_free(vm); + hax->vm = NULL; + return NULL; +} + +int hax_vm_destroy(struct hax_vm *vm) +{ + int i; + + hax_slot_free_registry(); + for (i = 0; i < HAX_MAX_VCPU; i++) + if (vm->vcpus[i]) { + fprintf(stderr, "VCPU should be cleaned before vm clean\n"); + return -1; + } + hax_close_fd(vm->fd); + g_free(vm); + hax_global.vm = NULL; + return 0; +} + +static void hax_set_phys_mem(MemoryRegionSection *section) +{ + MemoryRegion *mr = section->mr; + hwaddr start_pa = section->offset_within_address_space; + ram_addr_t size = int128_get64(section->size); + unsigned int delta; + void *host_ptr; + int flags; + + /* We only care about RAM and ROM */ + if (!memory_region_is_ram(mr)) { + return; + } + + /* Adjust start_pa and size so that they are page-aligned. 
(Cf + * kvm_set_phys_mem() in kvm-all.c). + */ + delta = TARGET_PAGE_SIZE - (start_pa & ~TARGET_PAGE_MASK); + delta &= ~TARGET_PAGE_MASK; + if (delta > size) { + return; + } + start_pa += delta; + size -= delta; + size &= TARGET_PAGE_MASK; + if (!size || start_pa & ~TARGET_PAGE_MASK) { + return; + } + + host_ptr = memory_region_get_ram_ptr(mr) + section->offset_within_region + + delta; + flags = memory_region_is_rom(mr) ? 1 : 0; + hax_slot_register(start_pa, size, (uintptr_t) host_ptr, flags); +} + +static void hax_region_add(MemoryListener * listener, + MemoryRegionSection * section) +{ + hax_set_phys_mem(section); +} + +static void hax_region_del(MemoryListener * listener, + MemoryRegionSection * section) +{ + // Memory mappings will be removed at VM close. +} + +/* + * Dirty logging is faked: every sync reports the whole RAM section as + * dirty. MMIO sections are ignored. + */ +static void hax_log_sync(MemoryListener * listener, + MemoryRegionSection * section) +{ + MemoryRegion *mr = section->mr; + + if (!memory_region_is_ram(mr)) { + /* Skip MMIO regions */ + return; + } + + /* + * Mark the section's own span inside mr in one call; no per-page + * bitmap (and no stack array sized by guest RAM) is needed. + */ + memory_region_set_dirty(mr, section->offset_within_region, + int128_get64(section->size)); +} + +static void hax_log_global_start(struct MemoryListener *listener) +{ +} + +static void hax_log_global_stop(struct MemoryListener *listener) +{ +} + +static void hax_log_start(MemoryListener * listener, + MemoryRegionSection * section, + int old, int new) +{ +} + +static void hax_log_stop(MemoryListener * listener, + MemoryRegionSection * section, + int old, int new) +{ +} + +static void hax_begin(MemoryListener * listener) +{ +} + 
+static void hax_commit(MemoryListener * listener) +{ +} + +static void hax_region_nop(MemoryListener * listener, + MemoryRegionSection * section) +{ +} + +static MemoryListener hax_memory_listener = { + .begin = hax_begin, + .commit = hax_commit, + .region_add = hax_region_add, + .region_del = hax_region_del, + .region_nop = hax_region_nop, + .log_start = hax_log_start, + .log_stop = hax_log_stop, + .log_sync = hax_log_sync, + .log_global_start = hax_log_global_start, + .log_global_stop = hax_log_global_stop, +}; + +static void hax_handle_interrupt(CPUState * cpu, int mask) +{ + cpu->interrupt_request |= mask; + + if (!qemu_cpu_is_self(cpu)) { + qemu_cpu_kick(cpu); + } +} + +int hax_pre_init(uint64_t ram_size) +{ + struct hax_state *hax = NULL; + + fprintf(stdout, "Hax is %s\n", hax_disabled ? "disabled" : "enabled"); + if (hax_disabled) + return 0; + hax = &hax_global; + memset(hax, 0, sizeof(struct hax_state)); + hax->mem_quota = ram_size; + fprintf(stdout, "Hax ram_size 0x%llx\n", ram_size); + + return 0; +} + +static int hax_init(void) +{ + struct hax_state *hax = NULL; + struct hax_qemu_version qversion; + int ret; + + hax_support = 0; + + hax = &hax_global; + + + hax->fd = hax_mod_open(); + if (hax_invalid_fd(hax->fd)) { + hax->fd = 0; + ret = -ENODEV; + goto error; + } + + ret = hax_get_capability(hax); + + if (ret) { + if (ret != -ENOSPC) + ret = -EINVAL; + goto error; + } + + if (!hax_version_support(hax)) { + fprintf(stderr, "Incompat Hax version. 
Qemu current version %x ", + hax_cur_version); + fprintf(stderr, "requires minimum HAX version %x\n", hax_min_version); + ret = -EINVAL; + goto error; + } + + hax->vm = hax_vm_create(hax); + if (!hax->vm) { + fprintf(stderr, "Failed to create HAX VM\n"); + ret = -EINVAL; + goto error; + } + + memory_listener_register(&hax_memory_listener, &address_space_memory); + + qversion.cur_version = hax_cur_version; + qversion.min_version = hax_min_version; + hax_notify_qemu_version(hax->vm->fd, &qversion); + cpu_interrupt_handler = hax_handle_interrupt; + hax_support = 1; + + return ret; + error: + if (hax->vm) + hax_vm_destroy(hax->vm); + if (hax->fd) + hax_mod_close(hax); + + return ret; +} + +static int hax_accel_init(MachineState *ms) +{ + ret_hax_init = hax_init(); + + if (ret_hax_init && (ret_hax_init != -ENOSPC)) { + fprintf(stderr, "No accelerator found.\n"); + return ret_hax_init; + } else { + /* need tcg for non-UG platform in real mode */ + if (!hax_ug_platform()) + tcg_exec_init(tcg_tb_size * 1024 * 1024); + + fprintf(stdout, "HAX is %s and emulator runs in %s mode.\n", + !ret_hax_init ? "working" : "not working", + !ret_hax_init ? 
"fast virt" : "emulation"); + return 0; + } +} + +static int hax_handle_fastmmio(CPUArchState * env, struct hax_fastmmio *hft) +{ + uint64_t buf = 0; + /* + * With fast MMIO, QEMU need not sync vCPU state with HAXM + * driver because it will only invoke MMIO handler + * However, some MMIO operations utilize virtual address like qemu_pipe + * Thus we need to sync the CR0, CR3 and CR4 so that QEMU + * can translate the guest virtual address to guest physical + * address + */ + env->cr[0] = hft->_cr0; + env->cr[2] = hft->_cr2; + env->cr[3] = hft->_cr3; + env->cr[4] = hft->_cr4; + + buf = hft->value; + + cpu_physical_memory_rw(hft->gpa, (uint8_t *) & buf, hft->size, hft->direction); + if (hft->direction == 0) + hft->value = buf; + + return 0; +} + +static int hax_handle_io(CPUArchState * env, uint32_t df, uint16_t port, + int direction, int size, int count, void *buffer) +{ + uint8_t *ptr; + int i; + + if (!df) + ptr = (uint8_t *) buffer; + else + ptr = buffer + size * count - size; + for (i = 0; i < count; i++) { + if (direction == HAX_EXIT_IO_IN) { + switch (size) { + case 1: + stb_p(ptr, cpu_inb(port)); + break; + case 2: + stw_p(ptr, cpu_inw(port)); + break; + case 4: + stl_p(ptr, cpu_inl(port)); + break; + } + } else { + switch (size) { + case 1: + cpu_outb(port, ldub_p(ptr)); + break; + case 2: + cpu_outw(port, lduw_p(ptr)); + break; + case 4: + cpu_outl(port, ldl_p(ptr)); + break; + } + } + if (!df) + ptr += size; + else + ptr -= size; + } + + return 0; +} + +static int hax_vcpu_interrupt(CPUArchState * env) +{ + CPUState *cpu = ENV_GET_CPU(env); + struct hax_vcpu_state *vcpu = cpu->hax_vcpu; + struct hax_tunnel *ht = vcpu->tunnel; + + /* + * Try to inject an interrupt if the guest can accept it + * Unlike KVM, HAX kernel check for the eflags, instead of qemu + */ + if (ht->ready_for_interrupt_injection && + (cpu->interrupt_request & CPU_INTERRUPT_HARD)) { + int irq; + + irq = cpu_get_pic_interrupt(env); + if (irq >= 0) { + hax_inject_interrupt(env, irq); + 
cpu->interrupt_request &= ~CPU_INTERRUPT_HARD; + } + } + + /* If we have an interrupt but the guest is not ready to receive an + * interrupt, request an interrupt window exit. This will + * cause a return to userspace as soon as the guest is ready to + * receive interrupts. */ + if ((cpu->interrupt_request & CPU_INTERRUPT_HARD)) + ht->request_interrupt_window = 1; + else + ht->request_interrupt_window = 0; + return 0; +} + +void hax_raise_event(CPUState * cpu) +{ + struct hax_vcpu_state *vcpu = cpu->hax_vcpu; + + if (!vcpu) + return; + vcpu->tunnel->user_event_pending = 1; +} + +/* + * Ask hax kernel module to run the CPU for us till: + * 1. Guest crash or shutdown + * 2. Need QEMU's emulation like guest execute MMIO instruction or guest + * enter emulation mode (non-PG mode) + * 3. Guest execute HLT + * 4. Qemu have Signal/event pending + * 5. An unknown VMX exit happens + */ +extern void qemu_system_reset_request(void); +static int hax_vcpu_hax_exec(CPUArchState * env, int ug_platform) +{ + int ret = 0; + CPUState *cpu = ENV_GET_CPU(env); + X86CPU *x86_cpu = X86_CPU(cpu); + struct hax_vcpu_state *vcpu = cpu->hax_vcpu; + struct hax_tunnel *ht = vcpu->tunnel; + + if (!ug_platform) { + if (hax_vcpu_emulation_mode(cpu)) { + DPRINTF("Trying to execute vcpu at eip:%lx\n", env->eip); + return HAX_EMUL_EXITLOOP; + } + + cpu->halted = 0; + + if (cpu->interrupt_request & CPU_INTERRUPT_POLL) { + cpu->interrupt_request &= ~CPU_INTERRUPT_POLL; + apic_poll_irq(x86_cpu->apic_state); + } + } else { /* UG platform */ + if (!hax_enabled()) { + DPRINTF("Trying to vcpu execute at eip:%lx\n", env->eip); + return HAX_EMUL_EXITLOOP; + } + + cpu->halted = 0; + + if (cpu->interrupt_request & CPU_INTERRUPT_POLL) { + cpu->interrupt_request &= ~CPU_INTERRUPT_POLL; + apic_poll_irq(x86_cpu->apic_state); + } + + if (cpu->interrupt_request & CPU_INTERRUPT_INIT) { + DPRINTF("\nUG hax_vcpu_hax_exec: handling INIT for %d \n", + cpu->cpu_index); + do_cpu_init(x86_cpu); + hax_vcpu_sync_state(env, 
1); + } + + if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) { + DPRINTF("UG hax_vcpu_hax_exec: handling SIPI for %d \n", + cpu->cpu_index); + hax_vcpu_sync_state(env, 0); + do_cpu_sipi(x86_cpu); + hax_vcpu_sync_state(env, 1); + } + } + + do { + int hax_ret; + + if (cpu->exit_request) { + ret = HAX_EMUL_EXITLOOP; + break; + } + + hax_vcpu_interrupt(env); + if (!ug_platform) { + hax_ret = hax_vcpu_run(vcpu); + } else { /* UG platform */ + + qemu_mutex_unlock_iothread(); + hax_ret = hax_vcpu_run(vcpu); + qemu_mutex_lock_iothread(); + current_cpu = cpu; + } + + /* Simply continue the vcpu_run if system call interrupted */ + if (hax_ret == -EINTR || hax_ret == -EAGAIN) { + DPRINTF("io window interrupted\n"); + continue; + } + + if (hax_ret < 0) { + fprintf(stderr, "vcpu run failed for vcpu %x\n", vcpu->vcpu_id); + abort(); + } + switch (ht->_exit_status) { + case HAX_EXIT_IO: + ret = hax_handle_io(env, ht->pio._df, ht->pio._port, + ht->pio._direction, + ht->pio._size, ht->pio._count, vcpu->iobuf); + break; + case HAX_EXIT_MMIO: + ret = HAX_EMUL_ONE; + break; + case HAX_EXIT_FAST_MMIO: + ret = hax_handle_fastmmio(env, (struct hax_fastmmio *) vcpu->iobuf); + break; + case HAX_EXIT_REAL: + ret = HAX_EMUL_REAL; + break; + /* Guest state changed, currently only for shutdown */ + case HAX_EXIT_STATECHANGE: + fprintf(stdout, "VCPU shutdown request\n"); + qemu_system_reset_request(); + hax_prepare_emulation(env); + cpu_dump_state(cpu, stderr, fprintf, 0); + ret = HAX_EMUL_EXITLOOP; + break; + case HAX_EXIT_UNKNOWN_VMEXIT: + fprintf(stderr, "Unknown VMX exit %x from guest\n", + ht->_exit_reason); + qemu_system_reset_request(); + hax_prepare_emulation(env); + cpu_dump_state(cpu, stderr, fprintf, 0); + ret = HAX_EMUL_EXITLOOP; + break; + case HAX_EXIT_HLT: + if (!(cpu->interrupt_request & CPU_INTERRUPT_HARD) && + !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) { + /* hlt instruction with interrupt disabled is shutdown */ + env->eflags |= IF_MASK; + cpu->halted = 1; + 
cpu->exception_index = EXCP_HLT; + ret = HAX_EMUL_HLT; + } + break; + /* these situation will continue to hax module */ + case HAX_EXIT_INTERRUPT: + case HAX_EXIT_PAUSED: + break; + default: + fprintf(stderr, "Unknow exit %x from hax\n", ht->_exit_status); + qemu_system_reset_request(); + hax_prepare_emulation(env); + cpu_dump_state(cpu, stderr, fprintf, 0); + ret = HAX_EMUL_EXITLOOP; + break; + } + } while (!ret); + + if (cpu->exit_request) { + cpu->exit_request = 0; + cpu->exception_index = EXCP_INTERRUPT; + } + return ret; +} + +static void do_hax_cpu_synchronize_state(void *arg) +{ + CPUState *cpu = arg; + CPUArchState *env = cpu->env_ptr; + + hax_arch_get_registers(env); + cpu->hax_vcpu_dirty = true; +} + +void hax_cpu_synchronize_state(CPUState *cpu) +{ + /* TODO: Do not sync if cpu->hax_vcpu_dirty is true. (Cf + * kvm_cpu_synchronize_state() in kvm-all.c) + * This would require that this flag be updated properly and consistently + * wherever a vCPU state sync between QEMU and HAX takes place. For now, + * just perform the sync regardless of hax_vcpu_dirty. 
+ */ + run_on_cpu(cpu, do_hax_cpu_synchronize_state, cpu); +} + +static void do_hax_cpu_synchronize_post_reset(void *arg) +{ + CPUState *cpu = arg; + CPUArchState *env = cpu->env_ptr; + + hax_vcpu_sync_state(env, 1); + cpu->hax_vcpu_dirty = false; +} + +void hax_cpu_synchronize_post_reset(CPUState * cpu) +{ + run_on_cpu(cpu, do_hax_cpu_synchronize_post_reset, cpu); +} + +static void do_hax_cpu_synchronize_post_init(void *arg) +{ + CPUState *cpu = arg; + CPUArchState *env = cpu->env_ptr; + + hax_vcpu_sync_state(env, 1); + cpu->hax_vcpu_dirty = false; +} + +void hax_cpu_synchronize_post_init(CPUState * cpu) +{ + run_on_cpu(cpu, do_hax_cpu_synchronize_post_init, cpu); +} + +/* + * return 1 when need emulate, 0 when need exit loop + */ +int hax_vcpu_exec(CPUState * cpu) +{ + int next = 0, ret = 0; + struct hax_vcpu_state *vcpu; + CPUArchState *env = (CPUArchState *) (cpu->env_ptr); + + if (cpu->hax_vcpu->emulation_state != HAX_EMULATE_STATE_NONE) + return 1; + + vcpu = cpu->hax_vcpu; + next = hax_vcpu_hax_exec(env, HAX_NON_UG_PLATFORM); + switch (next) { + case HAX_EMUL_ONE: + ret = 1; + vcpu->emulation_state = HAX_EMULATE_STATE_MMIO; + hax_prepare_emulation(env); + break; + case HAX_EMUL_REAL: + ret = 1; + vcpu->emulation_state = HAX_EMULATE_STATE_REAL; + hax_prepare_emulation(env); + break; + case HAX_EMUL_HLT: + case HAX_EMUL_EXITLOOP: + break; + default: + fprintf(stderr, "Unknown hax vcpu exec return %x\n", next); + abort(); + } + + return ret; +} + +int hax_smp_cpu_exec(CPUState * cpu) +{ + CPUArchState *env = (CPUArchState *) (cpu->env_ptr); + int why; + int ret; + + while (1) { + if (cpu->exception_index >= EXCP_INTERRUPT) { + ret = cpu->exception_index; + cpu->exception_index = -1; + break; + } + + why = hax_vcpu_hax_exec(env, HAX_UG_PLATFORM); + + if ((why != HAX_EMUL_HLT) && (why != HAX_EMUL_EXITLOOP)) { + fprintf(stderr, "Unknown hax vcpu return %x\n", why); + abort(); + } + } + + return ret; +} + +#define HAX_RAM_INFO_ROM 0x1 + +static void 
set_v8086_seg(struct segment_desc_t *lhs, const SegmentCache * rhs) +{ + memset(lhs, 0, sizeof(struct segment_desc_t)); + lhs->selector = rhs->selector; + lhs->base = rhs->base; + lhs->limit = rhs->limit; + lhs->type = 3; + lhs->present = 1; + lhs->dpl = 3; + lhs->operand_size = 0; + lhs->desc = 1; + lhs->long_mode = 0; + lhs->granularity = 0; + lhs->available = 0; +} + +static void get_seg(SegmentCache * lhs, const struct segment_desc_t *rhs) +{ + lhs->selector = rhs->selector; + lhs->base = rhs->base; + lhs->limit = rhs->limit; + lhs->flags = (rhs->type << DESC_TYPE_SHIFT) + | (rhs->present * DESC_P_MASK) + | (rhs->dpl << DESC_DPL_SHIFT) + | (rhs->operand_size << DESC_B_SHIFT) + | (rhs->desc * DESC_S_MASK) + | (rhs->long_mode << DESC_L_SHIFT) + | (rhs->granularity * DESC_G_MASK) | (rhs->available * DESC_AVL_MASK); +} + +static void set_seg(struct segment_desc_t *lhs, const SegmentCache * rhs) +{ + unsigned flags = rhs->flags; + + memset(lhs, 0, sizeof(struct segment_desc_t)); + lhs->selector = rhs->selector; + lhs->base = rhs->base; + lhs->limit = rhs->limit; + lhs->type = (flags >> DESC_TYPE_SHIFT) & 15; + lhs->present = (flags & DESC_P_MASK) != 0; + lhs->dpl = rhs->selector & 3; + lhs->operand_size = (flags >> DESC_B_SHIFT) & 1; + lhs->desc = (flags & DESC_S_MASK) != 0; + lhs->long_mode = (flags >> DESC_L_SHIFT) & 1; + lhs->granularity = (flags & DESC_G_MASK) != 0; + lhs->available = (flags & DESC_AVL_MASK) != 0; +} + +static void hax_getput_reg(uint64_t * hax_reg, target_ulong * qemu_reg, int set) +{ + target_ulong reg = *hax_reg; + + if (set) + *hax_reg = *qemu_reg; + else + *qemu_reg = reg; +} + +/* The sregs has been synced with HAX kernel already before this call */ +static int hax_get_segments(CPUArchState * env, struct vcpu_state_t *sregs) +{ + get_seg(&env->segs[R_CS], &sregs->_cs); + get_seg(&env->segs[R_DS], &sregs->_ds); + get_seg(&env->segs[R_ES], &sregs->_es); + get_seg(&env->segs[R_FS], &sregs->_fs); + get_seg(&env->segs[R_GS], &sregs->_gs); + 
get_seg(&env->segs[R_SS], &sregs->_ss); + + get_seg(&env->tr, &sregs->_tr); + get_seg(&env->ldt, &sregs->_ldt); + env->idt.limit = sregs->_idt.limit; + env->idt.base = sregs->_idt.base; + env->gdt.limit = sregs->_gdt.limit; + env->gdt.base = sregs->_gdt.base; + return 0; +} + +static int hax_set_segments(CPUArchState * env, struct vcpu_state_t *sregs) +{ + if ((env->eflags & VM_MASK)) { + set_v8086_seg(&sregs->_cs, &env->segs[R_CS]); + set_v8086_seg(&sregs->_ds, &env->segs[R_DS]); + set_v8086_seg(&sregs->_es, &env->segs[R_ES]); + set_v8086_seg(&sregs->_fs, &env->segs[R_FS]); + set_v8086_seg(&sregs->_gs, &env->segs[R_GS]); + set_v8086_seg(&sregs->_ss, &env->segs[R_SS]); + } else { + set_seg(&sregs->_cs, &env->segs[R_CS]); + set_seg(&sregs->_ds, &env->segs[R_DS]); + set_seg(&sregs->_es, &env->segs[R_ES]); + set_seg(&sregs->_fs, &env->segs[R_FS]); + set_seg(&sregs->_gs, &env->segs[R_GS]); + set_seg(&sregs->_ss, &env->segs[R_SS]); + + if (env->cr[0] & CR0_PE_MASK) { + /* force ss cpl to cs cpl */ + sregs->_ss.selector = (sregs->_ss.selector & ~3) | (sregs->_cs.selector & 3); + sregs->_ss.dpl = sregs->_ss.selector & 3; + } + } + + set_seg(&sregs->_tr, &env->tr); + set_seg(&sregs->_ldt, &env->ldt); + sregs->_idt.limit = env->idt.limit; + sregs->_idt.base = env->idt.base; + sregs->_gdt.limit = env->gdt.limit; + sregs->_gdt.base = env->gdt.base; + return 0; +} + +/* + * After get the state from the kernel module, some + * qemu emulator state need be updated also + */ +static int hax_setup_qemu_emulator(CPUArchState * env) +{ + +#define HFLAG_COPY_MASK ~( \ + HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \ + HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \ + HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \ + HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK) + + uint32_t hflags; + + hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK; + hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT); + hflags |= (env->cr[0] << (HF_MP_SHIFT - 
CR0_MP_SHIFT)) & + (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK); + hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK)); + hflags |= (env->cr[4] & CR4_OSFXSR_MASK) << (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT); + + if (env->efer & MSR_EFER_LMA) { + hflags |= HF_LMA_MASK; + } + + if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) { + hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK; + } else { + hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >> + (DESC_B_SHIFT - HF_CS32_SHIFT); + hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >> + (DESC_B_SHIFT - HF_SS32_SHIFT); + if (!(env->cr[0] & CR0_PE_MASK) || + (env->eflags & VM_MASK) || !(hflags & HF_CS32_MASK)) { + hflags |= HF_ADDSEG_MASK; + } else { + hflags |= ((env->segs[R_DS].base | + env->segs[R_ES].base | + env->segs[R_SS].base) != 0) << HF_ADDSEG_SHIFT; + } + } + env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags; + return 0; +} + +/* + * Move the general-purpose, control and segment registers between env and + * a struct vcpu_state_t exchanged through hax_sync_vcpu_state(). + * set != 0: fill the struct from env, then hand it to HAX. + * set == 0: fetch the struct from HAX, copy it into env, then recompute + * env->hflags. + * Returns 0 on success, -1 if hax_sync_vcpu_state() fails. + */ +static int hax_sync_vcpu_register(CPUArchState * env, int set) +{ + struct vcpu_state_t regs; + int ret; + memset(&regs, 0, sizeof(struct vcpu_state_t)); + + if (!set) { + ret = hax_sync_vcpu_state(env, &regs, 0); + if (ret < 0) + return -1; + } + + /* generic register */ + hax_getput_reg(&regs._rax, &env->regs[R_EAX], set); + hax_getput_reg(&regs._rbx, &env->regs[R_EBX], set); + hax_getput_reg(&regs._rcx, &env->regs[R_ECX], set); + hax_getput_reg(&regs._rdx, &env->regs[R_EDX], set); + hax_getput_reg(&regs._rsi, &env->regs[R_ESI], set); + hax_getput_reg(&regs._rdi, &env->regs[R_EDI], set); + hax_getput_reg(&regs._rsp, &env->regs[R_ESP], set); + hax_getput_reg(&regs._rbp, &env->regs[R_EBP], set); +#ifdef TARGET_X86_64 + hax_getput_reg(&regs._r8, &env->regs[8], set); + hax_getput_reg(&regs._r9, &env->regs[9], set); + hax_getput_reg(&regs._r10, &env->regs[10], set); + hax_getput_reg(&regs._r11, &env->regs[11], set); + hax_getput_reg(&regs._r12, &env->regs[12], set); + hax_getput_reg(&regs._r13, &env->regs[13], set); + hax_getput_reg(&regs._r14, &env->regs[14], set); + hax_getput_reg(&regs._r15, &env->regs[15], set); 
+#endif + hax_getput_reg(&regs._rflags, &env->eflags, set); + hax_getput_reg(&regs._rip, &env->eip, set); + + if (set) { + regs._cr0 = env->cr[0]; + regs._cr2 = env->cr[2]; + regs._cr3 = env->cr[3]; + regs._cr4 = env->cr[4]; + hax_set_segments(env, &regs); + } else { + env->cr[0] = regs._cr0; + env->cr[2] = regs._cr2; + env->cr[3] = regs._cr3; + env->cr[4] = regs._cr4; + hax_get_segments(env, &regs); + } + + if (set) { + ret = hax_sync_vcpu_state(env, &regs, 1); + if (ret < 0) + return -1; + } + if (!set) + hax_setup_qemu_emulator(env); + return 0; +} + +static void hax_msr_entry_set(struct vmx_msr *item, uint32_t index, + uint64_t value) +{ + item->entry = index; + item->value = value; +} + +static int hax_get_msrs(CPUArchState * env) +{ + struct hax_msr_data md; + struct vmx_msr *msrs = md.entries; + int ret, i, n; + + n = 0; + msrs[n++].entry = MSR_IA32_SYSENTER_CS; + msrs[n++].entry = MSR_IA32_SYSENTER_ESP; + msrs[n++].entry = MSR_IA32_SYSENTER_EIP; + msrs[n++].entry = MSR_IA32_TSC; +#ifdef TARGET_X86_64 + msrs[n++].entry = MSR_EFER; + msrs[n++].entry = MSR_STAR; + msrs[n++].entry = MSR_LSTAR; + msrs[n++].entry = MSR_CSTAR; + msrs[n++].entry = MSR_FMASK; + msrs[n++].entry = MSR_KERNELGSBASE; +#endif + md.nr_msr = n; + ret = hax_sync_msr(env, &md, 0); + if (ret < 0) + return ret; + + for (i = 0; i < md.done; i++) { + switch (msrs[i].entry) { + case MSR_IA32_SYSENTER_CS: + env->sysenter_cs = msrs[i].value; + break; + case MSR_IA32_SYSENTER_ESP: + env->sysenter_esp = msrs[i].value; + break; + case MSR_IA32_SYSENTER_EIP: + env->sysenter_eip = msrs[i].value; + break; + case MSR_IA32_TSC: + env->tsc = msrs[i].value; + break; +#ifdef TARGET_X86_64 + case MSR_EFER: + env->efer = msrs[i].value; + break; + case MSR_STAR: + env->star = msrs[i].value; + break; + case MSR_LSTAR: + env->lstar = msrs[i].value; + break; + case MSR_CSTAR: + env->cstar = msrs[i].value; + break; + case MSR_FMASK: + env->fmask = msrs[i].value; + break; + case MSR_KERNELGSBASE: + env->kernelgsbase = 
msrs[i].value; + break; +#endif + } + } + + return 0; +} + +static int hax_set_msrs(CPUArchState * env) +{ + struct hax_msr_data md; + struct vmx_msr *msrs; + msrs = md.entries; + int n = 0; + + memset(&md, 0, sizeof(struct hax_msr_data)); + hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs); + hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp); + hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip); + hax_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc); +#ifdef TARGET_X86_64 + hax_msr_entry_set(&msrs[n++], MSR_EFER, env->efer); + hax_msr_entry_set(&msrs[n++], MSR_STAR, env->star); + hax_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar); + hax_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar); + hax_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask); + hax_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase); +#endif + md.nr_msr = n; + md.done = 0; + + return hax_sync_msr(env, &md, 1); +} + +/* + * Fetch the FPU/SSE image through hax_sync_fpu() and unpack it into env. + * Returns 0 on success or the negative value from hax_sync_fpu(). + */ +static int hax_get_fpu(CPUArchState * env) +{ + struct fx_layout fpu; + int i, ret; + + ret = hax_sync_fpu(env, &fpu, 0); + if (ret < 0) + return ret; + + env->fpstt = (fpu.fsw >> 11) & 7; + env->fpus = fpu.fsw; + env->fpuc = fpu.fcw; + for (i = 0; i < 8; ++i) + env->fptags[i] = !((fpu.ftw >> i) & 1); + memcpy(env->fpregs, fpu.st_mm, sizeof(env->fpregs)); + + for (i = 0; i < 8; ++i) { + memcpy(&env->xmm_regs[i], fpu.mmx_1[i], sizeof(fpu.mmx_1[i])); + } +#ifdef TARGET_X86_64 + /* + * Upper eight XMM registers, guarded like r8-r15 in the register sync. + * NOTE(review): assumes env->xmm_regs[] holds only 8 entries on 32-bit + * targets, so the unguarded copy would overrun it - confirm in cpu.h. + */ + for (i = 0; i < 8; ++i) { + memcpy(&env->xmm_regs[8 + i], fpu.mmx_2[i], sizeof(fpu.mmx_2[i])); + } +#endif + env->mxcsr = fpu.mxcsr; + + return 0; +} + +/* + * Pack env's FPU/SSE state into an fx_layout image and pass it to + * hax_sync_fpu(). Returns the value from hax_sync_fpu(). + */ +static int hax_set_fpu(CPUArchState * env) +{ + struct fx_layout fpu; + int i; + + memset(&fpu, 0, sizeof(fpu)); + fpu.fsw = env->fpus & ~(7 << 11); + fpu.fsw |= (env->fpstt & 7) << 11; + fpu.fcw = env->fpuc; + + for (i = 0; i < 8; ++i) + fpu.ftw |= (!env->fptags[i]) << i; + + memcpy(fpu.st_mm, env->fpregs, sizeof(env->fpregs)); + + for (i = 0; i < 8; i++) { + memcpy(fpu.mmx_1[i], &env->xmm_regs[i], 
sizeof(fpu.mmx_1[i])); + } +#ifdef TARGET_X86_64 + /* + * Upper eight XMM registers; mmx_2 stays zeroed otherwise. + * NOTE(review): same xmm_regs[] size assumption as in hax_get_fpu(). + */ + for (i = 0; i < 8; i++) { + memcpy(fpu.mmx_2[i], &env->xmm_regs[i + 8], sizeof(fpu.mmx_2[i])); + } +#endif + + fpu.mxcsr = env->mxcsr; + + return hax_sync_fpu(env, &fpu, 1); +} + +static int hax_arch_get_registers(CPUArchState * env) +{ + int ret; + + ret = hax_sync_vcpu_register(env, 0); + if (ret < 0) + return ret; + + ret = hax_get_fpu(env); + if (ret < 0) + return ret; + + ret = hax_get_msrs(env); + if (ret < 0) + return ret; + + return 0; +} + +static int hax_arch_set_registers(CPUArchState * env) +{ + int ret; + ret = hax_sync_vcpu_register(env, 1); + + if (ret < 0) { + fprintf(stderr, "Failed to sync vcpu reg\n"); + return ret; + } + ret = hax_set_fpu(env); + if (ret < 0) { + fprintf(stderr, "FPU failed\n"); + return ret; + } + ret = hax_set_msrs(env); + if (ret < 0) { + fprintf(stderr, "MSR failed\n"); + return ret; + } + + return 0; +} + +static void hax_vcpu_sync_state(CPUArchState * env, int modified) +{ + if (hax_enabled()) { + if (modified) + hax_arch_set_registers(env); + else + hax_arch_get_registers(env); + } +} + +/* + * much simpler than kvm, at least in first stage because: + * We don't need consider the device pass-through, we don't need + * consider the framebuffer, and we may even remove the bios at all + */ +int hax_sync_vcpus(void) +{ + if (hax_enabled()) { + CPUState *cpu; + + cpu = first_cpu; + if (!cpu) + return 0; + + for (; cpu != NULL; cpu = CPU_NEXT(cpu)) { + int ret; + + ret = hax_arch_set_registers(cpu->env_ptr); + if (ret < 0) { + derror(kHaxVcpuSyncFailed); + return ret; + } + } + } + + return 0; +} + +void hax_reset_vcpu_state(void *opaque) +{ + CPUState *cpu; + for (cpu = first_cpu; cpu != NULL; cpu = CPU_NEXT(cpu)) { + DPRINTF("*********ReSet hax_vcpu->emulation_state \n"); + cpu->hax_vcpu->emulation_state = HAX_EMULATE_STATE_INITIAL; + cpu->hax_vcpu->tunnel->user_event_pending = 0; + cpu->hax_vcpu->tunnel->ready_for_interrupt_injection = 0; + } +} + +static void hax_accel_class_init(ObjectClass *oc, void 
*data) +{ + AccelClass *ac = ACCEL_CLASS(oc); + ac->name = "HAX"; + ac->init_machine = hax_accel_init; + ac->allowed = &hax_allowed; +} + +static const TypeInfo hax_accel_type = { + .name = TYPE_HAX_ACCEL, + .parent = TYPE_ACCEL, + .class_init = hax_accel_class_init, +}; + +static void hax_type_init(void) +{ + type_register_static(&hax_accel_type); +} + +type_init(hax_type_init); +
diff --git a/target-i386/hax-darwin.c b/target-i386/hax-darwin.c new file mode 100644 index 0000000..06c7a3d --- /dev/null +++ b/target-i386/hax-darwin.c
@@ -0,0 +1,401 @@
+/*
+ * QEMU HAXM support
+ *
+ * Copyright (c) 2011 Intel Corporation
+ *  Written by:
+ *  Jiang Yunhong<yunhong.jiang@intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+/* HAX module interface - darwin version */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+
+#include "target-i386/hax-i386.h"
+
+/*
+ * Open the HAX module device and mark the descriptor close-on-exec.
+ * Returns the descriptor, or -1 if /dev/HAX could not be opened.
+ */
+hax_fd hax_mod_open(void)
+{
+    int fd = open("/dev/HAX", O_RDWR);
+
+    if (fd == -1) {
+        fprintf(stderr, "Failed to open the hax module\n");
+        /* No descriptor to flag; fcntl() on -1 would only fail with EBADF. */
+        return fd;
+    }
+
+    fcntl(fd, F_SETFD, FD_CLOEXEC);
+
+    return fd;
+}
+
+/*
+ * Issue HAX_VM_IOCTL_ALLOC_RAM on the current VM for the @size-byte region
+ * at host virtual address @va.
+ * Returns 0 on success, -EINVAL when no VM is open yet, or the negative
+ * ioctl() result on failure.
+ */
+int hax_populate_ram(uint64_t va, uint32_t size)
+{
+    int ret;
+    /* The designated initializer also zeroes the padding sent to the driver. */
+    struct hax_alloc_ram_info info = { .size = size, .va = va };
+
+    if (!hax_global.vm || hax_invalid_fd(hax_global.vm->fd)) {
+        fprintf(stderr, "Allocate memory before vm create?\n");
+        return -EINVAL;
+    }
+
+    ret = ioctl(hax_global.vm->fd, HAX_VM_IOCTL_ALLOC_RAM, &info);
+    if (ret < 0) {
+        fprintf(stderr, "Failed to allocate %x memory\n", size);
+        return ret;
+    }
+    return 0;
+}
+
+/*
+ * Issue HAX_VM_IOCTL_SET_RAM to map the guest physical range
+ * [@start_pa, @start_pa + @size) to host virtual address @host_va.
+ * Only the low 8 bits of @flags are passed to the driver.
+ * Returns 0 on success, -EINVAL when no VM exists, or -errno on failure.
+ */
+int hax_set_ram(uint64_t start_pa, uint32_t size, uint64_t host_va, int flags)
+{
+    struct hax_set_ram_info info = {
+        .pa_start = start_pa,
+        .size = size,
+        .flags = (uint8_t) flags,
+        .va = host_va,
+    };
+    int ret;
+
+    /* Same guard as hax_populate_ram(): do not dereference a missing VM. */
+    if (!hax_global.vm) {
+        return -EINVAL;
+    }
+
+    ret = ioctl(hax_global.vm->fd, HAX_VM_IOCTL_SET_RAM, &info);
+    if (ret < 0) {
+        return -errno;
+    }
+    return 0;
+}
+
+/*
+ * Query the HAX module for its capability info and store the reported
+ * memory quota in *@max_ram. Returns 0 on success, -1 on failure.
+ */
+int hax_get_max_ram(uint64_t *max_ram) {
+    int fd = hax_mod_open();
+    if (fd < 0) {
+        return -1;
+    }
+    struct hax_capabilityinfo cap;
+    int result = ioctl(fd, HAX_IOCTL_CAPABILITY, &cap);
+    close(fd);
+    if (result == -1) {
+        return -1;
+    }
+    *max_ram = cap.mem_quota;
+    return 0;
+}
+
+/*
+ * Fill *@cap with the capabilities reported by the HAX module behind
+ * @hax->fd. Returns 0 on success or -errno on failure.
+ */
+int hax_capability(struct hax_state *hax, struct hax_capabilityinfo *cap)
+{
+    int ret;
+
+    ret = ioctl(hax->fd, HAX_IOCTL_CAPABILITY, cap);
+    if (ret == -1) {
+        /* Save errno first: fprintf() is allowed to overwrite it. */
+        int err = errno;
+
+        fprintf(stderr, "Failed to get HAX capability\n");
+        return -err;
+    }
+
+    return 0;
+}
+
+/*
+ * Fill *@version with the version info reported by the HAX module behind
+ * @hax->fd. Returns 0 on success or -errno on failure.
+ */
+int hax_mod_version(struct hax_state *hax, struct hax_module_version *version)
+{
+    int ret;
+
+    ret = ioctl(hax->fd, HAX_IOCTL_VERSION, version);
+    if (ret == -1) {
+        /* Save errno first: fprintf() is allowed to overwrite it. */
+        int err = errno;
+
+        fprintf(stderr, "Failed to get HAX version\n");
+        return -err;
+    }
+
+    return 0;
+}
+
+/*
+ * Build the devfs path of VM @vm_id ("/dev/hax_vm/vmNN").
+ * Returns a string the caller must release with g_free(), or NULL when
+ * @vm_id is outside [0, MAX_VM_ID].
+ */
+static char *hax_vm_devfs_string(int vm_id)
+{
+    if (vm_id < 0 || vm_id > MAX_VM_ID) {
+        fprintf(stderr, "Too big VM id\n");
+        return NULL;
+    }
+
+    return g_strdup_printf("/dev/hax_vm/vm%02d", vm_id);
+}
+
+/*
+ * Build the devfs path of vCPU @vcpu_id of VM @vm_id
+ * ("/dev/hax_vmNN/vcpuNN"). Returns a string the caller must release with
+ * g_free(), or NULL when either id is out of range.
+ */
+static char *hax_vcpu_devfs_string(int vm_id, int vcpu_id)
+{
+    if (vm_id < 0 || vm_id > MAX_VM_ID ||
+        vcpu_id < 0 || vcpu_id > MAX_VCPU_ID) {
+        fprintf(stderr, "Too big vm id %x or vcpu id %x\n", vm_id, vcpu_id);
+        return NULL;
+    }
+
+    return g_strdup_printf("/dev/hax_vm%02d/vcpu%02d", vm_id, vcpu_id);
+}
+
+/*
+ * Ask the HAX module to create a VM and store its id in *@vmid.
+ * Returns -EINVAL for an invalid module descriptor, 0 without touching
+ * *@vmid when @hax already has a VM, otherwise the ioctl() result.
+ */
+int hax_host_create_vm(struct hax_state *hax, int *vmid)
+{
+    int ret;
+    int vm_id = 0;
+
+    if (hax_invalid_fd(hax->fd))
+        return -EINVAL;
+
+    if (hax->vm)
+        return 0;
+
+    ret = ioctl(hax->fd, HAX_IOCTL_CREATE_VM, &vm_id);
+    *vmid = vm_id;
+    return ret;
+}
+
+/*
+ * Open the devfs node of VM @vm_id and mark it close-on-exec.
+ * Returns the descriptor, or -1 on failure. @hax is not used.
+ */
+hax_fd hax_host_open_vm(struct hax_state * hax, int vm_id)
+{
+    hax_fd fd;
+    char *vm_name = NULL;
+
+    vm_name = hax_vm_devfs_string(vm_id);
+    if (!vm_name)
+        return -1;
+
+    fd = open(vm_name, O_RDWR);
+    /* The path comes from the GLib allocator, so g_free() is its match. */
+    g_free(vm_name);
+
+    if (fd >= 0)
+        fcntl(fd, F_SETFD, FD_CLOEXEC);
+
+    return fd;
+}
+
+/*
+ * Report QEMU's HAX API version range (@qversion) to the VM behind @vm_fd.
+ * Returns 0 on success, -EINVAL for an invalid descriptor, or the negative
+ * ioctl() result on failure.
+ */
+int hax_notify_qemu_version(hax_fd vm_fd, struct hax_qemu_version *qversion)
+{
+    int ret;
+
+    if (hax_invalid_fd(vm_fd))
+        return -EINVAL;
+
+    ret = ioctl(vm_fd, HAX_VM_IOCTL_NOTIFY_QEMU_VERSION, qversion);
+
+    if (ret < 0) {
+        fprintf(stderr, "Failed to notify qemu API version\n");
+        return ret;
+    }
+    return 0;
+}
+
+/* Simply assume the size should be bigger than the hax_tunnel,
+ * since the hax_tunnel can be extended later with compatibility considered
+ */
+/* Create vCPU @vcpuid in the VM behind @vm_fd; returns the ioctl() result. */
+int hax_host_create_vcpu(hax_fd vm_fd, int vcpuid)
+{
+    int ret;
+
+    ret = ioctl(vm_fd, HAX_VM_IOCTL_VCPU_CREATE, &vcpuid);
+    if (ret < 0)
+        fprintf(stderr, "Failed to create vcpu %x\n", vcpuid);
+
+    return ret;
+}
+
+/*
+ * Open the devfs node of vCPU @vcpuid of VM @vmid and mark it close-on-exec.
+ * Returns the descriptor, -EINVAL when the ids are out of range, or the
+ * negative open() result when the node cannot be opened.
+ */
+hax_fd hax_host_open_vcpu(int vmid, int vcpuid)
+{
+    char *devfs_path = NULL;
+    hax_fd fd;
+
+    devfs_path = hax_vcpu_devfs_string(vmid, vcpuid);
+    if (!devfs_path) {
+        fprintf(stderr, "Failed to get the devfs\n");
+        return -EINVAL;
+    }
+
+    fd = open(devfs_path, O_RDWR);
+    /* The path comes from the GLib allocator, so g_free() is its match. */
+    g_free(devfs_path);
+    if (fd < 0) {
+        fprintf(stderr, "Failed to open the vcpu devfs\n");
+        return fd;
+    }
+    fcntl(fd, F_SETFD, FD_CLOEXEC);
+    return fd;
+}
+
+/*
+ * Set up the tunnel shared with the HAX driver for @vcpu and record the
+ * tunnel and I/O buffer addresses it reports in @vcpu->tunnel/@vcpu->iobuf.
+ * Returns 0 on success, the ioctl() result on failure, or -EINVAL when the
+ * reported tunnel size is rejected by valid_hax_tunnel_size().
+ */
+int hax_host_setup_vcpu_channel(struct hax_vcpu_state *vcpu)
+{
+    int ret;
+    struct hax_tunnel_info info;
+
+    ret = ioctl(vcpu->fd, HAX_VCPU_IOCTL_SETUP_TUNNEL, &info);
+    if (ret) {
+        fprintf(stderr, "Failed to setup the hax tunnel\n");
+        return ret;
+    }
+
+    if (!valid_hax_tunnel_size(info.size)) {
+        fprintf(stderr, "Invalid hax tunnel size %x\n", info.size);
+        ret = -EINVAL;
+        return ret;
+    }
+
+    vcpu->tunnel = (struct hax_tunnel *) (intptr_t) (info.va);
+    vcpu->iobuf = (unsigned char *) (intptr_t) (info.io_va);
+    return 0;
+}
+
+/* Run @vcpu via HAX_VCPU_IOCTL_RUN; returns the ioctl() result. */
+int hax_vcpu_run(struct hax_vcpu_state *vcpu)
+{
+    int ret;
+
+    ret = ioctl(vcpu->fd, HAX_VCPU_IOCTL_RUN, NULL);
+    return ret;
+}
+
+/*
+ * Transfer FPU state between *@fl and the vCPU descriptor obtained from
+ * @env: HAX_VCPU_IOCTL_SET_FPU when @set is non-zero, GET_FPU otherwise.
+ * Returns -1 when no valid descriptor exists, else the ioctl() result.
+ */
+int hax_sync_fpu(CPUArchState * env, struct fx_layout *fl, int set)
+{
+    int ret, fd;
+
+    fd = hax_vcpu_get_fd(env);
+    if (fd <= 0)
+        return -1;
+
+    if (set)
+        ret = ioctl(fd, HAX_VCPU_IOCTL_SET_FPU, fl);
+    else
+        ret = ioctl(fd, HAX_VCPU_IOCTL_GET_FPU, fl);
+    return ret;
+}
+
+/*
+ * Transfer the MSRs in *@msrs to or from the vCPU descriptor obtained from
+ * @env: HAX_VCPU_IOCTL_SET_MSRS when @set is non-zero, GET_MSRS otherwise.
+ * Returns -1 when no valid descriptor exists, else the ioctl() result.
+ */
+int hax_sync_msr(CPUArchState * env, struct hax_msr_data *msrs, int set)
+{
+    int ret, fd;
+
+    fd = hax_vcpu_get_fd(env);
+    if (fd <= 0)
+        return -1;
+    if (set)
+        ret = ioctl(fd, HAX_VCPU_IOCTL_SET_MSRS, msrs);
+    else
+        ret = ioctl(fd, HAX_VCPU_IOCTL_GET_MSRS, msrs);
+    return ret;
+}
+
+/*
+ * Transfer the register state in *@state to or from the vCPU descriptor
+ * obtained from @env: HAX_VCPU_SET_REGS when @set is non-zero, GET_REGS
+ * otherwise. Returns -1 when no valid descriptor exists, else ioctl()'s.
+ */
+int hax_sync_vcpu_state(CPUArchState * env, struct vcpu_state_t *state, int set)
+{
+    int ret, fd;
+
+    fd = hax_vcpu_get_fd(env);
+    if (fd <= 0)
+        return -1;
+
+    if (set)
+        ret = ioctl(fd, HAX_VCPU_SET_REGS, state);
+    else
+        ret = ioctl(fd, HAX_VCPU_GET_REGS, state);
+    return ret;
+}
+
+/*
+ * Inject interrupt @vector through the vCPU descriptor obtained from @env.
+ * Returns -1 when no valid descriptor exists, else the ioctl() result.
+ */
+int hax_inject_interrupt(CPUArchState * env, int vector)
+{
+    int ret, fd;
+
+    fd = hax_vcpu_get_fd(env);
+    if (fd <= 0)
+        return -1;
+
+    ret = ioctl(fd, HAX_VCPU_IOCTL_INTERRUPT, &vector);
+    return ret;
+}
diff --git a/target-i386/hax-darwin.h b/target-i386/hax-darwin.h new file mode 100644 index 0000000..38361fc --- /dev/null +++ b/target-i386/hax-darwin.h
@@ -0,0 +1,73 @@
+/*
+ * QEMU HAXM support
+ *
+ * Copyright (c) 2011 Intel Corporation
+ *  Written by:
+ *  Jiang Yunhong<yunhong.jiang@intel.com>
+ *  Xin Xiaohui<xiaohui.xin@intel.com>
+ *  Zhang Xiantao<xiantao.zhang@intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef __HAX_UNIX_H
+#define __HAX_UNIX_H
+
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <stdarg.h>
+
+/*
+ * NOTE: hax_fd and struct hax_state are used below but not declared here;
+ * target-i386/hax-i386.h defines them before including this header.
+ */
+
+#define HAX_INVALID_FD (-1)
+/* Returns non-zero when @fd cannot be used; descriptor 0 is rejected too. */
+static inline int hax_invalid_fd(hax_fd fd)
+{
+    return fd <= 0;
+}
+
+/* Closes the HAX module descriptor stored in @hax. */
+static inline void hax_mod_close(struct hax_state *hax)
+{
+    close(hax->fd);
+}
+
+/* Closes the descriptor @fd. */
+static inline void hax_close_fd(hax_fd fd)
+{
+    close(fd);
+}
+
+/* HAX module-level ioctls */
+#define HAX_IOCTL_VERSION _IOWR(0, 0x20, struct hax_module_version)
+#define HAX_IOCTL_CREATE_VM _IOWR(0, 0x21, uint32_t)
+#define HAX_IOCTL_DESTROY_VM _IOW(0, 0x22, uint32_t)
+#define HAX_IOCTL_CAPABILITY _IOR(0, 0x23, struct hax_capabilityinfo)
+
+/* HAX VM-level ioctls */
+#define HAX_VM_IOCTL_VCPU_CREATE _IOWR(0, 0x80, uint32_t)
+#define HAX_VM_IOCTL_ALLOC_RAM _IOWR(0, 0x81, struct hax_alloc_ram_info)
+#define HAX_VM_IOCTL_SET_RAM _IOWR(0, 0x82, struct hax_set_ram_info)
+#define HAX_VM_IOCTL_VCPU_DESTROY _IOW(0, 0x83, uint32_t)
+#define HAX_VM_IOCTL_NOTIFY_QEMU_VERSION _IOW(0, 0x84, struct hax_qemu_version)
+
+/* HAX vCPU-level ioctls */
+#define HAX_VCPU_IOCTL_RUN _IO(0, 0xc0)
+#define HAX_VCPU_IOCTL_SET_MSRS _IOWR(0, 0xc1, struct hax_msr_data)
+#define HAX_VCPU_IOCTL_GET_MSRS _IOWR(0, 0xc2, struct hax_msr_data)
+
+#define HAX_VCPU_IOCTL_SET_FPU _IOW(0, 0xc3, struct fx_layout)
+#define HAX_VCPU_IOCTL_GET_FPU _IOR(0, 0xc4, struct fx_layout)
+
+#define HAX_VCPU_IOCTL_SETUP_TUNNEL _IOWR(0, 0xc5, struct hax_tunnel_info)
+#define HAX_VCPU_IOCTL_INTERRUPT _IOWR(0, 0xc6, uint32_t)
+#define HAX_VCPU_SET_REGS _IOWR(0, 0xc7, struct vcpu_state_t)
+#define HAX_VCPU_GET_REGS _IOWR(0, 0xc8, struct vcpu_state_t)
+
+#endif /* __HAX_UNIX_H */
diff --git a/target-i386/hax-i386.h b/target-i386/hax-i386.h new file mode 100644 index 0000000..3dc1981 --- /dev/null +++ b/target-i386/hax-i386.h
@@ -0,0 +1,93 @@
+/*
+ * QEMU HAXM support
+ *
+ * Copyright (c) 2011 Intel Corporation
+ *  Written by:
+ *  Jiang Yunhong<yunhong.jiang@intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef _HAX_I386_H
+#define _HAX_I386_H
+
+#include "sysemu/hax.h"
+#include "target-i386/hax-interface.h"
+#include "cpu.h"
+
+#ifdef CONFIG_DARWIN
+typedef int hax_fd; /* descriptor returned by open() in hax-darwin.c */
+#endif
+
+#ifdef CONFIG_WIN32
+typedef HANDLE hax_fd; /* handle returned by CreateFile() in hax-windows.c */
+#endif
+
+extern struct hax_state hax_global;
+struct hax_vcpu_state {
+    hax_fd fd; /* vCPU device; target of the HAX_VCPU_* ioctls */
+    int vcpu_id;
+    int resync;
+    int emulation_state; /* e.g. HAX_EMULATE_STATE_INITIAL after a reset */
+    struct hax_tunnel *tunnel; /* set by hax_host_setup_vcpu_channel() */
+    unsigned char *iobuf; /* set by hax_host_setup_vcpu_channel() */
+};
+
+struct hax_state {
+    hax_fd fd; /* the global hax device interface */
+    uint32_t version;
+    struct hax_vm *vm; /* NULL until a VM has been created */
+    uint64_t mem_quota;
+};
+
+#define HAX_MAX_VCPU 0x10
+#define MAX_VM_ID 0x40
+#define MAX_VCPU_ID 0x40
+
+struct hax_vm {
+    hax_fd fd; /* VM device; target of the HAX_VM_* ioctls */
+    int id;
+    struct hax_vcpu_state *vcpus[HAX_MAX_VCPU];
+};
+
+#ifdef NEED_CPU_H
+/* Functions exported to the host-specific modules */
+hax_fd hax_vcpu_get_fd(CPUArchState * env);
+int valid_hax_tunnel_size(uint16_t size);
+
+/* Host-specific functions */
+int hax_mod_version(struct hax_state *hax, struct hax_module_version *version);
+int hax_inject_interrupt(CPUArchState * env, int vector);
+struct hax_vm *hax_vm_create(struct hax_state *hax);
+int hax_vcpu_run(struct hax_vcpu_state *vcpu);
+int hax_vcpu_create(int id);
+int hax_sync_vcpu_state(CPUArchState * env, struct vcpu_state_t *state, int set);
+int hax_sync_msr(CPUArchState * env, struct hax_msr_data *msrs, int set);
+int hax_sync_fpu(CPUArchState * env, struct fx_layout *fl, int set);
+#endif
+
+int hax_vm_destroy(struct hax_vm *vm);
+int hax_capability(struct hax_state *hax, struct hax_capabilityinfo *cap);
+int hax_notify_qemu_version(hax_fd vm_fd, struct hax_qemu_version *qversion);
+int hax_set_ram(uint64_t start_pa, uint32_t size, uint64_t host_va, int flags);
+
+/* Common host functions */
+int hax_host_create_vm(struct hax_state *hax, int *vm_id);
+hax_fd hax_host_open_vm(struct hax_state *hax, int vm_id);
+int hax_host_create_vcpu(hax_fd vm_fd, int vcpuid);
+hax_fd hax_host_open_vcpu(int vmid, int vcpuid);
+int hax_host_setup_vcpu_channel(struct hax_vcpu_state *vcpu);
+hax_fd hax_mod_open(void);
+
+/* Included last: hax-darwin.h uses hax_fd and struct hax_state from above. */
+#ifdef CONFIG_DARWIN
+#include "target-i386/hax-darwin.h"
+#endif
+
+#ifdef CONFIG_WIN32
+#include "target-i386/hax-windows.h"
+#endif
+
+#endif
diff --git a/target-i386/hax-interface.h b/target-i386/hax-interface.h new file mode 100644 index 0000000..2bc7f1a --- /dev/null +++ b/target-i386/hax-interface.h
@@ -0,0 +1,357 @@
+/*
+ * QEMU HAXM support
+ *
+ * Copyright (c) 2011 Intel Corporation
+ *  Written by:
+ *  Jiang Yunhong<yunhong.jiang@intel.com>
+ *  Xin Xiaohui<xiaohui.xin@intel.com>
+ *  Zhang Xiantao<xiantao.zhang@intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+/* Interface with the HAX kernel module */
+
+#ifndef _HAX_INTERFACE_H
+#define _HAX_INTERFACE_H
+
+/* fx_layout has 3 formats (table 3-56), 512 bytes */
+struct fx_layout {
+    uint16_t fcw;
+    uint16_t fsw;
+    uint8_t ftw;
+    uint8_t res1;
+    uint16_t fop;
+    union {
+        struct {
+            uint32_t fip;
+            uint16_t fcs;
+            uint16_t res2;
+        };
+        uint64_t fpu_ip;
+    };
+    union {
+        struct {
+            uint32_t fdp;
+            uint16_t fds;
+            uint16_t res3;
+        };
+        uint64_t fpu_dp;
+    };
+    uint32_t mxcsr;
+    uint32_t mxcsr_mask;
+    uint8_t st_mm[8][16];
+    uint8_t mmx_1[8][16];
+    uint8_t mmx_2[8][16];
+    uint8_t pad[96];
+} __attribute__ ((aligned(8)));
+
+struct vmx_msr {
+    uint64_t entry;
+    uint64_t value;
+} __attribute__ ((__packed__));
+
+/*
+ * A fixed-size array is not ideal, but it makes Mac support a bit easier by
+ * avoiding memory mapping or copyin work.
+ */
+#define HAX_MAX_MSR_ARRAY 0x20
+struct hax_msr_data {
+    uint16_t nr_msr;
+    uint16_t done;
+    uint16_t pad[2];
+    struct vmx_msr entries[HAX_MAX_MSR_ARRAY];
+} __attribute__ ((__packed__));
+
+union interruptibility_state_t {
+    uint32_t raw;
+    struct {
+        uint32_t sti_blocking:1;
+        uint32_t movss_blocking:1;
+        uint32_t smi_blocking:1;
+        uint32_t nmi_blocking:1;
+        uint32_t reserved:28;
+    };
+    uint64_t pad;
+};
+
+typedef union interruptibility_state_t interruptibility_state_t;
+
+/* Segment descriptor */
+struct segment_desc_t {
+    uint16_t selector;
+    uint16_t _dummy;
+    uint32_t limit;
+    uint64_t base;
+    union {
+        struct {
+            uint32_t type:4;
+            uint32_t desc:1;
+            uint32_t dpl:2;
+            uint32_t present:1;
+            uint32_t:4;
+            uint32_t available:1;
+            uint32_t long_mode:1;
+            uint32_t operand_size:1;
+            uint32_t granularity:1;
+            uint32_t null:1;
+            uint32_t:15;
+        };
+        uint32_t ar;
+    };
+    uint32_t ipad;
+};
+
+typedef struct segment_desc_t segment_desc_t;
+
+struct vcpu_state_t {
+    union {
+        uint64_t _regs[16];
+        struct {
+            union {
+                struct {
+                    uint8_t _al, _ah;
+                };
+                uint16_t _ax;
+                uint32_t _eax;
+                uint64_t _rax;
+            };
+            union {
+                struct {
+                    uint8_t _cl, _ch;
+                };
+                uint16_t _cx;
+                uint32_t _ecx;
+                uint64_t _rcx;
+            };
+            union {
+                struct {
+                    uint8_t _dl, _dh;
+                };
+                uint16_t _dx;
+                uint32_t _edx;
+                uint64_t _rdx;
+            };
+            union {
+                struct {
+                    uint8_t _bl, _bh;
+                };
+                uint16_t _bx;
+                uint32_t _ebx;
+                uint64_t _rbx;
+            };
+            union {
+                uint16_t _sp;
+                uint32_t _esp;
+                uint64_t _rsp;
+            };
+            union {
+                uint16_t _bp;
+                uint32_t _ebp;
+                uint64_t _rbp;
+            };
+            union {
+                uint16_t _si;
+                uint32_t _esi;
+                uint64_t _rsi;
+            };
+            union {
+                uint16_t _di;
+                uint32_t _edi;
+                uint64_t _rdi;
+            };
+
+            uint64_t _r8;
+            uint64_t _r9;
+            uint64_t _r10;
+            uint64_t _r11;
+            uint64_t _r12;
+            uint64_t _r13;
+            uint64_t _r14;
+            uint64_t _r15;
+        };
+    };
+
+    union {
+        uint32_t _eip;
+        uint64_t _rip;
+    };
+
+    union {
+        uint32_t _eflags;
+        uint64_t _rflags;
+    };
+
+    segment_desc_t _cs;
+    segment_desc_t _ss;
+    segment_desc_t _ds;
+    segment_desc_t _es;
+    segment_desc_t _fs;
+    segment_desc_t _gs;
+    segment_desc_t _ldt;
+    segment_desc_t _tr;
+
+    segment_desc_t _gdt;
+    segment_desc_t _idt;
+
+    uint64_t _cr0;
+    uint64_t _cr2;
+    uint64_t _cr3;
+    uint64_t _cr4;
+
+    uint64_t _dr0;
+    uint64_t _dr1;
+    uint64_t _dr2;
+    uint64_t _dr3;
+    uint64_t _dr6;
+    uint64_t _dr7;
+    uint64_t _pde;
+
+    uint32_t _efer;
+
+    uint32_t _sysenter_cs;
+    uint64_t _sysenter_eip;
+    uint64_t _sysenter_esp;
+
+    uint32_t _activity_state;
+    uint32_t pad;
+    interruptibility_state_t _interruptibility_state;
+};
+
+/* HAX exit status */
+enum exit_status {
+    /* IO port request */
+    HAX_EXIT_IO = 1,
+    /* MMIO instruction emulation */
+    HAX_EXIT_MMIO,
+    /* QEMU emulation mode request; currently means guest entered non-PG mode */
+    HAX_EXIT_REAL,
+    /*
+     * Interrupt window open, qemu can inject an interrupt now.
+     * Also used when a signal is pending, since at that time qemu usually
+     * needs to check for interrupts
+     */
+    HAX_EXIT_INTERRUPT,
+    /* Unknown vmexit, mostly triggers a reboot */
+    HAX_EXIT_UNKNOWN_VMEXIT,
+    /* HALT from guest */
+    HAX_EXIT_HLT,
+    /* Reboot request, e.g. because of a triple fault in the guest */
+    HAX_EXIT_STATECHANGE,
+    /* the vcpu is now only paused when destroyed, so simply return to hax */
+    HAX_EXIT_PAUSED,
+    HAX_EXIT_FAST_MMIO,
+};
+
+/*
+ * The interface definition:
+ * 1. vcpu_run returns 0 on success; any other value means failure
+ * 2. exit_status returns the exit reason, as listed in enum exit_status
+ * 3. exit_reason is the vmx exit reason
+ */
+struct hax_tunnel {
+    uint32_t _exit_reason;
+    uint32_t _exit_flag;
+    uint32_t _exit_status;
+    uint32_t user_event_pending;
+    int ready_for_interrupt_injection;
+    int request_interrupt_window;
+    union {
+        struct {
+            /* was "0: read, 1: write"; NOTE(review): IN is 1, OUT is 0 below */
+#define HAX_EXIT_IO_IN 1
+#define HAX_EXIT_IO_OUT 0
+            uint8_t _direction;
+            uint8_t _df;
+            uint16_t _size;
+            uint16_t _port;
+            uint16_t _count;
+            uint8_t _flags;
+            uint8_t _pad0;
+            uint16_t _pad1;
+            uint32_t _pad2;
+            uint64_t _vaddr;
+        } pio;
+        struct {
+            uint64_t gla;
+        } mmio;
+        struct {
+        } state;
+    };
+} __attribute__ ((__packed__));
+
+struct hax_module_version {
+    uint32_t compat_version;
+    uint32_t cur_version;
+} __attribute__ ((__packed__));
+
+/* This interface is supported only from API version 2 onwards */
+struct hax_qemu_version {
+    /* Current API version in QEMU */
+    uint32_t cur_version;
+    /* The minimum API version supported by QEMU */
+    uint32_t min_version;
+} __attribute__ ((__packed__));
+
+/* The Mac-specific interface to qemu, mostly ioctl related */
+struct hax_tunnel_info {
+    uint64_t va;
+    uint64_t io_va;
+    uint16_t size;
+    uint16_t pad[3];
+} __attribute__ ((__packed__));
+
+struct hax_alloc_ram_info {
+    uint32_t size;
+    uint32_t pad;
+    uint64_t va;
+} __attribute__ ((__packed__));
+#define HAX_RAM_INFO_ROM 0x1
+struct hax_set_ram_info {
+    uint64_t pa_start;
+    uint32_t size;
+    uint8_t flags;
+    uint8_t pad[3];
+    uint64_t va;
+} __attribute__ ((__packed__));
+
+#define HAX_CAP_STATUS_WORKING 0x1
+#define HAX_CAP_STATUS_NOTWORKING 0x0
+#define HAX_CAP_WORKSTATUS_MASK 0x1
+
+#define HAX_CAP_FAILREASON_VT 0x1
+#define HAX_CAP_FAILREASON_NX 0x2
+
+#define HAX_CAP_MEMQUOTA 0x2
+#define HAX_CAP_UG 0x4
+
+struct hax_capabilityinfo {
+    /* bit 0: 1 - working
+     *        0 - not working, possibly because VT/NX disabled
+     * bit 1: 1 - memory limitation working
+     *        0 - no memory limitation
+     */
+    uint16_t wstatus;
+    /* valid when not working
+     * bit 0: VT not enabled
+     * bit 1: NX not enabled */
+    uint16_t winfo;
+    uint32_t pad;
+    uint64_t mem_quota;
+} __attribute__ ((__packed__));
+
+struct hax_fastmmio {
+    uint64_t gpa;
+    uint64_t value;
+    uint8_t size;
+    uint8_t direction;
+    uint16_t reg_index;
+    uint32_t pad0;
+    uint64_t _cr0;
+    uint64_t _cr2;
+    uint64_t _cr3;
+    uint64_t _cr4;
+} __attribute__ ((__packed__));
+#endif
diff --git a/target-i386/hax-slot.c b/target-i386/hax-slot.c new file mode 100644 index 0000000..b0b3ed9 --- /dev/null +++ b/target-i386/hax-slot.c
@@ -0,0 +1,328 @@
+/*
+** HAX memory slot operations
+**
+** Copyright (c) 2015-16 Intel Corporation
+**
+** This software is licensed under the terms of the GNU General Public
+** License version 2, as published by the Free Software Foundation, and
+** may be copied, distributed, and modified under those terms.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*/
+
+#include "target-i386/hax-slot.h"
+#include "target-i386/hax-i386.h"
+#include "qemu/queue.h"
+
+//#define DEBUG_HAX_SLOT
+
+#ifdef DEBUG_HAX_SLOT
+#define DPRINTF(fmt, ...) \
+    do { fprintf(stdout, fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
+/**
+ * HAXSlot: describes a guest physical memory region and its mapping
+ *
+ * @start_pa: a guest physical address marking the start of the region; must be
+ *            page-aligned
+ * @end_pa: a guest physical address marking the end of the region (exclusive);
+ *          must be page-aligned
+ * @hva_pa_delta: the host virtual address to which guest physical address 0 is
+ *                mapped; in other words, for any guest physical address within
+ *                the region (start_pa <= pa < end_pa), the corresponding host
+ *                virtual address is calculated by host_va = pa + hva_pa_delta
+ * @flags: parameters for the mapping; must be non-negative
+ * @entry: additional fields for linking #HAXSlot instances together
+ */
+typedef struct HAXSlot {
+    uint64_t start_pa;
+    uint64_t end_pa;
+    uint64_t hva_pa_delta;
+    int flags;
+    QTAILQ_ENTRY(HAXSlot) entry;
+} HAXSlot;
+
+/* A doubly-linked list (actually a tail queue) of all registered slots */
+static QTAILQ_HEAD(HAXSlotListHead, HAXSlot) slot_list =
+    QTAILQ_HEAD_INITIALIZER(slot_list);
+
+void hax_slot_init_registry(void)
+{
+    HAXSlot *initial_slot;
+
+    g_assert(QTAILQ_EMPTY(&slot_list));
+
+    initial_slot = (HAXSlot *) g_malloc0(sizeof(*initial_slot));
+    /* Implied: initial_slot->start_pa = 0; */
+    /* Ideally we want to set end_pa to 2^64, but that is too large for
+     * uint64_t. We don't need to support such a large guest physical address
+     * space anyway; (2^64 - TARGET_PAGE_SIZE) should be (more than) enough.
+     */
+    initial_slot->end_pa = TARGET_PAGE_MASK;
+    /* hva_pa_delta and flags are initialized with invalid values */
+    initial_slot->hva_pa_delta = ~TARGET_PAGE_MASK;
+    initial_slot->flags = -1;
+    QTAILQ_INSERT_TAIL(&slot_list, initial_slot, entry);
+}
+
+void hax_slot_free_registry(void)
+{
+    DPRINTF("%s: Deleting all registered slots\n", __func__);
+    while (!QTAILQ_EMPTY(&slot_list)) {
+        HAXSlot *slot = QTAILQ_FIRST(&slot_list);
+        QTAILQ_REMOVE(&slot_list, slot, entry);
+        g_free(slot);
+    }
+}
+
+/**
+ * hax_slot_dump: dumps a slot to stdout (only when DEBUG_HAX_SLOT is defined)
+ *
+ * @slot: the slot to dump
+ */
+static void hax_slot_dump(HAXSlot *slot)
+{
+    DPRINTF("[ start_pa=0x%016" PRIx64 ", end_pa=0x%016" PRIx64
+            ", hva_pa_delta=0x%016" PRIx64 ", flags=%d ]\n", slot->start_pa,
+            slot->end_pa, slot->hva_pa_delta, slot->flags);
+}
+
+/**
+ * hax_slot_dump_list: dumps @slot_list to stdout (DEBUG_HAX_SLOT builds only)
+ */
+static void hax_slot_dump_list(void)
+{
+#ifdef DEBUG_HAX_SLOT
+    HAXSlot *slot;
+    int i = 0;
+
+    DPRINTF("**** BEGIN HAX SLOT LIST DUMP ****\n");
+    QTAILQ_FOREACH(slot, &slot_list, entry) {
+        DPRINTF("Slot %d:\n\t", i++);
+        hax_slot_dump(slot);
+    }
+    DPRINTF("**** END HAX SLOT LIST DUMP ****\n");
+#endif
+}
+
+/**
+ * hax_slot_find: locates the slot containing a guest physical address
+ *
+ * Traverses @slot_list, starting from @start_slot, and returns the slot which
+ * contains @pa. There should be one and only one such slot, because:
+ *
+ * 1) @slot_list is initialized with a slot which covers all valid @pa values.
+ *    This coverage stays unchanged as new slots are inserted into @slot_list.
+ * 2) @slot_list does not contain overlapping slots.
+ *
+ * @start_slot: the first slot from which @slot_list is traversed and searched;
+ *              must not be %NULL
+ * @pa: the guest physical address to locate; must not be less than the lower
+ *      bound of @start_slot
+ */
+static HAXSlot * hax_slot_find(HAXSlot *start_slot, uint64_t pa)
+{
+    HAXSlot *slot;
+
+    g_assert(start_slot);
+    g_assert(start_slot->start_pa <= pa);
+
+    slot = start_slot;
+    do {
+        if (slot->end_pa > pa) {
+            return slot;
+        }
+        slot = QTAILQ_NEXT(slot, entry);
+    } while (slot);
+
+    /* Should never reach here */
+    g_assert_not_reached();
+    return NULL;
+}
+
+/**
+ * hax_slot_split: splits a slot into two
+ *
+ * Shrinks @slot and creates a new slot from the vacated region. Returns the
+ * new slot, which is linked into @slot_list right after @slot.
+ *
+ * @slot: the slot to be split/shrunk
+ * @pa: the splitting point; must be page-aligned and within @slot
+ */
+static HAXSlot * hax_slot_split(HAXSlot *slot, uint64_t pa)
+{
+    HAXSlot *new_slot;
+
+    g_assert(slot);
+    g_assert(pa > slot->start_pa && pa < slot->end_pa);
+    g_assert(!(pa & ~TARGET_PAGE_MASK));
+
+    new_slot = (HAXSlot *) g_malloc0(sizeof(*new_slot));
+    new_slot->start_pa = pa;
+    new_slot->end_pa = slot->end_pa;
+    new_slot->hva_pa_delta = slot->hva_pa_delta;
+    new_slot->flags = slot->flags;
+
+    slot->end_pa = pa;
+    QTAILQ_INSERT_AFTER(&slot_list, slot, new_slot, entry);
+    return new_slot;
+}
+
+/**
+ * hax_slot_can_merge: tests if two slots are compatible
+ *
+ * Two slots are considered compatible if they share the same memory mapping
+ * attributes. Compatible slots can be merged if they overlap or are adjacent.
+ *
+ * Returns %true if @slot1 and @slot2 are compatible.
+ *
+ * @slot1: one of the slots to be tested; must not be %NULL
+ * @slot2: the other slot to be tested; must not be %NULL
+ */
+static bool hax_slot_can_merge(HAXSlot *slot1, HAXSlot *slot2)
+{
+    g_assert(slot1 && slot2);
+
+    return slot1->hva_pa_delta == slot2->hva_pa_delta
+           && slot1->flags == slot2->flags;
+}
+
+/**
+ * hax_slot_insert: inserts a slot into @slot_list, with the potential side
+ * effect of creating/updating memory mappings
+ *
+ * Causes memory mapping attributes of @slot to override those of overlapping
+ * slots (including partial slots) in @slot_list. For any slot whose mapping
+ * attributes have changed, performs an ioctl to enforce the new mapping.
+ *
+ * Aborts QEMU on error.
+ *
+ * @slot: the slot to be inserted
+ */
+static void hax_slot_insert(HAXSlot *slot)
+{
+    HAXSlot *low_slot, *high_slot;
+    HAXSlot *low_slot_prev, *high_slot_next;
+    HAXSlot *old_slot, *old_slot_next;
+
+    g_assert(!QTAILQ_EMPTY(&slot_list));
+
+    low_slot = hax_slot_find(QTAILQ_FIRST(&slot_list), slot->start_pa);
+    g_assert(low_slot);
+    low_slot_prev = QTAILQ_PREV(low_slot, HAXSlotListHead, entry);
+
+    /* Adjust slot and/or low_slot such that their lower bounds (start_pa)
+     * align.
+     */
+    if (hax_slot_can_merge(low_slot, slot)) {
+        slot->start_pa = low_slot->start_pa;
+    } else if (slot->start_pa == low_slot->start_pa && low_slot_prev
+               && hax_slot_can_merge(low_slot_prev, slot)) {
+        low_slot = low_slot_prev;
+        slot->start_pa = low_slot->start_pa;
+    } else if (slot->start_pa != low_slot->start_pa) {
+        /* low_slot->start_pa < slot->start_pa < low_slot->end_pa */
+        low_slot = hax_slot_split(low_slot, slot->start_pa);
+        g_assert(low_slot);
+    }
+    /* Now we have slot->start_pa == low_slot->start_pa */
+
+    high_slot = hax_slot_find(low_slot, slot->end_pa - 1);
+    g_assert(high_slot);
+    high_slot_next = QTAILQ_NEXT(high_slot, entry);
+
+    /* Adjust slot and/or high_slot such that their upper bounds (end_pa)
+     * align.
+     */
+    if (hax_slot_can_merge(slot, high_slot)) {
+        slot->end_pa = high_slot->end_pa;
+    } else if (slot->end_pa == high_slot->end_pa && high_slot_next
+               && hax_slot_can_merge(slot, high_slot_next)) {
+        high_slot = high_slot_next;
+        slot->end_pa = high_slot->end_pa;
+    } else if (slot->end_pa != high_slot->end_pa) {
+        /* high_slot->start_pa < slot->end_pa < high_slot->end_pa */
+        high_slot_next = hax_slot_split(high_slot, slot->end_pa);
+        g_assert(high_slot_next);
+    }
+    /* Now we have slot->end_pa == high_slot->end_pa */
+
+    /* We are ready for substitution: replace all slots between low_slot and
+     * high_slot (inclusive) with slot. */
+
+    /* Step 1: insert slot into the list, before low_slot */
+    QTAILQ_INSERT_BEFORE(low_slot, slot, entry);
+
+    /* Step 2: remove low_slot..high_slot, one by one */
+    for (old_slot = low_slot;
+         /* This condition always evaluates to 1. See:
+          * https://en.wikipedia.org/wiki/Comma_operator
+          */
+         old_slot_next = QTAILQ_NEXT(old_slot, entry), 1;
+         old_slot = old_slot_next) {
+        g_assert(old_slot);
+
+        QTAILQ_REMOVE(&slot_list, old_slot, entry);
+        if (!hax_slot_can_merge(slot, old_slot)) {
+            /* Mapping for guest memory region [old_slot->start_pa,
+             * old_slot->end_pa) has changed - must do ioctl. */
+            /* TODO: Further reduce the number of ioctl calls by preprocessing
+             * the low_slot..high_slot sublist and combining any two adjacent
+             * slots that are both incompatible with slot.
+             */
+            uint32_t size = old_slot->end_pa - old_slot->start_pa; /* NOTE(review): assumes the span fits in 32 bits - confirm */
+            uint64_t host_va = old_slot->start_pa + slot->hva_pa_delta;
+            int err;
+
+            DPRINTF("%s: Doing ioctl (size=0x%08" PRIx32 ")\n", __func__, size);
+            /* Use the new host_va and flags */
+            err = hax_set_ram(old_slot->start_pa, size, host_va, slot->flags);
+            if (err) {
+                fprintf(stderr, "%s: Failed to set memory mapping (err=%d)\n",
+                        __func__, err);
+                abort();
+            }
+        }
+        g_free(old_slot);
+
+        /* Exit the infinite loop following the removal of high_slot */
+        if (old_slot == high_slot) { /* pointer value only; already freed */
+            break;
+        }
+    }
+}
+
+void hax_slot_register(uint64_t start_pa, uint32_t size, uint64_t host_va,
+                       int flags)
+{
+    uint64_t end_pa = start_pa + size;
+    HAXSlot *slot;
+
+    g_assert(!(start_pa & ~TARGET_PAGE_MASK));
+    g_assert(!(end_pa & ~TARGET_PAGE_MASK));
+    g_assert(start_pa < end_pa);
+    g_assert(host_va);
+    g_assert(flags >= 0);
+
+    slot = g_malloc0(sizeof(*slot));
+    slot->start_pa = start_pa;
+    slot->end_pa = end_pa;
+    slot->hva_pa_delta = host_va - start_pa;
+    slot->flags = flags;
+
+    DPRINTF("%s: Inserting slot:\n\t", __func__);
+    hax_slot_dump(slot);
+    hax_slot_dump_list();
+
+    hax_slot_insert(slot); /* slot is now owned by slot_list */
+
+    DPRINTF("%s: Done\n", __func__);
+    hax_slot_dump_list();
+}
diff --git a/target-i386/hax-slot.h b/target-i386/hax-slot.h new file mode 100644 index 0000000..d991c53 --- /dev/null +++ b/target-i386/hax-slot.h
@@ -0,0 +1,58 @@
+/*
+** HAX memory slot operations
+**
+** Copyright (c) 2015-16 Intel Corporation
+**
+** This software is licensed under the terms of the GNU General Public
+** License version 2, as published by the Free Software Foundation, and
+** may be copied, distributed, and modified under those terms.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*/
+
+#ifndef _HAX_SLOT_H
+#define _HAX_SLOT_H
+
+#include <inttypes.h>
+
+/**
+ * hax_slot_init_registry: initializes the registry of memory slots.
+ *
+ * Should be called during HAX initialization, before any call to
+ * hax_slot_register(). The registry must be empty when this is called.
+ */
+void hax_slot_init_registry(void);
+
+/**
+ * hax_slot_free_registry: destroys the registry of memory slots.
+ *
+ * Should be called during HAX cleanup to free up resources used by the
+ * registry of memory slots. Every registered slot is removed and freed.
+ */
+void hax_slot_free_registry(void);
+
+/**
+ * hax_slot_register: registers a memory slot, updating HAX memory mappings if
+ * necessary.
+ *
+ * Must be called after hax_slot_init_registry(). Can be called multiple times
+ * to create new memory mappings or update existing ones. This function is smart
+ * enough to avoid asking the HAXM driver to do the same mapping twice for any
+ * guest physical page.
+ *
+ * Aborts QEMU if the HAXM driver rejects a mapping or an argument is invalid.
+ *
+ * @start_pa: a guest physical address marking the start of the slot to
+ *            register; must be page-aligned
+ * @size: size of the slot to register; must be page-aligned and positive
+ * @host_va: a host virtual address to which @start_pa should be mapped;
+ *           must be non-zero
+ * @flags: parameters for the mapping, passed to the HAXM driver if
+ *         necessary; must be non-negative
+ */
+void hax_slot_register(uint64_t start_pa, uint32_t size, uint64_t host_va,
+                       int flags);
+
+#endif
diff --git a/target-i386/hax-windows.c b/target-i386/hax-windows.c new file mode 100644 index 0000000..f2d72a3 --- /dev/null +++ b/target-i386/hax-windows.c
@@ -0,0 +1,560 @@
+/*
+ * QEMU HAXM support
+ *
+ * Copyright (c) 2011 Intel Corporation
+ *  Written by:
+ *  Jiang Yunhong<yunhong.jiang@intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "hax-i386.h"
+
+/* #define DEBUG_HAX */
+
+#ifdef DEBUG_HAX
+#define DPRINTF(fmt, ...) \
+    do { fprintf(stdout, fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
+/*
+ * Open the global HAX device and store its handle in *fd.
+ *
+ * Returns 0 on success, -1 when the driver is not loaded, and -2 for a
+ * NULL @fd or any other failure.  *fd is left untouched on failure.
+ */
+static int hax_open_device(hax_fd *fd)
+{
+    DWORD err;
+    HANDLE hDevice;
+
+    if (!fd) {
+        return -2;
+    }
+
+    hDevice = CreateFile("\\\\.\\HAX",
+                         GENERIC_READ | GENERIC_WRITE,
+                         0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
+
+    if (hDevice == INVALID_HANDLE_VALUE) {
+        /* Read the error code before fprintf() gets a chance to change it. */
+        err = GetLastError();
+        fprintf(stderr, "Failed to open the HAX device!\n");
+        if (err == ERROR_FILE_NOT_FOUND) {
+            return -1;
+        }
+        return -2;
+    }
+    *fd = hDevice;
+    DPRINTF("HAX device fd:%p\n", (void *) *fd);
+    return 0;
+}
+
+/*
+ * Query the memory quota of the HAX driver.
+ *
+ * Opens the HAX device, issues HAX_IOCTL_CAPABILITY, closes the device and
+ * stores the reported mem_quota in *max_ram.  Returns 0 on success, -1
+ * when the device cannot be opened and -2 when the ioctl fails.
+ */
+int hax_get_max_ram(uint64_t *max_ram)
+{
+    DWORD dSize = 0;
+    struct hax_capabilityinfo cap;
+    int result;
+    hax_fd fd = hax_mod_open();
+
+    /*
+     * hax_mod_open() yields NULL on failure; INVALID_HANDLE_VALUE is
+     * rejected as well so that no invalid handle ever reaches the ioctl.
+     */
+    if (fd == NULL || fd == INVALID_HANDLE_VALUE) {
+        return -1;
+    }
+    result = DeviceIoControl(fd, HAX_IOCTL_CAPABILITY, NULL, 0, &cap,
+                             sizeof(cap), &dSize, (LPOVERLAPPED) NULL);
+    CloseHandle(fd);
+
+    if (!result) {
+        return -2;
+    }
+    *max_ram = cap.mem_quota;
+
+    return 0;
+}
+
+/*
+ * Open the global HAX device.
+ *
+ * Returns the device handle, or NULL (not INVALID_HANDLE_VALUE) when the
+ * device cannot be opened.
+ */
+hax_fd hax_mod_open(void)
+{
+    hax_fd fd = NULL;
+
+    if (hax_open_device(&fd) != 0) {
+        fprintf(stderr, "Open HAX device failed\n");
+    }
+
+    return fd;
+}
+
+/*
+ * Pass the host memory range [@va, @va + @size) to the HAX driver through
+ * HAX_VM_IOCTL_ALLOC_RAM.
+ *
+ * Returns 0 on success, -EINVAL when no VM has been created yet and
+ * -EFAULT when the ioctl fails.
+ */
+int hax_populate_ram(uint64_t va, uint32_t size)
+{
+    struct hax_alloc_ram_info info;
+    DWORD dSize = 0;
+
+    if (!hax_global.vm || !hax_global.vm->fd) {
+        fprintf(stderr, "Allocate memory before vm create?\n");
+        return -EINVAL;
+    }
+
+    info.size = size;
+    info.va = va;
+
+    if (!DeviceIoControl(hax_global.vm->fd,
+                         HAX_VM_IOCTL_ALLOC_RAM,
+                         &info, sizeof(info), NULL, 0, &dSize,
+                         (LPOVERLAPPED) NULL)) {
+        fprintf(stderr, "Failed to allocate %x memory\n", size);
+        /*
+         * DeviceIoControl() returns zero on failure, so its result must not
+         * be returned directly: callers would see 0, i.e. success.
+         */
+        return -EFAULT;
+    }
+
+    return 0;
+}
+
+/*
+ * Fill a hax_set_ram_info with @start_pa, @size, @host_va and @flags and
+ * send it to the VM device through HAX_VM_IOCTL_SET_RAM.  @flags is
+ * truncated to 8 bits before it is handed to the driver.
+ *
+ * Returns 0 on success, -EINVAL when no VM has been created yet and
+ * -EFAULT when the ioctl fails.
+ */
+int hax_set_ram(uint64_t start_pa, uint32_t size, uint64_t host_va, int flags)
+{
+    struct hax_set_ram_info info;
+    DWORD dSize = 0;
+
+    /* Same precondition as hax_populate_ram(): the VM must exist. */
+    if (!hax_global.vm || !hax_global.vm->fd) {
+        return -EINVAL;
+    }
+
+    info.pa_start = start_pa;
+    info.size = size;
+    info.va = host_va;
+    info.flags = (uint8_t) flags;
+
+    if (!DeviceIoControl(hax_global.vm->fd, HAX_VM_IOCTL_SET_RAM,
+                         &info, sizeof(info), NULL, 0, &dSize,
+                         (LPOVERLAPPED) NULL)) {
+        return -EFAULT;
+    }
+    return 0;
+}
+
+/*
+ * Read the capability information of the HAX module into *cap.
+ *
+ * Returns 0 on success, -ENODEV when hax_invalid_fd() rejects hax->fd and
+ * -EFAULT when the ioctl fails.
+ */
+int hax_capability(struct hax_state *hax, struct hax_capabilityinfo *cap)
+{
+    HANDLE hDevice = hax->fd;   /* handle to hax module */
+    DWORD dSize = 0;
+    DWORD err;
+
+    if (hax_invalid_fd(hDevice)) {
+        fprintf(stderr, "Invalid fd for hax device!\n");
+        return -ENODEV;
+    }
+
+    if (!DeviceIoControl(hDevice, HAX_IOCTL_CAPABILITY, NULL, 0, cap,
+                         sizeof(*cap), &dSize, (LPOVERLAPPED) NULL)) {
+        err = GetLastError();
+        if (err == ERROR_INSUFFICIENT_BUFFER || err == ERROR_MORE_DATA) {
+            fprintf(stderr, "hax capability is too long to hold.\n");
+        }
+        fprintf(stderr, "Failed to get Hax capability:%lu\n", err);
+        return -EFAULT;
+    }
+    return 0;
+}
+
+/*
+ * Read the version information of the HAX module into *version.
+ *
+ * Returns 0 on success, -ENODEV when hax_invalid_fd() rejects hax->fd and
+ * -EFAULT when the ioctl fails.
+ */
+int hax_mod_version(struct hax_state *hax, struct hax_module_version *version)
+{
+    HANDLE hDevice = hax->fd;   /* handle to hax module */
+    DWORD dSize = 0;
+    DWORD err;
+
+    if (hax_invalid_fd(hDevice)) {
+        fprintf(stderr, "Invalid fd for hax device!\n");
+        return -ENODEV;
+    }
+
+    if (!DeviceIoControl(hDevice,
+                         HAX_IOCTL_VERSION,
+                         NULL, 0,
+                         version, sizeof(*version), &dSize,
+                         (LPOVERLAPPED) NULL)) {
+        err = GetLastError();
+        if (err == ERROR_INSUFFICIENT_BUFFER || err == ERROR_MORE_DATA) {
+            fprintf(stderr, "hax module version is too long to hold.\n");
+        }
+        fprintf(stderr, "Failed to get Hax module version:%lu\n", err);
+        return -EFAULT;
+    }
+    return 0;
+}
+
+/*
+ * Build the device path of VM @vm_id.
+ *
+ * Returns a newly allocated string that the caller releases with
+ * g_free(), or NULL when @vm_id exceeds MAX_VM_ID.
+ */
+static char *hax_vm_devfs_string(int vm_id)
+{
+    if (vm_id > MAX_VM_ID) {
+        fprintf(stderr, "Too big VM id\n");
+        return NULL;
+    }
+
+    return g_strdup_printf("\\\\.\\hax_vm%02d", vm_id);
+}
+
+/*
+ * Build the device path of vcpu @vcpu_id of VM @vm_id.
+ *
+ * Returns a newly allocated string that the caller releases with
+ * g_free(), or NULL when either id exceeds its maximum.
+ */
+static char *hax_vcpu_devfs_string(int vm_id, int vcpu_id)
+{
+    if (vm_id > MAX_VM_ID || vcpu_id > MAX_VCPU_ID) {
+        fprintf(stderr, "Too big vm id %x or vcpu id %x\n", vm_id, vcpu_id);
+        return NULL;
+    }
+
+    return g_strdup_printf("\\\\.\\hax_vm%02d_vcpu%02d", vm_id, vcpu_id);
+}
+
+/*
+ * Create a VM through the HAX module and store its id in *vmid.
+ *
+ * Returns 0 on success, -EINVAL when hax_invalid_fd() rejects hax->fd and
+ * -1 when the ioctl fails.  When hax->vm is already set, 0 is returned
+ * and *vmid is left untouched.
+ */
+int hax_host_create_vm(struct hax_state *hax, int *vmid)
+{
+    int vm_id = 0;
+    DWORD dSize = 0;
+
+    if (hax_invalid_fd(hax->fd)) {
+        return -EINVAL;
+    }
+
+    if (hax->vm) {
+        return 0;
+    }
+
+    if (!DeviceIoControl(hax->fd,
+                         HAX_IOCTL_CREATE_VM,
+                         NULL, 0, &vm_id, sizeof(vm_id), &dSize,
+                         (LPOVERLAPPED) NULL)) {
+        fprintf(stderr, "Failed to create VM. Error code: %lu\n",
+                GetLastError());
+        return -1;
+    }
+    *vmid = vm_id;
+    return 0;
+}
+
+/*
+ * Open the device of VM @vm_id.
+ *
+ * Returns the VM handle, or INVALID_HANDLE_VALUE on failure.  @hax is
+ * not used.
+ */
+hax_fd hax_host_open_vm(struct hax_state *hax, int vm_id)
+{
+    char *vm_name;
+    hax_fd hDeviceVM;
+
+    vm_name = hax_vm_devfs_string(vm_id);
+    if (!vm_name) {
+        fprintf(stderr, "Failed to open VM. VM name is null\n");
+        return INVALID_HANDLE_VALUE;
+    }
+
+    hDeviceVM = CreateFile(vm_name,
+                           GENERIC_READ | GENERIC_WRITE,
+                           0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
+    if (hDeviceVM == INVALID_HANDLE_VALUE) {
+        fprintf(stderr, "Open the vm device error:%s, ec:%lu\n",
+                vm_name, GetLastError());
+    }
+
+    g_free(vm_name);
+    return hDeviceVM;
+}
+
+/*
+ * Send *qversion to the VM device through
+ * HAX_VM_IOCTL_NOTIFY_QEMU_VERSION.
+ *
+ * Returns 0 on success, -EINVAL when hax_invalid_fd() rejects @vm_fd and
+ * -1 when the ioctl fails.
+ */
+int hax_notify_qemu_version(hax_fd vm_fd, struct hax_qemu_version *qversion)
+{
+    DWORD dSize = 0;
+
+    if (hax_invalid_fd(vm_fd)) {
+        return -EINVAL;
+    }
+    if (!DeviceIoControl(vm_fd,
+                         HAX_VM_IOCTL_NOTIFY_QEMU_VERSION,
+                         qversion, sizeof(struct hax_qemu_version),
+                         NULL, 0, &dSize, (LPOVERLAPPED) NULL)) {
+        fprintf(stderr, "Failed to notify qemu API version\n");
+        return -1;
+    }
+    return 0;
+}
+
+/*
+ * Create vcpu @vcpuid in the VM behind @vm_fd.
+ *
+ * Returns 0 on success and -1 when the ioctl fails.
+ */
+int hax_host_create_vcpu(hax_fd vm_fd, int vcpuid)
+{
+    DWORD dSize = 0;
+
+    if (!DeviceIoControl(vm_fd,
+                         HAX_VM_IOCTL_VCPU_CREATE,
+                         &vcpuid, sizeof(vcpuid), NULL, 0, &dSize,
+                         (LPOVERLAPPED) NULL)) {
+        fprintf(stderr, "Failed to create vcpu %x\n", vcpuid);
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Open the device of vcpu @vcpuid of VM @vmid.
+ *
+ * Returns the vcpu handle, or INVALID_HANDLE_VALUE on failure.
+ */
+hax_fd hax_host_open_vcpu(int vmid, int vcpuid)
+{
+    char *devfs_path;
+    hax_fd hDeviceVCPU;
+
+    devfs_path = hax_vcpu_devfs_string(vmid, vcpuid);
+    if (!devfs_path) {
+        fprintf(stderr, "Failed to get the devfs\n");
+        return INVALID_HANDLE_VALUE;
+    }
+
+    hDeviceVCPU = CreateFile(devfs_path,
+                             GENERIC_READ | GENERIC_WRITE,
+                             0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL,
+                             NULL);
+
+    if (hDeviceVCPU == INVALID_HANDLE_VALUE) {
+        fprintf(stderr, "Failed to open the vcpu devfs\n");
+    }
+    g_free(devfs_path);
+    return hDeviceVCPU;
+}
+
+/*
+ * Set up the tunnel of @vcpu and record the addresses reported by the
+ * driver in vcpu->tunnel and vcpu->iobuf.
+ *
+ * Returns 0 on success, -1 when the ioctl fails and -EINVAL when the
+ * reported tunnel size is rejected by valid_hax_tunnel_size().
+ */
+int hax_host_setup_vcpu_channel(struct hax_vcpu_state *vcpu)
+{
+    struct hax_tunnel_info info;
+    DWORD dSize = 0;
+
+    if (!DeviceIoControl(vcpu->fd,
+                         HAX_VCPU_IOCTL_SETUP_TUNNEL,
+                         NULL, 0, &info, sizeof(info), &dSize,
+                         (LPOVERLAPPED) NULL)) {
+        fprintf(stderr, "Failed to setup the hax tunnel\n");
+        return -1;
+    }
+
+    if (!valid_hax_tunnel_size(info.size)) {
+        fprintf(stderr, "Invalid hax tunnel size %x\n", info.size);
+        return -EINVAL;
+    }
+    vcpu->tunnel = (struct hax_tunnel *) (intptr_t) (info.va);
+    vcpu->iobuf = (unsigned char *) (intptr_t) (info.io_va);
+    return 0;
+}
+
+/*
+ * Run @vcpu through HAX_VCPU_IOCTL_RUN.
+ *
+ * Returns 0 on success and -EFAULT when the ioctl fails.
+ */
+int hax_vcpu_run(struct hax_vcpu_state *vcpu)
+{
+    DWORD dSize = 0;
+
+    if (!DeviceIoControl(vcpu->fd,
+                         HAX_VCPU_IOCTL_RUN,
+                         NULL, 0, NULL, 0, &dSize, (LPOVERLAPPED) NULL)) {
+        return -EFAULT;
+    }
+    return 0;
+}
+
+/*
+ * Transfer the FPU state of the vcpu behind @env: written from *fl when
+ * @set is non-zero, read into *fl otherwise.
+ *
+ * Returns 0 on success, -1 when the vcpu has no valid fd and -EFAULT when
+ * the ioctl fails.
+ */
+int hax_sync_fpu(CPUArchState *env, struct fx_layout *fl, int set)
+{
+    int ret;
+    hax_fd fd;
+    DWORD dSize = 0;
+
+    fd = hax_vcpu_get_fd(env);
+    if (hax_invalid_fd(fd)) {
+        return -1;
+    }
+
+    if (set) {
+        ret = DeviceIoControl(fd,
+                              HAX_VCPU_IOCTL_SET_FPU,
+                              fl, sizeof(*fl), NULL, 0, &dSize,
+                              (LPOVERLAPPED) NULL);
+    } else {
+        ret = DeviceIoControl(fd,
+                              HAX_VCPU_IOCTL_GET_FPU,
+                              NULL, 0, fl, sizeof(*fl), &dSize,
+                              (LPOVERLAPPED) NULL);
+    }
+    return ret ? 0 : -EFAULT;
+}
+
+/*
+ * Transfer MSRs of the vcpu behind @env: written from *msrs when @set is
+ * non-zero, read otherwise.  *msrs is both the input and the output
+ * buffer of the ioctl in either direction.
+ *
+ * Returns 0 on success, -1 when the vcpu has no valid fd and -EFAULT when
+ * the ioctl fails.
+ */
+int hax_sync_msr(CPUArchState *env, struct hax_msr_data *msrs, int set)
+{
+    int ret;
+    hax_fd fd;
+    DWORD dSize = 0;
+
+    fd = hax_vcpu_get_fd(env);
+    if (hax_invalid_fd(fd)) {
+        return -1;
+    }
+
+    ret = DeviceIoControl(fd,
+                          set ? HAX_VCPU_IOCTL_SET_MSRS
+                              : HAX_VCPU_IOCTL_GET_MSRS,
+                          msrs, sizeof(*msrs),
+                          msrs, sizeof(*msrs), &dSize, (LPOVERLAPPED) NULL);
+    return ret ? 0 : -EFAULT;
+}
+
+/*
+ * Transfer the register state of the vcpu behind @env: written from
+ * *state when @set is non-zero, read into *state otherwise.
+ *
+ * Returns 0 on success, -1 when the vcpu has no valid fd and -EFAULT when
+ * the ioctl fails.
+ */
+int hax_sync_vcpu_state(CPUArchState *env, struct vcpu_state_t *state, int set)
+{
+    int ret;
+    hax_fd fd;
+    DWORD dSize = 0;
+
+    fd = hax_vcpu_get_fd(env);
+    if (hax_invalid_fd(fd)) {
+        return -1;
+    }
+
+    if (set) {
+        ret = DeviceIoControl(fd,
+                              HAX_VCPU_SET_REGS,
+                              state, sizeof(*state),
+                              NULL, 0, &dSize, (LPOVERLAPPED) NULL);
+    } else {
+        ret = DeviceIoControl(fd,
+                              HAX_VCPU_GET_REGS,
+                              NULL, 0,
+                              state, sizeof(*state), &dSize, (LPOVERLAPPED) NULL);
+    }
+    return ret ? 0 : -EFAULT;
+}
+
+/*
+ * Inject interrupt @vector into the vcpu behind @env.
+ *
+ * Returns 0 on success, -1 when the vcpu has no valid fd and -EFAULT when
+ * the ioctl fails.
+ */
+int hax_inject_interrupt(CPUArchState *env, int vector)
+{
+    hax_fd fd;
+    DWORD dSize = 0;
+
+    fd = hax_vcpu_get_fd(env);
+    if (hax_invalid_fd(fd)) {
+        return -1;
+    }
+
+    if (!DeviceIoControl(fd,
+                         HAX_VCPU_IOCTL_INTERRUPT,
+                         &vector, sizeof(vector), NULL, 0, &dSize,
+                         (LPOVERLAPPED) NULL)) {
+        return -EFAULT;
+    }
+    return 0;
+}
diff --git a/target-i386/hax-windows.h b/target-i386/hax-windows.h new file mode 100644 index 0000000..3c7533f --- /dev/null +++ b/target-i386/hax-windows.h
@@ -0,0 +1,105 @@
+/*
+ * QEMU HAXM support
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *
+ * Copyright (c) 2011 Intel Corporation
+ *  Written by:
+ *  Jiang Yunhong<yunhong.jiang@intel.com>
+ *  Xin Xiaohui<xiaohui.xin@intel.com>
+ *  Zhang Xiantao<xiantao.zhang@intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef HAX_WINDOWS_H
+#define HAX_WINDOWS_H
+
+#include <windows.h>
+#include <memory.h>
+#include <malloc.h>
+#include <winioctl.h>
+#include <string.h>
+#include <stdio.h>
+#include <windef.h>
+
+#define HAX_INVALID_FD INVALID_HANDLE_VALUE
+
+/* Close the handle stored in hax->fd. */
+static inline void hax_mod_close(struct hax_state *hax)
+{
+    CloseHandle(hax->fd);
+}
+
+/* Close the handle @fd. */
+static inline void hax_close_fd(hax_fd fd)
+{
+    CloseHandle(fd);
+}
+
+/*
+ * Return non-zero when @fd is not a usable handle.
+ *
+ * CreateFile() reports failure as INVALID_HANDLE_VALUE, while
+ * hax_mod_open() returns NULL when the device cannot be opened, so both
+ * values are treated as invalid.
+ */
+static inline int hax_invalid_fd(hax_fd fd)
+{
+    return fd == NULL || fd == INVALID_HANDLE_VALUE;
+}
+
+#define HAX_DEVICE_TYPE 0x4000
+
+/* HAX module ioctls */
+#define HAX_IOCTL_VERSION       CTL_CODE(HAX_DEVICE_TYPE, 0x900, \
+                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_IOCTL_CREATE_VM     CTL_CODE(HAX_DEVICE_TYPE, 0x901, \
+                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_IOCTL_CAPABILITY    CTL_CODE(HAX_DEVICE_TYPE, 0x910, \
+                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
+
+/* VM ioctls */
+#define HAX_VM_IOCTL_VCPU_CREATE   CTL_CODE(HAX_DEVICE_TYPE, 0x902, \
+                                            METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VM_IOCTL_ALLOC_RAM     CTL_CODE(HAX_DEVICE_TYPE, 0x903, \
+                                            METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VM_IOCTL_SET_RAM       CTL_CODE(HAX_DEVICE_TYPE, 0x904, \
+                                            METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VM_IOCTL_VCPU_DESTROY  CTL_CODE(HAX_DEVICE_TYPE, 0x905, \
+                                            METHOD_BUFFERED, FILE_ANY_ACCESS)
+
+/* vcpu ioctls */
+#define HAX_VCPU_IOCTL_RUN      CTL_CODE(HAX_DEVICE_TYPE, 0x906, \
+                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VCPU_IOCTL_SET_MSRS CTL_CODE(HAX_DEVICE_TYPE, 0x907, \
+                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VCPU_IOCTL_GET_MSRS CTL_CODE(HAX_DEVICE_TYPE, 0x908, \
+                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VCPU_IOCTL_SET_FPU  CTL_CODE(HAX_DEVICE_TYPE, 0x909, \
+                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VCPU_IOCTL_GET_FPU  CTL_CODE(HAX_DEVICE_TYPE, 0x90a, \
+                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
+
+#define HAX_VCPU_IOCTL_SETUP_TUNNEL CTL_CODE(HAX_DEVICE_TYPE, 0x90b, \
+                                             METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VCPU_IOCTL_INTERRUPT    CTL_CODE(HAX_DEVICE_TYPE, 0x90c, \
+                                             METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VCPU_SET_REGS           CTL_CODE(HAX_DEVICE_TYPE, 0x90d, \
+                                             METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VCPU_GET_REGS           CTL_CODE(HAX_DEVICE_TYPE, 0x90e, \
+                                             METHOD_BUFFERED, FILE_ANY_ACCESS)
+
+/*
+ * Issued on a VM handle.  Shares function code 0x910 with
+ * HAX_IOCTL_CAPABILITY, which is issued on the module handle.
+ */
+#define HAX_VM_IOCTL_NOTIFY_QEMU_VERSION CTL_CODE(HAX_DEVICE_TYPE, 0x910, \
+                                                  METHOD_BUFFERED,        \
+                                                  FILE_ANY_ACCESS)
+#endif /* HAX_WINDOWS_H */
diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c index 6cbdf17..8b08393 100644 --- a/target-i386/seg_helper.c +++ b/target-i386/seg_helper.c
@@ -25,6 +25,10 @@ #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "exec/log.h" +#ifdef CONFIG_HAX +#include "target-i386/hax-i386.h" +#include "sysemu/hax.h" +#endif //#define DEBUG_PCALL @@ -1334,6 +1338,10 @@ !(env->hflags & HF_SMM_MASK)) { cpu_svm_check_intercept_param(env, SVM_EXIT_SMI, 0); cs->interrupt_request &= ~CPU_INTERRUPT_SMI; +#ifdef CONFIG_HAX + if (hax_enabled()) + cs->hax_vcpu->resync = 1; +#endif do_smm_enter(cpu); ret = true; } else if ((interrupt_request & CPU_INTERRUPT_NMI) &&
diff --git a/target-i386/translate.c b/target-i386/translate.c index fa2ac48..121527c 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c
@@ -31,6 +31,9 @@ #include "trace-tcg.h" #include "exec/log.h" +#ifdef CONFIG_HAX +#include "sysemu/hax.h" +#endif #define PREFIX_REPZ 0x01 #define PREFIX_REPNZ 0x02 @@ -8303,6 +8306,19 @@ } pc_ptr = disas_insn(env, dc, pc_ptr); + +#ifdef CONFIG_HAX + if (hax_enabled() && hax_stop_translate(cs)) { + /* When the host CPU doesn't support VMX "unrestricted guest" mode, + * TCG is used to execute MMIO instructions. This code path is used + * to limit the translation to a single machine instruction, in order + * to try to return to HAX execution as soon as possible. */ + gen_jmp_im(pc_ptr - dc->cs_base); + gen_eob(dc); + break; + } +#endif /* CONFIG_HAX */ + /* stop translation if indicated */ if (dc->is_jmp) break;
diff --git a/vl.c b/vl.c index 5f056a3..eb9370e 100755 --- a/vl.c +++ b/vl.c
@@ -94,6 +94,7 @@ #include "migration/migration.h" #include "sysemu/cpus.h" #include "sysemu/kvm.h" +#include "sysemu/hax.h" #include "qapi/qmp/qjson.h" #include "qemu/option.h" #include "qemu/config-file.h" @@ -2023,8 +2024,20 @@ #ifdef CONFIG_PROFILER int64_t ti; #endif + +#ifdef CONFIG_HAX + if (hax_sync_vcpus() < 0) { + fprintf(stderr, "Internal error: hax sync failed\n"); + return; + } +#endif + do { +#ifdef CONFIG_HAX + nonblocking = !kvm_enabled() && !xen_enabled() && !hax_enabled() && last_io > 0; +#else nonblocking = !kvm_enabled() && !xen_enabled() && last_io > 0; +#endif #ifdef CONFIG_PROFILER ti = profile_getclock(); #endif @@ -2994,7 +3007,9 @@ error_report("ram size too large"); return false; } - +#ifdef CONFIG_HAX + hax_pre_init(ram_size); +#endif /* store value for the future use */ qemu_opt_set_number(opts, "size", ram_size, &error_abort); *maxram_size = ram_size; @@ -3844,6 +3859,13 @@ olist = qemu_find_opts("machine"); qemu_opts_parse_noisily(olist, "accel=kvm", false); break; +#ifdef CONFIG_HAX + case QEMU_OPTION_enable_hax: + olist = qemu_find_opts("machine"); + qemu_opts_parse_noisily(olist, "accel=hax", false); + hax_disable(0); + break; +#endif /* CONFIG_HAX */ case QEMU_OPTION_M: case QEMU_OPTION_machine: olist = qemu_find_opts("machine"); @@ -4692,6 +4714,18 @@ error_report("could not acquire pid file: %s", strerror(errno)); return 1; } +#ifdef CONFIG_HAX + uint64_t hax_max_ram = 0; + if (hax_get_max_ram(&hax_max_ram) == 0 && hax_max_ram > 0) { + if (ram_size > hax_max_ram) { + const int requested_meg = ram_size / (1024 * 1024); + const int actual_meg = hax_max_ram / (1024 * 1024); + fprintf(stderr, "Warning: requested ram_size %dM too big, reduced to %dM\n", + requested_meg, actual_meg); + ram_size = hax_max_ram; + } + } +#endif /* CONFIG_HAX */ if (qemu_opts_foreach(qemu_find_opts("device"), device_help_func, NULL, NULL)) { @@ -4786,10 +4820,17 @@ cpu_ticks_init(); if (icount_opts) { +#ifdef CONFIG_HAX + if (kvm_enabled() || 
xen_enabled() || hax_enabled()) { + error_report("-icount is not allowed with kvm, hax or xen"); + return 1; + } +#else if (kvm_enabled() || xen_enabled()) { error_report("-icount is not allowed with kvm or xen"); return 1; } +#endif configure_icount(icount_opts, &error_abort); qemu_opts_del(icount_opts); } @@ -5029,6 +5070,15 @@ numa_post_machine_init(); +#ifdef CONFIG_HAX + if (hax_enabled()) { + if (hax_sync_vcpus() < 0) { + fprintf(stderr, "Internal error: Initial hax sync failed\n"); + return 1; + } + } +#endif /* CONFIG_HAX */ + if (qemu_opts_foreach(qemu_find_opts("fw_cfg"), parse_fw_cfg, fw_cfg_find(), NULL) != 0) { return 1;