Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream-smm' into staging

This series implements KVM support for System Management Mode (SMM), and lets
you enable or disable it through the "smm" property of the x86 machine types.
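
The property takes the values "on", "off" and "auto" (the default). With
"auto", SMM is enabled whenever the accelerator supports it; with "on", the
machine fails to start if the accelerator does not. As an illustrative
invocation (the KVM, memory and disk arguments are placeholders, not part of
this series), SMM can be forced off on a Q35 guest with:

    # illustrative only; -enable-kvm/-m/disk.img are placeholder options
    qemu-system-x86_64 -enable-kvm -machine q35,smm=off -m 1G disk.img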

# gpg: Signature made Mon Jul  6 17:41:05 2015 BST using RSA key ID 78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream-smm:
  pc: add SMM property
  ich9: add smm_enabled field and arguments
  pc_piix: rename kvm_enabled to smm_enabled
  target-i386: register a separate KVM address space including SMRAM regions
  kvm-all: kvm_irqchip_create is not expected to fail
  kvm-all: add support for multiple address spaces
  kvm-all: make KVM's memory listener more generic
  kvm-all: move internal types to kvm_int.h
  kvm-all: remove useless typedef
  kvm-all: put kvm_mem_flags to more work
  target-i386: add support for SMBASE MSR and SMIs
  piix4/ich9: do not raise SMI on ACPI enable/disable commands
  linux-headers: Update to 4.2-rc1

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c
index 8a64ffb..f4dc7a8 100644
--- a/hw/acpi/ich9.c
+++ b/hw/acpi/ich9.c
@@ -192,7 +192,7 @@
     acpi_pm_tmr_reset(&pm->acpi_regs);
     acpi_gpe_reset(&pm->acpi_regs);
 
-    if (kvm_enabled()) {
+    if (!pm->smm_enabled) {
         /* Mark SMM as already inited to prevent SMM from running. KVM does not
          * support SMM mode. */
         pm->smi_en |= ICH9_PMIO_SMI_EN_APMC_EN;
@@ -209,7 +209,7 @@
     acpi_pm1_evt_power_down(&pm->acpi_regs);
 }
 
-void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm,
+void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, bool smm_enabled,
                   qemu_irq sci_irq)
 {
     memory_region_init(&pm->io, OBJECT(lpc_pci), "ich9-pm", ICH9_PMIO_SIZE);
@@ -231,6 +231,7 @@
                           "acpi-smi", 8);
     memory_region_add_subregion(&pm->io, ICH9_PMIO_SMI_EN, &pm->io_smi);
 
+    pm->smm_enabled = smm_enabled;
     pm->irq = sci_irq;
     qemu_register_reset(pm_reset, pm);
     pm->powerdown_notifier.notify = pm_powerdown_req;
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index 3bd1d5a..2cd2fee 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -72,7 +72,7 @@
 
     qemu_irq irq;
     qemu_irq smi_irq;
-    int kvm_enabled;
+    int smm_enabled;
     Notifier machine_ready;
     Notifier powerdown_notifier;
 
@@ -112,6 +112,9 @@
 
     /* ACPI specs 3.0, 4.7.2.5 */
     acpi_pm1_cnt_update(&s->ar, val == ACPI_ENABLE, val == ACPI_DISABLE);
+    if (val == ACPI_ENABLE || val == ACPI_DISABLE) {
+        return;
+    }
 
     if (d->config[0x5b] & (1 << 1)) {
         if (s->smi_irq) {
@@ -319,7 +322,7 @@
     pci_conf[0x40] = 0x01; /* PM io base read only bit */
     pci_conf[0x80] = 0;
 
-    if (s->kvm_enabled) {
+    if (!s->smm_enabled) {
         /* Mark SMM as already inited (until KVM supports SMM). */
         pci_conf[0x5B] = 0x02;
     }
@@ -450,7 +453,7 @@
     /* APM */
     apm_init(dev, &s->apm, apm_ctrl_changed, s);
 
-    if (s->kvm_enabled) {
+    if (!s->smm_enabled) {
         /* Mark SMM as already inited to prevent SMM from running.  KVM does not
          * support SMM mode. */
         pci_conf[0x5B] = 0x02;
@@ -501,7 +504,7 @@
 
 I2CBus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base,
                       qemu_irq sci_irq, qemu_irq smi_irq,
-                      int kvm_enabled, DeviceState **piix4_pm)
+                      int smm_enabled, DeviceState **piix4_pm)
 {
     DeviceState *dev;
     PIIX4PMState *s;
@@ -515,7 +518,7 @@
     s = PIIX4_PM(dev);
     s->irq = sci_irq;
     s->smi_irq = smi_irq;
-    s->kvm_enabled = kvm_enabled;
+    s->smm_enabled = smm_enabled;
     if (xen_enabled()) {
         s->use_acpi_pci_hotplug = false;
     }
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index a66416d..7959b44 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1768,6 +1768,48 @@
     visit_type_OnOffAuto(v, &pcms->vmport, name, errp);
 }
 
+bool pc_machine_is_smm_enabled(PCMachineState *pcms)
+{
+    bool smm_available = false;
+
+    if (pcms->smm == ON_OFF_AUTO_OFF) {
+        return false;
+    }
+
+    if (tcg_enabled() || qtest_enabled()) {
+        smm_available = true;
+    } else if (kvm_enabled()) {
+        smm_available = kvm_has_smm();
+    }
+
+    if (smm_available) {
+        return true;
+    }
+
+    if (pcms->smm == ON_OFF_AUTO_ON) {
+        error_report("System Management Mode not supported by this hypervisor.");
+        exit(1);
+    }
+    return false;
+}
+
+static void pc_machine_get_smm(Object *obj, Visitor *v, void *opaque,
+                              const char *name, Error **errp)
+{
+    PCMachineState *pcms = PC_MACHINE(obj);
+    OnOffAuto smm = pcms->smm;
+
+    visit_type_OnOffAuto(v, &smm, name, errp);
+}
+
+static void pc_machine_set_smm(Object *obj, Visitor *v, void *opaque,
+                                  const char *name, Error **errp)
+{
+    PCMachineState *pcms = PC_MACHINE(obj);
+
+    visit_type_OnOffAuto(v, &pcms->smm, name, errp);
+}
+
 static bool pc_machine_get_aligned_dimm(Object *obj, Error **errp)
 {
     PCMachineState *pcms = PC_MACHINE(obj);
@@ -1792,6 +1834,15 @@
                                     "Maximum ram below the 4G boundary (32bit boundary)",
                                     NULL);
 
+    pcms->smm = ON_OFF_AUTO_AUTO;
+    object_property_add(obj, PC_MACHINE_SMM, "OnOffAuto",
+                        pc_machine_get_smm,
+                        pc_machine_set_smm,
+                        NULL, NULL, NULL);
+    object_property_set_description(obj, PC_MACHINE_SMM,
+                                    "Enable SMM (pc & q35)",
+                                    NULL);
+
     pcms->vmport = ON_OFF_AUTO_AUTO;
     object_property_add(obj, PC_MACHINE_VMPORT, "OnOffAuto",
                         pc_machine_get_vmport,
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index e142f75..56cdcb9 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -287,7 +287,8 @@
         /* TODO: Populate SPD eeprom data.  */
         smbus = piix4_pm_init(pci_bus, piix3_devfn + 3, 0xb100,
                               gsi[9], smi_irq,
-                              kvm_enabled(), &piix4_pm);
+                              pc_machine_is_smm_enabled(pc_machine),
+                              &piix4_pm);
         smbus_eeprom_init(smbus, 8, NULL, 0);
 
         object_property_add_link(OBJECT(machine), PC_MACHINE_ACPI_DEVICE_PROP,
@@ -306,7 +307,11 @@
 
 static void pc_compat_2_3(MachineState *machine)
 {
+    PCMachineState *pcms = PC_MACHINE(machine);
     savevm_skip_section_footers();
+    if (kvm_enabled()) {
+        pcms->smm = ON_OFF_AUTO_OFF;
+    }
 }
 
 static void pc_compat_2_2(MachineState *machine)
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 082cd93..8aa3a67 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -253,7 +253,7 @@
                          (pc_machine->vmport != ON_OFF_AUTO_ON), 0xff0104);
 
     /* connect pm stuff to lpc */
-    ich9_lpc_pm_init(lpc);
+    ich9_lpc_pm_init(lpc, pc_machine_is_smm_enabled(pc_machine));
 
     /* ahci and SATA device, for q35 1 ahci controller is built-in */
     ahci = pci_create_simple_multifunction(host_bus,
@@ -290,7 +290,11 @@
 
 static void pc_compat_2_3(MachineState *machine)
 {
+    PCMachineState *pcms = PC_MACHINE(machine);
     savevm_skip_section_footers();
+    if (kvm_enabled()) {
+        pcms->smm = ON_OFF_AUTO_OFF;
+    }
 }
 
 static void pc_compat_2_2(MachineState *machine)
diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c
index b3e0b1f..bd655b8 100644
--- a/hw/isa/lpc_ich9.c
+++ b/hw/isa/lpc_ich9.c
@@ -357,11 +357,13 @@
     }
 }
 
-void ich9_lpc_pm_init(PCIDevice *lpc_pci)
+void ich9_lpc_pm_init(PCIDevice *lpc_pci, bool smm_enabled)
 {
     ICH9LPCState *lpc = ICH9_LPC_DEVICE(lpc_pci);
+    qemu_irq sci_irq;
 
-    ich9_pm_init(lpc_pci, &lpc->pm, qemu_allocate_irq(ich9_set_sci, lpc, 0));
+    sci_irq = qemu_allocate_irq(ich9_set_sci, lpc, 0);
+    ich9_pm_init(lpc_pci, &lpc->pm, smm_enabled, sci_irq);
     ich9_lpc_reset(&lpc->d.qdev);
 }
 
@@ -375,6 +377,9 @@
     acpi_pm1_cnt_update(&lpc->pm.acpi_regs,
                         val == ICH9_APM_ACPI_ENABLE,
                         val == ICH9_APM_ACPI_DISABLE);
+    if (val == ICH9_APM_ACPI_ENABLE || val == ICH9_APM_ACPI_DISABLE) {
+        return;
+    }
 
     /* SMI_EN = PMBASE + 30. SMI control and enable register */
     if (lpc->pm.smi_en & ICH9_PMIO_SMI_EN_APMC_EN) {
diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h
index 77cc65c..ac24bbe 100644
--- a/include/hw/acpi/ich9.h
+++ b/include/hw/acpi/ich9.h
@@ -54,10 +54,11 @@
     uint8_t disable_s3;
     uint8_t disable_s4;
     uint8_t s4_val;
+    uint8_t smm_enabled;
 } ICH9LPCPMRegs;
 
 void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm,
-                  qemu_irq sci_irq);
+                  bool smm_enabled, qemu_irq sci_irq);
 void ich9_pm_iospace_update(ICH9LPCPMRegs *pm, uint32_t pm_io_base);
 extern const VMStateDescription vmstate_ich9_pm;
 
diff --git a/include/hw/i386/ich9.h b/include/hw/i386/ich9.h
index a2cc15c..b317a48 100644
--- a/include/hw/i386/ich9.h
+++ b/include/hw/i386/ich9.h
@@ -17,7 +17,7 @@
 void ich9_lpc_set_irq(void *opaque, int irq_num, int level);
 int ich9_lpc_map_irq(PCIDevice *pci_dev, int intx);
 PCIINTxRoute ich9_route_intx_pin_to_irq(void *opaque, int pirq_pin);
-void ich9_lpc_pm_init(PCIDevice *pci_lpc);
+void ich9_lpc_pm_init(PCIDevice *pci_lpc, bool smm_enabled);
 I2CBus *ich9_smb_init(PCIBus *bus, int devfn, uint32_t smb_io_base);
 
 #define ICH9_CC_SIZE                            (16 * 1024)     /* 16KB */
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 328c8f7..786a1d5 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -37,6 +37,7 @@
 
     uint64_t max_ram_below_4g;
     OnOffAuto vmport;
+    OnOffAuto smm;
     bool enforce_aligned_dimm;
 };
 
@@ -44,6 +45,7 @@
 #define PC_MACHINE_MEMHP_REGION_SIZE "hotplug-memory-region-size"
 #define PC_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g"
 #define PC_MACHINE_VMPORT           "vmport"
+#define PC_MACHINE_SMM              "smm"
 #define PC_MACHINE_ENFORCE_ALIGNED_DIMM "enforce-aligned-dimm"
 
 /**
@@ -155,6 +157,7 @@
 /* pc.c */
 extern int fd_bootchk;
 
+bool pc_machine_is_smm_enabled(PCMachineState *pcms);
 void pc_register_ferr_irq(qemu_irq irq);
 void pc_acpi_smi_interrupt(void *opaque, int irq, int level);
 
@@ -214,7 +217,7 @@
 
 I2CBus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base,
                       qemu_irq sci_irq, qemu_irq smi_irq,
-                      int kvm_enabled, DeviceState **piix4_pm);
+                      int smm_enabled, DeviceState **piix4_pm);
 void piix4_smbus_register_device(SMBusDevice *dev, uint8_t addr);
 
 /* hpet.c */
diff --git a/include/standard-headers/linux/input.h b/include/standard-headers/linux/input.h
index b94d365..a459dd2 100644
--- a/include/standard-headers/linux/input.h
+++ b/include/standard-headers/linux/input.h
@@ -367,7 +367,8 @@
 #define KEY_MSDOS		151
 #define KEY_COFFEE		152	/* AL Terminal Lock/Screensaver */
 #define KEY_SCREENLOCK		KEY_COFFEE
-#define KEY_DIRECTION		153
+#define KEY_ROTATE_DISPLAY	153	/* Display orientation for e.g. tablets */
+#define KEY_DIRECTION		KEY_ROTATE_DISPLAY
 #define KEY_CYCLEWINDOWS	154
 #define KEY_MAIL		155
 #define KEY_BOOKMARKS		156	/* AC Bookmarks */
@@ -700,6 +701,10 @@
 #define KEY_NUMERIC_9		0x209
 #define KEY_NUMERIC_STAR	0x20a
 #define KEY_NUMERIC_POUND	0x20b
+#define KEY_NUMERIC_A		0x20c	/* Phone key A - HUT Telephony 0xb9 */
+#define KEY_NUMERIC_B		0x20d
+#define KEY_NUMERIC_C		0x20e
+#define KEY_NUMERIC_D		0x20f
 
 #define KEY_CAMERA_FOCUS	0x210
 #define KEY_WPS_BUTTON		0x211	/* WiFi Protected Setup key */
@@ -971,7 +976,8 @@
  */
 #define MT_TOOL_FINGER		0
 #define MT_TOOL_PEN		1
-#define MT_TOOL_MAX		1
+#define MT_TOOL_PALM		2
+#define MT_TOOL_MAX		2
 
 /*
  * Values describing the status of a force-feedback effect
diff --git a/include/standard-headers/linux/virtio_balloon.h b/include/standard-headers/linux/virtio_balloon.h
index 88ada1d..2e2a6dc 100644
--- a/include/standard-headers/linux/virtio_balloon.h
+++ b/include/standard-headers/linux/virtio_balloon.h
@@ -26,6 +26,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE. */
 #include "standard-headers/linux/types.h"
+#include "standard-headers/linux/virtio_types.h"
 #include "standard-headers/linux/virtio_ids.h"
 #include "standard-headers/linux/virtio_config.h"
 
diff --git a/include/standard-headers/linux/virtio_gpu.h b/include/standard-headers/linux/virtio_gpu.h
index cfcfb46..72ef815 100644
--- a/include/standard-headers/linux/virtio_gpu.h
+++ b/include/standard-headers/linux/virtio_gpu.h
@@ -38,6 +38,8 @@
 #ifndef VIRTIO_GPU_HW_H
 #define VIRTIO_GPU_HW_H
 
+#include "standard-headers/linux/types.h"
+
 enum virtio_gpu_ctrl_type {
 	VIRTIO_GPU_UNDEFINED = 0,
 
diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
new file mode 100644
index 0000000..888557a
--- /dev/null
+++ b/include/sysemu/kvm_int.h
@@ -0,0 +1,39 @@
+/*
+ * Internal definitions for a target's KVM support
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_KVM_INT_H
+#define QEMU_KVM_INT_H
+
+#include "sysemu/sysemu.h"
+#include "sysemu/accel.h"
+#include "sysemu/kvm.h"
+
+typedef struct KVMSlot
+{
+    hwaddr start_addr;
+    ram_addr_t memory_size;
+    void *ram;
+    int slot;
+    int flags;
+} KVMSlot;
+
+typedef struct KVMMemoryListener {
+    MemoryListener listener;
+    KVMSlot *slots;
+    int as_id;
+} KVMMemoryListener;
+
+#define TYPE_KVM_ACCEL ACCEL_CLASS_NAME("kvm")
+
+#define KVM_STATE(obj) \
+    OBJECT_CHECK(KVMState, (obj), TYPE_KVM_ACCEL)
+
+void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
+                                  AddressSpace *as, int as_id);
+
+#endif
diff --git a/kvm-all.c b/kvm-all.c
index df57da0..edff01c 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -24,13 +24,11 @@
 #include "qemu/atomic.h"
 #include "qemu/option.h"
 #include "qemu/config-file.h"
-#include "sysemu/sysemu.h"
-#include "sysemu/accel.h"
 #include "hw/hw.h"
 #include "hw/pci/msi.h"
 #include "hw/s390x/adapter.h"
 #include "exec/gdbstub.h"
-#include "sysemu/kvm.h"
+#include "sysemu/kvm_int.h"
 #include "qemu/bswap.h"
 #include "exec/memory.h"
 #include "exec/ram_addr.h"
@@ -60,22 +58,10 @@
 
 #define KVM_MSI_HASHTAB_SIZE    256
 
-typedef struct KVMSlot
-{
-    hwaddr start_addr;
-    ram_addr_t memory_size;
-    void *ram;
-    int slot;
-    int flags;
-} KVMSlot;
-
-typedef struct kvm_dirty_log KVMDirtyLog;
-
 struct KVMState
 {
     AccelState parent_obj;
 
-    KVMSlot *slots;
     int nr_slots;
     int fd;
     int vmfd;
@@ -106,13 +92,9 @@
     QTAILQ_HEAD(msi_hashtab, KVMMSIRoute) msi_hashtab[KVM_MSI_HASHTAB_SIZE];
     bool direct_msi;
 #endif
+    KVMMemoryListener memory_listener;
 };
 
-#define TYPE_KVM_ACCEL ACCEL_CLASS_NAME("kvm")
-
-#define KVM_STATE(obj) \
-    OBJECT_CHECK(KVMState, (obj), TYPE_KVM_ACCEL)
-
 KVMState *kvm_state;
 bool kvm_kernel_irqchip;
 bool kvm_async_interrupts_allowed;
@@ -133,13 +115,14 @@
     KVM_CAP_LAST_INFO
 };
 
-static KVMSlot *kvm_get_free_slot(KVMState *s)
+static KVMSlot *kvm_get_free_slot(KVMMemoryListener *kml)
 {
+    KVMState *s = kvm_state;
     int i;
 
     for (i = 0; i < s->nr_slots; i++) {
-        if (s->slots[i].memory_size == 0) {
-            return &s->slots[i];
+        if (kml->slots[i].memory_size == 0) {
+            return &kml->slots[i];
         }
     }
 
@@ -148,12 +131,14 @@
 
 bool kvm_has_free_slot(MachineState *ms)
 {
-    return kvm_get_free_slot(KVM_STATE(ms->accelerator));
+    KVMState *s = KVM_STATE(ms->accelerator);
+
+    return kvm_get_free_slot(&s->memory_listener);
 }
 
-static KVMSlot *kvm_alloc_slot(KVMState *s)
+static KVMSlot *kvm_alloc_slot(KVMMemoryListener *kml)
 {
-    KVMSlot *slot = kvm_get_free_slot(s);
+    KVMSlot *slot = kvm_get_free_slot(kml);
 
     if (slot) {
         return slot;
@@ -163,14 +148,15 @@
     abort();
 }
 
-static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
+static KVMSlot *kvm_lookup_matching_slot(KVMMemoryListener *kml,
                                          hwaddr start_addr,
                                          hwaddr end_addr)
 {
+    KVMState *s = kvm_state;
     int i;
 
     for (i = 0; i < s->nr_slots; i++) {
-        KVMSlot *mem = &s->slots[i];
+        KVMSlot *mem = &kml->slots[i];
 
         if (start_addr == mem->start_addr &&
             end_addr == mem->start_addr + mem->memory_size) {
@@ -184,15 +170,16 @@
 /*
  * Find overlapping slot with lowest start address
  */
-static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
+static KVMSlot *kvm_lookup_overlapping_slot(KVMMemoryListener *kml,
                                             hwaddr start_addr,
                                             hwaddr end_addr)
 {
+    KVMState *s = kvm_state;
     KVMSlot *found = NULL;
     int i;
 
     for (i = 0; i < s->nr_slots; i++) {
-        KVMSlot *mem = &s->slots[i];
+        KVMSlot *mem = &kml->slots[i];
 
         if (mem->memory_size == 0 ||
             (found && found->start_addr < mem->start_addr)) {
@@ -211,10 +198,11 @@
 int kvm_physical_memory_addr_from_host(KVMState *s, void *ram,
                                        hwaddr *phys_addr)
 {
+    KVMMemoryListener *kml = &s->memory_listener;
     int i;
 
     for (i = 0; i < s->nr_slots; i++) {
-        KVMSlot *mem = &s->slots[i];
+        KVMSlot *mem = &kml->slots[i];
 
         if (ram >= mem->ram && ram < mem->ram + mem->memory_size) {
             *phys_addr = mem->start_addr + (ram - mem->ram);
@@ -225,11 +213,12 @@
     return 0;
 }
 
-static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
+static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot)
 {
+    KVMState *s = kvm_state;
     struct kvm_userspace_memory_region mem;
 
-    mem.slot = slot->slot;
+    mem.slot = slot->slot | (kml->as_id << 16);
     mem.guest_phys_addr = slot->start_addr;
     mem.userspace_addr = (unsigned long)slot->ram;
     mem.flags = slot->flags;
@@ -291,45 +280,47 @@
  * dirty pages logging control
  */
 
-static int kvm_mem_flags(KVMState *s, bool log_dirty, bool readonly)
+static int kvm_mem_flags(MemoryRegion *mr)
 {
+    bool readonly = mr->readonly || memory_region_is_romd(mr);
     int flags = 0;
-    flags = log_dirty ? KVM_MEM_LOG_DIRTY_PAGES : 0;
+
+    if (memory_region_get_dirty_log_mask(mr) != 0) {
+        flags |= KVM_MEM_LOG_DIRTY_PAGES;
+    }
     if (readonly && kvm_readonly_mem_allowed) {
         flags |= KVM_MEM_READONLY;
     }
     return flags;
 }
 
-static int kvm_slot_dirty_pages_log_change(KVMSlot *mem, bool log_dirty)
+static int kvm_slot_update_flags(KVMMemoryListener *kml, KVMSlot *mem,
+                                 MemoryRegion *mr)
 {
-    KVMState *s = kvm_state;
-    int flags, mask = KVM_MEM_LOG_DIRTY_PAGES;
     int old_flags;
 
     old_flags = mem->flags;
-
-    flags = (mem->flags & ~mask) | kvm_mem_flags(s, log_dirty, false);
-    mem->flags = flags;
+    mem->flags = kvm_mem_flags(mr);
 
     /* If nothing changed effectively, no need to issue ioctl */
-    if (flags == old_flags) {
+    if (mem->flags == old_flags) {
         return 0;
     }
 
-    return kvm_set_user_memory_region(s, mem);
+    return kvm_set_user_memory_region(kml, mem);
 }
 
-static int kvm_dirty_pages_log_change(hwaddr phys_addr,
-                                      ram_addr_t size, bool log_dirty)
+static int kvm_section_update_flags(KVMMemoryListener *kml,
+                                    MemoryRegionSection *section)
 {
-    KVMState *s = kvm_state;
-    KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size);
+    hwaddr phys_addr = section->offset_within_address_space;
+    ram_addr_t size = int128_get64(section->size);
+    KVMSlot *mem = kvm_lookup_matching_slot(kml, phys_addr, phys_addr + size);
 
     if (mem == NULL)  {
         return 0;
     } else {
-        return kvm_slot_dirty_pages_log_change(mem, log_dirty);
+        return kvm_slot_update_flags(kml, mem, section->mr);
     }
 }
 
@@ -337,14 +328,14 @@
                           MemoryRegionSection *section,
                           int old, int new)
 {
+    KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
     int r;
 
     if (old != 0) {
         return;
     }
 
-    r = kvm_dirty_pages_log_change(section->offset_within_address_space,
-                                   int128_get64(section->size), true);
+    r = kvm_section_update_flags(kml, section);
     if (r < 0) {
         abort();
     }
@@ -354,14 +345,14 @@
                           MemoryRegionSection *section,
                           int old, int new)
 {
+    KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
     int r;
 
     if (new != 0) {
         return;
     }
 
-    r = kvm_dirty_pages_log_change(section->offset_within_address_space,
-                                   int128_get64(section->size), false);
+    r = kvm_section_update_flags(kml, section);
     if (r < 0) {
         abort();
     }
@@ -389,11 +380,12 @@
  * @start_add: start of logged region.
  * @end_addr: end of logged region.
  */
-static int kvm_physical_sync_dirty_bitmap(MemoryRegionSection *section)
+static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml,
+                                          MemoryRegionSection *section)
 {
     KVMState *s = kvm_state;
     unsigned long size, allocated_size = 0;
-    KVMDirtyLog d = {};
+    struct kvm_dirty_log d = {};
     KVMSlot *mem;
     int ret = 0;
     hwaddr start_addr = section->offset_within_address_space;
@@ -401,7 +393,7 @@
 
     d.dirty_bitmap = NULL;
     while (start_addr < end_addr) {
-        mem = kvm_lookup_overlapping_slot(s, start_addr, end_addr);
+        mem = kvm_lookup_overlapping_slot(kml, start_addr, end_addr);
         if (mem == NULL) {
             break;
         }
@@ -428,8 +420,7 @@
         allocated_size = size;
         memset(d.dirty_bitmap, 0, allocated_size);
 
-        d.slot = mem->slot;
-
+        d.slot = mem->slot | (kml->as_id << 16);
         if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
             DPRINTF("ioctl failed %d\n", errno);
             ret = -1;
@@ -632,15 +623,14 @@
     return NULL;
 }
 
-static void kvm_set_phys_mem(MemoryRegionSection *section, bool add)
+static void kvm_set_phys_mem(KVMMemoryListener *kml,
+                             MemoryRegionSection *section, bool add)
 {
     KVMState *s = kvm_state;
     KVMSlot *mem, old;
     int err;
     MemoryRegion *mr = section->mr;
-    bool log_dirty = memory_region_get_dirty_log_mask(mr) != 0;
     bool writeable = !mr->readonly && !mr->rom_device;
-    bool readonly_flag = mr->readonly || memory_region_is_romd(mr);
     hwaddr start_addr = section->offset_within_address_space;
     ram_addr_t size = int128_get64(section->size);
     void *ram = NULL;
@@ -674,7 +664,7 @@
     ram = memory_region_get_ram_ptr(mr) + section->offset_within_region + delta;
 
     while (1) {
-        mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
+        mem = kvm_lookup_overlapping_slot(kml, start_addr, start_addr + size);
         if (!mem) {
             break;
         }
@@ -684,19 +674,19 @@
             (ram - start_addr == mem->ram - mem->start_addr)) {
             /* The new slot fits into the existing one and comes with
              * identical parameters - update flags and done. */
-            kvm_slot_dirty_pages_log_change(mem, log_dirty);
+            kvm_slot_update_flags(kml, mem, mr);
             return;
         }
 
         old = *mem;
 
         if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
-            kvm_physical_sync_dirty_bitmap(section);
+            kvm_physical_sync_dirty_bitmap(kml, section);
         }
 
         /* unregister the overlapping slot */
         mem->memory_size = 0;
-        err = kvm_set_user_memory_region(s, mem);
+        err = kvm_set_user_memory_region(kml, mem);
         if (err) {
             fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
                     __func__, strerror(-err));
@@ -713,13 +703,13 @@
          * - and actually require a recent KVM version. */
         if (s->broken_set_mem_region &&
             old.start_addr == start_addr && old.memory_size < size && add) {
-            mem = kvm_alloc_slot(s);
+            mem = kvm_alloc_slot(kml);
             mem->memory_size = old.memory_size;
             mem->start_addr = old.start_addr;
             mem->ram = old.ram;
-            mem->flags = kvm_mem_flags(s, log_dirty, readonly_flag);
+            mem->flags = kvm_mem_flags(mr);
 
-            err = kvm_set_user_memory_region(s, mem);
+            err = kvm_set_user_memory_region(kml, mem);
             if (err) {
                 fprintf(stderr, "%s: error updating slot: %s\n", __func__,
                         strerror(-err));
@@ -734,13 +724,13 @@
 
         /* register prefix slot */
         if (old.start_addr < start_addr) {
-            mem = kvm_alloc_slot(s);
+            mem = kvm_alloc_slot(kml);
             mem->memory_size = start_addr - old.start_addr;
             mem->start_addr = old.start_addr;
             mem->ram = old.ram;
-            mem->flags =  kvm_mem_flags(s, log_dirty, readonly_flag);
+            mem->flags =  kvm_mem_flags(mr);
 
-            err = kvm_set_user_memory_region(s, mem);
+            err = kvm_set_user_memory_region(kml, mem);
             if (err) {
                 fprintf(stderr, "%s: error registering prefix slot: %s\n",
                         __func__, strerror(-err));
@@ -757,14 +747,14 @@
         if (old.start_addr + old.memory_size > start_addr + size) {
             ram_addr_t size_delta;
 
-            mem = kvm_alloc_slot(s);
+            mem = kvm_alloc_slot(kml);
             mem->start_addr = start_addr + size;
             size_delta = mem->start_addr - old.start_addr;
             mem->memory_size = old.memory_size - size_delta;
             mem->ram = old.ram + size_delta;
-            mem->flags = kvm_mem_flags(s, log_dirty, readonly_flag);
+            mem->flags = kvm_mem_flags(mr);
 
-            err = kvm_set_user_memory_region(s, mem);
+            err = kvm_set_user_memory_region(kml, mem);
             if (err) {
                 fprintf(stderr, "%s: error registering suffix slot: %s\n",
                         __func__, strerror(-err));
@@ -780,13 +770,13 @@
     if (!add) {
         return;
     }
-    mem = kvm_alloc_slot(s);
+    mem = kvm_alloc_slot(kml);
     mem->memory_size = size;
     mem->start_addr = start_addr;
     mem->ram = ram;
-    mem->flags = kvm_mem_flags(s, log_dirty, readonly_flag);
+    mem->flags = kvm_mem_flags(mr);
 
-    err = kvm_set_user_memory_region(s, mem);
+    err = kvm_set_user_memory_region(kml, mem);
     if (err) {
         fprintf(stderr, "%s: error registering slot: %s\n", __func__,
                 strerror(-err));
@@ -797,23 +787,28 @@
 static void kvm_region_add(MemoryListener *listener,
                            MemoryRegionSection *section)
 {
+    KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
+
     memory_region_ref(section->mr);
-    kvm_set_phys_mem(section, true);
+    kvm_set_phys_mem(kml, section, true);
 }
 
 static void kvm_region_del(MemoryListener *listener,
                            MemoryRegionSection *section)
 {
-    kvm_set_phys_mem(section, false);
+    KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
+
+    kvm_set_phys_mem(kml, section, false);
     memory_region_unref(section->mr);
 }
 
 static void kvm_log_sync(MemoryListener *listener,
                          MemoryRegionSection *section)
 {
+    KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
     int r;
 
-    r = kvm_physical_sync_dirty_bitmap(section);
+    r = kvm_physical_sync_dirty_bitmap(kml, section);
     if (r < 0) {
         abort();
     }
@@ -888,18 +883,27 @@
     }
 }
 
-static MemoryListener kvm_memory_listener = {
-    .region_add = kvm_region_add,
-    .region_del = kvm_region_del,
-    .log_start = kvm_log_start,
-    .log_stop = kvm_log_stop,
-    .log_sync = kvm_log_sync,
-    .eventfd_add = kvm_mem_ioeventfd_add,
-    .eventfd_del = kvm_mem_ioeventfd_del,
-    .coalesced_mmio_add = kvm_coalesce_mmio_region,
-    .coalesced_mmio_del = kvm_uncoalesce_mmio_region,
-    .priority = 10,
-};
+void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
+                                  AddressSpace *as, int as_id)
+{
+    int i;
+
+    kml->slots = g_malloc0(s->nr_slots * sizeof(KVMSlot));
+    kml->as_id = as_id;
+
+    for (i = 0; i < s->nr_slots; i++) {
+        kml->slots[i].slot = i;
+    }
+
+    kml->listener.region_add = kvm_region_add;
+    kml->listener.region_del = kvm_region_del;
+    kml->listener.log_start = kvm_log_start;
+    kml->listener.log_stop = kvm_log_stop;
+    kml->listener.log_sync = kvm_log_sync;
+    kml->listener.priority = 10;
+
+    memory_listener_register(&kml->listener, as);
+}
 
 static MemoryListener kvm_io_listener = {
     .eventfd_add = kvm_io_ioeventfd_add,
@@ -1341,27 +1345,31 @@
            false);
 }
 
-static int kvm_irqchip_create(MachineState *machine, KVMState *s)
+static void kvm_irqchip_create(MachineState *machine, KVMState *s)
 {
     int ret;
 
-    if (!machine_kernel_irqchip_allowed(machine) ||
-        (!kvm_check_extension(s, KVM_CAP_IRQCHIP) &&
-         (kvm_vm_enable_cap(s, KVM_CAP_S390_IRQCHIP, 0) < 0))) {
-        return 0;
+    if (kvm_check_extension(s, KVM_CAP_IRQCHIP)) {
+        ;
+    } else if (kvm_check_extension(s, KVM_CAP_S390_IRQCHIP)) {
+        ret = kvm_vm_enable_cap(s, KVM_CAP_S390_IRQCHIP, 0);
+        if (ret < 0) {
+            fprintf(stderr, "Enable kernel irqchip failed: %s\n", strerror(-ret));
+            exit(1);
+        }
+    } else {
+        return;
     }
 
     /* First probe and see if there's a arch-specific hook to create the
      * in-kernel irqchip for us */
     ret = kvm_arch_irqchip_create(s);
-    if (ret < 0) {
-        return ret;
-    } else if (ret == 0) {
+    if (ret == 0) {
         ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP);
-        if (ret < 0) {
-            fprintf(stderr, "Create kernel irqchip failed\n");
-            return ret;
-        }
+    }
+    if (ret < 0) {
+        fprintf(stderr, "Create kernel irqchip failed: %s\n", strerror(-ret));
+        exit(1);
     }
 
     kvm_kernel_irqchip = true;
@@ -1372,8 +1380,6 @@
     kvm_halt_in_kernel_allowed = true;
 
     kvm_init_irq_routing(s);
-
-    return 0;
 }
 
 /* Find number of supported CPUs using the recommended
@@ -1410,7 +1416,7 @@
     KVMState *s;
     const KVMCapabilityInfo *missing_cap;
     int ret;
-    int i, type = 0;
+    int type = 0;
     const char *kvm_type;
 
     s = KVM_STATE(ms->accelerator);
@@ -1459,12 +1465,6 @@
         s->nr_slots = 32;
     }
 
-    s->slots = g_malloc0(s->nr_slots * sizeof(KVMSlot));
-
-    for (i = 0; i < s->nr_slots; i++) {
-        s->slots[i].slot = i;
-    }
-
     /* check the vcpu limits */
     soft_vcpus_limit = kvm_recommended_vcpus(s);
     hard_vcpus_limit = kvm_max_vcpus(s);
@@ -1596,14 +1596,21 @@
         goto err;
     }
 
-    ret = kvm_irqchip_create(ms, s);
-    if (ret < 0) {
-        goto err;
+    if (machine_kernel_irqchip_allowed(ms)) {
+        kvm_irqchip_create(ms, s);
     }
 
     kvm_state = s;
-    memory_listener_register(&kvm_memory_listener, &address_space_memory);
-    memory_listener_register(&kvm_io_listener, &address_space_io);
+
+    s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add;
+    s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del;
+    s->memory_listener.listener.coalesced_mmio_add = kvm_coalesce_mmio_region;
+    s->memory_listener.listener.coalesced_mmio_del = kvm_uncoalesce_mmio_region;
+
+    kvm_memory_listener_register(s, &s->memory_listener,
+                                 &address_space_memory, 0);
+    memory_listener_register(&kvm_io_listener,
+                             &address_space_io);
 
     s->many_ioeventfds = kvm_check_many_ioeventfds();
 
@@ -1619,7 +1626,7 @@
     if (s->fd != -1) {
         close(s->fd);
     }
-    g_free(s->slots);
+    g_free(s->memory_listener.slots);
 
     return ret;
 }
diff --git a/linux-headers/asm-x86/hyperv.h b/linux-headers/asm-x86/hyperv.h
index ce6068d..8fba544 100644
--- a/linux-headers/asm-x86/hyperv.h
+++ b/linux-headers/asm-x86/hyperv.h
@@ -199,6 +199,17 @@
 #define HV_X64_MSR_STIMER3_CONFIG		0x400000B6
 #define HV_X64_MSR_STIMER3_COUNT		0x400000B7
 
+/* Hyper-V guest crash notification MSR's */
+#define HV_X64_MSR_CRASH_P0			0x40000100
+#define HV_X64_MSR_CRASH_P1			0x40000101
+#define HV_X64_MSR_CRASH_P2			0x40000102
+#define HV_X64_MSR_CRASH_P3			0x40000103
+#define HV_X64_MSR_CRASH_P4			0x40000104
+#define HV_X64_MSR_CRASH_CTL			0x40000105
+#define HV_X64_MSR_CRASH_CTL_NOTIFY		(1ULL << 63)
+#define HV_X64_MSR_CRASH_PARAMS		\
+		(1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0))
+
 #define HV_X64_MSR_HYPERCALL_ENABLE		0x00000001
 #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT	12
 #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK	\
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index fad9e5c..3bac873 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -897,7 +897,7 @@
  *
  * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies
  * the irqfd to operate in resampling mode for level triggered interrupt
- * emlation.  See Documentation/virtual/kvm/api.txt.
+ * emulation.  See Documentation/virtual/kvm/api.txt.
  */
 #define KVM_IRQFD_FLAG_RESAMPLE (1 << 1)
 
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index 0508d0b..aa276bc 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -36,6 +36,8 @@
 /* Two-stage IOMMU */
 #define VFIO_TYPE1_NESTING_IOMMU	6	/* Implies v2 */
 
+#define VFIO_SPAPR_TCE_v2_IOMMU		7
+
 /*
  * The IOCTL interface is designed for extensibility by embedding the
  * structure length (argsz) and flags into structures passed between
@@ -443,6 +445,23 @@
 /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
 
 /*
+ * The SPAPR TCE DDW info struct provides the information about
+ * the details of Dynamic DMA window capability.
+ *
+ * @pgsizes contains a page size bitmask, 4K/64K/16M are supported.
+ * @max_dynamic_windows_supported tells the maximum number of windows
+ * which the platform can create.
+ * @levels tells the maximum number of levels in multi-level IOMMU tables;
+ * this allows splitting a table into smaller chunks which reduces
+ * the amount of physically contiguous memory required for the table.
+ */
+struct vfio_iommu_spapr_tce_ddw_info {
+	__u64 pgsizes;			/* Bitmap of supported page sizes */
+	__u32 max_dynamic_windows_supported;
+	__u32 levels;
+};
+
+/*
  * The SPAPR TCE info struct provides the information about the PCI bus
  * address ranges available for DMA, these values are programmed into
  * the hardware so the guest has to know that information.
@@ -452,14 +471,17 @@
  * addresses too so the window works as a filter rather than an offset
  * for IOVA addresses.
  *
- * A flag will need to be added if other page sizes are supported,
- * so as defined here, it is always 4k.
+ * Flags supported:
+ * - VFIO_IOMMU_SPAPR_INFO_DDW: informs the userspace that dynamic DMA windows
+ *   (DDW) support is present. @ddw is only supported when DDW is present.
  */
 struct vfio_iommu_spapr_tce_info {
 	__u32 argsz;
-	__u32 flags;			/* reserved for future use */
+	__u32 flags;
+#define VFIO_IOMMU_SPAPR_INFO_DDW	(1 << 0)	/* DDW supported */
 	__u32 dma32_window_start;	/* 32 bit window start (bytes) */
 	__u32 dma32_window_size;	/* 32 bit window size (bytes) */
+	struct vfio_iommu_spapr_tce_ddw_info ddw;
 };
 
 #define VFIO_IOMMU_SPAPR_TCE_GET_INFO	_IO(VFIO_TYPE, VFIO_BASE + 12)
@@ -470,12 +492,23 @@
  * - unfreeze IO/DMA for frozen PE;
  * - read PE state;
  * - reset PE;
- * - configure PE.
+ * - configure PE;
+ * - inject EEH error.
  */
+struct vfio_eeh_pe_err {
+	__u32 type;
+	__u32 func;
+	__u64 addr;
+	__u64 mask;
+};
+
 struct vfio_eeh_pe_op {
 	__u32 argsz;
 	__u32 flags;
 	__u32 op;
+	union {
+		struct vfio_eeh_pe_err err;
+	};
 };
 
 #define VFIO_EEH_PE_DISABLE		0	/* Disable EEH functionality */
@@ -492,9 +525,70 @@
 #define VFIO_EEH_PE_RESET_HOT		6	/* Assert hot reset          */
 #define VFIO_EEH_PE_RESET_FUNDAMENTAL	7	/* Assert fundamental reset  */
 #define VFIO_EEH_PE_CONFIGURE		8	/* PE configuration          */
+#define VFIO_EEH_PE_INJECT_ERR		9	/* Inject EEH error          */
 
 #define VFIO_EEH_PE_OP			_IO(VFIO_TYPE, VFIO_BASE + 21)
 
+/**
+ * VFIO_IOMMU_SPAPR_REGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 17, struct vfio_iommu_spapr_register_memory)
+ *
+ * Registers user space memory where DMA is allowed. It pins
+ * user pages and does the locked memory accounting so
+ * subsequent VFIO_IOMMU_MAP_DMA/VFIO_IOMMU_UNMAP_DMA calls
+ * get faster.
+ */
+struct vfio_iommu_spapr_register_memory {
+	__u32	argsz;
+	__u32	flags;
+	__u64	vaddr;				/* Process virtual address */
+	__u64	size;				/* Size of mapping (bytes) */
+};
+#define VFIO_IOMMU_SPAPR_REGISTER_MEMORY	_IO(VFIO_TYPE, VFIO_BASE + 17)
+
+/**
+ * VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 18, struct vfio_iommu_spapr_register_memory)
+ *
+ * Unregisters user space memory registered with
+ * VFIO_IOMMU_SPAPR_REGISTER_MEMORY.
+ * Uses vfio_iommu_spapr_register_memory for parameters.
+ */
+#define VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY	_IO(VFIO_TYPE, VFIO_BASE + 18)
+
+/**
+ * VFIO_IOMMU_SPAPR_TCE_CREATE - _IOWR(VFIO_TYPE, VFIO_BASE + 19, struct vfio_iommu_spapr_tce_create)
+ *
+ * Creates an additional TCE table and programs it (sets a new DMA window)
+ * to every IOMMU group in the container. It receives page shift, window
+ * size and number of levels in the TCE table being created.
+ *
+ * It allocates and returns an offset on a PCI bus of the new DMA window.
+ */
+struct vfio_iommu_spapr_tce_create {
+	__u32 argsz;
+	__u32 flags;
+	/* in */
+	__u32 page_shift;
+	__u64 window_size;
+	__u32 levels;
+	/* out */
+	__u64 start_addr;
+};
+#define VFIO_IOMMU_SPAPR_TCE_CREATE	_IO(VFIO_TYPE, VFIO_BASE + 19)
+
+/**
+ * VFIO_IOMMU_SPAPR_TCE_REMOVE - _IOW(VFIO_TYPE, VFIO_BASE + 20, struct vfio_iommu_spapr_tce_remove)
+ *
+ * Unprograms a TCE table from all groups in the container and destroys it.
+ * It receives a PCI bus offset as a window id.
+ */
+struct vfio_iommu_spapr_tce_remove {
+	__u32 argsz;
+	__u32 flags;
+	/* in */
+	__u64 start_addr;
+};
+#define VFIO_IOMMU_SPAPR_TCE_REMOVE	_IO(VFIO_TYPE, VFIO_BASE + 20)
+
 /* ***************************************************************** */
 
 #endif /* VFIO_H */
diff --git a/linux-headers/linux/virtio_pci.h b/linux-headers/linux/virtio_pci.h
deleted file mode 100644
index 92624e5..0000000
--- a/linux-headers/linux/virtio_pci.h
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * Virtio PCI driver
- *
- * This module allows virtio devices to be used over a virtual PCI device.
- * This can be used with QEMU based VMMs like KVM or Xen.
- *
- * Copyright IBM Corp. 2007
- *
- * Authors:
- *  Anthony Liguori  <aliguori@us.ibm.com>
- *
- * This header is BSD licensed so anyone can use the definitions to implement
- * compatible drivers/servers.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of IBM nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#ifndef _LINUX_VIRTIO_PCI_H
-#define _LINUX_VIRTIO_PCI_H
-
-#include <linux/types.h>
-
-#ifndef VIRTIO_PCI_NO_LEGACY
-
-/* A 32-bit r/o bitmask of the features supported by the host */
-#define VIRTIO_PCI_HOST_FEATURES	0
-
-/* A 32-bit r/w bitmask of features activated by the guest */
-#define VIRTIO_PCI_GUEST_FEATURES	4
-
-/* A 32-bit r/w PFN for the currently selected queue */
-#define VIRTIO_PCI_QUEUE_PFN		8
-
-/* A 16-bit r/o queue size for the currently selected queue */
-#define VIRTIO_PCI_QUEUE_NUM		12
-
-/* A 16-bit r/w queue selector */
-#define VIRTIO_PCI_QUEUE_SEL		14
-
-/* A 16-bit r/w queue notifier */
-#define VIRTIO_PCI_QUEUE_NOTIFY		16
-
-/* An 8-bit device status register.  */
-#define VIRTIO_PCI_STATUS		18
-
-/* An 8-bit r/o interrupt status register.  Reading the value will return the
- * current contents of the ISR and will also clear it.  This is effectively
- * a read-and-acknowledge. */
-#define VIRTIO_PCI_ISR			19
-
-/* MSI-X registers: only enabled if MSI-X is enabled. */
-/* A 16-bit vector for configuration changes. */
-#define VIRTIO_MSI_CONFIG_VECTOR        20
-/* A 16-bit vector for selected queue notifications. */
-#define VIRTIO_MSI_QUEUE_VECTOR         22
-
-/* The remaining space is defined by each driver as the per-driver
- * configuration space */
-#define VIRTIO_PCI_CONFIG_OFF(msix_enabled)	((msix_enabled) ? 24 : 20)
-/* Deprecated: please use VIRTIO_PCI_CONFIG_OFF instead */
-#define VIRTIO_PCI_CONFIG(dev)	VIRTIO_PCI_CONFIG_OFF((dev)->msix_enabled)
-
-/* Virtio ABI version, this must match exactly */
-#define VIRTIO_PCI_ABI_VERSION		0
-
-/* How many bits to shift physical queue address written to QUEUE_PFN.
- * 12 is historical, and due to x86 page size. */
-#define VIRTIO_PCI_QUEUE_ADDR_SHIFT	12
-
-/* The alignment to use between consumer and producer parts of vring.
- * x86 pagesize again. */
-#define VIRTIO_PCI_VRING_ALIGN		4096
-
-#endif /* VIRTIO_PCI_NO_LEGACY */
-
-/* The bit of the ISR which indicates a device configuration change. */
-#define VIRTIO_PCI_ISR_CONFIG		0x2
-/* Vector value used to disable MSI for queue */
-#define VIRTIO_MSI_NO_VECTOR            0xffff
-
-#ifndef VIRTIO_PCI_NO_MODERN
-
-/* IDs for different capabilities.  Must all exist. */
-
-/* Common configuration */
-#define VIRTIO_PCI_CAP_COMMON_CFG	1
-/* Notifications */
-#define VIRTIO_PCI_CAP_NOTIFY_CFG	2
-/* ISR access */
-#define VIRTIO_PCI_CAP_ISR_CFG		3
-/* Device specific confiuration */
-#define VIRTIO_PCI_CAP_DEVICE_CFG	4
-
-/* This is the PCI capability header: */
-struct virtio_pci_cap {
-	__u8 cap_vndr;		/* Generic PCI field: PCI_CAP_ID_VNDR */
-	__u8 cap_next;		/* Generic PCI field: next ptr. */
-	__u8 cap_len;		/* Generic PCI field: capability length */
-	__u8 cfg_type;		/* Identifies the structure. */
-	__u8 bar;		/* Where to find it. */
-	__u8 padding[3];	/* Pad to full dword. */
-	__le32 offset;		/* Offset within bar. */
-	__le32 length;		/* Length of the structure, in bytes. */
-};
-
-struct virtio_pci_notify_cap {
-	struct virtio_pci_cap cap;
-	__le32 notify_off_multiplier;	/* Multiplier for queue_notify_off. */
-};
-
-/* Fields in VIRTIO_PCI_CAP_COMMON_CFG: */
-struct virtio_pci_common_cfg {
-	/* About the whole device. */
-	__le32 device_feature_select;	/* read-write */
-	__le32 device_feature;		/* read-only */
-	__le32 guest_feature_select;	/* read-write */
-	__le32 guest_feature;		/* read-write */
-	__le16 msix_config;		/* read-write */
-	__le16 num_queues;		/* read-only */
-	__u8 device_status;		/* read-write */
-	__u8 config_generation;		/* read-only */
-
-	/* About a specific virtqueue. */
-	__le16 queue_select;		/* read-write */
-	__le16 queue_size;		/* read-write, power of 2. */
-	__le16 queue_msix_vector;	/* read-write */
-	__le16 queue_enable;		/* read-write */
-	__le16 queue_notify_off;	/* read-only */
-	__le32 queue_desc_lo;		/* read-write */
-	__le32 queue_desc_hi;		/* read-write */
-	__le32 queue_avail_lo;		/* read-write */
-	__le32 queue_avail_hi;		/* read-write */
-	__le32 queue_used_lo;		/* read-write */
-	__le32 queue_used_hi;		/* read-write */
-};
-
-/* Macro versions of offsets for the Old Timers! */
-#define VIRTIO_PCI_CAP_VNDR		0
-#define VIRTIO_PCI_CAP_NEXT		1
-#define VIRTIO_PCI_CAP_LEN		2
-#define VIRTIO_PCI_CAP_CFG_TYPE		3
-#define VIRTIO_PCI_CAP_BAR		4
-#define VIRTIO_PCI_CAP_OFFSET		8
-#define VIRTIO_PCI_CAP_LENGTH		12
-
-#define VIRTIO_PCI_NOTIFY_CAP_MULT	16
-
-
-#define VIRTIO_PCI_COMMON_DFSELECT	0
-#define VIRTIO_PCI_COMMON_DF		4
-#define VIRTIO_PCI_COMMON_GFSELECT	8
-#define VIRTIO_PCI_COMMON_GF		12
-#define VIRTIO_PCI_COMMON_MSIX		16
-#define VIRTIO_PCI_COMMON_NUMQ		18
-#define VIRTIO_PCI_COMMON_STATUS	20
-#define VIRTIO_PCI_COMMON_CFGGENERATION	21
-#define VIRTIO_PCI_COMMON_Q_SELECT	22
-#define VIRTIO_PCI_COMMON_Q_SIZE	24
-#define VIRTIO_PCI_COMMON_Q_MSIX	26
-#define VIRTIO_PCI_COMMON_Q_ENABLE	28
-#define VIRTIO_PCI_COMMON_Q_NOFF	30
-#define VIRTIO_PCI_COMMON_Q_DESCLO	32
-#define VIRTIO_PCI_COMMON_Q_DESCHI	36
-#define VIRTIO_PCI_COMMON_Q_AVAILLO	40
-#define VIRTIO_PCI_COMMON_Q_AVAILHI	44
-#define VIRTIO_PCI_COMMON_Q_USEDLO	48
-#define VIRTIO_PCI_COMMON_Q_USEDHI	52
-
-#endif /* VIRTIO_PCI_NO_MODERN */
-
-#endif
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 603aaf0..ac39291 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -314,6 +314,7 @@
 
 #define MSR_P6_PERFCTR0                 0xc1
 
+#define MSR_IA32_SMBASE                 0x9e
 #define MSR_MTRRcap                     0xfe
 #define MSR_MTRRcap_VCNT                8
 #define MSR_MTRRcap_FIXRANGE_SUPPORT    (1 << 8)
diff --git a/target-i386/kvm-stub.c b/target-i386/kvm-stub.c
index 2b9e801..6fefd65 100644
--- a/target-i386/kvm-stub.c
+++ b/target-i386/kvm-stub.c
@@ -18,6 +18,11 @@
 }
 
 #ifndef __OPTIMIZE__
+bool kvm_has_smm(void)
+{
+    return 1;
+}
+
 /* This function is only called inside conditionals which we
  * rely on the compiler to optimize out when CONFIG_KVM is not
  * defined.
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 6426600..9038bf7 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -22,7 +22,7 @@
 
 #include "qemu-common.h"
 #include "sysemu/sysemu.h"
-#include "sysemu/kvm.h"
+#include "sysemu/kvm_int.h"
 #include "kvm_i386.h"
 #include "cpu.h"
 #include "exec/gdbstub.h"
@@ -73,6 +73,7 @@
 static bool has_msr_async_pf_en;
 static bool has_msr_pv_eoi_en;
 static bool has_msr_misc_enable;
+static bool has_msr_smbase;
 static bool has_msr_bndcfgs;
 static bool has_msr_kvm_steal_time;
 static int lm_capable_kernel;
@@ -85,6 +86,11 @@
 static bool has_msr_architectural_pmu;
 static uint32_t num_architectural_pmu_counters;
 
+bool kvm_has_smm(void)
+{
+    return kvm_check_extension(kvm_state, KVM_CAP_X86_SMM);
+}
+
 bool kvm_allows_irq0_override(void)
 {
     return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing();
@@ -819,6 +825,10 @@
                     has_msr_tsc_deadline = true;
                     continue;
                 }
+                if (kvm_msr_list->indices[i] == MSR_IA32_SMBASE) {
+                    has_msr_smbase = true;
+                    continue;
+                }
                 if (kvm_msr_list->indices[i] == MSR_IA32_MISC_ENABLE) {
                     has_msr_misc_enable = true;
                     continue;
@@ -840,6 +850,40 @@
     return ret;
 }
 
+static Notifier smram_machine_done;
+static KVMMemoryListener smram_listener;
+static AddressSpace smram_address_space;
+static MemoryRegion smram_as_root;
+static MemoryRegion smram_as_mem;
+
+static void register_smram_listener(Notifier *n, void *unused)
+{
+    MemoryRegion *smram =
+        (MemoryRegion *) object_resolve_path("/machine/smram", NULL);
+
+    /* Outer container... */
+    memory_region_init(&smram_as_root, OBJECT(kvm_state), "mem-container-smram", ~0ull);
+    memory_region_set_enabled(&smram_as_root, true);
+
+    /* ... with two regions inside: normal system memory with low
+     * priority, and...
+     */
+    memory_region_init_alias(&smram_as_mem, OBJECT(kvm_state), "mem-smram",
+                             get_system_memory(), 0, ~0ull);
+    memory_region_add_subregion_overlap(&smram_as_root, 0, &smram_as_mem, 0);
+    memory_region_set_enabled(&smram_as_mem, true);
+
+    if (smram) {
+        /* ... SMRAM with higher priority */
+        memory_region_add_subregion_overlap(&smram_as_root, 0, smram, 10);
+        memory_region_set_enabled(smram, true);
+    }
+
+    address_space_init(&smram_address_space, &smram_as_root, "KVM-SMRAM");
+    kvm_memory_listener_register(kvm_state, &smram_listener,
+                                 &smram_address_space, 1);
+}
+
 int kvm_arch_init(MachineState *ms, KVMState *s)
 {
     uint64_t identity_base = 0xfffbc000;
@@ -898,6 +942,11 @@
             return ret;
         }
     }
+
+    if (kvm_check_extension(s, KVM_CAP_X86_SMM)) {
+        smram_machine_done.notify = register_smram_listener;
+        qemu_add_machine_init_done_notifier(&smram_machine_done);
+    }
     return 0;
 }
 
@@ -1245,6 +1294,9 @@
         kvm_msr_entry_set(&msrs[n++], MSR_IA32_MISC_ENABLE,
                           env->msr_ia32_misc_enable);
     }
+    if (has_msr_smbase) {
+        kvm_msr_entry_set(&msrs[n++], MSR_IA32_SMBASE, env->smbase);
+    }
     if (has_msr_bndcfgs) {
         kvm_msr_entry_set(&msrs[n++], MSR_IA32_BNDCFGS, env->msr_bndcfgs);
     }
@@ -1606,6 +1658,9 @@
     if (has_msr_misc_enable) {
         msrs[n++].index = MSR_IA32_MISC_ENABLE;
     }
+    if (has_msr_smbase) {
+        msrs[n++].index = MSR_IA32_SMBASE;
+    }
     if (has_msr_feature_control) {
         msrs[n++].index = MSR_IA32_FEATURE_CONTROL;
     }
@@ -1760,6 +1815,9 @@
         case MSR_IA32_MISC_ENABLE:
             env->msr_ia32_misc_enable = msrs[i].data;
             break;
+        case MSR_IA32_SMBASE:
+            env->smbase = msrs[i].data;
+            break;
         case MSR_IA32_FEATURE_CONTROL:
             env->msr_ia32_feature_control = msrs[i].data;
             break;
@@ -1923,6 +1981,7 @@
 
 static int kvm_put_vcpu_events(X86CPU *cpu, int level)
 {
+    CPUState *cs = CPU(cpu);
     CPUX86State *env = &cpu->env;
     struct kvm_vcpu_events events = {};
 
@@ -1947,6 +2006,24 @@
 
     events.sipi_vector = env->sipi_vector;
 
+    events.flags = 0;
+    if (has_msr_smbase) {
+        events.smi.smm = !!(env->hflags & HF_SMM_MASK);
+        events.smi.smm_inside_nmi = !!(env->hflags2 & HF2_SMM_INSIDE_NMI_MASK);
+        if (kvm_irqchip_in_kernel()) {
+            /* As soon as these are moved to the kernel, remove them
+             * from cs->interrupt_request.
+             */
+            events.smi.pending = cs->interrupt_request & CPU_INTERRUPT_SMI;
+            events.smi.latched_init = cs->interrupt_request & CPU_INTERRUPT_INIT;
+            cs->interrupt_request &= ~(CPU_INTERRUPT_INIT | CPU_INTERRUPT_SMI);
+        } else {
+            /* Keep these in cs->interrupt_request.  */
+            events.smi.pending = 0;
+            events.smi.latched_init = 0;
+        }
+        events.flags |= KVM_VCPUEVENT_VALID_SMM;
+    }
+
-    events.flags = 0;
     if (level >= KVM_PUT_RESET_STATE) {
         events.flags |=
@@ -1966,6 +2043,7 @@
         return 0;
     }
 
+    memset(&events, 0, sizeof(events));
     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_VCPU_EVENTS, &events);
     if (ret < 0) {
        return ret;
@@ -1987,6 +2065,29 @@
         env->hflags2 &= ~HF2_NMI_MASK;
     }
 
+    if (events.flags & KVM_VCPUEVENT_VALID_SMM) {
+        if (events.smi.smm) {
+            env->hflags |= HF_SMM_MASK;
+        } else {
+            env->hflags &= ~HF_SMM_MASK;
+        }
+        if (events.smi.pending) {
+            cpu_interrupt(CPU(cpu), CPU_INTERRUPT_SMI);
+        } else {
+            cpu_reset_interrupt(CPU(cpu), CPU_INTERRUPT_SMI);
+        }
+        if (events.smi.smm_inside_nmi) {
+            env->hflags2 |= HF2_SMM_INSIDE_NMI_MASK;
+        } else {
+            env->hflags2 &= ~HF2_SMM_INSIDE_NMI_MASK;
+        }
+        if (events.smi.latched_init) {
+            cpu_interrupt(CPU(cpu), CPU_INTERRUPT_INIT);
+        } else {
+            cpu_reset_interrupt(CPU(cpu), CPU_INTERRUPT_INIT);
+        }
+    }
+
     env->sipi_vector = events.sipi_vector;
 
     return 0;
@@ -2190,16 +2291,28 @@
     int ret;
 
     /* Inject NMI */
-    if (cpu->interrupt_request & CPU_INTERRUPT_NMI) {
-        qemu_mutex_lock_iothread();
-        cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
-        qemu_mutex_unlock_iothread();
-
-        DPRINTF("injected NMI\n");
-        ret = kvm_vcpu_ioctl(cpu, KVM_NMI);
-        if (ret < 0) {
-            fprintf(stderr, "KVM: injection failed, NMI lost (%s)\n",
-                    strerror(-ret));
+    if (cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) {
+        if (cpu->interrupt_request & CPU_INTERRUPT_NMI) {
+            qemu_mutex_lock_iothread();
+            cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
+            qemu_mutex_unlock_iothread();
+            DPRINTF("injected NMI\n");
+            ret = kvm_vcpu_ioctl(cpu, KVM_NMI);
+            if (ret < 0) {
+                fprintf(stderr, "KVM: injection failed, NMI lost (%s)\n",
+                        strerror(-ret));
+            }
+        }
+        if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
+            qemu_mutex_lock_iothread();
+            cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
+            qemu_mutex_unlock_iothread();
+            DPRINTF("injected SMI\n");
+            ret = kvm_vcpu_ioctl(cpu, KVM_SMI);
+            if (ret < 0) {
+                fprintf(stderr, "KVM: injection failed, SMI lost (%s)\n",
+                        strerror(-ret));
+            }
         }
     }
 
@@ -2212,7 +2325,13 @@
      * pending TPR access reports.
      */
     if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
-        cpu->exit_request = 1;
+        if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
+            !(env->hflags & HF_SMM_MASK)) {
+            cpu->exit_request = 1;
+        }
+        if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
+            cpu->exit_request = 1;
+        }
     }
 
     if (!kvm_irqchip_in_kernel()) {
@@ -2260,6 +2379,11 @@
     X86CPU *x86_cpu = X86_CPU(cpu);
     CPUX86State *env = &x86_cpu->env;
 
+    if (run->flags & KVM_RUN_X86_SMM) {
+        env->hflags |= HF_SMM_MASK;
+    } else {
+        env->hflags &= ~HF_SMM_MASK;
+    }
     if (run->if_flag) {
         env->eflags |= IF_MASK;
     } else {
@@ -2307,7 +2431,8 @@
         }
     }
 
-    if (cs->interrupt_request & CPU_INTERRUPT_INIT) {
+    if ((cs->interrupt_request & CPU_INTERRUPT_INIT) &&
+        !(env->hflags & HF_SMM_MASK)) {
         kvm_cpu_synchronize_state(cs);
         do_cpu_init(cpu);
     }
diff --git a/target-i386/kvm_i386.h b/target-i386/kvm_i386.h
index cac30fd..e557e94 100644
--- a/target-i386/kvm_i386.h
+++ b/target-i386/kvm_i386.h
@@ -14,6 +14,7 @@
 #include "sysemu/kvm.h"
 
 bool kvm_allows_irq0_override(void);
+bool kvm_has_smm(void);
 void kvm_arch_reset_vcpu(X86CPU *cs);
 void kvm_arch_do_init_vcpu(X86CPU *cs);