Merge remote branch 'origin/master' into pci

diff --git a/docs/qdev-device-use.txt b/docs/qdev-device-use.txt
index f252c8e..f2f9b75 100644
--- a/docs/qdev-device-use.txt
+++ b/docs/qdev-device-use.txt

@@ -97,10 +97,13 @@
 
 * if=virtio
 
-  -device virtio-blk-pci,drive=DRIVE-ID,class=C,vectors=V
+  -device virtio-blk-pci,drive=DRIVE-ID,class=C,vectors=V,ioeventfd=IOEVENTFD
 
   This lets you control PCI device class and MSI-X vectors.
 
+  IOEVENTFD controls whether or not ioeventfd is used for virtqueue notify.  It
+  can be set to on (default) or off.
+
   As for all PCI devices, you can add bus=PCI-BUS,addr=DEVFN to
   control the PCI device address.
 
@@ -240,6 +243,9 @@
 device address, as usual.  The old -net nic provides parameter addr
 for that, it is silently ignored when the NIC is not a PCI device.
 
+For virtio-net-pci, you can control whether or not ioeventfd is used for
+virtqueue notify by setting ioeventfd= to on or off (default).
+
 -net nic accepts vectors=V for all models, but it's silently ignored
 except for virtio-net-pci (model=virtio).  With -device, only devices
 that support it accept it.

diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c
index 173d781..273097d 100644
--- a/hw/acpi_piix4.c
+++ b/hw/acpi_piix4.c

@@ -428,6 +428,8 @@
     .qdev.desc          = "PM",
     .qdev.size          = sizeof(PIIX4PMState),
     .qdev.vmsd          = &vmstate_acpi,
+    .qdev.no_user       = 1,
+    .no_hotplug         = 1,
     .init               = piix4_pm_initfn,
     .config_write       = pm_write_config,
     .qdev.props         = (Property[]) {

diff --git a/hw/cirrus_vga.c b/hw/cirrus_vga.c
index 75d1cc6..5f45b5d 100644
--- a/hw/cirrus_vga.c
+++ b/hw/cirrus_vga.c

@@ -3140,6 +3140,7 @@
     .qdev.desc    = "Cirrus CLGD 54xx VGA",
     .qdev.size    = sizeof(PCICirrusVGAState),
     .qdev.vmsd    = &vmstate_pci_cirrus_vga,
+    .no_hotplug   = 1,
     .init         = pci_cirrus_vga_initfn,
     .romfile      = VGABIOS_CIRRUS_FILENAME,
     .config_write = pci_cirrus_write_config,

diff --git a/hw/ide/piix.c b/hw/ide/piix.c
index 1cad906..d4289af 100644
--- a/hw/ide/piix.c
+++ b/hw/ide/piix.c

@@ -194,11 +194,13 @@
         .qdev.name    = "piix3-ide",
         .qdev.size    = sizeof(PCIIDEState),
         .qdev.no_user = 1,
+        .no_hotplug   = 1,
         .init         = pci_piix3_ide_initfn,
     },{
         .qdev.name    = "piix4-ide",
         .qdev.size    = sizeof(PCIIDEState),
         .qdev.no_user = 1,
+        .no_hotplug   = 1,
         .init         = pci_piix4_ide_initfn,
     },{
         /* end of list */

diff --git a/hw/pci.c b/hw/pci.c
index d0b51b8..8d0e3df 100644
--- a/hw/pci.c
+++ b/hw/pci.c

@@ -1624,6 +1624,11 @@
                                      info->is_bridge);
     if (pci_dev == NULL)
         return -1;
+    if (qdev->hotplugged && info->no_hotplug) {
+        qerror_report(QERR_DEVICE_NO_HOTPLUG, info->qdev.name);
+        do_pci_unregister_device(pci_dev);
+        return -1;
+    }
     rc = info->init(pci_dev);
     if (rc != 0) {
         do_pci_unregister_device(pci_dev);
@@ -1656,7 +1661,12 @@
 static int pci_unplug_device(DeviceState *qdev)
 {
     PCIDevice *dev = DO_UPCAST(PCIDevice, qdev, qdev);
+    PCIDeviceInfo *info = container_of(qdev->info, PCIDeviceInfo, qdev);
 
+    if (info->no_hotplug) {
+        qerror_report(QERR_DEVICE_NO_HOTPLUG, info->qdev.name);
+        return -1;
+    }
     return dev->bus->hotplug(dev->bus->hotplug_qdev, dev,
                              PCI_HOTPLUG_DISABLED);
 }

diff --git a/hw/pci.h b/hw/pci.h
index 052960e..bc8d5bb 100644
--- a/hw/pci.h
+++ b/hw/pci.h

@@ -436,6 +436,9 @@
     /* pcie stuff */
     int is_express;   /* is this device pci express? */
 
+    /* device isn't hot-pluggable */
+    int no_hotplug;
+
     /* rom bar */
     const char *romfile;
 } PCIDeviceInfo;

diff --git a/hw/piix4.c b/hw/piix4.c
index 5489386..72073cd 100644
--- a/hw/piix4.c
+++ b/hw/piix4.c

@@ -113,6 +113,7 @@
         .qdev.desc    = "ISA bridge",
         .qdev.size    = sizeof(PCIDevice),
         .qdev.no_user = 1,
+        .no_hotplug   = 1,
         .init         = piix4_initfn,
     },{
         /* end of list */

diff --git a/hw/piix_pci.c b/hw/piix_pci.c
index 38f9d9e..358da58 100644
--- a/hw/piix_pci.c
+++ b/hw/piix_pci.c

@@ -348,6 +348,7 @@
         .qdev.size    = sizeof(PCII440FXState),
         .qdev.vmsd    = &vmstate_i440fx,
         .qdev.no_user = 1,
+        .no_hotplug   = 1,
         .init         = i440fx_initfn,
         .config_write = i440fx_write_config,
     },{
@@ -356,6 +357,7 @@
         .qdev.size    = sizeof(PIIX3State),
         .qdev.vmsd    = &vmstate_piix3,
         .qdev.no_user = 1,
+        .no_hotplug   = 1,
         .init         = piix3_initfn,
     },{
         /* end of list */

diff --git a/hw/qdev.c b/hw/qdev.c
index 31eb464..5b8d374 100644
--- a/hw/qdev.c
+++ b/hw/qdev.c

@@ -32,6 +32,8 @@
 #include "blockdev.h"
 
 static int qdev_hotplug = 0;
+static bool qdev_hot_added = false;
+static bool qdev_hot_removed = false;
 
 /* This is a nasty hack to allow passing a NULL bus to qdev_create.  */
 static BusState *main_system_bus;
@@ -93,6 +95,7 @@
     if (qdev_hotplug) {
         assert(bus->allow_hotplug);
         dev->hotplugged = 1;
+        qdev_hot_added = true;
     }
     dev->instance_id_alias = -1;
     dev->state = DEV_STATE_CREATED;
@@ -294,6 +297,8 @@
     }
     assert(dev->info->unplug != NULL);
 
+    qdev_hot_removed = true;
+
     return dev->info->unplug(dev);
 }
 
@@ -395,6 +400,11 @@
     qdev_hotplug = 1;
 }
 
+bool qdev_machine_modified(void)
+{
+    return qdev_hot_added || qdev_hot_removed;
+}
+
 /* Get a character (serial) device interface.  */
 CharDriverState *qdev_init_chardev(DeviceState *dev)
 {

diff --git a/hw/qdev.h b/hw/qdev.h
index 2be775f..e520aaa 100644
--- a/hw/qdev.h
+++ b/hw/qdev.h

@@ -132,6 +132,7 @@
 void qdev_free(DeviceState *dev);
 int qdev_simple_unplug_cb(DeviceState *dev);
 void qdev_machine_creation_done(void);
+bool qdev_machine_modified(void);
 
 qemu_irq qdev_get_gpio_in(DeviceState *dev, int n);
 void qdev_connect_gpio_out(DeviceState *dev, int n, qemu_irq pin);

diff --git a/hw/qxl.c b/hw/qxl.c
index 207aa63..bd71e58 100644
--- a/hw/qxl.c
+++ b/hw/qxl.c

@@ -1546,6 +1546,7 @@
     .qdev.size    = sizeof(PCIQXLDevice),
     .qdev.reset   = qxl_reset_handler,
     .qdev.vmsd    = &qxl_vmstate,
+    .no_hotplug   = 1,
     .init         = qxl_init_primary,
     .config_write = qxl_write_config,
     .romfile      = "vgabios-qxl.bin",

diff --git a/hw/rtl8139.c b/hw/rtl8139.c
index a8aed89..a22530c 100644
--- a/hw/rtl8139.c
+++ b/hw/rtl8139.c

@@ -495,6 +495,8 @@
     QEMUTimer *timer;
     int64_t TimerExpire;
 
+    /* Support migration to/from old versions */
+    int rtl8139_mmio_io_addr_dummy;
 } RTL8139State;
 
 static void rtl8139_set_next_tctr_time(RTL8139State *s, int64_t current_time);
@@ -3162,6 +3164,21 @@
     return 0;
 }
 
+static bool rtl8139_hotplug_ready_needed(void *opaque)
+{
+    return qdev_machine_modified();
+}
+
+static const VMStateDescription vmstate_rtl8139_hotplug_ready ={
+    .name = "rtl8139/hotplug_ready",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields      = (VMStateField []) {
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static void rtl8139_pre_save(void *opaque)
 {
     RTL8139State* s = opaque;
@@ -3171,6 +3188,7 @@
     rtl8139_set_next_tctr_time(s, current_time);
     s->TCTR = muldiv64(current_time - s->TCTR_base, PCI_FREQUENCY,
                        get_ticks_per_sec());
+    s->rtl8139_mmio_io_addr_dummy = s->rtl8139_mmio_io_addr;
 }
 
 static const VMStateDescription vmstate_rtl8139 = {
@@ -3223,7 +3241,7 @@
 
         VMSTATE_UNUSED(4),
         VMSTATE_MACADDR(conf.macaddr, RTL8139State),
-        VMSTATE_INT32(rtl8139_mmio_io_addr, RTL8139State),
+        VMSTATE_INT32(rtl8139_mmio_io_addr_dummy, RTL8139State),
 
         VMSTATE_UINT32(currTxDesc, RTL8139State),
         VMSTATE_UINT32(currCPlusRxDesc, RTL8139State),
@@ -3252,6 +3270,14 @@
 
         VMSTATE_UINT32_V(cplus_enabled, RTL8139State, 4),
         VMSTATE_END_OF_LIST()
+    },
+    .subsections = (VMStateSubsection []) {
+        {
+            .vmsd = &vmstate_rtl8139_hotplug_ready,
+            .needed = rtl8139_hotplug_ready_needed,
+        }, {
+            /* empty */
+        }
     }
 };
 

diff --git a/hw/vga-pci.c b/hw/vga-pci.c
index 791ca22..ce9ec45 100644
--- a/hw/vga-pci.c
+++ b/hw/vga-pci.c

@@ -110,6 +110,7 @@
     .qdev.name    = "VGA",
     .qdev.size    = sizeof(PCIVGAState),
     .qdev.vmsd    = &vmstate_vga_pci,
+    .no_hotplug   = 1,
     .init         = pci_vga_initfn,
     .config_write = pci_vga_write_config,
     .romfile      = "vgabios-stdvga.bin",

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index ec1bf8d..ccb3e63 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c

@@ -54,8 +54,6 @@
     uint8_t nouni;
     uint8_t nobcast;
     uint8_t vhost_started;
-    bool vm_running;
-    VMChangeStateEntry *vmstate;
     struct {
         int in_use;
         int first_multi;
@@ -102,7 +100,7 @@
 static bool virtio_net_started(VirtIONet *n, uint8_t status)
 {
     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
-        (n->status & VIRTIO_NET_S_LINK_UP) && n->vm_running;
+        (n->status & VIRTIO_NET_S_LINK_UP) && n->vdev.vm_running;
 }
 
 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
@@ -453,7 +451,7 @@
 static int virtio_net_can_receive(VLANClientState *nc)
 {
     VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
-    if (!n->vm_running) {
+    if (!n->vdev.vm_running) {
         return 0;
     }
 
@@ -708,7 +706,7 @@
         return num_packets;
     }
 
-    assert(n->vm_running);
+    assert(n->vdev.vm_running);
 
     if (n->async_tx.elem.out_num) {
         virtio_queue_set_notification(n->tx_vq, 0);
@@ -769,7 +767,7 @@
     VirtIONet *n = to_virtio_net(vdev);
 
     /* This happens when device was stopped but VCPU wasn't. */
-    if (!n->vm_running) {
+    if (!n->vdev.vm_running) {
         n->tx_waiting = 1;
         return;
     }
@@ -796,7 +794,7 @@
     }
     n->tx_waiting = 1;
     /* This happens when device was stopped but VCPU wasn't. */
-    if (!n->vm_running) {
+    if (!n->vdev.vm_running) {
         return;
     }
     virtio_queue_set_notification(vq, 0);
@@ -806,7 +804,7 @@
 static void virtio_net_tx_timer(void *opaque)
 {
     VirtIONet *n = opaque;
-    assert(n->vm_running);
+    assert(n->vdev.vm_running);
 
     n->tx_waiting = 0;
 
@@ -823,7 +821,7 @@
     VirtIONet *n = opaque;
     int32_t ret;
 
-    assert(n->vm_running);
+    assert(n->vdev.vm_running);
 
     n->tx_waiting = 0;
 
@@ -988,16 +986,6 @@
     .link_status_changed = virtio_net_set_link_status,
 };
 
-static void virtio_net_vmstate_change(void *opaque, int running, int reason)
-{
-    VirtIONet *n = opaque;
-    n->vm_running = running;
-    /* This is called when vm is started/stopped,
-     * it will start/stop vhost backend if appropriate
-     * e.g. after migration. */
-    virtio_net_set_status(&n->vdev, n->vdev.status);
-}
-
 VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
                               virtio_net_conf *net)
 {
@@ -1052,7 +1040,6 @@
     n->qdev = dev;
     register_savevm(dev, "virtio-net", -1, VIRTIO_NET_VM_VERSION,
                     virtio_net_save, virtio_net_load, n);
-    n->vmstate = qemu_add_vm_change_state_handler(virtio_net_vmstate_change, n);
 
     add_boot_device_path(conf->bootindex, dev, "/ethernet-phy@0");
 
@@ -1062,7 +1049,6 @@
 void virtio_net_exit(VirtIODevice *vdev)
 {
     VirtIONet *n = DO_UPCAST(VirtIONet, vdev, vdev);
-    qemu_del_vm_change_state_handler(n->vmstate);
 
     /* This will stop vhost backend if appropriate. */
     virtio_net_set_status(vdev, 0);

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 6186142..d07ff97 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c

@@ -80,9 +80,13 @@
  * 12 is historical, and due to x86 page size. */
 #define VIRTIO_PCI_QUEUE_ADDR_SHIFT    12
 
-/* We can catch some guest bugs inside here so we continue supporting older
-   guests. */
-#define VIRTIO_PCI_BUG_BUS_MASTER	(1 << 0)
+/* Flags track per-device state like workarounds for quirks in older guests. */
+#define VIRTIO_PCI_FLAG_BUS_MASTER_BUG  (1 << 0)
+
+/* Performance improves when virtqueue kick processing is decoupled from the
+ * vcpu thread using ioeventfd for some devices. */
+#define VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT 1
+#define VIRTIO_PCI_FLAG_USE_IOEVENTFD   (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT)
 
 /* QEMU doesn't strictly need write barriers since everything runs in
  * lock-step.  We'll leave the calls to wmb() in though to make it obvious for
@@ -95,7 +99,7 @@
 typedef struct {
     PCIDevice pci_dev;
     VirtIODevice *vdev;
-    uint32_t bugs;
+    uint32_t flags;
     uint32_t addr;
     uint32_t class_code;
     uint32_t nvectors;
@@ -108,6 +112,8 @@
     /* Max. number of ports we can have for a the virtio-serial device */
     uint32_t max_virtserial_ports;
     virtio_net_conf net;
+    bool ioeventfd_disabled;
+    bool ioeventfd_started;
 } VirtIOPCIProxy;
 
 /* virtio device */
@@ -159,7 +165,7 @@
        in ready state. Then we have a buggy guest OS. */
     if ((proxy->vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) &&
         !(proxy->pci_dev.config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
-        proxy->bugs |= VIRTIO_PCI_BUG_BUS_MASTER;
+        proxy->flags |= VIRTIO_PCI_FLAG_BUS_MASTER_BUG;
     }
     return 0;
 }
@@ -180,12 +186,139 @@
     return 0;
 }
 
+static int virtio_pci_set_host_notifier_internal(VirtIOPCIProxy *proxy,
+                                                 int n, bool assign)
+{
+    VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
+    EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
+    int r;
+    if (assign) {
+        r = event_notifier_init(notifier, 1);
+        if (r < 0) {
+            error_report("%s: unable to init event notifier: %d",
+                         __func__, r);
+            return r;
+        }
+        r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
+                                       proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
+                                       n, assign);
+        if (r < 0) {
+            error_report("%s: unable to map ioeventfd: %d",
+                         __func__, r);
+            event_notifier_cleanup(notifier);
+        }
+    } else {
+        r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
+                                       proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
+                                       n, assign);
+        if (r < 0) {
+            error_report("%s: unable to unmap ioeventfd: %d",
+                         __func__, r);
+            return r;
+        }
+
+        /* Handle the race condition where the guest kicked and we deassigned
+         * before we got around to handling the kick.
+         */
+        if (event_notifier_test_and_clear(notifier)) {
+            virtio_queue_notify_vq(vq);
+        }
+
+        event_notifier_cleanup(notifier);
+    }
+    return r;
+}
+
+static void virtio_pci_host_notifier_read(void *opaque)
+{
+    VirtQueue *vq = opaque;
+    EventNotifier *n = virtio_queue_get_host_notifier(vq);
+    if (event_notifier_test_and_clear(n)) {
+        virtio_queue_notify_vq(vq);
+    }
+}
+
+static void virtio_pci_set_host_notifier_fd_handler(VirtIOPCIProxy *proxy,
+                                                    int n, bool assign)
+{
+    VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
+    EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
+    if (assign) {
+        qemu_set_fd_handler(event_notifier_get_fd(notifier),
+                            virtio_pci_host_notifier_read, NULL, vq);
+    } else {
+        qemu_set_fd_handler(event_notifier_get_fd(notifier),
+                            NULL, NULL, NULL);
+    }
+}
+
+static void virtio_pci_start_ioeventfd(VirtIOPCIProxy *proxy)
+{
+    int n, r;
+
+    if (!(proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD) ||
+        proxy->ioeventfd_disabled ||
+        proxy->ioeventfd_started) {
+        return;
+    }
+
+    for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) {
+        if (!virtio_queue_get_num(proxy->vdev, n)) {
+            continue;
+        }
+
+        r = virtio_pci_set_host_notifier_internal(proxy, n, true);
+        if (r < 0) {
+            goto assign_error;
+        }
+
+        virtio_pci_set_host_notifier_fd_handler(proxy, n, true);
+    }
+    proxy->ioeventfd_started = true;
+    return;
+
+assign_error:
+    while (--n >= 0) {
+        if (!virtio_queue_get_num(proxy->vdev, n)) {
+            continue;
+        }
+
+        virtio_pci_set_host_notifier_fd_handler(proxy, n, false);
+        r = virtio_pci_set_host_notifier_internal(proxy, n, false);
+        assert(r >= 0);
+    }
+    proxy->ioeventfd_started = false;
+    error_report("%s: failed. Fallback to a userspace (slower).", __func__);
+}
+
+static void virtio_pci_stop_ioeventfd(VirtIOPCIProxy *proxy)
+{
+    int r;
+    int n;
+
+    if (!proxy->ioeventfd_started) {
+        return;
+    }
+
+    for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) {
+        if (!virtio_queue_get_num(proxy->vdev, n)) {
+            continue;
+        }
+
+        virtio_pci_set_host_notifier_fd_handler(proxy, n, false);
+        r = virtio_pci_set_host_notifier_internal(proxy, n, false);
+        assert(r >= 0);
+    }
+    proxy->ioeventfd_started = false;
+}
+
 static void virtio_pci_reset(DeviceState *d)
 {
     VirtIOPCIProxy *proxy = container_of(d, VirtIOPCIProxy, pci_dev.qdev);
+    virtio_pci_stop_ioeventfd(proxy);
     virtio_reset(proxy->vdev);
     msix_reset(&proxy->pci_dev);
-    proxy->bugs = 0;
+    proxy->flags &= ~VIRTIO_PCI_FLAG_BUS_MASTER_BUG;
 }
 
 static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
@@ -210,6 +343,7 @@
     case VIRTIO_PCI_QUEUE_PFN:
         pa = (target_phys_addr_t)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT;
         if (pa == 0) {
+            virtio_pci_stop_ioeventfd(proxy);
             virtio_reset(proxy->vdev);
             msix_unuse_all_vectors(&proxy->pci_dev);
         }
@@ -224,7 +358,16 @@
         virtio_queue_notify(vdev, val);
         break;
     case VIRTIO_PCI_STATUS:
+        if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) {
+            virtio_pci_stop_ioeventfd(proxy);
+        }
+
         virtio_set_status(vdev, val & 0xFF);
+
+        if (val & VIRTIO_CONFIG_S_DRIVER_OK) {
+            virtio_pci_start_ioeventfd(proxy);
+        }
+
         if (vdev->status == 0) {
             virtio_reset(proxy->vdev);
             msix_unuse_all_vectors(&proxy->pci_dev);
@@ -235,7 +378,7 @@
            some safety checks. */
         if ((val & VIRTIO_CONFIG_S_DRIVER_OK) &&
             !(proxy->pci_dev.config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
-            proxy->bugs |= VIRTIO_PCI_BUG_BUS_MASTER;
+            proxy->flags |= VIRTIO_PCI_FLAG_BUS_MASTER_BUG;
         }
         break;
     case VIRTIO_MSI_CONFIG_VECTOR:
@@ -403,7 +546,8 @@
 
     if (PCI_COMMAND == address) {
         if (!(val & PCI_COMMAND_MASTER)) {
-            if (!(proxy->bugs & VIRTIO_PCI_BUG_BUS_MASTER)) {
+            if (!(proxy->flags & VIRTIO_PCI_FLAG_BUS_MASTER_BUG)) {
+                virtio_pci_stop_ioeventfd(proxy);
                 virtio_set_status(proxy->vdev,
                                   proxy->vdev->status & ~VIRTIO_CONFIG_S_DRIVER_OK);
             }
@@ -481,30 +625,30 @@
 static int virtio_pci_set_host_notifier(void *opaque, int n, bool assign)
 {
     VirtIOPCIProxy *proxy = opaque;
-    VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
-    EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
-    int r;
+
+    /* Stop using ioeventfd for virtqueue kick if the device starts using host
+     * notifiers.  This makes it easy to avoid stepping on each others' toes.
+     */
+    proxy->ioeventfd_disabled = assign;
     if (assign) {
-        r = event_notifier_init(notifier, 1);
-        if (r < 0) {
-            return r;
-        }
-        r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
-                                       proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
-                                       n, assign);
-        if (r < 0) {
-            event_notifier_cleanup(notifier);
-        }
-    } else {
-        r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
-                                       proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
-                                       n, assign);
-        if (r < 0) {
-            return r;
-        }
-        event_notifier_cleanup(notifier);
+        virtio_pci_stop_ioeventfd(proxy);
     }
-    return r;
+    /* We don't need to start here: it's not needed because backend
+     * currently only stops on status change away from ok,
+     * reset, vmstop and such. If we do add code to start here,
+     * need to check vmstate, device state etc. */
+    return virtio_pci_set_host_notifier_internal(proxy, n, assign);
+}
+
+static void virtio_pci_vmstate_change(void *opaque, bool running)
+{
+    VirtIOPCIProxy *proxy = opaque;
+
+    if (running) {
+        virtio_pci_start_ioeventfd(proxy);
+    } else {
+        virtio_pci_stop_ioeventfd(proxy);
+    }
 }
 
 static const VirtIOBindings virtio_pci_bindings = {
@@ -516,6 +660,7 @@
     .get_features = virtio_pci_get_features,
     .set_host_notifier = virtio_pci_set_host_notifier,
     .set_guest_notifiers = virtio_pci_set_guest_notifiers,
+    .vmstate_change = virtio_pci_vmstate_change,
 };
 
 static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
@@ -560,6 +705,10 @@
     pci_register_bar(&proxy->pci_dev, 0, size, PCI_BASE_ADDRESS_SPACE_IO,
                            virtio_map);
 
+    if (!kvm_has_many_ioeventfds()) {
+        proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD;
+    }
+
     virtio_bind_device(vdev, &virtio_pci_bindings, proxy);
     proxy->host_features |= 0x1 << VIRTIO_F_NOTIFY_ON_EMPTY;
     proxy->host_features |= 0x1 << VIRTIO_F_BAD_FEATURE;
@@ -598,6 +747,7 @@
 {
     VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
 
+    virtio_pci_stop_ioeventfd(proxy);
     virtio_blk_exit(proxy->vdev);
     blockdev_mark_auto_del(proxy->block.bs);
     return virtio_exit_pci(pci_dev);
@@ -659,6 +809,7 @@
 {
     VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
 
+    virtio_pci_stop_ioeventfd(proxy);
     virtio_net_exit(proxy->vdev);
     return virtio_exit_pci(pci_dev);
 }
@@ -706,6 +857,8 @@
         .qdev.props = (Property[]) {
             DEFINE_PROP_HEX32("class", VirtIOPCIProxy, class_code, 0),
             DEFINE_BLOCK_PROPERTIES(VirtIOPCIProxy, block),
+            DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags,
+                            VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
             DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2),
             DEFINE_VIRTIO_BLK_FEATURES(VirtIOPCIProxy, host_features),
             DEFINE_PROP_END_OF_LIST(),
@@ -718,6 +871,8 @@
         .exit       = virtio_net_exit_pci,
         .romfile    = "pxe-virtio.bin",
         .qdev.props = (Property[]) {
+            DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags,
+                            VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, false),
             DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3),
             DEFINE_VIRTIO_NET_FEATURES(VirtIOPCIProxy, host_features),
             DEFINE_NIC_PROPERTIES(VirtIOPCIProxy, nic),

diff --git a/hw/virtio.c b/hw/virtio.c
index 07dbf86..31bd9e3 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c

@@ -575,11 +575,19 @@
     return vdev->vq[n].vring.num;
 }
 
+void virtio_queue_notify_vq(VirtQueue *vq)
+{
+    if (vq->vring.desc) {
+        VirtIODevice *vdev = vq->vdev;
+        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
+        vq->handle_output(vdev, vq);
+    }
+}
+
 void virtio_queue_notify(VirtIODevice *vdev, int n)
 {
-    if (n < VIRTIO_PCI_QUEUE_MAX && vdev->vq[n].vring.desc) {
-        trace_virtio_queue_notify(vdev, n, &vdev->vq[n]);
-        vdev->vq[n].handle_output(vdev, &vdev->vq[n]);
+    if (n < VIRTIO_PCI_QUEUE_MAX) {
+        virtio_queue_notify_vq(&vdev->vq[n]);
     }
 }
 
@@ -743,11 +751,31 @@
 
 void virtio_cleanup(VirtIODevice *vdev)
 {
+    qemu_del_vm_change_state_handler(vdev->vmstate);
     if (vdev->config)
         qemu_free(vdev->config);
     qemu_free(vdev->vq);
 }
 
+static void virtio_vmstate_change(void *opaque, int running, int reason)
+{
+    VirtIODevice *vdev = opaque;
+    bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
+    vdev->vm_running = running;
+
+    if (backend_run) {
+        virtio_set_status(vdev, vdev->status);
+    }
+
+    if (vdev->binding->vmstate_change) {
+        vdev->binding->vmstate_change(vdev->binding_opaque, backend_run);
+    }
+
+    if (!backend_run) {
+        virtio_set_status(vdev, vdev->status);
+    }
+}
+
 VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
                                  size_t config_size, size_t struct_size)
 {
@@ -774,6 +802,8 @@
     else
         vdev->config = NULL;
 
+    vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change, vdev);
+
     return vdev;
 }
 

diff --git a/hw/virtio.h b/hw/virtio.h
index 02fa312..d8546d5 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h

@@ -95,6 +95,7 @@
     unsigned (*get_features)(void * opaque);
     int (*set_guest_notifiers)(void * opaque, bool assigned);
     int (*set_host_notifier)(void * opaque, int n, bool assigned);
+    void (*vmstate_change)(void * opaque, bool running);
 } VirtIOBindings;
 
 #define VIRTIO_PCI_QUEUE_MAX 64
@@ -123,6 +124,8 @@
     const VirtIOBindings *binding;
     void *binding_opaque;
     uint16_t device_id;
+    bool vm_running;
+    VMChangeStateEntry *vmstate;
 };
 
 static inline void virtio_set_status(VirtIODevice *vdev, uint8_t val)
@@ -219,5 +222,6 @@
 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n);
 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq);
 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
+void virtio_queue_notify_vq(VirtQueue *vq);
 void virtio_irq(VirtQueue *vq);
 #endif

diff --git a/hw/vmware_vga.c b/hw/vmware_vga.c
index d9dd52f..6c59053 100644
--- a/hw/vmware_vga.c
+++ b/hw/vmware_vga.c

@@ -1318,6 +1318,7 @@
     .qdev.name    = "vmware-svga",
     .qdev.size    = sizeof(struct pci_vmsvga_state_s),
     .qdev.vmsd    = &vmstate_vmware_vga,
+    .no_hotplug   = 1,
     .init         = pci_vmsvga_initfn,
     .romfile      = "vgabios-vmware.bin",
 };

diff --git a/kvm-all.c b/kvm-all.c
index cae24bb..255b6fa 100644
--- a/kvm-all.c
+++ b/kvm-all.c

@@ -28,6 +28,11 @@
 #include "kvm.h"
 #include "bswap.h"
 
+/* This check must be after config-host.h is included */
+#ifdef CONFIG_EVENTFD
+#include <sys/eventfd.h>
+#endif
+
 /* KVM uses PAGE_SIZE in it's definition of COALESCED_MMIO_MAX */
 #define PAGE_SIZE TARGET_PAGE_SIZE
 
@@ -72,6 +77,7 @@
     int irqchip_in_kernel;
     int pit_in_kernel;
     int xsave, xcrs;
+    int many_ioeventfds;
 };
 
 static KVMState *kvm_state;
@@ -441,6 +447,39 @@
     return ret;
 }
 
+static int kvm_check_many_ioeventfds(void)
+{
+    /* Older kernels have a 6 device limit on the KVM io bus.  Find out so we
+     * can avoid creating too many ioeventfds.
+     */
+#ifdef CONFIG_EVENTFD
+    int ioeventfds[7];
+    int i, ret = 0;
+    for (i = 0; i < ARRAY_SIZE(ioeventfds); i++) {
+        ioeventfds[i] = eventfd(0, EFD_CLOEXEC);
+        if (ioeventfds[i] < 0) {
+            break;
+        }
+        ret = kvm_set_ioeventfd_pio_word(ioeventfds[i], 0, i, true);
+        if (ret < 0) {
+            close(ioeventfds[i]);
+            break;
+        }
+    }
+
+    /* Decide whether many devices are supported or not */
+    ret = i == ARRAY_SIZE(ioeventfds);
+
+    while (i-- > 0) {
+        kvm_set_ioeventfd_pio_word(ioeventfds[i], 0, i, false);
+        close(ioeventfds[i]);
+    }
+    return ret;
+#else
+    return 0;
+#endif
+}
+
 static void kvm_set_phys_mem(target_phys_addr_t start_addr,
 			     ram_addr_t size,
 			     ram_addr_t phys_offset)
@@ -717,6 +756,8 @@
     kvm_state = s;
     cpu_register_phys_memory_client(&kvm_cpu_phys_memory_client);
 
+    s->many_ioeventfds = kvm_check_many_ioeventfds();
+
     return 0;
 
 err:
@@ -1046,6 +1087,14 @@
     return kvm_state->xcrs;
 }
 
+int kvm_has_many_ioeventfds(void)
+{
+    if (!kvm_enabled()) {
+        return 0;
+    }
+    return kvm_state->many_ioeventfds;
+}
+
 void kvm_setup_guest_memory(void *start, size_t size)
 {
     if (!kvm_has_sync_mmu()) {

diff --git a/kvm-stub.c b/kvm-stub.c
index 5384a4b..33d4476 100644
--- a/kvm-stub.c
+++ b/kvm-stub.c

@@ -99,6 +99,11 @@
     return 0;
 }
 
+int kvm_has_many_ioeventfds(void)
+{
+    return 0;
+}
+
 void kvm_setup_guest_memory(void *start, size_t size)
 {
 }

diff --git a/kvm.h b/kvm.h
index 60a9b42..ce08d42 100644
--- a/kvm.h
+++ b/kvm.h

@@ -42,6 +42,7 @@
 int kvm_has_debugregs(void);
 int kvm_has_xsave(void);
 int kvm_has_xcrs(void);
+int kvm_has_many_ioeventfds(void);
 
 #ifdef NEED_CPU_H
 int kvm_init_vcpu(CPUState *env);

diff --git a/qerror.c b/qerror.c
index ac2cdaf..9d0cdeb 100644
--- a/qerror.c
+++ b/qerror.c

@@ -101,6 +101,10 @@
         .desc      = "Device '%(device)' has no child bus",
     },
     {
+        .error_fmt = QERR_DEVICE_NO_HOTPLUG,
+        .desc      = "Device '%(device)' does not support hotplugging",
+    },
+    {
         .error_fmt = QERR_DUPLICATE_ID,
         .desc      = "Duplicate ID '%(id)' for %(object)",
     },

diff --git a/qerror.h b/qerror.h
index 943a24b..b0f69da 100644
--- a/qerror.h
+++ b/qerror.h

@@ -90,6 +90,9 @@
 #define QERR_DEVICE_NO_BUS \
     "{ 'class': 'DeviceNoBus', 'data': { 'device': %s } }"
 
+#define QERR_DEVICE_NO_HOTPLUG \
+    "{ 'class': 'DeviceNoHotplug', 'data': { 'device': %s } }"
+
 #define QERR_DUPLICATE_ID \
     "{ 'class': 'DuplicateId', 'data': { 'id': %s, 'object': %s } }"