Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging

# gpg: Signature made Thu 03 Dec 2015 04:59:48 GMT using RSA key ID 81AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"

* remotes/stefanha/tags/block-pull-request:
  iotests: Add regression test case for write notifier assertion failure
  iotests: Add "add_drive_raw" method
  block: Don't wait serialising for non-COR read requests
  iothread: include id in thread name

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index fb103b7..63da303 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -1555,6 +1555,17 @@
                           d->hotplugged, 1);
 }
 
+static void virtio_ccw_post_plugged(DeviceState *d, Error **errp)
+{
+   VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d);
+   VirtIODevice *vdev = virtio_bus_get_device(&dev->bus);
+
+   if (!virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1)) {
+        /* A backend didn't support modern virtio. */
+       dev->max_rev = 0;
+   }
+}
+
 static void virtio_ccw_device_unplugged(DeviceState *d)
 {
     VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d);
@@ -1891,6 +1902,7 @@
     k->save_config = virtio_ccw_save_config;
     k->load_config = virtio_ccw_load_config;
     k->device_plugged = virtio_ccw_device_plugged;
+    k->post_plugged = virtio_ccw_post_plugged;
     k->device_unplugged = virtio_ccw_device_unplugged;
 }
 
diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
index febda76..81c7cdd 100644
--- a/hw/virtio/virtio-bus.c
+++ b/hw/virtio/virtio-bus.c
@@ -56,6 +56,9 @@
     assert(vdc->get_features != NULL);
     vdev->host_features = vdc->get_features(vdev, vdev->host_features,
                                             errp);
+    if (klass->post_plugged != NULL) {
+        klass->post_plugged(qbus->parent, errp);
+    }
 }
 
 /* Reset the virtio_bus */
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index dd48562..94667e6 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1814,13 +1814,10 @@
 
     address_space_init(&proxy->modern_as, &proxy->modern_cfg, "virtio-pci-cfg-as");
 
-    if (!(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_PCIE)
-        && !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_MODERN)
-        && pci_bus_is_express(pci_dev->bus)
-        && !pci_bus_is_root(pci_dev->bus)) {
+    if (pci_is_express(pci_dev) && pci_bus_is_express(pci_dev->bus) &&
+        !pci_bus_is_root(pci_dev->bus)) {
         int pos;
 
-        pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
         pos = pcie_endpoint_cap_init(pci_dev, 0);
         assert(pos > 0);
 
@@ -1832,6 +1829,12 @@
          * PCI Power Management Interface Specification.
          */
         pci_set_word(pci_dev->config + pos + PCI_PM_PMC, 0x3);
+    } else {
+        /*
+         * make future invocations of pci_is_express() return false
+         * and pci_config_size() return PCI_CONFIG_SPACE_SIZE.
+         */
+        pci_dev->cap_present &= ~QEMU_PCI_CAP_EXPRESS;
     }
 
     virtio_pci_bus_new(&proxy->bus, sizeof(proxy->bus), proxy);
@@ -1879,10 +1882,25 @@
     DEFINE_PROP_END_OF_LIST(),
 };
 
+static void virtio_pci_dc_realize(DeviceState *qdev, Error **errp)
+{
+    VirtioPCIClass *vpciklass = VIRTIO_PCI_GET_CLASS(qdev);
+    VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
+    PCIDevice *pci_dev = &proxy->pci_dev;
+
+    if (!(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_PCIE) &&
+        !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_MODERN)) {
+        pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
+    }
+
+    vpciklass->parent_dc_realize(qdev, errp);
+}
+
 static void virtio_pci_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+    VirtioPCIClass *vpciklass = VIRTIO_PCI_CLASS(klass);
 
     dc->props = virtio_pci_properties;
     k->realize = virtio_pci_realize;
@@ -1890,6 +1908,8 @@
     k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
     k->revision = VIRTIO_PCI_ABI_VERSION;
     k->class_id = PCI_CLASS_OTHERS;
+    vpciklass->parent_dc_realize = dc->realize;
+    dc->realize = virtio_pci_dc_realize;
     dc->reset = virtio_pci_reset;
 }
 
diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
index ffb74bb..a104ff2 100644
--- a/hw/virtio/virtio-pci.h
+++ b/hw/virtio/virtio-pci.h
@@ -105,6 +105,7 @@
 
 typedef struct VirtioPCIClass {
     PCIDeviceClass parent_class;
+    DeviceRealize parent_dc_realize;
     void (*realize)(VirtIOPCIProxy *vpci_dev, Error **errp);
 } VirtioPCIClass;
 
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index 7437fd4..b60d758 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -66,7 +66,6 @@
 int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
                    VhostBackendType backend_type);
 void vhost_dev_cleanup(struct vhost_dev *hdev);
-bool vhost_dev_query(struct vhost_dev *hdev, VirtIODevice *vdev);
 int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev);
 void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev);
 int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
diff --git a/include/hw/virtio/virtio-bus.h b/include/hw/virtio/virtio-bus.h
index 6c3d4cb..3f2c136 100644
--- a/include/hw/virtio/virtio-bus.h
+++ b/include/hw/virtio/virtio-bus.h
@@ -60,6 +60,11 @@
      */
     void (*device_plugged)(DeviceState *d, Error **errp);
     /*
+     * Re-evaluate setup after feature bits have been validated
+     * by the device backend.
+     */
+    void (*post_plugged)(DeviceState *d, Error **errp);
+    /*
      * transport independent exit function.
      * This is called by virtio-bus just before the device is unplugged.
      */
diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h
index 56388e6..0899b2f 100644
--- a/include/qemu/mmap-alloc.h
+++ b/include/qemu/mmap-alloc.h
@@ -3,6 +3,8 @@
 
 #include "qemu-common.h"
 
+size_t qemu_fd_getpagesize(int fd);
+
 void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared);
 
 void qemu_ram_munmap(void *ptr, size_t size);
diff --git a/migration/migration.c b/migration/migration.c
index 1a42aee..adc6b6f 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1674,7 +1674,7 @@
         if (current_time >= initial_time + BUFFER_DELAY) {
             uint64_t transferred_bytes = qemu_ftell(s->file) - initial_bytes;
             uint64_t time_spent = current_time - initial_time;
-            double bandwidth = transferred_bytes / time_spent;
+            double bandwidth = (double)transferred_bytes / time_spent;
             max_size = bandwidth * migrate_max_downtime() / 1000000;
 
             s->mbps = time_spent ? (((double) transferred_bytes * 8.0) /
diff --git a/migration/qemu-file-unix.c b/migration/qemu-file-unix.c
index c503b02..6ca53e7 100644
--- a/migration/qemu-file-unix.c
+++ b/migration/qemu-file-unix.c
@@ -72,7 +72,8 @@
             pfd.fd = s->fd;
             pfd.events = G_IO_OUT | G_IO_ERR;
             pfd.revents = 0;
-            g_poll(&pfd, 1 /* 1 fd */, -1 /* no timeout */);
+            TFR(err = g_poll(&pfd, 1, -1 /* no timeout */));
+            /* Errors other than EINTR intentionally ignored */
         }
      }
 
diff --git a/net/vhost-user.c b/net/vhost-user.c
index 5071602..b368a90 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -316,6 +316,11 @@
     }
 
     queues = vhost_user_opts->has_queues ? vhost_user_opts->queues : 1;
+    if (queues < 1) {
+        error_setg(errp,
+                   "vhost-user number of queues must be bigger than zero");
+        return -1;
+    }
 
     return net_vhost_user_init(peer, "vhost_user", name, chr, queues);
 }
diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
index 85c4c8a..9fb09f1 100644
--- a/tests/vhost-user-bridge.c
+++ b/tests/vhost-user-bridge.c
@@ -113,7 +113,6 @@
     return 0;
 }
 
-#if 0
 /* dispatcher_remove() is not currently in use but may be useful
  * in the future. */
 static int
@@ -127,9 +126,9 @@
     }
 
     FD_CLR(sock, &dispr->fdset);
+    DPRINT("Sock %d removed from dispatcher watch.\n", sock);
     return 0;
 }
-#endif
 
 /* timeout in us */
 static int
@@ -156,11 +155,16 @@
     /* Now call callback for every ready socket. */
 
     int sock;
-    for (sock = 0; sock < dispr->max_sock + 1; sock++)
-        if (FD_ISSET(sock, &fdset)) {
+    for (sock = 0; sock < dispr->max_sock + 1; sock++) {
+        /* The callback on a socket can remove other sockets from the
+         * dispatcher, thus we have to check that the socket has
+         * not been removed from the dispatcher's list.
+         */
+        if (FD_ISSET(sock, &fdset) && FD_ISSET(sock, &dispr->fdset)) {
             Event *e = &dispr->events[sock];
             e->callback(sock, e->ctx);
         }
+    }
 
     return 0;
 }
@@ -837,9 +841,10 @@
         if (mmap_addr == MAP_FAILED) {
             vubr_die("mmap");
         }
-
         dev_region->mmap_addr = (uint64_t) mmap_addr;
         DPRINT("    mmap_addr:       0x%016"PRIx64"\n", dev_region->mmap_addr);
+
+        close(vmsg->fds[i]);
     }
 
     return 0;
@@ -950,6 +955,17 @@
      * we have to respect * VHOST_USER_SET_VRING_ENABLE request. */
     dev->ready = 0;
 
+    if (dev->vq[index].call_fd != -1) {
+        close(dev->vq[index].call_fd);
+        dispatcher_remove(&dev->dispatcher, dev->vq[index].call_fd);
+        dev->vq[index].call_fd = -1;
+    }
+    if (dev->vq[index].kick_fd != -1) {
+        close(dev->vq[index].kick_fd);
+        dispatcher_remove(&dev->dispatcher, dev->vq[index].kick_fd);
+        dev->vq[index].kick_fd = -1;
+    }
+
     /* Reply */
     return 1;
 }
@@ -965,6 +981,10 @@
     assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
     assert(vmsg->fd_num == 1);
 
+    if (dev->vq[index].kick_fd != -1) {
+        close(dev->vq[index].kick_fd);
+        dispatcher_remove(&dev->dispatcher, dev->vq[index].kick_fd);
+    }
     dev->vq[index].kick_fd = vmsg->fds[0];
     DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index);
 
@@ -999,6 +1019,10 @@
     assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
     assert(vmsg->fd_num == 1);
 
+    if (dev->vq[index].call_fd != -1) {
+        close(dev->vq[index].call_fd);
+        dispatcher_remove(&dev->dispatcher, dev->vq[index].call_fd);
+    }
     dev->vq[index].call_fd = vmsg->fds[0];
     DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index);
 
diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
index e4c36af..29de739 100644
--- a/tests/vhost-user-test.c
+++ b/tests/vhost-user-test.c
@@ -123,6 +123,7 @@
 
 typedef struct TestServer {
     gchar *socket_path;
+    gchar *mig_path;
     gchar *chr_name;
     CharDriverState *chr;
     int fds_num;
@@ -216,8 +217,7 @@
 
 static void *thread_function(void *data)
 {
-    GMainLoop *loop;
-    loop = g_main_loop_new(NULL, FALSE);
+    GMainLoop *loop = data;
     g_main_loop_run(loop);
     return NULL;
 }
@@ -365,6 +365,7 @@
     gchar *chr_path;
 
     server->socket_path = g_strdup_printf("%s/%s.sock", tmpfs, name);
+    server->mig_path = g_strdup_printf("%s/%s.mig", tmpfs, name);
 
     chr_path = g_strdup_printf("unix:%s,server,nowait", server->socket_path);
     server->chr_name = g_strdup_printf("chr-%s", name);
@@ -389,7 +390,7 @@
     g_strdup_printf(QEMU_CMD extra, (mem), (mem), (root), (s)->chr_name,       \
                     (s)->socket_path, (s)->chr_name, ##__VA_ARGS__)
 
-static void test_server_free(TestServer *server)
+static gboolean _test_server_free(TestServer *server)
 {
     int i;
 
@@ -406,9 +407,18 @@
     unlink(server->socket_path);
     g_free(server->socket_path);
 
+    unlink(server->mig_path);
+    g_free(server->mig_path);
 
     g_free(server->chr_name);
     g_free(server);
+
+    return FALSE;
+}
+
+static void test_server_free(TestServer *server)
+{
+    g_idle_add((GSourceFunc)_test_server_free, server);
 }
 
 static void wait_for_log_fd(TestServer *s)
@@ -496,18 +506,29 @@
     return FALSE;
 }
 
+#if !GLIB_CHECK_VERSION(2,36,0)
+/* This callback is unnecessary with glib >= 2.36: the default
+ * prepare for the source does the same. */
+static gboolean
+test_migrate_source_prepare(GSource *source, gint *timeout)
+{
+    *timeout = -1;
+    return FALSE;
+}
+#endif
+
 GSourceFuncs test_migrate_source_funcs = {
-    NULL,
-    test_migrate_source_check,
-    NULL,
-    NULL
+#if !GLIB_CHECK_VERSION(2,36,0)
+    .prepare = test_migrate_source_prepare,
+#endif
+    .check = test_migrate_source_check,
 };
 
 static void test_migrate(void)
 {
     TestServer *s = test_server_new("src");
     TestServer *dest = test_server_new("dest");
-    const char *uri = "tcp:127.0.0.1:1234";
+    char *uri = g_strdup_printf("%s%s", "unix:", dest->mig_path);
     QTestState *global = global_qtest, *from, *to;
     GSource *source;
     gchar *cmd;
@@ -578,6 +599,7 @@
     test_server_free(dest);
     qtest_quit(from);
     test_server_free(s);
+    g_free(uri);
 
     global_qtest = global;
 }
@@ -590,6 +612,8 @@
     char *qemu_cmd = NULL;
     int ret;
     char template[] = "/tmp/vhost-test-XXXXXX";
+    GMainLoop *loop;
+    GThread *thread;
 
     g_test_init(&argc, &argv, NULL);
 
@@ -612,8 +636,9 @@
 
     server = test_server_new("test");
 
+    loop = g_main_loop_new(NULL, FALSE);
     /* run the main loop thread so the chardev may operate */
-    g_thread_new(NULL, thread_function, NULL);
+    thread = g_thread_new(NULL, thread_function, loop);
 
     qemu_cmd = GET_QEMU_CMD(server);
 
@@ -632,6 +657,14 @@
     /* cleanup */
     test_server_free(server);
 
+    /* finish the helper thread and dispatch pending sources */
+    g_main_loop_quit(loop);
+    g_thread_join(thread);
+    while (g_main_context_pending(NULL)) {
+        g_main_context_iteration (NULL, TRUE);
+    }
+    g_main_loop_unref(loop);
+
     ret = rmdir(tmpfs);
     if (ret != 0) {
         g_test_message("unable to rmdir: path (%s): %s\n",
diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
index c37acbe..54793a5 100644
--- a/util/mmap-alloc.c
+++ b/util/mmap-alloc.c
@@ -14,6 +14,32 @@
 #include <sys/mman.h>
 #include <assert.h>
 
+#define HUGETLBFS_MAGIC       0x958458f6
+
+#ifdef CONFIG_LINUX
+#include <sys/vfs.h>
+#endif
+
+size_t qemu_fd_getpagesize(int fd)
+{
+#ifdef CONFIG_LINUX
+    struct statfs fs;
+    int ret;
+
+    if (fd != -1) {
+        do {
+            ret = fstatfs(fd, &fs);
+        } while (ret != 0 && errno == EINTR);
+
+        if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) {
+            return fs.f_bsize;
+        }
+    }
+#endif
+
+    return getpagesize();
+}
+
 void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
 {
     /*
@@ -21,7 +47,20 @@
      * space, even if size is already aligned.
      */
     size_t total = size + align;
+#if defined(__powerpc64__) && defined(__linux__)
+    /* On ppc64 mappings in the same segment (aka slice) must share the same
+     * page size. Since we will be re-allocating part of this segment
+     * from the supplied fd, we should make sure to use the same page size,
+     * unless we are using the system page size, in which case anonymous memory
+     * is OK. Use align as a hint for the page size.
+     * In this case, set MAP_NORESERVE to avoid allocating backing store memory.
+     */
+    int anonfd = fd == -1 || qemu_fd_getpagesize(fd) == getpagesize() ? -1 : fd;
+    int flags = anonfd == -1 ? MAP_ANONYMOUS : MAP_NORESERVE;
+    void *ptr = mmap(0, total, PROT_NONE, flags | MAP_PRIVATE, anonfd, 0);
+#else
     void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+#endif
     size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
     void *ptr1;
 
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 914cef5..d25f671 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -46,7 +46,6 @@
 #else
 #  define QEMU_VMALLOC_ALIGN getpagesize()
 #endif
-#define HUGETLBFS_MAGIC       0x958458f6
 
 #include <termios.h>
 #include <unistd.h>
@@ -65,7 +64,6 @@
 
 #ifdef CONFIG_LINUX
 #include <sys/syscall.h>
-#include <sys/vfs.h>
 #endif
 
 #ifdef __FreeBSD__
@@ -340,26 +338,6 @@
     siglongjmp(sigjump, 1);
 }
 
-static size_t fd_getpagesize(int fd)
-{
-#ifdef CONFIG_LINUX
-    struct statfs fs;
-    int ret;
-
-    if (fd != -1) {
-        do {
-            ret = fstatfs(fd, &fs);
-        } while (ret != 0 && errno == EINTR);
-
-        if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) {
-            return fs.f_bsize;
-        }
-    }
-#endif
-
-    return getpagesize();
-}
-
 void os_mem_prealloc(int fd, char *area, size_t memory)
 {
     int ret;
@@ -387,7 +365,7 @@
         exit(1);
     } else {
         int i;
-        size_t hpagesize = fd_getpagesize(fd);
+        size_t hpagesize = qemu_fd_getpagesize(fd);
         size_t numpages = DIV_ROUND_UP(memory, hpagesize);
 
         /* MAP_POPULATE silently ignores failures */