Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20150714' into staging

s390x fixes for 2.4:
- virtio migration regression
- missing diag288 watchdog resets

# gpg: Signature made Tue Jul 14 18:17:54 2015 BST using RSA key ID C6F02FAF
# gpg: Good signature from "Cornelia Huck <huckc@linux.vnet.ibm.com>"
# gpg:                 aka "Cornelia Huck <cornelia.huck@de.ibm.com>"

* remotes/cohuck/tags/s390x-20150714:
  s390/virtio-ccw: Fix migration
  watchdog/diag288: correctly register for system reset requests

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/MAINTAINERS b/MAINTAINERS
index 411da3c..978b717 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1169,7 +1169,7 @@
 F: block/vmdk.c
 
 RBD
-M: Josh Durgin <josh.durgin@inktank.com>
+M: Josh Durgin <jdurgin@redhat.com>
 M: Jeff Cody <jcody@redhat.com>
 L: qemu-block@nongnu.org
 S: Supported
diff --git a/block.c b/block.c
index 5e80336..d088ee0 100644
--- a/block.c
+++ b/block.c
@@ -1102,12 +1102,46 @@
     return 0;
 }
 
+static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
+                                    BlockDriverState *child_bs,
+                                    const BdrvChildRole *child_role)
+{
+    BdrvChild *child = g_new(BdrvChild, 1);
+    *child = (BdrvChild) {
+        .bs     = child_bs,
+        .role   = child_role,
+    };
+
+    QLIST_INSERT_HEAD(&parent_bs->children, child, next);
+
+    return child;
+}
+
+static void bdrv_detach_child(BdrvChild *child)
+{
+    QLIST_REMOVE(child, next);
+    g_free(child);
+}
+
+void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
+{
+    BlockDriverState *child_bs = child->bs;
+
+    if (child->bs->inherits_from == parent) {
+        child->bs->inherits_from = NULL;
+    }
+
+    bdrv_detach_child(child);
+    bdrv_unref(child_bs);
+}
+
 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
 {
 
     if (bs->backing_hd) {
         assert(bs->backing_blocker);
         bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
+        bdrv_detach_child(bs->backing_child);
     } else if (backing_hd) {
         error_setg(&bs->backing_blocker,
                    "node is used as backing hd of '%s'",
@@ -1118,8 +1152,10 @@
     if (!backing_hd) {
         error_free(bs->backing_blocker);
         bs->backing_blocker = NULL;
+        bs->backing_child = NULL;
         goto out;
     }
+    bs->backing_child = bdrv_attach_child(bs, backing_hd, &child_backing);
     bs->open_flags &= ~BDRV_O_NO_BACKING;
     pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
     pstrcpy(bs->backing_format, sizeof(bs->backing_format),
@@ -1202,6 +1238,7 @@
         error_free(local_err);
         goto free_exit;
     }
+
     bdrv_set_backing_hd(bs, backing_hd);
 
 free_exit:
@@ -1214,7 +1251,7 @@
  * device's options.
  *
  * If allow_none is true, no image will be opened if filename is false and no
- * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
+ * BlockdevRef is given. NULL will be returned, but errp remains unset.
  *
  * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
@@ -1222,6 +1259,56 @@
  * BlockdevRef.
  *
  * The BlockdevRef will be removed from the options QDict.
+ */
+BdrvChild *bdrv_open_child(const char *filename,
+                           QDict *options, const char *bdref_key,
+                           BlockDriverState* parent,
+                           const BdrvChildRole *child_role,
+                           bool allow_none, Error **errp)
+{
+    BdrvChild *c = NULL;
+    BlockDriverState *bs;
+    QDict *image_options;
+    int ret;
+    char *bdref_key_dot;
+    const char *reference;
+
+    assert(child_role != NULL);
+
+    bdref_key_dot = g_strdup_printf("%s.", bdref_key);
+    qdict_extract_subqdict(options, &image_options, bdref_key_dot);
+    g_free(bdref_key_dot);
+
+    reference = qdict_get_try_str(options, bdref_key);
+    if (!filename && !reference && !qdict_size(image_options)) {
+        if (!allow_none) {
+            error_setg(errp, "A block device must be specified for \"%s\"",
+                       bdref_key);
+        }
+        QDECREF(image_options);
+        goto done;
+    }
+
+    bs = NULL;
+    ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
+                            parent, child_role, NULL, errp);
+    if (ret < 0) {
+        goto done;
+    }
+
+    c = bdrv_attach_child(parent, bs, child_role);
+
+done:
+    qdict_del(options, bdref_key);
+    return c;
+}
+
+/*
+ * This is a version of bdrv_open_child() that returns 0/-EINVAL instead of
+ * a BdrvChild object.
+ *
+ * If allow_none is true, no image will be opened if filename is false and no
+ * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
  *
  * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
  */
@@ -1230,37 +1317,24 @@
                     BlockDriverState* parent, const BdrvChildRole *child_role,
                     bool allow_none, Error **errp)
 {
-    QDict *image_options;
-    int ret;
-    char *bdref_key_dot;
-    const char *reference;
+    Error *local_err = NULL;
+    BdrvChild *c;
 
     assert(pbs);
     assert(*pbs == NULL);
 
-    bdref_key_dot = g_strdup_printf("%s.", bdref_key);
-    qdict_extract_subqdict(options, &image_options, bdref_key_dot);
-    g_free(bdref_key_dot);
-
-    reference = qdict_get_try_str(options, bdref_key);
-    if (!filename && !reference && !qdict_size(image_options)) {
-        if (allow_none) {
-            ret = 0;
-        } else {
-            error_setg(errp, "A block device must be specified for \"%s\"",
-                       bdref_key);
-            ret = -EINVAL;
-        }
-        QDECREF(image_options);
-        goto done;
+    c = bdrv_open_child(filename, options, bdref_key, parent, child_role,
+                        allow_none, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return -EINVAL;
     }
 
-    ret = bdrv_open_inherit(pbs, filename, reference, image_options, 0,
-                            parent, child_role, NULL, errp);
+    if (c != NULL) {
+        *pbs = c->bs;
+    }
 
-done:
-    qdict_del(options, bdref_key);
-    return ret;
+    return 0;
 }
 
 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
@@ -1328,19 +1402,6 @@
     return ret;
 }
 
-static void bdrv_attach_child(BlockDriverState *parent_bs,
-                              BlockDriverState *child_bs,
-                              const BdrvChildRole *child_role)
-{
-    BdrvChild *child = g_new(BdrvChild, 1);
-    *child = (BdrvChild) {
-        .bs     = child_bs,
-        .role   = child_role,
-    };
-
-    QLIST_INSERT_HEAD(&parent_bs->children, child, next);
-}
-
 /*
  * Opens a disk image (raw, qcow2, vmdk, ...)
  *
@@ -1393,9 +1454,6 @@
             return -ENODEV;
         }
         bdrv_ref(bs);
-        if (child_role) {
-            bdrv_attach_child(parent, bs, child_role);
-        }
         *pbs = bs;
         return 0;
     }
@@ -1540,10 +1598,6 @@
         goto close_and_fail;
     }
 
-    if (child_role) {
-        bdrv_attach_child(parent, bs, child_role);
-    }
-
     QDECREF(options);
     *pbs = bs;
     return 0;
@@ -1849,20 +1903,23 @@
     if (bs->drv) {
         BdrvChild *child, *next;
 
-        QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
-            if (child->bs->inherits_from == bs) {
-                child->bs->inherits_from = NULL;
-            }
-            QLIST_REMOVE(child, next);
-            g_free(child);
-        }
+        bs->drv->bdrv_close(bs);
 
         if (bs->backing_hd) {
             BlockDriverState *backing_hd = bs->backing_hd;
             bdrv_set_backing_hd(bs, NULL);
             bdrv_unref(backing_hd);
         }
-        bs->drv->bdrv_close(bs);
+
+        QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
+            /* TODO Remove bdrv_unref() from drivers' close function and use
+             * bdrv_unref_child() here */
+            if (child->bs->inherits_from == bs) {
+                child->bs->inherits_from = NULL;
+            }
+            bdrv_detach_child(child);
+        }
+
         g_free(bs->opaque);
         bs->opaque = NULL;
         bs->drv = NULL;
@@ -2116,7 +2173,6 @@
     /* The contents of 'tmp' will become bs_top, as we are
      * swapping bs_new and bs_top contents. */
     bdrv_set_backing_hd(bs_top, bs_new);
-    bdrv_attach_child(bs_top, bs_new, &child_backing);
 }
 
 static void bdrv_delete(BlockDriverState *bs)
diff --git a/block/rbd.c b/block/rbd.c
index fbe87e0..a60a19d 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -74,25 +74,18 @@
     QEMUIOVector *qiov;
     char *bounce;
     RBDAIOCmd cmd;
-    int64_t sector_num;
     int error;
     struct BDRVRBDState *s;
-    int status;
 } RBDAIOCB;
 
 typedef struct RADOSCB {
-    int rcbid;
     RBDAIOCB *acb;
     struct BDRVRBDState *s;
-    int done;
     int64_t size;
     char *buf;
     int64_t ret;
 } RADOSCB;
 
-#define RBD_FD_READ 0
-#define RBD_FD_WRITE 1
-
 typedef struct BDRVRBDState {
     rados_t cluster;
     rados_ioctx_t io_ctx;
@@ -235,7 +228,9 @@
     return NULL;
 }
 
-static int qemu_rbd_set_conf(rados_t cluster, const char *conf, Error **errp)
+static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
+                             bool only_read_conf_file,
+                             Error **errp)
 {
     char *p, *buf;
     char name[RBD_MAX_CONF_NAME_SIZE];
@@ -267,14 +262,18 @@
         qemu_rbd_unescape(value);
 
         if (strcmp(name, "conf") == 0) {
-            ret = rados_conf_read_file(cluster, value);
-            if (ret < 0) {
-                error_setg(errp, "error reading conf file %s", value);
-                break;
+            /* read the conf file alone, so it doesn't override more
+               specific settings for a particular device */
+            if (only_read_conf_file) {
+                ret = rados_conf_read_file(cluster, value);
+                if (ret < 0) {
+                    error_setg(errp, "error reading conf file %s", value);
+                    break;
+                }
             }
         } else if (strcmp(name, "id") == 0) {
             /* ignore, this is parsed by qemu_rbd_parse_clientname() */
-        } else {
+        } else if (!only_read_conf_file) {
             ret = rados_conf_set(cluster, name, value);
             if (ret < 0) {
                 error_setg(errp, "invalid conf option %s", name);
@@ -337,10 +336,15 @@
     if (strstr(conf, "conf=") == NULL) {
         /* try default location, but ignore failure */
         rados_conf_read_file(cluster, NULL);
+    } else if (conf[0] != '\0' &&
+               qemu_rbd_set_conf(cluster, conf, true, &local_err) < 0) {
+        rados_shutdown(cluster);
+        error_propagate(errp, local_err);
+        return -EIO;
     }
 
     if (conf[0] != '\0' &&
-        qemu_rbd_set_conf(cluster, conf, &local_err) < 0) {
+        qemu_rbd_set_conf(cluster, conf, false, &local_err) < 0) {
         rados_shutdown(cluster);
         error_propagate(errp, local_err);
         return -EIO;
@@ -405,7 +409,6 @@
     }
     qemu_vfree(acb->bounce);
     acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
-    acb->status = 0;
 
     qemu_aio_unref(acb);
 }
@@ -468,6 +471,23 @@
         s->snap = g_strdup(snap_buf);
     }
 
+    if (strstr(conf, "conf=") == NULL) {
+        /* try default location, but ignore failure */
+        rados_conf_read_file(s->cluster, NULL);
+    } else if (conf[0] != '\0') {
+        r = qemu_rbd_set_conf(s->cluster, conf, true, errp);
+        if (r < 0) {
+            goto failed_shutdown;
+        }
+    }
+
+    if (conf[0] != '\0') {
+        r = qemu_rbd_set_conf(s->cluster, conf, false, errp);
+        if (r < 0) {
+            goto failed_shutdown;
+        }
+    }
+
     /*
      * Fallback to more conservative semantics if setting cache
      * options fails. Ignore errors from setting rbd_cache because the
@@ -481,18 +501,6 @@
         rados_conf_set(s->cluster, "rbd_cache", "true");
     }
 
-    if (strstr(conf, "conf=") == NULL) {
-        /* try default location, but ignore failure */
-        rados_conf_read_file(s->cluster, NULL);
-    }
-
-    if (conf[0] != '\0') {
-        r = qemu_rbd_set_conf(s->cluster, conf, errp);
-        if (r < 0) {
-            goto failed_shutdown;
-        }
-    }
-
     r = rados_connect(s->cluster);
     if (r < 0) {
         error_setg(errp, "error connecting");
@@ -621,7 +629,6 @@
     acb->error = 0;
     acb->s = s;
     acb->bh = NULL;
-    acb->status = -EINPROGRESS;
 
     if (cmd == RBD_AIO_WRITE) {
         qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
@@ -633,7 +640,6 @@
     size = nb_sectors * BDRV_SECTOR_SIZE;
 
     rcb = g_new(RADOSCB, 1);
-    rcb->done = 0;
     rcb->acb = acb;
     rcb->buf = buf;
     rcb->s = acb->s;
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index c6a6a0e..40d4880 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -207,11 +207,23 @@
     } else {
         req->status = NVME_INTERNAL_DEV_ERROR;
     }
-
-    qemu_sglist_destroy(&req->qsg);
+    if (req->has_sg) {
+        qemu_sglist_destroy(&req->qsg);
+    }
     nvme_enqueue_req_completion(cq, req);
 }
 
+static uint16_t nvme_flush(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
+    NvmeRequest *req)
+{
+    req->has_sg = false;
+    block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
+         BLOCK_ACCT_FLUSH);
+    req->aiocb = blk_aio_flush(n->conf.blk, nvme_rw_cb, req);
+
+    return NVME_NO_COMPLETE;
+}
+
 static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
     NvmeRequest *req)
 {
@@ -235,6 +247,7 @@
     }
     assert((nlb << data_shift) == req->qsg.size);
 
+    req->has_sg = true;
     dma_acct_start(n->conf.blk, &req->acct, &req->qsg,
                    is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
     req->aiocb = is_write ?
@@ -256,7 +269,7 @@
     ns = &n->namespaces[nsid - 1];
     switch (cmd->opcode) {
     case NVME_CMD_FLUSH:
-        return NVME_SUCCESS;
+        return nvme_flush(n, ns, cmd, req);
     case NVME_CMD_WRITE:
     case NVME_CMD_READ:
         return nvme_rw(n, ns, cmd, req);
@@ -474,26 +487,32 @@
 static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
     uint32_t dw10 = le32_to_cpu(cmd->cdw10);
+    uint32_t result;
 
     switch (dw10) {
-    case NVME_NUMBER_OF_QUEUES:
-        req->cqe.result =
-            cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16));
-        break;
     case NVME_VOLATILE_WRITE_CACHE:
-        req->cqe.result = cpu_to_le32(1);
+        result = blk_enable_write_cache(n->conf.blk);
+        break;
+    case NVME_NUMBER_OF_QUEUES:
+        result = cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16));
         break;
     default:
         return NVME_INVALID_FIELD | NVME_DNR;
     }
+
+    req->cqe.result = result;
     return NVME_SUCCESS;
 }
 
 static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
     uint32_t dw10 = le32_to_cpu(cmd->cdw10);
+    uint32_t dw11 = le32_to_cpu(cmd->cdw11);
 
     switch (dw10) {
+    case NVME_VOLATILE_WRITE_CACHE:
+        blk_set_enable_write_cache(n->conf.blk, dw11 & 1);
+        break;
     case NVME_NUMBER_OF_QUEUES:
         req->cqe.result =
             cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16));
@@ -818,6 +837,9 @@
     id->psd[0].mp = cpu_to_le16(0x9c4);
     id->psd[0].enlat = cpu_to_le32(0x10);
     id->psd[0].exlat = cpu_to_le32(0x4);
+    if (blk_enable_write_cache(n->conf.blk)) {
+        id->vwc = 1;
+    }
 
     n->bar.cap = 0;
     NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index b6ccb65..bf3a3cc 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -638,6 +638,7 @@
     struct NvmeSQueue       *sq;
     BlockAIOCB              *aiocb;
     uint16_t                status;
+    bool                    has_sg;
     NvmeCqe                 cqe;
     BlockAcctCookie         acct;
     QEMUSGList              qsg;
diff --git a/include/block/block.h b/include/block/block.h
index 06e4137..37916f7 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -12,6 +12,7 @@
 /* block.c */
 typedef struct BlockDriver BlockDriver;
 typedef struct BlockJob BlockJob;
+typedef struct BdrvChild BdrvChild;
 typedef struct BdrvChildRole BdrvChildRole;
 
 typedef struct BlockDriverInfo {
@@ -208,6 +209,11 @@
                     QDict *options, const char *bdref_key,
                     BlockDriverState* parent, const BdrvChildRole *child_role,
                     bool allow_none, Error **errp);
+BdrvChild *bdrv_open_child(const char *filename,
+                           QDict *options, const char *bdref_key,
+                           BlockDriverState* parent,
+                           const BdrvChildRole *child_role,
+                           bool allow_none, Error **errp);
 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd);
 int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp);
 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp);
@@ -507,6 +513,7 @@
 
 void bdrv_ref(BlockDriverState *bs);
 void bdrv_unref(BlockDriverState *bs);
+void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
 
 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp);
 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 8996baf..14ad4c3 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -335,11 +335,11 @@
 extern const BdrvChildRole child_file;
 extern const BdrvChildRole child_format;
 
-typedef struct BdrvChild {
+struct BdrvChild {
     BlockDriverState *bs;
     const BdrvChildRole *role;
     QLIST_ENTRY(BdrvChild) next;
-} BdrvChild;
+};
 
 /*
  * Note: the function bdrv_append() copies and swaps contents of
@@ -379,6 +379,7 @@
     char exact_filename[PATH_MAX];
 
     BlockDriverState *backing_hd;
+    BdrvChild *backing_child;
     BlockDriverState *file;
 
     NotifierList close_notifiers;