Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging

Block layer patches

# gpg: Signature made Fri 23 Oct 2015 17:59:56 BST using RSA key ID C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"

* remotes/kevin/tags/for-upstream: (37 commits)
  tests: Add test case for aio_disable_external
  block: Add "drained begin/end" for internal snapshot
  block: Add "drained begin/end" for transactional blockdev-backup
  block: Add "drained begin/end" for transactional backup
  block: Add "drained begin/end" for transactional external snapshot
  block: Introduce "drained begin/end" API
  aio: introduce aio_{disable,enable}_external
  dataplane: Mark host notifiers' client type as "external"
  nbd: Mark fd handlers client type as "external"
  aio: Add "is_external" flag for event handlers
  throttle: Remove throttle_group_lock/unlock()
  blockdev: Allow more options for BB-less BDS tree
  blockdev: Pull out blockdev option extraction
  blockdev: Do not create BDS for empty drive
  block: Prepare for NULL BDS
  block: Add blk_insert_bs()
  block: Prepare remaining BB functions for NULL BDS
  block: Fail requests to empty BlockBackend
  block: Make some BB functions fall back to BBRS
  block: Add BlockBackendRootState
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/aio-posix.c b/aio-posix.c
index d477033..0467f23 100644
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -25,6 +25,7 @@
     IOHandler *io_write;
     int deleted;
     void *opaque;
+    bool is_external;
     QLIST_ENTRY(AioHandler) node;
 };
 
@@ -43,6 +44,7 @@
 
 void aio_set_fd_handler(AioContext *ctx,
                         int fd,
+                        bool is_external,
                         IOHandler *io_read,
                         IOHandler *io_write,
                         void *opaque)
@@ -82,6 +84,7 @@
         node->io_read = io_read;
         node->io_write = io_write;
         node->opaque = opaque;
+        node->is_external = is_external;
 
         node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
         node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
@@ -92,10 +95,11 @@
 
 void aio_set_event_notifier(AioContext *ctx,
                             EventNotifier *notifier,
+                            bool is_external,
                             EventNotifierHandler *io_read)
 {
     aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
-                       (IOHandler *)io_read, NULL, notifier);
+                       is_external, (IOHandler *)io_read, NULL, notifier);
 }
 
 bool aio_prepare(AioContext *ctx)
@@ -257,7 +261,8 @@
 
     /* fill pollfds */
     QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-        if (!node->deleted && node->pfd.events) {
+        if (!node->deleted && node->pfd.events
+            && aio_node_check(ctx, node->is_external)) {
             add_pollfd(node);
         }
     }
diff --git a/aio-win32.c b/aio-win32.c
index 50a6867..43c4c79 100644
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -28,11 +28,13 @@
     GPollFD pfd;
     int deleted;
     void *opaque;
+    bool is_external;
     QLIST_ENTRY(AioHandler) node;
 };
 
 void aio_set_fd_handler(AioContext *ctx,
                         int fd,
+                        bool is_external,
                         IOHandler *io_read,
                         IOHandler *io_write,
                         void *opaque)
@@ -86,6 +88,7 @@
         node->opaque = opaque;
         node->io_read = io_read;
         node->io_write = io_write;
+        node->is_external = is_external;
 
         event = event_notifier_get_handle(&ctx->notifier);
         WSAEventSelect(node->pfd.fd, event,
@@ -98,6 +101,7 @@
 
 void aio_set_event_notifier(AioContext *ctx,
                             EventNotifier *e,
+                            bool is_external,
                             EventNotifierHandler *io_notify)
 {
     AioHandler *node;
@@ -133,6 +137,7 @@
             node->e = e;
             node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
             node->pfd.events = G_IO_IN;
+            node->is_external = is_external;
             QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
 
             g_source_add_poll(&ctx->source, &node->pfd);
@@ -304,7 +309,8 @@
     /* fill fd sets */
     count = 0;
     QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-        if (!node->deleted && node->io_notify) {
+        if (!node->deleted && node->io_notify
+            && aio_node_check(ctx, node->is_external)) {
             events[count++] = event_notifier_get_handle(node->e);
         }
     }
diff --git a/async.c b/async.c
index efce14b..bdc64a3 100644
--- a/async.c
+++ b/async.c
@@ -247,7 +247,7 @@
     }
     qemu_mutex_unlock(&ctx->bh_lock);
 
-    aio_set_event_notifier(ctx, &ctx->notifier, NULL);
+    aio_set_event_notifier(ctx, &ctx->notifier, false, NULL);
     event_notifier_cleanup(&ctx->notifier);
     rfifolock_destroy(&ctx->lock);
     qemu_mutex_destroy(&ctx->bh_lock);
@@ -329,6 +329,7 @@
     }
     g_source_set_can_recurse(&ctx->source, true);
     aio_set_event_notifier(ctx, &ctx->notifier,
+                           false,
                            (EventNotifierHandler *)
                            event_notifier_dummy_cb);
     ctx->thread_pool = NULL;
diff --git a/block.c b/block.c
index 6771c3a..e9f40dc 100644
--- a/block.c
+++ b/block.c
@@ -257,7 +257,6 @@
     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
         QLIST_INIT(&bs->op_blockers[i]);
     }
-    bdrv_iostatus_disable(bs);
     notifier_list_init(&bs->close_notifiers);
     notifier_with_return_list_init(&bs->before_write_notifiers);
     qemu_co_queue_init(&bs->throttled_reqs[0]);
@@ -857,7 +856,6 @@
         goto fail_opts;
     }
 
-    bs->guest_block_size = 512;
     bs->request_alignment = 512;
     bs->zero_beyond_eof = true;
     open_flags = bdrv_open_flags(bs, flags);
@@ -1081,6 +1079,10 @@
         }
     }
 
+    if (runstate_check(RUN_STATE_INMIGRATE)) {
+        *flags |= BDRV_O_INCOMING;
+    }
+
     return 0;
 }
 
@@ -1908,6 +1910,10 @@
     bdrv_drain(bs); /* in case flush left pending I/O */
     notifier_list_notify(&bs->close_notifiers, bs);
 
+    if (bs->blk) {
+        blk_dev_change_media_cb(bs->blk, false);
+    }
+
     if (bs->drv) {
         BdrvChild *child, *next;
 
@@ -1946,10 +1952,6 @@
         bs->full_open_options = NULL;
     }
 
-    if (bs->blk) {
-        blk_dev_change_media_cb(bs->blk, false);
-    }
-
     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
         g_free(ban);
     }
@@ -1998,19 +2000,10 @@
     /* move some fields that need to stay attached to the device */
 
     /* dev info */
-    bs_dest->guest_block_size   = bs_src->guest_block_size;
     bs_dest->copy_on_read       = bs_src->copy_on_read;
 
     bs_dest->enable_write_cache = bs_src->enable_write_cache;
 
-    /* r/w error */
-    bs_dest->on_read_error      = bs_src->on_read_error;
-    bs_dest->on_write_error     = bs_src->on_write_error;
-
-    /* i/o status */
-    bs_dest->iostatus_enabled   = bs_src->iostatus_enabled;
-    bs_dest->iostatus           = bs_src->iostatus;
-
     /* dirty bitmap */
     bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
 }
@@ -2497,82 +2490,6 @@
     *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
 }
 
-void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
-                       BlockdevOnError on_write_error)
-{
-    bs->on_read_error = on_read_error;
-    bs->on_write_error = on_write_error;
-}
-
-BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
-{
-    return is_read ? bs->on_read_error : bs->on_write_error;
-}
-
-BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
-{
-    BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
-
-    switch (on_err) {
-    case BLOCKDEV_ON_ERROR_ENOSPC:
-        return (error == ENOSPC) ?
-               BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
-    case BLOCKDEV_ON_ERROR_STOP:
-        return BLOCK_ERROR_ACTION_STOP;
-    case BLOCKDEV_ON_ERROR_REPORT:
-        return BLOCK_ERROR_ACTION_REPORT;
-    case BLOCKDEV_ON_ERROR_IGNORE:
-        return BLOCK_ERROR_ACTION_IGNORE;
-    default:
-        abort();
-    }
-}
-
-static void send_qmp_error_event(BlockDriverState *bs,
-                                 BlockErrorAction action,
-                                 bool is_read, int error)
-{
-    IoOperationType optype;
-
-    optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
-    qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
-                                   bdrv_iostatus_is_enabled(bs),
-                                   error == ENOSPC, strerror(error),
-                                   &error_abort);
-}
-
-/* This is done by device models because, while the block layer knows
- * about the error, it does not know whether an operation comes from
- * the device or the block layer (from a job, for example).
- */
-void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
-                       bool is_read, int error)
-{
-    assert(error >= 0);
-
-    if (action == BLOCK_ERROR_ACTION_STOP) {
-        /* First set the iostatus, so that "info block" returns an iostatus
-         * that matches the events raised so far (an additional error iostatus
-         * is fine, but not a lost one).
-         */
-        bdrv_iostatus_set_err(bs, error);
-
-        /* Then raise the request to stop the VM and the event.
-         * qemu_system_vmstop_request_prepare has two effects.  First,
-         * it ensures that the STOP event always comes after the
-         * BLOCK_IO_ERROR event.  Second, it ensures that even if management
-         * can observe the STOP event and do a "cont" before the STOP
-         * event is issued, the VM will not stop.  In this case, vm_start()
-         * also ensures that the STOP/RESUME pair of events is emitted.
-         */
-        qemu_system_vmstop_request_prepare();
-        send_qmp_error_event(bs, action, is_read, error);
-        qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
-    } else {
-        send_qmp_error_event(bs, action, is_read, error);
-    }
-}
-
 int bdrv_is_read_only(BlockDriverState *bs)
 {
     return bs->read_only;
@@ -2766,6 +2683,11 @@
         blk = blk_by_name(device);
 
         if (blk) {
+            if (!blk_bs(blk)) {
+                error_setg(errp, "Device '%s' has no medium", device);
+                return NULL;
+            }
+
             return blk_bs(blk);
         }
     }
@@ -3136,15 +3058,23 @@
 /**
  * Return TRUE if the media is present
  */
-int bdrv_is_inserted(BlockDriverState *bs)
+bool bdrv_is_inserted(BlockDriverState *bs)
 {
     BlockDriver *drv = bs->drv;
+    BdrvChild *child;
 
-    if (!drv)
-        return 0;
-    if (!drv->bdrv_is_inserted)
-        return 1;
-    return drv->bdrv_is_inserted(bs);
+    if (!drv) {
+        return false;
+    }
+    if (drv->bdrv_is_inserted) {
+        return drv->bdrv_is_inserted(bs);
+    }
+    QLIST_FOREACH(child, &bs->children, next) {
+        if (!bdrv_is_inserted(child->bs)) {
+            return false;
+        }
+    }
+    return true;
 }
 
 /**
@@ -3195,11 +3125,6 @@
     }
 }
 
-void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
-{
-    bs->guest_block_size = align;
-}
-
 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
 {
     BdrvDirtyBitmap *bm;
@@ -3597,46 +3522,6 @@
     return true;
 }
 
-void bdrv_iostatus_enable(BlockDriverState *bs)
-{
-    bs->iostatus_enabled = true;
-    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
-}
-
-/* The I/O status is only enabled if the drive explicitly
- * enables it _and_ the VM is configured to stop on errors */
-bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
-{
-    return (bs->iostatus_enabled &&
-           (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
-            bs->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
-            bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
-}
-
-void bdrv_iostatus_disable(BlockDriverState *bs)
-{
-    bs->iostatus_enabled = false;
-}
-
-void bdrv_iostatus_reset(BlockDriverState *bs)
-{
-    if (bdrv_iostatus_is_enabled(bs)) {
-        bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
-        if (bs->job) {
-            block_job_iostatus_reset(bs->job);
-        }
-    }
-}
-
-void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
-{
-    assert(bdrv_iostatus_is_enabled(bs));
-    if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
-        bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
-                                         BLOCK_DEVICE_IO_STATUS_FAILED;
-    }
-}
-
 void bdrv_img_create(const char *filename, const char *fmt,
                      const char *base_filename, const char *base_fmt,
                      char *options, uint64_t img_size, int flags,
@@ -4148,14 +4033,3 @@
         QDECREF(json);
     }
 }
-
-/* This accessor function purpose is to allow the device models to access the
- * BlockAcctStats structure embedded inside a BlockDriverState without being
- * aware of the BlockDriverState structure layout.
- * It will go away when the BlockAcctStats structure will be moved inside
- * the device models.
- */
-BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
-{
-    return &bs->stats;
-}
diff --git a/block/accounting.c b/block/accounting.c
index 01d594f..a423560 100644
--- a/block/accounting.c
+++ b/block/accounting.c
@@ -47,14 +47,6 @@
 }
 
 
-void block_acct_highest_sector(BlockAcctStats *stats, int64_t sector_num,
-                               unsigned int nb_sectors)
-{
-    if (stats->wr_highest_sector < sector_num + nb_sectors - 1) {
-        stats->wr_highest_sector = sector_num + nb_sectors - 1;
-    }
-}
-
 void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
                       int num_requests)
 {
diff --git a/block/backup.c b/block/backup.c
index 5696431..ec01db8 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -21,6 +21,7 @@
 #include "block/blockjob.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
+#include "sysemu/block-backend.h"
 
 #define BACKUP_CLUSTER_BITS 16
 #define BACKUP_CLUSTER_SIZE (1 << BACKUP_CLUSTER_BITS)
@@ -215,7 +216,9 @@
 {
     BackupBlockJob *s = container_of(job, BackupBlockJob, common);
 
-    bdrv_iostatus_reset(s->target);
+    if (s->target->blk) {
+        blk_iostatus_reset(s->target->blk);
+    }
 }
 
 static const BlockJobDriver backup_job_driver = {
@@ -360,8 +363,10 @@
     job->bitmap = hbitmap_alloc(end, 0);
 
     bdrv_set_enable_write_cache(target, true);
-    bdrv_set_on_error(target, on_target_error, on_target_error);
-    bdrv_iostatus_enable(target);
+    if (target->blk) {
+        blk_set_on_error(target->blk, on_target_error, on_target_error);
+        blk_iostatus_enable(target->blk);
+    }
 
     bdrv_add_before_write_notifier(bs, &before_write);
 
@@ -451,7 +456,9 @@
     }
     hbitmap_free(job->bitmap);
 
-    bdrv_iostatus_disable(target);
+    if (target->blk) {
+        blk_iostatus_disable(target->blk);
+    }
     bdrv_op_unblock_all(target, job->common.blocker);
 
     data = g_malloc(sizeof(*data));
@@ -480,7 +487,7 @@
 
     if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
          on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
-        !bdrv_iostatus_is_enabled(bs)) {
+        (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
         error_setg(errp, QERR_INVALID_PARAMETER, "on-source-error");
         return;
     }
diff --git a/block/block-backend.c b/block/block-backend.c
index 2256551..19fdaae 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -12,12 +12,17 @@
 
 #include "sysemu/block-backend.h"
 #include "block/block_int.h"
+#include "block/blockjob.h"
+#include "block/throttle-groups.h"
 #include "sysemu/blockdev.h"
+#include "sysemu/sysemu.h"
 #include "qapi-event.h"
 
 /* Number of coroutines to reserve per attached device model */
 #define COROUTINE_POOL_RESERVATION 64
 
+static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
+
 struct BlockBackend {
     char *name;
     int refcnt;
@@ -29,15 +34,31 @@
     /* TODO change to DeviceState when all users are qdevified */
     const BlockDevOps *dev_ops;
     void *dev_opaque;
+
+    /* the block size for which the guest device expects atomicity */
+    int guest_block_size;
+
+    /* If the BDS tree is removed, some of its options are stored here (which
+     * can be used to restore those options in the new BDS on insert) */
+    BlockBackendRootState root_state;
+
+    /* I/O stats (display with "info blockstats"). */
+    BlockAcctStats stats;
+
+    BlockdevOnError on_read_error, on_write_error;
+    bool iostatus_enabled;
+    BlockDeviceIoStatus iostatus;
 };
 
 typedef struct BlockBackendAIOCB {
     BlockAIOCB common;
     QEMUBH *bh;
+    BlockBackend *blk;
     int ret;
 } BlockBackendAIOCB;
 
 static const AIOCBInfo block_backend_aiocb_info = {
+    .get_aio_context = blk_aiocb_get_aio_context,
     .aiocb_size = sizeof(BlockBackendAIOCB),
 };
 
@@ -145,6 +166,10 @@
         bdrv_unref(blk->bs);
         blk->bs = NULL;
     }
+    if (blk->root_state.throttle_state) {
+        g_free(blk->root_state.throttle_group);
+        throttle_group_unref(blk->root_state.throttle_state);
+    }
     /* Avoid double-remove after blk_hide_on_behalf_of_hmp_drive_del() */
     if (blk->name[0]) {
         QTAILQ_REMOVE(&blk_backends, blk, link);
@@ -309,6 +334,17 @@
 }
 
 /*
+ * Associates a new BlockDriverState with @blk.
+ */
+void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
+{
+    assert(!blk->bs && !bs->blk);
+    bdrv_ref(bs);
+    blk->bs = bs;
+    bs->blk = blk;
+}
+
+/*
  * Attach device model @dev to @blk.
  * Return 0 on success, -EBUSY when a device model is attached already.
  */
@@ -320,7 +356,7 @@
     }
     blk_ref(blk);
     blk->dev = dev;
-    bdrv_iostatus_reset(blk->bs);
+    blk_iostatus_reset(blk);
     return 0;
 }
 
@@ -347,7 +383,7 @@
     blk->dev = NULL;
     blk->dev_ops = NULL;
     blk->dev_opaque = NULL;
-    bdrv_set_guest_block_size(blk->bs, 512);
+    blk->guest_block_size = 512;
     blk_unref(blk);
 }
 
@@ -452,7 +488,47 @@
 
 void blk_iostatus_enable(BlockBackend *blk)
 {
-    bdrv_iostatus_enable(blk->bs);
+    blk->iostatus_enabled = true;
+    blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
+}
+
+/* The I/O status is only enabled if the drive explicitly
+ * enables it _and_ the VM is configured to stop on errors */
+bool blk_iostatus_is_enabled(const BlockBackend *blk)
+{
+    return (blk->iostatus_enabled &&
+           (blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
+            blk->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
+            blk->on_read_error == BLOCKDEV_ON_ERROR_STOP));
+}
+
+BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk)
+{
+    return blk->iostatus;
+}
+
+void blk_iostatus_disable(BlockBackend *blk)
+{
+    blk->iostatus_enabled = false;
+}
+
+void blk_iostatus_reset(BlockBackend *blk)
+{
+    if (blk_iostatus_is_enabled(blk)) {
+        blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
+        if (blk->bs && blk->bs->job) {
+            block_job_iostatus_reset(blk->bs->job);
+        }
+    }
+}
+
+void blk_iostatus_set_err(BlockBackend *blk, int error)
+{
+    assert(blk_iostatus_is_enabled(blk));
+    if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
+        blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
+                                          BLOCK_DEVICE_IO_STATUS_FAILED;
+    }
 }
 
 static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
@@ -464,7 +540,7 @@
         return -EIO;
     }
 
-    if (!blk_is_inserted(blk)) {
+    if (!blk_is_available(blk)) {
         return -ENOMEDIUM;
     }
 
@@ -558,6 +634,7 @@
     QEMUBH *bh;
 
     acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
+    acb->blk = blk;
     acb->ret = ret;
 
     bh = aio_bh_new(blk_get_aio_context(blk), error_callback_bh, acb);
@@ -602,16 +679,28 @@
 
 int64_t blk_getlength(BlockBackend *blk)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_getlength(blk->bs);
 }
 
 void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
 {
-    bdrv_get_geometry(blk->bs, nb_sectors_ptr);
+    if (!blk->bs) {
+        *nb_sectors_ptr = 0;
+    } else {
+        bdrv_get_geometry(blk->bs, nb_sectors_ptr);
+    }
 }
 
 int64_t blk_nb_sectors(BlockBackend *blk)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_nb_sectors(blk->bs);
 }
 
@@ -642,6 +731,10 @@
 BlockAIOCB *blk_aio_flush(BlockBackend *blk,
                           BlockCompletionFunc *cb, void *opaque)
 {
+    if (!blk_is_available(blk)) {
+        return abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
+    }
+
     return bdrv_aio_flush(blk->bs, cb, opaque);
 }
 
@@ -683,12 +776,20 @@
 
 int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_ioctl(blk->bs, req, buf);
 }
 
 BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
                           BlockCompletionFunc *cb, void *opaque)
 {
+    if (!blk_is_available(blk)) {
+        return abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
+    }
+
     return bdrv_aio_ioctl(blk->bs, req, buf, cb, opaque);
 }
 
@@ -704,11 +805,19 @@
 
 int blk_co_flush(BlockBackend *blk)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_co_flush(blk->bs);
 }
 
 int blk_flush(BlockBackend *blk)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_flush(blk->bs);
 }
 
@@ -719,7 +828,9 @@
 
 void blk_drain(BlockBackend *blk)
 {
-    bdrv_drain(blk->bs);
+    if (blk->bs) {
+        bdrv_drain(blk->bs);
+    }
 }
 
 void blk_drain_all(void)
@@ -727,76 +838,178 @@
     bdrv_drain_all();
 }
 
+void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
+                      BlockdevOnError on_write_error)
+{
+    blk->on_read_error = on_read_error;
+    blk->on_write_error = on_write_error;
+}
+
 BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read)
 {
-    return bdrv_get_on_error(blk->bs, is_read);
+    return is_read ? blk->on_read_error : blk->on_write_error;
 }
 
 BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
                                       int error)
 {
-    return bdrv_get_error_action(blk->bs, is_read, error);
+    BlockdevOnError on_err = blk_get_on_error(blk, is_read);
+
+    switch (on_err) {
+    case BLOCKDEV_ON_ERROR_ENOSPC:
+        return (error == ENOSPC) ?
+               BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
+    case BLOCKDEV_ON_ERROR_STOP:
+        return BLOCK_ERROR_ACTION_STOP;
+    case BLOCKDEV_ON_ERROR_REPORT:
+        return BLOCK_ERROR_ACTION_REPORT;
+    case BLOCKDEV_ON_ERROR_IGNORE:
+        return BLOCK_ERROR_ACTION_IGNORE;
+    default:
+        abort();
+    }
 }
 
+static void send_qmp_error_event(BlockBackend *blk,
+                                 BlockErrorAction action,
+                                 bool is_read, int error)
+{
+    IoOperationType optype;
+
+    optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
+    qapi_event_send_block_io_error(blk_name(blk), optype, action,
+                                   blk_iostatus_is_enabled(blk),
+                                   error == ENOSPC, strerror(error),
+                                   &error_abort);
+}
+
+/* This is done by device models because, while the block layer knows
+ * about the error, it does not know whether an operation comes from
+ * the device or the block layer (from a job, for example).
+ */
 void blk_error_action(BlockBackend *blk, BlockErrorAction action,
                       bool is_read, int error)
 {
-    bdrv_error_action(blk->bs, action, is_read, error);
+    assert(error >= 0);
+
+    if (action == BLOCK_ERROR_ACTION_STOP) {
+        /* First set the iostatus, so that "info block" returns an iostatus
+         * that matches the events raised so far (an additional error iostatus
+         * is fine, but not a lost one).
+         */
+        blk_iostatus_set_err(blk, error);
+
+        /* Then raise the request to stop the VM and the event.
+         * qemu_system_vmstop_request_prepare has two effects.  First,
+         * it ensures that the STOP event always comes after the
+         * BLOCK_IO_ERROR event.  Second, it ensures that even if management
+         * can observe the STOP event and do a "cont" before the STOP
+         * event is issued, the VM will not stop.  In this case, vm_start()
+         * also ensures that the STOP/RESUME pair of events is emitted.
+         */
+        qemu_system_vmstop_request_prepare();
+        send_qmp_error_event(blk, action, is_read, error);
+        qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
+    } else {
+        send_qmp_error_event(blk, action, is_read, error);
+    }
 }
 
 int blk_is_read_only(BlockBackend *blk)
 {
-    return bdrv_is_read_only(blk->bs);
+    if (blk->bs) {
+        return bdrv_is_read_only(blk->bs);
+    } else {
+        return blk->root_state.read_only;
+    }
 }
 
 int blk_is_sg(BlockBackend *blk)
 {
+    if (!blk->bs) {
+        return 0;
+    }
+
     return bdrv_is_sg(blk->bs);
 }
 
 int blk_enable_write_cache(BlockBackend *blk)
 {
-    return bdrv_enable_write_cache(blk->bs);
+    if (blk->bs) {
+        return bdrv_enable_write_cache(blk->bs);
+    } else {
+        return !!(blk->root_state.open_flags & BDRV_O_CACHE_WB);
+    }
 }
 
 void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
 {
-    bdrv_set_enable_write_cache(blk->bs, wce);
+    if (blk->bs) {
+        bdrv_set_enable_write_cache(blk->bs, wce);
+    } else {
+        if (wce) {
+            blk->root_state.open_flags |= BDRV_O_CACHE_WB;
+        } else {
+            blk->root_state.open_flags &= ~BDRV_O_CACHE_WB;
+        }
+    }
 }
 
 void blk_invalidate_cache(BlockBackend *blk, Error **errp)
 {
+    if (!blk->bs) {
+        error_setg(errp, "Device '%s' has no medium", blk->name);
+        return;
+    }
+
     bdrv_invalidate_cache(blk->bs, errp);
 }
 
-int blk_is_inserted(BlockBackend *blk)
+bool blk_is_inserted(BlockBackend *blk)
 {
-    return bdrv_is_inserted(blk->bs);
+    return blk->bs && bdrv_is_inserted(blk->bs);
+}
+
+bool blk_is_available(BlockBackend *blk)
+{
+    return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk);
 }
 
 void blk_lock_medium(BlockBackend *blk, bool locked)
 {
-    bdrv_lock_medium(blk->bs, locked);
+    if (blk->bs) {
+        bdrv_lock_medium(blk->bs, locked);
+    }
 }
 
 void blk_eject(BlockBackend *blk, bool eject_flag)
 {
-    bdrv_eject(blk->bs, eject_flag);
+    if (blk->bs) {
+        bdrv_eject(blk->bs, eject_flag);
+    }
 }
 
 int blk_get_flags(BlockBackend *blk)
 {
-    return bdrv_get_flags(blk->bs);
+    if (blk->bs) {
+        return bdrv_get_flags(blk->bs);
+    } else {
+        return blk->root_state.open_flags;
+    }
 }
 
 int blk_get_max_transfer_length(BlockBackend *blk)
 {
-    return blk->bs->bl.max_transfer_length;
+    if (blk->bs) {
+        return blk->bs->bl.max_transfer_length;
+    } else {
+        return 0;
+    }
 }
 
 void blk_set_guest_block_size(BlockBackend *blk, int align)
 {
-    bdrv_set_guest_block_size(blk->bs, align);
+    blk->guest_block_size = align;
 }
 
 void *blk_blockalign(BlockBackend *blk, size_t size)
@@ -806,40 +1019,64 @@
 
 bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
 {
+    if (!blk->bs) {
+        return false;
+    }
+
     return bdrv_op_is_blocked(blk->bs, op, errp);
 }
 
 void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
 {
-    bdrv_op_unblock(blk->bs, op, reason);
+    if (blk->bs) {
+        bdrv_op_unblock(blk->bs, op, reason);
+    }
 }
 
 void blk_op_block_all(BlockBackend *blk, Error *reason)
 {
-    bdrv_op_block_all(blk->bs, reason);
+    if (blk->bs) {
+        bdrv_op_block_all(blk->bs, reason);
+    }
 }
 
 void blk_op_unblock_all(BlockBackend *blk, Error *reason)
 {
-    bdrv_op_unblock_all(blk->bs, reason);
+    if (blk->bs) {
+        bdrv_op_unblock_all(blk->bs, reason);
+    }
 }
 
 AioContext *blk_get_aio_context(BlockBackend *blk)
 {
-    return bdrv_get_aio_context(blk->bs);
+    if (blk->bs) {
+        return bdrv_get_aio_context(blk->bs);
+    } else {
+        return qemu_get_aio_context();
+    }
+}
+
+static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
+{
+    BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb);
+    return blk_get_aio_context(blk_acb->blk);
 }
 
 void blk_set_aio_context(BlockBackend *blk, AioContext *new_context)
 {
-    bdrv_set_aio_context(blk->bs, new_context);
+    if (blk->bs) {
+        bdrv_set_aio_context(blk->bs, new_context);
+    }
 }
 
 void blk_add_aio_context_notifier(BlockBackend *blk,
         void (*attached_aio_context)(AioContext *new_context, void *opaque),
         void (*detach_aio_context)(void *opaque), void *opaque)
 {
-    bdrv_add_aio_context_notifier(blk->bs, attached_aio_context,
-                                  detach_aio_context, opaque);
+    if (blk->bs) {
+        bdrv_add_aio_context_notifier(blk->bs, attached_aio_context,
+                                      detach_aio_context, opaque);
+    }
 }
 
 void blk_remove_aio_context_notifier(BlockBackend *blk,
@@ -848,28 +1085,36 @@
                                      void (*detach_aio_context)(void *),
                                      void *opaque)
 {
-    bdrv_remove_aio_context_notifier(blk->bs, attached_aio_context,
-                                     detach_aio_context, opaque);
+    if (blk->bs) {
+        bdrv_remove_aio_context_notifier(blk->bs, attached_aio_context,
+                                         detach_aio_context, opaque);
+    }
 }
 
 void blk_add_close_notifier(BlockBackend *blk, Notifier *notify)
 {
-    bdrv_add_close_notifier(blk->bs, notify);
+    if (blk->bs) {
+        bdrv_add_close_notifier(blk->bs, notify);
+    }
 }
 
 void blk_io_plug(BlockBackend *blk)
 {
-    bdrv_io_plug(blk->bs);
+    if (blk->bs) {
+        bdrv_io_plug(blk->bs);
+    }
 }
 
 void blk_io_unplug(BlockBackend *blk)
 {
-    bdrv_io_unplug(blk->bs);
+    if (blk->bs) {
+        bdrv_io_unplug(blk->bs);
+    }
 }
 
 BlockAcctStats *blk_get_stats(BlockBackend *blk)
 {
-    return bdrv_get_stats(blk->bs);
+    return &blk->stats;
 }
 
 void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
@@ -902,6 +1147,10 @@
 
 int blk_truncate(BlockBackend *blk, int64_t offset)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_truncate(blk->bs, offset);
 }
 
@@ -918,20 +1167,67 @@
 int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
                      int64_t pos, int size)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_save_vmstate(blk->bs, buf, pos, size);
 }
 
 int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_load_vmstate(blk->bs, buf, pos, size);
 }
 
 int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_probe_blocksizes(blk->bs, bsz);
 }
 
 int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_probe_geometry(blk->bs, geo);
 }
+
+/*
+ * Updates the BlockBackendRootState object with data from the currently
+ * attached BlockDriverState.
+ */
+void blk_update_root_state(BlockBackend *blk)
+{
+    assert(blk->bs);
+
+    blk->root_state.open_flags    = blk->bs->open_flags;
+    blk->root_state.read_only     = blk->bs->read_only;
+    blk->root_state.detect_zeroes = blk->bs->detect_zeroes;
+
+    if (blk->root_state.throttle_group) {
+        g_free(blk->root_state.throttle_group);
+        throttle_group_unref(blk->root_state.throttle_state);
+    }
+    if (blk->bs->throttle_state) {
+        const char *name = throttle_group_get_name(blk->bs);
+        blk->root_state.throttle_group = g_strdup(name);
+        blk->root_state.throttle_state = throttle_group_incref(name);
+    } else {
+        blk->root_state.throttle_group = NULL;
+        blk->root_state.throttle_state = NULL;
+    }
+}
+
+BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
+{
+    return &blk->root_state;
+}
diff --git a/block/commit.c b/block/commit.c
index d12e26f..fdebe87 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -17,6 +17,7 @@
 #include "block/blockjob.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
+#include "sysemu/block-backend.h"
 
 enum {
     /*
@@ -213,7 +214,7 @@
 
     if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
          on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
-        !bdrv_iostatus_is_enabled(bs)) {
+        (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
         error_setg(errp, "Invalid parameter combination");
         return;
     }
diff --git a/block/curl.c b/block/curl.c
index 032cc8a..8994182 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -154,18 +154,20 @@
     DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd);
     switch (action) {
         case CURL_POLL_IN:
-            aio_set_fd_handler(s->aio_context, fd, curl_multi_read,
-                               NULL, state);
+            aio_set_fd_handler(s->aio_context, fd, false,
+                               curl_multi_read, NULL, state);
             break;
         case CURL_POLL_OUT:
-            aio_set_fd_handler(s->aio_context, fd, NULL, curl_multi_do, state);
+            aio_set_fd_handler(s->aio_context, fd, false,
+                               NULL, curl_multi_do, state);
             break;
         case CURL_POLL_INOUT:
-            aio_set_fd_handler(s->aio_context, fd, curl_multi_read,
-                               curl_multi_do, state);
+            aio_set_fd_handler(s->aio_context, fd, false,
+                               curl_multi_read, curl_multi_do, state);
             break;
         case CURL_POLL_REMOVE:
-            aio_set_fd_handler(s->aio_context, fd, NULL, NULL, NULL);
+            aio_set_fd_handler(s->aio_context, fd, false,
+                               NULL, NULL, NULL);
             break;
     }
 
diff --git a/block/io.c b/block/io.c
index 5311473..5ac6256 100644
--- a/block/io.c
+++ b/block/io.c
@@ -23,6 +23,7 @@
  */
 
 #include "trace.h"
+#include "sysemu/block-backend.h"
 #include "block/blockjob.h"
 #include "block/block_int.h"
 #include "block/throttle-groups.h"
@@ -1151,7 +1152,9 @@
 
     bdrv_set_dirty(bs, sector_num, nb_sectors);
 
-    block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);
+    if (bs->wr_highest_offset < offset + bytes) {
+        bs->wr_highest_offset = offset + bytes;
+    }
 
     if (ret >= 0) {
         bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
@@ -1903,7 +1906,10 @@
         }
     }
 
-    block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1);
+    if (bs->blk) {
+        block_acct_merge_done(blk_get_stats(bs->blk), BLOCK_ACCT_WRITE,
+                              num_reqs - outidx - 1);
+    }
 
     return outidx + 1;
 }
@@ -2618,3 +2624,36 @@
     }
     bdrv_start_throttled_reqs(bs);
 }
+
+/*
+ * Enter a drained section on @bs.
+ *
+ * The outermost call (quiesce_counter still 0 on entry) calls
+ * aio_disable_external() on the AioContext of @bs.  Every call, nested or
+ * not, bumps quiesce_counter and then runs bdrv_drain(bs).
+ */
+void bdrv_drained_begin(BlockDriverState *bs)
+{
+    bool outermost = (bs->quiesce_counter == 0);
+
+    bs->quiesce_counter++;
+    if (outermost) {
+        aio_disable_external(bdrv_get_aio_context(bs));
+    }
+    bdrv_drain(bs);
+}
+
+/*
+ * Leave a drained section on @bs; asserts that quiesce_counter is positive.
+ *
+ * Decrements quiesce_counter and, once it is back to 0, calls
+ * aio_enable_external() on the AioContext of @bs.
+ */
+void bdrv_drained_end(BlockDriverState *bs)
+{
+    assert(bs->quiesce_counter > 0);
+    bs->quiesce_counter--;
+    if (bs->quiesce_counter == 0) {
+        aio_enable_external(bdrv_get_aio_context(bs));
+    }
+}
diff --git a/block/iscsi.c b/block/iscsi.c
index 93f1ee4..9a628b7 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -291,8 +291,8 @@
     int ev = iscsi_which_events(iscsi);
 
     if (ev != iscsilun->events) {
-        aio_set_fd_handler(iscsilun->aio_context,
-                           iscsi_get_fd(iscsi),
+        aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsi),
+                           false,
                            (ev & POLLIN) ? iscsi_process_read : NULL,
                            (ev & POLLOUT) ? iscsi_process_write : NULL,
                            iscsilun);
@@ -1280,9 +1280,8 @@
 {
     IscsiLun *iscsilun = bs->opaque;
 
-    aio_set_fd_handler(iscsilun->aio_context,
-                       iscsi_get_fd(iscsilun->iscsi),
-                       NULL, NULL, NULL);
+    aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi),
+                       false, NULL, NULL, NULL);
     iscsilun->events = 0;
 
     if (iscsilun->nop_timer) {
diff --git a/block/linux-aio.c b/block/linux-aio.c
index c991443..88b0520 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -287,7 +287,7 @@
 {
     struct qemu_laio_state *s = s_;
 
-    aio_set_event_notifier(old_context, &s->e, NULL);
+    aio_set_event_notifier(old_context, &s->e, false, NULL);
     qemu_bh_delete(s->completion_bh);
 }
 
@@ -296,7 +296,8 @@
     struct qemu_laio_state *s = s_;
 
     s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
-    aio_set_event_notifier(new_context, &s->e, qemu_laio_completion_cb);
+    aio_set_event_notifier(new_context, &s->e, false,
+                           qemu_laio_completion_cb);
 }
 
 void *laio_init(void)
diff --git a/block/mirror.c b/block/mirror.c
index 7e43511..b1252a1 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -14,6 +14,7 @@
 #include "trace.h"
 #include "block/blockjob.h"
 #include "block/block_int.h"
+#include "sysemu/block-backend.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
 #include "qemu/bitmap.h"
@@ -599,7 +600,9 @@
     g_free(s->cow_bitmap);
     g_free(s->in_flight_bitmap);
     bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
-    bdrv_iostatus_disable(s->target);
+    if (s->target->blk) {
+        blk_iostatus_disable(s->target->blk);
+    }
 
     data = g_malloc(sizeof(*data));
     data->ret = ret;
@@ -621,7 +624,9 @@
 {
     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
 
-    bdrv_iostatus_reset(s->target);
+    if (s->target->blk) {
+        blk_iostatus_reset(s->target->blk);
+    }
 }
 
 static void mirror_complete(BlockJob *job, Error **errp)
@@ -704,7 +709,7 @@
 
     if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
          on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
-        !bdrv_iostatus_is_enabled(bs)) {
+        (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
         error_setg(errp, QERR_INVALID_PARAMETER, "on-source-error");
         return;
     }
@@ -740,8 +745,10 @@
         return;
     }
     bdrv_set_enable_write_cache(s->target, true);
-    bdrv_set_on_error(s->target, on_target_error, on_target_error);
-    bdrv_iostatus_enable(s->target);
+    if (s->target->blk) {
+        blk_set_on_error(s->target->blk, on_target_error, on_target_error);
+        blk_iostatus_enable(s->target->blk);
+    }
     s->common.co = qemu_coroutine_create(mirror_run);
     trace_mirror_start(bs, s, s->common.co, opaque);
     qemu_coroutine_enter(s->common.co, s);
diff --git a/block/nbd-client.c b/block/nbd-client.c
index e1bb919..b7fd17a 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -124,7 +124,7 @@
     s->send_coroutine = qemu_coroutine_self();
     aio_context = bdrv_get_aio_context(bs);
 
-    aio_set_fd_handler(aio_context, s->sock,
+    aio_set_fd_handler(aio_context, s->sock, false,
                        nbd_reply_ready, nbd_restart_write, bs);
     if (qiov) {
         if (!s->is_unix) {
@@ -144,7 +144,8 @@
     } else {
         rc = nbd_send_request(s->sock, request);
     }
-    aio_set_fd_handler(aio_context, s->sock, nbd_reply_ready, NULL, bs);
+    aio_set_fd_handler(aio_context, s->sock, false,
+                       nbd_reply_ready, NULL, bs);
     s->send_coroutine = NULL;
     qemu_co_mutex_unlock(&s->send_mutex);
     return rc;
@@ -348,14 +349,15 @@
 void nbd_client_detach_aio_context(BlockDriverState *bs)
 {
     aio_set_fd_handler(bdrv_get_aio_context(bs),
-                       nbd_get_client_session(bs)->sock, NULL, NULL, NULL);
+                       nbd_get_client_session(bs)->sock,
+                       false, NULL, NULL, NULL);
 }
 
 void nbd_client_attach_aio_context(BlockDriverState *bs,
                                    AioContext *new_context)
 {
     aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sock,
-                       nbd_reply_ready, NULL, bs);
+                       false, nbd_reply_ready, NULL, bs);
 }
 
 void nbd_client_close(BlockDriverState *bs)
diff --git a/block/nfs.c b/block/nfs.c
index 887a98e..fd79f89 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -63,11 +63,10 @@
 {
     int ev = nfs_which_events(client->context);
     if (ev != client->events) {
-        aio_set_fd_handler(client->aio_context,
-                           nfs_get_fd(client->context),
+        aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
+                           false,
                            (ev & POLLIN) ? nfs_process_read : NULL,
-                           (ev & POLLOUT) ? nfs_process_write : NULL,
-                           client);
+                           (ev & POLLOUT) ? nfs_process_write : NULL, client);
 
     }
     client->events = ev;
@@ -242,9 +241,8 @@
 {
     NFSClient *client = bs->opaque;
 
-    aio_set_fd_handler(client->aio_context,
-                       nfs_get_fd(client->context),
-                       NULL, NULL, NULL);
+    aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
+                       false, NULL, NULL, NULL);
     client->events = 0;
 }
 
@@ -263,9 +261,8 @@
         if (client->fh) {
             nfs_close(client->context, client->fh);
         }
-        aio_set_fd_handler(client->aio_context,
-                           nfs_get_fd(client->context),
-                           NULL, NULL, NULL);
+        aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
+                           false, NULL, NULL, NULL);
         nfs_destroy_context(client->context);
     }
     memset(client, 0, sizeof(NFSClient));
diff --git a/block/qapi.c b/block/qapi.c
index 355ba32..ec0f513 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -301,17 +301,17 @@
         info->tray_open = blk_dev_is_tray_open(blk);
     }
 
-    if (bdrv_iostatus_is_enabled(bs)) {
+    if (blk_iostatus_is_enabled(blk)) {
         info->has_io_status = true;
-        info->io_status = bs->iostatus;
+        info->io_status = blk_iostatus(blk);
     }
 
-    if (!QLIST_EMPTY(&bs->dirty_bitmaps)) {
+    if (bs && !QLIST_EMPTY(&bs->dirty_bitmaps)) {
         info->has_dirty_bitmaps = true;
         info->dirty_bitmaps = bdrv_query_dirty_bitmaps(bs);
     }
 
-    if (bs->drv) {
+    if (bs && bs->drv) {
         info->has_inserted = true;
         info->inserted = bdrv_block_device_info(bs, errp);
         if (info->inserted == NULL) {
@@ -344,18 +344,22 @@
     }
 
     s->stats = g_malloc0(sizeof(*s->stats));
-    s->stats->rd_bytes = bs->stats.nr_bytes[BLOCK_ACCT_READ];
-    s->stats->wr_bytes = bs->stats.nr_bytes[BLOCK_ACCT_WRITE];
-    s->stats->rd_operations = bs->stats.nr_ops[BLOCK_ACCT_READ];
-    s->stats->wr_operations = bs->stats.nr_ops[BLOCK_ACCT_WRITE];
-    s->stats->rd_merged = bs->stats.merged[BLOCK_ACCT_READ];
-    s->stats->wr_merged = bs->stats.merged[BLOCK_ACCT_WRITE];
-    s->stats->wr_highest_offset =
-        bs->stats.wr_highest_sector * BDRV_SECTOR_SIZE;
-    s->stats->flush_operations = bs->stats.nr_ops[BLOCK_ACCT_FLUSH];
-    s->stats->wr_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_WRITE];
-    s->stats->rd_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_READ];
-    s->stats->flush_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_FLUSH];
+    if (bs->blk) {
+        BlockAcctStats *stats = blk_get_stats(bs->blk);
+
+        s->stats->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ];
+        s->stats->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE];
+        s->stats->rd_operations = stats->nr_ops[BLOCK_ACCT_READ];
+        s->stats->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE];
+        s->stats->rd_merged = stats->merged[BLOCK_ACCT_READ];
+        s->stats->wr_merged = stats->merged[BLOCK_ACCT_WRITE];
+        s->stats->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH];
+        s->stats->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE];
+        s->stats->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ];
+        s->stats->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH];
+    }
+
+    s->stats->wr_highest_offset = bs->wr_highest_offset;
 
     if (bs->file) {
         s->has_parent = true;
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 3a527f0..918c756 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -127,11 +127,6 @@
 
 #define FTYPE_FILE   0
 #define FTYPE_CD     1
-#define FTYPE_FD     2
-
-/* if the FD is not accessed during that time (in ns), we try to
-   reopen it to see if the disk has been changed */
-#define FD_OPEN_TIMEOUT (1000000000)
 
 #define MAX_BLOCKSIZE	4096
 
@@ -141,13 +136,6 @@
     int open_flags;
     size_t buf_align;
 
-#if defined(__linux__)
-    /* linux floppy specific */
-    int64_t fd_open_time;
-    int64_t fd_error_time;
-    int fd_got_error;
-    int fd_media_changed;
-#endif
 #ifdef CONFIG_LINUX_AIO
     int use_aio;
     void *aio_ctx;
@@ -635,7 +623,7 @@
     }
 #endif
 
-    if (s->type == FTYPE_FD || s->type == FTYPE_CD) {
+    if (s->type == FTYPE_CD) {
         raw_s->open_flags |= O_NONBLOCK;
     }
 
@@ -2187,47 +2175,6 @@
 }
 
 #if defined(__linux__)
-/* Note: we do not have a reliable method to detect if the floppy is
-   present. The current method is to try to open the floppy at every
-   I/O and to keep it opened during a few hundreds of ms. */
-static int fd_open(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-    int last_media_present;
-
-    if (s->type != FTYPE_FD)
-        return 0;
-    last_media_present = (s->fd >= 0);
-    if (s->fd >= 0 &&
-        (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
-        qemu_close(s->fd);
-        s->fd = -1;
-        DPRINTF("Floppy closed\n");
-    }
-    if (s->fd < 0) {
-        if (s->fd_got_error &&
-            (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->fd_error_time) < FD_OPEN_TIMEOUT) {
-            DPRINTF("No floppy (open delayed)\n");
-            return -EIO;
-        }
-        s->fd = qemu_open(bs->filename, s->open_flags & ~O_NONBLOCK);
-        if (s->fd < 0) {
-            s->fd_error_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-            s->fd_got_error = 1;
-            if (last_media_present)
-                s->fd_media_changed = 1;
-            DPRINTF("No floppy\n");
-            return -EIO;
-        }
-        DPRINTF("Floppy opened\n");
-    }
-    if (!last_media_present)
-        s->fd_media_changed = 1;
-    s->fd_open_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-    s->fd_got_error = 0;
-    return 0;
-}
-
 static int hdev_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
 {
     BDRVRawState *s = bs->opaque;
@@ -2256,8 +2203,8 @@
     pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
     return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
 }
+#endif /* linux */
 
-#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
 static int fd_open(BlockDriverState *bs)
 {
     BDRVRawState *s = bs->opaque;
@@ -2267,14 +2214,6 @@
         return 0;
     return -EIO;
 }
-#else /* !linux && !FreeBSD */
-
-static int fd_open(BlockDriverState *bs)
-{
-    return 0;
-}
-
-#endif /* !linux && !FreeBSD */
 
 static coroutine_fn BlockAIOCB *hdev_aio_discard(BlockDriverState *bs,
     int64_t sector_num, int nb_sectors,
@@ -2318,14 +2257,13 @@
     int64_t total_size = 0;
     bool has_prefix;
 
-    /* This function is used by all three protocol block drivers and therefore
-     * any of these three prefixes may be given.
+    /* This function is used by both protocol block drivers and therefore either
+     * of these prefixes may be given.
      * The return value has to be stored somewhere, otherwise this is an error
      * due to -Werror=unused-value. */
     has_prefix =
         strstart(filename, "host_device:", &filename) ||
-        strstart(filename, "host_cdrom:" , &filename) ||
-        strstart(filename, "host_floppy:", &filename);
+        strstart(filename, "host_cdrom:" , &filename);
 
     (void)has_prefix;
 
@@ -2405,155 +2343,6 @@
 #endif
 };
 
-#ifdef __linux__
-static void floppy_parse_filename(const char *filename, QDict *options,
-                                  Error **errp)
-{
-    /* The prefix is optional, just as for "file". */
-    strstart(filename, "host_floppy:", &filename);
-
-    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
-}
-
-static int floppy_open(BlockDriverState *bs, QDict *options, int flags,
-                       Error **errp)
-{
-    BDRVRawState *s = bs->opaque;
-    Error *local_err = NULL;
-    int ret;
-
-    s->type = FTYPE_FD;
-
-    /* open will not fail even if no floppy is inserted, so add O_NONBLOCK */
-    ret = raw_open_common(bs, options, flags, O_NONBLOCK, &local_err);
-    if (ret) {
-        if (local_err) {
-            error_propagate(errp, local_err);
-        }
-        return ret;
-    }
-
-    /* close fd so that we can reopen it as needed */
-    qemu_close(s->fd);
-    s->fd = -1;
-    s->fd_media_changed = 1;
-
-    error_report("Host floppy pass-through is deprecated");
-    error_printf("Support for it will be removed in a future release.\n");
-    return 0;
-}
-
-static int floppy_probe_device(const char *filename)
-{
-    int fd, ret;
-    int prio = 0;
-    struct floppy_struct fdparam;
-    struct stat st;
-
-    if (strstart(filename, "/dev/fd", NULL) &&
-        !strstart(filename, "/dev/fdset/", NULL) &&
-        !strstart(filename, "/dev/fd/", NULL)) {
-        prio = 50;
-    }
-
-    fd = qemu_open(filename, O_RDONLY | O_NONBLOCK);
-    if (fd < 0) {
-        goto out;
-    }
-    ret = fstat(fd, &st);
-    if (ret == -1 || !S_ISBLK(st.st_mode)) {
-        goto outc;
-    }
-
-    /* Attempt to detect via a floppy specific ioctl */
-    ret = ioctl(fd, FDGETPRM, &fdparam);
-    if (ret >= 0)
-        prio = 100;
-
-outc:
-    qemu_close(fd);
-out:
-    return prio;
-}
-
-
-static int floppy_is_inserted(BlockDriverState *bs)
-{
-    return fd_open(bs) >= 0;
-}
-
-static int floppy_media_changed(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-    int ret;
-
-    /*
-     * XXX: we do not have a true media changed indication.
-     * It does not work if the floppy is changed without trying to read it.
-     */
-    fd_open(bs);
-    ret = s->fd_media_changed;
-    s->fd_media_changed = 0;
-    DPRINTF("Floppy changed=%d\n", ret);
-    return ret;
-}
-
-static void floppy_eject(BlockDriverState *bs, bool eject_flag)
-{
-    BDRVRawState *s = bs->opaque;
-    int fd;
-
-    if (s->fd >= 0) {
-        qemu_close(s->fd);
-        s->fd = -1;
-    }
-    fd = qemu_open(bs->filename, s->open_flags | O_NONBLOCK);
-    if (fd >= 0) {
-        if (ioctl(fd, FDEJECT, 0) < 0)
-            perror("FDEJECT");
-        qemu_close(fd);
-    }
-}
-
-static BlockDriver bdrv_host_floppy = {
-    .format_name        = "host_floppy",
-    .protocol_name      = "host_floppy",
-    .instance_size      = sizeof(BDRVRawState),
-    .bdrv_needs_filename = true,
-    .bdrv_probe_device	= floppy_probe_device,
-    .bdrv_parse_filename = floppy_parse_filename,
-    .bdrv_file_open     = floppy_open,
-    .bdrv_close         = raw_close,
-    .bdrv_reopen_prepare = raw_reopen_prepare,
-    .bdrv_reopen_commit  = raw_reopen_commit,
-    .bdrv_reopen_abort   = raw_reopen_abort,
-    .bdrv_create         = hdev_create,
-    .create_opts         = &raw_create_opts,
-
-    .bdrv_aio_readv     = raw_aio_readv,
-    .bdrv_aio_writev    = raw_aio_writev,
-    .bdrv_aio_flush	= raw_aio_flush,
-    .bdrv_refresh_limits = raw_refresh_limits,
-    .bdrv_io_plug = raw_aio_plug,
-    .bdrv_io_unplug = raw_aio_unplug,
-    .bdrv_flush_io_queue = raw_aio_flush_io_queue,
-
-    .bdrv_truncate      = raw_truncate,
-    .bdrv_getlength      = raw_getlength,
-    .has_variable_length = true,
-    .bdrv_get_allocated_file_size
-                        = raw_get_allocated_file_size,
-
-    .bdrv_detach_aio_context = raw_detach_aio_context,
-    .bdrv_attach_aio_context = raw_attach_aio_context,
-
-    /* removable device support */
-    .bdrv_is_inserted   = floppy_is_inserted,
-    .bdrv_media_changed = floppy_media_changed,
-    .bdrv_eject         = floppy_eject,
-};
-#endif
-
 #if defined(__linux__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
 static void cdrom_parse_filename(const char *filename, QDict *options,
                                  Error **errp)
@@ -2609,15 +2398,13 @@
     return prio;
 }
 
-static int cdrom_is_inserted(BlockDriverState *bs)
+static bool cdrom_is_inserted(BlockDriverState *bs)
 {
     BDRVRawState *s = bs->opaque;
     int ret;
 
     ret = ioctl(s->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
-    if (ret == CDS_DISC_OK)
-        return 1;
-    return 0;
+    return ret == CDS_DISC_OK;
 }
 
 static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
@@ -2743,7 +2530,7 @@
     return 0;
 }
 
-static int cdrom_is_inserted(BlockDriverState *bs)
+static bool cdrom_is_inserted(BlockDriverState *bs)
 {
     return raw_getlength(bs) > 0;
 }
@@ -2831,7 +2618,6 @@
     bdrv_register(&bdrv_file);
     bdrv_register(&bdrv_host_device);
 #ifdef __linux__
-    bdrv_register(&bdrv_host_floppy);
     bdrv_register(&bdrv_host_cdrom);
 #endif
 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
diff --git a/block/raw_bsd.c b/block/raw_bsd.c
index 63ee911..0aded31 100644
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -154,11 +154,6 @@
     return bdrv_truncate(bs->file->bs, offset);
 }
 
-static int raw_is_inserted(BlockDriverState *bs)
-{
-    return bdrv_is_inserted(bs->file->bs);
-}
-
 static int raw_media_changed(BlockDriverState *bs)
 {
     return bdrv_media_changed(bs->file->bs);
@@ -264,7 +259,6 @@
     .bdrv_refresh_limits  = &raw_refresh_limits,
     .bdrv_probe_blocksizes = &raw_probe_blocksizes,
     .bdrv_probe_geometry  = &raw_probe_geometry,
-    .bdrv_is_inserted     = &raw_is_inserted,
     .bdrv_media_changed   = &raw_media_changed,
     .bdrv_eject           = &raw_eject,
     .bdrv_lock_medium     = &raw_lock_medium,
diff --git a/block/sheepdog.c b/block/sheepdog.c
index e7e58b7..d80e4ed 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -651,14 +651,16 @@
     unsigned int *rlen = srco->rlen;
 
     co = qemu_coroutine_self();
-    aio_set_fd_handler(srco->aio_context, sockfd, NULL, restart_co_req, co);
+    aio_set_fd_handler(srco->aio_context, sockfd, false,
+                       NULL, restart_co_req, co);
 
     ret = send_co_req(sockfd, hdr, data, wlen);
     if (ret < 0) {
         goto out;
     }
 
-    aio_set_fd_handler(srco->aio_context, sockfd, restart_co_req, NULL, co);
+    aio_set_fd_handler(srco->aio_context, sockfd, false,
+                       restart_co_req, NULL, co);
 
     ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
     if (ret != sizeof(*hdr)) {
@@ -683,7 +685,8 @@
 out:
     /* there is at most one request for this sockfd, so it is safe to
      * set each handler to NULL. */
-    aio_set_fd_handler(srco->aio_context, sockfd, NULL, NULL, NULL);
+    aio_set_fd_handler(srco->aio_context, sockfd, false,
+                       NULL, NULL, NULL);
 
     srco->ret = ret;
     srco->finished = true;
@@ -735,7 +738,8 @@
     BDRVSheepdogState *s = opaque;
     AIOReq *aio_req, *next;
 
-    aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
+    aio_set_fd_handler(s->aio_context, s->fd, false, NULL,
+                       NULL, NULL);
     close(s->fd);
     s->fd = -1;
 
@@ -938,7 +942,8 @@
         return fd;
     }
 
-    aio_set_fd_handler(s->aio_context, fd, co_read_response, NULL, s);
+    aio_set_fd_handler(s->aio_context, fd, false,
+                       co_read_response, NULL, s);
     return fd;
 }
 
@@ -1199,7 +1204,7 @@
 
     qemu_co_mutex_lock(&s->lock);
     s->co_send = qemu_coroutine_self();
-    aio_set_fd_handler(s->aio_context, s->fd,
+    aio_set_fd_handler(s->aio_context, s->fd, false,
                        co_read_response, co_write_request, s);
     socket_set_cork(s->fd, 1);
 
@@ -1218,7 +1223,8 @@
     }
 out:
     socket_set_cork(s->fd, 0);
-    aio_set_fd_handler(s->aio_context, s->fd, co_read_response, NULL, s);
+    aio_set_fd_handler(s->aio_context, s->fd, false,
+                       co_read_response, NULL, s);
     s->co_send = NULL;
     qemu_co_mutex_unlock(&s->lock);
 }
@@ -1368,7 +1374,8 @@
 {
     BDRVSheepdogState *s = bs->opaque;
 
-    aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
+    aio_set_fd_handler(s->aio_context, s->fd, false, NULL,
+                       NULL, NULL);
 }
 
 static void sd_attach_aio_context(BlockDriverState *bs,
@@ -1377,7 +1384,8 @@
     BDRVSheepdogState *s = bs->opaque;
 
     s->aio_context = new_context;
-    aio_set_fd_handler(new_context, s->fd, co_read_response, NULL, s);
+    aio_set_fd_handler(new_context, s->fd, false,
+                       co_read_response, NULL, s);
 }
 
 /* TODO Convert to fine grained options */
@@ -1490,7 +1498,8 @@
     g_free(buf);
     return 0;
 out:
-    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, NULL, NULL);
+    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd,
+                       false, NULL, NULL, NULL);
     if (s->fd >= 0) {
         closesocket(s->fd);
     }
@@ -1528,7 +1537,8 @@
     BDRVSheepdogState *s = state->bs->opaque;
 
     if (s->fd) {
-        aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
+        aio_set_fd_handler(s->aio_context, s->fd, false,
+                           NULL, NULL, NULL);
         closesocket(s->fd);
     }
 
@@ -1551,7 +1561,8 @@
     }
 
     if (re_s->fd) {
-        aio_set_fd_handler(s->aio_context, re_s->fd, NULL, NULL, NULL);
+        aio_set_fd_handler(s->aio_context, re_s->fd, false,
+                           NULL, NULL, NULL);
         closesocket(re_s->fd);
     }
 
@@ -1935,7 +1946,8 @@
         error_report("%s, %s", sd_strerror(rsp->result), s->name);
     }
 
-    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, NULL, NULL);
+    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd,
+                       false, NULL, NULL, NULL);
     closesocket(s->fd);
     g_free(s->host_spec);
 }
diff --git a/block/ssh.c b/block/ssh.c
index d35b51f..af025c0 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -800,14 +800,15 @@
             rd_handler, wr_handler);
 
     aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
-                       rd_handler, wr_handler, co);
+                       false, rd_handler, wr_handler, co);
 }
 
 static coroutine_fn void clear_fd_handler(BDRVSSHState *s,
                                           BlockDriverState *bs)
 {
     DPRINTF("s->sock=%d", s->sock);
-    aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, NULL, NULL, NULL);
+    aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
+                       false, NULL, NULL, NULL);
 }
 
 /* A non-blocking call returned EAGAIN, so yield, ensuring the
diff --git a/block/stream.c b/block/stream.c
index 3f64fa2..25af7ef 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -16,6 +16,7 @@
 #include "block/blockjob.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
+#include "sysemu/block-backend.h"
 
 enum {
     /*
@@ -222,7 +223,7 @@
 
     if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
          on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
-        !bdrv_iostatus_is_enabled(bs)) {
+        (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
         error_setg(errp, QERR_INVALID_PARAMETER, "on-error");
         return;
     }
diff --git a/block/throttle-groups.c b/block/throttle-groups.c
index 1abc6fc..3419af7 100644
--- a/block/throttle-groups.c
+++ b/block/throttle-groups.c
@@ -33,8 +33,7 @@
  * its own locking.
  *
  * This locking is however handled internally in this file, so it's
- * mostly transparent to outside users (but see the documentation in
- * throttle_groups_lock()).
+ * transparent to outside users.
  *
  * The whole ThrottleGroup structure is private and invisible to
  * outside users, that only use it through its ThrottleState.
@@ -76,9 +75,9 @@
  * created.
  *
  * @name: the name of the ThrottleGroup
- * @ret:  the ThrottleGroup
+ * @ret:  the ThrottleState member of the ThrottleGroup
  */
-static ThrottleGroup *throttle_group_incref(const char *name)
+ThrottleState *throttle_group_incref(const char *name)
 {
     ThrottleGroup *tg = NULL;
     ThrottleGroup *iter;
@@ -108,7 +107,7 @@
 
     qemu_mutex_unlock(&throttle_groups_lock);
 
-    return tg;
+    return &tg->ts;
 }
 
 /* Decrease the reference count of a ThrottleGroup.
@@ -116,10 +115,12 @@
  * When the reference count reaches zero the ThrottleGroup is
  * destroyed.
  *
- * @tg:  The ThrottleGroup to unref
+ * @ts:  The ThrottleGroup to unref, given by its ThrottleState member
  */
-static void throttle_group_unref(ThrottleGroup *tg)
+void throttle_group_unref(ThrottleState *ts)
 {
+    ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
+
     qemu_mutex_lock(&throttle_groups_lock);
     if (--tg->refcount == 0) {
         QTAILQ_REMOVE(&throttle_groups, tg, list);
@@ -401,7 +402,8 @@
 void throttle_group_register_bs(BlockDriverState *bs, const char *groupname)
 {
     int i;
-    ThrottleGroup *tg = throttle_group_incref(groupname);
+    ThrottleState *ts = throttle_group_incref(groupname);
+    ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
     int clock_type = QEMU_CLOCK_REALTIME;
 
     if (qtest_enabled()) {
@@ -409,7 +411,7 @@
         clock_type = QEMU_CLOCK_VIRTUAL;
     }
 
-    bs->throttle_state = &tg->ts;
+    bs->throttle_state = ts;
 
     qemu_mutex_lock(&tg->lock);
     /* If the ThrottleGroup is new set this BlockDriverState as the token */
@@ -461,38 +463,10 @@
     throttle_timers_destroy(&bs->throttle_timers);
     qemu_mutex_unlock(&tg->lock);
 
-    throttle_group_unref(tg);
+    throttle_group_unref(&tg->ts);
     bs->throttle_state = NULL;
 }
 
-/* Acquire the lock of this throttling group.
- *
- * You won't normally need to use this. None of the functions from the
- * ThrottleGroup API require you to acquire the lock since all of them
- * deal with it internally.
- *
- * This should only be used in exceptional cases when you want to
- * access the protected fields of a BlockDriverState directly
- * (e.g. bdrv_swap()).
- *
- * @bs: a BlockDriverState that is member of the group
- */
-void throttle_group_lock(BlockDriverState *bs)
-{
-    ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
-    qemu_mutex_lock(&tg->lock);
-}
-
-/* Release the lock of this throttling group.
- *
- * See the comments in throttle_group_lock().
- */
-void throttle_group_unlock(BlockDriverState *bs)
-{
-    ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
-    qemu_mutex_unlock(&tg->lock);
-}
-
 static void throttle_groups_init(void)
 {
     qemu_mutex_init(&throttle_groups_lock);
diff --git a/block/win32-aio.c b/block/win32-aio.c
index 64e8682..bbf2f01 100644
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -174,7 +174,7 @@
 void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
                                   AioContext *old_context)
 {
-    aio_set_event_notifier(old_context, &aio->e, NULL);
+    aio_set_event_notifier(old_context, &aio->e, false, NULL);
     aio->is_aio_context_attached = false;
 }
 
@@ -182,7 +182,8 @@
                                   AioContext *new_context)
 {
     aio->is_aio_context_attached = true;
-    aio_set_event_notifier(new_context, &aio->e, win32_aio_completion_cb);
+    aio_set_event_notifier(new_context, &aio->e, false,
+                           win32_aio_completion_cb);
 }
 
 QEMUWin32AIOState *win32_aio_init(void)
diff --git a/blockdev.c b/blockdev.c
index 8141b6b..18712d2 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -124,15 +124,17 @@
         return;
     }
 
-    aio_context = bdrv_get_aio_context(bs);
-    aio_context_acquire(aio_context);
+    if (bs) {
+        aio_context = bdrv_get_aio_context(bs);
+        aio_context_acquire(aio_context);
 
-    if (bs->job) {
-        block_job_cancel(bs->job);
+        if (bs->job) {
+            block_job_cancel(bs->job);
+        }
+
+        aio_context_release(aio_context);
     }
 
-    aio_context_release(aio_context);
-
     dinfo->auto_del = 1;
 }
 
@@ -229,8 +231,8 @@
             dinfo->type != IF_NONE) {
             fprintf(stderr, "Warning: Orphaned drive without device: "
                     "id=%s,file=%s,if=%s,bus=%d,unit=%d\n",
-                    blk_name(blk), blk_bs(blk)->filename, if_name[dinfo->type],
-                    dinfo->bus, dinfo->unit);
+                    blk_name(blk), blk_bs(blk) ? blk_bs(blk)->filename : "",
+                    if_name[dinfo->type], dinfo->bus, dinfo->unit);
             rs = true;
         }
     }
@@ -348,25 +350,134 @@
 
 typedef enum { MEDIA_DISK, MEDIA_CDROM } DriveMediaType;
 
+/* Extracts @opts into the non-NULL out-parameters; only @opts is mandatory. */
+static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags,
+    const char **throttling_group, ThrottleConfig *throttle_cfg,
+    BlockdevDetectZeroesOptions *detect_zeroes, Error **errp)
+{
+    const char *discard;
+    Error *local_error = NULL;
+    const char *aio;
+    /* Errors are reported via @errp; later outputs are then left unset. */
+    if (bdrv_flags) {
+        if (!qemu_opt_get_bool(opts, "read-only", false)) {
+            *bdrv_flags |= BDRV_O_RDWR;
+        }
+        if (qemu_opt_get_bool(opts, "copy-on-read", false)) {
+            *bdrv_flags |= BDRV_O_COPY_ON_READ;
+        }
+
+        if ((discard = qemu_opt_get(opts, "discard")) != NULL) {
+            if (bdrv_parse_discard_flags(discard, bdrv_flags) != 0) {
+                error_setg(errp, "Invalid discard option");
+                return;
+            }
+        }
+
+        if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_WB, true)) {
+            *bdrv_flags |= BDRV_O_CACHE_WB;
+        }
+        if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) {
+            *bdrv_flags |= BDRV_O_NOCACHE;
+        }
+        if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
+            *bdrv_flags |= BDRV_O_NO_FLUSH;
+        }
+
+        if ((aio = qemu_opt_get(opts, "aio")) != NULL) {
+            if (!strcmp(aio, "native")) {
+                *bdrv_flags |= BDRV_O_NATIVE_AIO;
+            } else if (!strcmp(aio, "threads")) {
+                /* this is the default */
+            } else {
+               error_setg(errp, "invalid aio option");
+               return;
+            }
+        }
+    }
+
+    /* disk I/O throttling: group name, then per-bucket avg/max and op size */
+    if (throttling_group) {
+        *throttling_group = qemu_opt_get(opts, "throttling.group");
+    }
+
+    if (throttle_cfg) {
+        memset(throttle_cfg, 0, sizeof(*throttle_cfg));
+        throttle_cfg->buckets[THROTTLE_BPS_TOTAL].avg =
+            qemu_opt_get_number(opts, "throttling.bps-total", 0);
+        throttle_cfg->buckets[THROTTLE_BPS_READ].avg  =
+            qemu_opt_get_number(opts, "throttling.bps-read", 0);
+        throttle_cfg->buckets[THROTTLE_BPS_WRITE].avg =
+            qemu_opt_get_number(opts, "throttling.bps-write", 0);
+        throttle_cfg->buckets[THROTTLE_OPS_TOTAL].avg =
+            qemu_opt_get_number(opts, "throttling.iops-total", 0);
+        throttle_cfg->buckets[THROTTLE_OPS_READ].avg =
+            qemu_opt_get_number(opts, "throttling.iops-read", 0);
+        throttle_cfg->buckets[THROTTLE_OPS_WRITE].avg =
+            qemu_opt_get_number(opts, "throttling.iops-write", 0);
+
+        throttle_cfg->buckets[THROTTLE_BPS_TOTAL].max =
+            qemu_opt_get_number(opts, "throttling.bps-total-max", 0);
+        throttle_cfg->buckets[THROTTLE_BPS_READ].max  =
+            qemu_opt_get_number(opts, "throttling.bps-read-max", 0);
+        throttle_cfg->buckets[THROTTLE_BPS_WRITE].max =
+            qemu_opt_get_number(opts, "throttling.bps-write-max", 0);
+        throttle_cfg->buckets[THROTTLE_OPS_TOTAL].max =
+            qemu_opt_get_number(opts, "throttling.iops-total-max", 0);
+        throttle_cfg->buckets[THROTTLE_OPS_READ].max =
+            qemu_opt_get_number(opts, "throttling.iops-read-max", 0);
+        throttle_cfg->buckets[THROTTLE_OPS_WRITE].max =
+            qemu_opt_get_number(opts, "throttling.iops-write-max", 0);
+
+        throttle_cfg->op_size =
+            qemu_opt_get_number(opts, "throttling.iops-size", 0);
+
+        if (!check_throttle_config(throttle_cfg, errp)) {
+            return;
+        }
+    }
+    /* detect-zeroes (UNMAP needs BDRV_O_UNMAP when @bdrv_flags is given) */
+    if (detect_zeroes) {
+        *detect_zeroes =
+            qapi_enum_parse(BlockdevDetectZeroesOptions_lookup,
+                            qemu_opt_get(opts, "detect-zeroes"),
+                            BLOCKDEV_DETECT_ZEROES_OPTIONS_MAX,
+                            BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
+                            &local_error);
+        if (local_error) {
+            error_propagate(errp, local_error);
+            return;
+        }
+
+        if (bdrv_flags &&
+            *detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
+            !(*bdrv_flags & BDRV_O_UNMAP))
+        {
+            error_setg(errp, "setting detect-zeroes to unmap is not allowed "
+                             "without setting discard operation to unmap");
+            return;
+        }
+    }
+}
+
 /* Takes the ownership of bs_opts */
 static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
                                    Error **errp)
 {
     const char *buf;
-    int ro = 0;
     int bdrv_flags = 0;
     int on_read_error, on_write_error;
     BlockBackend *blk;
     BlockDriverState *bs;
     ThrottleConfig cfg;
     int snapshot = 0;
-    bool copy_on_read;
     Error *error = NULL;
     QemuOpts *opts;
     const char *id;
     bool has_driver_specific_opts;
-    BlockdevDetectZeroesOptions detect_zeroes;
-    const char *throttling_group;
+    BlockdevDetectZeroesOptions detect_zeroes =
+        BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF;
+    const char *throttling_group = NULL;
 
     /* Check common options by copying from bs_opts to opts, all other options
      * stay in bs_opts for processing by bdrv_open(). */
@@ -391,35 +502,12 @@
 
     /* extract parameters */
     snapshot = qemu_opt_get_bool(opts, "snapshot", 0);
-    ro = qemu_opt_get_bool(opts, "read-only", 0);
-    copy_on_read = qemu_opt_get_bool(opts, "copy-on-read", false);
 
-    if ((buf = qemu_opt_get(opts, "discard")) != NULL) {
-        if (bdrv_parse_discard_flags(buf, &bdrv_flags) != 0) {
-            error_setg(errp, "invalid discard option");
-            goto early_err;
-        }
-    }
-
-    if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_WB, true)) {
-        bdrv_flags |= BDRV_O_CACHE_WB;
-    }
-    if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) {
-        bdrv_flags |= BDRV_O_NOCACHE;
-    }
-    if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
-        bdrv_flags |= BDRV_O_NO_FLUSH;
-    }
-
-    if ((buf = qemu_opt_get(opts, "aio")) != NULL) {
-        if (!strcmp(buf, "native")) {
-            bdrv_flags |= BDRV_O_NATIVE_AIO;
-        } else if (!strcmp(buf, "threads")) {
-            /* this is the default */
-        } else {
-           error_setg(errp, "invalid aio option");
-           goto early_err;
-        }
+    extract_common_blockdev_options(opts, &bdrv_flags, &throttling_group, &cfg,
+                                    &detect_zeroes, &error);
+    if (error) {
+        error_propagate(errp, error);
+        goto early_err;
     }
 
     if ((buf = qemu_opt_get(opts, "format")) != NULL) {
@@ -437,43 +525,6 @@
         qdict_put(bs_opts, "driver", qstring_from_str(buf));
     }
 
-    /* disk I/O throttling */
-    memset(&cfg, 0, sizeof(cfg));
-    cfg.buckets[THROTTLE_BPS_TOTAL].avg =
-        qemu_opt_get_number(opts, "throttling.bps-total", 0);
-    cfg.buckets[THROTTLE_BPS_READ].avg  =
-        qemu_opt_get_number(opts, "throttling.bps-read", 0);
-    cfg.buckets[THROTTLE_BPS_WRITE].avg =
-        qemu_opt_get_number(opts, "throttling.bps-write", 0);
-    cfg.buckets[THROTTLE_OPS_TOTAL].avg =
-        qemu_opt_get_number(opts, "throttling.iops-total", 0);
-    cfg.buckets[THROTTLE_OPS_READ].avg =
-        qemu_opt_get_number(opts, "throttling.iops-read", 0);
-    cfg.buckets[THROTTLE_OPS_WRITE].avg =
-        qemu_opt_get_number(opts, "throttling.iops-write", 0);
-
-    cfg.buckets[THROTTLE_BPS_TOTAL].max =
-        qemu_opt_get_number(opts, "throttling.bps-total-max", 0);
-    cfg.buckets[THROTTLE_BPS_READ].max  =
-        qemu_opt_get_number(opts, "throttling.bps-read-max", 0);
-    cfg.buckets[THROTTLE_BPS_WRITE].max =
-        qemu_opt_get_number(opts, "throttling.bps-write-max", 0);
-    cfg.buckets[THROTTLE_OPS_TOTAL].max =
-        qemu_opt_get_number(opts, "throttling.iops-total-max", 0);
-    cfg.buckets[THROTTLE_OPS_READ].max =
-        qemu_opt_get_number(opts, "throttling.iops-read-max", 0);
-    cfg.buckets[THROTTLE_OPS_WRITE].max =
-        qemu_opt_get_number(opts, "throttling.iops-write-max", 0);
-
-    cfg.op_size = qemu_opt_get_number(opts, "throttling.iops-size", 0);
-
-    throttling_group = qemu_opt_get(opts, "throttling.group");
-
-    if (!check_throttle_config(&cfg, &error)) {
-        error_propagate(errp, error);
-        goto early_err;
-    }
-
     on_write_error = BLOCKDEV_ON_ERROR_ENOSPC;
     if ((buf = qemu_opt_get(opts, "werror")) != NULL) {
         on_write_error = parse_block_error_action(buf, 0, &error);
@@ -492,34 +543,34 @@
         }
     }
 
-    detect_zeroes =
-        qapi_enum_parse(BlockdevDetectZeroesOptions_lookup,
-                        qemu_opt_get(opts, "detect-zeroes"),
-                        BLOCKDEV_DETECT_ZEROES_OPTIONS_MAX,
-                        BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
-                        &error);
-    if (error) {
-        error_propagate(errp, error);
-        goto early_err;
-    }
-
-    if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
-        !(bdrv_flags & BDRV_O_UNMAP)) {
-        error_setg(errp, "setting detect-zeroes to unmap is not allowed "
-                         "without setting discard operation to unmap");
-        goto early_err;
+    if (snapshot) {
+        /* always use cache=unsafe with snapshot */
+        bdrv_flags &= ~BDRV_O_CACHE_MASK;
+        bdrv_flags |= (BDRV_O_SNAPSHOT|BDRV_O_CACHE_WB|BDRV_O_NO_FLUSH);
     }
 
     /* init */
     if ((!file || !*file) && !has_driver_specific_opts) {
-        blk = blk_new_with_bs(qemu_opts_id(opts), errp);
+        BlockBackendRootState *blk_rs;
+
+        blk = blk_new(qemu_opts_id(opts), errp);
         if (!blk) {
             goto early_err;
         }
 
-        bs = blk_bs(blk);
-        bs->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
-        bs->read_only = ro;
+        blk_rs = blk_get_root_state(blk);
+        blk_rs->open_flags    = bdrv_flags;
+        blk_rs->read_only     = !(bdrv_flags & BDRV_O_RDWR);
+        blk_rs->detect_zeroes = detect_zeroes;
+
+        if (throttle_enabled(&cfg)) {
+            if (!throttling_group) {
+                throttling_group = blk_name(blk);
+            }
+            blk_rs->throttle_group = g_strdup(throttling_group);
+            blk_rs->throttle_state = throttle_group_incref(throttling_group);
+            blk_rs->throttle_state->cfg = cfg;
+        }
 
         QDECREF(bs_opts);
     } else {
@@ -527,46 +578,30 @@
             file = NULL;
         }
 
-        if (snapshot) {
-            /* always use cache=unsafe with snapshot */
-            bdrv_flags &= ~BDRV_O_CACHE_MASK;
-            bdrv_flags |= (BDRV_O_SNAPSHOT|BDRV_O_CACHE_WB|BDRV_O_NO_FLUSH);
-        }
-
-        if (copy_on_read) {
-            bdrv_flags |= BDRV_O_COPY_ON_READ;
-        }
-
-        if (runstate_check(RUN_STATE_INMIGRATE)) {
-            bdrv_flags |= BDRV_O_INCOMING;
-        }
-
-        bdrv_flags |= ro ? 0 : BDRV_O_RDWR;
-
         blk = blk_new_open(qemu_opts_id(opts), file, NULL, bs_opts, bdrv_flags,
                            errp);
         if (!blk) {
             goto err_no_bs_opts;
         }
         bs = blk_bs(blk);
-    }
 
-    bs->detect_zeroes = detect_zeroes;
+        bs->detect_zeroes = detect_zeroes;
 
-    bdrv_set_on_error(bs, on_read_error, on_write_error);
-
-    /* disk I/O throttling */
-    if (throttle_enabled(&cfg)) {
-        if (!throttling_group) {
-            throttling_group = blk_name(blk);
+        /* disk I/O throttling */
+        if (throttle_enabled(&cfg)) {
+            if (!throttling_group) {
+                throttling_group = blk_name(blk);
+            }
+            bdrv_io_limits_enable(bs, throttling_group);
+            bdrv_set_io_limits(bs, &cfg);
         }
-        bdrv_io_limits_enable(bs, throttling_group);
-        bdrv_set_io_limits(bs, &cfg);
+
+        if (bdrv_key_required(bs)) {
+            autostart = 0;
+        }
     }
 
-    if (bdrv_key_required(bs)) {
-        autostart = 0;
-    }
+    blk_set_on_error(blk, on_read_error, on_write_error);
 
 err_no_bs_opts:
     qemu_opts_del(opts);
@@ -579,6 +614,54 @@
     return NULL;
 }
 
+static QemuOptsList qemu_root_bds_opts;
+
+/* Opens a BDS tree without a BlockBackend; takes the ownership of bs_opts */
+static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
+{
+    BlockDriverState *bs;
+    QemuOpts *opts;
+    Error *local_error = NULL;
+    BlockdevDetectZeroesOptions detect_zeroes;
+    int ret;
+    int bdrv_flags = 0;
+    /* Returns the BDS from bdrv_open(), or NULL if option parsing fails. */
+    opts = qemu_opts_create(&qemu_root_bds_opts, NULL, 1, errp);
+    if (!opts) {
+        goto fail;
+    }
+
+    qemu_opts_absorb_qdict(opts, bs_opts, &local_error);
+    if (local_error) {
+        error_propagate(errp, local_error);
+        goto fail;
+    }
+    /* Only open flags and detect-zeroes are extracted; no throttling. */
+    extract_common_blockdev_options(opts, &bdrv_flags, NULL, NULL,
+                                    &detect_zeroes, &local_error);
+    if (local_error) {
+        error_propagate(errp, local_error);
+        goto fail;
+    }
+
+    bs = NULL;
+    ret = bdrv_open(&bs, NULL, NULL, bs_opts, bdrv_flags, errp);
+    if (ret < 0) {
+        goto fail_no_bs_opts;
+    }
+
+    bs->detect_zeroes = detect_zeroes;
+    /* Success also leaves through here; unlike "fail", no QDECREF(bs_opts). */
+fail_no_bs_opts:
+    qemu_opts_del(opts);
+    return bs;
+
+fail:
+    qemu_opts_del(opts);
+    QDECREF(bs_opts);
+    return NULL;
+}
+
 static void qemu_opt_rename(QemuOpts *opts, const char *from, const char *to,
                             Error **errp)
 {
@@ -1042,6 +1125,10 @@
             monitor_printf(mon, "Device '%s' not found\n", device);
             return;
         }
+        if (!blk_is_available(blk)) {
+            monitor_printf(mon, "Device '%s' has no medium\n", device);
+            return;
+        }
         ret = bdrv_commit(blk_bs(blk));
     }
     if (ret < 0) {
@@ -1121,7 +1208,9 @@
                   "Device '%s' not found", device);
         return NULL;
     }
-    bs = blk_bs(blk);
+
+    aio_context = blk_get_aio_context(blk);
+    aio_context_acquire(aio_context);
 
     if (!has_id) {
         id = NULL;
@@ -1133,11 +1222,14 @@
 
     if (!id && !name) {
         error_setg(errp, "Name or id must be provided");
-        return NULL;
+        goto out_aio_context;
     }
 
-    aio_context = bdrv_get_aio_context(bs);
-    aio_context_acquire(aio_context);
+    if (!blk_is_available(blk)) {
+        error_setg(errp, "Device '%s' has no medium", device);
+        goto out_aio_context;
+    }
+    bs = blk_bs(blk);
 
     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, errp)) {
         goto out_aio_context;
@@ -1278,6 +1370,7 @@
     BlockDriverState *bs;
     AioContext *aio_context;
     QEMUSnapshotInfo sn;
+    bool created;
 } InternalSnapshotState;
 
 static void internal_snapshot_prepare(BlkTransactionState *common,
@@ -1311,16 +1404,19 @@
                   "Device '%s' not found", device);
         return;
     }
-    bs = blk_bs(blk);
 
     /* AioContext is released in .clean() */
-    state->aio_context = bdrv_get_aio_context(bs);
+    state->aio_context = blk_get_aio_context(blk);
     aio_context_acquire(state->aio_context);
 
-    if (!bdrv_is_inserted(bs)) {
+    if (!blk_is_available(blk)) {
         error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
         return;
     }
+    bs = blk_bs(blk);
+
+    state->bs = bs;
+    bdrv_drained_begin(bs);
 
     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, errp)) {
         return;
@@ -1373,7 +1469,7 @@
     }
 
     /* 4. succeed, mark a snapshot is created */
-    state->bs = bs;
+    state->created = true;
 }
 
 static void internal_snapshot_abort(BlkTransactionState *common)
@@ -1384,7 +1480,7 @@
     QEMUSnapshotInfo *sn = &state->sn;
     Error *local_error = NULL;
 
-    if (!bs) {
+    if (!state->created) {
         return;
     }
 
@@ -1405,6 +1501,9 @@
                                              common, common);
 
     if (state->aio_context) {
+        if (state->bs) {
+            bdrv_drained_end(state->bs);
+        }
         aio_context_release(state->aio_context);
     }
 }
@@ -1477,6 +1576,7 @@
     /* Acquire AioContext now so any threads operating on old_bs stop */
     state->aio_context = bdrv_get_aio_context(state->old_bs);
     aio_context_acquire(state->aio_context);
+    bdrv_drained_begin(state->old_bs);
 
     if (!bdrv_is_inserted(state->old_bs)) {
         error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
@@ -1546,8 +1646,6 @@
      * don't want to abort all of them if one of them fails the reopen */
     bdrv_reopen(state->old_bs, state->old_bs->open_flags & ~BDRV_O_RDWR,
                 NULL);
-
-    aio_context_release(state->aio_context);
 }
 
 static void external_snapshot_abort(BlkTransactionState *common)
@@ -1557,7 +1655,14 @@
     if (state->new_bs) {
         bdrv_unref(state->new_bs);
     }
+}
+
+static void external_snapshot_clean(BlkTransactionState *common)
+{
+    ExternalSnapshotState *state =
+                             DO_UPCAST(ExternalSnapshotState, common, common);
     if (state->aio_context) {
+        bdrv_drained_end(state->old_bs); /* pairs with bdrv_drained_begin() */
         aio_context_release(state->aio_context);
     }
 }
@@ -1572,7 +1677,6 @@
 static void drive_backup_prepare(BlkTransactionState *common, Error **errp)
 {
     DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
-    BlockDriverState *bs;
     BlockBackend *blk;
     DriveBackup *backup;
     Error *local_err = NULL;
@@ -1586,11 +1690,17 @@
                   "Device '%s' not found", backup->device);
         return;
     }
-    bs = blk_bs(blk);
+
+    if (!blk_is_available(blk)) {
+        error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, backup->device);
+        return;
+    }
 
     /* AioContext is released in .clean() */
-    state->aio_context = bdrv_get_aio_context(bs);
+    state->aio_context = blk_get_aio_context(blk);
     aio_context_acquire(state->aio_context);
+    bdrv_drained_begin(blk_bs(blk));
+    state->bs = blk_bs(blk);
 
     qmp_drive_backup(backup->device, backup->target,
                      backup->has_format, backup->format,
@@ -1606,7 +1716,6 @@
         return;
     }
 
-    state->bs = bs;
     state->job = state->bs->job;
 }
 
@@ -1626,6 +1735,7 @@
     DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
 
     if (state->aio_context) {
+        bdrv_drained_end(state->bs);
         aio_context_release(state->aio_context);
     }
 }
@@ -1641,8 +1751,7 @@
 {
     BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
     BlockdevBackup *backup;
-    BlockDriverState *bs, *target;
-    BlockBackend *blk;
+    BlockBackend *blk, *target;
     Error *local_err = NULL;
 
     assert(common->action->kind == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP);
@@ -1653,23 +1762,28 @@
         error_setg(errp, "Device '%s' not found", backup->device);
         return;
     }
-    bs = blk_bs(blk);
 
-    blk = blk_by_name(backup->target);
-    if (!blk) {
+    if (!blk_is_available(blk)) {
+        error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, backup->device);
+        return;
+    }
+
+    target = blk_by_name(backup->target);
+    if (!target) {
         error_setg(errp, "Device '%s' not found", backup->target);
         return;
     }
-    target = blk_bs(blk);
 
     /* AioContext is released in .clean() */
-    state->aio_context = bdrv_get_aio_context(bs);
-    if (state->aio_context != bdrv_get_aio_context(target)) {
+    state->aio_context = blk_get_aio_context(blk);
+    if (state->aio_context != blk_get_aio_context(target)) {
         state->aio_context = NULL;
         error_setg(errp, "Backup between two IO threads is not implemented");
         return;
     }
     aio_context_acquire(state->aio_context);
+    state->bs = blk_bs(blk);
+    bdrv_drained_begin(state->bs);
 
     qmp_blockdev_backup(backup->device, backup->target,
                         backup->sync,
@@ -1682,7 +1796,6 @@
         return;
     }
 
-    state->bs = bs;
     state->job = state->bs->job;
 }
 
@@ -1702,6 +1815,7 @@
     BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
 
     if (state->aio_context) {
+        bdrv_drained_end(state->bs);
         aio_context_release(state->aio_context);
     }
 }
@@ -1722,6 +1836,7 @@
         .prepare  = external_snapshot_prepare,
         .commit   = external_snapshot_commit,
         .abort = external_snapshot_abort,
+        .clean = external_snapshot_clean,
     },
     [TRANSACTION_ACTION_KIND_DRIVE_BACKUP] = {
         .instance_size = sizeof(DriveBackupState),
@@ -1820,6 +1935,11 @@
     BlockDriverState *bs = blk_bs(blk);
     AioContext *aio_context;
 
+    if (!bs) {
+        /* No medium inserted, so there is nothing to do */
+        return;
+    }
+
     aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(aio_context);
 
@@ -1886,7 +2006,8 @@
 }
 
 /* Assumes AioContext is held */
-static void qmp_bdrv_open_encrypted(BlockDriverState *bs, const char *filename,
+static void qmp_bdrv_open_encrypted(BlockDriverState **pbs,
+                                    const char *filename,
                                     int bdrv_flags, const char *format,
                                     const char *password, Error **errp)
 {
@@ -1899,13 +2020,13 @@
         qdict_put(options, "driver", qstring_from_str(format));
     }
 
-    ret = bdrv_open(&bs, filename, NULL, options, bdrv_flags, &local_err);
+    ret = bdrv_open(pbs, filename, NULL, options, bdrv_flags, &local_err);
     if (ret < 0) {
         error_propagate(errp, local_err);
         return;
     }
 
-    bdrv_add_key(bs, password, errp);
+    bdrv_add_key(*pbs, password, errp);
 }
 
 void qmp_change_blockdev(const char *device, const char *filename,
@@ -1915,6 +2036,7 @@
     BlockDriverState *bs;
     AioContext *aio_context;
     int bdrv_flags;
+    bool new_bs;
     Error *err = NULL;
 
     blk = blk_by_name(device);
@@ -1924,8 +2046,9 @@
         return;
     }
     bs = blk_bs(blk);
+    new_bs = !bs;
 
-    aio_context = bdrv_get_aio_context(bs);
+    aio_context = blk_get_aio_context(blk);
     aio_context_acquire(aio_context);
 
     eject_device(blk, 0, &err);
@@ -1934,10 +2057,21 @@
         goto out;
     }
 
-    bdrv_flags = bdrv_is_read_only(bs) ? 0 : BDRV_O_RDWR;
-    bdrv_flags |= bdrv_is_snapshot(bs) ? BDRV_O_SNAPSHOT : 0;
+    bdrv_flags = blk_is_read_only(blk) ? 0 : BDRV_O_RDWR;
+    bdrv_flags |= blk_get_root_state(blk)->open_flags & ~BDRV_O_RDWR;
 
-    qmp_bdrv_open_encrypted(bs, filename, bdrv_flags, format, NULL, errp);
+    qmp_bdrv_open_encrypted(&bs, filename, bdrv_flags, format, NULL, &err);
+    if (err) {
+        error_propagate(errp, err);
+        goto out;
+    }
+
+    if (new_bs) {
+        blk_insert_bs(blk, bs);
+        /* Has been sent automatically by bdrv_open() if blk_bs(blk) was not
+         * NULL */
+        blk_dev_change_media_cb(blk, true);
+    }
 
 out:
     aio_context_release(aio_context);
@@ -1977,7 +2111,15 @@
                   "Device '%s' not found", device);
         return;
     }
+
+    aio_context = blk_get_aio_context(blk);
+    aio_context_acquire(aio_context);
+
     bs = blk_bs(blk);
+    if (!bs) {
+        error_setg(errp, "Device '%s' has no medium", device);
+        goto out;
+    }
 
     memset(&cfg, 0, sizeof(cfg));
     cfg.buckets[THROTTLE_BPS_TOTAL].avg = bps;
@@ -2012,12 +2154,9 @@
     }
 
     if (!check_throttle_config(&cfg, errp)) {
-        return;
+        goto out;
     }
 
-    aio_context = bdrv_get_aio_context(bs);
-    aio_context_acquire(aio_context);
-
     if (throttle_enabled(&cfg)) {
         /* Enable I/O limits if they're not enabled yet, otherwise
          * just update the throttling group. */
@@ -2033,6 +2172,7 @@
         bdrv_io_limits_disable(bs);
     }
 
+out:
     aio_context_release(aio_context);
 }
 
@@ -2145,7 +2285,6 @@
         error_report("Device '%s' not found", id);
         return;
     }
-    bs = blk_bs(blk);
 
     if (!blk_legacy_dinfo(blk)) {
         error_report("Deleting device added with blockdev-add"
@@ -2153,16 +2292,19 @@
         return;
     }
 
-    aio_context = bdrv_get_aio_context(bs);
+    aio_context = blk_get_aio_context(blk);
     aio_context_acquire(aio_context);
 
-    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, &local_err)) {
-        error_report_err(local_err);
-        aio_context_release(aio_context);
-        return;
-    }
+    bs = blk_bs(blk);
+    if (bs) {
+        if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, &local_err)) {
+            error_report_err(local_err);
+            aio_context_release(aio_context);
+            return;
+        }
 
-    bdrv_close(bs);
+        bdrv_close(bs);
+    }
 
     /* if we have a device attached to this BlockDriverState
      * then we need to make the drive anonymous until the device
@@ -2172,8 +2314,8 @@
     if (blk_get_attached_dev(blk)) {
         blk_hide_on_behalf_of_hmp_drive_del(blk);
         /* Further I/O must not pause the guest */
-        bdrv_set_on_error(bs, BLOCKDEV_ON_ERROR_REPORT,
-                          BLOCKDEV_ON_ERROR_REPORT);
+        blk_set_on_error(blk, BLOCKDEV_ON_ERROR_REPORT,
+                         BLOCKDEV_ON_ERROR_REPORT);
     } else {
         blk_unref(blk);
     }
@@ -2295,11 +2437,16 @@
                   "Device '%s' not found", device);
         return;
     }
-    bs = blk_bs(blk);
 
-    aio_context = bdrv_get_aio_context(bs);
+    aio_context = blk_get_aio_context(blk);
     aio_context_acquire(aio_context);
 
+    if (!blk_is_available(blk)) {
+        error_setg(errp, "Device '%s' has no medium", device);
+        goto out;
+    }
+    bs = blk_bs(blk);
+
     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_STREAM, errp)) {
         goto out;
     }
@@ -2370,11 +2517,16 @@
                   "Device '%s' not found", device);
         return;
     }
-    bs = blk_bs(blk);
 
-    aio_context = bdrv_get_aio_context(bs);
+    aio_context = blk_get_aio_context(blk);
     aio_context_acquire(aio_context);
 
+    if (!blk_is_available(blk)) {
+        error_setg(errp, "Device '%s' has no medium", device);
+        goto out;
+    }
+    bs = blk_bs(blk);
+
     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, errp)) {
         goto out;
     }
@@ -2480,17 +2632,17 @@
                   "Device '%s' not found", device);
         return;
     }
-    bs = blk_bs(blk);
 
-    aio_context = bdrv_get_aio_context(bs);
+    aio_context = blk_get_aio_context(blk);
     aio_context_acquire(aio_context);
 
     /* Although backup_run has this check too, we need to use bs->drv below, so
      * do an early check redundantly. */
-    if (!bdrv_is_inserted(bs)) {
+    if (!blk_is_available(blk)) {
         error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
         goto out;
     }
+    bs = blk_bs(blk);
 
     if (!has_format) {
         format = mode == NEW_IMAGE_MODE_EXISTING ? NULL : bs->drv->format_name;
@@ -2587,7 +2739,7 @@
                          BlockdevOnError on_target_error,
                          Error **errp)
 {
-    BlockBackend *blk;
+    BlockBackend *blk, *target_blk;
     BlockDriverState *bs;
     BlockDriverState *target_bs;
     Error *local_err = NULL;
@@ -2608,17 +2760,27 @@
         error_setg(errp, "Device '%s' not found", device);
         return;
     }
-    bs = blk_bs(blk);
 
-    aio_context = bdrv_get_aio_context(bs);
+    aio_context = blk_get_aio_context(blk);
     aio_context_acquire(aio_context);
 
-    blk = blk_by_name(target);
-    if (!blk) {
+    if (!blk_is_available(blk)) {
+        error_setg(errp, "Device '%s' has no medium", device);
+        goto out;
+    }
+    bs = blk_bs(blk);
+
+    target_blk = blk_by_name(target);
+    if (!target_blk) {
         error_setg(errp, "Device '%s' not found", target);
         goto out;
     }
-    target_bs = blk_bs(blk);
+
+    if (!blk_is_available(target_blk)) {
+        error_setg(errp, "Device '%s' has no medium", target);
+        goto out;
+    }
+    target_bs = blk_bs(target_blk);
 
     bdrv_ref(target_bs);
     bdrv_set_aio_context(target_bs, aio_context);
@@ -2695,15 +2857,15 @@
                   "Device '%s' not found", device);
         return;
     }
-    bs = blk_bs(blk);
 
-    aio_context = bdrv_get_aio_context(bs);
+    aio_context = blk_get_aio_context(blk);
     aio_context_acquire(aio_context);
 
-    if (!bdrv_is_inserted(bs)) {
+    if (!blk_is_available(blk)) {
         error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
         goto out;
     }
+    bs = blk_bs(blk);
 
     if (!has_format) {
         format = mode == NEW_IMAGE_MODE_EXISTING ? NULL : bs->drv->format_name;
@@ -2833,17 +2995,22 @@
     BlockBackend *blk;
     BlockDriverState *bs;
 
+    *aio_context = NULL;
+
     blk = blk_by_name(device);
     if (!blk) {
         goto notfound;
     }
-    bs = blk_bs(blk);
 
-    *aio_context = bdrv_get_aio_context(bs);
+    *aio_context = blk_get_aio_context(blk);
     aio_context_acquire(*aio_context);
 
+    if (!blk_is_available(blk)) {
+        goto notfound;
+    }
+    bs = blk_bs(blk);
+
     if (!bs->job) {
-        aio_context_release(*aio_context);
         goto notfound;
     }
 
@@ -2852,7 +3019,10 @@
 notfound:
     error_set(errp, ERROR_CLASS_DEVICE_NOT_ACTIVE,
               "No active block job on device '%s'", device);
-    *aio_context = NULL;
+    if (*aio_context) {
+        aio_context_release(*aio_context);
+        *aio_context = NULL;
+    }
     return NULL;
 }
 
@@ -2959,11 +3129,16 @@
                   "Device '%s' not found", device);
         return;
     }
-    bs = blk_bs(blk);
 
-    aio_context = bdrv_get_aio_context(bs);
+    aio_context = blk_get_aio_context(blk);
     aio_context_acquire(aio_context);
 
+    if (!blk_is_available(blk)) {
+        error_setg(errp, "Device '%s' has no medium", device);
+        goto out;
+    }
+    bs = blk_bs(blk);
+
     image_bs = bdrv_lookup_bs(NULL, image_node_name, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
@@ -3030,17 +3205,12 @@
 void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
 {
     QmpOutputVisitor *ov = qmp_output_visitor_new();
-    BlockBackend *blk;
+    BlockDriverState *bs;
+    BlockBackend *blk = NULL;
     QObject *obj;
     QDict *qdict;
     Error *local_err = NULL;
 
-    /* Require an ID in the top level */
-    if (!options->has_id) {
-        error_setg(errp, "Block device needs an ID");
-        goto fail;
-    }
-
     /* TODO Sort it out in raw-posix and drive_new(): Reject aio=native with
      * cache.direct=false instead of silently switching to aio=threads, except
      * when called from drive_new().
@@ -3068,14 +3238,33 @@
 
     qdict_flatten(qdict);
 
-    blk = blockdev_init(NULL, qdict, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        goto fail;
+    if (options->has_id) {
+        blk = blockdev_init(NULL, qdict, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            goto fail;
+        }
+
+        bs = blk_bs(blk);
+    } else {
+        if (!qdict_get_try_str(qdict, "node-name")) {
+            error_setg(errp, "'id' and/or 'node-name' need to be specified for "
+                       "the root node");
+            goto fail;
+        }
+
+        bs = bds_tree_init(qdict, errp);
+        if (!bs) {
+            goto fail;
+        }
     }
 
-    if (bdrv_key_required(blk_bs(blk))) {
-        blk_unref(blk);
+    if (bs && bdrv_key_required(bs)) {
+        if (blk) {
+            blk_unref(blk);
+        } else {
+            bdrv_unref(bs);
+        }
         error_setg(errp, "blockdev-add doesn't support encrypted devices");
         goto fail;
     }
@@ -3220,6 +3409,52 @@
     },
 };
 
+/*
+ * "root-bds" option list: discard, cache, aio, read-only, copy-on-read and
+ * detect-zeroes settings.  The head initializer must name this list itself.
+ * NOTE(review): presumably parsed by bds_tree_init() -- confirm.
+ */
+static QemuOptsList qemu_root_bds_opts = {
+    .name = "root-bds",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_root_bds_opts.head),
+    .desc = {
+        {
+            .name = "discard",
+            .type = QEMU_OPT_STRING,
+            .help = "discard operation (ignore/off, unmap/on)",
+        },{
+            .name = "cache.writeback",
+            .type = QEMU_OPT_BOOL,
+            .help = "enables writeback mode for any caches",
+        },{
+            .name = "cache.direct",
+            .type = QEMU_OPT_BOOL,
+            .help = "enables use of O_DIRECT (bypass the host page cache)",
+        },{
+            .name = "cache.no-flush",
+            .type = QEMU_OPT_BOOL,
+            .help = "ignore any flush requests for the device",
+        },{
+            .name = "aio",
+            .type = QEMU_OPT_STRING,
+            .help = "host AIO implementation (threads, native)",
+        },{
+            .name = "read-only",
+            .type = QEMU_OPT_BOOL,
+            .help = "open drive file as read-only",
+        },{
+            .name = "copy-on-read",
+            .type = QEMU_OPT_BOOL,
+            .help = "copy read data from backing file into image file",
+        },{
+            .name = "detect-zeroes",
+            .type = QEMU_OPT_STRING,
+            .help = "try to optimize zero writes (off, on, unmap)",
+        },
+        { /* end of list */ }
+    },
+};
+
 QemuOptsList qemu_drive_opts = {
     .name = "drive",
     .head = QTAILQ_HEAD_INITIALIZER(qemu_drive_opts.head),
diff --git a/blockjob.c b/blockjob.c
index 1da5491..c02fe59 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -29,6 +29,7 @@
 #include "block/block.h"
 #include "block/blockjob.h"
 #include "block/block_int.h"
+#include "sysemu/block-backend.h"
 #include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qjson.h"
 #include "qemu/coroutine.h"
@@ -354,8 +355,8 @@
         job->user_paused = true;
         block_job_pause(job);
         block_job_iostatus_set_err(job, error);
-        if (bs != job->bs) {
-            bdrv_iostatus_set_err(bs, error);
+        if (bs->blk && bs != job->bs) {
+            blk_iostatus_set_err(bs->blk, error);
         }
     }
     return action;
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 6106e46..c42ddeb 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -283,7 +283,8 @@
 
     /* Get this show started by hooking up our callbacks */
     aio_context_acquire(s->ctx);
-    aio_set_event_notifier(s->ctx, &s->host_notifier, handle_notify);
+    aio_set_event_notifier(s->ctx, &s->host_notifier, true,
+                           handle_notify);
     aio_context_release(s->ctx);
     return;
 
@@ -319,7 +320,7 @@
     aio_context_acquire(s->ctx);
 
     /* Stop notifications for new requests from guest */
-    aio_set_event_notifier(s->ctx, &s->host_notifier, NULL);
+    aio_set_event_notifier(s->ctx, &s->host_notifier, true, NULL);
 
     /* Drain and switch bs back to the QEMU main loop */
     blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context());
diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 6686a72..4292ece 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -192,6 +192,8 @@
     uint8_t ro;               /* Is read-only           */
     uint8_t media_changed;    /* Is media changed       */
     uint8_t media_rate;       /* Data rate of medium    */
+
+    bool media_inserted;      /* Is there a medium in the tray */
 } FDrive;
 
 static void fd_init(FDrive *drv)
@@ -261,7 +263,7 @@
 #endif
         drv->head = head;
         if (drv->track != track) {
-            if (drv->blk != NULL && blk_is_inserted(drv->blk)) {
+            if (drv->media_inserted) {
                 drv->media_changed = 0;
             }
             ret = 1;
@@ -270,7 +272,7 @@
         drv->sect = sect;
     }
 
-    if (drv->blk == NULL || !blk_is_inserted(drv->blk)) {
+    if (!drv->media_inserted) {
         ret = 2;
     }
 
@@ -296,7 +298,7 @@
         ro = blk_is_read_only(drv->blk);
         pick_geometry(drv->blk, &nb_heads, &max_track,
                       &last_sect, drv->drive, &drive, &rate);
-        if (!blk_is_inserted(drv->blk)) {
+        if (!drv->media_inserted) {
             FLOPPY_DPRINTF("No disk in drive\n");
         } else {
             FLOPPY_DPRINTF("Floppy disk (%d h %d t %d s) %s\n", nb_heads,
@@ -692,7 +694,7 @@
 {
     FDrive *drive = opaque;
 
-    return (drive->blk != NULL && drive->media_changed != 1);
+    return (drive->media_inserted && drive->media_changed != 1);
 }
 
 static const VMStateDescription vmstate_fdrive_media_changed = {
@@ -2184,12 +2186,21 @@
 {
     FDrive *drive = opaque;
 
+    drive->media_inserted = load && drive->blk && blk_is_inserted(drive->blk);
+
     drive->media_changed = 1;
     fd_revalidate(drive);
 }
 
+static bool fdctrl_is_tray_open(void *opaque)
+{
+    FDrive *drive = opaque; /* FDrive passed to blk_set_dev_ops() */
+    return !drive->media_inserted; /* reported open when no medium */
+}
+
 static const BlockDevOps fdctrl_block_ops = {
     .change_media_cb = fdctrl_change_cb,
+    .is_tray_open = fdctrl_is_tray_open,
 };
 
 /* Init functions */
@@ -2217,6 +2228,7 @@
         fdctrl_change_cb(drive, 0);
         if (drive->blk) {
             blk_set_dev_ops(drive->blk, &fdctrl_block_ops, drive);
+            drive->media_inserted = blk_is_inserted(drive->blk);
         }
     }
 }
diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 36d7398..1bbc111 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -931,9 +931,11 @@
     blk_attach_dev_nofail(blkdev->blk, blkdev);
     blkdev->file_size = blk_getlength(blkdev->blk);
     if (blkdev->file_size < 0) {
+        BlockDriverState *bs = blk_bs(blkdev->blk);
+        const char *drv_name = bs ? bdrv_get_format_name(bs) : NULL;
         xen_be_printf(&blkdev->xendev, 1, "blk_getlength: %d (%s) | drv %s\n",
                       (int)blkdev->file_size, strerror(-blkdev->file_size),
-                      bdrv_get_format_name(blk_bs(blkdev->blk)) ?: "-");
+                      drv_name ?: "-");
         blkdev->file_size = 0;
     }
 
diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index 1248fd9..0d8d71e 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -60,7 +60,7 @@
     r = g_new(VirtIOSCSIVring, 1);
     r->host_notifier = *virtio_queue_get_host_notifier(vq);
     r->guest_notifier = *virtio_queue_get_guest_notifier(vq);
-    aio_set_event_notifier(s->ctx, &r->host_notifier, handler);
+    aio_set_event_notifier(s->ctx, &r->host_notifier, true, handler);
 
     r->parent = s;
 
@@ -71,7 +71,7 @@
     return r;
 
 fail_vring:
-    aio_set_event_notifier(s->ctx, &r->host_notifier, NULL);
+    aio_set_event_notifier(s->ctx, &r->host_notifier, true, NULL);
     k->set_host_notifier(qbus->parent, n, false);
     g_free(r);
     return NULL;
@@ -162,14 +162,17 @@
     int i;
 
     if (s->ctrl_vring) {
-        aio_set_event_notifier(s->ctx, &s->ctrl_vring->host_notifier, NULL);
+        aio_set_event_notifier(s->ctx, &s->ctrl_vring->host_notifier,
+                               true, NULL);
     }
     if (s->event_vring) {
-        aio_set_event_notifier(s->ctx, &s->event_vring->host_notifier, NULL);
+        aio_set_event_notifier(s->ctx, &s->event_vring->host_notifier,
+                               true, NULL);
     }
     if (s->cmd_vrings) {
         for (i = 0; i < vs->conf.num_queues && s->cmd_vrings[i]; i++) {
-            aio_set_event_notifier(s->ctx, &s->cmd_vrings[i]->host_notifier, NULL);
+            aio_set_event_notifier(s->ctx, &s->cmd_vrings[i]->host_notifier,
+                                   true, NULL);
         }
     }
 }
@@ -290,10 +293,13 @@
 
     aio_context_acquire(s->ctx);
 
-    aio_set_event_notifier(s->ctx, &s->ctrl_vring->host_notifier, NULL);
-    aio_set_event_notifier(s->ctx, &s->event_vring->host_notifier, NULL);
+    aio_set_event_notifier(s->ctx, &s->ctrl_vring->host_notifier,
+                           true, NULL);
+    aio_set_event_notifier(s->ctx, &s->event_vring->host_notifier,
+                           true, NULL);
     for (i = 0; i < vs->conf.num_queues; i++) {
-        aio_set_event_notifier(s->ctx, &s->cmd_vrings[i]->host_notifier, NULL);
+        aio_set_event_notifier(s->ctx, &s->cmd_vrings[i]->host_notifier,
+                               true, NULL);
     }
 
     blk_drain_all(); /* ensure there are no in-flight requests */
diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c
index 9a4e7dc..597d8fd 100644
--- a/hw/usb/dev-storage.c
+++ b/hw/usb/dev-storage.c
@@ -613,20 +613,22 @@
         return;
     }
 
-    bdrv_add_key(blk_bs(blk), NULL, &err);
-    if (err) {
-        if (monitor_cur_is_qmp()) {
-            error_propagate(errp, err);
-            return;
-        }
-        error_free(err);
-        err = NULL;
-        if (cur_mon) {
-            monitor_read_bdrv_key_start(cur_mon, blk_bs(blk),
-                                        usb_msd_password_cb, s);
-            s->dev.auto_attach = 0;
-        } else {
-            autostart = 0;
+    if (blk_bs(blk)) {
+        bdrv_add_key(blk_bs(blk), NULL, &err);
+        if (err) {
+            if (monitor_cur_is_qmp()) {
+                error_propagate(errp, err);
+                return;
+            }
+            error_free(err);
+            err = NULL;
+            if (cur_mon) {
+                monitor_read_bdrv_key_start(cur_mon, blk_bs(blk),
+                                            usb_msd_password_cb, s);
+                s->dev.auto_attach = 0;
+            } else {
+                autostart = 0;
+            }
         }
     }
 
diff --git a/include/block/accounting.h b/include/block/accounting.h
index 4c406cf..66637cd 100644
--- a/include/block/accounting.h
+++ b/include/block/accounting.h
@@ -40,7 +40,6 @@
     uint64_t nr_ops[BLOCK_MAX_IOTYPE];
     uint64_t total_time_ns[BLOCK_MAX_IOTYPE];
     uint64_t merged[BLOCK_MAX_IOTYPE];
-    uint64_t wr_highest_sector;
 } BlockAcctStats;
 
 typedef struct BlockAcctCookie {
@@ -52,8 +51,6 @@
 void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie,
                       int64_t bytes, enum BlockAcctType type);
 void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie);
-void block_acct_highest_sector(BlockAcctStats *stats, int64_t sector_num,
-                               unsigned int nb_sectors);
 void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
                            int num_requests);
 
diff --git a/include/block/aio.h b/include/block/aio.h
index 400b1b0..bcc7d43 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -122,6 +122,8 @@
 
     /* TimerLists for calling timers - one per clock type */
     QEMUTimerListGroup tlg;
+
+    int external_disable_cnt;
 };
 
 /**
@@ -299,6 +301,7 @@
  */
 void aio_set_fd_handler(AioContext *ctx,
                         int fd,
+                        bool is_external,
                         IOHandler *io_read,
                         IOHandler *io_write,
                         void *opaque);
@@ -312,6 +315,7 @@
  */
 void aio_set_event_notifier(AioContext *ctx,
                             EventNotifier *notifier,
+                            bool is_external,
                             EventNotifierHandler *io_read);
 
 /* Return a GSource that lets the main loop poll the file descriptors attached
@@ -373,4 +377,40 @@
  */
 int64_t aio_compute_timeout(AioContext *ctx);
 
+/**
+ * aio_disable_external:
+ * @ctx: the aio context
+ *
+ * Suspend processing of external clients (increments external_disable_cnt).
+ */
+static inline void aio_disable_external(AioContext *ctx)
+{
+    atomic_inc(&ctx->external_disable_cnt);
+}
+
+/**
+ * aio_enable_external:
+ * @ctx: the aio context
+ *
+ * Re-enable external clients; decrements @ctx's external_disable_cnt.
+ */
+static inline void aio_enable_external(AioContext *ctx)
+{
+    assert(ctx->external_disable_cnt > 0); /* unbalanced enable is a bug */
+    atomic_dec(&ctx->external_disable_cnt);
+}
+
+/**
+ * aio_node_check:
+ * @ctx: the aio context
+ * @is_external: Whether or not the checked node is an external event source.
+ *
+ * Return true if @ctx may poll a node with this @is_external flag right now:
+ * always for non-external nodes, else only while external_disable_cnt is 0.
+ */
+static inline bool aio_node_check(AioContext *ctx, bool is_external)
+{
+    return !is_external || !atomic_read(&ctx->external_disable_cnt);
+}
+
 #endif
diff --git a/include/block/block.h b/include/block/block.h
index 84f05ad..610db92 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -174,11 +174,6 @@
     BLOCK_OP_TYPE_MAX,
 } BlockOpType;
 
-void bdrv_iostatus_enable(BlockDriverState *bs);
-void bdrv_iostatus_reset(BlockDriverState *bs);
-void bdrv_iostatus_disable(BlockDriverState *bs);
-bool bdrv_iostatus_is_enabled(const BlockDriverState *bs);
-void bdrv_iostatus_set_err(BlockDriverState *bs, int error);
 void bdrv_info_print(Monitor *mon, const QObject *data);
 void bdrv_info(Monitor *mon, QObject **ret_data);
 void bdrv_stats_print(Monitor *mon, const QObject *data);
@@ -389,17 +384,11 @@
 int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
                             int64_t sector_num, int nb_sectors, int *pnum);
 
-void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
-                       BlockdevOnError on_write_error);
-BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read);
-BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error);
-void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
-                       bool is_read, int error);
 int bdrv_is_read_only(BlockDriverState *bs);
 int bdrv_is_sg(BlockDriverState *bs);
 int bdrv_enable_write_cache(BlockDriverState *bs);
 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce);
-int bdrv_is_inserted(BlockDriverState *bs);
+bool bdrv_is_inserted(BlockDriverState *bs);
 int bdrv_media_changed(BlockDriverState *bs);
 void bdrv_lock_medium(BlockDriverState *bs, bool locked);
 void bdrv_eject(BlockDriverState *bs, bool eject_flag);
@@ -466,7 +455,6 @@
 size_t bdrv_min_mem_align(BlockDriverState *bs);
 /* Returns optimal alignment in bytes for bounce buffer */
 size_t bdrv_opt_mem_align(BlockDriverState *bs);
-void bdrv_set_guest_block_size(BlockDriverState *bs, int align);
 void *qemu_blockalign(BlockDriverState *bs, size_t size);
 void *qemu_blockalign0(BlockDriverState *bs, size_t size);
 void *qemu_try_blockalign(BlockDriverState *bs, size_t size);
@@ -622,6 +610,23 @@
 void bdrv_io_unplug(BlockDriverState *bs);
 void bdrv_flush_io_queue(BlockDriverState *bs);
 
-BlockAcctStats *bdrv_get_stats(BlockDriverState *bs);
+/**
+ * bdrv_drained_begin:
+ *
+ * Begin a quiesced section for exclusive access to the BDS, by disabling
+ * external request sources including NBD server and device model. Note that
+ * this doesn't block timers or coroutines from submitting more requests, which
+ * means block_job_pause is still necessary.
+ *
+ * This function can be recursive.
+ */
+void bdrv_drained_begin(BlockDriverState *bs);
+
+/**
+ * bdrv_drained_end:
+ *
+ * End a quiescent section started by bdrv_drained_begin().
+ */
+void bdrv_drained_end(BlockDriverState *bs);
 
 #endif
diff --git a/include/block/block_int.h b/include/block/block_int.h
index a480f94..3ceeb5a 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -26,6 +26,7 @@
 
 #include "block/accounting.h"
 #include "block/block.h"
+#include "block/throttle-groups.h"
 #include "qemu/option.h"
 #include "qemu/queue.h"
 #include "qemu/coroutine.h"
@@ -212,7 +213,7 @@
         const char *backing_file, const char *backing_fmt);
 
     /* removable device specific */
-    int (*bdrv_is_inserted)(BlockDriverState *bs);
+    bool (*bdrv_is_inserted)(BlockDriverState *bs);
     int (*bdrv_media_changed)(BlockDriverState *bs);
     void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag);
     void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked);
@@ -399,8 +400,8 @@
     unsigned       pending_reqs[2];
     QLIST_ENTRY(BlockDriverState) round_robin;
 
-    /* I/O stats (display with "info blockstats"). */
-    BlockAcctStats stats;
+    /* Offset after the highest byte written to */
+    uint64_t wr_highest_offset;
 
     /* I/O Limits */
     BlockLimits bl;
@@ -411,18 +412,9 @@
     /* Alignment requirement for offset/length of I/O requests */
     unsigned int request_alignment;
 
-    /* the block size for which the guest device expects atomicity */
-    int guest_block_size;
-
     /* do we need to tell the quest if we have a volatile write cache? */
     int enable_write_cache;
 
-    /* NOTE: the following infos are only hints for real hardware
-       drivers. They are not used by the block driver */
-    BlockdevOnError on_read_error, on_write_error;
-    bool iostatus_enabled;
-    BlockDeviceIoStatus iostatus;
-
     /* the following member gives a name to every node on the bs graph. */
     char node_name[32];
     /* element of the list of named nodes building the graph */
@@ -456,6 +448,17 @@
     /* threshold limit for writes, in bytes. "High water mark". */
     uint64_t write_threshold_offset;
     NotifierWithReturn write_threshold_notifier;
+
+    int quiesce_counter;
+};
+
+struct BlockBackendRootState {
+    int open_flags;
+    bool read_only;
+    BlockdevDetectZeroesOptions detect_zeroes;
+
+    char *throttle_group;
+    ThrottleState *throttle_state;
 };
 
 static inline BlockDriverState *backing_bs(BlockDriverState *bs)
diff --git a/include/block/throttle-groups.h b/include/block/throttle-groups.h
index fab113f..aba28f3 100644
--- a/include/block/throttle-groups.h
+++ b/include/block/throttle-groups.h
@@ -30,6 +30,9 @@
 
 const char *throttle_group_get_name(BlockDriverState *bs);
 
+ThrottleState *throttle_group_incref(const char *name);
+void throttle_group_unref(ThrottleState *ts);
+
 void throttle_group_config(BlockDriverState *bs, ThrottleConfig *cfg);
 void throttle_group_get_config(BlockDriverState *bs, ThrottleConfig *cfg);
 
@@ -40,7 +43,4 @@
                                                         unsigned int bytes,
                                                         bool is_write);
 
-void throttle_group_lock(BlockDriverState *bs);
-void throttle_group_unlock(BlockDriverState *bs);
-
 #endif
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index d4a8f7a..d961362 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -11,6 +11,7 @@
 typedef struct AioContext AioContext;
 typedef struct AudioState AudioState;
 typedef struct BlockBackend BlockBackend;
+typedef struct BlockBackendRootState BlockBackendRootState;
 typedef struct BlockDriverState BlockDriverState;
 typedef struct BusClass BusClass;
 typedef struct BusState BusState;
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 8fc960f..9306a52 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -72,10 +72,16 @@
 BlockBackend *blk_next(BlockBackend *blk);
 
 BlockDriverState *blk_bs(BlockBackend *blk);
+void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs);
 
 void blk_hide_on_behalf_of_hmp_drive_del(BlockBackend *blk);
 
 void blk_iostatus_enable(BlockBackend *blk);
+bool blk_iostatus_is_enabled(const BlockBackend *blk);
+BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk);
+void blk_iostatus_disable(BlockBackend *blk);
+void blk_iostatus_reset(BlockBackend *blk);
+void blk_iostatus_set_err(BlockBackend *blk, int error);
 int blk_attach_dev(BlockBackend *blk, void *dev);
 void blk_attach_dev_nofail(BlockBackend *blk, void *dev);
 void blk_detach_dev(BlockBackend *blk, void *dev);
@@ -120,6 +126,8 @@
 int blk_flush_all(void);
 void blk_drain(BlockBackend *blk);
 void blk_drain_all(void);
+void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
+                      BlockdevOnError on_write_error);
 BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read);
 BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
                                       int error);
@@ -130,7 +138,8 @@
 int blk_enable_write_cache(BlockBackend *blk);
 void blk_set_enable_write_cache(BlockBackend *blk, bool wce);
 void blk_invalidate_cache(BlockBackend *blk, Error **errp);
-int blk_is_inserted(BlockBackend *blk);
+bool blk_is_inserted(BlockBackend *blk);
+bool blk_is_available(BlockBackend *blk);
 void blk_lock_medium(BlockBackend *blk, bool locked);
 void blk_eject(BlockBackend *blk, bool eject_flag);
 int blk_get_flags(BlockBackend *blk);
@@ -155,6 +164,8 @@
 void blk_io_plug(BlockBackend *blk);
 void blk_io_unplug(BlockBackend *blk);
 BlockAcctStats *blk_get_stats(BlockBackend *blk);
+BlockBackendRootState *blk_get_root_state(BlockBackend *blk);
+void blk_update_root_state(BlockBackend *blk);
 
 void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
                   BlockCompletionFunc *cb, void *opaque);
diff --git a/iohandler.c b/iohandler.c
index 55f8501..eb68083 100644
--- a/iohandler.c
+++ b/iohandler.c
@@ -55,7 +55,8 @@
                          void *opaque)
 {
     iohandler_init();
-    aio_set_fd_handler(iohandler_ctx, fd, fd_read, fd_write, opaque);
+    aio_set_fd_handler(iohandler_ctx, fd, false,
+                       fd_read, fd_write, opaque);
 }
 
 /* reaping of zombies.  right now we're not passing the status to
diff --git a/migration/block.c b/migration/block.c
index ed865ed..f7bb1e0 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -808,6 +808,11 @@
                 return -EINVAL;
             }
             bs = blk_bs(blk);
+            if (!bs) {
+                fprintf(stderr, "Block device %s has no medium\n",
+                        device_name);
+                return -EINVAL;
+            }
 
             if (bs != bs_prev) {
                 bs_prev = bs;
diff --git a/monitor.c b/monitor.c
index 4f1ba2f..301a143 100644
--- a/monitor.c
+++ b/monitor.c
@@ -4145,6 +4145,10 @@
         monitor_printf(mon, "Device not found %s\n", device);
         return -1;
     }
+    if (!blk_bs(blk)) {
+        monitor_printf(mon, "Device '%s' has no medium\n", device);
+        return -1;
+    }
 
     bdrv_add_key(blk_bs(blk), NULL, &err);
     if (err) {
diff --git a/nbd.c b/nbd.c
index fc34c44..b3d9654 100644
--- a/nbd.c
+++ b/nbd.c
@@ -1446,6 +1446,7 @@
 {
     if (client->exp && client->exp->ctx) {
         aio_set_fd_handler(client->exp->ctx, client->sock,
+                           true,
                            client->can_read ? nbd_read : NULL,
                            client->send_coroutine ? nbd_restart_write : NULL,
                            client);
@@ -1455,7 +1456,8 @@
 static void nbd_unset_handlers(NBDClient *client)
 {
     if (client->exp && client->exp->ctx) {
-        aio_set_fd_handler(client->exp->ctx, client->sock, NULL, NULL, NULL);
+        aio_set_fd_handler(client->exp->ctx, client->sock,
+                           true, NULL, NULL, NULL);
     }
 }
 
diff --git a/qapi/block-core.json b/qapi/block-core.json
index bb2189e..425fdab 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -215,10 +215,11 @@
 # @drv: the name of the block format used to open the backing device. As of
 #       0.14.0 this can be: 'blkdebug', 'bochs', 'cloop', 'cow', 'dmg',
 #       'file', 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device',
-#       'host_floppy', 'http', 'https', 'nbd', 'parallels', 'qcow',
+#       'http', 'https', 'nbd', 'parallels', 'qcow',
 #       'qcow2', 'raw', 'tftp', 'vdi', 'vmdk', 'vpc', 'vvfat'
 #       2.2: 'archipelago' added, 'cow' dropped
 #       2.3: 'host_floppy' deprecated
+#       2.5: 'host_floppy' dropped
 #
 # @backing_file: #optional the name of the backing file (for copy-on-write)
 #
@@ -1373,15 +1374,14 @@
 #
 # Drivers that are supported in block device operations.
 #
-# @host_device, @host_cdrom, @host_floppy: Since 2.1
-# @host_floppy: deprecated since 2.3
+# @host_device, @host_cdrom: Since 2.1
 #
 # Since: 2.0
 ##
 { 'enum': 'BlockdevDriver',
   'data': [ 'archipelago', 'blkdebug', 'blkverify', 'bochs', 'cloop',
             'dmg', 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device',
-            'host_floppy', 'http', 'https', 'null-aio', 'null-co', 'parallels',
+            'http', 'https', 'null-aio', 'null-co', 'parallels',
             'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'tftp', 'vdi', 'vhdx',
             'vmdk', 'vpc', 'vvfat' ] }
 
@@ -1393,9 +1393,12 @@
 #
 # @driver:        block driver name
 # @id:            #optional id by which the new block device can be referred to.
-#                 This is a required option on the top level of blockdev-add, and
-#                 currently not allowed on any other level.
-# @node-name:     #optional the name of a block driver state node (Since 2.0)
+#                 This option is only allowed on the top level of blockdev-add.
+#                 A BlockBackend will be created by blockdev-add if and only if
+#                 this option is given.
+# @node-name:     #optional the name of a block driver state node (Since 2.0).
+#                 This option is required on the top level of blockdev-add if
+#                 the @id option is not given there.
 # @discard:       #optional discard-related options (default: ignore)
 # @cache:         #optional cache-related options
 # @aio:           #optional AIO backend (default: threads)
@@ -1816,7 +1819,6 @@
 # TODO gluster: Wait for structured options
       'host_cdrom': 'BlockdevOptionsFile',
       'host_device':'BlockdevOptionsFile',
-      'host_floppy':'BlockdevOptionsFile',
       'http':       'BlockdevOptionsFile',
       'https':      'BlockdevOptionsFile',
 # TODO iscsi: Wait for structured options
@@ -1860,7 +1862,9 @@
 ##
 # @blockdev-add:
 #
-# Creates a new block device.
+# Creates a new block device. If the @id option is given at the top level, a
+# BlockBackend will be created; otherwise, @node-name is mandatory at the top
+# level and no BlockBackend will be created.
 #
 # This command is still a work in progress.  It doesn't support all
 # block drivers, it lacks a matching blockdev-del, and more.  Stay
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 2b52980..d7cf0ff 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -2520,8 +2520,8 @@
     - "wr_total_time_ns": total time spend on writes in nano-seconds (json-int)
     - "rd_total_time_ns": total time spend on reads in nano-seconds (json-int)
     - "flush_total_time_ns": total time spend on cache flushes in nano-seconds (json-int)
-    - "wr_highest_offset": Highest offset of a sector written since the
-                           BlockDriverState has been opened (json-int)
+    - "wr_highest_offset": The offset after the greatest byte written to the
+                           BlockDriverState since it has been opened (json-int)
     - "rd_merged": number of read requests that have been merged into
                    another request (json-int)
     - "wr_merged": number of write requests that have been merged into
diff --git a/qmp.c b/qmp.c
index d9ecede..ff54e5a 100644
--- a/qmp.c
+++ b/qmp.c
@@ -24,6 +24,7 @@
 #include "sysemu/arch_init.h"
 #include "hw/qdev.h"
 #include "sysemu/blockdev.h"
+#include "sysemu/block-backend.h"
 #include "qom/qom-qobject.h"
 #include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qobject.h"
@@ -170,6 +171,7 @@
 void qmp_cont(Error **errp)
 {
     Error *local_err = NULL;
+    BlockBackend *blk;
     BlockDriverState *bs;
 
     if (runstate_needs_reset()) {
@@ -179,8 +181,8 @@
         return;
     }
 
-    for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) {
-        bdrv_iostatus_reset(bs);
+    for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
+        blk_iostatus_reset(blk);
     }
     for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) {
         bdrv_add_key(bs, NULL, &local_err);
diff --git a/tests/fdc-test.c b/tests/fdc-test.c
index 416394f..b5a4696 100644
--- a/tests/fdc-test.c
+++ b/tests/fdc-test.c
@@ -304,9 +304,7 @@
     qmp_discard_response("{'execute':'change', 'arguments':{"
                          " 'device':'floppy0', 'target': %s, 'arg': 'raw' }}",
                          test_image);
-    qmp_discard_response(""); /* ignore event
-                                 (FIXME open -> open transition?!) */
-    qmp_discard_response(""); /* ignore event */
+    qmp_discard_response(""); /* ignore event (open -> close) */
 
     dir = inb(FLOPPY_BASE + reg_dir);
     assert_bit_set(dir, DSKCHG);
diff --git a/tests/qemu-iotests/071 b/tests/qemu-iotests/071
index 9eaa49b..92ab991 100755
--- a/tests/qemu-iotests/071
+++ b/tests/qemu-iotests/071
@@ -104,11 +104,20 @@
 echo "=== Testing blkdebug on existing block device ==="
 echo
 
-run_qemu -drive "file=$TEST_IMG,format=raw,if=none,id=drive0" <<EOF
+run_qemu <<EOF
 { "execute": "qmp_capabilities" }
 { "execute": "blockdev-add",
     "arguments": {
         "options": {
+            "node-name": "drive0",
+            "driver": "file",
+            "filename": "$TEST_IMG"
+        }
+    }
+}
+{ "execute": "blockdev-add",
+    "arguments": {
+        "options": {
             "driver": "$IMGFMT",
             "id": "drive0-debug",
             "file": {
@@ -133,20 +142,29 @@
 echo "=== Testing blkverify on existing block device ==="
 echo
 
-run_qemu -drive "file=$TEST_IMG,format=$IMGFMT,if=none,id=drive0" <<EOF
+run_qemu <<EOF
 { "execute": "qmp_capabilities" }
 { "execute": "blockdev-add",
     "arguments": {
         "options": {
+            "node-name": "drive0",
+            "driver": "$IMGFMT",
+            "file": {
+                "driver": "file",
+                "filename": "$TEST_IMG"
+            }
+        }
+    }
+}
+{ "execute": "blockdev-add",
+    "arguments": {
+        "options": {
             "driver": "blkverify",
             "id": "drive0-verify",
             "test": "drive0",
             "raw": {
-                "driver": "raw",
-                "file": {
-                    "driver": "file",
-                    "filename": "$TEST_IMG.base"
-                }
+                "driver": "file",
+                "filename": "$TEST_IMG.base"
             }
         }
     }
@@ -163,11 +181,20 @@
 echo "=== Testing blkverify on existing raw block device ==="
 echo
 
-run_qemu -drive "file=$TEST_IMG.base,format=raw,if=none,id=drive0" <<EOF
+run_qemu <<EOF
 { "execute": "qmp_capabilities" }
 { "execute": "blockdev-add",
     "arguments": {
         "options": {
+            "node-name": "drive0",
+            "driver": "file",
+            "filename": "$TEST_IMG.base"
+        }
+    }
+}
+{ "execute": "blockdev-add",
+    "arguments": {
+        "options": {
             "driver": "blkverify",
             "id": "drive0-verify",
             "test": {
@@ -193,11 +220,20 @@
 echo "=== Testing blkdebug's set-state through QMP ==="
 echo
 
-run_qemu -drive "file=$TEST_IMG,format=raw,if=none,id=drive0" <<EOF
+run_qemu <<EOF
 { "execute": "qmp_capabilities" }
 { "execute": "blockdev-add",
     "arguments": {
         "options": {
+            "node-name": "drive0",
+            "driver": "file",
+            "filename": "$TEST_IMG"
+        }
+    }
+}
+{ "execute": "blockdev-add",
+    "arguments": {
+        "options": {
             "driver": "$IMGFMT",
             "id": "drive0-debug",
             "file": {
diff --git a/tests/qemu-iotests/071.out b/tests/qemu-iotests/071.out
index 8d2095a..2b40ead 100644
--- a/tests/qemu-iotests/071.out
+++ b/tests/qemu-iotests/071.out
@@ -42,10 +42,11 @@
 
 === Testing blkdebug on existing block device ===
 
-Testing: -drive file=TEST_DIR/t.IMGFMT,format=raw,if=none,id=drive0
+Testing:
 QMP_VERSION
 {"return": {}}
 {"return": {}}
+{"return": {}}
 read failed: Input/output error
 {"return": ""}
 {"return": {}}
@@ -56,28 +57,31 @@
 
 === Testing blkverify on existing block device ===
 
-Testing: -drive file=TEST_DIR/t.IMGFMT,format=IMGFMT,if=none,id=drive0
+Testing:
 QMP_VERSION
 {"return": {}}
 {"return": {}}
+{"return": {}}
 blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0
 
 
 === Testing blkverify on existing raw block device ===
 
-Testing: -drive file=TEST_DIR/t.IMGFMT.base,format=raw,if=none,id=drive0
+Testing:
 QMP_VERSION
 {"return": {}}
 {"return": {}}
+{"return": {}}
 blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0
 
 
 === Testing blkdebug's set-state through QMP ===
 
-Testing: -drive file=TEST_DIR/t.IMGFMT,format=raw,if=none,id=drive0
+Testing:
 QMP_VERSION
 {"return": {}}
 {"return": {}}
+{"return": {}}
 read 512/512 bytes at offset 0
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 {"return": ""}
diff --git a/tests/qemu-iotests/081 b/tests/qemu-iotests/081
index 51873ff..e4b4c6c 100755
--- a/tests/qemu-iotests/081
+++ b/tests/qemu-iotests/081
@@ -102,17 +102,29 @@
 echo
 echo "== checking mixed reference/option specification =="
 
-run_qemu -drive "file=$TEST_DIR/2.raw,format=$IMGFMT,if=none,id=drive2" <<EOF
+run_qemu <<EOF
 { "execute": "qmp_capabilities" }
 { "execute": "blockdev-add",
     "arguments": {
         "options": {
+            "node-name": "drive2",
+            "driver": "$IMGFMT",
+            "file": {
+                "driver": "file",
+                "filename": "$TEST_DIR/2.raw"
+            }
+        }
+    }
+}
+{ "execute": "blockdev-add",
+    "arguments": {
+        "options": {
             "driver": "quorum",
             "id": "drive0-quorum",
             "vote-threshold": 2,
             "children": [
                 {
-                    "driver": "raw",
+                    "driver": "$IMGFMT",
                     "file": {
                         "driver": "file",
                         "filename": "$TEST_DIR/1.raw"
@@ -120,7 +132,7 @@
                 },
                 "drive2",
                 {
-                    "driver": "raw",
+                    "driver": "$IMGFMT",
                     "file": {
                         "driver": "file",
                         "filename": "$TEST_DIR/3.raw"
diff --git a/tests/qemu-iotests/081.out b/tests/qemu-iotests/081.out
index 044793d..7063231 100644
--- a/tests/qemu-iotests/081.out
+++ b/tests/qemu-iotests/081.out
@@ -26,11 +26,12 @@
 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 
 == checking mixed reference/option specification ==
-Testing: -drive file=TEST_DIR/2.IMGFMT,format=IMGFMT,if=none,id=drive2
+Testing:
 QMP_VERSION
 {"return": {}}
 {"return": {}}
-{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "QUORUM_REPORT_BAD", "data": {"node-name": "NODE_NAME", "sectors-count": 20480, "sector-num": 0}}
+{"return": {}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "QUORUM_REPORT_BAD", "data": {"node-name": "drive2", "sectors-count": 20480, "sector-num": 0}}
 read 10485760/10485760 bytes at offset 0
 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 {"return": ""}
diff --git a/tests/qemu-iotests/087 b/tests/qemu-iotests/087
index 8694749..af44299 100755
--- a/tests/qemu-iotests/087
+++ b/tests/qemu-iotests/087
@@ -54,7 +54,7 @@
 _make_test_img $size
 
 echo
-echo === Missing ID ===
+echo === Missing ID and node-name ===
 echo
 
 run_qemu <<EOF
diff --git a/tests/qemu-iotests/087.out b/tests/qemu-iotests/087.out
index c509a40..7d62cd5 100644
--- a/tests/qemu-iotests/087.out
+++ b/tests/qemu-iotests/087.out
@@ -1,12 +1,12 @@
 QA output created by 087
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
 
-=== Missing ID ===
+=== Missing ID and node-name ===
 
 Testing:
 QMP_VERSION
 {"return": {}}
-{"error": {"class": "GenericError", "desc": "Block device needs an ID"}}
+{"error": {"class": "GenericError", "desc": "'id' and/or 'node-name' need to be specified for the root node"}}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN"}
 
diff --git a/tests/test-aio.c b/tests/test-aio.c
index 217e337..1623803 100644
--- a/tests/test-aio.c
+++ b/tests/test-aio.c
@@ -118,6 +118,12 @@
     return NULL;
 }
 
+static void set_event_notifier(AioContext *ctx, EventNotifier *notifier,
+                               EventNotifierHandler *handler)
+{
+    aio_set_event_notifier(ctx, notifier, false, handler); /* not external */
+}
+
 static void dummy_notifier_read(EventNotifier *unused)
 {
     g_assert(false); /* should never be invoked */
@@ -131,7 +137,7 @@
 
     /* Dummy event notifier ensures aio_poll() will block */
     event_notifier_init(&notifier, false);
-    aio_set_event_notifier(ctx, &notifier, dummy_notifier_read);
+    set_event_notifier(ctx, &notifier, dummy_notifier_read);
     g_assert(!aio_poll(ctx, false)); /* consume aio_notify() */
 
     qemu_mutex_init(&data.start_lock);
@@ -149,7 +155,7 @@
     aio_context_release(ctx);
 
     qemu_thread_join(&thread);
-    aio_set_event_notifier(ctx, &notifier, NULL);
+    set_event_notifier(ctx, &notifier, NULL);
     event_notifier_cleanup(&notifier);
 
     g_assert(data.thread_acquired);
@@ -308,11 +314,11 @@
 {
     EventNotifierTestData data = { .n = 0, .active = 0 };
     event_notifier_init(&data.e, false);
-    aio_set_event_notifier(ctx, &data.e, event_ready_cb);
+    set_event_notifier(ctx, &data.e, event_ready_cb);
     g_assert(!aio_poll(ctx, false));
     g_assert_cmpint(data.n, ==, 0);
 
-    aio_set_event_notifier(ctx, &data.e, NULL);
+    set_event_notifier(ctx, &data.e, NULL);
     g_assert(!aio_poll(ctx, false));
     g_assert_cmpint(data.n, ==, 0);
     event_notifier_cleanup(&data.e);
@@ -322,7 +328,7 @@
 {
     EventNotifierTestData data = { .n = 0, .active = 1 };
     event_notifier_init(&data.e, false);
-    aio_set_event_notifier(ctx, &data.e, event_ready_cb);
+    set_event_notifier(ctx, &data.e, event_ready_cb);
     while (aio_poll(ctx, false));
     g_assert_cmpint(data.n, ==, 0);
     g_assert_cmpint(data.active, ==, 1);
@@ -336,7 +342,7 @@
     g_assert_cmpint(data.n, ==, 1);
     g_assert_cmpint(data.active, ==, 0);
 
-    aio_set_event_notifier(ctx, &data.e, NULL);
+    set_event_notifier(ctx, &data.e, NULL);
     g_assert(!aio_poll(ctx, false));
     g_assert_cmpint(data.n, ==, 1);
 
@@ -347,7 +353,7 @@
 {
     EventNotifierTestData data = { .n = 0, .active = 10, .auto_set = true };
     event_notifier_init(&data.e, false);
-    aio_set_event_notifier(ctx, &data.e, event_ready_cb);
+    set_event_notifier(ctx, &data.e, event_ready_cb);
     while (aio_poll(ctx, false));
     g_assert_cmpint(data.n, ==, 0);
     g_assert_cmpint(data.active, ==, 10);
@@ -363,18 +369,41 @@
     g_assert_cmpint(data.active, ==, 0);
     g_assert(!aio_poll(ctx, false));
 
-    aio_set_event_notifier(ctx, &data.e, NULL);
+    set_event_notifier(ctx, &data.e, NULL);
     g_assert(!aio_poll(ctx, false));
     event_notifier_cleanup(&data.e);
 }
 
+static void test_aio_external_client(void)
+{
+    int i, j; /* i: how many nested aio_disable_external() calls are made */
+    for (i = 1; i < 3; i++) {
+        EventNotifierTestData data = { .n = 0, .active = 10, .auto_set = true };
+        event_notifier_init(&data.e, false);
+        aio_set_event_notifier(ctx, &data.e, true, event_ready_cb);
+        event_notifier_set(&data.e);
+        for (j = 0; j < i; j++) {
+            aio_disable_external(ctx);
+        }
+        for (j = 0; j < i; j++) {
+            g_assert(!aio_poll(ctx, false)); /* expect no dispatch yet */
+            g_assert(event_notifier_test_and_clear(&data.e)); /* still set */
+            event_notifier_set(&data.e);
+            aio_enable_external(ctx);
+        }
+        g_assert(aio_poll(ctx, false)); /* every disable undone: progress */
+        aio_set_event_notifier(ctx, &data.e, true, NULL); /* drop handler */
+        event_notifier_cleanup(&data.e); /* data goes out of scope next */
+    }
+}
+
 static void test_wait_event_notifier_noflush(void)
 {
     EventNotifierTestData data = { .n = 0 };
     EventNotifierTestData dummy = { .n = 0, .active = 1 };
 
     event_notifier_init(&data.e, false);
-    aio_set_event_notifier(ctx, &data.e, event_ready_cb);
+    set_event_notifier(ctx, &data.e, event_ready_cb);
 
     g_assert(!aio_poll(ctx, false));
     g_assert_cmpint(data.n, ==, 0);
@@ -387,7 +416,7 @@
 
     /* An active event notifier forces aio_poll to look at EventNotifiers.  */
     event_notifier_init(&dummy.e, false);
-    aio_set_event_notifier(ctx, &dummy.e, event_ready_cb);
+    set_event_notifier(ctx, &dummy.e, event_ready_cb);
 
     event_notifier_set(&data.e);
     g_assert(aio_poll(ctx, false));
@@ -407,10 +436,10 @@
     g_assert_cmpint(dummy.n, ==, 1);
     g_assert_cmpint(dummy.active, ==, 0);
 
-    aio_set_event_notifier(ctx, &dummy.e, NULL);
+    set_event_notifier(ctx, &dummy.e, NULL);
     event_notifier_cleanup(&dummy.e);
 
-    aio_set_event_notifier(ctx, &data.e, NULL);
+    set_event_notifier(ctx, &data.e, NULL);
     g_assert(!aio_poll(ctx, false));
     g_assert_cmpint(data.n, ==, 2);
 
@@ -428,7 +457,7 @@
      * an fd to wait on. Fixing this breaks other tests. So create a dummy one.
      */
     event_notifier_init(&e, false);
-    aio_set_event_notifier(ctx, &e, dummy_io_handler_read);
+    set_event_notifier(ctx, &e, dummy_io_handler_read);
     aio_poll(ctx, false);
 
     aio_timer_init(ctx, &data.timer, data.clock_type,
@@ -467,7 +496,7 @@
     g_assert(!aio_poll(ctx, false));
     g_assert_cmpint(data.n, ==, 2);
 
-    aio_set_event_notifier(ctx, &e, NULL);
+    set_event_notifier(ctx, &e, NULL);
     event_notifier_cleanup(&e);
 
     timer_del(&data.timer);
@@ -638,11 +667,11 @@
 {
     EventNotifierTestData data = { .n = 0, .active = 0 };
     event_notifier_init(&data.e, false);
-    aio_set_event_notifier(ctx, &data.e, event_ready_cb);
+    set_event_notifier(ctx, &data.e, event_ready_cb);
     while (g_main_context_iteration(NULL, false));
     g_assert_cmpint(data.n, ==, 0);
 
-    aio_set_event_notifier(ctx, &data.e, NULL);
+    set_event_notifier(ctx, &data.e, NULL);
     while (g_main_context_iteration(NULL, false));
     g_assert_cmpint(data.n, ==, 0);
     event_notifier_cleanup(&data.e);
@@ -652,7 +681,7 @@
 {
     EventNotifierTestData data = { .n = 0, .active = 1 };
     event_notifier_init(&data.e, false);
-    aio_set_event_notifier(ctx, &data.e, event_ready_cb);
+    set_event_notifier(ctx, &data.e, event_ready_cb);
     while (g_main_context_iteration(NULL, false));
     g_assert_cmpint(data.n, ==, 0);
     g_assert_cmpint(data.active, ==, 1);
@@ -666,7 +695,7 @@
     g_assert_cmpint(data.n, ==, 1);
     g_assert_cmpint(data.active, ==, 0);
 
-    aio_set_event_notifier(ctx, &data.e, NULL);
+    set_event_notifier(ctx, &data.e, NULL);
     while (g_main_context_iteration(NULL, false));
     g_assert_cmpint(data.n, ==, 1);
 
@@ -677,7 +706,7 @@
 {
     EventNotifierTestData data = { .n = 0, .active = 10, .auto_set = true };
     event_notifier_init(&data.e, false);
-    aio_set_event_notifier(ctx, &data.e, event_ready_cb);
+    set_event_notifier(ctx, &data.e, event_ready_cb);
     while (g_main_context_iteration(NULL, false));
     g_assert_cmpint(data.n, ==, 0);
     g_assert_cmpint(data.active, ==, 10);
@@ -693,7 +722,7 @@
     g_assert_cmpint(data.active, ==, 0);
     g_assert(!g_main_context_iteration(NULL, false));
 
-    aio_set_event_notifier(ctx, &data.e, NULL);
+    set_event_notifier(ctx, &data.e, NULL);
     while (g_main_context_iteration(NULL, false));
     event_notifier_cleanup(&data.e);
 }
@@ -704,7 +733,7 @@
     EventNotifierTestData dummy = { .n = 0, .active = 1 };
 
     event_notifier_init(&data.e, false);
-    aio_set_event_notifier(ctx, &data.e, event_ready_cb);
+    set_event_notifier(ctx, &data.e, event_ready_cb);
 
     while (g_main_context_iteration(NULL, false));
     g_assert_cmpint(data.n, ==, 0);
@@ -717,7 +746,7 @@
 
     /* An active event notifier forces aio_poll to look at EventNotifiers.  */
     event_notifier_init(&dummy.e, false);
-    aio_set_event_notifier(ctx, &dummy.e, event_ready_cb);
+    set_event_notifier(ctx, &dummy.e, event_ready_cb);
 
     event_notifier_set(&data.e);
     g_assert(g_main_context_iteration(NULL, false));
@@ -737,10 +766,10 @@
     g_assert_cmpint(dummy.n, ==, 1);
     g_assert_cmpint(dummy.active, ==, 0);
 
-    aio_set_event_notifier(ctx, &dummy.e, NULL);
+    set_event_notifier(ctx, &dummy.e, NULL);
     event_notifier_cleanup(&dummy.e);
 
-    aio_set_event_notifier(ctx, &data.e, NULL);
+    set_event_notifier(ctx, &data.e, NULL);
     while (g_main_context_iteration(NULL, false));
     g_assert_cmpint(data.n, ==, 2);
 
@@ -759,7 +788,7 @@
      * an fd to wait on. Fixing this breaks other tests. So create a dummy one.
      */
     event_notifier_init(&e, false);
-    aio_set_event_notifier(ctx, &e, dummy_io_handler_read);
+    set_event_notifier(ctx, &e, dummy_io_handler_read);
     do {} while (g_main_context_iteration(NULL, false));
 
     aio_timer_init(ctx, &data.timer, data.clock_type,
@@ -784,7 +813,7 @@
     g_assert_cmpint(data.n, ==, 2);
     g_assert(qemu_clock_get_ns(data.clock_type) > expiry);
 
-    aio_set_event_notifier(ctx, &e, NULL);
+    set_event_notifier(ctx, &e, NULL);
     event_notifier_cleanup(&e);
 
     timer_del(&data.timer);
@@ -826,6 +855,7 @@
     g_test_add_func("/aio/event/wait",              test_wait_event_notifier);
     g_test_add_func("/aio/event/wait/no-flush-cb",  test_wait_event_notifier_noflush);
     g_test_add_func("/aio/event/flush",             test_flush_event_notifier);
+    g_test_add_func("/aio/external-client",         test_aio_external_client);
     g_test_add_func("/aio/timer/schedule",          test_timer_schedule);
 
     g_test_add_func("/aio-gsource/flush",                   test_source_flush);