Merge remote-tracking branch 'remotes/rth/tags/pull-tile-20151030' into staging Prefetch in y2 pipe # gpg: Signature made Fri 30 Oct 2015 20:40:02 GMT using RSA key ID 4DD0279B # gpg: Good signature from "Richard Henderson <rth7680@gmail.com>" # gpg: aka "Richard Henderson <rth@redhat.com>" # gpg: aka "Richard Henderson <rth@twiddle.net>" * remotes/rth/tags/pull-tile-20151030: target-tilegx: Implement prefetch instructions in pipe y2 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

diff --git a/Makefile b/Makefile
index cbf252d..fc1f8bb 100644
--- a/Makefile
+++ b/Makefile

@@ -151,6 +151,8 @@
                 stub-obj-y \
                 util-obj-y \
                 qga-obj-y \
+                ivshmem-client-obj-y \
+                ivshmem-server-obj-y \
                 qga-vss-dll-obj-y \
                 block-obj-y \
                 block-obj-m \
@@ -323,6 +325,11 @@
 qemu-ga: qemu-ga$(EXESUF) $(QGA_VSS_PROVIDER) $(QEMU_GA_MSI)
 endif
 
+ivshmem-client$(EXESUF): $(ivshmem-client-obj-y)
+	$(call LINK, $^)
+ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) libqemuutil.a libqemustub.a
+	$(call LINK, $^)
+
 clean:
 # avoid old build problems by removing potentially incorrect old files
 	rm -f config.mak op-i386.h opc-i386.h gen-op-i386.h op-arm.h opc-arm.h gen-op-arm.h

diff --git a/Makefile.objs b/Makefile.objs
index ecfe03c..fe02ee2 100644
--- a/Makefile.objs
+++ b/Makefile.objs

@@ -104,3 +104,8 @@
 # by libqemuutil.a.  These should be moved to a separate .json schema.
 qga-obj-y = qga/
 qga-vss-dll-obj-y = qga/
+
+######################################################################
+# contrib
+ivshmem-client-obj-y = contrib/ivshmem-client/
+ivshmem-server-obj-y = contrib/ivshmem-server/

diff --git a/aio-posix.c b/aio-posix.c
index d477033..0467f23 100644
--- a/aio-posix.c
+++ b/aio-posix.c

@@ -25,6 +25,7 @@
     IOHandler *io_write;
     int deleted;
     void *opaque;
+    bool is_external;
     QLIST_ENTRY(AioHandler) node;
 };
 
@@ -43,6 +44,7 @@
 
 void aio_set_fd_handler(AioContext *ctx,
                         int fd,
+                        bool is_external,
                         IOHandler *io_read,
                         IOHandler *io_write,
                         void *opaque)
@@ -82,6 +84,7 @@
         node->io_read = io_read;
         node->io_write = io_write;
         node->opaque = opaque;
+        node->is_external = is_external;
 
         node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
         node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
@@ -92,10 +95,11 @@
 
 void aio_set_event_notifier(AioContext *ctx,
                             EventNotifier *notifier,
+                            bool is_external,
                             EventNotifierHandler *io_read)
 {
     aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
-                       (IOHandler *)io_read, NULL, notifier);
+                       is_external, (IOHandler *)io_read, NULL, notifier);
 }
 
 bool aio_prepare(AioContext *ctx)
@@ -257,7 +261,8 @@
 
     /* fill pollfds */
     QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-        if (!node->deleted && node->pfd.events) {
+        if (!node->deleted && node->pfd.events
+            && aio_node_check(ctx, node->is_external)) {
             add_pollfd(node);
         }
     }

diff --git a/aio-win32.c b/aio-win32.c
index 50a6867..43c4c79 100644
--- a/aio-win32.c
+++ b/aio-win32.c

@@ -28,11 +28,13 @@
     GPollFD pfd;
     int deleted;
     void *opaque;
+    bool is_external;
     QLIST_ENTRY(AioHandler) node;
 };
 
 void aio_set_fd_handler(AioContext *ctx,
                         int fd,
+                        bool is_external,
                         IOHandler *io_read,
                         IOHandler *io_write,
                         void *opaque)
@@ -86,6 +88,7 @@
         node->opaque = opaque;
         node->io_read = io_read;
         node->io_write = io_write;
+        node->is_external = is_external;
 
         event = event_notifier_get_handle(&ctx->notifier);
         WSAEventSelect(node->pfd.fd, event,
@@ -98,6 +101,7 @@
 
 void aio_set_event_notifier(AioContext *ctx,
                             EventNotifier *e,
+                            bool is_external,
                             EventNotifierHandler *io_notify)
 {
     AioHandler *node;
@@ -133,6 +137,7 @@
             node->e = e;
             node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
             node->pfd.events = G_IO_IN;
+            node->is_external = is_external;
             QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
 
             g_source_add_poll(&ctx->source, &node->pfd);
@@ -304,7 +309,8 @@
     /* fill fd sets */
     count = 0;
     QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-        if (!node->deleted && node->io_notify) {
+        if (!node->deleted && node->io_notify
+            && aio_node_check(ctx, node->is_external)) {
             events[count++] = event_notifier_get_handle(node->e);
         }
     }

diff --git a/async.c b/async.c
index efce14b..bdc64a3 100644
--- a/async.c
+++ b/async.c

@@ -247,7 +247,7 @@
     }
     qemu_mutex_unlock(&ctx->bh_lock);
 
-    aio_set_event_notifier(ctx, &ctx->notifier, NULL);
+    aio_set_event_notifier(ctx, &ctx->notifier, false, NULL);
     event_notifier_cleanup(&ctx->notifier);
     rfifolock_destroy(&ctx->lock);
     qemu_mutex_destroy(&ctx->bh_lock);
@@ -329,6 +329,7 @@
     }
     g_source_set_can_recurse(&ctx->source, true);
     aio_set_event_notifier(ctx, &ctx->notifier,
+                           false,
                            (EventNotifierHandler *)
                            event_notifier_dummy_cb);
     ctx->thread_pool = NULL;

diff --git a/block.c b/block.c
index 6771c3a..e9f40dc 100644
--- a/block.c
+++ b/block.c

@@ -257,7 +257,6 @@
     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
         QLIST_INIT(&bs->op_blockers[i]);
     }
-    bdrv_iostatus_disable(bs);
     notifier_list_init(&bs->close_notifiers);
     notifier_with_return_list_init(&bs->before_write_notifiers);
     qemu_co_queue_init(&bs->throttled_reqs[0]);
@@ -857,7 +856,6 @@
         goto fail_opts;
     }
 
-    bs->guest_block_size = 512;
     bs->request_alignment = 512;
     bs->zero_beyond_eof = true;
     open_flags = bdrv_open_flags(bs, flags);
@@ -1081,6 +1079,10 @@
         }
     }
 
+    if (runstate_check(RUN_STATE_INMIGRATE)) {
+        *flags |= BDRV_O_INCOMING;
+    }
+
     return 0;
 }
 
@@ -1908,6 +1910,10 @@
     bdrv_drain(bs); /* in case flush left pending I/O */
     notifier_list_notify(&bs->close_notifiers, bs);
 
+    if (bs->blk) {
+        blk_dev_change_media_cb(bs->blk, false);
+    }
+
     if (bs->drv) {
         BdrvChild *child, *next;
 
@@ -1946,10 +1952,6 @@
         bs->full_open_options = NULL;
     }
 
-    if (bs->blk) {
-        blk_dev_change_media_cb(bs->blk, false);
-    }
-
     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
         g_free(ban);
     }
@@ -1998,19 +2000,10 @@
     /* move some fields that need to stay attached to the device */
 
     /* dev info */
-    bs_dest->guest_block_size   = bs_src->guest_block_size;
     bs_dest->copy_on_read       = bs_src->copy_on_read;
 
     bs_dest->enable_write_cache = bs_src->enable_write_cache;
 
-    /* r/w error */
-    bs_dest->on_read_error      = bs_src->on_read_error;
-    bs_dest->on_write_error     = bs_src->on_write_error;
-
-    /* i/o status */
-    bs_dest->iostatus_enabled   = bs_src->iostatus_enabled;
-    bs_dest->iostatus           = bs_src->iostatus;
-
     /* dirty bitmap */
     bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
 }
@@ -2497,82 +2490,6 @@
     *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
 }
 
-void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
-                       BlockdevOnError on_write_error)
-{
-    bs->on_read_error = on_read_error;
-    bs->on_write_error = on_write_error;
-}
-
-BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
-{
-    return is_read ? bs->on_read_error : bs->on_write_error;
-}
-
-BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
-{
-    BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
-
-    switch (on_err) {
-    case BLOCKDEV_ON_ERROR_ENOSPC:
-        return (error == ENOSPC) ?
-               BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
-    case BLOCKDEV_ON_ERROR_STOP:
-        return BLOCK_ERROR_ACTION_STOP;
-    case BLOCKDEV_ON_ERROR_REPORT:
-        return BLOCK_ERROR_ACTION_REPORT;
-    case BLOCKDEV_ON_ERROR_IGNORE:
-        return BLOCK_ERROR_ACTION_IGNORE;
-    default:
-        abort();
-    }
-}
-
-static void send_qmp_error_event(BlockDriverState *bs,
-                                 BlockErrorAction action,
-                                 bool is_read, int error)
-{
-    IoOperationType optype;
-
-    optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
-    qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
-                                   bdrv_iostatus_is_enabled(bs),
-                                   error == ENOSPC, strerror(error),
-                                   &error_abort);
-}
-
-/* This is done by device models because, while the block layer knows
- * about the error, it does not know whether an operation comes from
- * the device or the block layer (from a job, for example).
- */
-void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
-                       bool is_read, int error)
-{
-    assert(error >= 0);
-
-    if (action == BLOCK_ERROR_ACTION_STOP) {
-        /* First set the iostatus, so that "info block" returns an iostatus
-         * that matches the events raised so far (an additional error iostatus
-         * is fine, but not a lost one).
-         */
-        bdrv_iostatus_set_err(bs, error);
-
-        /* Then raise the request to stop the VM and the event.
-         * qemu_system_vmstop_request_prepare has two effects.  First,
-         * it ensures that the STOP event always comes after the
-         * BLOCK_IO_ERROR event.  Second, it ensures that even if management
-         * can observe the STOP event and do a "cont" before the STOP
-         * event is issued, the VM will not stop.  In this case, vm_start()
-         * also ensures that the STOP/RESUME pair of events is emitted.
-         */
-        qemu_system_vmstop_request_prepare();
-        send_qmp_error_event(bs, action, is_read, error);
-        qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
-    } else {
-        send_qmp_error_event(bs, action, is_read, error);
-    }
-}
-
 int bdrv_is_read_only(BlockDriverState *bs)
 {
     return bs->read_only;
@@ -2766,6 +2683,11 @@
         blk = blk_by_name(device);
 
         if (blk) {
+            if (!blk_bs(blk)) {
+                error_setg(errp, "Device '%s' has no medium", device);
+                return NULL;
+            }
+
             return blk_bs(blk);
         }
     }
@@ -3136,15 +3058,23 @@
 /**
  * Return TRUE if the media is present
  */
-int bdrv_is_inserted(BlockDriverState *bs)
+bool bdrv_is_inserted(BlockDriverState *bs)
 {
     BlockDriver *drv = bs->drv;
+    BdrvChild *child;
 
-    if (!drv)
-        return 0;
-    if (!drv->bdrv_is_inserted)
-        return 1;
-    return drv->bdrv_is_inserted(bs);
+    if (!drv) {
+        return false;
+    }
+    if (drv->bdrv_is_inserted) {
+        return drv->bdrv_is_inserted(bs);
+    }
+    QLIST_FOREACH(child, &bs->children, next) {
+        if (!bdrv_is_inserted(child->bs)) {
+            return false;
+        }
+    }
+    return true;
 }
 
 /**
@@ -3195,11 +3125,6 @@
     }
 }
 
-void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
-{
-    bs->guest_block_size = align;
-}
-
 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
 {
     BdrvDirtyBitmap *bm;
@@ -3597,46 +3522,6 @@
     return true;
 }
 
-void bdrv_iostatus_enable(BlockDriverState *bs)
-{
-    bs->iostatus_enabled = true;
-    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
-}
-
-/* The I/O status is only enabled if the drive explicitly
- * enables it _and_ the VM is configured to stop on errors */
-bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
-{
-    return (bs->iostatus_enabled &&
-           (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
-            bs->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
-            bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
-}
-
-void bdrv_iostatus_disable(BlockDriverState *bs)
-{
-    bs->iostatus_enabled = false;
-}
-
-void bdrv_iostatus_reset(BlockDriverState *bs)
-{
-    if (bdrv_iostatus_is_enabled(bs)) {
-        bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
-        if (bs->job) {
-            block_job_iostatus_reset(bs->job);
-        }
-    }
-}
-
-void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
-{
-    assert(bdrv_iostatus_is_enabled(bs));
-    if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
-        bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
-                                         BLOCK_DEVICE_IO_STATUS_FAILED;
-    }
-}
-
 void bdrv_img_create(const char *filename, const char *fmt,
                      const char *base_filename, const char *base_fmt,
                      char *options, uint64_t img_size, int flags,
@@ -4148,14 +4033,3 @@
         QDECREF(json);
     }
 }
-
-/* This accessor function purpose is to allow the device models to access the
- * BlockAcctStats structure embedded inside a BlockDriverState without being
- * aware of the BlockDriverState structure layout.
- * It will go away when the BlockAcctStats structure will be moved inside
- * the device models.
- */
-BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
-{
-    return &bs->stats;
-}

diff --git a/block/accounting.c b/block/accounting.c
index 01d594f..a423560 100644
--- a/block/accounting.c
+++ b/block/accounting.c

@@ -47,14 +47,6 @@
 }
 
 
-void block_acct_highest_sector(BlockAcctStats *stats, int64_t sector_num,
-                               unsigned int nb_sectors)
-{
-    if (stats->wr_highest_sector < sector_num + nb_sectors - 1) {
-        stats->wr_highest_sector = sector_num + nb_sectors - 1;
-    }
-}
-
 void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
                       int num_requests)
 {

diff --git a/block/backup.c b/block/backup.c
index 5696431..ec01db8 100644
--- a/block/backup.c
+++ b/block/backup.c

@@ -21,6 +21,7 @@
 #include "block/blockjob.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
+#include "sysemu/block-backend.h"
 
 #define BACKUP_CLUSTER_BITS 16
 #define BACKUP_CLUSTER_SIZE (1 << BACKUP_CLUSTER_BITS)
@@ -215,7 +216,9 @@
 {
     BackupBlockJob *s = container_of(job, BackupBlockJob, common);
 
-    bdrv_iostatus_reset(s->target);
+    if (s->target->blk) {
+        blk_iostatus_reset(s->target->blk);
+    }
 }
 
 static const BlockJobDriver backup_job_driver = {
@@ -360,8 +363,10 @@
     job->bitmap = hbitmap_alloc(end, 0);
 
     bdrv_set_enable_write_cache(target, true);
-    bdrv_set_on_error(target, on_target_error, on_target_error);
-    bdrv_iostatus_enable(target);
+    if (target->blk) {
+        blk_set_on_error(target->blk, on_target_error, on_target_error);
+        blk_iostatus_enable(target->blk);
+    }
 
     bdrv_add_before_write_notifier(bs, &before_write);
 
@@ -451,7 +456,9 @@
     }
     hbitmap_free(job->bitmap);
 
-    bdrv_iostatus_disable(target);
+    if (target->blk) {
+        blk_iostatus_disable(target->blk);
+    }
     bdrv_op_unblock_all(target, job->common.blocker);
 
     data = g_malloc(sizeof(*data));
@@ -480,7 +487,7 @@
 
     if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
          on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
-        !bdrv_iostatus_is_enabled(bs)) {
+        (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
         error_setg(errp, QERR_INVALID_PARAMETER, "on-source-error");
         return;
     }

diff --git a/block/block-backend.c b/block/block-backend.c
index 2256551..19fdaae 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c

@@ -12,12 +12,17 @@
 
 #include "sysemu/block-backend.h"
 #include "block/block_int.h"
+#include "block/blockjob.h"
+#include "block/throttle-groups.h"
 #include "sysemu/blockdev.h"
+#include "sysemu/sysemu.h"
 #include "qapi-event.h"
 
 /* Number of coroutines to reserve per attached device model */
 #define COROUTINE_POOL_RESERVATION 64
 
+static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
+
 struct BlockBackend {
     char *name;
     int refcnt;
@@ -29,15 +34,31 @@
     /* TODO change to DeviceState when all users are qdevified */
     const BlockDevOps *dev_ops;
     void *dev_opaque;
+
+    /* the block size for which the guest device expects atomicity */
+    int guest_block_size;
+
+    /* If the BDS tree is removed, some of its options are stored here (which
+     * can be used to restore those options in the new BDS on insert) */
+    BlockBackendRootState root_state;
+
+    /* I/O stats (display with "info blockstats"). */
+    BlockAcctStats stats;
+
+    BlockdevOnError on_read_error, on_write_error;
+    bool iostatus_enabled;
+    BlockDeviceIoStatus iostatus;
 };
 
 typedef struct BlockBackendAIOCB {
     BlockAIOCB common;
     QEMUBH *bh;
+    BlockBackend *blk;
     int ret;
 } BlockBackendAIOCB;
 
 static const AIOCBInfo block_backend_aiocb_info = {
+    .get_aio_context = blk_aiocb_get_aio_context,
     .aiocb_size = sizeof(BlockBackendAIOCB),
 };
 
@@ -145,6 +166,10 @@
         bdrv_unref(blk->bs);
         blk->bs = NULL;
     }
+    if (blk->root_state.throttle_state) {
+        g_free(blk->root_state.throttle_group);
+        throttle_group_unref(blk->root_state.throttle_state);
+    }
     /* Avoid double-remove after blk_hide_on_behalf_of_hmp_drive_del() */
     if (blk->name[0]) {
         QTAILQ_REMOVE(&blk_backends, blk, link);
@@ -309,6 +334,17 @@
 }
 
 /*
+ * Associates a new BlockDriverState with @blk.
+ */
+void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
+{
+    assert(!blk->bs && !bs->blk);
+    bdrv_ref(bs);
+    blk->bs = bs;
+    bs->blk = blk;
+}
+
+/*
  * Attach device model @dev to @blk.
  * Return 0 on success, -EBUSY when a device model is attached already.
  */
@@ -320,7 +356,7 @@
     }
     blk_ref(blk);
     blk->dev = dev;
-    bdrv_iostatus_reset(blk->bs);
+    blk_iostatus_reset(blk);
     return 0;
 }
 
@@ -347,7 +383,7 @@
     blk->dev = NULL;
     blk->dev_ops = NULL;
     blk->dev_opaque = NULL;
-    bdrv_set_guest_block_size(blk->bs, 512);
+    blk->guest_block_size = 512;
     blk_unref(blk);
 }
 
@@ -452,7 +488,47 @@
 
 void blk_iostatus_enable(BlockBackend *blk)
 {
-    bdrv_iostatus_enable(blk->bs);
+    blk->iostatus_enabled = true;
+    blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
+}
+
+/* The I/O status is only enabled if the drive explicitly
+ * enables it _and_ the VM is configured to stop on errors */
+bool blk_iostatus_is_enabled(const BlockBackend *blk)
+{
+    return (blk->iostatus_enabled &&
+           (blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
+            blk->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
+            blk->on_read_error == BLOCKDEV_ON_ERROR_STOP));
+}
+
+BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk)
+{
+    return blk->iostatus;
+}
+
+void blk_iostatus_disable(BlockBackend *blk)
+{
+    blk->iostatus_enabled = false;
+}
+
+void blk_iostatus_reset(BlockBackend *blk)
+{
+    if (blk_iostatus_is_enabled(blk)) {
+        blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
+        if (blk->bs && blk->bs->job) {
+            block_job_iostatus_reset(blk->bs->job);
+        }
+    }
+}
+
+void blk_iostatus_set_err(BlockBackend *blk, int error)
+{
+    assert(blk_iostatus_is_enabled(blk));
+    if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
+        blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
+                                          BLOCK_DEVICE_IO_STATUS_FAILED;
+    }
 }
 
 static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
@@ -464,7 +540,7 @@
         return -EIO;
     }
 
-    if (!blk_is_inserted(blk)) {
+    if (!blk_is_available(blk)) {
         return -ENOMEDIUM;
     }
 
@@ -558,6 +634,7 @@
     QEMUBH *bh;
 
     acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
+    acb->blk = blk;
     acb->ret = ret;
 
     bh = aio_bh_new(blk_get_aio_context(blk), error_callback_bh, acb);
@@ -602,16 +679,28 @@
 
 int64_t blk_getlength(BlockBackend *blk)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_getlength(blk->bs);
 }
 
 void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
 {
-    bdrv_get_geometry(blk->bs, nb_sectors_ptr);
+    if (!blk->bs) {
+        *nb_sectors_ptr = 0;
+    } else {
+        bdrv_get_geometry(blk->bs, nb_sectors_ptr);
+    }
 }
 
 int64_t blk_nb_sectors(BlockBackend *blk)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_nb_sectors(blk->bs);
 }
 
@@ -642,6 +731,10 @@
 BlockAIOCB *blk_aio_flush(BlockBackend *blk,
                           BlockCompletionFunc *cb, void *opaque)
 {
+    if (!blk_is_available(blk)) {
+        return abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
+    }
+
     return bdrv_aio_flush(blk->bs, cb, opaque);
 }
 
@@ -683,12 +776,20 @@
 
 int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_ioctl(blk->bs, req, buf);
 }
 
 BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
                           BlockCompletionFunc *cb, void *opaque)
 {
+    if (!blk_is_available(blk)) {
+        return abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
+    }
+
     return bdrv_aio_ioctl(blk->bs, req, buf, cb, opaque);
 }
 
@@ -704,11 +805,19 @@
 
 int blk_co_flush(BlockBackend *blk)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_co_flush(blk->bs);
 }
 
 int blk_flush(BlockBackend *blk)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_flush(blk->bs);
 }
 
@@ -719,7 +828,9 @@
 
 void blk_drain(BlockBackend *blk)
 {
-    bdrv_drain(blk->bs);
+    if (blk->bs) {
+        bdrv_drain(blk->bs);
+    }
 }
 
 void blk_drain_all(void)
@@ -727,76 +838,178 @@
     bdrv_drain_all();
 }
 
+void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
+                      BlockdevOnError on_write_error)
+{
+    blk->on_read_error = on_read_error;
+    blk->on_write_error = on_write_error;
+}
+
 BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read)
 {
-    return bdrv_get_on_error(blk->bs, is_read);
+    return is_read ? blk->on_read_error : blk->on_write_error;
 }
 
 BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
                                       int error)
 {
-    return bdrv_get_error_action(blk->bs, is_read, error);
+    BlockdevOnError on_err = blk_get_on_error(blk, is_read);
+
+    switch (on_err) {
+    case BLOCKDEV_ON_ERROR_ENOSPC:
+        return (error == ENOSPC) ?
+               BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
+    case BLOCKDEV_ON_ERROR_STOP:
+        return BLOCK_ERROR_ACTION_STOP;
+    case BLOCKDEV_ON_ERROR_REPORT:
+        return BLOCK_ERROR_ACTION_REPORT;
+    case BLOCKDEV_ON_ERROR_IGNORE:
+        return BLOCK_ERROR_ACTION_IGNORE;
+    default:
+        abort();
+    }
 }
 
+static void send_qmp_error_event(BlockBackend *blk,
+                                 BlockErrorAction action,
+                                 bool is_read, int error)
+{
+    IoOperationType optype;
+
+    optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
+    qapi_event_send_block_io_error(blk_name(blk), optype, action,
+                                   blk_iostatus_is_enabled(blk),
+                                   error == ENOSPC, strerror(error),
+                                   &error_abort);
+}
+
+/* This is done by device models because, while the block layer knows
+ * about the error, it does not know whether an operation comes from
+ * the device or the block layer (from a job, for example).
+ */
 void blk_error_action(BlockBackend *blk, BlockErrorAction action,
                       bool is_read, int error)
 {
-    bdrv_error_action(blk->bs, action, is_read, error);
+    assert(error >= 0);
+
+    if (action == BLOCK_ERROR_ACTION_STOP) {
+        /* First set the iostatus, so that "info block" returns an iostatus
+         * that matches the events raised so far (an additional error iostatus
+         * is fine, but not a lost one).
+         */
+        blk_iostatus_set_err(blk, error);
+
+        /* Then raise the request to stop the VM and the event.
+         * qemu_system_vmstop_request_prepare has two effects.  First,
+         * it ensures that the STOP event always comes after the
+         * BLOCK_IO_ERROR event.  Second, it ensures that even if management
+         * can observe the STOP event and do a "cont" before the STOP
+         * event is issued, the VM will not stop.  In this case, vm_start()
+         * also ensures that the STOP/RESUME pair of events is emitted.
+         */
+        qemu_system_vmstop_request_prepare();
+        send_qmp_error_event(blk, action, is_read, error);
+        qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
+    } else {
+        send_qmp_error_event(blk, action, is_read, error);
+    }
 }
 
 int blk_is_read_only(BlockBackend *blk)
 {
-    return bdrv_is_read_only(blk->bs);
+    if (blk->bs) {
+        return bdrv_is_read_only(blk->bs);
+    } else {
+        return blk->root_state.read_only;
+    }
 }
 
 int blk_is_sg(BlockBackend *blk)
 {
+    if (!blk->bs) {
+        return 0;
+    }
+
     return bdrv_is_sg(blk->bs);
 }
 
 int blk_enable_write_cache(BlockBackend *blk)
 {
-    return bdrv_enable_write_cache(blk->bs);
+    if (blk->bs) {
+        return bdrv_enable_write_cache(blk->bs);
+    } else {
+        return !!(blk->root_state.open_flags & BDRV_O_CACHE_WB);
+    }
 }
 
 void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
 {
-    bdrv_set_enable_write_cache(blk->bs, wce);
+    if (blk->bs) {
+        bdrv_set_enable_write_cache(blk->bs, wce);
+    } else {
+        if (wce) {
+            blk->root_state.open_flags |= BDRV_O_CACHE_WB;
+        } else {
+            blk->root_state.open_flags &= ~BDRV_O_CACHE_WB;
+        }
+    }
 }
 
 void blk_invalidate_cache(BlockBackend *blk, Error **errp)
 {
+    if (!blk->bs) {
+        error_setg(errp, "Device '%s' has no medium", blk->name);
+        return;
+    }
+
     bdrv_invalidate_cache(blk->bs, errp);
 }
 
-int blk_is_inserted(BlockBackend *blk)
+bool blk_is_inserted(BlockBackend *blk)
 {
-    return bdrv_is_inserted(blk->bs);
+    return blk->bs && bdrv_is_inserted(blk->bs);
+}
+
+bool blk_is_available(BlockBackend *blk)
+{
+    return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk);
 }
 
 void blk_lock_medium(BlockBackend *blk, bool locked)
 {
-    bdrv_lock_medium(blk->bs, locked);
+    if (blk->bs) {
+        bdrv_lock_medium(blk->bs, locked);
+    }
 }
 
 void blk_eject(BlockBackend *blk, bool eject_flag)
 {
-    bdrv_eject(blk->bs, eject_flag);
+    if (blk->bs) {
+        bdrv_eject(blk->bs, eject_flag);
+    }
 }
 
 int blk_get_flags(BlockBackend *blk)
 {
-    return bdrv_get_flags(blk->bs);
+    if (blk->bs) {
+        return bdrv_get_flags(blk->bs);
+    } else {
+        return blk->root_state.open_flags;
+    }
 }
 
 int blk_get_max_transfer_length(BlockBackend *blk)
 {
-    return blk->bs->bl.max_transfer_length;
+    if (blk->bs) {
+        return blk->bs->bl.max_transfer_length;
+    } else {
+        return 0;
+    }
 }
 
 void blk_set_guest_block_size(BlockBackend *blk, int align)
 {
-    bdrv_set_guest_block_size(blk->bs, align);
+    blk->guest_block_size = align;
 }
 
 void *blk_blockalign(BlockBackend *blk, size_t size)
@@ -806,40 +1019,64 @@
 
 bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
 {
+    if (!blk->bs) {
+        return false;
+    }
+
     return bdrv_op_is_blocked(blk->bs, op, errp);
 }
 
 void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
 {
-    bdrv_op_unblock(blk->bs, op, reason);
+    if (blk->bs) {
+        bdrv_op_unblock(blk->bs, op, reason);
+    }
 }
 
 void blk_op_block_all(BlockBackend *blk, Error *reason)
 {
-    bdrv_op_block_all(blk->bs, reason);
+    if (blk->bs) {
+        bdrv_op_block_all(blk->bs, reason);
+    }
 }
 
 void blk_op_unblock_all(BlockBackend *blk, Error *reason)
 {
-    bdrv_op_unblock_all(blk->bs, reason);
+    if (blk->bs) {
+        bdrv_op_unblock_all(blk->bs, reason);
+    }
 }
 
 AioContext *blk_get_aio_context(BlockBackend *blk)
 {
-    return bdrv_get_aio_context(blk->bs);
+    if (blk->bs) {
+        return bdrv_get_aio_context(blk->bs);
+    } else {
+        return qemu_get_aio_context();
+    }
+}
+
+static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
+{
+    BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb);
+    return blk_get_aio_context(blk_acb->blk);
 }
 
 void blk_set_aio_context(BlockBackend *blk, AioContext *new_context)
 {
-    bdrv_set_aio_context(blk->bs, new_context);
+    if (blk->bs) {
+        bdrv_set_aio_context(blk->bs, new_context);
+    }
 }
 
 void blk_add_aio_context_notifier(BlockBackend *blk,
         void (*attached_aio_context)(AioContext *new_context, void *opaque),
         void (*detach_aio_context)(void *opaque), void *opaque)
 {
-    bdrv_add_aio_context_notifier(blk->bs, attached_aio_context,
-                                  detach_aio_context, opaque);
+    if (blk->bs) {
+        bdrv_add_aio_context_notifier(blk->bs, attached_aio_context,
+                                      detach_aio_context, opaque);
+    }
 }
 
 void blk_remove_aio_context_notifier(BlockBackend *blk,
@@ -848,28 +1085,36 @@
                                      void (*detach_aio_context)(void *),
                                      void *opaque)
 {
-    bdrv_remove_aio_context_notifier(blk->bs, attached_aio_context,
-                                     detach_aio_context, opaque);
+    if (blk->bs) {
+        bdrv_remove_aio_context_notifier(blk->bs, attached_aio_context,
+                                         detach_aio_context, opaque);
+    }
 }
 
 void blk_add_close_notifier(BlockBackend *blk, Notifier *notify)
 {
-    bdrv_add_close_notifier(blk->bs, notify);
+    if (blk->bs) {
+        bdrv_add_close_notifier(blk->bs, notify);
+    }
 }
 
 void blk_io_plug(BlockBackend *blk)
 {
-    bdrv_io_plug(blk->bs);
+    if (blk->bs) {
+        bdrv_io_plug(blk->bs);
+    }
 }
 
 void blk_io_unplug(BlockBackend *blk)
 {
-    bdrv_io_unplug(blk->bs);
+    if (blk->bs) {
+        bdrv_io_unplug(blk->bs);
+    }
 }
 
 BlockAcctStats *blk_get_stats(BlockBackend *blk)
 {
-    return bdrv_get_stats(blk->bs);
+    return &blk->stats;
 }
 
 void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
@@ -902,6 +1147,10 @@
 
 int blk_truncate(BlockBackend *blk, int64_t offset)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_truncate(blk->bs, offset);
 }
 
@@ -918,20 +1167,67 @@
 int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
                      int64_t pos, int size)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_save_vmstate(blk->bs, buf, pos, size);
 }
 
 int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_load_vmstate(blk->bs, buf, pos, size);
 }
 
 int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_probe_blocksizes(blk->bs, bsz);
 }
 
 int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
 {
+    if (!blk_is_available(blk)) {
+        return -ENOMEDIUM;
+    }
+
     return bdrv_probe_geometry(blk->bs, geo);
 }
+
+/*
+ * Updates the BlockBackendRootState object with data from the currently
+ * attached BlockDriverState.
+ */
+void blk_update_root_state(BlockBackend *blk)
+{
+    assert(blk->bs);
+
+    blk->root_state.open_flags    = blk->bs->open_flags;
+    blk->root_state.read_only     = blk->bs->read_only;
+    blk->root_state.detect_zeroes = blk->bs->detect_zeroes;
+
+    if (blk->root_state.throttle_group) {
+        g_free(blk->root_state.throttle_group);
+        throttle_group_unref(blk->root_state.throttle_state);
+    }
+    if (blk->bs->throttle_state) {
+        const char *name = throttle_group_get_name(blk->bs);
+        blk->root_state.throttle_group = g_strdup(name);
+        blk->root_state.throttle_state = throttle_group_incref(name);
+    } else {
+        blk->root_state.throttle_group = NULL;
+        blk->root_state.throttle_state = NULL;
+    }
+}
+
+BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
+{
+    return &blk->root_state;
+}

diff --git a/block/commit.c b/block/commit.c
index d12e26f..fdebe87 100644
--- a/block/commit.c
+++ b/block/commit.c

@@ -17,6 +17,7 @@
 #include "block/blockjob.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
+#include "sysemu/block-backend.h"
 
 enum {
     /*
@@ -213,7 +214,7 @@
 
     if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
          on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
-        !bdrv_iostatus_is_enabled(bs)) {
+        (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
         error_setg(errp, "Invalid parameter combination");
         return;
     }

diff --git a/block/curl.c b/block/curl.c
index 032cc8a..8994182 100644
--- a/block/curl.c
+++ b/block/curl.c

@@ -154,18 +154,20 @@
     DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd);
     switch (action) {
         case CURL_POLL_IN:
-            aio_set_fd_handler(s->aio_context, fd, curl_multi_read,
-                               NULL, state);
+            aio_set_fd_handler(s->aio_context, fd, false,
+                               curl_multi_read, NULL, state);
             break;
         case CURL_POLL_OUT:
-            aio_set_fd_handler(s->aio_context, fd, NULL, curl_multi_do, state);
+            aio_set_fd_handler(s->aio_context, fd, false,
+                               NULL, curl_multi_do, state);
             break;
         case CURL_POLL_INOUT:
-            aio_set_fd_handler(s->aio_context, fd, curl_multi_read,
-                               curl_multi_do, state);
+            aio_set_fd_handler(s->aio_context, fd, false,
+                               curl_multi_read, curl_multi_do, state);
             break;
         case CURL_POLL_REMOVE:
-            aio_set_fd_handler(s->aio_context, fd, NULL, NULL, NULL);
+            aio_set_fd_handler(s->aio_context, fd, false,
+                               NULL, NULL, NULL);
             break;
     }
 

diff --git a/block/io.c b/block/io.c
index 5311473..8dcad3b 100644
--- a/block/io.c
+++ b/block/io.c

@@ -23,6 +23,7 @@
  */
 
 #include "trace.h"
+#include "sysemu/block-backend.h"
 #include "block/blockjob.h"
 #include "block/block_int.h"
 #include "block/throttle-groups.h"
@@ -215,6 +216,8 @@
 /* Check if any requests are in-flight (including throttled requests) */
 bool bdrv_requests_pending(BlockDriverState *bs)
 {
+    BdrvChild *child;
+
     if (!QLIST_EMPTY(&bs->tracked_requests)) {
         return true;
     }
@@ -224,12 +227,13 @@
     if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
         return true;
     }
-    if (bs->file && bdrv_requests_pending(bs->file->bs)) {
-        return true;
+
+    QLIST_FOREACH(child, &bs->children, next) {
+        if (bdrv_requests_pending(child->bs)) {
+            return true;
+        }
     }
-    if (bs->backing && bdrv_requests_pending(bs->backing->bs)) {
-        return true;
-    }
+
     return false;
 }
 
@@ -1151,7 +1155,9 @@
 
     bdrv_set_dirty(bs, sector_num, nb_sectors);
 
-    block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);
+    if (bs->wr_highest_offset < offset + bytes) {
+        bs->wr_highest_offset = offset + bytes;
+    }
 
     if (ret >= 0) {
         bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
@@ -1903,7 +1909,10 @@
         }
     }
 
-    block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1);
+    if (bs->blk) {
+        block_acct_merge_done(blk_get_stats(bs->blk), BLOCK_ACCT_WRITE,
+                              num_reqs - outidx - 1);
+    }
 
     return outidx + 1;
 }
@@ -2618,3 +2627,20 @@
     }
     bdrv_start_throttled_reqs(bs);
 }
+
+void bdrv_drained_begin(BlockDriverState *bs)
+{
+    if (!bs->quiesce_counter++) {
+        aio_disable_external(bdrv_get_aio_context(bs));
+    }
+    bdrv_drain(bs);
+}
+
+void bdrv_drained_end(BlockDriverState *bs)
+{
+    assert(bs->quiesce_counter > 0);
+    if (--bs->quiesce_counter > 0) {
+        return;
+    }
+    aio_enable_external(bdrv_get_aio_context(bs));
+}

diff --git a/block/iscsi.c b/block/iscsi.c
index 93f1ee4..9a628b7 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c

@@ -291,8 +291,8 @@
     int ev = iscsi_which_events(iscsi);
 
     if (ev != iscsilun->events) {
-        aio_set_fd_handler(iscsilun->aio_context,
-                           iscsi_get_fd(iscsi),
+        aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsi),
+                           false,
                            (ev & POLLIN) ? iscsi_process_read : NULL,
                            (ev & POLLOUT) ? iscsi_process_write : NULL,
                            iscsilun);
@@ -1280,9 +1280,8 @@
 {
     IscsiLun *iscsilun = bs->opaque;
 
-    aio_set_fd_handler(iscsilun->aio_context,
-                       iscsi_get_fd(iscsilun->iscsi),
-                       NULL, NULL, NULL);
+    aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi),
+                       false, NULL, NULL, NULL);
     iscsilun->events = 0;
 
     if (iscsilun->nop_timer) {

diff --git a/block/linux-aio.c b/block/linux-aio.c
index c991443..88b0520 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c

@@ -287,7 +287,7 @@
 {
     struct qemu_laio_state *s = s_;
 
-    aio_set_event_notifier(old_context, &s->e, NULL);
+    aio_set_event_notifier(old_context, &s->e, false, NULL);
     qemu_bh_delete(s->completion_bh);
 }
 
@@ -296,7 +296,8 @@
     struct qemu_laio_state *s = s_;
 
     s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
-    aio_set_event_notifier(new_context, &s->e, qemu_laio_completion_cb);
+    aio_set_event_notifier(new_context, &s->e, false,
+                           qemu_laio_completion_cb);
 }
 
 void *laio_init(void)

diff --git a/block/mirror.c b/block/mirror.c
index 7e43511..b1252a1 100644
--- a/block/mirror.c
+++ b/block/mirror.c

@@ -14,6 +14,7 @@
 #include "trace.h"
 #include "block/blockjob.h"
 #include "block/block_int.h"
+#include "sysemu/block-backend.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
 #include "qemu/bitmap.h"
@@ -599,7 +600,9 @@
     g_free(s->cow_bitmap);
     g_free(s->in_flight_bitmap);
     bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
-    bdrv_iostatus_disable(s->target);
+    if (s->target->blk) {
+        blk_iostatus_disable(s->target->blk);
+    }
 
     data = g_malloc(sizeof(*data));
     data->ret = ret;
@@ -621,7 +624,9 @@
 {
     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
 
-    bdrv_iostatus_reset(s->target);
+    if (s->target->blk) {
+        blk_iostatus_reset(s->target->blk);
+    }
 }
 
 static void mirror_complete(BlockJob *job, Error **errp)
@@ -704,7 +709,7 @@
 
     if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
          on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
-        !bdrv_iostatus_is_enabled(bs)) {
+        (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
         error_setg(errp, QERR_INVALID_PARAMETER, "on-source-error");
         return;
     }
@@ -740,8 +745,10 @@
         return;
     }
     bdrv_set_enable_write_cache(s->target, true);
-    bdrv_set_on_error(s->target, on_target_error, on_target_error);
-    bdrv_iostatus_enable(s->target);
+    if (s->target->blk) {
+        blk_set_on_error(s->target->blk, on_target_error, on_target_error);
+        blk_iostatus_enable(s->target->blk);
+    }
     s->common.co = qemu_coroutine_create(mirror_run);
     trace_mirror_start(bs, s, s->common.co, opaque);
     qemu_coroutine_enter(s->common.co, s);

diff --git a/block/nbd-client.c b/block/nbd-client.c
index e1bb919..b7fd17a 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c

@@ -124,7 +124,7 @@
     s->send_coroutine = qemu_coroutine_self();
     aio_context = bdrv_get_aio_context(bs);
 
-    aio_set_fd_handler(aio_context, s->sock,
+    aio_set_fd_handler(aio_context, s->sock, false,
                        nbd_reply_ready, nbd_restart_write, bs);
     if (qiov) {
         if (!s->is_unix) {
@@ -144,7 +144,8 @@
     } else {
         rc = nbd_send_request(s->sock, request);
     }
-    aio_set_fd_handler(aio_context, s->sock, nbd_reply_ready, NULL, bs);
+    aio_set_fd_handler(aio_context, s->sock, false,
+                       nbd_reply_ready, NULL, bs);
     s->send_coroutine = NULL;
     qemu_co_mutex_unlock(&s->send_mutex);
     return rc;
@@ -348,14 +349,15 @@
 void nbd_client_detach_aio_context(BlockDriverState *bs)
 {
     aio_set_fd_handler(bdrv_get_aio_context(bs),
-                       nbd_get_client_session(bs)->sock, NULL, NULL, NULL);
+                       nbd_get_client_session(bs)->sock,
+                       false, NULL, NULL, NULL);
 }
 
 void nbd_client_attach_aio_context(BlockDriverState *bs,
                                    AioContext *new_context)
 {
     aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sock,
-                       nbd_reply_ready, NULL, bs);
+                       false, nbd_reply_ready, NULL, bs);
 }
 
 void nbd_client_close(BlockDriverState *bs)

diff --git a/block/nfs.c b/block/nfs.c
index 887a98e..fd79f89 100644
--- a/block/nfs.c
+++ b/block/nfs.c

@@ -63,11 +63,10 @@
 {
     int ev = nfs_which_events(client->context);
     if (ev != client->events) {
-        aio_set_fd_handler(client->aio_context,
-                           nfs_get_fd(client->context),
+        aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
+                           false,
                            (ev & POLLIN) ? nfs_process_read : NULL,
-                           (ev & POLLOUT) ? nfs_process_write : NULL,
-                           client);
+                           (ev & POLLOUT) ? nfs_process_write : NULL, client);
 
     }
     client->events = ev;
@@ -242,9 +241,8 @@
 {
     NFSClient *client = bs->opaque;
 
-    aio_set_fd_handler(client->aio_context,
-                       nfs_get_fd(client->context),
-                       NULL, NULL, NULL);
+    aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
+                       false, NULL, NULL, NULL);
     client->events = 0;
 }
 
@@ -263,9 +261,8 @@
         if (client->fh) {
             nfs_close(client->context, client->fh);
         }
-        aio_set_fd_handler(client->aio_context,
-                           nfs_get_fd(client->context),
-                           NULL, NULL, NULL);
+        aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
+                           false, NULL, NULL, NULL);
         nfs_destroy_context(client->context);
     }
     memset(client, 0, sizeof(NFSClient));

diff --git a/block/qapi.c b/block/qapi.c
index 355ba32..ec0f513 100644
--- a/block/qapi.c
+++ b/block/qapi.c

@@ -301,17 +301,17 @@
         info->tray_open = blk_dev_is_tray_open(blk);
     }
 
-    if (bdrv_iostatus_is_enabled(bs)) {
+    if (blk_iostatus_is_enabled(blk)) {
         info->has_io_status = true;
-        info->io_status = bs->iostatus;
+        info->io_status = blk_iostatus(blk);
     }
 
-    if (!QLIST_EMPTY(&bs->dirty_bitmaps)) {
+    if (bs && !QLIST_EMPTY(&bs->dirty_bitmaps)) {
         info->has_dirty_bitmaps = true;
         info->dirty_bitmaps = bdrv_query_dirty_bitmaps(bs);
     }
 
-    if (bs->drv) {
+    if (bs && bs->drv) {
         info->has_inserted = true;
         info->inserted = bdrv_block_device_info(bs, errp);
         if (info->inserted == NULL) {
@@ -344,18 +344,22 @@
     }
 
     s->stats = g_malloc0(sizeof(*s->stats));
-    s->stats->rd_bytes = bs->stats.nr_bytes[BLOCK_ACCT_READ];
-    s->stats->wr_bytes = bs->stats.nr_bytes[BLOCK_ACCT_WRITE];
-    s->stats->rd_operations = bs->stats.nr_ops[BLOCK_ACCT_READ];
-    s->stats->wr_operations = bs->stats.nr_ops[BLOCK_ACCT_WRITE];
-    s->stats->rd_merged = bs->stats.merged[BLOCK_ACCT_READ];
-    s->stats->wr_merged = bs->stats.merged[BLOCK_ACCT_WRITE];
-    s->stats->wr_highest_offset =
-        bs->stats.wr_highest_sector * BDRV_SECTOR_SIZE;
-    s->stats->flush_operations = bs->stats.nr_ops[BLOCK_ACCT_FLUSH];
-    s->stats->wr_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_WRITE];
-    s->stats->rd_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_READ];
-    s->stats->flush_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_FLUSH];
+    if (bs->blk) {
+        BlockAcctStats *stats = blk_get_stats(bs->blk);
+
+        s->stats->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ];
+        s->stats->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE];
+        s->stats->rd_operations = stats->nr_ops[BLOCK_ACCT_READ];
+        s->stats->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE];
+        s->stats->rd_merged = stats->merged[BLOCK_ACCT_READ];
+        s->stats->wr_merged = stats->merged[BLOCK_ACCT_WRITE];
+        s->stats->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH];
+        s->stats->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE];
+        s->stats->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ];
+        s->stats->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH];
+    }
+
+    s->stats->wr_highest_offset = bs->wr_highest_offset;
 
     if (bs->file) {
         s->has_parent = true;

diff --git a/block/raw-posix.c b/block/raw-posix.c
index 3a527f0..918c756 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c

@@ -127,11 +127,6 @@
 
 #define FTYPE_FILE   0
 #define FTYPE_CD     1
-#define FTYPE_FD     2
-
-/* if the FD is not accessed during that time (in ns), we try to
-   reopen it to see if the disk has been changed */
-#define FD_OPEN_TIMEOUT (1000000000)
 
 #define MAX_BLOCKSIZE	4096
 
@@ -141,13 +136,6 @@
     int open_flags;
     size_t buf_align;
 
-#if defined(__linux__)
-    /* linux floppy specific */
-    int64_t fd_open_time;
-    int64_t fd_error_time;
-    int fd_got_error;
-    int fd_media_changed;
-#endif
 #ifdef CONFIG_LINUX_AIO
     int use_aio;
     void *aio_ctx;
@@ -635,7 +623,7 @@
     }
 #endif
 
-    if (s->type == FTYPE_FD || s->type == FTYPE_CD) {
+    if (s->type == FTYPE_CD) {
         raw_s->open_flags |= O_NONBLOCK;
     }
 
@@ -2187,47 +2175,6 @@
 }
 
 #if defined(__linux__)
-/* Note: we do not have a reliable method to detect if the floppy is
-   present. The current method is to try to open the floppy at every
-   I/O and to keep it opened during a few hundreds of ms. */
-static int fd_open(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-    int last_media_present;
-
-    if (s->type != FTYPE_FD)
-        return 0;
-    last_media_present = (s->fd >= 0);
-    if (s->fd >= 0 &&
-        (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
-        qemu_close(s->fd);
-        s->fd = -1;
-        DPRINTF("Floppy closed\n");
-    }
-    if (s->fd < 0) {
-        if (s->fd_got_error &&
-            (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->fd_error_time) < FD_OPEN_TIMEOUT) {
-            DPRINTF("No floppy (open delayed)\n");
-            return -EIO;
-        }
-        s->fd = qemu_open(bs->filename, s->open_flags & ~O_NONBLOCK);
-        if (s->fd < 0) {
-            s->fd_error_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-            s->fd_got_error = 1;
-            if (last_media_present)
-                s->fd_media_changed = 1;
-            DPRINTF("No floppy\n");
-            return -EIO;
-        }
-        DPRINTF("Floppy opened\n");
-    }
-    if (!last_media_present)
-        s->fd_media_changed = 1;
-    s->fd_open_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-    s->fd_got_error = 0;
-    return 0;
-}
-
 static int hdev_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
 {
     BDRVRawState *s = bs->opaque;
@@ -2256,8 +2203,8 @@
     pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
     return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
 }
+#endif /* linux */
 
-#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
 static int fd_open(BlockDriverState *bs)
 {
     BDRVRawState *s = bs->opaque;
@@ -2267,14 +2214,6 @@
         return 0;
     return -EIO;
 }
-#else /* !linux && !FreeBSD */
-
-static int fd_open(BlockDriverState *bs)
-{
-    return 0;
-}
-
-#endif /* !linux && !FreeBSD */
 
 static coroutine_fn BlockAIOCB *hdev_aio_discard(BlockDriverState *bs,
     int64_t sector_num, int nb_sectors,
@@ -2318,14 +2257,13 @@
     int64_t total_size = 0;
     bool has_prefix;
 
-    /* This function is used by all three protocol block drivers and therefore
-     * any of these three prefixes may be given.
+    /* This function is used by both protocol block drivers and therefore either
+     * of these prefixes may be given.
      * The return value has to be stored somewhere, otherwise this is an error
      * due to -Werror=unused-value. */
     has_prefix =
         strstart(filename, "host_device:", &filename) ||
-        strstart(filename, "host_cdrom:" , &filename) ||
-        strstart(filename, "host_floppy:", &filename);
+        strstart(filename, "host_cdrom:" , &filename);
 
     (void)has_prefix;
 
@@ -2405,155 +2343,6 @@
 #endif
 };
 
-#ifdef __linux__
-static void floppy_parse_filename(const char *filename, QDict *options,
-                                  Error **errp)
-{
-    /* The prefix is optional, just as for "file". */
-    strstart(filename, "host_floppy:", &filename);
-
-    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
-}
-
-static int floppy_open(BlockDriverState *bs, QDict *options, int flags,
-                       Error **errp)
-{
-    BDRVRawState *s = bs->opaque;
-    Error *local_err = NULL;
-    int ret;
-
-    s->type = FTYPE_FD;
-
-    /* open will not fail even if no floppy is inserted, so add O_NONBLOCK */
-    ret = raw_open_common(bs, options, flags, O_NONBLOCK, &local_err);
-    if (ret) {
-        if (local_err) {
-            error_propagate(errp, local_err);
-        }
-        return ret;
-    }
-
-    /* close fd so that we can reopen it as needed */
-    qemu_close(s->fd);
-    s->fd = -1;
-    s->fd_media_changed = 1;
-
-    error_report("Host floppy pass-through is deprecated");
-    error_printf("Support for it will be removed in a future release.\n");
-    return 0;
-}
-
-static int floppy_probe_device(const char *filename)
-{
-    int fd, ret;
-    int prio = 0;
-    struct floppy_struct fdparam;
-    struct stat st;
-
-    if (strstart(filename, "/dev/fd", NULL) &&
-        !strstart(filename, "/dev/fdset/", NULL) &&
-        !strstart(filename, "/dev/fd/", NULL)) {
-        prio = 50;
-    }
-
-    fd = qemu_open(filename, O_RDONLY | O_NONBLOCK);
-    if (fd < 0) {
-        goto out;
-    }
-    ret = fstat(fd, &st);
-    if (ret == -1 || !S_ISBLK(st.st_mode)) {
-        goto outc;
-    }
-
-    /* Attempt to detect via a floppy specific ioctl */
-    ret = ioctl(fd, FDGETPRM, &fdparam);
-    if (ret >= 0)
-        prio = 100;
-
-outc:
-    qemu_close(fd);
-out:
-    return prio;
-}
-
-
-static int floppy_is_inserted(BlockDriverState *bs)
-{
-    return fd_open(bs) >= 0;
-}
-
-static int floppy_media_changed(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-    int ret;
-
-    /*
-     * XXX: we do not have a true media changed indication.
-     * It does not work if the floppy is changed without trying to read it.
-     */
-    fd_open(bs);
-    ret = s->fd_media_changed;
-    s->fd_media_changed = 0;
-    DPRINTF("Floppy changed=%d\n", ret);
-    return ret;
-}
-
-static void floppy_eject(BlockDriverState *bs, bool eject_flag)
-{
-    BDRVRawState *s = bs->opaque;
-    int fd;
-
-    if (s->fd >= 0) {
-        qemu_close(s->fd);
-        s->fd = -1;
-    }
-    fd = qemu_open(bs->filename, s->open_flags | O_NONBLOCK);
-    if (fd >= 0) {
-        if (ioctl(fd, FDEJECT, 0) < 0)
-            perror("FDEJECT");
-        qemu_close(fd);
-    }
-}
-
-static BlockDriver bdrv_host_floppy = {
-    .format_name        = "host_floppy",
-    .protocol_name      = "host_floppy",
-    .instance_size      = sizeof(BDRVRawState),
-    .bdrv_needs_filename = true,
-    .bdrv_probe_device	= floppy_probe_device,
-    .bdrv_parse_filename = floppy_parse_filename,
-    .bdrv_file_open     = floppy_open,
-    .bdrv_close         = raw_close,
-    .bdrv_reopen_prepare = raw_reopen_prepare,
-    .bdrv_reopen_commit  = raw_reopen_commit,
-    .bdrv_reopen_abort   = raw_reopen_abort,
-    .bdrv_create         = hdev_create,
-    .create_opts         = &raw_create_opts,
-
-    .bdrv_aio_readv     = raw_aio_readv,
-    .bdrv_aio_writev    = raw_aio_writev,
-    .bdrv_aio_flush	= raw_aio_flush,
-    .bdrv_refresh_limits = raw_refresh_limits,
-    .bdrv_io_plug = raw_aio_plug,
-    .bdrv_io_unplug = raw_aio_unplug,
-    .bdrv_flush_io_queue = raw_aio_flush_io_queue,
-
-    .bdrv_truncate      = raw_truncate,
-    .bdrv_getlength      = raw_getlength,
-    .has_variable_length = true,
-    .bdrv_get_allocated_file_size
-                        = raw_get_allocated_file_size,
-
-    .bdrv_detach_aio_context = raw_detach_aio_context,
-    .bdrv_attach_aio_context = raw_attach_aio_context,
-
-    /* removable device support */
-    .bdrv_is_inserted   = floppy_is_inserted,
-    .bdrv_media_changed = floppy_media_changed,
-    .bdrv_eject         = floppy_eject,
-};
-#endif
-
 #if defined(__linux__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
 static void cdrom_parse_filename(const char *filename, QDict *options,
                                  Error **errp)
@@ -2609,15 +2398,13 @@
     return prio;
 }
 
-static int cdrom_is_inserted(BlockDriverState *bs)
+static bool cdrom_is_inserted(BlockDriverState *bs)
 {
     BDRVRawState *s = bs->opaque;
     int ret;
 
     ret = ioctl(s->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
-    if (ret == CDS_DISC_OK)
-        return 1;
-    return 0;
+    return ret == CDS_DISC_OK;
 }
 
 static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
@@ -2743,7 +2530,7 @@
     return 0;
 }
 
-static int cdrom_is_inserted(BlockDriverState *bs)
+static bool cdrom_is_inserted(BlockDriverState *bs)
 {
     return raw_getlength(bs) > 0;
 }
@@ -2831,7 +2618,6 @@
     bdrv_register(&bdrv_file);
     bdrv_register(&bdrv_host_device);
 #ifdef __linux__
-    bdrv_register(&bdrv_host_floppy);
     bdrv_register(&bdrv_host_cdrom);
 #endif
 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)

diff --git a/block/raw_bsd.c b/block/raw_bsd.c
index 63ee911..0aded31 100644
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c

@@ -154,11 +154,6 @@
     return bdrv_truncate(bs->file->bs, offset);
 }
 
-static int raw_is_inserted(BlockDriverState *bs)
-{
-    return bdrv_is_inserted(bs->file->bs);
-}
-
 static int raw_media_changed(BlockDriverState *bs)
 {
     return bdrv_media_changed(bs->file->bs);
@@ -264,7 +259,6 @@
     .bdrv_refresh_limits  = &raw_refresh_limits,
     .bdrv_probe_blocksizes = &raw_probe_blocksizes,
     .bdrv_probe_geometry  = &raw_probe_geometry,
-    .bdrv_is_inserted     = &raw_is_inserted,
     .bdrv_media_changed   = &raw_media_changed,
     .bdrv_eject           = &raw_eject,
     .bdrv_lock_medium     = &raw_lock_medium,

diff --git a/block/sheepdog.c b/block/sheepdog.c
index e7e58b7..d80e4ed 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c

@@ -651,14 +651,16 @@
     unsigned int *rlen = srco->rlen;
 
     co = qemu_coroutine_self();
-    aio_set_fd_handler(srco->aio_context, sockfd, NULL, restart_co_req, co);
+    aio_set_fd_handler(srco->aio_context, sockfd, false,
+                       NULL, restart_co_req, co);
 
     ret = send_co_req(sockfd, hdr, data, wlen);
     if (ret < 0) {
         goto out;
     }
 
-    aio_set_fd_handler(srco->aio_context, sockfd, restart_co_req, NULL, co);
+    aio_set_fd_handler(srco->aio_context, sockfd, false,
+                       restart_co_req, NULL, co);
 
     ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
     if (ret != sizeof(*hdr)) {
@@ -683,7 +685,8 @@
 out:
     /* there is at most one request for this sockfd, so it is safe to
      * set each handler to NULL. */
-    aio_set_fd_handler(srco->aio_context, sockfd, NULL, NULL, NULL);
+    aio_set_fd_handler(srco->aio_context, sockfd, false,
+                       NULL, NULL, NULL);
 
     srco->ret = ret;
     srco->finished = true;
@@ -735,7 +738,8 @@
     BDRVSheepdogState *s = opaque;
     AIOReq *aio_req, *next;
 
-    aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
+    aio_set_fd_handler(s->aio_context, s->fd, false, NULL,
+                       NULL, NULL);
     close(s->fd);
     s->fd = -1;
 
@@ -938,7 +942,8 @@
         return fd;
     }
 
-    aio_set_fd_handler(s->aio_context, fd, co_read_response, NULL, s);
+    aio_set_fd_handler(s->aio_context, fd, false,
+                       co_read_response, NULL, s);
     return fd;
 }
 
@@ -1199,7 +1204,7 @@
 
     qemu_co_mutex_lock(&s->lock);
     s->co_send = qemu_coroutine_self();
-    aio_set_fd_handler(s->aio_context, s->fd,
+    aio_set_fd_handler(s->aio_context, s->fd, false,
                        co_read_response, co_write_request, s);
     socket_set_cork(s->fd, 1);
 
@@ -1218,7 +1223,8 @@
     }
 out:
     socket_set_cork(s->fd, 0);
-    aio_set_fd_handler(s->aio_context, s->fd, co_read_response, NULL, s);
+    aio_set_fd_handler(s->aio_context, s->fd, false,
+                       co_read_response, NULL, s);
     s->co_send = NULL;
     qemu_co_mutex_unlock(&s->lock);
 }
@@ -1368,7 +1374,8 @@
 {
     BDRVSheepdogState *s = bs->opaque;
 
-    aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
+    aio_set_fd_handler(s->aio_context, s->fd, false, NULL,
+                       NULL, NULL);
 }
 
 static void sd_attach_aio_context(BlockDriverState *bs,
@@ -1377,7 +1384,8 @@
     BDRVSheepdogState *s = bs->opaque;
 
     s->aio_context = new_context;
-    aio_set_fd_handler(new_context, s->fd, co_read_response, NULL, s);
+    aio_set_fd_handler(new_context, s->fd, false,
+                       co_read_response, NULL, s);
 }
 
 /* TODO Convert to fine grained options */
@@ -1490,7 +1498,8 @@
     g_free(buf);
     return 0;
 out:
-    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, NULL, NULL);
+    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd,
+                       false, NULL, NULL, NULL);
     if (s->fd >= 0) {
         closesocket(s->fd);
     }
@@ -1528,7 +1537,8 @@
     BDRVSheepdogState *s = state->bs->opaque;
 
     if (s->fd) {
-        aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
+        aio_set_fd_handler(s->aio_context, s->fd, false,
+                           NULL, NULL, NULL);
         closesocket(s->fd);
     }
 
@@ -1551,7 +1561,8 @@
     }
 
     if (re_s->fd) {
-        aio_set_fd_handler(s->aio_context, re_s->fd, NULL, NULL, NULL);
+        aio_set_fd_handler(s->aio_context, re_s->fd, false,
+                           NULL, NULL, NULL);
         closesocket(re_s->fd);
     }
 
@@ -1935,7 +1946,8 @@
         error_report("%s, %s", sd_strerror(rsp->result), s->name);
     }
 
-    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, NULL, NULL);
+    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd,
+                       false, NULL, NULL, NULL);
     closesocket(s->fd);
     g_free(s->host_spec);
 }

diff --git a/block/ssh.c b/block/ssh.c
index d35b51f..af025c0 100644
--- a/block/ssh.c
+++ b/block/ssh.c

@@ -800,14 +800,15 @@
             rd_handler, wr_handler);
 
     aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
-                       rd_handler, wr_handler, co);
+                       false, rd_handler, wr_handler, co);
 }
 
 static coroutine_fn void clear_fd_handler(BDRVSSHState *s,
                                           BlockDriverState *bs)
 {
     DPRINTF("s->sock=%d", s->sock);
-    aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, NULL, NULL, NULL);
+    aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
+                       false, NULL, NULL, NULL);
 }
 
 /* A non-blocking call returned EAGAIN, so yield, ensuring the

diff --git a/block/stream.c b/block/stream.c
index 3f64fa2..25af7ef 100644
--- a/block/stream.c
+++ b/block/stream.c

@@ -16,6 +16,7 @@
 #include "block/blockjob.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
+#include "sysemu/block-backend.h"
 
 enum {
     /*
@@ -222,7 +223,7 @@
 
     if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
          on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
-        !bdrv_iostatus_is_enabled(bs)) {
+        (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
         error_setg(errp, QERR_INVALID_PARAMETER, "on-error");
         return;
     }

diff --git a/block/throttle-groups.c b/block/throttle-groups.c
index 1abc6fc..3419af7 100644
--- a/block/throttle-groups.c
+++ b/block/throttle-groups.c

@@ -33,8 +33,7 @@
  * its own locking.
  *
  * This locking is however handled internally in this file, so it's
- * mostly transparent to outside users (but see the documentation in
- * throttle_groups_lock()).
+ * transparent to outside users.
  *
  * The whole ThrottleGroup structure is private and invisible to
  * outside users, that only use it through its ThrottleState.
@@ -76,9 +75,9 @@
  * created.
  *
  * @name: the name of the ThrottleGroup
- * @ret:  the ThrottleGroup
+ * @ret:  the ThrottleState member of the ThrottleGroup
  */
-static ThrottleGroup *throttle_group_incref(const char *name)
+ThrottleState *throttle_group_incref(const char *name)
 {
     ThrottleGroup *tg = NULL;
     ThrottleGroup *iter;
@@ -108,7 +107,7 @@
 
     qemu_mutex_unlock(&throttle_groups_lock);
 
-    return tg;
+    return &tg->ts;
 }
 
 /* Decrease the reference count of a ThrottleGroup.
@@ -116,10 +115,12 @@
  * When the reference count reaches zero the ThrottleGroup is
  * destroyed.
  *
- * @tg:  The ThrottleGroup to unref
+ * @ts:  The ThrottleGroup to unref, given by its ThrottleState member
  */
-static void throttle_group_unref(ThrottleGroup *tg)
+void throttle_group_unref(ThrottleState *ts)
 {
+    ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
+
     qemu_mutex_lock(&throttle_groups_lock);
     if (--tg->refcount == 0) {
         QTAILQ_REMOVE(&throttle_groups, tg, list);
@@ -401,7 +402,8 @@
 void throttle_group_register_bs(BlockDriverState *bs, const char *groupname)
 {
     int i;
-    ThrottleGroup *tg = throttle_group_incref(groupname);
+    ThrottleState *ts = throttle_group_incref(groupname);
+    ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
     int clock_type = QEMU_CLOCK_REALTIME;
 
     if (qtest_enabled()) {
@@ -409,7 +411,7 @@
         clock_type = QEMU_CLOCK_VIRTUAL;
     }
 
-    bs->throttle_state = &tg->ts;
+    bs->throttle_state = ts;
 
     qemu_mutex_lock(&tg->lock);
     /* If the ThrottleGroup is new set this BlockDriverState as the token */
@@ -461,38 +463,10 @@
     throttle_timers_destroy(&bs->throttle_timers);
     qemu_mutex_unlock(&tg->lock);
 
-    throttle_group_unref(tg);
+    throttle_group_unref(&tg->ts);
     bs->throttle_state = NULL;
 }
 
-/* Acquire the lock of this throttling group.
- *
- * You won't normally need to use this. None of the functions from the
- * ThrottleGroup API require you to acquire the lock since all of them
- * deal with it internally.
- *
- * This should only be used in exceptional cases when you want to
- * access the protected fields of a BlockDriverState directly
- * (e.g. bdrv_swap()).
- *
- * @bs: a BlockDriverState that is member of the group
- */
-void throttle_group_lock(BlockDriverState *bs)
-{
-    ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
-    qemu_mutex_lock(&tg->lock);
-}
-
-/* Release the lock of this throttling group.
- *
- * See the comments in throttle_group_lock().
- */
-void throttle_group_unlock(BlockDriverState *bs)
-{
-    ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
-    qemu_mutex_unlock(&tg->lock);
-}
-
 static void throttle_groups_init(void)
 {
     qemu_mutex_init(&throttle_groups_lock);

diff --git a/block/win32-aio.c b/block/win32-aio.c
index 64e8682..bbf2f01 100644
--- a/block/win32-aio.c
+++ b/block/win32-aio.c

@@ -174,7 +174,7 @@
 void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
                                   AioContext *old_context)
 {
-    aio_set_event_notifier(old_context, &aio->e, NULL);
+    aio_set_event_notifier(old_context, &aio->e, false, NULL);
     aio->is_aio_context_attached = false;
 }
 
@@ -182,7 +182,8 @@
                                   AioContext *new_context)
 {
     aio->is_aio_context_attached = true;
-    aio_set_event_notifier(new_context, &aio->e, win32_aio_completion_cb);
+    aio_set_event_notifier(new_context, &aio->e, false,
+                           win32_aio_completion_cb);
 }
 
 QEMUWin32AIOState *win32_aio_init(void)

diff --git a/blockdev.c b/blockdev.c
index 8141b6b..18712d2 100644
--- a/blockdev.c
+++ b/blockdev.c

@@ -124,15 +124,17 @@
         return;
     }
 
-    aio_context = bdrv_get_aio_context(bs);
-    aio_context_acquire(aio_context);
+    if (bs) {
+        aio_context = bdrv_get_aio_context(bs);
+        aio_context_acquire(aio_context);
 
-    if (bs->job) {
-        block_job_cancel(bs->job);
+        if (bs->job) {
+            block_job_cancel(bs->job);
+        }
+
+        aio_context_release(aio_context);
     }
 
-    aio_context_release(aio_context);
-
     dinfo->auto_del = 1;
 }
 
@@ -229,8 +231,8 @@
             dinfo->type != IF_NONE) {
             fprintf(stderr, "Warning: Orphaned drive without device: "
                     "id=%s,file=%s,if=%s,bus=%d,unit=%d\n",
-                    blk_name(blk), blk_bs(blk)->filename, if_name[dinfo->type],
-                    dinfo->bus, dinfo->unit);
+                    blk_name(blk), blk_bs(blk) ? blk_bs(blk)->filename : "",
+                    if_name[dinfo->type], dinfo->bus, dinfo->unit);
             rs = true;
         }
     }
@@ -348,25 +350,134 @@
 
 typedef enum { MEDIA_DISK, MEDIA_CDROM } DriveMediaType;
 
+/* All parameters but @opts are optional and may be set to NULL. */
+static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags,
+    const char **throttling_group, ThrottleConfig *throttle_cfg,
+    BlockdevDetectZeroesOptions *detect_zeroes, Error **errp)
+{
+    const char *discard;
+    Error *local_error = NULL;
+    const char *aio;
+
+    if (bdrv_flags) {
+        if (!qemu_opt_get_bool(opts, "read-only", false)) {
+            *bdrv_flags |= BDRV_O_RDWR;
+        }
+        if (qemu_opt_get_bool(opts, "copy-on-read", false)) {
+            *bdrv_flags |= BDRV_O_COPY_ON_READ;
+        }
+
+        if ((discard = qemu_opt_get(opts, "discard")) != NULL) {
+            if (bdrv_parse_discard_flags(discard, bdrv_flags) != 0) {
+                error_setg(errp, "Invalid discard option");
+                return;
+            }
+        }
+
+        if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_WB, true)) {
+            *bdrv_flags |= BDRV_O_CACHE_WB;
+        }
+        if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) {
+            *bdrv_flags |= BDRV_O_NOCACHE;
+        }
+        if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
+            *bdrv_flags |= BDRV_O_NO_FLUSH;
+        }
+
+        if ((aio = qemu_opt_get(opts, "aio")) != NULL) {
+            if (!strcmp(aio, "native")) {
+                *bdrv_flags |= BDRV_O_NATIVE_AIO;
+            } else if (!strcmp(aio, "threads")) {
+                /* this is the default */
+            } else {
+               error_setg(errp, "invalid aio option");
+               return;
+            }
+        }
+    }
+
+    /* disk I/O throttling */
+    if (throttling_group) {
+        *throttling_group = qemu_opt_get(opts, "throttling.group");
+    }
+
+    if (throttle_cfg) {
+        memset(throttle_cfg, 0, sizeof(*throttle_cfg));
+        throttle_cfg->buckets[THROTTLE_BPS_TOTAL].avg =
+            qemu_opt_get_number(opts, "throttling.bps-total", 0);
+        throttle_cfg->buckets[THROTTLE_BPS_READ].avg  =
+            qemu_opt_get_number(opts, "throttling.bps-read", 0);
+        throttle_cfg->buckets[THROTTLE_BPS_WRITE].avg =
+            qemu_opt_get_number(opts, "throttling.bps-write", 0);
+        throttle_cfg->buckets[THROTTLE_OPS_TOTAL].avg =
+            qemu_opt_get_number(opts, "throttling.iops-total", 0);
+        throttle_cfg->buckets[THROTTLE_OPS_READ].avg =
+            qemu_opt_get_number(opts, "throttling.iops-read", 0);
+        throttle_cfg->buckets[THROTTLE_OPS_WRITE].avg =
+            qemu_opt_get_number(opts, "throttling.iops-write", 0);
+
+        throttle_cfg->buckets[THROTTLE_BPS_TOTAL].max =
+            qemu_opt_get_number(opts, "throttling.bps-total-max", 0);
+        throttle_cfg->buckets[THROTTLE_BPS_READ].max  =
+            qemu_opt_get_number(opts, "throttling.bps-read-max", 0);
+        throttle_cfg->buckets[THROTTLE_BPS_WRITE].max =
+            qemu_opt_get_number(opts, "throttling.bps-write-max", 0);
+        throttle_cfg->buckets[THROTTLE_OPS_TOTAL].max =
+            qemu_opt_get_number(opts, "throttling.iops-total-max", 0);
+        throttle_cfg->buckets[THROTTLE_OPS_READ].max =
+            qemu_opt_get_number(opts, "throttling.iops-read-max", 0);
+        throttle_cfg->buckets[THROTTLE_OPS_WRITE].max =
+            qemu_opt_get_number(opts, "throttling.iops-write-max", 0);
+
+        throttle_cfg->op_size =
+            qemu_opt_get_number(opts, "throttling.iops-size", 0);
+
+        if (!check_throttle_config(throttle_cfg, errp)) {
+            return;
+        }
+    }
+
+    if (detect_zeroes) {
+        *detect_zeroes =
+            qapi_enum_parse(BlockdevDetectZeroesOptions_lookup,
+                            qemu_opt_get(opts, "detect-zeroes"),
+                            BLOCKDEV_DETECT_ZEROES_OPTIONS_MAX,
+                            BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
+                            &local_error);
+        if (local_error) {
+            error_propagate(errp, local_error);
+            return;
+        }
+
+        if (bdrv_flags &&
+            *detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
+            !(*bdrv_flags & BDRV_O_UNMAP))
+        {
+            error_setg(errp, "setting detect-zeroes to unmap is not allowed "
+                             "without setting discard operation to unmap");
+            return;
+        }
+    }
+}
+
 /* Takes the ownership of bs_opts */
 static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
                                    Error **errp)
 {
     const char *buf;
-    int ro = 0;
     int bdrv_flags = 0;
     int on_read_error, on_write_error;
     BlockBackend *blk;
     BlockDriverState *bs;
     ThrottleConfig cfg;
     int snapshot = 0;
-    bool copy_on_read;
     Error *error = NULL;
     QemuOpts *opts;
     const char *id;
     bool has_driver_specific_opts;
-    BlockdevDetectZeroesOptions detect_zeroes;
-    const char *throttling_group;
+    BlockdevDetectZeroesOptions detect_zeroes =
+        BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF;
+    const char *throttling_group = NULL;
 
     /* Check common options by copying from bs_opts to opts, all other options
      * stay in bs_opts for processing by bdrv_open(). */
@@ -391,35 +502,12 @@
 
     /* extract parameters */
     snapshot = qemu_opt_get_bool(opts, "snapshot", 0);
-    ro = qemu_opt_get_bool(opts, "read-only", 0);
-    copy_on_read = qemu_opt_get_bool(opts, "copy-on-read", false);
 
-    if ((buf = qemu_opt_get(opts, "discard")) != NULL) {
-        if (bdrv_parse_discard_flags(buf, &bdrv_flags) != 0) {
-            error_setg(errp, "invalid discard option");
-            goto early_err;
-        }
-    }
-
-    if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_WB, true)) {
-        bdrv_flags |= BDRV_O_CACHE_WB;
-    }
-    if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) {
-        bdrv_flags |= BDRV_O_NOCACHE;
-    }
-    if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
-        bdrv_flags |= BDRV_O_NO_FLUSH;
-    }
-
-    if ((buf = qemu_opt_get(opts, "aio")) != NULL) {
-        if (!strcmp(buf, "native")) {
-            bdrv_flags |= BDRV_O_NATIVE_AIO;
-        } else if (!strcmp(buf, "threads")) {
-            /* this is the default */
-        } else {
-           error_setg(errp, "invalid aio option");
-           goto early_err;
-        }
+    extract_common_blockdev_options(opts, &bdrv_flags, &throttling_group, &cfg,
+                                    &detect_zeroes, &error);
+    if (error) {
+        error_propagate(errp, error);
+        goto early_err;
     }
 
     if ((buf = qemu_opt_get(opts, "format")) != NULL) {
@@ -437,43 +525,6 @@
         qdict_put(bs_opts, "driver", qstring_from_str(buf));
     }
 
-    /* disk I/O throttling */
-    memset(&cfg, 0, sizeof(cfg));
-    cfg.buckets[THROTTLE_BPS_TOTAL].avg =
-        qemu_opt_get_number(opts, "throttling.bps-total", 0);
-    cfg.buckets[THROTTLE_BPS_READ].avg  =
-        qemu_opt_get_number(opts, "throttling.bps-read", 0);
-    cfg.buckets[THROTTLE_BPS_WRITE].avg =
-        qemu_opt_get_number(opts, "throttling.bps-write", 0);
-    cfg.buckets[THROTTLE_OPS_TOTAL].avg =
-        qemu_opt_get_number(opts, "throttling.iops-total", 0);
-    cfg.buckets[THROTTLE_OPS_READ].avg =
-        qemu_opt_get_number(opts, "throttling.iops-read", 0);
-    cfg.buckets[THROTTLE_OPS_WRITE].avg =
-        qemu_opt_get_number(opts, "throttling.iops-write", 0);
-
-    cfg.buckets[THROTTLE_BPS_TOTAL].max =
-        qemu_opt_get_number(opts, "throttling.bps-total-max", 0);
-    cfg.buckets[THROTTLE_BPS_READ].max  =
-        qemu_opt_get_number(opts, "throttling.bps-read-max", 0);
-    cfg.buckets[THROTTLE_BPS_WRITE].max =
-        qemu_opt_get_number(opts, "throttling.bps-write-max", 0);
-    cfg.buckets[THROTTLE_OPS_TOTAL].max =
-        qemu_opt_get_number(opts, "throttling.iops-total-max", 0);
-    cfg.buckets[THROTTLE_OPS_READ].max =
-        qemu_opt_get_number(opts, "throttling.iops-read-max", 0);
-    cfg.buckets[THROTTLE_OPS_WRITE].max =
-        qemu_opt_get_number(opts, "throttling.iops-write-max", 0);
-
-    cfg.op_size = qemu_opt_get_number(opts, "throttling.iops-size", 0);
-
-    throttling_group = qemu_opt_get(opts, "throttling.group");
-
-    if (!check_throttle_config(&cfg, &error)) {
-        error_propagate(errp, error);
-        goto early_err;
-    }
-
     on_write_error = BLOCKDEV_ON_ERROR_ENOSPC;
     if ((buf = qemu_opt_get(opts, "werror")) != NULL) {
         on_write_error = parse_block_error_action(buf, 0, &error);
@@ -492,34 +543,34 @@
         }
     }
 
-    detect_zeroes =
-        qapi_enum_parse(BlockdevDetectZeroesOptions_lookup,
-                        qemu_opt_get(opts, "detect-zeroes"),
-                        BLOCKDEV_DETECT_ZEROES_OPTIONS_MAX,
-                        BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
-                        &error);
-    if (error) {
-        error_propagate(errp, error);
-        goto early_err;
-    }
-
-    if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
-        !(bdrv_flags & BDRV_O_UNMAP)) {
-        error_setg(errp, "setting detect-zeroes to unmap is not allowed "
-                         "without setting discard operation to unmap");
-        goto early_err;
+    if (snapshot) {
+        /* always use cache=unsafe with snapshot */
+        bdrv_flags &= ~BDRV_O_CACHE_MASK;
+        bdrv_flags |= (BDRV_O_SNAPSHOT|BDRV_O_CACHE_WB|BDRV_O_NO_FLUSH);
     }
 
     /* init */
     if ((!file || !*file) && !has_driver_specific_opts) {
-        blk = blk_new_with_bs(qemu_opts_id(opts), errp);
+        BlockBackendRootState *blk_rs;
+
+        blk = blk_new(qemu_opts_id(opts), errp);
         if (!blk) {
             goto early_err;
         }
 
-        bs = blk_bs(blk);
-        bs->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
-        bs->read_only = ro;
+        blk_rs = blk_get_root_state(blk);
+        blk_rs->open_flags    = bdrv_flags;
+        blk_rs->read_only     = !(bdrv_flags & BDRV_O_RDWR);
+        blk_rs->detect_zeroes = detect_zeroes;
+
+        if (throttle_enabled(&cfg)) {
+            if (!throttling_group) {
+                throttling_group = blk_name(blk);
+            }
+            blk_rs->throttle_group = g_strdup(throttling_group);
+            blk_rs->throttle_state = throttle_group_incref(throttling_group);
+            blk_rs->throttle_state->cfg = cfg;
+        }
 
         QDECREF(bs_opts);
     } else {
@@ -527,46 +578,30 @@
             file = NULL;
         }
 
-        if (snapshot) {
-            /* always use cache=unsafe with snapshot */
-            bdrv_flags &= ~BDRV_O_CACHE_MASK;
-            bdrv_flags |= (BDRV_O_SNAPSHOT|BDRV_O_CACHE_WB|BDRV_O_NO_FLUSH);
-        }
-
-        if (copy_on_read) {
-            bdrv_flags |= BDRV_O_COPY_ON_READ;
-        }
-
-        if (runstate_check(RUN_STATE_INMIGRATE)) {
-            bdrv_flags |= BDRV_O_INCOMING;
-        }
-
-        bdrv_flags |= ro ? 0 : BDRV_O_RDWR;
-
         blk = blk_new_open(qemu_opts_id(opts), file, NULL, bs_opts, bdrv_flags,
                            errp);
         if (!blk) {
             goto err_no_bs_opts;
         }
         bs = blk_bs(blk);
-    }
 
-    bs->detect_zeroes = detect_zeroes;
+        bs->detect_zeroes = detect_zeroes;
 
-    bdrv_set_on_error(bs, on_read_error, on_write_error);
-
-    /* disk I/O throttling */
-    if (throttle_enabled(&cfg)) {
-        if (!throttling_group) {
-            throttling_group = blk_name(blk);
+        /* disk I/O throttling */
+        if (throttle_enabled(&cfg)) {
+            if (!throttling_group) {
+                throttling_group = blk_name(blk);
+            }
+            bdrv_io_limits_enable(bs, throttling_group);
+            bdrv_set_io_limits(bs, &cfg);
         }
-        bdrv_io_limits_enable(bs, throttling_group);
-        bdrv_set_io_limits(bs, &cfg);
+
+        if (bdrv_key_required(bs)) {
+            autostart = 0;
+        }
     }
 
-    if (bdrv_key_required(bs)) {
-        autostart = 0;
-    }
+    blk_set_on_error(blk, on_read_error, on_write_error);
 
 err_no_bs_opts:
     qemu_opts_del(opts);
@@ -579,6 +614,54 @@
     return NULL;
 }
 
+static QemuOptsList qemu_root_bds_opts;
+
+/* Takes the ownership of bs_opts */
+static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
+{
+    BlockDriverState *bs;
+    QemuOpts *opts;
+    Error *local_error = NULL;
+    BlockdevDetectZeroesOptions detect_zeroes;
+    int ret;
+    int bdrv_flags = 0;
+
+    opts = qemu_opts_create(&qemu_root_bds_opts, NULL, 1, errp);
+    if (!opts) {
+        goto fail;
+    }
+
+    qemu_opts_absorb_qdict(opts, bs_opts, &local_error);
+    if (local_error) {
+        error_propagate(errp, local_error);
+        goto fail;
+    }
+
+    extract_common_blockdev_options(opts, &bdrv_flags, NULL, NULL,
+                                    &detect_zeroes, &local_error);
+    if (local_error) {
+        error_propagate(errp, local_error);
+        goto fail;
+    }
+
+    bs = NULL;
+    ret = bdrv_open(&bs, NULL, NULL, bs_opts, bdrv_flags, errp);
+    if (ret < 0) {
+        goto fail_no_bs_opts;
+    }
+
+    bs->detect_zeroes = detect_zeroes;
+
+fail_no_bs_opts:
+    qemu_opts_del(opts);
+    return bs;
+
+fail:
+    qemu_opts_del(opts);
+    QDECREF(bs_opts);
+    return NULL;
+}
+
 static void qemu_opt_rename(QemuOpts *opts, const char *from, const char *to,
                             Error **errp)
 {
@@ -1042,6 +1125,10 @@
             monitor_printf(mon, "Device '%s' not found\n", device);
             return;
         }
+        if (!blk_is_available(blk)) {
+            monitor_printf(mon, "Device '%s' has no medium\n", device);
+            return;
+        }
         ret = bdrv_commit(blk_bs(blk));
     }
     if (ret < 0) {
@@ -1121,7 +1208,9 @@
                   "Device '%s' not found", device);
         return NULL;
     }
-    bs = blk_bs(blk);
+
+    aio_context = blk_get_aio_context(blk);
+    aio_context_acquire(aio_context);
 
     if (!has_id) {
         id = NULL;
@@ -1133,11 +1222,14 @@
 
     if (!id && !name) {
         error_setg(errp, "Name or id must be provided");
-        return NULL;
+        goto out_aio_context;
     }
 
-    aio_context = bdrv_get_aio_context(bs);
-    aio_context_acquire(aio_context);
+    if (!blk_is_available(blk)) {
+        error_setg(errp, "Device '%s' has no medium", device);
+        goto out_aio_context;
+    }
+    bs = blk_bs(blk);
 
     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, errp)) {
         goto out_aio_context;
@@ -1278,6 +1370,7 @@
     BlockDriverState *bs;
     AioContext *aio_context;
     QEMUSnapshotInfo sn;
+    bool created;
 } InternalSnapshotState;
 
 static void internal_snapshot_prepare(BlkTransactionState *common,
@@ -1311,16 +1404,19 @@
                   "Device '%s' not found", device);
         return;
     }
-    bs = blk_bs(blk);
 
     /* AioContext is released in .clean() */
-    state->aio_context = bdrv_get_aio_context(bs);
+    state->aio_context = blk_get_aio_context(blk);
     aio_context_acquire(state->aio_context);
 
-    if (!bdrv_is_inserted(bs)) {
+    if (!blk_is_available(blk)) {
         error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
         return;
     }
+    bs = blk_bs(blk);
+
+    state->bs = bs;
+    bdrv_drained_begin(bs);
 
     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, errp)) {
         return;
@@ -1373,7 +1469,7 @@
     }
 
     /* 4. succeed, mark a snapshot is created */
-    state->bs = bs;
+    state->created = true;
 }
 
 static void internal_snapshot_abort(BlkTransactionState *common)
@@ -1384,7 +1480,7 @@
     QEMUSnapshotInfo *sn = &state->sn;
     Error *local_error = NULL;
 
-    if (!bs) {
+    if (!state->created) {
         return;
     }
 
@@ -1405,6 +1501,9 @@
                                              common, common);
 
     if (state->aio_context) {
+        if (state->bs) {
+            bdrv_drained_end(state->bs);
+        }
         aio_context_release(state->aio_context);
     }
 }
@@ -1477,6 +1576,7 @@
     /* Acquire AioContext now so any threads operating on old_bs stop */
     state->aio_context = bdrv_get_aio_context(state->old_bs);
     aio_context_acquire(state->aio_context);
+    bdrv_drained_begin(state->old_bs);
 
     if (!bdrv_is_inserted(state->old_bs)) {
         error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
@@ -1546,8 +1646,6 @@
      * don't want to abort all of them if one of them fails the reopen */
     bdrv_reopen(state->old_bs, state->old_bs->open_flags & ~BDRV_O_RDWR,
                 NULL);
-
-    aio_context_release(state->aio_context);
 }
 
 static void external_snapshot_abort(BlkTransactionState *common)
@@ -1557,7 +1655,14 @@
     if (state->new_bs) {
         bdrv_unref(state->new_bs);
     }
+}
+
+static void external_snapshot_clean(BlkTransactionState *common)
+{
+    ExternalSnapshotState *state =
+                             DO_UPCAST(ExternalSnapshotState, common, common);
     if (state->aio_context) {
+        bdrv_drained_end(state->old_bs);
         aio_context_release(state->aio_context);
     }
 }
@@ -1572,7 +1677,6 @@
 static void drive_backup_prepare(BlkTransactionState *common, Error **errp)
 {
     DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
-    BlockDriverState *bs;
     BlockBackend *blk;
     DriveBackup *backup;
     Error *local_err = NULL;
@@ -1586,11 +1690,17 @@
                   "Device '%s' not found", backup->device);
         return;
     }
-    bs = blk_bs(blk);
+
+    if (!blk_is_available(blk)) {
+        error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, backup->device);
+        return;
+    }
 
     /* AioContext is released in .clean() */
-    state->aio_context = bdrv_get_aio_context(bs);
+    state->aio_context = blk_get_aio_context(blk);
     aio_context_acquire(state->aio_context);
+    bdrv_drained_begin(blk_bs(blk));
+    state->bs = blk_bs(blk);
 
     qmp_drive_backup(backup->device, backup->target,
                      backup->has_format, backup->format,
@@ -1606,7 +1716,6 @@
         return;
     }
 
-    state->bs = bs;
     state->job = state->bs->job;
 }
 
@@ -1626,6 +1735,7 @@
     DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
 
     if (state->aio_context) {
+        bdrv_drained_end(state->bs);
         aio_context_release(state->aio_context);
     }
 }
@@ -1641,8 +1751,7 @@
 {
     BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
     BlockdevBackup *backup;
-    BlockDriverState *bs, *target;
-    BlockBackend *blk;
+    BlockBackend *blk, *target;
     Error *local_err = NULL;
 
     assert(common->action->kind == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP);
@@ -1653,23 +1762,28 @@
         error_setg(errp, "Device '%s' not found", backup->device);
         return;
     }
-    bs = blk_bs(blk);
 
-    blk = blk_by_name(backup->target);
-    if (!blk) {
+    if (!blk_is_available(blk)) {
+        error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, backup->device);
+        return;
+    }
+
+    target = blk_by_name(backup->target);
+    if (!target) {
         error_setg(errp, "Device '%s' not found", backup->target);
         return;
     }
-    target = blk_bs(blk);
 
     /* AioContext is released in .clean() */
-    state->aio_context = bdrv_get_aio_context(bs);
-    if (state->aio_context != bdrv_get_aio_context(target)) {
+    state->aio_context = blk_get_aio_context(blk);
+    if (state->aio_context != blk_get_aio_context(target)) {
         state->aio_context = NULL;
         error_setg(errp, "Backup between two IO threads is not implemented");
         return;
     }
     aio_context_acquire(state->aio_context);
+    state->bs = blk_bs(blk);
+    bdrv_drained_begin(state->bs);
 
     qmp_blockdev_backup(backup->device, backup->target,
                         backup->sync,
@@ -1682,7 +1796,6 @@
         return;
     }
 
-    state->bs = bs;
     state->job = state->bs->job;
 }
 
@@ -1702,6 +1815,7 @@
     BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
 
     if (state->aio_context) {
+        bdrv_drained_end(state->bs);
         aio_context_release(state->aio_context);
     }
 }
@@ -1722,6 +1836,7 @@
         .prepare  = external_snapshot_prepare,
         .commit   = external_snapshot_commit,
         .abort = external_snapshot_abort,
+        .clean = external_snapshot_clean,
     },
     [TRANSACTION_ACTION_KIND_DRIVE_BACKUP] = {
         .instance_size = sizeof(DriveBackupState),
@@ -1820,6 +1935,11 @@
     BlockDriverState *bs = blk_bs(blk);
     AioContext *aio_context;
 
+    if (!bs) {
+        /* No medium inserted, so there is nothing to do */
+        return;
+    }
+
     aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(aio_context);
 
@@ -1886,7 +2006,8 @@
 }
 
 /* Assumes AioContext is held */
-static void qmp_bdrv_open_encrypted(BlockDriverState *bs, const char *filename,
+static void qmp_bdrv_open_encrypted(BlockDriverState **pbs,
+                                    const char *filename,
                                     int bdrv_flags, const char *format,
                                     const char *password, Error **errp)
 {
@@ -1899,13 +2020,13 @@
         qdict_put(options, "driver", qstring_from_str(format));
     }
 
-    ret = bdrv_open(&bs, filename, NULL, options, bdrv_flags, &local_err);
+    ret = bdrv_open(pbs, filename, NULL, options, bdrv_flags, &local_err);
     if (ret < 0) {
         error_propagate(errp, local_err);
         return;
     }
 
-    bdrv_add_key(bs, password, errp);
+    bdrv_add_key(*pbs, password, errp);
 }
 
 void qmp_change_blockdev(const char *device, const char *filename,
@@ -1915,6 +2036,7 @@
     BlockDriverState *bs;
     AioContext *aio_context;
     int bdrv_flags;
+    bool new_bs;
     Error *err = NULL;
 
     blk = blk_by_name(device);
@@ -1924,8 +2046,9 @@
         return;
     }
     bs = blk_bs(blk);
+    new_bs = !bs;
 
-    aio_context = bdrv_get_aio_context(bs);
+    aio_context = blk_get_aio_context(blk);
     aio_context_acquire(aio_context);
 
     eject_device(blk, 0, &err);
@@ -1934,10 +2057,21 @@
         goto out;
     }
 
-    bdrv_flags = bdrv_is_read_only(bs) ? 0 : BDRV_O_RDWR;
-    bdrv_flags |= bdrv_is_snapshot(bs) ? BDRV_O_SNAPSHOT : 0;
+    bdrv_flags = blk_is_read_only(blk) ? 0 : BDRV_O_RDWR;
+    bdrv_flags |= blk_get_root_state(blk)->open_flags & ~BDRV_O_RDWR;
 
-    qmp_bdrv_open_encrypted(bs, filename, bdrv_flags, format, NULL, errp);
+    qmp_bdrv_open_encrypted(&bs, filename, bdrv_flags, format, NULL, &err);
+    if (err) {
+        error_propagate(errp, err);
+        goto out;
+    }
+
+    if (new_bs) {
+        blk_insert_bs(blk, bs);
+        /* Has been sent automatically by bdrv_open() if blk_bs(blk) was not
+         * NULL */
+        blk_dev_change_media_cb(blk, true);
+    }
 
 out:
     aio_context_release(aio_context);
@@ -1977,7 +2111,15 @@
                   "Device '%s' not found", device);
         return;
     }
+
+    aio_context = blk_get_aio_context(blk);
+    aio_context_acquire(aio_context);
+
     bs = blk_bs(blk);
+    if (!bs) {
+        error_setg(errp, "Device '%s' has no medium", device);
+        goto out;
+    }
 
     memset(&cfg, 0, sizeof(cfg));
     cfg.buckets[THROTTLE_BPS_TOTAL].avg = bps;
@@ -2012,12 +2154,9 @@
     }
 
     if (!check_throttle_config(&cfg, errp)) {
-        return;
+        goto out;
     }
 
-    aio_context = bdrv_get_aio_context(bs);
-    aio_context_acquire(aio_context);
-
     if (throttle_enabled(&cfg)) {
         /* Enable I/O limits if they're not enabled yet, otherwise
          * just update the throttling group. */
@@ -2033,6 +2172,7 @@
         bdrv_io_limits_disable(bs);
     }
 
+out:
     aio_context_release(aio_context);
 }
 
@@ -2145,7 +2285,6 @@
         error_report("Device '%s' not found", id);
         return;
     }
-    bs = blk_bs(blk);
 
     if (!blk_legacy_dinfo(blk)) {
         error_report("Deleting device added with blockdev-add"
@@ -2153,16 +2292,19 @@
         return;
     }
 
-    aio_context = bdrv_get_aio_context(bs);
+    aio_context = blk_get_aio_context(blk);
     aio_context_acquire(aio_context);
 
-    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, &local_err)) {
-        error_report_err(local_err);
-        aio_context_release(aio_context);
-        return;
-    }
+    bs = blk_bs(blk);
+    if (bs) {
+        if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, &local_err)) {
+            error_report_err(local_err);
+            aio_context_release(aio_context);
+            return;
+        }
 
-    bdrv_close(bs);
+        bdrv_close(bs);
+    }
 
     /* if we have a device attached to this BlockDriverState
      * then we need to make the drive anonymous until the device
@@ -2172,8 +2314,8 @@
     if (blk_get_attached_dev(blk)) {
         blk_hide_on_behalf_of_hmp_drive_del(blk);
         /* Further I/O must not pause the guest */
-        bdrv_set_on_error(bs, BLOCKDEV_ON_ERROR_REPORT,
-                          BLOCKDEV_ON_ERROR_REPORT);
+        blk_set_on_error(blk, BLOCKDEV_ON_ERROR_REPORT,
+                         BLOCKDEV_ON_ERROR_REPORT);
     } else {
         blk_unref(blk);
     }
@@ -2295,11 +2437,16 @@
                   "Device '%s' not found", device);
         return;
     }
-    bs = blk_bs(blk);
 
-    aio_context = bdrv_get_aio_context(bs);
+    aio_context = blk_get_aio_context(blk);
     aio_context_acquire(aio_context);
 
+    if (!blk_is_available(blk)) {
+        error_setg(errp, "Device '%s' has no medium", device);
+        goto out;
+    }
+    bs = blk_bs(blk);
+
     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_STREAM, errp)) {
         goto out;
     }
@@ -2370,11 +2517,16 @@
                   "Device '%s' not found", device);
         return;
     }
-    bs = blk_bs(blk);
 
-    aio_context = bdrv_get_aio_context(bs);
+    aio_context = blk_get_aio_context(blk);
     aio_context_acquire(aio_context);
 
+    if (!blk_is_available(blk)) {
+        error_setg(errp, "Device '%s' has no medium", device);
+        goto out;
+    }
+    bs = blk_bs(blk);
+
     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, errp)) {
         goto out;
     }
@@ -2480,17 +2632,17 @@
                   "Device '%s' not found", device);
         return;
     }
-    bs = blk_bs(blk);
 
-    aio_context = bdrv_get_aio_context(bs);
+    aio_context = blk_get_aio_context(blk);
     aio_context_acquire(aio_context);
 
     /* Although backup_run has this check too, we need to use bs->drv below, so
      * do an early check redundantly. */
-    if (!bdrv_is_inserted(bs)) {
+    if (!blk_is_available(blk)) {
         error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
         goto out;
     }
+    bs = blk_bs(blk);
 
     if (!has_format) {
         format = mode == NEW_IMAGE_MODE_EXISTING ? NULL : bs->drv->format_name;
@@ -2587,7 +2739,7 @@
                          BlockdevOnError on_target_error,
                          Error **errp)
 {
-    BlockBackend *blk;
+    BlockBackend *blk, *target_blk;
     BlockDriverState *bs;
     BlockDriverState *target_bs;
     Error *local_err = NULL;
@@ -2608,17 +2760,27 @@
         error_setg(errp, "Device '%s' not found", device);
         return;
     }
-    bs = blk_bs(blk);
 
-    aio_context = bdrv_get_aio_context(bs);
+    aio_context = blk_get_aio_context(blk);
     aio_context_acquire(aio_context);
 
-    blk = blk_by_name(target);
-    if (!blk) {
+    if (!blk_is_available(blk)) {
+        error_setg(errp, "Device '%s' has no medium", device);
+        goto out;
+    }
+    bs = blk_bs(blk);
+
+    target_blk = blk_by_name(target);
+    if (!target_blk) {
         error_setg(errp, "Device '%s' not found", target);
         goto out;
     }
-    target_bs = blk_bs(blk);
+
+    if (!blk_is_available(target_blk)) {
+        error_setg(errp, "Device '%s' has no medium", target);
+        goto out;
+    }
+    target_bs = blk_bs(target_blk);
 
     bdrv_ref(target_bs);
     bdrv_set_aio_context(target_bs, aio_context);
@@ -2695,15 +2857,15 @@
                   "Device '%s' not found", device);
         return;
     }
-    bs = blk_bs(blk);
 
-    aio_context = bdrv_get_aio_context(bs);
+    aio_context = blk_get_aio_context(blk);
     aio_context_acquire(aio_context);
 
-    if (!bdrv_is_inserted(bs)) {
+    if (!blk_is_available(blk)) {
         error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
         goto out;
     }
+    bs = blk_bs(blk);
 
     if (!has_format) {
         format = mode == NEW_IMAGE_MODE_EXISTING ? NULL : bs->drv->format_name;
@@ -2833,17 +2995,22 @@
     BlockBackend *blk;
     BlockDriverState *bs;
 
+    *aio_context = NULL;
+
     blk = blk_by_name(device);
     if (!blk) {
         goto notfound;
     }
-    bs = blk_bs(blk);
 
-    *aio_context = bdrv_get_aio_context(bs);
+    *aio_context = blk_get_aio_context(blk);
     aio_context_acquire(*aio_context);
 
+    if (!blk_is_available(blk)) {
+        goto notfound;
+    }
+    bs = blk_bs(blk);
+
     if (!bs->job) {
-        aio_context_release(*aio_context);
         goto notfound;
     }
 
@@ -2852,7 +3019,10 @@
 notfound:
     error_set(errp, ERROR_CLASS_DEVICE_NOT_ACTIVE,
               "No active block job on device '%s'", device);
-    *aio_context = NULL;
+    if (*aio_context) {
+        aio_context_release(*aio_context);
+        *aio_context = NULL;
+    }
     return NULL;
 }
 
@@ -2959,11 +3129,16 @@
                   "Device '%s' not found", device);
         return;
     }
-    bs = blk_bs(blk);
 
-    aio_context = bdrv_get_aio_context(bs);
+    aio_context = blk_get_aio_context(blk);
     aio_context_acquire(aio_context);
 
+    if (!blk_is_available(blk)) {
+        error_setg(errp, "Device '%s' has no medium", device);
+        goto out;
+    }
+    bs = blk_bs(blk);
+
     image_bs = bdrv_lookup_bs(NULL, image_node_name, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
@@ -3030,17 +3205,12 @@
 void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
 {
     QmpOutputVisitor *ov = qmp_output_visitor_new();
-    BlockBackend *blk;
+    BlockDriverState *bs;
+    BlockBackend *blk = NULL;
     QObject *obj;
     QDict *qdict;
     Error *local_err = NULL;
 
-    /* Require an ID in the top level */
-    if (!options->has_id) {
-        error_setg(errp, "Block device needs an ID");
-        goto fail;
-    }
-
     /* TODO Sort it out in raw-posix and drive_new(): Reject aio=native with
      * cache.direct=false instead of silently switching to aio=threads, except
      * when called from drive_new().
@@ -3068,14 +3238,33 @@
 
     qdict_flatten(qdict);
 
-    blk = blockdev_init(NULL, qdict, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        goto fail;
+    if (options->has_id) {
+        blk = blockdev_init(NULL, qdict, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            goto fail;
+        }
+
+        bs = blk_bs(blk);
+    } else {
+        if (!qdict_get_try_str(qdict, "node-name")) {
+            error_setg(errp, "'id' and/or 'node-name' need to be specified for "
+                       "the root node");
+            goto fail;
+        }
+
+        bs = bds_tree_init(qdict, errp);
+        if (!bs) {
+            goto fail;
+        }
     }
 
-    if (bdrv_key_required(blk_bs(blk))) {
-        blk_unref(blk);
+    if (bs && bdrv_key_required(bs)) {
+        if (blk) {
+            blk_unref(blk);
+        } else {
+            bdrv_unref(bs);
+        }
         error_setg(errp, "blockdev-add doesn't support encrypted devices");
         goto fail;
     }
@@ -3220,6 +3409,47 @@
     },
 };
 
+static QemuOptsList qemu_root_bds_opts = {
+    .name = "root-bds",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_common_drive_opts.head),
+    .desc = {
+        {
+            .name = "discard",
+            .type = QEMU_OPT_STRING,
+            .help = "discard operation (ignore/off, unmap/on)",
+        },{
+            .name = "cache.writeback",
+            .type = QEMU_OPT_BOOL,
+            .help = "enables writeback mode for any caches",
+        },{
+            .name = "cache.direct",
+            .type = QEMU_OPT_BOOL,
+            .help = "enables use of O_DIRECT (bypass the host page cache)",
+        },{
+            .name = "cache.no-flush",
+            .type = QEMU_OPT_BOOL,
+            .help = "ignore any flush requests for the device",
+        },{
+            .name = "aio",
+            .type = QEMU_OPT_STRING,
+            .help = "host AIO implementation (threads, native)",
+        },{
+            .name = "read-only",
+            .type = QEMU_OPT_BOOL,
+            .help = "open drive file as read-only",
+        },{
+            .name = "copy-on-read",
+            .type = QEMU_OPT_BOOL,
+            .help = "copy read data from backing file into image file",
+        },{
+            .name = "detect-zeroes",
+            .type = QEMU_OPT_STRING,
+            .help = "try to optimize zero writes (off, on, unmap)",
+        },
+        { /* end of list */ }
+    },
+};
+
 QemuOptsList qemu_drive_opts = {
     .name = "drive",
     .head = QTAILQ_HEAD_INITIALIZER(qemu_drive_opts.head),

diff --git a/blockjob.c b/blockjob.c
index 1da5491..c02fe59 100644
--- a/blockjob.c
+++ b/blockjob.c

@@ -29,6 +29,7 @@
 #include "block/block.h"
 #include "block/blockjob.h"
 #include "block/block_int.h"
+#include "sysemu/block-backend.h"
 #include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qjson.h"
 #include "qemu/coroutine.h"
@@ -354,8 +355,8 @@
         job->user_paused = true;
         block_job_pause(job);
         block_job_iostatus_set_err(job, error);
-        if (bs != job->bs) {
-            bdrv_iostatus_set_err(bs, error);
+        if (bs->blk && bs != job->bs) {
+            blk_iostatus_set_err(bs->blk, error);
         }
     }
     return action;

diff --git a/configure b/configure
index 211bc6e..7a1d08d 100755
--- a/configure
+++ b/configure

@@ -331,6 +331,8 @@
 gtk_gl="no"
 gnutls=""
 gnutls_hash=""
+nettle=""
+gcrypt=""
 vte=""
 virglrenderer=""
 tpm="yes"
@@ -417,9 +419,6 @@
     LDFLAGS="-g $LDFLAGS"
 fi
 
-test_cflags=""
-test_libs=""
-
 # make source path absolute
 source_path=`cd "$source_path"; pwd`
 
@@ -1114,6 +1113,14 @@
   ;;
   --enable-gnutls) gnutls="yes"
   ;;
+  --disable-nettle) nettle="no"
+  ;;
+  --enable-nettle) nettle="yes"
+  ;;
+  --disable-gcrypt) gcrypt="no"
+  ;;
+  --enable-gcrypt) gcrypt="yes"
+  ;;
   --enable-rdma) rdma="yes"
   ;;
   --disable-rdma) rdma="no"
@@ -1324,6 +1331,8 @@
   sparse          sparse checker
 
   gnutls          GNUTLS cryptography support
+  nettle          nettle cryptography support
+  gcrypt          libgcrypt cryptography support
   sdl             SDL UI
   --with-sdlabi     select preferred SDL ABI 1.2 or 2.0
   gtk             gtk UI
@@ -2254,20 +2263,76 @@
     gnutls_hash="no"
 fi
 
-if test "$gnutls_gcrypt" != "no"; then
-    if has "libgcrypt-config"; then
+
+# If user didn't give a --disable/enable-gcrypt flag,
+# then mark as disabled if user requested nettle
+# explicitly, or if gnutls links to nettle
+if test -z "$gcrypt"
+then
+    if test "$nettle" = "yes" || test "$gnutls_nettle" = "yes"
+    then
+        gcrypt="no"
+    fi
+fi
+
+# If user didn't give a --disable/enable-nettle flag,
+# then mark as disabled if user requested gcrypt
+# explicitly, or if gnutls links to gcrypt
+if test -z "$nettle"
+then
+    if test "$gcrypt" = "yes" || test "$gnutls_gcrypt" = "yes"
+    then
+        nettle="no"
+    fi
+fi
+
+has_libgcrypt_config() {
+    if ! has "libgcrypt-config"
+    then
+	return 1
+    fi
+
+    if test -n "$cross_prefix"
+    then
+	host=`libgcrypt-config --host`
+	if test "$host-" != $cross_prefix
+	then
+	    return 1
+	fi
+    fi
+
+    return 0
+}
+
+if test "$gcrypt" != "no"; then
+    if has_libgcrypt_config; then
         gcrypt_cflags=`libgcrypt-config --cflags`
         gcrypt_libs=`libgcrypt-config --libs`
+        # Debian has remove -lgpg-error from libgcrypt-config
+        # as it "spreads unnecessary dependencies" which in
+        # turn breaks static builds...
+        if test "$static" = "yes"
+        then
+            gcrypt_libs="$gcrypt_libs -lgpg-error"
+        fi
         libs_softmmu="$gcrypt_libs $libs_softmmu"
         libs_tools="$gcrypt_libs $libs_tools"
         QEMU_CFLAGS="$QEMU_CFLAGS $gcrypt_cflags"
+        gcrypt="yes"
+        if test -z "$nettle"; then
+           nettle="no"
+        fi
     else
-        feature_not_found "gcrypt" "Install gcrypt devel"
+        if test "$gcrypt" = "yes"; then
+            feature_not_found "gcrypt" "Install gcrypt devel"
+        else
+            gcrypt="no"
+        fi
     fi
 fi
 
 
-if test "$gnutls_nettle" != "no"; then
+if test "$nettle" != "no"; then
     if $pkg_config --exists "nettle"; then
         nettle_cflags=`$pkg_config --cflags nettle`
         nettle_libs=`$pkg_config --libs nettle`
@@ -2275,20 +2340,30 @@
         libs_softmmu="$nettle_libs $libs_softmmu"
         libs_tools="$nettle_libs $libs_tools"
         QEMU_CFLAGS="$QEMU_CFLAGS $nettle_cflags"
+        nettle="yes"
     else
-        feature_not_found "nettle" "Install nettle devel"
+        if test "$nettle" = "yes"; then
+            feature_not_found "nettle" "Install nettle devel"
+        else
+            nettle="no"
+        fi
     fi
 fi
 
+if test "$gcrypt" = "yes" && test "$nettle" = "yes"
+then
+    error_exit "Only one of gcrypt & nettle can be enabled"
+fi
+
 ##########################################
 # libtasn1 - only for the TLS creds/session test suite
 
 tasn1=yes
+tasn1_cflags=""
+tasn1_libs=""
 if $pkg_config --exists "libtasn1"; then
     tasn1_cflags=`$pkg_config --cflags libtasn1`
     tasn1_libs=`$pkg_config --libs libtasn1`
-    test_cflags="$test_cflags $tasn1_cflags"
-    test_libs="$test_libs $tasn1_libs"
 else
     tasn1=no
 fi
@@ -4433,6 +4508,7 @@
   tools="qemu-img\$(EXESUF) qemu-io\$(EXESUF) $tools"
   if [ "$linux" = "yes" -o "$bsd" = "yes" -o "$solaris" = "yes" ] ; then
     tools="qemu-nbd\$(EXESUF) $tools"
+    tools="ivshmem-client\$(EXESUF) ivshmem-server\$(EXESUF) $tools"
   fi
 fi
 if test "$softmmu" = yes ; then
@@ -4621,8 +4697,8 @@
 echo "GTK GL support    $gtk_gl"
 echo "GNUTLS support    $gnutls"
 echo "GNUTLS hash       $gnutls_hash"
-echo "GNUTLS gcrypt     $gnutls_gcrypt"
-echo "GNUTLS nettle     $gnutls_nettle ${gnutls_nettle+($nettle_version)}"
+echo "libgcrypt         $gcrypt"
+echo "nettle            $nettle ${nettle+($nettle_version)}"
 echo "libtasn1          $tasn1"
 echo "VTE support       $vte"
 echo "curses support    $curses"
@@ -4991,11 +5067,11 @@
 if test "$gnutls_hash" = "yes" ; then
   echo "CONFIG_GNUTLS_HASH=y" >> $config_host_mak
 fi
-if test "$gnutls_gcrypt" = "yes" ; then
-  echo "CONFIG_GNUTLS_GCRYPT=y" >> $config_host_mak
+if test "$gcrypt" = "yes" ; then
+  echo "CONFIG_GCRYPT=y" >> $config_host_mak
 fi
-if test "$gnutls_nettle" = "yes" ; then
-  echo "CONFIG_GNUTLS_NETTLE=y" >> $config_host_mak
+if test "$nettle" = "yes" ; then
+  echo "CONFIG_NETTLE=y" >> $config_host_mak
   echo "CONFIG_NETTLE_VERSION_MAJOR=${nettle_version%%.*}" >> $config_host_mak
 fi
 if test "$tasn1" = "yes" ; then
@@ -5330,8 +5406,8 @@
 echo "DSOSUF=$DSOSUF" >> $config_host_mak
 echo "LDFLAGS_SHARED=$LDFLAGS_SHARED" >> $config_host_mak
 echo "LIBS_QGA+=$libs_qga" >> $config_host_mak
-echo "TEST_LIBS=$test_libs" >> $config_host_mak
-echo "TEST_CFLAGS=$test_cflags" >> $config_host_mak
+echo "TASN1_LIBS=$tasn1_libs" >> $config_host_mak
+echo "TASN1_CFLAGS=$tasn1_cflags" >> $config_host_mak
 echo "POD2MAN=$POD2MAN" >> $config_host_mak
 echo "TRANSLATE_OPT_CFLAGS=$TRANSLATE_OPT_CFLAGS" >> $config_host_mak
 if test "$gcov" = "yes" ; then

diff --git a/contrib/ivshmem-client/Makefile.objs b/contrib/ivshmem-client/Makefile.objs
new file mode 100644
index 0000000..bfab2d2
--- /dev/null
+++ b/contrib/ivshmem-client/Makefile.objs

@@ -0,0 +1 @@
+ivshmem-client-obj-y = ivshmem-client.o main.o

diff --git a/contrib/ivshmem-client/ivshmem-client.c b/contrib/ivshmem-client/ivshmem-client.c
new file mode 100644
index 0000000..31619d8
--- /dev/null
+++ b/contrib/ivshmem-client/ivshmem-client.c

@@ -0,0 +1,446 @@
+/*
+ * Copyright 6WIND S.A., 2014
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "qemu-common.h"
+#include "qemu/queue.h"
+
+#include "ivshmem-client.h"
+
+/* log a message on stdout if verbose=1 */
+#define IVSHMEM_CLIENT_DEBUG(client, fmt, ...) do { \
+        if ((client)->verbose) {         \
+            printf(fmt, ## __VA_ARGS__); \
+        }                                \
+    } while (0)
+
+/* read message from the unix socket */
+static int
+ivshmem_client_read_one_msg(IvshmemClient *client, int64_t *index, int *fd)
+{
+    int ret;
+    struct msghdr msg;
+    struct iovec iov[1];
+    union {
+        struct cmsghdr cmsg;
+        char control[CMSG_SPACE(sizeof(int))];
+    } msg_control;
+    struct cmsghdr *cmsg;
+
+    iov[0].iov_base = index;
+    iov[0].iov_len = sizeof(*index);
+
+    memset(&msg, 0, sizeof(msg));
+    msg.msg_iov = iov;
+    msg.msg_iovlen = 1;
+    msg.msg_control = &msg_control;
+    msg.msg_controllen = sizeof(msg_control);
+
+    ret = recvmsg(client->sock_fd, &msg, 0);
+    if (ret < sizeof(*index)) {
+        IVSHMEM_CLIENT_DEBUG(client, "cannot read message: %s\n",
+                             strerror(errno));
+        return -1;
+    }
+    if (ret == 0) {
+        IVSHMEM_CLIENT_DEBUG(client, "lost connection to server\n");
+        return -1;
+    }
+
+    *index = GINT64_FROM_LE(*index);
+    *fd = -1;
+
+    for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+
+        if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)) ||
+            cmsg->cmsg_level != SOL_SOCKET ||
+            cmsg->cmsg_type != SCM_RIGHTS) {
+            continue;
+        }
+
+        memcpy(fd, CMSG_DATA(cmsg), sizeof(*fd));
+    }
+
+    return 0;
+}
+
+/* free a peer when the server advertises a disconnection or when the
+ * client is freed */
+static void
+ivshmem_client_free_peer(IvshmemClient *client, IvshmemClientPeer *peer)
+{
+    unsigned vector;
+
+    QTAILQ_REMOVE(&client->peer_list, peer, next);
+    for (vector = 0; vector < peer->vectors_count; vector++) {
+        close(peer->vectors[vector]);
+    }
+
+    g_free(peer);
+}
+
+/* handle message coming from server (new peer, new vectors) */
+static int
+ivshmem_client_handle_server_msg(IvshmemClient *client)
+{
+    IvshmemClientPeer *peer;
+    int64_t peer_id;
+    int ret, fd;
+
+    ret = ivshmem_client_read_one_msg(client, &peer_id, &fd);
+    if (ret < 0) {
+        return -1;
+    }
+
+    /* can return a peer or the local client */
+    peer = ivshmem_client_search_peer(client, peer_id);
+
+    /* delete peer */
+    if (fd == -1) {
+
+        if (peer == NULL || peer == &client->local) {
+            IVSHMEM_CLIENT_DEBUG(client, "receive delete for invalid "
+                                 "peer %" PRId64 "\n", peer_id);
+            return -1;
+        }
+
+        IVSHMEM_CLIENT_DEBUG(client, "delete peer id = %" PRId64 "\n", peer_id);
+        ivshmem_client_free_peer(client, peer);
+        return 0;
+    }
+
+    /* new peer */
+    if (peer == NULL) {
+        peer = g_malloc0(sizeof(*peer));
+        peer->id = peer_id;
+        peer->vectors_count = 0;
+        QTAILQ_INSERT_TAIL(&client->peer_list, peer, next);
+        IVSHMEM_CLIENT_DEBUG(client, "new peer id = %" PRId64 "\n", peer_id);
+    }
+
+    /* new vector */
+    IVSHMEM_CLIENT_DEBUG(client, "  new vector %d (fd=%d) for peer id %"
+                         PRId64 "\n", peer->vectors_count, fd, peer->id);
+    if (peer->vectors_count >= G_N_ELEMENTS(peer->vectors)) {
+        IVSHMEM_CLIENT_DEBUG(client, "Too many vectors received, failing");
+        return -1;
+    }
+
+    peer->vectors[peer->vectors_count] = fd;
+    peer->vectors_count++;
+
+    return 0;
+}
+
+/* init a new ivshmem client */
+int
+ivshmem_client_init(IvshmemClient *client, const char *unix_sock_path,
+                    IvshmemClientNotifCb notif_cb, void *notif_arg,
+                    bool verbose)
+{
+    int ret;
+    unsigned i;
+
+    memset(client, 0, sizeof(*client));
+
+    ret = snprintf(client->unix_sock_path, sizeof(client->unix_sock_path),
+                   "%s", unix_sock_path);
+
+    if (ret < 0 || ret >= sizeof(client->unix_sock_path)) {
+        IVSHMEM_CLIENT_DEBUG(client, "could not copy unix socket path\n");
+        return -1;
+    }
+
+    for (i = 0; i < IVSHMEM_CLIENT_MAX_VECTORS; i++) {
+        client->local.vectors[i] = -1;
+    }
+
+    QTAILQ_INIT(&client->peer_list);
+    client->local.id = -1;
+
+    client->notif_cb = notif_cb;
+    client->notif_arg = notif_arg;
+    client->verbose = verbose;
+    client->shm_fd = -1;
+    client->sock_fd = -1;
+
+    return 0;
+}
+
+/* create and connect to the unix socket */
+int
+ivshmem_client_connect(IvshmemClient *client)
+{
+    struct sockaddr_un sun;
+    int fd, ret;
+    int64_t tmp;
+
+    IVSHMEM_CLIENT_DEBUG(client, "connect to client %s\n",
+                         client->unix_sock_path);
+
+    client->sock_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+    if (client->sock_fd < 0) {
+        IVSHMEM_CLIENT_DEBUG(client, "cannot create socket: %s\n",
+                             strerror(errno));
+        return -1;
+    }
+
+    sun.sun_family = AF_UNIX;
+    ret = snprintf(sun.sun_path, sizeof(sun.sun_path), "%s",
+                   client->unix_sock_path);
+    if (ret < 0 || ret >= sizeof(sun.sun_path)) {
+        IVSHMEM_CLIENT_DEBUG(client, "could not copy unix socket path\n");
+        goto err_close;
+    }
+
+    if (connect(client->sock_fd, (struct sockaddr *)&sun, sizeof(sun)) < 0) {
+        IVSHMEM_CLIENT_DEBUG(client, "cannot connect to %s: %s\n", sun.sun_path,
+                             strerror(errno));
+        goto err_close;
+    }
+
+    /* first, we expect a protocol version */
+    if (ivshmem_client_read_one_msg(client, &tmp, &fd) < 0 ||
+        (tmp != IVSHMEM_PROTOCOL_VERSION) || fd != -1) {
+        IVSHMEM_CLIENT_DEBUG(client, "cannot read from server\n");
+        goto err_close;
+    }
+
+    /* then, we expect our index + a fd == -1 */
+    if (ivshmem_client_read_one_msg(client, &client->local.id, &fd) < 0 ||
+        client->local.id < 0 || fd != -1) {
+        IVSHMEM_CLIENT_DEBUG(client, "cannot read from server (2)\n");
+        goto err_close;
+    }
+    IVSHMEM_CLIENT_DEBUG(client, "our_id=%" PRId64 "\n", client->local.id);
+
+    /* now, we expect shared mem fd + a -1 index, note that shm fd
+     * is not used */
+    if (ivshmem_client_read_one_msg(client, &tmp, &fd) < 0 ||
+        tmp != -1 || fd < 0) {
+        if (fd >= 0) {
+            close(fd);
+        }
+        IVSHMEM_CLIENT_DEBUG(client, "cannot read from server (3)\n");
+        goto err_close;
+    }
+    client->shm_fd = fd;
+    IVSHMEM_CLIENT_DEBUG(client, "shm_fd=%d\n", fd);
+
+    return 0;
+
+err_close:
+    close(client->sock_fd);
+    client->sock_fd = -1;
+    return -1;
+}
+
+/* close connection to the server, and free all peer structures */
+void
+ivshmem_client_close(IvshmemClient *client)
+{
+    IvshmemClientPeer *peer;
+    unsigned i;
+
+    IVSHMEM_CLIENT_DEBUG(client, "close client\n");
+
+    while ((peer = QTAILQ_FIRST(&client->peer_list)) != NULL) {
+        ivshmem_client_free_peer(client, peer);
+    }
+
+    close(client->shm_fd);
+    client->shm_fd = -1;
+    close(client->sock_fd);
+    client->sock_fd = -1;
+    client->local.id = -1;
+    for (i = 0; i < IVSHMEM_CLIENT_MAX_VECTORS; i++) {
+        close(client->local.vectors[i]);
+        client->local.vectors[i] = -1;
+    }
+    client->local.vectors_count = 0;
+}
+
+/* get the fd_set according to the unix socket and peer list */
+void
+ivshmem_client_get_fds(const IvshmemClient *client, fd_set *fds, int *maxfd)
+{
+    int fd;
+    unsigned vector;
+
+    FD_SET(client->sock_fd, fds);
+    if (client->sock_fd >= *maxfd) {
+        *maxfd = client->sock_fd + 1;
+    }
+
+    for (vector = 0; vector < client->local.vectors_count; vector++) {
+        fd = client->local.vectors[vector];
+        FD_SET(fd, fds);
+        if (fd >= *maxfd) {
+            *maxfd = fd + 1;
+        }
+    }
+}
+
+/* handle events from eventfd: just print a message on notification */
+static int
+ivshmem_client_handle_event(IvshmemClient *client, const fd_set *cur, int maxfd)
+{
+    IvshmemClientPeer *peer;
+    uint64_t kick;
+    unsigned i;
+    int ret;
+
+    peer = &client->local;
+
+    for (i = 0; i < peer->vectors_count; i++) {
+        if (peer->vectors[i] >= maxfd || !FD_ISSET(peer->vectors[i], cur)) {
+            continue;
+        }
+
+        ret = read(peer->vectors[i], &kick, sizeof(kick));
+        if (ret < 0) {
+            return ret;
+        }
+        if (ret != sizeof(kick)) {
+            IVSHMEM_CLIENT_DEBUG(client, "invalid read size = %d\n", ret);
+            errno = EINVAL;
+            return -1;
+        }
+        IVSHMEM_CLIENT_DEBUG(client, "received event on fd %d vector %d: %"
+                             PRIu64 "\n", peer->vectors[i], i, kick);
+        if (client->notif_cb != NULL) {
+            client->notif_cb(client, peer, i, client->notif_arg);
+        }
+    }
+
+    return 0;
+}
+
+/* read and handle new messages on the given fd_set */
+int
+ivshmem_client_handle_fds(IvshmemClient *client, fd_set *fds, int maxfd)
+{
+    if (client->sock_fd < maxfd && FD_ISSET(client->sock_fd, fds) &&
+        ivshmem_client_handle_server_msg(client) < 0 && errno != EINTR) {
+        IVSHMEM_CLIENT_DEBUG(client, "ivshmem_client_handle_server_msg() "
+                             "failed\n");
+        return -1;
+    } else if (ivshmem_client_handle_event(client, fds, maxfd) < 0 &&
+               errno != EINTR) {
+        IVSHMEM_CLIENT_DEBUG(client, "ivshmem_client_handle_event() failed\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+/* send a notification on a vector of a peer */
+int
+ivshmem_client_notify(const IvshmemClient *client,
+                      const IvshmemClientPeer *peer, unsigned vector)
+{
+    uint64_t kick;
+    int fd;
+
+    if (vector >= peer->vectors_count) {
+        IVSHMEM_CLIENT_DEBUG(client, "invalid vector %u on peer %" PRId64 "\n",
+                             vector, peer->id);
+        return -1;
+    }
+    fd = peer->vectors[vector];
+    IVSHMEM_CLIENT_DEBUG(client, "notify peer %" PRId64
+                         " on vector %d, fd %d\n", peer->id, vector, fd);
+
+    kick = 1;
+    if (write(fd, &kick, sizeof(kick)) != sizeof(kick)) {
+        fprintf(stderr, "could not write to %d: %s\n", peer->vectors[vector],
+                strerror(errno));
+        return -1;
+    }
+    return 0;
+}
+
+/* send a notification to all vectors of a peer */
+int
+ivshmem_client_notify_all_vects(const IvshmemClient *client,
+                                const IvshmemClientPeer *peer)
+{
+    unsigned vector;
+    int ret = 0;
+
+    for (vector = 0; vector < peer->vectors_count; vector++) {
+        if (ivshmem_client_notify(client, peer, vector) < 0) {
+            ret = -1;
+        }
+    }
+
+    return ret;
+}
+
+/* send a notification to all peers */
+int
+ivshmem_client_notify_broadcast(const IvshmemClient *client)
+{
+    IvshmemClientPeer *peer;
+    int ret = 0;
+
+    QTAILQ_FOREACH(peer, &client->peer_list, next) {
+        if (ivshmem_client_notify_all_vects(client, peer) < 0) {
+            ret = -1;
+        }
+    }
+
+    return ret;
+}
+
+/* lookup peer from its id */
+IvshmemClientPeer *
+ivshmem_client_search_peer(IvshmemClient *client, int64_t peer_id)
+{
+    IvshmemClientPeer *peer;
+
+    if (peer_id == client->local.id) {
+        return &client->local;
+    }
+
+    QTAILQ_FOREACH(peer, &client->peer_list, next) {
+        if (peer->id == peer_id) {
+            return peer;
+        }
+    }
+    return NULL;
+}
+
+/* dump our info, the list of peers their vectors on stdout */
+void
+ivshmem_client_dump(const IvshmemClient *client)
+{
+    const IvshmemClientPeer *peer;
+    unsigned vector;
+
+    /* dump local infos */
+    peer = &client->local;
+    printf("our_id = %" PRId64 "\n", peer->id);
+    for (vector = 0; vector < peer->vectors_count; vector++) {
+        printf("  vector %d is enabled (fd=%d)\n", vector,
+               peer->vectors[vector]);
+    }
+
+    /* dump peers */
+    QTAILQ_FOREACH(peer, &client->peer_list, next) {
+        printf("peer_id = %" PRId64 "\n", peer->id);
+
+        for (vector = 0; vector < peer->vectors_count; vector++) {
+            printf("  vector %d is enabled (fd=%d)\n", vector,
+                   peer->vectors[vector]);
+        }
+    }
+}

diff --git a/contrib/ivshmem-client/ivshmem-client.h b/contrib/ivshmem-client/ivshmem-client.h
new file mode 100644
index 0000000..3a4f809
--- /dev/null
+++ b/contrib/ivshmem-client/ivshmem-client.h

@@ -0,0 +1,213 @@
+/*
+ * Copyright 6WIND S.A., 2014
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#ifndef _IVSHMEM_CLIENT_H_
+#define _IVSHMEM_CLIENT_H_
+
+/**
+ * This file provides helper to implement an ivshmem client. It is used
+ * on the host to ask QEMU to send an interrupt to an ivshmem PCI device in a
+ * guest. QEMU also implements an ivshmem client similar to this one, they both
+ * connect to an ivshmem server.
+ *
+ * A standalone ivshmem client based on this file is provided for debug/test
+ * purposes.
+ */
+
+#include <limits.h>
+#include <sys/select.h>
+
+#include "qemu/queue.h"
+#include "hw/misc/ivshmem.h"
+
+/**
+ * Maximum number of notification vectors supported by the client
+ */
+#define IVSHMEM_CLIENT_MAX_VECTORS 64
+
+/**
+ * Structure storing a peer
+ *
+ * Each time a client connects to an ivshmem server, it is advertised to
+ * all connected clients through the unix socket. When our ivshmem
+ * client receives a notification, it creates a IvshmemClientPeer
+ * structure to store the infos of this peer.
+ *
+ * This structure is also used to store the information of our own
+ * client in (IvshmemClient)->local.
+ */
+typedef struct IvshmemClientPeer {
+    QTAILQ_ENTRY(IvshmemClientPeer) next;    /**< next in list*/
+    int64_t id;                              /**< the id of the peer */
+    int vectors[IVSHMEM_CLIENT_MAX_VECTORS]; /**< one fd per vector */
+    unsigned vectors_count;                  /**< number of vectors */
+} IvshmemClientPeer;
+QTAILQ_HEAD(IvshmemClientPeerList, IvshmemClientPeer);
+
+typedef struct IvshmemClientPeerList IvshmemClientPeerList;
+typedef struct IvshmemClient IvshmemClient;
+
+/**
+ * Typedef of callback function used when our IvshmemClient receives a
+ * notification from a peer.
+ */
+typedef void (*IvshmemClientNotifCb)(
+    const IvshmemClient *client,
+    const IvshmemClientPeer *peer,
+    unsigned vect, void *arg);
+
+/**
+ * Structure describing an ivshmem client
+ *
+ * This structure stores all information related to our client: the name
+ * of the server unix socket, the list of peers advertised by the
+ * server, our own client information, and a pointer the notification
+ * callback function used when we receive a notification from a peer.
+ */
+struct IvshmemClient {
+    char unix_sock_path[PATH_MAX];      /**< path to unix sock */
+    int sock_fd;                        /**< unix sock filedesc */
+    int shm_fd;                         /**< shm file descriptor */
+
+    IvshmemClientPeerList peer_list;    /**< list of peers */
+    IvshmemClientPeer local;            /**< our own infos */
+
+    IvshmemClientNotifCb notif_cb;      /**< notification callback */
+    void *notif_arg;                    /**< notification argument */
+
+    bool verbose;                       /**< true to enable debug */
+};
+
+/**
+ * Initialize an ivshmem client
+ *
+ * @client:         A pointer to an uninitialized IvshmemClient structure
+ * @unix_sock_path: The pointer to the unix socket file name
+ * @notif_cb:       If not NULL, the pointer to the function to be called when
+ *                  our IvshmemClient receives a notification from a peer
+ * @notif_arg:      Opaque pointer given as-is to the notification callback
+ *                  function
+ * @verbose:        True to enable debug
+ *
+ * Returns:         0 on success, or a negative value on error
+ */
+int ivshmem_client_init(IvshmemClient *client, const char *unix_sock_path,
+                        IvshmemClientNotifCb notif_cb, void *notif_arg,
+                        bool verbose);
+
+/**
+ * Connect to the server
+ *
+ * Connect to the server unix socket, and read the first initial
+ * messages sent by the server, giving the ID of the client and the file
+ * descriptor of the shared memory.
+ *
+ * @client: The ivshmem client
+ *
+ * Returns: 0 on success, or a negative value on error
+ */
+int ivshmem_client_connect(IvshmemClient *client);
+
+/**
+ * Close connection to the server and free all peer structures
+ *
+ * @client: The ivshmem client
+ */
+void ivshmem_client_close(IvshmemClient *client);
+
+/**
+ * Fill a fd_set with file descriptors to be monitored
+ *
+ * This function will fill a fd_set with all file descriptors
+ * that must be polled (unix server socket and peers eventfd). The
+ * function will not initialize the fd_set, it is up to the caller
+ * to do this.
+ *
+ * @client: The ivshmem client
+ * @fds:    The fd_set to be updated
+ * @maxfd:  Must be set to the max file descriptor + 1 in fd_set. This value is
+ *          updated if this function adds a greater fd in fd_set.
+ */
+void ivshmem_client_get_fds(const IvshmemClient *client, fd_set *fds,
+                            int *maxfd);
+
+/**
+ * Read and handle new messages
+ *
+ * Given a fd_set filled by select(), handle incoming messages from
+ * server or peers.
+ *
+ * @client: The ivshmem client
+ * @fds:    The fd_set containing the file descriptors to be checked. Note
+ *          that file descriptors that are not related to our client are
+ *          ignored.
+ * @maxfd:  The maximum fd in fd_set, plus one.
+ *
+ * Returns: 0 on success, or a negative value on error
+ */
+int ivshmem_client_handle_fds(IvshmemClient *client, fd_set *fds, int maxfd);
+
+/**
+ * Send a notification to a vector of a peer
+ *
+ * @client: The ivshmem client
+ * @peer:   The peer to be notified
+ * @vector: The number of the vector
+ *
+ * Returns: 0 on success, or a negative value on error
+ */
+int ivshmem_client_notify(const IvshmemClient *client,
+                          const IvshmemClientPeer *peer, unsigned vector);
+
+/**
+ * Send a notification to all vectors of a peer
+ *
+ * @client: The ivshmem client
+ * @peer:   The peer to be notified
+ *
+ * Returns: 0 on success, or a negative value on error (at least one
+ *          notification failed)
+ */
+int ivshmem_client_notify_all_vects(const IvshmemClient *client,
+                                    const IvshmemClientPeer *peer);
+
+/**
+ * Broadcat a notification to all vectors of all peers
+ *
+ * @client: The ivshmem client
+ *
+ * Returns: 0 on success, or a negative value on error (at least one
+ *          notification failed)
+ */
+int ivshmem_client_notify_broadcast(const IvshmemClient *client);
+
+/**
+ * Search a peer from its identifier
+ *
+ * Return the peer structure from its peer_id. If the given peer_id is
+ * the local id, the function returns the local peer structure.
+ *
+ * @client:  The ivshmem client
+ * @peer_id: The identifier of the peer structure
+ *
+ * Returns:  The peer structure, or NULL if not found
+ */
+IvshmemClientPeer *
+ivshmem_client_search_peer(IvshmemClient *client, int64_t peer_id);
+
+/**
+ * Dump information of this ivshmem client on stdout
+ *
+ * Dump the id and the vectors of the given ivshmem client and the list
+ * of its peers and their vectors on stdout.
+ *
+ * @client: The ivshmem client
+ */
+void ivshmem_client_dump(const IvshmemClient *client);
+
+#endif /* _IVSHMEM_CLIENT_H_ */

diff --git a/contrib/ivshmem-client/main.c b/contrib/ivshmem-client/main.c
new file mode 100644
index 0000000..c004870
--- /dev/null
+++ b/contrib/ivshmem-client/main.c

@@ -0,0 +1,240 @@
+/*
+ * Copyright 6WIND S.A., 2014
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#include "qemu-common.h"
+
+#include "ivshmem-client.h"
+
+#define IVSHMEM_CLIENT_DEFAULT_VERBOSE        0
+#define IVSHMEM_CLIENT_DEFAULT_UNIX_SOCK_PATH "/tmp/ivshmem_socket"
+
+typedef struct IvshmemClientArgs {
+    bool verbose;
+    const char *unix_sock_path;
+} IvshmemClientArgs;
+
+/* show ivshmem_client_usage and exit with given error code */
+static void
+ivshmem_client_usage(const char *name, int code)
+{
+    fprintf(stderr, "%s [opts]\n", name);
+    fprintf(stderr, "  -h: show this help\n");
+    fprintf(stderr, "  -v: verbose mode\n");
+    fprintf(stderr, "  -S <unix_sock_path>: path to the unix socket\n"
+                    "     to connect to.\n"
+                    "     default=%s\n", IVSHMEM_CLIENT_DEFAULT_UNIX_SOCK_PATH);
+    exit(code);
+}
+
+/* parse the program arguments, exit on error */
+static void
+ivshmem_client_parse_args(IvshmemClientArgs *args, int argc, char *argv[])
+{
+    int c;
+
+    while ((c = getopt(argc, argv,
+                       "h"  /* help */
+                       "v"  /* verbose */
+                       "S:" /* unix_sock_path */
+                      )) != -1) {
+
+        switch (c) {
+        case 'h': /* help */
+            ivshmem_client_usage(argv[0], 0);
+            break;
+
+        case 'v': /* verbose */
+            args->verbose = 1;
+            break;
+
+        case 'S': /* unix_sock_path */
+            args->unix_sock_path = optarg;
+            break;
+
+        default:
+            ivshmem_client_usage(argv[0], 1);
+            break;
+        }
+    }
+}
+
+/* show command line help */
+static void
+ivshmem_client_cmdline_help(void)
+{
+    printf("dump: dump peers (including us)\n"
+           "int <peer> <vector>: notify one vector on a peer\n"
+           "int <peer> all: notify all vectors of a peer\n"
+           "int all: notify all vectors of all peers (excepting us)\n");
+}
+
+/* read stdin and handle commands */
+static int
+ivshmem_client_handle_stdin_command(IvshmemClient *client)
+{
+    IvshmemClientPeer *peer;
+    char buf[128];
+    char *s, *token;
+    int ret;
+    int peer_id, vector;
+
+    memset(buf, 0, sizeof(buf));
+    ret = read(0, buf, sizeof(buf) - 1);
+    if (ret < 0) {
+        return -1;
+    }
+
+    s = buf;
+    while ((token = strsep(&s, "\n\r;")) != NULL) {
+        if (!strcmp(token, "")) {
+            continue;
+        }
+        if (!strcmp(token, "?")) {
+            ivshmem_client_cmdline_help();
+        }
+        if (!strcmp(token, "help")) {
+            ivshmem_client_cmdline_help();
+        } else if (!strcmp(token, "dump")) {
+            ivshmem_client_dump(client);
+        } else if (!strcmp(token, "int all")) {
+            ivshmem_client_notify_broadcast(client);
+        } else if (sscanf(token, "int %d %d", &peer_id, &vector) == 2) {
+            peer = ivshmem_client_search_peer(client, peer_id);
+            if (peer == NULL) {
+                printf("cannot find peer_id = %d\n", peer_id);
+                continue;
+            }
+            ivshmem_client_notify(client, peer, vector);
+        } else if (sscanf(token, "int %d all", &peer_id) == 1) {
+            peer = ivshmem_client_search_peer(client, peer_id);
+            if (peer == NULL) {
+                printf("cannot find peer_id = %d\n", peer_id);
+                continue;
+            }
+            ivshmem_client_notify_all_vects(client, peer);
+        } else {
+            printf("invalid command, type help\n");
+        }
+    }
+
+    printf("cmd> ");
+    fflush(stdout);
+    return 0;
+}
+
+/* listen on stdin (command line), on unix socket (notifications of new
+ * and dead peers), and on eventfd (IRQ request) */
+static int
+ivshmem_client_poll_events(IvshmemClient *client)
+{
+    fd_set fds;
+    int ret, maxfd;
+
+    while (1) {
+
+        FD_ZERO(&fds);
+        FD_SET(0, &fds); /* add stdin in fd_set */
+        maxfd = 1;
+
+        ivshmem_client_get_fds(client, &fds, &maxfd);
+
+        ret = select(maxfd, &fds, NULL, NULL, NULL);
+        if (ret < 0) {
+            if (errno == EINTR) {
+                continue;
+            }
+
+            fprintf(stderr, "select error: %s\n", strerror(errno));
+            break;
+        }
+        if (ret == 0) {
+            continue;
+        }
+
+        if (FD_ISSET(0, &fds) &&
+            ivshmem_client_handle_stdin_command(client) < 0 && errno != EINTR) {
+            fprintf(stderr, "ivshmem_client_handle_stdin_command() failed\n");
+            break;
+        }
+
+        if (ivshmem_client_handle_fds(client, &fds, maxfd) < 0) {
+            fprintf(stderr, "ivshmem_client_handle_fds() failed\n");
+            break;
+        }
+    }
+
+    return ret;
+}
+
+/* callback when we receive a notification (just display it) */
+static void
+ivshmem_client_notification_cb(const IvshmemClient *client,
+                               const IvshmemClientPeer *peer,
+                               unsigned vect, void *arg)
+{
+    (void)client;
+    (void)arg;
+    printf("receive notification from peer_id=%" PRId64 " vector=%u\n",
+           peer->id, vect);
+}
+
+int
+main(int argc, char *argv[])
+{
+    struct sigaction sa;
+    IvshmemClient client;
+    IvshmemClientArgs args = {
+        .verbose = IVSHMEM_CLIENT_DEFAULT_VERBOSE,
+        .unix_sock_path = IVSHMEM_CLIENT_DEFAULT_UNIX_SOCK_PATH,
+    };
+
+    /* parse arguments, will exit on error */
+    ivshmem_client_parse_args(&args, argc, argv);
+
+    /* Ignore SIGPIPE, see this link for more info:
+     * http://www.mail-archive.com/libevent-users@monkey.org/msg01606.html */
+    sa.sa_handler = SIG_IGN;
+    sa.sa_flags = 0;
+    if (sigemptyset(&sa.sa_mask) == -1 ||
+        sigaction(SIGPIPE, &sa, 0) == -1) {
+        perror("failed to ignore SIGPIPE; sigaction");
+        return 1;
+    }
+
+    ivshmem_client_cmdline_help();
+    printf("cmd> ");
+    fflush(stdout);
+
+    if (ivshmem_client_init(&client, args.unix_sock_path,
+                            ivshmem_client_notification_cb, NULL,
+                            args.verbose) < 0) {
+        fprintf(stderr, "cannot init client\n");
+        return 1;
+    }
+
+    while (1) {
+        if (ivshmem_client_connect(&client) < 0) {
+            fprintf(stderr, "cannot connect to server, retry in 1 second\n");
+            sleep(1);
+            continue;
+        }
+
+        fprintf(stdout, "listen on server socket %d\n", client.sock_fd);
+
+        if (ivshmem_client_poll_events(&client) == 0) {
+            continue;
+        }
+
+        /* disconnected from server, reset all peers */
+        fprintf(stdout, "disconnected from server\n");
+
+        ivshmem_client_close(&client);
+    }
+
+    return 0;
+}

diff --git a/contrib/ivshmem-server/Makefile.objs b/contrib/ivshmem-server/Makefile.objs
new file mode 100644
index 0000000..c060dd3
--- /dev/null
+++ b/contrib/ivshmem-server/Makefile.objs

@@ -0,0 +1 @@
+ivshmem-server-obj-y = ivshmem-server.o main.o

diff --git a/contrib/ivshmem-server/ivshmem-server.c b/contrib/ivshmem-server/ivshmem-server.c
new file mode 100644
index 0000000..5e5239c
--- /dev/null
+++ b/contrib/ivshmem-server/ivshmem-server.c

@@ -0,0 +1,491 @@
+/*
+ * Copyright 6WIND S.A., 2014
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+#include "qemu-common.h"
+#include "qemu/sockets.h"
+
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#ifdef CONFIG_LINUX
+#include <sys/vfs.h>
+#endif
+
+#include "ivshmem-server.h"
+
+/* log a message on stdout if verbose=1 */
+#define IVSHMEM_SERVER_DEBUG(server, fmt, ...) do { \
+        if ((server)->verbose) {         \
+            printf(fmt, ## __VA_ARGS__); \
+        }                                \
+    } while (0)
+
+/** maximum size of a huge page, used by ivshmem_server_ftruncate() */
+#define IVSHMEM_SERVER_MAX_HUGEPAGE_SIZE (1024 * 1024 * 1024)
+
+/** default listen backlog (number of sockets not accepted) */
+#define IVSHMEM_SERVER_LISTEN_BACKLOG 10
+
+/* send message to a client unix socket */
+static int
+ivshmem_server_send_one_msg(int sock_fd, int64_t peer_id, int fd)
+{
+    int ret;
+    struct msghdr msg;
+    struct iovec iov[1];
+    union {
+        struct cmsghdr cmsg;
+        char control[CMSG_SPACE(sizeof(int))];
+    } msg_control;
+    struct cmsghdr *cmsg;
+
+    peer_id = GINT64_TO_LE(peer_id);
+    iov[0].iov_base = &peer_id;
+    iov[0].iov_len = sizeof(peer_id);
+
+    memset(&msg, 0, sizeof(msg));
+    msg.msg_iov = iov;
+    msg.msg_iovlen = 1;
+
+    /* if fd is specified, add it in a cmsg */
+    if (fd >= 0) {
+        memset(&msg_control, 0, sizeof(msg_control));
+        msg.msg_control = &msg_control;
+        msg.msg_controllen = sizeof(msg_control);
+        cmsg = CMSG_FIRSTHDR(&msg);
+        cmsg->cmsg_level = SOL_SOCKET;
+        cmsg->cmsg_type = SCM_RIGHTS;
+        cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+        memcpy(CMSG_DATA(cmsg), &fd, sizeof(fd));
+    }
+
+    ret = sendmsg(sock_fd, &msg, 0);
+    if (ret <= 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
+/* free a peer when the server advertises a disconnection or when the
+ * server is freed */
+static void
+ivshmem_server_free_peer(IvshmemServer *server, IvshmemServerPeer *peer)
+{
+    unsigned vector;
+    IvshmemServerPeer *other_peer;
+
+    IVSHMEM_SERVER_DEBUG(server, "free peer %" PRId64 "\n", peer->id);
+    close(peer->sock_fd);
+    QTAILQ_REMOVE(&server->peer_list, peer, next);
+
+    /* advertise the deletion to other peers */
+    QTAILQ_FOREACH(other_peer, &server->peer_list, next) {
+        ivshmem_server_send_one_msg(other_peer->sock_fd, peer->id, -1);
+    }
+
+    for (vector = 0; vector < peer->vectors_count; vector++) {
+        event_notifier_cleanup(&peer->vectors[vector]);
+    }
+
+    g_free(peer);
+}
+
+/* send the peer id and the shm_fd just after a new client connection */
+static int
+ivshmem_server_send_initial_info(IvshmemServer *server, IvshmemServerPeer *peer)
+{
+    int ret;
+
+    /* send our protocol version first */
+    ret = ivshmem_server_send_one_msg(peer->sock_fd, IVSHMEM_PROTOCOL_VERSION,
+                                      -1);
+    if (ret < 0) {
+        IVSHMEM_SERVER_DEBUG(server, "cannot send version: %s\n",
+                             strerror(errno));
+        return -1;
+    }
+
+    /* send the peer id to the client */
+    ret = ivshmem_server_send_one_msg(peer->sock_fd, peer->id, -1);
+    if (ret < 0) {
+        IVSHMEM_SERVER_DEBUG(server, "cannot send peer id: %s\n",
+                             strerror(errno));
+        return -1;
+    }
+
+    /* send the shm_fd */
+    ret = ivshmem_server_send_one_msg(peer->sock_fd, -1, server->shm_fd);
+    if (ret < 0) {
+        IVSHMEM_SERVER_DEBUG(server, "cannot send shm fd: %s\n",
+                             strerror(errno));
+        return -1;
+    }
+
+    return 0;
+}
+
+/* handle message on listening unix socket (new client connection) */
+static int
+ivshmem_server_handle_new_conn(IvshmemServer *server)
+{
+    IvshmemServerPeer *peer, *other_peer;
+    struct sockaddr_un unaddr;
+    socklen_t unaddr_len;
+    int newfd;
+    unsigned i;
+
+    /* accept the incoming connection */
+    unaddr_len = sizeof(unaddr);
+    newfd = qemu_accept(server->sock_fd,
+                        (struct sockaddr *)&unaddr, &unaddr_len);
+
+    if (newfd < 0) {
+        IVSHMEM_SERVER_DEBUG(server, "cannot accept() %s\n", strerror(errno));
+        return -1;
+    }
+
+    qemu_set_nonblock(newfd);
+    IVSHMEM_SERVER_DEBUG(server, "accept()=%d\n", newfd);
+
+    /* allocate new structure for this peer */
+    peer = g_malloc0(sizeof(*peer));
+    peer->sock_fd = newfd;
+
+    /* get an unused peer id */
+    /* XXX: this could use id allocation such as Linux IDA, or simply
+     * a free-list */
+    for (i = 0; i < G_MAXUINT16; i++) {
+        if (ivshmem_server_search_peer(server, server->cur_id) == NULL) {
+            break;
+        }
+        server->cur_id++;
+    }
+    if (i == G_MAXUINT16) {
+        IVSHMEM_SERVER_DEBUG(server, "cannot allocate new client id\n");
+        goto fail;
+    }
+    peer->id = server->cur_id++;
+
+    /* create eventfd, one per vector */
+    peer->vectors_count = server->n_vectors;
+    for (i = 0; i < peer->vectors_count; i++) {
+        if (event_notifier_init(&peer->vectors[i], FALSE) < 0) {
+            IVSHMEM_SERVER_DEBUG(server, "cannot create eventfd\n");
+            goto fail;
+        }
+    }
+
+    /* send peer id and shm fd */
+    if (ivshmem_server_send_initial_info(server, peer) < 0) {
+        IVSHMEM_SERVER_DEBUG(server, "cannot send initial info\n");
+        goto fail;
+    }
+
+    /* advertise the new peer to others */
+    QTAILQ_FOREACH(other_peer, &server->peer_list, next) {
+        for (i = 0; i < peer->vectors_count; i++) {
+            ivshmem_server_send_one_msg(other_peer->sock_fd, peer->id,
+                                        peer->vectors[i].wfd);
+        }
+    }
+
+    /* advertise the other peers to the new one */
+    QTAILQ_FOREACH(other_peer, &server->peer_list, next) {
+        for (i = 0; i < peer->vectors_count; i++) {
+            ivshmem_server_send_one_msg(peer->sock_fd, other_peer->id,
+                                        other_peer->vectors[i].wfd);
+        }
+    }
+
+    /* advertise the new peer to itself */
+    for (i = 0; i < peer->vectors_count; i++) {
+        ivshmem_server_send_one_msg(peer->sock_fd, peer->id,
+                                    event_notifier_get_fd(&peer->vectors[i]));
+    }
+
+    QTAILQ_INSERT_TAIL(&server->peer_list, peer, next);
+    IVSHMEM_SERVER_DEBUG(server, "new peer id = %" PRId64 "\n",
+                         peer->id);
+    return 0;
+
+fail:
+    while (i--) {
+        event_notifier_cleanup(&peer->vectors[i]);
+    }
+    close(newfd);
+    g_free(peer);
+    return -1;
+}
+
+/* Try to ftruncate a file to next power of 2 of shmsize.
+ * If it fails; all power of 2 above shmsize are tested until
+ * we reach the maximum huge page size. This is useful
+ * if the shm file is in a hugetlbfs that cannot be truncated to the
+ * shm_size value. */
+static int
+ivshmem_server_ftruncate(int fd, unsigned shmsize)
+{
+    int ret;
+    struct stat mapstat;
+
+    /* align shmsize to next power of 2 */
+    shmsize = pow2ceil(shmsize);
+
+    if (fstat(fd, &mapstat) != -1 && mapstat.st_size == shmsize) {
+        return 0;
+    }
+
+    while (shmsize <= IVSHMEM_SERVER_MAX_HUGEPAGE_SIZE) {
+        ret = ftruncate(fd, shmsize);
+        if (ret == 0) {
+            return ret;
+        }
+        shmsize *= 2;
+    }
+
+    return -1;
+}
+
+/* Init a new ivshmem server */
+int
+ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
+                    const char *shm_path, size_t shm_size, unsigned n_vectors,
+                    bool verbose)
+{
+    int ret;
+
+    memset(server, 0, sizeof(*server));
+    server->verbose = verbose;
+
+    ret = snprintf(server->unix_sock_path, sizeof(server->unix_sock_path),
+                   "%s", unix_sock_path);
+    if (ret < 0 || ret >= sizeof(server->unix_sock_path)) {
+        IVSHMEM_SERVER_DEBUG(server, "could not copy unix socket path\n");
+        return -1;
+    }
+    ret = snprintf(server->shm_path, sizeof(server->shm_path),
+                   "%s", shm_path);
+    if (ret < 0 || ret >= sizeof(server->shm_path)) {
+        IVSHMEM_SERVER_DEBUG(server, "could not copy shm path\n");
+        return -1;
+    }
+
+    server->shm_size = shm_size;
+    server->n_vectors = n_vectors;
+
+    QTAILQ_INIT(&server->peer_list);
+
+    return 0;
+}
+
+#ifdef CONFIG_LINUX
+
+#define HUGETLBFS_MAGIC       0x958458f6
+
+static long gethugepagesize(const char *path)
+{
+    struct statfs fs;
+    int ret;
+
+    do {
+        ret = statfs(path, &fs);
+    } while (ret != 0 && errno == EINTR);
+
+    if (ret != 0) {
+        return -1;
+    }
+
+    if (fs.f_type != HUGETLBFS_MAGIC) {
+        return -1;
+    }
+
+    return fs.f_bsize;
+}
+#endif
+
+/* open shm, create and bind to the unix socket */
+int
+ivshmem_server_start(IvshmemServer *server)
+{
+    struct sockaddr_un sun;
+    int shm_fd, sock_fd, ret;
+
+    /* open shm file */
+#ifdef CONFIG_LINUX
+    long hpagesize;
+
+    hpagesize = gethugepagesize(server->shm_path);
+    if (hpagesize < 0 && errno != ENOENT) {
+        IVSHMEM_SERVER_DEBUG(server, "cannot stat shm file %s: %s\n",
+                             server->shm_path, strerror(errno));
+    }
+
+    if (hpagesize > 0) {
+        gchar *filename = g_strdup_printf("%s/ivshmem.XXXXXX", server->shm_path);
+        IVSHMEM_SERVER_DEBUG(server, "Using hugepages: %s\n", server->shm_path);
+        shm_fd = mkstemp(filename);
+        unlink(filename);
+        g_free(filename);
+    } else
+#endif
+    {
+        IVSHMEM_SERVER_DEBUG(server, "Using POSIX shared memory: %s\n",
+                             server->shm_path);
+        shm_fd = shm_open(server->shm_path, O_CREAT|O_RDWR, S_IRWXU);
+    }
+
+    if (shm_fd < 0) {
+        fprintf(stderr, "cannot open shm file %s: %s\n", server->shm_path,
+                strerror(errno));
+        return -1;
+    }
+    if (ivshmem_server_ftruncate(shm_fd, server->shm_size) < 0) {
+        fprintf(stderr, "ftruncate(%s) failed: %s\n", server->shm_path,
+                strerror(errno));
+        goto err_close_shm;
+    }
+
+    IVSHMEM_SERVER_DEBUG(server, "create & bind socket %s\n",
+                         server->unix_sock_path);
+
+    /* create the unix listening socket */
+    sock_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+    if (sock_fd < 0) {
+        IVSHMEM_SERVER_DEBUG(server, "cannot create socket: %s\n",
+                             strerror(errno));
+        goto err_close_shm;
+    }
+
+    sun.sun_family = AF_UNIX;
+    ret = snprintf(sun.sun_path, sizeof(sun.sun_path), "%s",
+                   server->unix_sock_path);
+    if (ret < 0 || ret >= sizeof(sun.sun_path)) {
+        IVSHMEM_SERVER_DEBUG(server, "could not copy unix socket path\n");
+        goto err_close_sock;
+    }
+    if (bind(sock_fd, (struct sockaddr *)&sun, sizeof(sun)) < 0) {
+        IVSHMEM_SERVER_DEBUG(server, "cannot connect to %s: %s\n", sun.sun_path,
+                             strerror(errno));
+        goto err_close_sock;
+    }
+
+    if (listen(sock_fd, IVSHMEM_SERVER_LISTEN_BACKLOG) < 0) {
+        IVSHMEM_SERVER_DEBUG(server, "listen() failed: %s\n", strerror(errno));
+        goto err_close_sock;
+    }
+
+    server->sock_fd = sock_fd;
+    server->shm_fd = shm_fd;
+
+    return 0;
+
+err_close_sock:
+    close(sock_fd);
+err_close_shm:
+    close(shm_fd);
+    return -1;
+}
+
+/* close connections to clients, the unix socket and the shm fd */
+void
+ivshmem_server_close(IvshmemServer *server)
+{
+    IvshmemServerPeer *peer, *npeer;
+
+    IVSHMEM_SERVER_DEBUG(server, "close server\n");
+
+    QTAILQ_FOREACH_SAFE(peer, &server->peer_list, next, npeer) {
+        ivshmem_server_free_peer(server, peer);
+    }
+
+    unlink(server->unix_sock_path);
+    close(server->sock_fd);
+    close(server->shm_fd);
+    server->sock_fd = -1;
+    server->shm_fd = -1;
+}
+
+/* get the fd_set according to the unix socket and the peer list */
+void
+ivshmem_server_get_fds(const IvshmemServer *server, fd_set *fds, int *maxfd)
+{
+    IvshmemServerPeer *peer;
+
+    if (server->sock_fd == -1) {
+        return;
+    }
+
+    FD_SET(server->sock_fd, fds);
+    if (server->sock_fd >= *maxfd) {
+        *maxfd = server->sock_fd + 1;
+    }
+
+    QTAILQ_FOREACH(peer, &server->peer_list, next) {
+        FD_SET(peer->sock_fd, fds);
+        if (peer->sock_fd >= *maxfd) {
+            *maxfd = peer->sock_fd + 1;
+        }
+    }
+}
+
+/* process incoming messages on the sockets in fd_set */
+int
+ivshmem_server_handle_fds(IvshmemServer *server, fd_set *fds, int maxfd)
+{
+    IvshmemServerPeer *peer, *peer_next;
+
+    if (server->sock_fd < maxfd && FD_ISSET(server->sock_fd, fds) &&
+        ivshmem_server_handle_new_conn(server) < 0 && errno != EINTR) {
+        IVSHMEM_SERVER_DEBUG(server, "ivshmem_server_handle_new_conn() "
+                             "failed\n");
+        return -1;
+    }
+
+    QTAILQ_FOREACH_SAFE(peer, &server->peer_list, next, peer_next) {
+        /* any message from a peer socket result in a close() */
+        IVSHMEM_SERVER_DEBUG(server, "peer->sock_fd=%d\n", peer->sock_fd);
+        if (peer->sock_fd < maxfd && FD_ISSET(peer->sock_fd, fds)) {
+            ivshmem_server_free_peer(server, peer);
+        }
+    }
+
+    return 0;
+}
+
+/* lookup peer from its id */
+IvshmemServerPeer *
+ivshmem_server_search_peer(IvshmemServer *server, int64_t peer_id)
+{
+    IvshmemServerPeer *peer;
+
+    QTAILQ_FOREACH(peer, &server->peer_list, next) {
+        if (peer->id == peer_id) {
+            return peer;
+        }
+    }
+    return NULL;
+}
+
+/* dump our info, the list of peers their vectors on stdout */
+void
+ivshmem_server_dump(const IvshmemServer *server)
+{
+    const IvshmemServerPeer *peer;
+    unsigned vector;
+
+    /* dump peers */
+    QTAILQ_FOREACH(peer, &server->peer_list, next) {
+        printf("peer_id = %" PRId64 "\n", peer->id);
+
+        for (vector = 0; vector < peer->vectors_count; vector++) {
+            printf("  vector %d is enabled (fd=%d)\n", vector,
+                   event_notifier_get_fd(&peer->vectors[vector]));
+        }
+    }
+}

diff --git a/contrib/ivshmem-server/ivshmem-server.h b/contrib/ivshmem-server/ivshmem-server.h
new file mode 100644
index 0000000..c9359a0
--- /dev/null
+++ b/contrib/ivshmem-server/ivshmem-server.h

@@ -0,0 +1,167 @@
+/*
+ * Copyright 6WIND S.A., 2014
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#ifndef _IVSHMEM_SERVER_H_
+#define _IVSHMEM_SERVER_H_
+
+/**
+ * The ivshmem server is a daemon that creates a unix socket in listen
+ * mode. The ivshmem clients (qemu or ivshmem-client) connect to this
+ * unix socket. For each client, the server will create some eventfd
+ * (see EVENTFD(2)), one per vector. These fd are transmitted to all
+ * clients using the SCM_RIGHTS cmsg message. Therefore, each client is
+ * able to send a notification to another client without beeing
+ * "profixied" by the server.
+ *
+ * We use this mechanism to send interruptions between guests.
+ * qemu is able to transform an event on a eventfd into a PCI MSI-x
+ * interruption in the guest.
+ *
+ * The ivshmem server is also able to share the file descriptor
+ * associated to the ivshmem shared memory.
+ */
+
+#include <limits.h>
+#include <sys/select.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "qemu/event_notifier.h"
+#include "qemu/queue.h"
+#include "hw/misc/ivshmem.h"
+
+/**
+ * Maximum number of notification vectors supported by the server
+ */
+#define IVSHMEM_SERVER_MAX_VECTORS 64
+
+/**
+ * Structure storing a peer
+ *
+ * Each time a client connects to an ivshmem server, a new
+ * IvshmemServerPeer structure is created. This peer and all its
+ * vectors are advertised to all connected clients through the connected
+ * unix sockets.
+ */
+typedef struct IvshmemServerPeer {
+    QTAILQ_ENTRY(IvshmemServerPeer) next;    /**< next in list*/
+    int sock_fd;                             /**< connected unix sock */
+    int64_t id;                              /**< the id of the peer */
+    EventNotifier vectors[IVSHMEM_SERVER_MAX_VECTORS]; /**< one per vector */
+    unsigned vectors_count;                  /**< number of vectors */
+} IvshmemServerPeer;
+QTAILQ_HEAD(IvshmemServerPeerList, IvshmemServerPeer);
+
+typedef struct IvshmemServerPeerList IvshmemServerPeerList;
+
+/**
+ * Structure describing an ivshmem server
+ *
+ * This structure stores all information related to our server: the name
+ * of the server unix socket and the list of connected peers.
+ */
+typedef struct IvshmemServer {
+    char unix_sock_path[PATH_MAX];   /**< path to unix socket */
+    int sock_fd;                     /**< unix sock file descriptor */
+    char shm_path[PATH_MAX];         /**< path to shm */
+    size_t shm_size;                 /**< size of shm */
+    int shm_fd;                      /**< shm file descriptor */
+    unsigned n_vectors;              /**< number of vectors */
+    uint16_t cur_id;                 /**< id to be given to next client */
+    bool verbose;                    /**< true in verbose mode */
+    IvshmemServerPeerList peer_list; /**< list of peers */
+} IvshmemServer;
+
+/**
+ * Initialize an ivshmem server
+ *
+ * @server:         A pointer to an uninitialized IvshmemServer structure
+ * @unix_sock_path: The pointer to the unix socket file name
+ * @shm_path:       Path to the shared memory. The path corresponds to a POSIX
+ *                  shm name or a hugetlbfs mount point.
+ * @shm_size:       Size of shared memory
+ * @n_vectors:      Number of interrupt vectors per client
+ * @verbose:        True to enable verbose mode
+ *
+ * Returns:         0 on success, or a negative value on error
+ */
+int
+ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
+                    const char *shm_path, size_t shm_size, unsigned n_vectors,
+                    bool verbose);
+
+/**
+ * Open the shm, then create and bind to the unix socket
+ *
+ * @server: The pointer to the initialized IvshmemServer structure
+ *
+ * Returns: 0 on success, or a negative value on error
+ */
+int ivshmem_server_start(IvshmemServer *server);
+
+/**
+ * Close the server
+ *
+ * Close connections to all clients, close the unix socket and the
+ * shared memory file descriptor. The structure remains initialized, so
+ * it is possible to call ivshmem_server_start() again after a call to
+ * ivshmem_server_close().
+ *
+ * @server: The ivshmem server
+ */
+void ivshmem_server_close(IvshmemServer *server);
+
+/**
+ * Fill a fd_set with file descriptors to be monitored
+ *
+ * This function will fill a fd_set with all file descriptors that must
+ * be polled (unix server socket and peers unix socket). The function
+ * will not initialize the fd_set, it is up to the caller to do it.
+ *
+ * @server: The ivshmem server
+ * @fds:    The fd_set to be updated
+ * @maxfd:  Must be set to the max file descriptor + 1 in fd_set. This value is
+ *          updated if this function adds a greater fd in fd_set.
+ */
+void
+ivshmem_server_get_fds(const IvshmemServer *server, fd_set *fds, int *maxfd);
+
+/**
+ * Read and handle new messages
+ *
+ * Given a fd_set (for instance filled by a call to select()), handle
+ * incoming messages from peers.
+ *
+ * @server: The ivshmem server
+ * @fds:    The fd_set containing the file descriptors to be checked. Note that
+ *          file descriptors that are not related to our server are ignored.
+ * @maxfd:  The maximum fd in fd_set, plus one.
+ *
+ * Returns: 0 on success, or a negative value on error
+ */
+int ivshmem_server_handle_fds(IvshmemServer *server, fd_set *fds, int maxfd);
+
+/**
+ * Search a peer from its identifier
+ *
+ * @server:  The ivshmem server
+ * @peer_id: The identifier of the peer structure
+ *
+ * Returns:  The peer structure, or NULL if not found
+ */
+IvshmemServerPeer *
+ivshmem_server_search_peer(IvshmemServer *server, int64_t peer_id);
+
+/**
+ * Dump information of this ivshmem server and its peers on stdout
+ *
+ * @server: The ivshmem server
+ */
+void ivshmem_server_dump(const IvshmemServer *server);
+
+#endif /* _IVSHMEM_SERVER_H_ */

diff --git a/contrib/ivshmem-server/main.c b/contrib/ivshmem-server/main.c
new file mode 100644
index 0000000..54ff001
--- /dev/null
+++ b/contrib/ivshmem-server/main.c

@@ -0,0 +1,263 @@
+/*
+ * Copyright 6WIND S.A., 2014
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#include "qemu-common.h"
+
+#include "ivshmem-server.h"
+
+#define IVSHMEM_SERVER_DEFAULT_VERBOSE        0
+#define IVSHMEM_SERVER_DEFAULT_FOREGROUND     0
+#define IVSHMEM_SERVER_DEFAULT_PID_FILE       "/var/run/ivshmem-server.pid"
+#define IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH "/tmp/ivshmem_socket"
+#define IVSHMEM_SERVER_DEFAULT_SHM_PATH       "ivshmem"
+#define IVSHMEM_SERVER_DEFAULT_SHM_SIZE       (4*1024*1024)
+#define IVSHMEM_SERVER_DEFAULT_N_VECTORS      1
+
+/* used to quit on signal SIGTERM */
+static int ivshmem_server_quit;
+
+/* arguments given by the user */
+typedef struct IvshmemServerArgs {
+    bool verbose;
+    bool foreground;
+    const char *pid_file;
+    const char *unix_socket_path;
+    const char *shm_path;
+    uint64_t shm_size;
+    unsigned n_vectors;
+} IvshmemServerArgs;
+
+/* show ivshmem_server_usage and exit with given error code */
+static void
+ivshmem_server_usage(const char *name, int code)
+{
+    fprintf(stderr, "%s [opts]\n", name);
+    fprintf(stderr, "  -h: show this help\n");
+    fprintf(stderr, "  -v: verbose mode\n");
+    fprintf(stderr, "  -F: foreground mode (default is to daemonize)\n");
+    fprintf(stderr, "  -p <pid_file>: path to the PID file (used in daemon\n"
+                    "     mode only).\n"
+                    "     Default=%s\n", IVSHMEM_SERVER_DEFAULT_SHM_PATH);
+    fprintf(stderr, "  -S <unix_socket_path>: path to the unix socket\n"
+                    "     to listen to.\n"
+                    "     Default=%s\n", IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH);
+    fprintf(stderr, "  -m <shm_path>: path to the shared memory.\n"
+                    "     The path corresponds to a POSIX shm name or a\n"
+                    "     hugetlbfs mount point.\n"
+                    "     default=%s\n", IVSHMEM_SERVER_DEFAULT_SHM_PATH);
+    fprintf(stderr, "  -l <size>: size of shared memory in bytes. The suffix\n"
+                    "     K, M and G can be used (ex: 1K means 1024).\n"
+                    "     default=%u\n", IVSHMEM_SERVER_DEFAULT_SHM_SIZE);
+    fprintf(stderr, "  -n <n_vects>: number of vectors.\n"
+                    "     default=%u\n", IVSHMEM_SERVER_DEFAULT_N_VECTORS);
+
+    exit(code);
+}
+
+/* parse the program arguments, exit on error */
+static void
+ivshmem_server_parse_args(IvshmemServerArgs *args, int argc, char *argv[])
+{
+    int c;
+    unsigned long long v;
+    Error *errp = NULL;
+
+    while ((c = getopt(argc, argv,
+                       "h"  /* help */
+                       "v"  /* verbose */
+                       "F"  /* foreground */
+                       "p:" /* pid_file */
+                       "S:" /* unix_socket_path */
+                       "m:" /* shm_path */
+                       "l:" /* shm_size */
+                       "n:" /* n_vectors */
+                      )) != -1) {
+
+        switch (c) {
+        case 'h': /* help */
+            ivshmem_server_usage(argv[0], 0);
+            break;
+
+        case 'v': /* verbose */
+            args->verbose = 1;
+            break;
+
+        case 'F': /* foreground */
+            args->foreground = 1;
+            break;
+
+        case 'p': /* pid_file */
+            args->pid_file = optarg;
+            break;
+
+        case 'S': /* unix_socket_path */
+            args->unix_socket_path = optarg;
+            break;
+
+        case 'm': /* shm_path */
+            args->shm_path = optarg;
+            break;
+
+        case 'l': /* shm_size */
+            parse_option_size("shm_size", optarg, &args->shm_size, &errp);
+            if (errp) {
+                fprintf(stderr, "cannot parse shm size: %s\n",
+                        error_get_pretty(errp));
+                error_free(errp);
+                ivshmem_server_usage(argv[0], 1);
+            }
+            break;
+
+        case 'n': /* n_vectors */
+            if (parse_uint_full(optarg, &v, 0) < 0) {
+                fprintf(stderr, "cannot parse n_vectors\n");
+                ivshmem_server_usage(argv[0], 1);
+            }
+            args->n_vectors = v;
+            break;
+
+        default:
+            ivshmem_server_usage(argv[0], 1);
+            break;
+        }
+    }
+
+    if (args->n_vectors > IVSHMEM_SERVER_MAX_VECTORS) {
+        fprintf(stderr, "too many requested vectors (max is %d)\n",
+                IVSHMEM_SERVER_MAX_VECTORS);
+        ivshmem_server_usage(argv[0], 1);
+    }
+
+    if (args->verbose == 1 && args->foreground == 0) {
+        fprintf(stderr, "cannot use verbose in daemon mode\n");
+        ivshmem_server_usage(argv[0], 1);
+    }
+}
+
+/* wait for events on listening server unix socket and connected client
+ * sockets */
+static int
+ivshmem_server_poll_events(IvshmemServer *server)
+{
+    fd_set fds;
+    int ret = 0, maxfd;
+
+    while (!ivshmem_server_quit) {
+
+        FD_ZERO(&fds);
+        maxfd = 0;
+        ivshmem_server_get_fds(server, &fds, &maxfd);
+
+        ret = select(maxfd, &fds, NULL, NULL, NULL);
+
+        if (ret < 0) {
+            if (errno == EINTR) {
+                continue;
+            }
+
+            fprintf(stderr, "select error: %s\n", strerror(errno));
+            break;
+        }
+        if (ret == 0) {
+            continue;
+        }
+
+        if (ivshmem_server_handle_fds(server, &fds, maxfd) < 0) {
+            fprintf(stderr, "ivshmem_server_handle_fds() failed\n");
+            break;
+        }
+    }
+
+    return ret;
+}
+
+static void
+ivshmem_server_quit_cb(int signum)
+{
+    ivshmem_server_quit = 1;
+}
+
+int
+main(int argc, char *argv[])
+{
+    IvshmemServer server;
+    struct sigaction sa, sa_quit;
+    IvshmemServerArgs args = {
+        .verbose = IVSHMEM_SERVER_DEFAULT_VERBOSE,
+        .foreground = IVSHMEM_SERVER_DEFAULT_FOREGROUND,
+        .pid_file = IVSHMEM_SERVER_DEFAULT_PID_FILE,
+        .unix_socket_path = IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH,
+        .shm_path = IVSHMEM_SERVER_DEFAULT_SHM_PATH,
+        .shm_size = IVSHMEM_SERVER_DEFAULT_SHM_SIZE,
+        .n_vectors = IVSHMEM_SERVER_DEFAULT_N_VECTORS,
+    };
+    int ret = 1;
+
+    /* parse arguments, will exit on error */
+    ivshmem_server_parse_args(&args, argc, argv);
+
+    /* Ignore SIGPIPE, see this link for more info:
+     * http://www.mail-archive.com/libevent-users@monkey.org/msg01606.html */
+    sa.sa_handler = SIG_IGN;
+    sa.sa_flags = 0;
+    if (sigemptyset(&sa.sa_mask) == -1 ||
+        sigaction(SIGPIPE, &sa, 0) == -1) {
+        perror("failed to ignore SIGPIPE; sigaction");
+        goto err;
+    }
+
+    sa_quit.sa_handler = ivshmem_server_quit_cb;
+    sa_quit.sa_flags = 0;
+    if (sigemptyset(&sa_quit.sa_mask) == -1 ||
+        sigaction(SIGTERM, &sa_quit, 0) == -1) {
+        perror("failed to add SIGTERM handler; sigaction");
+        goto err;
+    }
+
+    /* init the ivshms structure */
+    if (ivshmem_server_init(&server, args.unix_socket_path, args.shm_path,
+                            args.shm_size, args.n_vectors, args.verbose) < 0) {
+        fprintf(stderr, "cannot init server\n");
+        goto err;
+    }
+
+    /* start the ivshmem server (open shm & unix socket) */
+    if (ivshmem_server_start(&server) < 0) {
+        fprintf(stderr, "cannot bind\n");
+        goto err;
+    }
+
+    /* daemonize if asked to */
+    if (!args.foreground) {
+        FILE *fp;
+
+        if (qemu_daemon(1, 1) < 0) {
+            fprintf(stderr, "cannot daemonize: %s\n", strerror(errno));
+            goto err_close;
+        }
+
+        /* write pid file */
+        fp = fopen(args.pid_file, "w");
+        if (fp == NULL) {
+            fprintf(stderr, "cannot write pid file: %s\n", strerror(errno));
+            goto err_close;
+        }
+
+        fprintf(fp, "%d\n", (int) getpid());
+        fclose(fp);
+    }
+
+    ivshmem_server_poll_events(&server);
+    fprintf(stdout, "server disconnected\n");
+    ret = 0;
+
+err_close:
+    ivshmem_server_close(&server);
+err:
+    return ret;
+}

diff --git a/cpu-exec.c b/cpu-exec.c
index 8fd56a6..7eef083 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c

@@ -477,7 +477,8 @@
                 /* see if we can patch the calling TB. When the TB
                    spans two pages, we cannot safely do a direct
                    jump. */
-                if (next_tb != 0 && tb->page_addr[1] == -1) {
+                if (next_tb != 0 && tb->page_addr[1] == -1
+                    && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
                     tb_add_jump((TranslationBlock *)(next_tb & ~TB_EXIT_MASK),
                                 next_tb & TB_EXIT_MASK, tb);
                 }

diff --git a/crypto/cipher-builtin.c b/crypto/cipher-builtin.c
index 30f4853..39e31a7 100644
--- a/crypto/cipher-builtin.c
+++ b/crypto/cipher-builtin.c

@@ -25,8 +25,7 @@
 struct QCryptoCipherBuiltinAES {
     AES_KEY encrypt_key;
     AES_KEY decrypt_key;
-    uint8_t *iv;
-    size_t niv;
+    uint8_t iv[AES_BLOCK_SIZE];
 };
 typedef struct QCryptoCipherBuiltinDESRFB QCryptoCipherBuiltinDESRFB;
 struct QCryptoCipherBuiltinDESRFB {
@@ -40,6 +39,7 @@
         QCryptoCipherBuiltinAES aes;
         QCryptoCipherBuiltinDESRFB desrfb;
     } state;
+    size_t blocksize;
     void (*free)(QCryptoCipher *cipher);
     int (*setiv)(QCryptoCipher *cipher,
                  const uint8_t *iv, size_t niv,
@@ -61,7 +61,6 @@
 {
     QCryptoCipherBuiltin *ctxt = cipher->opaque;
 
-    g_free(ctxt->state.aes.iv);
     g_free(ctxt);
     cipher->opaque = NULL;
 }
@@ -145,15 +144,13 @@
                                      Error **errp)
 {
     QCryptoCipherBuiltin *ctxt = cipher->opaque;
-    if (niv != 16) {
-        error_setg(errp, "IV must be 16 bytes not %zu", niv);
+    if (niv != AES_BLOCK_SIZE) {
+        error_setg(errp, "IV must be %d bytes not %zu",
+                   AES_BLOCK_SIZE, niv);
         return -1;
     }
 
-    g_free(ctxt->state.aes.iv);
-    ctxt->state.aes.iv = g_new0(uint8_t, niv);
-    memcpy(ctxt->state.aes.iv, iv, niv);
-    ctxt->state.aes.niv = niv;
+    memcpy(ctxt->state.aes.iv, iv, AES_BLOCK_SIZE);
 
     return 0;
 }
@@ -185,6 +182,7 @@
         goto error;
     }
 
+    ctxt->blocksize = AES_BLOCK_SIZE;
     ctxt->free = qcrypto_cipher_free_aes;
     ctxt->setiv = qcrypto_cipher_setiv_aes;
     ctxt->encrypt = qcrypto_cipher_encrypt_aes;
@@ -286,6 +284,7 @@
     memcpy(ctxt->state.desrfb.key, key, nkey);
     ctxt->state.desrfb.nkey = nkey;
 
+    ctxt->blocksize = 8;
     ctxt->free = qcrypto_cipher_free_des_rfb;
     ctxt->setiv = qcrypto_cipher_setiv_des_rfb;
     ctxt->encrypt = qcrypto_cipher_encrypt_des_rfb;
@@ -374,6 +373,12 @@
 {
     QCryptoCipherBuiltin *ctxt = cipher->opaque;
 
+    if (len % ctxt->blocksize) {
+        error_setg(errp, "Length %zu must be a multiple of block size %zu",
+                   len, ctxt->blocksize);
+        return -1;
+    }
+
     return ctxt->encrypt(cipher, in, out, len, errp);
 }
 
@@ -386,6 +391,12 @@
 {
     QCryptoCipherBuiltin *ctxt = cipher->opaque;
 
+    if (len % ctxt->blocksize) {
+        error_setg(errp, "Length %zu must be a multiple of block size %zu",
+                   len, ctxt->blocksize);
+        return -1;
+    }
+
     return ctxt->decrypt(cipher, in, out, len, errp);
 }
 

diff --git a/crypto/cipher-gcrypt.c b/crypto/cipher-gcrypt.c
index 8cfc562..c4f8114 100644
--- a/crypto/cipher-gcrypt.c
+++ b/crypto/cipher-gcrypt.c

@@ -34,6 +34,11 @@
     }
 }
 
+typedef struct QCryptoCipherGcrypt QCryptoCipherGcrypt;
+struct QCryptoCipherGcrypt {
+    gcry_cipher_hd_t handle;
+    size_t blocksize;
+};
 
 QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
                                   QCryptoCipherMode mode,
@@ -41,7 +46,7 @@
                                   Error **errp)
 {
     QCryptoCipher *cipher;
-    gcry_cipher_hd_t handle;
+    QCryptoCipherGcrypt *ctx;
     gcry_error_t err;
     int gcryalg, gcrymode;
 
@@ -87,7 +92,9 @@
     cipher->alg = alg;
     cipher->mode = mode;
 
-    err = gcry_cipher_open(&handle, gcryalg, gcrymode, 0);
+    ctx = g_new0(QCryptoCipherGcrypt, 1);
+
+    err = gcry_cipher_open(&ctx->handle, gcryalg, gcrymode, 0);
     if (err != 0) {
         error_setg(errp, "Cannot initialize cipher: %s",
                    gcry_strerror(err));
@@ -100,10 +107,12 @@
          * bizarre RFB variant of DES :-)
          */
         uint8_t *rfbkey = qcrypto_cipher_munge_des_rfb_key(key, nkey);
-        err = gcry_cipher_setkey(handle, rfbkey, nkey);
+        err = gcry_cipher_setkey(ctx->handle, rfbkey, nkey);
         g_free(rfbkey);
+        ctx->blocksize = 8;
     } else {
-        err = gcry_cipher_setkey(handle, key, nkey);
+        err = gcry_cipher_setkey(ctx->handle, key, nkey);
+        ctx->blocksize = 16;
     }
     if (err != 0) {
         error_setg(errp, "Cannot set key: %s",
@@ -111,11 +120,12 @@
         goto error;
     }
 
-    cipher->opaque = handle;
+    cipher->opaque = ctx;
     return cipher;
 
  error:
-    gcry_cipher_close(handle);
+    gcry_cipher_close(ctx->handle);
+    g_free(ctx);
     g_free(cipher);
     return NULL;
 }
@@ -123,12 +133,13 @@
 
 void qcrypto_cipher_free(QCryptoCipher *cipher)
 {
-    gcry_cipher_hd_t handle;
+    QCryptoCipherGcrypt *ctx;
     if (!cipher) {
         return;
     }
-    handle = cipher->opaque;
-    gcry_cipher_close(handle);
+    ctx = cipher->opaque;
+    gcry_cipher_close(ctx->handle);
+    g_free(ctx);
     g_free(cipher);
 }
 
@@ -139,10 +150,16 @@
                            size_t len,
                            Error **errp)
 {
-    gcry_cipher_hd_t handle = cipher->opaque;
+    QCryptoCipherGcrypt *ctx = cipher->opaque;
     gcry_error_t err;
 
-    err = gcry_cipher_encrypt(handle,
+    if (len % ctx->blocksize) {
+        error_setg(errp, "Length %zu must be a multiple of block size %zu",
+                   len, ctx->blocksize);
+        return -1;
+    }
+
+    err = gcry_cipher_encrypt(ctx->handle,
                               out, len,
                               in, len);
     if (err != 0) {
@@ -161,10 +178,16 @@
                            size_t len,
                            Error **errp)
 {
-    gcry_cipher_hd_t handle = cipher->opaque;
+    QCryptoCipherGcrypt *ctx = cipher->opaque;
     gcry_error_t err;
 
-    err = gcry_cipher_decrypt(handle,
+    if (len % ctx->blocksize) {
+        error_setg(errp, "Length %zu must be a multiple of block size %zu",
+                   len, ctx->blocksize);
+        return -1;
+    }
+
+    err = gcry_cipher_decrypt(ctx->handle,
                               out, len,
                               in, len);
     if (err != 0) {
@@ -180,11 +203,17 @@
                          const uint8_t *iv, size_t niv,
                          Error **errp)
 {
-    gcry_cipher_hd_t handle = cipher->opaque;
+    QCryptoCipherGcrypt *ctx = cipher->opaque;
     gcry_error_t err;
 
-    gcry_cipher_reset(handle);
-    err = gcry_cipher_setiv(handle, iv, niv);
+    if (niv != ctx->blocksize) {
+        error_setg(errp, "Expected IV size %zu not %zu",
+                   ctx->blocksize, niv);
+        return -1;
+    }
+
+    gcry_cipher_reset(ctx->handle);
+    err = gcry_cipher_setiv(ctx->handle, iv, niv);
     if (err != 0) {
         error_setg(errp, "Cannot set IV: %s",
                    gcry_strerror(err));

diff --git a/crypto/cipher-nettle.c b/crypto/cipher-nettle.c
index b01cb1c..7449338 100644
--- a/crypto/cipher-nettle.c
+++ b/crypto/cipher-nettle.c

@@ -69,7 +69,7 @@
     nettle_cipher_func *alg_encrypt;
     nettle_cipher_func *alg_decrypt;
     uint8_t *iv;
-    size_t niv;
+    size_t blocksize;
 };
 
 bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg)
@@ -125,7 +125,7 @@
         ctx->alg_encrypt = des_encrypt_wrapper;
         ctx->alg_decrypt = des_decrypt_wrapper;
 
-        ctx->niv = DES_BLOCK_SIZE;
+        ctx->blocksize = DES_BLOCK_SIZE;
         break;
 
     case QCRYPTO_CIPHER_ALG_AES_128:
@@ -140,14 +140,14 @@
         ctx->alg_encrypt = aes_encrypt_wrapper;
         ctx->alg_decrypt = aes_decrypt_wrapper;
 
-        ctx->niv = AES_BLOCK_SIZE;
+        ctx->blocksize = AES_BLOCK_SIZE;
         break;
     default:
         error_setg(errp, "Unsupported cipher algorithm %d", alg);
         goto error;
     }
 
-    ctx->iv = g_new0(uint8_t, ctx->niv);
+    ctx->iv = g_new0(uint8_t, ctx->blocksize);
     cipher->opaque = ctx;
 
     return cipher;
@@ -184,6 +184,12 @@
 {
     QCryptoCipherNettle *ctx = cipher->opaque;
 
+    if (len % ctx->blocksize) {
+        error_setg(errp, "Length %zu must be a multiple of block size %zu",
+                   len, ctx->blocksize);
+        return -1;
+    }
+
     switch (cipher->mode) {
     case QCRYPTO_CIPHER_MODE_ECB:
         ctx->alg_encrypt(ctx->ctx_encrypt, len, out, in);
@@ -191,7 +197,7 @@
 
     case QCRYPTO_CIPHER_MODE_CBC:
         cbc_encrypt(ctx->ctx_encrypt, ctx->alg_encrypt,
-                    ctx->niv, ctx->iv,
+                    ctx->blocksize, ctx->iv,
                     len, out, in);
         break;
     default:
@@ -211,6 +217,12 @@
 {
     QCryptoCipherNettle *ctx = cipher->opaque;
 
+    if (len % ctx->blocksize) {
+        error_setg(errp, "Length %zu must be a multiple of block size %zu",
+                   len, ctx->blocksize);
+        return -1;
+    }
+
     switch (cipher->mode) {
     case QCRYPTO_CIPHER_MODE_ECB:
         ctx->alg_decrypt(ctx->ctx_decrypt ? ctx->ctx_decrypt : ctx->ctx_encrypt,
@@ -219,7 +231,7 @@
 
     case QCRYPTO_CIPHER_MODE_CBC:
         cbc_decrypt(ctx->ctx_decrypt ? ctx->ctx_decrypt : ctx->ctx_encrypt,
-                    ctx->alg_decrypt, ctx->niv, ctx->iv,
+                    ctx->alg_decrypt, ctx->blocksize, ctx->iv,
                     len, out, in);
         break;
     default:
@@ -235,9 +247,9 @@
                          Error **errp)
 {
     QCryptoCipherNettle *ctx = cipher->opaque;
-    if (niv != ctx->niv) {
+    if (niv != ctx->blocksize) {
         error_setg(errp, "Expected IV size %zu not %zu",
-                   ctx->niv, niv);
+                   ctx->blocksize, niv);
         return -1;
     }
     memcpy(ctx->iv, iv, niv);

diff --git a/crypto/cipher.c b/crypto/cipher.c
index 024a00c..c8bd180 100644
--- a/crypto/cipher.c
+++ b/crypto/cipher.c

@@ -47,7 +47,7 @@
     return true;
 }
 
-#if defined(CONFIG_GNUTLS_GCRYPT) || defined(CONFIG_GNUTLS_NETTLE)
+#if defined(CONFIG_GCRYPT) || defined(CONFIG_NETTLE)
 static uint8_t *
 qcrypto_cipher_munge_des_rfb_key(const uint8_t *key,
                                  size_t nkey)
@@ -63,11 +63,11 @@
     }
     return ret;
 }
-#endif /* CONFIG_GNUTLS_GCRYPT || CONFIG_GNUTLS_NETTLE */
+#endif /* CONFIG_GCRYPT || CONFIG_NETTLE */
 
-#ifdef CONFIG_GNUTLS_GCRYPT
+#ifdef CONFIG_GCRYPT
 #include "crypto/cipher-gcrypt.c"
-#elif defined CONFIG_GNUTLS_NETTLE
+#elif defined CONFIG_NETTLE
 #include "crypto/cipher-nettle.c"
 #else
 #include "crypto/cipher-builtin.c"

diff --git a/crypto/init.c b/crypto/init.c
index 7447882..d94faac 100644
--- a/crypto/init.c
+++ b/crypto/init.c

@@ -24,8 +24,9 @@
 #ifdef CONFIG_GNUTLS
 #include <gnutls/gnutls.h>
 #include <gnutls/crypto.h>
+#endif
 
-#ifdef CONFIG_GNUTLS_GCRYPT
+#ifdef CONFIG_GCRYPT
 #include <gcrypt.h>
 #endif
 
@@ -37,6 +38,7 @@
  *  - When GNUTLS >= 2.12, we must not initialize gcrypt threading
  *    because GNUTLS will do that itself
  *  - When GNUTLS < 2.12 we must always initialize gcrypt threading
+ *  - When GNUTLS is disabled we must always initialize gcrypt threading
  *
  * But....
  *
@@ -47,12 +49,15 @@
  *
  *   - gcrypt < 1.6.0
  * AND
- *   - gnutls < 2.12
+ *      - gnutls < 2.12
+ *   OR
+ *      - gnutls is disabled
  *
  */
 
-#if (defined(CONFIG_GNUTLS_GCRYPT) &&           \
-     (!defined(GNUTLS_VERSION_NUMBER) ||        \
+#if (defined(CONFIG_GCRYPT) &&                  \
+     (!defined(CONFIG_GNUTLS) ||                \
+      !defined(GNUTLS_VERSION_NUMBER) ||       \
       (GNUTLS_VERSION_NUMBER < 0x020c00)) &&    \
      (!defined(GCRYPT_VERSION_NUMBER) ||        \
       (GCRYPT_VERSION_NUMBER < 0x010600)))
@@ -113,6 +118,7 @@
 
 int qcrypto_init(Error **errp)
 {
+#ifdef CONFIG_GNUTLS
     int ret;
     ret = gnutls_global_init();
     if (ret < 0) {
@@ -125,8 +131,9 @@
     gnutls_global_set_log_level(10);
     gnutls_global_set_log_function(qcrypto_gnutls_log);
 #endif
+#endif
 
-#ifdef CONFIG_GNUTLS_GCRYPT
+#ifdef CONFIG_GCRYPT
     if (!gcry_check_version(GCRYPT_VERSION)) {
         error_setg(errp, "Unable to initialize gcrypt");
         return -1;
@@ -139,12 +146,3 @@
 
     return 0;
 }
-
-#else /* ! CONFIG_GNUTLS */
-
-int qcrypto_init(Error **errp G_GNUC_UNUSED)
-{
-    return 0;
-}
-
-#endif /* ! CONFIG_GNUTLS */

diff --git a/default-configs/pci.mak b/default-configs/pci.mak
index 7e10903..f250119 100644
--- a/default-configs/pci.mak
+++ b/default-configs/pci.mak

@@ -35,5 +35,5 @@
 CONFIG_EDU=y
 CONFIG_VGA=y
 CONFIG_VGA_PCI=y
-CONFIG_IVSHMEM=$(CONFIG_KVM)
+CONFIG_IVSHMEM=$(CONFIG_POSIX)
 CONFIG_ROCKER=y

diff --git a/disas/mips.c b/disas/mips.c
index 01336a8..bf0bbaf 100644
--- a/disas/mips.c
+++ b/disas/mips.c

@@ -2420,9 +2420,11 @@
 {"hibernate","",        0x42000023, 0xffffffff,	0, 			0,		V1	},
 {"ins",     "t,r,+A,+B", 0x7c000004, 0xfc00003f, WR_t|RD_s,    		0,		I33	},
 {"jr",      "s",	0x00000008, 0xfc1fffff,	UBD|RD_s,		0,		I1	},
+{"jr",      "s",	0x00000009, 0xfc1fffff,	UBD|RD_s,		0,		I32R6	}, /* jalr */
 /* jr.hb is officially MIPS{32,64}R2, but it works on R1 as jr with
    the same hazard barrier effect.  */
 {"jr.hb",   "s",	0x00000408, 0xfc1fffff,	UBD|RD_s,		0,		I32	},
+{"jr.hb",   "s",	0x00000409, 0xfc1fffff,	UBD|RD_s,		0,		I32R6	}, /* jalr.hb */
 {"j",       "s",	0x00000008, 0xfc1fffff,	UBD|RD_s,		0,		I1	}, /* jr */
 /* SVR4 PIC code requires special handling for j, so it must be a
    macro.  */

diff --git a/docs/qmp-events.txt b/docs/qmp-events.txt
index d92cc48..d2f1ce4 100644
--- a/docs/qmp-events.txt
+++ b/docs/qmp-events.txt

@@ -28,6 +28,8 @@
     "data": { "actual": 944766976 },
     "timestamp": { "seconds": 1267020223, "microseconds": 435656 } }
 
+Note: this event is rate-limited.
+
 BLOCK_IMAGE_CORRUPTED
 ---------------------
 
@@ -296,6 +298,8 @@
      "data": { "reference": "usr1", "sector-num": 345435, "sectors-count": 5 },
      "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
 
+Note: this event is rate-limited.
+
 QUORUM_REPORT_BAD
 -----------------
 
@@ -318,6 +322,8 @@
      "data": { "node-name": "1.raw", "sector-num": 345435, "sectors-count": 5 },
      "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
 
+Note: this event is rate-limited.
+
 RESET
 -----
 
@@ -358,6 +364,8 @@
     "data": { "offset": 78 },
     "timestamp": { "seconds": 1267020223, "microseconds": 435656 } }
 
+Note: this event is rate-limited.
+
 SHUTDOWN
 --------
 
@@ -632,6 +640,8 @@
     "data": { "id": "channel0", "open": true },
     "timestamp": { "seconds": 1401385907, "microseconds": 422329 } }
 
+Note: this event is rate-limited separately for each "id".
+
 WAKEUP
 ------
 
@@ -662,3 +672,5 @@
 
 Note: If action is "reset", "shutdown", or "pause" the WATCHDOG event is
 followed respectively by the RESET, SHUTDOWN, or STOP events.
+
+Note: this event is rate-limited.

diff --git a/docs/qmp-spec.txt b/docs/qmp-spec.txt
index 4c28cd9..4fb10a5 100644
--- a/docs/qmp-spec.txt
+++ b/docs/qmp-spec.txt

@@ -175,6 +175,11 @@
 For a listing of supported asynchronous events, please, refer to the
 qmp-events.txt file.
 
+Some events are rate-limited to at most one per second.  If additional
+"similar" events arrive within one second, all but the last one are
+dropped, and the last one is delayed.  "Similar" normally means same
+event type.  See qmp-events.txt for details.
+
 2.5 QGA Synchronization
 -----------------------
 

diff --git a/docs/specs/ivshmem_device_spec.txt b/docs/specs/ivshmem_device_spec.txt
index 667a862..d318d65 100644
--- a/docs/specs/ivshmem_device_spec.txt
+++ b/docs/specs/ivshmem_device_spec.txt

@@ -2,30 +2,106 @@
 Device Specification for Inter-VM shared memory device
 ------------------------------------------------------
 
-The Inter-VM shared memory device is designed to share a region of memory to
-userspace in multiple virtual guests.  The memory region does not belong to any
-guest, but is a POSIX memory object on the host.  Optionally, the device may
-support sending interrupts to other guests sharing the same memory region.
+The Inter-VM shared memory device is designed to share a memory region (created
+on the host via the POSIX shared memory API) between multiple QEMU processes
+running different guests. In order for all guests to be able to pick up the
+shared memory area, it is modeled by QEMU as a PCI device exposing said memory
+to the guest as a PCI BAR.
+The memory region does not belong to any guest, but is a POSIX memory object on
+the host. The host can access this shared memory if needed.
+
+The device also provides an optional communication mechanism between guests
+sharing the same memory object. More details about that in the section 'Guest to
+guest communication' section.
 
 
 The Inter-VM PCI device
 -----------------------
 
-*BARs*
+From the VM point of view, the ivshmem PCI device supports three BARs.
 
-The device supports three BARs.  BAR0 is a 1 Kbyte MMIO region to support
-registers.  BAR1 is used for MSI-X when it is enabled in the device.  BAR2 is
-used to map the shared memory object from the host.  The size of BAR2 is
-specified when the guest is started and must be a power of 2 in size.
+- BAR0 is a 1 Kbyte MMIO region to support registers and interrupts when MSI is
+  not used.
+- BAR1 is used for MSI-X when it is enabled in the device.
+- BAR2 is used to access the shared memory object.
 
-*Registers*
+It is your choice how to use the device but you must choose between two
+behaviors :
 
-The device currently supports 4 registers of 32-bits each.  Registers
-are used for synchronization between guests sharing the same memory object when
-interrupts are supported (this requires using the shared memory server).
+- basically, if you only need the shared memory part, you will map BAR2.
+  This way, you have access to the shared memory in guest and can use it as you
+  see fit (memnic, for example, uses it in userland
+  http://dpdk.org/browse/memnic).
 
-The server assigns each VM an ID number and sends this ID number to the QEMU
-process when the guest starts.
+- BAR0 and BAR1 are used to implement an optional communication mechanism
+  through interrupts in the guests. If you need an event mechanism between the
+  guests accessing the shared memory, you will most likely want to write a
+  kernel driver that will handle interrupts. See details in the section 'Guest
+  to guest communication' section.
+
+The behavior is chosen when starting your QEMU processes:
+- no communication mechanism needed, the first QEMU to start creates the shared
+  memory on the host, subsequent QEMU processes will use it.
+
+- communication mechanism needed, an ivshmem server must be started before any
+  QEMU processes, then each QEMU process connects to the server unix socket.
+
+For more details on the QEMU ivshmem parameters, see qemu-doc documentation.
+
+
+Guest to guest communication
+----------------------------
+
+This section details the communication mechanism between the guests accessing
+the ivhsmem shared memory.
+
+*ivshmem server*
+
+This server code is available in qemu.git/contrib/ivshmem-server.
+
+The server must be started on the host before any guest.
+It creates a shared memory object then waits for clients to connect on a unix
+socket. All the messages are little-endian int64_t integer.
+
+For each client (QEMU process) that connects to the server:
+- the server sends a protocol version, if client does not support it, the client
+  closes the communication,
+- the server assigns an ID for this client and sends this ID to him as the first
+  message,
+- the server sends a fd to the shared memory object to this client,
+- the server creates a new set of host eventfds associated to the new client and
+  sends this set to all already connected clients,
+- finally, the server sends all the eventfds sets for all clients to the new
+  client.
+
+The server signals all clients when one of them disconnects.
+
+The client IDs are limited to 16 bits because of the current implementation (see
+Doorbell register in 'PCI device registers' subsection). Hence only 65536
+clients are supported.
+
+All the file descriptors (fd to the shared memory, eventfds for each client)
+are passed to clients using SCM_RIGHTS over the server unix socket.
+
+Apart from the current ivshmem implementation in QEMU, an ivshmem client has
+been provided in qemu.git/contrib/ivshmem-client for debug.
+
+*QEMU as an ivshmem client*
+
+At initialisation, when creating the ivshmem device, QEMU first receives a
+protocol version and closes communication with server if it does not match.
+Then, QEMU gets its ID from the server then makes it available through BAR0
+IVPosition register for the VM to use (see 'PCI device registers' subsection).
+QEMU then uses the fd to the shared memory to map it to BAR2.
+eventfds for all other clients received from the server are stored to implement
+BAR0 Doorbell register (see 'PCI device registers' subsection).
+Finally, eventfds assigned to this QEMU process are used to send interrupts in
+this VM.
+
+*PCI device registers*
+
+From the VM point of view, the ivshmem PCI device supports 4 registers of
+32-bits each.
 
 enum ivshmem_registers {
     IntrMask = 0,
@@ -49,8 +125,8 @@
 IVPosition Register: The IVPosition register is read-only and reports the
 guest's ID number.  The guest IDs are non-negative integers.  When using the
 server, since the server is a separate process, the VM ID will only be set when
-the device is ready (shared memory is received from the server and accessible via
-the device).  If the device is not ready, the IVPosition will return -1.
+the device is ready (shared memory is received from the server and accessible
+via the device).  If the device is not ready, the IVPosition will return -1.
 Applications should ensure that they have a valid VM ID before accessing the
 shared memory.
 
@@ -59,8 +135,8 @@
 two 16-bit fields.  The high 16-bits are the guest ID to interrupt and the low
 16-bits are the interrupt vector to trigger.  The semantics of the value
 written to the doorbell depends on whether the device is using MSI or a regular
-pin-based interrupt.  In short, MSI uses vectors while regular interrupts set the
-status register.
+pin-based interrupt.  In short, MSI uses vectors while regular interrupts set
+the status register.
 
 Regular Interrupts
 
@@ -71,7 +147,7 @@
 
 Message Signalled Interrupts
 
-A ivshmem device may support multiple MSI vectors.  If so, the lower 16-bits
+An ivshmem device may support multiple MSI vectors.  If so, the lower 16-bits
 written to the Doorbell register must be between 0 and the maximum number of
 vectors the guest supports.  The lower 16 bits written to the doorbell is the
 MSI vector that will be raised in the destination guest.  The number of MSI
@@ -83,14 +159,3 @@
 supporting multiple MSI vectors can use different vectors to indicate different
 events have occurred.  The semantics of interrupt vectors are left to the
 user's discretion.
-
-
-Usage in the Guest
-------------------
-
-The shared memory device is intended to be used with the provided UIO driver.
-Very little configuration is needed.  The guest should map BAR0 to access the
-registers (an array of 32-bit ints allows simple writing) and map BAR2 to
-access the shared memory region itself.  The size of the shared memory region
-is specified when the guest (or shared memory server) is started.  A guest may
-map the whole shared memory region or only part of it.

diff --git a/hw/acpi/memory_hotplug.c b/hw/acpi/memory_hotplug.c
index 2ff0d5c..ce428df 100644
--- a/hw/acpi/memory_hotplug.c
+++ b/hw/acpi/memory_hotplug.c

@@ -238,10 +238,12 @@
 
     mdev->dimm = dev;
     mdev->is_enabled = true;
-    mdev->is_inserting = true;
+    if (dev->hotplugged) {
+        mdev->is_inserting = true;
 
-    /* do ACPI magic */
-    acpi_send_gpe_event(ar, irq, ACPI_MEMORY_HOTPLUG_STATUS);
+        /* do ACPI magic */
+        acpi_send_gpe_event(ar, irq, ACPI_MEMORY_HOTPLUG_STATUS);
+    }
     return;
 }
 

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 5d38c47..77d9267 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c

@@ -923,7 +923,7 @@
     qemu_irq pic[NUM_IRQS];
     MemoryRegion *sysmem = get_system_memory();
     int gic_version = vms->gic_version;
-    int n;
+    int n, max_cpus;
     MemoryRegion *ram = g_new(MemoryRegion, 1);
     const char *cpu_model = machine->cpu_model;
     VirtBoardInfo *vbi;
@@ -957,6 +957,22 @@
         exit(1);
     }
 
+    /* The maximum number of CPUs depends on the GIC version, or on how
+     * many redistributors we can fit into the memory map.
+     */
+    if (gic_version == 3) {
+        max_cpus = vbi->memmap[VIRT_GIC_REDIST].size / 0x20000;
+    } else {
+        max_cpus = GIC_NCPU;
+    }
+
+    if (smp_cpus > max_cpus) {
+        error_report("Number of SMP CPUs requested (%d) exceeds max CPUs "
+                     "supported by machine 'mach-virt' (%d)",
+                     smp_cpus, max_cpus);
+        exit(1);
+    }
+
     vbi->smp_cpus = smp_cpus;
 
     if (machine->ram_size > vbi->memmap[VIRT_MEM].size) {
@@ -1155,10 +1171,11 @@
 
     mc->desc = "ARM Virtual Machine",
     mc->init = machvirt_init;
-    /* Our maximum number of CPUs depends on how many redistributors
-     * we can fit into memory map
+    /* Start max_cpus at the maximum QEMU supports. We'll further restrict
+     * it later in machvirt_init, where we have more information about the
+     * configuration of the particular instance.
      */
-    mc->max_cpus = a15memmap[VIRT_GIC_REDIST].size / 0x20000;
+    mc->max_cpus = MAX_CPUMASK_BITS;
     mc->has_dynamic_sysbus = true;
     mc->block_default_type = IF_VIRTIO;
     mc->no_cdrom = 1;

diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c
index b36ca3d..87553bb 100644
--- a/hw/arm/xlnx-zynqmp.c
+++ b/hw/arm/xlnx-zynqmp.c

@@ -48,6 +48,14 @@
     21, 22,
 };
 
+static const uint64_t sdhci_addr[XLNX_ZYNQMP_NUM_SDHCI] = {
+    0xFF160000, 0xFF170000,
+};
+
+static const int sdhci_intr[XLNX_ZYNQMP_NUM_SDHCI] = {
+    48, 49,
+};
+
 typedef struct XlnxZynqMPGICRegion {
     int region_index;
     uint32_t address;
@@ -97,6 +105,13 @@
 
     object_initialize(&s->sata, sizeof(s->sata), TYPE_SYSBUS_AHCI);
     qdev_set_parent_bus(DEVICE(&s->sata), sysbus_get_default());
+
+    for (i = 0; i < XLNX_ZYNQMP_NUM_SDHCI; i++) {
+        object_initialize(&s->sdhci[i], sizeof(s->sdhci[i]),
+                          TYPE_SYSBUS_SDHCI);
+        qdev_set_parent_bus(DEVICE(&s->sdhci[i]),
+                            sysbus_get_default());
+    }
 }
 
 static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp)
@@ -258,6 +273,19 @@
 
     sysbus_mmio_map(SYS_BUS_DEVICE(&s->sata), 0, SATA_ADDR);
     sysbus_connect_irq(SYS_BUS_DEVICE(&s->sata), 0, gic_spi[SATA_INTR]);
+
+    for (i = 0; i < XLNX_ZYNQMP_NUM_SDHCI; i++) {
+        object_property_set_bool(OBJECT(&s->sdhci[i]), true,
+                                 "realized", &err);
+        if (err) {
+            error_propagate(errp, err);
+            return;
+        }
+        sysbus_mmio_map(SYS_BUS_DEVICE(&s->sdhci[i]), 0,
+                        sdhci_addr[i]);
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->sdhci[i]), 0,
+                           gic_spi[sdhci_intr[i]]);
+    }
 }
 
 static Property xlnx_zynqmp_props[] = {

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 6106e46..c42ddeb 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c

@@ -283,7 +283,8 @@
 
     /* Get this show started by hooking up our callbacks */
     aio_context_acquire(s->ctx);
-    aio_set_event_notifier(s->ctx, &s->host_notifier, handle_notify);
+    aio_set_event_notifier(s->ctx, &s->host_notifier, true,
+                           handle_notify);
     aio_context_release(s->ctx);
     return;
 
@@ -319,7 +320,7 @@
     aio_context_acquire(s->ctx);
 
     /* Stop notifications for new requests from guest */
-    aio_set_event_notifier(s->ctx, &s->host_notifier, NULL);
+    aio_set_event_notifier(s->ctx, &s->host_notifier, true, NULL);
 
     /* Drain and switch bs back to the QEMU main loop */
     blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context());

diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 6686a72..4292ece 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c

@@ -192,6 +192,8 @@
     uint8_t ro;               /* Is read-only           */
     uint8_t media_changed;    /* Is media changed       */
     uint8_t media_rate;       /* Data rate of medium    */
+
+    bool media_inserted;      /* Is there a medium in the tray */
 } FDrive;
 
 static void fd_init(FDrive *drv)
@@ -261,7 +263,7 @@
 #endif
         drv->head = head;
         if (drv->track != track) {
-            if (drv->blk != NULL && blk_is_inserted(drv->blk)) {
+            if (drv->media_inserted) {
                 drv->media_changed = 0;
             }
             ret = 1;
@@ -270,7 +272,7 @@
         drv->sect = sect;
     }
 
-    if (drv->blk == NULL || !blk_is_inserted(drv->blk)) {
+    if (!drv->media_inserted) {
         ret = 2;
     }
 
@@ -296,7 +298,7 @@
         ro = blk_is_read_only(drv->blk);
         pick_geometry(drv->blk, &nb_heads, &max_track,
                       &last_sect, drv->drive, &drive, &rate);
-        if (!blk_is_inserted(drv->blk)) {
+        if (!drv->media_inserted) {
             FLOPPY_DPRINTF("No disk in drive\n");
         } else {
             FLOPPY_DPRINTF("Floppy disk (%d h %d t %d s) %s\n", nb_heads,
@@ -692,7 +694,7 @@
 {
     FDrive *drive = opaque;
 
-    return (drive->blk != NULL && drive->media_changed != 1);
+    return (drive->media_inserted && drive->media_changed != 1);
 }
 
 static const VMStateDescription vmstate_fdrive_media_changed = {
@@ -2184,12 +2186,21 @@
 {
     FDrive *drive = opaque;
 
+    drive->media_inserted = load && drive->blk && blk_is_inserted(drive->blk);
+
     drive->media_changed = 1;
     fd_revalidate(drive);
 }
 
+static bool fdctrl_is_tray_open(void *opaque)
+{
+    FDrive *drive = opaque;
+    return !drive->media_inserted;
+}
+
 static const BlockDevOps fdctrl_block_ops = {
     .change_media_cb = fdctrl_change_cb,
+    .is_tray_open = fdctrl_is_tray_open,
 };
 
 /* Init functions */
@@ -2217,6 +2228,7 @@
         fdctrl_change_cb(drive, 0);
         if (drive->blk) {
             blk_set_dev_ops(drive->blk, &fdctrl_block_ops, drive);
+            drive->media_inserted = blk_is_inserted(drive->blk);
         }
     }
 }

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 8beb26b..093e475 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c

@@ -798,6 +798,11 @@
 static void virtio_blk_save(QEMUFile *f, void *opaque)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
+    VirtIOBlock *s = VIRTIO_BLK(vdev);
+
+    if (s->dataplane) {
+        virtio_blk_data_plane_stop(s->dataplane);
+    }
 
     virtio_save(vdev, f);
 }
@@ -839,10 +844,7 @@
         req->next = s->rq;
         s->rq = req;
 
-        virtqueue_map_sg(req->elem.in_sg, req->elem.in_addr,
-            req->elem.in_num, 1);
-        virtqueue_map_sg(req->elem.out_sg, req->elem.out_addr,
-            req->elem.out_num, 0);
+        virtqueue_map(&req->elem);
     }
 
     return 0;
@@ -975,7 +977,7 @@
     DEFINE_PROP_STRING("serial", VirtIOBlock, conf.serial),
     DEFINE_PROP_BIT("config-wce", VirtIOBlock, conf.config_wce, 0, true),
 #ifdef __linux__
-    DEFINE_PROP_BIT("scsi", VirtIOBlock, conf.scsi, 0, true),
+    DEFINE_PROP_BIT("scsi", VirtIOBlock, conf.scsi, 0, false),
 #endif
     DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0,
                     true),

diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 36d7398..1bbc111 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c

@@ -931,9 +931,11 @@
     blk_attach_dev_nofail(blkdev->blk, blkdev);
     blkdev->file_size = blk_getlength(blkdev->blk);
     if (blkdev->file_size < 0) {
+        BlockDriverState *bs = blk_bs(blkdev->blk);
+        const char *drv_name = bs ? bdrv_get_format_name(bs) : NULL;
         xen_be_printf(&blkdev->xendev, 1, "blk_getlength: %d (%s) | drv %s\n",
                       (int)blkdev->file_size, strerror(-blkdev->file_size),
-                      bdrv_get_format_name(blk_bs(blkdev->blk)) ?: "-");
+                      drv_name ?: "-");
         blkdev->file_size = 0;
     }
 

diff --git a/hw/char/escc.c b/hw/char/escc.c
index ba653ef..9816154 100644
--- a/hw/char/escc.c
+++ b/hw/char/escc.c

@@ -1035,6 +1035,7 @@
     dc->reset = escc_reset;
     dc->vmsd = &vmstate_escc;
     dc->props = escc_properties;
+    set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
 }
 
 static const TypeInfo escc_info = {

diff --git a/hw/char/imx_serial.c b/hw/char/imx_serial.c
index f0c4c72..f30f9c2 100644
--- a/hw/char/imx_serial.c
+++ b/hw/char/imx_serial.c

@@ -22,25 +22,17 @@
 #include "sysemu/sysemu.h"
 #include "sysemu/char.h"
 
-//#define DEBUG_SERIAL 1
-#ifdef DEBUG_SERIAL
-#define DPRINTF(fmt, args...) \
-do { printf("%s: " fmt , TYPE_IMX_SERIAL, ##args); } while (0)
-#else
-#define DPRINTF(fmt, args...) do {} while (0)
+#ifndef DEBUG_IMX_UART
+#define DEBUG_IMX_UART 0
 #endif
 
-/*
- * Define to 1 for messages about attempts to
- * access unimplemented registers or similar.
- */
-//#define DEBUG_IMPLEMENTATION 1
-#ifdef DEBUG_IMPLEMENTATION
-#  define IPRINTF(fmt, args...) \
-    do  { fprintf(stderr, "%s: " fmt, TYPE_IMX_SERIAL, ##args); } while (0)
-#else
-#  define IPRINTF(fmt, args...) do {} while (0)
-#endif
+#define DPRINTF(fmt, args...) \
+    do { \
+        if (DEBUG_IMX_UART) { \
+            fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_SERIAL, \
+                                             __func__, ##args); \
+        } \
+    } while (0)
 
 static const VMStateDescription vmstate_imx_serial = {
     .name = TYPE_IMX_SERIAL,
@@ -115,7 +107,8 @@
     IMXSerialState *s = (IMXSerialState *)opaque;
     uint32_t c;
 
-    DPRINTF("read(offset=%x)\n", offset >> 2);
+    DPRINTF("read(offset=0x%" HWADDR_PRIx ")\n", offset);
+
     switch (offset >> 2) {
     case 0x0: /* URXD */
         c = s->readbuff;
@@ -167,7 +160,8 @@
         return 0x0; /* TODO */
 
     default:
-        IPRINTF("%s: bad offset: 0x%x\n", __func__, (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_SERIAL, __func__, offset);
         return 0;
     }
 }
@@ -178,9 +172,8 @@
     IMXSerialState *s = (IMXSerialState *)opaque;
     unsigned char ch;
 
-    DPRINTF("write(offset=%x, value = %x) to %s\n",
-            offset >> 2,
-            (unsigned int)value, s->chr ? s->chr->label : "NODEV");
+    DPRINTF("write(offset=0x%" HWADDR_PRIx ", value = 0x%x) to %s\n",
+            offset, (unsigned int)value, s->chr ? s->chr->label : "NODEV");
 
     switch (offset >> 2) {
     case 0x10: /* UTXD */
@@ -198,7 +191,9 @@
 
     case 0x20: /* UCR1 */
         s->ucr1 = value & 0xffff;
+
         DPRINTF("write(ucr1=%x)\n", (unsigned int)value);
+
         imx_update(s);
         break;
 
@@ -266,12 +261,14 @@
 
     case 0x2d: /* UTS1 */
     case 0x23: /* UCR4 */
-        IPRINTF("Unimplemented Register %x written to\n", offset >> 2);
+        qemu_log_mask(LOG_UNIMP, "[%s]%s: Unimplemented reg 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_SERIAL, __func__, offset);
         /* TODO */
         break;
 
     default:
-        IPRINTF("%s: Bad offset 0x%x\n", __func__, (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_SERIAL, __func__, offset);
     }
 }
 
@@ -284,7 +281,9 @@
 static void imx_put_data(void *opaque, uint32_t value)
 {
     IMXSerialState *s = (IMXSerialState *)opaque;
+
     DPRINTF("received char\n");
+
     s->usr1 |= USR1_RRDY;
     s->usr2 |= USR2_RDR;
     s->uts1 &= ~UTS1_RXEMPTY;
@@ -319,8 +318,7 @@
         qemu_chr_add_handlers(s->chr, imx_can_receive, imx_receive,
                               imx_event, s);
     } else {
-        DPRINTF("No char dev for uart at 0x%lx\n",
-                (unsigned long)s->iomem.ram_addr);
+        DPRINTF("No char dev for uart\n");
     }
 }
 

diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c
index be97058..497b0af 100644
--- a/hw/char/virtio-serial-bus.c
+++ b/hw/char/virtio-serial-bus.c

@@ -705,10 +705,7 @@
 
                 qemu_get_buffer(f, (unsigned char *)&port->elem,
                                 sizeof(port->elem));
-                virtqueue_map_sg(port->elem.in_sg, port->elem.in_addr,
-                                 port->elem.in_num, 1);
-                virtqueue_map_sg(port->elem.out_sg, port->elem.out_addr,
-                                 port->elem.out_num, 1);
+                virtqueue_map(&port->elem);
 
                 /*
                  *  Port was throttled on source machine.  Let's

diff --git a/hw/gpio/imx_gpio.c b/hw/gpio/imx_gpio.c
index d56ffcd..3170585 100644
--- a/hw/gpio/imx_gpio.c
+++ b/hw/gpio/imx_gpio.c

@@ -29,11 +29,12 @@
 } IMXGPIOLevel;
 
 #define DPRINTF(fmt, args...) \
-          do { \
-              if (DEBUG_IMX_GPIO) { \
-                  fprintf(stderr, "%s: " fmt , __func__, ##args); \
-              } \
-          } while (0)
+    do { \
+        if (DEBUG_IMX_GPIO) { \
+            fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_GPIO, \
+                                             __func__, ##args); \
+        } \
+    } while (0)
 
 static const char *imx_gpio_reg_name(uint32_t reg)
 {
@@ -176,19 +177,19 @@
         if (s->has_edge_sel) {
             reg_value = s->edge_sel;
         } else {
-            qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: EDGE_SEL register not "
+            qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: EDGE_SEL register not "
                           "present on this version of GPIO device\n",
                           TYPE_IMX_GPIO, __func__);
         }
         break;
 
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad register at offset %d\n",
-                      TYPE_IMX_GPIO, __func__, (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_GPIO, __func__, offset);
         break;
     }
 
-    DPRINTF("(%s) = 0x%"PRIx32"\n", imx_gpio_reg_name(offset), reg_value);
+    DPRINTF("(%s) = 0x%" PRIx32 "\n", imx_gpio_reg_name(offset), reg_value);
 
     return reg_value;
 }
@@ -198,7 +199,7 @@
 {
     IMXGPIOState *s = IMX_GPIO(opaque);
 
-    DPRINTF("(%s, value = 0x%"PRIx32")\n", imx_gpio_reg_name(offset),
+    DPRINTF("(%s, value = 0x%" PRIx32 ")\n", imx_gpio_reg_name(offset),
             (uint32_t)value);
 
     switch (offset) {
@@ -238,15 +239,15 @@
             s->edge_sel = value;
             imx_gpio_set_all_int_lines(s);
         } else {
-            qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: EDGE_SEL register not "
+            qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: EDGE_SEL register not "
                           "present on this version of GPIO device\n",
                           TYPE_IMX_GPIO, __func__);
         }
         break;
 
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad register at offset %d\n",
-                      TYPE_IMX_GPIO, __func__, (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_GPIO, __func__, offset);
         break;
     }
 

diff --git a/hw/i2c/imx_i2c.c b/hw/i2c/imx_i2c.c
index 8474872..cb62c7a 100644
--- a/hw/i2c/imx_i2c.c
+++ b/hw/i2c/imx_i2c.c

@@ -21,13 +21,17 @@
 #include "hw/i2c/imx_i2c.h"
 #include "hw/i2c/i2c.h"
 
-#ifndef IMX_I2C_DEBUG
-#define IMX_I2C_DEBUG                 0
+#ifndef DEBUG_IMX_I2C
+#define DEBUG_IMX_I2C 0
 #endif
 
-#if IMX_I2C_DEBUG
-#define DPRINT(fmt, args...)              \
-    do { fprintf(stderr, "%s: "fmt, __func__, ## args); } while (0)
+#define DPRINTF(fmt, args...) \
+    do { \
+        if (DEBUG_IMX_I2C) { \
+            fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_I2C, \
+                                             __func__, ##args); \
+        } \
+    } while (0)
 
 static const char *imx_i2c_get_regname(unsigned offset)
 {
@@ -46,9 +50,6 @@
         return "[?]";
     }
 }
-#else
-#define DPRINT(fmt, args...)              do { } while (0)
-#endif
 
 static inline bool imx_i2c_is_enabled(IMXI2CState *s)
 {
@@ -121,11 +122,11 @@
 
             if (s->address == ADDR_RESET) {
                 /* something is wrong as the address is not set */
-                qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Trying to read "
+                qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Trying to read "
                               "without specifying the slave address\n",
                               TYPE_IMX_I2C, __func__);
             } else if (s->i2cr & I2CR_MTX) {
-                qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Trying to read "
+                qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Trying to read "
                               "but MTX is set\n", TYPE_IMX_I2C, __func__);
             } else {
                 /* get the next byte */
@@ -134,7 +135,7 @@
                 if (ret >= 0) {
                     imx_i2c_raise_interrupt(s);
                 } else {
-                    qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: read failed "
+                    qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: read failed "
                                   "for device 0x%02x\n", TYPE_IMX_I2C,
                                   __func__, s->address);
                     ret = 0xff;
@@ -143,19 +144,19 @@
 
             s->i2dr_read = ret;
         } else {
-            qemu_log_mask(LOG_UNIMP, "%s[%s]: slave mode not implemented\n",
+            qemu_log_mask(LOG_UNIMP, "[%s]%s: slave mode not implemented\n",
                           TYPE_IMX_I2C, __func__);
         }
         break;
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad address at offset %d\n",
-                      TYPE_IMX_I2C, __func__, s->address);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad address at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_I2C, __func__, offset);
         value = 0;
         break;
     }
 
-    DPRINT("read %s [0x%02x] -> 0x%02x\n", imx_i2c_get_regname(offset),
-           (unsigned int)offset, value);
+    DPRINTF("read %s [0x%" HWADDR_PRIx "] -> 0x%02x\n",
+            imx_i2c_get_regname(offset), offset, value);
 
     return (uint64_t)value;
 }
@@ -165,8 +166,8 @@
 {
     IMXI2CState *s = IMX_I2C(opaque);
 
-    DPRINT("write %s [0x%02x] <- 0x%02x\n", imx_i2c_get_regname(offset),
-           (unsigned int)offset, (int)value);
+    DPRINTF("write %s [0x%" HWADDR_PRIx "] <- 0x%02x\n",
+            imx_i2c_get_regname(offset), offset, (int)value);
 
     value &= 0xff;
 
@@ -264,13 +265,13 @@
                 }
             }
         } else {
-            qemu_log_mask(LOG_UNIMP, "%s[%s]: slave mode not implemented\n",
+            qemu_log_mask(LOG_UNIMP, "[%s]%s: slave mode not implemented\n",
                           TYPE_IMX_I2C, __func__);
         }
         break;
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad address at offset %d\n",
-                      TYPE_IMX_I2C, __func__, s->address);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad address at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_I2C, __func__, offset);
         break;
     }
 }

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 3d958ba..0cb8afd 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c

@@ -1616,7 +1616,6 @@
     HotplugHandlerClass *hhc;
     Error *local_err = NULL;
     PCMachineState *pcms = PC_MACHINE(hotplug_dev);
-    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
     PCDIMMDevice *dimm = PC_DIMM(dev);
     PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
     MemoryRegion *mr = ddc->get_memory_region(dimm);
@@ -1632,8 +1631,7 @@
         goto out;
     }
 
-    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align,
-                        pcmc->inter_dimm_gap, &local_err);
+    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, &local_err);
     if (local_err) {
         goto out;
     }
@@ -1953,7 +1951,6 @@
     PCMachineClass *pcmc = PC_MACHINE_CLASS(oc);
     HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
 
-    pcmc->inter_dimm_gap = true;
     pcmc->get_hotplug_handler = mc->get_hotplug_handler;
     mc->get_hotplug_handler = pc_get_hotpug_handler;
     mc->cpu_index_to_socket_id = pc_cpu_index_to_socket_id;

diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 9d4425a..393dcc4 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c

@@ -487,7 +487,6 @@
     m->alias = NULL;
     m->is_default = 0;
     pcmc->broken_reserved_end = true;
-    pcmc->inter_dimm_gap = false;
     SET_MACHINE_COMPAT(m, PC_COMPAT_2_4);
 }
 

diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 3744abd..2f8f396 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c

@@ -385,7 +385,6 @@
     pc_q35_2_5_machine_options(m);
     m->alias = NULL;
     pcmc->broken_reserved_end = true;
-    pcmc->inter_dimm_gap = false;
     SET_MACHINE_COMPAT(m, PC_COMPAT_2_4);
 }
 

diff --git a/hw/i386/xen/xen_platform.c b/hw/i386/xen/xen_platform.c
index 8682c42..de83f4e 100644
--- a/hw/i386/xen/xen_platform.c
+++ b/hw/i386/xen/xen_platform.c

@@ -33,6 +33,7 @@
 #include "trace.h"
 #include "exec/address-spaces.h"
 #include "sysemu/block-backend.h"
+#include "qemu/error-report.h"
 
 #include <xenguest.h>
 
@@ -382,13 +383,16 @@
     }
 };
 
-static int xen_platform_initfn(PCIDevice *dev)
+static void xen_platform_realize(PCIDevice *dev, Error **errp)
 {
     PCIXenPlatformState *d = XEN_PLATFORM(dev);
     uint8_t *pci_conf;
 
     /* Device will crash on reset if xen is not initialized */
-    assert(xen_enabled());
+    if (!xen_enabled()) {
+        error_setg(errp, "xen-platform device requires the Xen accelerator");
+        return;
+    }
 
     pci_conf = dev->config;
 
@@ -407,8 +411,6 @@
                      &d->mmio_bar);
 
     platform_fixed_ioport_init(d);
-
-    return 0;
 }
 
 static void platform_reset(DeviceState *dev)
@@ -423,7 +425,7 @@
     DeviceClass *dc = DEVICE_CLASS(klass);
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 
-    k->init = xen_platform_initfn;
+    k->realize = xen_platform_realize;
     k->vendor_id = PCI_VENDOR_ID_XEN;
     k->device_id = PCI_DEVICE_ID_XEN_PLATFORM;
     k->class_id = PCI_CLASS_OTHERS << 8 | 0x80;

diff --git a/hw/ide/cmd646.c b/hw/ide/cmd646.c
index 66fb9d9..27f3da2 100644
--- a/hw/ide/cmd646.c
+++ b/hw/ide/cmd646.c

@@ -417,6 +417,7 @@
     k->config_read = cmd646_pci_config_read;
     k->config_write = cmd646_pci_config_write;
     dc->props = cmd646_ide_properties;
+    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
 }
 
 static const TypeInfo cmd646_ide_info = {

diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index 66ac2ba..893c9b9 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c

@@ -590,6 +590,7 @@
     dc->realize = macio_ide_realizefn;
     dc->reset = macio_ide_reset;
     dc->vmsd = &vmstate_pmac;
+    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
 }
 
 static const TypeInfo macio_ide_type_info = {

diff --git a/hw/input/adb.c b/hw/input/adb.c
index a18eea2..09eead9 100644
--- a/hw/input/adb.c
+++ b/hw/input/adb.c

@@ -362,6 +362,7 @@
 
     akc->parent_realize = dc->realize;
     dc->realize = adb_kbd_realizefn;
+    set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
 
     adc->devreq = adb_kbd_request;
     dc->reset = adb_kbd_reset;
@@ -566,6 +567,7 @@
 
     amc->parent_realize = dc->realize;
     dc->realize = adb_mouse_realizefn;
+    set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
 
     adc->devreq = adb_mouse_request;
     dc->reset = adb_mouse_reset;

diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c
index e8b2386..0ceebbf 100644
--- a/hw/intc/arm_gic_kvm.c
+++ b/hw/intc/arm_gic_kvm.c

@@ -20,6 +20,7 @@
  */
 
 #include "hw/sysbus.h"
+#include "migration/migration.h"
 #include "sysemu/kvm.h"
 #include "kvm_arm.h"
 #include "gic_internal.h"
@@ -307,11 +308,6 @@
     int num_cpu;
     int num_irq;
 
-    if (!kvm_arm_gic_can_save_restore(s)) {
-            DPRINTF("Cannot put kernel gic state, no kernel interface");
-            return;
-    }
-
     /* Note: We do the restore in a slightly different order than the save
      * (where the order doesn't matter and is simply ordered according to the
      * register offset values */
@@ -411,11 +407,6 @@
     int i;
     int cpu;
 
-    if (!kvm_arm_gic_can_save_restore(s)) {
-            DPRINTF("Cannot get kernel gic state, no kernel interface");
-            return;
-    }
-
     /*****************************************************************
      * Distributor State
      */
@@ -503,7 +494,10 @@
     KVMARMGICClass *kgc = KVM_ARM_GIC_GET_CLASS(s);
 
     kgc->parent_reset(dev);
-    kvm_arm_gic_put(s);
+
+    if (kvm_arm_gic_can_save_restore(s)) {
+        kvm_arm_gic_put(s);
+    }
 }
 
 static void kvm_arm_gic_realize(DeviceState *dev, Error **errp)
@@ -573,6 +567,12 @@
                             KVM_DEV_ARM_VGIC_GRP_ADDR,
                             KVM_VGIC_V2_ADDR_TYPE_CPU,
                             s->dev_fd);
+
+    if (!kvm_arm_gic_can_save_restore(s)) {
+        error_setg(&s->migration_blocker, "This operating system kernel does "
+                                          "not support vGICv2 migration");
+        migrate_add_blocker(s->migration_blocker);
+    }
 }
 
 static void kvm_arm_gic_class_init(ObjectClass *klass, void *data)

diff --git a/hw/intc/imx_avic.c b/hw/intc/imx_avic.c
index 96c376b..e0535ff 100644
--- a/hw/intc/imx_avic.c
+++ b/hw/intc/imx_avic.c

@@ -17,27 +17,17 @@
 
 #include "hw/intc/imx_avic.h"
 
-#define DEBUG_INT 1
-#undef DEBUG_INT /* comment out for debugging */
+#ifndef DEBUG_IMX_AVIC
+#define DEBUG_IMX_AVIC 0
+#endif
 
-#ifdef DEBUG_INT
 #define DPRINTF(fmt, args...) \
-do { printf("%s: " fmt , TYPE_IMX_AVIC, ##args); } while (0)
-#else
-#define DPRINTF(fmt, args...) do {} while (0)
-#endif
-
-/*
- * Define to 1 for messages about attempts to
- * access unimplemented registers or similar.
- */
-#define DEBUG_IMPLEMENTATION 1
-#if DEBUG_IMPLEMENTATION
-#  define IPRINTF(fmt, args...) \
-    do  { fprintf(stderr, "%s: " fmt, TYPE_IMX_AVIC, ##args); } while (0)
-#else
-#  define IPRINTF(fmt, args...) do {} while (0)
-#endif
+    do { \
+        if (DEBUG_IMX_AVIC) { \
+            fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_AVIC, \
+                                             __func__, ##args); \
+        } \
+    } while (0)
 
 static const VMStateDescription vmstate_imx_avic = {
     .name = TYPE_IMX_AVIC,
@@ -115,8 +105,8 @@
 {
     IMXAVICState *s = (IMXAVICState *)opaque;
 
+    DPRINTF("read(offset = 0x%" HWADDR_PRIx ")\n", offset);
 
-    DPRINTF("read(offset = 0x%x)\n", offset >> 2);
     switch (offset >> 2) {
     case 0: /* INTCNTL */
         return s->intcntl;
@@ -213,7 +203,8 @@
         return 0x4;
 
     default:
-        IPRINTF("%s: Bad offset 0x%x\n", __func__, (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_AVIC, __func__, offset);
         return 0;
     }
 }
@@ -225,13 +216,13 @@
 
     /* Vector Registers not yet supported */
     if (offset >= 0x100 && offset <= 0x2fc) {
-        IPRINTF("%s to vector register %d ignored\n", __func__,
-                (unsigned int)((offset - 0x100) >> 2));
+        qemu_log_mask(LOG_UNIMP, "[%s]%s: vector %d ignored\n",
+                      TYPE_IMX_AVIC, __func__, (int)((offset - 0x100) >> 2));
         return;
     }
 
-    DPRINTF("%s(0x%x) = %x\n", __func__,
-            (unsigned int)offset>>2, (unsigned int)val);
+    DPRINTF("(0x%" HWADDR_PRIx ") = 0x%x\n", offset, (unsigned int)val);
+
     switch (offset >> 2) {
     case 0: /* Interrupt Control Register, INTCNTL */
         s->intcntl = val & (ABFEN | NIDIS | FIDIS | NIAD | FIAD | NM);
@@ -305,7 +296,8 @@
         return;
 
     default:
-        IPRINTF("%s: Bad offset %x\n", __func__, (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_AVIC, __func__, offset);
     }
     imx_avic_update(s);
 }

diff --git a/hw/intc/openpic.c b/hw/intc/openpic.c
index 14ab0e3..bfcf155 100644
--- a/hw/intc/openpic.c
+++ b/hw/intc/openpic.c

@@ -1643,6 +1643,7 @@
     dc->props = openpic_properties;
     dc->reset = openpic_reset;
     dc->vmsd = &vmstate_openpic;
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 }
 
 static const TypeInfo openpic_info = {

diff --git a/hw/intc/openpic_kvm.c b/hw/intc/openpic_kvm.c
index f7cac58..649f476 100644
--- a/hw/intc/openpic_kvm.c
+++ b/hw/intc/openpic_kvm.c

@@ -275,6 +275,7 @@
     dc->realize = kvm_openpic_realize;
     dc->props = kvm_openpic_properties;
     dc->reset = kvm_openpic_reset;
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 }
 
 static const TypeInfo kvm_openpic_info = {

diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
index 2bae994..80f424b 100644
--- a/hw/mem/pc-dimm.c
+++ b/hw/mem/pc-dimm.c

@@ -33,8 +33,7 @@
 } pc_dimms_capacity;
 
 void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
-                         MemoryRegion *mr, uint64_t align, bool gap,
-                         Error **errp)
+                         MemoryRegion *mr, uint64_t align, Error **errp)
 {
     int slot;
     MachineState *machine = MACHINE(qdev_get_machine());
@@ -50,7 +49,7 @@
 
     addr = pc_dimm_get_free_addr(hpms->base,
                                  memory_region_size(&hpms->mr),
-                                 !addr ? NULL : &addr, align, gap,
+                                 !addr ? NULL : &addr, align,
                                  memory_region_size(mr), &local_err);
     if (local_err) {
         goto out;
@@ -295,8 +294,8 @@
 
 uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
                                uint64_t address_space_size,
-                               uint64_t *hint, uint64_t align, bool gap,
-                               uint64_t size, Error **errp)
+                               uint64_t *hint, uint64_t align, uint64_t size,
+                               Error **errp)
 {
     GSList *list = NULL, *item;
     uint64_t new_addr, ret = 0;
@@ -341,15 +340,13 @@
             goto out;
         }
 
-        if (ranges_overlap(dimm->addr, dimm_size, new_addr,
-                           size + (gap ? 1 : 0))) {
+        if (ranges_overlap(dimm->addr, dimm_size, new_addr, size)) {
             if (hint) {
                 DeviceState *d = DEVICE(dimm);
                 error_setg(errp, "address range conflicts with '%s'", d->id);
                 goto out;
             }
-            new_addr = QEMU_ALIGN_UP(dimm->addr + dimm_size + (gap ? 1 : 0),
-                                     align);
+            new_addr = QEMU_ALIGN_UP(dimm->addr + dimm_size, align);
         }
     }
     ret = new_addr;

diff --git a/hw/mips/mips_malta.c b/hw/mips/mips_malta.c
index c1f570a..91c36ba 100644
--- a/hw/mips/mips_malta.c
+++ b/hw/mips/mips_malta.c

@@ -901,7 +901,7 @@
 
     if (kvm_enabled()) {
         /* Start running from the bootloader we wrote to end of RAM */
-        env->active_tc.PC = 0x40000000 + loaderparams.ram_size;
+        env->active_tc.PC = 0x40000000 + loaderparams.ram_low_size;
     }
 }
 

diff --git a/hw/misc/imx_ccm.c b/hw/misc/imx_ccm.c
index 2e19dbb..4cc2bbc 100644
--- a/hw/misc/imx_ccm.c
+++ b/hw/misc/imx_ccm.c

@@ -16,14 +16,18 @@
 #define CKIH_FREQ 26000000 /* 26MHz crystal input */
 #define CKIL_FREQ    32768 /* nominal 32khz clock */
 
-//#define DEBUG_CCM 1
-#ifdef DEBUG_CCM
-#define DPRINTF(fmt, args...) \
-do { printf("%s: " fmt , TYPE_IMX_CCM, ##args); } while (0)
-#else
-#define DPRINTF(fmt, args...) do {} while (0)
+#ifndef DEBUG_IMX_CCM
+#define DEBUG_IMX_CCM 0
 #endif
 
+#define DPRINTF(fmt, args...) \
+    do { \
+        if (DEBUG_IMX_CCM) { \
+            fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_CCM, \
+                                             __func__, ##args); \
+        } \
+    } while (0)
+
 static int imx_ccm_post_load(void *opaque, int version_id);
 
 static const VMStateDescription vmstate_imx_ccm = {
@@ -109,7 +113,7 @@
     s->hsp_clk_freq = s->mcu_clk_freq / (1 + EXTRACT(s->pdr0, HSP));
     s->ipg_clk_freq = s->hsp_clk_freq / (1 + EXTRACT(s->pdr0, IPG));
 
-    DPRINTF("%s: mcu %uMHz, HSP %uMHz, IPG %uHz\n", __func__,
+    DPRINTF("mcu %uMHz, HSP %uMHz, IPG %uHz\n",
             s->mcu_clk_freq / 1000000,
             s->hsp_clk_freq / 1000000,
             s->ipg_clk_freq);
@@ -135,7 +139,8 @@
 {
     IMXCCMState *s = (IMXCCMState *)opaque;
 
-    DPRINTF("%s(offset=%x)", __func__, offset >> 2);
+    DPRINTF("(offset=0x%" HWADDR_PRIx ")\n", offset);
+
     switch (offset >> 2) {
     case 0: /* CCMR */
         DPRINTF(" ccmr = 0x%x\n", s->ccmr);
@@ -166,9 +171,11 @@
     case 23:
         DPRINTF(" pcmr0 = 0x%x\n", s->pmcr0);
         return s->pmcr0;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_CCM, __func__, offset);
+        return 0;
     }
-    DPRINTF(" return 0\n");
-    return 0;
 }
 
 static void imx_ccm_write(void *opaque, hwaddr offset,
@@ -176,8 +183,9 @@
 {
     IMXCCMState *s = (IMXCCMState *)opaque;
 
-    DPRINTF("%s(offset=%x, value = %x)\n", __func__,
-            offset >> 2, (unsigned int)value);
+    DPRINTF("(offset=0x%" HWADDR_PRIx ", value = 0x%x)\n",
+            offset, (unsigned int)value);
+
     switch (offset >> 2) {
     case 0:
         s->ccmr = CCMR_FPMF | (value & 0x3b6fdfff);
@@ -205,6 +213,8 @@
         return;
 
     default:
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_CCM, __func__, offset);
         return;
     }
     update_clocks(s);

diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
index cc76989..83d7bd3 100644
--- a/hw/misc/ivshmem.c
+++ b/hw/misc/ivshmem.c

@@ -19,6 +19,7 @@
 #include "hw/hw.h"
 #include "hw/i386/pc.h"
 #include "hw/pci/pci.h"
+#include "hw/pci/msi.h"
 #include "hw/pci/msix.h"
 #include "sysemu/kvm.h"
 #include "migration/migration.h"
@@ -26,6 +27,10 @@
 #include "qemu/event_notifier.h"
 #include "qemu/fifo8.h"
 #include "sysemu/char.h"
+#include "sysemu/hostmem.h"
+#include "qapi/visitor.h"
+
+#include "hw/misc/ivshmem.h"
 
 #include <sys/mman.h>
 #include <sys/types.h>
@@ -34,6 +39,7 @@
 #define PCI_VENDOR_ID_IVSHMEM   PCI_VENDOR_ID_REDHAT_QUMRANET
 #define PCI_DEVICE_ID_IVSHMEM   0x1110
 
+#define IVSHMEM_MAX_PEERS G_MAXUINT16
 #define IVSHMEM_IOEVENTFD   0
 #define IVSHMEM_MSI     1
 
@@ -54,24 +60,26 @@
 #define IVSHMEM(obj) \
     OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM)
 
+#define IVSHMEM_MEMDEV_PROP "memdev"
+
 typedef struct Peer {
     int nb_eventfds;
     EventNotifier *eventfds;
 } Peer;
 
-typedef struct EventfdEntry {
+typedef struct MSIVector {
     PCIDevice *pdev;
-    int vector;
-} EventfdEntry;
+    int virq;
+} MSIVector;
 
 typedef struct IVShmemState {
     /*< private >*/
     PCIDevice parent_obj;
     /*< public >*/
 
+    HostMemoryBackend *hostmem;
     uint32_t intrmask;
     uint32_t intrstatus;
-    uint32_t doorbell;
 
     CharDriverState **eventfd_chr;
     CharDriverState *server_chr;
@@ -85,18 +93,15 @@
     MemoryRegion bar;
     MemoryRegion ivshmem;
     uint64_t ivshmem_size; /* size of shared memory region */
-    uint32_t ivshmem_attr;
     uint32_t ivshmem_64bit;
-    int shm_fd; /* shared memory file descriptor */
 
     Peer *peers;
-    int nb_peers; /* how many guests we have space for */
-    int max_peer; /* maximum numbered peer */
+    int nb_peers; /* how many peers we have space for */
 
     int vm_id;
     uint32_t vectors;
     uint32_t features;
-    EventfdEntry *eventfd_table;
+    MSIVector *msi_vectors;
 
     Error *migration_blocker;
 
@@ -119,12 +124,8 @@
     return (ivs->features & (1 << feature));
 }
 
-static inline bool is_power_of_two(uint64_t x) {
-    return (x & (x - 1)) == 0;
-}
-
 /* accessing registers - based on rtl8139 */
-static void ivshmem_update_irq(IVShmemState *s, int val)
+static void ivshmem_update_irq(IVShmemState *s)
 {
     PCIDevice *d = PCI_DEVICE(s);
     int isr;
@@ -145,7 +146,7 @@
 
     s->intrmask = val;
 
-    ivshmem_update_irq(s, val);
+    ivshmem_update_irq(s);
 }
 
 static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
@@ -163,7 +164,7 @@
 
     s->intrstatus = val;
 
-    ivshmem_update_irq(s, val);
+    ivshmem_update_irq(s);
 }
 
 static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
@@ -173,7 +174,7 @@
     /* reading ISR clears all interrupts */
     s->intrstatus = 0;
 
-    ivshmem_update_irq(s, 0);
+    ivshmem_update_irq(s);
 
     return ret;
 }
@@ -201,7 +202,7 @@
 
         case DOORBELL:
             /* check that dest VM ID is reasonable */
-            if (dest > s->max_peer) {
+            if (dest >= s->nb_peers) {
                 IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest);
                 break;
             }
@@ -210,10 +211,13 @@
             if (vector < s->peers[dest].nb_eventfds) {
                 IVSHMEM_DPRINTF("Notifying VM %d on vector %d\n", dest, vector);
                 event_notifier_set(&s->peers[dest].eventfds[vector]);
+            } else {
+                IVSHMEM_DPRINTF("Invalid destination vector %d on VM %d\n",
+                                vector, dest);
             }
             break;
         default:
-            IVSHMEM_DPRINTF("Invalid VM Doorbell VM %d\n", dest);
+            IVSHMEM_DPRINTF("Unhandled write " TARGET_FMT_plx "\n", addr);
     }
 }
 
@@ -236,7 +240,7 @@
 
         case IVPOSITION:
             /* return my VM ID if the memory is mapped */
-            if (s->shm_fd > 0) {
+            if (memory_region_is_mapped(&s->ivshmem)) {
                 ret = s->vm_id;
             } else {
                 ret = -1;
@@ -265,14 +269,14 @@
 {
     IVShmemState *s = opaque;
 
-    ivshmem_IntrStatus_write(s, *buf);
+    IVSHMEM_DPRINTF("ivshmem_receive 0x%02x size: %d\n", *buf, size);
 
-    IVSHMEM_DPRINTF("ivshmem_receive 0x%02x\n", *buf);
+    ivshmem_IntrStatus_write(s, *buf);
 }
 
 static int ivshmem_can_receive(void * opaque)
 {
-    return 8;
+    return sizeof(int64_t);
 }
 
 static void ivshmem_event(void *opaque, int event)
@@ -282,11 +286,70 @@
 
 static void fake_irqfd(void *opaque, const uint8_t *buf, int size) {
 
-    EventfdEntry *entry = opaque;
+    MSIVector *entry = opaque;
     PCIDevice *pdev = entry->pdev;
+    IVShmemState *s = IVSHMEM(pdev);
+    int vector = entry - s->msi_vectors;
 
-    IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, entry->vector);
-    msix_notify(pdev, entry->vector);
+    IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, vector);
+    msix_notify(pdev, vector);
+}
+
+static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector,
+                                 MSIMessage msg)
+{
+    IVShmemState *s = IVSHMEM(dev);
+    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
+    MSIVector *v = &s->msi_vectors[vector];
+    int ret;
+
+    IVSHMEM_DPRINTF("vector unmask %p %d\n", dev, vector);
+
+    ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq);
+}
+
+static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector)
+{
+    IVShmemState *s = IVSHMEM(dev);
+    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
+    int ret;
+
+    IVSHMEM_DPRINTF("vector mask %p %d\n", dev, vector);
+
+    ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n,
+                                                s->msi_vectors[vector].virq);
+    if (ret != 0) {
+        error_report("remove_irqfd_notifier_gsi failed");
+    }
+}
+
+static void ivshmem_vector_poll(PCIDevice *dev,
+                                unsigned int vector_start,
+                                unsigned int vector_end)
+{
+    IVShmemState *s = IVSHMEM(dev);
+    unsigned int vector;
+
+    IVSHMEM_DPRINTF("vector poll %p %d-%d\n", dev, vector_start, vector_end);
+
+    vector_end = MIN(vector_end, s->vectors);
+
+    for (vector = vector_start; vector < vector_end; vector++) {
+        EventNotifier *notifier = &s->peers[s->vm_id].eventfds[vector];
+
+        if (!msix_is_masked(dev, vector)) {
+            continue;
+        }
+
+        if (event_notifier_test_and_clear(notifier)) {
+            msix_set_pending(dev, vector);
+        }
+    }
 }
 
 static CharDriverState* create_eventfd_chr_device(void * opaque, EventNotifier *n,
@@ -294,24 +357,26 @@
 {
     /* create a event character device based on the passed eventfd */
     IVShmemState *s = opaque;
-    CharDriverState * chr;
+    PCIDevice *pdev = PCI_DEVICE(s);
     int eventfd = event_notifier_get_fd(n);
+    CharDriverState *chr;
+
+    s->msi_vectors[vector].pdev = pdev;
 
     chr = qemu_chr_open_eventfd(eventfd);
 
     if (chr == NULL) {
-        error_report("creating eventfd for eventfd %d failed", eventfd);
-        exit(1);
+        error_report("creating chardriver for eventfd %d failed", eventfd);
+        return NULL;
     }
     qemu_chr_fe_claim_no_fail(chr);
 
     /* if MSI is supported we need multiple interrupts */
     if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
-        s->eventfd_table[vector].pdev = PCI_DEVICE(s);
-        s->eventfd_table[vector].vector = vector;
+        s->msi_vectors[vector].pdev = PCI_DEVICE(s);
 
         qemu_chr_add_handlers(chr, ivshmem_can_receive, fake_irqfd,
-                      ivshmem_event, &s->eventfd_table[vector]);
+                      ivshmem_event, &s->msi_vectors[vector]);
     } else {
         qemu_chr_add_handlers(chr, ivshmem_can_receive, ivshmem_receive,
                       ivshmem_event, s);
@@ -321,22 +386,23 @@
 
 }
 
-static int check_shm_size(IVShmemState *s, int fd) {
+static int check_shm_size(IVShmemState *s, int fd, Error **errp)
+{
     /* check that the guest isn't going to try and map more memory than the
      * the object has allocated return -1 to indicate error */
 
     struct stat buf;
 
     if (fstat(fd, &buf) < 0) {
-        error_report("exiting: fstat on fd %d failed: %s",
-                     fd, strerror(errno));
+        error_setg(errp, "exiting: fstat on fd %d failed: %s",
+                   fd, strerror(errno));
         return -1;
     }
 
     if (s->ivshmem_size > buf.st_size) {
-        error_report("Requested memory size greater"
-                     " than shared object size (%" PRIu64 " > %" PRIu64")",
-                     s->ivshmem_size, (uint64_t)buf.st_size);
+        error_setg(errp, "Requested memory size greater"
+                   " than shared object size (%" PRIu64 " > %" PRIu64")",
+                   s->ivshmem_size, (uint64_t)buf.st_size);
         return -1;
     } else {
         return 0;
@@ -345,13 +411,16 @@
 
 /* create the shared memory BAR when we are not using the server, so we can
  * create the BAR and map the memory immediately */
-static void create_shared_memory_BAR(IVShmemState *s, int fd) {
-
+static int create_shared_memory_BAR(IVShmemState *s, int fd, uint8_t attr,
+                                    Error **errp)
+{
     void * ptr;
 
-    s->shm_fd = fd;
-
     ptr = mmap(0, s->ivshmem_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+    if (ptr == MAP_FAILED) {
+        error_setg_errno(errp, errno, "Failed to mmap shared memory");
+        return -1;
+    }
 
     memory_region_init_ram_ptr(&s->ivshmem, OBJECT(s), "ivshmem.bar2",
                                s->ivshmem_size, ptr);
@@ -359,7 +428,9 @@
     memory_region_add_subregion(&s->bar, 0, &s->ivshmem);
 
     /* region for shared memory */
-    pci_register_bar(PCI_DEVICE(s), 2, s->ivshmem_attr, &s->bar);
+    pci_register_bar(PCI_DEVICE(s), 2, attr, &s->bar);
+
+    return 0;
 }
 
 static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i)
@@ -382,25 +453,26 @@
                               &s->peers[posn].eventfds[i]);
 }
 
-static void close_guest_eventfds(IVShmemState *s, int posn)
+static void close_peer_eventfds(IVShmemState *s, int posn)
 {
-    int i, guest_curr_max;
+    int i, n;
 
     if (!ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
         return;
     }
     if (posn < 0 || posn >= s->nb_peers) {
+        error_report("invalid peer %d", posn);
         return;
     }
 
-    guest_curr_max = s->peers[posn].nb_eventfds;
+    n = s->peers[posn].nb_eventfds;
 
     memory_region_transaction_begin();
-    for (i = 0; i < guest_curr_max; i++) {
+    for (i = 0; i < n; i++) {
         ivshmem_del_eventfd(s, posn, i);
     }
     memory_region_transaction_commit();
-    for (i = 0; i < guest_curr_max; i++) {
+    for (i = 0; i < n; i++) {
         event_notifier_cleanup(&s->peers[posn].eventfds[i]);
     }
 
@@ -409,125 +481,207 @@
 }
 
 /* this function increase the dynamic storage need to store data about other
- * guests */
-static int increase_dynamic_storage(IVShmemState *s, int new_min_size)
+ * peers */
+static int resize_peers(IVShmemState *s, int new_min_size)
 {
 
-    int j, old_nb_alloc;
+    int j, old_size;
 
-    /* check for integer overflow */
-    if (new_min_size >= INT_MAX / sizeof(Peer) - 1 || new_min_size <= 0) {
+    /* limit number of max peers */
+    if (new_min_size <= 0 || new_min_size > IVSHMEM_MAX_PEERS) {
         return -1;
     }
-
-    old_nb_alloc = s->nb_peers;
-
-    if (new_min_size >= s->nb_peers) {
-        /* +1 because #new_min_size is used as last array index */
-        s->nb_peers = new_min_size + 1;
-    } else {
+    if (new_min_size <= s->nb_peers) {
         return 0;
     }
 
-    IVSHMEM_DPRINTF("bumping storage to %d guests\n", s->nb_peers);
+    old_size = s->nb_peers;
+    s->nb_peers = new_min_size;
+
+    IVSHMEM_DPRINTF("bumping storage to %d peers\n", s->nb_peers);
+
     s->peers = g_realloc(s->peers, s->nb_peers * sizeof(Peer));
 
-    /* zero out new pointers */
-    for (j = old_nb_alloc; j < s->nb_peers; j++) {
-        s->peers[j].eventfds = NULL;
+    for (j = old_size; j < s->nb_peers; j++) {
+        s->peers[j].eventfds = g_new0(EventNotifier, s->vectors);
         s->peers[j].nb_eventfds = 0;
     }
 
     return 0;
 }
 
+static bool fifo_update_and_get(IVShmemState *s, const uint8_t *buf, int size,
+                                void *data, size_t len)
+{
+    const uint8_t *p;
+    uint32_t num;
+
+    assert(len <= sizeof(int64_t)); /* limitation of the fifo */
+    if (fifo8_is_empty(&s->incoming_fifo) && size == len) {
+        memcpy(data, buf, size);
+        return true;
+    }
+
+    IVSHMEM_DPRINTF("short read of %d bytes\n", size);
+
+    num = MIN(size, sizeof(int64_t) - fifo8_num_used(&s->incoming_fifo));
+    fifo8_push_all(&s->incoming_fifo, buf, num);
+
+    if (fifo8_num_used(&s->incoming_fifo) < len) {
+        assert(num == 0);
+        return false;
+    }
+
+    size -= num;
+    buf += num;
+    p = fifo8_pop_buf(&s->incoming_fifo, len, &num);
+    assert(num == len);
+
+    memcpy(data, p, len);
+
+    if (size > 0) {
+        fifo8_push_all(&s->incoming_fifo, buf, size);
+    }
+
+    return true;
+}
+
+static bool fifo_update_and_get_i64(IVShmemState *s,
+                                    const uint8_t *buf, int size, int64_t *i64)
+{
+    if (fifo_update_and_get(s, buf, size, i64, sizeof(*i64))) {
+        *i64 = GINT64_FROM_LE(*i64);
+        return true;
+    }
+
+    return false;
+}
+
+static int ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector)
+{
+    PCIDevice *pdev = PCI_DEVICE(s);
+    MSIMessage msg = msix_get_message(pdev, vector);
+    int ret;
+
+    IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector);
+
+    if (s->msi_vectors[vector].pdev != NULL) {
+        return 0;
+    }
+
+    ret = kvm_irqchip_add_msi_route(kvm_state, msg, pdev);
+    if (ret < 0) {
+        error_report("ivshmem: kvm_irqchip_add_msi_route failed");
+        return -1;
+    }
+
+    s->msi_vectors[vector].virq = ret;
+    s->msi_vectors[vector].pdev = pdev;
+
+    return 0;
+}
+
+static void setup_interrupt(IVShmemState *s, int vector)
+{
+    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
+    bool with_irqfd = kvm_msi_via_irqfd_enabled() &&
+        ivshmem_has_feature(s, IVSHMEM_MSI);
+    PCIDevice *pdev = PCI_DEVICE(s);
+
+    IVSHMEM_DPRINTF("setting up interrupt for vector: %d\n", vector);
+
+    if (!with_irqfd) {
+        IVSHMEM_DPRINTF("with eventfd");
+        s->eventfd_chr[vector] = create_eventfd_chr_device(s, n, vector);
+    } else if (msix_enabled(pdev)) {
+        IVSHMEM_DPRINTF("with irqfd");
+        if (ivshmem_add_kvm_msi_virq(s, vector) < 0) {
+            return;
+        }
+
+        if (!msix_is_masked(pdev, vector)) {
+            kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL,
+                                               s->msi_vectors[vector].virq);
+        }
+    } else {
+        /* it will be delayed until msix is enabled, in write_config */
+        IVSHMEM_DPRINTF("with irqfd, delayed until msix enabled");
+    }
+}
+
 static void ivshmem_read(void *opaque, const uint8_t *buf, int size)
 {
     IVShmemState *s = opaque;
-    int incoming_fd, tmp_fd;
-    int guest_max_eventfd;
-    long incoming_posn;
+    int incoming_fd;
+    int new_eventfd;
+    int64_t incoming_posn;
+    Error *err = NULL;
+    Peer *peer;
 
-    if (fifo8_is_empty(&s->incoming_fifo) && size == sizeof(incoming_posn)) {
-        memcpy(&incoming_posn, buf, size);
-    } else {
-        const uint8_t *p;
-        uint32_t num;
-
-        IVSHMEM_DPRINTF("short read of %d bytes\n", size);
-        num = MAX(size, sizeof(long) - fifo8_num_used(&s->incoming_fifo));
-        fifo8_push_all(&s->incoming_fifo, buf, num);
-        if (fifo8_num_used(&s->incoming_fifo) < sizeof(incoming_posn)) {
-            return;
-        }
-        size -= num;
-        buf += num;
-        p = fifo8_pop_buf(&s->incoming_fifo, sizeof(incoming_posn), &num);
-        g_assert(num == sizeof(incoming_posn));
-        memcpy(&incoming_posn, p, sizeof(incoming_posn));
-        if (size > 0) {
-            fifo8_push_all(&s->incoming_fifo, buf, size);
-        }
+    if (!fifo_update_and_get_i64(s, buf, size, &incoming_posn)) {
+        return;
     }
 
     if (incoming_posn < -1) {
-        IVSHMEM_DPRINTF("invalid incoming_posn %ld\n", incoming_posn);
+        IVSHMEM_DPRINTF("invalid incoming_posn %" PRId64 "\n", incoming_posn);
         return;
     }
 
     /* pick off s->server_chr->msgfd and store it, posn should accompany msg */
-    tmp_fd = qemu_chr_fe_get_msgfd(s->server_chr);
-    IVSHMEM_DPRINTF("posn is %ld, fd is %d\n", incoming_posn, tmp_fd);
+    incoming_fd = qemu_chr_fe_get_msgfd(s->server_chr);
+    IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n",
+                    incoming_posn, incoming_fd);
 
-    /* make sure we have enough space for this guest */
+    /* make sure we have enough space for this peer */
     if (incoming_posn >= s->nb_peers) {
-        if (increase_dynamic_storage(s, incoming_posn) < 0) {
-            error_report("increase_dynamic_storage() failed");
-            if (tmp_fd != -1) {
-                close(tmp_fd);
+        if (resize_peers(s, incoming_posn + 1) < 0) {
+            error_report("failed to resize peers array");
+            if (incoming_fd != -1) {
+                close(incoming_fd);
             }
             return;
         }
     }
 
-    if (tmp_fd == -1) {
-        /* if posn is positive and unseen before then this is our posn*/
-        if ((incoming_posn >= 0) &&
-                            (s->peers[incoming_posn].eventfds == NULL)) {
-            /* receive our posn */
-            s->vm_id = incoming_posn;
-            return;
-        } else {
-            /* otherwise an fd == -1 means an existing guest has gone away */
-            IVSHMEM_DPRINTF("posn %ld has gone away\n", incoming_posn);
-            close_guest_eventfds(s, incoming_posn);
-            return;
-        }
-    }
-
-    /* because of the implementation of get_msgfd, we need a dup */
-    incoming_fd = dup(tmp_fd);
+    peer = &s->peers[incoming_posn];
 
     if (incoming_fd == -1) {
-        error_report("could not allocate file descriptor %s", strerror(errno));
-        close(tmp_fd);
+        /* if posn is positive and unseen before then this is our posn*/
+        if (incoming_posn >= 0 && s->vm_id == -1) {
+            /* receive our posn */
+            s->vm_id = incoming_posn;
+        } else {
+            /* otherwise an fd == -1 means an existing peer has gone away */
+            IVSHMEM_DPRINTF("posn %" PRId64 " has gone away\n", incoming_posn);
+            close_peer_eventfds(s, incoming_posn);
+        }
         return;
     }
 
     /* if the position is -1, then it's shared memory region fd */
     if (incoming_posn == -1) {
-
         void * map_ptr;
 
-        s->max_peer = 0;
+        if (memory_region_is_mapped(&s->ivshmem)) {
+            error_report("shm already initialized");
+            close(incoming_fd);
+            return;
+        }
 
-        if (check_shm_size(s, incoming_fd) == -1) {
-            exit(1);
+        if (check_shm_size(s, incoming_fd, &err) == -1) {
+            error_report_err(err);
+            close(incoming_fd);
+            return;
         }
 
         /* mmap the region and map into the BAR2 */
         map_ptr = mmap(0, s->ivshmem_size, PROT_READ|PROT_WRITE, MAP_SHARED,
                                                             incoming_fd, 0);
+        if (map_ptr == MAP_FAILED) {
+            error_report("Failed to mmap shared memory %s", strerror(errno));
+            close(incoming_fd);
+            return;
+        }
         memory_region_init_ram_ptr(&s->ivshmem, OBJECT(s),
                                    "ivshmem.bar2", s->ivshmem_size, map_ptr);
         vmstate_register_ram(&s->ivshmem, DEVICE(s));
@@ -537,46 +691,61 @@
 
         memory_region_add_subregion(&s->bar, 0, &s->ivshmem);
 
-        /* only store the fd if it is successfully mapped */
-        s->shm_fd = incoming_fd;
-
+        close(incoming_fd);
         return;
     }
 
-    /* each guest has an array of eventfds, and we keep track of how many
-     * guests for each VM */
-    guest_max_eventfd = s->peers[incoming_posn].nb_eventfds;
-
-    if (guest_max_eventfd == 0) {
-        /* one eventfd per MSI vector */
-        s->peers[incoming_posn].eventfds = g_new(EventNotifier, s->vectors);
+    /* each peer has an associated array of eventfds, and we keep
+     * track of how many eventfds received so far */
+    /* get a new eventfd: */
+    if (peer->nb_eventfds >= s->vectors) {
+        error_report("Too many eventfd received, device has %d vectors",
+                     s->vectors);
+        close(incoming_fd);
+        return;
     }
 
-    /* this is an eventfd for a particular guest VM */
-    IVSHMEM_DPRINTF("eventfds[%ld][%d] = %d\n", incoming_posn,
-                    guest_max_eventfd, incoming_fd);
-    event_notifier_init_fd(&s->peers[incoming_posn].eventfds[guest_max_eventfd],
-                           incoming_fd);
+    new_eventfd = peer->nb_eventfds++;
 
-    /* increment count for particular guest */
-    s->peers[incoming_posn].nb_eventfds++;
-
-    /* keep track of the maximum VM ID */
-    if (incoming_posn > s->max_peer) {
-        s->max_peer = incoming_posn;
-    }
+    /* this is an eventfd for a particular peer VM */
+    IVSHMEM_DPRINTF("eventfds[%" PRId64 "][%d] = %d\n", incoming_posn,
+                    new_eventfd, incoming_fd);
+    event_notifier_init_fd(&peer->eventfds[new_eventfd], incoming_fd);
+    fcntl_setfl(incoming_fd, O_NONBLOCK); /* msix/irqfd poll non block */
 
     if (incoming_posn == s->vm_id) {
-        s->eventfd_chr[guest_max_eventfd] = create_eventfd_chr_device(s,
-                   &s->peers[s->vm_id].eventfds[guest_max_eventfd],
-                   guest_max_eventfd);
+        setup_interrupt(s, new_eventfd);
     }
 
     if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
-        ivshmem_add_eventfd(s, incoming_posn, guest_max_eventfd);
+        ivshmem_add_eventfd(s, incoming_posn, new_eventfd);
     }
 }
 
+static void ivshmem_check_version(void *opaque, const uint8_t * buf, int size)
+{
+    IVShmemState *s = opaque;
+    int tmp;
+    int64_t version;
+
+    if (!fifo_update_and_get_i64(s, buf, size, &version)) {
+        return;
+    }
+
+    tmp = qemu_chr_fe_get_msgfd(s->server_chr);
+    if (tmp != -1 || version != IVSHMEM_PROTOCOL_VERSION) {
+        fprintf(stderr, "incompatible version, you are connecting to a ivshmem-"
+                "server using a different protocol please check your setup\n");
+        qemu_chr_delete(s->server_chr);
+        s->server_chr = NULL;
+        return;
+    }
+
+    IVSHMEM_DPRINTF("version check ok, switch to real chardev handler\n");
+    qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive, ivshmem_read,
+                          ivshmem_event, s);
+}
+
 /* Select the MSI-X vectors used by device.
  * ivshmem maps events to vectors statically, so
  * we just enable all vectors on init and after reset. */
@@ -585,6 +754,7 @@
     PCIDevice *d = PCI_DEVICE(s);
     int i;
 
+    IVSHMEM_DPRINTF("%s, msix present: %d\n", __func__, msix_present(d));
     if (!msix_present(d)) {
         return;
     }
@@ -599,128 +769,127 @@
     IVShmemState *s = IVSHMEM(d);
 
     s->intrstatus = 0;
+    s->intrmask = 0;
     ivshmem_use_msix(s);
 }
 
-static uint64_t ivshmem_get_size(IVShmemState * s) {
-
-    uint64_t value;
-    char *ptr;
-
-    value = strtoull(s->sizearg, &ptr, 10);
-    switch (*ptr) {
-        case 0: case 'M': case 'm':
-            value <<= 20;
-            break;
-        case 'G': case 'g':
-            value <<= 30;
-            break;
-        default:
-            error_report("invalid ram size: %s", s->sizearg);
-            exit(1);
-    }
-
-    /* BARs must be a power of 2 */
-    if (!is_power_of_two(value)) {
-        error_report("size must be power of 2");
-        exit(1);
-    }
-
-    return value;
-}
-
-static void ivshmem_setup_msi(IVShmemState * s)
+static int ivshmem_setup_msi(IVShmemState * s)
 {
     if (msix_init_exclusive_bar(PCI_DEVICE(s), s->vectors, 1)) {
-        IVSHMEM_DPRINTF("msix initialization failed\n");
-        exit(1);
+        return -1;
     }
 
     IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors);
 
     /* allocate QEMU char devices for receiving interrupts */
-    s->eventfd_table = g_malloc0(s->vectors * sizeof(EventfdEntry));
+    s->msi_vectors = g_malloc0(s->vectors * sizeof(MSIVector));
 
     ivshmem_use_msix(s);
-}
-
-static void ivshmem_save(QEMUFile* f, void *opaque)
-{
-    IVShmemState *proxy = opaque;
-    PCIDevice *pci_dev = PCI_DEVICE(proxy);
-
-    IVSHMEM_DPRINTF("ivshmem_save\n");
-    pci_device_save(pci_dev, f);
-
-    if (ivshmem_has_feature(proxy, IVSHMEM_MSI)) {
-        msix_save(pci_dev, f);
-    } else {
-        qemu_put_be32(f, proxy->intrstatus);
-        qemu_put_be32(f, proxy->intrmask);
-    }
-
-}
-
-static int ivshmem_load(QEMUFile* f, void *opaque, int version_id)
-{
-    IVSHMEM_DPRINTF("ivshmem_load\n");
-
-    IVShmemState *proxy = opaque;
-    PCIDevice *pci_dev = PCI_DEVICE(proxy);
-    int ret;
-
-    if (version_id > 0) {
-        return -EINVAL;
-    }
-
-    if (proxy->role_val == IVSHMEM_PEER) {
-        error_report("'peer' devices are not migratable");
-        return -EINVAL;
-    }
-
-    ret = pci_device_load(pci_dev, f);
-    if (ret) {
-        return ret;
-    }
-
-    if (ivshmem_has_feature(proxy, IVSHMEM_MSI)) {
-        msix_load(pci_dev, f);
-	ivshmem_use_msix(proxy);
-    } else {
-        proxy->intrstatus = qemu_get_be32(f);
-        proxy->intrmask = qemu_get_be32(f);
-    }
-
     return 0;
 }
 
-static void ivshmem_write_config(PCIDevice *pci_dev, uint32_t address,
-				 uint32_t val, int len)
+static void ivshmem_enable_irqfd(IVShmemState *s)
 {
-    pci_default_write_config(pci_dev, address, val, len);
+    PCIDevice *pdev = PCI_DEVICE(s);
+    int i;
+
+    for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
+        ivshmem_add_kvm_msi_virq(s, i);
+    }
+
+    if (msix_set_vector_notifiers(pdev,
+                                  ivshmem_vector_unmask,
+                                  ivshmem_vector_mask,
+                                  ivshmem_vector_poll)) {
+        error_report("ivshmem: msix_set_vector_notifiers failed");
+    }
 }
 
-static int pci_ivshmem_init(PCIDevice *dev)
+static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector)
+{
+    IVSHMEM_DPRINTF("ivshmem_remove_kvm_msi_virq vector:%d\n", vector);
+
+    if (s->msi_vectors[vector].pdev == NULL) {
+        return;
+    }
+
+    /* it was cleaned when masked in the frontend. */
+    kvm_irqchip_release_virq(kvm_state, s->msi_vectors[vector].virq);
+
+    s->msi_vectors[vector].pdev = NULL;
+}
+
+static void ivshmem_disable_irqfd(IVShmemState *s)
+{
+    PCIDevice *pdev = PCI_DEVICE(s);
+    int i;
+
+    for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
+        ivshmem_remove_kvm_msi_virq(s, i);
+    }
+
+    msix_unset_vector_notifiers(pdev);
+}
+
+static void ivshmem_write_config(PCIDevice *pdev, uint32_t address,
+                                 uint32_t val, int len)
+{
+    IVShmemState *s = IVSHMEM(pdev);
+    int is_enabled, was_enabled = msix_enabled(pdev);
+
+    pci_default_write_config(pdev, address, val, len);
+    is_enabled = msix_enabled(pdev);
+
+    if (kvm_msi_via_irqfd_enabled() && s->vm_id != -1) {
+        if (!was_enabled && is_enabled) {
+            ivshmem_enable_irqfd(s);
+        } else if (was_enabled && !is_enabled) {
+            ivshmem_disable_irqfd(s);
+        }
+    }
+}
+
+static void pci_ivshmem_realize(PCIDevice *dev, Error **errp)
 {
     IVShmemState *s = IVSHMEM(dev);
     uint8_t *pci_conf;
+    uint8_t attr = PCI_BASE_ADDRESS_SPACE_MEMORY |
+        PCI_BASE_ADDRESS_MEM_PREFETCH;
 
-    if (s->sizearg == NULL)
-        s->ivshmem_size = 4 << 20; /* 4 MB default */
-    else {
-        s->ivshmem_size = ivshmem_get_size(s);
+    if (!!s->server_chr + !!s->shmobj + !!s->hostmem != 1) {
+        error_setg(errp, "You must specify either a shmobj, a chardev"
+                   " or a hostmem");
+        return;
     }
 
-    fifo8_create(&s->incoming_fifo, sizeof(long));
+    if (s->hostmem) {
+        MemoryRegion *mr;
 
-    register_savevm(DEVICE(dev), "ivshmem", 0, 0, ivshmem_save, ivshmem_load,
-                                                                        dev);
+        if (s->sizearg) {
+            g_warning("size argument ignored with hostmem");
+        }
+
+        mr = host_memory_backend_get_memory(s->hostmem, errp);
+        s->ivshmem_size = memory_region_size(mr);
+    } else if (s->sizearg == NULL) {
+        s->ivshmem_size = 4 << 20; /* 4 MB default */
+    } else {
+        char *end;
+        int64_t size = qemu_strtosz(s->sizearg, &end);
+        if (size < 0 || *end != '\0' || !is_power_of_2(size)) {
+            error_setg(errp, "Invalid size %s", s->sizearg);
+            return;
+        }
+        s->ivshmem_size = size;
+    }
+
+    fifo8_create(&s->incoming_fifo, sizeof(int64_t));
 
     /* IRQFD requires MSI */
     if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) &&
         !ivshmem_has_feature(s, IVSHMEM_MSI)) {
-        error_report("ioeventfd/irqfd requires MSI");
-        exit(1);
+        error_setg(errp, "ioeventfd/irqfd requires MSI");
+        return;
     }
 
     /* check that role is reasonable */
@@ -730,8 +899,8 @@
         } else if (strncmp(s->role, "master", 7) == 0) {
             s->role_val = IVSHMEM_MASTER;
         } else {
-            error_report("'role' must be 'peer' or 'master'");
-            exit(1);
+            error_setg(errp, "'role' must be 'peer' or 'master'");
+            return;
         }
     } else {
         s->role_val = IVSHMEM_MASTER; /* default */
@@ -748,8 +917,6 @@
 
     pci_config_set_interrupt_pin(pci_conf, 1);
 
-    s->shm_fd = 0;
-
     memory_region_init_io(&s->ivshmem_mmio, OBJECT(s), &ivshmem_mmio_ops, s,
                           "ivshmem-mmio", IVSHMEM_REG_BAR_SIZE);
 
@@ -758,51 +925,51 @@
                      &s->ivshmem_mmio);
 
     memory_region_init(&s->bar, OBJECT(s), "ivshmem-bar2-container", s->ivshmem_size);
-    s->ivshmem_attr = PCI_BASE_ADDRESS_SPACE_MEMORY |
-        PCI_BASE_ADDRESS_MEM_PREFETCH;
     if (s->ivshmem_64bit) {
-        s->ivshmem_attr |= PCI_BASE_ADDRESS_MEM_TYPE_64;
+        attr |= PCI_BASE_ADDRESS_MEM_TYPE_64;
     }
 
-    if ((s->server_chr != NULL) &&
-                        (strncmp(s->server_chr->filename, "unix:", 5) == 0)) {
+    if (s->hostmem != NULL) {
+        MemoryRegion *mr;
+
+        IVSHMEM_DPRINTF("using hostmem\n");
+
+        mr = host_memory_backend_get_memory(MEMORY_BACKEND(s->hostmem), errp);
+        vmstate_register_ram(mr, DEVICE(s));
+        memory_region_add_subregion(&s->bar, 0, mr);
+        pci_register_bar(PCI_DEVICE(s), 2, attr, &s->bar);
+    } else if (s->server_chr != NULL) {
+        if (strncmp(s->server_chr->filename, "unix:", 5)) {
+            error_setg(errp, "chardev is not a unix client socket");
+            return;
+        }
+
         /* if we get a UNIX socket as the parameter we will talk
          * to the ivshmem server to receive the memory region */
 
-        if (s->shmobj != NULL) {
-            error_report("WARNING: do not specify both 'chardev' "
-                         "and 'shm' with ivshmem");
-        }
-
         IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n",
                         s->server_chr->filename);
 
-        if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
-            ivshmem_setup_msi(s);
+        if (ivshmem_has_feature(s, IVSHMEM_MSI) &&
+            ivshmem_setup_msi(s)) {
+            error_setg(errp, "msix initialization failed");
+            return;
         }
 
-        /* we allocate enough space for 16 guests and grow as needed */
-        s->nb_peers = 16;
+        /* we allocate enough space for 16 peers and grow as needed */
+        resize_peers(s, 16);
         s->vm_id = -1;
 
-        /* allocate/initialize space for interrupt handling */
-        s->peers = g_malloc0(s->nb_peers * sizeof(Peer));
-
-        pci_register_bar(dev, 2, s->ivshmem_attr, &s->bar);
+        pci_register_bar(dev, 2, attr, &s->bar);
 
         s->eventfd_chr = g_malloc0(s->vectors * sizeof(CharDriverState *));
 
-        qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive, ivshmem_read,
-                     ivshmem_event, s);
+        qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive,
+                              ivshmem_check_version, ivshmem_event, s);
     } else {
         /* just map the file immediately, we're not using a server */
         int fd;
 
-        if (s->shmobj == NULL) {
-            error_report("Must specify 'chardev' or 'shm' to ivshmem");
-            exit(1);
-        }
-
         IVSHMEM_DPRINTF("using shm_open (shm object = %s)\n", s->shmobj);
 
         /* try opening with O_EXCL and if it succeeds zero the memory
@@ -816,39 +983,155 @@
 
         } else if ((fd = shm_open(s->shmobj, O_CREAT|O_RDWR,
                         S_IRWXU|S_IRWXG|S_IRWXO)) < 0) {
-            error_report("could not open shared file");
-            exit(1);
-
+            error_setg(errp, "could not open shared file");
+            return;
         }
 
-        if (check_shm_size(s, fd) == -1) {
-            exit(1);
+        if (check_shm_size(s, fd, errp) == -1) {
+            return;
         }
 
-        create_shared_memory_BAR(s, fd);
-
+        create_shared_memory_BAR(s, fd, attr, errp);
+        close(fd);
     }
-
-    dev->config_write = ivshmem_write_config;
-
-    return 0;
 }
 
-static void pci_ivshmem_uninit(PCIDevice *dev)
+static void pci_ivshmem_exit(PCIDevice *dev)
 {
     IVShmemState *s = IVSHMEM(dev);
+    int i;
+
+    fifo8_destroy(&s->incoming_fifo);
 
     if (s->migration_blocker) {
         migrate_del_blocker(s->migration_blocker);
         error_free(s->migration_blocker);
     }
 
-    memory_region_del_subregion(&s->bar, &s->ivshmem);
-    vmstate_unregister_ram(&s->ivshmem, DEVICE(dev));
-    unregister_savevm(DEVICE(dev), "ivshmem", s);
-    fifo8_destroy(&s->incoming_fifo);
+    if (memory_region_is_mapped(&s->ivshmem)) {
+        if (!s->hostmem) {
+            void *addr = memory_region_get_ram_ptr(&s->ivshmem);
+
+            if (munmap(addr, s->ivshmem_size) == -1) {
+                error_report("Failed to munmap shared memory %s",
+                             strerror(errno));
+            }
+        }
+
+        vmstate_unregister_ram(&s->ivshmem, DEVICE(dev));
+        memory_region_del_subregion(&s->bar, &s->ivshmem);
+    }
+
+    if (s->eventfd_chr) {
+        for (i = 0; i < s->vectors; i++) {
+            if (s->eventfd_chr[i]) {
+                qemu_chr_free(s->eventfd_chr[i]);
+            }
+        }
+        g_free(s->eventfd_chr);
+    }
+
+    if (s->peers) {
+        for (i = 0; i < s->nb_peers; i++) {
+            close_peer_eventfds(s, i);
+        }
+        g_free(s->peers);
+    }
+
+    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
+        msix_uninit_exclusive_bar(dev);
+    }
+
+    g_free(s->msi_vectors);
 }
 
+static bool test_msix(void *opaque, int version_id)
+{
+    IVShmemState *s = opaque;
+
+    return ivshmem_has_feature(s, IVSHMEM_MSI);
+}
+
+static bool test_no_msix(void *opaque, int version_id)
+{
+    return !test_msix(opaque, version_id);
+}
+
+static int ivshmem_pre_load(void *opaque)
+{
+    IVShmemState *s = opaque;
+
+    if (s->role_val == IVSHMEM_PEER) {
+        error_report("'peer' devices are not migratable");
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+static int ivshmem_post_load(void *opaque, int version_id)
+{
+    IVShmemState *s = opaque;
+
+    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
+        ivshmem_use_msix(s);
+    }
+
+    return 0;
+}
+
+static int ivshmem_load_old(QEMUFile *f, void *opaque, int version_id)
+{
+    IVShmemState *s = opaque;
+    PCIDevice *pdev = PCI_DEVICE(s);
+    int ret;
+
+    IVSHMEM_DPRINTF("ivshmem_load_old\n");
+
+    if (version_id != 0) {
+        return -EINVAL;
+    }
+
+    if (s->role_val == IVSHMEM_PEER) {
+        error_report("'peer' devices are not migratable");
+        return -EINVAL;
+    }
+
+    ret = pci_device_load(pdev, f);
+    if (ret) {
+        return ret;
+    }
+
+    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
+        msix_load(pdev, f);
+        ivshmem_use_msix(s);
+    } else {
+        s->intrstatus = qemu_get_be32(f);
+        s->intrmask = qemu_get_be32(f);
+    }
+
+    return 0;
+}
+
+static const VMStateDescription ivshmem_vmsd = {
+    .name = "ivshmem",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .pre_load = ivshmem_pre_load,
+    .post_load = ivshmem_post_load,
+    .fields = (VMStateField[]) {
+        VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
+
+        VMSTATE_MSIX_TEST(parent_obj, IVShmemState, test_msix),
+        VMSTATE_UINT32_TEST(intrstatus, IVShmemState, test_no_msix),
+        VMSTATE_UINT32_TEST(intrmask, IVShmemState, test_no_msix),
+
+        VMSTATE_END_OF_LIST()
+    },
+    .load_state_old = ivshmem_load_old,
+    .minimum_version_id_old = 0
+};
+
 static Property ivshmem_properties[] = {
     DEFINE_PROP_CHR("chardev", IVShmemState, server_chr),
     DEFINE_PROP_STRING("size", IVShmemState, sizearg),
@@ -866,20 +1149,50 @@
     DeviceClass *dc = DEVICE_CLASS(klass);
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 
-    k->init = pci_ivshmem_init;
-    k->exit = pci_ivshmem_uninit;
+    k->realize = pci_ivshmem_realize;
+    k->exit = pci_ivshmem_exit;
+    k->config_write = ivshmem_write_config;
     k->vendor_id = PCI_VENDOR_ID_IVSHMEM;
     k->device_id = PCI_DEVICE_ID_IVSHMEM;
     k->class_id = PCI_CLASS_MEMORY_RAM;
     dc->reset = ivshmem_reset;
     dc->props = ivshmem_properties;
+    dc->vmsd = &ivshmem_vmsd;
     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+    dc->desc = "Inter-VM shared memory";
+}
+
+static void ivshmem_check_memdev_is_busy(Object *obj, const char *name,
+                                         Object *val, Error **errp)
+{
+    MemoryRegion *mr;
+
+    mr = host_memory_backend_get_memory(MEMORY_BACKEND(val), errp);
+    if (memory_region_is_mapped(mr)) {
+        char *path = object_get_canonical_path_component(val);
+        error_setg(errp, "can't use already busy memdev: %s", path);
+        g_free(path);
+    } else {
+        qdev_prop_allow_set_link_before_realize(obj, name, val, errp);
+    }
+}
+
+static void ivshmem_init(Object *obj)
+{
+    IVShmemState *s = IVSHMEM(obj);
+
+    object_property_add_link(obj, IVSHMEM_MEMDEV_PROP, TYPE_MEMORY_BACKEND,
+                             (Object **)&s->hostmem,
+                             ivshmem_check_memdev_is_busy,
+                             OBJ_PROP_LINK_UNREF_ON_RELEASE,
+                             &error_abort);
 }
 
 static const TypeInfo ivshmem_info = {
     .name          = TYPE_IVSHMEM,
     .parent        = TYPE_PCI_DEVICE,
     .instance_size = sizeof(IVShmemState),
+    .instance_init = ivshmem_init,
     .class_init    = ivshmem_class_init,
 };
 

diff --git a/hw/misc/macio/cuda.c b/hw/misc/macio/cuda.c
index 5d7043e..0fd75b3 100644
--- a/hw/misc/macio/cuda.c
+++ b/hw/misc/macio/cuda.c

@@ -738,6 +738,7 @@
     dc->reset = cuda_reset;
     dc->vmsd = &vmstate_cuda;
     dc->props = cuda_properties;
+    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
 }
 
 static const TypeInfo cuda_type_info = {

diff --git a/hw/misc/macio/macio.c b/hw/misc/macio/macio.c
index c661f86..adb990e 100644
--- a/hw/misc/macio/macio.c
+++ b/hw/misc/macio/macio.c

@@ -393,6 +393,7 @@
     k->vendor_id = PCI_VENDOR_ID_APPLE;
     k->class_id = PCI_CLASS_OTHERS << 8;
     dc->props = macio_properties;
+    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
 }
 
 static const TypeInfo macio_oldworld_type_info = {

diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c
index 1127223..3639fc1 100644
--- a/hw/net/cadence_gem.c
+++ b/hw/net/cadence_gem.c

@@ -964,6 +964,7 @@
 {
     int i;
     CadenceGEMState *s = CADENCE_GEM(d);
+    const uint8_t *a;
 
     DB_PRINT("\n");
 
@@ -982,6 +983,11 @@
     s->regs[GEM_DESCONF5] = 0x002f2145;
     s->regs[GEM_DESCONF6] = 0x00000200;
 
+    /* Set MAC address */
+    a = &s->conf.macaddr.a[0];
+    s->regs[GEM_SPADDR1LO] = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24);
+    s->regs[GEM_SPADDR1HI] = a[4] | (a[5] << 8);
+
     for (i = 0; i < 4; i++) {
         s->sar_active[i] = false;
     }

diff --git a/hw/net/imx_fec.c b/hw/net/imx_fec.c
index 725f3fa..c50bf7f 100644
--- a/hw/net/imx_fec.c
+++ b/hw/net/imx_fec.c

@@ -27,31 +27,29 @@
 /* For crc32 */
 #include <zlib.h>
 
-#ifndef IMX_FEC_DEBUG
-#define IMX_FEC_DEBUG          0
+#ifndef DEBUG_IMX_FEC
+#define DEBUG_IMX_FEC 0
 #endif
 
-#ifndef IMX_PHY_DEBUG
-#define IMX_PHY_DEBUG          0
-#endif
-
-#if IMX_FEC_DEBUG
-#define FEC_PRINTF(fmt, ...) \
-    do { fprintf(stderr, "%s[%s]: " fmt , TYPE_IMX_FEC, __func__, \
-                 ## __VA_ARGS__); \
+#define FEC_PRINTF(fmt, args...) \
+    do { \
+        if (DEBUG_IMX_FEC) { \
+            fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_FEC, \
+                                             __func__, ##args); \
+        } \
     } while (0)
-#else
-#define FEC_PRINTF(fmt, ...) do {} while (0)
+
+#ifndef DEBUG_IMX_PHY
+#define DEBUG_IMX_PHY 0
 #endif
 
-#if IMX_PHY_DEBUG
-#define PHY_PRINTF(fmt, ...) \
-    do { fprintf(stderr, "%s.phy[%s]: " fmt , TYPE_IMX_FEC, __func__, \
-                 ## __VA_ARGS__); \
+#define PHY_PRINTF(fmt, args...) \
+    do { \
+        if (DEBUG_IMX_PHY) { \
+            fprintf(stderr, "[%s.phy]%s: " fmt , TYPE_IMX_FEC, \
+                                                 __func__, ##args); \
+        } \
     } while (0)
-#else
-#define PHY_PRINTF(fmt, ...) do {} while (0)
-#endif
 
 static const VMStateDescription vmstate_imx_fec = {
     .name = TYPE_IMX_FEC,
@@ -182,12 +180,12 @@
     case 18:
     case 27:
     case 31:
-        qemu_log_mask(LOG_UNIMP, "%s.phy[%s]: reg %d not implemented\n",
+        qemu_log_mask(LOG_UNIMP, "[%s.phy]%s: reg %d not implemented\n",
                       TYPE_IMX_FEC, __func__, reg);
         val = 0;
         break;
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad address at offset %d\n",
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s.phy]%s: Bad address at offset %d\n",
                       TYPE_IMX_FEC, __func__, reg);
         val = 0;
         break;
@@ -230,11 +228,11 @@
     case 18:
     case 27:
     case 31:
-        qemu_log_mask(LOG_UNIMP, "%s.phy[%s]: reg %d not implemented\n",
+        qemu_log_mask(LOG_UNIMP, "[%s.phy)%s: reg %d not implemented\n",
                       TYPE_IMX_FEC, __func__, reg);
         break;
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s.phy[%s]: Bad address at offset %d\n",
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s.phy]%s: Bad address at offset %d\n",
                       TYPE_IMX_FEC, __func__, reg);
         break;
     }
@@ -357,7 +355,7 @@
 {
     IMXFECState *s = IMX_FEC(opaque);
 
-    FEC_PRINTF("reading from @ 0x%03x\n", (int)addr);
+    FEC_PRINTF("reading from @ 0x%" HWADDR_PRIx "\n", addr);
 
     switch (addr & 0x3ff) {
     case 0x004:
@@ -417,8 +415,8 @@
     case 0x308:
         return s->miigsk_enr;
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad address at offset %d\n",
-                      TYPE_IMX_FEC, __func__, (int)addr);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad address at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_FEC, __func__, addr);
         return 0;
     }
 }
@@ -428,7 +426,7 @@
 {
     IMXFECState *s = IMX_FEC(opaque);
 
-    FEC_PRINTF("writing 0x%08x @ 0x%03x\n", (int)value, (int)addr);
+    FEC_PRINTF("writing 0x%08x @ 0x%" HWADDR_PRIx "\n", (int)value, addr);
 
     switch (addr & 0x3ff) {
     case 0x004: /* EIR */
@@ -530,8 +528,8 @@
         s->miigsk_enr = (value & 0x2) ? 0x6 : 0;
         break;
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad address at offset %d\n",
-                      TYPE_IMX_FEC, __func__, (int)addr);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad address at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_FEC, __func__, addr);
         break;
     }
 
@@ -561,7 +559,7 @@
     FEC_PRINTF("len %d\n", (int)size);
 
     if (!s->rx_enabled) {
-        qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Unexpected packet\n",
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Unexpected packet\n",
                       TYPE_IMX_FEC, __func__);
         return 0;
     }
@@ -592,14 +590,16 @@
              * save the remainder for when more RX buffers are
              * available, or flag an error.
              */
-            qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Lost end of frame\n",
+            qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Lost end of frame\n",
                           TYPE_IMX_FEC, __func__);
             break;
         }
         buf_len = (size <= s->emrbr) ? size : s->emrbr;
         bd.length = buf_len;
         size -= buf_len;
-        FEC_PRINTF("rx_bd %x length %d\n", addr, bd.length);
+
+        FEC_PRINTF("rx_bd 0x%x length %d\n", addr, bd.length);
+
         /* The last 4 bytes are the CRC.  */
         if (size < 4) {
             buf_len += size - 4;

diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 3c5e10d..5e3a233 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c

@@ -1289,6 +1289,10 @@
 static void vmxnet3_fill_stats(VMXNET3State *s)
 {
     int i;
+
+    if (!s->device_active)
+        return;
+
     for (i = 0; i < s->txq_num; i++) {
         cpu_physical_memory_write(s->txq_descr[i].tx_stats_pa,
                                   &s->txq_descr[i].txq_stats,

diff --git a/hw/nvram/mac_nvram.c b/hw/nvram/mac_nvram.c
index d35f8a3..9f16566 100644
--- a/hw/nvram/mac_nvram.c
+++ b/hw/nvram/mac_nvram.c

@@ -123,6 +123,7 @@
     dc->reset = macio_nvram_reset;
     dc->vmsd = &vmstate_macio_nvram;
     dc->props = macio_nvram_properties;
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 }
 
 static const TypeInfo macio_nvram_type_info = {

diff --git a/hw/pci-host/grackle.c b/hw/pci-host/grackle.c
index bfe707a..ea31b72 100644
--- a/hw/pci-host/grackle.c
+++ b/hw/pci-host/grackle.c

@@ -146,8 +146,10 @@
 static void pci_grackle_class_init(ObjectClass *klass, void *data)
 {
     SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
     k->init = pci_grackle_init_device;
+    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
 }
 
 static const TypeInfo grackle_pci_host_info = {

diff --git a/hw/pci-host/uninorth.c b/hw/pci-host/uninorth.c
index f0144eb..215b64f 100644
--- a/hw/pci-host/uninorth.c
+++ b/hw/pci-host/uninorth.c

@@ -446,8 +446,10 @@
 static void pci_unin_main_class_init(ObjectClass *klass, void *data)
 {
     SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
     sbc->init = pci_unin_main_init_device;
+    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
 }
 
 static const TypeInfo pci_unin_main_info = {
@@ -460,8 +462,10 @@
 static void pci_u3_agp_class_init(ObjectClass *klass, void *data)
 {
     SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
     sbc->init = pci_u3_agp_init_device;
+    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
 }
 
 static const TypeInfo pci_u3_agp_info = {
@@ -474,8 +478,10 @@
 static void pci_unin_agp_class_init(ObjectClass *klass, void *data)
 {
     SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
     sbc->init = pci_unin_agp_init_device;
+    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
 }
 
 static const TypeInfo pci_unin_agp_info = {
@@ -488,8 +494,10 @@
 static void pci_unin_internal_class_init(ObjectClass *klass, void *data)
 {
     SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
     sbc->init = pci_unin_internal_init_device;
+    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
 }
 
 static const TypeInfo pci_unin_internal_info = {

diff --git a/hw/pci/msix.c b/hw/pci/msix.c
index 2fdada4..64c93d8 100644
--- a/hw/pci/msix.c
+++ b/hw/pci/msix.c

@@ -200,8 +200,14 @@
     return pci_get_long(dev->msix_pba + addr);
 }
 
+static void msix_pba_mmio_write(void *opaque, hwaddr addr,
+                                uint64_t val, unsigned size)
+{
+}
+
 static const MemoryRegionOps msix_pba_mmio_ops = {
     .read = msix_pba_mmio_read,
+    .write = msix_pba_mmio_write,
     .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 4,

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index b0bf540..168b9cc 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c

@@ -847,6 +847,9 @@
     PCIConfigWriteFunc *config_write = pc->config_write;
     Error *local_err = NULL;
     AddressSpace *dma_as;
+    DeviceState *dev = DEVICE(pci_dev);
+
+    pci_dev->bus = bus;
 
     if (devfn < 0) {
         for(devfn = bus->devfn_min ; devfn < ARRAY_SIZE(bus->devices);
@@ -864,9 +867,17 @@
                    PCI_SLOT(devfn), PCI_FUNC(devfn), name,
                    bus->devices[devfn]->name);
         return NULL;
+    } else if (dev->hotplugged &&
+               pci_get_function_0(pci_dev)) {
+        error_setg(errp, "PCI: slot %d function 0 already ocuppied by %s,"
+                   " new func %s cannot be exposed to guest.",
+                   PCI_SLOT(devfn),
+                   bus->devices[PCI_DEVFN(PCI_SLOT(devfn), 0)]->name,
+                   name);
+
+       return NULL;
     }
 
-    pci_dev->bus = bus;
     pci_dev->devfn = devfn;
     dma_as = pci_device_iommu_address_space(pci_dev);
 
@@ -2454,6 +2465,33 @@
     pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
 }
 
+static bool pcie_has_upstream_port(PCIDevice *dev)
+{
+    PCIDevice *parent_dev = pci_bridge_get_device(dev->bus);
+
+    /* Device associated with an upstream port.
+     * As there are several types of these, it's easier to check the
+     * parent device: upstream ports are always connected to
+     * root or downstream ports.
+     */
+    return parent_dev &&
+        pci_is_express(parent_dev) &&
+        parent_dev->exp.exp_cap &&
+        (pcie_cap_get_type(parent_dev) == PCI_EXP_TYPE_ROOT_PORT ||
+         pcie_cap_get_type(parent_dev) == PCI_EXP_TYPE_DOWNSTREAM);
+}
+
+PCIDevice *pci_get_function_0(PCIDevice *pci_dev)
+{
+    if(pcie_has_upstream_port(pci_dev)) {
+        /* With an upstream PCIe port, we only support 1 device at slot 0 */
+        return pci_dev->bus->devices[0];
+    } else {
+        /* Other bus types might support multiple devices at slots 0-31 */
+        return pci_dev->bus->devices[PCI_DEVFN(PCI_SLOT(pci_dev->devfn), 0)];
+    }
+}
+
 static const TypeInfo pci_device_type_info = {
     .name = TYPE_PCI_DEVICE,
     .parent = TYPE_DEVICE,

diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c
index 3e26f92..49f59a5 100644
--- a/hw/pci/pci_host.c
+++ b/hw/pci/pci_host.c

@@ -20,6 +20,7 @@
 
 #include "hw/pci/pci.h"
 #include "hw/pci/pci_host.h"
+#include "hw/pci/pci_bus.h"
 #include "trace.h"
 
 /* debug PCI */
@@ -52,6 +53,13 @@
                                   uint32_t limit, uint32_t val, uint32_t len)
 {
     assert(len <= 4);
+    /* non-zero functions are only exposed when function 0 is present,
+     * allowing direct removal of unexposed functions.
+     */
+    if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) {
+        return;
+    }
+
     trace_pci_cfg_write(pci_dev->name, PCI_SLOT(pci_dev->devfn),
                         PCI_FUNC(pci_dev->devfn), addr, val);
     pci_dev->config_write(pci_dev, addr, val, MIN(len, limit - addr));
@@ -63,6 +71,13 @@
     uint32_t ret;
 
     assert(len <= 4);
+    /* non-zero functions are only exposed when function 0 is present,
+     * allowing direct removal of unexposed functions.
+     */
+    if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) {
+        return ~0x0;
+    }
+
     ret = pci_dev->config_read(pci_dev, addr, MIN(len, limit - addr));
     trace_pci_cfg_read(pci_dev->name, PCI_SLOT(pci_dev->devfn),
                        PCI_FUNC(pci_dev->devfn), addr, ret);

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 6e28985..32c65c2 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c

@@ -249,25 +249,43 @@
         return;
     }
 
-    /* TODO: multifunction hot-plug.
-     * Right now, only a device of function = 0 is allowed to be
-     * hot plugged/unplugged.
+    /* To enable multifunction hot-plug, we just ensure the function
+     * 0 added last. When function 0 is added, we set the sltsta and
+     * inform OS via event notification.
      */
-    assert(PCI_FUNC(pci_dev->devfn) == 0);
+    if (pci_get_function_0(pci_dev)) {
+        pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
+                                   PCI_EXP_SLTSTA_PDS);
+        pcie_cap_slot_event(PCI_DEVICE(hotplug_dev),
+                            PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP);
+    }
+}
 
-    pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
-                               PCI_EXP_SLTSTA_PDS);
-    pcie_cap_slot_event(PCI_DEVICE(hotplug_dev),
-                        PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP);
+static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque)
+{
+    object_unparent(OBJECT(dev));
 }
 
 void pcie_cap_slot_hot_unplug_request_cb(HotplugHandler *hotplug_dev,
                                          DeviceState *dev, Error **errp)
 {
     uint8_t *exp_cap;
+    PCIDevice *pci_dev = PCI_DEVICE(dev);
+    PCIBus *bus = pci_dev->bus;
 
     pcie_cap_slot_hotplug_common(PCI_DEVICE(hotplug_dev), dev, &exp_cap, errp);
 
+    /* In case user cancel the operation of multi-function hot-add,
+     * remove the function that is unexposed to guest individually,
+     * without interaction with guest.
+     */
+    if (pci_dev->devfn &&
+        !bus->devices[0]) {
+        pcie_unplug_device(bus, pci_dev, NULL);
+
+        return;
+    }
+
     pcie_cap_slot_push_attention_button(PCI_DEVICE(hotplug_dev));
 }
 
@@ -378,11 +396,6 @@
     hotplug_event_update_event_status(dev);
 }
 
-static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque)
-{
-    object_unparent(OBJECT(dev));
-}
-
 void pcie_cap_slot_write_config(PCIDevice *dev,
                                 uint32_t addr, uint32_t val, int len)
 {

diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c
index d95222b..5ad28f7 100644
--- a/hw/ppc/prep.c
+++ b/hw/ppc/prep.c

@@ -42,11 +42,9 @@
 #include "sysemu/arch_init.h"
 #include "sysemu/qtest.h"
 #include "exec/address-spaces.h"
+#include "trace.h"
 #include "elf.h"
 
-//#define HARD_DEBUG_PPC_IO
-//#define DEBUG_PPC_IO
-
 /* SMP is not enabled, for now */
 #define MAX_CPUS 1
 
@@ -57,26 +55,6 @@
 #define KERNEL_LOAD_ADDR 0x01000000
 #define INITRD_LOAD_ADDR 0x01800000
 
-#if defined (HARD_DEBUG_PPC_IO) && !defined (DEBUG_PPC_IO)
-#define DEBUG_PPC_IO
-#endif
-
-#if defined (HARD_DEBUG_PPC_IO)
-#define PPC_IO_DPRINTF(fmt, ...)                         \
-do {                                                     \
-    if (qemu_loglevel_mask(CPU_LOG_IOPORT)) {            \
-        qemu_log("%s: " fmt, __func__ , ## __VA_ARGS__); \
-    } else {                                             \
-        printf("%s : " fmt, __func__ , ## __VA_ARGS__);  \
-    }                                                    \
-} while (0)
-#elif defined (DEBUG_PPC_IO)
-#define PPC_IO_DPRINTF(fmt, ...) \
-qemu_log_mask(CPU_LOG_IOPORT, fmt, ## __VA_ARGS__)
-#else
-#define PPC_IO_DPRINTF(fmt, ...) do { } while (0)
-#endif
-
 /* Constants for devices init */
 static const int ide_iobase[2] = { 0x1f0, 0x170 };
 static const int ide_iobase2[2] = { 0x3f6, 0x376 };
@@ -199,8 +177,7 @@
 {
     sysctrl_t *sysctrl = opaque;
 
-    PPC_IO_DPRINTF("0x%08" PRIx32 " => 0x%02" PRIx32 "\n",
-                   addr - PPC_IO_BASE, val);
+    trace_prep_io_800_writeb(addr - PPC_IO_BASE, val);
     switch (addr) {
     case 0x0092:
         /* Special port 92 */
@@ -327,8 +304,7 @@
         printf("ERROR: unaffected IO port: %04" PRIx32 " read\n", addr);
         break;
     }
-    PPC_IO_DPRINTF("0x%08" PRIx32 " <= 0x%02" PRIx32 "\n",
-                   addr - PPC_IO_BASE, retval);
+    trace_prep_io_800_readb(addr - PPC_IO_BASE, retval);
 
     return retval;
 }

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 3852ad1..0ed8527 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c

@@ -597,6 +597,24 @@
     uint32_t vcpus_per_socket = smp_threads * smp_cores;
     uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
 
+    /* Note: we keep CI large pages off for now because a 64K capable guest
+     * provisioned with large pages might otherwise try to map a qemu
+     * framebuffer (or other kind of memory mapped PCI BAR) using 64K pages
+     * even if that qemu runs on a 4k host.
+     *
+     * We can later add this bit back when we are confident this is not
+     * an issue (!HV KVM or 64K host)
+     */
+    uint8_t pa_features_206[] = { 6, 0,
+        0xf6, 0x1f, 0xc7, 0x00, 0x80, 0xc0 };
+    uint8_t pa_features_207[] = { 24, 0,
+        0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0,
+        0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
+        0x80, 0x00, 0x80, 0x00, 0x80, 0x00 };
+    uint8_t *pa_features;
+    size_t pa_size;
+
     _FDT((fdt_setprop_cell(fdt, offset, "reg", index)));
     _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));
 
@@ -625,6 +643,7 @@
 
     _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq)));
     _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq)));
+    _FDT((fdt_setprop_cell(fdt, offset, "slb-size", env->slb_nr)));
     _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr)));
     _FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
     _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
@@ -662,6 +681,19 @@
                           page_sizes_prop, page_sizes_prop_size)));
     }
 
+    /* Do the ibm,pa-features property, adjust it for ci-large-pages */
+    if (env->mmu_model == POWERPC_MMU_2_06) {
+        pa_features = pa_features_206;
+        pa_size = sizeof(pa_features_206);
+    } else /* env->mmu_model == POWERPC_MMU_2_07 */ {
+        pa_features = pa_features_207;
+        pa_size = sizeof(pa_features_207);
+    }
+    if (env->ci_large_pages) {
+        pa_features[3] |= 0x20;
+    }
+    _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", pa_features, pa_size)));
+
     _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
                            cs->cpu_index / vcpus_per_socket)));
 
@@ -979,7 +1011,7 @@
 #define CLEAN_HPTE(_hpte)  ((*(uint64_t *)(_hpte)) &= tswap64(~HPTE64_V_HPTE_DIRTY))
 #define DIRTY_HPTE(_hpte)  ((*(uint64_t *)(_hpte)) |= tswap64(HPTE64_V_HPTE_DIRTY))
 
-static void spapr_reset_htab(sPAPRMachineState *spapr)
+static void spapr_alloc_htab(sPAPRMachineState *spapr)
 {
     long shift;
     int index;
@@ -992,20 +1024,47 @@
 
     if (shift > 0) {
         /* Kernel handles htab, we don't need to allocate one */
+        if (shift != spapr->htab_shift) {
+            error_setg(&error_abort, "Failed to allocate HTAB of requested size, try with smaller maxmem");
+        }
+
         spapr->htab_shift = shift;
         kvmppc_kern_htab = true;
+    } else {
+        /* Allocate htab */
+        spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr));
+
+        /* And clear it */
+        memset(spapr->htab, 0, HTAB_SIZE(spapr));
+
+        for (index = 0; index < HTAB_SIZE(spapr) / HASH_PTE_SIZE_64; index++) {
+            DIRTY_HPTE(HPTE(spapr->htab, index));
+        }
+    }
+}
+
+/*
+ * Clear HTAB entries during reset.
+ *
+ * If host kernel has allocated HTAB, KVM_PPC_ALLOCATE_HTAB ioctl is
+ * used to clear HTAB. Otherwise QEMU-allocated HTAB is cleared manually.
+ */
+static void spapr_reset_htab(sPAPRMachineState *spapr)
+{
+    long shift;
+    int index;
+
+    shift = kvmppc_reset_htab(spapr->htab_shift);
+    if (shift > 0) {
+        if (shift != spapr->htab_shift) {
+            error_setg(&error_abort, "Requested HTAB allocation failed during reset");
+        }
 
         /* Tell readers to update their file descriptor */
         if (spapr->htab_fd >= 0) {
             spapr->htab_fd_stale = true;
         }
     } else {
-        if (!spapr->htab) {
-            /* Allocate an htab if we don't yet have one */
-            spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr));
-        }
-
-        /* And clear it */
         memset(spapr->htab, 0, HTAB_SIZE(spapr));
 
         for (index = 0; index < HTAB_SIZE(spapr) / HASH_PTE_SIZE_64; index++) {
@@ -1710,6 +1769,7 @@
         }
         spapr->htab_shift++;
     }
+    spapr_alloc_htab(spapr);
 
     /* Set up Interrupt Controller before we create the VCPUs */
     spapr->icp = xics_system_init(machine,
@@ -2097,7 +2157,7 @@
         goto out;
     }
 
-    pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, false, &local_err);
+    pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, &local_err);
     if (local_err) {
         goto out;
     }
@@ -2225,7 +2285,11 @@
     },
 };
 
+#define SPAPR_COMPAT_2_4 \
+        HW_COMPAT_2_4
+
 #define SPAPR_COMPAT_2_3 \
+        SPAPR_COMPAT_2_4 \
         HW_COMPAT_2_3 \
         {\
             .driver   = "spapr-pci-host-bridge",\
@@ -2339,11 +2403,16 @@
 
 static void spapr_machine_2_4_class_init(ObjectClass *oc, void *data)
 {
+    static GlobalProperty compat_props[] = {
+        SPAPR_COMPAT_2_4
+        { /* end of list */ }
+    };
     MachineClass *mc = MACHINE_CLASS(oc);
 
     mc->desc = "pSeries Logical Partition (PAPR compliant) v2.4";
     mc->alias = "pseries";
     mc->is_default = 0;
+    mc->compat_props = compat_props;
 }
 
 static const TypeInfo spapr_machine_2_4_info = {

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index f61504e..ed28565 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c

@@ -146,7 +146,7 @@
         tcet->table = kvmppc_create_spapr_tce(tcet->liobn,
                                               window_size,
                                               &tcet->fd,
-                                              tcet->vfio_accel);
+                                              tcet->need_vfio);
     }
 
     if (!tcet->table) {
@@ -168,11 +168,43 @@
     return 0;
 }
 
+void spapr_tce_set_need_vfio(sPAPRTCETable *tcet, bool need_vfio)
+{
+    size_t table_size = tcet->nb_table * sizeof(uint64_t);
+    void *newtable;
+
+    if (need_vfio == tcet->need_vfio) {
+        /* Nothing to do */
+        return;
+    }
+
+    if (!need_vfio) {
+        /* FIXME: We don't support transition back to KVM accelerated
+         * TCEs yet */
+        return;
+    }
+
+    tcet->need_vfio = true;
+
+    if (tcet->fd < 0) {
+        /* Table is already in userspace, nothing to be do */
+        return;
+    }
+
+    newtable = g_malloc(table_size);
+    memcpy(newtable, tcet->table, table_size);
+
+    kvmppc_remove_spapr_tce(tcet->table, tcet->fd, tcet->nb_table);
+
+    tcet->fd = -1;
+    tcet->table = newtable;
+}
+
 sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
                                    uint64_t bus_offset,
                                    uint32_t page_shift,
                                    uint32_t nb_table,
-                                   bool vfio_accel)
+                                   bool need_vfio)
 {
     sPAPRTCETable *tcet;
     char tmp[64];
@@ -192,7 +224,7 @@
     tcet->bus_offset = bus_offset;
     tcet->page_shift = page_shift;
     tcet->nb_table = nb_table;
-    tcet->vfio_accel = vfio_accel;
+    tcet->need_vfio = need_vfio;
 
     snprintf(tmp, sizeof(tmp), "tce-table-%x", liobn);
     object_property_add_child(OBJECT(owner), tmp, OBJECT(tcet), NULL);

diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 617b7f3..55fa8db 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c

@@ -1083,6 +1083,12 @@
     void *fdt = NULL;
     int fdt_start_offset = 0, fdt_size;
 
+    if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
+        sPAPRTCETable *tcet = spapr_tce_find_by_liobn(phb->dma_liobn);
+
+        spapr_tce_set_need_vfio(tcet, true);
+    }
+
     if (dev->hotplugged) {
         fdt = create_device_tree(&fdt_size);
         fdt_start_offset = spapr_create_pci_child_dt(phb, pdev, fdt, 0);
@@ -1387,7 +1393,7 @@
     sPAPRTCETable *tcet;
     uint32_t nb_table;
 
-    nb_table = SPAPR_PCI_DMA32_SIZE >> SPAPR_TCE_PAGE_SHIFT;
+    nb_table = sphb->dma_win_size >> SPAPR_TCE_PAGE_SHIFT;
     tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
                                0, SPAPR_TCE_PAGE_SHIFT, nb_table, false);
     if (!tcet) {
@@ -1397,7 +1403,7 @@
     }
 
     /* Register default 32bit DMA window */
-    memory_region_add_subregion(&sphb->iommu_root, 0,
+    memory_region_add_subregion(&sphb->iommu_root, sphb->dma_win_addr,
                                 spapr_tce_get_iommu(tcet));
 }
 
@@ -1430,6 +1436,9 @@
                        SPAPR_PCI_IO_WIN_SIZE),
     DEFINE_PROP_BOOL("dynamic-reconfiguration", sPAPRPHBState, dr_enabled,
                      true),
+    /* Default DMA window is 0..1GB */
+    DEFINE_PROP_UINT64("dma_win_addr", sPAPRPHBState, dma_win_addr, 0),
+    DEFINE_PROP_UINT64("dma_win_size", sPAPRPHBState, dma_win_size, 0x40000000),
     DEFINE_PROP_END_OF_LIST(),
 };
 

diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index faba773..84221f4 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c

@@ -20,6 +20,7 @@
 #include "qemu/config-file.h"
 #include "s390-pci-bus.h"
 #include "hw/s390x/storage-keys.h"
+#include "hw/compat.h"
 
 #define TYPE_S390_CCW_MACHINE               "s390-ccw-machine"
 
@@ -236,6 +237,7 @@
 };
 
 #define CCW_COMPAT_2_4 \
+        HW_COMPAT_2_4 \
         {\
             .driver   = TYPE_S390_SKEYS,\
             .property = "migration-enabled",\

diff --git a/hw/scsi/spapr_vscsi.c b/hw/scsi/spapr_vscsi.c
index 891424f..f4f5140 100644
--- a/hw/scsi/spapr_vscsi.c
+++ b/hw/scsi/spapr_vscsi.c

@@ -750,7 +750,6 @@
     len = n+8;
 
     resp_data = g_malloc0(len);
-    memset(resp_data, 0, len);
     stl_be_p(resp_data, n);
     i = found_lun0 ? 8 : 16;
     QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) {

diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index 1248fd9..0d8d71e 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c

@@ -60,7 +60,7 @@
     r = g_new(VirtIOSCSIVring, 1);
     r->host_notifier = *virtio_queue_get_host_notifier(vq);
     r->guest_notifier = *virtio_queue_get_guest_notifier(vq);
-    aio_set_event_notifier(s->ctx, &r->host_notifier, handler);
+    aio_set_event_notifier(s->ctx, &r->host_notifier, true, handler);
 
     r->parent = s;
 
@@ -71,7 +71,7 @@
     return r;
 
 fail_vring:
-    aio_set_event_notifier(s->ctx, &r->host_notifier, NULL);
+    aio_set_event_notifier(s->ctx, &r->host_notifier, true, NULL);
     k->set_host_notifier(qbus->parent, n, false);
     g_free(r);
     return NULL;
@@ -162,14 +162,17 @@
     int i;
 
     if (s->ctrl_vring) {
-        aio_set_event_notifier(s->ctx, &s->ctrl_vring->host_notifier, NULL);
+        aio_set_event_notifier(s->ctx, &s->ctrl_vring->host_notifier,
+                               true, NULL);
     }
     if (s->event_vring) {
-        aio_set_event_notifier(s->ctx, &s->event_vring->host_notifier, NULL);
+        aio_set_event_notifier(s->ctx, &s->event_vring->host_notifier,
+                               true, NULL);
     }
     if (s->cmd_vrings) {
         for (i = 0; i < vs->conf.num_queues && s->cmd_vrings[i]; i++) {
-            aio_set_event_notifier(s->ctx, &s->cmd_vrings[i]->host_notifier, NULL);
+            aio_set_event_notifier(s->ctx, &s->cmd_vrings[i]->host_notifier,
+                                   true, NULL);
         }
     }
 }
@@ -290,10 +293,13 @@
 
     aio_context_acquire(s->ctx);
 
-    aio_set_event_notifier(s->ctx, &s->ctrl_vring->host_notifier, NULL);
-    aio_set_event_notifier(s->ctx, &s->event_vring->host_notifier, NULL);
+    aio_set_event_notifier(s->ctx, &s->ctrl_vring->host_notifier,
+                           true, NULL);
+    aio_set_event_notifier(s->ctx, &s->event_vring->host_notifier,
+                           true, NULL);
     for (i = 0; i < vs->conf.num_queues; i++) {
-        aio_set_event_notifier(s->ctx, &s->cmd_vrings[i]->host_notifier, NULL);
+        aio_set_event_notifier(s->ctx, &s->cmd_vrings[i]->host_notifier,
+                               true, NULL);
     }
 
     blk_drain_all(); /* ensure there are no in-flight requests */

diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 20885fb..7655401 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c

@@ -205,20 +205,8 @@
     assert(n < vs->conf.num_queues);
     req = virtio_scsi_init_req(s, vs->cmd_vqs[n]);
     qemu_get_buffer(f, (unsigned char *)&req->elem, sizeof(req->elem));
-    /* TODO: add a way for SCSIBusInfo's load_request to fail,
-     * and fail migration instead of asserting here.
-     * When we do, we might be able to re-enable NDEBUG below.
-     */
-#ifdef NDEBUG
-#error building with NDEBUG is not supported
-#endif
-    assert(req->elem.in_num <= ARRAY_SIZE(req->elem.in_sg));
-    assert(req->elem.out_num <= ARRAY_SIZE(req->elem.out_sg));
 
-    virtqueue_map_sg(req->elem.in_sg, req->elem.in_addr,
-                     req->elem.in_num, 1);
-    virtqueue_map_sg(req->elem.out_sg, req->elem.out_addr,
-                     req->elem.out_num, 0);
+    virtqueue_map(&req->elem);
 
     if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICmdReq) + vs->cdb_size,
                               sizeof(VirtIOSCSICmdResp) + vs->sense_size) < 0) {
@@ -665,6 +653,11 @@
 static void virtio_scsi_save(QEMUFile *f, void *opaque)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
+    VirtIOSCSI *s = VIRTIO_SCSI(vdev);
+
+    if (s->dataplane_started) {
+        virtio_scsi_dataplane_stop(s);
+    }
     virtio_save(vdev, f);
 }
 

diff --git a/hw/sd/milkymist-memcard.c b/hw/sd/milkymist-memcard.c
index 2209ef1..b430d56 100644
--- a/hw/sd/milkymist-memcard.c
+++ b/hw/sd/milkymist-memcard.c

@@ -28,7 +28,7 @@
 #include "qemu/error-report.h"
 #include "sysemu/block-backend.h"
 #include "sysemu/blockdev.h"
-#include "hw/sd.h"
+#include "hw/sd/sd.h"
 
 enum {
     ENABLE_CMD_TX   = (1<<0),

diff --git a/hw/sd/omap_mmc.c b/hw/sd/omap_mmc.c
index 35d8033..5bc4719 100644
--- a/hw/sd/omap_mmc.c
+++ b/hw/sd/omap_mmc.c

@@ -18,7 +18,7 @@
  */
 #include "hw/hw.h"
 #include "hw/arm/omap.h"
-#include "hw/sd.h"
+#include "hw/sd/sd.h"
 
 struct omap_mmc_s {
     qemu_irq irq;

diff --git a/hw/sd/pl181.c b/hw/sd/pl181.c
index 5242176..326c53a 100644
--- a/hw/sd/pl181.c
+++ b/hw/sd/pl181.c

@@ -10,7 +10,7 @@
 #include "sysemu/block-backend.h"
 #include "sysemu/blockdev.h"
 #include "hw/sysbus.h"
-#include "hw/sd.h"
+#include "hw/sd/sd.h"
 
 //#define DEBUG_PL181 1
 

diff --git a/hw/sd/pxa2xx_mmci.c b/hw/sd/pxa2xx_mmci.c
index d1fe6d5..b217080 100644
--- a/hw/sd/pxa2xx_mmci.c
+++ b/hw/sd/pxa2xx_mmci.c

@@ -12,7 +12,7 @@
 
 #include "hw/hw.h"
 #include "hw/arm/pxa.h"
-#include "hw/sd.h"
+#include "hw/sd/sd.h"
 #include "hw/qdev.h"
 
 struct PXA2xxMMCIState {

diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index 393a75c..ce4d44b 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c

@@ -31,7 +31,7 @@
 
 #include "hw/hw.h"
 #include "sysemu/block-backend.h"
-#include "hw/sd.h"
+#include "hw/sd/sd.h"
 #include "qemu/bitmap.h"
 
 //#define DEBUG_SD 1

diff --git a/hw/sd/sdhci.h b/hw/sd/sdhci-internal.h
similarity index 75%
rename from hw/sd/sdhci.h
rename to hw/sd/sdhci-internal.h
index e2de92d..c712daf 100644
--- a/hw/sd/sdhci.h
+++ b/hw/sd/sdhci-internal.h

@@ -21,15 +21,10 @@
  * You should have received a copy of the GNU General Public License along
  * with this program; if not, see <http://www.gnu.org/licenses/>.
  */
+#ifndef SDHCI_INTERNAL_H
+#define SDHCI_INTERNAL_H
 
-#ifndef SDHCI_H
-#define SDHCI_H
-
-#include "qemu-common.h"
-#include "hw/block/block.h"
-#include "hw/pci/pci.h"
-#include "hw/sysbus.h"
-#include "hw/sd.h"
+#include "hw/sd/sdhci.h"
 
 /* R/W SDMA System Address register 0x0 */
 #define SDHC_SYSAD                     0x00
@@ -232,66 +227,6 @@
     sdhc_gap_write  = 2   /* SDHC stopped at block gap during write operation */
 };
 
-/* SD/MMC host controller state */
-typedef struct SDHCIState {
-    union {
-        PCIDevice pcidev;
-        SysBusDevice busdev;
-    };
-    SDState *card;
-    MemoryRegion iomem;
-    BlockConf conf;
-
-    QEMUTimer *insert_timer;       /* timer for 'changing' sd card. */
-    QEMUTimer *transfer_timer;
-    qemu_irq eject_cb;
-    qemu_irq ro_cb;
-    qemu_irq irq;
-
-    uint32_t sdmasysad;    /* SDMA System Address register */
-    uint16_t blksize;      /* Host DMA Buff Boundary and Transfer BlkSize Reg */
-    uint16_t blkcnt;       /* Blocks count for current transfer */
-    uint32_t argument;     /* Command Argument Register */
-    uint16_t trnmod;       /* Transfer Mode Setting Register */
-    uint16_t cmdreg;       /* Command Register */
-    uint32_t rspreg[4];    /* Response Registers 0-3 */
-    uint32_t prnsts;       /* Present State Register */
-    uint8_t  hostctl;      /* Host Control Register */
-    uint8_t  pwrcon;       /* Power control Register */
-    uint8_t  blkgap;       /* Block Gap Control Register */
-    uint8_t  wakcon;       /* WakeUp Control Register */
-    uint16_t clkcon;       /* Clock control Register */
-    uint8_t  timeoutcon;   /* Timeout Control Register */
-    uint8_t  admaerr;      /* ADMA Error Status Register */
-    uint16_t norintsts;    /* Normal Interrupt Status Register */
-    uint16_t errintsts;    /* Error Interrupt Status Register */
-    uint16_t norintstsen;  /* Normal Interrupt Status Enable Register */
-    uint16_t errintstsen;  /* Error Interrupt Status Enable Register */
-    uint16_t norintsigen;  /* Normal Interrupt Signal Enable Register */
-    uint16_t errintsigen;  /* Error Interrupt Signal Enable Register */
-    uint16_t acmd12errsts; /* Auto CMD12 error status register */
-    uint64_t admasysaddr;  /* ADMA System Address Register */
-
-    uint32_t capareg;      /* Capabilities Register */
-    uint32_t maxcurr;      /* Maximum Current Capabilities Register */
-    uint8_t  *fifo_buffer; /* SD host i/o FIFO buffer */
-    uint32_t buf_maxsz;
-    uint16_t data_count;   /* current element in FIFO buffer */
-    uint8_t  stopped_state;/* Current SDHC state */
-    /* Buffer Data Port Register - virtual access point to R and W buffers */
-    /* Software Reset Register - always reads as 0 */
-    /* Force Event Auto CMD12 Error Interrupt Reg - write only */
-    /* Force Event Error Interrupt Register- write only */
-    /* RO Host Controller Version Register always reads as 0x2401 */
-} SDHCIState;
-
 extern const VMStateDescription sdhci_vmstate;
 
-#define TYPE_PCI_SDHCI "sdhci-pci"
-#define PCI_SDHCI(obj) OBJECT_CHECK(SDHCIState, (obj), TYPE_PCI_SDHCI)
-
-#define TYPE_SYSBUS_SDHCI "generic-sdhci"
-#define SYSBUS_SDHCI(obj)                               \
-     OBJECT_CHECK(SDHCIState, (obj), TYPE_SYSBUS_SDHCI)
-
-#endif /* SDHCI_H */
+#endif

diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c
index 7f73527..d70d1a6 100644
--- a/hw/sd/sdhci.c
+++ b/hw/sd/sdhci.c

@@ -29,8 +29,7 @@
 #include "sysemu/dma.h"
 #include "qemu/timer.h"
 #include "qemu/bitops.h"
-
-#include "sdhci.h"
+#include "sdhci-internal.h"
 
 /* host controller debug messages */
 #ifndef SDHC_DEBUG

diff --git a/hw/sd/ssi-sd.c b/hw/sd/ssi-sd.c
index e4b2d4f..c49ff62 100644
--- a/hw/sd/ssi-sd.c
+++ b/hw/sd/ssi-sd.c

@@ -13,7 +13,7 @@
 #include "sysemu/block-backend.h"
 #include "sysemu/blockdev.h"
 #include "hw/ssi.h"
-#include "hw/sd.h"
+#include "hw/sd/sd.h"
 
 //#define DEBUG_SSI_SD 1
 

diff --git a/hw/timer/imx_epit.c b/hw/timer/imx_epit.c
index 9649851..967be4a 100644
--- a/hw/timer/imx_epit.c
+++ b/hw/timer/imx_epit.c

@@ -16,8 +16,17 @@
 #include "hw/misc/imx_ccm.h"
 #include "qemu/main-loop.h"
 
-#define DEBUG_TIMER 0
-#if DEBUG_TIMER
+#ifndef DEBUG_IMX_EPIT
+#define DEBUG_IMX_EPIT 0
+#endif
+
+#define DPRINTF(fmt, args...) \
+    do { \
+        if (DEBUG_IMX_EPIT) { \
+            fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_EPIT, \
+                                             __func__, ##args); \
+        } \
+    } while (0)
 
 static char const *imx_epit_reg_name(uint32_t reg)
 {
@@ -37,24 +46,6 @@
     }
 }
 
-#  define DPRINTF(fmt, args...) \
-    do { fprintf(stderr, "%s: " fmt , __func__, ##args); } while (0)
-#else
-#  define DPRINTF(fmt, args...) do {} while (0)
-#endif
-
-/*
- * Define to 1 for messages about attempts to
- * access unimplemented registers or similar.
- */
-#define DEBUG_IMPLEMENTATION 1
-#if DEBUG_IMPLEMENTATION
-#  define IPRINTF(fmt, args...) \
-          do { fprintf(stderr, "%s: " fmt, __func__, ##args); } while (0)
-#else
-#  define IPRINTF(fmt, args...) do {} while (0)
-#endif
-
 /*
  * Exact clock frequencies vary from board to board.
  * These are typical.
@@ -136,9 +127,8 @@
 {
     IMXEPITState *s = IMX_EPIT(opaque);
     uint32_t reg_value = 0;
-    uint32_t reg = offset >> 2;
 
-    switch (reg) {
+    switch (offset >> 2) {
     case 0: /* Control Register */
         reg_value = s->cr;
         break;
@@ -161,11 +151,12 @@
         break;
 
     default:
-        IPRINTF("Bad offset %x\n", reg);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_EPIT, __func__, offset);
         break;
     }
 
-    DPRINTF("(%s) = 0x%08x\n", imx_epit_reg_name(reg), reg_value);
+    DPRINTF("(%s) = 0x%08x\n", imx_epit_reg_name(offset >> 2), reg_value);
 
     return reg_value;
 }
@@ -190,12 +181,12 @@
                            unsigned size)
 {
     IMXEPITState *s = IMX_EPIT(opaque);
-    uint32_t reg = offset >> 2;
     uint64_t oldcr;
 
-    DPRINTF("(%s, value = 0x%08x)\n", imx_epit_reg_name(reg), (uint32_t)value);
+    DPRINTF("(%s, value = 0x%08x)\n", imx_epit_reg_name(offset >> 2),
+            (uint32_t)value);
 
-    switch (reg) {
+    switch (offset >> 2) {
     case 0: /* CR */
 
         oldcr = s->cr;
@@ -271,7 +262,8 @@
         break;
 
     default:
-        IPRINTF("Bad offset %x\n", reg);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_EPIT, __func__, offset);
 
         break;
     }

diff --git a/hw/timer/imx_gpt.c b/hw/timer/imx_gpt.c
index 4bac67d..7257f42 100644
--- a/hw/timer/imx_gpt.c
+++ b/hw/timer/imx_gpt.c

@@ -16,11 +16,17 @@
 #include "hw/misc/imx_ccm.h"
 #include "qemu/main-loop.h"
 
-/*
- * Define to 1 for debug messages
- */
-#define DEBUG_TIMER 0
-#if DEBUG_TIMER
+#ifndef DEBUG_IMX_GPT
+#define DEBUG_IMX_GPT 0
+#endif
+
+#define DPRINTF(fmt, args...) \
+    do { \
+        if (DEBUG_IMX_GPT) { \
+            fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_GPT, \
+                                             __func__, ##args); \
+        } \
+    } while (0)
 
 static char const *imx_gpt_reg_name(uint32_t reg)
 {
@@ -50,24 +56,6 @@
     }
 }
 
-#  define DPRINTF(fmt, args...) \
-          do { printf("%s: " fmt , __func__, ##args); } while (0)
-#else
-#  define DPRINTF(fmt, args...) do {} while (0)
-#endif
-
-/*
- * Define to 1 for messages about attempts to
- * access unimplemented registers or similar.
- */
-#define DEBUG_IMPLEMENTATION 1
-#if DEBUG_IMPLEMENTATION
-#  define IPRINTF(fmt, args...) \
-          do { fprintf(stderr, "%s: " fmt, __func__, ##args); } while (0)
-#else
-#  define IPRINTF(fmt, args...) do {} while (0)
-#endif
-
 static const VMStateDescription vmstate_imx_timer_gpt = {
     .name = TYPE_IMX_GPT,
     .version_id = 3,
@@ -224,9 +212,8 @@
 {
     IMXGPTState *s = IMX_GPT(opaque);
     uint32_t reg_value = 0;
-    uint32_t reg = offset >> 2;
 
-    switch (reg) {
+    switch (offset >> 2) {
     case 0: /* Control Register */
         reg_value = s->cr;
         break;
@@ -256,12 +243,14 @@
         break;
 
     case 7: /* input Capture Register 1 */
-        qemu_log_mask(LOG_UNIMP, "icr1 feature is not implemented\n");
+        qemu_log_mask(LOG_UNIMP, "[%s]%s: icr1 feature is not implemented\n",
+                      TYPE_IMX_GPT, __func__);
         reg_value = s->icr1;
         break;
 
     case 8: /* input Capture Register 2 */
-        qemu_log_mask(LOG_UNIMP, "icr2 feature is not implemented\n");
+        qemu_log_mask(LOG_UNIMP, "[%s]%s: icr2 feature is not implemented\n",
+                      TYPE_IMX_GPT, __func__);
         reg_value = s->icr2;
         break;
 
@@ -271,11 +260,12 @@
         break;
 
     default:
-        IPRINTF("Bad offset %x\n", reg);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_GPT, __func__, offset);
         break;
     }
 
-    DPRINTF("(%s) = 0x%08x\n", imx_gpt_reg_name(reg), reg_value);
+    DPRINTF("(%s) = 0x%08x\n", imx_gpt_reg_name(offset >> 2), reg_value);
 
     return reg_value;
 }
@@ -322,12 +312,11 @@
 {
     IMXGPTState *s = IMX_GPT(opaque);
     uint32_t oldreg;
-    uint32_t reg = offset >> 2;
 
-    DPRINTF("(%s, value = 0x%08x)\n", imx_gpt_reg_name(reg),
+    DPRINTF("(%s, value = 0x%08x)\n", imx_gpt_reg_name(offset >> 2),
             (uint32_t)value);
 
-    switch (reg) {
+    switch (offset >> 2) {
     case 0:
         oldreg = s->cr;
         s->cr = value & ~0x7c14;
@@ -403,7 +392,8 @@
         break;
 
     default:
-        IPRINTF("Bad offset %x\n", reg);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_GPT, __func__, offset);
         break;
     }
 }

diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c
index 9a4e7dc..597d8fd 100644
--- a/hw/usb/dev-storage.c
+++ b/hw/usb/dev-storage.c

@@ -613,20 +613,22 @@
         return;
     }
 
-    bdrv_add_key(blk_bs(blk), NULL, &err);
-    if (err) {
-        if (monitor_cur_is_qmp()) {
-            error_propagate(errp, err);
-            return;
-        }
-        error_free(err);
-        err = NULL;
-        if (cur_mon) {
-            monitor_read_bdrv_key_start(cur_mon, blk_bs(blk),
-                                        usb_msd_password_cb, s);
-            s->dev.auto_attach = 0;
-        } else {
-            autostart = 0;
+    if (blk_bs(blk)) {
+        bdrv_add_key(blk_bs(blk), NULL, &err);
+        if (err) {
+            if (monitor_cur_is_qmp()) {
+                error_propagate(errp, err);
+                return;
+            }
+            error_free(err);
+            err = NULL;
+            if (cur_mon) {
+                monitor_read_bdrv_key_start(cur_mon, blk_bs(blk),
+                                            usb_msd_password_cb, s);
+                s->dev.auto_attach = 0;
+            } else {
+                autostart = 0;
+            }
         }
     }
 

diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 78442ba..83c84f1 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c

@@ -89,7 +89,7 @@
         struct vhost_vring_state state;
         struct vhost_vring_addr addr;
         VhostUserMemory memory;
-    };
+    } payload;
 } QEMU_PACKED VhostUserMsg;
 
 static VhostUserMsg m __attribute__ ((unused));
@@ -200,8 +200,8 @@
     VhostUserMsg msg = {
         .request = VHOST_USER_SET_LOG_BASE,
         .flags = VHOST_USER_VERSION,
-        .u64 = base,
-        .size = sizeof(m.u64),
+        .payload.u64 = base,
+        .size = sizeof(msg.payload.u64),
     };
 
     if (shmfd && log->fd != -1) {
@@ -247,17 +247,17 @@
                                 &ram_addr);
         fd = qemu_get_ram_fd(ram_addr);
         if (fd > 0) {
-            msg.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
-            msg.memory.regions[fd_num].memory_size  = reg->memory_size;
-            msg.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
-            msg.memory.regions[fd_num].mmap_offset = reg->userspace_addr -
+            msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
+            msg.payload.memory.regions[fd_num].memory_size  = reg->memory_size;
+            msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
+            msg.payload.memory.regions[fd_num].mmap_offset = reg->userspace_addr -
                 (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr);
             assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
             fds[fd_num++] = fd;
         }
     }
 
-    msg.memory.nregions = fd_num;
+    msg.payload.memory.nregions = fd_num;
 
     if (!fd_num) {
         error_report("Failed initializing vhost-user memory map, "
@@ -265,8 +265,8 @@
         return -1;
     }
 
-    msg.size = sizeof(m.memory.nregions);
-    msg.size += sizeof(m.memory.padding);
+    msg.size = sizeof(msg.payload.memory.nregions);
+    msg.size += sizeof(msg.payload.memory.padding);
     msg.size += fd_num * sizeof(VhostUserMemoryRegion);
 
     vhost_user_write(dev, &msg, fds, fd_num);
@@ -280,8 +280,8 @@
     VhostUserMsg msg = {
         .request = VHOST_USER_SET_VRING_ADDR,
         .flags = VHOST_USER_VERSION,
-        .addr = *addr,
-        .size = sizeof(*addr),
+        .payload.addr = *addr,
+        .size = sizeof(msg.payload.addr),
     };
 
     vhost_user_write(dev, &msg, NULL, 0);
@@ -303,8 +303,8 @@
     VhostUserMsg msg = {
         .request = request,
         .flags = VHOST_USER_VERSION,
-        .state = *ring,
-        .size = sizeof(*ring),
+        .payload.state = *ring,
+        .size = sizeof(msg.payload.state),
     };
 
     vhost_user_write(dev, &msg, NULL, 0);
@@ -345,8 +345,8 @@
     VhostUserMsg msg = {
         .request = VHOST_USER_GET_VRING_BASE,
         .flags = VHOST_USER_VERSION,
-        .state = *ring,
-        .size = sizeof(*ring),
+        .payload.state = *ring,
+        .size = sizeof(msg.payload.state),
     };
 
     vhost_user_write(dev, &msg, NULL, 0);
@@ -361,12 +361,12 @@
         return -1;
     }
 
-    if (msg.size != sizeof(m.state)) {
+    if (msg.size != sizeof(msg.payload.state)) {
         error_report("Received bad msg size.");
         return -1;
     }
 
-    *ring = msg.state;
+    *ring = msg.payload.state;
 
     return 0;
 }
@@ -380,14 +380,14 @@
     VhostUserMsg msg = {
         .request = request,
         .flags = VHOST_USER_VERSION,
-        .u64 = file->index & VHOST_USER_VRING_IDX_MASK,
-        .size = sizeof(m.u64),
+        .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
+        .size = sizeof(msg.payload.u64),
     };
 
     if (ioeventfd_enabled() && file->fd > 0) {
         fds[fd_num++] = file->fd;
     } else {
-        msg.u64 |= VHOST_USER_VRING_NOFD_MASK;
+        msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
     }
 
     vhost_user_write(dev, &msg, fds, fd_num);
@@ -412,8 +412,8 @@
     VhostUserMsg msg = {
         .request = request,
         .flags = VHOST_USER_VERSION,
-        .u64 = u64,
-        .size = sizeof(m.u64),
+        .payload.u64 = u64,
+        .size = sizeof(msg.payload.u64),
     };
 
     vhost_user_write(dev, &msg, NULL, 0);
@@ -456,12 +456,12 @@
         return -1;
     }
 
-    if (msg.size != sizeof(m.u64)) {
+    if (msg.size != sizeof(msg.payload.u64)) {
         error_report("Received bad msg size.");
         return -1;
     }
 
-    *u64 = msg.u64;
+    *u64 = msg.payload.u64;
 
     return 0;
 }
@@ -591,8 +591,8 @@
                            VHOST_USER_PROTOCOL_F_RARP)) {
         msg.request = VHOST_USER_SEND_RARP;
         msg.flags = VHOST_USER_VERSION;
-        memcpy((char *)&msg.u64, mac_addr, 6);
-        msg.size = sizeof(m.u64);
+        memcpy((char *)&msg.payload.u64, mac_addr, 6);
+        msg.size = sizeof(msg.payload.u64);
 
         err = vhost_user_write(dev, &msg, NULL, 0);
         return err;

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index d0bc72e..939f802 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c

@@ -448,28 +448,59 @@
     return in_bytes <= in_total && out_bytes <= out_total;
 }
 
-void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
-    size_t num_sg, int is_write)
+static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
+                                unsigned int *num_sg, unsigned int max_size,
+                                int is_write)
 {
     unsigned int i;
     hwaddr len;
 
-    if (num_sg > VIRTQUEUE_MAX_SIZE) {
-        error_report("virtio: map attempt out of bounds: %zd > %d",
-                     num_sg, VIRTQUEUE_MAX_SIZE);
-        exit(1);
-    }
+    /* Note: this function MUST validate input, some callers
+     * are passing in num_sg values received over the network.
+     */
+    /* TODO: teach all callers that this can fail, and return failure instead
+     * of asserting here.
+     * When we do, we might be able to re-enable NDEBUG below.
+     */
+#ifdef NDEBUG
+#error building with NDEBUG is not supported
+#endif
+    assert(*num_sg <= max_size);
 
-    for (i = 0; i < num_sg; i++) {
+    for (i = 0; i < *num_sg; i++) {
         len = sg[i].iov_len;
         sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
-        if (sg[i].iov_base == NULL || len != sg[i].iov_len) {
+        if (!sg[i].iov_base) {
             error_report("virtio: error trying to map MMIO memory");
             exit(1);
         }
+        if (len == sg[i].iov_len) {
+            continue;
+        }
+        if (*num_sg >= max_size) {
+            error_report("virtio: memory split makes iovec too large");
+            exit(1);
+        }
+        memmove(sg + i + 1, sg + i, sizeof(*sg) * (*num_sg - i));
+        memmove(addr + i + 1, addr + i, sizeof(*addr) * (*num_sg - i));
+        assert(len < sg[i + 1].iov_len);
+        sg[i].iov_len = len;
+        addr[i + 1] += len;
+        sg[i + 1].iov_len -= len;
+        ++*num_sg;
     }
 }
 
+void virtqueue_map(VirtQueueElement *elem)
+{
+    virtqueue_map_iovec(elem->in_sg, elem->in_addr, &elem->in_num,
+                        MIN(ARRAY_SIZE(elem->in_sg), ARRAY_SIZE(elem->in_addr)),
+                        1);
+    virtqueue_map_iovec(elem->out_sg, elem->out_addr, &elem->out_num,
+                        MIN(ARRAY_SIZE(elem->out_sg), ARRAY_SIZE(elem->out_addr)),
+                        0);
+}
+
 int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
 {
     unsigned int i, head, max;
@@ -531,8 +562,7 @@
     } while ((i = virtqueue_next_desc(vdev, desc_pa, i, max)) != max);
 
     /* Now map what we have collected */
-    virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
-    virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);
+    virtqueue_map(elem);
 
     elem->index = head;
 

diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
index 2b54f52..aa96288 100644
--- a/hw/xen/xen_pt.c
+++ b/hw/xen/xen_pt.c

@@ -938,10 +938,18 @@
     dc->props = xen_pci_passthrough_properties;
 };
 
+static void xen_pci_passthrough_finalize(Object *obj)
+{
+    XenPCIPassthroughState *s = XEN_PT_DEVICE(obj);
+
+    xen_pt_msix_delete(s);
+}
+
 static const TypeInfo xen_pci_passthrough_info = {
     .name = TYPE_XEN_PT_DEVICE,
     .parent = TYPE_PCI_DEVICE,
     .instance_size = sizeof(XenPCIPassthroughState),
+    .instance_finalize = xen_pci_passthrough_finalize,
     .class_init = xen_pci_passthrough_class_init,
 };
 

diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h
index 3bc22eb..c545280 100644
--- a/hw/xen/xen_pt.h
+++ b/hw/xen/xen_pt.h

@@ -305,6 +305,7 @@
 
 int xen_pt_msix_init(XenPCIPassthroughState *s, uint32_t base);
 void xen_pt_msix_delete(XenPCIPassthroughState *s);
+void xen_pt_msix_unmap(XenPCIPassthroughState *s);
 int xen_pt_msix_update(XenPCIPassthroughState *s);
 int xen_pt_msix_update_remap(XenPCIPassthroughState *s, int bar_index);
 void xen_pt_msix_disable(XenPCIPassthroughState *s);

diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c
index 4a5bc11..0efee11 100644
--- a/hw/xen/xen_pt_config_init.c
+++ b/hw/xen/xen_pt_config_init.c

@@ -2079,7 +2079,7 @@
 
     /* free MSI/MSI-X info table */
     if (s->msix) {
-        xen_pt_msix_delete(s);
+        xen_pt_msix_unmap(s);
     }
     g_free(s->msi);
 

diff --git a/hw/xen/xen_pt_msi.c b/hw/xen/xen_pt_msi.c
index e3d7194..82de2bc 100644
--- a/hw/xen/xen_pt_msi.c
+++ b/hw/xen/xen_pt_msi.c

@@ -610,7 +610,7 @@
     return rc;
 }
 
-void xen_pt_msix_delete(XenPCIPassthroughState *s)
+void xen_pt_msix_unmap(XenPCIPassthroughState *s)
 {
     XenPTMSIX *msix = s->msix;
 
@@ -627,6 +627,17 @@
     }
 
     memory_region_del_subregion(&s->bar[msix->bar_index], &msix->mmio);
+}
+
+void xen_pt_msix_delete(XenPCIPassthroughState *s)
+{
+    XenPTMSIX *msix = s->msix;
+
+    if (!msix) {
+        return;
+    }
+
+    object_unparent(OBJECT(&msix->mmio));
 
     g_free(s->msix);
     s->msix = NULL;

diff --git a/include/block/accounting.h b/include/block/accounting.h
index 4c406cf..66637cd 100644
--- a/include/block/accounting.h
+++ b/include/block/accounting.h

@@ -40,7 +40,6 @@
     uint64_t nr_ops[BLOCK_MAX_IOTYPE];
     uint64_t total_time_ns[BLOCK_MAX_IOTYPE];
     uint64_t merged[BLOCK_MAX_IOTYPE];
-    uint64_t wr_highest_sector;
 } BlockAcctStats;
 
 typedef struct BlockAcctCookie {
@@ -52,8 +51,6 @@
 void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie,
                       int64_t bytes, enum BlockAcctType type);
 void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie);
-void block_acct_highest_sector(BlockAcctStats *stats, int64_t sector_num,
-                               unsigned int nb_sectors);
 void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
                            int num_requests);
 

diff --git a/include/block/aio.h b/include/block/aio.h
index 400b1b0..bcc7d43 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h

@@ -122,6 +122,8 @@
 
     /* TimerLists for calling timers - one per clock type */
     QEMUTimerListGroup tlg;
+
+    int external_disable_cnt;
 };
 
 /**
@@ -299,6 +301,7 @@
  */
 void aio_set_fd_handler(AioContext *ctx,
                         int fd,
+                        bool is_external,
                         IOHandler *io_read,
                         IOHandler *io_write,
                         void *opaque);
@@ -312,6 +315,7 @@
  */
 void aio_set_event_notifier(AioContext *ctx,
                             EventNotifier *notifier,
+                            bool is_external,
                             EventNotifierHandler *io_read);
 
 /* Return a GSource that lets the main loop poll the file descriptors attached
@@ -373,4 +377,40 @@
  */
 int64_t aio_compute_timeout(AioContext *ctx);
 
+/**
+ * aio_disable_external:
+ * @ctx: the aio context
+ *
+ * Disable the further processing of external clients.
+ */
+static inline void aio_disable_external(AioContext *ctx)
+{
+    atomic_inc(&ctx->external_disable_cnt);
+}
+
+/**
+ * aio_enable_external:
+ * @ctx: the aio context
+ *
+ * Enable the processing of external clients.
+ */
+static inline void aio_enable_external(AioContext *ctx)
+{
+    assert(ctx->external_disable_cnt > 0);
+    atomic_dec(&ctx->external_disable_cnt);
+}
+
+/**
+ * aio_node_check:
+ * @ctx: the aio context
+ * @is_external: Whether or not the checked node is an external event source.
+ *
+ * Check if the node's is_external flag is okay to be polled by the ctx at this
+ * moment. True means green light.
+ */
+static inline bool aio_node_check(AioContext *ctx, bool is_external)
+{
+    return !is_external || !atomic_read(&ctx->external_disable_cnt);
+}
+
 #endif

diff --git a/include/block/block.h b/include/block/block.h
index 84f05ad..610db92 100644
--- a/include/block/block.h
+++ b/include/block/block.h

@@ -174,11 +174,6 @@
     BLOCK_OP_TYPE_MAX,
 } BlockOpType;
 
-void bdrv_iostatus_enable(BlockDriverState *bs);
-void bdrv_iostatus_reset(BlockDriverState *bs);
-void bdrv_iostatus_disable(BlockDriverState *bs);
-bool bdrv_iostatus_is_enabled(const BlockDriverState *bs);
-void bdrv_iostatus_set_err(BlockDriverState *bs, int error);
 void bdrv_info_print(Monitor *mon, const QObject *data);
 void bdrv_info(Monitor *mon, QObject **ret_data);
 void bdrv_stats_print(Monitor *mon, const QObject *data);
@@ -389,17 +384,11 @@
 int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
                             int64_t sector_num, int nb_sectors, int *pnum);
 
-void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
-                       BlockdevOnError on_write_error);
-BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read);
-BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error);
-void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
-                       bool is_read, int error);
 int bdrv_is_read_only(BlockDriverState *bs);
 int bdrv_is_sg(BlockDriverState *bs);
 int bdrv_enable_write_cache(BlockDriverState *bs);
 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce);
-int bdrv_is_inserted(BlockDriverState *bs);
+bool bdrv_is_inserted(BlockDriverState *bs);
 int bdrv_media_changed(BlockDriverState *bs);
 void bdrv_lock_medium(BlockDriverState *bs, bool locked);
 void bdrv_eject(BlockDriverState *bs, bool eject_flag);
@@ -466,7 +455,6 @@
 size_t bdrv_min_mem_align(BlockDriverState *bs);
 /* Returns optimal alignment in bytes for bounce buffer */
 size_t bdrv_opt_mem_align(BlockDriverState *bs);
-void bdrv_set_guest_block_size(BlockDriverState *bs, int align);
 void *qemu_blockalign(BlockDriverState *bs, size_t size);
 void *qemu_blockalign0(BlockDriverState *bs, size_t size);
 void *qemu_try_blockalign(BlockDriverState *bs, size_t size);
@@ -622,6 +610,23 @@
 void bdrv_io_unplug(BlockDriverState *bs);
 void bdrv_flush_io_queue(BlockDriverState *bs);
 
-BlockAcctStats *bdrv_get_stats(BlockDriverState *bs);
+/**
+ * bdrv_drained_begin:
+ *
+ * Begin a quiesced section for exclusive access to the BDS, by disabling
+ * external request sources including NBD server and device model. Note that
+ * this doesn't block timers or coroutines from submitting more requests, which
+ * means block_job_pause is still necessary.
+ *
+ * This function can be recursive.
+ */
+void bdrv_drained_begin(BlockDriverState *bs);
+
+/**
+ * bdrv_drained_end:
+ *
+ * End a quiescent section started by bdrv_drained_begin().
+ */
+void bdrv_drained_end(BlockDriverState *bs);
 
 #endif

diff --git a/include/block/block_int.h b/include/block/block_int.h
index a480f94..3ceeb5a 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h

@@ -26,6 +26,7 @@
 
 #include "block/accounting.h"
 #include "block/block.h"
+#include "block/throttle-groups.h"
 #include "qemu/option.h"
 #include "qemu/queue.h"
 #include "qemu/coroutine.h"
@@ -212,7 +213,7 @@
         const char *backing_file, const char *backing_fmt);
 
     /* removable device specific */
-    int (*bdrv_is_inserted)(BlockDriverState *bs);
+    bool (*bdrv_is_inserted)(BlockDriverState *bs);
     int (*bdrv_media_changed)(BlockDriverState *bs);
     void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag);
     void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked);
@@ -399,8 +400,8 @@
     unsigned       pending_reqs[2];
     QLIST_ENTRY(BlockDriverState) round_robin;
 
-    /* I/O stats (display with "info blockstats"). */
-    BlockAcctStats stats;
+    /* Offset after the highest byte written to */
+    uint64_t wr_highest_offset;
 
     /* I/O Limits */
     BlockLimits bl;
@@ -411,18 +412,9 @@
     /* Alignment requirement for offset/length of I/O requests */
     unsigned int request_alignment;
 
-    /* the block size for which the guest device expects atomicity */
-    int guest_block_size;
-
     /* do we need to tell the quest if we have a volatile write cache? */
     int enable_write_cache;
 
-    /* NOTE: the following infos are only hints for real hardware
-       drivers. They are not used by the block driver */
-    BlockdevOnError on_read_error, on_write_error;
-    bool iostatus_enabled;
-    BlockDeviceIoStatus iostatus;
-
     /* the following member gives a name to every node on the bs graph. */
     char node_name[32];
     /* element of the list of named nodes building the graph */
@@ -456,6 +448,17 @@
     /* threshold limit for writes, in bytes. "High water mark". */
     uint64_t write_threshold_offset;
     NotifierWithReturn write_threshold_notifier;
+
+    int quiesce_counter;
+};
+
+struct BlockBackendRootState {
+    int open_flags;
+    bool read_only;
+    BlockdevDetectZeroesOptions detect_zeroes;
+
+    char *throttle_group;
+    ThrottleState *throttle_state;
 };
 
 static inline BlockDriverState *backing_bs(BlockDriverState *bs)

diff --git a/include/block/throttle-groups.h b/include/block/throttle-groups.h
index fab113f..aba28f3 100644
--- a/include/block/throttle-groups.h
+++ b/include/block/throttle-groups.h

@@ -30,6 +30,9 @@
 
 const char *throttle_group_get_name(BlockDriverState *bs);
 
+ThrottleState *throttle_group_incref(const char *name);
+void throttle_group_unref(ThrottleState *ts);
+
 void throttle_group_config(BlockDriverState *bs, ThrottleConfig *cfg);
 void throttle_group_get_config(BlockDriverState *bs, ThrottleConfig *cfg);
 
@@ -40,7 +43,4 @@
                                                         unsigned int bytes,
                                                         bool is_write);
 
-void throttle_group_lock(BlockDriverState *bs);
-void throttle_group_unlock(BlockDriverState *bs);
-
 #endif

diff --git a/include/glib-compat.h b/include/glib-compat.h
index fb25f43..03d8b12 100644
--- a/include/glib-compat.h
+++ b/include/glib-compat.h

@@ -165,6 +165,14 @@
 #define CompatGCond GCond
 #endif /* glib 2.31 */
 
+#if !GLIB_CHECK_VERSION(2, 32, 0)
+/* Beware, function returns gboolean since 2.39.2, see GLib commit 9101915 */
+static inline void g_hash_table_add(GHashTable *hash_table, gpointer key)
+{
+    g_hash_table_replace(hash_table, key, key);
+}
+#endif
+
 #ifndef g_assert_true
 #define g_assert_true(expr)                                                    \
     do {                                                                       \

diff --git a/include/hw/arm/xlnx-zynqmp.h b/include/hw/arm/xlnx-zynqmp.h
index 4005a99..d116092 100644
--- a/include/hw/arm/xlnx-zynqmp.h
+++ b/include/hw/arm/xlnx-zynqmp.h

@@ -24,6 +24,7 @@
 #include "hw/char/cadence_uart.h"
 #include "hw/ide/pci.h"
 #include "hw/ide/ahci.h"
+#include "hw/sd/sdhci.h"
 
 #define TYPE_XLNX_ZYNQMP "xlnx,zynqmp"
 #define XLNX_ZYNQMP(obj) OBJECT_CHECK(XlnxZynqMPState, (obj), \
@@ -33,6 +34,7 @@
 #define XLNX_ZYNQMP_NUM_RPU_CPUS 2
 #define XLNX_ZYNQMP_NUM_GEMS 4
 #define XLNX_ZYNQMP_NUM_UARTS 2
+#define XLNX_ZYNQMP_NUM_SDHCI 2
 
 #define XLNX_ZYNQMP_NUM_OCM_BANKS 4
 #define XLNX_ZYNQMP_OCM_RAM_0_ADDRESS 0xFFFC0000
@@ -63,6 +65,7 @@
     CadenceGEMState gem[XLNX_ZYNQMP_NUM_GEMS];
     CadenceUARTState uart[XLNX_ZYNQMP_NUM_UARTS];
     SysbusAHCIState sata;
+    SDHCIState sdhci[XLNX_ZYNQMP_NUM_SDHCI];
 
     char *boot_cpu;
     ARMCPU *boot_cpu_ptr;

diff --git a/include/hw/compat.h b/include/hw/compat.h
index 095de5d..93e71af 100644
--- a/include/hw/compat.h
+++ b/include/hw/compat.h

@@ -2,7 +2,11 @@
 #define HW_COMPAT_H
 
 #define HW_COMPAT_2_4 \
-        /* empty */
+        {\
+            .driver   = "virtio-blk-device",\
+            .property = "scsi",\
+            .value    = "true",\
+        },
 
 #define HW_COMPAT_2_3 \
         {\

diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index c5961d7..606dbc2 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h

@@ -60,7 +60,6 @@
 
     /*< public >*/
     bool broken_reserved_end;
-    bool inter_dimm_gap;
     HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
                                            DeviceState *dev);
 };
@@ -318,6 +317,11 @@
             .driver   = "Broadwell-noTSX-" TYPE_X86_CPU,\
             .property = "abm",\
             .value    = "off",\
+        },\
+        {\
+            .driver   = "host" "-" TYPE_X86_CPU,\
+            .property = "host-cache-info",\
+            .value    = "on",\
         },
 
 #define PC_COMPAT_2_3 \

diff --git a/include/hw/intc/arm_gic_common.h b/include/hw/intc/arm_gic_common.h
index 564a72b..f4c349a 100644
--- a/include/hw/intc/arm_gic_common.h
+++ b/include/hw/intc/arm_gic_common.h

@@ -111,6 +111,7 @@
     bool security_extn;
     bool irq_reset_nonsecure; /* configure IRQs as group 1 (NS) on reset? */
     int dev_fd; /* kvm device fd if backed by kvm vgic support */
+    Error *migration_blocker;
 } GICState;
 
 #define TYPE_ARM_GIC_COMMON "arm_gic_common"

diff --git a/include/hw/mem/pc-dimm.h b/include/hw/mem/pc-dimm.h
index c1ee7b0..d83bf30 100644
--- a/include/hw/mem/pc-dimm.h
+++ b/include/hw/mem/pc-dimm.h

@@ -83,16 +83,15 @@
 
 uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
                                uint64_t address_space_size,
-                               uint64_t *hint, uint64_t align, bool gap,
-                               uint64_t size, Error **errp);
+                               uint64_t *hint, uint64_t align, uint64_t size,
+                               Error **errp);
 
 int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp);
 
 int qmp_pc_dimm_device_list(Object *obj, void *opaque);
 uint64_t pc_existing_dimms_capacity(Error **errp);
 void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
-                         MemoryRegion *mr, uint64_t align, bool gap,
-                         Error **errp);
+                         MemoryRegion *mr, uint64_t align, Error **errp);
 void pc_dimm_memory_unplug(DeviceState *dev, MemoryHotplugState *hpms,
                            MemoryRegion *mr);
 #endif

diff --git a/include/hw/misc/ivshmem.h b/include/hw/misc/ivshmem.h
new file mode 100644
index 0000000..433ef53
--- /dev/null
+++ b/include/hw/misc/ivshmem.h

@@ -0,0 +1,25 @@
+
+/*
+ * Inter-VM Shared Memory PCI device.
+ *
+ * Author:
+ *      Cam Macdonell <cam@cs.ualberta.ca>
+ *
+ * Based On: cirrus_vga.c
+ *          Copyright (c) 2004 Fabrice Bellard
+ *          Copyright (c) 2004 Makoto Suzuki (suzu)
+ *
+ *      and rtl8139.c
+ *          Copyright (c) 2006 Igor Kovalenko
+ *
+ * This code is licensed under the GNU GPL v2.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+#ifndef IVSHMEM_H
+#define IVSHMEM_H
+
+#define IVSHMEM_PROTOCOL_VERSION 0
+
+#endif /* IVSHMEM_H */

diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index 5322b56..7de5e02 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h

@@ -78,6 +78,7 @@
     MemoryRegion memwindow, iowindow, msiwindow;
 
     uint32_t dma_liobn;
+    hwaddr dma_win_addr, dma_win_size;
     AddressSpace iommu_as;
     MemoryRegion iommu_root;
 
@@ -115,8 +116,6 @@
 
 #define SPAPR_PCI_MSI_WINDOW         0x40000000000ULL
 
-#define SPAPR_PCI_DMA32_SIZE         0x40000000
-
 static inline qemu_irq spapr_phb_lsi_qirq(struct sPAPRPHBState *phb, int pin)
 {
     sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());

diff --git a/include/hw/pci/msix.h b/include/hw/pci/msix.h
index 954d82b..72e5f93 100644
--- a/include/hw/pci/msix.h
+++ b/include/hw/pci/msix.h

@@ -46,12 +46,16 @@
 
 extern const VMStateDescription vmstate_msix;
 
-#define VMSTATE_MSIX(_field, _state) {                               \
-    .name       = (stringify(_field)),                               \
-    .size       = sizeof(PCIDevice),                                 \
-    .vmsd       = &vmstate_msix,                                     \
-    .flags      = VMS_STRUCT,                                        \
-    .offset     = vmstate_offset_value(_state, _field, PCIDevice),   \
+#define VMSTATE_MSIX_TEST(_field, _state, _test) {                   \
+    .name         = (stringify(_field)),                             \
+    .size         = sizeof(PCIDevice),                               \
+    .vmsd         = &vmstate_msix,                                   \
+    .flags        = VMS_STRUCT,                                      \
+    .offset       = vmstate_offset_value(_state, _field, PCIDevice), \
+    .field_exists = (_test)                                          \
 }
 
+#define VMSTATE_MSIX(_f, _s)                                         \
+    VMSTATE_MSIX_TEST(_f, _s, NULL)
+
 #endif

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index f5e7fd8..379b6e1 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h

@@ -397,6 +397,7 @@
                                   void *(*begin)(PCIBus *bus, void *parent_state),
                                   void (*end)(PCIBus *bus, void *state),
                                   void *parent_state);
+PCIDevice *pci_get_function_0(PCIDevice *pci_dev);
 
 /* Use this wrapper when specific scan order is not required. */
 static inline

diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 56c5b0b..5baa906 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h

@@ -563,7 +563,7 @@
     uint32_t page_shift;
     uint64_t *table;
     bool bypass;
-    bool vfio_accel;
+    bool need_vfio;
     int fd;
     MemoryRegion iommu;
     struct VIOsPAPRDevice *vdev; /* for @bypass migration compatibility only */
@@ -588,7 +588,9 @@
                                    uint64_t bus_offset,
                                    uint32_t page_shift,
                                    uint32_t nb_table,
-                                   bool vfio_accel);
+                                   bool need_vfio);
+void spapr_tce_set_need_vfio(sPAPRTCETable *tcet, bool need_vfio);
+
 MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet);
 int spapr_dma_dt(void *fdt, int node_off, const char *propname,
                  uint32_t liobn, uint64_t window, uint32_t size);

diff --git a/include/hw/sd.h b/include/hw/sd/sd.h
similarity index 100%
rename from include/hw/sd.h
rename to include/hw/sd/sd.h


diff --git a/include/hw/sd/sdhci.h b/include/hw/sd/sdhci.h
new file mode 100644
index 0000000..9b3031f
--- /dev/null
+++ b/include/hw/sd/sdhci.h

@@ -0,0 +1,94 @@
+/*
+ * SD Association Host Standard Specification v2.0 controller emulation
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Mitsyanko Igor <i.mitsyanko@samsung.com>
+ * Peter A.G. Crosthwaite <peter.crosthwaite@petalogix.com>
+ *
+ * Based on MMC controller for Samsung S5PC1xx-based board emulation
+ * by Alexey Merkulov and Vladimir Monakhov.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU _General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef SDHCI_H
+#define SDHCI_H
+
+#include "qemu-common.h"
+#include "hw/block/block.h"
+#include "hw/pci/pci.h"
+#include "hw/sysbus.h"
+#include "hw/sd/sd.h"
+
+/* SD/MMC host controller state */
+typedef struct SDHCIState {
+    union {
+        PCIDevice pcidev;
+        SysBusDevice busdev;
+    };
+    SDState *card;
+    MemoryRegion iomem;
+    BlockConf conf;
+
+    QEMUTimer *insert_timer;       /* timer for 'changing' sd card. */
+    QEMUTimer *transfer_timer;
+    qemu_irq eject_cb;
+    qemu_irq ro_cb;
+    qemu_irq irq;
+
+    uint32_t sdmasysad;    /* SDMA System Address register */
+    uint16_t blksize;      /* Host DMA Buff Boundary and Transfer BlkSize Reg */
+    uint16_t blkcnt;       /* Blocks count for current transfer */
+    uint32_t argument;     /* Command Argument Register */
+    uint16_t trnmod;       /* Transfer Mode Setting Register */
+    uint16_t cmdreg;       /* Command Register */
+    uint32_t rspreg[4];    /* Response Registers 0-3 */
+    uint32_t prnsts;       /* Present State Register */
+    uint8_t  hostctl;      /* Host Control Register */
+    uint8_t  pwrcon;       /* Power control Register */
+    uint8_t  blkgap;       /* Block Gap Control Register */
+    uint8_t  wakcon;       /* WakeUp Control Register */
+    uint16_t clkcon;       /* Clock control Register */
+    uint8_t  timeoutcon;   /* Timeout Control Register */
+    uint8_t  admaerr;      /* ADMA Error Status Register */
+    uint16_t norintsts;    /* Normal Interrupt Status Register */
+    uint16_t errintsts;    /* Error Interrupt Status Register */
+    uint16_t norintstsen;  /* Normal Interrupt Status Enable Register */
+    uint16_t errintstsen;  /* Error Interrupt Status Enable Register */
+    uint16_t norintsigen;  /* Normal Interrupt Signal Enable Register */
+    uint16_t errintsigen;  /* Error Interrupt Signal Enable Register */
+    uint16_t acmd12errsts; /* Auto CMD12 error status register */
+    uint64_t admasysaddr;  /* ADMA System Address Register */
+
+    uint32_t capareg;      /* Capabilities Register */
+    uint32_t maxcurr;      /* Maximum Current Capabilities Register */
+    uint8_t  *fifo_buffer; /* SD host i/o FIFO buffer */
+    uint32_t buf_maxsz;
+    uint16_t data_count;   /* current element in FIFO buffer */
+    uint8_t  stopped_state;/* Current SDHC state */
+    /* Buffer Data Port Register - virtual access point to R and W buffers */
+    /* Software Reset Register - always reads as 0 */
+    /* Force Event Auto CMD12 Error Interrupt Reg - write only */
+    /* Force Event Error Interrupt Register- write only */
+    /* RO Host Controller Version Register always reads as 0x2401 */
+} SDHCIState;
+
+#define TYPE_PCI_SDHCI "sdhci-pci"
+#define PCI_SDHCI(obj) OBJECT_CHECK(SDHCIState, (obj), TYPE_PCI_SDHCI)
+
+#define TYPE_SYSBUS_SDHCI "generic-sdhci"
+#define SYSBUS_SDHCI(obj)                               \
+     OBJECT_CHECK(SDHCIState, (obj), TYPE_SYSBUS_SDHCI)
+
+#endif /* SDHCI_H */

diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 9d09115..205fadf 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h

@@ -151,8 +151,7 @@
 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                     unsigned int len, unsigned int idx);
 
-void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
-    size_t num_sg, int is_write);
+void virtqueue_map(VirtQueueElement *elem);
 int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem);
 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
                           unsigned int out_bytes);

diff --git a/include/qapi/qmp/qbool.h b/include/qapi/qmp/qbool.h
index 4aa6be3..d9256e4 100644
--- a/include/qapi/qmp/qbool.h
+++ b/include/qapi/qmp/qbool.h

@@ -18,7 +18,7 @@
 #include "qapi/qmp/qobject.h"
 
 typedef struct QBool {
-    QObject_HEAD;
+    QObject base;
     bool value;
 } QBool;
 

diff --git a/include/qapi/qmp/qdict.h b/include/qapi/qmp/qdict.h
index a37f4c1..787c658 100644
--- a/include/qapi/qmp/qdict.h
+++ b/include/qapi/qmp/qdict.h

@@ -28,7 +28,7 @@
 } QDictEntry;
 
 typedef struct QDict {
-    QObject_HEAD;
+    QObject base;
     size_t size;
     QLIST_HEAD(,QDictEntry) table[QDICT_BUCKET_MAX];
 } QDict;

diff --git a/include/qapi/qmp/qfloat.h b/include/qapi/qmp/qfloat.h
index a865844..46745e5 100644
--- a/include/qapi/qmp/qfloat.h
+++ b/include/qapi/qmp/qfloat.h

@@ -18,7 +18,7 @@
 #include "qapi/qmp/qobject.h"
 
 typedef struct QFloat {
-    QObject_HEAD;
+    QObject base;
     double value;
 } QFloat;
 

diff --git a/include/qapi/qmp/qint.h b/include/qapi/qmp/qint.h
index 48a41b0..339a9ab 100644
--- a/include/qapi/qmp/qint.h
+++ b/include/qapi/qmp/qint.h

@@ -17,7 +17,7 @@
 #include "qapi/qmp/qobject.h"
 
 typedef struct QInt {
-    QObject_HEAD;
+    QObject base;
     int64_t value;
 } QInt;
 

diff --git a/include/qapi/qmp/qlist.h b/include/qapi/qmp/qlist.h
index 6cc4831..b1bf785 100644
--- a/include/qapi/qmp/qlist.h
+++ b/include/qapi/qmp/qlist.h

@@ -22,7 +22,7 @@
 } QListEntry;
 
 typedef struct QList {
-    QObject_HEAD;
+    QObject base;
     QTAILQ_HEAD(,QListEntry) head;
 } QList;
 

diff --git a/include/qapi/qmp/qobject.h b/include/qapi/qmp/qobject.h
index 260d2ed..c856f55 100644
--- a/include/qapi/qmp/qobject.h
+++ b/include/qapi/qmp/qobject.h

@@ -59,10 +59,6 @@
     size_t refcnt;
 } QObject;
 
-/* Objects definitions must include this */
-#define QObject_HEAD  \
-    QObject base
-
 /* Get the 'base' part of an object */
 #define QOBJECT(obj) (&(obj)->base)
 

diff --git a/include/qapi/qmp/qstring.h b/include/qapi/qmp/qstring.h
index 1bc3666..34675a7 100644
--- a/include/qapi/qmp/qstring.h
+++ b/include/qapi/qmp/qstring.h

@@ -17,7 +17,7 @@
 #include "qapi/qmp/qobject.h"
 
 typedef struct QString {
-    QObject_HEAD;
+    QObject base;
     char *string;
     size_t length;
     size_t capacity;

diff --git a/include/qemu/event_notifier.h b/include/qemu/event_notifier.h
index 88b57af..a8f2854 100644
--- a/include/qemu/event_notifier.h
+++ b/include/qemu/event_notifier.h

@@ -38,7 +38,7 @@
 
 #ifdef CONFIG_POSIX
 void event_notifier_init_fd(EventNotifier *, int fd);
-int event_notifier_get_fd(EventNotifier *);
+int event_notifier_get_fd(const EventNotifier *);
 #else
 HANDLE event_notifier_get_handle(EventNotifier *);
 #endif

diff --git a/include/qemu/log.h b/include/qemu/log.h
index f880e66..7de4500 100644
--- a/include/qemu/log.h
+++ b/include/qemu/log.h

@@ -41,6 +41,7 @@
 #define LOG_UNIMP          (1 << 10)
 #define LOG_GUEST_ERROR    (1 << 11)
 #define CPU_LOG_MMU        (1 << 12)
+#define CPU_LOG_TB_NOCHAIN (1 << 13)
 
 /* Returns true if a bit is set in the current loglevel mask
  */

diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index d4a8f7a..d961362 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h

@@ -11,6 +11,7 @@
 typedef struct AioContext AioContext;
 typedef struct AudioState AudioState;
 typedef struct BlockBackend BlockBackend;
+typedef struct BlockBackendRootState BlockBackendRootState;
 typedef struct BlockDriverState BlockDriverState;
 typedef struct BusClass BusClass;
 typedef struct BusState BusState;

diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 8fc960f..9306a52 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h

@@ -72,10 +72,16 @@
 BlockBackend *blk_next(BlockBackend *blk);
 
 BlockDriverState *blk_bs(BlockBackend *blk);
+void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs);
 
 void blk_hide_on_behalf_of_hmp_drive_del(BlockBackend *blk);
 
 void blk_iostatus_enable(BlockBackend *blk);
+bool blk_iostatus_is_enabled(const BlockBackend *blk);
+BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk);
+void blk_iostatus_disable(BlockBackend *blk);
+void blk_iostatus_reset(BlockBackend *blk);
+void blk_iostatus_set_err(BlockBackend *blk, int error);
 int blk_attach_dev(BlockBackend *blk, void *dev);
 void blk_attach_dev_nofail(BlockBackend *blk, void *dev);
 void blk_detach_dev(BlockBackend *blk, void *dev);
@@ -120,6 +126,8 @@
 int blk_flush_all(void);
 void blk_drain(BlockBackend *blk);
 void blk_drain_all(void);
+void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
+                      BlockdevOnError on_write_error);
 BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read);
 BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
                                       int error);
@@ -130,7 +138,8 @@
 int blk_enable_write_cache(BlockBackend *blk);
 void blk_set_enable_write_cache(BlockBackend *blk, bool wce);
 void blk_invalidate_cache(BlockBackend *blk, Error **errp);
-int blk_is_inserted(BlockBackend *blk);
+bool blk_is_inserted(BlockBackend *blk);
+bool blk_is_available(BlockBackend *blk);
 void blk_lock_medium(BlockBackend *blk, bool locked);
 void blk_eject(BlockBackend *blk, bool eject_flag);
 int blk_get_flags(BlockBackend *blk);
@@ -155,6 +164,8 @@
 void blk_io_plug(BlockBackend *blk);
 void blk_io_unplug(BlockBackend *blk);
 BlockAcctStats *blk_get_stats(BlockBackend *blk);
+BlockBackendRootState *blk_get_root_state(BlockBackend *blk);
+void blk_update_root_state(BlockBackend *blk);
 
 void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
                   BlockCompletionFunc *cb, void *opaque);

diff --git a/include/sysemu/char.h b/include/sysemu/char.h
index edf7669..aff193f 100644
--- a/include/sysemu/char.h
+++ b/include/sysemu/char.h

@@ -128,11 +128,19 @@
 /**
  * @qemu_chr_delete:
  *
- * Destroy a character backend.
+ * Destroy a character backend and remove it from the list of
+ * identified character backends.
  */
 void qemu_chr_delete(CharDriverState *chr);
 
 /**
+ * @qemu_chr_free:
+ *
+ * Destroy a character backend.
+ */
+void qemu_chr_free(CharDriverState *chr);
+
+/**
  * @qemu_chr_fe_set_echo:
  *
  * Ask the backend to override its normal echo setting.  This only really

diff --git a/iohandler.c b/iohandler.c
index 55f8501..eb68083 100644
--- a/iohandler.c
+++ b/iohandler.c

@@ -55,7 +55,8 @@
                          void *opaque)
 {
     iohandler_init();
-    aio_set_fd_handler(iohandler_ctx, fd, fd_read, fd_write, opaque);
+    aio_set_fd_handler(iohandler_ctx, fd, false,
+                       fd_read, fd_write, opaque);
 }
 
 /* reaping of zombies.  right now we're not passing the status to

diff --git a/migration/block.c b/migration/block.c
index ed865ed..f7bb1e0 100644
--- a/migration/block.c
+++ b/migration/block.c

@@ -808,6 +808,11 @@
                 return -EINVAL;
             }
             bs = blk_bs(blk);
+            if (!bs) {
+                fprintf(stderr, "Block device %s has no medium\n",
+                        device_name);
+                return -EINVAL;
+            }
 
             if (bs != bs_prev) {
                 bs_prev = bs;

diff --git a/monitor.c b/monitor.c
index 4f1ba2f..6cd747f 100644
--- a/monitor.c
+++ b/monitor.c

@@ -181,13 +181,16 @@
  * instance.
  */
 typedef struct MonitorQAPIEventState {
-    QAPIEvent event;    /* Event being tracked */
-    int64_t rate;       /* Minimum time (in ns) between two events */
-    int64_t last;       /* QEMU_CLOCK_REALTIME value at last emission */
+    QAPIEvent event;    /* Throttling state for this event type and... */
+    QDict *data;        /* ... data, see qapi_event_throttle_equal() */
     QEMUTimer *timer;   /* Timer for handling delayed events */
-    QObject *data;      /* Event pending delayed dispatch */
+    QDict *qdict;       /* Delayed event (if any) */
 } MonitorQAPIEventState;
 
+typedef struct {
+    int64_t rate;       /* Minimum time (in ns) between two events */
+} MonitorQAPIEventConf;
+
 struct Monitor {
     CharDriverState *chr;
     int reset_seen;
@@ -438,132 +441,161 @@
 }
 
 
-static MonitorQAPIEventState monitor_qapi_event_state[QAPI_EVENT_MAX];
+static MonitorQAPIEventConf monitor_qapi_event_conf[QAPI_EVENT_MAX] = {
+    /* Limit guest-triggerable events to 1 per second */
+    [QAPI_EVENT_RTC_CHANGE]        = { 1000 * SCALE_MS },
+    [QAPI_EVENT_WATCHDOG]          = { 1000 * SCALE_MS },
+    [QAPI_EVENT_BALLOON_CHANGE]    = { 1000 * SCALE_MS },
+    [QAPI_EVENT_QUORUM_REPORT_BAD] = { 1000 * SCALE_MS },
+    [QAPI_EVENT_QUORUM_FAILURE]    = { 1000 * SCALE_MS },
+    [QAPI_EVENT_VSERPORT_CHANGE]   = { 1000 * SCALE_MS },
+};
+
+GHashTable *monitor_qapi_event_state;
 
 /*
  * Emits the event to every monitor instance, @event is only used for trace
  * Called with monitor_lock held.
  */
-static void monitor_qapi_event_emit(QAPIEvent event, QObject *data)
+static void monitor_qapi_event_emit(QAPIEvent event, QDict *qdict)
 {
     Monitor *mon;
 
-    trace_monitor_protocol_event_emit(event, data);
+    trace_monitor_protocol_event_emit(event, qdict);
     QLIST_FOREACH(mon, &mon_list, entry) {
         if (monitor_is_qmp(mon) && mon->qmp.in_command_mode) {
-            monitor_json_emitter(mon, data);
+            monitor_json_emitter(mon, QOBJECT(qdict));
         }
     }
 }
 
+static void monitor_qapi_event_handler(void *opaque);
+
 /*
  * Queue a new event for emission to Monitor instances,
  * applying any rate limiting if required.
  */
 static void
-monitor_qapi_event_queue(QAPIEvent event, QDict *data, Error **errp)
+monitor_qapi_event_queue(QAPIEvent event, QDict *qdict, Error **errp)
 {
+    MonitorQAPIEventConf *evconf;
     MonitorQAPIEventState *evstate;
+
     assert(event < QAPI_EVENT_MAX);
-    int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+    evconf = &monitor_qapi_event_conf[event];
+    trace_monitor_protocol_event_queue(event, qdict, evconf->rate);
 
-    evstate = &(monitor_qapi_event_state[event]);
-    trace_monitor_protocol_event_queue(event,
-                                       data,
-                                       evstate->rate,
-                                       evstate->last,
-                                       now);
-
-    /* Rate limit of 0 indicates no throttling */
     qemu_mutex_lock(&monitor_lock);
-    if (!evstate->rate) {
-        monitor_qapi_event_emit(event, QOBJECT(data));
-        evstate->last = now;
+
+    if (!evconf->rate) {
+        /* Unthrottled event */
+        monitor_qapi_event_emit(event, qdict);
     } else {
-        int64_t delta = now - evstate->last;
-        if (evstate->data ||
-            delta < evstate->rate) {
-            /* If there's an existing event pending, replace
-             * it with the new event, otherwise schedule a
-             * timer for delayed emission
+        QDict *data = qobject_to_qdict(qdict_get(qdict, "data"));
+        MonitorQAPIEventState key = { .event = event, .data = data };
+
+        evstate = g_hash_table_lookup(monitor_qapi_event_state, &key);
+        assert(!evstate || timer_pending(evstate->timer));
+
+        if (evstate) {
+            /*
+             * Timer is pending for (at least) evconf->rate ns after
+             * last send.  Store event for sending when timer fires,
+             * replacing a prior stored event if any.
              */
-            if (evstate->data) {
-                qobject_decref(evstate->data);
-            } else {
-                int64_t then = evstate->last + evstate->rate;
-                timer_mod_ns(evstate->timer, then);
-            }
-            evstate->data = QOBJECT(data);
-            qobject_incref(evstate->data);
+            QDECREF(evstate->qdict);
+            evstate->qdict = qdict;
+            QINCREF(evstate->qdict);
         } else {
-            monitor_qapi_event_emit(event, QOBJECT(data));
-            evstate->last = now;
+            /*
+             * Last send was (at least) evconf->rate ns ago.
+             * Send immediately, and arm the timer to call
+             * monitor_qapi_event_handler() in evconf->rate ns.  Any
+             * events arriving before then will be delayed until then.
+             */
+            int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+
+            monitor_qapi_event_emit(event, qdict);
+
+            evstate = g_new(MonitorQAPIEventState, 1);
+            evstate->event = event;
+            evstate->data = data;
+            QINCREF(evstate->data);
+            evstate->qdict = NULL;
+            evstate->timer = timer_new_ns(QEMU_CLOCK_REALTIME,
+                                          monitor_qapi_event_handler,
+                                          evstate);
+            g_hash_table_add(monitor_qapi_event_state, evstate);
+            timer_mod_ns(evstate->timer, now + evconf->rate);
         }
     }
+
     qemu_mutex_unlock(&monitor_lock);
 }
 
 /*
- * The callback invoked by QemuTimer when a delayed
- * event is ready to be emitted
+ * This function runs evconf->rate ns after sending a throttled
+ * event.
+ * If another event has since been stored, send it.
  */
 static void monitor_qapi_event_handler(void *opaque)
 {
     MonitorQAPIEventState *evstate = opaque;
-    int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+    MonitorQAPIEventConf *evconf = &monitor_qapi_event_conf[evstate->event];
 
-    trace_monitor_protocol_event_handler(evstate->event,
-                                         evstate->data,
-                                         evstate->last,
-                                         now);
+    trace_monitor_protocol_event_handler(evstate->event, evstate->qdict);
     qemu_mutex_lock(&monitor_lock);
-    if (evstate->data) {
-        monitor_qapi_event_emit(evstate->event, evstate->data);
-        qobject_decref(evstate->data);
-        evstate->data = NULL;
+
+    if (evstate->qdict) {
+        int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+
+        monitor_qapi_event_emit(evstate->event, evstate->qdict);
+        QDECREF(evstate->qdict);
+        evstate->qdict = NULL;
+        timer_mod_ns(evstate->timer, now + evconf->rate);
+    } else {
+        g_hash_table_remove(monitor_qapi_event_state, evstate);
+        QDECREF(evstate->data);
+        timer_free(evstate->timer);
+        g_free(evstate);
     }
-    evstate->last = now;
+
     qemu_mutex_unlock(&monitor_lock);
 }
 
-/*
- * @event: the event ID to be limited
- * @rate: the rate limit in milliseconds
- *
- * Sets a rate limit on a particular event, so no
- * more than 1 event will be emitted within @rate
- * milliseconds
- */
-static void
-monitor_qapi_event_throttle(QAPIEvent event, int64_t rate)
+static unsigned int qapi_event_throttle_hash(const void *key)
 {
-    MonitorQAPIEventState *evstate;
-    assert(event < QAPI_EVENT_MAX);
+    const MonitorQAPIEventState *evstate = key;
+    unsigned int hash = evstate->event * 255;
 
-    evstate = &(monitor_qapi_event_state[event]);
+    if (evstate->event == QAPI_EVENT_VSERPORT_CHANGE) {
+        hash += g_str_hash(qdict_get_str(evstate->data, "id"));
+    }
 
-    trace_monitor_protocol_event_throttle(event, rate);
-    evstate->event = event;
-    assert(rate * SCALE_MS <= INT64_MAX);
-    evstate->rate = rate * SCALE_MS;
-    evstate->last = 0;
-    evstate->data = NULL;
-    evstate->timer = timer_new(QEMU_CLOCK_REALTIME,
-                               SCALE_MS,
-                               monitor_qapi_event_handler,
-                               evstate);
+    return hash;
+}
+
+static gboolean qapi_event_throttle_equal(const void *a, const void *b)
+{
+    const MonitorQAPIEventState *eva = a;
+    const MonitorQAPIEventState *evb = b;
+
+    if (eva->event != evb->event) {
+        return FALSE;
+    }
+
+    if (eva->event == QAPI_EVENT_VSERPORT_CHANGE) {
+        return !strcmp(qdict_get_str(eva->data, "id"),
+                       qdict_get_str(evb->data, "id"));
+    }
+
+    return TRUE;
 }
 
 static void monitor_qapi_event_init(void)
 {
-    /* Limit guest-triggerable events to 1 per second */
-    monitor_qapi_event_throttle(QAPI_EVENT_RTC_CHANGE, 1000);
-    monitor_qapi_event_throttle(QAPI_EVENT_WATCHDOG, 1000);
-    monitor_qapi_event_throttle(QAPI_EVENT_BALLOON_CHANGE, 1000);
-    monitor_qapi_event_throttle(QAPI_EVENT_QUORUM_REPORT_BAD, 1000);
-    monitor_qapi_event_throttle(QAPI_EVENT_QUORUM_FAILURE, 1000);
-    monitor_qapi_event_throttle(QAPI_EVENT_VSERPORT_CHANGE, 1000);
-
+    monitor_qapi_event_state = g_hash_table_new(qapi_event_throttle_hash,
+                                                qapi_event_throttle_equal);
     qmp_event_set_func_emit(monitor_qapi_event_queue);
 }
 
@@ -4145,6 +4177,10 @@
         monitor_printf(mon, "Device not found %s\n", device);
         return -1;
     }
+    if (!blk_bs(blk)) {
+        monitor_printf(mon, "Device '%s' has no medium\n", device);
+        return -1;
+    }
 
     bdrv_add_key(blk_bs(blk), NULL, &err);
     if (err) {

diff --git a/nbd.c b/nbd.c
index fc34c44..b3d9654 100644
--- a/nbd.c
+++ b/nbd.c

@@ -1446,6 +1446,7 @@
 {
     if (client->exp && client->exp->ctx) {
         aio_set_fd_handler(client->exp->ctx, client->sock,
+                           true,
                            client->can_read ? nbd_read : NULL,
                            client->send_coroutine ? nbd_restart_write : NULL,
                            client);
@@ -1455,7 +1456,8 @@
 static void nbd_unset_handlers(NBDClient *client)
 {
     if (client->exp && client->exp->ctx) {
-        aio_set_fd_handler(client->exp->ctx, client->sock, NULL, NULL, NULL);
+        aio_set_fd_handler(client->exp->ctx, client->sock,
+                           true, NULL, NULL, NULL);
     }
 }
 

diff --git a/net/dump.c b/net/dump.c
index 08259af..dd0555f 100644
--- a/net/dump.c
+++ b/net/dump.c

@@ -25,12 +25,13 @@
 #include "clients.h"
 #include "qemu-common.h"
 #include "qemu/error-report.h"
+#include "qemu/iov.h"
 #include "qemu/log.h"
 #include "qemu/timer.h"
-#include "hub.h"
+#include "qapi/visitor.h"
+#include "net/filter.h"
 
 typedef struct DumpState {
-    NetClientState nc;
     int64_t start_ts;
     int fd;
     int pcap_caplen;
@@ -57,12 +58,13 @@
     uint32_t len;
 };
 
-static ssize_t dump_receive(NetClientState *nc, const uint8_t *buf, size_t size)
+static ssize_t dump_receive_iov(DumpState *s, const struct iovec *iov, int cnt)
 {
-    DumpState *s = DO_UPCAST(DumpState, nc, nc);
     struct pcap_sf_pkthdr hdr;
     int64_t ts;
     int caplen;
+    size_t size = iov_size(iov, cnt);
+    struct iovec dumpiov[cnt + 1];
 
     /* Early return in case of previous error. */
     if (s->fd < 0) {
@@ -76,8 +78,12 @@
     hdr.ts.tv_usec = ts % 1000000;
     hdr.caplen = caplen;
     hdr.len = size;
-    if (write(s->fd, &hdr, sizeof(hdr)) != sizeof(hdr) ||
-        write(s->fd, buf, caplen) != caplen) {
+
+    dumpiov[0].iov_base = &hdr;
+    dumpiov[0].iov_len = sizeof(hdr);
+    cnt = iov_copy(&dumpiov[1], cnt, iov, cnt, 0, caplen);
+
+    if (writev(s->fd, dumpiov, cnt + 1) != sizeof(hdr) + caplen) {
         qemu_log("-net dump write error - stop dump\n");
         close(s->fd);
         s->fd = -1;
@@ -86,27 +92,16 @@
     return size;
 }
 
-static void dump_cleanup(NetClientState *nc)
+static void dump_cleanup(DumpState *s)
 {
-    DumpState *s = DO_UPCAST(DumpState, nc, nc);
-
     close(s->fd);
+    s->fd = -1;
 }
 
-static NetClientInfo net_dump_info = {
-    .type = NET_CLIENT_OPTIONS_KIND_DUMP,
-    .size = sizeof(DumpState),
-    .receive = dump_receive,
-    .cleanup = dump_cleanup,
-};
-
-static int net_dump_init(NetClientState *peer, const char *device,
-                         const char *name, const char *filename, int len,
-                         Error **errp)
+static int net_dump_state_init(DumpState *s, const char *filename,
+                               int len, Error **errp)
 {
     struct pcap_file_hdr hdr;
-    NetClientState *nc;
-    DumpState *s;
     struct tm tm;
     int fd;
 
@@ -130,13 +125,6 @@
         return -1;
     }
 
-    nc = qemu_new_net_client(&net_dump_info, peer, device, name);
-
-    snprintf(nc->info_str, sizeof(nc->info_str),
-             "dump to %s (len=%d)", filename, len);
-
-    s = DO_UPCAST(DumpState, nc, nc);
-
     s->fd = fd;
     s->pcap_caplen = len;
 
@@ -146,13 +134,58 @@
     return 0;
 }
 
+/* Dumping via VLAN netclient */
+
+struct DumpNetClient {
+    NetClientState nc;
+    DumpState ds;
+};
+typedef struct DumpNetClient DumpNetClient;
+
+static ssize_t dumpclient_receive(NetClientState *nc, const uint8_t *buf,
+                                  size_t size)
+{
+    DumpNetClient *dc = DO_UPCAST(DumpNetClient, nc, nc);
+    struct iovec iov = {
+        .iov_base = (void *)buf,
+        .iov_len = size
+    };
+
+    return dump_receive_iov(&dc->ds, &iov, 1);
+}
+
+static ssize_t dumpclient_receive_iov(NetClientState *nc,
+                                      const struct iovec *iov, int cnt)
+{
+    DumpNetClient *dc = DO_UPCAST(DumpNetClient, nc, nc);
+
+    return dump_receive_iov(&dc->ds, iov, cnt);
+}
+
+static void dumpclient_cleanup(NetClientState *nc)
+{
+    DumpNetClient *dc = DO_UPCAST(DumpNetClient, nc, nc);
+
+    dump_cleanup(&dc->ds);
+}
+
+static NetClientInfo net_dump_info = {
+    .type = NET_CLIENT_OPTIONS_KIND_DUMP,
+    .size = sizeof(DumpNetClient),
+    .receive = dumpclient_receive,
+    .receive_iov = dumpclient_receive_iov,
+    .cleanup = dumpclient_cleanup,
+};
+
 int net_init_dump(const NetClientOptions *opts, const char *name,
                   NetClientState *peer, Error **errp)
 {
-    int len;
+    int len, rc;
     const char *file;
     char def_file[128];
     const NetdevDumpOptions *dump;
+    NetClientState *nc;
+    DumpNetClient *dnc;
 
     assert(opts->kind == NET_CLIENT_OPTIONS_KIND_DUMP);
     dump = opts->dump;
@@ -182,5 +215,140 @@
         len = 65536;
     }
 
-    return net_dump_init(peer, "dump", name, file, len, errp);
+    nc = qemu_new_net_client(&net_dump_info, peer, "dump", name);
+    snprintf(nc->info_str, sizeof(nc->info_str),
+             "dump to %s (len=%d)", file, len);
+
+    dnc = DO_UPCAST(DumpNetClient, nc, nc);
+    rc = net_dump_state_init(&dnc->ds, file, len, errp);
+    if (rc) {
+        qemu_del_net_client(nc);
+    }
+    return rc;
 }
+
+/* Dumping via filter */
+
+#define TYPE_FILTER_DUMP "filter-dump"
+
+#define FILTER_DUMP(obj) \
+    OBJECT_CHECK(NetFilterDumpState, (obj), TYPE_FILTER_DUMP)
+
+struct NetFilterDumpState {
+    NetFilterState nfs;
+    DumpState ds;
+    char *filename;
+    uint32_t maxlen;
+};
+typedef struct NetFilterDumpState NetFilterDumpState;
+
+static ssize_t filter_dump_receive_iov(NetFilterState *nf, NetClientState *sndr,
+                                       unsigned flags, const struct iovec *iov,
+                                       int iovcnt, NetPacketSent *sent_cb)
+{
+    NetFilterDumpState *nfds = FILTER_DUMP(nf);
+
+    dump_receive_iov(&nfds->ds, iov, iovcnt);
+    return 0;
+}
+
+static void filter_dump_cleanup(NetFilterState *nf)
+{
+    NetFilterDumpState *nfds = FILTER_DUMP(nf);
+
+    dump_cleanup(&nfds->ds);
+}
+
+static void filter_dump_setup(NetFilterState *nf, Error **errp)
+{
+    NetFilterDumpState *nfds = FILTER_DUMP(nf);
+
+    if (!nfds->filename) {
+        error_setg(errp, "dump filter needs 'file' property set!");
+        return;
+    }
+
+    net_dump_state_init(&nfds->ds, nfds->filename, nfds->maxlen, errp);
+}
+
+static void filter_dump_get_maxlen(Object *obj, Visitor *v, void *opaque,
+                                   const char *name, Error **errp)
+{
+    NetFilterDumpState *nfds = FILTER_DUMP(obj);
+    uint32_t value = nfds->maxlen;
+
+    visit_type_uint32(v, &value, name, errp);
+}
+
+static void filter_dump_set_maxlen(Object *obj, Visitor *v, void *opaque,
+                                   const char *name, Error **errp)
+{
+    NetFilterDumpState *nfds = FILTER_DUMP(obj);
+    Error *local_err = NULL;
+    uint32_t value;
+
+    visit_type_uint32(v, &value, name, &local_err);
+    if (local_err) {
+        goto out;
+    }
+    if (value == 0) {
+        error_setg(&local_err, "Property '%s.%s' doesn't take value '%u'",
+                   object_get_typename(obj), name, value);
+        goto out;
+    }
+    nfds->maxlen = value;
+
+out:
+    error_propagate(errp, local_err);
+}
+
+static char *file_dump_get_filename(Object *obj, Error **errp)
+{
+    NetFilterDumpState *nfds = FILTER_DUMP(obj);
+
+    return g_strdup(nfds->filename);
+}
+
+static void file_dump_set_filename(Object *obj, const char *value, Error **errp)
+{
+   NetFilterDumpState *nfds = FILTER_DUMP(obj);
+
+    g_free(nfds->filename);
+    nfds->filename = g_strdup(value);
+}
+
+static void filter_dump_instance_init(Object *obj)
+{
+    NetFilterDumpState *nfds = FILTER_DUMP(obj);
+
+    nfds->maxlen = 65536;
+
+    object_property_add(obj, "maxlen", "int", filter_dump_get_maxlen,
+                        filter_dump_set_maxlen, NULL, NULL, NULL);
+    object_property_add_str(obj, "file", file_dump_get_filename,
+                            file_dump_set_filename, NULL);
+}
+
+static void filter_dump_class_init(ObjectClass *oc, void *data)
+{
+    NetFilterClass *nfc = NETFILTER_CLASS(oc);
+
+    nfc->setup = filter_dump_setup;
+    nfc->cleanup = filter_dump_cleanup;
+    nfc->receive_iov = filter_dump_receive_iov;
+}
+
+static const TypeInfo filter_dump_info = {
+    .name = TYPE_FILTER_DUMP,
+    .parent = TYPE_NETFILTER,
+    .class_init = filter_dump_class_init,
+    .instance_init = filter_dump_instance_init,
+    .instance_size = sizeof(NetFilterDumpState),
+};
+
+static void filter_dump_register_types(void)
+{
+    type_register_static(&filter_dump_info);
+}
+
+type_init(filter_dump_register_types);

diff --git a/net/net.c b/net/net.c
index 3c68f3f..a3e9d1a 100644
--- a/net/net.c
+++ b/net/net.c

@@ -708,7 +708,7 @@
         offset = iov[0].iov_len;
     } else {
         buffer = buf;
-        offset = iov_to_buf(iov, iovcnt, 0, buffer, sizeof(buffer));
+        offset = iov_to_buf(iov, iovcnt, 0, buf, sizeof(buf));
     }
 
     if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) {
@@ -1197,10 +1197,11 @@
         monitor_printf(mon, "filters:\n");
     }
     QTAILQ_FOREACH(nf, &nc->filters, next) {
-        monitor_printf(mon, "  - %s: type=%s%s\n",
-                       object_get_canonical_path_component(OBJECT(nf)),
+        char *path = object_get_canonical_path_component(OBJECT(nf));
+        monitor_printf(mon, "  - %s: type=%s%s\n", path,
                        object_get_typename(OBJECT(nf)),
                        nf->info_str);
+        g_free(path);
     }
 }
 
@@ -1227,6 +1228,12 @@
             continue;
         }
 
+        /* only query information on queue 0 since the info is per nic,
+         * not per queue
+         */
+        if (nc->queue_index != 0)
+            continue;
+
         if (nc->info->query_rx_filter) {
             info = nc->info->query_rx_filter(nc);
             entry = g_malloc0(sizeof(*entry));

diff --git a/pc-bios/openbios-ppc b/pc-bios/openbios-ppc
index 540e45a..a419bfd 100644
--- a/pc-bios/openbios-ppc
+++ b/pc-bios/openbios-ppc
Binary files differ

diff --git a/pc-bios/openbios-sparc32 b/pc-bios/openbios-sparc32
index 0da1188..cf52787 100644
--- a/pc-bios/openbios-sparc32
+++ b/pc-bios/openbios-sparc32
Binary files differ

diff --git a/pc-bios/openbios-sparc64 b/pc-bios/openbios-sparc64
index 9bf3ce5..08eecdf 100644
--- a/pc-bios/openbios-sparc64
+++ b/pc-bios/openbios-sparc64
Binary files differ

diff --git a/qapi/block-core.json b/qapi/block-core.json
index bb2189e..425fdab 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json

@@ -215,10 +215,11 @@
 # @drv: the name of the block format used to open the backing device. As of
 #       0.14.0 this can be: 'blkdebug', 'bochs', 'cloop', 'cow', 'dmg',
 #       'file', 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device',
-#       'host_floppy', 'http', 'https', 'nbd', 'parallels', 'qcow',
+#       'http', 'https', 'nbd', 'parallels', 'qcow',
 #       'qcow2', 'raw', 'tftp', 'vdi', 'vmdk', 'vpc', 'vvfat'
 #       2.2: 'archipelago' added, 'cow' dropped
 #       2.3: 'host_floppy' deprecated
+#       2.5: 'host_floppy' dropped
 #
 # @backing_file: #optional the name of the backing file (for copy-on-write)
 #
@@ -1373,15 +1374,14 @@
 #
 # Drivers that are supported in block device operations.
 #
-# @host_device, @host_cdrom, @host_floppy: Since 2.1
-# @host_floppy: deprecated since 2.3
+# @host_device, @host_cdrom: Since 2.1
 #
 # Since: 2.0
 ##
 { 'enum': 'BlockdevDriver',
   'data': [ 'archipelago', 'blkdebug', 'blkverify', 'bochs', 'cloop',
             'dmg', 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device',
-            'host_floppy', 'http', 'https', 'null-aio', 'null-co', 'parallels',
+            'http', 'https', 'null-aio', 'null-co', 'parallels',
             'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'tftp', 'vdi', 'vhdx',
             'vmdk', 'vpc', 'vvfat' ] }
 
@@ -1393,9 +1393,12 @@
 #
 # @driver:        block driver name
 # @id:            #optional id by which the new block device can be referred to.
-#                 This is a required option on the top level of blockdev-add, and
-#                 currently not allowed on any other level.
-# @node-name:     #optional the name of a block driver state node (Since 2.0)
+#                 This option is only allowed on the top level of blockdev-add.
+#                 A BlockBackend will be created by blockdev-add if and only if
+#                 this option is given.
+# @node-name:     #optional the name of a block driver state node (Since 2.0).
+#                 This option is required on the top level of blockdev-add if
+#                 the @id option is not given there.
 # @discard:       #optional discard-related options (default: ignore)
 # @cache:         #optional cache-related options
 # @aio:           #optional AIO backend (default: threads)
@@ -1816,7 +1819,6 @@
 # TODO gluster: Wait for structured options
       'host_cdrom': 'BlockdevOptionsFile',
       'host_device':'BlockdevOptionsFile',
-      'host_floppy':'BlockdevOptionsFile',
       'http':       'BlockdevOptionsFile',
       'https':      'BlockdevOptionsFile',
 # TODO iscsi: Wait for structured options
@@ -1860,7 +1862,9 @@
 ##
 # @blockdev-add:
 #
-# Creates a new block device.
+# Creates a new block device. If the @id option is given at the top level, a
+# BlockBackend will be created; otherwise, @node-name is mandatory at the top
+# level and no BlockBackend will be created.
 #
 # This command is still a work in progress.  It doesn't support all
 # block drivers, it lacks a matching blockdev-del, and more.  Stay

diff --git a/qapi/qmp-input-visitor.c b/qapi/qmp-input-visitor.c
index 5dd9ed5..eb6e110 100644
--- a/qapi/qmp-input-visitor.c
+++ b/qapi/qmp-input-visitor.c

@@ -225,45 +225,45 @@
                                Error **errp)
 {
     QmpInputVisitor *qiv = to_qiv(v);
-    QObject *qobj = qmp_input_get_object(qiv, name, true);
+    QInt *qint = qobject_to_qint(qmp_input_get_object(qiv, name, true));
 
-    if (!qobj || qobject_type(qobj) != QTYPE_QINT) {
+    if (!qint) {
         error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name ? name : "null",
                    "integer");
         return;
     }
 
-    *obj = qint_get_int(qobject_to_qint(qobj));
+    *obj = qint_get_int(qint);
 }
 
 static void qmp_input_type_bool(Visitor *v, bool *obj, const char *name,
                                 Error **errp)
 {
     QmpInputVisitor *qiv = to_qiv(v);
-    QObject *qobj = qmp_input_get_object(qiv, name, true);
+    QBool *qbool = qobject_to_qbool(qmp_input_get_object(qiv, name, true));
 
-    if (!qobj || qobject_type(qobj) != QTYPE_QBOOL) {
+    if (!qbool) {
         error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name ? name : "null",
                    "boolean");
         return;
     }
 
-    *obj = qbool_get_bool(qobject_to_qbool(qobj));
+    *obj = qbool_get_bool(qbool);
 }
 
 static void qmp_input_type_str(Visitor *v, char **obj, const char *name,
                                Error **errp)
 {
     QmpInputVisitor *qiv = to_qiv(v);
-    QObject *qobj = qmp_input_get_object(qiv, name, true);
+    QString *qstr = qobject_to_qstring(qmp_input_get_object(qiv, name, true));
 
-    if (!qobj || qobject_type(qobj) != QTYPE_QSTRING) {
+    if (!qstr) {
         error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name ? name : "null",
                    "string");
         return;
     }
 
-    *obj = g_strdup(qstring_get_str(qobject_to_qstring(qobj)));
+    *obj = g_strdup(qstring_get_str(qstr));
 }
 
 static void qmp_input_type_number(Visitor *v, double *obj, const char *name,
@@ -271,19 +271,23 @@
 {
     QmpInputVisitor *qiv = to_qiv(v);
     QObject *qobj = qmp_input_get_object(qiv, name, true);
+    QInt *qint;
+    QFloat *qfloat;
 
-    if (!qobj || (qobject_type(qobj) != QTYPE_QFLOAT &&
-        qobject_type(qobj) != QTYPE_QINT)) {
-        error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name ? name : "null",
-                   "number");
+    qint = qobject_to_qint(qobj);
+    if (qint) {
+        *obj = qint_get_int(qobject_to_qint(qobj));
         return;
     }
 
-    if (qobject_type(qobj) == QTYPE_QINT) {
-        *obj = qint_get_int(qobject_to_qint(qobj));
-    } else {
+    qfloat = qobject_to_qfloat(qobj);
+    if (qfloat) {
         *obj = qfloat_get_double(qobject_to_qfloat(qobj));
+        return;
     }
+
+    error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name ? name : "null",
+               "number");
 }
 
 static void qmp_input_type_any(Visitor *v, QObject **obj, const char *name,

diff --git a/qemu-char.c b/qemu-char.c
index 908e712..c4eb4ee 100644
--- a/qemu-char.c
+++ b/qemu-char.c

@@ -3876,9 +3876,8 @@
     s->avail_connections++;
 }
 
-void qemu_chr_delete(CharDriverState *chr)
+void qemu_chr_free(CharDriverState *chr)
 {
-    QTAILQ_REMOVE(&chardevs, chr, next);
     if (chr->chr_close) {
         chr->chr_close(chr);
     }
@@ -3888,6 +3887,12 @@
     g_free(chr);
 }
 
+void qemu_chr_delete(CharDriverState *chr)
+{
+    QTAILQ_REMOVE(&chardevs, chr, next);
+    qemu_chr_free(chr);
+}
+
 ChardevInfoList *qmp_query_chardev(Error **errp)
 {
     ChardevInfoList *chr_list = NULL;

diff --git a/qemu-doc.texi b/qemu-doc.texi
index 5b81aa0..3126abd 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi

@@ -1266,9 +1266,13 @@
 memory server is:
 
 @example
-qemu-system-i386 -device ivshmem,size=<size in format accepted by -m>[,chardev=<id>]
-                 [,msi=on][,ioeventfd=on][,vectors=n][,role=peer|master]
-qemu-system-i386 -chardev socket,path=<path>,id=<id>
+# First start the ivshmem server once and for all
+ivshmem-server -p <pidfile> -S <path> -m <shm name> -l <shm size> -n <vectors n>
+
+# Then start your qemu instances with matching arguments
+qemu-system-i386 -device ivshmem,size=<shm size>,vectors=<vectors n>,chardev=<id>
+                 [,msi=on][,ioeventfd=on][,role=peer|master]
+                 -chardev socket,path=<path>,id=<id>
 @end example
 
 When using the server, the guest will be assigned a VM ID (>=0) that allows guests
@@ -1289,6 +1293,19 @@
 With the @option{peer} case, the device should be detached and then reattached
 after migration using the PCI hotplug support.
 
+@subsubsection ivshmem and hugepages
+
+Instead of specifying the <shm size> using POSIX shm, you may specify
+a memory backend that has hugepage support:
+
+@example
+qemu-system-i386 -object memory-backend-file,size=1G,mem-path=/mnt/hugepages,id=mb1
+                 -device ivshmem,memdev=mb1
+@end example
+
+ivshmem-server also supports hugepages mount points with the
+@option{-m} memory path argument.
+
 @node direct_linux_boot
 @section Direct Linux Boot
 

diff --git a/qemu-log.c b/qemu-log.c
index 13f3813..efd07c8 100644
--- a/qemu-log.c
+++ b/qemu-log.c

@@ -119,6 +119,9 @@
     { LOG_GUEST_ERROR, "guest_errors",
       "log when the guest OS does something invalid (eg accessing a\n"
       "non-existent register)" },
+    { CPU_LOG_TB_NOCHAIN, "nochain",
+      "do not chain compiled TBs so that \"exec\" and \"cpu\" show\n"
+      "complete traces" },
     { 0, NULL, NULL },
 };
 

diff --git a/qemu-options.hx b/qemu-options.hx
index edee5f4..949db7f 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx

@@ -2012,6 +2012,7 @@
 Dump network traffic on VLAN @var{n} to file @var{file} (@file{qemu-vlan0.pcap} by default).
 At most @var{len} bytes (64k by default) per packet are stored. The file format is
 libpcap, so it can be analyzed with tools such as tcpdump or Wireshark.
+Note: For devices created with '-netdev', use '-object filter-dump,...' instead.
 
 @item -net none
 Indicate that no network devices should be configured. It is used to
@@ -3665,6 +3666,13 @@
 @option{tx}: the filter is attached to the transmit queue of the netdev,
              where it will receive packets sent by the netdev.
 
+@item -object filter-dump,id=@var{id},netdev=@var{dev},file=@var{filename}][,maxlen=@var{len}]
+
+Dump the network traffic on netdev @var{dev} to the file specified by
+@var{filename}. At most @var{len} bytes (64k by default) per packet are stored.
+The file format is libpcap, so it can be analyzed with tools such as tcpdump
+or Wireshark.
+
 @end table
 
 ETEXI

diff --git a/qga/main.c b/qga/main.c
index 068169f..d2a0ffc 100644
--- a/qga/main.c
+++ b/qga/main.c

@@ -573,7 +573,6 @@
 static void process_event(JSONMessageParser *parser, QList *tokens)
 {
     GAState *s = container_of(parser, GAState, parser);
-    QObject *obj;
     QDict *qdict;
     Error *err = NULL;
     int ret;
@@ -581,9 +580,9 @@
     g_assert(s && parser);
 
     g_debug("process_event: called");
-    obj = json_parser_parse_err(tokens, NULL, &err);
-    if (err || !obj || qobject_type(obj) != QTYPE_QDICT) {
-        qobject_decref(obj);
+    qdict = qobject_to_qdict(json_parser_parse_err(tokens, NULL, &err));
+    if (err || !qdict) {
+        QDECREF(qdict);
         qdict = qdict_new();
         if (!err) {
             g_warning("failed to parse event: unknown error");
@@ -593,12 +592,8 @@
         }
         qdict_put_obj(qdict, "error", qmp_build_error_object(err));
         error_free(err);
-    } else {
-        qdict = qobject_to_qdict(obj);
     }
 
-    g_assert(qdict);
-
     /* handle host->guest commands */
     if (qdict_haskey(qdict, "execute")) {
         process_command(s, qdict);

diff --git a/qmp-commands.hx b/qmp-commands.hx
index 2b52980..d7cf0ff 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx

@@ -2520,8 +2520,8 @@
     - "wr_total_time_ns": total time spend on writes in nano-seconds (json-int)
     - "rd_total_time_ns": total time spend on reads in nano-seconds (json-int)
     - "flush_total_time_ns": total time spend on cache flushes in nano-seconds (json-int)
-    - "wr_highest_offset": Highest offset of a sector written since the
-                           BlockDriverState has been opened (json-int)
+    - "wr_highest_offset": The offset after the greatest byte written to the
+                           BlockDriverState since it has been opened (json-int)
     - "rd_merged": number of read requests that have been merged into
                    another request (json-int)
     - "wr_merged": number of write requests that have been merged into

diff --git a/qmp.c b/qmp.c
index d9ecede..ff54e5a 100644
--- a/qmp.c
+++ b/qmp.c

@@ -24,6 +24,7 @@
 #include "sysemu/arch_init.h"
 #include "hw/qdev.h"
 #include "sysemu/blockdev.h"
+#include "sysemu/block-backend.h"
 #include "qom/qom-qobject.h"
 #include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qobject.h"
@@ -170,6 +171,7 @@
 void qmp_cont(Error **errp)
 {
     Error *local_err = NULL;
+    BlockBackend *blk;
     BlockDriverState *bs;
 
     if (runstate_needs_reset()) {
@@ -179,8 +181,8 @@
         return;
     }
 
-    for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) {
-        bdrv_iostatus_reset(bs);
+    for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
+        blk_iostatus_reset(blk);
     }
     for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) {
         bdrv_add_key(bs, NULL, &local_err);

diff --git a/qobject/qbool.c b/qobject/qbool.c
index 5ff69f0..bc6535f 100644
--- a/qobject/qbool.c
+++ b/qobject/qbool.c

@@ -51,9 +51,9 @@
  */
 QBool *qobject_to_qbool(const QObject *obj)
 {
-    if (qobject_type(obj) != QTYPE_QBOOL)
+    if (!obj || qobject_type(obj) != QTYPE_QBOOL) {
         return NULL;
-
+    }
     return container_of(obj, QBool, base);
 }
 

diff --git a/qobject/qdict.c b/qobject/qdict.c
index 67b1a58..2d67bf1 100644
--- a/qobject/qdict.c
+++ b/qobject/qdict.c

@@ -46,9 +46,9 @@
  */
 QDict *qobject_to_qdict(const QObject *obj)
 {
-    if (qobject_type(obj) != QTYPE_QDICT)
+    if (!obj || qobject_type(obj) != QTYPE_QDICT) {
         return NULL;
-
+    }
     return container_of(obj, QDict, base);
 }
 
@@ -229,8 +229,7 @@
  */
 int64_t qdict_get_int(const QDict *qdict, const char *key)
 {
-    QObject *obj = qdict_get_obj(qdict, key, QTYPE_QINT);
-    return qint_get_int(qobject_to_qint(obj));
+    return qint_get_int(qobject_to_qint(qdict_get(qdict, key)));
 }
 
 /**
@@ -243,8 +242,7 @@
  */
 bool qdict_get_bool(const QDict *qdict, const char *key)
 {
-    QObject *obj = qdict_get_obj(qdict, key, QTYPE_QBOOL);
-    return qbool_get_bool(qobject_to_qbool(obj));
+    return qbool_get_bool(qobject_to_qbool(qdict_get(qdict, key)));
 }
 
 /**
@@ -270,7 +268,7 @@
  */
 QDict *qdict_get_qdict(const QDict *qdict, const char *key)
 {
-    return qobject_to_qdict(qdict_get_obj(qdict, key, QTYPE_QDICT));
+    return qobject_to_qdict(qdict_get(qdict, key));
 }
 
 /**
@@ -284,8 +282,7 @@
  */
 const char *qdict_get_str(const QDict *qdict, const char *key)
 {
-    QObject *obj = qdict_get_obj(qdict, key, QTYPE_QSTRING);
-    return qstring_get_str(qobject_to_qstring(obj));
+    return qstring_get_str(qobject_to_qstring(qdict_get(qdict, key)));
 }
 
 /**
@@ -298,13 +295,9 @@
 int64_t qdict_get_try_int(const QDict *qdict, const char *key,
                           int64_t def_value)
 {
-    QObject *obj;
+    QInt *qint = qobject_to_qint(qdict_get(qdict, key));
 
-    obj = qdict_get(qdict, key);
-    if (!obj || qobject_type(obj) != QTYPE_QINT)
-        return def_value;
-
-    return qint_get_int(qobject_to_qint(obj));
+    return qint ? qint_get_int(qint) : def_value;
 }
 
 /**
@@ -316,13 +309,9 @@
  */
 bool qdict_get_try_bool(const QDict *qdict, const char *key, bool def_value)
 {
-    QObject *obj;
+    QBool *qbool = qobject_to_qbool(qdict_get(qdict, key));
 
-    obj = qdict_get(qdict, key);
-    if (!obj || qobject_type(obj) != QTYPE_QBOOL)
-        return def_value;
-
-    return qbool_get_bool(qobject_to_qbool(obj));
+    return qbool ? qbool_get_bool(qbool) : def_value;
 }
 
 /**
@@ -335,13 +324,9 @@
  */
 const char *qdict_get_try_str(const QDict *qdict, const char *key)
 {
-    QObject *obj;
+    QString *qstr = qobject_to_qstring(qdict_get(qdict, key));
 
-    obj = qdict_get(qdict, key);
-    if (!obj || qobject_type(obj) != QTYPE_QSTRING)
-        return NULL;
-
-    return qstring_get_str(qobject_to_qstring(obj));
+    return qstr ? qstring_get_str(qstr) : NULL;
 }
 
 /**

diff --git a/qobject/qfloat.c b/qobject/qfloat.c
index 7de0992..c865163 100644
--- a/qobject/qfloat.c
+++ b/qobject/qfloat.c

@@ -51,9 +51,9 @@
  */
 QFloat *qobject_to_qfloat(const QObject *obj)
 {
-    if (qobject_type(obj) != QTYPE_QFLOAT)
+    if (!obj || qobject_type(obj) != QTYPE_QFLOAT) {
         return NULL;
-
+    }
     return container_of(obj, QFloat, base);
 }
 

diff --git a/qobject/qint.c b/qobject/qint.c
index 86b9b04..999688e 100644
--- a/qobject/qint.c
+++ b/qobject/qint.c

@@ -50,9 +50,9 @@
  */
 QInt *qobject_to_qint(const QObject *obj)
 {
-    if (qobject_type(obj) != QTYPE_QINT)
+    if (!obj || qobject_type(obj) != QTYPE_QINT) {
         return NULL;
-
+    }
     return container_of(obj, QInt, base);
 }
 

diff --git a/qobject/qlist.c b/qobject/qlist.c
index 1ced0de..298003a 100644
--- a/qobject/qlist.c
+++ b/qobject/qlist.c

@@ -142,10 +142,9 @@
  */
 QList *qobject_to_qlist(const QObject *obj)
 {
-    if (qobject_type(obj) != QTYPE_QLIST) {
+    if (!obj || qobject_type(obj) != QTYPE_QLIST) {
         return NULL;
     }
-
     return container_of(obj, QList, base);
 }
 

diff --git a/qobject/qstring.c b/qobject/qstring.c
index 607b7a1..cb72dfb 100644
--- a/qobject/qstring.c
+++ b/qobject/qstring.c

@@ -117,9 +117,9 @@
  */
 QString *qobject_to_qstring(const QObject *obj)
 {
-    if (qobject_type(obj) != QTYPE_QSTRING)
+    if (!obj || qobject_type(obj) != QTYPE_QSTRING) {
         return NULL;
-
+    }
     return container_of(obj, QString, base);
 }
 

diff --git a/roms/openbios b/roms/openbios
index 18f02b1..3caee17 160000
--- a/roms/openbios
+++ b/roms/openbios

@@ -1 +1 @@
-Subproject commit 18f02b14de795c1aab4fe23c1810bfd0944da6aa
+Subproject commit 3caee1794ac3f742315823d8447d21f33ce019e9

diff --git a/scripts/qemu-gdb.py b/scripts/qemu-gdb.py
index d6f2e5a..b3f8e04 100644
--- a/scripts/qemu-gdb.py
+++ b/scripts/qemu-gdb.py

@@ -26,7 +26,7 @@
 
 sys.path.append(os.path.dirname(__file__))
 
-from qemugdb import mtree, coroutine
+from qemugdb import aio, mtree, coroutine
 
 class QemuCommand(gdb.Command):
     '''Prefix for QEMU debug support commands'''
@@ -37,6 +37,10 @@
 QemuCommand()
 coroutine.CoroutineCommand()
 mtree.MtreeCommand()
+aio.HandlersCommand()
+
+coroutine.CoroutineSPFunction()
+coroutine.CoroutinePCFunction()
 
 # Default to silently passing through SIGUSR1, because QEMU sends it
 # to itself a lot.

diff --git a/scripts/qemugdb/aio.py b/scripts/qemugdb/aio.py
new file mode 100644
index 0000000..2ba00c4
--- /dev/null
+++ b/scripts/qemugdb/aio.py

@@ -0,0 +1,58 @@
+#!/usr/bin/python
+
+# GDB debugging support: aio/iohandler debug
+#
+# Copyright (c) 2015 Red Hat, Inc.
+#
+# Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+#
+
+import gdb
+from qemugdb import coroutine
+
+def isnull(ptr):
+    return ptr == gdb.Value(0).cast(ptr.type)
+
+def dump_aiocontext(context, verbose):
+    '''Display a dump and backtrace for an aiocontext'''
+    cur = context['aio_handlers']['lh_first']
+    # Get pointers to functions we're going to process specially
+    sym_fd_coroutine_enter = gdb.parse_and_eval('fd_coroutine_enter')
+
+    while not isnull(cur):
+        entry = cur.dereference()
+        gdb.write('----\n%s\n' % entry)
+        if verbose and cur['io_read'] == sym_fd_coroutine_enter:
+            coptr = (cur['opaque'].cast(gdb.lookup_type('FDYieldUntilData').pointer()))['co']
+            coptr = coptr.cast(gdb.lookup_type('CoroutineUContext').pointer())
+            coroutine.bt_jmpbuf(coptr['env']['__jmpbuf'])
+        cur = cur['node']['le_next'];
+
+    gdb.write('----\n')
+
+class HandlersCommand(gdb.Command):
+    '''Display aio handlers'''
+    def __init__(self):
+        gdb.Command.__init__(self, 'qemu handlers', gdb.COMMAND_DATA,
+                             gdb.COMPLETE_NONE)
+
+    def invoke(self, arg, from_tty):
+        verbose = False
+        argv = gdb.string_to_argv(arg)
+
+        if len(argv) > 0 and argv[0] == '--verbose':
+            verbose = True
+            argv.pop(0)
+
+        if len(argv) > 1:
+            gdb.write('usage: qemu handlers [--verbose] [handler]\n')
+            return
+
+        if len(argv) == 1:
+            handlers_name = argv[0]
+        else:
+            handlers_name = 'qemu_aio_context'
+        dump_aiocontext(gdb.parse_and_eval(handlers_name), verbose)

diff --git a/scripts/qemugdb/coroutine.py b/scripts/qemugdb/coroutine.py
index 3c54918..ab69979 100644
--- a/scripts/qemugdb/coroutine.py
+++ b/scripts/qemugdb/coroutine.py

@@ -15,8 +15,11 @@
 
 import gdb
 
+VOID_PTR = gdb.lookup_type('void').pointer()
+
 def get_fs_base():
-    '''Fetch %fs base value using arch_prctl(ARCH_GET_FS)'''
+    '''Fetch %fs base value using arch_prctl(ARCH_GET_FS).  This is
+       pthread_self().'''
     # %rsp - 120 is scratch space according to the SystemV ABI
     old = gdb.parse_and_eval('*(uint64_t*)($rsp - 120)')
     gdb.execute('call arch_prctl(0x1003, $rsp - 120)', False, True)
@@ -24,17 +27,29 @@
     gdb.execute('set *(uint64_t*)($rsp - 120) = %s' % old, False, True)
     return fs_base
 
+def pthread_self():
+    '''Fetch pthread_self() from the glibc start_thread function.'''
+    f = gdb.newest_frame()
+    while f.name() != 'start_thread':
+        f = f.older()
+        if f is None:
+            return get_fs_base()
+
+    try:
+        return f.read_var("arg")
+    except ValueError:
+        return get_fs_base()
+
 def get_glibc_pointer_guard():
     '''Fetch glibc pointer guard value'''
-    fs_base = get_fs_base()
+    fs_base = pthread_self()
     return gdb.parse_and_eval('*(uint64_t*)((uint64_t)%s + 0x30)' % fs_base)
 
 def glibc_ptr_demangle(val, pointer_guard):
     '''Undo effect of glibc's PTR_MANGLE()'''
     return gdb.parse_and_eval('(((uint64_t)%s >> 0x11) | ((uint64_t)%s << (64 - 0x11))) ^ (uint64_t)%s' % (val, val, pointer_guard))
 
-def bt_jmpbuf(jmpbuf):
-    '''Backtrace a jmpbuf'''
+def get_jmpbuf_regs(jmpbuf):
     JB_RBX  = 0
     JB_RBP  = 1
     JB_R12  = 2
@@ -44,35 +59,35 @@
     JB_RSP  = 6
     JB_PC   = 7
 
-    old_rbx = gdb.parse_and_eval('(uint64_t)$rbx')
-    old_rbp = gdb.parse_and_eval('(uint64_t)$rbp')
-    old_rsp = gdb.parse_and_eval('(uint64_t)$rsp')
-    old_r12 = gdb.parse_and_eval('(uint64_t)$r12')
-    old_r13 = gdb.parse_and_eval('(uint64_t)$r13')
-    old_r14 = gdb.parse_and_eval('(uint64_t)$r14')
-    old_r15 = gdb.parse_and_eval('(uint64_t)$r15')
-    old_rip = gdb.parse_and_eval('(uint64_t)$rip')
-
     pointer_guard = get_glibc_pointer_guard()
-    gdb.execute('set $rbx = %s' % jmpbuf[JB_RBX])
-    gdb.execute('set $rbp = %s' % glibc_ptr_demangle(jmpbuf[JB_RBP], pointer_guard))
-    gdb.execute('set $rsp = %s' % glibc_ptr_demangle(jmpbuf[JB_RSP], pointer_guard))
-    gdb.execute('set $r12 = %s' % jmpbuf[JB_R12])
-    gdb.execute('set $r13 = %s' % jmpbuf[JB_R13])
-    gdb.execute('set $r14 = %s' % jmpbuf[JB_R14])
-    gdb.execute('set $r15 = %s' % jmpbuf[JB_R15])
-    gdb.execute('set $rip = %s' % glibc_ptr_demangle(jmpbuf[JB_PC], pointer_guard))
+    return {'rbx': jmpbuf[JB_RBX],
+        'rbp': glibc_ptr_demangle(jmpbuf[JB_RBP], pointer_guard),
+        'rsp': glibc_ptr_demangle(jmpbuf[JB_RSP], pointer_guard),
+        'r12': jmpbuf[JB_R12],
+        'r13': jmpbuf[JB_R13],
+        'r14': jmpbuf[JB_R14],
+        'r15': jmpbuf[JB_R15],
+        'rip': glibc_ptr_demangle(jmpbuf[JB_PC], pointer_guard) }
+
+def bt_jmpbuf(jmpbuf):
+    '''Backtrace a jmpbuf'''
+    regs = get_jmpbuf_regs(jmpbuf)
+    old = dict()
+
+    for i in regs:
+        old[i] = gdb.parse_and_eval('(uint64_t)$%s' % i)
+
+    for i in regs:
+        gdb.execute('set $%s = %s' % (i, regs[i]))
 
     gdb.execute('bt')
 
-    gdb.execute('set $rbx = %s' % old_rbx)
-    gdb.execute('set $rbp = %s' % old_rbp)
-    gdb.execute('set $rsp = %s' % old_rsp)
-    gdb.execute('set $r12 = %s' % old_r12)
-    gdb.execute('set $r13 = %s' % old_r13)
-    gdb.execute('set $r14 = %s' % old_r14)
-    gdb.execute('set $r15 = %s' % old_r15)
-    gdb.execute('set $rip = %s' % old_rip)
+    for i in regs:
+        gdb.execute('set $%s = %s' % (i, old[i]))
+
+def coroutine_to_jmpbuf(co):
+    coroutine_pointer = co.cast(gdb.lookup_type('CoroutineUContext').pointer())
+    return coroutine_pointer['env']['__jmpbuf']
 
 
 class CoroutineCommand(gdb.Command):
@@ -87,5 +102,18 @@
             gdb.write('usage: qemu coroutine <coroutine-pointer>\n')
             return
 
-        coroutine_pointer = gdb.parse_and_eval(argv[0]).cast(gdb.lookup_type('CoroutineUContext').pointer())
-        bt_jmpbuf(coroutine_pointer['env']['__jmpbuf'])
+        bt_jmpbuf(coroutine_to_jmpbuf(gdb.parse_and_eval(argv[0])))
+
+class CoroutineSPFunction(gdb.Function):
+    def __init__(self):
+        gdb.Function.__init__(self, 'qemu_coroutine_sp')
+
+    def invoke(self, addr):
+        return get_jmpbuf_regs(coroutine_to_jmpbuf(addr))['rsp'].cast(VOID_PTR)
+
+class CoroutinePCFunction(gdb.Function):
+    def __init__(self):
+        gdb.Function.__init__(self, 'qemu_coroutine_pc')
+
+    def invoke(self, addr):
+        return get_jmpbuf_regs(coroutine_to_jmpbuf(addr))['rip'].cast(VOID_PTR)

diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index f936d1b..87950c6 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c

@@ -2917,6 +2917,11 @@
 
         if (unlikely(cpu_breakpoint_test(cs, ctx.pc, BP_ANY))) {
             gen_excp(&ctx, EXCP_DEBUG, 0);
+            /* The address covered by the breakpoint must be included in
+               [tb->pc, tb->pc + tb->size) in order to for it to be
+               properly cleared -- thus we increment the PC here so that
+               the logic setting tb->size below does the right thing.  */
+            ctx.pc += 4;
             break;
         }
         if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {

diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 3daa7f5..815fef8 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h

@@ -279,6 +279,7 @@
             };
             uint64_t far_el[4];
         };
+        uint64_t hpfar_el2;
         union { /* Translation result. */
             struct {
                 uint64_t _unused_par_0;
@@ -1525,8 +1526,6 @@
     CPUARMState *env = cs->env_ptr;
     unsigned int cur_el = arm_current_el(env);
     bool secure = arm_is_secure(env);
-    bool scr;
-    bool hcr;
     bool pstate_unmasked;
     int8_t unmasked = 0;
 
@@ -1540,31 +1539,10 @@
 
     switch (excp_idx) {
     case EXCP_FIQ:
-        /* If FIQs are routed to EL3 or EL2 then there are cases where we
-         * override the CPSR.F in determining if the exception is masked or
-         * not.  If neither of these are set then we fall back to the CPSR.F
-         * setting otherwise we further assess the state below.
-         */
-        hcr = (env->cp15.hcr_el2 & HCR_FMO);
-        scr = (env->cp15.scr_el3 & SCR_FIQ);
-
-        /* When EL3 is 32-bit, the SCR.FW bit controls whether the CPSR.F bit
-         * masks FIQ interrupts when taken in non-secure state.  If SCR.FW is
-         * set then FIQs can be masked by CPSR.F when non-secure but only
-         * when FIQs are only routed to EL3.
-         */
-        scr = scr && !((env->cp15.scr_el3 & SCR_FW) && !hcr);
         pstate_unmasked = !(env->daif & PSTATE_F);
         break;
 
     case EXCP_IRQ:
-        /* When EL3 execution state is 32-bit, if HCR.IMO is set then we may
-         * override the CPSR.I masking when in non-secure state.  The SCR.IRQ
-         * setting has already been taken into consideration when setting the
-         * target EL, so it does not have a further affect here.
-         */
-        hcr = (env->cp15.hcr_el2 & HCR_IMO);
-        scr = false;
         pstate_unmasked = !(env->daif & PSTATE_I);
         break;
 
@@ -1589,13 +1567,58 @@
      * interrupt.
      */
     if ((target_el > cur_el) && (target_el != 1)) {
-        /* ARM_FEATURE_AARCH64 enabled means the highest EL is AArch64.
-         * This code currently assumes that EL2 is not implemented
-         * (and so that highest EL will be 3 and the target_el also 3).
-         */
-        if (arm_feature(env, ARM_FEATURE_AARCH64) ||
-            ((scr || hcr) && (!secure))) {
-            unmasked = 1;
+        /* Exceptions targeting a higher EL may not be maskable */
+        if (arm_feature(env, ARM_FEATURE_AARCH64)) {
+            /* 64-bit masking rules are simple: exceptions to EL3
+             * can't be masked, and exceptions to EL2 can only be
+             * masked from Secure state. The HCR and SCR settings
+             * don't affect the masking logic, only the interrupt routing.
+             */
+            if (target_el == 3 || !secure) {
+                unmasked = 1;
+            }
+        } else {
+            /* The old 32-bit-only environment has a more complicated
+             * masking setup. HCR and SCR bits not only affect interrupt
+             * routing but also change the behaviour of masking.
+             */
+            bool hcr, scr;
+
+            switch (excp_idx) {
+            case EXCP_FIQ:
+                /* If FIQs are routed to EL3 or EL2 then there are cases where
+                 * we override the CPSR.F in determining if the exception is
+                 * masked or not. If neither of these are set then we fall back
+                 * to the CPSR.F setting otherwise we further assess the state
+                 * below.
+                 */
+                hcr = (env->cp15.hcr_el2 & HCR_FMO);
+                scr = (env->cp15.scr_el3 & SCR_FIQ);
+
+                /* When EL3 is 32-bit, the SCR.FW bit controls whether the
+                 * CPSR.F bit masks FIQ interrupts when taken in non-secure
+                 * state. If SCR.FW is set then FIQs can be masked by CPSR.F
+                 * when non-secure but only when FIQs are only routed to EL3.
+                 */
+                scr = scr && !((env->cp15.scr_el3 & SCR_FW) && !hcr);
+                break;
+            case EXCP_IRQ:
+                /* When EL3 execution state is 32-bit, if HCR.IMO is set then
+                 * we may override the CPSR.I masking when in non-secure state.
+                 * The SCR.IRQ setting has already been taken into consideration
+                 * when setting the target EL, so it does not have a further
+                 * affect here.
+                 */
+                hcr = (env->cp15.hcr_el2 & HCR_IMO);
+                scr = false;
+                break;
+            default:
+                g_assert_not_reached();
+            }
+
+            if ((scr || hcr) && !secure) {
+                unmasked = 1;
+            }
         }
     }
 

diff --git a/target-arm/helper.c b/target-arm/helper.c
index e7fda37..1966f9c 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c

@@ -15,10 +15,17 @@
 #define ARM_CPU_FREQ 1000000000 /* FIXME: 1 GHz, should be configurable */
 
 #ifndef CONFIG_USER_ONLY
-static inline bool get_phys_addr(CPUARMState *env, target_ulong address,
-                                 int access_type, ARMMMUIdx mmu_idx,
-                                 hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
-                                 target_ulong *page_size, uint32_t *fsr);
+static bool get_phys_addr(CPUARMState *env, target_ulong address,
+                          int access_type, ARMMMUIdx mmu_idx,
+                          hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
+                          target_ulong *page_size, uint32_t *fsr,
+                          ARMMMUFaultInfo *fi);
+
+static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
+                               int access_type, ARMMMUIdx mmu_idx,
+                               hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot,
+                               target_ulong *page_size_ptr, uint32_t *fsr,
+                               ARMMMUFaultInfo *fi);
 
 /* Definitions for the PMCCNTR and PMCR registers */
 #define PMCRD   0x8
@@ -1778,9 +1785,10 @@
     bool ret;
     uint64_t par64;
     MemTxAttrs attrs = {};
+    ARMMMUFaultInfo fi = {};
 
     ret = get_phys_addr(env, value, access_type, mmu_idx,
-                        &phys_addr, &attrs, &prot, &page_size, &fsr);
+                        &phys_addr, &attrs, &prot, &page_size, &fsr, &fi);
     if (extended_addresses_enabled(env)) {
         /* fsr is a DFSR/IFSR value for the long descriptor
          * translation table format, but with WnR always clear.
@@ -3230,6 +3238,10 @@
     { .name = "MDCR_EL2", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 1,
       .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "HPFAR_EL2", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 4,
+      .access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any,
+      .type = ARM_CP_CONST, .resetvalue = 0 },
     REGINFO_SENTINEL
 };
 
@@ -3288,6 +3300,22 @@
       .type = ARM_CP_ALIAS,
       .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 0,
       .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[6]) },
+    { .name = "SPSR_IRQ", .state = ARM_CP_STATE_AA64,
+      .type = ARM_CP_ALIAS,
+      .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 3, .opc2 = 0,
+      .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[4]) },
+    { .name = "SPSR_ABT", .state = ARM_CP_STATE_AA64,
+      .type = ARM_CP_ALIAS,
+      .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 3, .opc2 = 1,
+      .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[2]) },
+    { .name = "SPSR_UND", .state = ARM_CP_STATE_AA64,
+      .type = ARM_CP_ALIAS,
+      .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 3, .opc2 = 2,
+      .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[3]) },
+    { .name = "SPSR_FIQ", .state = ARM_CP_STATE_AA64,
+      .type = ARM_CP_ALIAS,
+      .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 3, .opc2 = 3,
+      .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[5]) },
     { .name = "VBAR_EL2", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 0,
       .access = PL2_RW, .writefn = vbar_write,
@@ -3460,6 +3488,14 @@
       .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 1,
       .access = PL2_RW, .resetvalue = 0,
       .fieldoffset = offsetof(CPUARMState, cp15.mdcr_el2), },
+    { .name = "HPFAR", .state = ARM_CP_STATE_AA32,
+      .cp = 15, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 4,
+      .access = PL2_RW, .accessfn = access_el3_aa32ns,
+      .fieldoffset = offsetof(CPUARMState, cp15.hpfar_el2) },
+    { .name = "HPFAR_EL2", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 4,
+      .access = PL2_RW,
+      .fieldoffset = offsetof(CPUARMState, cp15.hpfar_el2) },
     REGINFO_SENTINEL
 };
 
@@ -6051,6 +6087,28 @@
     return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx));
 }
 
+/* Translate S2 section/page access permissions to protection flags
+ *
+ * @env:     CPUARMState
+ * @s2ap:    The 2-bit stage2 access permissions (S2AP)
+ * @xn:      XN (execute-never) bit
+ */
+static int get_S2prot(CPUARMState *env, int s2ap, int xn)
+{
+    int prot = 0;
+
+    if (s2ap & 1) {
+        prot |= PAGE_READ;
+    }
+    if (s2ap & 2) {
+        prot |= PAGE_WRITE;
+    }
+    if (!xn) {
+        prot |= PAGE_EXEC;
+    }
+    return prot;
+}
+
 /* Translate section/page access permissions to protection flags
  *
  * @env:     CPUARMState
@@ -6155,6 +6213,32 @@
     return true;
 }
 
+/* Translate a S1 pagetable walk through S2 if needed.  */
+static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx,
+                               hwaddr addr, MemTxAttrs txattrs,
+                               uint32_t *fsr,
+                               ARMMMUFaultInfo *fi)
+{
+    if ((mmu_idx == ARMMMUIdx_S1NSE0 || mmu_idx == ARMMMUIdx_S1NSE1) &&
+        !regime_translation_disabled(env, ARMMMUIdx_S2NS)) {
+        target_ulong s2size;
+        hwaddr s2pa;
+        int s2prot;
+        int ret;
+
+        ret = get_phys_addr_lpae(env, addr, 0, ARMMMUIdx_S2NS, &s2pa,
+                                 &txattrs, &s2prot, &s2size, fsr, fi);
+        if (ret) {
+            fi->s2addr = addr;
+            fi->stage2 = true;
+            fi->s1ptw = true;
+            return ~0;
+        }
+        addr = s2pa;
+    }
+    return addr;
+}
+
 /* All loads done in the course of a page table walk go through here.
  * TODO: rather than ignoring errors from physical memory reads (which
  * are external aborts in ARM terminology) we should propagate this
@@ -6162,26 +6246,43 @@
  * was being done for a CPU load/store or an address translation instruction
  * (but not if it was for a debug access).
  */
-static uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure)
+static uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure,
+                            ARMMMUIdx mmu_idx, uint32_t *fsr,
+                            ARMMMUFaultInfo *fi)
 {
+    ARMCPU *cpu = ARM_CPU(cs);
+    CPUARMState *env = &cpu->env;
     MemTxAttrs attrs = {};
 
     attrs.secure = is_secure;
+    addr = S1_ptw_translate(env, mmu_idx, addr, attrs, fsr, fi);
+    if (fi->s1ptw) {
+        return 0;
+    }
     return address_space_ldl(cs->as, addr, attrs, NULL);
 }
 
-static uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure)
+static uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure,
+                            ARMMMUIdx mmu_idx, uint32_t *fsr,
+                            ARMMMUFaultInfo *fi)
 {
+    ARMCPU *cpu = ARM_CPU(cs);
+    CPUARMState *env = &cpu->env;
     MemTxAttrs attrs = {};
 
     attrs.secure = is_secure;
+    addr = S1_ptw_translate(env, mmu_idx, addr, attrs, fsr, fi);
+    if (fi->s1ptw) {
+        return 0;
+    }
     return address_space_ldq(cs->as, addr, attrs, NULL);
 }
 
 static bool get_phys_addr_v5(CPUARMState *env, uint32_t address,
                              int access_type, ARMMMUIdx mmu_idx,
                              hwaddr *phys_ptr, int *prot,
-                             target_ulong *page_size, uint32_t *fsr)
+                             target_ulong *page_size, uint32_t *fsr,
+                             ARMMMUFaultInfo *fi)
 {
     CPUState *cs = CPU(arm_env_get_cpu(env));
     int code;
@@ -6201,7 +6302,8 @@
         code = 5;
         goto do_fault;
     }
-    desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx));
+    desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
+                       mmu_idx, fsr, fi);
     type = (desc & 3);
     domain = (desc >> 5) & 0x0f;
     if (regime_el(env, mmu_idx) == 1) {
@@ -6237,7 +6339,8 @@
             /* Fine pagetable.  */
             table = (desc & 0xfffff000) | ((address >> 8) & 0xffc);
         }
-        desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx));
+        desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
+                           mmu_idx, fsr, fi);
         switch (desc & 3) {
         case 0: /* Page translation fault.  */
             code = 7;
@@ -6294,7 +6397,8 @@
 static bool get_phys_addr_v6(CPUARMState *env, uint32_t address,
                              int access_type, ARMMMUIdx mmu_idx,
                              hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
-                             target_ulong *page_size, uint32_t *fsr)
+                             target_ulong *page_size, uint32_t *fsr,
+                             ARMMMUFaultInfo *fi)
 {
     CPUState *cs = CPU(arm_env_get_cpu(env));
     int code;
@@ -6317,7 +6421,8 @@
         code = 5;
         goto do_fault;
     }
-    desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx));
+    desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
+                       mmu_idx, fsr, fi);
     type = (desc & 3);
     if (type == 0 || (type == 3 && !arm_feature(env, ARM_FEATURE_PXN))) {
         /* Section translation fault, or attempt to use the encoding
@@ -6368,7 +6473,8 @@
         ns = extract32(desc, 3, 1);
         /* Lookup l2 entry.  */
         table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc);
-        desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx));
+        desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
+                           mmu_idx, fsr, fi);
         ap = ((desc >> 4) & 3) | ((desc >> 7) & 4);
         switch (desc & 3) {
         case 0: /* Page translation fault.  */
@@ -6442,17 +6548,78 @@
     permission_fault = 3,
 } MMUFaultType;
 
+/*
+ * check_s2_startlevel
+ * @cpu:        ARMCPU
+ * @is_aa64:    True if the translation regime is in AArch64 state
+ * @startlevel: Suggested starting level
+ * @inputsize:  Bitsize of IPAs
+ * @stride:     Page-table stride (See the ARM ARM)
+ *
+ * Returns true if the suggested starting level is OK and false otherwise.
+ */
+static bool check_s2_startlevel(ARMCPU *cpu, bool is_aa64, int level,
+                                int inputsize, int stride)
+{
+    /* Negative levels are never allowed.  */
+    if (level < 0) {
+        return false;
+    }
+
+    if (is_aa64) {
+        unsigned int pamax = arm_pamax(cpu);
+
+        switch (stride) {
+        case 13: /* 64KB Pages.  */
+            if (level == 0 || (level == 1 && pamax <= 42)) {
+                return false;
+            }
+            break;
+        case 11: /* 16KB Pages.  */
+            if (level == 0 || (level == 1 && pamax <= 40)) {
+                return false;
+            }
+            break;
+        case 9: /* 4KB Pages.  */
+            if (level == 0 && pamax <= 42) {
+                return false;
+            }
+            break;
+        default:
+            g_assert_not_reached();
+        }
+    } else {
+        const int grainsize = stride + 3;
+        int startsizecheck;
+
+        /* AArch32 only supports 4KB pages. Assert on that.  */
+        assert(stride == 9);
+
+        if (level == 0) {
+            return false;
+        }
+
+        startsizecheck = inputsize - ((3 - level) * stride + grainsize);
+        if (startsizecheck < 1 || startsizecheck > stride + 4) {
+            return false;
+        }
+    }
+    return true;
+}
+
 static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
                                int access_type, ARMMMUIdx mmu_idx,
                                hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot,
-                               target_ulong *page_size_ptr, uint32_t *fsr)
+                               target_ulong *page_size_ptr, uint32_t *fsr,
+                               ARMMMUFaultInfo *fi)
 {
-    CPUState *cs = CPU(arm_env_get_cpu(env));
+    ARMCPU *cpu = arm_env_get_cpu(env);
+    CPUState *cs = CPU(cpu);
     /* Read an LPAE long-descriptor translation table. */
     MMUFaultType fault_type = translation_fault;
     uint32_t level = 1;
     uint32_t epd = 0;
-    int32_t tsz;
+    int32_t t0sz, t1sz;
     uint32_t tg;
     uint64_t ttbr;
     int ttbr_select;
@@ -6460,8 +6627,9 @@
     uint32_t tableattrs;
     target_ulong page_size;
     uint32_t attrs;
-    int32_t granule_sz = 9;
+    int32_t stride = 9;
     int32_t va_size = 32;
+    int inputsize;
     int32_t tbi = 0;
     TCR *tcr = regime_tcr(env, mmu_idx);
     int ap, ns, xn, pxn;
@@ -6507,12 +6675,28 @@
      * This is a Non-secure PL0/1 stage 1 translation, so controlled by
      * TTBCR/TTBR0/TTBR1 in accordance with ARM ARM DDI0406C table B-32:
      */
-    uint32_t t0sz = extract32(tcr->raw_tcr, 0, 6);
     if (va_size == 64) {
+        /* AArch64 translation.  */
+        t0sz = extract32(tcr->raw_tcr, 0, 6);
         t0sz = MIN(t0sz, 39);
         t0sz = MAX(t0sz, 16);
+    } else if (mmu_idx != ARMMMUIdx_S2NS) {
+        /* AArch32 stage 1 translation.  */
+        t0sz = extract32(tcr->raw_tcr, 0, 3);
+    } else {
+        /* AArch32 stage 2 translation.  */
+        bool sext = extract32(tcr->raw_tcr, 4, 1);
+        bool sign = extract32(tcr->raw_tcr, 3, 1);
+        t0sz = sextract32(tcr->raw_tcr, 0, 4);
+
+        /* If the sign-extend bit is not the same as t0sz[3], the result
+         * is unpredictable. Flag this as a guest error.  */
+        if (sign != sext) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "AArch32: VTCR.S / VTCR.T0SZ[3] missmatch\n");
+        }
     }
-    uint32_t t1sz = extract32(tcr->raw_tcr, 16, 6);
+    t1sz = extract32(tcr->raw_tcr, 16, 6);
     if (va_size == 64) {
         t1sz = MIN(t1sz, 39);
         t1sz = MAX(t1sz, 16);
@@ -6548,14 +6732,14 @@
         if (el < 2) {
             epd = extract32(tcr->raw_tcr, 7, 1);
         }
-        tsz = t0sz;
+        inputsize = va_size - t0sz;
 
         tg = extract32(tcr->raw_tcr, 14, 2);
         if (tg == 1) { /* 64KB pages */
-            granule_sz = 13;
+            stride = 13;
         }
         if (tg == 2) { /* 16KB pages */
-            granule_sz = 11;
+            stride = 11;
         }
     } else {
         /* We should only be here if TTBR1 is valid */
@@ -6563,19 +6747,19 @@
 
         ttbr = regime_ttbr(env, mmu_idx, 1);
         epd = extract32(tcr->raw_tcr, 23, 1);
-        tsz = t1sz;
+        inputsize = va_size - t1sz;
 
         tg = extract32(tcr->raw_tcr, 30, 2);
         if (tg == 3)  { /* 64KB pages */
-            granule_sz = 13;
+            stride = 13;
         }
         if (tg == 1) { /* 16KB pages */
-            granule_sz = 11;
+            stride = 11;
         }
     }
 
     /* Here we should have set up all the parameters for the translation:
-     * va_size, ttbr, epd, tsz, granule_sz, tbi
+     * va_size, inputsize, ttbr, epd, stride, tbi
      */
 
     if (epd) {
@@ -6585,32 +6769,60 @@
         goto do_fault;
     }
 
-    /* The starting level depends on the virtual address size (which can be
-     * up to 48 bits) and the translation granule size. It indicates the number
-     * of strides (granule_sz bits at a time) needed to consume the bits
-     * of the input address. In the pseudocode this is:
-     *  level = 4 - RoundUp((inputsize - grainsize) / stride)
-     * where their 'inputsize' is our 'va_size - tsz', 'grainsize' is
-     * our 'granule_sz + 3' and 'stride' is our 'granule_sz'.
-     * Applying the usual "rounded up m/n is (m+n-1)/n" and simplifying:
-     *     = 4 - (va_size - tsz - granule_sz - 3 + granule_sz - 1) / granule_sz
-     *     = 4 - (va_size - tsz - 4) / granule_sz;
-     */
-    level = 4 - (va_size - tsz - 4) / granule_sz;
+    if (mmu_idx != ARMMMUIdx_S2NS) {
+        /* The starting level depends on the virtual address size (which can
+         * be up to 48 bits) and the translation granule size. It indicates
+         * the number of strides (stride bits at a time) needed to
+         * consume the bits of the input address. In the pseudocode this is:
+         *  level = 4 - RoundUp((inputsize - grainsize) / stride)
+         * where their 'inputsize' is our 'inputsize', 'grainsize' is
+         * our 'stride + 3' and 'stride' is our 'stride'.
+         * Applying the usual "rounded up m/n is (m+n-1)/n" and simplifying:
+         * = 4 - (inputsize - stride - 3 + stride - 1) / stride
+         * = 4 - (inputsize - 4) / stride;
+         */
+        level = 4 - (inputsize - 4) / stride;
+    } else {
+        /* For stage 2 translations the starting level is specified by the
+         * VTCR_EL2.SL0 field (whose interpretation depends on the page size)
+         */
+        int startlevel = extract32(tcr->raw_tcr, 6, 2);
+        bool ok;
+
+        if (va_size == 32 || stride == 9) {
+            /* AArch32 or 4KB pages */
+            level = 2 - startlevel;
+        } else {
+            /* 16KB or 64KB pages */
+            level = 3 - startlevel;
+        }
+
+        /* Check that the starting level is valid. */
+        ok = check_s2_startlevel(cpu, va_size == 64, level,
+                                 inputsize, stride);
+        if (!ok) {
+            /* AArch64 reports these as level 0 faults.
+             * AArch32 reports these as level 1 faults.
+             */
+            level = va_size == 64 ? 0 : 1;
+            fault_type = translation_fault;
+            goto do_fault;
+        }
+    }
 
     /* Clear the vaddr bits which aren't part of the within-region address,
      * so that we don't have to special case things when calculating the
      * first descriptor address.
      */
-    if (tsz) {
-        address &= (1ULL << (va_size - tsz)) - 1;
+    if (va_size != inputsize) {
+        address &= (1ULL << inputsize) - 1;
     }
 
-    descmask = (1ULL << (granule_sz + 3)) - 1;
+    descmask = (1ULL << (stride + 3)) - 1;
 
     /* Now we can extract the actual base address from the TTBR */
     descaddr = extract64(ttbr, 0, 48);
-    descaddr &= ~((1ULL << (va_size - tsz - (granule_sz * (4 - level)))) - 1);
+    descaddr &= ~((1ULL << (inputsize - (stride * (4 - level)))) - 1);
 
     /* Secure accesses start with the page table in secure memory and
      * can be downgraded to non-secure at any step. Non-secure accesses
@@ -6622,10 +6834,14 @@
         uint64_t descriptor;
         bool nstable;
 
-        descaddr |= (address >> (granule_sz * (4 - level))) & descmask;
+        descaddr |= (address >> (stride * (4 - level))) & descmask;
         descaddr &= ~7ULL;
         nstable = extract32(tableattrs, 4, 1);
-        descriptor = arm_ldq_ptw(cs, descaddr, !nstable);
+        descriptor = arm_ldq_ptw(cs, descaddr, !nstable, mmu_idx, fsr, fi);
+        if (fi->s1ptw) {
+            goto do_fault;
+        }
+
         if (!(descriptor & 1) ||
             (!(descriptor & 2) && (level == 3))) {
             /* Invalid, or the Reserved level 3 encoding */
@@ -6647,11 +6863,17 @@
          * These are basically the same thing, although the number
          * of bits we pull in from the vaddr varies.
          */
-        page_size = (1ULL << ((granule_sz * (4 - level)) + 3));
+        page_size = (1ULL << ((stride * (4 - level)) + 3));
         descaddr |= (address & (page_size - 1));
-        /* Extract attributes from the descriptor and merge with table attrs */
+        /* Extract attributes from the descriptor */
         attrs = extract64(descriptor, 2, 10)
             | (extract64(descriptor, 52, 12) << 10);
+
+        if (mmu_idx == ARMMMUIdx_S2NS) {
+            /* Stage 2 table descriptors do not include any attribute fields */
+            break;
+        }
+        /* Merge in attributes from table descriptors */
         attrs |= extract32(tableattrs, 0, 2) << 11; /* XN, PXN */
         attrs |= extract32(tableattrs, 3, 1) << 5; /* APTable[1] => AP[2] */
         /* The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1
@@ -6673,11 +6895,16 @@
     }
 
     ap = extract32(attrs, 4, 2);
-    ns = extract32(attrs, 3, 1);
     xn = extract32(attrs, 12, 1);
-    pxn = extract32(attrs, 11, 1);
 
-    *prot = get_S1prot(env, mmu_idx, va_size == 64, ap, ns, xn, pxn);
+    if (mmu_idx == ARMMMUIdx_S2NS) {
+        ns = true;
+        *prot = get_S2prot(env, ap, xn);
+    } else {
+        ns = extract32(attrs, 3, 1);
+        pxn = extract32(attrs, 11, 1);
+        *prot = get_S1prot(env, mmu_idx, va_size == 64, ap, ns, xn, pxn);
+    }
 
     fault_type = permission_fault;
     if (!(*prot & (1 << access_type))) {
@@ -6698,6 +6925,8 @@
 do_fault:
     /* Long-descriptor format IFSR/DFSR value */
     *fsr = (1 << 9) | (fault_type << 2) | level;
+    /* Tag the error as S2 for failed S1 PTW at S2 or ordinary S2.  */
+    fi->stage2 = fi->s1ptw || (mmu_idx == ARMMMUIdx_S2NS);
     return true;
 }
 
@@ -6960,20 +7189,45 @@
  * @page_size: set to the size of the page containing phys_ptr
  * @fsr: set to the DFSR/IFSR value on failure
  */
-static inline bool get_phys_addr(CPUARMState *env, target_ulong address,
-                                 int access_type, ARMMMUIdx mmu_idx,
-                                 hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
-                                 target_ulong *page_size, uint32_t *fsr)
+static bool get_phys_addr(CPUARMState *env, target_ulong address,
+                          int access_type, ARMMMUIdx mmu_idx,
+                          hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
+                          target_ulong *page_size, uint32_t *fsr,
+                          ARMMMUFaultInfo *fi)
 {
     if (mmu_idx == ARMMMUIdx_S12NSE0 || mmu_idx == ARMMMUIdx_S12NSE1) {
-        /* TODO: when we support EL2 we should here call ourselves recursively
-         * to do the stage 1 and then stage 2 translations. The arm_ld*_ptw
-         * functions will also need changing to perform ARMMMUIdx_S2NS loads
-         * rather than direct physical memory loads when appropriate.
-         * For non-EL2 CPUs a stage1+stage2 translation is just stage 1.
+        /* Call ourselves recursively to do the stage 1 and then stage 2
+         * translations.
          */
-        assert(!arm_feature(env, ARM_FEATURE_EL2));
-        mmu_idx += ARMMMUIdx_S1NSE0;
+        if (arm_feature(env, ARM_FEATURE_EL2)) {
+            hwaddr ipa;
+            int s2_prot;
+            int ret;
+
+            ret = get_phys_addr(env, address, access_type,
+                                mmu_idx + ARMMMUIdx_S1NSE0, &ipa, attrs,
+                                prot, page_size, fsr, fi);
+
+            /* If S1 fails or S2 is disabled, return early.  */
+            if (ret || regime_translation_disabled(env, ARMMMUIdx_S2NS)) {
+                *phys_ptr = ipa;
+                return ret;
+            }
+
+            /* S1 is done. Now do S2 translation.  */
+            ret = get_phys_addr_lpae(env, ipa, access_type, ARMMMUIdx_S2NS,
+                                     phys_ptr, attrs, &s2_prot,
+                                     page_size, fsr, fi);
+            fi->s2addr = ipa;
+            /* Combine the S1 and S2 perms.  */
+            *prot &= s2_prot;
+            return ret;
+        } else {
+            /*
+             * For non-EL2 CPUs a stage1+stage2 translation is just stage 1.
+             */
+            mmu_idx += ARMMMUIdx_S1NSE0;
+        }
     }
 
     /* The page table entries may downgrade secure to non-secure, but
@@ -7022,13 +7276,13 @@
 
     if (regime_using_lpae_format(env, mmu_idx)) {
         return get_phys_addr_lpae(env, address, access_type, mmu_idx, phys_ptr,
-                                  attrs, prot, page_size, fsr);
+                                  attrs, prot, page_size, fsr, fi);
     } else if (regime_sctlr(env, mmu_idx) & SCTLR_XP) {
         return get_phys_addr_v6(env, address, access_type, mmu_idx, phys_ptr,
-                                attrs, prot, page_size, fsr);
+                                attrs, prot, page_size, fsr, fi);
     } else {
         return get_phys_addr_v5(env, address, access_type, mmu_idx, phys_ptr,
-                                prot, page_size, fsr);
+                                prot, page_size, fsr, fi);
     }
 }
 
@@ -7037,7 +7291,8 @@
  * fsr with ARM DFSR/IFSR fault register format value on failure.
  */
 bool arm_tlb_fill(CPUState *cs, vaddr address,
-                  int access_type, int mmu_idx, uint32_t *fsr)
+                  int access_type, int mmu_idx, uint32_t *fsr,
+                  ARMMMUFaultInfo *fi)
 {
     ARMCPU *cpu = ARM_CPU(cs);
     CPUARMState *env = &cpu->env;
@@ -7048,7 +7303,7 @@
     MemTxAttrs attrs = {};
 
     ret = get_phys_addr(env, address, access_type, mmu_idx, &phys_addr,
-                        &attrs, &prot, &page_size, fsr);
+                        &attrs, &prot, &page_size, fsr, fi);
     if (!ret) {
         /* Map a single [sub]page.  */
         phys_addr &= TARGET_PAGE_MASK;
@@ -7071,9 +7326,10 @@
     bool ret;
     uint32_t fsr;
     MemTxAttrs attrs = {};
+    ARMMMUFaultInfo fi = {};
 
     ret = get_phys_addr(env, addr, 0, cpu_mmu_index(env, false), &phys_addr,
-                        &attrs, &prot, &page_size, &fsr);
+                        &attrs, &prot, &page_size, &fsr, &fi);
 
     if (ret) {
         return -1;

diff --git a/target-arm/internals.h b/target-arm/internals.h
index 36a56aa..412827b 100644
--- a/target-arm/internals.h
+++ b/target-arm/internals.h

@@ -152,6 +152,31 @@
     aarch64_restore_sp(env, cur_el);
 }
 
+/*
+ * arm_pamax
+ * @cpu: ARMCPU
+ *
+ * Returns the implementation defined bit-width of physical addresses.
+ * The ARMv8 reference manuals refer to this as PAMax().
+ */
+static inline unsigned int arm_pamax(ARMCPU *cpu)
+{
+    static const unsigned int pamax_map[] = {
+        [0] = 32,
+        [1] = 36,
+        [2] = 40,
+        [3] = 42,
+        [4] = 44,
+        [5] = 48,
+    };
+    unsigned int parange = extract32(cpu->id_aa64mmfr0, 0, 4);
+
+    /* id_aa64mmfr0 is a read-only register so values outside of the
+     * supported mappings can be considered an implementation error.  */
+    assert(parange < ARRAY_SIZE(pamax_map));
+    return pamax_map[parange];
+}
+
 /* Return true if extended addresses are enabled.
  * This is always the case if our translation regime is 64 bit,
  * but depends on TTBCR.EAE for 32 bit.
@@ -389,8 +414,21 @@
 void arm_handle_psci_call(ARMCPU *cpu);
 #endif
 
+/**
+ * ARMMMUFaultInfo: Information describing an ARM MMU Fault
+ * @s2addr: Address that caused a fault at stage 2
+ * @stage2: True if we faulted at stage 2
+ * @s1ptw: True if we faulted at stage 2 while doing a stage 1 page-table walk
+ */
+typedef struct ARMMMUFaultInfo ARMMMUFaultInfo;
+struct ARMMMUFaultInfo {
+    target_ulong s2addr;
+    bool stage2;
+    bool s1ptw;
+};
+
 /* Do a page table walk and add page to TLB if possible */
 bool arm_tlb_fill(CPUState *cpu, vaddr address, int rw, int mmu_idx,
-                  uint32_t *fsr);
+                  uint32_t *fsr, ARMMMUFaultInfo *fi);
 
 #endif

diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index 7929c71..a4c4ebf 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c

@@ -83,19 +83,27 @@
 {
     bool ret;
     uint32_t fsr = 0;
+    ARMMMUFaultInfo fi = {};
 
-    ret = arm_tlb_fill(cs, addr, is_write, mmu_idx, &fsr);
+    ret = arm_tlb_fill(cs, addr, is_write, mmu_idx, &fsr, &fi);
     if (unlikely(ret)) {
         ARMCPU *cpu = ARM_CPU(cs);
         CPUARMState *env = &cpu->env;
         uint32_t syn, exc;
-        bool same_el = (arm_current_el(env) != 0);
+        unsigned int target_el;
+        bool same_el;
 
         if (retaddr) {
             /* now we have a real cpu fault */
             cpu_restore_state(cs, retaddr);
         }
 
+        target_el = exception_target_el(env);
+        if (fi.stage2) {
+            target_el = 2;
+            env->cp15.hpfar_el2 = extract64(fi.s2addr, 12, 47) << 4;
+        }
+        same_el = arm_current_el(env) == target_el;
         /* AArch64 syndrome does not have an LPAE bit */
         syn = fsr & ~(1 << 9);
 
@@ -103,10 +111,10 @@
          * information; this is always true for exceptions reported to EL1.
          */
         if (is_write == 2) {
-            syn = syn_insn_abort(same_el, 0, 0, syn);
+            syn = syn_insn_abort(same_el, 0, fi.s1ptw, syn);
             exc = EXCP_PREFETCH_ABORT;
         } else {
-            syn = syn_data_abort(same_el, 0, 0, 0, is_write == 1, syn);
+            syn = syn_data_abort(same_el, 0, 0, fi.s1ptw, is_write == 1, syn);
             if (is_write == 1 && arm_feature(env, ARM_FEATURE_V6)) {
                 fsr |= (1 << 11);
             }
@@ -115,7 +123,7 @@
 
         env->exception.vaddress = addr;
         env->exception.fsr = fsr;
-        raise_exception(env, exc, syn, exception_target_el(env));
+        raise_exception(env, exc, syn, target_el);
     }
 }
 #endif

diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 19f9d8d..83b8376 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c

@@ -11096,8 +11096,11 @@
                         dc->is_jmp = DISAS_UPDATE;
                     } else {
                         gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
-                        /* Advance PC so that clearing the breakpoint will
-                           invalidate this TB.  */
+                        /* The address covered by the breakpoint must be
+                           included in [tb->pc, tb->pc + tb->size) in order
+                           to for it to be properly cleared -- thus we
+                           increment the PC here so that the logic setting
+                           tb->size below does the right thing.  */
                         dc->pc += 4;
                         goto done_generating;
                     }

diff --git a/target-arm/translate.c b/target-arm/translate.c
index 9f1d740..b10a455 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c

@@ -11179,6 +11179,35 @@
                        default_exception_el(s));
 }
 
+static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
+{
+    /* Return true if the insn at dc->pc might cross a page boundary.
+     * (False positives are OK, false negatives are not.)
+     */
+    uint16_t insn;
+
+    if ((s->pc & 3) == 0) {
+        /* At a 4-aligned address we can't be crossing a page */
+        return false;
+    }
+
+    /* This must be a Thumb insn */
+    insn = arm_lduw_code(env, s->pc, s->bswap_code);
+
+    if ((insn >> 11) >= 0x1d) {
+        /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
+         * First half of a 32-bit Thumb insn. Thumb-1 cores might
+         * end up actually treating this as two 16-bit insns (see the
+         * code at the start of disas_thumb2_insn()) but we don't bother
+         * to check for that as it is unlikely, and false positives here
+         * are harmless.
+         */
+        return true;
+    }
+    /* Definitely a 16-bit insn, can't be crossing a page. */
+    return false;
+}
+
 /* generate intermediate code in gen_opc_buf and gen_opparam_buf for
    basic block 'tb'.  */
 void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
@@ -11190,6 +11219,7 @@
     target_ulong next_page_start;
     int num_insns;
     int max_insns;
+    bool end_of_page;
 
     /* generate intermediate code */
 
@@ -11348,8 +11378,11 @@
                         dc->is_jmp = DISAS_UPDATE;
                     } else {
                         gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
-                        /* Advance PC so that clearing the breakpoint will
-                           invalidate this TB.  */
+                        /* The address covered by the breakpoint must be
+                           included in [tb->pc, tb->pc + tb->size) in order
+                           to for it to be properly cleared -- thus we
+                           increment the PC here so that the logic setting
+                           tb->size below does the right thing.  */
                         /* TODO: Advance PC by correct instruction length to
                          * avoid disassembler error messages */
                         dc->pc += 2;
@@ -11411,11 +11444,24 @@
          * Otherwise the subsequent code could get translated several times.
          * Also stop translation when a page boundary is reached.  This
          * ensures prefetch aborts occur at the right place.  */
+
+        /* We want to stop the TB if the next insn starts in a new page,
+         * or if it spans between this page and the next. This means that
+         * if we're looking at the last halfword in the page we need to
+         * see if it's a 16-bit Thumb insn (which will fit in this TB)
+         * or a 32-bit Thumb insn (which won't).
+         * This is to avoid generating a silly TB with a single 16-bit insn
+         * in it at the end of this page (which would execute correctly
+         * but isn't very efficient).
+         */
+        end_of_page = (dc->pc >= next_page_start) ||
+            ((dc->pc >= next_page_start - 3) && insn_crosses_page(env, dc));
+
     } while (!dc->is_jmp && !tcg_op_buf_full() &&
              !cs->singlestep_enabled &&
              !singlestep &&
              !dc->ss_active &&
-             dc->pc < next_page_start &&
+             !end_of_page &&
              num_insns < max_insns);
 
     if (tb->cflags & CF_LAST_IO) {

diff --git a/target-cris/translate.c b/target-cris/translate.c
index 964845c..2d710cc 100644
--- a/target-cris/translate.c
+++ b/target-cris/translate.c

@@ -3166,6 +3166,11 @@
             tcg_gen_movi_tl(env_pc, dc->pc);
             t_gen_raise_exception(EXCP_DEBUG);
             dc->is_jmp = DISAS_UPDATE;
+            /* The address covered by the breakpoint must be included in
+               [tb->pc, tb->pc + tb->size) in order to for it to be
+               properly cleared -- thus we increment the PC here so that
+               the logic setting tb->size below does the right thing.  */
+            dc->pc += 2;
             break;
         }
 

diff --git a/target-i386/bpt_helper.c b/target-i386/bpt_helper.c
index c071c24..dac1b1a3 100644
--- a/target-i386/bpt_helper.c
+++ b/target-i386/bpt_helper.c

@@ -21,64 +21,147 @@
 #include "exec/helper-proto.h"
 
 
-void hw_breakpoint_insert(CPUX86State *env, int index)
+#ifndef CONFIG_USER_ONLY
+static inline bool hw_local_breakpoint_enabled(unsigned long dr7, int index)
+{
+    return (dr7 >> (index * 2)) & 1;
+}
+
+static inline bool hw_global_breakpoint_enabled(unsigned long dr7, int index)
+{
+    return (dr7 >> (index * 2)) & 2;
+
+}
+static inline bool hw_breakpoint_enabled(unsigned long dr7, int index)
+{
+    return hw_global_breakpoint_enabled(dr7, index) ||
+           hw_local_breakpoint_enabled(dr7, index);
+}
+
+static inline int hw_breakpoint_type(unsigned long dr7, int index)
+{
+    return (dr7 >> (DR7_TYPE_SHIFT + (index * 4))) & 3;
+}
+
+static inline int hw_breakpoint_len(unsigned long dr7, int index)
+{
+    int len = ((dr7 >> (DR7_LEN_SHIFT + (index * 4))) & 3);
+    return (len == 2) ? 8 : len + 1;
+}
+
+static int hw_breakpoint_insert(CPUX86State *env, int index)
 {
     CPUState *cs = CPU(x86_env_get_cpu(env));
-    int type = 0, err = 0;
+    target_ulong dr7 = env->dr[7];
+    target_ulong drN = env->dr[index];
+    int err = 0;
 
-    switch (hw_breakpoint_type(env->dr[7], index)) {
+    switch (hw_breakpoint_type(dr7, index)) {
     case DR7_TYPE_BP_INST:
-        if (hw_breakpoint_enabled(env->dr[7], index)) {
-            err = cpu_breakpoint_insert(cs, env->dr[index], BP_CPU,
+        if (hw_breakpoint_enabled(dr7, index)) {
+            err = cpu_breakpoint_insert(cs, drN, BP_CPU,
                                         &env->cpu_breakpoint[index]);
         }
         break;
-    case DR7_TYPE_DATA_WR:
-        type = BP_CPU | BP_MEM_WRITE;
-        break;
+
     case DR7_TYPE_IO_RW:
-        /* No support for I/O watchpoints yet */
+        /* Notice when we should enable calls to bpt_io.  */
+        return hw_breakpoint_enabled(env->dr[7], index)
+               ? HF_IOBPT_MASK : 0;
+
+    case DR7_TYPE_DATA_WR:
+        if (hw_breakpoint_enabled(dr7, index)) {
+            err = cpu_watchpoint_insert(cs, drN,
+                                        hw_breakpoint_len(dr7, index),
+                                        BP_CPU | BP_MEM_WRITE,
+                                        &env->cpu_watchpoint[index]);
+        }
         break;
+
     case DR7_TYPE_DATA_RW:
-        type = BP_CPU | BP_MEM_ACCESS;
+        if (hw_breakpoint_enabled(dr7, index)) {
+            err = cpu_watchpoint_insert(cs, drN,
+                                        hw_breakpoint_len(dr7, index),
+                                        BP_CPU | BP_MEM_ACCESS,
+                                        &env->cpu_watchpoint[index]);
+        }
         break;
     }
-
-    if (type != 0) {
-        err = cpu_watchpoint_insert(cs, env->dr[index],
-                                    hw_breakpoint_len(env->dr[7], index),
-                                    type, &env->cpu_watchpoint[index]);
-    }
-
     if (err) {
         env->cpu_breakpoint[index] = NULL;
     }
+    return 0;
 }
 
-void hw_breakpoint_remove(CPUX86State *env, int index)
+static void hw_breakpoint_remove(CPUX86State *env, int index)
 {
-    CPUState *cs;
+    CPUState *cs = CPU(x86_env_get_cpu(env));
 
-    if (!env->cpu_breakpoint[index]) {
-        return;
-    }
-    cs = CPU(x86_env_get_cpu(env));
     switch (hw_breakpoint_type(env->dr[7], index)) {
     case DR7_TYPE_BP_INST:
-        if (hw_breakpoint_enabled(env->dr[7], index)) {
+        if (env->cpu_breakpoint[index]) {
             cpu_breakpoint_remove_by_ref(cs, env->cpu_breakpoint[index]);
+            env->cpu_breakpoint[index] = NULL;
         }
         break;
+
     case DR7_TYPE_DATA_WR:
     case DR7_TYPE_DATA_RW:
-        cpu_watchpoint_remove_by_ref(cs, env->cpu_watchpoint[index]);
+        if (env->cpu_breakpoint[index]) {
+            cpu_watchpoint_remove_by_ref(cs, env->cpu_watchpoint[index]);
+            env->cpu_breakpoint[index] = NULL;
+        }
         break;
+
     case DR7_TYPE_IO_RW:
-        /* No support for I/O watchpoints yet */
+        /* HF_IOBPT_MASK cleared elsewhere.  */
         break;
     }
 }
 
+void cpu_x86_update_dr7(CPUX86State *env, uint32_t new_dr7)
+{
+    target_ulong old_dr7 = env->dr[7];
+    int iobpt = 0;
+    int i;
+
+    new_dr7 |= DR7_FIXED_1;
+
+    /* If nothing is changing except the global/local enable bits,
+       then we can make the change more efficient.  */
+    if (((old_dr7 ^ new_dr7) & ~0xff) == 0) {
+        /* Fold the global and local enable bits together into the
+           global fields, then xor to show which registers have
+           changed collective enable state.  */
+        int mod = ((old_dr7 | old_dr7 * 2) ^ (new_dr7 | new_dr7 * 2)) & 0xff;
+
+        for (i = 0; i < DR7_MAX_BP; i++) {
+            if ((mod & (2 << i * 2)) && !hw_breakpoint_enabled(new_dr7, i)) {
+                hw_breakpoint_remove(env, i);
+            }
+        }
+        env->dr[7] = new_dr7;
+        for (i = 0; i < DR7_MAX_BP; i++) {
+            if (mod & (2 << i * 2) && hw_breakpoint_enabled(new_dr7, i)) {
+                iobpt |= hw_breakpoint_insert(env, i);
+            } else if (hw_breakpoint_type(new_dr7, i) == DR7_TYPE_IO_RW
+                       && hw_breakpoint_enabled(new_dr7, i)) {
+                iobpt |= HF_IOBPT_MASK;
+            }
+        }
+    } else {
+        for (i = 0; i < DR7_MAX_BP; i++) {
+            hw_breakpoint_remove(env, i);
+        }
+        env->dr[7] = new_dr7;
+        for (i = 0; i < DR7_MAX_BP; i++) {
+            iobpt |= hw_breakpoint_insert(env, i);
+        }
+    }
+
+    env->hflags = (env->hflags & ~HF_IOBPT_MASK) | iobpt;
+}
+
 static bool check_hw_breakpoints(CPUX86State *env, bool force_dr6_update)
 {
     target_ulong dr6;
@@ -148,6 +231,7 @@
         }
     }
 }
+#endif
 
 void helper_single_step(CPUX86State *env)
 {
@@ -158,25 +242,85 @@
     raise_exception(env, EXCP01_DB);
 }
 
-void helper_movl_drN_T0(CPUX86State *env, int reg, target_ulong t0)
+void helper_set_dr(CPUX86State *env, int reg, target_ulong t0)
 {
 #ifndef CONFIG_USER_ONLY
-    int i;
+    switch (reg) {
+    case 0: case 1: case 2: case 3:
+        if (hw_breakpoint_enabled(env->dr[7], reg)
+            && hw_breakpoint_type(env->dr[7], reg) != DR7_TYPE_IO_RW) {
+            hw_breakpoint_remove(env, reg);
+            env->dr[reg] = t0;
+            hw_breakpoint_insert(env, reg);
+        } else {
+            env->dr[reg] = t0;
+        }
+        return;
+    case 4:
+        if (env->cr[4] & CR4_DE_MASK) {
+            break;
+        }
+        /* fallthru */
+    case 6:
+        env->dr[6] = t0 | DR6_FIXED_1;
+        return;
+    case 5:
+        if (env->cr[4] & CR4_DE_MASK) {
+            break;
+        }
+        /* fallthru */
+    case 7:
+        cpu_x86_update_dr7(env, t0);
+        return;
+    }
+    raise_exception_err_ra(env, EXCP06_ILLOP, 0, GETPC());
+#endif
+}
 
-    if (reg < 4) {
-        hw_breakpoint_remove(env, reg);
-        env->dr[reg] = t0;
-        hw_breakpoint_insert(env, reg);
-    } else if (reg == 7) {
-        for (i = 0; i < DR7_MAX_BP; i++) {
-            hw_breakpoint_remove(env, i);
+target_ulong helper_get_dr(CPUX86State *env, int reg)
+{
+    switch (reg) {
+    case 0: case 1: case 2: case 3: case 6: case 7:
+        return env->dr[reg];
+    case 4:
+        if (env->cr[4] & CR4_DE_MASK) {
+            break;
+        } else {
+            return env->dr[6];
         }
-        env->dr[7] = t0;
-        for (i = 0; i < DR7_MAX_BP; i++) {
-            hw_breakpoint_insert(env, i);
+    case 5:
+        if (env->cr[4] & CR4_DE_MASK) {
+            break;
+        } else {
+            return env->dr[7];
         }
-    } else {
-        env->dr[reg] = t0;
+    }
+    raise_exception_err_ra(env, EXCP06_ILLOP, 0, GETPC());
+}
+
+/* Check if Port I/O is trapped by a breakpoint.  */
+void helper_bpt_io(CPUX86State *env, uint32_t port,
+                   uint32_t size, target_ulong next_eip)
+{
+#ifndef CONFIG_USER_ONLY
+    target_ulong dr7 = env->dr[7];
+    int i, hit = 0;
+
+    for (i = 0; i < DR7_MAX_BP; ++i) {
+        if (hw_breakpoint_type(dr7, i) == DR7_TYPE_IO_RW
+            && hw_breakpoint_enabled(dr7, i)) {
+            int bpt_len = hw_breakpoint_len(dr7, i);
+            if (port + size - 1 >= env->dr[i]
+                && port <= env->dr[i] + bpt_len - 1) {
+                hit |= 1 << i;
+            }
+        }
+    }
+
+    if (hit) {
+        env->dr[6] = (env->dr[6] & ~0xf) | hit;
+        env->eip = next_eip;
+        raise_exception(env, EXCP01_DB);
     }
 #endif
 }

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 5f53af2..9280bfc 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c

@@ -312,7 +312,7 @@
           CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | CPUID_SEP | \
           CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | CPUID_PAT | \
           CPUID_PSE36 | CPUID_CLFLUSH | CPUID_ACPI | CPUID_MMX | \
-          CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS)
+          CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS | CPUID_DE)
           /* partly implemented:
           CPUID_MTRR, CPUID_MCA, CPUID_CLFLUSH (needed for Win64) */
           /* missing:
@@ -656,7 +656,6 @@
     int stepping;
     FeatureWordArray features;
     char model_id[48];
-    bool cache_info_passthrough;
 };
 
 static X86CPUDefinition builtin_x86_defs[] = {
@@ -1420,6 +1419,7 @@
 
 static Property host_x86_cpu_properties[] = {
     DEFINE_PROP_BOOL("migratable", X86CPU, migratable, true),
+    DEFINE_PROP_BOOL("host-cache-info", X86CPU, cache_info_passthrough, false),
     DEFINE_PROP_END_OF_LIST()
 };
 
@@ -1446,7 +1446,6 @@
     cpu_x86_fill_model_id(host_cpudef.model_id);
 
     xcc->cpu_def = &host_cpudef;
-    host_cpudef.cache_info_passthrough = true;
 
     /* level, xlevel, xlevel2, and the feature words are initialized on
      * instance_init, because they require KVM to be initialized.
@@ -1492,7 +1491,7 @@
     int i;
 
     for (i = 0; i < 32; ++i) {
-        if (1 << i & mask) {
+        if ((1UL << i) & mask) {
             const char *reg = get_register_name_32(f->cpuid_reg);
             assert(reg);
             fprintf(stderr, "warning: %s doesn't support requested feature: "
@@ -2094,7 +2093,6 @@
     object_property_set_int(OBJECT(cpu), def->stepping, "stepping", errp);
     object_property_set_int(OBJECT(cpu), def->xlevel, "xlevel", errp);
     object_property_set_int(OBJECT(cpu), def->xlevel2, "xlevel2", errp);
-    cpu->cache_info_passthrough = def->cache_info_passthrough;
     object_property_set_str(OBJECT(cpu), def->model_id, "model-id", errp);
     for (w = 0; w < FEATURE_WORDS; w++) {
         env->features[w] = def->features[w];
@@ -3143,7 +3141,7 @@
     DEFINE_PROP_BOOL("hv-reset", X86CPU, hyperv_reset, false),
     DEFINE_PROP_BOOL("hv-vpindex", X86CPU, hyperv_vpindex, false),
     DEFINE_PROP_BOOL("hv-runtime", X86CPU, hyperv_runtime, false),
-    DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, false),
+    DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true),
     DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false),
     DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true),
     DEFINE_PROP_UINT32("level", X86CPU, env.cpuid_level, 0),

diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index a395b4b..62f7879 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h

@@ -155,6 +155,7 @@
 #define HF_SVMI_SHIFT       21 /* SVM intercepts are active */
 #define HF_OSFXSR_SHIFT     22 /* CR4.OSFXSR */
 #define HF_SMAP_SHIFT       23 /* CR4.SMAP */
+#define HF_IOBPT_SHIFT      24 /* an io breakpoint enabled */
 
 #define HF_CPL_MASK          (3 << HF_CPL_SHIFT)
 #define HF_SOFTMMU_MASK      (1 << HF_SOFTMMU_SHIFT)
@@ -178,6 +179,7 @@
 #define HF_SVMI_MASK         (1 << HF_SVMI_SHIFT)
 #define HF_OSFXSR_MASK       (1 << HF_OSFXSR_SHIFT)
 #define HF_SMAP_MASK         (1 << HF_SMAP_SHIFT)
+#define HF_IOBPT_MASK        (1 << HF_IOBPT_SHIFT)
 
 /* hflags2 */
 
@@ -235,6 +237,7 @@
 #define DR7_TYPE_SHIFT  16
 #define DR7_LEN_SHIFT   18
 #define DR7_FIXED_1     0x00000400
+#define DR7_GLOBAL_BP_MASK   0xaa
 #define DR7_LOCAL_BP_MASK    0x55
 #define DR7_MAX_BP           4
 #define DR7_TYPE_BP_INST     0x0
@@ -917,7 +920,7 @@
     int error_code;
     int exception_is_int;
     target_ulong exception_next_eip;
-    target_ulong dr[8]; /* debug registers */
+    target_ulong dr[8]; /* debug registers; note dr4 and dr5 are unused */
     union {
         struct CPUBreakpoint *cpu_breakpoint[4];
         struct CPUWatchpoint *cpu_watchpoint[4];
@@ -1127,41 +1130,13 @@
 void x86_stq_phys(CPUState *cs, hwaddr addr, uint64_t val);
 #endif
 
-static inline bool hw_local_breakpoint_enabled(unsigned long dr7, int index)
-{
-    return (dr7 >> (index * 2)) & 1;
-}
-
-static inline bool hw_global_breakpoint_enabled(unsigned long dr7, int index)
-{
-    return (dr7 >> (index * 2)) & 2;
-
-}
-static inline bool hw_breakpoint_enabled(unsigned long dr7, int index)
-{
-    return hw_global_breakpoint_enabled(dr7, index) ||
-           hw_local_breakpoint_enabled(dr7, index);
-}
-
-static inline int hw_breakpoint_type(unsigned long dr7, int index)
-{
-    return (dr7 >> (DR7_TYPE_SHIFT + (index * 4))) & 3;
-}
-
-static inline int hw_breakpoint_len(unsigned long dr7, int index)
-{
-    int len = ((dr7 >> (DR7_LEN_SHIFT + (index * 4))) & 3);
-    return (len == 2) ? 8 : len + 1;
-}
-
-void hw_breakpoint_insert(CPUX86State *env, int index);
-void hw_breakpoint_remove(CPUX86State *env, int index);
 void breakpoint_handler(CPUState *cs);
 
 /* will be suppressed */
 void cpu_x86_update_cr0(CPUX86State *env, uint32_t new_cr0);
 void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3);
 void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4);
+void cpu_x86_update_dr7(CPUX86State *env, uint32_t new_dr7);
 
 /* hw/pc.c */
 uint64_t cpu_get_tsc(CPUX86State *env);

diff --git a/target-i386/helper.h b/target-i386/helper.h
index 8454a04..ecfcfd1 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h

@@ -40,7 +40,8 @@
 DEF_HELPER_3(write_crN, void, env, int, tl)
 DEF_HELPER_2(lmsw, void, env, tl)
 DEF_HELPER_1(clts, void, env)
-DEF_HELPER_3(movl_drN_T0, void, env, int, tl)
+DEF_HELPER_FLAGS_3(set_dr, TCG_CALL_NO_WG, void, env, int, tl)
+DEF_HELPER_FLAGS_2(get_dr, TCG_CALL_NO_WG, tl, env, int)
 DEF_HELPER_2(invlpg, void, env, tl)
 
 DEF_HELPER_4(enter_level, void, env, int, int, tl)
@@ -92,6 +93,7 @@
 DEF_HELPER_2(inw, tl, env, i32)
 DEF_HELPER_3(outl, void, env, i32, i32)
 DEF_HELPER_2(inl, tl, env, i32)
+DEF_HELPER_FLAGS_4(bpt_io, TCG_CALL_NO_WG, void, env, i32, i32, tl)
 
 DEF_HELPER_3(svm_check_intercept_param, void, env, i32, i64)
 DEF_HELPER_3(vmexit, void, env, i32, i64)

diff --git a/target-i386/machine.c b/target-i386/machine.c
index 6737366..a18e16e 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c

@@ -367,8 +367,12 @@
 
     cpu_breakpoint_remove_all(cs, BP_CPU);
     cpu_watchpoint_remove_all(cs, BP_CPU);
-    for (i = 0; i < DR7_MAX_BP; i++) {
-        hw_breakpoint_insert(env, i);
+    {
+        /* Indicate all breakpoints disabled, as they are, then
+           let the helper re-enable them.  */
+        target_ulong dr7 = env->dr[7];
+        env->dr[7] = dr7 & ~(DR7_GLOBAL_BP_MASK | DR7_LOCAL_BP_MASK);
+        cpu_x86_update_dr7(env, dr7);
     }
     tlb_flush(cs, 1);
 

diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c
index 1cbe559..20ee892 100644
--- a/target-i386/seg_helper.c
+++ b/target-i386/seg_helper.c

@@ -501,13 +501,7 @@
 #ifndef CONFIG_USER_ONLY
     /* reset local breakpoints */
     if (env->dr[7] & DR7_LOCAL_BP_MASK) {
-        for (i = 0; i < DR7_MAX_BP; i++) {
-            if (hw_local_breakpoint_enabled(env->dr[7], i) &&
-                !hw_global_breakpoint_enabled(env->dr[7], i)) {
-                hw_breakpoint_remove(env, i);
-            }
-        }
-        env->dr[7] &= ~DR7_LOCAL_BP_MASK;
+        cpu_x86_update_dr7(env, env->dr[7] & ~DR7_LOCAL_BP_MASK);
     }
 #endif
 }

diff --git a/target-i386/smm_helper.c b/target-i386/smm_helper.c
index 02e24b9..c272a98 100644
--- a/target-i386/smm_helper.c
+++ b/target-i386/smm_helper.c

@@ -266,7 +266,7 @@
 
     val = x86_ldl_phys(cs, sm_state + 0x7efc); /* revision ID */
     if (val & 0x20000) {
-        env->smbase = x86_ldl_phys(cs, sm_state + 0x7f00) & ~0x7fff;
+        env->smbase = x86_ldl_phys(cs, sm_state + 0x7f00);
     }
 #else
     cpu_x86_update_cr0(env, x86_ldl_phys(cs, sm_state + 0x7ffc));
@@ -319,7 +319,7 @@
 
     val = x86_ldl_phys(cs, sm_state + 0x7efc); /* revision ID */
     if (val & 0x20000) {
-        env->smbase = x86_ldl_phys(cs, sm_state + 0x7ef8) & ~0x7fff;
+        env->smbase = x86_ldl_phys(cs, sm_state + 0x7ef8);
     }
 #endif
     if ((env->hflags2 & HF2_SMM_INSIDE_NMI_MASK) == 0) {

diff --git a/target-i386/translate.c b/target-i386/translate.c
index ef10e68..b400d24 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c

@@ -1154,6 +1154,19 @@
     gen_op_add_reg_T0(s->aflag, R_EDI);
 }
 
+static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
+{
+    if (s->flags & HF_IOBPT_MASK) {
+        TCGv_i32 t_size = tcg_const_i32(1 << ot);
+        TCGv t_next = tcg_const_tl(s->pc - s->cs_base);
+
+        gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
+        tcg_temp_free_i32(t_size);
+        tcg_temp_free(t_next);
+    }
+}
+
+
 static inline void gen_ins(DisasContext *s, TCGMemOp ot)
 {
     if (s->tb->cflags & CF_USE_ICOUNT) {
@@ -1170,6 +1183,7 @@
     gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_EDI);
+    gen_bpt_io(s, cpu_tmp2_i32, ot);
     if (s->tb->cflags & CF_USE_ICOUNT) {
         gen_io_end();
     }
@@ -1187,9 +1201,9 @@
     tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
     tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[0]);
     gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
-
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_ESI);
+    gen_bpt_io(s, cpu_tmp2_i32, ot);
     if (s->tb->cflags & CF_USE_ICOUNT) {
         gen_io_end();
     }
@@ -2418,7 +2432,7 @@
 {
     int i;
     gen_op_movl_A0_reg(R_ESP);
-    gen_op_addl_A0_im(-8 << s->dflag);
+    gen_op_addl_A0_im(-(8 << s->dflag));
     if (!s->ss32)
         tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
     tcg_gen_mov_tl(cpu_T[1], cpu_A0);
@@ -6269,6 +6283,7 @@
         tcg_gen_movi_i32(cpu_tmp2_i32, val);
         gen_helper_in_func(ot, cpu_T[1], cpu_tmp2_i32);
         gen_op_mov_reg_v(ot, R_EAX, cpu_T[1]);
+        gen_bpt_io(s, cpu_tmp2_i32, ot);
         if (s->tb->cflags & CF_USE_ICOUNT) {
             gen_io_end();
             gen_jmp(s, s->pc - s->cs_base);
@@ -6289,6 +6304,7 @@
         tcg_gen_movi_i32(cpu_tmp2_i32, val);
         tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
         gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
+        gen_bpt_io(s, cpu_tmp2_i32, ot);
         if (s->tb->cflags & CF_USE_ICOUNT) {
             gen_io_end();
             gen_jmp(s, s->pc - s->cs_base);
@@ -6306,6 +6322,7 @@
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         gen_helper_in_func(ot, cpu_T[1], cpu_tmp2_i32);
         gen_op_mov_reg_v(ot, R_EAX, cpu_T[1]);
+        gen_bpt_io(s, cpu_tmp2_i32, ot);
         if (s->tb->cflags & CF_USE_ICOUNT) {
             gen_io_end();
             gen_jmp(s, s->pc - s->cs_base);
@@ -6325,6 +6342,7 @@
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
         gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
+        gen_bpt_io(s, cpu_tmp2_i32, ot);
         if (s->tb->cflags & CF_USE_ICOUNT) {
             gen_io_end();
             gen_jmp(s, s->pc - s->cs_base);
@@ -7609,18 +7627,20 @@
                 ot = MO_64;
             else
                 ot = MO_32;
-            /* XXX: do it dynamically with CR4.DE bit */
-            if (reg == 4 || reg == 5 || reg >= 8)
+            if (reg >= 8) {
                 goto illegal_op;
+            }
             if (b & 2) {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
                 gen_op_mov_v_reg(ot, cpu_T[0], rm);
-                gen_helper_movl_drN_T0(cpu_env, tcg_const_i32(reg), cpu_T[0]);
+                tcg_gen_movi_i32(cpu_tmp2_i32, reg);
+                gen_helper_set_dr(cpu_env, cpu_tmp2_i32, cpu_T[0]);
                 gen_jmp_im(s->pc - s->cs_base);
                 gen_eob(s);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
-                tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,dr[reg]));
+                tcg_gen_movi_i32(cpu_tmp2_i32, reg);
+                gen_helper_get_dr(cpu_T[0], cpu_env, cpu_tmp2_i32);
                 gen_op_mov_reg_v(ot, rm, cpu_T[0]);
             }
         }
@@ -7942,6 +7962,11 @@
                                          tb->flags & HF_RF_MASK
                                          ? BP_GDB : BP_ANY))) {
             gen_debug(dc, pc_ptr - dc->cs_base);
+            /* The address covered by the breakpoint must be included in
+               [tb->pc, tb->pc + tb->size) in order to for it to be
+               properly cleared -- thus we increment the PC here so that
+               the logic setting tb->size below does the right thing.  */
+            pc_ptr += 1;
             goto done_generating;
         }
         if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {

diff --git a/target-lm32/translate.c b/target-lm32/translate.c
index c61ad0f..fa5b0b9 100644
--- a/target-lm32/translate.c
+++ b/target-lm32/translate.c

@@ -1078,6 +1078,11 @@
             tcg_gen_movi_tl(cpu_pc, dc->pc);
             t_gen_raise_exception(dc, EXCP_DEBUG);
             dc->is_jmp = DISAS_UPDATE;
+            /* The address covered by the breakpoint must be included in
+               [tb->pc, tb->pc + tb->size) in order to for it to be
+               properly cleared -- thus we increment the PC here so that
+               the logic setting tb->size below does the right thing.  */
+            dc->pc += 4;
             break;
         }
 

diff --git a/target-m68k/translate.c b/target-m68k/translate.c
index 5995cce..41ae2c6 100644
--- a/target-m68k/translate.c
+++ b/target-m68k/translate.c

@@ -3004,6 +3004,11 @@
         if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) {
             gen_exception(dc, dc->pc, EXCP_DEBUG);
             dc->is_jmp = DISAS_JUMP;
+            /* The address covered by the breakpoint must be included in
+               [tb->pc, tb->pc + tb->size) in order to for it to be
+               properly cleared -- thus we increment the PC here so that
+               the logic setting tb->size below does the right thing.  */
+            dc->pc += 2;
             break;
         }
 

diff --git a/target-microblaze/translate.c b/target-microblaze/translate.c
index a9c5010..154b9d6 100644
--- a/target-microblaze/translate.c
+++ b/target-microblaze/translate.c

@@ -1693,6 +1693,11 @@
         if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) {
             t_gen_raise_exception(dc, EXCP_DEBUG);
             dc->is_jmp = DISAS_UPDATE;
+            /* The address covered by the breakpoint must be included in
+               [tb->pc, tb->pc + tb->size) in order to for it to be
+               properly cleared -- thus we increment the PC here so that
+               the logic setting tb->size below does the right thing.  */
+            dc->pc += 4;
             break;
         }
 

diff --git a/target-mips/cpu.c b/target-mips/cpu.c
index 37880d2..639a24b 100644
--- a/target-mips/cpu.c
+++ b/target-mips/cpu.c

@@ -53,12 +53,15 @@
     CPUMIPSState *env = &cpu->env;
     bool has_work = false;
 
-    /* It is implementation dependent if non-enabled interrupts
-       wake-up the CPU, however most of the implementations only
+    /* Prior to MIPS Release 6 it is implementation dependent if non-enabled
+       interrupts wake-up the CPU, however most of the implementations only
        check for interrupts that can be taken. */
     if ((cs->interrupt_request & CPU_INTERRUPT_HARD) &&
         cpu_mips_hw_interrupts_pending(env)) {
-        has_work = true;
+        if (cpu_mips_hw_interrupts_enabled(env) ||
+            (env->insn_flags & ISA_MIPS32R6)) {
+            has_work = true;
+        }
     }
 
     /* MIPS-MT has the ability to halt the CPU.  */

diff --git a/target-mips/cpu.h b/target-mips/cpu.h
index f32a0fd..fa919c1 100644
--- a/target-mips/cpu.h
+++ b/target-mips/cpu.h

@@ -469,6 +469,7 @@
 #define CP0C5_CV         29
 #define CP0C5_EVA        28
 #define CP0C5_MSAEn      27
+#define CP0C5_XNP        13
 #define CP0C5_UFE        9
 #define CP0C5_FRE        8
 #define CP0C5_SBRI       6
@@ -637,23 +638,24 @@
     return env->hflags & MIPS_HFLAG_KSU;
 }
 
-static inline int cpu_mips_hw_interrupts_pending(CPUMIPSState *env)
+static inline bool cpu_mips_hw_interrupts_enabled(CPUMIPSState *env)
 {
-    int32_t pending;
-    int32_t status;
-    int r;
-
-    if (!(env->CP0_Status & (1 << CP0St_IE)) ||
-        (env->CP0_Status & (1 << CP0St_EXL)) ||
-        (env->CP0_Status & (1 << CP0St_ERL)) ||
+    return (env->CP0_Status & (1 << CP0St_IE)) &&
+        !(env->CP0_Status & (1 << CP0St_EXL)) &&
+        !(env->CP0_Status & (1 << CP0St_ERL)) &&
+        !(env->hflags & MIPS_HFLAG_DM) &&
         /* Note that the TCStatus IXMT field is initialized to zero,
            and only MT capable cores can set it to one. So we don't
            need to check for MT capabilities here.  */
-        (env->active_tc.CP0_TCStatus & (1 << CP0TCSt_IXMT)) ||
-        (env->hflags & MIPS_HFLAG_DM)) {
-        /* Interrupts are disabled */
-        return 0;
-    }
+        !(env->active_tc.CP0_TCStatus & (1 << CP0TCSt_IXMT));
+}
+
+/* Check if there is pending and not masked out interrupt */
+static inline bool cpu_mips_hw_interrupts_pending(CPUMIPSState *env)
+{
+    int32_t pending;
+    int32_t status;
+    bool r;
 
     pending = env->CP0_Cause & CP0Ca_IP_mask;
     status = env->CP0_Status & CP0Ca_IP_mask;
@@ -667,7 +669,7 @@
         /* A MIPS configured with compatibility or VInt (Vectored Interrupts)
            treats the pending lines as individual interrupt lines, the status
            lines are individual masks.  */
-        r = pending & status;
+        r = (pending & status) != 0;
     }
     return r;
 }
@@ -1000,7 +1002,12 @@
 
     if (env->insn_flags & ISA_MIPS32R6) {
         bool has_supervisor = extract32(mask, CP0St_KSU, 2) == 0x3;
-
+#if defined(TARGET_MIPS64)
+        uint32_t ksux = (1 << CP0St_KX) & val;
+        ksux |= (ksux >> 1) & val; /* KX = 0 forces SX to be 0 */
+        ksux |= (ksux >> 1) & val; /* SX = 0 forces UX to be 0 */
+        val = (val & ~(7 << CP0St_UX)) | ksux;
+#endif
         if (has_supervisor && extract32(val, CP0St_KSU, 2) == 0x3) {
             mask &= ~(3 << CP0St_KSU);
         }

diff --git a/target-mips/helper.c b/target-mips/helper.c
index 01c4461..b3fe816 100644
--- a/target-mips/helper.c
+++ b/target-mips/helper.c

@@ -293,9 +293,10 @@
         (env->CP0_EntryHi & 0xFF) | (address & (TARGET_PAGE_MASK << 1));
 #if defined(TARGET_MIPS64)
     env->CP0_EntryHi &= env->SEGMask;
-    env->CP0_XContext = (env->CP0_XContext & ((~0ULL) << (env->SEGBITS - 7))) |
-                        ((address & 0xC00000000000ULL) >> (55 - env->SEGBITS)) |
-                        ((address & ((1ULL << env->SEGBITS) - 1) & 0xFFFFFFFFFFFFE000ULL) >> 9);
+    env->CP0_XContext =
+        /* PTEBase */   (env->CP0_XContext & ((~0ULL) << (env->SEGBITS - 7))) |
+        /* R */         (extract64(address, 62, 2) << (env->SEGBITS - 9)) |
+        /* BadVPN2 */   (extract64(address, 13, env->SEGBITS - 13) << 4);
 #endif
     cs->exception_index = exception;
     env->error_code = error_code;
@@ -759,7 +760,8 @@
         MIPSCPU *cpu = MIPS_CPU(cs);
         CPUMIPSState *env = &cpu->env;
 
-        if (cpu_mips_hw_interrupts_pending(env)) {
+        if (cpu_mips_hw_interrupts_enabled(env) &&
+            cpu_mips_hw_interrupts_pending(env)) {
             /* Raise it */
             cs->exception_index = EXCP_EXT_INTERRUPT;
             env->error_code = 0;

diff --git a/target-mips/helper.h b/target-mips/helper.h
index d8cc766..95b9149 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h

@@ -358,6 +358,8 @@
 DEF_HELPER_1(rdhwr_synci_step, tl, env)
 DEF_HELPER_1(rdhwr_cc, tl, env)
 DEF_HELPER_1(rdhwr_ccres, tl, env)
+DEF_HELPER_1(rdhwr_performance, tl, env)
+DEF_HELPER_1(rdhwr_xnp, tl, env)
 DEF_HELPER_2(pmon, void, env, int)
 DEF_HELPER_1(wait, void, env)
 

diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index 6739fff..056d53b 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c

@@ -1357,6 +1357,13 @@
 {
     uint32_t mask = 0x0000000F;
 
+    if ((env->CP0_Config1 & (1 << CP0C1_PC)) &&
+        (env->insn_flags & ISA_MIPS32R6)) {
+        mask |= (1 << 4);
+    }
+    if (env->insn_flags & ISA_MIPS32R6) {
+        mask |= (1 << 5);
+    }
     if (env->CP0_Config3 & (1 << CP0C3_ULRI)) {
         mask |= (1 << 29);
 
@@ -2185,53 +2192,52 @@
 }
 #endif /* !CONFIG_USER_ONLY */
 
+static inline void check_hwrena(CPUMIPSState *env, int reg)
+{
+    if ((env->hflags & MIPS_HFLAG_CP0) || (env->CP0_HWREna & (1 << reg))) {
+        return;
+    }
+    do_raise_exception(env, EXCP_RI, GETPC());
+}
+
 target_ulong helper_rdhwr_cpunum(CPUMIPSState *env)
 {
-    if ((env->hflags & MIPS_HFLAG_CP0) ||
-        (env->CP0_HWREna & (1 << 0)))
-        return env->CP0_EBase & 0x3ff;
-    else
-        do_raise_exception(env, EXCP_RI, GETPC());
-
-    return 0;
+    check_hwrena(env, 0);
+    return env->CP0_EBase & 0x3ff;
 }
 
 target_ulong helper_rdhwr_synci_step(CPUMIPSState *env)
 {
-    if ((env->hflags & MIPS_HFLAG_CP0) ||
-        (env->CP0_HWREna & (1 << 1)))
-        return env->SYNCI_Step;
-    else
-        do_raise_exception(env, EXCP_RI, GETPC());
-
-    return 0;
+    check_hwrena(env, 1);
+    return env->SYNCI_Step;
 }
 
 target_ulong helper_rdhwr_cc(CPUMIPSState *env)
 {
-    if ((env->hflags & MIPS_HFLAG_CP0) ||
-        (env->CP0_HWREna & (1 << 2))) {
+    check_hwrena(env, 2);
 #ifdef CONFIG_USER_ONLY
-        return env->CP0_Count;
+    return env->CP0_Count;
 #else
-        return (int32_t)cpu_mips_get_count(env);
+    return (int32_t)cpu_mips_get_count(env);
 #endif
-    } else {
-        do_raise_exception(env, EXCP_RI, GETPC());
-    }
-
-    return 0;
 }
 
 target_ulong helper_rdhwr_ccres(CPUMIPSState *env)
 {
-    if ((env->hflags & MIPS_HFLAG_CP0) ||
-        (env->CP0_HWREna & (1 << 3)))
-        return env->CCRes;
-    else
-        do_raise_exception(env, EXCP_RI, GETPC());
+    check_hwrena(env, 3);
+    return env->CCRes;
+}
 
-    return 0;
+target_ulong helper_rdhwr_performance(CPUMIPSState *env)
+{
+    check_hwrena(env, 4);
+    return env->CP0_Performance0;
+}
+
+target_ulong helper_rdhwr_xnp(CPUMIPSState *env)
+{
+    check_hwrena(env, 5);
+    return (env->CP0_Config5 >> CP0C5_XNP) & 1;
 }
 
 void helper_pmon(CPUMIPSState *env, int function)

diff --git a/target-mips/translate.c b/target-mips/translate.c
index 897839c..5626647 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c

@@ -323,6 +323,7 @@
     OPC_TLTIU    = (0x0B << 16) | OPC_REGIMM,
     OPC_TEQI     = (0x0C << 16) | OPC_REGIMM,
     OPC_TNEI     = (0x0E << 16) | OPC_REGIMM,
+    OPC_SIGRIE   = (0x17 << 16) | OPC_REGIMM,
     OPC_SYNCI    = (0x1F << 16) | OPC_REGIMM,
 
     OPC_DAHI     = (0x06 << 16) | OPC_REGIMM,
@@ -10333,7 +10334,7 @@
     }
 }
 
-static void gen_rdhwr(DisasContext *ctx, int rt, int rd)
+static void gen_rdhwr(DisasContext *ctx, int rt, int rd, int sel)
 {
     TCGv t0;
 
@@ -10361,6 +10362,22 @@
         gen_helper_rdhwr_ccres(t0, cpu_env);
         gen_store_gpr(t0, rt);
         break;
+    case 4:
+        check_insn(ctx, ISA_MIPS32R6);
+        if (sel != 0) {
+            /* Performance counter registers are not implemented other than
+             * control register 0.
+             */
+            generate_exception(ctx, EXCP_RI);
+        }
+        gen_helper_rdhwr_performance(t0, cpu_env);
+        gen_store_gpr(t0, rt);
+        break;
+    case 5:
+        check_insn(ctx, ISA_MIPS32R6);
+        gen_helper_rdhwr_xnp(t0, cpu_env);
+        gen_store_gpr(t0, rt);
+        break;
     case 29:
 #if defined(CONFIG_USER_ONLY)
         tcg_gen_ld_tl(t0, cpu_env,
@@ -11979,6 +11996,7 @@
     ROTR = 0x3,
     SELEQZ = 0x5,
     SELNEZ = 0x6,
+    R6_RDHWR = 0x7,
 
     SLLV = 0x0,
     SRLV = 0x1,
@@ -12009,11 +12027,13 @@
     MODU = 0x7,
 
     /* The following can be distinguished by their lower 6 bits. */
+    BREAK32 = 0x07,
     INS = 0x0c,
     LSA = 0x0f,
     ALIGN = 0x1f,
     EXT = 0x2c,
-    POOL32AXF = 0x3c
+    POOL32AXF = 0x3c,
+    SIGRIE = 0x3f
 };
 
 /* POOL32AXF encoding of minor opcode field extension */
@@ -12931,7 +12951,8 @@
             gen_cl(ctx, mips32_op, rt, rs);
             break;
         case RDHWR:
-            gen_rdhwr(ctx, rt, rs);
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
+            gen_rdhwr(ctx, rt, rs, 0);
             break;
         case WSBH:
             gen_bshfl(ctx, OPC_WSBH, rs, rt);
@@ -13486,6 +13507,10 @@
                 check_insn(ctx, ISA_MIPS32R6);
                 gen_cond_move(ctx, OPC_SELNEZ, rd, rs, rt);
                 break;
+            case R6_RDHWR:
+                check_insn(ctx, ISA_MIPS32R6);
+                gen_rdhwr(ctx, rt, rs, extract32(ctx->opcode, 11, 3));
+                break;
             default:
                 goto pool32a_invalid;
             }
@@ -13629,9 +13654,13 @@
         case POOL32AXF:
             gen_pool32axf(env, ctx, rt, rs);
             break;
-        case 0x07:
+        case BREAK32:
             generate_exception_end(ctx, EXCP_BREAK);
             break;
+        case SIGRIE:
+            check_insn(ctx, ISA_MIPS32R6);
+            generate_exception_end(ctx, EXCP_RI);
+            break;
         default:
         pool32a_invalid:
                 MIPS_INVAL("pool32a");
@@ -17732,7 +17761,7 @@
         break;
 #endif
     case OPC_RDHWR:
-        gen_rdhwr(ctx, rt, rd);
+        gen_rdhwr(ctx, rt, rd, extract32(ctx->opcode, 6, 3));
         break;
     case OPC_FORK:
         check_insn(ctx, ASE_MT);
@@ -18950,6 +18979,10 @@
             check_insn_opc_removed(ctx, ISA_MIPS32R6);
             gen_trap(ctx, op1, rs, -1, imm);
             break;
+        case OPC_SIGRIE:
+            check_insn(ctx, ISA_MIPS32R6);
+            generate_exception_end(ctx, EXCP_RI);
+            break;
         case OPC_SYNCI:
             check_insn(ctx, ISA_MIPS32R2);
             /* Break the TB to be able to sync copied instructions
@@ -19594,8 +19627,10 @@
             save_cpu_state(&ctx, 1);
             ctx.bstate = BS_BRANCH;
             gen_helper_raise_exception_debug(cpu_env);
-            /* Include the breakpoint location or the tb won't
-             * be flushed when it must be.  */
+            /* The address covered by the breakpoint must be included in
+               [tb->pc, tb->pc + tb->size) in order to for it to be
+               properly cleared -- thus we increment the PC here so that
+               the logic setting tb->size below does the right thing.  */
             ctx.pc += 4;
             goto done_generating;
         }

diff --git a/target-mips/translate_init.c b/target-mips/translate_init.c
index 1b45884..bb33c7c 100644
--- a/target-mips/translate_init.c
+++ b/target-mips/translate_init.c

@@ -447,7 +447,7 @@
                        (1 << CP0C3_RXI) | (1U << CP0C3_M),
         .CP0_Config4 = MIPS_CONFIG4 | (0xfc << CP0C4_KScrExist) |
                        (3 << CP0C4_IE) | (1U << CP0C4_M),
-        .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_LLB),
+        .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_XNP) | (1 << CP0C5_LLB),
         .CP0_Config5_rw_bitmask = (1 << CP0C5_SBRI) | (1 << CP0C5_FRE) |
                                   (1 << CP0C5_UFE),
         .CP0_LLAddr_rw_bitmask = 0,
@@ -665,7 +665,7 @@
                        (1 << CP0C3_RXI) | (1 << CP0C3_LPA),
         .CP0_Config4 = MIPS_CONFIG4 | (1U << CP0C4_M) | (3 << CP0C4_IE) |
                        (0xfc << CP0C4_KScrExist),
-        .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_LLB),
+        .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_XNP) | (1 << CP0C5_LLB),
         .CP0_Config5_rw_bitmask = (1 << CP0C5_MSAEn) | (1 << CP0C5_SBRI) |
                                   (1 << CP0C5_FRE) | (1 << CP0C5_UFE),
         .CP0_LLAddr_rw_bitmask = 0,

diff --git a/target-moxie/translate.c b/target-moxie/translate.c
index f84841e..6dedcb7 100644
--- a/target-moxie/translate.c
+++ b/target-moxie/translate.c

@@ -848,6 +848,11 @@
             tcg_gen_movi_i32(cpu_pc, ctx.pc);
             gen_helper_debug(cpu_env);
             ctx.bstate = BS_EXCP;
+            /* The address covered by the breakpoint must be included in
+               [tb->pc, tb->pc + tb->size) in order to for it to be
+               properly cleared -- thus we increment the PC here so that
+               the logic setting tb->size below does the right thing.  */
+            ctx.pc += 2;
             goto done_generating;
         }
 

diff --git a/target-openrisc/translate.c b/target-openrisc/translate.c
index b66fde1..606490a 100644
--- a/target-openrisc/translate.c
+++ b/target-openrisc/translate.c

@@ -1665,6 +1665,11 @@
             tcg_gen_movi_tl(cpu_pc, dc->pc);
             gen_exception(dc, EXCP_DEBUG);
             dc->is_jmp = DISAS_UPDATE;
+            /* The address covered by the breakpoint must be included in
+               [tb->pc, tb->pc + tb->size) in order to for it to be
+               properly cleared -- thus we increment the PC here so that
+               the logic setting tb->size below does the right thing.  */
+            dc->pc += 4;
             break;
         }
 

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 98ce5a7..b34aed6 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h

@@ -117,14 +117,14 @@
 #define POWERPC_MMU_AMR      0x00040000
     /* 64 bits PowerPC MMU                                     */
     POWERPC_MMU_64B        = POWERPC_MMU_64 | 0x00000001,
+    /* Architecture 2.03 and later (has LPCR) */
+    POWERPC_MMU_2_03       = POWERPC_MMU_64 | 0x00000002,
     /* Architecture 2.06 variant                               */
     POWERPC_MMU_2_06       = POWERPC_MMU_64 | POWERPC_MMU_1TSEG
                              | POWERPC_MMU_AMR | 0x00000003,
-    /* Architecture 2.06 "degraded" (no 1T segments)           */
-    POWERPC_MMU_2_06a      = POWERPC_MMU_64 | POWERPC_MMU_AMR
-                             | 0x00000003,
-    /* Architecture 2.06 "degraded" (no 1T segments or AMR)    */
-    POWERPC_MMU_2_06d      = POWERPC_MMU_64 | 0x00000003,
+    /* Architecture 2.07 variant                               */
+    POWERPC_MMU_2_07       = POWERPC_MMU_64 | POWERPC_MMU_1TSEG
+                             | POWERPC_MMU_AMR | 0x00000004,
 #endif /* defined(TARGET_PPC64) */
 };
 
@@ -1073,6 +1073,7 @@
     uint64_t insns_flags2;
 #if defined(TARGET_PPC64)
     struct ppc_segment_page_sizes sps;
+    bool ci_large_pages;
 #endif
 
 #if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)

diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 38aa927..ac70f08 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c

@@ -259,7 +259,8 @@
             info->flags |= KVM_PPC_1T_SEGMENTS;
         }
 
-        if (env->mmu_model == POWERPC_MMU_2_06) {
+        if (env->mmu_model == POWERPC_MMU_2_06 ||
+            env->mmu_model == POWERPC_MMU_2_07) {
             info->slb_size = 32;
         } else {
             info->slb_size = 64;
@@ -272,8 +273,9 @@
         info->sps[i].enc[0].pte_enc = 0;
         i++;
 
-        /* 64K on MMU 2.06 */
-        if (env->mmu_model == POWERPC_MMU_2_06) {
+        /* 64K on MMU 2.06 and later */
+        if (env->mmu_model == POWERPC_MMU_2_06 ||
+            env->mmu_model == POWERPC_MMU_2_07) {
             info->sps[i].page_shift = 16;
             info->sps[i].slb_enc = 0x110;
             info->sps[i].enc[0].page_shift = 16;
@@ -412,6 +414,13 @@
     /* Convert to QEMU form */
     memset(&env->sps, 0, sizeof(env->sps));
 
+    /* If we have HV KVM, we need to forbid CI large pages if our
+     * host page size is smaller than 64K.
+     */
+    if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
+        env->ci_large_pages = getpagesize() >= 0x10000;
+    }
+
     /*
      * XXX This loop should be an entry wide AND of the capabilities that
      *     the selected CPU has with the capabilities that KVM supports.
@@ -2070,7 +2079,7 @@
 }
 
 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
-                              bool vfio_accel)
+                              bool need_vfio)
 {
     struct kvm_create_spapr_tce args = {
         .liobn = liobn,
@@ -2084,7 +2093,7 @@
      * destroying the table, which the upper layers -will- do
      */
     *pfd = -1;
-    if (!cap_spapr_tce || (vfio_accel && !cap_spapr_vfio)) {
+    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
         return NULL;
     }
 

diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index 470f6d6..309cbe0 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h

@@ -36,7 +36,7 @@
 off_t kvmppc_alloc_rma(void **rma);
 bool kvmppc_spapr_use_multitce(void);
 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
-                              bool vfio_accel);
+                              bool need_vfio);
 int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size);
 int kvmppc_reset_htab(int shift_hint);
 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift);

diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
index 527c6ad..e52d0e5 100644
--- a/target-ppc/mmu_helper.c
+++ b/target-ppc/mmu_helper.c

@@ -1293,9 +1293,9 @@
         break;
 #if defined(TARGET_PPC64)
     case POWERPC_MMU_64B:
+    case POWERPC_MMU_2_03:
     case POWERPC_MMU_2_06:
-    case POWERPC_MMU_2_06a:
-    case POWERPC_MMU_2_06d:
+    case POWERPC_MMU_2_07:
         dump_slb(f, cpu_fprintf, env);
         break;
 #endif
@@ -1433,9 +1433,9 @@
     switch (env->mmu_model) {
 #if defined(TARGET_PPC64)
     case POWERPC_MMU_64B:
+    case POWERPC_MMU_2_03:
     case POWERPC_MMU_2_06:
-    case POWERPC_MMU_2_06a:
-    case POWERPC_MMU_2_06d:
+    case POWERPC_MMU_2_07:
         return ppc_hash64_get_phys_page_debug(env, addr);
 #endif
 
@@ -1937,9 +1937,9 @@
     case POWERPC_MMU_601:
 #if defined(TARGET_PPC64)
     case POWERPC_MMU_64B:
+    case POWERPC_MMU_2_03:
     case POWERPC_MMU_2_06:
-    case POWERPC_MMU_2_06a:
-    case POWERPC_MMU_2_06d:
+    case POWERPC_MMU_2_07:
 #endif /* defined(TARGET_PPC64) */
         tlb_flush(CPU(cpu), 1);
         break;
@@ -2011,9 +2011,9 @@
         break;
 #if defined(TARGET_PPC64)
     case POWERPC_MMU_64B:
+    case POWERPC_MMU_2_03:
     case POWERPC_MMU_2_06:
-    case POWERPC_MMU_2_06a:
-    case POWERPC_MMU_2_06d:
+    case POWERPC_MMU_2_07:
         /* tlbie invalidate TLBs for all segments */
         /* XXX: given the fact that there are too many segments to invalidate,
          *      and we still don't have a tlb_flush_mask(env, n, mask) in QEMU,

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index c2bc1a7..308ad68 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c

@@ -11327,9 +11327,9 @@
     case POWERPC_MMU_SOFT_74xx:
 #if defined(TARGET_PPC64)
     case POWERPC_MMU_64B:
+    case POWERPC_MMU_2_03:
     case POWERPC_MMU_2_06:
-    case POWERPC_MMU_2_06a:
-    case POWERPC_MMU_2_06d:
+    case POWERPC_MMU_2_07:
 #endif
         cpu_fprintf(f, " SDR1 " TARGET_FMT_lx "   DAR " TARGET_FMT_lx
                        "  DSISR " TARGET_FMT_lx "\n", env->spr[SPR_SDR1],
@@ -11488,6 +11488,11 @@
 
         if (unlikely(cpu_breakpoint_test(cs, ctx.nip, BP_ANY))) {
             gen_debug_exception(ctxp);
+            /* The address covered by the breakpoint must be included in
+               [tb->pc, tb->pc + tb->size) in order to for it to be
+               properly cleared -- thus we increment the PC here so that
+               the logic setting tb->size below does the right thing.  */
+            ctx.nip += 4;
             break;
         }
 

diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index b541473..4934c80 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c

@@ -7864,6 +7864,7 @@
         gen_spr_book3s_ids(env);
         gen_spr_amr(env);
         gen_spr_book3s_purr(env);
+        env->ci_large_pages = true;
         break;
     default:
         g_assert_not_reached();
@@ -8019,7 +8020,7 @@
                     (1ull << MSR_DR) |
                     (1ull << MSR_PMM) |
                     (1ull << MSR_RI);
-    pcc->mmu_model = POWERPC_MMU_64B;
+    pcc->mmu_model = POWERPC_MMU_2_03;
 #if defined(CONFIG_SOFTMMU)
     pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault;
 #endif
@@ -8243,7 +8244,7 @@
                     (1ull << MSR_PMM) |
                     (1ull << MSR_RI) |
                     (1ull << MSR_LE);
-    pcc->mmu_model = POWERPC_MMU_2_06;
+    pcc->mmu_model = POWERPC_MMU_2_07;
 #if defined(CONFIG_SOFTMMU)
     pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault;
 #endif

diff --git a/target-s390x/translate.c b/target-s390x/translate.c
index 05d51fe..c79a2cb 100644
--- a/target-s390x/translate.c
+++ b/target-s390x/translate.c

@@ -5360,6 +5360,11 @@
         if (unlikely(cpu_breakpoint_test(cs, dc.pc, BP_ANY))) {
             status = EXIT_PC_STALE;
             do_debug = true;
+            /* The address covered by the breakpoint must be included in
+               [tb->pc, tb->pc + tb->size) in order to for it to be
+               properly cleared -- thus we increment the PC here so that
+               the logic setting tb->size below does the right thing.  */
+            dc.pc += 2;
             break;
         }
 

diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index f764bc2..7bc6216 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c

@@ -1855,6 +1855,11 @@
             tcg_gen_movi_i32(cpu_pc, ctx.pc);
             gen_helper_debug(cpu_env);
             ctx.bstate = BS_BRANCH;
+            /* The address covered by the breakpoint must be included in
+               [tb->pc, tb->pc + tb->size) in order to for it to be
+               properly cleared -- thus we increment the PC here so that
+               the logic setting tb->size below does the right thing.  */
+            ctx.pc += 2;
             break;
         }
 

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index b59742a..41a3319 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c

@@ -5247,6 +5247,7 @@
             tcg_gen_insn_start(dc->pc, dc->npc);
         }
         num_insns++;
+        last_pc = dc->pc;
 
         if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) {
             if (dc->pc != pc_start) {
@@ -5262,7 +5263,6 @@
             gen_io_start();
         }
 
-        last_pc = dc->pc;
         insn = cpu_ldl_code(env, dc->pc);
 
         disas_sparc_insn(dc, insn);

diff --git a/target-unicore32/translate.c b/target-unicore32/translate.c
index 48f89fb..d2f92f0 100644
--- a/target-unicore32/translate.c
+++ b/target-unicore32/translate.c

@@ -1917,9 +1917,11 @@
             gen_set_pc_im(dc->pc);
             gen_exception(EXCP_DEBUG);
             dc->is_jmp = DISAS_JUMP;
-            /* Advance PC so that clearing the breakpoint will
-               invalidate this TB.  */
-            dc->pc += 2; /* FIXME */
+            /* The address covered by the breakpoint must be included in
+               [tb->pc, tb->pc + tb->size) in order to for it to be
+               properly cleared -- thus we increment the PC here so that
+               the logic setting tb->size below does the right thing.  */
+            dc->pc += 4;
             goto done_generating;
         }
 

diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index aa0c527..06b0163 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c

@@ -3088,6 +3088,11 @@
             tcg_gen_movi_i32(cpu_pc, dc.pc);
             gen_exception(&dc, EXCP_DEBUG);
             dc.is_jmp = DISAS_UPDATE;
+            /* The address covered by the breakpoint must be included in
+               [tb->pc, tb->pc + tb->size) in order to for it to be
+               properly cleared -- thus we increment the PC here so that
+               the logic setting tb->size below does the right thing.  */
+            dc.pc += 2;
             break;
         }
 

diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index 4305af9..79e052f 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c

@@ -288,16 +288,24 @@
     OPC_SRLV     = OPC_SPECIAL | 0x06,
     OPC_ROTRV    = OPC_SPECIAL | (0x01 <<  6) | 0x06,
     OPC_SRAV     = OPC_SPECIAL | 0x07,
-    OPC_JR       = OPC_SPECIAL | 0x08,
+    OPC_JR_R5    = OPC_SPECIAL | 0x08,
     OPC_JALR     = OPC_SPECIAL | 0x09,
     OPC_MOVZ     = OPC_SPECIAL | 0x0A,
     OPC_MOVN     = OPC_SPECIAL | 0x0B,
     OPC_MFHI     = OPC_SPECIAL | 0x10,
     OPC_MFLO     = OPC_SPECIAL | 0x12,
     OPC_MULT     = OPC_SPECIAL | 0x18,
+    OPC_MUL_R6   = OPC_SPECIAL | (0x02 <<  6) | 0x18,
+    OPC_MUH      = OPC_SPECIAL | (0x03 <<  6) | 0x18,
     OPC_MULTU    = OPC_SPECIAL | 0x19,
+    OPC_MULU     = OPC_SPECIAL | (0x02 <<  6) | 0x19,
+    OPC_MUHU     = OPC_SPECIAL | (0x03 <<  6) | 0x19,
     OPC_DIV      = OPC_SPECIAL | 0x1A,
+    OPC_DIV_R6   = OPC_SPECIAL | (0x02 <<  6) | 0x1A,
+    OPC_MOD      = OPC_SPECIAL | (0x03 <<  6) | 0x1A,
     OPC_DIVU     = OPC_SPECIAL | 0x1B,
+    OPC_DIVU_R6  = OPC_SPECIAL | (0x02 <<  6) | 0x1B,
+    OPC_MODU     = OPC_SPECIAL | (0x03 <<  6) | 0x1B,
     OPC_ADDU     = OPC_SPECIAL | 0x21,
     OPC_SUBU     = OPC_SPECIAL | 0x23,
     OPC_AND      = OPC_SPECIAL | 0x24,
@@ -306,13 +314,15 @@
     OPC_NOR      = OPC_SPECIAL | 0x27,
     OPC_SLT      = OPC_SPECIAL | 0x2A,
     OPC_SLTU     = OPC_SPECIAL | 0x2B,
+    OPC_SELEQZ   = OPC_SPECIAL | 0x35,
+    OPC_SELNEZ   = OPC_SPECIAL | 0x37,
 
     OPC_REGIMM   = 0x01 << 26,
     OPC_BLTZ     = OPC_REGIMM | (0x00 << 16),
     OPC_BGEZ     = OPC_REGIMM | (0x01 << 16),
 
     OPC_SPECIAL2 = 0x1c << 26,
-    OPC_MUL      = OPC_SPECIAL2 | 0x002,
+    OPC_MUL_R5   = OPC_SPECIAL2 | 0x002,
 
     OPC_SPECIAL3 = 0x1f << 26,
     OPC_EXT      = OPC_SPECIAL3 | 0x000,
@@ -320,6 +330,15 @@
     OPC_WSBH     = OPC_SPECIAL3 | 0x0a0,
     OPC_SEB      = OPC_SPECIAL3 | 0x420,
     OPC_SEH      = OPC_SPECIAL3 | 0x620,
+
+    /* MIPS r6 doesn't have JR, JALR should be used instead */
+    OPC_JR       = use_mips32r6_instructions ? OPC_JALR : OPC_JR_R5,
+
+    /*
+     * MIPS r6 replaces MUL with an alternative encoding which is
+     * backwards-compatible at the assembly level.
+     */
+    OPC_MUL      = use_mips32r6_instructions ? OPC_MUL_R6 : OPC_MUL_R5,
 } MIPSInsn;
 
 /*
@@ -841,13 +860,20 @@
 }
 
 static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret,
-                            TCGReg c1, TCGReg c2, TCGReg v)
+                            TCGReg c1, TCGReg c2, TCGReg v1, TCGReg v2)
 {
-    MIPSInsn m_opc = OPC_MOVN;
+    bool eqz = false;
+
+    /* If one of the values is zero, put it last to match SEL*Z instructions */
+    if (use_mips32r6_instructions && v1 == 0) {
+        v1 = v2;
+        v2 = 0;
+        cond = tcg_invert_cond(cond);
+    }
 
     switch (cond) {
     case TCG_COND_EQ:
-        m_opc = OPC_MOVZ;
+        eqz = true;
         /* FALLTHRU */
     case TCG_COND_NE:
         if (c2 != 0) {
@@ -860,14 +886,32 @@
         /* Minimize code size by preferring a compare not requiring INV.  */
         if (mips_cmp_map[cond] & MIPS_CMP_INV) {
             cond = tcg_invert_cond(cond);
-            m_opc = OPC_MOVZ;
+            eqz = true;
         }
         tcg_out_setcond(s, cond, TCG_TMP0, c1, c2);
         c1 = TCG_TMP0;
         break;
     }
 
-    tcg_out_opc_reg(s, m_opc, ret, v, c1);
+    if (use_mips32r6_instructions) {
+        MIPSInsn m_opc_t = eqz ? OPC_SELEQZ : OPC_SELNEZ;
+        MIPSInsn m_opc_f = eqz ? OPC_SELNEZ : OPC_SELEQZ;
+
+        if (v2 != 0) {
+            tcg_out_opc_reg(s, m_opc_f, TCG_TMP1, v2, c1);
+        }
+        tcg_out_opc_reg(s, m_opc_t, ret, v1, c1);
+        if (v2 != 0) {
+            tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP1);
+        }
+    } else {
+        MIPSInsn m_opc = eqz ? OPC_MOVZ : OPC_MOVN;
+
+        tcg_out_opc_reg(s, m_opc, ret, v1, c1);
+
+        /* This should be guaranteed via constraints */
+        tcg_debug_assert(v2 == ret);
+    }
 }
 
 static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail)
@@ -1445,21 +1489,45 @@
         i1 = OPC_MULT, i2 = OPC_MFLO;
         goto do_hilo1;
     case INDEX_op_mulsh_i32:
+        if (use_mips32r6_instructions) {
+            tcg_out_opc_reg(s, OPC_MUH, a0, a1, a2);
+            break;
+        }
         i1 = OPC_MULT, i2 = OPC_MFHI;
         goto do_hilo1;
     case INDEX_op_muluh_i32:
+        if (use_mips32r6_instructions) {
+            tcg_out_opc_reg(s, OPC_MUHU, a0, a1, a2);
+            break;
+        }
         i1 = OPC_MULTU, i2 = OPC_MFHI;
         goto do_hilo1;
     case INDEX_op_div_i32:
+        if (use_mips32r6_instructions) {
+            tcg_out_opc_reg(s, OPC_DIV_R6, a0, a1, a2);
+            break;
+        }
         i1 = OPC_DIV, i2 = OPC_MFLO;
         goto do_hilo1;
     case INDEX_op_divu_i32:
+        if (use_mips32r6_instructions) {
+            tcg_out_opc_reg(s, OPC_DIVU_R6, a0, a1, a2);
+            break;
+        }
         i1 = OPC_DIVU, i2 = OPC_MFLO;
         goto do_hilo1;
     case INDEX_op_rem_i32:
+        if (use_mips32r6_instructions) {
+            tcg_out_opc_reg(s, OPC_MOD, a0, a1, a2);
+            break;
+        }
         i1 = OPC_DIV, i2 = OPC_MFHI;
         goto do_hilo1;
     case INDEX_op_remu_i32:
+        if (use_mips32r6_instructions) {
+            tcg_out_opc_reg(s, OPC_MODU, a0, a1, a2);
+            break;
+        }
         i1 = OPC_DIVU, i2 = OPC_MFHI;
     do_hilo1:
         tcg_out_opc_reg(s, i1, 0, a1, a2);
@@ -1536,7 +1604,7 @@
         break;
 
     case INDEX_op_movcond_i32:
-        tcg_out_movcond(s, args[5], a0, a1, a2, args[3]);
+        tcg_out_movcond(s, args[5], a0, a1, a2, args[3], args[4]);
         break;
 
     case INDEX_op_setcond_i32:
@@ -1592,8 +1660,10 @@
 
     { INDEX_op_add_i32, { "r", "rZ", "rJ" } },
     { INDEX_op_mul_i32, { "r", "rZ", "rZ" } },
+#if !use_mips32r6_instructions
     { INDEX_op_muls2_i32, { "r", "r", "rZ", "rZ" } },
     { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rZ" } },
+#endif
     { INDEX_op_mulsh_i32, { "r", "rZ", "rZ" } },
     { INDEX_op_muluh_i32, { "r", "rZ", "rZ" } },
     { INDEX_op_div_i32, { "r", "rZ", "rZ" } },
@@ -1623,7 +1693,11 @@
     { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
 
     { INDEX_op_brcond_i32, { "rZ", "rZ" } },
+#if use_mips32r6_instructions
+    { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "rZ" } },
+#else
     { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "0" } },
+#endif
     { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } },
     { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } },
 

diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index f5ba52c..b1cda37 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h

@@ -96,6 +96,13 @@
 extern bool use_mips32r2_instructions;
 #endif
 
+/* MIPS32R6 instruction set detection */
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 6)
+#define use_mips32r6_instructions  1
+#else
+#define use_mips32r6_instructions  0
+#endif
+
 /* optional instructions */
 #define TCG_TARGET_HAS_div_i32          1
 #define TCG_TARGET_HAS_rem_i32          1
@@ -105,8 +112,8 @@
 #define TCG_TARGET_HAS_orc_i32          0
 #define TCG_TARGET_HAS_eqv_i32          0
 #define TCG_TARGET_HAS_nand_i32         0
-#define TCG_TARGET_HAS_mulu2_i32        1
-#define TCG_TARGET_HAS_muls2_i32        1
+#define TCG_TARGET_HAS_mulu2_i32        (!use_mips32r6_instructions)
+#define TCG_TARGET_HAS_muls2_i32        (!use_mips32r6_instructions)
 #define TCG_TARGET_HAS_muluh_i32        1
 #define TCG_TARGET_HAS_mulsh_i32        1
 

diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 92ef719..2c72565 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c

@@ -700,14 +700,14 @@
 {
     int mb, me;
 
-    if ((c & 0xffff) == c) {
+    if (mask_operand(c, &mb, &me)) {
+        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
+    } else if ((c & 0xffff) == c) {
         tcg_out32(s, ANDI | SAI(src, dst, c));
         return;
     } else if ((c & 0xffff0000) == c) {
         tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
         return;
-    } else if (mask_operand(c, &mb, &me)) {
-        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
     } else {
         tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
         tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
@@ -719,18 +719,18 @@
     int mb, me;
 
     assert(TCG_TARGET_REG_BITS == 64);
-    if ((c & 0xffff) == c) {
-        tcg_out32(s, ANDI | SAI(src, dst, c));
-        return;
-    } else if ((c & 0xffff0000) == c) {
-        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
-        return;
-    } else if (mask64_operand(c, &mb, &me)) {
+    if (mask64_operand(c, &mb, &me)) {
         if (mb == 0) {
             tcg_out_rld(s, RLDICR, dst, src, 0, me);
         } else {
             tcg_out_rld(s, RLDICL, dst, src, 0, mb);
         }
+    } else if ((c & 0xffff) == c) {
+        tcg_out32(s, ANDI | SAI(src, dst, c));
+        return;
+    } else if ((c & 0xffff0000) == c) {
+        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
+        return;
     } else {
         tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
         tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
@@ -1239,11 +1239,36 @@
 
 void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
 {
-    TCGContext s;
+    tcg_insn_unit i1, i2;
+    uint64_t pair;
+    intptr_t diff = addr - jmp_addr;
 
-    s.code_buf = s.code_ptr = (tcg_insn_unit *)jmp_addr;
-    tcg_out_b(&s, 0, (tcg_insn_unit *)addr);
-    flush_icache_range(jmp_addr, jmp_addr + tcg_current_code_size(&s));
+    if (in_range_b(diff)) {
+        i1 = B | (diff & 0x3fffffc);
+        i2 = NOP;
+    } else if (USE_REG_RA) {
+        intptr_t lo, hi;
+        diff = addr - (uintptr_t)tb_ret_addr;
+        lo = (int16_t)diff;
+        hi = (int32_t)(diff - lo);
+        assert(diff == hi + lo);
+        i1 = ADDIS | TAI(TCG_REG_TMP1, TCG_REG_RA, hi >> 16);
+        i2 = ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, lo);
+    } else {
+        assert(TCG_TARGET_REG_BITS == 32 || addr == (int32_t)addr);
+        i1 = ADDIS | TAI(TCG_REG_TMP1, 0, addr >> 16);
+        i2 = ORI | SAI(TCG_REG_TMP1, TCG_REG_TMP1, addr);
+    }
+#ifdef HOST_WORDS_BIGENDIAN
+    pair = (uint64_t)i1 << 32 | i2;
+#else
+    pair = (uint64_t)i2 << 32 | i1;
+#endif
+
+    /* ??? __atomic_store_8, presuming there's some way to do that
+       for 32-bit, otherwise this is good enough for 64-bit.  */
+    *(uint64_t *)jmp_addr = pair;
+    flush_icache_range(jmp_addr, jmp_addr + 8);
 }
 
 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
@@ -1855,12 +1880,10 @@
         if (USE_REG_RA) {
             ptrdiff_t disp = tcg_pcrel_diff(s, tb_ret_addr);
 
-            /* If we can use a direct branch, otherwise use the value in RA.
-               Note that the direct branch is always forward.  If it's in
-               range now, it'll still be in range after the movi.  Don't
-               bother about the 20 bytes where the test here fails but it
-               would succeed below.  */
-            if (!in_range_b(disp)) {
+            /* Use a direct branch if we can, otherwise use the value in RA.
+               Note that the direct branch is always backward, thus we need
+               to account for the possibility of 5 insns from the movi.  */
+            if (!in_range_b(disp - 20)) {
                 tcg_out32(s, MTSPR | RS(TCG_REG_RA) | CTR);
                 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
                 tcg_out32(s, BCCTR | BO_ALWAYS);
@@ -1871,14 +1894,16 @@
         tcg_out_b(s, 0, tb_ret_addr);
         break;
     case INDEX_op_goto_tb:
-        if (s->tb_jmp_offset) {
-            /* Direct jump method.  */
-            s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
-            s->code_ptr += 7;
-        } else {
-            /* Indirect jump method.  */
-            tcg_abort();
+        tcg_debug_assert(s->tb_jmp_offset);
+        /* Direct jump.  Ensure the next insns are 8-byte aligned. */
+        if ((uintptr_t)s->code_ptr & 7) {
+            tcg_out32(s, NOP);
         }
+        s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
+        /* To be replaced by either a branch+nop or a load into TMP1.  */
+        s->code_ptr += 2;
+        tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
+        tcg_out32(s, BCCTR | BO_ALWAYS);
         s->tb_next_offset[args[0]] = tcg_current_code_size(s);
         break;
     case INDEX_op_br:

diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index c6f9570..6d0410c 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h

@@ -173,18 +173,15 @@
 DEF(muluh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i64))
 DEF(mulsh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i64))
 
+#define TLADDR_ARGS  (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? 1 : 2)
+#define DATA64_ARGS  (TCG_TARGET_REG_BITS == 64 ? 1 : 2)
+
 /* QEMU specific */
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-DEF(insn_start, 0, 0, 2 * TARGET_INSN_START_WORDS, TCG_OPF_NOT_PRESENT)
-#else
-DEF(insn_start, 0, 0, TARGET_INSN_START_WORDS, TCG_OPF_NOT_PRESENT)
-#endif
+DEF(insn_start, 0, 0, TLADDR_ARGS * TARGET_INSN_START_WORDS,
+    TCG_OPF_NOT_PRESENT)
 DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END)
 DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END)
 
-#define TLADDR_ARGS    (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? 1 : 2)
-#define DATA64_ARGS  (TCG_TARGET_REG_BITS == 64 ? 1 : 2)
-
 DEF(qemu_ld_i32, 1, TLADDR_ARGS, 1,
     TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 DEF(qemu_st_i32, 0, TLADDR_ARGS + 1, 1,

diff --git a/tests/Makefile b/tests/Makefile
index 0531b30..0739bfe 100644
--- a/tests/Makefile
+++ b/tests/Makefile

@@ -150,6 +150,8 @@
 gcov-files-pci-$(CONFIG_VIRTIO_VGA) += hw/display/virtio-vga.c
 check-qtest-pci-y += tests/intel-hda-test$(EXESUF)
 gcov-files-pci-y += hw/audio/intel-hda.c hw/audio/hda-codec.c
+check-qtest-pci-$(CONFIG_POSIX) += tests/ivshmem-test$(EXESUF)
+gcov-files-pci-y += hw/misc/ivshmem.c
 
 check-qtest-i386-y = tests/endianness-test$(EXESUF)
 check-qtest-i386-y += tests/fdc-test$(EXESUF)
@@ -444,8 +446,16 @@
 tests/test-bitops$(EXESUF): tests/test-bitops.o $(test-util-obj-y)
 tests/test-crypto-hash$(EXESUF): tests/test-crypto-hash.o $(test-crypto-obj-y)
 tests/test-crypto-cipher$(EXESUF): tests/test-crypto-cipher.o $(test-crypto-obj-y)
+
+tests/crypto-tls-x509-helpers.o-cflags := $(TASN1_CFLAGS)
+tests/crypto-tls-x509-helpers.o-libs := $(TASN1_LIBS)
+tests/pkix_asn1_tab.o-cflags := $(TASN1_CFLAGS)
+
+tests/test-crypto-tlscredsx509.o-cflags := $(TASN1_CFLAGS)
 tests/test-crypto-tlscredsx509$(EXESUF): tests/test-crypto-tlscredsx509.o \
 	tests/crypto-tls-x509-helpers.o tests/pkix_asn1_tab.o $(test-crypto-obj-y)
+
+tests/test-crypto-tlssession.o-cflags := $(TASN1_CFLAGS)
 tests/test-crypto-tlssession$(EXESUF): tests/test-crypto-tlssession.o \
 	tests/crypto-tls-x509-helpers.o tests/pkix_asn1_tab.o $(test-crypto-obj-y)
 
@@ -514,12 +524,12 @@
 tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o $(test-util-obj-y)
 tests/test-write-threshold$(EXESUF): tests/test-write-threshold.o $(test-block-obj-y)
 tests/test-netfilter$(EXESUF): tests/test-netfilter.o $(qtest-obj-y)
+tests/ivshmem-test$(EXESUF): tests/ivshmem-test.o contrib/ivshmem-server/ivshmem-server.o $(libqos-pc-obj-y)
+tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o
 
 ifeq ($(CONFIG_POSIX),y)
 LIBS += -lutil
 endif
-LIBS += $(TEST_LIBS)
-CFLAGS += $(TEST_CFLAGS)
 
 # QTest rules
 

diff --git a/tests/fdc-test.c b/tests/fdc-test.c
index 416394f..b5a4696 100644
--- a/tests/fdc-test.c
+++ b/tests/fdc-test.c

@@ -304,9 +304,7 @@
     qmp_discard_response("{'execute':'change', 'arguments':{"
                          " 'device':'floppy0', 'target': %s, 'arg': 'raw' }}",
                          test_image);
-    qmp_discard_response(""); /* ignore event
-                                 (FIXME open -> open transition?!) */
-    qmp_discard_response(""); /* ignore event */
+    qmp_discard_response(""); /* ignore event (open -> close) */
 
     dir = inb(FLOPPY_BASE + reg_dir);
     assert_bit_set(dir, DSKCHG);

diff --git a/tests/ivshmem-test.c b/tests/ivshmem-test.c
new file mode 100644
index 0000000..c8f0cf0
--- /dev/null
+++ b/tests/ivshmem-test.c

@@ -0,0 +1,491 @@
+/*
+ * QTest testcase for ivshmem
+ *
+ * Copyright (c) 2014 SUSE LINUX Products GmbH
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <glib.h>
+#include <glib/gstdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include "contrib/ivshmem-server/ivshmem-server.h"
+#include "libqos/pci-pc.h"
+#include "libqtest.h"
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+#define TMPSHMSIZE (1 << 20)
+static char *tmpshm;
+static void *tmpshmem;
+static char *tmpdir;
+static char *tmpserver;
+
+static void save_fn(QPCIDevice *dev, int devfn, void *data)
+{
+    QPCIDevice **pdev = (QPCIDevice **) data;
+
+    *pdev = dev;
+}
+
+static QPCIDevice *get_device(void)
+{
+    QPCIDevice *dev;
+    QPCIBus *pcibus;
+
+    pcibus = qpci_init_pc();
+    qpci_device_foreach(pcibus, 0x1af4, 0x1110, save_fn, &dev);
+    g_assert(dev != NULL);
+
+    return dev;
+}
+
+typedef struct _IVState {
+    QTestState *qtest;
+    void *reg_base, *mem_base;
+    QPCIDevice *dev;
+} IVState;
+
+enum Reg {
+    INTRMASK = 0,
+    INTRSTATUS = 4,
+    IVPOSITION = 8,
+    DOORBELL = 12,
+};
+
+static const char* reg2str(enum Reg reg) {
+    switch (reg) {
+    case INTRMASK:
+        return "IntrMask";
+    case INTRSTATUS:
+        return "IntrStatus";
+    case IVPOSITION:
+        return "IVPosition";
+    case DOORBELL:
+        return "DoorBell";
+    default:
+        return NULL;
+    }
+}
+
+static inline unsigned in_reg(IVState *s, enum Reg reg)
+{
+    const char *name = reg2str(reg);
+    QTestState *qtest = global_qtest;
+    unsigned res;
+
+    global_qtest = s->qtest;
+    res = qpci_io_readl(s->dev, s->reg_base + reg);
+    g_test_message("*%s -> %x\n", name, res);
+    global_qtest = qtest;
+
+    return res;
+}
+
+static inline void out_reg(IVState *s, enum Reg reg, unsigned v)
+{
+    const char *name = reg2str(reg);
+    QTestState *qtest = global_qtest;
+
+    global_qtest = s->qtest;
+    g_test_message("%x -> *%s\n", v, name);
+    qpci_io_writel(s->dev, s->reg_base + reg, v);
+    global_qtest = qtest;
+}
+
+static void setup_vm_cmd(IVState *s, const char *cmd, bool msix)
+{
+    uint64_t barsize;
+
+    s->qtest = qtest_start(cmd);
+
+    s->dev = get_device();
+
+    /* FIXME: other bar order fails, mappings changes */
+    s->mem_base = qpci_iomap(s->dev, 2, &barsize);
+    g_assert_nonnull(s->mem_base);
+    g_assert_cmpuint(barsize, ==, TMPSHMSIZE);
+
+    if (msix) {
+        qpci_msix_enable(s->dev);
+    }
+
+    s->reg_base = qpci_iomap(s->dev, 0, &barsize);
+    g_assert_nonnull(s->reg_base);
+    g_assert_cmpuint(barsize, ==, 256);
+
+    qpci_device_enable(s->dev);
+}
+
+static void setup_vm(IVState *s)
+{
+    char *cmd = g_strdup_printf("-device ivshmem,shm=%s,size=1M", tmpshm);
+
+    setup_vm_cmd(s, cmd, false);
+
+    g_free(cmd);
+}
+
+static void test_ivshmem_single(void)
+{
+    IVState state, *s;
+    uint32_t data[1024];
+    int i;
+
+    setup_vm(&state);
+    s = &state;
+
+    /* valid io */
+    out_reg(s, INTRMASK, 0);
+    in_reg(s, INTRSTATUS);
+    in_reg(s, IVPOSITION);
+
+    out_reg(s, INTRMASK, 0xffffffff);
+    g_assert_cmpuint(in_reg(s, INTRMASK), ==, 0xffffffff);
+    out_reg(s, INTRSTATUS, 1);
+    /* XXX: intercept IRQ, not seen in resp */
+    g_assert_cmpuint(in_reg(s, INTRSTATUS), ==, 1);
+
+    /* invalid io */
+    out_reg(s, IVPOSITION, 1);
+    out_reg(s, DOORBELL, 8 << 16);
+
+    for (i = 0; i < G_N_ELEMENTS(data); i++) {
+        data[i] = i;
+    }
+    qtest_memwrite(s->qtest, (uintptr_t)s->mem_base, data, sizeof(data));
+
+    for (i = 0; i < G_N_ELEMENTS(data); i++) {
+        g_assert_cmpuint(((uint32_t *)tmpshmem)[i], ==, i);
+    }
+
+    memset(data, 0, sizeof(data));
+
+    qtest_memread(s->qtest, (uintptr_t)s->mem_base, data, sizeof(data));
+    for (i = 0; i < G_N_ELEMENTS(data); i++) {
+        g_assert_cmpuint(data[i], ==, i);
+    }
+
+    qtest_quit(s->qtest);
+}
+
+static void test_ivshmem_pair(void)
+{
+    IVState state1, state2, *s1, *s2;
+    char *data;
+    int i;
+
+    setup_vm(&state1);
+    s1 = &state1;
+    setup_vm(&state2);
+    s2 = &state2;
+
+    data = g_malloc0(TMPSHMSIZE);
+
+    /* host write, guest 1 & 2 read */
+    memset(tmpshmem, 0x42, TMPSHMSIZE);
+    qtest_memread(s1->qtest, (uintptr_t)s1->mem_base, data, TMPSHMSIZE);
+    for (i = 0; i < TMPSHMSIZE; i++) {
+        g_assert_cmpuint(data[i], ==, 0x42);
+    }
+    qtest_memread(s2->qtest, (uintptr_t)s2->mem_base, data, TMPSHMSIZE);
+    for (i = 0; i < TMPSHMSIZE; i++) {
+        g_assert_cmpuint(data[i], ==, 0x42);
+    }
+
+    /* guest 1 write, guest 2 read */
+    memset(data, 0x43, TMPSHMSIZE);
+    qtest_memwrite(s1->qtest, (uintptr_t)s1->mem_base, data, TMPSHMSIZE);
+    memset(data, 0, TMPSHMSIZE);
+    qtest_memread(s2->qtest, (uintptr_t)s2->mem_base, data, TMPSHMSIZE);
+    for (i = 0; i < TMPSHMSIZE; i++) {
+        g_assert_cmpuint(data[i], ==, 0x43);
+    }
+
+    /* guest 2 write, guest 1 read */
+    memset(data, 0x44, TMPSHMSIZE);
+    qtest_memwrite(s2->qtest, (uintptr_t)s2->mem_base, data, TMPSHMSIZE);
+    memset(data, 0, TMPSHMSIZE);
+    qtest_memread(s1->qtest, (uintptr_t)s2->mem_base, data, TMPSHMSIZE);
+    for (i = 0; i < TMPSHMSIZE; i++) {
+        g_assert_cmpuint(data[i], ==, 0x44);
+    }
+
+    qtest_quit(s1->qtest);
+    qtest_quit(s2->qtest);
+    g_free(data);
+}
+
+typedef struct ServerThread {
+    GThread *thread;
+    IvshmemServer *server;
+    int pipe[2]; /* to handle quit */
+} ServerThread;
+
+static void *server_thread(void *data)
+{
+    ServerThread *t = data;
+    IvshmemServer *server = t->server;
+
+    while (true) {
+        fd_set fds;
+        int maxfd, ret;
+
+        FD_ZERO(&fds);
+        FD_SET(t->pipe[0], &fds);
+        maxfd = t->pipe[0] + 1;
+
+        ivshmem_server_get_fds(server, &fds, &maxfd);
+
+        ret = select(maxfd, &fds, NULL, NULL, NULL);
+
+        if (ret < 0) {
+            if (errno == EINTR) {
+                continue;
+            }
+
+            g_critical("select error: %s\n", strerror(errno));
+            break;
+        }
+        if (ret == 0) {
+            continue;
+        }
+
+        if (FD_ISSET(t->pipe[0], &fds)) {
+            break;
+        }
+
+        if (ivshmem_server_handle_fds(server, &fds, maxfd) < 0) {
+            g_critical("ivshmem_server_handle_fds() failed\n");
+            break;
+        }
+    }
+
+    return NULL;
+}
+
+static void setup_vm_with_server(IVState *s, int nvectors)
+{
+    char *cmd = g_strdup_printf("-chardev socket,id=chr0,path=%s,nowait "
+                                "-device ivshmem,size=1M,chardev=chr0,vectors=%d",
+                                tmpserver, nvectors);
+
+    setup_vm_cmd(s, cmd, true);
+
+    g_free(cmd);
+}
+
+static void test_ivshmem_server(void)
+{
+    IVState state1, state2, *s1, *s2;
+    ServerThread thread;
+    IvshmemServer server;
+    int ret, vm1, vm2;
+    int nvectors = 2;
+    guint64 end_time = g_get_monotonic_time() + 5 * G_TIME_SPAN_SECOND;
+
+    memset(tmpshmem, 0x42, TMPSHMSIZE);
+    ret = ivshmem_server_init(&server, tmpserver, tmpshm,
+                              TMPSHMSIZE, nvectors,
+                              g_test_verbose());
+    g_assert_cmpint(ret, ==, 0);
+
+    ret = ivshmem_server_start(&server);
+    g_assert_cmpint(ret, ==, 0);
+
+    setup_vm_with_server(&state1, nvectors);
+    s1 = &state1;
+    setup_vm_with_server(&state2, nvectors);
+    s2 = &state2;
+
+    g_assert_cmpuint(in_reg(s1, IVPOSITION), ==, 0xffffffff);
+    g_assert_cmpuint(in_reg(s2, IVPOSITION), ==, 0xffffffff);
+
+    g_assert_cmpuint(qtest_readb(s1->qtest, (uintptr_t)s1->mem_base), ==, 0x00);
+
+    thread.server = &server;
+    ret = pipe(thread.pipe);
+    g_assert_cmpint(ret, ==, 0);
+    thread.thread = g_thread_new("ivshmem-server", server_thread, &thread);
+    g_assert(thread.thread != NULL);
+
+    /* waiting until mapping is done */
+    while (g_get_monotonic_time() < end_time) {
+        g_usleep(1000);
+
+        if (qtest_readb(s1->qtest, (uintptr_t)s1->mem_base) == 0x42 &&
+            qtest_readb(s2->qtest, (uintptr_t)s2->mem_base) == 0x42) {
+            break;
+        }
+    }
+
+    /* check got different VM ids */
+    vm1 = in_reg(s1, IVPOSITION);
+    vm2 = in_reg(s2, IVPOSITION);
+    g_assert_cmpuint(vm1, !=, vm2);
+
+    global_qtest = s1->qtest;
+    ret = qpci_msix_table_size(s1->dev);
+    g_assert_cmpuint(ret, ==, nvectors);
+
+    /* ping vm2 -> vm1 */
+    ret = qpci_msix_pending(s1->dev, 0);
+    g_assert_cmpuint(ret, ==, 0);
+    out_reg(s2, DOORBELL, vm1 << 16);
+    do {
+        g_usleep(10000);
+        ret = qpci_msix_pending(s1->dev, 0);
+    } while (ret == 0 && g_get_monotonic_time() < end_time);
+    g_assert_cmpuint(ret, !=, 0);
+
+    /* ping vm1 -> vm2 */
+    global_qtest = s2->qtest;
+    ret = qpci_msix_pending(s2->dev, 0);
+    g_assert_cmpuint(ret, ==, 0);
+    out_reg(s1, DOORBELL, vm2 << 16);
+    do {
+        g_usleep(10000);
+        ret = qpci_msix_pending(s2->dev, 0);
+    } while (ret == 0 && g_get_monotonic_time() < end_time);
+    g_assert_cmpuint(ret, !=, 0);
+
+    qtest_quit(s2->qtest);
+    qtest_quit(s1->qtest);
+
+    if (qemu_write_full(thread.pipe[1], "q", 1) != 1) {
+        g_error("qemu_write_full: %s", g_strerror(errno));
+    }
+
+    g_thread_join(thread.thread);
+
+    ivshmem_server_close(&server);
+    close(thread.pipe[1]);
+    close(thread.pipe[0]);
+}
+
+#define PCI_SLOT_HP             0x06
+
+static void test_ivshmem_hotplug(void)
+{
+    gchar *opts;
+
+    qtest_start("");
+
+    opts = g_strdup_printf("'shm': '%s', 'size': '1M'", tmpshm);
+
+    qpci_plug_device_test("ivshmem", "iv1", PCI_SLOT_HP, opts);
+    qpci_unplug_acpi_device_test("iv1", PCI_SLOT_HP);
+
+    qtest_end();
+    g_free(opts);
+}
+
+static void test_ivshmem_memdev(void)
+{
+    IVState state;
+
+    /* just for the sake of checking memory-backend property */
+    setup_vm_cmd(&state, "-object memory-backend-ram,size=1M,id=mb1"
+                 " -device ivshmem,memdev=mb1", false);
+
+    qtest_quit(state.qtest);
+}
+
+static void cleanup(void)
+{
+    if (tmpshmem) {
+        munmap(tmpshmem, TMPSHMSIZE);
+        tmpshmem = NULL;
+    }
+
+    if (tmpshm) {
+        shm_unlink(tmpshm);
+        g_free(tmpshm);
+        tmpshm = NULL;
+    }
+
+    if (tmpserver) {
+        g_unlink(tmpserver);
+        g_free(tmpserver);
+        tmpserver = NULL;
+    }
+
+    if (tmpdir) {
+        g_rmdir(tmpdir);
+        tmpdir = NULL;
+    }
+}
+
+static void abrt_handler(void *data)
+{
+    cleanup();
+}
+
+static gchar *mktempshm(int size, int *fd)
+{
+    while (true) {
+        gchar *name;
+
+        name = g_strdup_printf("/qtest-%u-%u", getpid(), g_random_int());
+        *fd = shm_open(name, O_CREAT|O_RDWR|O_EXCL,
+                       S_IRWXU|S_IRWXG|S_IRWXO);
+        if (*fd > 0) {
+            g_assert(ftruncate(*fd, size) == 0);
+            return name;
+        }
+
+        g_free(name);
+
+        if (errno != EEXIST) {
+            perror("shm_open");
+            return NULL;
+        }
+    }
+}
+
+int main(int argc, char **argv)
+{
+    int ret, fd;
+    gchar dir[] = "/tmp/ivshmem-test.XXXXXX";
+
+#if !GLIB_CHECK_VERSION(2, 31, 0)
+    if (!g_thread_supported()) {
+        g_thread_init(NULL);
+    }
+#endif
+
+    g_test_init(&argc, &argv, NULL);
+
+    qtest_add_abrt_handler(abrt_handler, NULL);
+    /* shm */
+    tmpshm = mktempshm(TMPSHMSIZE, &fd);
+    if (!tmpshm) {
+        return 0;
+    }
+    tmpshmem = mmap(0, TMPSHMSIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+    g_assert(tmpshmem != MAP_FAILED);
+    /* server */
+    if (mkdtemp(dir) == NULL) {
+        g_error("mkdtemp: %s", g_strerror(errno));
+    }
+    tmpdir = dir;
+    tmpserver = g_strconcat(tmpdir, "/server", NULL);
+
+    qtest_add_func("/ivshmem/single", test_ivshmem_single);
+    qtest_add_func("/ivshmem/pair", test_ivshmem_pair);
+    qtest_add_func("/ivshmem/server", test_ivshmem_server);
+    qtest_add_func("/ivshmem/hotplug", test_ivshmem_hotplug);
+    qtest_add_func("/ivshmem/memdev", test_ivshmem_memdev);
+
+    ret = g_test_run();
+
+    cleanup();
+
+    return ret;
+}

diff --git a/tests/libqtest.c b/tests/libqtest.c
index b6d700c..f6f3d7a 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c

@@ -48,6 +48,7 @@
     pid_t qemu_pid;  /* our child QEMU process */
 };
 
+static GHookList abrt_hooks;
 static GList *qtest_instances;
 static struct sigaction sigact_old;
 
@@ -111,10 +112,7 @@
 
 static void sigabrt_handler(int signo)
 {
-    GList *elem;
-    for (elem = qtest_instances; elem; elem = elem->next) {
-        kill_qemu(elem->data);
-    }
+    g_hook_list_invoke(&abrt_hooks, FALSE);
 }
 
 static void setup_sigabrt_handler(void)
@@ -135,6 +133,23 @@
     sigaction(SIGABRT, &sigact_old, NULL);
 }
 
+void qtest_add_abrt_handler(void (*fn), const void *data)
+{
+    GHook *hook;
+
+    /* Only install SIGABRT handler once */
+    if (!abrt_hooks.is_setup) {
+        g_hook_list_init(&abrt_hooks, sizeof(GHook));
+        setup_sigabrt_handler();
+    }
+
+    hook = g_hook_alloc(&abrt_hooks);
+    hook->func = fn;
+    hook->data = (void *)data;
+
+    g_hook_prepend(&abrt_hooks, hook);
+}
+
 QTestState *qtest_init(const char *extra_args)
 {
     QTestState *s;
@@ -155,12 +170,7 @@
     sock = init_socket(socket_path);
     qmpsock = init_socket(qmp_socket_path);
 
-    /* Only install SIGABRT handler once */
-    if (!qtest_instances) {
-        setup_sigabrt_handler();
-    }
-
-    qtest_instances = g_list_prepend(qtest_instances, s);
+    qtest_add_abrt_handler(kill_qemu, s);
 
     s->qemu_pid = fork();
     if (s->qemu_pid == 0) {
@@ -208,13 +218,14 @@
 
 void qtest_quit(QTestState *s)
 {
+    qtest_instances = g_list_remove(qtest_instances, s);
+    g_hook_destroy_link(&abrt_hooks, g_hook_find_data(&abrt_hooks, TRUE, s));
+
     /* Uninstall SIGABRT handler on last instance */
-    if (qtest_instances && !qtest_instances->next) {
+    if (!qtest_instances) {
         cleanup_sigabrt_handler();
     }
 
-    qtest_instances = g_list_remove(qtest_instances, s);
-
     kill_qemu(s);
     close(s->fd);
     close(s->qmp_fd);

diff --git a/tests/libqtest.h b/tests/libqtest.h
index 9818ef7..df08745 100644
--- a/tests/libqtest.h
+++ b/tests/libqtest.h

@@ -450,6 +450,8 @@
         g_free(path); \
     } while (0)
 
+void qtest_add_abrt_handler(void (*fn), const void *data);
+
 /**
  * qtest_start:
  * @args: other arguments to pass to QEMU

diff --git a/tests/qemu-iotests/071 b/tests/qemu-iotests/071
index 9eaa49b..92ab991 100755
--- a/tests/qemu-iotests/071
+++ b/tests/qemu-iotests/071

@@ -104,11 +104,20 @@
 echo "=== Testing blkdebug on existing block device ==="
 echo
 
-run_qemu -drive "file=$TEST_IMG,format=raw,if=none,id=drive0" <<EOF
+run_qemu <<EOF
 { "execute": "qmp_capabilities" }
 { "execute": "blockdev-add",
     "arguments": {
         "options": {
+            "node-name": "drive0",
+            "driver": "file",
+            "filename": "$TEST_IMG"
+        }
+    }
+}
+{ "execute": "blockdev-add",
+    "arguments": {
+        "options": {
             "driver": "$IMGFMT",
             "id": "drive0-debug",
             "file": {
@@ -133,20 +142,29 @@
 echo "=== Testing blkverify on existing block device ==="
 echo
 
-run_qemu -drive "file=$TEST_IMG,format=$IMGFMT,if=none,id=drive0" <<EOF
+run_qemu <<EOF
 { "execute": "qmp_capabilities" }
 { "execute": "blockdev-add",
     "arguments": {
         "options": {
+            "node-name": "drive0",
+            "driver": "$IMGFMT",
+            "file": {
+                "driver": "file",
+                "filename": "$TEST_IMG"
+            }
+        }
+    }
+}
+{ "execute": "blockdev-add",
+    "arguments": {
+        "options": {
             "driver": "blkverify",
             "id": "drive0-verify",
             "test": "drive0",
             "raw": {
-                "driver": "raw",
-                "file": {
-                    "driver": "file",
-                    "filename": "$TEST_IMG.base"
-                }
+                "driver": "file",
+                "filename": "$TEST_IMG.base"
             }
         }
     }
@@ -163,11 +181,20 @@
 echo "=== Testing blkverify on existing raw block device ==="
 echo
 
-run_qemu -drive "file=$TEST_IMG.base,format=raw,if=none,id=drive0" <<EOF
+run_qemu <<EOF
 { "execute": "qmp_capabilities" }
 { "execute": "blockdev-add",
     "arguments": {
         "options": {
+            "node-name": "drive0",
+            "driver": "file",
+            "filename": "$TEST_IMG.base"
+        }
+    }
+}
+{ "execute": "blockdev-add",
+    "arguments": {
+        "options": {
             "driver": "blkverify",
             "id": "drive0-verify",
             "test": {
@@ -193,11 +220,20 @@
 echo "=== Testing blkdebug's set-state through QMP ==="
 echo
 
-run_qemu -drive "file=$TEST_IMG,format=raw,if=none,id=drive0" <<EOF
+run_qemu <<EOF
 { "execute": "qmp_capabilities" }
 { "execute": "blockdev-add",
     "arguments": {
         "options": {
+            "node-name": "drive0",
+            "driver": "file",
+            "filename": "$TEST_IMG"
+        }
+    }
+}
+{ "execute": "blockdev-add",
+    "arguments": {
+        "options": {
             "driver": "$IMGFMT",
             "id": "drive0-debug",
             "file": {

diff --git a/tests/qemu-iotests/071.out b/tests/qemu-iotests/071.out
index 8d2095a..2b40ead 100644
--- a/tests/qemu-iotests/071.out
+++ b/tests/qemu-iotests/071.out

@@ -42,10 +42,11 @@
 
 === Testing blkdebug on existing block device ===
 
-Testing: -drive file=TEST_DIR/t.IMGFMT,format=raw,if=none,id=drive0
+Testing:
 QMP_VERSION
 {"return": {}}
 {"return": {}}
+{"return": {}}
 read failed: Input/output error
 {"return": ""}
 {"return": {}}
@@ -56,28 +57,31 @@
 
 === Testing blkverify on existing block device ===
 
-Testing: -drive file=TEST_DIR/t.IMGFMT,format=IMGFMT,if=none,id=drive0
+Testing:
 QMP_VERSION
 {"return": {}}
 {"return": {}}
+{"return": {}}
 blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0
 
 
 === Testing blkverify on existing raw block device ===
 
-Testing: -drive file=TEST_DIR/t.IMGFMT.base,format=raw,if=none,id=drive0
+Testing:
 QMP_VERSION
 {"return": {}}
 {"return": {}}
+{"return": {}}
 blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0
 
 
 === Testing blkdebug's set-state through QMP ===
 
-Testing: -drive file=TEST_DIR/t.IMGFMT,format=raw,if=none,id=drive0
+Testing:
 QMP_VERSION
 {"return": {}}
 {"return": {}}
+{"return": {}}
 read 512/512 bytes at offset 0
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 {"return": ""}

diff --git a/tests/qemu-iotests/081 b/tests/qemu-iotests/081
index 51873ff..e4b4c6c 100755
--- a/tests/qemu-iotests/081
+++ b/tests/qemu-iotests/081

@@ -102,17 +102,29 @@
 echo
 echo "== checking mixed reference/option specification =="
 
-run_qemu -drive "file=$TEST_DIR/2.raw,format=$IMGFMT,if=none,id=drive2" <<EOF
+run_qemu <<EOF
 { "execute": "qmp_capabilities" }
 { "execute": "blockdev-add",
     "arguments": {
         "options": {
+            "node-name": "drive2",
+            "driver": "$IMGFMT",
+            "file": {
+                "driver": "file",
+                "filename": "$TEST_DIR/2.raw"
+            }
+        }
+    }
+}
+{ "execute": "blockdev-add",
+    "arguments": {
+        "options": {
             "driver": "quorum",
             "id": "drive0-quorum",
             "vote-threshold": 2,
             "children": [
                 {
-                    "driver": "raw",
+                    "driver": "$IMGFMT",
                     "file": {
                         "driver": "file",
                         "filename": "$TEST_DIR/1.raw"
@@ -120,7 +132,7 @@
                 },
                 "drive2",
                 {
-                    "driver": "raw",
+                    "driver": "$IMGFMT",
                     "file": {
                         "driver": "file",
                         "filename": "$TEST_DIR/3.raw"

diff --git a/tests/qemu-iotests/081.out b/tests/qemu-iotests/081.out
index 044793d..7063231 100644
--- a/tests/qemu-iotests/081.out
+++ b/tests/qemu-iotests/081.out

@@ -26,11 +26,12 @@
 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 
 == checking mixed reference/option specification ==
-Testing: -drive file=TEST_DIR/2.IMGFMT,format=IMGFMT,if=none,id=drive2
+Testing:
 QMP_VERSION
 {"return": {}}
 {"return": {}}
-{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "QUORUM_REPORT_BAD", "data": {"node-name": "NODE_NAME", "sectors-count": 20480, "sector-num": 0}}
+{"return": {}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "QUORUM_REPORT_BAD", "data": {"node-name": "drive2", "sectors-count": 20480, "sector-num": 0}}
 read 10485760/10485760 bytes at offset 0
 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 {"return": ""}

diff --git a/tests/qemu-iotests/087 b/tests/qemu-iotests/087
index 8694749..af44299 100755
--- a/tests/qemu-iotests/087
+++ b/tests/qemu-iotests/087

@@ -54,7 +54,7 @@
 _make_test_img $size
 
 echo
-echo === Missing ID ===
+echo === Missing ID and node-name ===
 echo
 
 run_qemu <<EOF

diff --git a/tests/qemu-iotests/087.out b/tests/qemu-iotests/087.out
index c509a40..7d62cd5 100644
--- a/tests/qemu-iotests/087.out
+++ b/tests/qemu-iotests/087.out

@@ -1,12 +1,12 @@
 QA output created by 087
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
 
-=== Missing ID ===
+=== Missing ID and node-name ===
 
 Testing:
 QMP_VERSION
 {"return": {}}
-{"error": {"class": "GenericError", "desc": "Block device needs an ID"}}
+{"error": {"class": "GenericError", "desc": "'id' and/or 'node-name' need to be specified for the root node"}}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN"}
 

diff --git a/tests/test-aio.c b/tests/test-aio.c
index 217e337..1623803 100644
--- a/tests/test-aio.c
+++ b/tests/test-aio.c

@@ -118,6 +118,12 @@
     return NULL;
 }
 
+static void set_event_notifier(AioContext *ctx, EventNotifier *notifier,
+                               EventNotifierHandler *handler)
+{
+    aio_set_event_notifier(ctx, notifier, false, handler);
+}
+
 static void dummy_notifier_read(EventNotifier *unused)
 {
     g_assert(false); /* should never be invoked */
@@ -131,7 +137,7 @@
 
     /* Dummy event notifier ensures aio_poll() will block */
     event_notifier_init(&notifier, false);
-    aio_set_event_notifier(ctx, &notifier, dummy_notifier_read);
+    set_event_notifier(ctx, &notifier, dummy_notifier_read);
     g_assert(!aio_poll(ctx, false)); /* consume aio_notify() */
 
     qemu_mutex_init(&data.start_lock);
@@ -149,7 +155,7 @@
     aio_context_release(ctx);
 
     qemu_thread_join(&thread);
-    aio_set_event_notifier(ctx, &notifier, NULL);
+    set_event_notifier(ctx, &notifier, NULL);
     event_notifier_cleanup(&notifier);
 
     g_assert(data.thread_acquired);
@@ -308,11 +314,11 @@
 {
     EventNotifierTestData data = { .n = 0, .active = 0 };
     event_notifier_init(&data.e, false);
-    aio_set_event_notifier(ctx, &data.e, event_ready_cb);
+    set_event_notifier(ctx, &data.e, event_ready_cb);
     g_assert(!aio_poll(ctx, false));
     g_assert_cmpint(data.n, ==, 0);
 
-    aio_set_event_notifier(ctx, &data.e, NULL);
+    set_event_notifier(ctx, &data.e, NULL);
     g_assert(!aio_poll(ctx, false));
     g_assert_cmpint(data.n, ==, 0);
     event_notifier_cleanup(&data.e);
@@ -322,7 +328,7 @@
 {
     EventNotifierTestData data = { .n = 0, .active = 1 };
     event_notifier_init(&data.e, false);
-    aio_set_event_notifier(ctx, &data.e, event_ready_cb);
+    set_event_notifier(ctx, &data.e, event_ready_cb);
     while (aio_poll(ctx, false));
     g_assert_cmpint(data.n, ==, 0);
     g_assert_cmpint(data.active, ==, 1);
@@ -336,7 +342,7 @@
     g_assert_cmpint(data.n, ==, 1);
     g_assert_cmpint(data.active, ==, 0);
 
-    aio_set_event_notifier(ctx, &data.e, NULL);
+    set_event_notifier(ctx, &data.e, NULL);
     g_assert(!aio_poll(ctx, false));
     g_assert_cmpint(data.n, ==, 1);
 
@@ -347,7 +353,7 @@
 {
     EventNotifierTestData data = { .n = 0, .active = 10, .auto_set = true };
     event_notifier_init(&data.e, false);
-    aio_set_event_notifier(ctx, &data.e, event_ready_cb);
+    set_event_notifier(ctx, &data.e, event_ready_cb);
     while (aio_poll(ctx, false));
     g_assert_cmpint(data.n, ==, 0);
     g_assert_cmpint(data.active, ==, 10);
@@ -363,18 +369,41 @@
     g_assert_cmpint(data.active, ==, 0);
     g_assert(!aio_poll(ctx, false));
 
-    aio_set_event_notifier(ctx, &data.e, NULL);
+    set_event_notifier(ctx, &data.e, NULL);
     g_assert(!aio_poll(ctx, false));
     event_notifier_cleanup(&data.e);
 }
 
+static void test_aio_external_client(void)
+{
+    int i, j;
+
+    for (i = 1; i < 3; i++) {
+        EventNotifierTestData data = { .n = 0, .active = 10, .auto_set = true };
+        event_notifier_init(&data.e, false);
+        aio_set_event_notifier(ctx, &data.e, true, event_ready_cb);
+        event_notifier_set(&data.e);
+        for (j = 0; j < i; j++) {
+            aio_disable_external(ctx);
+        }
+        for (j = 0; j < i; j++) {
+            assert(!aio_poll(ctx, false));
+            assert(event_notifier_test_and_clear(&data.e));
+            event_notifier_set(&data.e);
+            aio_enable_external(ctx);
+        }
+        assert(aio_poll(ctx, false));
+        event_notifier_cleanup(&data.e);
+    }
+}
+
 static void test_wait_event_notifier_noflush(void)
 {
     EventNotifierTestData data = { .n = 0 };
     EventNotifierTestData dummy = { .n = 0, .active = 1 };
 
     event_notifier_init(&data.e, false);
-    aio_set_event_notifier(ctx, &data.e, event_ready_cb);
+    set_event_notifier(ctx, &data.e, event_ready_cb);
 
     g_assert(!aio_poll(ctx, false));
     g_assert_cmpint(data.n, ==, 0);
@@ -387,7 +416,7 @@
 
     /* An active event notifier forces aio_poll to look at EventNotifiers.  */
     event_notifier_init(&dummy.e, false);
-    aio_set_event_notifier(ctx, &dummy.e, event_ready_cb);
+    set_event_notifier(ctx, &dummy.e, event_ready_cb);
 
     event_notifier_set(&data.e);
     g_assert(aio_poll(ctx, false));
@@ -407,10 +436,10 @@
     g_assert_cmpint(dummy.n, ==, 1);
     g_assert_cmpint(dummy.active, ==, 0);
 
-    aio_set_event_notifier(ctx, &dummy.e, NULL);
+    set_event_notifier(ctx, &dummy.e, NULL);
     event_notifier_cleanup(&dummy.e);
 
-    aio_set_event_notifier(ctx, &data.e, NULL);
+    set_event_notifier(ctx, &data.e, NULL);
     g_assert(!aio_poll(ctx, false));
     g_assert_cmpint(data.n, ==, 2);
 
@@ -428,7 +457,7 @@
      * an fd to wait on. Fixing this breaks other tests. So create a dummy one.
      */
     event_notifier_init(&e, false);
-    aio_set_event_notifier(ctx, &e, dummy_io_handler_read);
+    set_event_notifier(ctx, &e, dummy_io_handler_read);
     aio_poll(ctx, false);
 
     aio_timer_init(ctx, &data.timer, data.clock_type,
@@ -467,7 +496,7 @@
     g_assert(!aio_poll(ctx, false));
     g_assert_cmpint(data.n, ==, 2);
 
-    aio_set_event_notifier(ctx, &e, NULL);
+    set_event_notifier(ctx, &e, NULL);
     event_notifier_cleanup(&e);
 
     timer_del(&data.timer);
@@ -638,11 +667,11 @@
 {
     EventNotifierTestData data = { .n = 0, .active = 0 };
     event_notifier_init(&data.e, false);
-    aio_set_event_notifier(ctx, &data.e, event_ready_cb);
+    set_event_notifier(ctx, &data.e, event_ready_cb);
     while (g_main_context_iteration(NULL, false));
     g_assert_cmpint(data.n, ==, 0);
 
-    aio_set_event_notifier(ctx, &data.e, NULL);
+    set_event_notifier(ctx, &data.e, NULL);
     while (g_main_context_iteration(NULL, false));
     g_assert_cmpint(data.n, ==, 0);
     event_notifier_cleanup(&data.e);
@@ -652,7 +681,7 @@
 {
     EventNotifierTestData data = { .n = 0, .active = 1 };
     event_notifier_init(&data.e, false);
-    aio_set_event_notifier(ctx, &data.e, event_ready_cb);
+    set_event_notifier(ctx, &data.e, event_ready_cb);
     while (g_main_context_iteration(NULL, false));
     g_assert_cmpint(data.n, ==, 0);
     g_assert_cmpint(data.active, ==, 1);
@@ -666,7 +695,7 @@
     g_assert_cmpint(data.n, ==, 1);
     g_assert_cmpint(data.active, ==, 0);
 
-    aio_set_event_notifier(ctx, &data.e, NULL);
+    set_event_notifier(ctx, &data.e, NULL);
     while (g_main_context_iteration(NULL, false));
     g_assert_cmpint(data.n, ==, 1);
 
@@ -677,7 +706,7 @@
 {
     EventNotifierTestData data = { .n = 0, .active = 10, .auto_set = true };
     event_notifier_init(&data.e, false);
-    aio_set_event_notifier(ctx, &data.e, event_ready_cb);
+    set_event_notifier(ctx, &data.e, event_ready_cb);
     while (g_main_context_iteration(NULL, false));
     g_assert_cmpint(data.n, ==, 0);
     g_assert_cmpint(data.active, ==, 10);
@@ -693,7 +722,7 @@
     g_assert_cmpint(data.active, ==, 0);
     g_assert(!g_main_context_iteration(NULL, false));
 
-    aio_set_event_notifier(ctx, &data.e, NULL);
+    set_event_notifier(ctx, &data.e, NULL);
     while (g_main_context_iteration(NULL, false));
     event_notifier_cleanup(&data.e);
 }
@@ -704,7 +733,7 @@
     EventNotifierTestData dummy = { .n = 0, .active = 1 };
 
     event_notifier_init(&data.e, false);
-    aio_set_event_notifier(ctx, &data.e, event_ready_cb);
+    set_event_notifier(ctx, &data.e, event_ready_cb);
 
     while (g_main_context_iteration(NULL, false));
     g_assert_cmpint(data.n, ==, 0);
@@ -717,7 +746,7 @@
 
     /* An active event notifier forces aio_poll to look at EventNotifiers.  */
     event_notifier_init(&dummy.e, false);
-    aio_set_event_notifier(ctx, &dummy.e, event_ready_cb);
+    set_event_notifier(ctx, &dummy.e, event_ready_cb);
 
     event_notifier_set(&data.e);
     g_assert(g_main_context_iteration(NULL, false));
@@ -737,10 +766,10 @@
     g_assert_cmpint(dummy.n, ==, 1);
     g_assert_cmpint(dummy.active, ==, 0);
 
-    aio_set_event_notifier(ctx, &dummy.e, NULL);
+    set_event_notifier(ctx, &dummy.e, NULL);
     event_notifier_cleanup(&dummy.e);
 
-    aio_set_event_notifier(ctx, &data.e, NULL);
+    set_event_notifier(ctx, &data.e, NULL);
     while (g_main_context_iteration(NULL, false));
     g_assert_cmpint(data.n, ==, 2);
 
@@ -759,7 +788,7 @@
      * an fd to wait on. Fixing this breaks other tests. So create a dummy one.
      */
     event_notifier_init(&e, false);
-    aio_set_event_notifier(ctx, &e, dummy_io_handler_read);
+    set_event_notifier(ctx, &e, dummy_io_handler_read);
     do {} while (g_main_context_iteration(NULL, false));
 
     aio_timer_init(ctx, &data.timer, data.clock_type,
@@ -784,7 +813,7 @@
     g_assert_cmpint(data.n, ==, 2);
     g_assert(qemu_clock_get_ns(data.clock_type) > expiry);
 
-    aio_set_event_notifier(ctx, &e, NULL);
+    set_event_notifier(ctx, &e, NULL);
     event_notifier_cleanup(&e);
 
     timer_del(&data.timer);
@@ -826,6 +855,7 @@
     g_test_add_func("/aio/event/wait",              test_wait_event_notifier);
     g_test_add_func("/aio/event/wait/no-flush-cb",  test_wait_event_notifier_noflush);
     g_test_add_func("/aio/event/flush",             test_flush_event_notifier);
+    g_test_add_func("/aio/external-client",         test_aio_external_client);
     g_test_add_func("/aio/timer/schedule",          test_timer_schedule);
 
     g_test_add_func("/aio-gsource/flush",                   test_source_flush);

diff --git a/tests/test-crypto-cipher.c b/tests/test-crypto-cipher.c
index 9d38d26..f4946a0 100644
--- a/tests/test-crypto-cipher.c
+++ b/tests/test-crypto-cipher.c

@@ -287,6 +287,79 @@
     qcrypto_cipher_free(cipher);
 }
 
+
+static void test_cipher_null_iv(void)
+{
+    QCryptoCipher *cipher;
+    uint8_t key[32] = { 0 };
+    uint8_t plaintext[32] = { 0 };
+    uint8_t ciphertext[32] = { 0 };
+
+    cipher = qcrypto_cipher_new(
+        QCRYPTO_CIPHER_ALG_AES_256,
+        QCRYPTO_CIPHER_MODE_CBC,
+        key, sizeof(key),
+        &error_abort);
+    g_assert(cipher != NULL);
+
+    /* Don't call qcrypto_cipher_setiv */
+
+    qcrypto_cipher_encrypt(cipher,
+                           plaintext,
+                           ciphertext,
+                           sizeof(plaintext),
+                           &error_abort);
+
+    qcrypto_cipher_free(cipher);
+}
+
+static void test_cipher_short_plaintext(void)
+{
+    Error *err = NULL;
+    QCryptoCipher *cipher;
+    uint8_t key[32] = { 0 };
+    uint8_t plaintext1[20] = { 0 };
+    uint8_t ciphertext1[20] = { 0 };
+    uint8_t plaintext2[40] = { 0 };
+    uint8_t ciphertext2[40] = { 0 };
+    int ret;
+
+    cipher = qcrypto_cipher_new(
+        QCRYPTO_CIPHER_ALG_AES_256,
+        QCRYPTO_CIPHER_MODE_CBC,
+        key, sizeof(key),
+        &error_abort);
+    g_assert(cipher != NULL);
+
+    /* Should report an error as plaintext is shorter
+     * than block size
+     */
+    ret = qcrypto_cipher_encrypt(cipher,
+                                 plaintext1,
+                                 ciphertext1,
+                                 sizeof(plaintext1),
+                                 &err);
+    g_assert(ret == -1);
+    g_assert(err != NULL);
+
+    error_free(err);
+    err = NULL;
+
+    /* Should report an error as plaintext is larger than
+     * block size, but not a multiple of block size
+     */
+    ret = qcrypto_cipher_encrypt(cipher,
+                                 plaintext2,
+                                 ciphertext2,
+                                 sizeof(plaintext2),
+                                 &err);
+    g_assert(ret == -1);
+    g_assert(err != NULL);
+
+    error_free(err);
+    qcrypto_cipher_free(cipher);
+}
+
 int main(int argc, char **argv)
 {
     size_t i;
@@ -298,5 +371,12 @@
     for (i = 0; i < G_N_ELEMENTS(test_data); i++) {
         g_test_add_data_func(test_data[i].path, &test_data[i], test_cipher);
     }
+
+    g_test_add_func("/crypto/cipher/null-iv",
+                    test_cipher_null_iv);
+
+    g_test_add_func("/crypto/cipher/short-plaintext",
+                    test_cipher_short_plaintext);
+
     return g_test_run();
 }

diff --git a/tests/test-qga.c b/tests/test-qga.c
index 0531c9f..6473846 100644
--- a/tests/test-qga.c
+++ b/tests/test-qga.c

@@ -273,13 +273,15 @@
     g_assert_nonnull(ret);
     qmp_assert_no_error(ret);
 
-    /* check there is at least a fs */
+    /* sanity-check the response if there are any filesystems */
     list = qdict_get_qlist(ret, "return");
     entry = qlist_first(list);
-    g_assert(qdict_haskey(qobject_to_qdict(entry->value), "name"));
-    g_assert(qdict_haskey(qobject_to_qdict(entry->value), "mountpoint"));
-    g_assert(qdict_haskey(qobject_to_qdict(entry->value), "type"));
-    g_assert(qdict_haskey(qobject_to_qdict(entry->value), "disk"));
+    if (entry) {
+        g_assert(qdict_haskey(qobject_to_qdict(entry->value), "name"));
+        g_assert(qdict_haskey(qobject_to_qdict(entry->value), "mountpoint"));
+        g_assert(qdict_haskey(qobject_to_qdict(entry->value), "type"));
+        g_assert(qdict_haskey(qobject_to_qdict(entry->value), "disk"));
+    }
 
     QDECREF(ret);
 }

diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
new file mode 100644
index 0000000..fa18ad5
--- /dev/null
+++ b/tests/vhost-user-bridge.c

@@ -0,0 +1,1110 @@
+/*
+ * Vhost User Bridge
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * Authors:
+ *  Victor Kaplansky <victork@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+/*
+ * TODO:
+ *     - main should get parameters from the command line.
+ *     - implement all request handlers.
+ *     - test for broken requests and virtqueue.
+ *     - implement features defined by Virtio 1.0 spec.
+ *     - support mergeable buffers and indirect descriptors.
+ *     - implement RESET_DEVICE request.
+ *     - implement clean shutdown.
+ *     - implement non-blocking writes to UDP backend.
+ *     - implement polling strategy.
+ */
+
+#include <stddef.h>
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/unistd.h>
+#include <sys/mman.h>
+#include <sys/eventfd.h>
+#include <arpa/inet.h>
+
+#include <linux/vhost.h>
+
+#include "qemu/atomic.h"
+#include "standard-headers/linux/virtio_net.h"
+#include "standard-headers/linux/virtio_ring.h"
+
+#define VHOST_USER_BRIDGE_DEBUG 1
+
+#define DPRINT(...) \
+    do { \
+        if (VHOST_USER_BRIDGE_DEBUG) { \
+            printf(__VA_ARGS__); \
+        } \
+    } while (0)
+
+typedef void (*CallbackFunc)(int sock, void *ctx);
+
+typedef struct Event {
+    void *ctx;
+    CallbackFunc callback;
+} Event;
+
+typedef struct Dispatcher {
+    int max_sock;
+    fd_set fdset;
+    Event events[FD_SETSIZE];
+} Dispatcher;
+
+static void
+vubr_die(const char *s)
+{
+    perror(s);
+    exit(1);
+}
+
+static int
+dispatcher_init(Dispatcher *dispr)
+{
+    FD_ZERO(&dispr->fdset);
+    dispr->max_sock = -1;
+    return 0;
+}
+
+static int
+dispatcher_add(Dispatcher *dispr, int sock, void *ctx, CallbackFunc cb)
+{
+    if (sock >= FD_SETSIZE) {
+        fprintf(stderr,
+                "Error: Failed to add new event. sock %d should be less than %d\n",
+                sock, FD_SETSIZE);
+        return -1;
+    }
+
+    dispr->events[sock].ctx = ctx;
+    dispr->events[sock].callback = cb;
+
+    FD_SET(sock, &dispr->fdset);
+    if (sock > dispr->max_sock) {
+        dispr->max_sock = sock;
+    }
+    DPRINT("Added sock %d for watching. max_sock: %d\n",
+           sock, dispr->max_sock);
+    return 0;
+}
+
+#if 0
+/* dispatcher_remove() is not currently in use but may be useful
+ * in the future. */
+static int
+dispatcher_remove(Dispatcher *dispr, int sock)
+{
+    if (sock >= FD_SETSIZE) {
+        fprintf(stderr,
+                "Error: Failed to remove event. sock %d should be less than %d\n",
+                sock, FD_SETSIZE);
+        return -1;
+    }
+
+    FD_CLR(sock, &dispr->fdset);
+    return 0;
+}
+#endif
+
+/* timeout in us */
+static int
+dispatcher_wait(Dispatcher *dispr, uint32_t timeout)
+{
+    struct timeval tv;
+    tv.tv_sec = timeout / 1000000;
+    tv.tv_usec = timeout % 1000000;
+
+    fd_set fdset = dispr->fdset;
+
+    /* wait until some of sockets become readable. */
+    int rc = select(dispr->max_sock + 1, &fdset, 0, 0, &tv);
+
+    if (rc == -1) {
+        vubr_die("select");
+    }
+
+    /* Timeout */
+    if (rc == 0) {
+        return 0;
+    }
+
+    /* Now call callback for every ready socket. */
+
+    int sock;
+    for (sock = 0; sock < dispr->max_sock + 1; sock++)
+        if (FD_ISSET(sock, &fdset)) {
+            Event *e = &dispr->events[sock];
+            e->callback(sock, e->ctx);
+        }
+
+    return 0;
+}
+
+typedef struct VubrVirtq {
+    int call_fd;
+    int kick_fd;
+    uint32_t size;
+    uint16_t last_avail_index;
+    uint16_t last_used_index;
+    struct vring_desc *desc;
+    struct vring_avail *avail;
+    struct vring_used *used;
+} VubrVirtq;
+
+/* Based on qemu/hw/virtio/vhost-user.c */
+
+#define VHOST_MEMORY_MAX_NREGIONS    8
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+
+enum VhostUserProtocolFeature {
+    VHOST_USER_PROTOCOL_F_MQ = 0,
+    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
+    VHOST_USER_PROTOCOL_F_RARP = 2,
+
+    VHOST_USER_PROTOCOL_F_MAX
+};
+
+#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
+
+typedef enum VhostUserRequest {
+    VHOST_USER_NONE = 0,
+    VHOST_USER_GET_FEATURES = 1,
+    VHOST_USER_SET_FEATURES = 2,
+    VHOST_USER_SET_OWNER = 3,
+    VHOST_USER_RESET_DEVICE = 4,
+    VHOST_USER_SET_MEM_TABLE = 5,
+    VHOST_USER_SET_LOG_BASE = 6,
+    VHOST_USER_SET_LOG_FD = 7,
+    VHOST_USER_SET_VRING_NUM = 8,
+    VHOST_USER_SET_VRING_ADDR = 9,
+    VHOST_USER_SET_VRING_BASE = 10,
+    VHOST_USER_GET_VRING_BASE = 11,
+    VHOST_USER_SET_VRING_KICK = 12,
+    VHOST_USER_SET_VRING_CALL = 13,
+    VHOST_USER_SET_VRING_ERR = 14,
+    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+    VHOST_USER_GET_QUEUE_NUM = 17,
+    VHOST_USER_SET_VRING_ENABLE = 18,
+    VHOST_USER_SEND_RARP = 19,
+    VHOST_USER_MAX
+} VhostUserRequest;
+
+typedef struct VhostUserMemoryRegion {
+    uint64_t guest_phys_addr;
+    uint64_t memory_size;
+    uint64_t userspace_addr;
+    uint64_t mmap_offset;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+    uint32_t nregions;
+    uint32_t padding;
+    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserMsg {
+    VhostUserRequest request;
+
+#define VHOST_USER_VERSION_MASK     (0x3)
+#define VHOST_USER_REPLY_MASK       (0x1<<2)
+    uint32_t flags;
+    uint32_t size; /* the following payload size */
+    union {
+#define VHOST_USER_VRING_IDX_MASK   (0xff)
+#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
+        uint64_t u64;
+        struct vhost_vring_state state;
+        struct vhost_vring_addr addr;
+        VhostUserMemory memory;
+    } payload;
+    int fds[VHOST_MEMORY_MAX_NREGIONS];
+    int fd_num;
+} QEMU_PACKED VhostUserMsg;
+
+#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION    (0x1)
+
+#define MAX_NR_VIRTQUEUE (8)
+
+typedef struct VubrDevRegion {
+    /* Guest Physical address. */
+    uint64_t gpa;
+    /* Memory region size. */
+    uint64_t size;
+    /* QEMU virtual address (userspace). */
+    uint64_t qva;
+    /* Starting offset in our mmaped space. */
+    uint64_t mmap_offset;
+    /* Start address of mmaped space. */
+    uint64_t mmap_addr;
+} VubrDevRegion;
+
+typedef struct VubrDev {
+    int sock;
+    Dispatcher dispatcher;
+    uint32_t nregions;
+    VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+    VubrVirtq vq[MAX_NR_VIRTQUEUE];
+    int backend_udp_sock;
+    struct sockaddr_in backend_udp_dest;
+} VubrDev;
+
+static const char *vubr_request_str[] = {
+    [VHOST_USER_NONE]                   =  "VHOST_USER_NONE",
+    [VHOST_USER_GET_FEATURES]           =  "VHOST_USER_GET_FEATURES",
+    [VHOST_USER_SET_FEATURES]           =  "VHOST_USER_SET_FEATURES",
+    [VHOST_USER_SET_OWNER]              =  "VHOST_USER_SET_OWNER",
+    [VHOST_USER_RESET_DEVICE]           =  "VHOST_USER_RESET_DEVICE",
+    [VHOST_USER_SET_MEM_TABLE]          =  "VHOST_USER_SET_MEM_TABLE",
+    [VHOST_USER_SET_LOG_BASE]           =  "VHOST_USER_SET_LOG_BASE",
+    [VHOST_USER_SET_LOG_FD]             =  "VHOST_USER_SET_LOG_FD",
+    [VHOST_USER_SET_VRING_NUM]          =  "VHOST_USER_SET_VRING_NUM",
+    [VHOST_USER_SET_VRING_ADDR]         =  "VHOST_USER_SET_VRING_ADDR",
+    [VHOST_USER_SET_VRING_BASE]         =  "VHOST_USER_SET_VRING_BASE",
+    [VHOST_USER_GET_VRING_BASE]         =  "VHOST_USER_GET_VRING_BASE",
+    [VHOST_USER_SET_VRING_KICK]         =  "VHOST_USER_SET_VRING_KICK",
+    [VHOST_USER_SET_VRING_CALL]         =  "VHOST_USER_SET_VRING_CALL",
+    [VHOST_USER_SET_VRING_ERR]          =  "VHOST_USER_SET_VRING_ERR",
+    [VHOST_USER_GET_PROTOCOL_FEATURES]  =  "VHOST_USER_GET_PROTOCOL_FEATURES",
+    [VHOST_USER_SET_PROTOCOL_FEATURES]  =  "VHOST_USER_SET_PROTOCOL_FEATURES",
+    [VHOST_USER_GET_QUEUE_NUM]          =  "VHOST_USER_GET_QUEUE_NUM",
+    [VHOST_USER_SET_VRING_ENABLE]       =  "VHOST_USER_SET_VRING_ENABLE",
+    [VHOST_USER_SEND_RARP]              =  "VHOST_USER_SEND_RARP",
+    [VHOST_USER_MAX]                    =  "VHOST_USER_MAX",
+};
+
+static void
+print_buffer(uint8_t *buf, size_t len)
+{
+    int i;
+    printf("Raw buffer:\n");
+    for (i = 0; i < len; i++) {
+        if (i % 16 == 0) {
+            printf("\n");
+        }
+        if (i % 4 == 0) {
+            printf("   ");
+        }
+        printf("%02x ", buf[i]);
+    }
+    printf("\n............................................................\n");
+}
+
+/* Translate guest physical address to our virtual address.  */
+static uint64_t
+gpa_to_va(VubrDev *dev, uint64_t guest_addr)
+{
+    int i;
+
+    /* Find matching memory region.  */
+    for (i = 0; i < dev->nregions; i++) {
+        VubrDevRegion *r = &dev->regions[i];
+
+        if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) {
+            return guest_addr - r->gpa + r->mmap_addr + r->mmap_offset;
+        }
+    }
+
+    assert(!"address not found in regions");
+    return 0;
+}
+
+/* Translate qemu virtual address to our virtual address.  */
+static uint64_t
+qva_to_va(VubrDev *dev, uint64_t qemu_addr)
+{
+    int i;
+
+    /* Find matching memory region.  */
+    for (i = 0; i < dev->nregions; i++) {
+        VubrDevRegion *r = &dev->regions[i];
+
+        if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) {
+            return qemu_addr - r->qva + r->mmap_addr + r->mmap_offset;
+        }
+    }
+
+    assert(!"address not found in regions");
+    return 0;
+}
+
+static void
+vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
+{
+    char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { };
+    struct iovec iov = {
+        .iov_base = (char *)vmsg,
+        .iov_len = VHOST_USER_HDR_SIZE,
+    };
+    struct msghdr msg = {
+        .msg_iov = &iov,
+        .msg_iovlen = 1,
+        .msg_control = control,
+        .msg_controllen = sizeof(control),
+    };
+    size_t fd_size;
+    struct cmsghdr *cmsg;
+    int rc;
+
+    rc = recvmsg(conn_fd, &msg, 0);
+
+    if (rc <= 0) {
+        vubr_die("recvmsg");
+    }
+
+    vmsg->fd_num = 0;
+    for (cmsg = CMSG_FIRSTHDR(&msg);
+         cmsg != NULL;
+         cmsg = CMSG_NXTHDR(&msg, cmsg))
+    {
+        if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
+            fd_size = cmsg->cmsg_len - CMSG_LEN(0);
+            vmsg->fd_num = fd_size / sizeof(int);
+            memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size);
+            break;
+        }
+    }
+
+    if (vmsg->size > sizeof(vmsg->payload)) {
+        fprintf(stderr,
+                "Error: too big message request: %d, size: vmsg->size: %u, "
+                "while sizeof(vmsg->payload) = %lu\n",
+                vmsg->request, vmsg->size, sizeof(vmsg->payload));
+        exit(1);
+    }
+
+    if (vmsg->size) {
+        rc = read(conn_fd, &vmsg->payload, vmsg->size);
+        if (rc <= 0) {
+            vubr_die("recvmsg");
+        }
+
+        assert(rc == vmsg->size);
+    }
+}
+
+static void
+vubr_message_write(int conn_fd, VhostUserMsg *vmsg)
+{
+    int rc;
+
+    do {
+        rc = write(conn_fd, vmsg, VHOST_USER_HDR_SIZE + vmsg->size);
+    } while (rc < 0 && errno == EINTR);
+
+    if (rc < 0) {
+        vubr_die("write");
+    }
+}
+
+static void
+vubr_backend_udp_sendbuf(VubrDev *dev, uint8_t *buf, size_t len)
+{
+    int slen = sizeof(struct sockaddr_in);
+
+    if (sendto(dev->backend_udp_sock, buf, len, 0,
+               (struct sockaddr *) &dev->backend_udp_dest, slen) == -1) {
+        vubr_die("sendto()");
+    }
+}
+
+static int
+vubr_backend_udp_recvbuf(VubrDev *dev, uint8_t *buf, size_t buflen)
+{
+    int slen = sizeof(struct sockaddr_in);
+    int rc;
+
+    rc = recvfrom(dev->backend_udp_sock, buf, buflen, 0,
+                  (struct sockaddr *) &dev->backend_udp_dest,
+                  (socklen_t *)&slen);
+    if (rc == -1) {
+        vubr_die("recvfrom()");
+    }
+
+    return rc;
+}
+
+static void
+vubr_consume_raw_packet(VubrDev *dev, uint8_t *buf, uint32_t len)
+{
+    int hdrlen = sizeof(struct virtio_net_hdr_v1);
+
+    if (VHOST_USER_BRIDGE_DEBUG) {
+        print_buffer(buf, len);
+    }
+    vubr_backend_udp_sendbuf(dev, buf + hdrlen, len - hdrlen);
+}
+
+/* Kick the guest if necessary. */
+static void
+vubr_virtqueue_kick(VubrVirtq *vq)
+{
+    if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
+        DPRINT("Kicking the guest...\n");
+        eventfd_write(vq->call_fd, 1);
+    }
+}
+
+static void
+vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
+{
+    struct vring_desc *desc   = vq->desc;
+    struct vring_avail *avail = vq->avail;
+    struct vring_used *used   = vq->used;
+
+    unsigned int size = vq->size;
+
+    uint16_t avail_index = atomic_mb_read(&avail->idx);
+
+    /* We check the available descriptors before posting the
+     * buffer, so here we assume that enough available
+     * descriptors. */
+    assert(vq->last_avail_index != avail_index);
+    uint16_t a_index = vq->last_avail_index % size;
+    uint16_t u_index = vq->last_used_index % size;
+    uint16_t d_index = avail->ring[a_index];
+
+    int i = d_index;
+
+    DPRINT("Post packet to guest on vq:\n");
+    DPRINT("    size             = %d\n", vq->size);
+    DPRINT("    last_avail_index = %d\n", vq->last_avail_index);
+    DPRINT("    last_used_index  = %d\n", vq->last_used_index);
+    DPRINT("    a_index = %d\n", a_index);
+    DPRINT("    u_index = %d\n", u_index);
+    DPRINT("    d_index = %d\n", d_index);
+    DPRINT("    desc[%d].addr    = 0x%016"PRIx64"\n", i, desc[i].addr);
+    DPRINT("    desc[%d].len     = %d\n", i, desc[i].len);
+    DPRINT("    desc[%d].flags   = %d\n", i, desc[i].flags);
+    DPRINT("    avail->idx = %d\n", avail_index);
+    DPRINT("    used->idx  = %d\n", used->idx);
+
+    if (!(desc[i].flags & VRING_DESC_F_WRITE)) {
+        /* FIXME: we should find writable descriptor. */
+        fprintf(stderr, "Error: descriptor is not writable. Exiting.\n");
+        exit(1);
+    }
+
+    void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr);
+    uint32_t chunk_len = desc[i].len;
+
+    if (len <= chunk_len) {
+        memcpy(chunk_start, buf, len);
+    } else {
+        fprintf(stderr,
+                "Received too long packet from the backend. Dropping...\n");
+        return;
+    }
+
+    /* Add descriptor to the used ring. */
+    used->ring[u_index].id = d_index;
+    used->ring[u_index].len = len;
+
+    vq->last_avail_index++;
+    vq->last_used_index++;
+
+    atomic_mb_set(&used->idx, vq->last_used_index);
+
+    /* Kick the guest if necessary. */
+    vubr_virtqueue_kick(vq);
+}
+
+static int
+vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
+{
+    struct vring_desc *desc   = vq->desc;
+    struct vring_avail *avail = vq->avail;
+    struct vring_used *used   = vq->used;
+
+    unsigned int size = vq->size;
+
+    uint16_t a_index = vq->last_avail_index % size;
+    uint16_t u_index = vq->last_used_index % size;
+    uint16_t d_index = avail->ring[a_index];
+
+    uint32_t i, len = 0;
+    size_t buf_size = 4096;
+    uint8_t buf[4096];
+
+    DPRINT("Chunks: ");
+    i = d_index;
+    do {
+        void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr);
+        uint32_t chunk_len = desc[i].len;
+
+        if (len + chunk_len < buf_size) {
+            memcpy(buf + len, chunk_start, chunk_len);
+            DPRINT("%d ", chunk_len);
+        } else {
+            fprintf(stderr, "Error: too long packet. Dropping...\n");
+            break;
+        }
+
+        len += chunk_len;
+
+        if (!(desc[i].flags & VRING_DESC_F_NEXT)) {
+            break;
+        }
+
+        i = desc[i].next;
+    } while (1);
+    DPRINT("\n");
+
+    if (!len) {
+        return -1;
+    }
+
+    /* Add descriptor to the used ring. */
+    used->ring[u_index].id = d_index;
+    used->ring[u_index].len = len;
+
+    vubr_consume_raw_packet(dev, buf, len);
+
+    return 0;
+}
+
+static void
+vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
+{
+    struct vring_avail *avail = vq->avail;
+    struct vring_used *used = vq->used;
+
+    while (vq->last_avail_index != atomic_mb_read(&avail->idx)) {
+        vubr_process_desc(dev, vq);
+        vq->last_avail_index++;
+        vq->last_used_index++;
+    }
+
+    atomic_mb_set(&used->idx, vq->last_used_index);
+}
+
+static void
+vubr_backend_recv_cb(int sock, void *ctx)
+{
+    VubrDev *dev = (VubrDev *) ctx;
+    VubrVirtq *rx_vq = &dev->vq[0];
+    uint8_t buf[4096];
+    struct virtio_net_hdr_v1 *hdr = (struct virtio_net_hdr_v1 *)buf;
+    int hdrlen = sizeof(struct virtio_net_hdr_v1);
+    int buflen = sizeof(buf);
+    int len;
+
+    DPRINT("\n\n   ***   IN UDP RECEIVE CALLBACK    ***\n\n");
+
+    uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx);
+
+    /* If there is no available descriptors, just do nothing.
+     * The buffer will be handled by next arrived UDP packet,
+     * or next kick on receive virtq. */
+    if (rx_vq->last_avail_index == avail_index) {
+        DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
+        return;
+    }
+
+    len = vubr_backend_udp_recvbuf(dev, buf + hdrlen, buflen - hdrlen);
+
+    *hdr = (struct virtio_net_hdr_v1) { };
+    hdr->num_buffers = 1;
+    vubr_post_buffer(dev, rx_vq, buf, len + hdrlen);
+}
+
+static void
+vubr_kick_cb(int sock, void *ctx)
+{
+    VubrDev *dev = (VubrDev *) ctx;
+    eventfd_t kick_data;
+    ssize_t rc;
+
+    rc = eventfd_read(sock, &kick_data);
+    if (rc == -1) {
+        vubr_die("eventfd_read()");
+    } else {
+        DPRINT("Got kick_data: %016"PRIx64"\n", kick_data);
+        vubr_process_avail(dev, &dev->vq[1]);
+    }
+}
+
+static int
+vubr_none_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    DPRINT("Function %s() not implemented yet.\n", __func__);
+    return 0;
+}
+
+static int
+vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    vmsg->payload.u64 =
+            ((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
+             (1ULL << VIRTIO_NET_F_CTRL_VQ) |
+             (1ULL << VIRTIO_NET_F_CTRL_RX) |
+             (1ULL << VHOST_F_LOG_ALL));
+    vmsg->size = sizeof(vmsg->payload.u64);
+
+    DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+
+    /* reply */
+    return 1;
+}
+
+static int
+vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+    return 0;
+}
+
+static int
+vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    return 0;
+}
+
+static int
+vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    DPRINT("Function %s() not implemented yet.\n", __func__);
+    return 0;
+}
+
+static int
+vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    int i;
+    VhostUserMemory *memory = &vmsg->payload.memory;
+    dev->nregions = memory->nregions;
+
+    DPRINT("Nregions: %d\n", memory->nregions);
+    for (i = 0; i < dev->nregions; i++) {
+        void *mmap_addr;
+        VhostUserMemoryRegion *msg_region = &memory->regions[i];
+        VubrDevRegion *dev_region = &dev->regions[i];
+
+        DPRINT("Region %d\n", i);
+        DPRINT("    guest_phys_addr: 0x%016"PRIx64"\n",
+               msg_region->guest_phys_addr);
+        DPRINT("    memory_size:     0x%016"PRIx64"\n",
+               msg_region->memory_size);
+        DPRINT("    userspace_addr   0x%016"PRIx64"\n",
+               msg_region->userspace_addr);
+        DPRINT("    mmap_offset      0x%016"PRIx64"\n",
+               msg_region->mmap_offset);
+
+        dev_region->gpa         = msg_region->guest_phys_addr;
+        dev_region->size        = msg_region->memory_size;
+        dev_region->qva         = msg_region->userspace_addr;
+        dev_region->mmap_offset = msg_region->mmap_offset;
+
+        /* We don't use offset argument of mmap() since the
+         * mapped address has to be page aligned, and we use huge
+         * pages.  */
+        mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
+                         PROT_READ | PROT_WRITE, MAP_SHARED,
+                         vmsg->fds[i], 0);
+
+        if (mmap_addr == MAP_FAILED) {
+            vubr_die("mmap");
+        }
+
+        dev_region->mmap_addr = (uint64_t) mmap_addr;
+        DPRINT("    mmap_addr:       0x%016"PRIx64"\n", dev_region->mmap_addr);
+    }
+
+    return 0;
+}
+
+static int
+vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    DPRINT("Function %s() not implemented yet.\n", __func__);
+    return 0;
+}
+
+static int
+vubr_set_log_fd_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    DPRINT("Function %s() not implemented yet.\n", __func__);
+    return 0;
+}
+
+static int
+vubr_set_vring_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    unsigned int index = vmsg->payload.state.index;
+    unsigned int num = vmsg->payload.state.num;
+
+    DPRINT("State.index: %d\n", index);
+    DPRINT("State.num:   %d\n", num);
+    dev->vq[index].size = num;
+    return 0;
+}
+
+static int
+vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    struct vhost_vring_addr *vra = &vmsg->payload.addr;
+    unsigned int index = vra->index;
+    VubrVirtq *vq = &dev->vq[index];
+
+    DPRINT("vhost_vring_addr:\n");
+    DPRINT("    index:  %d\n", vra->index);
+    DPRINT("    flags:  %d\n", vra->flags);
+    DPRINT("    desc_user_addr:   0x%016llx\n", vra->desc_user_addr);
+    DPRINT("    used_user_addr:   0x%016llx\n", vra->used_user_addr);
+    DPRINT("    avail_user_addr:  0x%016llx\n", vra->avail_user_addr);
+    DPRINT("    log_guest_addr:   0x%016llx\n", vra->log_guest_addr);
+
+    vq->desc = (struct vring_desc *)qva_to_va(dev, vra->desc_user_addr);
+    vq->used = (struct vring_used *)qva_to_va(dev, vra->used_user_addr);
+    vq->avail = (struct vring_avail *)qva_to_va(dev, vra->avail_user_addr);
+
+    DPRINT("Setting virtq addresses:\n");
+    DPRINT("    vring_desc  at %p\n", vq->desc);
+    DPRINT("    vring_used  at %p\n", vq->used);
+    DPRINT("    vring_avail at %p\n", vq->avail);
+
+    vq->last_used_index = vq->used->idx;
+    return 0;
+}
+
+static int
+vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    unsigned int index = vmsg->payload.state.index;
+    unsigned int num = vmsg->payload.state.num;
+
+    DPRINT("State.index: %d\n", index);
+    DPRINT("State.num:   %d\n", num);
+    dev->vq[index].last_avail_index = num;
+
+    return 0;
+}
+
+static int
+vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    DPRINT("Function %s() not implemented yet.\n", __func__);
+    return 0;
+}
+
+static int
+vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    uint64_t u64_arg = vmsg->payload.u64;
+    int index = u64_arg & VHOST_USER_VRING_IDX_MASK;
+
+    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+
+    assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
+    assert(vmsg->fd_num == 1);
+
+    dev->vq[index].kick_fd = vmsg->fds[0];
+    DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index);
+
+    if (index % 2 == 1) {
+        /* TX queue. */
+        dispatcher_add(&dev->dispatcher, dev->vq[index].kick_fd,
+                       dev, vubr_kick_cb);
+
+        DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
+               dev->vq[index].kick_fd, index);
+    }
+    return 0;
+}
+
+static int
+vubr_set_vring_call_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    uint64_t u64_arg = vmsg->payload.u64;
+    int index = u64_arg & VHOST_USER_VRING_IDX_MASK;
+
+    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+    assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
+    assert(vmsg->fd_num == 1);
+
+    dev->vq[index].call_fd = vmsg->fds[0];
+    DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index);
+
+    return 0;
+}
+
+static int
+vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+    return 0;
+}
+
+static int
+vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    /* FIXME: unimplented */
+    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+    return 0;
+}
+
+static int
+vubr_set_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    /* FIXME: unimplented */
+    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+    return 0;
+}
+
+static int
+vubr_get_queue_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    DPRINT("Function %s() not implemented yet.\n", __func__);
+    return 0;
+}
+
+static int
+vubr_set_vring_enable_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    DPRINT("Function %s() not implemented yet.\n", __func__);
+    return 0;
+}
+
+static int
+vubr_send_rarp_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    DPRINT("Function %s() not implemented yet.\n", __func__);
+    return 0;
+}
+
+static int
+vubr_execute_request(VubrDev *dev, VhostUserMsg *vmsg)
+{
+    /* Print out generic part of the request. */
+    DPRINT(
+           "==================   Vhost user message from QEMU   ==================\n");
+    DPRINT("Request: %s (%d)\n", vubr_request_str[vmsg->request],
+           vmsg->request);
+    DPRINT("Flags:   0x%x\n", vmsg->flags);
+    DPRINT("Size:    %d\n", vmsg->size);
+
+    if (vmsg->fd_num) {
+        int i;
+        DPRINT("Fds:");
+        for (i = 0; i < vmsg->fd_num; i++) {
+            DPRINT(" %d", vmsg->fds[i]);
+        }
+        DPRINT("\n");
+    }
+
+    switch (vmsg->request) {
+    case VHOST_USER_NONE:
+        return vubr_none_exec(dev, vmsg);
+    case VHOST_USER_GET_FEATURES:
+        return vubr_get_features_exec(dev, vmsg);
+    case VHOST_USER_SET_FEATURES:
+        return vubr_set_features_exec(dev, vmsg);
+    case VHOST_USER_SET_OWNER:
+        return vubr_set_owner_exec(dev, vmsg);
+    case VHOST_USER_RESET_DEVICE:
+        return vubr_reset_device_exec(dev, vmsg);
+    case VHOST_USER_SET_MEM_TABLE:
+        return vubr_set_mem_table_exec(dev, vmsg);
+    case VHOST_USER_SET_LOG_BASE:
+        return vubr_set_log_base_exec(dev, vmsg);
+    case VHOST_USER_SET_LOG_FD:
+        return vubr_set_log_fd_exec(dev, vmsg);
+    case VHOST_USER_SET_VRING_NUM:
+        return vubr_set_vring_num_exec(dev, vmsg);
+    case VHOST_USER_SET_VRING_ADDR:
+        return vubr_set_vring_addr_exec(dev, vmsg);
+    case VHOST_USER_SET_VRING_BASE:
+        return vubr_set_vring_base_exec(dev, vmsg);
+    case VHOST_USER_GET_VRING_BASE:
+        return vubr_get_vring_base_exec(dev, vmsg);
+    case VHOST_USER_SET_VRING_KICK:
+        return vubr_set_vring_kick_exec(dev, vmsg);
+    case VHOST_USER_SET_VRING_CALL:
+        return vubr_set_vring_call_exec(dev, vmsg);
+    case VHOST_USER_SET_VRING_ERR:
+        return vubr_set_vring_err_exec(dev, vmsg);
+    case VHOST_USER_GET_PROTOCOL_FEATURES:
+        return vubr_get_protocol_features_exec(dev, vmsg);
+    case VHOST_USER_SET_PROTOCOL_FEATURES:
+        return vubr_set_protocol_features_exec(dev, vmsg);
+    case VHOST_USER_GET_QUEUE_NUM:
+        return vubr_get_queue_num_exec(dev, vmsg);
+    case VHOST_USER_SET_VRING_ENABLE:
+        return vubr_set_vring_enable_exec(dev, vmsg);
+    case VHOST_USER_SEND_RARP:
+        return vubr_send_rarp_exec(dev, vmsg);
+
+    case VHOST_USER_MAX:
+        assert(vmsg->request != VHOST_USER_MAX);
+    }
+    return 0;
+}
+
+static void
+vubr_receive_cb(int sock, void *ctx)
+{
+    VubrDev *dev = (VubrDev *) ctx;
+    VhostUserMsg vmsg;
+    int reply_requested;
+
+    vubr_message_read(sock, &vmsg);
+    reply_requested = vubr_execute_request(dev, &vmsg);
+    if (reply_requested) {
+        /* Set the version in the flags when sending the reply */
+        vmsg.flags &= ~VHOST_USER_VERSION_MASK;
+        vmsg.flags |= VHOST_USER_VERSION;
+        vmsg.flags |= VHOST_USER_REPLY_MASK;
+        vubr_message_write(sock, &vmsg);
+    }
+}
+
+static void
+vubr_accept_cb(int sock, void *ctx)
+{
+    VubrDev *dev = (VubrDev *)ctx;
+    int conn_fd;
+    struct sockaddr_un un;
+    socklen_t len = sizeof(un);
+
+    conn_fd = accept(sock, (struct sockaddr *) &un, &len);
+    if (conn_fd  == -1) {
+        vubr_die("accept()");
+    }
+    DPRINT("Got connection from remote peer on sock %d\n", conn_fd);
+    dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb);
+}
+
+static VubrDev *
+vubr_new(const char *path)
+{
+    VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev));
+    dev->nregions = 0;
+    int i;
+    struct sockaddr_un un;
+    size_t len;
+
+    for (i = 0; i < MAX_NR_VIRTQUEUE; i++) {
+        dev->vq[i] = (VubrVirtq) {
+            .call_fd = -1, .kick_fd = -1,
+            .size = 0,
+            .last_avail_index = 0, .last_used_index = 0,
+            .desc = 0, .avail = 0, .used = 0,
+        };
+    }
+
+    /* Get a UNIX socket. */
+    dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
+    if (dev->sock == -1) {
+        vubr_die("socket");
+    }
+
+    un.sun_family = AF_UNIX;
+    strcpy(un.sun_path, path);
+    len = sizeof(un.sun_family) + strlen(path);
+    unlink(path);
+
+    if (bind(dev->sock, (struct sockaddr *) &un, len) == -1) {
+        vubr_die("bind");
+    }
+
+    if (listen(dev->sock, 1) == -1) {
+        vubr_die("listen");
+    }
+
+    dispatcher_init(&dev->dispatcher);
+    dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev,
+                   vubr_accept_cb);
+
+    DPRINT("Waiting for connections on UNIX socket %s ...\n", path);
+    return dev;
+}
+
+static void
+vubr_backend_udp_setup(VubrDev *dev,
+                       const char *local_host,
+                       uint16_t local_port,
+                       const char *dest_host,
+                       uint16_t dest_port)
+{
+    int sock;
+    struct sockaddr_in si_local = {
+        .sin_family = AF_INET,
+        .sin_port = htons(local_port),
+    };
+
+    if (inet_aton(local_host, &si_local.sin_addr) == 0) {
+        fprintf(stderr, "inet_aton() failed.\n");
+        exit(1);
+    }
+
+    /* setup destination for sends */
+    dev->backend_udp_dest = (struct sockaddr_in) {
+        .sin_family = AF_INET,
+        .sin_port = htons(dest_port),
+    };
+    if (inet_aton(dest_host, &dev->backend_udp_dest.sin_addr) == 0) {
+        fprintf(stderr, "inet_aton() failed.\n");
+        exit(1);
+    }
+
+    sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+    if (sock == -1) {
+        vubr_die("socket");
+    }
+
+    if (bind(sock, (struct sockaddr *)&si_local, sizeof(si_local)) == -1) {
+        vubr_die("bind");
+    }
+
+    dev->backend_udp_sock = sock;
+    dispatcher_add(&dev->dispatcher, sock, dev, vubr_backend_recv_cb);
+    DPRINT("Waiting for data from udp backend on %s:%d...\n",
+           local_host, local_port);
+}
+
+static void
+vubr_run(VubrDev *dev)
+{
+    while (1) {
+        /* timeout 200ms */
+        dispatcher_wait(&dev->dispatcher, 200000);
+        /* Here one can try polling strategy. */
+    }
+}
+
+int
+main(int argc, char *argv[])
+{
+    VubrDev *dev;
+
+    dev = vubr_new("/tmp/vubr.sock");
+    if (!dev) {
+        return 1;
+    }
+
+    vubr_backend_udp_setup(dev,
+                                 "127.0.0.1", 4444,
+                                 "127.0.0.1", 5555);
+    vubr_run(dev);
+    return 0;
+}

diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
index a74c934..b6dde75 100644
--- a/tests/vhost-user-test.c
+++ b/tests/vhost-user-test.c

@@ -98,7 +98,7 @@
         struct vhost_vring_state state;
         struct vhost_vring_addr addr;
         VhostUserMemory memory;
-    };
+    } payload;
 } QEMU_PACKED VhostUserMsg;
 
 static VhostUserMsg m __attribute__ ((unused));
@@ -242,23 +242,23 @@
     case VHOST_USER_GET_FEATURES:
         /* send back features to qemu */
         msg.flags |= VHOST_USER_REPLY_MASK;
-        msg.size = sizeof(m.u64);
-        msg.u64 = 0x1ULL << VHOST_F_LOG_ALL |
+        msg.size = sizeof(m.payload.u64);
+        msg.payload.u64 = 0x1ULL << VHOST_F_LOG_ALL |
             0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
         p = (uint8_t *) &msg;
         qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size);
         break;
 
     case VHOST_USER_SET_FEATURES:
-	g_assert_cmpint(msg.u64 & (0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES),
+	g_assert_cmpint(msg.payload.u64 & (0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES),
 			!=, 0ULL);
         break;
 
     case VHOST_USER_GET_PROTOCOL_FEATURES:
         /* send back features to qemu */
         msg.flags |= VHOST_USER_REPLY_MASK;
-        msg.size = sizeof(m.u64);
-        msg.u64 = 1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
+        msg.size = sizeof(m.payload.u64);
+        msg.payload.u64 = 1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
         p = (uint8_t *) &msg;
         qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size);
         break;
@@ -266,15 +266,15 @@
     case VHOST_USER_GET_VRING_BASE:
         /* send back vring base to qemu */
         msg.flags |= VHOST_USER_REPLY_MASK;
-        msg.size = sizeof(m.state);
-        msg.state.num = 0;
+        msg.size = sizeof(m.payload.state);
+        msg.payload.state.num = 0;
         p = (uint8_t *) &msg;
         qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size);
         break;
 
     case VHOST_USER_SET_MEM_TABLE:
         /* received the mem table */
-        memcpy(&s->memory, &msg.memory, sizeof(msg.memory));
+        memcpy(&s->memory, &msg.payload.memory, sizeof(msg.payload.memory));
         s->fds_num = qemu_chr_fe_get_msgfds(chr, s->fds, G_N_ELEMENTS(s->fds));
 
         /* signal the test that it can continue */

diff --git a/trace-events b/trace-events
index f237c7f..72136b9 100644
--- a/trace-events
+++ b/trace-events

@@ -1031,9 +1031,9 @@
 # monitor.c
 handle_qmp_command(void *mon, const char *cmd_name) "mon %p cmd_name \"%s\""
 monitor_protocol_emitter(void *mon) "mon %p"
-monitor_protocol_event_handler(uint32_t event, void *data, uint64_t last, uint64_t now) "event=%d data=%p last=%" PRId64 " now=%" PRId64
+monitor_protocol_event_handler(uint32_t event, void *qdict) "event=%d data=%p"
 monitor_protocol_event_emit(uint32_t event, void *data) "event=%d data=%p"
-monitor_protocol_event_queue(uint32_t event, void *data, uint64_t rate, uint64_t last, uint64_t now) "event=%d data=%p rate=%" PRId64 " last=%" PRId64 " now=%" PRId64
+monitor_protocol_event_queue(uint32_t event, void *qdict, uint64_t rate) "event=%d data=%p rate=%" PRId64
 monitor_protocol_event_throttle(uint32_t event, uint64_t rate) "event=%d rate=%" PRId64
 
 # hw/net/opencores_eth.c
@@ -1383,6 +1383,10 @@
 # hw/ppc/ppc.c
 ppc_tb_adjust(uint64_t offs1, uint64_t offs2, int64_t diff, int64_t seconds) "adjusted from 0x%"PRIx64" to 0x%"PRIx64", diff %"PRId64" (%"PRId64"s)"
 
+# hw/ppc/prep.c
+prep_io_800_writeb(uint32_t addr, uint32_t val) "0x%08" PRIx32 " => 0x%02" PRIx32
+prep_io_800_readb(uint32_t addr, uint32_t retval) "0x%08" PRIx32 " <= 0x%02" PRIx32
+
 # util/hbitmap.c
 hbitmap_iter_skip_words(const void *hb, void *hbi, uint64_t pos, unsigned long cur) "hb %p hbi %p pos %"PRId64" cur 0x%lx"
 hbitmap_reset(void *hb, uint64_t start, uint64_t count, uint64_t sbit, uint64_t ebit) "hb %p items %"PRIu64",%"PRIu64" bits %"PRIu64"..%"PRIu64

diff --git a/translate-all.c b/translate-all.c
index 333eba4..20ce40e 100644
--- a/translate-all.c
+++ b/translate-all.c

@@ -468,6 +468,8 @@
 # define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
 #elif defined(__sparc__)
 # define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
+#elif defined(__powerpc64__)
+# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
 #elif defined(__aarch64__)
 # define MAX_CODE_GEN_BUFFER_SIZE  (128ul * 1024 * 1024)
 #elif defined(__arm__)

diff --git a/util/event_notifier-posix.c b/util/event_notifier-posix.c
index ed4ca2b..d4a0c63 100644
--- a/util/event_notifier-posix.c
+++ b/util/event_notifier-posix.c

@@ -77,7 +77,7 @@
     close(e->wfd);
 }
 
-int event_notifier_get_fd(EventNotifier *e)
+int event_notifier_get_fd(const EventNotifier *e)
 {
     return e->rfd;
 }

diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
index 1394269..c37acbe 100644
--- a/util/mmap-alloc.c
+++ b/util/mmap-alloc.c

@@ -26,7 +26,7 @@
     void *ptr1;
 
     if (ptr == MAP_FAILED) {
-        return NULL;
+        return MAP_FAILED;
     }
 
     /* Make sure align is a power of 2 */
@@ -41,7 +41,7 @@
                 fd, 0);
     if (ptr1 == MAP_FAILED) {
         munmap(ptr, total);
-        return NULL;
+        return MAP_FAILED;
     }
 
     ptr += offset;

diff --git a/vl.c b/vl.c
index 332d828..f5f7c3f 100644
--- a/vl.c
+++ b/vl.c

@@ -2769,7 +2769,12 @@
         return false;
     }
 
-    if (g_str_equal(type, "filter-buffer")) {
+    /*
+     * return false for concrete netfilters since
+     * they depend on netdevs already existing
+     */
+    if (g_str_equal(type, "filter-buffer") ||
+        g_str_equal(type, "filter-dump")) {
         return false;
     }
 
@@ -4101,8 +4106,8 @@
 
     machine_class->max_cpus = machine_class->max_cpus ?: 1; /* Default to UP */
     if (max_cpus > machine_class->max_cpus) {
-        fprintf(stderr, "Number of SMP cpus requested (%d), exceeds max cpus "
-                "supported by machine `%s' (%d)\n", max_cpus,
+        fprintf(stderr, "Number of SMP CPUs requested (%d) exceeds max CPUs "
+                "supported by machine '%s' (%d)\n", max_cpus,
                 machine_class->name, machine_class->max_cpus);
         exit(1);
     }