Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging

* more of Peter Crosthwaite's multiarch preparation patches
* unlocked MMIO support in KVM
* support for compilation with ICC

# gpg: Signature made Mon Jul  6 13:59:20 2015 BST using RSA key ID 78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream:
  exec: skip MMIO regions correctly in cpu_physical_memory_write_rom_internal
  Stop including qemu-common.h in memory.h
  kvm: Switch to unlocked MMIO
  acpi: mark PMTIMER as unlocked
  kvm: Switch to unlocked PIO
  kvm: First step to push iothread lock out of inner run loop
  memory: let address_space_rw/ld*/st* run outside the BQL
  exec: pull qemu_flush_coalesced_mmio_buffer() into address_space_rw/ld*/st*
  memory: Add global-locking property to memory regions
  main-loop: introduce qemu_mutex_iothread_locked
  main-loop: use qemu_mutex_lock_iothread consistently
  Fix irq route entries exceeding KVM_MAX_IRQ_ROUTES
  cpu-defs: Move out TB_JMP defines
  include/exec: Move tb hash functions out
  include/exec: Move standard exceptions to cpu-all.h
  cpu-defs: Move CPU_TEMP_BUF_NLONGS to tcg
  memory_mapping: Rework cpu related includes
  cutils: allow compilation with icc
  qemu-common: add VEC_OR macro

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/MAINTAINERS b/MAINTAINERS
index 3639173..3d48a6b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -644,7 +644,19 @@
 S: Supported
 F: include/hw/pci/*
 F: hw/pci/*
+
+ACPI
+M: Michael S. Tsirkin <mst@redhat.com>
+M: Igor Mammedov <imammedo@redhat.com>
+S: Supported
+F: include/hw/acpi/*
+F: hw/mem/*
 F: hw/acpi/*
+F: hw/i386/acpi-build.[hc]
+F: hw/i386/*dsl
+F: hw/arm/virt-acpi-build.c
+F: include/hw/arm/virt-acpi-build.h
+F: scripts/acpi*py
 
 ppc4xx
 M: Alexander Graf <agraf@suse.de>
diff --git a/block.c b/block.c
index 81233be..7e130cc 100644
--- a/block.c
+++ b/block.c
@@ -3528,18 +3528,6 @@
     }
 }
 
-void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
-                      int nr_sectors)
-{
-    BdrvDirtyBitmap *bitmap;
-    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
-        if (!bdrv_dirty_bitmap_enabled(bitmap)) {
-            continue;
-        }
-        hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
-    }
-}
-
 /**
  * Advance an HBitmapIter to an arbitrary offset.
  */
diff --git a/block/backup.c b/block/backup.c
index 4a1af68..d3c7d9f 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -38,7 +38,7 @@
 typedef struct BackupBlockJob {
     BlockJob common;
     BlockDriverState *target;
-    /* bitmap for sync=dirty-bitmap */
+    /* bitmap for sync=incremental */
     BdrvDirtyBitmap *sync_bitmap;
     MirrorSyncMode sync_mode;
     RateLimit limit;
@@ -365,7 +365,7 @@
             qemu_coroutine_yield();
             job->common.busy = true;
         }
-    } else if (job->sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
+    } else if (job->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
         ret = backup_run_incremental(job);
     } else {
         /* Both FULL and TOP SYNC_MODE's require copying.. */
@@ -497,10 +497,10 @@
         return;
     }
 
-    if (sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
+    if (sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
         if (!sync_bitmap) {
             error_setg(errp, "must provide a valid bitmap name for "
-                             "\"dirty-bitmap\" sync mode");
+                             "\"incremental\" sync mode");
             return;
         }
 
@@ -535,7 +535,7 @@
     job->on_target_error = on_target_error;
     job->target = target;
     job->sync_mode = sync_mode;
-    job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP ?
+    job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ?
                        sync_bitmap : NULL;
     job->common.len = len;
     job->common.co = qemu_coroutine_create(backup_run);
diff --git a/block/io.c b/block/io.c
index e295992..305e0d9 100644
--- a/block/io.c
+++ b/block/io.c
@@ -283,7 +283,7 @@
         }
         aio_context_release(aio_context);
 
-        if (!aio_ctxs || !g_slist_find(aio_ctxs, aio_context)) {
+        if (!g_slist_find(aio_ctxs, aio_context)) {
             aio_ctxs = g_slist_prepend(aio_ctxs, aio_context);
         }
     }
@@ -1531,28 +1531,54 @@
     return ret;
 }
 
-/* Coroutine wrapper for bdrv_get_block_status() */
-static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
+static int64_t coroutine_fn bdrv_co_get_block_status_above(BlockDriverState *bs,
+        BlockDriverState *base,
+        int64_t sector_num,
+        int nb_sectors,
+        int *pnum)
+{
+    BlockDriverState *p;
+    int64_t ret = 0;
+
+    assert(bs != base);
+    for (p = bs; p != base; p = p->backing_hd) {
+        ret = bdrv_co_get_block_status(p, sector_num, nb_sectors, pnum);
+        if (ret < 0 || ret & BDRV_BLOCK_ALLOCATED) {
+            break;
+        }
+        /* [sector_num, pnum] unallocated on this layer, which could be only
+         * the first part of [sector_num, nb_sectors].  */
+        nb_sectors = MIN(nb_sectors, *pnum);
+    }
+    return ret;
+}
+
+/* Coroutine wrapper for bdrv_get_block_status_above() */
+static void coroutine_fn bdrv_get_block_status_above_co_entry(void *opaque)
 {
     BdrvCoGetBlockStatusData *data = opaque;
-    BlockDriverState *bs = data->bs;
 
-    data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
-                                         data->pnum);
+    data->ret = bdrv_co_get_block_status_above(data->bs, data->base,
+                                               data->sector_num,
+                                               data->nb_sectors,
+                                               data->pnum);
     data->done = true;
 }
 
 /*
- * Synchronous wrapper around bdrv_co_get_block_status().
+ * Synchronous wrapper around bdrv_co_get_block_status_above().
  *
- * See bdrv_co_get_block_status() for details.
+ * See bdrv_co_get_block_status_above() for details.
  */
-int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
-                              int nb_sectors, int *pnum)
+int64_t bdrv_get_block_status_above(BlockDriverState *bs,
+                                    BlockDriverState *base,
+                                    int64_t sector_num,
+                                    int nb_sectors, int *pnum)
 {
     Coroutine *co;
     BdrvCoGetBlockStatusData data = {
         .bs = bs,
+        .base = base,
         .sector_num = sector_num,
         .nb_sectors = nb_sectors,
         .pnum = pnum,
@@ -1561,11 +1587,11 @@
 
     if (qemu_in_coroutine()) {
         /* Fast-path if already in coroutine context */
-        bdrv_get_block_status_co_entry(&data);
+        bdrv_get_block_status_above_co_entry(&data);
     } else {
         AioContext *aio_context = bdrv_get_aio_context(bs);
 
-        co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
+        co = qemu_coroutine_create(bdrv_get_block_status_above_co_entry);
         qemu_coroutine_enter(co, &data);
         while (!data.done) {
             aio_poll(aio_context, true);
@@ -1574,6 +1600,14 @@
     return data.ret;
 }
 
+int64_t bdrv_get_block_status(BlockDriverState *bs,
+                              int64_t sector_num,
+                              int nb_sectors, int *pnum)
+{
+    return bdrv_get_block_status_above(bs, bs->backing_hd,
+                                       sector_num, nb_sectors, pnum);
+}
+
 int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
                                    int nb_sectors, int *pnum)
 {
@@ -2378,8 +2412,6 @@
         return -EPERM;
     }
 
-    bdrv_reset_dirty(bs, sector_num, nb_sectors);
-
     /* Do nothing if disabled.  */
     if (!(bs->open_flags & BDRV_O_UNMAP)) {
         return 0;
@@ -2389,6 +2421,8 @@
         return 0;
     }
 
+    bdrv_set_dirty(bs, sector_num, nb_sectors);
+
     max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS);
     while (nb_sectors > 0) {
         int ret;
diff --git a/block/iscsi.c b/block/iscsi.c
index 49cee4d..5002916 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -70,6 +70,7 @@
     bool dpofua;
     bool has_write_same;
     bool force_next_flush;
+    bool request_timed_out;
 } IscsiLun;
 
 typedef struct IscsiTask {
@@ -100,7 +101,8 @@
 #endif
 } IscsiAIOCB;
 
-#define EVENT_INTERVAL 250
+/* libiscsi uses time_t so its enough to process events every second */
+#define EVENT_INTERVAL 1000
 #define NOP_INTERVAL 5000
 #define MAX_NOP_FAILURES 3
 #define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
@@ -167,6 +169,19 @@
     return -mean * log((double)rand() / RAND_MAX);
 }
 
+/* SCSI_STATUS_TASK_SET_FULL and SCSI_STATUS_TIMEOUT were introduced
+ * in libiscsi 1.10.0 as part of an enum. The LIBISCSI_API_VERSION
+ * macro was introduced in 1.11.0. So use the API_VERSION macro as
+ * a hint that the macros are defined and define them ourselves
+ * otherwise to keep the required libiscsi version at 1.9.0 */
+#if !defined(LIBISCSI_API_VERSION)
+#define QEMU_SCSI_STATUS_TASK_SET_FULL  0x28
+#define QEMU_SCSI_STATUS_TIMEOUT        0x0f000002
+#else
+#define QEMU_SCSI_STATUS_TASK_SET_FULL  SCSI_STATUS_TASK_SET_FULL
+#define QEMU_SCSI_STATUS_TIMEOUT        SCSI_STATUS_TIMEOUT
+#endif
+
 static void
 iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
                         void *command_data, void *opaque)
@@ -187,13 +202,19 @@
                 iTask->do_retry = 1;
                 goto out;
             }
-            /* status 0x28 is SCSI_TASK_SET_FULL. It was first introduced
-             * in libiscsi 1.10.0. Hardcode this value here to avoid
-             * the need to bump the libiscsi requirement to 1.10.0 */
-            if (status == SCSI_STATUS_BUSY || status == 0x28) {
+            if (status == SCSI_STATUS_BUSY ||
+                status == QEMU_SCSI_STATUS_TIMEOUT ||
+                status == QEMU_SCSI_STATUS_TASK_SET_FULL) {
                 unsigned retry_time =
                     exp_random(iscsi_retry_times[iTask->retries - 1]);
-                error_report("iSCSI Busy/TaskSetFull (retry #%u in %u ms): %s",
+                if (status == QEMU_SCSI_STATUS_TIMEOUT) {
+                    /* make sure the request is rescheduled AFTER the
+                     * reconnect is initiated */
+                    retry_time = EVENT_INTERVAL * 2;
+                    iTask->iscsilun->request_timed_out = true;
+                }
+                error_report("iSCSI Busy/TaskSetFull/TimeOut"
+                             " (retry #%u in %u ms): %s",
                              iTask->retries, retry_time,
                              iscsi_get_error(iscsi));
                 aio_timer_init(iTask->iscsilun->aio_context,
@@ -277,20 +298,26 @@
                            iscsilun);
         iscsilun->events = ev;
     }
-
-    /* newer versions of libiscsi may return zero events. In this
-     * case start a timer to ensure we are able to return to service
-     * once this situation changes. */
-    if (!ev) {
-        timer_mod(iscsilun->event_timer,
-                  qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
-    }
 }
 
-static void iscsi_timed_set_events(void *opaque)
+static void iscsi_timed_check_events(void *opaque)
 {
     IscsiLun *iscsilun = opaque;
+
+    /* check for timed out requests */
+    iscsi_service(iscsilun->iscsi, 0);
+
+    if (iscsilun->request_timed_out) {
+        iscsilun->request_timed_out = false;
+        iscsi_reconnect(iscsilun->iscsi);
+    }
+
+    /* newer versions of libiscsi may return zero events. Ensure we are able
+     * to return to service once this situation changes. */
     iscsi_set_events(iscsilun);
+
+    timer_mod(iscsilun->event_timer,
+              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
 }
 
 static void
@@ -1093,16 +1120,37 @@
     return iscsi_name;
 }
 
+static int parse_timeout(const char *target)
+{
+    QemuOptsList *list;
+    QemuOpts *opts;
+    const char *timeout;
+
+    list = qemu_find_opts("iscsi");
+    if (list) {
+        opts = qemu_opts_find(list, target);
+        if (!opts) {
+            opts = QTAILQ_FIRST(&list->head);
+        }
+        if (opts) {
+            timeout = qemu_opt_get(opts, "timeout");
+            if (timeout) {
+                return atoi(timeout);
+            }
+        }
+    }
+
+    return 0;
+}
+
 static void iscsi_nop_timed_event(void *opaque)
 {
     IscsiLun *iscsilun = opaque;
 
-    if (iscsi_get_nops_in_flight(iscsilun->iscsi) > MAX_NOP_FAILURES) {
+    if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
         error_report("iSCSI: NOP timeout. Reconnecting...");
-        iscsi_reconnect(iscsilun->iscsi);
-    }
-
-    if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
+        iscsilun->request_timed_out = true;
+    } else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
         error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
         return;
     }
@@ -1260,10 +1308,13 @@
     timer_mod(iscsilun->nop_timer,
               qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
 
-    /* Prepare a timer for a delayed call to iscsi_set_events */
+    /* Set up a timer for periodic calls to iscsi_set_events and to
+     * scan for command timeout */
     iscsilun->event_timer = aio_timer_new(iscsilun->aio_context,
                                           QEMU_CLOCK_REALTIME, SCALE_MS,
-                                          iscsi_timed_set_events, iscsilun);
+                                          iscsi_timed_check_events, iscsilun);
+    timer_mod(iscsilun->event_timer,
+              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
 }
 
 static void iscsi_modesense_sync(IscsiLun *iscsilun)
@@ -1318,7 +1369,7 @@
     QemuOpts *opts;
     Error *local_err = NULL;
     const char *filename;
-    int i, ret = 0;
+    int i, ret = 0, timeout = 0;
 
     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
     qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -1388,6 +1439,16 @@
         goto out;
     }
 
+    /* timeout handling is broken in libiscsi before 1.15.0 */
+    timeout = parse_timeout(iscsi_url->target);
+#if defined(LIBISCSI_API_VERSION) && LIBISCSI_API_VERSION >= 20150621
+    iscsi_set_timeout(iscsi, timeout);
+#else
+    if (timeout) {
+        error_report("iSCSI: ignoring timeout value for libiscsi <1.15.0");
+    }
+#endif
+
     if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
         error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
             iscsi_get_error(iscsi));
@@ -1736,6 +1797,10 @@
             .name = "initiator-name",
             .type = QEMU_OPT_STRING,
             .help = "Initiator iqn name to use when connecting",
+        },{
+            .name = "timeout",
+            .type = QEMU_OPT_NUMBER,
+            .help = "Request timeout in seconds (default 0 = no timeout)",
         },
         { /* end of list */ }
     },
diff --git a/block/mirror.c b/block/mirror.c
index 048e452..8888cea 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -58,6 +58,7 @@
     int in_flight;
     int sectors_in_flight;
     int ret;
+    bool unmap;
 } MirrorBlockJob;
 
 typedef struct MirrorOp {
@@ -164,6 +165,8 @@
     int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector;
     uint64_t delay_ns = 0;
     MirrorOp *op;
+    int pnum;
+    int64_t ret;
 
     s->sector_num = hbitmap_iter_next(&s->hbi);
     if (s->sector_num < 0) {
@@ -290,8 +293,22 @@
     s->in_flight++;
     s->sectors_in_flight += nb_sectors;
     trace_mirror_one_iteration(s, sector_num, nb_sectors);
-    bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
-                   mirror_read_complete, op);
+
+    ret = bdrv_get_block_status_above(source, NULL, sector_num,
+                                      nb_sectors, &pnum);
+    if (ret < 0 || pnum < nb_sectors ||
+            (ret & BDRV_BLOCK_DATA && !(ret & BDRV_BLOCK_ZERO))) {
+        bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
+                       mirror_read_complete, op);
+    } else if (ret & BDRV_BLOCK_ZERO) {
+        bdrv_aio_write_zeroes(s->target, sector_num, op->nb_sectors,
+                              s->unmap ? BDRV_REQ_MAY_UNMAP : 0,
+                              mirror_write_complete, op);
+    } else {
+        assert(!(ret & BDRV_BLOCK_DATA));
+        bdrv_aio_discard(s->target, sector_num, op->nb_sectors,
+                         mirror_write_complete, op);
+    }
     return delay_ns;
 }
 
@@ -652,6 +669,7 @@
                              int64_t buf_size,
                              BlockdevOnError on_source_error,
                              BlockdevOnError on_target_error,
+                             bool unmap,
                              BlockCompletionFunc *cb,
                              void *opaque, Error **errp,
                              const BlockJobDriver *driver,
@@ -686,6 +704,7 @@
     s->base = base;
     s->granularity = granularity;
     s->buf_size = MAX(buf_size, granularity);
+    s->unmap = unmap;
 
     s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
     if (!s->dirty_bitmap) {
@@ -704,21 +723,22 @@
                   int64_t speed, uint32_t granularity, int64_t buf_size,
                   MirrorSyncMode mode, BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
+                  bool unmap,
                   BlockCompletionFunc *cb,
                   void *opaque, Error **errp)
 {
     bool is_none_mode;
     BlockDriverState *base;
 
-    if (mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
-        error_setg(errp, "Sync mode 'dirty-bitmap' not supported");
+    if (mode == MIRROR_SYNC_MODE_INCREMENTAL) {
+        error_setg(errp, "Sync mode 'incremental' not supported");
         return;
     }
     is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
     base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
     mirror_start_job(bs, target, replaces,
                      speed, granularity, buf_size,
-                     on_source_error, on_target_error, cb, opaque, errp,
+                     on_source_error, on_target_error, unmap, cb, opaque, errp,
                      &mirror_job_driver, is_none_mode, base);
 }
 
@@ -766,7 +786,7 @@
 
     bdrv_ref(base);
     mirror_start_job(bs, base, NULL, speed, 0, 0,
-                     on_error, on_error, cb, opaque, &local_err,
+                     on_error, on_error, false, cb, opaque, &local_err,
                      &commit_active_job_driver, false, base);
     if (local_err) {
         error_propagate(errp, local_err);
diff --git a/block/nfs.c b/block/nfs.c
index ca9e24e..c026ff6 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -35,6 +35,8 @@
 #include "sysemu/sysemu.h"
 #include <nfsc/libnfs.h>
 
+#define QEMU_NFS_MAX_READAHEAD_SIZE 1048576
+
 typedef struct NFSClient {
     struct nfs_context *context;
     struct nfsfh *fh;
@@ -327,6 +329,11 @@
             nfs_set_tcp_syncnt(client->context, val);
 #ifdef LIBNFS_FEATURE_READAHEAD
         } else if (!strcmp(qp->p[i].name, "readahead")) {
+            if (val > QEMU_NFS_MAX_READAHEAD_SIZE) {
+                error_report("NFS Warning: Truncating NFS readahead"
+                             " size to %d", QEMU_NFS_MAX_READAHEAD_SIZE);
+                val = QEMU_NFS_MAX_READAHEAD_SIZE;
+            }
             nfs_set_readahead(client->context, val);
 #endif
         } else {
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 0632fc3..b0ee42d 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -940,19 +940,21 @@
     }
 
     free_in_cluster = s->cluster_size - offset_into_cluster(s, offset);
-    if (!offset || free_in_cluster < size) {
-        int64_t new_cluster = alloc_clusters_noref(bs, s->cluster_size);
-        if (new_cluster < 0) {
-            return new_cluster;
+    do {
+        if (!offset || free_in_cluster < size) {
+            int64_t new_cluster = alloc_clusters_noref(bs, s->cluster_size);
+            if (new_cluster < 0) {
+                return new_cluster;
+            }
+
+            if (!offset || ROUND_UP(offset, s->cluster_size) != new_cluster) {
+                offset = new_cluster;
+            }
         }
 
-        if (!offset || ROUND_UP(offset, s->cluster_size) != new_cluster) {
-            offset = new_cluster;
-        }
-    }
-
-    assert(offset);
-    ret = update_refcount(bs, offset, size, 1, false, QCOW2_DISCARD_NEVER);
+        assert(offset);
+        ret = update_refcount(bs, offset, size, 1, false, QCOW2_DISCARD_NEVER);
+    } while (ret == -EAGAIN);
     if (ret < 0) {
         return ret;
     }
diff --git a/blockdev.c b/blockdev.c
index b354676..7fee519 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2167,9 +2167,6 @@
         return;
     }
 
-    /* quiesce block driver; prevent further io */
-    bdrv_drain_all();
-    bdrv_flush(bs);
     bdrv_close(bs);
 
     /* if we have a device attached to this BlockDriverState
@@ -2658,6 +2655,7 @@
                       bool has_buf_size, int64_t buf_size,
                       bool has_on_source_error, BlockdevOnError on_source_error,
                       bool has_on_target_error, BlockdevOnError on_target_error,
+                      bool has_unmap, bool unmap,
                       Error **errp)
 {
     BlockBackend *blk;
@@ -2689,6 +2687,9 @@
     if (!has_buf_size) {
         buf_size = DEFAULT_MIRROR_BUF_SIZE;
     }
+    if (!has_unmap) {
+        unmap = true;
+    }
 
     if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) {
         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity",
@@ -2830,6 +2831,7 @@
                  has_replaces ? replaces : NULL,
                  speed, granularity, buf_size, sync,
                  on_source_error, on_target_error,
+                 unmap,
                  block_job_cb, bs, &local_err);
     if (local_err != NULL) {
         bdrv_unref(target_bs);
diff --git a/configure b/configure
index f02510b..3063739 100755
--- a/configure
+++ b/configure
@@ -4773,7 +4773,7 @@
   echo "CONFIG_BLUEZ=y" >> $config_host_mak
   echo "BLUEZ_CFLAGS=$bluez_cflags" >> $config_host_mak
 fi
-if test "glib_subprocess" = "yes" ; then
+if test "$glib_subprocess" = "yes" ; then
   echo "CONFIG_HAS_GLIB_SUBPROCESS_TESTS=y" >> $config_host_mak
 fi
 echo "GLIB_CFLAGS=$glib_cflags" >> $config_host_mak
diff --git a/docs/bitmaps.md b/docs/bitmaps.md
index f066b48..fa87f07 100644
--- a/docs/bitmaps.md
+++ b/docs/bitmaps.md
@@ -210,7 +210,7 @@
         "bitmap": "bitmap0",
         "target": "incremental.0.img",
         "format": "qcow2",
-        "sync": "dirty-bitmap",
+        "sync": "incremental",
         "mode": "existing"
       }
     }
@@ -235,7 +235,7 @@
         "bitmap": "bitmap0",
         "target": "incremental.1.img",
         "format": "qcow2",
-        "sync": "dirty-bitmap",
+        "sync": "incremental",
         "mode": "existing"
       }
     }
@@ -275,7 +275,7 @@
         "bitmap": "bitmap0",
         "target": "incremental.0.img",
         "format": "qcow2",
-        "sync": "dirty-bitmap",
+        "sync": "incremental",
         "mode": "existing"
       }
     }
@@ -308,7 +308,7 @@
         "bitmap": "bitmap0",
         "target": "incremental.0.img",
         "format": "qcow2",
-        "sync": "dirty-bitmap",
+        "sync": "incremental",
         "mode": "existing"
       }
     }
diff --git a/docs/multiseat.txt b/docs/multiseat.txt
index b963665..ebf2446 100644
--- a/docs/multiseat.txt
+++ b/docs/multiseat.txt
@@ -2,8 +2,8 @@
 multiseat howto (with some multihead coverage)
 ==============================================
 
-host side
----------
+host devices
+------------
 
 First you must compile qemu with a user interface supporting
 multihead/multiseat and input event routing.  Right now this
@@ -41,6 +41,19 @@
 the window which belongs to the video.2 display adapter will be routed
 to these input devices.
 
+Starting with qemu 2.4 and linux kernel 4.1 you can also use virtio
+for the input devices, using this ...
+
+	-device pci-bridge,addr=12.0,chassis_nr=2,id=head.2 \
+	-device secondary-vga,bus=head.2,addr=02.0,id=video.2 \
+	-device virtio-keyboard-pci,bus=head.2,addr=03.0,display=video.2 \
+	-device virtio-tablet-pci,bus=head.2,addr=03.0,display=video.2
+
+... instead of xhci and usb hid devices.
+
+host ui
+-------
+
 The sdl2 ui will start up with two windows, one for each display
 device.  The gtk ui will start with a single window and each display
 in a separate tab.  You can either simply switch tabs to switch heads,
@@ -106,6 +119,26 @@
 Background info is here:
   http://www.freedesktop.org/wiki/Software/systemd/multiseat/
 
+
+guest side with pci-bridge-seat
+-------------------------------
+
+Qemu version 2.4 and newer has a new pci-bridge-seat device which
+can be used instead of pci-bridge.  Just swap the device name in the
+qemu command line above.  The only difference between the two devices
+is the pci id.  We can match the pci id instead of the device path
+with a nice generic rule now, which simplifies the guest
+configuration:
+
+    [root@fedora ~]# cat /etc/udev/rules.d/70-qemu-pci-bridge-seat.rules
+    SUBSYSTEM=="pci", ATTR{vendor}=="0x1b36", ATTR{device}=="0x000a", \
+            TAG+="seat", ENV{ID_AUTOSEAT}="1"
+
+Patch with this rule has been submitted to upstream udev/systemd, was
+accepted and and should be included in the next systemd release (222).
+So, if your guest has this or a newer version, multiseat will work just
+fine without any manual guest configuration.
+
 Enjoy!
 
 --
diff --git a/docs/specs/pci-ids.txt b/docs/specs/pci-ids.txt
index e4a4490..0adcb89 100644
--- a/docs/specs/pci-ids.txt
+++ b/docs/specs/pci-ids.txt
@@ -47,6 +47,7 @@
 1b36:0005  PCI test device (docs/specs/pci-testdev.txt)
 1b36:0006  PCI Rocker Ethernet switch device
 1b36:0007  PCI SD Card Host Controller Interface (SDHCI)
+1b36:000a  PCI-PCI bridge (multiseat)
 
 All these devices are documented in docs/specs.
 
diff --git a/hmp.c b/hmp.c
index 070aaf8..dcc66f1 100644
--- a/hmp.c
+++ b/hmp.c
@@ -1061,7 +1061,7 @@
                      false, NULL, false, NULL,
                      full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
                      true, mode, false, 0, false, 0, false, 0,
-                     false, 0, false, 0, &err);
+                     false, 0, false, 0, false, true, &err);
     hmp_handle_error(mon, &err);
 }
 
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index d5a8b9c..f365140 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -387,7 +387,7 @@
     mcfg->allocation[0].end_bus_number = (memmap[VIRT_PCIE_ECAM].size
                                           / PCIE_MMCFG_SIZE_MIN) - 1;
 
-    build_header(linker, table_data, (void *)mcfg, "MCFG", len, 5);
+    build_header(linker, table_data, (void *)mcfg, "MCFG", len, 1);
 }
 
 /* GTDT */
@@ -413,7 +413,7 @@
 
     build_header(linker, table_data,
                  (void *)(table_data->data + gtdt_start), "GTDT",
-                 table_data->len - gtdt_start, 5);
+                 table_data->len - gtdt_start, 2);
 }
 
 /* MADT */
@@ -423,8 +423,10 @@
 {
     int madt_start = table_data->len;
     const MemMapEntry *memmap = guest_info->memmap;
+    const int *irqmap = guest_info->irqmap;
     AcpiMultipleApicTable *madt;
     AcpiMadtGenericDistributor *gicd;
+    AcpiMadtGenericMsiFrame *gic_msi;
     int i;
 
     madt = acpi_data_push(table_data, sizeof *madt);
@@ -448,9 +450,18 @@
     gicd->length = sizeof(*gicd);
     gicd->base_address = memmap[VIRT_GIC_DIST].base;
 
+    gic_msi = acpi_data_push(table_data, sizeof *gic_msi);
+    gic_msi->type = ACPI_APIC_GENERIC_MSI_FRAME;
+    gic_msi->length = sizeof(*gic_msi);
+    gic_msi->gic_msi_frame_id = 0;
+    gic_msi->base_address = cpu_to_le64(memmap[VIRT_GIC_V2M].base);
+    gic_msi->flags = cpu_to_le32(1);
+    gic_msi->spi_count = cpu_to_le16(NUM_GICV2M_SPIS);
+    gic_msi->spi_base = cpu_to_le16(irqmap[VIRT_GIC_V2M] + ARM_SPI_BASE);
+
     build_header(linker, table_data,
                  (void *)(table_data->data + madt_start), "APIC",
-                 table_data->len - madt_start, 5);
+                 table_data->len - madt_start, 3);
 }
 
 /* FADT */
@@ -507,7 +518,7 @@
     g_array_append_vals(table_data, dsdt->buf->data, dsdt->buf->len);
     build_header(linker, table_data,
         (void *)(table_data->data + table_data->len - dsdt->buf->len),
-        "DSDT", dsdt->buf->len, 5);
+        "DSDT", dsdt->buf->len, 2);
     free_aml_allocator();
 }
 
@@ -545,6 +556,7 @@
      * FADT
      * GTDT
      * MADT
+     * MCFG
      * DSDT
      */
 
@@ -552,7 +564,7 @@
     dsdt = tables_blob->len;
     build_dsdt(tables_blob, tables->linker, guest_info);
 
-    /* FADT MADT GTDT SPCR pointed to by RSDT */
+    /* FADT MADT GTDT MCFG SPCR pointed to by RSDT */
     acpi_add_table(table_offsets, tables_blob);
     build_fadt(tables_blob, tables->linker, dsdt);
 
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 4e78083a..4846892 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -956,6 +956,8 @@
     mc->init = machvirt_init;
     mc->max_cpus = 8;
     mc->has_dynamic_sysbus = true;
+    mc->block_default_type = IF_VIRTIO;
+    mc->no_cdrom = 1;
 }
 
 static const TypeInfo machvirt_info = {
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index aa794ca..921e799 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -35,15 +35,15 @@
 }
 
 static void set_pointer(Object *obj, Visitor *v, Property *prop,
-                        int (*parse)(DeviceState *dev, const char *str,
-                                     void **ptr),
+                        void (*parse)(DeviceState *dev, const char *str,
+                                      void **ptr, const char *propname,
+                                      Error **errp),
                         const char *name, Error **errp)
 {
     DeviceState *dev = DEVICE(obj);
     Error *local_err = NULL;
     void **ptr = qdev_get_prop_ptr(dev, prop);
     char *str;
-    int ret;
 
     if (dev->realized) {
         qdev_prop_set_after_realize(dev, name, errp);
@@ -60,26 +60,38 @@
         *ptr = NULL;
         return;
     }
-    ret = parse(dev, str, ptr);
-    error_set_from_qdev_prop_error(errp, ret, dev, prop, str);
+    parse(dev, str, ptr, prop->name, errp);
     g_free(str);
 }
 
 /* --- drive --- */
 
-static int parse_drive(DeviceState *dev, const char *str, void **ptr)
+static void parse_drive(DeviceState *dev, const char *str, void **ptr,
+                        const char *propname, Error **errp)
 {
     BlockBackend *blk;
 
     blk = blk_by_name(str);
     if (!blk) {
-        return -ENOENT;
+        error_setg(errp, "Property '%s.%s' can't find value '%s'",
+                   object_get_typename(OBJECT(dev)), propname, str);
+        return;
     }
     if (blk_attach_dev(blk, dev) < 0) {
-        return -EEXIST;
+        DriveInfo *dinfo = blk_legacy_dinfo(blk);
+
+        if (dinfo->type != IF_NONE) {
+            error_setg(errp, "Drive '%s' is already in use because "
+                       "it has been automatically connected to another "
+                       "device (did you need 'if=none' in the drive options?)",
+                       str);
+        } else {
+            error_setg(errp, "Drive '%s' is already in use by another device",
+                       str);
+        }
+        return;
     }
     *ptr = blk;
-    return 0;
 }
 
 static void release_drive(Object *obj, const char *name, void *opaque)
@@ -121,17 +133,21 @@
 
 /* --- character device --- */
 
-static int parse_chr(DeviceState *dev, const char *str, void **ptr)
+static void parse_chr(DeviceState *dev, const char *str, void **ptr,
+                      const char *propname, Error **errp)
 {
     CharDriverState *chr = qemu_chr_find(str);
     if (chr == NULL) {
-        return -ENOENT;
+        error_setg(errp, "Property '%s.%s' can't find value '%s'",
+                   object_get_typename(OBJECT(dev)), propname, str);
+        return;
     }
     if (qemu_chr_fe_claim(chr) != 0) {
-        return -EEXIST;
+        error_setg(errp, "Property '%s.%s' can't take value '%s', it's in use",
+                  object_get_typename(OBJECT(dev)), propname, str);
+        return;
     }
     *ptr = chr;
-    return 0;
 }
 
 static void release_chr(Object *obj, const char *name, void *opaque)
diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c
index 47c1e8f..e9e686f 100644
--- a/hw/core/qdev-properties.c
+++ b/hw/core/qdev-properties.c
@@ -131,7 +131,7 @@
 static uint64_t qdev_get_prop_mask64(Property *prop)
 {
     assert(prop->info == &qdev_prop_bit);
-    return 0x1 << prop->bitnr;
+    return 0x1ull << prop->bitnr;
 }
 
 static void bit64_prop_set(DeviceState *dev, Property *props, bool val)
diff --git a/hw/core/sysbus.c b/hw/core/sysbus.c
index 92eced9..278a2d1 100644
--- a/hw/core/sysbus.c
+++ b/hw/core/sysbus.c
@@ -281,6 +281,9 @@
 static char *sysbus_get_fw_dev_path(DeviceState *dev)
 {
     SysBusDevice *s = SYS_BUS_DEVICE(dev);
+    SysBusDeviceClass *sbc = SYS_BUS_DEVICE_GET_CLASS(s);
+    /* for the explicit unit address fallback case: */
+    char *addr, *fw_dev_path;
 
     if (s->num_mmio) {
         return g_strdup_printf("%s@" TARGET_FMT_plx, qdev_fw_name(dev),
@@ -289,6 +292,14 @@
     if (s->num_pio) {
         return g_strdup_printf("%s@i%04x", qdev_fw_name(dev), s->pio[0]);
     }
+    if (sbc->explicit_ofw_unit_address) {
+        addr = sbc->explicit_ofw_unit_address(s);
+        if (addr) {
+            fw_dev_path = g_strdup_printf("%s@%s", qdev_fw_name(dev), addr);
+            g_free(addr);
+            return fw_dev_path;
+        }
+    }
     return g_strdup(qdev_fw_name(dev));
 }
 
diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs
index 0be5d97..bd4f147 100644
--- a/hw/i386/Makefile.objs
+++ b/hw/i386/Makefile.objs
@@ -8,8 +8,7 @@
 obj-y += kvmvapic.o
 obj-y += acpi-build.o
 hw/i386/acpi-build.o: hw/i386/acpi-build.c \
-	hw/i386/acpi-dsdt.hex hw/i386/q35-acpi-dsdt.hex \
-	hw/i386/ssdt-tpm.hex hw/i386/ssdt-tpm2.hex
+	hw/i386/acpi-dsdt.hex hw/i386/q35-acpi-dsdt.hex
 
 iasl-option=$(shell if test -z "`$(1) $(2) 2>&1 > /dev/null`" \
     ; then echo "$(2)"; else echo "$(3)"; fi ;)
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index b71e942..aed811a 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -433,9 +433,6 @@
                  table_data->len - madt_start, 1);
 }
 
-#include "hw/i386/ssdt-tpm.hex"
-#include "hw/i386/ssdt-tpm2.hex"
-
 /* Assign BSEL property to all buses.  In the future, this can be changed
  * to only assign to buses that support hotplug.
  */
@@ -1328,6 +1325,19 @@
                 Aml *scope = aml_scope("PCI0");
                 /* Scan all PCI buses. Generate tables to support hotplug. */
                 build_append_pci_bus_devices(scope, bus, pm->pcihp_bridge_en);
+
+                if (misc->tpm_version != TPM_VERSION_UNSPEC) {
+                    dev = aml_device("ISA.TPM");
+                    aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0C31")));
+                    aml_append(dev, aml_name_decl("_STA", aml_int(0xF)));
+                    crs = aml_resource_template();
+                    aml_append(crs, aml_memory32_fixed(TPM_TIS_ADDR_BASE,
+                               TPM_TIS_ADDR_SIZE, AML_READ_WRITE));
+                    aml_append(crs, aml_irq_no_flags(TPM_TIS_IRQ));
+                    aml_append(dev, aml_name_decl("_CRS", crs));
+                    aml_append(scope, dev);
+                }
+
                 aml_append(sb_scope, scope);
             }
         }
@@ -1383,22 +1393,9 @@
 }
 
 static void
-build_tpm_ssdt(GArray *table_data, GArray *linker)
-{
-    void *tpm_ptr;
-
-    tpm_ptr = acpi_data_push(table_data, sizeof(ssdt_tpm_aml));
-    memcpy(tpm_ptr, ssdt_tpm_aml, sizeof(ssdt_tpm_aml));
-}
-
-static void
 build_tpm2(GArray *table_data, GArray *linker)
 {
     Acpi20TPM2 *tpm2_ptr;
-    void *tpm_ptr;
-
-    tpm_ptr = acpi_data_push(table_data, sizeof(ssdt_tpm2_aml));
-    memcpy(tpm_ptr, ssdt_tpm2_aml, sizeof(ssdt_tpm2_aml));
 
     tpm2_ptr = acpi_data_push(table_data, sizeof *tpm2_ptr);
 
@@ -1512,7 +1509,7 @@
      */
     if (hotplugabble_address_space_size) {
         numamem = acpi_data_push(table_data, sizeof *numamem);
-        acpi_build_srat_memory(numamem, pcms->hotplug_memory_base,
+        acpi_build_srat_memory(numamem, pcms->hotplug_memory.base,
                                hotplugabble_address_space_size, 0,
                                MEM_AFFINITY_HOTPLUGGABLE |
                                MEM_AFFINITY_ENABLED);
@@ -1726,16 +1723,9 @@
         acpi_add_table(table_offsets, tables_blob);
         build_tpm_tcpa(tables_blob, tables->linker, tables->tcpalog);
 
-        acpi_add_table(table_offsets, tables_blob);
-        switch (misc.tpm_version) {
-        case TPM_VERSION_1_2:
-            build_tpm_ssdt(tables_blob, tables->linker);
-            break;
-        case TPM_VERSION_2_0:
+        if (misc.tpm_version == TPM_VERSION_2_0) {
+            acpi_add_table(table_offsets, tables_blob);
             build_tpm2(tables_blob, tables->linker);
-            break;
-        default:
-            assert(false);
         }
     }
     if (guest_info->numa_nodes) {
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 7072930..a66416d 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -64,7 +64,6 @@
 #include "hw/pci/pci_host.h"
 #include "acpi-build.h"
 #include "hw/mem/pc-dimm.h"
-#include "trace.h"
 #include "qapi/visitor.h"
 #include "qapi-visit.h"
 
@@ -1297,7 +1296,7 @@
             exit(EXIT_FAILURE);
         }
 
-        pcms->hotplug_memory_base =
+        pcms->hotplug_memory.base =
             ROUND_UP(0x100000000ULL + above_4g_mem_size, 1ULL << 30);
 
         if (pcms->enforce_aligned_dimm) {
@@ -1305,17 +1304,17 @@
             hotplug_mem_size += (1ULL << 30) * machine->ram_slots;
         }
 
-        if ((pcms->hotplug_memory_base + hotplug_mem_size) <
+        if ((pcms->hotplug_memory.base + hotplug_mem_size) <
             hotplug_mem_size) {
             error_report("unsupported amount of maximum memory: " RAM_ADDR_FMT,
                          machine->maxram_size);
             exit(EXIT_FAILURE);
         }
 
-        memory_region_init(&pcms->hotplug_memory, OBJECT(pcms),
+        memory_region_init(&pcms->hotplug_memory.mr, OBJECT(pcms),
                            "hotplug-memory", hotplug_mem_size);
-        memory_region_add_subregion(system_memory, pcms->hotplug_memory_base,
-                                    &pcms->hotplug_memory);
+        memory_region_add_subregion(system_memory, pcms->hotplug_memory.base,
+                                    &pcms->hotplug_memory.mr);
     }
 
     /* Initialize PC system firmware */
@@ -1333,9 +1332,9 @@
     fw_cfg = bochs_bios_init();
     rom_set_fw(fw_cfg);
 
-    if (guest_info->has_reserved_memory && pcms->hotplug_memory_base) {
+    if (guest_info->has_reserved_memory && pcms->hotplug_memory.base) {
         uint64_t *val = g_malloc(sizeof(*val));
-        *val = cpu_to_le64(ROUND_UP(pcms->hotplug_memory_base, 0x1ULL << 30));
+        *val = cpu_to_le64(ROUND_UP(pcms->hotplug_memory.base, 0x1ULL << 30));
         fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val));
     }
 
@@ -1554,88 +1553,31 @@
 static void pc_dimm_plug(HotplugHandler *hotplug_dev,
                          DeviceState *dev, Error **errp)
 {
-    int slot;
     HotplugHandlerClass *hhc;
     Error *local_err = NULL;
     PCMachineState *pcms = PC_MACHINE(hotplug_dev);
-    MachineState *machine = MACHINE(hotplug_dev);
     PCDIMMDevice *dimm = PC_DIMM(dev);
     PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
     MemoryRegion *mr = ddc->get_memory_region(dimm);
-    uint64_t existing_dimms_capacity = 0;
     uint64_t align = TARGET_PAGE_SIZE;
-    uint64_t addr;
-
-    addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err);
-    if (local_err) {
-        goto out;
-    }
 
     if (memory_region_get_alignment(mr) && pcms->enforce_aligned_dimm) {
         align = memory_region_get_alignment(mr);
     }
 
-    addr = pc_dimm_get_free_addr(pcms->hotplug_memory_base,
-                                 memory_region_size(&pcms->hotplug_memory),
-                                 !addr ? NULL : &addr, align,
-                                 memory_region_size(mr), &local_err);
-    if (local_err) {
-        goto out;
-    }
-
-    existing_dimms_capacity = pc_existing_dimms_capacity(&local_err);
-    if (local_err) {
-        goto out;
-    }
-
-    if (existing_dimms_capacity + memory_region_size(mr) >
-        machine->maxram_size - machine->ram_size) {
-        error_setg(&local_err, "not enough space, currently 0x%" PRIx64
-                   " in use of total hot pluggable 0x" RAM_ADDR_FMT,
-                   existing_dimms_capacity,
-                   machine->maxram_size - machine->ram_size);
-        goto out;
-    }
-
-    object_property_set_int(OBJECT(dev), addr, PC_DIMM_ADDR_PROP, &local_err);
-    if (local_err) {
-        goto out;
-    }
-    trace_mhp_pc_dimm_assigned_address(addr);
-
-    slot = object_property_get_int(OBJECT(dev), PC_DIMM_SLOT_PROP, &local_err);
-    if (local_err) {
-        goto out;
-    }
-
-    slot = pc_dimm_get_free_slot(slot == PC_DIMM_UNASSIGNED_SLOT ? NULL : &slot,
-                                 machine->ram_slots, &local_err);
-    if (local_err) {
-        goto out;
-    }
-    object_property_set_int(OBJECT(dev), slot, PC_DIMM_SLOT_PROP, &local_err);
-    if (local_err) {
-        goto out;
-    }
-    trace_mhp_pc_dimm_assigned_slot(slot);
-
     if (!pcms->acpi_dev) {
         error_setg(&local_err,
                    "memory hotplug is not enabled: missing acpi device");
         goto out;
     }
 
-    if (kvm_enabled() && !kvm_has_free_slot(machine)) {
-        error_setg(&local_err, "hypervisor has no free memory slots left");
+    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, &local_err);
+    if (local_err) {
         goto out;
     }
 
-    memory_region_add_subregion(&pcms->hotplug_memory,
-                                addr - pcms->hotplug_memory_base, mr);
-    vmstate_register_ram(mr, dev);
-
     hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
-    hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
+    hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &error_abort);
 out:
     error_propagate(errp, local_err);
 }
@@ -1677,9 +1619,7 @@
         goto out;
     }
 
-    memory_region_del_subregion(&pcms->hotplug_memory, mr);
-    vmstate_unregister_ram(mr, dev);
-
+    pc_dimm_memory_unplug(dev, &pcms->hotplug_memory, mr);
     object_unparent(OBJECT(dev));
 
  out:
@@ -1766,7 +1706,7 @@
                                           const char *name, Error **errp)
 {
     PCMachineState *pcms = PC_MACHINE(obj);
-    int64_t value = memory_region_size(&pcms->hotplug_memory);
+    int64_t value = memory_region_size(&pcms->hotplug_memory.mr);
 
     visit_type_int(v, &value, name, errp);
 }
diff --git a/hw/i386/ssdt-tpm-common.dsl b/hw/i386/ssdt-tpm-common.dsl
deleted file mode 100644
index 9da4970..0000000
--- a/hw/i386/ssdt-tpm-common.dsl
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
-
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
-
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-/*
- * Common parts for TPM 1.2 and TPM 2 (with slight differences for PPI)
- * to be #included
- */
-
-
-    External(\_SB.PCI0.ISA, DeviceObj)
-    Scope(\_SB.PCI0.ISA) {
-        /* TPM with emulated TPM TIS interface */
-        Device (TPM) {
-            Name (_HID, EisaID ("PNP0C31"))
-            Name (_CRS, ResourceTemplate ()
-            {
-                Memory32Fixed (ReadWrite, TPM_TIS_ADDR_BASE, TPM_TIS_ADDR_SIZE)
-                IRQNoFlags () {TPM_TIS_IRQ}
-            })
-            Method (_STA, 0, NotSerialized) {
-                Return (0x0F)
-            }
-        }
-    }
diff --git a/hw/i386/ssdt-tpm.dsl b/hw/i386/ssdt-tpm.dsl
deleted file mode 100644
index d81478c..0000000
--- a/hw/i386/ssdt-tpm.dsl
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
-
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
-
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-#include "hw/acpi/tpm.h"
-
-ACPI_EXTRACT_ALL_CODE ssdt_tpm_aml
-
-DefinitionBlock (
-    "ssdt-tpm.aml",     // Output Filename
-    "SSDT",             // Signature
-    0x01,               // SSDT Compliance Revision
-    "BXPC",             // OEMID
-    "BXSSDT",           // TABLE ID
-    0x1                 // OEM Revision
-    )
-{
-#include "ssdt-tpm-common.dsl"
-}
diff --git a/hw/i386/ssdt-tpm.hex.generated b/hw/i386/ssdt-tpm.hex.generated
deleted file mode 100644
index 874418c..0000000
--- a/hw/i386/ssdt-tpm.hex.generated
+++ /dev/null
@@ -1,109 +0,0 @@
-static unsigned char ssdt_tpm_aml[] = {
-0x53,
-0x53,
-0x44,
-0x54,
-0x6b,
-0x0,
-0x0,
-0x0,
-0x1,
-0x37,
-0x42,
-0x58,
-0x50,
-0x43,
-0x0,
-0x0,
-0x42,
-0x58,
-0x53,
-0x53,
-0x44,
-0x54,
-0x0,
-0x0,
-0x1,
-0x0,
-0x0,
-0x0,
-0x49,
-0x4e,
-0x54,
-0x4c,
-0x7,
-0x11,
-0x14,
-0x20,
-0x10,
-0x46,
-0x4,
-0x5c,
-0x2f,
-0x3,
-0x5f,
-0x53,
-0x42,
-0x5f,
-0x50,
-0x43,
-0x49,
-0x30,
-0x49,
-0x53,
-0x41,
-0x5f,
-0x5b,
-0x82,
-0x33,
-0x54,
-0x50,
-0x4d,
-0x5f,
-0x8,
-0x5f,
-0x48,
-0x49,
-0x44,
-0xc,
-0x41,
-0xd0,
-0xc,
-0x31,
-0x8,
-0x5f,
-0x43,
-0x52,
-0x53,
-0x11,
-0x14,
-0xa,
-0x11,
-0x86,
-0x9,
-0x0,
-0x1,
-0x0,
-0x0,
-0xd4,
-0xfe,
-0x0,
-0x50,
-0x0,
-0x0,
-0x22,
-0x20,
-0x0,
-0x79,
-0x0,
-0x14,
-0x9,
-0x5f,
-0x53,
-0x54,
-0x41,
-0x0,
-0xa4,
-0xa,
-0xf
-};
diff --git a/hw/i386/ssdt-tpm2.dsl b/hw/i386/ssdt-tpm2.dsl
deleted file mode 100644
index 58bbbf8..0000000
--- a/hw/i386/ssdt-tpm2.dsl
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
-
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
-
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-#include "hw/acpi/tpm.h"
-
-ACPI_EXTRACT_ALL_CODE ssdt_tpm2_aml
-
-DefinitionBlock (
-    "ssdt-tpm2.aml",    // Output Filename
-    "SSDT",             // Signature
-    0x01,               // SSDT Compliance Revision
-    "BXPC",             // OEMID
-    "BXSSDT",           // TABLE ID
-    0x1                 // OEM Revision
-    )
-{
-#include "ssdt-tpm-common.dsl"
-}
diff --git a/hw/i386/ssdt-tpm2.hex.generated b/hw/i386/ssdt-tpm2.hex.generated
deleted file mode 100644
index 9ea8271..0000000
--- a/hw/i386/ssdt-tpm2.hex.generated
+++ /dev/null
@@ -1,109 +0,0 @@
-static unsigned char ssdt_tpm2_aml[] = {
-0x53,
-0x53,
-0x44,
-0x54,
-0x6b,
-0x0,
-0x0,
-0x0,
-0x1,
-0x37,
-0x42,
-0x58,
-0x50,
-0x43,
-0x0,
-0x0,
-0x42,
-0x58,
-0x53,
-0x53,
-0x44,
-0x54,
-0x0,
-0x0,
-0x1,
-0x0,
-0x0,
-0x0,
-0x49,
-0x4e,
-0x54,
-0x4c,
-0x7,
-0x11,
-0x14,
-0x20,
-0x10,
-0x46,
-0x4,
-0x5c,
-0x2f,
-0x3,
-0x5f,
-0x53,
-0x42,
-0x5f,
-0x50,
-0x43,
-0x49,
-0x30,
-0x49,
-0x53,
-0x41,
-0x5f,
-0x5b,
-0x82,
-0x33,
-0x54,
-0x50,
-0x4d,
-0x5f,
-0x8,
-0x5f,
-0x48,
-0x49,
-0x44,
-0xc,
-0x41,
-0xd0,
-0xc,
-0x31,
-0x8,
-0x5f,
-0x43,
-0x52,
-0x53,
-0x11,
-0x14,
-0xa,
-0x11,
-0x86,
-0x9,
-0x0,
-0x1,
-0x0,
-0x0,
-0xd4,
-0xfe,
-0x0,
-0x50,
-0x0,
-0x0,
-0x22,
-0x20,
-0x0,
-0x79,
-0x0,
-0x14,
-0x9,
-0x5f,
-0x53,
-0x54,
-0x41,
-0x0,
-0xa4,
-0xa,
-0xf
-};
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index b4b65c1..bb6a92f 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -45,11 +45,11 @@
 } while (0)
 
 static void check_cmd(AHCIState *s, int port);
-static int handle_cmd(AHCIState *s,int port,int slot);
+static int handle_cmd(AHCIState *s, int port, uint8_t slot);
 static void ahci_reset_port(AHCIState *s, int port);
 static void ahci_write_fis_d2h(AHCIDevice *ad, uint8_t *cmd_fis);
 static void ahci_init_d2h(AHCIDevice *ad);
-static int ahci_dma_prepare_buf(IDEDMA *dma, int is_write);
+static int ahci_dma_prepare_buf(IDEDMA *dma, int32_t limit);
 static void ahci_commit_buf(IDEDMA *dma, uint32_t tx_bytes);
 static bool ahci_map_clb_address(AHCIDevice *ad);
 static bool ahci_map_fis_address(AHCIDevice *ad);
@@ -106,8 +106,6 @@
         val = pr->scr_err;
         break;
     case PORT_SCR_ACT:
-        pr->scr_act &= ~s->dev[port].finished;
-        s->dev[port].finished = 0;
         val = pr->scr_act;
         break;
     case PORT_CMD_ISSUE:
@@ -331,8 +329,7 @@
     }
 }
 
-static uint64_t ahci_mem_read(void *opaque, hwaddr addr,
-                              unsigned size)
+static uint64_t ahci_mem_read_32(void *opaque, hwaddr addr)
 {
     AHCIState *s = opaque;
     uint32_t val = 0;
@@ -368,6 +365,30 @@
 }
 
 
+/**
+ * AHCI 1.3 section 3 ("HBA Memory Registers")
+ * Support unaligned 8/16/32 bit reads, and 64 bit aligned reads.
+ * Caller is responsible for masking unwanted higher order bytes.
+ */
+static uint64_t ahci_mem_read(void *opaque, hwaddr addr, unsigned size)
+{
+    hwaddr aligned = addr & ~0x3;
+    int ofst = addr - aligned;
+    uint64_t lo = ahci_mem_read_32(opaque, aligned);
+    uint64_t hi;
+
+    /* if < 8 byte read does not cross 4 byte boundary */
+    if (ofst + size <= 4) {
+        return lo >> (ofst * 8);
+    }
+    g_assert_cmpint(size, >, 1);
+
+    /* If the 64bit read is unaligned, we will produce undefined
+     * results. AHCI does not support unaligned 64bit reads. */
+    hi = ahci_mem_read_32(opaque, aligned + 4);
+    return (hi << 32 | lo) >> (ofst * 8);
+}
+
 
 static void ahci_mem_write(void *opaque, hwaddr addr,
                            uint64_t val, unsigned size)
@@ -483,7 +504,7 @@
 static void check_cmd(AHCIState *s, int port)
 {
     AHCIPortRegs *pr = &s->dev[port].port_regs;
-    int slot;
+    uint8_t slot;
 
     if ((pr->cmd & PORT_CMD_START) && pr->cmd_issue) {
         for (slot = 0; (slot < 32) && pr->cmd_issue; slot++) {
@@ -558,6 +579,7 @@
     /* reset ncq queue */
     for (i = 0; i < AHCI_MAX_CMDS; i++) {
         NCQTransferState *ncq_tfs = &s->dev[port].ncq_tfs[i];
+        ncq_tfs->halt = false;
         if (!ncq_tfs->used) {
             continue;
         }
@@ -642,14 +664,14 @@
     ad->lst = NULL;
 }
 
-static void ahci_write_fis_sdb(AHCIState *s, int port, uint32_t finished)
+static void ahci_write_fis_sdb(AHCIState *s, NCQTransferState *ncq_tfs)
 {
-    AHCIDevice *ad = &s->dev[port];
+    AHCIDevice *ad = ncq_tfs->drive;
     AHCIPortRegs *pr = &ad->port_regs;
     IDEState *ide_state;
     SDBFIS *sdb_fis;
 
-    if (!s->dev[port].res_fis ||
+    if (!ad->res_fis ||
         !(pr->cmd & PORT_CMD_FIS_RX)) {
         return;
     }
@@ -659,53 +681,35 @@
 
     sdb_fis->type = SATA_FIS_TYPE_SDB;
     /* Interrupt pending & Notification bit */
-    sdb_fis->flags = (ad->hba->control_regs.irqstatus ? (1 << 6) : 0);
+    sdb_fis->flags = 0x40; /* Interrupt bit, always 1 for NCQ */
     sdb_fis->status = ide_state->status & 0x77;
     sdb_fis->error = ide_state->error;
     /* update SAct field in SDB_FIS */
-    s->dev[port].finished |= finished;
     sdb_fis->payload = cpu_to_le32(ad->finished);
 
     /* Update shadow registers (except BSY 0x80 and DRQ 0x08) */
     pr->tfdata = (ad->port.ifs[0].error << 8) |
         (ad->port.ifs[0].status & 0x77) |
         (pr->tfdata & 0x88);
+    pr->scr_act &= ~ad->finished;
+    ad->finished = 0;
 
-    ahci_trigger_irq(s, ad, PORT_IRQ_SDB_FIS);
+    /* Trigger IRQ if interrupt bit is set (which currently, it always is) */
+    if (sdb_fis->flags & 0x40) {
+        ahci_trigger_irq(s, ad, PORT_IRQ_SDB_FIS);
+    }
 }
 
 static void ahci_write_fis_pio(AHCIDevice *ad, uint16_t len)
 {
     AHCIPortRegs *pr = &ad->port_regs;
-    uint8_t *pio_fis, *cmd_fis;
-    uint64_t tbl_addr;
-    dma_addr_t cmd_len = 0x80;
+    uint8_t *pio_fis;
     IDEState *s = &ad->port.ifs[0];
 
     if (!ad->res_fis || !(pr->cmd & PORT_CMD_FIS_RX)) {
         return;
     }
 
-    /* map cmd_fis */
-    tbl_addr = le64_to_cpu(ad->cur_cmd->tbl_addr);
-    cmd_fis = dma_memory_map(ad->hba->as, tbl_addr, &cmd_len,
-                             DMA_DIRECTION_TO_DEVICE);
-
-    if (cmd_fis == NULL) {
-        DPRINTF(ad->port_no, "dma_memory_map failed in ahci_write_fis_pio");
-        ahci_trigger_irq(ad->hba, ad, PORT_IRQ_HBUS_ERR);
-        return;
-    }
-
-    if (cmd_len != 0x80) {
-        DPRINTF(ad->port_no,
-                "dma_memory_map mapped too few bytes in ahci_write_fis_pio");
-        dma_memory_unmap(ad->hba->as, cmd_fis, cmd_len,
-                         DMA_DIRECTION_TO_DEVICE, cmd_len);
-        ahci_trigger_irq(ad->hba, ad, PORT_IRQ_HBUS_ERR);
-        return;
-    }
-
     pio_fis = &ad->res_fis[RES_FIS_PSFIS];
 
     pio_fis[0] = SATA_FIS_TYPE_PIO_SETUP;
@@ -721,8 +725,8 @@
     pio_fis[9] = s->hob_lcyl;
     pio_fis[10] = s->hob_hcyl;
     pio_fis[11] = 0;
-    pio_fis[12] = cmd_fis[12];
-    pio_fis[13] = cmd_fis[13];
+    pio_fis[12] = s->nsector & 0xFF;
+    pio_fis[13] = (s->nsector >> 8) & 0xFF;
     pio_fis[14] = 0;
     pio_fis[15] = s->status;
     pio_fis[16] = len & 255;
@@ -739,9 +743,6 @@
     }
 
     ahci_trigger_irq(ad->hba, ad, PORT_IRQ_PIOS_FIS);
-
-    dma_memory_unmap(ad->hba->as, cmd_fis, cmd_len,
-                     DMA_DIRECTION_TO_DEVICE, cmd_len);
 }
 
 static void ahci_write_fis_d2h(AHCIDevice *ad, uint8_t *cmd_fis)
@@ -749,22 +750,12 @@
     AHCIPortRegs *pr = &ad->port_regs;
     uint8_t *d2h_fis;
     int i;
-    dma_addr_t cmd_len = 0x80;
-    int cmd_mapped = 0;
     IDEState *s = &ad->port.ifs[0];
 
     if (!ad->res_fis || !(pr->cmd & PORT_CMD_FIS_RX)) {
         return;
     }
 
-    if (!cmd_fis) {
-        /* map cmd_fis */
-        uint64_t tbl_addr = le64_to_cpu(ad->cur_cmd->tbl_addr);
-        cmd_fis = dma_memory_map(ad->hba->as, tbl_addr, &cmd_len,
-                                 DMA_DIRECTION_TO_DEVICE);
-        cmd_mapped = 1;
-    }
-
     d2h_fis = &ad->res_fis[RES_FIS_RFIS];
 
     d2h_fis[0] = SATA_FIS_TYPE_REGISTER_D2H;
@@ -780,8 +771,8 @@
     d2h_fis[9] = s->hob_lcyl;
     d2h_fis[10] = s->hob_hcyl;
     d2h_fis[11] = 0;
-    d2h_fis[12] = cmd_fis[12];
-    d2h_fis[13] = cmd_fis[13];
+    d2h_fis[12] = s->nsector & 0xFF;
+    d2h_fis[13] = (s->nsector >> 8) & 0xFF;
     for (i = 14; i < 20; i++) {
         d2h_fis[i] = 0;
     }
@@ -795,26 +786,22 @@
     }
 
     ahci_trigger_irq(ad->hba, ad, PORT_IRQ_D2H_REG_FIS);
-
-    if (cmd_mapped) {
-        dma_memory_unmap(ad->hba->as, cmd_fis, cmd_len,
-                         DMA_DIRECTION_TO_DEVICE, cmd_len);
-    }
 }
 
 static int prdt_tbl_entry_size(const AHCI_SG *tbl)
 {
+    /* flags_size is zero-based */
     return (le32_to_cpu(tbl->flags_size) & AHCI_PRDT_SIZE_MASK) + 1;
 }
 
 static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist,
-                                int32_t offset)
+                                AHCICmdHdr *cmd, int64_t limit, int32_t offset)
 {
-    AHCICmdHdr *cmd = ad->cur_cmd;
-    uint32_t opts = le32_to_cpu(cmd->opts);
-    uint64_t prdt_addr = le64_to_cpu(cmd->tbl_addr) + 0x80;
-    int sglist_alloc_hint = opts >> AHCI_CMD_HDR_PRDT_LEN;
-    dma_addr_t prdt_len = (sglist_alloc_hint * sizeof(AHCI_SG));
+    uint16_t opts = le16_to_cpu(cmd->opts);
+    uint16_t prdtl = le16_to_cpu(cmd->prdtl);
+    uint64_t cfis_addr = le64_to_cpu(cmd->tbl_addr);
+    uint64_t prdt_addr = cfis_addr + 0x80;
+    dma_addr_t prdt_len = (prdtl * sizeof(AHCI_SG));
     dma_addr_t real_prdt_len = prdt_len;
     uint8_t *prdt;
     int i;
@@ -834,7 +821,7 @@
      * request for sector sizes up to 32K.
      */
 
-    if (!sglist_alloc_hint) {
+    if (!prdtl) {
         DPRINTF(ad->port_no, "no sg list given by guest: 0x%08x\n", opts);
         return -1;
     }
@@ -853,13 +840,12 @@
     }
 
     /* Get entries in the PRDT, init a qemu sglist accordingly */
-    if (sglist_alloc_hint > 0) {
+    if (prdtl > 0) {
         AHCI_SG *tbl = (AHCI_SG *)prdt;
         sum = 0;
-        for (i = 0; i < sglist_alloc_hint; i++) {
-            /* flags_size is zero-based */
+        for (i = 0; i < prdtl; i++) {
             tbl_entry_size = prdt_tbl_entry_size(&tbl[i]);
-            if (offset <= (sum + tbl_entry_size)) {
+            if (offset < (sum + tbl_entry_size)) {
                 off_idx = i;
                 off_pos = offset - sum;
                 break;
@@ -874,15 +860,16 @@
             goto out;
         }
 
-        qemu_sglist_init(sglist, qbus->parent, (sglist_alloc_hint - off_idx),
+        qemu_sglist_init(sglist, qbus->parent, (prdtl - off_idx),
                          ad->hba->as);
         qemu_sglist_add(sglist, le64_to_cpu(tbl[off_idx].addr) + off_pos,
-                        prdt_tbl_entry_size(&tbl[off_idx]) - off_pos);
+                        MIN(prdt_tbl_entry_size(&tbl[off_idx]) - off_pos,
+                            limit));
 
-        for (i = off_idx + 1; i < sglist_alloc_hint; i++) {
-            /* flags_size is zero-based */
+        for (i = off_idx + 1; i < prdtl && sglist->size < limit; i++) {
             qemu_sglist_add(sglist, le64_to_cpu(tbl[i].addr),
-                            prdt_tbl_entry_size(&tbl[i]));
+                            MIN(prdt_tbl_entry_size(&tbl[i]),
+                                limit - sglist->size));
             if (sglist->size > INT32_MAX) {
                 error_report("AHCI Physical Region Descriptor Table describes "
                              "more than 2 GiB.\n");
@@ -899,28 +886,25 @@
     return r;
 }
 
-static void ncq_cb(void *opaque, int ret)
+static void ncq_err(NCQTransferState *ncq_tfs)
 {
-    NCQTransferState *ncq_tfs = (NCQTransferState *)opaque;
     IDEState *ide_state = &ncq_tfs->drive->port.ifs[0];
 
-    if (ret == -ECANCELED) {
-        return;
-    }
-    /* Clear bit for this tag in SActive */
-    ncq_tfs->drive->port_regs.scr_act &= ~(1 << ncq_tfs->tag);
+    ide_state->error = ABRT_ERR;
+    ide_state->status = READY_STAT | ERR_STAT;
+    ncq_tfs->drive->port_regs.scr_err |= (1 << ncq_tfs->tag);
+}
 
-    if (ret < 0) {
-        /* error */
-        ide_state->error = ABRT_ERR;
-        ide_state->status = READY_STAT | ERR_STAT;
-        ncq_tfs->drive->port_regs.scr_err |= (1 << ncq_tfs->tag);
-    } else {
-        ide_state->status = READY_STAT | SEEK_STAT;
+static void ncq_finish(NCQTransferState *ncq_tfs)
+{
+    /* If we didn't error out, set our finished bit. Errored commands
+     * do not get a bit set for the SDB FIS ACT register, nor do they
+     * clear the outstanding bit in scr_act (PxSACT). */
+    if (!(ncq_tfs->drive->port_regs.scr_err & (1 << ncq_tfs->tag))) {
+        ncq_tfs->drive->finished |= (1 << ncq_tfs->tag);
     }
 
-    ahci_write_fis_sdb(ncq_tfs->drive->hba, ncq_tfs->drive->port_no,
-                       (1 << ncq_tfs->tag));
+    ahci_write_fis_sdb(ncq_tfs->drive->hba, ncq_tfs);
 
     DPRINTF(ncq_tfs->drive->port_no, "NCQ transfer tag %d finished\n",
             ncq_tfs->tag);
@@ -931,6 +915,35 @@
     ncq_tfs->used = 0;
 }
 
+static void ncq_cb(void *opaque, int ret)
+{
+    NCQTransferState *ncq_tfs = (NCQTransferState *)opaque;
+    IDEState *ide_state = &ncq_tfs->drive->port.ifs[0];
+
+    if (ret == -ECANCELED) {
+        return;
+    }
+
+    if (ret < 0) {
+        bool is_read = ncq_tfs->cmd == READ_FPDMA_QUEUED;
+        BlockErrorAction action = blk_get_error_action(ide_state->blk,
+                                                       is_read, -ret);
+        if (action == BLOCK_ERROR_ACTION_STOP) {
+            ncq_tfs->halt = true;
+            ide_state->bus->error_status = IDE_RETRY_HBA;
+        } else if (action == BLOCK_ERROR_ACTION_REPORT) {
+            ncq_err(ncq_tfs);
+        }
+        blk_error_action(ide_state->blk, action, is_read, -ret);
+    } else {
+        ide_state->status = READY_STAT | SEEK_STAT;
+    }
+
+    if (!ncq_tfs->halt) {
+        ncq_finish(ncq_tfs);
+    }
+}
+
 static int is_ncq(uint8_t ata_cmd)
 {
     /* Based on SATA 3.2 section 13.6.3.2 */
@@ -946,13 +959,60 @@
     }
 }
 
-static void process_ncq_command(AHCIState *s, int port, uint8_t *cmd_fis,
-                                int slot)
+static void execute_ncq_command(NCQTransferState *ncq_tfs)
 {
+    AHCIDevice *ad = ncq_tfs->drive;
+    IDEState *ide_state = &ad->port.ifs[0];
+    int port = ad->port_no;
+
+    g_assert(is_ncq(ncq_tfs->cmd));
+    ncq_tfs->halt = false;
+
+    switch (ncq_tfs->cmd) {
+    case READ_FPDMA_QUEUED:
+        DPRINTF(port, "NCQ reading %d sectors from LBA %"PRId64", tag %d\n",
+                ncq_tfs->sector_count, ncq_tfs->lba, ncq_tfs->tag);
+
+        DPRINTF(port, "tag %d aio read %"PRId64"\n",
+                ncq_tfs->tag, ncq_tfs->lba);
+
+        dma_acct_start(ide_state->blk, &ncq_tfs->acct,
+                       &ncq_tfs->sglist, BLOCK_ACCT_READ);
+        ncq_tfs->aiocb = dma_blk_read(ide_state->blk, &ncq_tfs->sglist,
+                                      ncq_tfs->lba, ncq_cb, ncq_tfs);
+        break;
+    case WRITE_FPDMA_QUEUED:
+        DPRINTF(port, "NCQ writing %d sectors to LBA %"PRId64", tag %d\n",
+                ncq_tfs->sector_count, ncq_tfs->lba, ncq_tfs->tag);
+
+        DPRINTF(port, "tag %d aio write %"PRId64"\n",
+                ncq_tfs->tag, ncq_tfs->lba);
+
+        dma_acct_start(ide_state->blk, &ncq_tfs->acct,
+                       &ncq_tfs->sglist, BLOCK_ACCT_WRITE);
+        ncq_tfs->aiocb = dma_blk_write(ide_state->blk, &ncq_tfs->sglist,
+                                       ncq_tfs->lba, ncq_cb, ncq_tfs);
+        break;
+    default:
+        DPRINTF(port, "error: unsupported NCQ command (0x%02x) received\n",
+                ncq_tfs->cmd);
+        qemu_sglist_destroy(&ncq_tfs->sglist);
+        ncq_err(ncq_tfs);
+    }
+}
+
+
+static void process_ncq_command(AHCIState *s, int port, uint8_t *cmd_fis,
+                                uint8_t slot)
+{
+    AHCIDevice *ad = &s->dev[port];
+    IDEState *ide_state = &ad->port.ifs[0];
     NCQFrame *ncq_fis = (NCQFrame*)cmd_fis;
     uint8_t tag = ncq_fis->tag >> 3;
-    NCQTransferState *ncq_tfs = &s->dev[port].ncq_tfs[tag];
+    NCQTransferState *ncq_tfs = &ad->ncq_tfs[tag];
+    size_t size;
 
+    g_assert(is_ncq(ncq_fis->command));
     if (ncq_tfs->used) {
         /* error - already in use */
         fprintf(stderr, "%s: tag %d already used\n", __FUNCTION__, tag);
@@ -960,75 +1020,82 @@
     }
 
     ncq_tfs->used = 1;
-    ncq_tfs->drive = &s->dev[port];
+    ncq_tfs->drive = ad;
     ncq_tfs->slot = slot;
+    ncq_tfs->cmdh = &((AHCICmdHdr *)ad->lst)[slot];
+    ncq_tfs->cmd = ncq_fis->command;
     ncq_tfs->lba = ((uint64_t)ncq_fis->lba5 << 40) |
                    ((uint64_t)ncq_fis->lba4 << 32) |
                    ((uint64_t)ncq_fis->lba3 << 24) |
                    ((uint64_t)ncq_fis->lba2 << 16) |
                    ((uint64_t)ncq_fis->lba1 << 8) |
                    (uint64_t)ncq_fis->lba0;
+    ncq_tfs->tag = tag;
 
-    /* Note: We calculate the sector count, but don't currently rely on it.
-     * The total size of the DMA buffer tells us the transfer size instead. */
-    ncq_tfs->sector_count = ((uint16_t)ncq_fis->sector_count_high << 8) |
-                                ncq_fis->sector_count_low;
+    /* Sanity-check the NCQ packet */
+    if (tag != slot) {
+        DPRINTF(port, "Warn: NCQ slot (%d) did not match the given tag (%d)\n",
+                slot, tag);
+    }
+
+    if (ncq_fis->aux0 || ncq_fis->aux1 || ncq_fis->aux2 || ncq_fis->aux3) {
+        DPRINTF(port, "Warn: Attempt to use NCQ auxiliary fields.\n");
+    }
+    if (ncq_fis->prio || ncq_fis->icc) {
+        DPRINTF(port, "Warn: Unsupported attempt to use PRIO/ICC fields\n");
+    }
+    if (ncq_fis->fua & NCQ_FIS_FUA_MASK) {
+        DPRINTF(port, "Warn: Unsupported attempt to use Force Unit Access\n");
+    }
+    if (ncq_fis->tag & NCQ_FIS_RARC_MASK) {
+        DPRINTF(port, "Warn: Unsupported attempt to use Rebuild Assist\n");
+    }
+
+    ncq_tfs->sector_count = ((ncq_fis->sector_count_high << 8) |
+                             ncq_fis->sector_count_low);
+    if (!ncq_tfs->sector_count) {
+        ncq_tfs->sector_count = 0x10000;
+    }
+    size = ncq_tfs->sector_count * 512;
+    ahci_populate_sglist(ad, &ncq_tfs->sglist, ncq_tfs->cmdh, size, 0);
+
+    if (ncq_tfs->sglist.size < size) {
+        error_report("ahci: PRDT length for NCQ command (0x%zx) "
+                     "is smaller than the requested size (0x%zx)",
+                     ncq_tfs->sglist.size, size);
+        qemu_sglist_destroy(&ncq_tfs->sglist);
+        ncq_err(ncq_tfs);
+        ahci_trigger_irq(ad->hba, ad, PORT_IRQ_OVERFLOW);
+        return;
+    } else if (ncq_tfs->sglist.size != size) {
+        DPRINTF(port, "Warn: PRDTL (0x%zx)"
+                " does not match requested size (0x%zx)",
+                ncq_tfs->sglist.size, size);
+    }
 
     DPRINTF(port, "NCQ transfer LBA from %"PRId64" to %"PRId64", "
             "drive max %"PRId64"\n",
-            ncq_tfs->lba, ncq_tfs->lba + ncq_tfs->sector_count - 2,
-            s->dev[port].port.ifs[0].nb_sectors - 1);
+            ncq_tfs->lba, ncq_tfs->lba + ncq_tfs->sector_count - 1,
+            ide_state->nb_sectors - 1);
 
-    ahci_populate_sglist(&s->dev[port], &ncq_tfs->sglist, 0);
-    ncq_tfs->tag = tag;
+    execute_ncq_command(ncq_tfs);
+}
 
-    switch(ncq_fis->command) {
-        case READ_FPDMA_QUEUED:
-            DPRINTF(port, "NCQ reading %d sectors from LBA %"PRId64", "
-                    "tag %d\n",
-                    ncq_tfs->sector_count-1, ncq_tfs->lba, ncq_tfs->tag);
-
-            DPRINTF(port, "tag %d aio read %"PRId64"\n",
-                    ncq_tfs->tag, ncq_tfs->lba);
-
-            dma_acct_start(ncq_tfs->drive->port.ifs[0].blk, &ncq_tfs->acct,
-                           &ncq_tfs->sglist, BLOCK_ACCT_READ);
-            ncq_tfs->aiocb = dma_blk_read(ncq_tfs->drive->port.ifs[0].blk,
-                                          &ncq_tfs->sglist, ncq_tfs->lba,
-                                          ncq_cb, ncq_tfs);
-            break;
-        case WRITE_FPDMA_QUEUED:
-            DPRINTF(port, "NCQ writing %d sectors to LBA %"PRId64", tag %d\n",
-                    ncq_tfs->sector_count-1, ncq_tfs->lba, ncq_tfs->tag);
-
-            DPRINTF(port, "tag %d aio write %"PRId64"\n",
-                    ncq_tfs->tag, ncq_tfs->lba);
-
-            dma_acct_start(ncq_tfs->drive->port.ifs[0].blk, &ncq_tfs->acct,
-                           &ncq_tfs->sglist, BLOCK_ACCT_WRITE);
-            ncq_tfs->aiocb = dma_blk_write(ncq_tfs->drive->port.ifs[0].blk,
-                                           &ncq_tfs->sglist, ncq_tfs->lba,
-                                           ncq_cb, ncq_tfs);
-            break;
-        default:
-            if (is_ncq(cmd_fis[2])) {
-                DPRINTF(port,
-                        "error: unsupported NCQ command (0x%02x) received\n",
-                        cmd_fis[2]);
-            } else {
-                DPRINTF(port,
-                        "error: tried to process non-NCQ command as NCQ\n");
-            }
-            qemu_sglist_destroy(&ncq_tfs->sglist);
+static AHCICmdHdr *get_cmd_header(AHCIState *s, uint8_t port, uint8_t slot)
+{
+    if (port >= s->ports || slot >= AHCI_MAX_CMDS) {
+        return NULL;
     }
+
+    return s->dev[port].lst ? &((AHCICmdHdr *)s->dev[port].lst)[slot] : NULL;
 }
 
 static void handle_reg_h2d_fis(AHCIState *s, int port,
-                               int slot, uint8_t *cmd_fis)
+                               uint8_t slot, uint8_t *cmd_fis)
 {
     IDEState *ide_state = &s->dev[port].port.ifs[0];
-    AHCICmdHdr *cmd = s->dev[port].cur_cmd;
-    uint32_t opts = le32_to_cpu(cmd->opts);
+    AHCICmdHdr *cmd = get_cmd_header(s, port, slot);
+    uint16_t opts = le16_to_cpu(cmd->opts);
 
     if (cmd_fis[1] & 0x0F) {
         DPRINTF(port, "Port Multiplier not supported."
@@ -1108,7 +1175,7 @@
     ide_exec_cmd(&s->dev[port].port, cmd_fis[2]);
 }
 
-static int handle_cmd(AHCIState *s, int port, int slot)
+static int handle_cmd(AHCIState *s, int port, uint8_t slot)
 {
     IDEState *ide_state;
     uint64_t tbl_addr;
@@ -1126,7 +1193,7 @@
         DPRINTF(port, "error: lst not given but cmd handled");
         return -1;
     }
-    cmd = &((AHCICmdHdr *)s->dev[port].lst)[slot];
+    cmd = get_cmd_header(s, port, slot);
     /* remember current slot handle for later */
     s->dev[port].cur_cmd = cmd;
 
@@ -1185,7 +1252,7 @@
     IDEState *s = &ad->port.ifs[0];
     uint32_t size = (uint32_t)(s->data_end - s->data_ptr);
     /* write == ram -> device */
-    uint32_t opts = le32_to_cpu(ad->cur_cmd->opts);
+    uint16_t opts = le16_to_cpu(ad->cur_cmd->opts);
     int is_write = opts & AHCI_CMD_WRITE;
     int is_atapi = opts & AHCI_CMD_ATAPI;
     int has_sglist = 0;
@@ -1197,7 +1264,7 @@
         goto out;
     }
 
-    if (ahci_dma_prepare_buf(dma, is_write)) {
+    if (ahci_dma_prepare_buf(dma, size)) {
         has_sglist = 1;
     }
 
@@ -1243,16 +1310,34 @@
 }
 
 /**
+ * IDE/PIO restarts are handled by the core layer, but NCQ commands
+ * need an extra kick from the AHCI HBA.
+ */
+static void ahci_restart(IDEDMA *dma)
+{
+    AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
+    int i;
+
+    for (i = 0; i < AHCI_MAX_CMDS; i++) {
+        NCQTransferState *ncq_tfs = &ad->ncq_tfs[i];
+        if (ncq_tfs->halt) {
+            execute_ncq_command(ncq_tfs);
+        }
+    }
+}
+
+/**
  * Called in DMA R/W chains to read the PRDT, utilizing ahci_populate_sglist.
  * Not currently invoked by PIO R/W chains,
  * which invoke ahci_populate_sglist via ahci_start_transfer.
  */
-static int32_t ahci_dma_prepare_buf(IDEDMA *dma, int is_write)
+static int32_t ahci_dma_prepare_buf(IDEDMA *dma, int32_t limit)
 {
     AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
     IDEState *s = &ad->port.ifs[0];
 
-    if (ahci_populate_sglist(ad, &s->sg, s->io_buffer_offset) == -1) {
+    if (ahci_populate_sglist(ad, &s->sg, ad->cur_cmd,
+                             limit, s->io_buffer_offset) == -1) {
         DPRINTF(ad->port_no, "ahci_dma_prepare_buf failed.\n");
         return -1;
     }
@@ -1287,7 +1372,7 @@
     uint8_t *p = s->io_buffer + s->io_buffer_index;
     int l = s->io_buffer_size - s->io_buffer_index;
 
-    if (ahci_populate_sglist(ad, &s->sg, s->io_buffer_offset)) {
+    if (ahci_populate_sglist(ad, &s->sg, ad->cur_cmd, l, s->io_buffer_offset)) {
         return 0;
     }
 
@@ -1330,6 +1415,7 @@
 
 static const IDEDMAOps ahci_dma_ops = {
     .start_dma = ahci_start_dma,
+    .restart = ahci_restart,
     .restart_dma = ahci_restart_dma,
     .start_transfer = ahci_start_transfer,
     .prepare_buf = ahci_dma_prepare_buf,
@@ -1400,6 +1486,21 @@
     }
 }
 
+static const VMStateDescription vmstate_ncq_tfs = {
+    .name = "ncq state",
+    .version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(sector_count, NCQTransferState),
+        VMSTATE_UINT64(lba, NCQTransferState),
+        VMSTATE_UINT8(tag, NCQTransferState),
+        VMSTATE_UINT8(cmd, NCQTransferState),
+        VMSTATE_UINT8(slot, NCQTransferState),
+        VMSTATE_BOOL(used, NCQTransferState),
+        VMSTATE_BOOL(halt, NCQTransferState),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
 static const VMStateDescription vmstate_ahci_device = {
     .name = "ahci port",
     .version_id = 1,
@@ -1425,14 +1526,17 @@
         VMSTATE_BOOL(done_atapi_packet, AHCIDevice),
         VMSTATE_INT32(busy_slot, AHCIDevice),
         VMSTATE_BOOL(init_d2h_sent, AHCIDevice),
+        VMSTATE_STRUCT_ARRAY(ncq_tfs, AHCIDevice, AHCI_MAX_CMDS,
+                             1, vmstate_ncq_tfs, NCQTransferState),
         VMSTATE_END_OF_LIST()
     },
 };
 
 static int ahci_state_post_load(void *opaque, int version_id)
 {
-    int i;
+    int i, j;
     struct AHCIDevice *ad;
+    NCQTransferState *ncq_tfs;
     AHCIState *s = opaque;
 
     for (i = 0; i < s->ports; i++) {
@@ -1444,6 +1548,37 @@
             return -1;
         }
 
+        for (j = 0; j < AHCI_MAX_CMDS; j++) {
+            ncq_tfs = &ad->ncq_tfs[j];
+            ncq_tfs->drive = ad;
+
+            if (ncq_tfs->used != ncq_tfs->halt) {
+                return -1;
+            }
+            if (!ncq_tfs->halt) {
+                continue;
+            }
+            if (!is_ncq(ncq_tfs->cmd)) {
+                return -1;
+            }
+            if (ncq_tfs->slot != ncq_tfs->tag) {
+                return -1;
+            }
+            /* If ncq_tfs->halt is justly set, the engine should be engaged,
+             * and the command list buffer should be mapped. */
+            ncq_tfs->cmdh = get_cmd_header(s, i, ncq_tfs->slot);
+            if (!ncq_tfs->cmdh) {
+                return -1;
+            }
+            ahci_populate_sglist(ncq_tfs->drive, &ncq_tfs->sglist,
+                                 ncq_tfs->cmdh, ncq_tfs->sector_count * 512,
+                                 0);
+            if (ncq_tfs->sector_count != ncq_tfs->sglist.size >> 9) {
+                return -1;
+            }
+        }
+
+
         /*
          * If an error is present, ad->busy_slot will be valid and not -1.
          * In this case, an operation is waiting to resume and will re-check
@@ -1460,7 +1595,7 @@
             if (ad->busy_slot < 0 || ad->busy_slot >= AHCI_MAX_CMDS) {
                 return -1;
             }
-            ad->cur_cmd = &((AHCICmdHdr *)ad->lst)[ad->busy_slot];
+            ad->cur_cmd = get_cmd_header(s, i, ad->busy_slot);
         }
     }
 
diff --git a/hw/ide/ahci.h b/hw/ide/ahci.h
index 501c002..9f5b4d2 100644
--- a/hw/ide/ahci.h
+++ b/hw/ide/ahci.h
@@ -195,6 +195,9 @@
 #define RECEIVE_FPDMA_QUEUED               0x65
 #define SEND_FPDMA_QUEUED                  0x64
 
+#define NCQ_FIS_FUA_MASK                   0x80
+#define NCQ_FIS_RARC_MASK                  0x01
+
 #define RES_FIS_DSFIS                      0x00
 #define RES_FIS_PSFIS                      0x20
 #define RES_FIS_RFIS                       0x40
@@ -233,7 +236,8 @@
 } AHCIPortRegs;
 
 typedef struct AHCICmdHdr {
-    uint32_t    opts;
+    uint16_t    opts;
+    uint16_t    prdtl;
     uint32_t    status;
     uint64_t    tbl_addr;
     uint32_t    reserved[4];
@@ -250,13 +254,16 @@
 typedef struct NCQTransferState {
     AHCIDevice *drive;
     BlockAIOCB *aiocb;
+    AHCICmdHdr *cmdh;
     QEMUSGList sglist;
     BlockAcctCookie acct;
-    uint16_t sector_count;
+    uint32_t sector_count;
     uint64_t lba;
     uint8_t tag;
-    int slot;
-    int used;
+    uint8_t cmd;
+    uint8_t slot;
+    bool used;
+    bool halt;
 } NCQTransferState;
 
 struct AHCIDevice {
@@ -312,27 +319,39 @@
     .offset     = vmstate_offset_value(_state, _field, AHCIState),   \
 }
 
+/**
+ * NCQFrame is the same as a Register H2D FIS (described in SATA 3.2),
+ * but some fields have been re-mapped and re-purposed, as seen in
+ * SATA 3.2 section 13.6.4.1 ("READ FPDMA QUEUED")
+ *
+ * cmd_fis[3], feature 7:0, becomes sector count 7:0.
+ * cmd_fis[7], device 7:0, uses bit 7 as the Force Unit Access bit.
+ * cmd_fis[11], feature 15:8, becomes sector count 15:8.
+ * cmd_fis[12], count 7:0, becomes the NCQ TAG (7:3) and RARC bit (0)
+ * cmd_fis[13], count 15:8, becomes the priority value (7:6)
+ * bytes 16-19 become an le32 "auxiliary" field.
+ */
 typedef struct NCQFrame {
     uint8_t fis_type;
     uint8_t c;
     uint8_t command;
-    uint8_t sector_count_low;
+    uint8_t sector_count_low;  /* (feature 7:0) */
     uint8_t lba0;
     uint8_t lba1;
     uint8_t lba2;
-    uint8_t fua;
+    uint8_t fua;               /* (device 7:0) */
     uint8_t lba3;
     uint8_t lba4;
     uint8_t lba5;
-    uint8_t sector_count_high;
-    uint8_t tag;
-    uint8_t reserved5;
-    uint8_t reserved6;
+    uint8_t sector_count_high; /* (feature 15:8) */
+    uint8_t tag;               /* (count 0:7) */
+    uint8_t prio;              /* (count 15:8) */
+    uint8_t icc;
     uint8_t control;
-    uint8_t reserved7;
-    uint8_t reserved8;
-    uint8_t reserved9;
-    uint8_t reserved10;
+    uint8_t aux0;
+    uint8_t aux1;
+    uint8_t aux2;
+    uint8_t aux3;
 } QEMU_PACKED NCQFrame;
 
 typedef struct SDBFIS {
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 1efd98a..122e955 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -716,8 +716,8 @@
 
     sector_num = ide_get_sector(s);
     if (n > 0) {
-        assert(s->io_buffer_size == s->sg.size);
-        dma_buf_commit(s, s->io_buffer_size);
+        assert(n * 512 == s->sg.size);
+        dma_buf_commit(s, s->sg.size);
         sector_num += n;
         ide_set_sector(s, sector_num);
         s->nsector -= n;
@@ -734,7 +734,7 @@
     n = s->nsector;
     s->io_buffer_index = 0;
     s->io_buffer_size = n * 512;
-    if (s->bus->dma->ops->prepare_buf(s->bus->dma, ide_cmd_is_read(s)) < 512) {
+    if (s->bus->dma->ops->prepare_buf(s->bus->dma, s->io_buffer_size) < 512) {
         /* The PRDs were too short. Reset the Active bit, but don't raise an
          * interrupt. */
         s->status = READY_STAT | SEEK_STAT;
@@ -2326,7 +2326,7 @@
 {
 }
 
-static int32_t ide_nop_int32(IDEDMA *dma, int x)
+static int32_t ide_nop_int32(IDEDMA *dma, int32_t l)
 {
     return 0;
 }
@@ -2371,6 +2371,13 @@
      * called function can set a new error status. */
     bus->error_status = 0;
 
+    /* The HBA has generically asked to be kicked on retry */
+    if (error_status & IDE_RETRY_HBA) {
+        if (s->bus->dma->ops->restart) {
+            s->bus->dma->ops->restart(s->bus->dma);
+        }
+    }
+
     if (error_status & IDE_RETRY_DMA) {
         if (error_status & IDE_RETRY_TRIM) {
             ide_restart_dma(s, IDE_DMA_TRIM);
diff --git a/hw/ide/internal.h b/hw/ide/internal.h
index 965cc55..30fdcbc 100644
--- a/hw/ide/internal.h
+++ b/hw/ide/internal.h
@@ -324,7 +324,7 @@
 typedef void DMAStartFunc(IDEDMA *, IDEState *, BlockCompletionFunc *);
 typedef void DMAVoidFunc(IDEDMA *);
 typedef int DMAIntFunc(IDEDMA *, int);
-typedef int32_t DMAInt32Func(IDEDMA *, int);
+typedef int32_t DMAInt32Func(IDEDMA *, int32_t len);
 typedef void DMAu32Func(IDEDMA *, uint32_t);
 typedef void DMAStopFunc(IDEDMA *, bool);
 typedef void DMARestartFunc(void *, int, RunState);
@@ -436,6 +436,7 @@
     DMAInt32Func *prepare_buf;
     DMAu32Func *commit_buf;
     DMAIntFunc *rw_buf;
+    DMAVoidFunc *restart;
     DMAVoidFunc *restart_dma;
     DMAStopFunc *set_inactive;
     DMAVoidFunc *cmd_done;
@@ -499,6 +500,7 @@
 #define IDE_RETRY_READ  0x20
 #define IDE_RETRY_FLUSH 0x40
 #define IDE_RETRY_TRIM 0x80
+#define IDE_RETRY_HBA  0x100
 
 static inline IDEState *idebus_active_if(IDEBus *bus)
 {
diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index dd52d50..a55a479 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c
@@ -499,7 +499,7 @@
     return 0;
 }
 
-static int32_t ide_nop_int32(IDEDMA *dma, int x)
+static int32_t ide_nop_int32(IDEDMA *dma, int32_t l)
 {
     return 0;
 }
diff --git a/hw/ide/pci.c b/hw/ide/pci.c
index 4afd0cf..d31ff88 100644
--- a/hw/ide/pci.c
+++ b/hw/ide/pci.c
@@ -53,10 +53,14 @@
 }
 
 /**
- * Return the number of bytes successfully prepared.
- * -1 on error.
+ * Prepare an sglist based on available PRDs.
+ * @limit: How many bytes to prepare total.
+ *
+ * Returns the number of bytes prepared, -1 on error.
+ * IDEState.io_buffer_size will contain the number of bytes described
+ * by the PRDs, whether or not we added them to the sglist.
  */
-static int32_t bmdma_prepare_buf(IDEDMA *dma, int is_write)
+static int32_t bmdma_prepare_buf(IDEDMA *dma, int32_t limit)
 {
     BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma);
     IDEState *s = bmdma_active_if(bm);
@@ -75,7 +79,7 @@
             /* end of table (with a fail safe of one page) */
             if (bm->cur_prd_last ||
                 (bm->cur_addr - bm->addr) >= BMDMA_PAGE_SIZE) {
-                return s->io_buffer_size;
+                return s->sg.size;
             }
             pci_dma_read(pci_dev, bm->cur_addr, &prd, 8);
             bm->cur_addr += 8;
@@ -90,7 +94,14 @@
         }
         l = bm->cur_prd_len;
         if (l > 0) {
-            qemu_sglist_add(&s->sg, bm->cur_prd_addr, l);
+            uint64_t sg_len;
+
+            /* Don't add extra bytes to the SGList; consume any remaining
+             * PRDs from the guest, but ignore them. */
+            sg_len = MIN(limit - s->sg.size, bm->cur_prd_len);
+            if (sg_len) {
+                qemu_sglist_add(&s->sg, bm->cur_prd_addr, sg_len);
+            }
 
             /* Note: We limit the max transfer to be 2GiB.
              * This should accommodate the largest ATA transaction
diff --git a/hw/input/virtio-input-hid.c b/hw/input/virtio-input-hid.c
index 32cc94a..616a815 100644
--- a/hw/input/virtio-input-hid.c
+++ b/hw/input/virtio-input-hid.c
@@ -252,7 +252,11 @@
 static void virtio_input_hid_realize(DeviceState *dev, Error **errp)
 {
     VirtIOInputHID *vhid = VIRTIO_INPUT_HID(dev);
+
     vhid->hs = qemu_input_handler_register(dev, vhid->handler);
+    if (vhid->display && vhid->hs) {
+        qemu_input_handler_bind(vhid->hs, vhid->display, vhid->head, NULL);
+    }
 }
 
 static void virtio_input_hid_unrealize(DeviceState *dev, Error **errp)
@@ -301,10 +305,17 @@
     }
 }
 
+static Property virtio_input_hid_properties[] = {
+    DEFINE_PROP_STRING("display", VirtIOInputHID, display),
+    DEFINE_PROP_UINT32("head", VirtIOInputHID, head, 0),
+};
+
 static void virtio_input_hid_class_init(ObjectClass *klass, void *data)
 {
+    DeviceClass *dc = DEVICE_CLASS(klass);
     VirtIOInputClass *vic = VIRTIO_INPUT_CLASS(klass);
 
+    dc->props          = virtio_input_hid_properties;
     vic->realize       = virtio_input_hid_realize;
     vic->unrealize     = virtio_input_hid_unrealize;
     vic->change_active = virtio_input_hid_change_active;
diff --git a/hw/intc/arm_gic_common.c b/hw/intc/arm_gic_common.c
index 044ad66..a64d071 100644
--- a/hw/intc/arm_gic_common.c
+++ b/hw/intc/arm_gic_common.c
@@ -123,7 +123,7 @@
 static void arm_gic_common_reset(DeviceState *dev)
 {
     GICState *s = ARM_GIC_COMMON(dev);
-    int i;
+    int i, j;
     memset(s->irq_state, 0, GIC_MAXIRQ * sizeof(gic_irq_state));
     for (i = 0 ; i < s->num_cpu; i++) {
         if (s->revision == REV_11MPCORE) {
@@ -135,15 +135,30 @@
         s->running_irq[i] = 1023;
         s->running_priority[i] = 0x100;
         s->cpu_ctlr[i] = 0;
+        s->bpr[i] = GIC_MIN_BPR;
+        s->abpr[i] = GIC_MIN_ABPR;
+        for (j = 0; j < GIC_INTERNAL; j++) {
+            s->priority1[j][i] = 0;
+        }
+        for (j = 0; j < GIC_NR_SGIS; j++) {
+            s->sgi_pending[j][i] = 0;
+        }
     }
     for (i = 0; i < GIC_NR_SGIS; i++) {
         GIC_SET_ENABLED(i, ALL_CPU_MASK);
         GIC_SET_EDGE_TRIGGER(i);
     }
-    if (s->num_cpu == 1) {
+
+    for (i = 0; i < ARRAY_SIZE(s->priority2); i++) {
+        s->priority2[i] = 0;
+    }
+
+    for (i = 0; i < GIC_MAXIRQ; i++) {
         /* For uniprocessor GICs all interrupts always target the sole CPU */
-        for (i = 0; i < GIC_MAXIRQ; i++) {
+        if (s->num_cpu == 1) {
             s->irq_target[i] = 1;
+        } else {
+            s->irq_target[i] = 0;
         }
     }
     s->ctlr = 0;
diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
index e70633d..bb04862 100644
--- a/hw/mem/pc-dimm.c
+++ b/hw/mem/pc-dimm.c
@@ -23,12 +23,96 @@
 #include "qapi/visitor.h"
 #include "qemu/range.h"
 #include "sysemu/numa.h"
+#include "sysemu/kvm.h"
+#include "trace.h"
 
 typedef struct pc_dimms_capacity {
      uint64_t size;
      Error    **errp;
 } pc_dimms_capacity;
 
+void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
+                         MemoryRegion *mr, uint64_t align, Error **errp)
+{
+    int slot;
+    MachineState *machine = MACHINE(qdev_get_machine());
+    PCDIMMDevice *dimm = PC_DIMM(dev);
+    Error *local_err = NULL;
+    uint64_t existing_dimms_capacity = 0;
+    uint64_t addr;
+
+    addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err);
+    if (local_err) {
+        goto out;
+    }
+
+    addr = pc_dimm_get_free_addr(hpms->base,
+                                 memory_region_size(&hpms->mr),
+                                 !addr ? NULL : &addr, align,
+                                 memory_region_size(mr), &local_err);
+    if (local_err) {
+        goto out;
+    }
+
+    existing_dimms_capacity = pc_existing_dimms_capacity(&local_err);
+    if (local_err) {
+        goto out;
+    }
+
+    if (existing_dimms_capacity + memory_region_size(mr) >
+        machine->maxram_size - machine->ram_size) {
+        error_setg(&local_err, "not enough space, currently 0x%" PRIx64
+                   " in use of total hot pluggable 0x" RAM_ADDR_FMT,
+                   existing_dimms_capacity,
+                   machine->maxram_size - machine->ram_size);
+        goto out;
+    }
+
+    object_property_set_int(OBJECT(dev), addr, PC_DIMM_ADDR_PROP, &local_err);
+    if (local_err) {
+        goto out;
+    }
+    trace_mhp_pc_dimm_assigned_address(addr);
+
+    slot = object_property_get_int(OBJECT(dev), PC_DIMM_SLOT_PROP, &local_err);
+    if (local_err) {
+        goto out;
+    }
+
+    slot = pc_dimm_get_free_slot(slot == PC_DIMM_UNASSIGNED_SLOT ? NULL : &slot,
+                                 machine->ram_slots, &local_err);
+    if (local_err) {
+        goto out;
+    }
+    object_property_set_int(OBJECT(dev), slot, PC_DIMM_SLOT_PROP, &local_err);
+    if (local_err) {
+        goto out;
+    }
+    trace_mhp_pc_dimm_assigned_slot(slot);
+
+    if (kvm_enabled() && !kvm_has_free_slot(machine)) {
+        error_setg(&local_err, "hypervisor has no free memory slots left");
+        goto out;
+    }
+
+    memory_region_add_subregion(&hpms->mr, addr - hpms->base, mr);
+    vmstate_register_ram(mr, dev);
+    numa_set_mem_node_id(addr, memory_region_size(mr), dimm->node);
+
+out:
+    error_propagate(errp, local_err);
+}
+
+void pc_dimm_memory_unplug(DeviceState *dev, MemoryHotplugState *hpms,
+                           MemoryRegion *mr)
+{
+    PCDIMMDevice *dimm = PC_DIMM(dev);
+
+    numa_unset_mem_node_id(dimm->addr, memory_region_size(mr), dimm->node);
+    memory_region_del_subregion(&hpms->mr, mr);
+    vmstate_unregister_ram(mr, dev);
+}
+
 static int pc_existing_dimms_capacity_internal(Object *obj, void *opaque)
 {
     pc_dimms_capacity *cap = opaque;
diff --git a/hw/mips/mips_malta.c b/hw/mips/mips_malta.c
index 786a8f0..3082e75 100644
--- a/hw/mips/mips_malta.c
+++ b/hw/mips/mips_malta.c
@@ -53,6 +53,7 @@
 #include "qemu/error-report.h"
 #include "hw/empty_slot.h"
 #include "sysemu/kvm.h"
+#include "exec/semihost.h"
 
 //#define DEBUG_BOARD_INIT
 
@@ -634,7 +635,13 @@
 
     /* Second part of the bootloader */
     p = (uint32_t *) (base + 0x580);
-    stl_p(p++, 0x24040002);                                      /* addiu a0, zero, 2 */
+
+    if (semihosting_get_argc()) {
+        /* Preserve a0 content as arguments have been passed */
+        stl_p(p++, 0x00000000);                         /* nop */
+    } else {
+        stl_p(p++, 0x24040002);                         /* addiu a0, zero, 2 */
+    }
     stl_p(p++, 0x3c1d0000 | (((ENVP_ADDR - 64) >> 16) & 0xffff)); /* lui sp, high(ENVP_ADDR) */
     stl_p(p++, 0x37bd0000 | ((ENVP_ADDR - 64) & 0xffff));        /* ori sp, sp, low(ENVP_ADDR) */
     stl_p(p++, 0x3c050000 | ((ENVP_ADDR >> 16) & 0xffff));       /* lui a1, high(ENVP_ADDR) */
@@ -887,7 +894,7 @@
        read only location. The kernel location and the arguments table
        location does not change. */
     if (loaderparams.kernel_filename) {
-        env->CP0_Status &= ~((1 << CP0St_BEV) | (1 << CP0St_ERL));
+        env->CP0_Status &= ~(1 << CP0St_ERL);
     }
 
     malta_mips_config(cpu);
diff --git a/hw/pci-bridge/pci_bridge_dev.c b/hw/pci-bridge/pci_bridge_dev.c
index 36f73e1..26aded9 100644
--- a/hw/pci-bridge/pci_bridge_dev.c
+++ b/hw/pci-bridge/pci_bridge_dev.c
@@ -28,7 +28,8 @@
 #include "hw/pci/pci_bus.h"
 #include "hw/hotplug.h"
 
-#define TYPE_PCI_BRIDGE_DEV "pci-bridge"
+#define TYPE_PCI_BRIDGE_DEV      "pci-bridge"
+#define TYPE_PCI_BRIDGE_SEAT_DEV "pci-bridge-seat"
 #define PCI_BRIDGE_DEV(obj) \
     OBJECT_CHECK(PCIBridgeDev, (obj), TYPE_PCI_BRIDGE_DEV)
 
@@ -40,6 +41,7 @@
     MemoryRegion bar;
     uint8_t chassis_nr;
 #define PCI_BRIDGE_DEV_F_MSI_REQ 0
+#define PCI_BRIDGE_DEV_F_SHPC_REQ 1
     uint32_t flags;
 };
 typedef struct PCIBridgeDev PCIBridgeDev;
@@ -54,11 +56,17 @@
     if (err) {
         goto bridge_error;
     }
-    dev->config[PCI_INTERRUPT_PIN] = 0x1;
-    memory_region_init(&bridge_dev->bar, OBJECT(dev), "shpc-bar", shpc_bar_size(dev));
-    err = shpc_init(dev, &br->sec_bus, &bridge_dev->bar, 0);
-    if (err) {
-        goto shpc_error;
+    if (bridge_dev->flags & (1 << PCI_BRIDGE_DEV_F_SHPC_REQ)) {
+        dev->config[PCI_INTERRUPT_PIN] = 0x1;
+        memory_region_init(&bridge_dev->bar, OBJECT(dev), "shpc-bar",
+                           shpc_bar_size(dev));
+        err = shpc_init(dev, &br->sec_bus, &bridge_dev->bar, 0);
+        if (err) {
+            goto shpc_error;
+        }
+    } else {
+        /* MSI is not applicable without SHPC */
+        bridge_dev->flags &= ~(1 << PCI_BRIDGE_DEV_F_MSI_REQ);
     }
     err = slotid_cap_init(dev, 0, bridge_dev->chassis_nr, 0);
     if (err) {
@@ -71,15 +79,19 @@
             goto msi_error;
         }
     }
-    /* TODO: spec recommends using 64 bit prefetcheable BAR.
-     * Check whether that works well. */
-    pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
-		     PCI_BASE_ADDRESS_MEM_TYPE_64, &bridge_dev->bar);
+    if (shpc_present(dev)) {
+        /* TODO: spec recommends using 64 bit prefetcheable BAR.
+         * Check whether that works well. */
+        pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
+                         PCI_BASE_ADDRESS_MEM_TYPE_64, &bridge_dev->bar);
+    }
     return 0;
 msi_error:
     slotid_cap_cleanup(dev);
 slotid_error:
-    shpc_cleanup(dev, &bridge_dev->bar);
+    if (shpc_present(dev)) {
+        shpc_cleanup(dev, &bridge_dev->bar);
+    }
 shpc_error:
     pci_bridge_exitfn(dev);
 bridge_error:
@@ -93,12 +105,15 @@
         msi_uninit(dev);
     }
     slotid_cap_cleanup(dev);
-    shpc_cleanup(dev, &bridge_dev->bar);
+    if (shpc_present(dev)) {
+        shpc_cleanup(dev, &bridge_dev->bar);
+    }
     pci_bridge_exitfn(dev);
 }
 
 static void pci_bridge_dev_instance_finalize(Object *obj)
 {
+    /* this function is idempotent and handles (PCIDevice.shpc == NULL) */
     shpc_free(PCI_DEVICE(obj));
 }
 
@@ -109,7 +124,9 @@
     if (msi_present(d)) {
         msi_write_config(d, address, val, len);
     }
-    shpc_cap_write_config(d, address, val, len);
+    if (shpc_present(d)) {
+        shpc_cap_write_config(d, address, val, len);
+    }
 }
 
 static void qdev_pci_bridge_dev_reset(DeviceState *qdev)
@@ -117,25 +134,65 @@
     PCIDevice *dev = PCI_DEVICE(qdev);
 
     pci_bridge_reset(qdev);
-    shpc_reset(dev);
+    if (shpc_present(dev)) {
+        shpc_reset(dev);
+    }
 }
 
 static Property pci_bridge_dev_properties[] = {
                     /* Note: 0 is not a legal chassis number. */
-    DEFINE_PROP_UINT8("chassis_nr", PCIBridgeDev, chassis_nr, 0),
-    DEFINE_PROP_BIT("msi", PCIBridgeDev, flags, PCI_BRIDGE_DEV_F_MSI_REQ, true),
+    DEFINE_PROP_UINT8(PCI_BRIDGE_DEV_PROP_CHASSIS_NR, PCIBridgeDev, chassis_nr,
+                      0),
+    DEFINE_PROP_BIT(PCI_BRIDGE_DEV_PROP_MSI, PCIBridgeDev, flags,
+                    PCI_BRIDGE_DEV_F_MSI_REQ, true),
+    DEFINE_PROP_BIT(PCI_BRIDGE_DEV_PROP_SHPC, PCIBridgeDev, flags,
+                    PCI_BRIDGE_DEV_F_SHPC_REQ, true),
     DEFINE_PROP_END_OF_LIST(),
 };
 
+static bool pci_device_shpc_present(void *opaque, int version_id)
+{
+    PCIDevice *dev = opaque;
+
+    return shpc_present(dev);
+}
+
 static const VMStateDescription pci_bridge_dev_vmstate = {
     .name = "pci_bridge",
     .fields = (VMStateField[]) {
         VMSTATE_PCI_DEVICE(parent_obj, PCIBridge),
-        SHPC_VMSTATE(shpc, PCIDevice),
+        SHPC_VMSTATE(shpc, PCIDevice, pci_device_shpc_present),
         VMSTATE_END_OF_LIST()
     }
 };
 
+static void pci_bridge_dev_hotplug_cb(HotplugHandler *hotplug_dev,
+                                      DeviceState *dev, Error **errp)
+{
+    PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev);
+
+    if (!shpc_present(pci_hotplug_dev)) {
+        error_setg(errp, "standard hotplug controller has been disabled for "
+                   "this %s", TYPE_PCI_BRIDGE_DEV);
+        return;
+    }
+    shpc_device_hotplug_cb(hotplug_dev, dev, errp);
+}
+
+static void pci_bridge_dev_hot_unplug_request_cb(HotplugHandler *hotplug_dev,
+                                                 DeviceState *dev,
+                                                 Error **errp)
+{
+    PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev);
+
+    if (!shpc_present(pci_hotplug_dev)) {
+        error_setg(errp, "standard hotplug controller has been disabled for "
+                   "this %s", TYPE_PCI_BRIDGE_DEV);
+        return;
+    }
+    shpc_device_hot_unplug_request_cb(hotplug_dev, dev, errp);
+}
+
 static void pci_bridge_dev_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
@@ -154,8 +211,8 @@
     dc->props = pci_bridge_dev_properties;
     dc->vmsd = &pci_bridge_dev_vmstate;
     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
-    hc->plug = shpc_device_hotplug_cb;
-    hc->unplug_request = shpc_device_hot_unplug_request_cb;
+    hc->plug = pci_bridge_dev_hotplug_cb;
+    hc->unplug_request = pci_bridge_dev_hot_unplug_request_cb;
 }
 
 static const TypeInfo pci_bridge_dev_info = {
@@ -170,9 +227,31 @@
     }
 };
 
+/*
+ * Multiseat bridge.  Same as the standard pci bridge, only with a
+ * different pci id, so we can match it easily in the guest for
+ * automagic multiseat configuration.  See docs/multiseat.txt for more.
+ */
+static void pci_bridge_dev_seat_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+
+    k->device_id = PCI_DEVICE_ID_REDHAT_BRIDGE_SEAT;
+    dc->desc = "Standard PCI Bridge (multiseat)";
+}
+
+static const TypeInfo pci_bridge_dev_seat_info = {
+    .name              = TYPE_PCI_BRIDGE_SEAT_DEV,
+    .parent            = TYPE_PCI_BRIDGE_DEV,
+    .instance_size     = sizeof(PCIBridgeDev),
+    .class_init        = pci_bridge_dev_seat_class_init,
+};
+
 static void pci_bridge_dev_register(void)
 {
     type_register_static(&pci_bridge_dev_info);
+    type_register_static(&pci_bridge_dev_seat_info);
 }
 
 type_init(pci_bridge_dev_register);
diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c
index ec2bb45..57f8a37 100644
--- a/hw/pci-bridge/pci_expander_bridge.c
+++ b/hw/pci-bridge/pci_expander_bridge.c
@@ -14,6 +14,7 @@
 #include "hw/pci/pci_bus.h"
 #include "hw/pci/pci_host.h"
 #include "hw/pci/pci_bus.h"
+#include "hw/pci/pci_bridge.h"
 #include "hw/i386/pc.h"
 #include "qemu/range.h"
 #include "qemu/error-report.h"
@@ -42,6 +43,8 @@
     uint16_t numa_node;
 } PXBDev;
 
+static GList *pxb_dev_list;
+
 #define TYPE_PXB_HOST "pxb-host"
 
 static int pxb_bus_num(PCIBus *bus)
@@ -88,12 +91,45 @@
     return bus->bus_path;
 }
 
+static char *pxb_host_ofw_unit_address(const SysBusDevice *dev)
+{
+    const PCIHostState *pxb_host;
+    const PCIBus *pxb_bus;
+    const PXBDev *pxb_dev;
+    int position;
+    const DeviceState *pxb_dev_base;
+    const PCIHostState *main_host;
+    const SysBusDevice *main_host_sbd;
+
+    pxb_host = PCI_HOST_BRIDGE(dev);
+    pxb_bus = pxb_host->bus;
+    pxb_dev = PXB_DEV(pxb_bus->parent_dev);
+    position = g_list_index(pxb_dev_list, pxb_dev);
+    assert(position >= 0);
+
+    pxb_dev_base = DEVICE(pxb_dev);
+    main_host = PCI_HOST_BRIDGE(pxb_dev_base->parent_bus->parent);
+    main_host_sbd = SYS_BUS_DEVICE(main_host);
+
+    if (main_host_sbd->num_mmio > 0) {
+        return g_strdup_printf(TARGET_FMT_plx ",%x",
+                               main_host_sbd->mmio[0].addr, position + 1);
+    }
+    if (main_host_sbd->num_pio > 0) {
+        return g_strdup_printf("i%04x,%x",
+                               main_host_sbd->pio[0], position + 1);
+    }
+    return NULL;
+}
+
 static void pxb_host_class_init(ObjectClass *class, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(class);
+    SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(class);
     PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(class);
 
     dc->fw_name = "pci";
+    sbc->explicit_ofw_unit_address = pxb_host_ofw_unit_address;
     hc->root_bus_path = pxb_host_root_bus_path;
 }
 
@@ -148,6 +184,15 @@
     return pin - PCI_SLOT(pxb->devfn);
 }
 
+static gint pxb_compare(gconstpointer a, gconstpointer b)
+{
+    const PXBDev *pxb_a = a, *pxb_b = b;
+
+    return pxb_a->bus_nr < pxb_b->bus_nr ? -1 :
+           pxb_a->bus_nr > pxb_b->bus_nr ?  1 :
+           0;
+}
+
 static int pxb_dev_initfn(PCIDevice *dev)
 {
     PXBDev *pxb = PXB_DEV(dev);
@@ -175,7 +220,8 @@
 
     bds = qdev_create(BUS(bus), "pci-bridge");
     bds->id = dev_name;
-    qdev_prop_set_uint8(bds, "chassis_nr", pxb->bus_nr);
+    qdev_prop_set_uint8(bds, PCI_BRIDGE_DEV_PROP_CHASSIS_NR, pxb->bus_nr);
+    qdev_prop_set_bit(bds, PCI_BRIDGE_DEV_PROP_SHPC, false);
 
     PCI_HOST_BRIDGE(ds)->bus = bus;
 
@@ -190,9 +236,17 @@
                                PCI_STATUS_66MHZ | PCI_STATUS_FAST_BACK);
     pci_config_set_class(dev->config, PCI_CLASS_BRIDGE_HOST);
 
+    pxb_dev_list = g_list_insert_sorted(pxb_dev_list, pxb, pxb_compare);
     return 0;
 }
 
+static void pxb_dev_exitfn(PCIDevice *pci_dev)
+{
+    PXBDev *pxb = PXB_DEV(pci_dev);
+
+    pxb_dev_list = g_list_remove(pxb_dev_list, pxb);
+}
+
 static Property pxb_dev_properties[] = {
     /* Note: 0 is not a legal a PXB bus number. */
     DEFINE_PROP_UINT8("bus_nr", PXBDev, bus_nr, 0),
@@ -206,6 +260,7 @@
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 
     k->init = pxb_dev_initfn;
+    k->exit = pxb_dev_exitfn;
     k->vendor_id = PCI_VENDOR_ID_REDHAT;
     k->device_id = PCI_DEVICE_ID_REDHAT_PXB;
     k->class_id = PCI_CLASS_BRIDGE_HOST;
diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index 99db56c..2a4b8e1 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -51,8 +51,6 @@
 #  define LOG_TB(...) do { } while (0)
 #endif
 
-#define NSEC_PER_SEC    1000000000LL
-
 static void cpu_ppc_tb_stop (CPUPPCState *env);
 static void cpu_ppc_tb_start (CPUPPCState *env);
 
diff --git a/hw/ppc/spapr_rtc.c b/hw/ppc/spapr_rtc.c
index 83eb7c1..9da3746 100644
--- a/hw/ppc/spapr_rtc.c
+++ b/hw/ppc/spapr_rtc.c
@@ -26,6 +26,7 @@
  *
  */
 #include "cpu.h"
+#include "qemu/timer.h"
 #include "sysemu/sysemu.h"
 #include "hw/ppc/spapr.h"
 #include "qapi-event.h"
@@ -40,8 +41,6 @@
     int64_t ns_offset;
 };
 
-#define NSEC_PER_SEC    1000000000LL
-
 void spapr_rtc_read(DeviceState *dev, struct tm *tm, uint32_t *ns)
 {
     sPAPRRTCState *rtc = SPAPR_RTC(dev);
diff --git a/hw/s390x/css.c b/hw/s390x/css.c
index 5561d80..5df450e 100644
--- a/hw/s390x/css.c
+++ b/hw/s390x/css.c
@@ -588,6 +588,7 @@
 {
     SCSW *s = &sch->curr_status.scsw;
     PMCW *p = &sch->curr_status.pmcw;
+    uint16_t oldflags;
     int ret;
     SCHIB schib;
 
@@ -610,6 +611,7 @@
     copy_schib_from_guest(&schib, orig_schib);
     /* Only update the program-modifiable fields. */
     p->intparm = schib.pmcw.intparm;
+    oldflags = p->flags;
     p->flags &= ~(PMCW_FLAGS_MASK_ISC | PMCW_FLAGS_MASK_ENA |
                   PMCW_FLAGS_MASK_LM | PMCW_FLAGS_MASK_MME |
                   PMCW_FLAGS_MASK_MP);
@@ -625,6 +627,12 @@
             (PMCW_CHARS_MASK_MBFC | PMCW_CHARS_MASK_CSENSE);
     sch->curr_status.mba = schib.mba;
 
+    /* Has the channel been disabled? */
+    if (sch->disable_cb && (oldflags & PMCW_FLAGS_MASK_ENA) != 0
+        && (p->flags & PMCW_FLAGS_MASK_ENA) == 0) {
+        sch->disable_cb(sch);
+    }
+
     ret = 0;
 
 out:
@@ -1464,6 +1472,21 @@
     }
     s->ccw_fmt_1 = qemu_get_byte(f);
     s->ccw_no_data_cnt = qemu_get_byte(f);
+    /*
+     * Hack alert. We don't migrate the channel subsystem status (no
+     * device!), but we need to find out if the guest enabled mss/mcss-e.
+     * If the subchannel is enabled, it certainly was able to access it,
+     * so adjust the max_ssid/max_cssid values for relevant ssid/cssid
+     * values. This is not watertight, but better than nothing.
+     */
+    if (s->curr_status.pmcw.flags & PMCW_FLAGS_MASK_ENA) {
+        if (s->ssid) {
+            channel_subsys->max_ssid = MAX_SSID;
+        }
+        if (s->cssid != channel_subsys->default_cssid) {
+            channel_subsys->max_cssid = MAX_CSSID;
+        }
+    }
     return 0;
 }
 
@@ -1483,6 +1506,10 @@
 {
     PMCW *p = &sch->curr_status.pmcw;
 
+    if ((p->flags & PMCW_FLAGS_MASK_ENA) != 0 && sch->disable_cb) {
+        sch->disable_cb(sch);
+    }
+
     p->intparm = 0;
     p->flags &= ~(PMCW_FLAGS_MASK_ISC | PMCW_FLAGS_MASK_ENA |
                   PMCW_FLAGS_MASK_LM | PMCW_FLAGS_MASK_MME |
diff --git a/hw/s390x/css.h b/hw/s390x/css.h
index 7e53148..a09bb1f 100644
--- a/hw/s390x/css.h
+++ b/hw/s390x/css.h
@@ -81,6 +81,7 @@
     uint8_t ccw_no_data_cnt;
     /* transport-provided data: */
     int (*ccw_cb) (SubchDev *, CCW1);
+    void (*disable_cb)(SubchDev *);
     SenseId id;
     void *driver_data;
 };
diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c
index 132004a..2e0a8b6 100644
--- a/hw/s390x/ipl.c
+++ b/hw/s390x/ipl.c
@@ -218,7 +218,7 @@
  * - -1 if no valid boot device was found
  * - ccw id of the boot device otherwise
  */
-static uint32_t s390_update_iplstate(CPUS390XState *env, S390IPLState *ipl)
+static uint64_t s390_update_iplstate(CPUS390XState *env, S390IPLState *ipl)
 {
     DeviceState *dev_st;
 
@@ -248,7 +248,7 @@
 
     return -1;
 out:
-    return ipl->cssid << 24 | ipl->ssid << 16 | ipl->devno;
+    return (uint32_t) (ipl->cssid << 24 | ipl->ssid << 16 | ipl->devno);
 }
 
 int s390_ipl_update_diag308(IplParameterBlock *iplb)
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index c574988..3d20d6a 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -204,9 +204,6 @@
     MachineClass *mc = MACHINE_CLASS(oc);
     NMIClass *nc = NMI_CLASS(oc);
 
-    mc->name = "s390-ccw-virtio";
-    mc->alias = "s390-ccw";
-    mc->desc = "VirtIO-ccw based S390 machine";
     mc->init = ccw_init;
     mc->block_default_type = IF_VIRTIO;
     mc->no_cdrom = 1;
@@ -216,7 +213,6 @@
     mc->no_sdcard = 1;
     mc->use_sclp = 1;
     mc->max_cpus = 255;
-    mc->is_default = 1;
     nc->nmi_monitor_handler = s390_nmi;
 }
 
@@ -272,6 +268,7 @@
 static const TypeInfo ccw_machine_info = {
     .name          = TYPE_S390_CCW_MACHINE,
     .parent        = TYPE_MACHINE,
+    .abstract      = true,
     .instance_size = sizeof(S390CcwMachineState),
     .instance_init = s390_machine_initfn,
     .class_init    = ccw_machine_class_init,
@@ -281,9 +278,26 @@
     },
 };
 
+static void ccw_machine_2_4_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+
+    mc->name = "s390-ccw-virtio-2.4";
+    mc->alias = "s390-ccw-virtio";
+    mc->desc = "VirtIO-ccw based S390 machine v2.4";
+    mc->is_default = 1;
+}
+
+static const TypeInfo ccw_machine_2_4_info = {
+    .name          = TYPE_S390_CCW_MACHINE "2.4",
+    .parent        = TYPE_S390_CCW_MACHINE,
+    .class_init    = ccw_machine_2_4_class_init,
+};
+
 static void ccw_machine_register_types(void)
 {
     type_register_static(&ccw_machine_info);
+    type_register_static(&ccw_machine_2_4_info);
 }
 
 type_init(ccw_machine_register_types)
diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index 9605bf4..d631337 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -21,6 +21,7 @@
 #include "hw/sysbus.h"
 #include "qemu/bitops.h"
 #include "qemu/error-report.h"
+#include "hw/virtio/virtio-access.h"
 #include "hw/virtio/virtio-bus.h"
 #include "hw/s390x/adapter.h"
 #include "hw/s390x/s390_flic.h"
@@ -237,11 +238,20 @@
 }
 
 /* Communication blocks used by several channel commands. */
-typedef struct VqInfoBlock {
+typedef struct VqInfoBlockLegacy {
     uint64_t queue;
     uint32_t align;
     uint16_t index;
     uint16_t num;
+} QEMU_PACKED VqInfoBlockLegacy;
+
+typedef struct VqInfoBlock {
+    uint64_t desc;
+    uint32_t res0;
+    uint16_t index;
+    uint16_t num;
+    uint64_t avail;
+    uint64_t used;
 } QEMU_PACKED VqInfoBlock;
 
 typedef struct VqConfigBlock {
@@ -261,18 +271,27 @@
     uint8_t isc;
 } QEMU_PACKED VirtioThinintInfo;
 
+typedef struct VirtioRevInfo {
+    uint16_t revision;
+    uint16_t length;
+    uint8_t data[0];
+} QEMU_PACKED VirtioRevInfo;
+
 /* Specify where the virtqueues for the subchannel are in guest memory. */
-static int virtio_ccw_set_vqs(SubchDev *sch, uint64_t addr, uint32_t align,
-                              uint16_t index, uint16_t num)
+static int virtio_ccw_set_vqs(SubchDev *sch, VqInfoBlock *info,
+                              VqInfoBlockLegacy *linfo)
 {
     VirtIODevice *vdev = virtio_ccw_get_vdev(sch);
+    uint16_t index = info ? info->index : linfo->index;
+    uint16_t num = info ? info->num : linfo->num;
+    uint64_t desc = info ? info->desc : linfo->queue;
 
     if (index >= VIRTIO_CCW_QUEUE_MAX) {
         return -EINVAL;
     }
 
     /* Current code in virtio.c relies on 4K alignment. */
-    if (addr && (align != 4096)) {
+    if (linfo && desc && (linfo->align != 4096)) {
         return -EINVAL;
     }
 
@@ -280,8 +299,12 @@
         return -EINVAL;
     }
 
-    virtio_queue_set_addr(vdev, index, addr);
-    if (!addr) {
+    if (info) {
+        virtio_queue_set_rings(vdev, index, desc, info->avail, info->used);
+    } else {
+        virtio_queue_set_addr(vdev, index, desc);
+    }
+    if (!desc) {
         virtio_queue_set_vector(vdev, index, VIRTIO_NO_VECTOR);
     } else {
         /* Fail if we don't have a big enough queue. */
@@ -296,10 +319,98 @@
     return 0;
 }
 
-static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
+static void virtio_ccw_reset_virtio(VirtioCcwDevice *dev, VirtIODevice *vdev)
+{
+    virtio_ccw_stop_ioeventfd(dev);
+    virtio_reset(vdev);
+    if (dev->indicators) {
+        release_indicator(&dev->routes.adapter, dev->indicators);
+        dev->indicators = NULL;
+    }
+    if (dev->indicators2) {
+        release_indicator(&dev->routes.adapter, dev->indicators2);
+        dev->indicators2 = NULL;
+    }
+    if (dev->summary_indicator) {
+        release_indicator(&dev->routes.adapter, dev->summary_indicator);
+        dev->summary_indicator = NULL;
+    }
+    dev->sch->thinint_active = false;
+}
+
+static int virtio_ccw_handle_set_vq(SubchDev *sch, CCW1 ccw, bool check_len,
+                                    bool is_legacy)
 {
     int ret;
     VqInfoBlock info;
+    VqInfoBlockLegacy linfo;
+    size_t info_len = is_legacy ? sizeof(linfo) : sizeof(info);
+
+    if (check_len) {
+        if (ccw.count != info_len) {
+            return -EINVAL;
+        }
+    } else if (ccw.count < info_len) {
+        /* Can't execute command. */
+        return -EINVAL;
+    }
+    if (!ccw.cda) {
+        return -EFAULT;
+    }
+    if (is_legacy) {
+        linfo.queue = address_space_ldq_be(&address_space_memory, ccw.cda,
+                                           MEMTXATTRS_UNSPECIFIED, NULL);
+        linfo.align = address_space_ldl_be(&address_space_memory,
+                                           ccw.cda + sizeof(linfo.queue),
+                                           MEMTXATTRS_UNSPECIFIED,
+                                           NULL);
+        linfo.index = address_space_lduw_be(&address_space_memory,
+                                            ccw.cda + sizeof(linfo.queue)
+                                            + sizeof(linfo.align),
+                                            MEMTXATTRS_UNSPECIFIED,
+                                            NULL);
+        linfo.num = address_space_lduw_be(&address_space_memory,
+                                          ccw.cda + sizeof(linfo.queue)
+                                          + sizeof(linfo.align)
+                                          + sizeof(linfo.index),
+                                          MEMTXATTRS_UNSPECIFIED,
+                                          NULL);
+        ret = virtio_ccw_set_vqs(sch, NULL, &linfo);
+    } else {
+        info.desc = address_space_ldq_be(&address_space_memory, ccw.cda,
+                                           MEMTXATTRS_UNSPECIFIED, NULL);
+        info.index = address_space_lduw_be(&address_space_memory,
+                                           ccw.cda + sizeof(info.desc)
+                                           + sizeof(info.res0),
+                                           MEMTXATTRS_UNSPECIFIED, NULL);
+        info.num = address_space_lduw_be(&address_space_memory,
+                                         ccw.cda + sizeof(info.desc)
+                                         + sizeof(info.res0)
+                                         + sizeof(info.index),
+                                         MEMTXATTRS_UNSPECIFIED, NULL);
+        info.avail = address_space_ldq_be(&address_space_memory,
+                                          ccw.cda + sizeof(info.desc)
+                                          + sizeof(info.res0)
+                                          + sizeof(info.index)
+                                          + sizeof(info.num),
+                                          MEMTXATTRS_UNSPECIFIED, NULL);
+        info.used = address_space_ldq_be(&address_space_memory,
+                                         ccw.cda + sizeof(info.desc)
+                                         + sizeof(info.res0)
+                                         + sizeof(info.index)
+                                         + sizeof(info.num)
+                                         + sizeof(info.avail),
+                                         MEMTXATTRS_UNSPECIFIED, NULL);
+        ret = virtio_ccw_set_vqs(sch, &info, NULL);
+    }
+    sch->curr_status.scsw.count = 0;
+    return ret;
+}
+
+static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
+{
+    int ret;
+    VirtioRevInfo revinfo;
     uint8_t status;
     VirtioFeatDesc features;
     void *config;
@@ -323,44 +434,10 @@
     /* Look at the command. */
     switch (ccw.cmd_code) {
     case CCW_CMD_SET_VQ:
-        if (check_len) {
-            if (ccw.count != sizeof(info)) {
-                ret = -EINVAL;
-                break;
-            }
-        } else if (ccw.count < sizeof(info)) {
-            /* Can't execute command. */
-            ret = -EINVAL;
-            break;
-        }
-        if (!ccw.cda) {
-            ret = -EFAULT;
-        } else {
-            info.queue = address_space_ldq(&address_space_memory, ccw.cda,
-                                           MEMTXATTRS_UNSPECIFIED, NULL);
-            info.align = address_space_ldl(&address_space_memory,
-                                           ccw.cda + sizeof(info.queue),
-                                           MEMTXATTRS_UNSPECIFIED,
-                                           NULL);
-            info.index = address_space_lduw(&address_space_memory,
-                                            ccw.cda + sizeof(info.queue)
-                                            + sizeof(info.align),
-                                            MEMTXATTRS_UNSPECIFIED,
-                                            NULL);
-            info.num = address_space_lduw(&address_space_memory,
-                                          ccw.cda + sizeof(info.queue)
-                                          + sizeof(info.align)
-                                          + sizeof(info.index),
-                                          MEMTXATTRS_UNSPECIFIED,
-                                          NULL);
-            ret = virtio_ccw_set_vqs(sch, info.queue, info.align, info.index,
-                                     info.num);
-            sch->curr_status.scsw.count = 0;
-        }
+        ret = virtio_ccw_handle_set_vq(sch, ccw, check_len, dev->revision < 1);
         break;
     case CCW_CMD_VDEV_RESET:
-        virtio_ccw_stop_ioeventfd(dev);
-        virtio_reset(vdev);
+        virtio_ccw_reset_virtio(dev, vdev);
         ret = 0;
         break;
     case CCW_CMD_READ_FEAT:
@@ -383,7 +460,16 @@
                                                 MEMTXATTRS_UNSPECIFIED,
                                                 NULL);
             if (features.index == 0) {
-                features.features = vdev->host_features;
+                features.features = (uint32_t)vdev->host_features;
+            } else if (features.index == 1) {
+                features.features = (uint32_t)(vdev->host_features >> 32);
+                /*
+                 * Don't offer version 1 to the guest if it did not
+                 * negotiate at least revision 1.
+                 */
+                if (dev->revision <= 0) {
+                    features.features &= ~(1 << (VIRTIO_F_VERSION_1 - 32));
+                }
             } else {
                 /* Return zeroes if the guest supports more feature bits. */
                 features.features = 0;
@@ -419,7 +505,20 @@
                                                      MEMTXATTRS_UNSPECIFIED,
                                                      NULL);
             if (features.index == 0) {
-                virtio_set_features(vdev, features.features);
+                virtio_set_features(vdev,
+                                    (vdev->guest_features & 0xffffffff00000000ULL) |
+                                    features.features);
+            } else if (features.index == 1) {
+                /*
+                 * The guest should not set version 1 if it didn't
+                 * negotiate a revision >= 1.
+                 */
+                if (dev->revision <= 0) {
+                    features.features &= ~(1 << (VIRTIO_F_VERSION_1 - 32));
+                }
+                virtio_set_features(vdev,
+                                    (vdev->guest_features & 0x00000000ffffffffULL) |
+                                    ((uint64_t)features.features << 32));
             } else {
                 /*
                  * If the guest supports more feature bits, assert that it
@@ -500,7 +599,7 @@
             }
             if (virtio_set_status(vdev, status) == 0) {
                 if (vdev->status == 0) {
-                    virtio_reset(vdev);
+                    virtio_ccw_reset_virtio(dev, vdev);
                 }
                 if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
                     virtio_ccw_start_ioeventfd(dev);
@@ -640,6 +739,40 @@
             }
         }
         break;
+    case CCW_CMD_SET_VIRTIO_REV:
+        len = sizeof(revinfo);
+        if (ccw.count < len) {
+            ret = -EINVAL;
+            break;
+        }
+        if (!ccw.cda) {
+            ret = -EFAULT;
+            break;
+        }
+        revinfo.revision =
+            address_space_lduw_be(&address_space_memory, ccw.cda,
+                                  MEMTXATTRS_UNSPECIFIED, NULL);
+        revinfo.length =
+            address_space_lduw_be(&address_space_memory,
+                                  ccw.cda + sizeof(revinfo.revision),
+                                  MEMTXATTRS_UNSPECIFIED, NULL);
+        if (ccw.count < len + revinfo.length ||
+            (check_len && ccw.count > len + revinfo.length)) {
+            ret = -EINVAL;
+            break;
+        }
+        /*
+         * Once we start to support revisions with additional data, we'll
+         * need to fetch it here. Nothing to do for now, though.
+         */
+        if (dev->revision >= 0 ||
+            revinfo.revision > virtio_ccw_rev_max(vdev)) {
+            ret = -ENOSYS;
+            break;
+        }
+        ret = 0;
+        dev->revision = revinfo.revision;
+        break;
     default:
         ret = -ENOSYS;
         break;
@@ -647,6 +780,13 @@
     return ret;
 }
 
+static void virtio_sch_disable_cb(SubchDev *sch)
+{
+    VirtioCcwDevice *dev = sch->driver_data;
+
+    dev->revision = -1;
+}
+
 static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp)
 {
     unsigned int cssid = 0;
@@ -766,12 +906,15 @@
     css_sch_build_virtual_schib(sch, 0, VIRTIO_CCW_CHPID_TYPE);
 
     sch->ccw_cb = virtio_ccw_cb;
+    sch->disable_cb = virtio_sch_disable_cb;
 
     /* Build senseid data. */
     memset(&sch->id, 0, sizeof(SenseId));
     sch->id.reserved = 0xff;
     sch->id.cu_type = VIRTIO_CCW_CU_TYPE;
 
+    dev->revision = -1;
+
     if (k->realize) {
         k->realize(dev, &err);
     }
@@ -1081,21 +1224,8 @@
     VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d);
     VirtIODevice *vdev = virtio_bus_get_device(&dev->bus);
 
-    virtio_ccw_stop_ioeventfd(dev);
-    virtio_reset(vdev);
+    virtio_ccw_reset_virtio(dev, vdev);
     css_reset_sch(dev->sch);
-    if (dev->indicators) {
-        release_indicator(&dev->routes.adapter, dev->indicators);
-        dev->indicators = NULL;
-    }
-    if (dev->indicators2) {
-        release_indicator(&dev->routes.adapter, dev->indicators2);
-        dev->indicators2 = NULL;
-    }
-    if (dev->summary_indicator) {
-        release_indicator(&dev->routes.adapter, dev->summary_indicator);
-        dev->summary_indicator = NULL;
-    }
 }
 
 static void virtio_ccw_vmstate_change(DeviceState *d, bool running)
@@ -1342,6 +1472,7 @@
     qemu_put_be16(f, vdev->config_vector);
     qemu_put_be64(f, dev->routes.adapter.ind_offset);
     qemu_put_byte(f, dev->thinint_isc);
+    qemu_put_be32(f, dev->revision);
 }
 
 static int virtio_ccw_load_config(DeviceState *d, QEMUFile *f)
@@ -1382,6 +1513,7 @@
                                        dev->thinint_isc, true, false,
                                        &dev->routes.adapter.adapter_id);
     }
+    dev->revision = qemu_get_be32(f);
 
     return 0;
 }
diff --git a/hw/s390x/virtio-ccw.h b/hw/s390x/virtio-ccw.h
index d729263..692ddd7 100644
--- a/hw/s390x/virtio-ccw.h
+++ b/hw/s390x/virtio-ccw.h
@@ -41,6 +41,7 @@
 #define CCW_CMD_SET_CONF_IND 0x53
 #define CCW_CMD_READ_VQ_CONF 0x32
 #define CCW_CMD_SET_IND_ADAPTER 0x73
+#define CCW_CMD_SET_VIRTIO_REV 0x83
 
 #define TYPE_VIRTIO_CCW_DEVICE "virtio-ccw-device"
 #define VIRTIO_CCW_DEVICE(obj) \
@@ -86,6 +87,7 @@
     DeviceState parent_obj;
     SubchDev *sch;
     char *bus_id;
+    int revision;
     VirtioBusState bus;
     bool ioeventfd_started;
     bool ioeventfd_disabled;
@@ -99,6 +101,12 @@
     uint64_t ind_bit;
 };
 
+/* The maximum virtio revision we support. */
+static inline int virtio_ccw_rev_max(VirtIODevice *vdev)
+{
+    return 0;
+}
+
 /* virtual css bus type */
 typedef struct VirtualCssBus {
     BusState parent_obj;
diff --git a/hw/timer/arm_mptimer.c b/hw/timer/arm_mptimer.c
index 8b93b3c..3e59c2a 100644
--- a/hw/timer/arm_mptimer.c
+++ b/hw/timer/arm_mptimer.c
@@ -38,7 +38,7 @@
 
 static inline void timerblock_update_irq(TimerBlock *tb)
 {
-    qemu_set_irq(tb->irq, tb->status);
+    qemu_set_irq(tb->irq, tb->status && (tb->control & 4));
 }
 
 /* Return conversion factor from mpcore timer ticks to qemu timer ticks.  */
@@ -122,11 +122,18 @@
     case 8: /* Control.  */
         old = tb->control;
         tb->control = value;
-        if (((old & 1) == 0) && (value & 1)) {
-            if (tb->count == 0 && (tb->control & 2)) {
+        if (value & 1) {
+            if ((old & 1) && (tb->count != 0)) {
+                /* Do nothing if timer is ticking right now.  */
+                break;
+            }
+            if (tb->control & 2) {
                 tb->count = tb->load;
             }
             timerblock_reload(tb, 1);
+        } else if (old & 1) {
+            /* Shutdown the timer.  */
+            timer_del(tb->timer);
         }
         break;
     case 12: /* Interrupt status.  */
diff --git a/hw/timer/cadence_ttc.c b/hw/timer/cadence_ttc.c
index d46db3c..35bc880 100644
--- a/hw/timer/cadence_ttc.c
+++ b/hw/timer/cadence_ttc.c
@@ -208,15 +208,14 @@
             s->reg_intr |= (2 << i);
         }
     }
+    if ((x < 0) || (x >= interval)) {
+        s->reg_intr |= (s->reg_count & COUNTER_CTRL_INT) ?
+            COUNTER_INTR_IV : COUNTER_INTR_OV;
+    }
     while (x < 0) {
         x += interval;
     }
     s->reg_value = (uint32_t)(x % interval);
-
-    if (s->reg_value != x) {
-        s->reg_intr |= (s->reg_count & COUNTER_CTRL_INT) ?
-            COUNTER_INTR_IV : COUNTER_INTR_OV;
-    }
     cadence_timer_update(s);
 }
 
diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c
index 2e3ffc8..954c34d 100644
--- a/hw/timer/mc146818rtc.c
+++ b/hw/timer/mc146818rtc.c
@@ -48,7 +48,6 @@
 # define DPRINTF_C(format, ...)      do { } while (0)
 #endif
 
-#define NSEC_PER_SEC    1000000000LL
 #define SEC_PER_MIN     60
 #define MIN_PER_HOUR    60
 #define SEC_PER_HOUR    3600
diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index d4d7547..d7cd40b 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -32,7 +32,7 @@
 #include "trace.h"
 
 #define FRAME_TIMER_FREQ 1000
-#define FRAME_TIMER_NS   (1000000000 / FRAME_TIMER_FREQ)
+#define FRAME_TIMER_NS   (NSEC_PER_SEC / FRAME_TIMER_FREQ)
 #define UFRAME_TIMER_NS  (FRAME_TIMER_NS / 8)
 
 #define NB_MAXINTRATE    8        // Max rate at which controller issues ints
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index a6dcc79..2712c6f 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -999,7 +999,7 @@
     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
     VirtioBusState *vbus = VIRTIO_BUS(qbus);
     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
-    int i, r;
+    int i, r, e;
     if (!k->set_host_notifier) {
         fprintf(stderr, "binding does not support host notifiers\n");
         r = -ENOSYS;
@@ -1017,12 +1017,12 @@
     return 0;
 fail_vq:
     while (--i >= 0) {
-        r = k->set_host_notifier(qbus->parent, hdev->vq_index + i, false);
-        if (r < 0) {
+        e = k->set_host_notifier(qbus->parent, hdev->vq_index + i, false);
+        if (e < 0) {
             fprintf(stderr, "vhost VQ %d notifier cleanup error: %d\n", i, -r);
             fflush(stderr);
         }
-        assert (r >= 0);
+        assert (e >= 0);
     }
 fail:
     return r;
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 78bc14f..2990f8d 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -312,6 +312,8 @@
 
 static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f)
 {
+    VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
+    f |= dev->host_features;
     virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ);
     return f;
 }
@@ -423,6 +425,8 @@
 }
 
 static Property virtio_balloon_properties[] = {
+    DEFINE_PROP_BIT("deflate-on-oom", VirtIOBalloon, host_features,
+                    VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 70bc6d8..6a0174e 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -977,7 +977,7 @@
         val = proxy->gfselect;
         break;
     case VIRTIO_PCI_COMMON_GF:
-        if (proxy->gfselect <= ARRAY_SIZE(proxy->guest_features)) {
+        if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) {
             val = proxy->guest_features[proxy->gfselect];
         }
         break;
@@ -1052,7 +1052,7 @@
         proxy->gfselect = val;
         break;
     case VIRTIO_PCI_COMMON_GF:
-        if (proxy->gfselect <= ARRAY_SIZE(proxy->guest_features)) {
+        if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) {
             proxy->guest_features[proxy->gfselect] = val;
             virtio_set_features(vdev,
                                 (((uint64_t)proxy->guest_features[1]) << 32) |
diff --git a/include/block/block.h b/include/block/block.h
index 07bb724..06e4137 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -372,6 +372,10 @@
 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
 int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
                               int nb_sectors, int *pnum);
+int64_t bdrv_get_block_status_above(BlockDriverState *bs,
+                                    BlockDriverState *base,
+                                    int64_t sector_num,
+                                    int nb_sectors, int *pnum);
 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
                       int *pnum);
 int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
diff --git a/include/block/block_int.h b/include/block/block_int.h
index b0476fc..8996baf 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -612,6 +612,7 @@
  * @mode: Whether to collapse all images in the chain to the target.
  * @on_source_error: The action to take upon error reading from the source.
  * @on_target_error: The action to take upon error writing to the target.
+ * @unmap: Whether to unmap target where source sectors only contain zeroes.
  * @cb: Completion function for the job.
  * @opaque: Opaque pointer value passed to @cb.
  * @errp: Error object.
@@ -626,6 +627,7 @@
                   int64_t speed, uint32_t granularity, int64_t buf_size,
                   MirrorSyncMode mode, BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
+                  bool unmap,
                   BlockCompletionFunc *cb,
                   void *opaque, Error **errp);
 
@@ -635,7 +637,7 @@
  * @target: Block device to write to.
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
  * @sync_mode: What parts of the disk image should be copied to the destination.
- * @sync_bitmap: The dirty bitmap if sync_mode is MIRROR_SYNC_MODE_DIRTY_BITMAP.
+ * @sync_bitmap: The dirty bitmap if sync_mode is MIRROR_SYNC_MODE_INCREMENTAL.
  * @on_source_error: The action to take upon error reading from the source.
  * @on_target_error: The action to take upon error writing to the target.
  * @cb: Completion function for the job.
@@ -660,7 +662,5 @@
 void blk_dev_resize_cb(BlockBackend *blk);
 
 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
-void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
-                      int nr_sectors);
 
 #endif /* BLOCK_INT_H */
diff --git a/include/exec/softmmu-semi.h b/include/exec/softmmu-semi.h
index 8401f7d..1819cc2 100644
--- a/include/exec/softmmu-semi.h
+++ b/include/exec/softmmu-semi.h
@@ -9,14 +9,14 @@
 #ifndef SOFTMMU_SEMI_H
 #define SOFTMMU_SEMI_H 1
 
-static inline uint32_t softmmu_tget32(CPUArchState *env, uint32_t addr)
+static inline uint32_t softmmu_tget32(CPUArchState *env, target_ulong addr)
 {
     uint32_t val;
 
     cpu_memory_rw_debug(ENV_GET_CPU(env), addr, (uint8_t *)&val, 4, 0);
     return tswap32(val);
 }
-static inline uint32_t softmmu_tget8(CPUArchState *env, uint32_t addr)
+static inline uint32_t softmmu_tget8(CPUArchState *env, target_ulong addr)
 {
     uint8_t val;
 
@@ -28,7 +28,8 @@
 #define get_user_u8(arg, p) ({ arg = softmmu_tget8(env, p) ; 0; })
 #define get_user_ual(arg, p) get_user_u32(arg, p)
 
-static inline void softmmu_tput32(CPUArchState *env, uint32_t addr, uint32_t val)
+static inline void softmmu_tput32(CPUArchState *env,
+                                  target_ulong addr, uint32_t val)
 {
     val = tswap32(val);
     cpu_memory_rw_debug(ENV_GET_CPU(env), addr, (uint8_t *)&val, 4, 1);
@@ -36,8 +37,8 @@
 #define put_user_u32(arg, p) ({ softmmu_tput32(env, p, arg) ; 0; })
 #define put_user_ual(arg, p) put_user_u32(arg, p)
 
-static void *softmmu_lock_user(CPUArchState *env, uint32_t addr, uint32_t len,
-                               int copy)
+static void *softmmu_lock_user(CPUArchState *env,
+                               target_ulong addr, target_ulong len, int copy)
 {
     uint8_t *p;
     /* TODO: Make this something that isn't fixed size.  */
@@ -48,7 +49,7 @@
     return p;
 }
 #define lock_user(type, p, len, copy) softmmu_lock_user(env, p, len, copy)
-static char *softmmu_lock_user_string(CPUArchState *env, uint32_t addr)
+static char *softmmu_lock_user_string(CPUArchState *env, target_ulong addr)
 {
     char *p;
     char *s;
diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h
index 7b4bfb7..2b431e6 100644
--- a/include/hw/acpi/acpi-defs.h
+++ b/include/hw/acpi/acpi-defs.h
@@ -372,6 +372,18 @@
 
 typedef struct AcpiMadtGenericDistributor AcpiMadtGenericDistributor;
 
+struct AcpiMadtGenericMsiFrame {
+    ACPI_SUB_HEADER_DEF
+    uint16_t reserved;
+    uint32_t gic_msi_frame_id;
+    uint64_t base_address;
+    uint32_t flags;
+    uint16_t spi_count;
+    uint16_t spi_base;
+} QEMU_PACKED;
+
+typedef struct AcpiMadtGenericMsiFrame AcpiMadtGenericMsiFrame;
+
 /*
  * Generic Timer Description Table (GTDT)
  */
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 86c5651..328c8f7 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -15,14 +15,12 @@
 #include "hw/pci/pci.h"
 #include "hw/boards.h"
 #include "hw/compat.h"
+#include "hw/mem/pc-dimm.h"
 
 #define HPET_INTCAP "hpet-intcap"
 
 /**
  * PCMachineState:
- * @hotplug_memory_base: address in guest RAM address space where hotplug memory
- * address space begins.
- * @hotplug_memory: hotplug memory addess space container
  * @acpi_dev: link to ACPI PM device that performs ACPI hotplug handling
  * @enforce_aligned_dimm: check that DIMM's address/size is aligned by
  *                        backend's alignment value if provided
@@ -32,8 +30,7 @@
     MachineState parent_obj;
 
     /* <public> */
-    ram_addr_t hotplug_memory_base;
-    MemoryRegion hotplug_memory;
+    MemoryHotplugState hotplug_memory;
 
     HotplugHandler *acpi_dev;
     ISADevice *rtc;
diff --git a/include/hw/mem/pc-dimm.h b/include/hw/mem/pc-dimm.h
index f7b80b4..d83bf30 100644
--- a/include/hw/mem/pc-dimm.h
+++ b/include/hw/mem/pc-dimm.h
@@ -70,6 +70,17 @@
     MemoryRegion *(*get_memory_region)(PCDIMMDevice *dimm);
 } PCDIMMDeviceClass;
 
+/**
+ * MemoryHotplugState:
+ * @base: address in guest RAM address space where hotplug memory
+ * address space begins.
+ * @mr: hotplug memory address space container
+ */
+typedef struct MemoryHotplugState {
+    ram_addr_t base;
+    MemoryRegion mr;
+} MemoryHotplugState;
+
 uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
                                uint64_t address_space_size,
                                uint64_t *hint, uint64_t align, uint64_t size,
@@ -79,4 +90,8 @@
 
 int qmp_pc_dimm_device_list(Object *obj, void *opaque);
 uint64_t pc_existing_dimms_capacity(Error **errp);
+void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
+                         MemoryRegion *mr, uint64_t align, Error **errp);
+void pc_dimm_memory_unplug(DeviceState *dev, MemoryHotplugState *hpms,
+                           MemoryRegion *mr);
 #endif
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index d44bc84..551cb3d 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -92,6 +92,7 @@
 #define PCI_DEVICE_ID_REDHAT_SDHCI       0x0007
 #define PCI_DEVICE_ID_REDHAT_PCIE_HOST   0x0008
 #define PCI_DEVICE_ID_REDHAT_PXB         0x0009
+#define PCI_DEVICE_ID_REDHAT_BRIDGE_SEAT 0x000a
 #define PCI_DEVICE_ID_REDHAT_QXL         0x0100
 
 #define FMT_PCIBUS                      PRIx64
diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
index 1d8f997..93b621c 100644
--- a/include/hw/pci/pci_bridge.h
+++ b/include/hw/pci/pci_bridge.h
@@ -28,6 +28,10 @@
 
 #include "hw/pci/pci.h"
 
+#define PCI_BRIDGE_DEV_PROP_CHASSIS_NR "chassis_nr"
+#define PCI_BRIDGE_DEV_PROP_MSI        "msi"
+#define PCI_BRIDGE_DEV_PROP_SHPC       "shpc"
+
 int pci_bridge_ssvid_init(PCIDevice *dev, uint8_t offset,
                           uint16_t svid, uint16_t ssid);
 
diff --git a/include/hw/pci/shpc.h b/include/hw/pci/shpc.h
index 9bbea39..2c871b9 100644
--- a/include/hw/pci/shpc.h
+++ b/include/hw/pci/shpc.h
@@ -6,6 +6,7 @@
 #include "migration/vmstate.h"
 #include "qapi/error.h"
 #include "hw/hotplug.h"
+#include "hw/pci/pci.h"
 
 struct SHPCDevice {
     /* Capability offset in device's config space */
@@ -51,7 +52,13 @@
                                        DeviceState *dev, Error **errp);
 
 extern VMStateInfo shpc_vmstate_info;
-#define SHPC_VMSTATE(_field, _type) \
-    VMSTATE_BUFFER_UNSAFE_INFO(_field, _type, 0, shpc_vmstate_info, 0)
+#define SHPC_VMSTATE(_field, _type,  _test) \
+    VMSTATE_BUFFER_UNSAFE_INFO_TEST(_field, _type, _test, 0, \
+                                    shpc_vmstate_info, 0)
+
+static inline bool shpc_present(const PCIDevice *dev)
+{
+    return dev->cap_present & QEMU_PCI_CAP_SHPC;
+}
 
 #endif
diff --git a/include/hw/sysbus.h b/include/hw/sysbus.h
index d1f3f00..34f93c3 100644
--- a/include/hw/sysbus.h
+++ b/include/hw/sysbus.h
@@ -41,6 +41,23 @@
     /*< public >*/
 
     int (*init)(SysBusDevice *dev);
+
+    /*
+     * Let the sysbus device format its own non-PIO, non-MMIO unit address.
+     *
+     * Sometimes a class of SysBusDevices has neither MMIO nor PIO resources,
+     * yet instances of it would like to distinguish themselves, in
+     * OpenFirmware device paths, from other instances of the same class on the
+     * sysbus. For that end we expose this callback.
+     *
+     * The implementation is not supposed to change *@dev, or incur other
+     * observable change.
+     *
+     * The function returns a dynamically allocated string. On error, NULL
+     * should be returned; the unit address portion of the OFW node will be
+     * omitted then. (This is not considered a fatal error.)
+     */
+    char *(*explicit_ofw_unit_address)(const SysBusDevice *dev);
 } SysBusDeviceClass;
 
 struct SysBusDevice {
diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h
index 346a9fd..09c2ce4 100644
--- a/include/hw/virtio/virtio-balloon.h
+++ b/include/hw/virtio/virtio-balloon.h
@@ -42,6 +42,7 @@
     QEMUTimer *stats_timer;
     int64_t stats_last_update;
     int64_t stats_poll_interval;
+    uint32_t host_features;
 } VirtIOBalloon;
 
 #endif
diff --git a/include/hw/virtio/virtio-input.h b/include/hw/virtio/virtio-input.h
index fd5417d..af1c207 100644
--- a/include/hw/virtio/virtio-input.h
+++ b/include/hw/virtio/virtio-input.h
@@ -95,6 +95,8 @@
 
 struct VirtIOInputHID {
     VirtIOInput                       parent_obj;
+    char                              *display;
+    uint32_t                          head;
     QemuInputHandler                  *handler;
     QemuInputHandlerState             *hs;
     int                               ledstate;
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 7153b1e..0695d7c 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -500,9 +500,10 @@
     .start        = (_start),                                        \
 }
 
-#define VMSTATE_BUFFER_UNSAFE_INFO(_field, _state, _version, _info, _size) { \
+#define VMSTATE_BUFFER_UNSAFE_INFO_TEST(_field, _state, _test, _version, _info, _size) { \
     .name       = (stringify(_field)),                               \
     .version_id = (_version),                                        \
+    .field_exists = (_test),                                         \
     .size       = (_size),                                           \
     .info       = &(_info),                                          \
     .flags      = VMS_BUFFER,                                        \
@@ -562,6 +563,10 @@
     VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, NULL, _version,   \
             _vmsd, _type)
 
+#define VMSTATE_BUFFER_UNSAFE_INFO(_field, _state, _version, _info, _size) \
+    VMSTATE_BUFFER_UNSAFE_INFO_TEST(_field, _state, NULL, _version, _info, \
+            _size)
+
 #define VMSTATE_BOOL_V(_f, _s, _v)                                    \
     VMSTATE_SINGLE(_f, _s, _v, vmstate_info_bool, bool)
 
diff --git a/include/qemu/throttle.h b/include/qemu/throttle.h
index 5af76f0..995b2d5 100644
--- a/include/qemu/throttle.h
+++ b/include/qemu/throttle.h
@@ -29,8 +29,6 @@
 #include "qemu-common.h"
 #include "qemu/timer.h"
 
-#define NANOSECONDS_PER_SECOND  1000000000.0
-
 typedef enum {
     THROTTLE_BPS_TOTAL,
     THROTTLE_BPS_READ,
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index 9e4f90f..4dda20b 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -5,6 +5,8 @@
 #include "qemu-common.h"
 #include "qemu/notify.h"
 
+#define NSEC_PER_SEC 1000000000LL
+
 /* timers */
 
 #define SCALE_MS 1000000
diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
index 6523b4d..a6392bc 100644
--- a/include/sysemu/numa.h
+++ b/include/sysemu/numa.h
@@ -10,16 +10,27 @@
 
 extern int nb_numa_nodes;   /* Number of NUMA nodes */
 
+struct numa_addr_range {
+    ram_addr_t mem_start;
+    ram_addr_t mem_end;
+    QLIST_ENTRY(numa_addr_range) entry;
+};
+
 typedef struct node_info {
     uint64_t node_mem;
     DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS);
     struct HostMemoryBackend *node_memdev;
     bool present;
+    QLIST_HEAD(, numa_addr_range) addr; /* List to store address ranges */
 } NodeInfo;
+
 extern NodeInfo numa_info[MAX_NODES];
 void parse_numa_opts(MachineClass *mc);
 void numa_post_machine_init(void);
 void query_numa_node_mem(uint64_t node_mem[]);
 extern QemuOptsList qemu_numa_opts;
+void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node);
+void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node);
+uint32_t numa_get_node(ram_addr_t addr, Error **errp);
 
 #endif
diff --git a/numa.c b/numa.c
index 91fc6c1..3c80059 100644
--- a/numa.c
+++ b/numa.c
@@ -52,6 +52,92 @@
 int nb_numa_nodes;
 NodeInfo numa_info[MAX_NODES];
 
+void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node)
+{
+    struct numa_addr_range *range = g_malloc0(sizeof(*range));
+
+    /*
+     * Memory-less nodes can come here with 0 size in which case,
+     * there is nothing to do.
+     */
+    if (!size) {
+        return;
+    }
+
+    range->mem_start = addr;
+    range->mem_end = addr + size - 1;
+    QLIST_INSERT_HEAD(&numa_info[node].addr, range, entry);
+}
+
+void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node)
+{
+    struct numa_addr_range *range, *next;
+
+    QLIST_FOREACH_SAFE(range, &numa_info[node].addr, entry, next) {
+        if (addr == range->mem_start && (addr + size - 1) == range->mem_end) {
+            QLIST_REMOVE(range, entry);
+            g_free(range);
+            return;
+        }
+    }
+}
+
+static void numa_set_mem_ranges(void)
+{
+    int i;
+    ram_addr_t mem_start = 0;
+
+    /*
+     * Deduce start address of each node and use it to store
+     * the address range info in numa_info address range list
+     */
+    for (i = 0; i < nb_numa_nodes; i++) {
+        numa_set_mem_node_id(mem_start, numa_info[i].node_mem, i);
+        mem_start += numa_info[i].node_mem;
+    }
+}
+
+/*
+ * Check if @addr falls under NUMA @node.
+ */
+static bool numa_addr_belongs_to_node(ram_addr_t addr, uint32_t node)
+{
+    struct numa_addr_range *range;
+
+    QLIST_FOREACH(range, &numa_info[node].addr, entry) {
+        if (addr >= range->mem_start && addr <= range->mem_end) {
+            return true;
+        }
+    }
+    return false;
+}
+
+/*
+ * Given an address, return the index of the NUMA node to which the
+ * address belongs to.
+ */
+uint32_t numa_get_node(ram_addr_t addr, Error **errp)
+{
+    uint32_t i;
+
+    /* For non NUMA configurations, check if the addr falls under node 0 */
+    if (!nb_numa_nodes) {
+        if (numa_addr_belongs_to_node(addr, 0)) {
+            return 0;
+        }
+    }
+
+    for (i = 0; i < nb_numa_nodes; i++) {
+        if (numa_addr_belongs_to_node(addr, i)) {
+            return i;
+        }
+    }
+
+    error_setg(errp, "Address 0x" RAM_ADDR_FMT " doesn't belong to any "
+                "NUMA node", addr);
+    return -1;
+}
+
 static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp)
 {
     uint16_t nodenr;
@@ -274,6 +360,12 @@
         }
 
         for (i = 0; i < nb_numa_nodes; i++) {
+            QLIST_INIT(&numa_info[i].addr);
+        }
+
+        numa_set_mem_ranges();
+
+        for (i = 0; i < nb_numa_nodes; i++) {
             if (!bitmap_empty(numa_info[i].node_cpu, MAX_CPUMASK_BITS)) {
                 break;
             }
@@ -297,6 +389,8 @@
         }
 
         validate_numa_cpus();
+    } else {
+        numa_set_mem_node_id(0, ram_size, 0);
     }
 }
 
diff --git a/pc-bios/s390-ccw.img b/pc-bios/s390-ccw.img
index 3c6b01f..f64380a 100644
--- a/pc-bios/s390-ccw.img
+++ b/pc-bios/s390-ccw.img
Binary files differ
diff --git a/pc-bios/s390-ccw/s390-ccw.h b/pc-bios/s390-ccw/s390-ccw.h
index 9b3868b..5484c2a 100644
--- a/pc-bios/s390-ccw/s390-ccw.h
+++ b/pc-bios/s390-ccw/s390-ccw.h
@@ -47,6 +47,7 @@
 
 /* start.s */
 void disabled_wait(void);
+void consume_sclp_int(void);
 
 /* main.c */
 void virtio_panic(const char *string);
diff --git a/pc-bios/s390-ccw/sclp-ascii.c b/pc-bios/s390-ccw/sclp-ascii.c
index 761fb44..dc1c3e4 100644
--- a/pc-bios/s390-ccw/sclp-ascii.c
+++ b/pc-bios/s390-ccw/sclp-ascii.c
@@ -24,6 +24,7 @@
                 "       srl     %0,28"
                 : "=&d" (cc) : "d" (command), "a" (__pa(sccb))
                 : "cc", "memory");
+        consume_sclp_int();
         if (cc == 3)
                 return -EIO;
         if (cc == 2)
diff --git a/pc-bios/s390-ccw/start.S b/pc-bios/s390-ccw/start.S
index 5d5df0d..b6dd8c2 100644
--- a/pc-bios/s390-ccw/start.S
+++ b/pc-bios/s390-ccw/start.S
@@ -28,6 +28,38 @@
         larl %r1,disabled_wait_psw
         lpswe   0(%r1)
 
+
+/*
+ * void consume_sclp_int(void)
+ *
+ * eats one sclp interrupt
+ */
+        .globl consume_sclp_int
+consume_sclp_int:
+        /* enable service interrupts in cr0 */
+        stctg 0,0,0(15)
+        oi 6(15), 0x2
+        lctlg 0,0,0(15)
+        /* prepare external call handler */
+        larl %r1, external_new_code
+        stg %r1, 0x1b8
+        larl %r1, external_new_mask
+        mvc 0x1b0(8),0(%r1)
+        /* load enabled wait PSW */
+        larl %r1, enabled_wait_psw
+        lpswe 0(%r1)
+
+external_new_code:
+        /* disable service interrupts in cr0 */
+        stctg 0,0,0(15)
+        ni 6(15), 0xfd
+        lctlg 0,0,0(15)
+        br 14
+
         .align  8
 disabled_wait_psw:
         .quad   0x0002000180000000,0x0000000000000000
+enabled_wait_psw:
+        .quad   0x0302000180000000,0x0000000000000000
+external_new_mask:
+        .quad   0x0000000180000000
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 5a368f6..7b2efb8 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -536,12 +536,12 @@
 #
 # @none: only copy data written from now on
 #
-# @dirty-bitmap: only copy data described by the dirty bitmap. Since: 2.4
+# @incremental: only copy data described by the dirty bitmap. Since: 2.4
 #
 # Since: 1.3
 ##
 { 'enum': 'MirrorSyncMode',
-  'data': ['top', 'full', 'none', 'dirty-bitmap'] }
+  'data': ['top', 'full', 'none', 'incremental'] }
 
 ##
 # @BlockJobType:
@@ -724,8 +724,8 @@
 #
 # @speed: #optional the maximum speed, in bytes per second
 #
-# @bitmap: #optional the name of dirty bitmap if sync is "dirty-bitmap".
-#          Must be present if sync is "dirty-bitmap", must NOT be present
+# @bitmap: #optional the name of dirty bitmap if sync is "incremental".
+#          Must be present if sync is "incremental", must NOT be present
 #          otherwise. (Since 2.4)
 #
 # @on-source-error: #optional the action to take on an error on the source,
@@ -975,6 +975,11 @@
 # @on-target-error: #optional the action to take on an error on the target,
 #                   default 'report' (no limitations, since this applies to
 #                   a different block device than @device).
+# @unmap: #optional Whether to try to unmap target sectors where source has
+#         only zero. If true, and target unallocated sectors will read as zero,
+#         target image sectors will be unmapped; otherwise, zeroes will be
+#         written. Both will result in identical contents.
+#         Default is true. (Since 2.4)
 #
 # Returns: nothing on success
 #          If @device is not a valid block device, DeviceNotFound
@@ -987,7 +992,8 @@
             'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
             '*speed': 'int', '*granularity': 'uint32',
             '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
-            '*on-target-error': 'BlockdevOnError' } }
+            '*on-target-error': 'BlockdevOnError',
+            '*unmap': 'bool' } }
 
 ##
 # @BlockDirtyBitmap
diff --git a/qemu-options.hx b/qemu-options.hx
index 987dbf0..7b8efbf 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -2294,6 +2294,10 @@
 'iqn.2008-11.org.linux-kvm[:<name>]' but this can also be set from the command
 line or a configuration file.
 
+Since version Qemu 2.4 it is possible to specify a iSCSI request timeout to detect
+stalled requests and force a reestablishment of the session. The timeout
+is specified in seconds. The default is 0 which means no timeout. Libiscsi
+1.15.0 or greater is required for this feature.
 
 Example (without authentication):
 @example
@@ -2321,6 +2325,7 @@
     "-iscsi [user=user][,password=password]\n"
     "       [,header-digest=CRC32C|CR32C-NONE|NONE-CRC32C|NONE\n"
     "       [,initiator-name=initiator-iqn][,id=target-iqn]\n"
+    "       [,timeout=timeout]\n"
     "                iSCSI session parameters\n", QEMU_ARCH_ALL)
 STEXI
 
@@ -3345,20 +3350,22 @@
 ETEXI
 DEF("semihosting", 0, QEMU_OPTION_semihosting,
     "-semihosting    semihosting mode\n",
-    QEMU_ARCH_ARM | QEMU_ARCH_M68K | QEMU_ARCH_XTENSA | QEMU_ARCH_LM32)
+    QEMU_ARCH_ARM | QEMU_ARCH_M68K | QEMU_ARCH_XTENSA | QEMU_ARCH_LM32 |
+    QEMU_ARCH_MIPS)
 STEXI
 @item -semihosting
 @findex -semihosting
-Enable semihosting mode (ARM, M68K, Xtensa only).
+Enable semihosting mode (ARM, M68K, Xtensa, MIPS only).
 ETEXI
 DEF("semihosting-config", HAS_ARG, QEMU_OPTION_semihosting_config,
     "-semihosting-config [enable=on|off][,target=native|gdb|auto][,arg=str[,...]]\n" \
     "                semihosting configuration\n",
-QEMU_ARCH_ARM | QEMU_ARCH_M68K | QEMU_ARCH_XTENSA | QEMU_ARCH_LM32)
+QEMU_ARCH_ARM | QEMU_ARCH_M68K | QEMU_ARCH_XTENSA | QEMU_ARCH_LM32 |
+QEMU_ARCH_MIPS)
 STEXI
 @item -semihosting-config [enable=on|off][,target=native|gdb|auto][,arg=str[,...]]
 @findex -semihosting-config
-Enable and configure semihosting (ARM, M68K, Xtensa only).
+Enable and configure semihosting (ARM, M68K, Xtensa, MIPS only).
 @table @option
 @item target=@code{native|gdb|auto}
 Defines where the semihosting calls will be addressed, to QEMU (@code{native})
diff --git a/qmp-commands.hx b/qmp-commands.hx
index a05d25f..e1bcc60 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1137,10 +1137,10 @@
             (json-string, optional)
 - "sync": what parts of the disk image should be copied to the destination;
   possibilities include "full" for all the disk, "top" for only the sectors
-  allocated in the topmost image, "dirty-bitmap" for only the dirty sectors in
+  allocated in the topmost image, "incremental" for only the dirty sectors in
   the bitmap, or "none" to only replicate new I/O (MirrorSyncMode).
-- "bitmap": dirty bitmap name for sync==dirty-bitmap. Must be present if sync
-            is "dirty-bitmap", must NOT be present otherwise.
+- "bitmap": dirty bitmap name for sync==incremental. Must be present if sync
+            is "incremental", must NOT be present otherwise.
 - "mode": whether and how QEMU should create a new image
           (NewImageMode, optional, default 'absolute-paths')
 - "speed": the maximum speed, in bytes per second (json-int, optional)
@@ -1503,6 +1503,7 @@
         .args_type  = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?,"
                       "node-name:s?,replaces:s?,"
                       "on-source-error:s?,on-target-error:s?,"
+                      "unmap:b?,"
                       "granularity:i?,buf-size:i?",
         .mhandler.cmd_new = qmp_marshal_input_drive_mirror,
     },
@@ -1542,6 +1543,8 @@
   (BlockdevOnError, default 'report')
 - "on-target-error": the action to take on an error on the target
   (BlockdevOnError, default 'report')
+- "unmap": whether the target sectors should be discarded where source has only
+  zeroes. (json-bool, optional, default true)
 
 The default value of the granularity is the image cluster size clamped
 between 4096 and 65536, if the image format defines one.  If the format
diff --git a/target-arm/arm-semi.c b/target-arm/arm-semi.c
index 74a67e9..a2a7369 100644
--- a/target-arm/arm-semi.c
+++ b/target-arm/arm-semi.c
@@ -436,12 +436,19 @@
             size_t input_size;
             size_t output_size;
             int status = 0;
+#if !defined(CONFIG_USER_ONLY)
+            const char *cmdline;
+#endif
             GET_ARG(0);
             GET_ARG(1);
             input_size = arg1;
             /* Compute the size of the output string.  */
 #if !defined(CONFIG_USER_ONLY)
-            output_size = strlen(semihosting_get_cmdline()) + 1;
+            cmdline = semihosting_get_cmdline();
+            if (cmdline == NULL) {
+                cmdline = ""; /* Default to an empty line. */
+            }
+            output_size = strlen(cmdline) + 1; /* Count terminating 0. */
 #else
             unsigned int i;
 
@@ -472,7 +479,7 @@
 
             /* Copy the command-line arguments.  */
 #if !defined(CONFIG_USER_ONLY)
-            pstrcpy(output_buffer, output_size, semihosting_get_cmdline());
+            pstrcpy(output_buffer, output_size, cmdline);
 #else
             if (output_size == 1) {
                 /* Empty command-line.  */
diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c
index e30af06..08c95a3 100644
--- a/target-arm/helper-a64.c
+++ b/target-arm/helper-a64.c
@@ -533,6 +533,8 @@
 
         env->condexec_bits = 0;
     }
+    qemu_log_mask(CPU_LOG_INT, "...with ELR 0x%" PRIx64 "\n",
+                  env->elr_el[new_el]);
 
     pstate_write(env, PSTATE_DAIF | new_mode);
     env->aarch64 = 1;
diff --git a/target-arm/helper.c b/target-arm/helper.c
index aa34159..b87afe7 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -2441,7 +2441,7 @@
     { .name = "TLBI_ALLE1IS", .state = ARM_CP_STATE_AA64,
       .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 4,
       .access = PL2_W, .type = ARM_CP_NO_RAW,
-      .writefn = tlbiall_write },
+      .writefn = tlbiall_is_write },
     { .name = "TLBI_VMALLE1IS", .state = ARM_CP_STATE_AA64,
       .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 0,
       .access = PL1_W, .type = ARM_CP_NO_RAW,
diff --git a/target-arm/helper.h b/target-arm/helper.h
index fc885de..827b33d 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -50,6 +50,7 @@
 DEF_HELPER_4(exception_with_syndrome, void, env, i32, i32, i32)
 DEF_HELPER_1(wfi, void, env)
 DEF_HELPER_1(wfe, void, env)
+DEF_HELPER_1(yield, void, env)
 DEF_HELPER_1(pre_hvc, void, env)
 DEF_HELPER_2(pre_smc, void, env, i32)
 
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index 7fa32c4..663c05d 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -323,13 +323,25 @@
 
 void HELPER(wfe)(CPUARMState *env)
 {
-    CPUState *cs = CPU(arm_env_get_cpu(env));
-
-    /* Don't actually halt the CPU, just yield back to top
+    /* This is a hint instruction that is semantically different
+     * from YIELD even though we currently implement it identically.
+     * Don't actually halt the CPU, just yield back to top
      * level loop. This is not going into a "low power state"
      * (ie halting until some event occurs), so we never take
      * a configurable trap to a different exception level.
      */
+    HELPER(yield)(env);
+}
+
+void HELPER(yield)(CPUARMState *env)
+{
+    ARMCPU *cpu = arm_env_get_cpu(env);
+    CPUState *cs = CPU(cpu);
+
+    /* This is a non-trappable hint instruction that generally indicates
+     * that the guest is currently busy-looping. Yield control back to the
+     * top level loop so that a more deserving VCPU has a chance to run.
+     */
     cs->exception_index = EXCP_YIELD;
     cpu_loop_exit(cs);
 }
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index e077f2d..689f2be 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -1199,6 +1199,8 @@
         s->is_jmp = DISAS_WFI;
         return;
     case 1: /* YIELD */
+        s->is_jmp = DISAS_YIELD;
+        return;
     case 2: /* WFE */
         s->is_jmp = DISAS_WFE;
         return;
@@ -11107,6 +11109,10 @@
             gen_a64_set_pc_im(dc->pc);
             gen_helper_wfe(cpu_env);
             break;
+        case DISAS_YIELD:
+            gen_a64_set_pc_im(dc->pc);
+            gen_helper_yield(cpu_env);
+            break;
         case DISAS_WFI:
             /* This is a special case because we don't want to just halt the CPU
              * if trying to debug across a WFI.
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 971b6db..69ac18c 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -4080,6 +4080,10 @@
 static void gen_nop_hint(DisasContext *s, int val)
 {
     switch (val) {
+    case 1: /* yield */
+        gen_set_pc_im(s, s->pc);
+        s->is_jmp = DISAS_YIELD;
+        break;
     case 3: /* wfi */
         gen_set_pc_im(s, s->pc);
         s->is_jmp = DISAS_WFI;
@@ -11459,6 +11463,9 @@
         case DISAS_WFE:
             gen_helper_wfe(cpu_env);
             break;
+        case DISAS_YIELD:
+            gen_helper_yield(cpu_env);
+            break;
         case DISAS_SWI:
             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
                           default_exception_el(dc));
diff --git a/target-arm/translate.h b/target-arm/translate.h
index bcdcf11..9ab978f 100644
--- a/target-arm/translate.h
+++ b/target-arm/translate.h
@@ -103,6 +103,7 @@
 #define DISAS_WFE 7
 #define DISAS_HVC 8
 #define DISAS_SMC 9
+#define DISAS_YIELD 10
 
 #ifdef TARGET_AARCH64
 void a64_translate_init(void);
diff --git a/target-m68k/m68k-qreg.h b/target-m68k/m68k-qreg.h
deleted file mode 100644
index c224d5e..0000000
--- a/target-m68k/m68k-qreg.h
+++ /dev/null
@@ -1,11 +0,0 @@
-enum {
-#define DEFO32(name, offset) QREG_##name,
-#define DEFR(name, reg, mode) QREG_##name,
-#define DEFF64(name, offset) QREG_##name,
-    QREG_NULL,
-#include "qregs.def"
-    TARGET_NUM_QREGS = 0x100
-#undef DEFO32
-#undef DEFR
-#undef DEFF64
-};
diff --git a/target-m68k/translate.c b/target-m68k/translate.c
index d6c478f..a57d241 100644
--- a/target-m68k/translate.c
+++ b/target-m68k/translate.c
@@ -129,7 +129,6 @@
     uint32_t fpcr;
     struct TranslationBlock *tb;
     int singlestep_enabled;
-    int is_mem;
     TCGv_i64 mactmp;
     int done_mac;
 } DisasContext;
@@ -179,7 +178,6 @@
 {
     TCGv tmp;
     int index = IS_USER(s);
-    s->is_mem = 1;
     tmp = tcg_temp_new_i32();
     switch(opsize) {
     case OS_BYTE:
@@ -209,7 +207,6 @@
 {
     TCGv_i64 tmp;
     int index = IS_USER(s);
-    s->is_mem = 1;
     tmp = tcg_temp_new_i64();
     tcg_gen_qemu_ldf64(tmp, addr, index);
     gen_throws_exception = gen_last_qop;
@@ -220,7 +217,6 @@
 static inline void gen_store(DisasContext *s, int opsize, TCGv addr, TCGv val)
 {
     int index = IS_USER(s);
-    s->is_mem = 1;
     switch(opsize) {
     case OS_BYTE:
         tcg_gen_qemu_st8(val, addr, index);
@@ -241,7 +237,6 @@
 static inline void gen_store64(DisasContext *s, TCGv addr, TCGv_i64 val)
 {
     int index = IS_USER(s);
-    s->is_mem = 1;
     tcg_gen_qemu_stf64(val, addr, index);
     gen_throws_exception = gen_last_qop;
 }
@@ -297,8 +292,7 @@
 
 /* Handle a base + index + displacement effective addresss.
    A NULL_QREG base means pc-relative.  */
-static TCGv gen_lea_indexed(CPUM68KState *env, DisasContext *s, int opsize,
-                            TCGv base)
+static TCGv gen_lea_indexed(CPUM68KState *env, DisasContext *s, TCGv base)
 {
     uint32_t offset;
     uint16_t ext;
@@ -529,7 +523,7 @@
         return tmp;
     case 6: /* Indirect index + displacement.  */
         reg = AREG(insn, 0);
-        return gen_lea_indexed(env, s, opsize, reg);
+        return gen_lea_indexed(env, s, reg);
     case 7: /* Other */
         switch (insn & 7) {
         case 0: /* Absolute short.  */
@@ -545,7 +539,7 @@
             s->pc += 2;
             return tcg_const_i32(offset);
         case 3: /* pc index+displacement.  */
-            return gen_lea_indexed(env, s, opsize, NULL_QREG);
+            return gen_lea_indexed(env, s, NULL_QREG);
         case 4: /* Immediate.  */
         default:
             return NULL_QREG;
@@ -2227,7 +2221,6 @@
             mask = 0x80;
             for (i = 0; i < 8; i++) {
                 if (ext & mask) {
-                    s->is_mem = 1;
                     dest = FREG(i, 0);
                     if (ext & (1 << 13)) {
                         /* store */
@@ -2999,7 +2992,6 @@
     dc->singlestep_enabled = cs->singlestep_enabled;
     dc->fpcr = env->fpcr;
     dc->user = (env->sr & SR_S) == 0;
-    dc->is_mem = 0;
     dc->done_mac = 0;
     lj = -1;
     num_insns = 0;
diff --git a/target-mips/Makefile.objs b/target-mips/Makefile.objs
index 108fd9b..bc5ed85 100644
--- a/target-mips/Makefile.objs
+++ b/target-mips/Makefile.objs
@@ -1,4 +1,4 @@
 obj-y += translate.o dsp_helper.o op_helper.o lmi_helper.o helper.o cpu.o
-obj-y += gdbstub.o msa_helper.o
+obj-y += gdbstub.o msa_helper.o mips-semi.o
 obj-$(CONFIG_SOFTMMU) += machine.o
 obj-$(CONFIG_KVM) += kvm.o
diff --git a/target-mips/helper.h b/target-mips/helper.h
index 8df98c7..2b28e87 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -1,6 +1,8 @@
 DEF_HELPER_3(raise_exception_err, noreturn, env, i32, int)
 DEF_HELPER_2(raise_exception, noreturn, env, i32)
 
+DEF_HELPER_1(do_semihosting, void, env)
+
 #ifdef TARGET_MIPS64
 DEF_HELPER_4(sdl, void, env, tl, tl, int)
 DEF_HELPER_4(sdr, void, env, tl, tl, int)
diff --git a/target-mips/mips-semi.c b/target-mips/mips-semi.c
new file mode 100644
index 0000000..1162c76
--- /dev/null
+++ b/target-mips/mips-semi.c
@@ -0,0 +1,358 @@
+/*
+ * Unified Hosting Interface syscalls.
+ *
+ * Copyright (c) 2015 Imagination Technologies
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <sys/stat.h>
+#include "cpu.h"
+#include "exec/helper-proto.h"
+#include "exec/softmmu-semi.h"
+#include "exec/semihost.h"
+
+typedef enum UHIOp {
+    UHI_exit = 1,
+    UHI_open = 2,
+    UHI_close = 3,
+    UHI_read = 4,
+    UHI_write = 5,
+    UHI_lseek = 6,
+    UHI_unlink = 7,
+    UHI_fstat = 8,
+    UHI_argc = 9,
+    UHI_argnlen = 10,
+    UHI_argn = 11,
+    UHI_plog = 13,
+    UHI_assert = 14,
+    UHI_pread = 19,
+    UHI_pwrite = 20,
+    UHI_link = 22
+} UHIOp;
+
+typedef struct UHIStat {
+    int16_t uhi_st_dev;
+    uint16_t uhi_st_ino;
+    uint32_t uhi_st_mode;
+    uint16_t uhi_st_nlink;
+    uint16_t uhi_st_uid;
+    uint16_t uhi_st_gid;
+    int16_t uhi_st_rdev;
+    uint64_t uhi_st_size;
+    uint64_t uhi_st_atime;
+    uint64_t uhi_st_spare1;
+    uint64_t uhi_st_mtime;
+    uint64_t uhi_st_spare2;
+    uint64_t uhi_st_ctime;
+    uint64_t uhi_st_spare3;
+    uint64_t uhi_st_blksize;
+    uint64_t uhi_st_blocks;
+    uint64_t uhi_st_spare4[2];
+} UHIStat;
+
+enum UHIOpenFlags {
+    UHIOpen_RDONLY = 0x0,
+    UHIOpen_WRONLY = 0x1,
+    UHIOpen_RDWR   = 0x2,
+    UHIOpen_APPEND = 0x8,
+    UHIOpen_CREAT  = 0x200,
+    UHIOpen_TRUNC  = 0x400,
+    UHIOpen_EXCL   = 0x800
+};
+
+/* Errno values taken from asm-mips/errno.h */
+static uint16_t host_to_mips_errno[] = {
+    [ENAMETOOLONG] = 78,
+#ifdef EOVERFLOW
+    [EOVERFLOW]    = 79,
+#endif
+#ifdef ELOOP
+    [ELOOP]        = 90,
+#endif
+};
+
+static int errno_mips(int err)
+{
+    if (err < 0 || err >= ARRAY_SIZE(host_to_mips_errno)) {
+        return EINVAL;
+    } else if (host_to_mips_errno[err]) {
+        return host_to_mips_errno[err];
+    } else {
+        return err;
+    }
+}
+
+static int copy_stat_to_target(CPUMIPSState *env, const struct stat *src,
+                               target_ulong vaddr)
+{
+    hwaddr len = sizeof(struct UHIStat);
+    UHIStat *dst = lock_user(VERIFY_WRITE, vaddr, len, 0);
+    if (!dst) {
+        errno = EFAULT;
+        return -1;
+    }
+
+    dst->uhi_st_dev = tswap16(src->st_dev);
+    dst->uhi_st_ino = tswap16(src->st_ino);
+    dst->uhi_st_mode = tswap32(src->st_mode);
+    dst->uhi_st_nlink = tswap16(src->st_nlink);
+    dst->uhi_st_uid = tswap16(src->st_uid);
+    dst->uhi_st_gid = tswap16(src->st_gid);
+    dst->uhi_st_rdev = tswap16(src->st_rdev);
+    dst->uhi_st_size = tswap64(src->st_size);
+    dst->uhi_st_atime = tswap64(src->st_atime);
+    dst->uhi_st_mtime = tswap64(src->st_mtime);
+    dst->uhi_st_ctime = tswap64(src->st_ctime);
+#ifdef _WIN32
+    dst->uhi_st_blksize = 0;
+    dst->uhi_st_blocks = 0;
+#else
+    dst->uhi_st_blksize = tswap64(src->st_blksize);
+    dst->uhi_st_blocks = tswap64(src->st_blocks);
+#endif
+    unlock_user(dst, vaddr, len);
+    return 0;
+}
+
+static int get_open_flags(target_ulong target_flags)
+{
+    int open_flags = 0;
+
+    if (target_flags & UHIOpen_RDWR) {
+        open_flags |= O_RDWR;
+    } else if (target_flags & UHIOpen_WRONLY) {
+        open_flags |= O_WRONLY;
+    } else {
+        open_flags |= O_RDONLY;
+    }
+
+    open_flags |= (target_flags & UHIOpen_APPEND) ? O_APPEND : 0;
+    open_flags |= (target_flags & UHIOpen_CREAT)  ? O_CREAT  : 0;
+    open_flags |= (target_flags & UHIOpen_TRUNC)  ? O_TRUNC  : 0;
+    open_flags |= (target_flags & UHIOpen_EXCL)   ? O_EXCL   : 0;
+
+    return open_flags;
+}
+
+static int write_to_file(CPUMIPSState *env, target_ulong fd, target_ulong vaddr,
+                         target_ulong len, target_ulong offset)
+{
+    int num_of_bytes;
+    void *dst = lock_user(VERIFY_READ, vaddr, len, 1);
+    if (!dst) {
+        errno = EFAULT;
+        return -1;
+    }
+
+    if (offset) {
+#ifdef _WIN32
+        num_of_bytes = 0;
+#else
+        num_of_bytes = pwrite(fd, dst, len, offset);
+#endif
+    } else {
+        num_of_bytes = write(fd, dst, len);
+    }
+
+    unlock_user(dst, vaddr, 0);
+    return num_of_bytes;
+}
+
+static int read_from_file(CPUMIPSState *env, target_ulong fd,
+                          target_ulong vaddr, target_ulong len,
+                          target_ulong offset)
+{
+    int num_of_bytes;
+    void *dst = lock_user(VERIFY_WRITE, vaddr, len, 0);
+    if (!dst) {
+        errno = EFAULT;
+        return -1;
+    }
+
+    if (offset) {
+#ifdef _WIN32
+        num_of_bytes = 0;
+#else
+        num_of_bytes = pread(fd, dst, len, offset);
+#endif
+    } else {
+        num_of_bytes = read(fd, dst, len);
+    }
+
+    unlock_user(dst, vaddr, len);
+    return num_of_bytes;
+}
+
+static int copy_argn_to_target(CPUMIPSState *env, int arg_num,
+                               target_ulong vaddr)
+{
+    int strsize = strlen(semihosting_get_arg(arg_num)) + 1;
+    char *dst = lock_user(VERIFY_WRITE, vaddr, strsize, 0);
+    if (!dst) {
+        return -1;
+    }
+
+    strcpy(dst, semihosting_get_arg(arg_num));
+
+    unlock_user(dst, vaddr, strsize);
+    return 0;
+}
+
+#define GET_TARGET_STRING(p, addr)              \
+    do {                                        \
+        p = lock_user_string(addr);             \
+        if (!p) {                               \
+            gpr[2] = -1;                        \
+            gpr[3] = EFAULT;                    \
+            goto uhi_done;                      \
+        }                                       \
+    } while (0)
+
+#define FREE_TARGET_STRING(p, gpr)              \
+    do {                                        \
+        unlock_user(p, gpr, 0);                 \
+    } while (0)
+
+void helper_do_semihosting(CPUMIPSState *env)
+{
+    target_ulong *gpr = env->active_tc.gpr;
+    const UHIOp op = gpr[25];
+    char *p, *p2;
+
+    switch (op) {
+    case UHI_exit:
+        qemu_log("UHI(%d): exit(%d)\n", op, (int)gpr[4]);
+        exit(gpr[4]);
+    case UHI_open:
+        GET_TARGET_STRING(p, gpr[4]);
+        if (!strcmp("/dev/stdin", p)) {
+            gpr[2] = 0;
+        } else if (!strcmp("/dev/stdout", p)) {
+            gpr[2] = 1;
+        } else if (!strcmp("/dev/stderr", p)) {
+            gpr[2] = 2;
+        } else {
+            gpr[2] = open(p, get_open_flags(gpr[5]), gpr[6]);
+            gpr[3] = errno_mips(errno);
+        }
+        FREE_TARGET_STRING(p, gpr[4]);
+        break;
+    case UHI_close:
+        if (gpr[4] < 3) {
+            /* ignore closing stdin/stdout/stderr */
+            gpr[2] = 0;
+            goto uhi_done;
+        }
+        gpr[2] = close(gpr[4]);
+        gpr[3] = errno_mips(errno);
+        break;
+    case UHI_read:
+        gpr[2] = read_from_file(env, gpr[4], gpr[5], gpr[6], 0);
+        gpr[3] = errno_mips(errno);
+        break;
+    case UHI_write:
+        gpr[2] = write_to_file(env, gpr[4], gpr[5], gpr[6], 0);
+        gpr[3] = errno_mips(errno);
+        break;
+    case UHI_lseek:
+        gpr[2] = lseek(gpr[4], gpr[5], gpr[6]);
+        gpr[3] = errno_mips(errno);
+        break;
+    case UHI_unlink:
+        GET_TARGET_STRING(p, gpr[4]);
+        gpr[2] = remove(p);
+        gpr[3] = errno_mips(errno);
+        FREE_TARGET_STRING(p, gpr[4]);
+        break;
+    case UHI_fstat:
+        {
+            struct stat sbuf;
+            memset(&sbuf, 0, sizeof(sbuf));
+            gpr[2] = fstat(gpr[4], &sbuf);
+            gpr[3] = errno_mips(errno);
+            if (gpr[2]) {
+                goto uhi_done;
+            }
+            gpr[2] = copy_stat_to_target(env, &sbuf, gpr[5]);
+            gpr[3] = errno_mips(errno);
+        }
+        break;
+    case UHI_argc:
+        gpr[2] = semihosting_get_argc();
+        break;
+    case UHI_argnlen:
+        if (gpr[4] >= semihosting_get_argc()) {
+            gpr[2] = -1;
+            goto uhi_done;
+        }
+        gpr[2] = strlen(semihosting_get_arg(gpr[4]));
+        break;
+    case UHI_argn:
+        if (gpr[4] >= semihosting_get_argc()) {
+            gpr[2] = -1;
+            goto uhi_done;
+        }
+        gpr[2] = copy_argn_to_target(env, gpr[4], gpr[5]);
+        break;
+    case UHI_plog:
+        GET_TARGET_STRING(p, gpr[4]);
+        p2 = strstr(p, "%d");
+        if (p2) {
+            int char_num = p2 - p;
+            char *buf = g_malloc(char_num + 1);
+            strncpy(buf, p, char_num);
+            buf[char_num] = '\0';
+            gpr[2] = printf("%s%d%s", buf, (int)gpr[5], p2 + 2);
+            g_free(buf);
+        } else {
+            gpr[2] = printf("%s", p);
+        }
+        FREE_TARGET_STRING(p, gpr[4]);
+        break;
+    case UHI_assert:
+        GET_TARGET_STRING(p, gpr[4]);
+        GET_TARGET_STRING(p2, gpr[5]);
+        printf("assertion '");
+        printf("\"%s\"", p);
+        printf("': file \"%s\", line %d\n", p2, (int)gpr[6]);
+        FREE_TARGET_STRING(p2, gpr[5]);
+        FREE_TARGET_STRING(p, gpr[4]);
+        abort();
+        break;
+    case UHI_pread:
+        gpr[2] = read_from_file(env, gpr[4], gpr[5], gpr[6], gpr[7]);
+        gpr[3] = errno_mips(errno);
+        break;
+    case UHI_pwrite:
+        gpr[2] = write_to_file(env, gpr[4], gpr[5], gpr[6], gpr[7]);
+        gpr[3] = errno_mips(errno);
+        break;
+#ifndef _WIN32
+    case UHI_link:
+        GET_TARGET_STRING(p, gpr[4]);
+        GET_TARGET_STRING(p2, gpr[5]);
+        gpr[2] = link(p, p2);
+        gpr[3] = errno_mips(errno);
+        FREE_TARGET_STRING(p2, gpr[5]);
+        FREE_TARGET_STRING(p, gpr[4]);
+        break;
+#endif
+    default:
+        fprintf(stderr, "Unknown UHI operation %d\n", op);
+        abort();
+    }
+uhi_done:
+    return;
+}
diff --git a/target-mips/translate.c b/target-mips/translate.c
index a2dbad5..7302857 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -29,6 +29,7 @@
 #include "exec/helper-proto.h"
 #include "exec/helper-gen.h"
 #include "sysemu/kvm.h"
+#include "exec/semihost.h"
 
 #include "trace-tcg.h"
 
@@ -1429,6 +1430,7 @@
     uint64_t PAMask;
     bool mvh;
     int CP0_LLAddr_shift;
+    bool ps;
 } DisasContext;
 
 enum {
@@ -1723,6 +1725,15 @@
     return sum;
 }
 
+static inline void gen_move_low32(TCGv ret, TCGv_i64 arg)
+{
+#if defined(TARGET_MIPS64)
+    tcg_gen_ext32s_tl(ret, arg);
+#else
+    tcg_gen_trunc_i64_tl(ret, arg);
+#endif
+}
+
 static inline void check_cp0_enabled(DisasContext *ctx)
 {
     if (unlikely(!(ctx->hflags & MIPS_HFLAG_CP0)))
@@ -1816,6 +1827,16 @@
     }
 }
 
+/* This code generates a "reserved instruction" exception if the
+   CPU does not support 64-bit paired-single (PS) floating point data type */
+static inline void check_ps(DisasContext *ctx)
+{
+    if (unlikely(!ctx->ps)) {
+        generate_exception(ctx, EXCP_RI);
+    }
+    check_cp1_64bitmode(ctx);
+}
+
 #ifdef TARGET_MIPS64
 /* This code generates a "reserved instruction" exception if 64-bit
    instructions are not enabled. */
@@ -1849,7 +1870,7 @@
     TCGv_i##bits fp1 = tcg_temp_new_i##bits ();                               \
     switch (ifmt) {                                                           \
     case FMT_PS:                                                              \
-        check_cp1_64bitmode(ctx);                                             \
+        check_ps(ctx);                                                        \
         break;                                                                \
     case FMT_D:                                                               \
         if (abs) {                                                            \
@@ -3188,45 +3209,46 @@
     tcg_temp_free(t0);
 }
 
-static inline void gen_pcrel(DisasContext *ctx, int rs, int16_t imm)
+static inline void gen_pcrel(DisasContext *ctx, int opc, target_ulong pc,
+                             int rs)
 {
     target_long offset;
     target_long addr;
 
-    switch (MASK_OPC_PCREL_TOP2BITS(ctx->opcode)) {
+    switch (MASK_OPC_PCREL_TOP2BITS(opc)) {
     case OPC_ADDIUPC:
         if (rs != 0) {
             offset = sextract32(ctx->opcode << 2, 0, 21);
-            addr = addr_add(ctx, ctx->pc, offset);
+            addr = addr_add(ctx, pc, offset);
             tcg_gen_movi_tl(cpu_gpr[rs], addr);
         }
         break;
     case R6_OPC_LWPC:
         offset = sextract32(ctx->opcode << 2, 0, 21);
-        addr = addr_add(ctx, ctx->pc, offset);
+        addr = addr_add(ctx, pc, offset);
         gen_r6_ld(addr, rs, ctx->mem_idx, MO_TESL);
         break;
 #if defined(TARGET_MIPS64)
     case OPC_LWUPC:
         check_mips_64(ctx);
         offset = sextract32(ctx->opcode << 2, 0, 21);
-        addr = addr_add(ctx, ctx->pc, offset);
+        addr = addr_add(ctx, pc, offset);
         gen_r6_ld(addr, rs, ctx->mem_idx, MO_TEUL);
         break;
 #endif
     default:
-        switch (MASK_OPC_PCREL_TOP5BITS(ctx->opcode)) {
+        switch (MASK_OPC_PCREL_TOP5BITS(opc)) {
         case OPC_AUIPC:
             if (rs != 0) {
-                offset = imm << 16;
-                addr = addr_add(ctx, ctx->pc, offset);
+                offset = sextract32(ctx->opcode, 0, 16) << 16;
+                addr = addr_add(ctx, pc, offset);
                 tcg_gen_movi_tl(cpu_gpr[rs], addr);
             }
             break;
         case OPC_ALUIPC:
             if (rs != 0) {
-                offset = imm << 16;
-                addr = ~0xFFFF & addr_add(ctx, ctx->pc, offset);
+                offset = sextract32(ctx->opcode, 0, 16) << 16;
+                addr = ~0xFFFF & addr_add(ctx, pc, offset);
                 tcg_gen_movi_tl(cpu_gpr[rs], addr);
             }
             break;
@@ -3237,7 +3259,7 @@
         case R6_OPC_LDPC + (3 << 16):
             check_mips_64(ctx);
             offset = sextract32(ctx->opcode << 3, 0, 21);
-            addr = addr_add(ctx, (ctx->pc & ~0x7), offset);
+            addr = addr_add(ctx, (pc & ~0x7), offset);
             gen_r6_ld(addr, rs, ctx->mem_idx, MO_TEQ);
             break;
 #endif
@@ -4845,17 +4867,94 @@
     tcg_temp_free(t0);
 }
 
-#ifndef CONFIG_USER_ONLY
-/* CP0 (MMU and control) */
-static inline void gen_move_low32(TCGv ret, TCGv_i64 arg)
+static void gen_lsa(DisasContext *ctx, int opc, int rd, int rs, int rt,
+                    int imm2)
 {
-#if defined(TARGET_MIPS64)
-    tcg_gen_ext32s_tl(ret, arg);
-#else
-    tcg_gen_trunc_i64_tl(ret, arg);
-#endif
+    TCGv t0;
+    TCGv t1;
+    if (rd == 0) {
+        /* Treat as NOP. */
+        return;
+    }
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+    gen_load_gpr(t0, rs);
+    gen_load_gpr(t1, rt);
+    tcg_gen_shli_tl(t0, t0, imm2 + 1);
+    tcg_gen_add_tl(cpu_gpr[rd], t0, t1);
+    if (opc == OPC_LSA) {
+        tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
+    }
+
+    tcg_temp_free(t1);
+    tcg_temp_free(t0);
+
+    return;
 }
 
+static void gen_align(DisasContext *ctx, int opc, int rd, int rs, int rt,
+                      int bp)
+{
+    TCGv t0;
+    if (rd == 0) {
+        /* Treat as NOP. */
+        return;
+    }
+    t0 = tcg_temp_new();
+    gen_load_gpr(t0, rt);
+    if (bp == 0) {
+        tcg_gen_mov_tl(cpu_gpr[rd], t0);
+    } else {
+        TCGv t1 = tcg_temp_new();
+        gen_load_gpr(t1, rs);
+        switch (opc) {
+        case OPC_ALIGN:
+            {
+                TCGv_i64 t2 = tcg_temp_new_i64();
+                tcg_gen_concat_tl_i64(t2, t1, t0);
+                tcg_gen_shri_i64(t2, t2, 8 * (4 - bp));
+                gen_move_low32(cpu_gpr[rd], t2);
+                tcg_temp_free_i64(t2);
+            }
+            break;
+#if defined(TARGET_MIPS64)
+        case OPC_DALIGN:
+            tcg_gen_shli_tl(t0, t0, 8 * bp);
+            tcg_gen_shri_tl(t1, t1, 8 * (8 - bp));
+            tcg_gen_or_tl(cpu_gpr[rd], t1, t0);
+            break;
+#endif
+        }
+        tcg_temp_free(t1);
+    }
+
+    tcg_temp_free(t0);
+}
+
+static void gen_bitswap(DisasContext *ctx, int opc, int rd, int rt)
+{
+    TCGv t0;
+    if (rd == 0) {
+        /* Treat as NOP. */
+        return;
+    }
+    t0 = tcg_temp_new();
+    gen_load_gpr(t0, rt);
+    switch (opc) {
+    case OPC_BITSWAP:
+        gen_helper_bitswap(cpu_gpr[rd], t0);
+        break;
+#if defined(TARGET_MIPS64)
+    case OPC_DBITSWAP:
+        gen_helper_dbitswap(cpu_gpr[rd], t0);
+        break;
+#endif
+    }
+    tcg_temp_free(t0);
+}
+
+#ifndef CONFIG_USER_ONLY
+/* CP0 (MMU and control) */
 static inline void gen_mthc0_entrylo(TCGv arg, target_ulong off)
 {
     TCGv_i64 t0 = tcg_temp_new_i64();
@@ -8344,7 +8443,8 @@
 
 /* R6 CP1 Branches */
 static void gen_compute_branch1_r6(DisasContext *ctx, uint32_t op,
-                                   int32_t ft, int32_t offset)
+                                   int32_t ft, int32_t offset,
+                                   int delayslot_size)
 {
     target_ulong btarget;
     const char *opn = "cp1 cond branch";
@@ -8387,7 +8487,15 @@
     MIPS_DEBUG("%s: cond %02x target " TARGET_FMT_lx, opn,
                ctx->hflags, btarget);
     ctx->btarget = btarget;
-    ctx->hflags |= MIPS_HFLAG_BDS32;
+
+    switch (delayslot_size) {
+    case 2:
+        ctx->hflags |= MIPS_HFLAG_BDS16;
+        break;
+    case 4:
+        ctx->hflags |= MIPS_HFLAG_BDS32;
+        break;
+    }
 
 out:
     tcg_temp_free_i64(t0);
@@ -8912,7 +9020,6 @@
     };
     enum { BINOP, CMPOP, OTHEROP } optype = OTHEROP;
     uint32_t func = ctx->opcode & 0x3f;
-
     switch (op1) {
     case OPC_ADD_S:
         {
@@ -9405,8 +9512,7 @@
         opn = "cvt.l.s";
         break;
     case OPC_CVT_PS_S:
-        check_insn_opc_removed(ctx, ISA_MIPS32R6);
-        check_cp1_64bitmode(ctx);
+        check_ps(ctx);
         {
             TCGv_i64 fp64 = tcg_temp_new_i64();
             TCGv_i32 fp32_0 = tcg_temp_new_i32();
@@ -10023,8 +10129,7 @@
         opn = "cvt.d.l";
         break;
     case OPC_CVT_PS_PW:
-        check_insn_opc_removed(ctx, ISA_MIPS32R6);
-        check_cp1_64bitmode(ctx);
+        check_ps(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
@@ -10036,7 +10141,7 @@
         opn = "cvt.ps.pw";
         break;
     case OPC_ADD_PS:
-        check_cp1_64bitmode(ctx);
+        check_ps(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
             TCGv_i64 fp1 = tcg_temp_new_i64();
@@ -10051,7 +10156,7 @@
         opn = "add.ps";
         break;
     case OPC_SUB_PS:
-        check_cp1_64bitmode(ctx);
+        check_ps(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
             TCGv_i64 fp1 = tcg_temp_new_i64();
@@ -10066,7 +10171,7 @@
         opn = "sub.ps";
         break;
     case OPC_MUL_PS:
-        check_cp1_64bitmode(ctx);
+        check_ps(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
             TCGv_i64 fp1 = tcg_temp_new_i64();
@@ -10081,7 +10186,7 @@
         opn = "mul.ps";
         break;
     case OPC_ABS_PS:
-        check_cp1_64bitmode(ctx);
+        check_ps(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
@@ -10093,7 +10198,7 @@
         opn = "abs.ps";
         break;
     case OPC_MOV_PS:
-        check_cp1_64bitmode(ctx);
+        check_ps(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
@@ -10104,7 +10209,7 @@
         opn = "mov.ps";
         break;
     case OPC_NEG_PS:
-        check_cp1_64bitmode(ctx);
+        check_ps(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
@@ -10116,12 +10221,12 @@
         opn = "neg.ps";
         break;
     case OPC_MOVCF_PS:
-        check_cp1_64bitmode(ctx);
+        check_ps(ctx);
         gen_movcf_ps(ctx, fs, fd, (ft >> 2) & 0x7, ft & 0x1);
         opn = "movcf.ps";
         break;
     case OPC_MOVZ_PS:
-        check_cp1_64bitmode(ctx);
+        check_ps(ctx);
         {
             TCGLabel *l1 = gen_new_label();
             TCGv_i64 fp0;
@@ -10137,7 +10242,7 @@
         opn = "movz.ps";
         break;
     case OPC_MOVN_PS:
-        check_cp1_64bitmode(ctx);
+        check_ps(ctx);
         {
             TCGLabel *l1 = gen_new_label();
             TCGv_i64 fp0;
@@ -10154,7 +10259,7 @@
         opn = "movn.ps";
         break;
     case OPC_ADDR_PS:
-        check_cp1_64bitmode(ctx);
+        check_ps(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
             TCGv_i64 fp1 = tcg_temp_new_i64();
@@ -10169,7 +10274,7 @@
         opn = "addr.ps";
         break;
     case OPC_MULR_PS:
-        check_cp1_64bitmode(ctx);
+        check_ps(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
             TCGv_i64 fp1 = tcg_temp_new_i64();
@@ -10184,7 +10289,7 @@
         opn = "mulr.ps";
         break;
     case OPC_RECIP2_PS:
-        check_cp1_64bitmode(ctx);
+        check_ps(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
             TCGv_i64 fp1 = tcg_temp_new_i64();
@@ -10199,7 +10304,7 @@
         opn = "recip2.ps";
         break;
     case OPC_RECIP1_PS:
-        check_cp1_64bitmode(ctx);
+        check_ps(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
@@ -10211,7 +10316,7 @@
         opn = "recip1.ps";
         break;
     case OPC_RSQRT1_PS:
-        check_cp1_64bitmode(ctx);
+        check_ps(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
@@ -10223,7 +10328,7 @@
         opn = "rsqrt1.ps";
         break;
     case OPC_RSQRT2_PS:
-        check_cp1_64bitmode(ctx);
+        check_ps(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
             TCGv_i64 fp1 = tcg_temp_new_i64();
@@ -10250,7 +10355,7 @@
         opn = "cvt.s.pu";
         break;
     case OPC_CVT_PW_PS:
-        check_cp1_64bitmode(ctx);
+        check_ps(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
@@ -10274,7 +10379,7 @@
         opn = "cvt.s.pl";
         break;
     case OPC_PLL_PS:
-        check_cp1_64bitmode(ctx);
+        check_ps(ctx);
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
@@ -10289,7 +10394,7 @@
         opn = "pll.ps";
         break;
     case OPC_PLU_PS:
-        check_cp1_64bitmode(ctx);
+        check_ps(ctx);
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
@@ -10304,7 +10409,7 @@
         opn = "plu.ps";
         break;
     case OPC_PUL_PS:
-        check_cp1_64bitmode(ctx);
+        check_ps(ctx);
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
@@ -10319,7 +10424,7 @@
         opn = "pul.ps";
         break;
     case OPC_PUU_PS:
-        check_cp1_64bitmode(ctx);
+        check_ps(ctx);
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
@@ -10478,7 +10583,7 @@
 
     switch (opc) {
     case OPC_ALNV_PS:
-        check_cp1_64bitmode(ctx);
+        check_ps(ctx);
         {
             TCGv t0 = tcg_temp_local_new();
             TCGv_i32 fp = tcg_temp_new_i32();
@@ -10553,7 +10658,7 @@
         opn = "madd.d";
         break;
     case OPC_MADD_PS:
-        check_cp1_64bitmode(ctx);
+        check_ps(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
             TCGv_i64 fp1 = tcg_temp_new_i64();
@@ -10608,7 +10713,7 @@
         opn = "msub.d";
         break;
     case OPC_MSUB_PS:
-        check_cp1_64bitmode(ctx);
+        check_ps(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
             TCGv_i64 fp1 = tcg_temp_new_i64();
@@ -10663,7 +10768,7 @@
         opn = "nmadd.d";
         break;
     case OPC_NMADD_PS:
-        check_cp1_64bitmode(ctx);
+        check_ps(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
             TCGv_i64 fp1 = tcg_temp_new_i64();
@@ -10718,7 +10823,7 @@
         opn = "nmsub.d";
         break;
     case OPC_NMSUB_PS:
-        check_cp1_64bitmode(ctx);
+        check_ps(ctx);
         {
             TCGv_i64 fp0 = tcg_temp_new_i64();
             TCGv_i64 fp1 = tcg_temp_new_i64();
@@ -10884,6 +10989,243 @@
     }
 }
 
+/* Compact Branches */
+static void gen_compute_compact_branch(DisasContext *ctx, uint32_t opc,
+                                       int rs, int rt, int32_t offset)
+{
+    int bcond_compute = 0;
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    int m16_lowbit = (ctx->hflags & MIPS_HFLAG_M16) != 0;
+
+    if (ctx->hflags & MIPS_HFLAG_BMASK) {
+#ifdef MIPS_DEBUG_DISAS
+        LOG_DISAS("Branch in delay / forbidden slot at PC 0x" TARGET_FMT_lx
+                  "\n", ctx->pc);
+#endif
+        generate_exception(ctx, EXCP_RI);
+        goto out;
+    }
+
+    /* Load needed operands and calculate btarget */
+    switch (opc) {
+    /* compact branch */
+    case OPC_BOVC: /* OPC_BEQZALC, OPC_BEQC */
+    case OPC_BNVC: /* OPC_BNEZALC, OPC_BNEC */
+        gen_load_gpr(t0, rs);
+        gen_load_gpr(t1, rt);
+        bcond_compute = 1;
+        ctx->btarget = addr_add(ctx, ctx->pc + 4, offset);
+        if (rs <= rt && rs == 0) {
+            /* OPC_BEQZALC, OPC_BNEZALC */
+            tcg_gen_movi_tl(cpu_gpr[31], ctx->pc + 4 + m16_lowbit);
+        }
+        break;
+    case OPC_BLEZC: /* OPC_BGEZC, OPC_BGEC */
+    case OPC_BGTZC: /* OPC_BLTZC, OPC_BLTC */
+        gen_load_gpr(t0, rs);
+        gen_load_gpr(t1, rt);
+        bcond_compute = 1;
+        ctx->btarget = addr_add(ctx, ctx->pc + 4, offset);
+        break;
+    case OPC_BLEZALC: /* OPC_BGEZALC, OPC_BGEUC */
+    case OPC_BGTZALC: /* OPC_BLTZALC, OPC_BLTUC */
+        if (rs == 0 || rs == rt) {
+            /* OPC_BLEZALC, OPC_BGEZALC */
+            /* OPC_BGTZALC, OPC_BLTZALC */
+            tcg_gen_movi_tl(cpu_gpr[31], ctx->pc + 4 + m16_lowbit);
+        }
+        gen_load_gpr(t0, rs);
+        gen_load_gpr(t1, rt);
+        bcond_compute = 1;
+        ctx->btarget = addr_add(ctx, ctx->pc + 4, offset);
+        break;
+    case OPC_BC:
+    case OPC_BALC:
+        ctx->btarget = addr_add(ctx, ctx->pc + 4, offset);
+        break;
+    case OPC_BEQZC:
+    case OPC_BNEZC:
+        if (rs != 0) {
+            /* OPC_BEQZC, OPC_BNEZC */
+            gen_load_gpr(t0, rs);
+            bcond_compute = 1;
+            ctx->btarget = addr_add(ctx, ctx->pc + 4, offset);
+        } else {
+            /* OPC_JIC, OPC_JIALC */
+            TCGv tbase = tcg_temp_new();
+            TCGv toffset = tcg_temp_new();
+
+            gen_load_gpr(tbase, rt);
+            tcg_gen_movi_tl(toffset, offset);
+            gen_op_addr_add(ctx, btarget, tbase, toffset);
+            tcg_temp_free(tbase);
+            tcg_temp_free(toffset);
+        }
+        break;
+    default:
+        MIPS_INVAL("Compact branch/jump");
+        generate_exception(ctx, EXCP_RI);
+        goto out;
+    }
+
+    if (bcond_compute == 0) {
+        /* Uncoditional compact branch */
+        switch (opc) {
+        case OPC_JIALC:
+            tcg_gen_movi_tl(cpu_gpr[31], ctx->pc + 4 + m16_lowbit);
+            /* Fallthrough */
+        case OPC_JIC:
+            ctx->hflags |= MIPS_HFLAG_BR;
+            break;
+        case OPC_BALC:
+            tcg_gen_movi_tl(cpu_gpr[31], ctx->pc + 4 + m16_lowbit);
+            /* Fallthrough */
+        case OPC_BC:
+            ctx->hflags |= MIPS_HFLAG_B;
+            break;
+        default:
+            MIPS_INVAL("Compact branch/jump");
+            generate_exception(ctx, EXCP_RI);
+            goto out;
+        }
+
+        /* Generating branch here as compact branches don't have delay slot */
+        gen_branch(ctx, 4);
+    } else {
+        /* Conditional compact branch */
+        TCGLabel *fs = gen_new_label();
+        save_cpu_state(ctx, 0);
+
+        switch (opc) {
+        case OPC_BLEZALC: /* OPC_BGEZALC, OPC_BGEUC */
+            if (rs == 0 && rt != 0) {
+                /* OPC_BLEZALC */
+                tcg_gen_brcondi_tl(tcg_invert_cond(TCG_COND_LE), t1, 0, fs);
+            } else if (rs != 0 && rt != 0 && rs == rt) {
+                /* OPC_BGEZALC */
+                tcg_gen_brcondi_tl(tcg_invert_cond(TCG_COND_GE), t1, 0, fs);
+            } else {
+                /* OPC_BGEUC */
+                tcg_gen_brcond_tl(tcg_invert_cond(TCG_COND_GEU), t0, t1, fs);
+            }
+            break;
+        case OPC_BGTZALC: /* OPC_BLTZALC, OPC_BLTUC */
+            if (rs == 0 && rt != 0) {
+                /* OPC_BGTZALC */
+                tcg_gen_brcondi_tl(tcg_invert_cond(TCG_COND_GT), t1, 0, fs);
+            } else if (rs != 0 && rt != 0 && rs == rt) {
+                /* OPC_BLTZALC */
+                tcg_gen_brcondi_tl(tcg_invert_cond(TCG_COND_LT), t1, 0, fs);
+            } else {
+                /* OPC_BLTUC */
+                tcg_gen_brcond_tl(tcg_invert_cond(TCG_COND_LTU), t0, t1, fs);
+            }
+            break;
+        case OPC_BLEZC: /* OPC_BGEZC, OPC_BGEC */
+            if (rs == 0 && rt != 0) {
+                /* OPC_BLEZC */
+                tcg_gen_brcondi_tl(tcg_invert_cond(TCG_COND_LE), t1, 0, fs);
+            } else if (rs != 0 && rt != 0 && rs == rt) {
+                /* OPC_BGEZC */
+                tcg_gen_brcondi_tl(tcg_invert_cond(TCG_COND_GE), t1, 0, fs);
+            } else {
+                /* OPC_BGEC */
+                tcg_gen_brcond_tl(tcg_invert_cond(TCG_COND_GE), t0, t1, fs);
+            }
+            break;
+        case OPC_BGTZC: /* OPC_BLTZC, OPC_BLTC */
+            if (rs == 0 && rt != 0) {
+                /* OPC_BGTZC */
+                tcg_gen_brcondi_tl(tcg_invert_cond(TCG_COND_GT), t1, 0, fs);
+            } else if (rs != 0 && rt != 0 && rs == rt) {
+                /* OPC_BLTZC */
+                tcg_gen_brcondi_tl(tcg_invert_cond(TCG_COND_LT), t1, 0, fs);
+            } else {
+                /* OPC_BLTC */
+                tcg_gen_brcond_tl(tcg_invert_cond(TCG_COND_LT), t0, t1, fs);
+            }
+            break;
+        case OPC_BOVC: /* OPC_BEQZALC, OPC_BEQC */
+        case OPC_BNVC: /* OPC_BNEZALC, OPC_BNEC */
+            if (rs >= rt) {
+                /* OPC_BOVC, OPC_BNVC */
+                TCGv t2 = tcg_temp_new();
+                TCGv t3 = tcg_temp_new();
+                TCGv t4 = tcg_temp_new();
+                TCGv input_overflow = tcg_temp_new();
+
+                gen_load_gpr(t0, rs);
+                gen_load_gpr(t1, rt);
+                tcg_gen_ext32s_tl(t2, t0);
+                tcg_gen_setcond_tl(TCG_COND_NE, input_overflow, t2, t0);
+                tcg_gen_ext32s_tl(t3, t1);
+                tcg_gen_setcond_tl(TCG_COND_NE, t4, t3, t1);
+                tcg_gen_or_tl(input_overflow, input_overflow, t4);
+
+                tcg_gen_add_tl(t4, t2, t3);
+                tcg_gen_ext32s_tl(t4, t4);
+                tcg_gen_xor_tl(t2, t2, t3);
+                tcg_gen_xor_tl(t3, t4, t3);
+                tcg_gen_andc_tl(t2, t3, t2);
+                tcg_gen_setcondi_tl(TCG_COND_LT, t4, t2, 0);
+                tcg_gen_or_tl(t4, t4, input_overflow);
+                if (opc == OPC_BOVC) {
+                    /* OPC_BOVC */
+                    tcg_gen_brcondi_tl(tcg_invert_cond(TCG_COND_NE), t4, 0, fs);
+                } else {
+                    /* OPC_BNVC */
+                    tcg_gen_brcondi_tl(tcg_invert_cond(TCG_COND_EQ), t4, 0, fs);
+                }
+                tcg_temp_free(input_overflow);
+                tcg_temp_free(t4);
+                tcg_temp_free(t3);
+                tcg_temp_free(t2);
+            } else if (rs < rt && rs == 0) {
+                /* OPC_BEQZALC, OPC_BNEZALC */
+                if (opc == OPC_BEQZALC) {
+                    /* OPC_BEQZALC */
+                    tcg_gen_brcondi_tl(tcg_invert_cond(TCG_COND_EQ), t1, 0, fs);
+                } else {
+                    /* OPC_BNEZALC */
+                    tcg_gen_brcondi_tl(tcg_invert_cond(TCG_COND_NE), t1, 0, fs);
+                }
+            } else {
+                /* OPC_BEQC, OPC_BNEC */
+                if (opc == OPC_BEQC) {
+                    /* OPC_BEQC */
+                    tcg_gen_brcond_tl(tcg_invert_cond(TCG_COND_EQ), t0, t1, fs);
+                } else {
+                    /* OPC_BNEC */
+                    tcg_gen_brcond_tl(tcg_invert_cond(TCG_COND_NE), t0, t1, fs);
+                }
+            }
+            break;
+        case OPC_BEQZC:
+            tcg_gen_brcondi_tl(tcg_invert_cond(TCG_COND_EQ), t0, 0, fs);
+            break;
+        case OPC_BNEZC:
+            tcg_gen_brcondi_tl(tcg_invert_cond(TCG_COND_NE), t0, 0, fs);
+            break;
+        default:
+            MIPS_INVAL("Compact conditional branch/jump");
+            generate_exception(ctx, EXCP_RI);
+            goto out;
+        }
+
+        /* Generating branch here as compact branches don't have delay slot */
+        gen_goto_tb(ctx, 1, ctx->btarget);
+        gen_set_label(fs);
+
+        ctx->hflags |= MIPS_HFLAG_FBNSLOT;
+        MIPS_DEBUG("Compact conditional branch");
+    }
+
+out:
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+}
+
 /* ISA extensions (ASEs) */
 /* MIPS16 extension to MIPS32 */
 
@@ -11549,6 +11891,15 @@
     return 4;
 }
 
+static inline bool is_uhi(int sdbbp_code)
+{
+#ifdef CONFIG_USER_ONLY
+    return false;
+#else
+    return semihosting_enabled() && sdbbp_code == 1;
+#endif
+}
+
 static int decode_mips16_opc (CPUMIPSState *env, DisasContext *ctx)
 {
     int rx, ry;
@@ -11848,13 +12199,13 @@
             }
             break;
         case RR_SDBBP:
-            /* XXX: not clear which exception should be raised
-             *      when in debug mode...
-             */
-            check_insn(ctx, ISA_MIPS32);
-            if (!(ctx->hflags & MIPS_HFLAG_DM)) {
-                generate_exception(ctx, EXCP_DBp);
+            if (is_uhi(extract32(ctx->opcode, 5, 6))) {
+                gen_helper_do_semihosting(cpu_env);
             } else {
+                /* XXX: not clear which exception should be raised
+                 *      when in debug mode...
+                 */
+                check_insn(ctx, ISA_MIPS32);
                 generate_exception(ctx, EXCP_DBp);
             }
             break;
@@ -12038,6 +12389,8 @@
     LBU16 = 0x02,
     MOVE16 = 0x03,
     ADDI32 = 0x04,
+    R6_LUI = 0x04,
+    AUI = 0x04,
     LBU32 = 0x05,
     SB32 = 0x06,
     LB32 = 0x07,
@@ -12060,56 +12413,88 @@
     POOL32S = 0x16,  /* MIPS64 */
     DADDIU32 = 0x17, /* MIPS64 */
 
-    /* 0x1f is reserved */
     POOL32C = 0x18,
     LWGP16 = 0x19,
     LW16 = 0x1a,
     POOL16E = 0x1b,
     XORI32 = 0x1c,
     JALS32 = 0x1d,
+    BOVC = 0x1d,
+    BEQC = 0x1d,
+    BEQZALC = 0x1d,
     ADDIUPC = 0x1e,
+    PCREL = 0x1e,
+    BNVC = 0x1f,
+    BNEC = 0x1f,
+    BNEZALC = 0x1f,
 
-    /* 0x20 is reserved */
-    RES_20 = 0x20,
+    R6_BEQZC = 0x20,
+    JIC = 0x20,
     POOL16F = 0x21,
     SB16 = 0x22,
     BEQZ16 = 0x23,
+    BEQZC16 = 0x23,
     SLTI32 = 0x24,
     BEQ32 = 0x25,
+    BC = 0x25,
     SWC132 = 0x26,
     LWC132 = 0x27,
 
-    /* 0x28 and 0x29 are reserved */
-    RES_28 = 0x28,
+    /* 0x29 is reserved */
     RES_29 = 0x29,
+    R6_BNEZC = 0x28,
+    JIALC = 0x28,
     SH16 = 0x2a,
     BNEZ16 = 0x2b,
+    BNEZC16 = 0x2b,
     SLTIU32 = 0x2c,
     BNE32 = 0x2d,
+    BALC = 0x2d,
     SDC132 = 0x2e,
     LDC132 = 0x2f,
 
-    /* 0x30 and 0x31 are reserved */
-    RES_30 = 0x30,
+    /* 0x31 is reserved */
     RES_31 = 0x31,
+    BLEZALC = 0x30,
+    BGEZALC = 0x30,
+    BGEUC = 0x30,
     SWSP16 = 0x32,
     B16 = 0x33,
+    BC16 = 0x33,
     ANDI32 = 0x34,
     J32 = 0x35,
+    BGTZC = 0x35,
+    BLTZC = 0x35,
+    BLTC = 0x35,
     SD32 = 0x36, /* MIPS64 */
     LD32 = 0x37, /* MIPS64 */
 
-    /* 0x38 and 0x39 are reserved */
-    RES_38 = 0x38,
+    /* 0x39 is reserved */
     RES_39 = 0x39,
+    BGTZALC = 0x38,
+    BLTZALC = 0x38,
+    BLTUC = 0x38,
     SW16 = 0x3a,
     LI16 = 0x3b,
     JALX32 = 0x3c,
     JAL32 = 0x3d,
+    BLEZC = 0x3d,
+    BGEZC = 0x3d,
+    BGEC = 0x3d,
     SW32 = 0x3e,
     LW32 = 0x3f
 };
 
+/* PCREL Instructions perform PC-Relative address calculation. bits 20..16 */
+enum {
+    ADDIUPC_00 = 0x00,
+    ADDIUPC_07 = 0x07,
+    AUIPC = 0x1e,
+    ALUIPC = 0x1f,
+    LWPC_08 = 0x08,
+    LWPC_0F = 0x0F,
+};
+
 /* POOL32A encoding of minor opcode field */
 
 enum {
@@ -12119,6 +12504,8 @@
     SRL32 = 0x1,
     SRA = 0x2,
     ROTR = 0x3,
+    SELEQZ = 0x5,
+    SELNEZ = 0x6,
 
     SLLV = 0x0,
     SRLV = 0x1,
@@ -12137,11 +12524,21 @@
     SLTU = 0xe,
 
     MOVN = 0x0,
+    R6_MUL  = 0x0,
     MOVZ = 0x1,
+    MUH  = 0x1,
+    MULU = 0x2,
+    MUHU = 0x3,
     LWXS = 0x4,
+    R6_DIV  = 0x4,
+    MOD  = 0x5,
+    R6_DIVU = 0x6,
+    MODU = 0x7,
 
     /* The following can be distinguished by their lower 6 bits. */
     INS = 0x0c,
+    LSA = 0x0f,
+    ALIGN = 0x1f,
     EXT = 0x2c,
     POOL32AXF = 0x3c
 };
@@ -12194,6 +12591,7 @@
     /* end of microMIPS32 DSP */
 
     /* bits 15..12 for 0x2c */
+    BITSWAP = 0x0,
     SEB = 0x2,
     SEH = 0x3,
     CLO = 0x4,
@@ -12220,7 +12618,10 @@
     /* bits 15..12 for 0x3c */
     JALR = 0x0,
     JR = 0x0,                   /* alias */
+    JALRC = 0x0,
+    JRC = 0x0,
     JALR_HB = 0x1,
+    JALRC_HB = 0x1,
     JALRS = 0x4,
     JALRS_HB = 0x5,
 
@@ -12233,6 +12634,8 @@
     TLBR = 0x1,
     TLBWI = 0x2,
     TLBWR = 0x3,
+    TLBINV = 0x4,
+    TLBINVF = 0x5,
     WAIT = 0x9,
     IRET = 0xd,
     DERET = 0xe,
@@ -12302,32 +12705,39 @@
 enum {
     /* These are the bit 7..6 values */
     ADD_FMT = 0x0,
-    MOVN_FMT = 0x0,
 
     SUB_FMT = 0x1,
-    MOVZ_FMT = 0x1,
 
     MUL_FMT = 0x2,
 
     DIV_FMT = 0x3,
 
     /* These are the bit 8..6 values */
+    MOVN_FMT = 0x0,
     RSQRT2_FMT = 0x0,
     MOVF_FMT = 0x0,
+    RINT_FMT = 0x0,
+    SELNEZ_FMT = 0x0,
 
+    MOVZ_FMT = 0x1,
     LWXC1 = 0x1,
     MOVT_FMT = 0x1,
+    CLASS_FMT = 0x1,
+    SELEQZ_FMT = 0x1,
 
     PLL_PS = 0x2,
     SWXC1 = 0x2,
+    SEL_FMT = 0x2,
 
     PLU_PS = 0x3,
     LDXC1 = 0x3,
 
+    MOVN_FMT_04 = 0x4,
     PUL_PS = 0x4,
     SDXC1 = 0x4,
     RECIP2_FMT = 0x4,
 
+    MOVZ_FMT_05 = 0x05,
     PUU_PS = 0x5,
     LUXC1 = 0x5,
 
@@ -12335,8 +12745,10 @@
     SUXC1 = 0x6,
     ADDR_PS = 0x6,
     PREFX = 0x6,
+    MADDF_FMT = 0x6,
 
     MULR_PS = 0x7,
+    MSUBF_FMT = 0x7,
 
     MADD_S = 0x01,
     MADD_D = 0x09,
@@ -12353,10 +12765,17 @@
     NMSUB_D = 0x2a,
     NMSUB_PS = 0x32,
 
+    MIN_FMT = 0x3,
+    MAX_FMT = 0xb,
+    MINA_FMT = 0x23,
+    MAXA_FMT = 0x2b,
     POOL32FXF = 0x3b,
 
     CABS_COND_FMT = 0x1c,              /* MIPS3D */
-    C_COND_FMT = 0x3c
+    C_COND_FMT = 0x3c,
+
+    CMP_CONDN_S = 0x5,
+    CMP_CONDN_D = 0x15
 };
 
 /* POOL32Fxf encoding of minor opcode extension field */
@@ -12409,10 +12828,15 @@
     BGTZ = 0x06,
     BEQZC = 0x07,
     TLTI = 0x08,
+    BC1EQZC = 0x08,
     TGEI = 0x09,
+    BC1NEZC = 0x09,
     TLTIU = 0x0a,
+    BC2EQZC = 0x0a,
     TGEIU = 0x0b,
+    BC2NEZC = 0x0a,
     TNEI = 0x0c,
+    R6_SYNCI = 0x0c,
     LUI = 0x0d,
     TEQI = 0x0e,
     SYNCI = 0x10,
@@ -12465,6 +12889,26 @@
     JRADDIUSP = 0x30
 };
 
+/* R6 POOL16C encoding of minor opcode field (bits 0..5) */
+
+enum {
+    R6_NOT16    = 0x00,
+    R6_AND16    = 0x01,
+    R6_LWM16    = 0x02,
+    R6_JRC16    = 0x03,
+    MOVEP       = 0x04,
+    MOVEP_07    = 0x07,
+    R6_XOR16    = 0x08,
+    R6_OR16     = 0x09,
+    R6_SWM16    = 0x0a,
+    JALRC16     = 0x0b,
+    MOVEP_0C    = 0x0c,
+    MOVEP_0F    = 0x0f,
+    JRCADDIUSP  = 0x13,
+    R6_BREAK16  = 0x1b,
+    R6_SDBBP16  = 0x3b
+};
+
 /* POOL16D encoding of minor opcode field */
 
 enum {
@@ -12703,13 +13147,13 @@
         generate_exception(ctx, EXCP_BREAK);
         break;
     case SDBBP16:
-        /* XXX: not clear which exception should be raised
-         *      when in debug mode...
-         */
-        check_insn(ctx, ISA_MIPS32);
-        if (!(ctx->hflags & MIPS_HFLAG_DM)) {
-            generate_exception(ctx, EXCP_DBp);
+        if (is_uhi(extract32(ctx->opcode, 0, 4))) {
+            gen_helper_do_semihosting(cpu_env);
         } else {
+            /* XXX: not clear which exception should be raised
+             *      when in debug mode...
+             */
+            check_insn(ctx, ISA_MIPS32);
             generate_exception(ctx, EXCP_DBp);
         }
         break;
@@ -12729,6 +13173,110 @@
     }
 }
 
+static inline void gen_movep(DisasContext *ctx, int enc_dest, int enc_rt,
+                             int enc_rs)
+{
+    int rd, rs, re, rt;
+    static const int rd_enc[] = { 5, 5, 6, 4, 4, 4, 4, 4 };
+    static const int re_enc[] = { 6, 7, 7, 21, 22, 5, 6, 7 };
+    static const int rs_rt_enc[] = { 0, 17, 2, 3, 16, 18, 19, 20 };
+    rd = rd_enc[enc_dest];
+    re = re_enc[enc_dest];
+    rs = rs_rt_enc[enc_rs];
+    rt = rs_rt_enc[enc_rt];
+    if (rs) {
+        tcg_gen_mov_tl(cpu_gpr[rd], cpu_gpr[rs]);
+    } else {
+        tcg_gen_movi_tl(cpu_gpr[rd], 0);
+    }
+    if (rt) {
+        tcg_gen_mov_tl(cpu_gpr[re], cpu_gpr[rt]);
+    } else {
+        tcg_gen_movi_tl(cpu_gpr[re], 0);
+    }
+}
+
+static void gen_pool16c_r6_insn(DisasContext *ctx)
+{
+    int rt = mmreg((ctx->opcode >> 7) & 0x7);
+    int rs = mmreg((ctx->opcode >> 4) & 0x7);
+
+    switch (ctx->opcode & 0xf) {
+    case R6_NOT16:
+        gen_logic(ctx, OPC_NOR, rt, rs, 0);
+        break;
+    case R6_AND16:
+        gen_logic(ctx, OPC_AND, rt, rt, rs);
+        break;
+    case R6_LWM16:
+        {
+            int lwm_converted = 0x11 + extract32(ctx->opcode, 8, 2);
+            int offset = extract32(ctx->opcode, 4, 4);
+            gen_ldst_multiple(ctx, LWM32, lwm_converted, 29, offset << 2);
+        }
+        break;
+    case R6_JRC16: /* JRCADDIUSP */
+        if ((ctx->opcode >> 4) & 1) {
+            /* JRCADDIUSP */
+            int imm = extract32(ctx->opcode, 5, 5);
+            gen_compute_branch(ctx, OPC_JR, 2, 31, 0, 0, 0);
+            gen_arith_imm(ctx, OPC_ADDIU, 29, 29, imm << 2);
+        } else {
+            /* JRC16 */
+            int rs = extract32(ctx->opcode, 5, 5);
+            gen_compute_branch(ctx, OPC_JR, 2, rs, 0, 0, 0);
+        }
+        break;
+    case MOVEP ... MOVEP_07:
+    case MOVEP_0C ... MOVEP_0F:
+        {
+            int enc_dest = uMIPS_RD(ctx->opcode);
+            int enc_rt = uMIPS_RS2(ctx->opcode);
+            int enc_rs = (ctx->opcode & 3) | ((ctx->opcode >> 1) & 4);
+            gen_movep(ctx, enc_dest, enc_rt, enc_rs);
+        }
+        break;
+    case R6_XOR16:
+        gen_logic(ctx, OPC_XOR, rt, rt, rs);
+        break;
+    case R6_OR16:
+        gen_logic(ctx, OPC_OR, rt, rt, rs);
+        break;
+    case R6_SWM16:
+        {
+            int swm_converted = 0x11 + extract32(ctx->opcode, 8, 2);
+            int offset = extract32(ctx->opcode, 4, 4);
+            gen_ldst_multiple(ctx, SWM32, swm_converted, 29, offset << 2);
+        }
+        break;
+    case JALRC16: /* BREAK16, SDBBP16 */
+        switch (ctx->opcode & 0x3f) {
+        case JALRC16:
+        case JALRC16 + 0x20:
+            /* JALRC16 */
+            gen_compute_branch(ctx, OPC_JALR, 2, (ctx->opcode >> 5) & 0x1f,
+                               31, 0, 0);
+            break;
+        case R6_BREAK16:
+            /* BREAK16 */
+            generate_exception(ctx, EXCP_BREAK);
+            break;
+        case R6_SDBBP16:
+            /* SDBBP16 */
+            if (ctx->hflags & MIPS_HFLAG_SBRI) {
+                generate_exception(ctx, EXCP_RI);
+            } else {
+                generate_exception(ctx, EXCP_DBp);
+            }
+            break;
+        }
+        break;
+    default:
+        generate_exception(ctx, EXCP_RI);
+        break;
+    }
+}
+
 static void gen_ldxs (DisasContext *ctx, int base, int index, int rd)
 {
     TCGv t0 = tcg_temp_new();
@@ -12900,6 +13448,10 @@
         break;
     case 0x2c:
         switch (minor) {
+        case BITSWAP:
+            check_insn(ctx, ISA_MIPS32R6);
+            gen_bitswap(ctx, OPC_BITSWAP, rs, rt);
+            break;
         case SEB:
             gen_bshfl(ctx, OPC_SEB, rs, rt);
             break;
@@ -12922,15 +13474,19 @@
             gen_bshfl(ctx, OPC_WSBH, rs, rt);
             break;
         case MULT:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             mips32_op = OPC_MULT;
             goto do_mul;
         case MULTU:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             mips32_op = OPC_MULTU;
             goto do_mul;
         case DIV:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             mips32_op = OPC_DIV;
             goto do_div;
         case DIVU:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             mips32_op = OPC_DIVU;
             goto do_div;
         do_div:
@@ -12938,15 +13494,19 @@
             gen_muldiv(ctx, mips32_op, 0, rs, rt);
             break;
         case MADD:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             mips32_op = OPC_MADD;
             goto do_mul;
         case MADDU:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             mips32_op = OPC_MADDU;
             goto do_mul;
         case MSUB:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             mips32_op = OPC_MSUB;
             goto do_mul;
         case MSUBU:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             mips32_op = OPC_MSUBU;
         do_mul:
             check_insn(ctx, ISA_MIPS32);
@@ -12972,13 +13532,20 @@
         break;
     case 0x3c:
         switch (minor) {
-        case JALR:
-        case JALR_HB:
-            gen_compute_branch(ctx, OPC_JALR, 4, rs, rt, 0, 4);
-            ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
+        case JALR:    /* JALRC */
+        case JALR_HB: /* JALRC_HB */
+            if (ctx->insn_flags & ISA_MIPS32R6) {
+                /* JALRC, JALRC_HB */
+                gen_compute_branch(ctx, OPC_JALR, 4, rs, rt, 0, 0);
+            } else {
+                /* JALR, JALR_HB */
+                gen_compute_branch(ctx, OPC_JALR, 4, rs, rt, 0, 4);
+                ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
+            }
             break;
         case JALRS:
         case JALRS_HB:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             gen_compute_branch(ctx, OPC_JALR, 4, rs, rt, 0, 2);
             ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
             break;
@@ -12991,12 +13558,12 @@
         case RDPGPR:
             check_cp0_enabled(ctx);
             check_insn(ctx, ISA_MIPS32R2);
-            gen_load_srsgpr(rt, rs);
+            gen_load_srsgpr(rs, rt);
             break;
         case WRPGPR:
             check_cp0_enabled(ctx);
             check_insn(ctx, ISA_MIPS32R2);
-            gen_store_srsgpr(rt, rs);
+            gen_store_srsgpr(rs, rt);
             break;
         default:
             goto pool32axf_invalid;
@@ -13017,6 +13584,12 @@
         case TLBWR:
             mips32_op = OPC_TLBWR;
             goto do_cp0;
+        case TLBINV:
+            mips32_op = OPC_TLBINV;
+            goto do_cp0;
+        case TLBINVF:
+            mips32_op = OPC_TLBINVF;
+            goto do_cp0;
         case WAIT:
             mips32_op = OPC_WAIT;
             goto do_cp0;
@@ -13075,11 +13648,15 @@
             ctx->bstate = BS_STOP;
             break;
         case SDBBP:
-            check_insn(ctx, ISA_MIPS32);
-            if (!(ctx->hflags & MIPS_HFLAG_DM)) {
-                generate_exception(ctx, EXCP_DBp);
+            if (is_uhi(extract32(ctx->opcode, 16, 10))) {
+                gen_helper_do_semihosting(cpu_env);
             } else {
-                generate_exception(ctx, EXCP_DBp);
+                check_insn(ctx, ISA_MIPS32);
+                if (ctx->hflags & MIPS_HFLAG_SBRI) {
+                    generate_exception(ctx, EXCP_RI);
+                } else {
+                    generate_exception(ctx, EXCP_DBp);
+                }
             }
             break;
         default:
@@ -13105,6 +13682,7 @@
         }
         break;
     case 0x35:
+        check_insn_opc_removed(ctx, ISA_MIPS32R6);
         switch (minor) {
         case MFHI32:
             gen_HILO(ctx, OPC_MFHI, 0, rs);
@@ -13377,6 +13955,7 @@
     case COND_FLOAT_MOV(MOVT, 5):
     case COND_FLOAT_MOV(MOVT, 6):
     case COND_FLOAT_MOV(MOVT, 7):
+        check_insn_opc_removed(ctx, ISA_MIPS32R6);
         gen_movci(ctx, rt, rs, (ctx->opcode >> 13) & 0x7, 1);
         break;
     case COND_FLOAT_MOV(MOVF, 0):
@@ -13387,6 +13966,7 @@
     case COND_FLOAT_MOV(MOVF, 5):
     case COND_FLOAT_MOV(MOVF, 6):
     case COND_FLOAT_MOV(MOVF, 7):
+        check_insn_opc_removed(ctx, ISA_MIPS32R6);
         gen_movci(ctx, rt, rs, (ctx->opcode >> 13) & 0x7, 0);
         break;
     default:
@@ -13396,8 +13976,7 @@
     }
 }
 
-static void decode_micromips32_opc (CPUMIPSState *env, DisasContext *ctx,
-                                    uint16_t insn_hw1)
+static void decode_micromips32_opc(CPUMIPSState *env, DisasContext *ctx)
 {
     int32_t offset;
     uint16_t insn;
@@ -13437,6 +14016,14 @@
             do_shifti:
                 gen_shift_imm(ctx, mips32_op, rt, rs, rd);
                 break;
+            case SELEQZ:
+                check_insn(ctx, ISA_MIPS32R6);
+                gen_cond_move(ctx, OPC_SELEQZ, rd, rs, rt);
+                break;
+            case SELNEZ:
+                check_insn(ctx, ISA_MIPS32R6);
+                gen_cond_move(ctx, OPC_SELNEZ, rd, rs, rt);
+                break;
             default:
                 goto pool32a_invalid;
             }
@@ -13458,6 +14045,7 @@
                 mips32_op = OPC_SUBU;
                 goto do_arith;
             case MUL:
+                check_insn_opc_removed(ctx, ISA_MIPS32R6);
                 mips32_op = OPC_MUL;
             do_arith:
                 gen_arith(ctx, mips32_op, rd, rs, rt);
@@ -13509,16 +14097,52 @@
             minor = (ctx->opcode >> 6) & 0xf;
             switch (minor) {
                 /* Conditional moves */
-            case MOVN:
-                mips32_op = OPC_MOVN;
-                goto do_cmov;
-            case MOVZ:
-                mips32_op = OPC_MOVZ;
-            do_cmov:
-                gen_cond_move(ctx, mips32_op, rd, rs, rt);
+            case MOVN: /* MUL */
+                if (ctx->insn_flags & ISA_MIPS32R6) {
+                    /* MUL */
+                    gen_r6_muldiv(ctx, R6_OPC_MUL, rd, rs, rt);
+                } else {
+                    /* MOVN */
+                    gen_cond_move(ctx, OPC_MOVN, rd, rs, rt);
+                }
                 break;
-            case LWXS:
-                gen_ldxs(ctx, rs, rt, rd);
+            case MOVZ: /* MUH */
+                if (ctx->insn_flags & ISA_MIPS32R6) {
+                    /* MUH */
+                    gen_r6_muldiv(ctx, R6_OPC_MUH, rd, rs, rt);
+                } else {
+                    /* MOVZ */
+                    gen_cond_move(ctx, OPC_MOVZ, rd, rs, rt);
+                }
+                break;
+            case MULU:
+                check_insn(ctx, ISA_MIPS32R6);
+                gen_r6_muldiv(ctx, R6_OPC_MULU, rd, rs, rt);
+                break;
+            case MUHU:
+                check_insn(ctx, ISA_MIPS32R6);
+                gen_r6_muldiv(ctx, R6_OPC_MUHU, rd, rs, rt);
+                break;
+            case LWXS: /* DIV */
+                if (ctx->insn_flags & ISA_MIPS32R6) {
+                    /* DIV */
+                    gen_r6_muldiv(ctx, R6_OPC_DIV, rd, rs, rt);
+                } else {
+                    /* LWXS */
+                    gen_ldxs(ctx, rs, rt, rd);
+                }
+                break;
+            case MOD:
+                check_insn(ctx, ISA_MIPS32R6);
+                gen_r6_muldiv(ctx, R6_OPC_MOD, rd, rs, rt);
+                break;
+            case R6_DIVU:
+                check_insn(ctx, ISA_MIPS32R6);
+                gen_r6_muldiv(ctx, R6_OPC_DIVU, rd, rs, rt);
+                break;
+            case MODU:
+                check_insn(ctx, ISA_MIPS32R6);
+                gen_r6_muldiv(ctx, R6_OPC_MODU, rd, rs, rt);
                 break;
             default:
                 goto pool32a_invalid;
@@ -13527,6 +14151,16 @@
         case INS:
             gen_bitops(ctx, OPC_INS, rt, rs, rr, rd);
             return;
+        case LSA:
+            check_insn(ctx, ISA_MIPS32R6);
+            gen_lsa(ctx, OPC_LSA, rd, rs, rt,
+                    extract32(ctx->opcode, 9, 2));
+            break;
+        case ALIGN:
+            check_insn(ctx, ISA_MIPS32R6);
+            gen_align(ctx, OPC_ALIGN, rd, rs, rt,
+                      extract32(ctx->opcode, 9, 2));
+            break;
         case EXT:
             gen_bitops(ctx, OPC_EXT, rt, rs, rr, rd);
             return;
@@ -13589,47 +14223,61 @@
             check_cp1_enabled(ctx);
             switch (minor) {
             case ALNV_PS:
+                check_insn_opc_removed(ctx, ISA_MIPS32R6);
                 mips32_op = OPC_ALNV_PS;
                 goto do_madd;
             case MADD_S:
+                check_insn_opc_removed(ctx, ISA_MIPS32R6);
                 mips32_op = OPC_MADD_S;
                 goto do_madd;
             case MADD_D:
+                check_insn_opc_removed(ctx, ISA_MIPS32R6);
                 mips32_op = OPC_MADD_D;
                 goto do_madd;
             case MADD_PS:
+                check_insn_opc_removed(ctx, ISA_MIPS32R6);
                 mips32_op = OPC_MADD_PS;
                 goto do_madd;
             case MSUB_S:
+                check_insn_opc_removed(ctx, ISA_MIPS32R6);
                 mips32_op = OPC_MSUB_S;
                 goto do_madd;
             case MSUB_D:
+                check_insn_opc_removed(ctx, ISA_MIPS32R6);
                 mips32_op = OPC_MSUB_D;
                 goto do_madd;
             case MSUB_PS:
+                check_insn_opc_removed(ctx, ISA_MIPS32R6);
                 mips32_op = OPC_MSUB_PS;
                 goto do_madd;
             case NMADD_S:
+                check_insn_opc_removed(ctx, ISA_MIPS32R6);
                 mips32_op = OPC_NMADD_S;
                 goto do_madd;
             case NMADD_D:
+                check_insn_opc_removed(ctx, ISA_MIPS32R6);
                 mips32_op = OPC_NMADD_D;
                 goto do_madd;
             case NMADD_PS:
+                check_insn_opc_removed(ctx, ISA_MIPS32R6);
                 mips32_op = OPC_NMADD_PS;
                 goto do_madd;
             case NMSUB_S:
+                check_insn_opc_removed(ctx, ISA_MIPS32R6);
                 mips32_op = OPC_NMSUB_S;
                 goto do_madd;
             case NMSUB_D:
+                check_insn_opc_removed(ctx, ISA_MIPS32R6);
                 mips32_op = OPC_NMSUB_D;
                 goto do_madd;
             case NMSUB_PS:
+                check_insn_opc_removed(ctx, ISA_MIPS32R6);
                 mips32_op = OPC_NMSUB_PS;
             do_madd:
                 gen_flt3_arith(ctx, mips32_op, rd, rr, rs, rt);
                 break;
             case CABS_COND_FMT:
+                check_insn_opc_removed(ctx, ISA_MIPS32R6);
                 cond = (ctx->opcode >> 6) & 0xf;
                 cc = (ctx->opcode >> 13) & 0x7;
                 fmt = (ctx->opcode >> 10) & 0x3;
@@ -13648,6 +14296,7 @@
                 }
                 break;
             case C_COND_FMT:
+                check_insn_opc_removed(ctx, ISA_MIPS32R6);
                 cond = (ctx->opcode >> 6) & 0xf;
                 cc = (ctx->opcode >> 13) & 0x7;
                 fmt = (ctx->opcode >> 10) & 0x3;
@@ -13665,6 +14314,14 @@
                     goto pool32f_invalid;
                 }
                 break;
+            case CMP_CONDN_S:
+                check_insn(ctx, ISA_MIPS32R6);
+                gen_r6_cmp_s(ctx, (ctx->opcode >> 6) & 0x1f, rt, rs, rd);
+                break;
+            case CMP_CONDN_D:
+                check_insn(ctx, ISA_MIPS32R6);
+                gen_r6_cmp_d(ctx, (ctx->opcode >> 6) & 0x1f, rt, rs, rd);
+                break;
             case POOL32FXF:
                 gen_pool32fxf(ctx, rt, rs);
                 break;
@@ -13684,6 +14341,7 @@
                     mips32_op = OPC_PUU_PS;
                     goto do_ps;
                 case CVT_PS_S:
+                    check_insn_opc_removed(ctx, ISA_MIPS32R6);
                     mips32_op = OPC_CVT_PS_S;
                 do_ps:
                     gen_farith(ctx, mips32_op, rt, rs, rd, 0);
@@ -13692,25 +14350,44 @@
                     goto pool32f_invalid;
                 }
                 break;
+            case MIN_FMT:
+                check_insn(ctx, ISA_MIPS32R6);
+                switch ((ctx->opcode >> 9) & 0x3) {
+                case FMT_SDPS_S:
+                    gen_farith(ctx, OPC_MIN_S, rt, rs, rd, 0);
+                    break;
+                case FMT_SDPS_D:
+                    gen_farith(ctx, OPC_MIN_D, rt, rs, rd, 0);
+                    break;
+                default:
+                    goto pool32f_invalid;
+                }
+                break;
             case 0x08:
                 /* [LS][WDU]XC1 */
                 switch ((ctx->opcode >> 6) & 0x7) {
                 case LWXC1:
+                    check_insn_opc_removed(ctx, ISA_MIPS32R6);
                     mips32_op = OPC_LWXC1;
                     goto do_ldst_cp1;
                 case SWXC1:
+                    check_insn_opc_removed(ctx, ISA_MIPS32R6);
                     mips32_op = OPC_SWXC1;
                     goto do_ldst_cp1;
                 case LDXC1:
+                    check_insn_opc_removed(ctx, ISA_MIPS32R6);
                     mips32_op = OPC_LDXC1;
                     goto do_ldst_cp1;
                 case SDXC1:
+                    check_insn_opc_removed(ctx, ISA_MIPS32R6);
                     mips32_op = OPC_SDXC1;
                     goto do_ldst_cp1;
                 case LUXC1:
+                    check_insn_opc_removed(ctx, ISA_MIPS32R6);
                     mips32_op = OPC_LUXC1;
                     goto do_ldst_cp1;
                 case SUXC1:
+                    check_insn_opc_removed(ctx, ISA_MIPS32R6);
                     mips32_op = OPC_SUXC1;
                 do_ldst_cp1:
                     gen_flt3_ldst(ctx, mips32_op, rd, rd, rt, rs);
@@ -13719,8 +14396,22 @@
                     goto pool32f_invalid;
                 }
                 break;
+            case MAX_FMT:
+                check_insn(ctx, ISA_MIPS32R6);
+                switch ((ctx->opcode >> 9) & 0x3) {
+                case FMT_SDPS_S:
+                    gen_farith(ctx, OPC_MAX_S, rt, rs, rd, 0);
+                    break;
+                case FMT_SDPS_D:
+                    gen_farith(ctx, OPC_MAX_D, rt, rs, rd, 0);
+                    break;
+                default:
+                    goto pool32f_invalid;
+                }
+                break;
             case 0x18:
                 /* 3D insns */
+                check_insn_opc_removed(ctx, ISA_MIPS32R6);
                 fmt = (ctx->opcode >> 9) & 0x3;
                 switch ((ctx->opcode >> 6) & 0x7) {
                 case RSQRT2_FMT:
@@ -13766,41 +14457,74 @@
                 }
                 break;
             case 0x20:
-                /* MOV[FT].fmt and PREFX */
+                /* MOV[FT].fmt, PREFX, RINT.fmt, CLASS.fmt*/
                 cc = (ctx->opcode >> 13) & 0x7;
                 fmt = (ctx->opcode >> 9) & 0x3;
                 switch ((ctx->opcode >> 6) & 0x7) {
-                case MOVF_FMT:
-                    switch (fmt) {
-                    case FMT_SDPS_S:
-                        gen_movcf_s(ctx, rs, rt, cc, 0);
-                        break;
-                    case FMT_SDPS_D:
-                        gen_movcf_d(ctx, rs, rt, cc, 0);
-                        break;
-                    case FMT_SDPS_PS:
-                        gen_movcf_ps(ctx, rs, rt, cc, 0);
-                        break;
-                    default:
-                        goto pool32f_invalid;
+                case MOVF_FMT: /* RINT_FMT */
+                    if (ctx->insn_flags & ISA_MIPS32R6) {
+                        /* RINT_FMT */
+                        switch (fmt) {
+                        case FMT_SDPS_S:
+                            gen_farith(ctx, OPC_RINT_S, 0, rt, rs, 0);
+                            break;
+                        case FMT_SDPS_D:
+                            gen_farith(ctx, OPC_RINT_D, 0, rt, rs, 0);
+                            break;
+                        default:
+                            goto pool32f_invalid;
+                        }
+                    } else {
+                        /* MOVF_FMT */
+                        switch (fmt) {
+                        case FMT_SDPS_S:
+                            gen_movcf_s(ctx, rs, rt, cc, 0);
+                            break;
+                        case FMT_SDPS_D:
+                            gen_movcf_d(ctx, rs, rt, cc, 0);
+                            break;
+                        case FMT_SDPS_PS:
+                            check_ps(ctx);
+                            gen_movcf_ps(ctx, rs, rt, cc, 0);
+                            break;
+                        default:
+                            goto pool32f_invalid;
+                        }
                     }
                     break;
-                case MOVT_FMT:
-                    switch (fmt) {
-                    case FMT_SDPS_S:
-                        gen_movcf_s(ctx, rs, rt, cc, 1);
-                        break;
-                    case FMT_SDPS_D:
-                        gen_movcf_d(ctx, rs, rt, cc, 1);
-                        break;
-                    case FMT_SDPS_PS:
-                        gen_movcf_ps(ctx, rs, rt, cc, 1);
-                        break;
-                    default:
-                        goto pool32f_invalid;
+                case MOVT_FMT: /* CLASS_FMT */
+                    if (ctx->insn_flags & ISA_MIPS32R6) {
+                        /* CLASS_FMT */
+                        switch (fmt) {
+                        case FMT_SDPS_S:
+                            gen_farith(ctx, OPC_CLASS_S, 0, rt, rs, 0);
+                            break;
+                        case FMT_SDPS_D:
+                            gen_farith(ctx, OPC_CLASS_D, 0, rt, rs, 0);
+                            break;
+                        default:
+                            goto pool32f_invalid;
+                        }
+                    } else {
+                        /* MOVT_FMT */
+                        switch (fmt) {
+                        case FMT_SDPS_S:
+                            gen_movcf_s(ctx, rs, rt, cc, 1);
+                            break;
+                        case FMT_SDPS_D:
+                            gen_movcf_d(ctx, rs, rt, cc, 1);
+                            break;
+                        case FMT_SDPS_PS:
+                            check_ps(ctx);
+                            gen_movcf_ps(ctx, rs, rt, cc, 1);
+                            break;
+                        default:
+                            goto pool32f_invalid;
+                        }
                     }
                     break;
                 case PREFX:
+                    check_insn_opc_removed(ctx, ISA_MIPS32R6);
                     break;
                 default:
                     goto pool32f_invalid;
@@ -13815,11 +14539,38 @@
                     mips32_op = OPC_##prfx##_D;         \
                     goto do_fpop;                       \
                 case FMT_SDPS_PS:                       \
+                    check_ps(ctx);                      \
                     mips32_op = OPC_##prfx##_PS;        \
                     goto do_fpop;                       \
                 default:                                \
                     goto pool32f_invalid;               \
                 }
+            case MINA_FMT:
+                check_insn(ctx, ISA_MIPS32R6);
+                switch ((ctx->opcode >> 9) & 0x3) {
+                case FMT_SDPS_S:
+                    gen_farith(ctx, OPC_MINA_S, rt, rs, rd, 0);
+                    break;
+                case FMT_SDPS_D:
+                    gen_farith(ctx, OPC_MINA_D, rt, rs, rd, 0);
+                    break;
+                default:
+                    goto pool32f_invalid;
+                }
+                break;
+            case MAXA_FMT:
+                check_insn(ctx, ISA_MIPS32R6);
+                switch ((ctx->opcode >> 9) & 0x3) {
+                case FMT_SDPS_S:
+                    gen_farith(ctx, OPC_MAXA_S, rt, rs, rd, 0);
+                    break;
+                case FMT_SDPS_D:
+                    gen_farith(ctx, OPC_MAXA_D, rt, rs, rd, 0);
+                    break;
+                default:
+                    goto pool32f_invalid;
+                }
+                break;
             case 0x30:
                 /* regular FP ops */
                 switch ((ctx->opcode >> 6) & 0x3) {
@@ -13848,13 +14599,90 @@
                 break;
             case 0x38:
                 /* cmovs */
-                switch ((ctx->opcode >> 6) & 0x3) {
-                case MOVN_FMT:
+                switch ((ctx->opcode >> 6) & 0x7) {
+                case MOVN_FMT: /* SELNEZ_FMT */
+                    if (ctx->insn_flags & ISA_MIPS32R6) {
+                        /* SELNEZ_FMT */
+                        switch ((ctx->opcode >> 9) & 0x3) {
+                        case FMT_SDPS_S:
+                            gen_sel_s(ctx, OPC_SELNEZ_S, rd, rt, rs);
+                            break;
+                        case FMT_SDPS_D:
+                            gen_sel_d(ctx, OPC_SELNEZ_D, rd, rt, rs);
+                            break;
+                        default:
+                            goto pool32f_invalid;
+                        }
+                    } else {
+                        /* MOVN_FMT */
+                        FINSN_3ARG_SDPS(MOVN);
+                    }
+                    break;
+                case MOVN_FMT_04:
+                    check_insn_opc_removed(ctx, ISA_MIPS32R6);
                     FINSN_3ARG_SDPS(MOVN);
                     break;
-                case MOVZ_FMT:
+                case MOVZ_FMT: /* SELEQZ_FMT */
+                    if (ctx->insn_flags & ISA_MIPS32R6) {
+                        /* SELEQZ_FMT */
+                        switch ((ctx->opcode >> 9) & 0x3) {
+                        case FMT_SDPS_S:
+                            gen_sel_s(ctx, OPC_SELEQZ_S, rd, rt, rs);
+                            break;
+                        case FMT_SDPS_D:
+                            gen_sel_d(ctx, OPC_SELEQZ_D, rd, rt, rs);
+                            break;
+                        default:
+                            goto pool32f_invalid;
+                        }
+                    } else {
+                        /* MOVZ_FMT */
+                        FINSN_3ARG_SDPS(MOVZ);
+                    }
+                    break;
+                case MOVZ_FMT_05:
+                    check_insn_opc_removed(ctx, ISA_MIPS32R6);
                     FINSN_3ARG_SDPS(MOVZ);
                     break;
+                case SEL_FMT:
+                    check_insn(ctx, ISA_MIPS32R6);
+                    switch ((ctx->opcode >> 9) & 0x3) {
+                    case FMT_SDPS_S:
+                        gen_sel_s(ctx, OPC_SEL_S, rd, rt, rs);
+                        break;
+                    case FMT_SDPS_D:
+                        gen_sel_d(ctx, OPC_SEL_D, rd, rt, rs);
+                        break;
+                    default:
+                        goto pool32f_invalid;
+                    }
+                    break;
+                case MADDF_FMT:
+                    check_insn(ctx, ISA_MIPS32R6);
+                    switch ((ctx->opcode >> 9) & 0x3) {
+                    case FMT_SDPS_S:
+                        mips32_op = OPC_MADDF_S;
+                        goto do_fpop;
+                    case FMT_SDPS_D:
+                        mips32_op = OPC_MADDF_D;
+                        goto do_fpop;
+                    default:
+                        goto pool32f_invalid;
+                    }
+                    break;
+                case MSUBF_FMT:
+                    check_insn(ctx, ISA_MIPS32R6);
+                    switch ((ctx->opcode >> 9) & 0x3) {
+                    case FMT_SDPS_S:
+                        mips32_op = OPC_MSUBF_S;
+                        goto do_fpop;
+                    case FMT_SDPS_D:
+                        mips32_op = OPC_MSUBF_D;
+                        goto do_fpop;
+                    default:
+                        goto pool32f_invalid;
+                    }
+                    break;
                 default:
                     goto pool32f_invalid;
                 }
@@ -13876,51 +14704,87 @@
         minor = (ctx->opcode >> 21) & 0x1f;
         switch (minor) {
         case BLTZ:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             gen_compute_branch(ctx, OPC_BLTZ, 4, rs, -1, imm << 1, 4);
             break;
         case BLTZAL:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             gen_compute_branch(ctx, OPC_BLTZAL, 4, rs, -1, imm << 1, 4);
             ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
             break;
         case BLTZALS:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             gen_compute_branch(ctx, OPC_BLTZAL, 4, rs, -1, imm << 1, 2);
             ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
             break;
         case BGEZ:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             gen_compute_branch(ctx, OPC_BGEZ, 4, rs, -1, imm << 1, 4);
             break;
         case BGEZAL:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             gen_compute_branch(ctx, OPC_BGEZAL, 4, rs, -1, imm << 1, 4);
             ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
             break;
         case BGEZALS:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             gen_compute_branch(ctx, OPC_BGEZAL, 4, rs, -1, imm << 1, 2);
             ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
             break;
         case BLEZ:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             gen_compute_branch(ctx, OPC_BLEZ, 4, rs, -1, imm << 1, 4);
             break;
         case BGTZ:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             gen_compute_branch(ctx, OPC_BGTZ, 4, rs, -1, imm << 1, 4);
             break;
 
             /* Traps */
-        case TLTI:
-            mips32_op = OPC_TLTI;
-            goto do_trapi;
-        case TGEI:
-            mips32_op = OPC_TGEI;
-            goto do_trapi;
+        case TLTI: /* BC1EQZC */
+            if (ctx->insn_flags & ISA_MIPS32R6) {
+                /* BC1EQZC */
+                check_cp1_enabled(ctx);
+                gen_compute_branch1_r6(ctx, OPC_BC1EQZ, rs, imm << 1, 0);
+            } else {
+                /* TLTI */
+                mips32_op = OPC_TLTI;
+                goto do_trapi;
+            }
+            break;
+        case TGEI: /* BC1NEZC */
+            if (ctx->insn_flags & ISA_MIPS32R6) {
+                /* BC1NEZC */
+                check_cp1_enabled(ctx);
+                gen_compute_branch1_r6(ctx, OPC_BC1NEZ, rs, imm << 1, 0);
+            } else {
+                /* TGEI */
+                mips32_op = OPC_TGEI;
+                goto do_trapi;
+            }
+            break;
         case TLTIU:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             mips32_op = OPC_TLTIU;
             goto do_trapi;
         case TGEIU:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             mips32_op = OPC_TGEIU;
             goto do_trapi;
-        case TNEI:
-            mips32_op = OPC_TNEI;
-            goto do_trapi;
+        case TNEI: /* SYNCI */
+            if (ctx->insn_flags & ISA_MIPS32R6) {
+                /* SYNCI */
+                /* Break the TB to be able to sync copied instructions
+                   immediately */
+                ctx->bstate = BS_STOP;
+            } else {
+                /* TNEI */
+                mips32_op = OPC_TNEI;
+                goto do_trapi;
+            }
+            break;
         case TEQI:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             mips32_op = OPC_TEQI;
         do_trapi:
             gen_trap(ctx, mips32_op, rs, -1, imm);
@@ -13928,6 +14792,7 @@
 
         case BNEZC:
         case BEQZC:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             gen_compute_branch(ctx, minor == BNEZC ? OPC_BNE : OPC_BEQ,
                                4, rs, 0, imm << 1, 0);
             /* Compact branches don't have a delay slot, so just let
@@ -13935,28 +14800,35 @@
                target. */
             break;
         case LUI:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             gen_logic_imm(ctx, OPC_LUI, rs, 0, imm);
             break;
         case SYNCI:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             /* Break the TB to be able to sync copied instructions
                immediately */
             ctx->bstate = BS_STOP;
             break;
         case BC2F:
         case BC2T:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             /* COP2: Not implemented. */
             generate_exception_err(ctx, EXCP_CpU, 2);
             break;
         case BC1F:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             mips32_op = (ctx->opcode & (1 << 16)) ? OPC_BC1FANY2 : OPC_BC1F;
             goto do_cp1branch;
         case BC1T:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             mips32_op = (ctx->opcode & (1 << 16)) ? OPC_BC1TANY2 : OPC_BC1T;
             goto do_cp1branch;
         case BC1ANY4F:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             mips32_op = OPC_BC1FANY4;
             goto do_cp1mips3d;
         case BC1ANY4T:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             mips32_op = OPC_BC1TANY4;
         do_cp1mips3d:
             check_cop1x(ctx);
@@ -13983,38 +14855,48 @@
         break;
     case POOL32C:
         minor = (ctx->opcode >> 12) & 0xf;
+        offset = sextract32(ctx->opcode, 0,
+                            (ctx->insn_flags & ISA_MIPS32R6) ? 9 : 12);
         switch (minor) {
         case LWL:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             mips32_op = OPC_LWL;
             goto do_ld_lr;
         case SWL:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             mips32_op = OPC_SWL;
             goto do_st_lr;
         case LWR:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             mips32_op = OPC_LWR;
             goto do_ld_lr;
         case SWR:
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             mips32_op = OPC_SWR;
             goto do_st_lr;
 #if defined(TARGET_MIPS64)
         case LDL:
             check_insn(ctx, ISA_MIPS3);
             check_mips_64(ctx);
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             mips32_op = OPC_LDL;
             goto do_ld_lr;
         case SDL:
             check_insn(ctx, ISA_MIPS3);
             check_mips_64(ctx);
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             mips32_op = OPC_SDL;
             goto do_st_lr;
         case LDR:
             check_insn(ctx, ISA_MIPS3);
             check_mips_64(ctx);
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             mips32_op = OPC_LDR;
             goto do_ld_lr;
         case SDR:
             check_insn(ctx, ISA_MIPS3);
             check_mips_64(ctx);
+            check_insn_opc_removed(ctx, ISA_MIPS32R6);
             mips32_op = OPC_SDR;
             goto do_st_lr;
         case LWU:
@@ -14032,23 +14914,27 @@
             mips32_op = OPC_LL;
             goto do_ld_lr;
         do_ld_lr:
-            gen_ld(ctx, mips32_op, rt, rs, SIMM(ctx->opcode, 0, 12));
+            gen_ld(ctx, mips32_op, rt, rs, offset);
             break;
         do_st_lr:
             gen_st(ctx, mips32_op, rt, rs, SIMM(ctx->opcode, 0, 12));
             break;
         case SC:
-            gen_st_cond(ctx, OPC_SC, rt, rs, SIMM(ctx->opcode, 0, 12));
+            gen_st_cond(ctx, OPC_SC, rt, rs, offset);
             break;
 #if defined(TARGET_MIPS64)
         case SCD:
             check_insn(ctx, ISA_MIPS3);
             check_mips_64(ctx);
-            gen_st_cond(ctx, OPC_SCD, rt, rs, SIMM(ctx->opcode, 0, 12));
+            gen_st_cond(ctx, OPC_SCD, rt, rs, offset);
             break;
 #endif
         case PREF:
             /* Treat as no-op */
+            if ((ctx->insn_flags & ISA_MIPS32R6) && (rt >= 24)) {
+                /* hint codes 24-31 are reserved and signal RI */
+                generate_exception(ctx, EXCP_RI);
+            }
             break;
         default:
             MIPS_INVAL("pool32c");
@@ -14056,9 +14942,16 @@
             break;
         }
         break;
-    case ADDI32:
-        mips32_op = OPC_ADDI;
-        goto do_addi;
+    case ADDI32: /* AUI, LUI */
+        if (ctx->insn_flags & ISA_MIPS32R6) {
+            /* AUI, LUI */
+            gen_logic_imm(ctx, OPC_LUI, rt, rs, imm);
+        } else {
+            /* ADDI32 */
+            mips32_op = OPC_ADDI;
+            goto do_addi;
+        }
+        break;
     case ADDIU32:
         mips32_op = OPC_ADDIU;
     do_addi:
@@ -14088,29 +14981,89 @@
         gen_slt_imm(ctx, mips32_op, rt, rs, imm);
         break;
     case JALX32:
+        check_insn_opc_removed(ctx, ISA_MIPS32R6);
         offset = (int32_t)(ctx->opcode & 0x3FFFFFF) << 2;
         gen_compute_branch(ctx, OPC_JALX, 4, rt, rs, offset, 4);
         ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
         break;
-    case JALS32:
-        offset = (int32_t)(ctx->opcode & 0x3FFFFFF) << 1;
-        gen_compute_branch(ctx, OPC_JAL, 4, rt, rs, offset, 2);
-        ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
+    case JALS32: /* BOVC, BEQC, BEQZALC */
+        if (ctx->insn_flags & ISA_MIPS32R6) {
+            if (rs >= rt) {
+                /* BOVC */
+                mips32_op = OPC_BOVC;
+            } else if (rs < rt && rs == 0) {
+                /* BEQZALC */
+                mips32_op = OPC_BEQZALC;
+            } else {
+                /* BEQC */
+                mips32_op = OPC_BEQC;
+            }
+            gen_compute_compact_branch(ctx, mips32_op, rs, rt, imm << 1);
+        } else {
+            /* JALS32 */
+            offset = (int32_t)(ctx->opcode & 0x3FFFFFF) << 1;
+            gen_compute_branch(ctx, OPC_JAL, 4, rt, rs, offset, 2);
+            ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
+        }
         break;
-    case BEQ32:
-        gen_compute_branch(ctx, OPC_BEQ, 4, rt, rs, imm << 1, 4);
+    case BEQ32: /* BC */
+        if (ctx->insn_flags & ISA_MIPS32R6) {
+            /* BC */
+            gen_compute_compact_branch(ctx, OPC_BC, 0, 0,
+                                       sextract32(ctx->opcode << 1, 0, 27));
+        } else {
+            /* BEQ32 */
+            gen_compute_branch(ctx, OPC_BEQ, 4, rt, rs, imm << 1, 4);
+        }
         break;
-    case BNE32:
-        gen_compute_branch(ctx, OPC_BNE, 4, rt, rs, imm << 1, 4);
+    case BNE32: /* BALC */
+        if (ctx->insn_flags & ISA_MIPS32R6) {
+            /* BALC */
+            gen_compute_compact_branch(ctx, OPC_BALC, 0, 0,
+                                       sextract32(ctx->opcode << 1, 0, 27));
+        } else {
+            /* BNE32 */
+            gen_compute_branch(ctx, OPC_BNE, 4, rt, rs, imm << 1, 4);
+        }
         break;
-    case J32:
-        gen_compute_branch(ctx, OPC_J, 4, rt, rs,
-                           (int32_t)(ctx->opcode & 0x3FFFFFF) << 1, 4);
+    case J32: /* BGTZC, BLTZC, BLTC */
+        if (ctx->insn_flags & ISA_MIPS32R6) {
+            if (rs == 0 && rt != 0) {
+                /* BGTZC */
+                mips32_op = OPC_BGTZC;
+            } else if (rs != 0 && rt != 0 && rs == rt) {
+                /* BLTZC */
+                mips32_op = OPC_BLTZC;
+            } else {
+                /* BLTC */
+                mips32_op = OPC_BLTC;
+            }
+            gen_compute_compact_branch(ctx, mips32_op, rs, rt, imm << 1);
+        } else {
+            /* J32 */
+            gen_compute_branch(ctx, OPC_J, 4, rt, rs,
+                               (int32_t)(ctx->opcode & 0x3FFFFFF) << 1, 4);
+        }
         break;
-    case JAL32:
-        gen_compute_branch(ctx, OPC_JAL, 4, rt, rs,
-                           (int32_t)(ctx->opcode & 0x3FFFFFF) << 1, 4);
-        ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
+    case JAL32: /* BLEZC, BGEZC, BGEC */
+        if (ctx->insn_flags & ISA_MIPS32R6) {
+            if (rs == 0 && rt != 0) {
+                /* BLEZC */
+                mips32_op = OPC_BLEZC;
+            } else if (rs != 0 && rt != 0 && rs == rt) {
+                /* BGEZC */
+                mips32_op = OPC_BGEZC;
+            } else {
+                /* BGEC */
+                mips32_op = OPC_BGEC;
+            }
+            gen_compute_compact_branch(ctx, mips32_op, rs, rt, imm << 1);
+        } else {
+            /* JAL32 */
+            gen_compute_branch(ctx, OPC_JAL, 4, rt, rs,
+                               (int32_t)(ctx->opcode & 0x3FFFFFF) << 1, 4);
+            ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
+        }
         break;
         /* Floating point (COP1) */
     case LWC132:
@@ -14127,14 +15080,98 @@
     do_cop1:
         gen_cop1_ldst(ctx, mips32_op, rt, rs, imm);
         break;
-    case ADDIUPC:
-        {
+    case ADDIUPC: /* PCREL: ADDIUPC, AUIPC, ALUIPC, LWPC */
+        if (ctx->insn_flags & ISA_MIPS32R6) {
+            /* PCREL: ADDIUPC, AUIPC, ALUIPC, LWPC */
+            switch ((ctx->opcode >> 16) & 0x1f) {
+            case ADDIUPC_00 ... ADDIUPC_07:
+                gen_pcrel(ctx, OPC_ADDIUPC, ctx->pc & ~0x3, rt);
+                break;
+            case AUIPC:
+                gen_pcrel(ctx, OPC_AUIPC, ctx->pc, rt);
+                break;
+            case ALUIPC:
+                gen_pcrel(ctx, OPC_ALUIPC, ctx->pc, rt);
+                break;
+            case LWPC_08 ... LWPC_0F:
+                gen_pcrel(ctx, R6_OPC_LWPC, ctx->pc & ~0x3, rt);
+                break;
+            default:
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+        } else {
+            /* ADDIUPC */
             int reg = mmreg(ZIMM(ctx->opcode, 23, 3));
             int offset = SIMM(ctx->opcode, 0, 23) << 2;
 
             gen_addiupc(ctx, reg, offset, 0, 0);
         }
         break;
+    case BNVC: /* BNEC, BNEZALC */
+        check_insn(ctx, ISA_MIPS32R6);
+        if (rs >= rt) {
+            /* BNVC */
+            mips32_op = OPC_BNVC;
+        } else if (rs < rt && rs == 0) {
+            /* BNEZALC */
+            mips32_op = OPC_BNEZALC;
+        } else {
+            /* BNEC */
+            mips32_op = OPC_BNEC;
+        }
+        gen_compute_compact_branch(ctx, mips32_op, rs, rt, imm << 1);
+        break;
+    case R6_BNEZC: /* JIALC */
+        check_insn(ctx, ISA_MIPS32R6);
+        if (rt != 0) {
+            /* BNEZC */
+            gen_compute_compact_branch(ctx, OPC_BNEZC, rt, 0,
+                                       sextract32(ctx->opcode << 1, 0, 22));
+        } else {
+            /* JIALC */
+            gen_compute_compact_branch(ctx, OPC_JIALC, 0, rs, imm);
+        }
+        break;
+    case R6_BEQZC: /* JIC */
+        check_insn(ctx, ISA_MIPS32R6);
+        if (rt != 0) {
+            /* BEQZC */
+            gen_compute_compact_branch(ctx, OPC_BEQZC, rt, 0,
+                                       sextract32(ctx->opcode << 1, 0, 22));
+        } else {
+            /* JIC */
+            gen_compute_compact_branch(ctx, OPC_JIC, 0, rs, imm);
+        }
+        break;
+    case BLEZALC: /* BGEZALC, BGEUC */
+        check_insn(ctx, ISA_MIPS32R6);
+        if (rs == 0 && rt != 0) {
+            /* BLEZALC */
+            mips32_op = OPC_BLEZALC;
+        } else if (rs != 0 && rt != 0 && rs == rt) {
+            /* BGEZALC */
+            mips32_op = OPC_BGEZALC;
+        } else {
+            /* BGEUC */
+            mips32_op = OPC_BGEUC;
+        }
+        gen_compute_compact_branch(ctx, mips32_op, rs, rt, imm << 1);
+        break;
+    case BGTZALC: /* BLTZALC, BLTUC */
+        check_insn(ctx, ISA_MIPS32R6);
+        if (rs == 0 && rt != 0) {
+            /* BGTZALC */
+            mips32_op = OPC_BGTZALC;
+        } else if (rs != 0 && rt != 0 && rs == rt) {
+            /* BLTZALC */
+            mips32_op = OPC_BLTZALC;
+        } else {
+            /* BLTUC */
+            mips32_op = OPC_BLTUC;
+        }
+        gen_compute_compact_branch(ctx, mips32_op, rs, rt, imm << 1);
+        break;
         /* Loads and stores */
     case LB32:
         mips32_op = OPC_LB;
@@ -14249,8 +15286,14 @@
                 opc = OPC_SUBU;
                 break;
             }
-
-            gen_arith(ctx, opc, rd, rs1, rs2);
+            if (ctx->insn_flags & ISA_MIPS32R6) {
+                /* In the Release 6 the register number location in
+                 * the instruction encoding has changed.
+                 */
+                gen_arith(ctx, opc, rs1, rd, rs2);
+            } else {
+                gen_arith(ctx, opc, rd, rs1, rs2);
+            }
         }
         break;
     case POOL16B:
@@ -14274,7 +15317,11 @@
         }
         break;
     case POOL16C:
-        gen_pool16c_insn(ctx);
+        if (ctx->insn_flags & ISA_MIPS32R6) {
+            gen_pool16c_r6_insn(ctx);
+        } else {
+            gen_pool16c_insn(ctx);
+        }
         break;
     case LWGP16:
         {
@@ -14286,6 +15333,7 @@
         }
         break;
     case POOL16F:
+        check_insn_opc_removed(ctx, ISA_MIPS32R6);
         if (ctx->opcode & 1) {
             generate_exception(ctx, EXCP_RI);
         } else {
@@ -14293,18 +15341,7 @@
             int enc_dest = uMIPS_RD(ctx->opcode);
             int enc_rt = uMIPS_RS2(ctx->opcode);
             int enc_rs = uMIPS_RS1(ctx->opcode);
-            int rd, rs, re, rt;
-            static const int rd_enc[] = { 5, 5, 6, 4, 4, 4, 4, 4 };
-            static const int re_enc[] = { 6, 7, 7, 21, 22, 5, 6, 7 };
-            static const int rs_rt_enc[] = { 0, 17, 2, 3, 16, 18, 19, 20 };
-
-            rd = rd_enc[enc_dest];
-            re = re_enc[enc_dest];
-            rs = rs_rt_enc[enc_rs];
-            rt = rs_rt_enc[enc_rt];
-
-            gen_arith(ctx, OPC_ADDU, rd, rs, 0);
-            gen_arith(ctx, OPC_ADDU, re, rt, 0);
+            gen_movep(ctx, enc_dest, enc_rt, enc_rs);
         }
         break;
     case LBU16:
@@ -14411,15 +15448,18 @@
             break;
         }
         break;
-    case B16:
+    case B16: /* BC16 */
         gen_compute_branch(ctx, OPC_BEQ, 2, 0, 0,
-                           SIMM(ctx->opcode, 0, 10) << 1, 4);
+                           sextract32(ctx->opcode, 0, 10) << 1,
+                           (ctx->insn_flags & ISA_MIPS32R6) ? 0 : 4);
         break;
-    case BNEZ16:
-    case BEQZ16:
+    case BNEZ16: /* BNEZC16 */
+    case BEQZ16: /* BEQZC16 */
         gen_compute_branch(ctx, op == BNEZ16 ? OPC_BNE : OPC_BEQ, 2,
                            mmreg(uMIPS_RD(ctx->opcode)),
-                           0, SIMM(ctx->opcode, 0, 7) << 1, 4);
+                           0, sextract32(ctx->opcode, 0, 7) << 1,
+                           (ctx->insn_flags & ISA_MIPS32R6) ? 0 : 4);
+
         break;
     case LI16:
         {
@@ -14430,17 +15470,13 @@
             tcg_gen_movi_tl(cpu_gpr[reg], imm);
         }
         break;
-    case RES_20:
-    case RES_28:
     case RES_29:
-    case RES_30:
     case RES_31:
-    case RES_38:
     case RES_39:
         generate_exception(ctx, EXCP_RI);
         break;
     default:
-        decode_micromips32_opc (env, ctx, op);
+        decode_micromips32_opc(env, ctx);
         return 4;
     }
 
@@ -16176,242 +17212,6 @@
 
 /* End MIPSDSP functions. */
 
-/* Compact Branches */
-static void gen_compute_compact_branch(DisasContext *ctx, uint32_t opc,
-                                       int rs, int rt, int32_t offset)
-{
-    int bcond_compute = 0;
-    TCGv t0 = tcg_temp_new();
-    TCGv t1 = tcg_temp_new();
-
-    if (ctx->hflags & MIPS_HFLAG_BMASK) {
-#ifdef MIPS_DEBUG_DISAS
-        LOG_DISAS("Branch in delay / forbidden slot at PC 0x" TARGET_FMT_lx
-                  "\n", ctx->pc);
-#endif
-        generate_exception(ctx, EXCP_RI);
-        goto out;
-    }
-
-    /* Load needed operands and calculate btarget */
-    switch (opc) {
-    /* compact branch */
-    case OPC_BOVC: /* OPC_BEQZALC, OPC_BEQC */
-    case OPC_BNVC: /* OPC_BNEZALC, OPC_BNEC */
-        gen_load_gpr(t0, rs);
-        gen_load_gpr(t1, rt);
-        bcond_compute = 1;
-        ctx->btarget = addr_add(ctx, ctx->pc + 4, offset);
-        if (rs <= rt && rs == 0) {
-            /* OPC_BEQZALC, OPC_BNEZALC */
-            tcg_gen_movi_tl(cpu_gpr[31], ctx->pc + 4);
-        }
-        break;
-    case OPC_BLEZC: /* OPC_BGEZC, OPC_BGEC */
-    case OPC_BGTZC: /* OPC_BLTZC, OPC_BLTC */
-        gen_load_gpr(t0, rs);
-        gen_load_gpr(t1, rt);
-        bcond_compute = 1;
-        ctx->btarget = addr_add(ctx, ctx->pc + 4, offset);
-        break;
-    case OPC_BLEZALC: /* OPC_BGEZALC, OPC_BGEUC */
-    case OPC_BGTZALC: /* OPC_BLTZALC, OPC_BLTUC */
-        if (rs == 0 || rs == rt) {
-            /* OPC_BLEZALC, OPC_BGEZALC */
-            /* OPC_BGTZALC, OPC_BLTZALC */
-            tcg_gen_movi_tl(cpu_gpr[31], ctx->pc + 4);
-        }
-        gen_load_gpr(t0, rs);
-        gen_load_gpr(t1, rt);
-        bcond_compute = 1;
-        ctx->btarget = addr_add(ctx, ctx->pc + 4, offset);
-        break;
-    case OPC_BC:
-    case OPC_BALC:
-        ctx->btarget = addr_add(ctx, ctx->pc + 4, offset);
-        break;
-    case OPC_BEQZC:
-    case OPC_BNEZC:
-        if (rs != 0) {
-            /* OPC_BEQZC, OPC_BNEZC */
-            gen_load_gpr(t0, rs);
-            bcond_compute = 1;
-            ctx->btarget = addr_add(ctx, ctx->pc + 4, offset);
-        } else {
-            /* OPC_JIC, OPC_JIALC */
-            TCGv tbase = tcg_temp_new();
-            TCGv toffset = tcg_temp_new();
-
-            gen_load_gpr(tbase, rt);
-            tcg_gen_movi_tl(toffset, offset);
-            gen_op_addr_add(ctx, btarget, tbase, toffset);
-            tcg_temp_free(tbase);
-            tcg_temp_free(toffset);
-        }
-        break;
-    default:
-        MIPS_INVAL("Compact branch/jump");
-        generate_exception(ctx, EXCP_RI);
-        goto out;
-    }
-
-    if (bcond_compute == 0) {
-        /* Uncoditional compact branch */
-        switch (opc) {
-        case OPC_JIALC:
-            tcg_gen_movi_tl(cpu_gpr[31], ctx->pc + 4);
-            /* Fallthrough */
-        case OPC_JIC:
-            ctx->hflags |= MIPS_HFLAG_BR;
-            break;
-        case OPC_BALC:
-            tcg_gen_movi_tl(cpu_gpr[31], ctx->pc + 4);
-            /* Fallthrough */
-        case OPC_BC:
-            ctx->hflags |= MIPS_HFLAG_B;
-            break;
-        default:
-            MIPS_INVAL("Compact branch/jump");
-            generate_exception(ctx, EXCP_RI);
-            goto out;
-        }
-
-        /* Generating branch here as compact branches don't have delay slot */
-        gen_branch(ctx, 4);
-    } else {
-        /* Conditional compact branch */
-        TCGLabel *fs = gen_new_label();
-        save_cpu_state(ctx, 0);
-
-        switch (opc) {
-        case OPC_BLEZALC: /* OPC_BGEZALC, OPC_BGEUC */
-            if (rs == 0 && rt != 0) {
-                /* OPC_BLEZALC */
-                tcg_gen_brcondi_tl(tcg_invert_cond(TCG_COND_LE), t1, 0, fs);
-            } else if (rs != 0 && rt != 0 && rs == rt) {
-                /* OPC_BGEZALC */
-                tcg_gen_brcondi_tl(tcg_invert_cond(TCG_COND_GE), t1, 0, fs);
-            } else {
-                /* OPC_BGEUC */
-                tcg_gen_brcond_tl(tcg_invert_cond(TCG_COND_GEU), t0, t1, fs);
-            }
-            break;
-        case OPC_BGTZALC: /* OPC_BLTZALC, OPC_BLTUC */
-            if (rs == 0 && rt != 0) {
-                /* OPC_BGTZALC */
-                tcg_gen_brcondi_tl(tcg_invert_cond(TCG_COND_GT), t1, 0, fs);
-            } else if (rs != 0 && rt != 0 && rs == rt) {
-                /* OPC_BLTZALC */
-                tcg_gen_brcondi_tl(tcg_invert_cond(TCG_COND_LT), t1, 0, fs);
-            } else {
-                /* OPC_BLTUC */
-                tcg_gen_brcond_tl(tcg_invert_cond(TCG_COND_LTU), t0, t1, fs);
-            }
-            break;
-        case OPC_BLEZC: /* OPC_BGEZC, OPC_BGEC */
-            if (rs == 0 && rt != 0) {
-                /* OPC_BLEZC */
-                tcg_gen_brcondi_tl(tcg_invert_cond(TCG_COND_LE), t1, 0, fs);
-            } else if (rs != 0 && rt != 0 && rs == rt) {
-                /* OPC_BGEZC */
-                tcg_gen_brcondi_tl(tcg_invert_cond(TCG_COND_GE), t1, 0, fs);
-            } else {
-                /* OPC_BGEC */
-                tcg_gen_brcond_tl(tcg_invert_cond(TCG_COND_GE), t0, t1, fs);
-            }
-            break;
-        case OPC_BGTZC: /* OPC_BLTZC, OPC_BLTC */
-            if (rs == 0 && rt != 0) {
-                /* OPC_BGTZC */
-                tcg_gen_brcondi_tl(tcg_invert_cond(TCG_COND_GT), t1, 0, fs);
-            } else if (rs != 0 && rt != 0 && rs == rt) {
-                /* OPC_BLTZC */
-                tcg_gen_brcondi_tl(tcg_invert_cond(TCG_COND_LT), t1, 0, fs);
-            } else {
-                /* OPC_BLTC */
-                tcg_gen_brcond_tl(tcg_invert_cond(TCG_COND_LT), t0, t1, fs);
-            }
-            break;
-        case OPC_BOVC: /* OPC_BEQZALC, OPC_BEQC */
-        case OPC_BNVC: /* OPC_BNEZALC, OPC_BNEC */
-            if (rs >= rt) {
-                /* OPC_BOVC, OPC_BNVC */
-                TCGv t2 = tcg_temp_new();
-                TCGv t3 = tcg_temp_new();
-                TCGv t4 = tcg_temp_new();
-                TCGv input_overflow = tcg_temp_new();
-
-                gen_load_gpr(t0, rs);
-                gen_load_gpr(t1, rt);
-                tcg_gen_ext32s_tl(t2, t0);
-                tcg_gen_setcond_tl(TCG_COND_NE, input_overflow, t2, t0);
-                tcg_gen_ext32s_tl(t3, t1);
-                tcg_gen_setcond_tl(TCG_COND_NE, t4, t3, t1);
-                tcg_gen_or_tl(input_overflow, input_overflow, t4);
-
-                tcg_gen_add_tl(t4, t2, t3);
-                tcg_gen_ext32s_tl(t4, t4);
-                tcg_gen_xor_tl(t2, t2, t3);
-                tcg_gen_xor_tl(t3, t4, t3);
-                tcg_gen_andc_tl(t2, t3, t2);
-                tcg_gen_setcondi_tl(TCG_COND_LT, t4, t2, 0);
-                tcg_gen_or_tl(t4, t4, input_overflow);
-                if (opc == OPC_BOVC) {
-                    /* OPC_BOVC */
-                    tcg_gen_brcondi_tl(tcg_invert_cond(TCG_COND_NE), t4, 0, fs);
-                } else {
-                    /* OPC_BNVC */
-                    tcg_gen_brcondi_tl(tcg_invert_cond(TCG_COND_EQ), t4, 0, fs);
-                }
-                tcg_temp_free(input_overflow);
-                tcg_temp_free(t4);
-                tcg_temp_free(t3);
-                tcg_temp_free(t2);
-            } else if (rs < rt && rs == 0) {
-                /* OPC_BEQZALC, OPC_BNEZALC */
-                if (opc == OPC_BEQZALC) {
-                    /* OPC_BEQZALC */
-                    tcg_gen_brcondi_tl(tcg_invert_cond(TCG_COND_EQ), t1, 0, fs);
-                } else {
-                    /* OPC_BNEZALC */
-                    tcg_gen_brcondi_tl(tcg_invert_cond(TCG_COND_NE), t1, 0, fs);
-                }
-            } else {
-                /* OPC_BEQC, OPC_BNEC */
-                if (opc == OPC_BEQC) {
-                    /* OPC_BEQC */
-                    tcg_gen_brcond_tl(tcg_invert_cond(TCG_COND_EQ), t0, t1, fs);
-                } else {
-                    /* OPC_BNEC */
-                    tcg_gen_brcond_tl(tcg_invert_cond(TCG_COND_NE), t0, t1, fs);
-                }
-            }
-            break;
-        case OPC_BEQZC:
-            tcg_gen_brcondi_tl(tcg_invert_cond(TCG_COND_EQ), t0, 0, fs);
-            break;
-        case OPC_BNEZC:
-            tcg_gen_brcondi_tl(tcg_invert_cond(TCG_COND_NE), t0, 0, fs);
-            break;
-        default:
-            MIPS_INVAL("Compact conditional branch/jump");
-            generate_exception(ctx, EXCP_RI);
-            goto out;
-        }
-
-        /* Generating branch here as compact branches don't have delay slot */
-        gen_goto_tb(ctx, 1, ctx->btarget);
-        gen_set_label(fs);
-
-        ctx->hflags |= MIPS_HFLAG_FBNSLOT;
-        MIPS_DEBUG("Compact conditional branch");
-    }
-
-out:
-    tcg_temp_free(t0);
-    tcg_temp_free(t1);
-}
-
 static void decode_opc_special_r6(CPUMIPSState *env, DisasContext *ctx)
 {
     int rs, rt, rd, sa;
@@ -16425,18 +17225,7 @@
     op1 = MASK_SPECIAL(ctx->opcode);
     switch (op1) {
     case OPC_LSA:
-        if (rd != 0) {
-            int imm2 = extract32(ctx->opcode, 6, 3);
-            TCGv t0 = tcg_temp_new();
-            TCGv t1 = tcg_temp_new();
-            gen_load_gpr(t0, rs);
-            gen_load_gpr(t1, rt);
-            tcg_gen_shli_tl(t0, t0, imm2 + 1);
-            tcg_gen_add_tl(t0, t0, t1);
-            tcg_gen_ext32s_tl(cpu_gpr[rd], t0);
-            tcg_temp_free(t1);
-            tcg_temp_free(t0);
-        }
+        gen_lsa(ctx, op1, rd, rs, rt, extract32(ctx->opcode, 6, 2));
         break;
     case OPC_MULT ... OPC_DIVU:
         op2 = MASK_R6_MULDIV(ctx->opcode);
@@ -16472,26 +17261,20 @@
         }
         break;
     case R6_OPC_SDBBP:
-        if (ctx->hflags & MIPS_HFLAG_SBRI) {
-            generate_exception(ctx, EXCP_RI);
+        if (is_uhi(extract32(ctx->opcode, 6, 20))) {
+            gen_helper_do_semihosting(cpu_env);
         } else {
-            generate_exception(ctx, EXCP_DBp);
+            if (ctx->hflags & MIPS_HFLAG_SBRI) {
+                generate_exception(ctx, EXCP_RI);
+            } else {
+                generate_exception(ctx, EXCP_DBp);
+            }
         }
         break;
 #if defined(TARGET_MIPS64)
     case OPC_DLSA:
         check_mips_64(ctx);
-        if (rd != 0) {
-            int imm2 = extract32(ctx->opcode, 6, 3);
-            TCGv t0 = tcg_temp_new();
-            TCGv t1 = tcg_temp_new();
-            gen_load_gpr(t0, rs);
-            gen_load_gpr(t1, rt);
-            tcg_gen_shli_tl(t0, t0, imm2 + 1);
-            tcg_gen_add_tl(cpu_gpr[rd], t0, t1);
-            tcg_temp_free(t1);
-            tcg_temp_free(t0);
-        }
+        gen_lsa(ctx, op1, rd, rs, rt, extract32(ctx->opcode, 6, 2));
         break;
     case R6_OPC_DCLO:
     case R6_OPC_DCLZ:
@@ -16845,16 +17628,15 @@
         gen_cl(ctx, op1, rd, rs);
         break;
     case OPC_SDBBP:
-        /* XXX: not clear which exception should be raised
-         *      when in debug mode...
-         */
-        check_insn(ctx, ISA_MIPS32);
-        if (!(ctx->hflags & MIPS_HFLAG_DM)) {
-            generate_exception(ctx, EXCP_DBp);
+        if (is_uhi(extract32(ctx->opcode, 6, 20))) {
+            gen_helper_do_semihosting(cpu_env);
         } else {
+            /* XXX: not clear which exception should be raised
+             *      when in debug mode...
+             */
+            check_insn(ctx, ISA_MIPS32);
             generate_exception(ctx, EXCP_DBp);
         }
-        /* Treat as NOP. */
         break;
 #if defined(TARGET_MIPS64)
     case OPC_DCLO:
@@ -16916,35 +17698,15 @@
                 /* Treat as NOP. */
                 break;
             }
-            TCGv t0 = tcg_temp_new();
-            gen_load_gpr(t0, rt);
-
             op2 = MASK_BSHFL(ctx->opcode);
             switch (op2) {
             case OPC_ALIGN ... OPC_ALIGN_END:
-                sa &= 3;
-                if (sa == 0) {
-                    tcg_gen_mov_tl(cpu_gpr[rd], t0);
-                } else {
-                    TCGv t1 = tcg_temp_new();
-                    TCGv_i64 t2 = tcg_temp_new_i64();
-                    gen_load_gpr(t1, rs);
-                    tcg_gen_concat_tl_i64(t2, t1, t0);
-                    tcg_gen_shri_i64(t2, t2, 8 * (4 - sa));
-#if defined(TARGET_MIPS64)
-                    tcg_gen_ext32s_i64(cpu_gpr[rd], t2);
-#else
-                    tcg_gen_trunc_i64_i32(cpu_gpr[rd], t2);
-#endif
-                    tcg_temp_free_i64(t2);
-                    tcg_temp_free(t1);
-                }
+                gen_align(ctx, OPC_ALIGN, rd, rs, rt, sa & 3);
                 break;
             case OPC_BITSWAP:
-                gen_helper_bitswap(cpu_gpr[rd], t0);
+                gen_bitswap(ctx, op2, rd, rt);
                 break;
             }
-            tcg_temp_free(t0);
         }
         break;
 #if defined(TARGET_MIPS64)
@@ -16961,29 +17723,16 @@
                 /* Treat as NOP. */
                 break;
             }
-            TCGv t0 = tcg_temp_new();
-            gen_load_gpr(t0, rt);
-
             op2 = MASK_DBSHFL(ctx->opcode);
             switch (op2) {
             case OPC_DALIGN ... OPC_DALIGN_END:
-                sa &= 7;
-                if (sa == 0) {
-                    tcg_gen_mov_tl(cpu_gpr[rd], t0);
-                } else {
-                    TCGv t1 = tcg_temp_new();
-                    gen_load_gpr(t1, rs);
-                    tcg_gen_shli_tl(t0, t0, 8 * sa);
-                    tcg_gen_shri_tl(t1, t1, 8 * (8 - sa));
-                    tcg_gen_or_tl(cpu_gpr[rd], t1, t0);
-                    tcg_temp_free(t1);
-                }
+                gen_align(ctx, OPC_DALIGN, rd, rs, rt, sa & 7);
                 break;
             case OPC_DBITSWAP:
-                gen_helper_dbitswap(cpu_gpr[rd], t0);
+                gen_bitswap(ctx, op2, rd, rt);
                 break;
             }
-            tcg_temp_free(t0);
+
         }
         break;
 #endif
@@ -19082,7 +19831,7 @@
             if (ctx->insn_flags & ISA_MIPS32R6) {
                 /* OPC_BC1EQZ */
                 gen_compute_branch1_r6(ctx, MASK_CP1(ctx->opcode),
-                                rt, imm << 2);
+                                       rt, imm << 2, 4);
             } else {
                 /* OPC_BC1ANY2 */
                 check_cop1x(ctx);
@@ -19095,7 +19844,7 @@
             check_cp1_enabled(ctx);
             check_insn(ctx, ISA_MIPS32R6);
             gen_compute_branch1_r6(ctx, MASK_CP1(ctx->opcode),
-                            rt, imm << 2);
+                                   rt, imm << 2, 4);
             break;
         case OPC_BC1ANY4:
             check_cp1_enabled(ctx);
@@ -19110,8 +19859,7 @@
                                 (rt >> 2) & 0x7, imm << 2);
             break;
         case OPC_PS_FMT:
-            check_cp1_enabled(ctx);
-            check_insn_opc_removed(ctx, ISA_MIPS32R6);
+            check_ps(ctx);
             /* fall through */
         case OPC_S_FMT:
         case OPC_D_FMT:
@@ -19372,7 +20120,7 @@
         break;
     case OPC_PCREL:
         check_insn(ctx, ISA_MIPS32R6);
-        gen_pcrel(ctx, rs, imm);
+        gen_pcrel(ctx, ctx->opcode, ctx->pc, rs);
         break;
     default:            /* Invalid */
         MIPS_INVAL("major opcode");
@@ -19420,6 +20168,8 @@
     /* Restore delay slot state from the tb context.  */
     ctx.hflags = (uint32_t)tb->flags; /* FIXME: maybe use 64 bits here? */
     ctx.ulri = (env->CP0_Config3 >> CP0C3_ULRI) & 1;
+    ctx.ps = ((env->active_fpu.fcr0 >> FCR0_PS) & 1) ||
+             (env->insn_flags & (INSN_LOONGSON2E | INSN_LOONGSON2F));
     restore_cpu_state(env, &ctx);
 #ifdef CONFIG_USER_ONLY
         ctx.mem_idx = MIPS_HFLAG_UM;
@@ -19489,6 +20239,12 @@
                    forbidden slot */
                 is_slot = 1;
             }
+            if ((ctx.hflags & MIPS_HFLAG_M16) &&
+                (ctx.hflags & MIPS_HFLAG_FBNSLOT)) {
+                /* Force to generate branch as microMIPS R6 doesn't restrict
+                   branches in the forbidden slot. */
+                is_slot = 1;
+            }
         }
         if (is_slot) {
             gen_branch(&ctx, insn_bytes);
@@ -19927,6 +20683,11 @@
     restore_flush_mode(env);
     restore_pamask(env);
     cs->exception_index = EXCP_NONE;
+
+    if (semihosting_get_argc()) {
+        /* UHI interface can be used to obtain argc and argv */
+        env->active_tc.gpr[4] = -1;
+    }
 }
 
 void restore_state_to_opc(CPUMIPSState *env, TranslationBlock *tb, int pc_pos)
diff --git a/target-mips/translate_init.c b/target-mips/translate_init.c
index 30605da..ddfaff8 100644
--- a/target-mips/translate_init.c
+++ b/target-mips/translate_init.c
@@ -424,6 +424,43 @@
         .insn_flags = CPU_MIPS32R5 | ASE_MIPS16 | ASE_MSA,
         .mmu_type = MMU_TYPE_R4000,
     },
+    {
+        /* A generic CPU supporting MIPS32 Release 6 ISA.
+           FIXME: Support IEEE 754-2008 FP.
+                  Eventually this should be replaced by a real CPU model. */
+        .name = "mips32r6-generic",
+        .CP0_PRid = 0x00010000,
+        .CP0_Config0 = MIPS_CONFIG0 | (0x2 << CP0C0_AR) |
+                       (MMU_TYPE_R4000 << CP0C0_MT),
+        .CP0_Config1 = MIPS_CONFIG1 | (1 << CP0C1_FP) | (31 << CP0C1_MMU) |
+                       (2 << CP0C1_IS) | (4 << CP0C1_IL) | (3 << CP0C1_IA) |
+                       (2 << CP0C1_DS) | (4 << CP0C1_DL) | (3 << CP0C1_DA) |
+                       (0 << CP0C1_PC) | (1 << CP0C1_WR) | (1 << CP0C1_EP),
+        .CP0_Config2 = MIPS_CONFIG2,
+        .CP0_Config3 = MIPS_CONFIG3 | (1 << CP0C3_BP) | (1 << CP0C3_BI) |
+                       (2 << CP0C3_ISA) | (1 << CP0C3_ULRI) |
+                       (1 << CP0C3_RXI) | (1U << CP0C3_M),
+        .CP0_Config4 = MIPS_CONFIG4 | (0xfc << CP0C4_KScrExist) |
+                       (3 << CP0C4_IE) | (1U << CP0C4_M),
+        .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_LLB),
+        .CP0_Config5_rw_bitmask = (1 << CP0C5_SBRI) | (1 << CP0C5_FRE) |
+                                  (1 << CP0C5_UFE),
+        .CP0_LLAddr_rw_bitmask = 0,
+        .CP0_LLAddr_shift = 0,
+        .SYNCI_Step = 32,
+        .CCRes = 2,
+        .CP0_Status_rw_bitmask = 0x3058FF1F,
+        .CP0_PageGrain = (1 << CP0PG_IEC) | (1 << CP0PG_XIE) |
+                         (1U << CP0PG_RIE),
+        .CP0_PageGrain_rw_bitmask = 0,
+        .CP1_fcr0 = (1 << FCR0_FREP) | (1 << FCR0_F64) | (1 << FCR0_L) |
+                    (1 << FCR0_W) | (1 << FCR0_D) | (1 << FCR0_S) |
+                    (0x00 << FCR0_PRID) | (0x0 << FCR0_REV),
+        .SEGBITS = 32,
+        .PABITS = 32,
+        .insn_flags = CPU_MIPS32R6 | ASE_MICROMIPS,
+        .mmu_type = MMU_TYPE_R4000,
+    },
 #if defined(TARGET_MIPS64)
     {
         .name = "R4000",
diff --git a/target-s390x/gdbstub.c b/target-s390x/gdbstub.c
index ddc14a6..31f2049 100644
--- a/target-s390x/gdbstub.c
+++ b/target-s390x/gdbstub.c
@@ -92,6 +92,7 @@
     switch (n) {
     case S390_A0_REGNUM ... S390_A15_REGNUM:
         env->aregs[n] = ldl_p(mem_buf);
+        cpu_synchronize_post_init(ENV_GET_CPU(env));
         return 4;
     default:
         return 0;
diff --git a/target-tricore/op_helper.c b/target-tricore/op_helper.c
index 10ed541..53edbda 100644
--- a/target-tricore/op_helper.c
+++ b/target-tricore/op_helper.c
@@ -2545,10 +2545,10 @@
     env->PC = (env->gpr_a[11] & ~0x1);
     /* ICR.IE = PCXI.PIE; */
     env->ICR = (env->ICR & ~MASK_ICR_IE) |
-               ((env->PCXI & ~MASK_PCXI_PIE) >> 15);
+               ((env->PCXI & MASK_PCXI_PIE) >> 15);
     /* ICR.CCPN = PCXI.PCPN; */
     env->ICR = (env->ICR & ~MASK_ICR_CCPN) |
-               ((env->PCXI & ~MASK_PCXI_PCPN) >> 24);
+               ((env->PCXI & MASK_PCXI_PCPN) >> 24);
     /* {PCXI, PSW, A[10], A[11]} = M(DCX, 4 * word); */
     env->PCXI = cpu_ldl_data(env, env->DCX);
     psw_write(env, cpu_ldl_data(env, env->DCX+4));
diff --git a/target-xtensa/core-dc232b.c b/target-xtensa/core-dc232b.c
index a3b914b..06826c0 100644
--- a/target-xtensa/core-dc232b.c
+++ b/target-xtensa/core-dc232b.c
@@ -33,7 +33,7 @@
 #include "core-dc232b/core-isa.h"
 #include "overlay_tool.h"
 
-static const XtensaConfig dc232b __attribute__((unused)) = {
+static XtensaConfig dc232b __attribute__((unused)) = {
     .name = "dc232b",
     .gdb_regmap = {
         .num_regs = 120,
diff --git a/target-xtensa/core-dc233c.c b/target-xtensa/core-dc233c.c
index ac745d1..8daf7d9 100644
--- a/target-xtensa/core-dc233c.c
+++ b/target-xtensa/core-dc233c.c
@@ -34,7 +34,7 @@
 #include "core-dc233c/core-isa.h"
 #include "overlay_tool.h"
 
-static const XtensaConfig dc233c __attribute__((unused)) = {
+static XtensaConfig dc233c __attribute__((unused)) = {
     .name = "dc233c",
     .gdb_regmap = {
         .num_regs = 121,
diff --git a/target-xtensa/core-fsf.c b/target-xtensa/core-fsf.c
index cfcc840..f6ea6b9 100644
--- a/target-xtensa/core-fsf.c
+++ b/target-xtensa/core-fsf.c
@@ -33,9 +33,14 @@
 #include "core-fsf/core-isa.h"
 #include "overlay_tool.h"
 
-static const XtensaConfig fsf __attribute__((unused)) = {
+static XtensaConfig fsf __attribute__((unused)) = {
     .name = "fsf",
+    .gdb_regmap = {
     /* GDB for this core is not supported currently */
+        .reg = {
+            XTREG_END
+        },
+    },
     .clock_freq_khz = 10000,
     DEFAULT_SECTIONS
 };
diff --git a/target-xtensa/cpu.h b/target-xtensa/cpu.h
index dfd0d1c..b89c602 100644
--- a/target-xtensa/cpu.h
+++ b/target-xtensa/cpu.h
@@ -287,6 +287,7 @@
     int targno;
     int type;
     int group;
+    unsigned size;
 } XtensaGdbReg;
 
 typedef struct XtensaGdbRegmap {
@@ -336,6 +337,18 @@
     struct XtensaConfigList *next;
 } XtensaConfigList;
 
+#ifdef HOST_WORDS_BIGENDIAN
+enum {
+    FP_F32_HIGH,
+    FP_F32_LOW,
+};
+#else
+enum {
+    FP_F32_LOW,
+    FP_F32_HIGH,
+};
+#endif
+
 typedef struct CPUXtensaState {
     const XtensaConfig *config;
     uint32_t regs[16];
@@ -343,7 +356,10 @@
     uint32_t sregs[256];
     uint32_t uregs[256];
     uint32_t phys_regs[MAX_NAREG];
-    float32 fregs[16];
+    union {
+        float32 f32[2];
+        float64 f64;
+    } fregs[16];
     float_status fp_status;
 
     xtensa_tlb_entry itlb[7][MAX_TLB_WAY_SIZE];
@@ -384,6 +400,7 @@
 void xtensa_translate_init(void);
 void xtensa_breakpoint_handler(CPUState *cs);
 int cpu_xtensa_exec(CPUXtensaState *s);
+void xtensa_finalize_config(XtensaConfig *config);
 void xtensa_register_core(XtensaConfigList *node);
 void check_interrupts(CPUXtensaState *s);
 void xtensa_irq_init(CPUXtensaState *env);
diff --git a/target-xtensa/gdbstub.c b/target-xtensa/gdbstub.c
index 9e13b20..bc2e1b5 100644
--- a/target-xtensa/gdbstub.c
+++ b/target-xtensa/gdbstub.c
@@ -26,6 +26,7 @@
     XtensaCPU *cpu = XTENSA_CPU(cs);
     CPUXtensaState *env = &cpu->env;
     const XtensaGdbReg *reg = env->config->gdb_regmap.reg + n;
+    unsigned i;
 
     if (n < 0 || n >= env->config->gdb_regmap.num_regs) {
         return 0;
@@ -47,8 +48,16 @@
         return gdb_get_reg32(mem_buf, env->uregs[reg->targno & 0xff]);
 
     case 4: /*f*/
-        return gdb_get_reg32(mem_buf, float32_val(env->fregs[reg->targno
-                                                             & 0x0f]));
+        i = reg->targno & 0x0f;
+        switch (reg->size) {
+        case 4:
+            return gdb_get_reg32(mem_buf,
+                                 float32_val(env->fregs[i].f32[FP_F32_LOW]));
+        case 8:
+            return gdb_get_reg64(mem_buf, float64_val(env->fregs[i].f64));
+        default:
+            return 0;
+        }
 
     case 8: /*a*/
         return gdb_get_reg32(mem_buf, env->regs[reg->targno & 0x0f]);
@@ -92,8 +101,16 @@
         break;
 
     case 4: /*f*/
-        env->fregs[reg->targno & 0x0f] = make_float32(tmp);
-        break;
+        switch (reg->size) {
+        case 4:
+            env->fregs[reg->targno & 0x0f].f32[FP_F32_LOW] = make_float32(tmp);
+            return 4;
+        case 8:
+            env->fregs[reg->targno & 0x0f].f64 = make_float64(tmp);
+            return 8;
+        default:
+            return 0;
+        }
 
     case 8: /*a*/
         env->regs[reg->targno & 0x0f] = tmp;
diff --git a/target-xtensa/helper.c b/target-xtensa/helper.c
index d84d259..76be50d 100644
--- a/target-xtensa/helper.c
+++ b/target-xtensa/helper.c
@@ -51,6 +51,20 @@
     cc->gdb_num_core_regs = config->gdb_regmap.num_regs;
 }
 
+void xtensa_finalize_config(XtensaConfig *config)
+{
+    unsigned i, n = 0;
+
+    if (config->gdb_regmap.num_regs) {
+        return;
+    }
+
+    for (i = 0; config->gdb_regmap.reg[i].targno >= 0; ++i) {
+        n += (config->gdb_regmap.reg[i].type != 6);
+    }
+    config->gdb_regmap.num_regs = n;
+}
+
 void xtensa_register_core(XtensaConfigList *node)
 {
     TypeInfo type = {
diff --git a/target-xtensa/import_core.sh b/target-xtensa/import_core.sh
index 73791ec..351bee4 100755
--- a/target-xtensa/import_core.sh
+++ b/target-xtensa/import_core.sh
@@ -22,8 +22,7 @@
 tar -xf "$OVERLAY" -C "$TARGET" --strip-components=1 \
     --xform='s/core/core-isa/' config/core.h
 tar -xf "$OVERLAY" -O gdb/xtensa-config.c | \
-    sed -n '1,/*\//p;/pc/,/a15/p' > "$TARGET"/gdb-config.c
-NUM_REGS=$(grep XTREG "$TARGET"/gdb-config.c | wc -l)
+    sed -n '1,/*\//p;/XTREG/,/XTREG_END/p' > "$TARGET"/gdb-config.c
 
 cat <<EOF > "${TARGET}.c"
 #include "cpu.h"
@@ -34,10 +33,9 @@
 #include "core-$NAME/core-isa.h"
 #include "overlay_tool.h"
 
-static const XtensaConfig $NAME __attribute__((unused)) = {
+static XtensaConfig $NAME __attribute__((unused)) = {
     .name = "$NAME",
     .gdb_regmap = {
-        .num_regs = $NUM_REGS,
         .reg = {
 #include "core-$NAME/gdb-config.c"
         }
diff --git a/target-xtensa/overlay_tool.h b/target-xtensa/overlay_tool.h
index 6105d4c..eda03aa 100644
--- a/target-xtensa/overlay_tool.h
+++ b/target-xtensa/overlay_tool.h
@@ -27,7 +27,8 @@
 
 #define XTREG(idx, ofs, bi, sz, al, no, flags, cp, typ, grp, name, \
         a1, a2, a3, a4, a5, a6) \
-    { .targno = (no), .type = (typ), .group = (grp) },
+    { .targno = (no), .type = (typ), .group = (grp), .size = (sz) },
+#define XTREG_END { .targno = -1 },
 
 #ifndef XCHAL_HAVE_DIV32
 #define XCHAL_HAVE_DIV32 0
@@ -316,6 +317,7 @@
         static XtensaConfigList node = { \
             .config = &core, \
         }; \
+        xtensa_finalize_config(&core); \
         xtensa_register_core(&node); \
     }
 #else
diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index 86e4849..f2118c2 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -228,7 +228,7 @@
 
     for (i = 0; i < 16; i++) {
         cpu_FR[i] = tcg_global_mem_new_i32(TCG_AREG0,
-                offsetof(CPUXtensaState, fregs[i]),
+                offsetof(CPUXtensaState, fregs[i].f32[FP_F32_LOW]),
                 fregnames[i]);
     }
 
@@ -3206,8 +3206,9 @@
 
         for (i = 0; i < 16; ++i) {
             cpu_fprintf(f, "F%02d=%08x (%+10.8e)%c", i,
-                    float32_val(env->fregs[i]),
-                    *(float *)&env->fregs[i], (i % 2) == 1 ? '\n' : ' ');
+                    float32_val(env->fregs[i].f32[FP_F32_LOW]),
+                    *(float *)(env->fregs[i].f32 + FP_F32_LOW),
+                    (i % 2) == 1 ? '\n' : ' ');
         }
     }
 }
diff --git a/tests/ahci-test.c b/tests/ahci-test.c
index ae9415d..87d7691 100644
--- a/tests/ahci-test.c
+++ b/tests/ahci-test.c
@@ -228,6 +228,8 @@
 {
     AHCIQState *ahci;
     va_list ap;
+    uint16_t buff[256];
+    uint8_t port;
 
     if (cli) {
         va_start(ap, cli);
@@ -239,6 +241,10 @@
 
     ahci_pci_enable(ahci);
     ahci_hba_enable(ahci);
+    /* Initialize test device */
+    port = ahci_port_select(ahci);
+    ahci_port_clear(ahci, port);
+    ahci_io(ahci, port, CMD_IDENTIFY, &buff, sizeof(buff), 0);
 
     return ahci;
 }
@@ -890,21 +896,23 @@
     g_free(rx);
 }
 
-static void ahci_test_nondata(AHCIQState *ahci, uint8_t ide_cmd)
+static uint8_t ahci_test_nondata(AHCIQState *ahci, uint8_t ide_cmd)
 {
-    uint8_t px;
+    uint8_t port;
     AHCICommand *cmd;
 
     /* Sanitize */
-    px = ahci_port_select(ahci);
-    ahci_port_clear(ahci, px);
+    port = ahci_port_select(ahci);
+    ahci_port_clear(ahci, port);
 
     /* Issue Command */
     cmd = ahci_command_create(ide_cmd);
-    ahci_command_commit(ahci, cmd, px);
+    ahci_command_commit(ahci, cmd, port);
     ahci_command_issue(ahci, cmd);
     ahci_command_verify(ahci, cmd);
     ahci_command_free(cmd);
+
+    return port;
 }
 
 static void ahci_test_flush(AHCIQState *ahci)
@@ -912,6 +920,33 @@
     ahci_test_nondata(ahci, CMD_FLUSH_CACHE);
 }
 
+static void ahci_test_max(AHCIQState *ahci)
+{
+    RegD2HFIS *d2h = g_malloc0(0x20);
+    uint64_t nsect;
+    uint8_t port;
+    uint8_t cmd;
+    uint64_t config_sect = TEST_IMAGE_SECTORS - 1;
+
+    if (config_sect > 0xFFFFFF) {
+        cmd = CMD_READ_MAX_EXT;
+    } else {
+        cmd = CMD_READ_MAX;
+    }
+
+    port = ahci_test_nondata(ahci, cmd);
+    memread(ahci->port[port].fb + 0x40, d2h, 0x20);
+    nsect = (uint64_t)d2h->lba_hi[2] << 40 |
+        (uint64_t)d2h->lba_hi[1] << 32 |
+        (uint64_t)d2h->lba_hi[0] << 24 |
+        (uint64_t)d2h->lba_lo[2] << 16 |
+        (uint64_t)d2h->lba_lo[1] << 8 |
+        (uint64_t)d2h->lba_lo[0];
+
+    g_assert_cmphex(nsect, ==, config_sect);
+    g_free(d2h);
+}
+
 
 /******************************************************************************/
 /* Test Interfaces                                                            */
@@ -1111,9 +1146,9 @@
 }
 
 /**
- * DMA Migration test: Write a pattern, migrate, then read.
+ * Simple migration test: Write a pattern, migrate, then read.
  */
-static void test_migrate_dma(void)
+static void ahci_migrate_simple(uint8_t cmd_read, uint8_t cmd_write)
 {
     AHCIQState *src, *dst;
     uint8_t px;
@@ -1141,9 +1176,9 @@
     }
 
     /* Write, migrate, then read. */
-    ahci_io(src, px, CMD_WRITE_DMA, tx, bufsize, 0);
+    ahci_io(src, px, cmd_write, tx, bufsize, 0);
     ahci_migrate(src, dst, uri);
-    ahci_io(dst, px, CMD_READ_DMA, rx, bufsize, 0);
+    ahci_io(dst, px, cmd_read, rx, bufsize, 0);
 
     /* Verify pattern */
     g_assert_cmphex(memcmp(tx, rx, bufsize), ==, 0);
@@ -1154,14 +1189,24 @@
     g_free(tx);
 }
 
+static void test_migrate_dma(void)
+{
+    ahci_migrate_simple(CMD_READ_DMA, CMD_WRITE_DMA);
+}
+
+static void test_migrate_ncq(void)
+{
+    ahci_migrate_simple(READ_FPDMA_QUEUED, WRITE_FPDMA_QUEUED);
+}
+
 /**
- * DMA Error Test
+ * Halted IO Error Test
  *
  * Simulate an error on first write, Try to write a pattern,
  * Confirm the VM has stopped, resume the VM, verify command
  * has completed, then read back the data and verify.
  */
-static void test_halted_dma(void)
+static void ahci_halted_io_test(uint8_t cmd_read, uint8_t cmd_write)
 {
     AHCIQState *ahci;
     uint8_t port;
@@ -1196,7 +1241,7 @@
     memwrite(ptr, tx, bufsize);
 
     /* Attempt to write (and fail) */
-    cmd = ahci_guest_io_halt(ahci, port, CMD_WRITE_DMA,
+    cmd = ahci_guest_io_halt(ahci, port, cmd_write,
                              ptr, bufsize, 0);
 
     /* Attempt to resume the command */
@@ -1204,7 +1249,7 @@
     ahci_free(ahci, ptr);
 
     /* Read back and verify */
-    ahci_io(ahci, port, CMD_READ_DMA, rx, bufsize, 0);
+    ahci_io(ahci, port, cmd_read, rx, bufsize, 0);
     g_assert_cmphex(memcmp(tx, rx, bufsize), ==, 0);
 
     /* Cleanup and go home */
@@ -1213,14 +1258,24 @@
     g_free(tx);
 }
 
+static void test_halted_dma(void)
+{
+    ahci_halted_io_test(CMD_READ_DMA, CMD_WRITE_DMA);
+}
+
+static void test_halted_ncq(void)
+{
+    ahci_halted_io_test(READ_FPDMA_QUEUED, WRITE_FPDMA_QUEUED);
+}
+
 /**
- * DMA Error Migration Test
+ * IO Error Migration Test
  *
  * Simulate an error on first write, Try to write a pattern,
  * Confirm the VM has stopped, migrate, resume the VM,
  * verify command has completed, then read back the data and verify.
  */
-static void test_migrate_halted_dma(void)
+static void ahci_migrate_halted_io(uint8_t cmd_read, uint8_t cmd_write)
 {
     AHCIQState *src, *dst;
     uint8_t port;
@@ -1266,14 +1321,14 @@
     memwrite(ptr, tx, bufsize);
 
     /* Write, trigger the VM to stop, migrate, then resume. */
-    cmd = ahci_guest_io_halt(src, port, CMD_WRITE_DMA,
+    cmd = ahci_guest_io_halt(src, port, cmd_write,
                              ptr, bufsize, 0);
     ahci_migrate(src, dst, uri);
     ahci_guest_io_resume(dst, cmd);
     ahci_free(dst, ptr);
 
     /* Read back */
-    ahci_io(dst, port, CMD_READ_DMA, rx, bufsize, 0);
+    ahci_io(dst, port, cmd_read, rx, bufsize, 0);
 
     /* Verify TX and RX are identical */
     g_assert_cmphex(memcmp(tx, rx, bufsize), ==, 0);
@@ -1285,6 +1340,16 @@
     g_free(tx);
 }
 
+static void test_migrate_halted_dma(void)
+{
+    ahci_migrate_halted_io(CMD_READ_DMA, CMD_WRITE_DMA);
+}
+
+static void test_migrate_halted_ncq(void)
+{
+    ahci_migrate_halted_io(READ_FPDMA_QUEUED, WRITE_FPDMA_QUEUED);
+}
+
 /**
  * Migration test: Try to flush, migrate, then resume.
  */
@@ -1334,6 +1399,49 @@
     ahci_shutdown(dst);
 }
 
+static void test_max(void)
+{
+    AHCIQState *ahci;
+
+    ahci = ahci_boot_and_enable(NULL);
+    ahci_test_max(ahci);
+    ahci_shutdown(ahci);
+}
+
+static void test_reset(void)
+{
+    AHCIQState *ahci;
+    int i;
+
+    ahci = ahci_boot(NULL);
+    ahci_test_pci_spec(ahci);
+    ahci_pci_enable(ahci);
+
+    for (i = 0; i < 2; i++) {
+        ahci_test_hba_spec(ahci);
+        ahci_hba_enable(ahci);
+        ahci_test_identify(ahci);
+        ahci_test_io_rw_simple(ahci, 4096, 0,
+                               CMD_READ_DMA_EXT,
+                               CMD_WRITE_DMA_EXT);
+        ahci_set(ahci, AHCI_GHC, AHCI_GHC_HR);
+        ahci_clean_mem(ahci);
+    }
+
+    ahci_shutdown(ahci);
+}
+
+static void test_ncq_simple(void)
+{
+    AHCIQState *ahci;
+
+    ahci = ahci_boot_and_enable(NULL);
+    ahci_test_io_rw_simple(ahci, 4096, 0,
+                           READ_FPDMA_QUEUED,
+                           WRITE_FPDMA_QUEUED);
+    ahci_shutdown(ahci);
+}
+
 /******************************************************************************/
 /* AHCI I/O Test Matrix Definitions                                           */
 
@@ -1584,6 +1692,14 @@
     qtest_add_func("/ahci/io/dma/lba28/retry", test_halted_dma);
     qtest_add_func("/ahci/migrate/dma/halted", test_migrate_halted_dma);
 
+    qtest_add_func("/ahci/max", test_max);
+    qtest_add_func("/ahci/reset", test_reset);
+
+    qtest_add_func("/ahci/io/ncq/simple", test_ncq_simple);
+    qtest_add_func("/ahci/migrate/ncq/simple", test_migrate_ncq);
+    qtest_add_func("/ahci/io/ncq/retry", test_halted_ncq);
+    qtest_add_func("/ahci/migrate/ncq/halted", test_migrate_halted_ncq);
+
     ret = g_test_run();
 
     /* Cleanup */
diff --git a/tests/libqos/ahci.c b/tests/libqos/ahci.c
index 7e17bb6..33ecd2a 100644
--- a/tests/libqos/ahci.c
+++ b/tests/libqos/ahci.c
@@ -50,27 +50,47 @@
 } AHCICommandProp;
 
 AHCICommandProp ahci_command_properties[] = {
-    { .cmd = CMD_READ_PIO,      .data = true,  .pio = true,
-                                .lba28 = true, .read = true },
-    { .cmd = CMD_WRITE_PIO,     .data = true,  .pio = true,
-                                .lba28 = true, .write = true },
-    { .cmd = CMD_READ_PIO_EXT,  .data = true,  .pio = true,
-                                .lba48 = true, .read = true },
-    { .cmd = CMD_WRITE_PIO_EXT, .data = true,  .pio = true,
-                                .lba48 = true, .write = true },
-    { .cmd = CMD_READ_DMA,      .data = true,  .dma = true,
-                                .lba28 = true, .read = true },
-    { .cmd = CMD_WRITE_DMA,     .data = true,  .dma = true,
-                                .lba28 = true, .write = true },
-    { .cmd = CMD_READ_DMA_EXT,  .data = true,  .dma = true,
-                                .lba48 = true, .read = true },
-    { .cmd = CMD_WRITE_DMA_EXT, .data = true,  .dma = true,
-                                .lba48 = true, .write = true },
-    { .cmd = CMD_IDENTIFY,      .data = true,  .pio = true,
-                                .size = 512,   .read = true },
-    { .cmd = CMD_READ_MAX,      .lba28 = true },
-    { .cmd = CMD_READ_MAX_EXT,  .lba48 = true },
-    { .cmd = CMD_FLUSH_CACHE,   .data = false }
+    { .cmd = CMD_READ_PIO,       .data = true,  .pio = true,
+                                 .lba28 = true, .read = true },
+    { .cmd = CMD_WRITE_PIO,      .data = true,  .pio = true,
+                                 .lba28 = true, .write = true },
+    { .cmd = CMD_READ_PIO_EXT,   .data = true,  .pio = true,
+                                 .lba48 = true, .read = true },
+    { .cmd = CMD_WRITE_PIO_EXT,  .data = true,  .pio = true,
+                                 .lba48 = true, .write = true },
+    { .cmd = CMD_READ_DMA,       .data = true,  .dma = true,
+                                 .lba28 = true, .read = true },
+    { .cmd = CMD_WRITE_DMA,      .data = true,  .dma = true,
+                                 .lba28 = true, .write = true },
+    { .cmd = CMD_READ_DMA_EXT,   .data = true,  .dma = true,
+                                 .lba48 = true, .read = true },
+    { .cmd = CMD_WRITE_DMA_EXT,  .data = true,  .dma = true,
+                                 .lba48 = true, .write = true },
+    { .cmd = CMD_IDENTIFY,       .data = true,  .pio = true,
+                                 .size = 512,   .read = true },
+    { .cmd = READ_FPDMA_QUEUED,  .data = true,  .dma = true,
+                                 .lba48 = true, .read = true, .ncq = true },
+    { .cmd = WRITE_FPDMA_QUEUED, .data = true,  .dma = true,
+                                 .lba48 = true, .write = true, .ncq = true },
+    { .cmd = CMD_READ_MAX,       .lba28 = true },
+    { .cmd = CMD_READ_MAX_EXT,   .lba48 = true },
+    { .cmd = CMD_FLUSH_CACHE,    .data = false }
+};
+
+struct AHCICommand {
+    /* Test Management Data */
+    uint8_t name;
+    uint8_t port;
+    uint8_t slot;
+    uint32_t interrupts;
+    uint64_t xbytes;
+    uint32_t prd_size;
+    uint64_t buffer;
+    AHCICommandProp *props;
+    /* Data to be transferred to the guest */
+    AHCICommandHeader header;
+    RegH2DFIS fis;
+    void *atapi_cmd;
 };
 
 /**
@@ -138,12 +158,14 @@
     for (port = 0; port < 32; ++port) {
         if (ahci->port[port].fb) {
             ahci_free(ahci, ahci->port[port].fb);
+            ahci->port[port].fb = 0;
         }
         if (ahci->port[port].clb) {
             for (slot = 0; slot < 32; slot++) {
                 ahci_destroy_command(ahci, port, slot);
             }
             ahci_free(ahci, ahci->port[port].clb);
+            ahci->port[port].clb = 0;
         }
     }
 }
@@ -252,7 +274,7 @@
         /* Allocate Memory for the Command List Buffer & FIS Buffer */
         /* PxCLB space ... 0x20 per command, as in 4.2.2 p 36 */
         ahci->port[i].clb = ahci_alloc(ahci, num_cmd_slots * 0x20);
-        qmemset(ahci->port[i].clb, 0x00, 0x100);
+        qmemset(ahci->port[i].clb, 0x00, num_cmd_slots * 0x20);
         g_test_message("CLB: 0x%08" PRIx64, ahci->port[i].clb);
         ahci_px_wreg(ahci, i, AHCI_PX_CLB, ahci->port[i].clb);
         g_assert_cmphex(ahci->port[i].clb, ==,
@@ -460,13 +482,15 @@
     g_free(pio);
 }
 
-void ahci_port_check_cmd_sanity(AHCIQState *ahci, uint8_t port,
-                                uint8_t slot, size_t buffsize)
+void ahci_port_check_cmd_sanity(AHCIQState *ahci, AHCICommand *cmd)
 {
-    AHCICommandHeader cmd;
+    AHCICommandHeader cmdh;
 
-    ahci_get_command_header(ahci, port, slot, &cmd);
-    g_assert_cmphex(buffsize, ==, cmd.prdbc);
+    ahci_get_command_header(ahci, cmd->port, cmd->slot, &cmdh);
+    /* Physical Region Descriptor Byte Count is not required to work for NCQ. */
+    if (!cmd->props->ncq) {
+        g_assert_cmphex(cmd->xbytes, ==, cmdh.prdbc);
+    }
 }
 
 /* Get the command in #slot of port #port. */
@@ -549,7 +573,7 @@
         if (reg & (1 << j)) {
             continue;
         }
-        ahci_destroy_command(ahci, port, i);
+        ahci_destroy_command(ahci, port, j);
         ahci->port[port].next = (j + 1) % 32;
         return j;
     }
@@ -610,22 +634,6 @@
     ahci_command_free(cmd);
 }
 
-struct AHCICommand {
-    /* Test Management Data */
-    uint8_t name;
-    uint8_t port;
-    uint8_t slot;
-    uint32_t interrupts;
-    uint64_t xbytes;
-    uint32_t prd_size;
-    uint64_t buffer;
-    AHCICommandProp *props;
-    /* Data to be transferred to the guest */
-    AHCICommandHeader header;
-    RegH2DFIS fis;
-    void *atapi_cmd;
-};
-
 static AHCICommandProp *ahci_command_find(uint8_t command_name)
 {
     int i;
@@ -691,19 +699,34 @@
 static void command_table_init(AHCICommand *cmd)
 {
     RegH2DFIS *fis = &(cmd->fis);
+    uint16_t sect_count = (cmd->xbytes / AHCI_SECTOR_SIZE);
 
     fis->fis_type = REG_H2D_FIS;
     fis->flags = REG_H2D_FIS_CMD; /* "Command" bit */
     fis->command = cmd->name;
-    cmd->fis.feature_low = 0x00;
-    cmd->fis.feature_high = 0x00;
-    if (cmd->props->lba28 || cmd->props->lba48) {
-        cmd->fis.device = ATA_DEVICE_LBA;
+
+    if (cmd->props->ncq) {
+        NCQFIS *ncqfis = (NCQFIS *)fis;
+        /* NCQ is weird and re-uses FIS frames for unrelated data.
+         * See SATA 3.2, 13.6.4.1 READ FPDMA QUEUED for an example. */
+        ncqfis->sector_low = sect_count & 0xFF;
+        ncqfis->sector_hi = (sect_count >> 8) & 0xFF;
+        ncqfis->device = NCQ_DEVICE_MAGIC;
+        /* Force Unit Access is bit 7 in the device register */
+        ncqfis->tag = 0;  /* bits 3-7 are the NCQ tag */
+        ncqfis->prio = 0; /* bits 6,7 are a prio tag */
+        /* RARC bit is bit 0 of TAG field */
+    } else {
+        fis->feature_low = 0x00;
+        fis->feature_high = 0x00;
+        if (cmd->props->lba28 || cmd->props->lba48) {
+            fis->device = ATA_DEVICE_LBA;
+        }
+        fis->count = (cmd->xbytes / AHCI_SECTOR_SIZE);
     }
-    cmd->fis.count = (cmd->xbytes / AHCI_SECTOR_SIZE);
-    cmd->fis.icc = 0x00;
-    cmd->fis.control = 0x00;
-    memset(cmd->fis.aux, 0x00, ARRAY_SIZE(cmd->fis.aux));
+    fis->icc = 0x00;
+    fis->control = 0x00;
+    memset(fis->aux, 0x00, ARRAY_SIZE(fis->aux));
 }
 
 AHCICommand *ahci_command_create(uint8_t command_name)
@@ -717,6 +740,7 @@
     g_assert(!(props->lba28 && props->lba48));
     g_assert(!(props->read && props->write));
     g_assert(!props->size || props->data);
+    g_assert(!props->ncq || (props->ncq && props->lba48));
 
     /* Defaults and book-keeping */
     cmd->props = props;
@@ -725,12 +749,15 @@
     cmd->prd_size = 4096;
     cmd->buffer = 0xabad1dea;
 
-    cmd->interrupts = AHCI_PX_IS_DHRS;
+    if (!cmd->props->ncq) {
+        cmd->interrupts = AHCI_PX_IS_DHRS;
+    }
     /* BUG: We expect the DPS interrupt for data commands */
     /* cmd->interrupts |= props->data ? AHCI_PX_IS_DPS : 0; */
     /* BUG: We expect the DMA Setup interrupt for DMA commands */
     /* cmd->interrupts |= props->dma ? AHCI_PX_IS_DSS : 0; */
     cmd->interrupts |= props->pio ? AHCI_PX_IS_PSS : 0;
+    cmd->interrupts |= props->ncq ? AHCI_PX_IS_SDBS : 0;
 
     command_header_init(cmd);
     command_table_init(cmd);
@@ -758,7 +785,7 @@
     RegH2DFIS *fis = &(cmd->fis);
     if (cmd->props->lba28) {
         g_assert_cmphex(lba_sect, <=, 0xFFFFFFF);
-    } else if (cmd->props->lba48) {
+    } else if (cmd->props->lba48 || cmd->props->ncq) {
         g_assert_cmphex(lba_sect, <=, 0xFFFFFFFFFFFF);
     } else {
         /* Can't set offset if we don't know the format. */
@@ -785,6 +812,8 @@
 void ahci_command_set_sizes(AHCICommand *cmd, uint64_t xbytes,
                             unsigned prd_size)
 {
+    uint16_t sect_count;
+
     /* Each PRD can describe up to 4MiB, and must not be odd. */
     g_assert_cmphex(prd_size, <=, 4096 * 1024);
     g_assert_cmphex(prd_size & 0x01, ==, 0x00);
@@ -792,7 +821,15 @@
         cmd->prd_size = prd_size;
     }
     cmd->xbytes = xbytes;
-    cmd->fis.count = (cmd->xbytes / AHCI_SECTOR_SIZE);
+    sect_count = (cmd->xbytes / AHCI_SECTOR_SIZE);
+
+    if (cmd->props->ncq) {
+        NCQFIS *nfis = (NCQFIS *)&(cmd->fis);
+        nfis->sector_low = sect_count & 0xFF;
+        nfis->sector_hi = (sect_count >> 8) & 0xFF;
+    } else {
+        cmd->fis.count = sect_count;
+    }
     cmd->header.prdtl = size_to_prdtl(cmd->xbytes, cmd->prd_size);
 }
 
@@ -824,6 +861,11 @@
     cmd->port = port;
     cmd->slot = ahci_pick_cmd(ahci, port);
 
+    if (cmd->props->ncq) {
+        NCQFIS *nfis = (NCQFIS *)&cmd->fis;
+        nfis->tag = (cmd->slot << 3) & 0xFC;
+    }
+
     /* Create a buffer for the command table */
     prdtl = size_to_prdtl(cmd->xbytes, cmd->prd_size);
     table_size = CMD_TBL_SIZ(prdtl);
@@ -878,11 +920,15 @@
     /* We can't rely on STS_BSY until the command has started processing.
      * Therefore, we also use the Command Issue bit as indication of
      * a command in-flight. */
-    while (BITSET(ahci_px_rreg(ahci, cmd->port, AHCI_PX_TFD),
-                  AHCI_PX_TFD_STS_BSY) ||
-           BITSET(ahci_px_rreg(ahci, cmd->port, AHCI_PX_CI), (1 << cmd->slot))) {
+
+#define RSET(REG, MASK) (BITSET(ahci_px_rreg(ahci, cmd->port, (REG)), (MASK)))
+
+    while (RSET(AHCI_PX_TFD, AHCI_PX_TFD_STS_BSY) ||
+           RSET(AHCI_PX_CI, 1 << cmd->slot) ||
+           (cmd->props->ncq && RSET(AHCI_PX_SACT, 1 << cmd->slot))) {
         usleep(50);
     }
+
 }
 
 void ahci_command_issue(AHCIQState *ahci, AHCICommand *cmd)
@@ -899,8 +945,10 @@
     ahci_port_check_error(ahci, port);
     ahci_port_check_interrupts(ahci, port, cmd->interrupts);
     ahci_port_check_nonbusy(ahci, port, slot);
-    ahci_port_check_cmd_sanity(ahci, port, slot, cmd->xbytes);
-    ahci_port_check_d2h_sanity(ahci, port, slot);
+    ahci_port_check_cmd_sanity(ahci, cmd);
+    if (cmd->interrupts & AHCI_PX_IS_DHRS) {
+        ahci_port_check_d2h_sanity(ahci, port, slot);
+    }
     if (cmd->props->pio) {
         ahci_port_check_pio_sanity(ahci, port, slot, cmd->xbytes);
     }
diff --git a/tests/libqos/ahci.h b/tests/libqos/ahci.h
index 779e812..a08a9dd 100644
--- a/tests/libqos/ahci.h
+++ b/tests/libqos/ahci.h
@@ -263,20 +263,23 @@
 /* ATA Commands */
 enum {
     /* DMA */
-    CMD_READ_DMA      = 0xC8,
-    CMD_READ_DMA_EXT  = 0x25,
-    CMD_WRITE_DMA     = 0xCA,
-    CMD_WRITE_DMA_EXT = 0x35,
+    CMD_READ_DMA       = 0xC8,
+    CMD_READ_DMA_EXT   = 0x25,
+    CMD_WRITE_DMA      = 0xCA,
+    CMD_WRITE_DMA_EXT  = 0x35,
     /* PIO */
-    CMD_READ_PIO      = 0x20,
-    CMD_READ_PIO_EXT  = 0x24,
-    CMD_WRITE_PIO     = 0x30,
-    CMD_WRITE_PIO_EXT = 0x34,
+    CMD_READ_PIO       = 0x20,
+    CMD_READ_PIO_EXT   = 0x24,
+    CMD_WRITE_PIO      = 0x30,
+    CMD_WRITE_PIO_EXT  = 0x34,
     /* Misc */
-    CMD_READ_MAX      = 0xF8,
-    CMD_READ_MAX_EXT  = 0x27,
-    CMD_FLUSH_CACHE   = 0xE7,
-    CMD_IDENTIFY      = 0xEC
+    CMD_READ_MAX       = 0xF8,
+    CMD_READ_MAX_EXT   = 0x27,
+    CMD_FLUSH_CACHE    = 0xE7,
+    CMD_IDENTIFY       = 0xEC,
+    /* NCQ */
+    READ_FPDMA_QUEUED  = 0x60,
+    WRITE_FPDMA_QUEUED = 0x61,
 };
 
 /* AHCI Command Header Flags & Masks*/
@@ -291,8 +294,9 @@
 #define CMDH_PMP      (0xF000)
 
 /* ATA device register masks */
-#define ATA_DEVICE_MAGIC 0xA0
+#define ATA_DEVICE_MAGIC 0xA0 /* used in ata1-3 */
 #define ATA_DEVICE_LBA   0x40
+#define NCQ_DEVICE_MAGIC 0x40 /* for ncq device registers */
 #define ATA_DEVICE_DRIVE 0x10
 #define ATA_DEVICE_HEAD  0x0F
 
@@ -397,6 +401,32 @@
 } __attribute__((__packed__)) RegH2DFIS;
 
 /**
+ * Register host-to-device FIS structure, for NCQ commands.
+ * Actually just a RegH2DFIS, but with fields repurposed.
+ * Repurposed fields are annotated below.
+ */
+typedef struct NCQFIS {
+    /* DW0 */
+    uint8_t fis_type;
+    uint8_t flags;
+    uint8_t command;
+    uint8_t sector_low; /* H2D: Feature 7:0 */
+    /* DW1 */
+    uint8_t lba_lo[3];
+    uint8_t device;
+    /* DW2 */
+    uint8_t lba_hi[3];
+    uint8_t sector_hi; /* H2D: Feature 15:8 */
+    /* DW3 */
+    uint8_t tag;       /* H2D: Count 0:7 */
+    uint8_t prio;      /* H2D: Count 15:8 */
+    uint8_t icc;
+    uint8_t control;
+    /* DW4 */
+    uint8_t aux[4];
+} __attribute__((__packed__)) NCQFIS;
+
+/**
  * Command List entry structure.
  * The command list contains between 1-32 of these structures.
  */
@@ -512,8 +542,7 @@
 void ahci_port_check_d2h_sanity(AHCIQState *ahci, uint8_t port, uint8_t slot);
 void ahci_port_check_pio_sanity(AHCIQState *ahci, uint8_t port,
                                 uint8_t slot, size_t buffsize);
-void ahci_port_check_cmd_sanity(AHCIQState *ahci, uint8_t port,
-                                uint8_t slot, size_t buffsize);
+void ahci_port_check_cmd_sanity(AHCIQState *ahci, AHCICommand *cmd);
 void ahci_get_command_header(AHCIQState *ahci, uint8_t port,
                              uint8_t slot, AHCICommandHeader *cmd);
 void ahci_set_command_header(AHCIQState *ahci, uint8_t port,
diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index 59a8f73..3d46ed7 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -34,38 +34,8 @@
 quorum_repair_img = os.path.join(iotests.test_dir, 'quorum_repair.img')
 quorum_snapshot_file = os.path.join(iotests.test_dir, 'quorum_snapshot.img')
 
-class ImageMirroringTestCase(iotests.QMPTestCase):
-    '''Abstract base class for image mirroring test cases'''
 
-    def wait_ready(self, drive='drive0'):
-        '''Wait until a block job BLOCK_JOB_READY event'''
-        ready = False
-        while not ready:
-            for event in self.vm.get_qmp_events(wait=True):
-                if event['event'] == 'BLOCK_JOB_READY':
-                    self.assert_qmp(event, 'data/type', 'mirror')
-                    self.assert_qmp(event, 'data/device', drive)
-                    ready = True
-
-    def wait_ready_and_cancel(self, drive='drive0'):
-        self.wait_ready(drive=drive)
-        event = self.cancel_and_wait(drive=drive)
-        self.assertEquals(event['event'], 'BLOCK_JOB_COMPLETED')
-        self.assert_qmp(event, 'data/type', 'mirror')
-        self.assert_qmp(event, 'data/offset', event['data']['len'])
-
-    def complete_and_wait(self, drive='drive0', wait_ready=True):
-        '''Complete a block job and wait for it to finish'''
-        if wait_ready:
-            self.wait_ready(drive=drive)
-
-        result = self.vm.qmp('block-job-complete', device=drive)
-        self.assert_qmp(result, 'return', {})
-
-        event = self.wait_until_completed(drive=drive)
-        self.assert_qmp(event, 'data/type', 'mirror')
-
-class TestSingleDrive(ImageMirroringTestCase):
+class TestSingleDrive(iotests.QMPTestCase):
     image_len = 1 * 1024 * 1024 # MB
 
     def setUp(self):
@@ -221,17 +191,9 @@
     test_small_buffer2 = None
     test_large_cluster = None
 
-class TestMirrorNoBacking(ImageMirroringTestCase):
+class TestMirrorNoBacking(iotests.QMPTestCase):
     image_len = 2 * 1024 * 1024 # MB
 
-    def complete_and_wait(self, drive='drive0', wait_ready=True):
-        iotests.create_image(target_backing_img, TestMirrorNoBacking.image_len)
-        return ImageMirroringTestCase.complete_and_wait(self, drive, wait_ready)
-
-    def compare_images(self, img1, img2):
-        iotests.create_image(target_backing_img, TestMirrorNoBacking.image_len)
-        return iotests.compare_images(img1, img2)
-
     def setUp(self):
         iotests.create_image(backing_img, TestMirrorNoBacking.image_len)
         qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, test_img)
@@ -242,7 +204,10 @@
         self.vm.shutdown()
         os.remove(test_img)
         os.remove(backing_img)
-        os.remove(target_backing_img)
+        try:
+            os.remove(target_backing_img)
+        except:
+            pass
         os.remove(target_img)
 
     def test_complete(self):
@@ -257,7 +222,7 @@
         result = self.vm.qmp('query-block')
         self.assert_qmp(result, 'return[0]/inserted/file', target_img)
         self.vm.shutdown()
-        self.assertTrue(self.compare_images(test_img, target_img),
+        self.assertTrue(iotests.compare_images(test_img, target_img),
                         'target image does not match source after mirroring')
 
     def test_cancel(self):
@@ -272,7 +237,7 @@
         result = self.vm.qmp('query-block')
         self.assert_qmp(result, 'return[0]/inserted/file', test_img)
         self.vm.shutdown()
-        self.assertTrue(self.compare_images(test_img, target_img),
+        self.assertTrue(iotests.compare_images(test_img, target_img),
                         'target image does not match source after mirroring')
 
     def test_large_cluster(self):
@@ -283,7 +248,6 @@
                         %(TestMirrorNoBacking.image_len), target_backing_img)
         qemu_img('create', '-f', iotests.imgfmt, '-o', 'cluster_size=%d,backing_file=%s'
                         % (TestMirrorNoBacking.image_len, target_backing_img), target_img)
-        os.remove(target_backing_img)
 
         result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
                              mode='existing', target=target_img)
@@ -293,10 +257,10 @@
         result = self.vm.qmp('query-block')
         self.assert_qmp(result, 'return[0]/inserted/file', target_img)
         self.vm.shutdown()
-        self.assertTrue(self.compare_images(test_img, target_img),
+        self.assertTrue(iotests.compare_images(test_img, target_img),
                         'target image does not match source after mirroring')
 
-class TestMirrorResized(ImageMirroringTestCase):
+class TestMirrorResized(iotests.QMPTestCase):
     backing_len = 1 * 1024 * 1024 # MB
     image_len = 2 * 1024 * 1024 # MB
 
@@ -344,7 +308,7 @@
         self.assertTrue(iotests.compare_images(test_img, target_img),
                         'target image does not match source after mirroring')
 
-class TestReadErrors(ImageMirroringTestCase):
+class TestReadErrors(iotests.QMPTestCase):
     image_len = 2 * 1024 * 1024 # MB
 
     # this should be a multiple of twice the default granularity
@@ -498,7 +462,7 @@
         self.assert_no_active_block_jobs()
         self.vm.shutdown()
 
-class TestWriteErrors(ImageMirroringTestCase):
+class TestWriteErrors(iotests.QMPTestCase):
     image_len = 2 * 1024 * 1024 # MB
 
     # this should be a multiple of twice the default granularity
@@ -624,7 +588,7 @@
         self.assert_no_active_block_jobs()
         self.vm.shutdown()
 
-class TestSetSpeed(ImageMirroringTestCase):
+class TestSetSpeed(iotests.QMPTestCase):
     image_len = 80 * 1024 * 1024 # MB
 
     def setUp(self):
@@ -690,7 +654,7 @@
 
         self.wait_ready_and_cancel()
 
-class TestUnbackedSource(ImageMirroringTestCase):
+class TestUnbackedSource(iotests.QMPTestCase):
     image_len = 2 * 1024 * 1024 # MB
 
     def setUp(self):
@@ -731,7 +695,7 @@
         self.complete_and_wait()
         self.assert_no_active_block_jobs()
 
-class TestRepairQuorum(ImageMirroringTestCase):
+class TestRepairQuorum(iotests.QMPTestCase):
     """ This class test quorum file repair using drive-mirror.
         It's mostly a fork of TestSingleDrive """
     image_len = 1 * 1024 * 1024 # MB
diff --git a/tests/qemu-iotests/124 b/tests/qemu-iotests/124
index 8abce2f..9ccd118 100644
--- a/tests/qemu-iotests/124
+++ b/tests/qemu-iotests/124
@@ -187,7 +187,7 @@
 
         target = self.prepare_backup(bitmap, parent)
         res = self.do_qmp_backup(device=bitmap.drive['id'],
-                                 sync='dirty-bitmap', bitmap=bitmap.name,
+                                 sync='incremental', bitmap=bitmap.name,
                                  format=bitmap.drive['fmt'], target=target,
                                  mode='existing')
         if not res:
@@ -325,7 +325,7 @@
         self.assert_no_active_block_jobs()
         self.files.append(self.err_img)
         result = self.vm.qmp('drive-backup', device=self.drives[0]['id'],
-                             sync='dirty-bitmap', format=self.drives[0]['fmt'],
+                             sync='incremental', format=self.drives[0]['fmt'],
                              target=self.err_img)
         self.assert_qmp(result, 'error/class', 'GenericError')
 
@@ -334,7 +334,7 @@
         self.assert_no_active_block_jobs()
         self.files.append(self.err_img)
         result = self.vm.qmp('drive-backup', device=self.drives[0]['id'],
-                             sync='dirty-bitmap', bitmap='unknown',
+                             sync='incremental', bitmap='unknown',
                              format=self.drives[0]['fmt'], target=self.err_img)
         self.assert_qmp(result, 'error/class', 'GenericError')
 
diff --git a/tests/qemu-iotests/132 b/tests/qemu-iotests/132
new file mode 100644
index 0000000..f53ef6e
--- /dev/null
+++ b/tests/qemu-iotests/132
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+#
+# Test mirror with unmap
+#
+# Copyright (C) 2015 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import time
+import os
+import iotests
+from iotests import qemu_img, qemu_io
+
+test_img = os.path.join(iotests.test_dir, 'test.img')
+target_img = os.path.join(iotests.test_dir, 'target.img')
+
+class TestSingleDrive(iotests.QMPTestCase):
+    image_len = 2 * 1024 * 1024 # MB
+
+    def setUp(self):
+        # Write data to the image so we can compare later
+        qemu_img('create', '-f', iotests.imgfmt, test_img, str(TestSingleDrive.image_len))
+        qemu_io('-f', iotests.imgfmt, '-c', 'write -P0x5d 0 2M', test_img)
+
+        self.vm = iotests.VM().add_drive(test_img, 'discard=unmap')
+        self.vm.launch()
+
+    def tearDown(self):
+        self.vm.shutdown()
+        os.remove(test_img)
+        try:
+            os.remove(target_img)
+        except OSError:
+            pass
+
+    def test_mirror_discard(self):
+        result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+                             target=target_img)
+        self.assert_qmp(result, 'return', {})
+        self.vm.hmp_qemu_io('drive0', 'discard 0 64k')
+        self.complete_and_wait('drive0')
+        self.vm.shutdown()
+        self.assertTrue(iotests.compare_images(test_img, target_img),
+                        'target image does not match source after mirroring')
+
+if __name__ == '__main__':
+    iotests.main(supported_fmts=['raw', 'qcow2'])
diff --git a/tests/qemu-iotests/132.out b/tests/qemu-iotests/132.out
new file mode 100644
index 0000000..ae1213e
--- /dev/null
+++ b/tests/qemu-iotests/132.out
@@ -0,0 +1,5 @@
+.
+----------------------------------------------------------------------
+Ran 1 tests
+
+OK
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 4597fc1..6206765 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -131,4 +131,5 @@
 129 rw auto quick
 130 rw auto quick
 131 rw auto quick
+132 rw auto quick
 134 rw auto quick
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 04a294d..8615b10 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -326,6 +326,29 @@
         self.assert_no_active_block_jobs()
         return event
 
+    def wait_ready(self, drive='drive0'):
+        '''Wait until a block job BLOCK_JOB_READY event'''
+        f = {'data': {'type': 'mirror', 'device': drive } }
+        event = self.vm.event_wait(name='BLOCK_JOB_READY', match=f)
+
+    def wait_ready_and_cancel(self, drive='drive0'):
+        self.wait_ready(drive=drive)
+        event = self.cancel_and_wait(drive=drive)
+        self.assertEquals(event['event'], 'BLOCK_JOB_COMPLETED')
+        self.assert_qmp(event, 'data/type', 'mirror')
+        self.assert_qmp(event, 'data/offset', event['data']['len'])
+
+    def complete_and_wait(self, drive='drive0', wait_ready=True):
+        '''Complete a block job and wait for it to finish'''
+        if wait_ready:
+            self.wait_ready(drive=drive)
+
+        result = self.vm.qmp('block-job-complete', device=drive)
+        self.assert_qmp(result, 'return', {})
+
+        event = self.wait_until_completed(drive=drive)
+        self.assert_qmp(event, 'data/type', 'mirror')
+
 def notrun(reason):
     '''Skip this test suite'''
     # Each test in qemu-iotests has a number ("seq")
diff --git a/tests/rtl8139-test.c b/tests/rtl8139-test.c
index 4e0bf02..3bff0e3 100644
--- a/tests/rtl8139-test.c
+++ b/tests/rtl8139-test.c
@@ -12,6 +12,7 @@
 #include "libqtest.h"
 #include "libqos/pci-pc.h"
 #include "qemu/osdep.h"
+#include "qemu/timer.h"
 #include "qemu-common.h"
 
 /* Tests only initialization so far. TODO: Replace with functional tests */
@@ -20,7 +21,6 @@
 }
 
 #define CLK 33000000
-#define NS_PER_SEC 1000000000ULL
 
 static QPCIBus *pcibus;
 static QPCIDevice *dev;
@@ -86,7 +86,7 @@
         fatal("time too big %u\n", curr);
     }
     for (cnt = 0; ; ) {
-        clock_step(1 * NS_PER_SEC);
+        clock_step(1 * NSEC_PER_SEC);
         prev = curr;
         curr = in_Timer();
 
@@ -125,7 +125,7 @@
     out_IntrStatus(0x4000);
     curr = in_Timer();
     out_TimerInt(curr + 0.5 * CLK);
-    clock_step(1 * NS_PER_SEC);
+    clock_step(1 * NSEC_PER_SEC);
     out_Timer(0);
     if ((in_IntrStatus() & 0x4000) == 0) {
         fatal("we should have an interrupt here!\n");
@@ -137,7 +137,7 @@
     out_IntrStatus(0x4000);
     curr = in_Timer();
     out_TimerInt(curr + 0.5 * CLK);
-    clock_step(1 * NS_PER_SEC);
+    clock_step(1 * NSEC_PER_SEC);
     out_TimerInt(0);
     if ((in_IntrStatus() & 0x4000) == 0) {
         fatal("we should have an interrupt here!\n");
@@ -148,7 +148,7 @@
     next = curr + 5.0 * CLK;
     out_TimerInt(next);
     for (cnt = 0; ; ) {
-        clock_step(1 * NS_PER_SEC);
+        clock_step(1 * NSEC_PER_SEC);
         prev = curr;
         curr = in_Timer();
         diff = (curr-prev) & 0xffffffffu;
diff --git a/tests/test-throttle.c b/tests/test-throttle.c
index 0168445..33b6b95 100644
--- a/tests/test-throttle.c
+++ b/tests/test-throttle.c
@@ -40,19 +40,19 @@
     bkt.level = 1.5;
 
     /* leak an op work of time */
-    throttle_leak_bucket(&bkt, NANOSECONDS_PER_SECOND / 150);
+    throttle_leak_bucket(&bkt, NSEC_PER_SEC / 150);
     g_assert(bkt.avg == 150);
     g_assert(bkt.max == 15);
     g_assert(double_cmp(bkt.level, 0.5));
 
     /* leak again emptying the bucket */
-    throttle_leak_bucket(&bkt, NANOSECONDS_PER_SECOND / 150);
+    throttle_leak_bucket(&bkt, NSEC_PER_SEC / 150);
     g_assert(bkt.avg == 150);
     g_assert(bkt.max == 15);
     g_assert(double_cmp(bkt.level, 0));
 
     /* check that the bucket level won't go lower */
-    throttle_leak_bucket(&bkt, NANOSECONDS_PER_SECOND / 150);
+    throttle_leak_bucket(&bkt, NSEC_PER_SEC / 150);
     g_assert(bkt.avg == 150);
     g_assert(bkt.max == 15);
     g_assert(double_cmp(bkt.level, 0));
@@ -90,7 +90,7 @@
     bkt.level = 15.5;
     wait = throttle_compute_wait(&bkt);
     /* time required to do half an operation */
-    result = (int64_t)  NANOSECONDS_PER_SECOND / 150 / 2;
+    result = (int64_t)  NSEC_PER_SEC / 150 / 2;
     g_assert(wait == result);
 }
 
diff --git a/tests/wdt_ib700-test.c b/tests/wdt_ib700-test.c
index 513a533..10a5472 100644
--- a/tests/wdt_ib700-test.c
+++ b/tests/wdt_ib700-test.c
@@ -11,8 +11,7 @@
 #include <string.h>
 #include "libqtest.h"
 #include "qemu/osdep.h"
-
-#define NS_PER_SEC 1000000000ULL
+#include "qemu/timer.h"
 
 static void qmp_check_no_event(void)
 {
@@ -41,29 +40,29 @@
 
 static QDict *ib700_program_and_wait(QTestState *s)
 {
-    clock_step(NS_PER_SEC * 40);
+    clock_step(NSEC_PER_SEC * 40);
     qmp_check_no_event();
 
     /* 2 second limit */
     outb(0x443, 14);
 
     /* Ping */
-    clock_step(NS_PER_SEC);
+    clock_step(NSEC_PER_SEC);
     qmp_check_no_event();
     outb(0x443, 14);
 
     /* Disable */
-    clock_step(NS_PER_SEC);
+    clock_step(NSEC_PER_SEC);
     qmp_check_no_event();
     outb(0x441, 1);
-    clock_step(3 * NS_PER_SEC);
+    clock_step(3 * NSEC_PER_SEC);
     qmp_check_no_event();
 
     /* Enable and let it fire */
     outb(0x443, 13);
-    clock_step(3 * NS_PER_SEC);
+    clock_step(3 * NSEC_PER_SEC);
     qmp_check_no_event();
-    clock_step(2 * NS_PER_SEC);
+    clock_step(2 * NSEC_PER_SEC);
     return qmp_get_event("WATCHDOG");
 }
 
diff --git a/util/throttle.c b/util/throttle.c
index 706c131..ec70476 100644
--- a/util/throttle.c
+++ b/util/throttle.c
@@ -36,7 +36,7 @@
     double leak;
 
     /* compute how much to leak */
-    leak = (bkt->avg * (double) delta_ns) / NANOSECONDS_PER_SECOND;
+    leak = (bkt->avg * (double) delta_ns) / NSEC_PER_SEC;
 
     /* make the bucket leak */
     bkt->level = MAX(bkt->level - leak, 0);
@@ -72,7 +72,7 @@
  */
 static int64_t throttle_do_compute_wait(double limit, double extra)
 {
-    double wait = extra * NANOSECONDS_PER_SECOND;
+    double wait = extra * NSEC_PER_SEC;
     wait /= limit;
     return wait;
 }