Merge remote-tracking branch 'remotes/kraxel/tags/pull-vga-20141216-1' into staging

cirrus hwcursor fixes.
set secondary-vga category.

# gpg: Signature made Tue 16 Dec 2014 14:44:09 GMT using RSA key ID D3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"

* remotes/kraxel/tags/pull-vga-20141216-1:
  vga: set catagory bit for secondary vga device
  move hw cursor pos from cirrus to vga
  cirrus: Force use of shadow pixmap when HW cursor is enabled
  vga: Add mechanism to force the use of a shadow surface

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/MAINTAINERS b/MAINTAINERS
index bcb69e8..d72d6e3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -696,6 +696,14 @@
 S: Supported
 F: hw/char/virtio-serial-bus.c
 F: hw/char/virtio-console.c
+F: include/hw/virtio/virtio-serial.h
+
+virtio-rng
+M: Amit Shah <amit.shah@redhat.com>
+S: Supported
+F: hw/virtio/virtio-rng.c
+F: include/hw/virtio/virtio-rng.h
+F: backends/rng*.c
 
 nvme
 M: Keith Busch <keith.busch@intel.com>
@@ -928,12 +936,14 @@
 
 Migration
 M: Juan Quintela <quintela@redhat.com>
+M: Amit Shah <amit.shah@redhat.com>
 S: Maintained
 F: include/migration/
-F: migration*
+F: migration/
 F: savevm.c
 F: arch_init.c
-F: vmstate.c
+F: scripts/vmstate-static-checker.py
+F: tests/vmstate-static-checker-data/
 
 Seccomp
 M: Eduardo Otubo <eduardo.otubo@profitbricks.com>
diff --git a/Makefile.objs b/Makefile.objs
index 18fd35c..abeb902 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -48,15 +48,9 @@
 
 common-obj-$(CONFIG_LINUX) += fsdev/
 
-common-obj-y += migration.o migration-tcp.o
-common-obj-y += vmstate.o
-common-obj-y += qemu-file.o qemu-file-unix.o qemu-file-stdio.o
-common-obj-$(CONFIG_RDMA) += migration-rdma.o
+common-obj-y += migration/
 common-obj-y += qemu-char.o #aio.o
-common-obj-y += block-migration.o
-common-obj-y += page_cache.o xbzrle.o
-
-common-obj-$(CONFIG_POSIX) += migration-exec.o migration-unix.o migration-fd.o
+common-obj-y += page_cache.o
 
 common-obj-$(CONFIG_SPICE) += spice-qemu-char.o
 
diff --git a/cputlb.c b/cputlb.c
index a55518a..3b271d4 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -270,7 +270,8 @@
     assert(sz >= TARGET_PAGE_SIZE);
 
 #if defined(DEBUG_TLB)
-    printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
+    qemu_log_mask(CPU_LOG_MMU,
+           "tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
            " prot=%x idx=%d\n",
            vaddr, paddr, prot, mmu_idx);
 #endif
diff --git a/disas/mips.c b/disas/mips.c
index 2614c52..1afe0c5 100644
--- a/disas/mips.c
+++ b/disas/mips.c
@@ -3511,6 +3511,7 @@
   const char * const name;
 };
 
+#if 0
 /* The mips16 registers.  */
 static const unsigned int mips16_to_32_reg_map[] =
 {
@@ -3518,7 +3519,7 @@
 };
 
 #define mips16_reg_names(rn)	mips_gpr_names[mips16_to_32_reg_map[rn]]
-
+#endif
 
 static const char * const mips_gpr_names_numeric[32] =
 {
@@ -3801,13 +3802,6 @@
   "$24",  "$25",  "$26",  "$27",  "$28",  "$29",  "$30",  "$31"
 };
 
-static const char * const mips_msa_control_names_numeric[32] = {
-  "$0",   "$1",   "$2",   "$3",   "$4",   "$5",   "$6",   "$7",
-  "$8",   "$9",   "$10",  "$11",  "$12",  "$13",  "$14",  "$15",
-  "$16",  "$17",  "$18",  "$19",  "$20",  "$21",  "$22",  "$23",
-  "$24",  "$25",  "$26",  "$27",  "$28",  "$29",  "$30",  "$31"
-};
-
 static const char * const mips_msa_control_names_mips3264r2[32] = {
   "MSAIR", "MSACSR", "$2", "$3",  "$4",   "$5",   "$6",   "$7",
   "$8",   "$9",   "$10",  "$11",  "$12",  "$13",  "$14",  "$15",
diff --git a/exec.c b/exec.c
index 71ac104..963481a 100644
--- a/exec.c
+++ b/exec.c
@@ -840,7 +840,7 @@
 
     block = qemu_get_ram_block(start);
     assert(block == qemu_get_ram_block(end - 1));
-    start1 = (uintptr_t)block->host + (start - block->offset);
+    start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
     cpu_tlb_reset_dirty_all(start1, length);
 }
 
@@ -1500,7 +1500,7 @@
     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         offset = addr - block->offset;
         if (offset < block->length) {
-            vaddr = block->host + offset;
+            vaddr = ramblock_ptr(block, offset);
             if (block->flags & RAM_PREALLOC) {
                 ;
             } else if (xen_enabled()) {
@@ -1551,7 +1551,7 @@
 {
     RAMBlock *block = qemu_get_ram_block(addr);
 
-    return block->host;
+    return ramblock_ptr(block, 0);
 }
 
 /* Return a host pointer to ram allocated with qemu_ram_alloc.
@@ -1578,7 +1578,7 @@
                 xen_map_cache(block->offset, block->length, 1);
         }
     }
-    return block->host + (addr - block->offset);
+    return ramblock_ptr(block, addr - block->offset);
 }
 
 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
@@ -1597,7 +1597,7 @@
             if (addr - block->offset < block->length) {
                 if (addr - block->offset + *size > block->length)
                     *size = block->length - addr + block->offset;
-                return block->host + (addr - block->offset);
+                return ramblock_ptr(block, addr - block->offset);
             }
         }
 
diff --git a/hw/display/qxl-render.c b/hw/display/qxl-render.c
index e812ddd..a542087 100644
--- a/hw/display/qxl-render.c
+++ b/hw/display/qxl-render.c
@@ -283,12 +283,14 @@
         qxl->ssd.mouse_x = cmd->u.set.position.x;
         qxl->ssd.mouse_y = cmd->u.set.position.y;
         qemu_mutex_unlock(&qxl->ssd.lock);
+        qemu_bh_schedule(qxl->ssd.cursor_bh);
         break;
     case QXL_CURSOR_MOVE:
         qemu_mutex_lock(&qxl->ssd.lock);
         qxl->ssd.mouse_x = cmd->u.position.x;
         qxl->ssd.mouse_y = cmd->u.position.y;
         qemu_mutex_unlock(&qxl->ssd.lock);
+        qemu_bh_schedule(qxl->ssd.cursor_bh);
         break;
     }
     return 0;
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
index b540dd6..61df477 100644
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -1092,6 +1092,7 @@
     spice_qxl_driver_unload(&d->ssd.qxl);
 #endif
     graphic_console_set_hwops(d->ssd.dcl.con, d->vga.hw_ops, &d->vga);
+    update_displaychangelistener(&d->ssd.dcl, GUI_REFRESH_INTERVAL_DEFAULT);
     qemu_spice_create_host_primary(&d->ssd);
     d->mode = QXL_MODE_VGA;
     vga_dirty_log_start(&d->vga);
@@ -1105,6 +1106,7 @@
     }
     trace_qxl_exit_vga_mode(d->id);
     graphic_console_set_hwops(d->ssd.dcl.con, &qxl_ops, d);
+    update_displaychangelistener(&d->ssd.dcl, GUI_REFRESH_INTERVAL_IDLE);
     vga_dirty_log_stop(&d->vga);
     qxl_destroy_primary(d, QXL_SYNC);
 }
@@ -1153,6 +1155,7 @@
         qxl_enter_vga_mode(d);
     } else {
         d->mode = QXL_MODE_UNDEFINED;
+        update_displaychangelistener(&d->ssd.dcl, GUI_REFRESH_INTERVAL_IDLE);
     }
 }
 
@@ -1861,10 +1864,6 @@
 
     if (qxl->mode == QXL_MODE_VGA) {
         qemu_spice_display_refresh(&qxl->ssd);
-    } else {
-        qemu_mutex_lock(&qxl->ssd.lock);
-        qemu_spice_cursor_refresh_unlocked(&qxl->ssd);
-        qemu_mutex_unlock(&qxl->ssd.lock);
     }
 }
 
@@ -2025,6 +2024,7 @@
     qxl_reset_state(qxl);
 
     qxl->update_area_bh = qemu_bh_new(qxl_render_update_area_bh, qxl);
+    qxl->ssd.cursor_bh = qemu_bh_new(qemu_spice_cursor_refresh_bh, &qxl->ssd);
 
     return 0;
 }
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index c085804..f0ce187 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -313,6 +313,13 @@
     int fd;
 } RAMBlock;
 
+static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
+{
+    assert(offset < block->length);
+    assert(block->host);
+    return (char *)block->host + offset;
+}
+
 typedef struct RAMList {
     QemuMutex mutex;
     /* Protected by the iothread lock.  */
@@ -335,6 +342,7 @@
 #define TLB_MMIO        (1 << 5)
 
 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf);
+void dump_opcount_info(FILE *f, fprintf_function cpu_fprintf);
 ram_addr_t last_ram_offset(void);
 void qemu_mutex_lock_ramlist(void);
 void qemu_mutex_unlock_ramlist(void);
diff --git a/include/qemu/log.h b/include/qemu/log.h
index d515424..195f665 100644
--- a/include/qemu/log.h
+++ b/include/qemu/log.h
@@ -40,6 +40,7 @@
 #define CPU_LOG_RESET      (1 << 9)
 #define LOG_UNIMP          (1 << 10)
 #define LOG_GUEST_ERROR    (1 << 11)
+#define CPU_LOG_MMU        (1 << 12)
 
 /* Returns true if a bit is set in the current loglevel mask
  */
diff --git a/include/ui/spice-display.h b/include/ui/spice-display.h
index 4252ab8..53883a1 100644
--- a/include/ui/spice-display.h
+++ b/include/ui/spice-display.h
@@ -102,6 +102,7 @@
     /* cursor (with qxl): qxl local renderer -> displaychangelistener */
     QEMUCursor *cursor;
     int mouse_x, mouse_y;
+    QEMUBH *cursor_bh;
 };
 
 struct SimpleSpiceUpdate {
@@ -134,7 +135,7 @@
 void qemu_spice_display_switch(SimpleSpiceDisplay *ssd,
                                DisplaySurface *surface);
 void qemu_spice_display_refresh(SimpleSpiceDisplay *ssd);
-void qemu_spice_cursor_refresh_unlocked(SimpleSpiceDisplay *ssd);
+void qemu_spice_cursor_refresh_bh(void *opaque);
 
 void qemu_spice_add_memslot(SimpleSpiceDisplay *ssd, QXLDevMemSlot *memslot,
                             qxl_async_io async);
diff --git a/linux-user/main.c b/linux-user/main.c
index 186ee4d..67b0231 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -3905,7 +3905,7 @@
 #endif
 #elif defined(TARGET_MIPS)
 #if defined(TARGET_ABI_MIPSN32) || defined(TARGET_ABI_MIPSN64)
-        cpu_model = "20Kc";
+        cpu_model = "5KEf";
 #else
         cpu_model = "24Kf";
 #endif
diff --git a/migration/Makefile.objs b/migration/Makefile.objs
new file mode 100644
index 0000000..d929e96
--- /dev/null
+++ b/migration/Makefile.objs
@@ -0,0 +1,10 @@
+common-obj-y += migration.o tcp.o
+common-obj-y += vmstate.o
+common-obj-y += qemu-file.o qemu-file-buf.o qemu-file-unix.o qemu-file-stdio.o
+common-obj-y += xbzrle.o
+
+common-obj-$(CONFIG_RDMA) += rdma.o
+common-obj-$(CONFIG_POSIX) += exec.o unix.o fd.o
+
+common-obj-y += block.o
+
diff --git a/block-migration.c b/migration/block.c
similarity index 100%
rename from block-migration.c
rename to migration/block.c
diff --git a/migration-exec.c b/migration/exec.c
similarity index 100%
rename from migration-exec.c
rename to migration/exec.c
diff --git a/migration-fd.c b/migration/fd.c
similarity index 100%
rename from migration-fd.c
rename to migration/fd.c
diff --git a/migration.c b/migration/migration.c
similarity index 100%
rename from migration.c
rename to migration/migration.c
diff --git a/migration/qemu-file-buf.c b/migration/qemu-file-buf.c
new file mode 100644
index 0000000..d33dd44
--- /dev/null
+++ b/migration/qemu-file-buf.c
@@ -0,0 +1,486 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "qemu/iov.h"
+#include "qemu/sockets.h"
+#include "block/coroutine.h"
+#include "migration/migration.h"
+#include "migration/qemu-file.h"
+#include "migration/qemu-file-internal.h"
+#include "trace.h"
+
+#define QSB_CHUNK_SIZE      (1 << 10)
+#define QSB_MAX_CHUNK_SIZE  (16 * QSB_CHUNK_SIZE)
+
+/**
+ * Create a QEMUSizedBuffer
+ * This type of buffer uses scatter-gather lists internally and
+ * can grow to any size. Any data array in the scatter-gather list
+ * can hold different amount of bytes.
+ *
+ * @buffer: Optional buffer to copy into the QSB
+ * @len: size of initial buffer; if @buffer is given, buffer must
+ *       hold at least len bytes
+ *
+ * Returns a pointer to a QEMUSizedBuffer or NULL on allocation failure
+ */
+QEMUSizedBuffer *qsb_create(const uint8_t *buffer, size_t len)
+{
+    QEMUSizedBuffer *qsb;
+    size_t alloc_len, num_chunks, i, to_copy;
+    size_t chunk_size = (len > QSB_MAX_CHUNK_SIZE)
+                        ? QSB_MAX_CHUNK_SIZE
+                        : QSB_CHUNK_SIZE;
+
+    num_chunks = DIV_ROUND_UP(len ? len : QSB_CHUNK_SIZE, chunk_size);
+    alloc_len = num_chunks * chunk_size;
+
+    qsb = g_try_new0(QEMUSizedBuffer, 1);
+    if (!qsb) {
+        return NULL;
+    }
+
+    qsb->iov = g_try_new0(struct iovec, num_chunks);
+    if (!qsb->iov) {
+        g_free(qsb);
+        return NULL;
+    }
+
+    qsb->n_iov = num_chunks;
+
+    for (i = 0; i < num_chunks; i++) {
+        qsb->iov[i].iov_base = g_try_malloc0(chunk_size);
+        if (!qsb->iov[i].iov_base) {
+            /* qsb_free is safe since g_free can cope with NULL */
+            qsb_free(qsb);
+            return NULL;
+        }
+
+        qsb->iov[i].iov_len = chunk_size;
+        if (buffer) {
+            to_copy = (len - qsb->used) > chunk_size
+                      ? chunk_size : (len - qsb->used);
+            memcpy(qsb->iov[i].iov_base, &buffer[qsb->used], to_copy);
+            qsb->used += to_copy;
+        }
+    }
+
+    qsb->size = alloc_len;
+
+    return qsb;
+}
+
+/**
+ * Free the QEMUSizedBuffer
+ *
+ * @qsb: The QEMUSizedBuffer to free
+ */
+void qsb_free(QEMUSizedBuffer *qsb)
+{
+    size_t i;
+
+    if (!qsb) {
+        return;
+    }
+
+    for (i = 0; i < qsb->n_iov; i++) {
+        g_free(qsb->iov[i].iov_base);
+    }
+    g_free(qsb->iov);
+    g_free(qsb);
+}
+
+/**
+ * Get the number of used bytes in the QEMUSizedBuffer
+ *
+ * @qsb: A QEMUSizedBuffer
+ *
+ * Returns the number of bytes currently used in this buffer
+ */
+size_t qsb_get_length(const QEMUSizedBuffer *qsb)
+{
+    return qsb->used;
+}
+
+/**
+ * Set the length of the buffer; the primary usage of this
+ * function is to truncate the number of used bytes in the buffer.
+ * The size will not be extended beyond the current number of
+ * allocated bytes in the QEMUSizedBuffer.
+ *
+ * @qsb: A QEMUSizedBuffer
+ * @new_len: The new length of bytes in the buffer
+ *
+ * Returns the number of bytes the buffer was truncated or extended
+ * to.
+ */
+size_t qsb_set_length(QEMUSizedBuffer *qsb, size_t new_len)
+{
+    if (new_len <= qsb->size) {
+        qsb->used = new_len;
+    } else {
+        qsb->used = qsb->size;
+    }
+    return qsb->used;
+}
+
+/**
+ * Get the iovec that holds the data for a given position @pos.
+ *
+ * @qsb: A QEMUSizedBuffer
+ * @pos: The index of a byte in the buffer
+ * @d_off: Pointer to an offset that this function will indicate
+ *         at what position within the returned iovec the byte
+ *         is to be found
+ *
+ * Returns the index of the iovec that holds the byte at the given
+ * index @pos in the byte stream; a negative number if the iovec
+ * for the given position @pos does not exist.
+ */
+static ssize_t qsb_get_iovec(const QEMUSizedBuffer *qsb,
+                             off_t pos, off_t *d_off)
+{
+    ssize_t i;
+    off_t curr = 0;
+
+    if (pos > qsb->used) {
+        return -1;
+    }
+
+    for (i = 0; i < qsb->n_iov; i++) {
+        if (curr + qsb->iov[i].iov_len > pos) {
+            *d_off = pos - curr;
+            return i;
+        }
+        curr += qsb->iov[i].iov_len;
+    }
+    return -1;
+}
+
+/*
+ * Convert the QEMUSizedBuffer into a flat buffer.
+ *
+ * Note: If at all possible, try to avoid this function since it
+ *       may unnecessarily copy memory around.
+ *
+ * @qsb: pointer to QEMUSizedBuffer
+ * @start: offset to start at
+ * @count: number of bytes to copy
+ * @buf: a pointer to a buffer to write into (at least @count bytes)
+ *
+ * Returns the number of bytes copied into the output buffer
+ */
+ssize_t qsb_get_buffer(const QEMUSizedBuffer *qsb, off_t start,
+                       size_t count, uint8_t *buffer)
+{
+    const struct iovec *iov;
+    size_t to_copy, all_copy;
+    ssize_t index;
+    off_t s_off;
+    off_t d_off = 0;
+    char *s;
+
+    if (start > qsb->used) {
+        return 0;
+    }
+
+    all_copy = qsb->used - start;
+    if (all_copy > count) {
+        all_copy = count;
+    } else {
+        count = all_copy;
+    }
+
+    index = qsb_get_iovec(qsb, start, &s_off);
+    if (index < 0) {
+        return 0;
+    }
+
+    while (all_copy > 0) {
+        iov = &qsb->iov[index];
+
+        s = iov->iov_base;
+
+        to_copy = iov->iov_len - s_off;
+        if (to_copy > all_copy) {
+            to_copy = all_copy;
+        }
+        memcpy(&buffer[d_off], &s[s_off], to_copy);
+
+        d_off += to_copy;
+        all_copy -= to_copy;
+
+        s_off = 0;
+        index++;
+    }
+
+    return count;
+}
+
+/**
+ * Grow the QEMUSizedBuffer to the given size and allocate
+ * memory for it.
+ *
+ * @qsb: A QEMUSizedBuffer
+ * @new_size: The new size of the buffer
+ *
+ * Return:
+ *    a negative error code in case of memory allocation failure
+ * or
+ *    the new size of the buffer. The returned size may be greater or equal
+ *    to @new_size.
+ */
+static ssize_t qsb_grow(QEMUSizedBuffer *qsb, size_t new_size)
+{
+    size_t needed_chunks, i;
+
+    if (qsb->size < new_size) {
+        struct iovec *new_iov;
+        size_t size_diff = new_size - qsb->size;
+        size_t chunk_size = (size_diff > QSB_MAX_CHUNK_SIZE)
+                             ? QSB_MAX_CHUNK_SIZE : QSB_CHUNK_SIZE;
+
+        needed_chunks = DIV_ROUND_UP(size_diff, chunk_size);
+
+        new_iov = g_try_new(struct iovec, qsb->n_iov + needed_chunks);
+        if (new_iov == NULL) {
+            return -ENOMEM;
+        }
+
+        /* Allocate new chunks as needed into new_iov */
+        for (i = qsb->n_iov; i < qsb->n_iov + needed_chunks; i++) {
+            new_iov[i].iov_base = g_try_malloc0(chunk_size);
+            new_iov[i].iov_len = chunk_size;
+            if (!new_iov[i].iov_base) {
+                size_t j;
+
+                /* Free previously allocated new chunks */
+                for (j = qsb->n_iov; j < i; j++) {
+                    g_free(new_iov[j].iov_base);
+                }
+                g_free(new_iov);
+
+                return -ENOMEM;
+            }
+        }
+
+        /*
+         * Now we can't get any allocation errors, copy over to new iov
+         * and switch.
+         */
+        for (i = 0; i < qsb->n_iov; i++) {
+            new_iov[i] = qsb->iov[i];
+        }
+
+        qsb->n_iov += needed_chunks;
+        g_free(qsb->iov);
+        qsb->iov = new_iov;
+        qsb->size += (needed_chunks * chunk_size);
+    }
+
+    return qsb->size;
+}
+
+/**
+ * Write into the QEMUSizedBuffer at a given position and a given
+ * number of bytes. This function will automatically grow the
+ * QEMUSizedBuffer.
+ *
+ * @qsb: A QEMUSizedBuffer
+ * @source: A byte array to copy data from
+ * @pos: The position within the @qsb to write data to
+ * @size: The number of bytes to copy into the @qsb
+ *
+ * Returns @size or a negative error code in case of memory allocation failure,
+ *           or with an invalid 'pos'
+ */
+ssize_t qsb_write_at(QEMUSizedBuffer *qsb, const uint8_t *source,
+                     off_t pos, size_t count)
+{
+    ssize_t rc = qsb_grow(qsb, pos + count);
+    size_t to_copy;
+    size_t all_copy = count;
+    const struct iovec *iov;
+    ssize_t index;
+    char *dest;
+    off_t d_off, s_off = 0;
+
+    if (rc < 0) {
+        return rc;
+    }
+
+    if (pos + count > qsb->used) {
+        qsb->used = pos + count;
+    }
+
+    index = qsb_get_iovec(qsb, pos, &d_off);
+    if (index < 0) {
+        return -EINVAL;
+    }
+
+    while (all_copy > 0) {
+        iov = &qsb->iov[index];
+
+        dest = iov->iov_base;
+
+        to_copy = iov->iov_len - d_off;
+        if (to_copy > all_copy) {
+            to_copy = all_copy;
+        }
+
+        memcpy(&dest[d_off], &source[s_off], to_copy);
+
+        s_off += to_copy;
+        all_copy -= to_copy;
+
+        d_off = 0;
+        index++;
+    }
+
+    return count;
+}
+
+/**
+ * Create a deep copy of the given QEMUSizedBuffer.
+ *
+ * @qsb: A QEMUSizedBuffer
+ *
+ * Returns a clone of @qsb or NULL on allocation failure
+ */
+QEMUSizedBuffer *qsb_clone(const QEMUSizedBuffer *qsb)
+{
+    QEMUSizedBuffer *out = qsb_create(NULL, qsb_get_length(qsb));
+    size_t i;
+    ssize_t res;
+    off_t pos = 0;
+
+    if (!out) {
+        return NULL;
+    }
+
+    for (i = 0; i < qsb->n_iov; i++) {
+        res =  qsb_write_at(out, qsb->iov[i].iov_base,
+                            pos, qsb->iov[i].iov_len);
+        if (res < 0) {
+            qsb_free(out);
+            return NULL;
+        }
+        pos += res;
+    }
+
+    return out;
+}
+
+typedef struct QEMUBuffer {
+    QEMUSizedBuffer *qsb;
+    QEMUFile *file;
+} QEMUBuffer;
+
+static int buf_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
+{
+    QEMUBuffer *s = opaque;
+    ssize_t len = qsb_get_length(s->qsb) - pos;
+
+    if (len <= 0) {
+        return 0;
+    }
+
+    if (len > size) {
+        len = size;
+    }
+    return qsb_get_buffer(s->qsb, pos, len, buf);
+}
+
+static int buf_put_buffer(void *opaque, const uint8_t *buf,
+                          int64_t pos, int size)
+{
+    QEMUBuffer *s = opaque;
+
+    return qsb_write_at(s->qsb, buf, pos, size);
+}
+
+static int buf_close(void *opaque)
+{
+    QEMUBuffer *s = opaque;
+
+    qsb_free(s->qsb);
+
+    g_free(s);
+
+    return 0;
+}
+
+const QEMUSizedBuffer *qemu_buf_get(QEMUFile *f)
+{
+    QEMUBuffer *p;
+
+    qemu_fflush(f);
+
+    p = f->opaque;
+
+    return p->qsb;
+}
+
+static const QEMUFileOps buf_read_ops = {
+    .get_buffer = buf_get_buffer,
+    .close =      buf_close,
+};
+
+static const QEMUFileOps buf_write_ops = {
+    .put_buffer = buf_put_buffer,
+    .close =      buf_close,
+};
+
+QEMUFile *qemu_bufopen(const char *mode, QEMUSizedBuffer *input)
+{
+    QEMUBuffer *s;
+
+    if (mode == NULL || (mode[0] != 'r' && mode[0] != 'w') ||
+        mode[1] != '\0') {
+        error_report("qemu_bufopen: Argument validity check failed");
+        return NULL;
+    }
+
+    s = g_malloc0(sizeof(QEMUBuffer));
+    if (mode[0] == 'r') {
+        s->qsb = input;
+    }
+
+    if (s->qsb == NULL) {
+        s->qsb = qsb_create(NULL, 0);
+    }
+    if (!s->qsb) {
+        g_free(s);
+        error_report("qemu_bufopen: qsb_create failed");
+        return NULL;
+    }
+
+
+    if (mode[0] == 'r') {
+        s->file = qemu_fopen_ops(s, &buf_read_ops);
+    } else {
+        s->file = qemu_fopen_ops(s, &buf_write_ops);
+    }
+    return s->file;
+}
diff --git a/migration/qemu-file-internal.h b/migration/qemu-file-internal.h
new file mode 100644
index 0000000..d95e853
--- /dev/null
+++ b/migration/qemu-file-internal.h
@@ -0,0 +1,53 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef QEMU_FILE_INTERNAL_H
+#define QEMU_FILE_INTERNAL_H 1
+
+#include "qemu-common.h"
+#include "qemu/iov.h"
+
+#define IO_BUF_SIZE 32768
+#define MAX_IOV_SIZE MIN(IOV_MAX, 64)
+
+struct QEMUFile {
+    const QEMUFileOps *ops;
+    void *opaque;
+
+    int64_t bytes_xfer;
+    int64_t xfer_limit;
+
+    int64_t pos; /* start of buffer when writing, end of buffer
+                    when reading */
+    int buf_index;
+    int buf_size; /* 0 when writing */
+    uint8_t buf[IO_BUF_SIZE];
+
+    struct iovec iov[MAX_IOV_SIZE];
+    unsigned int iovcnt;
+
+    int last_error;
+};
+
+#endif
diff --git a/qemu-file-stdio.c b/migration/qemu-file-stdio.c
similarity index 100%
rename from qemu-file-stdio.c
rename to migration/qemu-file-stdio.c
diff --git a/qemu-file-unix.c b/migration/qemu-file-unix.c
similarity index 100%
rename from qemu-file-unix.c
rename to migration/qemu-file-unix.c
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
new file mode 100644
index 0000000..d2d4007
--- /dev/null
+++ b/migration/qemu-file.c
@@ -0,0 +1,519 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "qemu/iov.h"
+#include "qemu/sockets.h"
+#include "block/coroutine.h"
+#include "migration/migration.h"
+#include "migration/qemu-file.h"
+#include "migration/qemu-file-internal.h"
+#include "trace.h"
+
+bool qemu_file_mode_is_not_valid(const char *mode)
+{
+    if (mode == NULL ||
+        (mode[0] != 'r' && mode[0] != 'w') ||
+        mode[1] != 'b' || mode[2] != 0) {
+        fprintf(stderr, "qemu_fopen: Argument validity check failed\n");
+        return true;
+    }
+
+    return false;
+}
+
+QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops)
+{
+    QEMUFile *f;
+
+    f = g_malloc0(sizeof(QEMUFile));
+
+    f->opaque = opaque;
+    f->ops = ops;
+    return f;
+}
+
+/*
+ * Get last error for stream f
+ *
+ * Return negative error value if there has been an error on previous
+ * operations, return 0 if no error happened.
+ *
+ */
+int qemu_file_get_error(QEMUFile *f)
+{
+    return f->last_error;
+}
+
+void qemu_file_set_error(QEMUFile *f, int ret)
+{
+    if (f->last_error == 0) {
+        f->last_error = ret;
+    }
+}
+
+bool qemu_file_is_writable(QEMUFile *f)
+{
+    return f->ops->writev_buffer || f->ops->put_buffer;
+}
+
+/**
+ * Flushes QEMUFile buffer
+ *
+ * If there is writev_buffer QEMUFileOps it uses it otherwise uses
+ * put_buffer ops.
+ */
+void qemu_fflush(QEMUFile *f)
+{
+    ssize_t ret = 0;
+
+    if (!qemu_file_is_writable(f)) {
+        return;
+    }
+
+    if (f->ops->writev_buffer) {
+        if (f->iovcnt > 0) {
+            ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos);
+        }
+    } else {
+        if (f->buf_index > 0) {
+            ret = f->ops->put_buffer(f->opaque, f->buf, f->pos, f->buf_index);
+        }
+    }
+    if (ret >= 0) {
+        f->pos += ret;
+    }
+    f->buf_index = 0;
+    f->iovcnt = 0;
+    if (ret < 0) {
+        qemu_file_set_error(f, ret);
+    }
+}
+
+void ram_control_before_iterate(QEMUFile *f, uint64_t flags)
+{
+    int ret = 0;
+
+    if (f->ops->before_ram_iterate) {
+        ret = f->ops->before_ram_iterate(f, f->opaque, flags);
+        if (ret < 0) {
+            qemu_file_set_error(f, ret);
+        }
+    }
+}
+
+void ram_control_after_iterate(QEMUFile *f, uint64_t flags)
+{
+    int ret = 0;
+
+    if (f->ops->after_ram_iterate) {
+        ret = f->ops->after_ram_iterate(f, f->opaque, flags);
+        if (ret < 0) {
+            qemu_file_set_error(f, ret);
+        }
+    }
+}
+
+void ram_control_load_hook(QEMUFile *f, uint64_t flags)
+{
+    int ret = -EINVAL;
+
+    if (f->ops->hook_ram_load) {
+        ret = f->ops->hook_ram_load(f, f->opaque, flags);
+        if (ret < 0) {
+            qemu_file_set_error(f, ret);
+        }
+    } else {
+        qemu_file_set_error(f, ret);
+    }
+}
+
+size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
+                         ram_addr_t offset, size_t size, int *bytes_sent)
+{
+    if (f->ops->save_page) {
+        int ret = f->ops->save_page(f, f->opaque, block_offset,
+                                    offset, size, bytes_sent);
+
+        if (ret != RAM_SAVE_CONTROL_DELAYED) {
+            if (bytes_sent && *bytes_sent > 0) {
+                qemu_update_position(f, *bytes_sent);
+            } else if (ret < 0) {
+                qemu_file_set_error(f, ret);
+            }
+        }
+
+        return ret;
+    }
+
+    return RAM_SAVE_CONTROL_NOT_SUPP;
+}
+
+/*
+ * Attempt to fill the buffer from the underlying file
+ * Returns the number of bytes read, or negative value for an error.
+ *
+ * Note that it can return a partially full buffer even in a not error/not EOF
+ * case if the underlying file descriptor gives a short read, and that can
+ * happen even on a blocking fd.
+ */
+static ssize_t qemu_fill_buffer(QEMUFile *f)
+{
+    int len;
+    int pending;
+
+    assert(!qemu_file_is_writable(f));
+
+    pending = f->buf_size - f->buf_index;
+    if (pending > 0) {
+        memmove(f->buf, f->buf + f->buf_index, pending);
+    }
+    f->buf_index = 0;
+    f->buf_size = pending;
+
+    len = f->ops->get_buffer(f->opaque, f->buf + pending, f->pos,
+                        IO_BUF_SIZE - pending);
+    if (len > 0) {
+        f->buf_size += len;
+        f->pos += len;
+    } else if (len == 0) {
+        qemu_file_set_error(f, -EIO);
+    } else if (len != -EAGAIN) {
+        qemu_file_set_error(f, len);
+    }
+
+    return len;
+}
+
+int qemu_get_fd(QEMUFile *f)
+{
+    if (f->ops->get_fd) {
+        return f->ops->get_fd(f->opaque);
+    }
+    return -1;
+}
+
+void qemu_update_position(QEMUFile *f, size_t size)
+{
+    f->pos += size;
+}
+
+/** Closes the file
+ *
+ * Returns negative error value if any error happened on previous operations or
+ * while closing the file. Returns 0 or positive number on success.
+ *
+ * The meaning of return value on success depends on the specific backend
+ * being used.
+ */
+int qemu_fclose(QEMUFile *f)
+{
+    int ret;
+    qemu_fflush(f);
+    ret = qemu_file_get_error(f);
+
+    if (f->ops->close) {
+        int ret2 = f->ops->close(f->opaque);
+        if (ret >= 0) {
+            ret = ret2;
+        }
+    }
+    /* If any error was spotted before closing, we should report it
+     * instead of the close() return value.
+     */
+    if (f->last_error) {
+        ret = f->last_error;
+    }
+    g_free(f);
+    trace_qemu_file_fclose();
+    return ret;
+}
+
+static void add_to_iovec(QEMUFile *f, const uint8_t *buf, int size)
+{
+    /* check for adjacent buffer and coalesce them */
+    if (f->iovcnt > 0 && buf == f->iov[f->iovcnt - 1].iov_base +
+        f->iov[f->iovcnt - 1].iov_len) {
+        f->iov[f->iovcnt - 1].iov_len += size;
+    } else {
+        f->iov[f->iovcnt].iov_base = (uint8_t *)buf;
+        f->iov[f->iovcnt++].iov_len = size;
+    }
+
+    if (f->iovcnt >= MAX_IOV_SIZE) {
+        qemu_fflush(f);
+    }
+}
+
+void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, int size)
+{
+    if (!f->ops->writev_buffer) {
+        qemu_put_buffer(f, buf, size);
+        return;
+    }
+
+    if (f->last_error) {
+        return;
+    }
+
+    f->bytes_xfer += size;
+    add_to_iovec(f, buf, size);
+}
+
+void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size)
+{
+    int l;
+
+    if (f->last_error) {
+        return;
+    }
+
+    while (size > 0) {
+        l = IO_BUF_SIZE - f->buf_index;
+        if (l > size) {
+            l = size;
+        }
+        memcpy(f->buf + f->buf_index, buf, l);
+        f->bytes_xfer += l;
+        if (f->ops->writev_buffer) {
+            add_to_iovec(f, f->buf + f->buf_index, l);
+        }
+        f->buf_index += l;
+        if (f->buf_index == IO_BUF_SIZE) {
+            qemu_fflush(f);
+        }
+        if (qemu_file_get_error(f)) {
+            break;
+        }
+        buf += l;
+        size -= l;
+    }
+}
+
+void qemu_put_byte(QEMUFile *f, int v)
+{
+    if (f->last_error) {
+        return;
+    }
+
+    f->buf[f->buf_index] = v;
+    f->bytes_xfer++;
+    if (f->ops->writev_buffer) {
+        add_to_iovec(f, f->buf + f->buf_index, 1);
+    }
+    f->buf_index++;
+    if (f->buf_index == IO_BUF_SIZE) {
+        qemu_fflush(f);
+    }
+}
+
+void qemu_file_skip(QEMUFile *f, int size)
+{
+    if (f->buf_index + size <= f->buf_size) {
+        f->buf_index += size;
+    }
+}
+
+/*
+ * Read 'size' bytes from file (at 'offset') into buf without moving the
+ * pointer.
+ *
+ * It will return size bytes unless there was an error, in which case it will
+ * return as many as it managed to read (assuming blocking fd's which
+ * all current QEMUFile are)
+ */
+int qemu_peek_buffer(QEMUFile *f, uint8_t *buf, int size, size_t offset)
+{
+    int pending;
+    int index;
+
+    assert(!qemu_file_is_writable(f));
+    assert(offset < IO_BUF_SIZE);
+    assert(size <= IO_BUF_SIZE - offset);
+
+    /* The 1st byte to read from */
+    index = f->buf_index + offset;
+    /* The number of available bytes starting at index */
+    pending = f->buf_size - index;
+
+    /*
+     * qemu_fill_buffer might return just a few bytes, even when there isn't
+     * an error, so loop collecting them until we get enough.
+     */
+    while (pending < size) {
+        int received = qemu_fill_buffer(f);
+
+        if (received <= 0) {
+            break;
+        }
+
+        index = f->buf_index + offset;
+        pending = f->buf_size - index;
+    }
+
+    if (pending <= 0) {
+        return 0;
+    }
+    if (size > pending) {
+        size = pending;
+    }
+
+    memcpy(buf, f->buf + index, size);
+    return size;
+}
+
+/*
+ * Read 'size' bytes of data from the file into buf.
+ * 'size' can be larger than the internal buffer.
+ *
+ * It will return size bytes unless there was an error, in which case it will
+ * return as many as it managed to read (assuming blocking fd's which
+ * all current QEMUFile are)
+ */
+int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size)
+{
+    int pending = size;
+    int done = 0;
+
+    while (pending > 0) {
+        int res;
+
+        res = qemu_peek_buffer(f, buf, MIN(pending, IO_BUF_SIZE), 0);
+        if (res == 0) {
+            return done;
+        }
+        qemu_file_skip(f, res);
+        buf += res;
+        pending -= res;
+        done += res;
+    }
+    return done;
+}
+
+/*
+ * Peeks a single byte from the buffer; this isn't guaranteed to work if
+ * offset leaves a gap after the previous read/peeked data.
+ */
+int qemu_peek_byte(QEMUFile *f, int offset)
+{
+    int index = f->buf_index + offset;
+
+    assert(!qemu_file_is_writable(f));
+    assert(offset < IO_BUF_SIZE);
+
+    if (index >= f->buf_size) {
+        qemu_fill_buffer(f);
+        index = f->buf_index + offset;
+        if (index >= f->buf_size) {
+            return 0;
+        }
+    }
+    return f->buf[index];
+}
+
+int qemu_get_byte(QEMUFile *f)
+{
+    int result;
+
+    result = qemu_peek_byte(f, 0);
+    qemu_file_skip(f, 1);
+    return result;
+}
+
+int64_t qemu_ftell(QEMUFile *f)
+{
+    qemu_fflush(f);
+    return f->pos;
+}
+
+int qemu_file_rate_limit(QEMUFile *f)
+{
+    if (qemu_file_get_error(f)) {
+        return 1;
+    }
+    if (f->xfer_limit > 0 && f->bytes_xfer > f->xfer_limit) {
+        return 1;
+    }
+    return 0;
+}
+
+int64_t qemu_file_get_rate_limit(QEMUFile *f)
+{
+    return f->xfer_limit;
+}
+
+void qemu_file_set_rate_limit(QEMUFile *f, int64_t limit)
+{
+    f->xfer_limit = limit;
+}
+
+void qemu_file_reset_rate_limit(QEMUFile *f)
+{
+    f->bytes_xfer = 0;
+}
+
+void qemu_put_be16(QEMUFile *f, unsigned int v)
+{
+    qemu_put_byte(f, v >> 8);
+    qemu_put_byte(f, v);
+}
+
+void qemu_put_be32(QEMUFile *f, unsigned int v)
+{
+    qemu_put_byte(f, v >> 24);
+    qemu_put_byte(f, v >> 16);
+    qemu_put_byte(f, v >> 8);
+    qemu_put_byte(f, v);
+}
+
+void qemu_put_be64(QEMUFile *f, uint64_t v)
+{
+    qemu_put_be32(f, v >> 32);
+    qemu_put_be32(f, v);
+}
+
+unsigned int qemu_get_be16(QEMUFile *f)
+{
+    unsigned int v;
+    v = qemu_get_byte(f) << 8;
+    v |= qemu_get_byte(f);
+    return v;
+}
+
+unsigned int qemu_get_be32(QEMUFile *f)
+{
+    unsigned int v;
+    v = qemu_get_byte(f) << 24;
+    v |= qemu_get_byte(f) << 16;
+    v |= qemu_get_byte(f) << 8;
+    v |= qemu_get_byte(f);
+    return v;
+}
+
+uint64_t qemu_get_be64(QEMUFile *f)
+{
+    uint64_t v;
+    v = (uint64_t)qemu_get_be32(f) << 32;
+    v |= qemu_get_be32(f);
+    return v;
+}
diff --git a/migration-rdma.c b/migration/rdma.c
similarity index 100%
rename from migration-rdma.c
rename to migration/rdma.c
diff --git a/migration-tcp.c b/migration/tcp.c
similarity index 100%
rename from migration-tcp.c
rename to migration/tcp.c
diff --git a/migration-unix.c b/migration/unix.c
similarity index 100%
rename from migration-unix.c
rename to migration/unix.c
diff --git a/vmstate.c b/migration/vmstate.c
similarity index 100%
rename from vmstate.c
rename to migration/vmstate.c
diff --git a/xbzrle.c b/migration/xbzrle.c
similarity index 100%
rename from xbzrle.c
rename to migration/xbzrle.c
diff --git a/monitor.c b/monitor.c
index b37ddda..503cf51 100644
--- a/monitor.c
+++ b/monitor.c
@@ -1042,6 +1042,11 @@
     dump_drift_info((FILE *)mon, monitor_fprintf);
 }
 
+static void do_info_opcount(Monitor *mon, const QDict *qdict)
+{
+    dump_opcount_info((FILE *)mon, monitor_fprintf);
+}
+
 static void do_info_history(Monitor *mon, const QDict *qdict)
 {
     int i;
@@ -2739,6 +2744,13 @@
         .mhandler.cmd = do_info_jit,
     },
     {
+        .name       = "opcount",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show dynamic compiler opcode counters",
+        .mhandler.cmd = do_info_opcount,
+    },
+    {
         .name       = "kvm",
         .args_type  = "",
         .params     = "",
diff --git a/qemu-file.c b/qemu-file.c
deleted file mode 100644
index f938e36..0000000
--- a/qemu-file.c
+++ /dev/null
@@ -1,995 +0,0 @@
-/*
- * QEMU System Emulator
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu-common.h"
-#include "qemu/iov.h"
-#include "qemu/sockets.h"
-#include "block/coroutine.h"
-#include "migration/migration.h"
-#include "migration/qemu-file.h"
-#include "trace.h"
-
-#define IO_BUF_SIZE 32768
-#define MAX_IOV_SIZE MIN(IOV_MAX, 64)
-
-struct QEMUFile {
-    const QEMUFileOps *ops;
-    void *opaque;
-
-    int64_t bytes_xfer;
-    int64_t xfer_limit;
-
-    int64_t pos; /* start of buffer when writing, end of buffer
-                    when reading */
-    int buf_index;
-    int buf_size; /* 0 when writing */
-    uint8_t buf[IO_BUF_SIZE];
-
-    struct iovec iov[MAX_IOV_SIZE];
-    unsigned int iovcnt;
-
-    int last_error;
-};
-
-bool qemu_file_mode_is_not_valid(const char *mode)
-{
-    if (mode == NULL ||
-        (mode[0] != 'r' && mode[0] != 'w') ||
-        mode[1] != 'b' || mode[2] != 0) {
-        fprintf(stderr, "qemu_fopen: Argument validity check failed\n");
-        return true;
-    }
-
-    return false;
-}
-
-QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops)
-{
-    QEMUFile *f;
-
-    f = g_malloc0(sizeof(QEMUFile));
-
-    f->opaque = opaque;
-    f->ops = ops;
-    return f;
-}
-
-/*
- * Get last error for stream f
- *
- * Return negative error value if there has been an error on previous
- * operations, return 0 if no error happened.
- *
- */
-int qemu_file_get_error(QEMUFile *f)
-{
-    return f->last_error;
-}
-
-void qemu_file_set_error(QEMUFile *f, int ret)
-{
-    if (f->last_error == 0) {
-        f->last_error = ret;
-    }
-}
-
-bool qemu_file_is_writable(QEMUFile *f)
-{
-    return f->ops->writev_buffer || f->ops->put_buffer;
-}
-
-/**
- * Flushes QEMUFile buffer
- *
- * If there is writev_buffer QEMUFileOps it uses it otherwise uses
- * put_buffer ops.
- */
-void qemu_fflush(QEMUFile *f)
-{
-    ssize_t ret = 0;
-
-    if (!qemu_file_is_writable(f)) {
-        return;
-    }
-
-    if (f->ops->writev_buffer) {
-        if (f->iovcnt > 0) {
-            ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos);
-        }
-    } else {
-        if (f->buf_index > 0) {
-            ret = f->ops->put_buffer(f->opaque, f->buf, f->pos, f->buf_index);
-        }
-    }
-    if (ret >= 0) {
-        f->pos += ret;
-    }
-    f->buf_index = 0;
-    f->iovcnt = 0;
-    if (ret < 0) {
-        qemu_file_set_error(f, ret);
-    }
-}
-
-void ram_control_before_iterate(QEMUFile *f, uint64_t flags)
-{
-    int ret = 0;
-
-    if (f->ops->before_ram_iterate) {
-        ret = f->ops->before_ram_iterate(f, f->opaque, flags);
-        if (ret < 0) {
-            qemu_file_set_error(f, ret);
-        }
-    }
-}
-
-void ram_control_after_iterate(QEMUFile *f, uint64_t flags)
-{
-    int ret = 0;
-
-    if (f->ops->after_ram_iterate) {
-        ret = f->ops->after_ram_iterate(f, f->opaque, flags);
-        if (ret < 0) {
-            qemu_file_set_error(f, ret);
-        }
-    }
-}
-
-void ram_control_load_hook(QEMUFile *f, uint64_t flags)
-{
-    int ret = -EINVAL;
-
-    if (f->ops->hook_ram_load) {
-        ret = f->ops->hook_ram_load(f, f->opaque, flags);
-        if (ret < 0) {
-            qemu_file_set_error(f, ret);
-        }
-    } else {
-        qemu_file_set_error(f, ret);
-    }
-}
-
-size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
-                         ram_addr_t offset, size_t size, int *bytes_sent)
-{
-    if (f->ops->save_page) {
-        int ret = f->ops->save_page(f, f->opaque, block_offset,
-                                    offset, size, bytes_sent);
-
-        if (ret != RAM_SAVE_CONTROL_DELAYED) {
-            if (bytes_sent && *bytes_sent > 0) {
-                qemu_update_position(f, *bytes_sent);
-            } else if (ret < 0) {
-                qemu_file_set_error(f, ret);
-            }
-        }
-
-        return ret;
-    }
-
-    return RAM_SAVE_CONTROL_NOT_SUPP;
-}
-
-/*
- * Attempt to fill the buffer from the underlying file
- * Returns the number of bytes read, or negative value for an error.
- *
- * Note that it can return a partially full buffer even in a not error/not EOF
- * case if the underlying file descriptor gives a short read, and that can
- * happen even on a blocking fd.
- */
-static ssize_t qemu_fill_buffer(QEMUFile *f)
-{
-    int len;
-    int pending;
-
-    assert(!qemu_file_is_writable(f));
-
-    pending = f->buf_size - f->buf_index;
-    if (pending > 0) {
-        memmove(f->buf, f->buf + f->buf_index, pending);
-    }
-    f->buf_index = 0;
-    f->buf_size = pending;
-
-    len = f->ops->get_buffer(f->opaque, f->buf + pending, f->pos,
-                        IO_BUF_SIZE - pending);
-    if (len > 0) {
-        f->buf_size += len;
-        f->pos += len;
-    } else if (len == 0) {
-        qemu_file_set_error(f, -EIO);
-    } else if (len != -EAGAIN) {
-        qemu_file_set_error(f, len);
-    }
-
-    return len;
-}
-
-int qemu_get_fd(QEMUFile *f)
-{
-    if (f->ops->get_fd) {
-        return f->ops->get_fd(f->opaque);
-    }
-    return -1;
-}
-
-void qemu_update_position(QEMUFile *f, size_t size)
-{
-    f->pos += size;
-}
-
-/** Closes the file
- *
- * Returns negative error value if any error happened on previous operations or
- * while closing the file. Returns 0 or positive number on success.
- *
- * The meaning of return value on success depends on the specific backend
- * being used.
- */
-int qemu_fclose(QEMUFile *f)
-{
-    int ret;
-    qemu_fflush(f);
-    ret = qemu_file_get_error(f);
-
-    if (f->ops->close) {
-        int ret2 = f->ops->close(f->opaque);
-        if (ret >= 0) {
-            ret = ret2;
-        }
-    }
-    /* If any error was spotted before closing, we should report it
-     * instead of the close() return value.
-     */
-    if (f->last_error) {
-        ret = f->last_error;
-    }
-    g_free(f);
-    trace_qemu_file_fclose();
-    return ret;
-}
-
-static void add_to_iovec(QEMUFile *f, const uint8_t *buf, int size)
-{
-    /* check for adjacent buffer and coalesce them */
-    if (f->iovcnt > 0 && buf == f->iov[f->iovcnt - 1].iov_base +
-        f->iov[f->iovcnt - 1].iov_len) {
-        f->iov[f->iovcnt - 1].iov_len += size;
-    } else {
-        f->iov[f->iovcnt].iov_base = (uint8_t *)buf;
-        f->iov[f->iovcnt++].iov_len = size;
-    }
-
-    if (f->iovcnt >= MAX_IOV_SIZE) {
-        qemu_fflush(f);
-    }
-}
-
-void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, int size)
-{
-    if (!f->ops->writev_buffer) {
-        qemu_put_buffer(f, buf, size);
-        return;
-    }
-
-    if (f->last_error) {
-        return;
-    }
-
-    f->bytes_xfer += size;
-    add_to_iovec(f, buf, size);
-}
-
-void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size)
-{
-    int l;
-
-    if (f->last_error) {
-        return;
-    }
-
-    while (size > 0) {
-        l = IO_BUF_SIZE - f->buf_index;
-        if (l > size) {
-            l = size;
-        }
-        memcpy(f->buf + f->buf_index, buf, l);
-        f->bytes_xfer += l;
-        if (f->ops->writev_buffer) {
-            add_to_iovec(f, f->buf + f->buf_index, l);
-        }
-        f->buf_index += l;
-        if (f->buf_index == IO_BUF_SIZE) {
-            qemu_fflush(f);
-        }
-        if (qemu_file_get_error(f)) {
-            break;
-        }
-        buf += l;
-        size -= l;
-    }
-}
-
-void qemu_put_byte(QEMUFile *f, int v)
-{
-    if (f->last_error) {
-        return;
-    }
-
-    f->buf[f->buf_index] = v;
-    f->bytes_xfer++;
-    if (f->ops->writev_buffer) {
-        add_to_iovec(f, f->buf + f->buf_index, 1);
-    }
-    f->buf_index++;
-    if (f->buf_index == IO_BUF_SIZE) {
-        qemu_fflush(f);
-    }
-}
-
-void qemu_file_skip(QEMUFile *f, int size)
-{
-    if (f->buf_index + size <= f->buf_size) {
-        f->buf_index += size;
-    }
-}
-
-/*
- * Read 'size' bytes from file (at 'offset') into buf without moving the
- * pointer.
- *
- * It will return size bytes unless there was an error, in which case it will
- * return as many as it managed to read (assuming blocking fd's which
- * all current QEMUFile are)
- */
-int qemu_peek_buffer(QEMUFile *f, uint8_t *buf, int size, size_t offset)
-{
-    int pending;
-    int index;
-
-    assert(!qemu_file_is_writable(f));
-    assert(offset < IO_BUF_SIZE);
-    assert(size <= IO_BUF_SIZE - offset);
-
-    /* The 1st byte to read from */
-    index = f->buf_index + offset;
-    /* The number of available bytes starting at index */
-    pending = f->buf_size - index;
-
-    /*
-     * qemu_fill_buffer might return just a few bytes, even when there isn't
-     * an error, so loop collecting them until we get enough.
-     */
-    while (pending < size) {
-        int received = qemu_fill_buffer(f);
-
-        if (received <= 0) {
-            break;
-        }
-
-        index = f->buf_index + offset;
-        pending = f->buf_size - index;
-    }
-
-    if (pending <= 0) {
-        return 0;
-    }
-    if (size > pending) {
-        size = pending;
-    }
-
-    memcpy(buf, f->buf + index, size);
-    return size;
-}
-
-/*
- * Read 'size' bytes of data from the file into buf.
- * 'size' can be larger than the internal buffer.
- *
- * It will return size bytes unless there was an error, in which case it will
- * return as many as it managed to read (assuming blocking fd's which
- * all current QEMUFile are)
- */
-int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size)
-{
-    int pending = size;
-    int done = 0;
-
-    while (pending > 0) {
-        int res;
-
-        res = qemu_peek_buffer(f, buf, MIN(pending, IO_BUF_SIZE), 0);
-        if (res == 0) {
-            return done;
-        }
-        qemu_file_skip(f, res);
-        buf += res;
-        pending -= res;
-        done += res;
-    }
-    return done;
-}
-
-/*
- * Peeks a single byte from the buffer; this isn't guaranteed to work if
- * offset leaves a gap after the previous read/peeked data.
- */
-int qemu_peek_byte(QEMUFile *f, int offset)
-{
-    int index = f->buf_index + offset;
-
-    assert(!qemu_file_is_writable(f));
-    assert(offset < IO_BUF_SIZE);
-
-    if (index >= f->buf_size) {
-        qemu_fill_buffer(f);
-        index = f->buf_index + offset;
-        if (index >= f->buf_size) {
-            return 0;
-        }
-    }
-    return f->buf[index];
-}
-
-int qemu_get_byte(QEMUFile *f)
-{
-    int result;
-
-    result = qemu_peek_byte(f, 0);
-    qemu_file_skip(f, 1);
-    return result;
-}
-
-int64_t qemu_ftell(QEMUFile *f)
-{
-    qemu_fflush(f);
-    return f->pos;
-}
-
-int qemu_file_rate_limit(QEMUFile *f)
-{
-    if (qemu_file_get_error(f)) {
-        return 1;
-    }
-    if (f->xfer_limit > 0 && f->bytes_xfer > f->xfer_limit) {
-        return 1;
-    }
-    return 0;
-}
-
-int64_t qemu_file_get_rate_limit(QEMUFile *f)
-{
-    return f->xfer_limit;
-}
-
-void qemu_file_set_rate_limit(QEMUFile *f, int64_t limit)
-{
-    f->xfer_limit = limit;
-}
-
-void qemu_file_reset_rate_limit(QEMUFile *f)
-{
-    f->bytes_xfer = 0;
-}
-
-void qemu_put_be16(QEMUFile *f, unsigned int v)
-{
-    qemu_put_byte(f, v >> 8);
-    qemu_put_byte(f, v);
-}
-
-void qemu_put_be32(QEMUFile *f, unsigned int v)
-{
-    qemu_put_byte(f, v >> 24);
-    qemu_put_byte(f, v >> 16);
-    qemu_put_byte(f, v >> 8);
-    qemu_put_byte(f, v);
-}
-
-void qemu_put_be64(QEMUFile *f, uint64_t v)
-{
-    qemu_put_be32(f, v >> 32);
-    qemu_put_be32(f, v);
-}
-
-unsigned int qemu_get_be16(QEMUFile *f)
-{
-    unsigned int v;
-    v = qemu_get_byte(f) << 8;
-    v |= qemu_get_byte(f);
-    return v;
-}
-
-unsigned int qemu_get_be32(QEMUFile *f)
-{
-    unsigned int v;
-    v = qemu_get_byte(f) << 24;
-    v |= qemu_get_byte(f) << 16;
-    v |= qemu_get_byte(f) << 8;
-    v |= qemu_get_byte(f);
-    return v;
-}
-
-uint64_t qemu_get_be64(QEMUFile *f)
-{
-    uint64_t v;
-    v = (uint64_t)qemu_get_be32(f) << 32;
-    v |= qemu_get_be32(f);
-    return v;
-}
-
-#define QSB_CHUNK_SIZE      (1 << 10)
-#define QSB_MAX_CHUNK_SIZE  (16 * QSB_CHUNK_SIZE)
-
-/**
- * Create a QEMUSizedBuffer
- * This type of buffer uses scatter-gather lists internally and
- * can grow to any size. Any data array in the scatter-gather list
- * can hold different amount of bytes.
- *
- * @buffer: Optional buffer to copy into the QSB
- * @len: size of initial buffer; if @buffer is given, buffer must
- *       hold at least len bytes
- *
- * Returns a pointer to a QEMUSizedBuffer or NULL on allocation failure
- */
-QEMUSizedBuffer *qsb_create(const uint8_t *buffer, size_t len)
-{
-    QEMUSizedBuffer *qsb;
-    size_t alloc_len, num_chunks, i, to_copy;
-    size_t chunk_size = (len > QSB_MAX_CHUNK_SIZE)
-                        ? QSB_MAX_CHUNK_SIZE
-                        : QSB_CHUNK_SIZE;
-
-    num_chunks = DIV_ROUND_UP(len ? len : QSB_CHUNK_SIZE, chunk_size);
-    alloc_len = num_chunks * chunk_size;
-
-    qsb = g_try_new0(QEMUSizedBuffer, 1);
-    if (!qsb) {
-        return NULL;
-    }
-
-    qsb->iov = g_try_new0(struct iovec, num_chunks);
-    if (!qsb->iov) {
-        g_free(qsb);
-        return NULL;
-    }
-
-    qsb->n_iov = num_chunks;
-
-    for (i = 0; i < num_chunks; i++) {
-        qsb->iov[i].iov_base = g_try_malloc0(chunk_size);
-        if (!qsb->iov[i].iov_base) {
-            /* qsb_free is safe since g_free can cope with NULL */
-            qsb_free(qsb);
-            return NULL;
-        }
-
-        qsb->iov[i].iov_len = chunk_size;
-        if (buffer) {
-            to_copy = (len - qsb->used) > chunk_size
-                      ? chunk_size : (len - qsb->used);
-            memcpy(qsb->iov[i].iov_base, &buffer[qsb->used], to_copy);
-            qsb->used += to_copy;
-        }
-    }
-
-    qsb->size = alloc_len;
-
-    return qsb;
-}
-
-/**
- * Free the QEMUSizedBuffer
- *
- * @qsb: The QEMUSizedBuffer to free
- */
-void qsb_free(QEMUSizedBuffer *qsb)
-{
-    size_t i;
-
-    if (!qsb) {
-        return;
-    }
-
-    for (i = 0; i < qsb->n_iov; i++) {
-        g_free(qsb->iov[i].iov_base);
-    }
-    g_free(qsb->iov);
-    g_free(qsb);
-}
-
-/**
- * Get the number of used bytes in the QEMUSizedBuffer
- *
- * @qsb: A QEMUSizedBuffer
- *
- * Returns the number of bytes currently used in this buffer
- */
-size_t qsb_get_length(const QEMUSizedBuffer *qsb)
-{
-    return qsb->used;
-}
-
-/**
- * Set the length of the buffer; the primary usage of this
- * function is to truncate the number of used bytes in the buffer.
- * The size will not be extended beyond the current number of
- * allocated bytes in the QEMUSizedBuffer.
- *
- * @qsb: A QEMUSizedBuffer
- * @new_len: The new length of bytes in the buffer
- *
- * Returns the number of bytes the buffer was truncated or extended
- * to.
- */
-size_t qsb_set_length(QEMUSizedBuffer *qsb, size_t new_len)
-{
-    if (new_len <= qsb->size) {
-        qsb->used = new_len;
-    } else {
-        qsb->used = qsb->size;
-    }
-    return qsb->used;
-}
-
-/**
- * Get the iovec that holds the data for a given position @pos.
- *
- * @qsb: A QEMUSizedBuffer
- * @pos: The index of a byte in the buffer
- * @d_off: Pointer to an offset that this function will indicate
- *         at what position within the returned iovec the byte
- *         is to be found
- *
- * Returns the index of the iovec that holds the byte at the given
- * index @pos in the byte stream; a negative number if the iovec
- * for the given position @pos does not exist.
- */
-static ssize_t qsb_get_iovec(const QEMUSizedBuffer *qsb,
-                             off_t pos, off_t *d_off)
-{
-    ssize_t i;
-    off_t curr = 0;
-
-    if (pos > qsb->used) {
-        return -1;
-    }
-
-    for (i = 0; i < qsb->n_iov; i++) {
-        if (curr + qsb->iov[i].iov_len > pos) {
-            *d_off = pos - curr;
-            return i;
-        }
-        curr += qsb->iov[i].iov_len;
-    }
-    return -1;
-}
-
-/*
- * Convert the QEMUSizedBuffer into a flat buffer.
- *
- * Note: If at all possible, try to avoid this function since it
- *       may unnecessarily copy memory around.
- *
- * @qsb: pointer to QEMUSizedBuffer
- * @start: offset to start at
- * @count: number of bytes to copy
- * @buf: a pointer to a buffer to write into (at least @count bytes)
- *
- * Returns the number of bytes copied into the output buffer
- */
-ssize_t qsb_get_buffer(const QEMUSizedBuffer *qsb, off_t start,
-                       size_t count, uint8_t *buffer)
-{
-    const struct iovec *iov;
-    size_t to_copy, all_copy;
-    ssize_t index;
-    off_t s_off;
-    off_t d_off = 0;
-    char *s;
-
-    if (start > qsb->used) {
-        return 0;
-    }
-
-    all_copy = qsb->used - start;
-    if (all_copy > count) {
-        all_copy = count;
-    } else {
-        count = all_copy;
-    }
-
-    index = qsb_get_iovec(qsb, start, &s_off);
-    if (index < 0) {
-        return 0;
-    }
-
-    while (all_copy > 0) {
-        iov = &qsb->iov[index];
-
-        s = iov->iov_base;
-
-        to_copy = iov->iov_len - s_off;
-        if (to_copy > all_copy) {
-            to_copy = all_copy;
-        }
-        memcpy(&buffer[d_off], &s[s_off], to_copy);
-
-        d_off += to_copy;
-        all_copy -= to_copy;
-
-        s_off = 0;
-        index++;
-    }
-
-    return count;
-}
-
-/**
- * Grow the QEMUSizedBuffer to the given size and allocate
- * memory for it.
- *
- * @qsb: A QEMUSizedBuffer
- * @new_size: The new size of the buffer
- *
- * Return:
- *    a negative error code in case of memory allocation failure
- * or
- *    the new size of the buffer. The returned size may be greater or equal
- *    to @new_size.
- */
-static ssize_t qsb_grow(QEMUSizedBuffer *qsb, size_t new_size)
-{
-    size_t needed_chunks, i;
-
-    if (qsb->size < new_size) {
-        struct iovec *new_iov;
-        size_t size_diff = new_size - qsb->size;
-        size_t chunk_size = (size_diff > QSB_MAX_CHUNK_SIZE)
-                             ? QSB_MAX_CHUNK_SIZE : QSB_CHUNK_SIZE;
-
-        needed_chunks = DIV_ROUND_UP(size_diff, chunk_size);
-
-        new_iov = g_try_new(struct iovec, qsb->n_iov + needed_chunks);
-        if (new_iov == NULL) {
-            return -ENOMEM;
-        }
-
-        /* Allocate new chunks as needed into new_iov */
-        for (i = qsb->n_iov; i < qsb->n_iov + needed_chunks; i++) {
-            new_iov[i].iov_base = g_try_malloc0(chunk_size);
-            new_iov[i].iov_len = chunk_size;
-            if (!new_iov[i].iov_base) {
-                size_t j;
-
-                /* Free previously allocated new chunks */
-                for (j = qsb->n_iov; j < i; j++) {
-                    g_free(new_iov[j].iov_base);
-                }
-                g_free(new_iov);
-
-                return -ENOMEM;
-            }
-        }
-
-        /*
-         * Now we can't get any allocation errors, copy over to new iov
-         * and switch.
-         */
-        for (i = 0; i < qsb->n_iov; i++) {
-            new_iov[i] = qsb->iov[i];
-        }
-
-        qsb->n_iov += needed_chunks;
-        g_free(qsb->iov);
-        qsb->iov = new_iov;
-        qsb->size += (needed_chunks * chunk_size);
-    }
-
-    return qsb->size;
-}
-
-/**
- * Write into the QEMUSizedBuffer at a given position and a given
- * number of bytes. This function will automatically grow the
- * QEMUSizedBuffer.
- *
- * @qsb: A QEMUSizedBuffer
- * @source: A byte array to copy data from
- * @pos: The position within the @qsb to write data to
- * @size: The number of bytes to copy into the @qsb
- *
- * Returns @size or a negative error code in case of memory allocation failure,
- *           or with an invalid 'pos'
- */
-ssize_t qsb_write_at(QEMUSizedBuffer *qsb, const uint8_t *source,
-                     off_t pos, size_t count)
-{
-    ssize_t rc = qsb_grow(qsb, pos + count);
-    size_t to_copy;
-    size_t all_copy = count;
-    const struct iovec *iov;
-    ssize_t index;
-    char *dest;
-    off_t d_off, s_off = 0;
-
-    if (rc < 0) {
-        return rc;
-    }
-
-    if (pos + count > qsb->used) {
-        qsb->used = pos + count;
-    }
-
-    index = qsb_get_iovec(qsb, pos, &d_off);
-    if (index < 0) {
-        return -EINVAL;
-    }
-
-    while (all_copy > 0) {
-        iov = &qsb->iov[index];
-
-        dest = iov->iov_base;
-
-        to_copy = iov->iov_len - d_off;
-        if (to_copy > all_copy) {
-            to_copy = all_copy;
-        }
-
-        memcpy(&dest[d_off], &source[s_off], to_copy);
-
-        s_off += to_copy;
-        all_copy -= to_copy;
-
-        d_off = 0;
-        index++;
-    }
-
-    return count;
-}
-
-/**
- * Create a deep copy of the given QEMUSizedBuffer.
- *
- * @qsb: A QEMUSizedBuffer
- *
- * Returns a clone of @qsb or NULL on allocation failure
- */
-QEMUSizedBuffer *qsb_clone(const QEMUSizedBuffer *qsb)
-{
-    QEMUSizedBuffer *out = qsb_create(NULL, qsb_get_length(qsb));
-    size_t i;
-    ssize_t res;
-    off_t pos = 0;
-
-    if (!out) {
-        return NULL;
-    }
-
-    for (i = 0; i < qsb->n_iov; i++) {
-        res =  qsb_write_at(out, qsb->iov[i].iov_base,
-                            pos, qsb->iov[i].iov_len);
-        if (res < 0) {
-            qsb_free(out);
-            return NULL;
-        }
-        pos += res;
-    }
-
-    return out;
-}
-
-typedef struct QEMUBuffer {
-    QEMUSizedBuffer *qsb;
-    QEMUFile *file;
-} QEMUBuffer;
-
-static int buf_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
-{
-    QEMUBuffer *s = opaque;
-    ssize_t len = qsb_get_length(s->qsb) - pos;
-
-    if (len <= 0) {
-        return 0;
-    }
-
-    if (len > size) {
-        len = size;
-    }
-    return qsb_get_buffer(s->qsb, pos, len, buf);
-}
-
-static int buf_put_buffer(void *opaque, const uint8_t *buf,
-                          int64_t pos, int size)
-{
-    QEMUBuffer *s = opaque;
-
-    return qsb_write_at(s->qsb, buf, pos, size);
-}
-
-static int buf_close(void *opaque)
-{
-    QEMUBuffer *s = opaque;
-
-    qsb_free(s->qsb);
-
-    g_free(s);
-
-    return 0;
-}
-
-const QEMUSizedBuffer *qemu_buf_get(QEMUFile *f)
-{
-    QEMUBuffer *p;
-
-    qemu_fflush(f);
-
-    p = f->opaque;
-
-    return p->qsb;
-}
-
-static const QEMUFileOps buf_read_ops = {
-    .get_buffer = buf_get_buffer,
-    .close =      buf_close,
-};
-
-static const QEMUFileOps buf_write_ops = {
-    .put_buffer = buf_put_buffer,
-    .close =      buf_close,
-};
-
-QEMUFile *qemu_bufopen(const char *mode, QEMUSizedBuffer *input)
-{
-    QEMUBuffer *s;
-
-    if (mode == NULL || (mode[0] != 'r' && mode[0] != 'w') ||
-        mode[1] != '\0') {
-        error_report("qemu_bufopen: Argument validity check failed");
-        return NULL;
-    }
-
-    s = g_malloc0(sizeof(QEMUBuffer));
-    if (mode[0] == 'r') {
-        s->qsb = input;
-    }
-
-    if (s->qsb == NULL) {
-        s->qsb = qsb_create(NULL, 0);
-    }
-    if (!s->qsb) {
-        g_free(s);
-        error_report("qemu_bufopen: qsb_create failed");
-        return NULL;
-    }
-
-
-    if (mode[0] == 'r') {
-        s->file = qemu_fopen_ops(s, &buf_read_ops);
-    } else {
-        s->file = qemu_fopen_ops(s, &buf_write_ops);
-    }
-    return s->file;
-}
diff --git a/qemu-log.c b/qemu-log.c
index 797f2af..05b5493 100644
--- a/qemu-log.c
+++ b/qemu-log.c
@@ -106,6 +106,8 @@
       "show trace before each executed TB (lots of logs)" },
     { CPU_LOG_TB_CPU, "cpu",
       "show CPU state before block translation" },
+    { CPU_LOG_MMU, "mmu",
+      "log MMU-related activities" },
     { CPU_LOG_PCALL, "pcall",
       "x86 only: show protected mode far calls/returns/exceptions" },
     { CPU_LOG_RESET, "cpu_reset",
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 3348782..6945d30 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -3184,6 +3184,9 @@
 Enable/Disable migration capabilities
 
 - "xbzrle": XBZRLE support
+- "rdma-pin-all": pin all pages when using RDMA during migration
+- "auto-converge": throttle down guest to help convergence of migration
+- "zero-blocks": compress zero blocks during block migration
 
 Arguments:
 
@@ -3208,6 +3211,9 @@
 
 - "capabilities": migration capabilities state
          - "xbzrle" : XBZRLE state (json-bool)
+         - "rdma-pin-all" : RDMA Pin Page state (json-bool)
+         - "auto-converge" : Auto Converge state (json-bool)
+         - "zero-blocks" : Zero Blocks state (json-bool)
 
 Arguments:
 
diff --git a/spice-qemu-char.c b/spice-qemu-char.c
index 8106e06..7e0d300 100644
--- a/spice-qemu-char.c
+++ b/spice-qemu-char.c
@@ -3,7 +3,6 @@
 #include "ui/qemu-spice.h"
 #include "sysemu/char.h"
 #include <spice.h>
-#include <spice-experimental.h>
 #include <spice/protocol.h>
 
 #include "qemu/osdep.h"
diff --git a/target-cris/helper.c b/target-cris/helper.c
index e901c3a..df6c9fd 100644
--- a/target-cris/helper.c
+++ b/target-cris/helper.c
@@ -84,8 +84,8 @@
     int r = -1;
     target_ulong phy;
 
-    D(printf("%s addr=%" VADDR_PRIx " pc=%x rw=%x\n",
-             __func__, address, env->pc, rw));
+    qemu_log_mask(CPU_LOG_MMU, "%s addr=%" VADDR_PRIx " pc=%x rw=%x\n",
+            __func__, address, env->pc, rw);
     miss = cris_mmu_translate(&res, env, address & TARGET_PAGE_MASK,
                               rw, mmu_idx, 0);
     if (miss) {
@@ -112,9 +112,10 @@
         r = 0;
     }
     if (r > 0) {
-        D_LOG("%s returns %d irqreq=%x addr=%" VADDR_PRIx " phy=%x vec=%x"
-              " pc=%x\n", __func__, r, cs->interrupt_request, address, res.phy,
-              res.bf_vec, env->pc);
+        qemu_log_mask(CPU_LOG_MMU,
+                "%s returns %d irqreq=%x addr=%" VADDR_PRIx " phy=%x vec=%x"
+                " pc=%x\n", __func__, r, cs->interrupt_request, address,
+                res.phy, res.bf_vec, env->pc);
     }
     return r;
 }
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 345bda1..4f1ddf7 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -25,8 +25,6 @@
 #include "monitor/monitor.h"
 #endif
 
-//#define DEBUG_MMU
-
 static void cpu_x86_version(CPUX86State *env, int *family, int *model)
 {
     int cpuver = env->cpuid_version;
@@ -388,9 +386,7 @@
     if (a20_state != ((env->a20_mask >> 20) & 1)) {
         CPUState *cs = CPU(cpu);
 
-#if defined(DEBUG_MMU)
-        printf("A20 update: a20=%d\n", a20_state);
-#endif
+        qemu_log_mask(CPU_LOG_MMU, "A20 update: a20=%d\n", a20_state);
         /* if the cpu is currently executing code, we must unlink it and
            all the potentially executing TB */
         cpu_interrupt(cs, CPU_INTERRUPT_EXITTB);
@@ -407,9 +403,7 @@
     X86CPU *cpu = x86_env_get_cpu(env);
     int pe_state;
 
-#if defined(DEBUG_MMU)
-    printf("CR0 update: CR0=0x%08x\n", new_cr0);
-#endif
+    qemu_log_mask(CPU_LOG_MMU, "CR0 update: CR0=0x%08x\n", new_cr0);
     if ((new_cr0 & (CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK)) !=
         (env->cr[0] & (CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK))) {
         tlb_flush(CPU(cpu), 1);
@@ -452,9 +446,8 @@
 
     env->cr[3] = new_cr3;
     if (env->cr[0] & CR0_PG_MASK) {
-#if defined(DEBUG_MMU)
-        printf("CR3 update: CR3=" TARGET_FMT_lx "\n", new_cr3);
-#endif
+        qemu_log_mask(CPU_LOG_MMU,
+                        "CR3 update: CR3=" TARGET_FMT_lx "\n", new_cr3);
         tlb_flush(CPU(cpu), 0);
     }
 }
diff --git a/target-microblaze/helper.c b/target-microblaze/helper.c
index 59466c9..32896f4 100644
--- a/target-microblaze/helper.c
+++ b/target-microblaze/helper.c
@@ -22,7 +22,6 @@
 #include "qemu/host-utils.h"
 
 #define D(x)
-#define DMMU(x)
 
 #if defined(CONFIG_USER_ONLY)
 
@@ -75,13 +74,14 @@
             vaddr = address & TARGET_PAGE_MASK;
             paddr = lu.paddr + vaddr - lu.vaddr;
 
-            DMMU(qemu_log("MMU map mmu=%d v=%x p=%x prot=%x\n",
-                     mmu_idx, vaddr, paddr, lu.prot));
+            qemu_log_mask(CPU_LOG_MMU, "MMU map mmu=%d v=%x p=%x prot=%x\n",
+                    mmu_idx, vaddr, paddr, lu.prot);
             tlb_set_page(cs, vaddr, paddr, lu.prot, mmu_idx, TARGET_PAGE_SIZE);
             r = 0;
         } else {
             env->sregs[SR_EAR] = address;
-            DMMU(qemu_log("mmu=%d miss v=%x\n", mmu_idx, address));
+            qemu_log_mask(CPU_LOG_MMU, "mmu=%d miss v=%" VADDR_PRIx "\n",
+                                        mmu_idx, address);
 
             switch (lu.err) {
                 case ERR_PROT:
diff --git a/target-mips/cpu.h b/target-mips/cpu.h
index c01bbda..8875c97 100644
--- a/target-mips/cpu.h
+++ b/target-mips/cpu.h
@@ -446,8 +446,8 @@
 #define CP0C3_MT   2
 #define CP0C3_SM   1
 #define CP0C3_TL   0
-    uint32_t CP0_Config4;
-    uint32_t CP0_Config4_rw_bitmask;
+    int32_t CP0_Config4;
+    int32_t CP0_Config4_rw_bitmask;
 #define CP0C4_M    31
 #define CP0C4_IE   29
 #define CP0C4_KScrExist 16
@@ -456,8 +456,8 @@
 #define CP0C4_FTLBWays 4
 #define CP0C4_FTLBSets 0
 #define CP0C4_MMUSizeExt 0
-    uint32_t CP0_Config5;
-    uint32_t CP0_Config5_rw_bitmask;
+    int32_t CP0_Config5;
+    int32_t CP0_Config5_rw_bitmask;
 #define CP0C5_M          31
 #define CP0C5_K          30
 #define CP0C5_CV         29
@@ -777,6 +777,18 @@
 extern unsigned int ieee_rm[];
 int ieee_ex_to_mips(int xcpt);
 
+static inline void restore_rounding_mode(CPUMIPSState *env)
+{
+    set_float_rounding_mode(ieee_rm[env->active_fpu.fcr31 & 3],
+                            &env->active_fpu.fp_status);
+}
+
+static inline void restore_flush_mode(CPUMIPSState *env)
+{
+    set_flush_to_zero((env->active_fpu.fcr31 & (1 << 24)) != 0,
+                      &env->active_fpu.fp_status);
+}
+
 static inline void cpu_get_tb_cpu_state(CPUMIPSState *env, target_ulong *pc,
                                         target_ulong *cs_base, int *flags)
 {
@@ -831,16 +843,19 @@
         env->hflags |= (env->CP0_Status >> CP0St_KSU) & MIPS_HFLAG_KSU;
     }
 #if defined(TARGET_MIPS64)
-    if (((env->hflags & MIPS_HFLAG_KSU) != MIPS_HFLAG_UM) ||
-        (env->CP0_Status & (1 << CP0St_PX)) ||
-        (env->CP0_Status & (1 << CP0St_UX))) {
+    if ((env->insn_flags & ISA_MIPS3) &&
+        (((env->hflags & MIPS_HFLAG_KSU) != MIPS_HFLAG_UM) ||
+         (env->CP0_Status & (1 << CP0St_PX)) ||
+         (env->CP0_Status & (1 << CP0St_UX)))) {
         env->hflags |= MIPS_HFLAG_64;
     }
 
-    if (((env->hflags & MIPS_HFLAG_KSU) == MIPS_HFLAG_UM) &&
-        !(env->CP0_Status & (1 << CP0St_UX))) {
+    if (!(env->insn_flags & ISA_MIPS3)) {
         env->hflags |= MIPS_HFLAG_AWRAP;
-    } else if (env->insn_flags & ISA_MIPS32R6) {
+    } else if (((env->hflags & MIPS_HFLAG_KSU) == MIPS_HFLAG_UM) &&
+               !(env->CP0_Status & (1 << CP0St_UX))) {
+        env->hflags |= MIPS_HFLAG_AWRAP;
+    } else if (env->insn_flags & ISA_MIPS64R6) {
         /* Address wrapping for Supervisor and Kernel is specified in R6 */
         if ((((env->hflags & MIPS_HFLAG_KSU) == MIPS_HFLAG_SM) &&
              !(env->CP0_Status & (1 << CP0St_SX))) ||
@@ -904,4 +919,93 @@
     }
 }
 
+#ifndef CONFIG_USER_ONLY
+/* Called for updates to CP0_Status.  */
+static inline void sync_c0_status(CPUMIPSState *env, CPUMIPSState *cpu, int tc)
+{
+    int32_t tcstatus, *tcst;
+    uint32_t v = cpu->CP0_Status;
+    uint32_t cu, mx, asid, ksu;
+    uint32_t mask = ((1 << CP0TCSt_TCU3)
+                       | (1 << CP0TCSt_TCU2)
+                       | (1 << CP0TCSt_TCU1)
+                       | (1 << CP0TCSt_TCU0)
+                       | (1 << CP0TCSt_TMX)
+                       | (3 << CP0TCSt_TKSU)
+                       | (0xff << CP0TCSt_TASID));
+
+    cu = (v >> CP0St_CU0) & 0xf;
+    mx = (v >> CP0St_MX) & 0x1;
+    ksu = (v >> CP0St_KSU) & 0x3;
+    asid = env->CP0_EntryHi & 0xff;
+
+    tcstatus = cu << CP0TCSt_TCU0;
+    tcstatus |= mx << CP0TCSt_TMX;
+    tcstatus |= ksu << CP0TCSt_TKSU;
+    tcstatus |= asid;
+
+    if (tc == cpu->current_tc) {
+        tcst = &cpu->active_tc.CP0_TCStatus;
+    } else {
+        tcst = &cpu->tcs[tc].CP0_TCStatus;
+    }
+
+    *tcst &= ~mask;
+    *tcst |= tcstatus;
+    compute_hflags(cpu);
+}
+
+static inline void cpu_mips_store_status(CPUMIPSState *env, target_ulong val)
+{
+    uint32_t mask = env->CP0_Status_rw_bitmask;
+
+    if (env->insn_flags & ISA_MIPS32R6) {
+        bool has_supervisor = extract32(mask, CP0St_KSU, 2) == 0x3;
+
+        if (has_supervisor && extract32(val, CP0St_KSU, 2) == 0x3) {
+            mask &= ~(3 << CP0St_KSU);
+        }
+        mask &= ~(((1 << CP0St_SR) | (1 << CP0St_NMI)) & val);
+    }
+
+    env->CP0_Status = (env->CP0_Status & ~mask) | (val & mask);
+    if (env->CP0_Config3 & (1 << CP0C3_MT)) {
+        sync_c0_status(env, env, env->current_tc);
+    } else {
+        compute_hflags(env);
+    }
+}
+
+static inline void cpu_mips_store_cause(CPUMIPSState *env, target_ulong val)
+{
+    uint32_t mask = 0x00C00300;
+    uint32_t old = env->CP0_Cause;
+    int i;
+
+    if (env->insn_flags & ISA_MIPS32R2) {
+        mask |= 1 << CP0Ca_DC;
+    }
+    if (env->insn_flags & ISA_MIPS32R6) {
+        mask &= ~((1 << CP0Ca_WP) & val);
+    }
+
+    env->CP0_Cause = (env->CP0_Cause & ~mask) | (val & mask);
+
+    if ((old ^ env->CP0_Cause) & (1 << CP0Ca_DC)) {
+        if (env->CP0_Cause & (1 << CP0Ca_DC)) {
+            cpu_mips_stop_count(env);
+        } else {
+            cpu_mips_start_count(env);
+        }
+    }
+
+    /* Set/reset software interrupts */
+    for (i = 0 ; i < 2 ; i++) {
+        if ((old ^ env->CP0_Cause) & (1 << (CP0Ca_IP + i))) {
+            cpu_mips_soft_irq(env, i, env->CP0_Cause & (1 << (CP0Ca_IP + i)));
+        }
+    }
+}
+#endif
+
 #endif /* !defined (__MIPS_CPU_H__) */
diff --git a/target-mips/gdbstub.c b/target-mips/gdbstub.c
index f65fec2..9845d88 100644
--- a/target-mips/gdbstub.c
+++ b/target-mips/gdbstub.c
@@ -29,8 +29,13 @@
     if (n < 32) {
         return gdb_get_regl(mem_buf, env->active_tc.gpr[n]);
     }
-    if (env->CP0_Config1 & (1 << CP0C1_FP)) {
-        if (n >= 38 && n < 70) {
+    if (env->CP0_Config1 & (1 << CP0C1_FP) && n >= 38 && n < 72) {
+        switch (n) {
+        case 70:
+            return gdb_get_regl(mem_buf, (int32_t)env->active_fpu.fcr31);
+        case 71:
+            return gdb_get_regl(mem_buf, (int32_t)env->active_fpu.fcr0);
+        default:
             if (env->CP0_Status & (1 << CP0St_FR)) {
                 return gdb_get_regl(mem_buf,
                     env->active_fpu.fpr[n - 38].d);
@@ -39,12 +44,6 @@
                     env->active_fpu.fpr[n - 38].w[FP_ENDIAN_IDX]);
             }
         }
-        switch (n) {
-        case 70:
-            return gdb_get_regl(mem_buf, (int32_t)env->active_fpu.fcr31);
-        case 71:
-            return gdb_get_regl(mem_buf, (int32_t)env->active_fpu.fcr0);
-        }
     }
     switch (n) {
     case 32:
@@ -64,8 +63,10 @@
         return gdb_get_regl(mem_buf, 0); /* fp */
     case 89:
         return gdb_get_regl(mem_buf, (int32_t)env->CP0_PRid);
-    }
-    if (n >= 73 && n <= 88) {
+    default:
+        if (n > 89) {
+            return 0;
+        }
         /* 16 embedded regs.  */
         return gdb_get_regl(mem_buf, 0);
     }
@@ -73,10 +74,6 @@
     return 0;
 }
 
-#define RESTORE_ROUNDING_MODE \
-    set_float_rounding_mode(ieee_rm[env->active_fpu.fcr31 & 3], \
-                            &env->active_fpu.fp_status)
-
 int mips_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
 {
     MIPSCPU *cpu = MIPS_CPU(cs);
@@ -89,30 +86,33 @@
         env->active_tc.gpr[n] = tmp;
         return sizeof(target_ulong);
     }
-    if (env->CP0_Config1 & (1 << CP0C1_FP)
-            && n >= 38 && n < 73) {
-        if (n < 70) {
+    if (env->CP0_Config1 & (1 << CP0C1_FP) && n >= 38 && n < 72) {
+        switch (n) {
+        case 70:
+            env->active_fpu.fcr31 = tmp & 0xFF83FFFF;
+            /* set rounding mode */
+            restore_rounding_mode(env);
+            /* set flush-to-zero mode */
+            restore_flush_mode(env);
+            break;
+        case 71:
+            /* FIR is read-only.  Ignore writes.  */
+            break;
+        default:
             if (env->CP0_Status & (1 << CP0St_FR)) {
                 env->active_fpu.fpr[n - 38].d = tmp;
             } else {
                 env->active_fpu.fpr[n - 38].w[FP_ENDIAN_IDX] = tmp;
             }
-        }
-        switch (n) {
-        case 70:
-            env->active_fpu.fcr31 = tmp & 0xFF83FFFF;
-            /* set rounding mode */
-            RESTORE_ROUNDING_MODE;
-            break;
-        case 71:
-            env->active_fpu.fcr0 = tmp;
             break;
         }
         return sizeof(target_ulong);
     }
     switch (n) {
     case 32:
-        env->CP0_Status = tmp;
+#ifndef CONFIG_USER_ONLY
+        cpu_mips_store_status(env, tmp);
+#endif
         break;
     case 33:
         env->active_tc.LO[0] = tmp;
@@ -124,7 +124,9 @@
         env->CP0_BadVAddr = tmp;
         break;
     case 36:
-        env->CP0_Cause = tmp;
+#ifndef CONFIG_USER_ONLY
+        cpu_mips_store_cause(env, tmp);
+#endif
         break;
     case 37:
         env->active_tc.PC = tmp & ~(target_ulong)1;
diff --git a/target-mips/helper.c b/target-mips/helper.c
index 3a93c20..8e3204a 100644
--- a/target-mips/helper.c
+++ b/target-mips/helper.c
@@ -341,7 +341,8 @@
 #if 0
     log_cpu_state(cs, 0);
 #endif
-    qemu_log("%s pc " TARGET_FMT_lx " ad %" VADDR_PRIx " rw %d mmu_idx %d\n",
+    qemu_log_mask(CPU_LOG_MMU,
+              "%s pc " TARGET_FMT_lx " ad %" VADDR_PRIx " rw %d mmu_idx %d\n",
               __func__, env->active_tc.PC, address, rw, mmu_idx);
 
     /* data access */
@@ -351,7 +352,8 @@
     access_type = ACCESS_INT;
     ret = get_physical_address(env, &physical, &prot,
                                address, rw, access_type);
-    qemu_log("%s address=%" VADDR_PRIx " ret %d physical " TARGET_FMT_plx
+    qemu_log_mask(CPU_LOG_MMU,
+             "%s address=%" VADDR_PRIx " ret %d physical " TARGET_FMT_plx
              " prot %d\n",
              __func__, address, ret, physical, prot);
     if (ret == TLBRET_MATCH) {
@@ -388,7 +390,6 @@
         return physical;
     }
 }
-#endif
 
 static const char * const excp_names[EXCP_LAST + 1] = {
     [EXCP_RESET] = "reset",
@@ -429,6 +430,7 @@
     [EXCP_MSADIS] = "MSA disabled",
     [EXCP_MSAFPE] = "MSA floating point",
 };
+#endif
 
 target_ulong exception_resume_pc (CPUMIPSState *env)
 {
@@ -527,7 +529,10 @@
         env->CP0_DEPC = exception_resume_pc(env);
         env->hflags &= ~MIPS_HFLAG_BMASK;
  enter_debug_mode:
-        env->hflags |= MIPS_HFLAG_DM | MIPS_HFLAG_64 | MIPS_HFLAG_CP0;
+        if (env->insn_flags & ISA_MIPS3) {
+            env->hflags |= MIPS_HFLAG_64;
+        }
+        env->hflags |= MIPS_HFLAG_DM | MIPS_HFLAG_CP0;
         env->hflags &= ~(MIPS_HFLAG_KSU);
         /* EJTAG probe trap enable is not implemented... */
         if (!(env->CP0_Status & (1 << CP0St_EXL)))
@@ -548,7 +553,10 @@
         env->CP0_ErrorEPC = exception_resume_pc(env);
         env->hflags &= ~MIPS_HFLAG_BMASK;
         env->CP0_Status |= (1 << CP0St_ERL) | (1 << CP0St_BEV);
-        env->hflags |= MIPS_HFLAG_64 | MIPS_HFLAG_CP0;
+        if (env->insn_flags & ISA_MIPS3) {
+            env->hflags |= MIPS_HFLAG_64;
+        }
+        env->hflags |= MIPS_HFLAG_CP0;
         env->hflags &= ~(MIPS_HFLAG_KSU);
         if (!(env->CP0_Status & (1 << CP0St_EXL)))
             env->CP0_Cause &= ~(1U << CP0Ca_BD);
@@ -726,7 +734,10 @@
                 env->CP0_Cause &= ~(1U << CP0Ca_BD);
             }
             env->CP0_Status |= (1 << CP0St_EXL);
-            env->hflags |= MIPS_HFLAG_64 | MIPS_HFLAG_CP0;
+            if (env->insn_flags & ISA_MIPS3) {
+                env->hflags |= MIPS_HFLAG_64;
+            }
+            env->hflags |= MIPS_HFLAG_CP0;
             env->hflags &= ~(MIPS_HFLAG_KSU);
         }
         env->hflags &= ~MIPS_HFLAG_BMASK;
diff --git a/target-mips/helper.h b/target-mips/helper.h
index 9d02758..3bd0b02 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -137,6 +137,7 @@
 DEF_HELPER_2(mttc0_ebase, void, env, tl)
 DEF_HELPER_2(mtc0_config0, void, env, tl)
 DEF_HELPER_2(mtc0_config2, void, env, tl)
+DEF_HELPER_2(mtc0_config3, void, env, tl)
 DEF_HELPER_2(mtc0_config4, void, env, tl)
 DEF_HELPER_2(mtc0_config5, void, env, tl)
 DEF_HELPER_2(mtc0_lladdr, void, env, tl)
diff --git a/target-mips/msa_helper.c b/target-mips/msa_helper.c
index b08f37f..6e07f6e 100644
--- a/target-mips/msa_helper.c
+++ b/target-mips/msa_helper.c
@@ -1782,15 +1782,14 @@
 
 #define MSA_FLOAT_COND(DEST, OP, ARG1, ARG2, BITS, QUIET)                   \
     do {                                                                    \
+        float_status *status = &env->active_tc.msa_fp_status;               \
         int c;                                                              \
         int64_t cond;                                                       \
-        set_float_exception_flags(0, &env->active_tc.msa_fp_status);        \
+        set_float_exception_flags(0, status);                               \
         if (!QUIET) {                                                       \
-            cond = float ## BITS ## _ ## OP(ARG1, ARG2,                     \
-                                          &env->active_tc.msa_fp_status);   \
+            cond = float ## BITS ## _ ## OP(ARG1, ARG2, status);            \
         } else {                                                            \
-            cond = float ## BITS ## _ ## OP ## _quiet(ARG1, ARG2,           \
-                                          &env->active_tc.msa_fp_status);   \
+            cond = float ## BITS ## _ ## OP ## _quiet(ARG1, ARG2, status);  \
         }                                                                   \
         DEST = cond ? M_MAX_UINT(BITS) : 0;                                 \
         c = update_msacsr(env, CLEAR_IS_INEXACT, 0);                        \
@@ -2375,11 +2374,11 @@
 
 #define MSA_FLOAT_BINOP(DEST, OP, ARG1, ARG2, BITS)                         \
     do {                                                                    \
+        float_status *status = &env->active_tc.msa_fp_status;               \
         int c;                                                              \
                                                                             \
-        set_float_exception_flags(0, &env->active_tc.msa_fp_status);        \
-        DEST = float ## BITS ## _ ## OP(ARG1, ARG2,                         \
-                                        &env->active_tc.msa_fp_status);     \
+        set_float_exception_flags(0, status);                               \
+        DEST = float ## BITS ## _ ## OP(ARG1, ARG2, status);                \
         c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
                                                                             \
         if (get_enabled_exceptions(env, c)) {                               \
@@ -2511,11 +2510,11 @@
 
 #define MSA_FLOAT_MULADD(DEST, ARG1, ARG2, ARG3, NEGATE, BITS)              \
     do {                                                                    \
+        float_status *status = &env->active_tc.msa_fp_status;               \
         int c;                                                              \
                                                                             \
-        set_float_exception_flags(0, &env->active_tc.msa_fp_status);        \
-        DEST = float ## BITS ## _muladd(ARG2, ARG3, ARG1, NEGATE,           \
-                                        &env->active_tc.msa_fp_status);     \
+        set_float_exception_flags(0, status);                               \
+        DEST = float ## BITS ## _muladd(ARG2, ARG3, ARG1, NEGATE, status);  \
         c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
                                                                             \
         if (get_enabled_exceptions(env, c)) {                               \
@@ -2630,10 +2629,11 @@
 
 #define MSA_FLOAT_UNOP(DEST, OP, ARG, BITS)                                 \
     do {                                                                    \
+        float_status *status = &env->active_tc.msa_fp_status;               \
         int c;                                                              \
                                                                             \
-        set_float_exception_flags(0, &env->active_tc.msa_fp_status);        \
-        DEST = float ## BITS ## _ ## OP(ARG, &env->active_tc.msa_fp_status);\
+        set_float_exception_flags(0, status);                               \
+        DEST = float ## BITS ## _ ## OP(ARG, status);                       \
         c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
                                                                             \
         if (get_enabled_exceptions(env, c)) {                               \
@@ -2678,10 +2678,11 @@
 
 #define MSA_FLOAT_UNOP_XD(DEST, OP, ARG, BITS, XBITS)                       \
     do {                                                                    \
+        float_status *status = &env->active_tc.msa_fp_status;               \
         int c;                                                              \
                                                                             \
-        set_float_exception_flags(0, &env->active_tc.msa_fp_status);        \
-        DEST = float ## BITS ## _ ## OP(ARG, &env->active_tc.msa_fp_status);\
+        set_float_exception_flags(0, status);                               \
+        DEST = float ## BITS ## _ ## OP(ARG, status);                       \
         c = update_msacsr(env, CLEAR_FS_UNDERFLOW, 0);                      \
                                                                             \
         if (get_enabled_exceptions(env, c)) {                               \
@@ -2728,11 +2729,11 @@
 
 #define MSA_FLOAT_MAXOP(DEST, OP, ARG1, ARG2, BITS)                         \
     do {                                                                    \
+        float_status *status = &env->active_tc.msa_fp_status;               \
         int c;                                                              \
                                                                             \
-        set_float_exception_flags(0, &env->active_tc.msa_fp_status);        \
-        DEST = float ## BITS ## _ ## OP(ARG1, ARG2,                         \
-                                        &env->active_tc.msa_fp_status);     \
+        set_float_exception_flags(0, status);                               \
+        DEST = float ## BITS ## _ ## OP(ARG1, ARG2, status);                \
         c = update_msacsr(env, 0, 0);                                       \
                                                                             \
         if (get_enabled_exceptions(env, c)) {                               \
@@ -2924,10 +2925,11 @@
 
 #define MSA_FLOAT_UNOP0(DEST, OP, ARG, BITS)                                \
     do {                                                                    \
+        float_status *status = &env->active_tc.msa_fp_status;               \
         int c;                                                              \
                                                                             \
-        set_float_exception_flags(0, &env->active_tc.msa_fp_status);        \
-        DEST = float ## BITS ## _ ## OP(ARG, &env->active_tc.msa_fp_status);\
+        set_float_exception_flags(0, status);                               \
+        DEST = float ## BITS ## _ ## OP(ARG, status);                       \
         c = update_msacsr(env, CLEAR_FS_UNDERFLOW, 0);                      \
                                                                             \
         if (get_enabled_exceptions(env, c)) {                               \
@@ -3029,11 +3031,11 @@
 
 #define MSA_FLOAT_RECIPROCAL(DEST, ARG, BITS)                               \
     do {                                                                    \
+        float_status *status = &env->active_tc.msa_fp_status;               \
         int c;                                                              \
                                                                             \
-        set_float_exception_flags(0, &env->active_tc.msa_fp_status);        \
-        DEST = float ## BITS ## _ ## div(FLOAT_ONE ## BITS, ARG,            \
-                                         &env->active_tc.msa_fp_status);    \
+        set_float_exception_flags(0, status);                               \
+        DEST = float ## BITS ## _ ## div(FLOAT_ONE ## BITS, ARG, status);   \
         c = update_msacsr(env, float ## BITS ## _is_infinity(ARG) ||        \
                           float ## BITS ## _is_quiet_nan(DEST) ?            \
                           0 : RECIPROCAL_INEXACT,                           \
@@ -3138,23 +3140,20 @@
 
 #define MSA_FLOAT_LOGB(DEST, ARG, BITS)                                     \
     do {                                                                    \
+        float_status *status = &env->active_tc.msa_fp_status;               \
         int c;                                                              \
                                                                             \
-        set_float_exception_flags(0, &env->active_tc.msa_fp_status);        \
-        set_float_rounding_mode(float_round_down,                           \
-                                &env->active_tc.msa_fp_status);             \
-        DEST = float ## BITS ## _ ## log2(ARG,                              \
-                                          &env->active_tc.msa_fp_status);   \
-        DEST = float ## BITS ## _ ## round_to_int(DEST,                     \
-                                          &env->active_tc.msa_fp_status);   \
+        set_float_exception_flags(0, status);                               \
+        set_float_rounding_mode(float_round_down, status);                  \
+        DEST = float ## BITS ## _ ## log2(ARG, status);                     \
+        DEST = float ## BITS ## _ ## round_to_int(DEST, status);            \
         set_float_rounding_mode(ieee_rm[(env->active_tc.msacsr &            \
                                          MSACSR_RM_MASK) >> MSACSR_RM],     \
-                                &env->active_tc.msa_fp_status);             \
+                                status);                                    \
                                                                             \
-        set_float_exception_flags(                                          \
-            get_float_exception_flags(&env->active_tc.msa_fp_status)        \
-                                                & (~float_flag_inexact),    \
-            &env->active_tc.msa_fp_status);                                 \
+        set_float_exception_flags(get_float_exception_flags(status) &       \
+                                  (~float_flag_inexact),                    \
+                                  status);                                  \
                                                                             \
         c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
                                                                             \
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index 638c9f9..d619ba4 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -625,40 +625,9 @@
 
    These helper call synchronizes the regs for a given cpu.  */
 
-/* Called for updates to CP0_Status.  */
-static void sync_c0_status(CPUMIPSState *env, CPUMIPSState *cpu, int tc)
-{
-    int32_t tcstatus, *tcst;
-    uint32_t v = cpu->CP0_Status;
-    uint32_t cu, mx, asid, ksu;
-    uint32_t mask = ((1 << CP0TCSt_TCU3)
-                       | (1 << CP0TCSt_TCU2)
-                       | (1 << CP0TCSt_TCU1)
-                       | (1 << CP0TCSt_TCU0)
-                       | (1 << CP0TCSt_TMX)
-                       | (3 << CP0TCSt_TKSU)
-                       | (0xff << CP0TCSt_TASID));
-
-    cu = (v >> CP0St_CU0) & 0xf;
-    mx = (v >> CP0St_MX) & 0x1;
-    ksu = (v >> CP0St_KSU) & 0x3;
-    asid = env->CP0_EntryHi & 0xff;
-
-    tcstatus = cu << CP0TCSt_TCU0;
-    tcstatus |= mx << CP0TCSt_TMX;
-    tcstatus |= ksu << CP0TCSt_TKSU;
-    tcstatus |= asid;
-
-    if (tc == cpu->current_tc) {
-        tcst = &cpu->active_tc.CP0_TCStatus;
-    } else {
-        tcst = &cpu->tcs[tc].CP0_TCStatus;
-    }
-
-    *tcst &= ~mask;
-    *tcst |= tcstatus;
-    compute_hflags(cpu);
-}
+/* Called for updates to CP0_Status.  Defined in "cpu.h" for gdbstub.c.  */
+/* static inline void sync_c0_status(CPUMIPSState *env, CPUMIPSState *cpu,
+                                     int tc);  */
 
 /* Called for updates to CP0_TCStatus.  */
 static void sync_c0_tcstatus(CPUMIPSState *cpu, int tc,
@@ -1420,23 +1389,10 @@
 {
     MIPSCPU *cpu = mips_env_get_cpu(env);
     uint32_t val, old;
-    uint32_t mask = env->CP0_Status_rw_bitmask;
 
-    if (env->insn_flags & ISA_MIPS32R6) {
-        if (extract32(env->CP0_Status, CP0St_KSU, 2) == 0x3) {
-            mask &= ~(3 << CP0St_KSU);
-        }
-        mask &= ~(0x00180000 & arg1);
-    }
-
-    val = arg1 & mask;
     old = env->CP0_Status;
-    env->CP0_Status = (env->CP0_Status & ~mask) | val;
-    if (env->CP0_Config3 & (1 << CP0C3_MT)) {
-        sync_c0_status(env, env, env->current_tc);
-    } else {
-        compute_hflags(env);
-    }
+    cpu_mips_store_status(env, arg1);
+    val = env->CP0_Status;
 
     if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
         qemu_log("Status %08x (%08x) => %08x (%08x) Cause %08x",
@@ -1457,9 +1413,10 @@
 void helper_mttc0_status(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
+    uint32_t mask = env->CP0_Status_rw_bitmask & ~0xf1000018;
     CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    other->CP0_Status = arg1 & ~0xf1000018;
+    other->CP0_Status = (other->CP0_Status & ~mask) | (arg1 & mask);
     sync_c0_status(env, other, other_tc);
 }
 
@@ -1475,40 +1432,9 @@
     env->CP0_SRSCtl = (env->CP0_SRSCtl & ~mask) | (arg1 & mask);
 }
 
-static void mtc0_cause(CPUMIPSState *cpu, target_ulong arg1)
-{
-    uint32_t mask = 0x00C00300;
-    uint32_t old = cpu->CP0_Cause;
-    int i;
-
-    if (cpu->insn_flags & ISA_MIPS32R2) {
-        mask |= 1 << CP0Ca_DC;
-    }
-    if (cpu->insn_flags & ISA_MIPS32R6) {
-        mask &= ~((1 << CP0Ca_WP) & arg1);
-    }
-
-    cpu->CP0_Cause = (cpu->CP0_Cause & ~mask) | (arg1 & mask);
-
-    if ((old ^ cpu->CP0_Cause) & (1 << CP0Ca_DC)) {
-        if (cpu->CP0_Cause & (1 << CP0Ca_DC)) {
-            cpu_mips_stop_count(cpu);
-        } else {
-            cpu_mips_start_count(cpu);
-        }
-    }
-
-    /* Set/reset software interrupts */
-    for (i = 0 ; i < 2 ; i++) {
-        if ((old ^ cpu->CP0_Cause) & (1 << (CP0Ca_IP + i))) {
-            cpu_mips_soft_irq(cpu, i, cpu->CP0_Cause & (1 << (CP0Ca_IP + i)));
-        }
-    }
-}
-
 void helper_mtc0_cause(CPUMIPSState *env, target_ulong arg1)
 {
-    mtc0_cause(env, arg1);
+    cpu_mips_store_cause(env, arg1);
 }
 
 void helper_mttc0_cause(CPUMIPSState *env, target_ulong arg1)
@@ -1516,7 +1442,7 @@
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
     CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
-    mtc0_cause(other, arg1);
+    cpu_mips_store_cause(other, arg1);
 }
 
 target_ulong helper_mftc0_epc(CPUMIPSState *env)
@@ -1578,6 +1504,14 @@
     env->CP0_Config2 = (env->CP0_Config2 & 0x8FFF0FFF);
 }
 
+void helper_mtc0_config3(CPUMIPSState *env, target_ulong arg1)
+{
+    if (env->insn_flags & ASE_MICROMIPS) {
+        env->CP0_Config3 = (env->CP0_Config3 & ~(1 << CP0C3_ISA_ON_EXC)) |
+                           (arg1 & (1 << CP0C3_ISA_ON_EXC));
+    }
+}
+
 void helper_mtc0_config4(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_Config4 = (env->CP0_Config4 & (~env->CP0_Config4_rw_bitmask)) |
@@ -2346,18 +2280,6 @@
     float_round_down
 };
 
-static inline void restore_rounding_mode(CPUMIPSState *env)
-{
-    set_float_rounding_mode(ieee_rm[env->active_fpu.fcr31 & 3],
-                            &env->active_fpu.fp_status);
-}
-
-static inline void restore_flush_mode(CPUMIPSState *env)
-{
-    set_flush_to_zero((env->active_fpu.fcr31 & (1 << 24)) != 0,
-                      &env->active_fpu.fp_status);
-}
-
 target_ulong helper_cfc1(CPUMIPSState *env, uint32_t reg)
 {
     target_ulong arg1 = 0;
@@ -2659,11 +2581,11 @@
     uint32_t wt2;
 
     wt2 = float32_to_int32(fst0, &env->active_fpu.fp_status);
-    update_fcr31(env, GETPC());
     if (get_float_exception_flags(&env->active_fpu.fp_status)
         & (float_flag_invalid | float_flag_overflow)) {
         wt2 = FP_TO_INT32_OVERFLOW;
     }
+    update_fcr31(env, GETPC());
     return wt2;
 }
 
@@ -2935,110 +2857,6 @@
 FLOAT_UNOP(chs)
 #undef FLOAT_UNOP
 
-#define FLOAT_FMADDSUB(name, bits, muladd_arg)                          \
-uint ## bits ## _t helper_float_ ## name (CPUMIPSState *env,            \
-                                          uint ## bits ## _t fs,        \
-                                          uint ## bits ## _t ft,        \
-                                          uint ## bits ## _t fd)        \
-{                                                                       \
-    uint ## bits ## _t fdret;                                           \
-                                                                        \
-    fdret = float ## bits ## _muladd(fs, ft, fd, muladd_arg,            \
-                                     &env->active_fpu.fp_status);       \
-    update_fcr31(env, GETPC());                                         \
-    return fdret;                                                       \
-}
-
-FLOAT_FMADDSUB(maddf_s, 32, 0)
-FLOAT_FMADDSUB(maddf_d, 64, 0)
-FLOAT_FMADDSUB(msubf_s, 32, float_muladd_negate_product)
-FLOAT_FMADDSUB(msubf_d, 64, float_muladd_negate_product)
-#undef FLOAT_FMADDSUB
-
-#define FLOAT_MINMAX(name, bits, minmaxfunc)                            \
-uint ## bits ## _t helper_float_ ## name (CPUMIPSState *env,            \
-                                          uint ## bits ## _t fs,        \
-                                          uint ## bits ## _t ft)        \
-{                                                                       \
-    uint ## bits ## _t fdret;                                           \
-                                                                        \
-    fdret = float ## bits ## _ ## minmaxfunc(fs, ft,                    \
-                                           &env->active_fpu.fp_status); \
-    update_fcr31(env, GETPC());                                         \
-    return fdret;                                                       \
-}
-
-FLOAT_MINMAX(max_s, 32, maxnum)
-FLOAT_MINMAX(max_d, 64, maxnum)
-FLOAT_MINMAX(maxa_s, 32, maxnummag)
-FLOAT_MINMAX(maxa_d, 64, maxnummag)
-
-FLOAT_MINMAX(min_s, 32, minnum)
-FLOAT_MINMAX(min_d, 64, minnum)
-FLOAT_MINMAX(mina_s, 32, minnummag)
-FLOAT_MINMAX(mina_d, 64, minnummag)
-#undef FLOAT_MINMAX
-
-#define FLOAT_RINT(name, bits)                                              \
-uint ## bits ## _t helper_float_ ## name (CPUMIPSState *env,                \
-                                          uint ## bits ## _t fs)            \
-{                                                                           \
-    uint ## bits ## _t fdret;                                               \
-                                                                            \
-    fdret = float ## bits ## _round_to_int(fs, &env->active_fpu.fp_status); \
-    update_fcr31(env, GETPC());                                             \
-    return fdret;                                                           \
-}
-
-FLOAT_RINT(rint_s, 32)
-FLOAT_RINT(rint_d, 64)
-#undef FLOAT_RINT
-
-#define FLOAT_CLASS_SIGNALING_NAN      0x001
-#define FLOAT_CLASS_QUIET_NAN          0x002
-#define FLOAT_CLASS_NEGATIVE_INFINITY  0x004
-#define FLOAT_CLASS_NEGATIVE_NORMAL    0x008
-#define FLOAT_CLASS_NEGATIVE_SUBNORMAL 0x010
-#define FLOAT_CLASS_NEGATIVE_ZERO      0x020
-#define FLOAT_CLASS_POSITIVE_INFINITY  0x040
-#define FLOAT_CLASS_POSITIVE_NORMAL    0x080
-#define FLOAT_CLASS_POSITIVE_SUBNORMAL 0x100
-#define FLOAT_CLASS_POSITIVE_ZERO      0x200
-
-#define FLOAT_CLASS(name, bits)                                      \
-uint ## bits ## _t helper_float_ ## name (uint ## bits ## _t arg)    \
-{                                                                    \
-    if (float ## bits ## _is_signaling_nan(arg)) {                   \
-        return FLOAT_CLASS_SIGNALING_NAN;                            \
-    } else if (float ## bits ## _is_quiet_nan(arg)) {                \
-        return FLOAT_CLASS_QUIET_NAN;                                \
-    } else if (float ## bits ## _is_neg(arg)) {                      \
-        if (float ## bits ## _is_infinity(arg)) {                    \
-            return FLOAT_CLASS_NEGATIVE_INFINITY;                    \
-        } else if (float ## bits ## _is_zero(arg)) {                 \
-            return FLOAT_CLASS_NEGATIVE_ZERO;                        \
-        } else if (float ## bits ## _is_zero_or_denormal(arg)) {     \
-            return FLOAT_CLASS_NEGATIVE_SUBNORMAL;                   \
-        } else {                                                     \
-            return FLOAT_CLASS_NEGATIVE_NORMAL;                      \
-        }                                                            \
-    } else {                                                         \
-        if (float ## bits ## _is_infinity(arg)) {                    \
-            return FLOAT_CLASS_POSITIVE_INFINITY;                    \
-        } else if (float ## bits ## _is_zero(arg)) {                 \
-            return FLOAT_CLASS_POSITIVE_ZERO;                        \
-        } else if (float ## bits ## _is_zero_or_denormal(arg)) {     \
-            return FLOAT_CLASS_POSITIVE_SUBNORMAL;                   \
-        } else {                                                     \
-            return FLOAT_CLASS_POSITIVE_NORMAL;                      \
-        }                                                            \
-    }                                                                \
-}
-
-FLOAT_CLASS(class_s, 32)
-FLOAT_CLASS(class_d, 64)
-#undef FLOAT_CLASS
-
 /* MIPS specific unary operations */
 uint64_t helper_float_recip_d(CPUMIPSState *env, uint64_t fdt0)
 {
@@ -3140,7 +2958,65 @@
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
-#define FLOAT_OP(name, p) void helper_float_##name##_##p(CPUMIPSState *env)
+#define FLOAT_RINT(name, bits)                                              \
+uint ## bits ## _t helper_float_ ## name (CPUMIPSState *env,                \
+                                          uint ## bits ## _t fs)            \
+{                                                                           \
+    uint ## bits ## _t fdret;                                               \
+                                                                            \
+    fdret = float ## bits ## _round_to_int(fs, &env->active_fpu.fp_status); \
+    update_fcr31(env, GETPC());                                             \
+    return fdret;                                                           \
+}
+
+FLOAT_RINT(rint_s, 32)
+FLOAT_RINT(rint_d, 64)
+#undef FLOAT_RINT
+
+#define FLOAT_CLASS_SIGNALING_NAN      0x001
+#define FLOAT_CLASS_QUIET_NAN          0x002
+#define FLOAT_CLASS_NEGATIVE_INFINITY  0x004
+#define FLOAT_CLASS_NEGATIVE_NORMAL    0x008
+#define FLOAT_CLASS_NEGATIVE_SUBNORMAL 0x010
+#define FLOAT_CLASS_NEGATIVE_ZERO      0x020
+#define FLOAT_CLASS_POSITIVE_INFINITY  0x040
+#define FLOAT_CLASS_POSITIVE_NORMAL    0x080
+#define FLOAT_CLASS_POSITIVE_SUBNORMAL 0x100
+#define FLOAT_CLASS_POSITIVE_ZERO      0x200
+
+#define FLOAT_CLASS(name, bits)                                      \
+uint ## bits ## _t helper_float_ ## name (uint ## bits ## _t arg)    \
+{                                                                    \
+    if (float ## bits ## _is_signaling_nan(arg)) {                   \
+        return FLOAT_CLASS_SIGNALING_NAN;                            \
+    } else if (float ## bits ## _is_quiet_nan(arg)) {                \
+        return FLOAT_CLASS_QUIET_NAN;                                \
+    } else if (float ## bits ## _is_neg(arg)) {                      \
+        if (float ## bits ## _is_infinity(arg)) {                    \
+            return FLOAT_CLASS_NEGATIVE_INFINITY;                    \
+        } else if (float ## bits ## _is_zero(arg)) {                 \
+            return FLOAT_CLASS_NEGATIVE_ZERO;                        \
+        } else if (float ## bits ## _is_zero_or_denormal(arg)) {     \
+            return FLOAT_CLASS_NEGATIVE_SUBNORMAL;                   \
+        } else {                                                     \
+            return FLOAT_CLASS_NEGATIVE_NORMAL;                      \
+        }                                                            \
+    } else {                                                         \
+        if (float ## bits ## _is_infinity(arg)) {                    \
+            return FLOAT_CLASS_POSITIVE_INFINITY;                    \
+        } else if (float ## bits ## _is_zero(arg)) {                 \
+            return FLOAT_CLASS_POSITIVE_ZERO;                        \
+        } else if (float ## bits ## _is_zero_or_denormal(arg)) {     \
+            return FLOAT_CLASS_POSITIVE_SUBNORMAL;                   \
+        } else {                                                     \
+            return FLOAT_CLASS_POSITIVE_NORMAL;                      \
+        }                                                            \
+    }                                                                \
+}
+
+FLOAT_CLASS(class_s, 32)
+FLOAT_CLASS(class_d, 64)
+#undef FLOAT_CLASS
 
 /* binary operations */
 #define FLOAT_BINOP(name)                                          \
@@ -3187,61 +3063,6 @@
 FLOAT_BINOP(div)
 #undef FLOAT_BINOP
 
-#define UNFUSED_FMA(prefix, a, b, c, flags)                          \
-{                                                                    \
-    a = prefix##_mul(a, b, &env->active_fpu.fp_status);              \
-    if ((flags) & float_muladd_negate_c) {                           \
-        a = prefix##_sub(a, c, &env->active_fpu.fp_status);          \
-    } else {                                                         \
-        a = prefix##_add(a, c, &env->active_fpu.fp_status);          \
-    }                                                                \
-    if ((flags) & float_muladd_negate_result) {                      \
-        a = prefix##_chs(a);                                         \
-    }                                                                \
-}
-
-/* FMA based operations */
-#define FLOAT_FMA(name, type)                                        \
-uint64_t helper_float_ ## name ## _d(CPUMIPSState *env,              \
-                                     uint64_t fdt0, uint64_t fdt1,   \
-                                     uint64_t fdt2)                  \
-{                                                                    \
-    UNFUSED_FMA(float64, fdt0, fdt1, fdt2, type);                    \
-    update_fcr31(env, GETPC());                                      \
-    return fdt0;                                                     \
-}                                                                    \
-                                                                     \
-uint32_t helper_float_ ## name ## _s(CPUMIPSState *env,              \
-                                     uint32_t fst0, uint32_t fst1,   \
-                                     uint32_t fst2)                  \
-{                                                                    \
-    UNFUSED_FMA(float32, fst0, fst1, fst2, type);                    \
-    update_fcr31(env, GETPC());                                      \
-    return fst0;                                                     \
-}                                                                    \
-                                                                     \
-uint64_t helper_float_ ## name ## _ps(CPUMIPSState *env,             \
-                                      uint64_t fdt0, uint64_t fdt1,  \
-                                      uint64_t fdt2)                 \
-{                                                                    \
-    uint32_t fst0 = fdt0 & 0XFFFFFFFF;                               \
-    uint32_t fsth0 = fdt0 >> 32;                                     \
-    uint32_t fst1 = fdt1 & 0XFFFFFFFF;                               \
-    uint32_t fsth1 = fdt1 >> 32;                                     \
-    uint32_t fst2 = fdt2 & 0XFFFFFFFF;                               \
-    uint32_t fsth2 = fdt2 >> 32;                                     \
-                                                                     \
-    UNFUSED_FMA(float32, fst0, fst1, fst2, type);                    \
-    UNFUSED_FMA(float32, fsth0, fsth1, fsth2, type);                 \
-    update_fcr31(env, GETPC());                                      \
-    return ((uint64_t)fsth0 << 32) | fst0;                           \
-}
-FLOAT_FMA(madd, 0)
-FLOAT_FMA(msub, float_muladd_negate_c)
-FLOAT_FMA(nmadd, float_muladd_negate_result)
-FLOAT_FMA(nmsub, float_muladd_negate_result | float_muladd_negate_c)
-#undef FLOAT_FMA
-
 /* MIPS specific binary operations */
 uint64_t helper_float_recip2_d(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2)
 {
@@ -3339,6 +3160,106 @@
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
+#define FLOAT_MINMAX(name, bits, minmaxfunc)                            \
+uint ## bits ## _t helper_float_ ## name (CPUMIPSState *env,            \
+                                          uint ## bits ## _t fs,        \
+                                          uint ## bits ## _t ft)        \
+{                                                                       \
+    uint ## bits ## _t fdret;                                           \
+                                                                        \
+    fdret = float ## bits ## _ ## minmaxfunc(fs, ft,                    \
+                                           &env->active_fpu.fp_status); \
+    update_fcr31(env, GETPC());                                         \
+    return fdret;                                                       \
+}
+
+FLOAT_MINMAX(max_s, 32, maxnum)
+FLOAT_MINMAX(max_d, 64, maxnum)
+FLOAT_MINMAX(maxa_s, 32, maxnummag)
+FLOAT_MINMAX(maxa_d, 64, maxnummag)
+
+FLOAT_MINMAX(min_s, 32, minnum)
+FLOAT_MINMAX(min_d, 64, minnum)
+FLOAT_MINMAX(mina_s, 32, minnummag)
+FLOAT_MINMAX(mina_d, 64, minnummag)
+#undef FLOAT_MINMAX
+
+/* ternary operations */
+#define UNFUSED_FMA(prefix, a, b, c, flags)                          \
+{                                                                    \
+    a = prefix##_mul(a, b, &env->active_fpu.fp_status);              \
+    if ((flags) & float_muladd_negate_c) {                           \
+        a = prefix##_sub(a, c, &env->active_fpu.fp_status);          \
+    } else {                                                         \
+        a = prefix##_add(a, c, &env->active_fpu.fp_status);          \
+    }                                                                \
+    if ((flags) & float_muladd_negate_result) {                      \
+        a = prefix##_chs(a);                                         \
+    }                                                                \
+}
+
+/* FMA based operations */
+#define FLOAT_FMA(name, type)                                        \
+uint64_t helper_float_ ## name ## _d(CPUMIPSState *env,              \
+                                     uint64_t fdt0, uint64_t fdt1,   \
+                                     uint64_t fdt2)                  \
+{                                                                    \
+    UNFUSED_FMA(float64, fdt0, fdt1, fdt2, type);                    \
+    update_fcr31(env, GETPC());                                      \
+    return fdt0;                                                     \
+}                                                                    \
+                                                                     \
+uint32_t helper_float_ ## name ## _s(CPUMIPSState *env,              \
+                                     uint32_t fst0, uint32_t fst1,   \
+                                     uint32_t fst2)                  \
+{                                                                    \
+    UNFUSED_FMA(float32, fst0, fst1, fst2, type);                    \
+    update_fcr31(env, GETPC());                                      \
+    return fst0;                                                     \
+}                                                                    \
+                                                                     \
+uint64_t helper_float_ ## name ## _ps(CPUMIPSState *env,             \
+                                      uint64_t fdt0, uint64_t fdt1,  \
+                                      uint64_t fdt2)                 \
+{                                                                    \
+    uint32_t fst0 = fdt0 & 0XFFFFFFFF;                               \
+    uint32_t fsth0 = fdt0 >> 32;                                     \
+    uint32_t fst1 = fdt1 & 0XFFFFFFFF;                               \
+    uint32_t fsth1 = fdt1 >> 32;                                     \
+    uint32_t fst2 = fdt2 & 0XFFFFFFFF;                               \
+    uint32_t fsth2 = fdt2 >> 32;                                     \
+                                                                     \
+    UNFUSED_FMA(float32, fst0, fst1, fst2, type);                    \
+    UNFUSED_FMA(float32, fsth0, fsth1, fsth2, type);                 \
+    update_fcr31(env, GETPC());                                      \
+    return ((uint64_t)fsth0 << 32) | fst0;                           \
+}
+FLOAT_FMA(madd, 0)
+FLOAT_FMA(msub, float_muladd_negate_c)
+FLOAT_FMA(nmadd, float_muladd_negate_result)
+FLOAT_FMA(nmsub, float_muladd_negate_result | float_muladd_negate_c)
+#undef FLOAT_FMA
+
+#define FLOAT_FMADDSUB(name, bits, muladd_arg)                          \
+uint ## bits ## _t helper_float_ ## name (CPUMIPSState *env,            \
+                                          uint ## bits ## _t fs,        \
+                                          uint ## bits ## _t ft,        \
+                                          uint ## bits ## _t fd)        \
+{                                                                       \
+    uint ## bits ## _t fdret;                                           \
+                                                                        \
+    fdret = float ## bits ## _muladd(fs, ft, fd, muladd_arg,            \
+                                     &env->active_fpu.fp_status);       \
+    update_fcr31(env, GETPC());                                         \
+    return fdret;                                                       \
+}
+
+FLOAT_FMADDSUB(maddf_s, 32, 0)
+FLOAT_FMADDSUB(maddf_d, 64, 0)
+FLOAT_FMADDSUB(msubf_s, 32, float_muladd_negate_product)
+FLOAT_FMADDSUB(msubf_d, 64, float_muladd_negate_product)
+#undef FLOAT_FMADDSUB
+
 /* compare operations */
 #define FOP_COND_D(op, cond)                                   \
 void helper_cmp_d_ ## op(CPUMIPSState *env, uint64_t fdt0,     \
diff --git a/target-mips/translate.c b/target-mips/translate.c
index f0b8e6f..1205909 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -1882,10 +1882,8 @@
 {                                                                       \
     TCGv_i ## bits fp0 = tcg_temp_new_i ## bits();                      \
     TCGv_i ## bits fp1 = tcg_temp_new_i ## bits();                      \
-    switch (ifmt) {                                                     \
-    case FMT_D:                                                         \
+    if (ifmt == FMT_D) {                                                \
         check_cp1_registers(ctx, fs | ft | fd);                         \
-        break;                                                          \
     }                                                                   \
     gen_ldcmp_fpr ## bits(ctx, fp0, fs);                                \
     gen_ldcmp_fpr ## bits(ctx, fp1, ft);                                \
@@ -2398,7 +2396,14 @@
 {
     if (ctx->CP0_Config1 & (1 << CP0C1_FP)) {
         check_cp1_enabled(ctx);
-        gen_flt_ldst(ctx, op, rt, rs, imm);
+        switch (op) {
+        case OPC_LDC1:
+        case OPC_SDC1:
+            check_insn(ctx, ISA_MIPS2);
+            /* Fallthrough */
+        default:
+            gen_flt_ldst(ctx, op, rt, rs, imm);
+        }
     } else {
         generate_exception_err(ctx, EXCP_CpU, 1);
     }
@@ -5846,8 +5851,10 @@
             ctx->bstate = BS_STOP;
             break;
         case 3:
-            /* ignored, read only */
+            gen_helper_mtc0_config3(cpu_env, arg);
             rn = "Config3";
+            /* Stop translation as we may have switched the execution mode */
+            ctx->bstate = BS_STOP;
             break;
         case 4:
             gen_helper_mtc0_config4(cpu_env, arg);
@@ -7097,8 +7104,10 @@
             ctx->bstate = BS_STOP;
             break;
         case 3:
-            /* ignored */
+            gen_helper_mtc0_config3(cpu_env, arg);
             rn = "Config3";
+            /* Stop translation as we may have switched the execution mode */
+            ctx->bstate = BS_STOP;
             break;
         case 4:
             /* currently ignored */
@@ -10717,6 +10726,7 @@
 {
     TCGv t0 = tcg_temp_new();
     TCGv t1 = tcg_temp_new();
+    TCGv t2 = tcg_temp_new();
     int args, astatic;
 
     switch (aregs) {
@@ -10775,7 +10785,8 @@
     gen_load_gpr(t0, 29);
 
 #define DECR_AND_STORE(reg) do {                                 \
-        tcg_gen_subi_tl(t0, t0, 4);                              \
+        tcg_gen_movi_tl(t2, -4);                                 \
+        gen_op_addr_add(ctx, t0, t0, t2);                        \
         gen_load_gpr(t1, reg);                                   \
         tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL); \
     } while (0)
@@ -10859,9 +10870,11 @@
     }
 #undef DECR_AND_STORE
 
-    tcg_gen_subi_tl(cpu_gpr[29], cpu_gpr[29], framesize);
+    tcg_gen_movi_tl(t2, -framesize);
+    gen_op_addr_add(ctx, cpu_gpr[29], cpu_gpr[29], t2);
     tcg_temp_free(t0);
     tcg_temp_free(t1);
+    tcg_temp_free(t2);
 }
 
 static void gen_mips16_restore (DisasContext *ctx,
@@ -10872,11 +10885,14 @@
     int astatic;
     TCGv t0 = tcg_temp_new();
     TCGv t1 = tcg_temp_new();
+    TCGv t2 = tcg_temp_new();
 
-    tcg_gen_addi_tl(t0, cpu_gpr[29], framesize);
+    tcg_gen_movi_tl(t2, framesize);
+    gen_op_addr_add(ctx, t0, cpu_gpr[29], t2);
 
 #define DECR_AND_LOAD(reg) do {                            \
-        tcg_gen_subi_tl(t0, t0, 4);                        \
+        tcg_gen_movi_tl(t2, -4);                           \
+        gen_op_addr_add(ctx, t0, t0, t2);                  \
         tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL); \
         gen_store_gpr(t1, reg);                            \
     } while (0)
@@ -10960,9 +10976,11 @@
     }
 #undef DECR_AND_LOAD
 
-    tcg_gen_addi_tl(cpu_gpr[29], cpu_gpr[29], framesize);
+    tcg_gen_movi_tl(t2, framesize);
+    gen_op_addr_add(ctx, cpu_gpr[29], cpu_gpr[29], t2);
     tcg_temp_free(t0);
     tcg_temp_free(t1);
+    tcg_temp_free(t2);
 }
 
 static void gen_addiupc (DisasContext *ctx, int rx, int imm,
@@ -10993,26 +11011,32 @@
 {
     switch (funct) {
     case I64_LDSP:
+        check_insn(ctx, ISA_MIPS3);
         check_mips_64(ctx);
         offset = extended ? offset : offset << 3;
         gen_ld(ctx, OPC_LD, ry, 29, offset);
         break;
     case I64_SDSP:
+        check_insn(ctx, ISA_MIPS3);
         check_mips_64(ctx);
         offset = extended ? offset : offset << 3;
         gen_st(ctx, OPC_SD, ry, 29, offset);
         break;
     case I64_SDRASP:
+        check_insn(ctx, ISA_MIPS3);
         check_mips_64(ctx);
         offset = extended ? offset : (ctx->opcode & 0xff) << 3;
         gen_st(ctx, OPC_SD, 31, 29, offset);
         break;
     case I64_DADJSP:
+        check_insn(ctx, ISA_MIPS3);
         check_mips_64(ctx);
         offset = extended ? offset : ((int8_t)ctx->opcode) << 3;
         gen_arith_imm(ctx, OPC_DADDIU, 29, 29, offset);
         break;
     case I64_LDPC:
+        check_insn(ctx, ISA_MIPS3);
+        check_mips_64(ctx);
         if (extended && (ctx->hflags & MIPS_HFLAG_BMASK)) {
             generate_exception(ctx, EXCP_RI);
         } else {
@@ -11021,16 +11045,19 @@
         }
         break;
     case I64_DADDIU5:
+        check_insn(ctx, ISA_MIPS3);
         check_mips_64(ctx);
         offset = extended ? offset : ((int8_t)(offset << 3)) >> 3;
         gen_arith_imm(ctx, OPC_DADDIU, ry, ry, offset);
         break;
     case I64_DADDIUPC:
+        check_insn(ctx, ISA_MIPS3);
         check_mips_64(ctx);
         offset = extended ? offset : offset << 2;
         gen_addiupc(ctx, ry, offset, 1, extended);
         break;
     case I64_DADDIUSP:
+        check_insn(ctx, ISA_MIPS3);
         check_mips_64(ctx);
         offset = extended ? offset : offset << 2;
         gen_arith_imm(ctx, OPC_DADDIU, ry, 29, offset);
@@ -11099,7 +11126,8 @@
         break;
 #if defined(TARGET_MIPS64)
     case M16_OPC_LD:
-            check_mips_64(ctx);
+        check_insn(ctx, ISA_MIPS3);
+        check_mips_64(ctx);
         gen_ld(ctx, OPC_LD, ry, rx, offset);
         break;
 #endif
@@ -11143,6 +11171,7 @@
             gen_arith_imm(ctx, OPC_ADDIU, 29, 29, imm);
             break;
         case I8_SVRS:
+            check_insn(ctx, ISA_MIPS32);
             {
                 int xsregs = (ctx->opcode >> 24) & 0x7;
                 int aregs = (ctx->opcode >> 16) & 0xf;
@@ -11176,6 +11205,8 @@
         break;
 #if defined(TARGET_MIPS64)
     case M16_OPC_SD:
+        check_insn(ctx, ISA_MIPS3);
+        check_mips_64(ctx);
         gen_st(ctx, OPC_SD, ry, rx, offset);
         break;
 #endif
@@ -11202,6 +11233,8 @@
         break;
 #if defined(TARGET_MIPS64)
     case M16_OPC_LWU:
+        check_insn(ctx, ISA_MIPS3);
+        check_mips_64(ctx);
         gen_ld(ctx, OPC_LWU, ry, rx, offset);
         break;
 #endif
@@ -11291,6 +11324,7 @@
             break;
         case 0x1:
 #if defined(TARGET_MIPS64)
+            check_insn(ctx, ISA_MIPS3);
             check_mips_64(ctx);
             gen_shift_imm(ctx, OPC_DSLL, rx, ry, sa);
 #else
@@ -11307,6 +11341,7 @@
         break;
 #if defined(TARGET_MIPS64)
     case M16_OPC_LD:
+        check_insn(ctx, ISA_MIPS3);
         check_mips_64(ctx);
         gen_ld(ctx, OPC_LD, ry, rx, offset << 3);
         break;
@@ -11317,6 +11352,7 @@
 
             if ((ctx->opcode >> 4) & 1) {
 #if defined(TARGET_MIPS64)
+                check_insn(ctx, ISA_MIPS3);
                 check_mips_64(ctx);
                 gen_arith_imm(ctx, OPC_DADDIU, ry, rx, imm);
 #else
@@ -11368,6 +11404,7 @@
                               ((int8_t)ctx->opcode) << 3);
                 break;
             case I8_SVRS:
+                check_insn(ctx, ISA_MIPS32);
                 {
                     int do_ra = ctx->opcode & (1 << 6);
                     int do_s0 = ctx->opcode & (1 << 5);
@@ -11423,6 +11460,7 @@
         break;
 #if defined(TARGET_MIPS64)
     case M16_OPC_SD:
+        check_insn(ctx, ISA_MIPS3);
         check_mips_64(ctx);
         gen_st(ctx, OPC_SD, ry, rx, offset << 3);
         break;
@@ -11450,6 +11488,7 @@
         break;
 #if defined (TARGET_MIPS64)
     case M16_OPC_LWU:
+        check_insn(ctx, ISA_MIPS3);
         check_mips_64(ctx);
         gen_ld(ctx, OPC_LWU, ry, rx, offset << 2);
         break;
@@ -11481,10 +11520,12 @@
 #if defined(TARGET_MIPS64)
             case RRR_DADDU:
                 mips32_op = OPC_DADDU;
+                check_insn(ctx, ISA_MIPS3);
                 check_mips_64(ctx);
                 break;
             case RRR_DSUBU:
                 mips32_op = OPC_DSUBU;
+                check_insn(ctx, ISA_MIPS3);
                 check_mips_64(ctx);
                 break;
 #endif
@@ -11506,6 +11547,10 @@
                 int link = (ctx->opcode >> 6) & 0x1;
                 int ra = (ctx->opcode >> 5) & 0x1;
 
+                if (nd) {
+                    check_insn(ctx, ISA_MIPS32);
+                }
+
                 if (link) {
                     op = OPC_JALR;
                 } else {
@@ -11547,6 +11592,7 @@
             break;
 #if defined (TARGET_MIPS64)
         case RR_DSRL:
+            check_insn(ctx, ISA_MIPS3);
             check_mips_64(ctx);
             gen_shift_imm(ctx, OPC_DSRL, ry, ry, sa);
             break;
@@ -11573,6 +11619,7 @@
             gen_HILO(ctx, OPC_MFHI, 0, rx);
             break;
         case RR_CNVT:
+            check_insn(ctx, ISA_MIPS32);
             switch (cnvt_op) {
             case RR_RY_CNVT_ZEB:
                 tcg_gen_ext8u_tl(cpu_gpr[rx], cpu_gpr[rx]);
@@ -11588,10 +11635,12 @@
                 break;
 #if defined (TARGET_MIPS64)
             case RR_RY_CNVT_ZEW:
+                check_insn(ctx, ISA_MIPS64);
                 check_mips_64(ctx);
                 tcg_gen_ext32u_tl(cpu_gpr[rx], cpu_gpr[rx]);
                 break;
             case RR_RY_CNVT_SEW:
+                check_insn(ctx, ISA_MIPS64);
                 check_mips_64(ctx);
                 tcg_gen_ext32s_tl(cpu_gpr[rx], cpu_gpr[rx]);
                 break;
@@ -11606,18 +11655,22 @@
             break;
 #if defined (TARGET_MIPS64)
         case RR_DSRA:
+            check_insn(ctx, ISA_MIPS3);
             check_mips_64(ctx);
             gen_shift_imm(ctx, OPC_DSRA, ry, ry, sa);
             break;
         case RR_DSLLV:
+            check_insn(ctx, ISA_MIPS3);
             check_mips_64(ctx);
             gen_shift(ctx, OPC_DSLLV, ry, rx, ry);
             break;
         case RR_DSRLV:
+            check_insn(ctx, ISA_MIPS3);
             check_mips_64(ctx);
             gen_shift(ctx, OPC_DSRLV, ry, rx, ry);
             break;
         case RR_DSRAV:
+            check_insn(ctx, ISA_MIPS3);
             check_mips_64(ctx);
             gen_shift(ctx, OPC_DSRAV, ry, rx, ry);
             break;
@@ -11636,18 +11689,22 @@
             break;
 #if defined (TARGET_MIPS64)
         case RR_DMULT:
+            check_insn(ctx, ISA_MIPS3);
             check_mips_64(ctx);
             gen_muldiv(ctx, OPC_DMULT, 0, rx, ry);
             break;
         case RR_DMULTU:
+            check_insn(ctx, ISA_MIPS3);
             check_mips_64(ctx);
             gen_muldiv(ctx, OPC_DMULTU, 0, rx, ry);
             break;
         case RR_DDIV:
+            check_insn(ctx, ISA_MIPS3);
             check_mips_64(ctx);
             gen_muldiv(ctx, OPC_DDIV, 0, rx, ry);
             break;
         case RR_DDIVU:
+            check_insn(ctx, ISA_MIPS3);
             check_mips_64(ctx);
             gen_muldiv(ctx, OPC_DDIVU, 0, rx, ry);
             break;
@@ -13212,20 +13269,26 @@
             /* COP2: Not implemented. */
             generate_exception_err(ctx, EXCP_CpU, 2);
             break;
-        case LWP:
-        case SWP:
 #ifdef TARGET_MIPS64
         case LDP:
         case SDP:
+            check_insn(ctx, ISA_MIPS3);
+            check_mips_64(ctx);
+            /* Fallthrough */
 #endif
+        case LWP:
+        case SWP:
             gen_ldst_pair(ctx, minor, rt, rs, SIMM(ctx->opcode, 0, 12));
             break;
-        case LWM32:
-        case SWM32:
 #ifdef TARGET_MIPS64
         case LDM:
         case SDM:
+            check_insn(ctx, ISA_MIPS3);
+            check_mips_64(ctx);
+            /* Fallthrough */
 #endif
+        case LWM32:
+        case SWM32:
             gen_ldst_multiple(ctx, minor, rt, rs, SIMM(ctx->opcode, 0, 12));
             break;
         default:
@@ -13649,21 +13712,33 @@
             goto do_st_lr;
 #if defined(TARGET_MIPS64)
         case LDL:
+            check_insn(ctx, ISA_MIPS3);
+            check_mips_64(ctx);
             mips32_op = OPC_LDL;
             goto do_ld_lr;
         case SDL:
+            check_insn(ctx, ISA_MIPS3);
+            check_mips_64(ctx);
             mips32_op = OPC_SDL;
             goto do_st_lr;
         case LDR:
+            check_insn(ctx, ISA_MIPS3);
+            check_mips_64(ctx);
             mips32_op = OPC_LDR;
             goto do_ld_lr;
         case SDR:
+            check_insn(ctx, ISA_MIPS3);
+            check_mips_64(ctx);
             mips32_op = OPC_SDR;
             goto do_st_lr;
         case LWU:
+            check_insn(ctx, ISA_MIPS3);
+            check_mips_64(ctx);
             mips32_op = OPC_LWU;
             goto do_ld_lr;
         case LLD:
+            check_insn(ctx, ISA_MIPS3);
+            check_mips_64(ctx);
             mips32_op = OPC_LLD;
             goto do_ld_lr;
 #endif
@@ -13681,6 +13756,8 @@
             break;
 #if defined(TARGET_MIPS64)
         case SCD:
+            check_insn(ctx, ISA_MIPS3);
+            check_mips_64(ctx);
             gen_st_cond(ctx, OPC_SCD, rt, rs, SIMM(ctx->opcode, 0, 12));
             break;
 #endif
@@ -13790,9 +13867,13 @@
         goto do_ld;
 #ifdef TARGET_MIPS64
     case LD32:
+        check_insn(ctx, ISA_MIPS3);
+        check_mips_64(ctx);
         mips32_op = OPC_LD;
         goto do_ld;
     case SD32:
+        check_insn(ctx, ISA_MIPS3);
+        check_mips_64(ctx);
         mips32_op = OPC_SD;
         goto do_st;
 #endif
@@ -13936,8 +14017,8 @@
             rs = rs_rt_enc[enc_rs];
             rt = rs_rt_enc[enc_rt];
 
-            gen_arith_imm(ctx, OPC_ADDIU, rd, rs, 0);
-            gen_arith_imm(ctx, OPC_ADDIU, re, rt, 0);
+            gen_arith(ctx, OPC_ADDU, rd, rs, 0);
+            gen_arith(ctx, OPC_ADDU, re, rt, 0);
         }
         break;
     case LBU16:
@@ -14018,7 +14099,7 @@
             int rd = uMIPS_RD5(ctx->opcode);
             int rs = uMIPS_RS5(ctx->opcode);
 
-            gen_arith_imm(ctx, OPC_ADDIU, rd, rs, 0);
+            gen_arith(ctx, OPC_ADDU, rd, rs, 0);
         }
         break;
     case ANDI16:
@@ -16322,6 +16403,7 @@
         break;
     case OPC_TGE ... OPC_TEQ: /* Traps */
     case OPC_TNE:
+        check_insn(ctx, ISA_MIPS2);
         gen_trap(ctx, op1, rs, rt, -1);
         break;
     case OPC_LSA: /* OPC_PMON */
@@ -16346,6 +16428,7 @@
         generate_exception(ctx, EXCP_BREAK);
         break;
     case OPC_SYNC:
+        check_insn(ctx, ISA_MIPS2);
         /* Treat as NOP. */
         break;
 
@@ -18340,7 +18423,7 @@
 
 }
 
-static void decode_opc (CPUMIPSState *env, DisasContext *ctx)
+static void decode_opc(CPUMIPSState *env, DisasContext *ctx)
 {
     int32_t offset;
     int rs, rt, rd, sa;
@@ -18392,7 +18475,9 @@
         case OPC_BGEZL:
         case OPC_BLTZALL:
         case OPC_BGEZALL:
+            check_insn(ctx, ISA_MIPS2);
             check_insn_opc_removed(ctx, ISA_MIPS32R6);
+            /* Fallthrough */
         case OPC_BLTZ:
         case OPC_BGEZ:
             gen_compute_branch(ctx, op1, 4, rs, -1, imm << 2, 4);
@@ -18412,6 +18497,7 @@
             break;
         case OPC_TGEI ... OPC_TEQI: /* REGIMM traps */
         case OPC_TNEI:
+            check_insn(ctx, ISA_MIPS2);
             check_insn_opc_removed(ctx, ISA_MIPS32R6);
             gen_trap(ctx, op1, rs, -1, imm);
             break;
@@ -18506,7 +18592,8 @@
                     save_cpu_state(ctx, 1);
                     gen_helper_di(t0, cpu_env);
                     gen_store_gpr(t0, rt);
-                    /* Stop translation as we may have switched the execution mode */
+                    /* Stop translation as we may have switched
+                       the execution mode.  */
                     ctx->bstate = BS_STOP;
                     break;
                 case OPC_EI:
@@ -18514,7 +18601,8 @@
                     save_cpu_state(ctx, 1);
                     gen_helper_ei(t0, cpu_env);
                     gen_store_gpr(t0, rt);
-                    /* Stop translation as we may have switched the execution mode */
+                    /* Stop translation as we may have switched
+                       the execution mode.  */
                     ctx->bstate = BS_STOP;
                     break;
                 default:            /* Invalid */
@@ -18616,15 +18704,20 @@
         break;
     case OPC_BEQL:
     case OPC_BNEL:
+        check_insn(ctx, ISA_MIPS2);
          check_insn_opc_removed(ctx, ISA_MIPS32R6);
+        /* Fallthrough */
     case OPC_BEQ:
     case OPC_BNE:
          gen_compute_branch(ctx, op, 4, rs, rt, imm << 2, 4);
          break;
-    case OPC_LWL: /* Load and stores */
+    case OPC_LL: /* Load and stores */
+        check_insn(ctx, ISA_MIPS2);
+        /* Fallthrough */
+    case OPC_LWL:
     case OPC_LWR:
-    case OPC_LL:
         check_insn_opc_removed(ctx, ISA_MIPS32R6);
+         /* Fallthrough */
     case OPC_LB ... OPC_LH:
     case OPC_LW ... OPC_LHU:
          gen_ld(ctx, op, rt, rs, imm);
@@ -18637,6 +18730,7 @@
          gen_st(ctx, op, rt, rs, imm);
          break;
     case OPC_SC:
+        check_insn(ctx, ISA_MIPS2);
          check_insn_opc_removed(ctx, ISA_MIPS32R6);
          gen_st_cond(ctx, op, rt, rs, imm);
          break;
@@ -18680,6 +18774,7 @@
         case OPC_DMTC1:
             check_cp1_enabled(ctx);
             check_insn(ctx, ISA_MIPS3);
+            check_mips_64(ctx);
             gen_cp1(ctx, op1, rt, rd);
             break;
 #endif
@@ -18780,8 +18875,9 @@
                     gen_r6_cmp_d(ctx, ctx->opcode & 0x1f, rt, rd, sa);
                     break;
                 default:
-                    gen_farith(ctx, ctx->opcode & FOP(0x3f, 0x1f), rt, rd, sa,
-                               (imm >> 8) & 0x7);
+                    gen_farith(ctx, ctx->opcode & FOP(0x3f, 0x1f),
+                               rt, rd, sa, (imm >> 8) & 0x7);
+
                     break;
                 }
             } else {
@@ -18852,18 +18948,24 @@
             check_cp1_enabled(ctx);
             op1 = MASK_CP3(ctx->opcode);
             switch (op1) {
+            case OPC_LUXC1:
+            case OPC_SUXC1:
+                check_insn(ctx, ISA_MIPS5 | ISA_MIPS32R2);
+                /* Fallthrough */
             case OPC_LWXC1:
             case OPC_LDXC1:
-            case OPC_LUXC1:
             case OPC_SWXC1:
             case OPC_SDXC1:
-            case OPC_SUXC1:
+                check_insn(ctx, ISA_MIPS4 | ISA_MIPS32R2);
                 gen_flt3_ldst(ctx, op1, sa, rd, rs, rt);
                 break;
             case OPC_PREFX:
+                check_insn(ctx, ISA_MIPS4 | ISA_MIPS32R2);
                 /* Treat as NOP. */
                 break;
             case OPC_ALNV_PS:
+                check_insn(ctx, ISA_MIPS5 | ISA_MIPS32R2);
+                /* Fallthrough */
             case OPC_MADD_S:
             case OPC_MADD_D:
             case OPC_MADD_PS:
@@ -18876,6 +18978,7 @@
             case OPC_NMSUB_S:
             case OPC_NMSUB_D:
             case OPC_NMSUB_PS:
+                check_insn(ctx, ISA_MIPS4 | ISA_MIPS32R2);
                 gen_flt3_arith(ctx, op1, sa, rs, rd, rt);
                 break;
             default:
@@ -19011,7 +19114,7 @@
     ctx.bp = (env->CP0_Config3 >> CP0C3_BP) & 1;
     /* Restore delay slot state from the tb context.  */
     ctx.hflags = (uint32_t)tb->flags; /* FIXME: maybe use 64 bits here? */
-    ctx.ulri = env->CP0_Config3 & (1 << CP0C3_ULRI);
+    ctx.ulri = (env->CP0_Config3 >> CP0C3_ULRI) & 1;
     restore_cpu_state(env, &ctx);
 #ifdef CONFIG_USER_ONLY
         ctx.mem_idx = MIPS_HFLAG_UM;
@@ -19261,6 +19364,10 @@
                 env->CP0_Status, env->CP0_Cause, env->CP0_EPC);
     cpu_fprintf(f, "    Config0 0x%08x Config1 0x%08x LLAddr 0x" TARGET_FMT_lx "\n",
                 env->CP0_Config0, env->CP0_Config1, env->lladdr);
+    cpu_fprintf(f, "    Config2 0x%08x Config3 0x%08x\n",
+                env->CP0_Config2, env->CP0_Config3);
+    cpu_fprintf(f, "    Config4 0x%08x Config5 0x%08x\n",
+                env->CP0_Config4, env->CP0_Config5);
     if (env->hflags & MIPS_HFLAG_FPU)
         fpu_dump_state(env, f, cpu_fprintf, flags);
 #if defined(TARGET_MIPS64) && defined(MIPS_DEBUG_SIGN_EXTENSIONS)
@@ -19436,7 +19543,8 @@
     if (env->hflags & MIPS_HFLAG_BMASK) {
         /* If the exception was raised from a delay slot,
            come back to the jump.  */
-        env->CP0_ErrorEPC = env->active_tc.PC - 4;
+        env->CP0_ErrorEPC = (env->active_tc.PC
+                             - (env->hflags & MIPS_HFLAG_B16 ? 2 : 4));
     } else {
         env->CP0_ErrorEPC = env->active_tc.PC;
     }
@@ -19507,6 +19615,8 @@
     }
 
     compute_hflags(env);
+    restore_rounding_mode(env);
+    restore_flush_mode(env);
     cs->exception_index = EXCP_NONE;
 }
 
diff --git a/target-mips/translate_init.c b/target-mips/translate_init.c
index 148b394..1543f6c 100644
--- a/target-mips/translate_init.c
+++ b/target-mips/translate_init.c
@@ -334,7 +334,7 @@
                        (1 << CP0C1_CA),
         .CP0_Config2 = MIPS_CONFIG2,
         .CP0_Config3 = MIPS_CONFIG3 | (1 << CP0C3_DSP2P) | (1 << CP0C3_DSPP) |
-                       (0 << CP0C3_VInt),
+                       (1 << CP0C3_VInt),
         .CP0_LLAddr_rw_bitmask = 0,
         .CP0_LLAddr_shift = 4,
         .SYNCI_Step = 32,
@@ -348,6 +348,47 @@
         .mmu_type = MMU_TYPE_R4000,
     },
     {
+        .name = "M14K",
+        .CP0_PRid = 0x00019b00,
+        /* Config1 implemented, fixed mapping MMU,
+           no virtual icache, uncached coherency. */
+        .CP0_Config0 = MIPS_CONFIG0 | (0x2 << CP0C0_KU) | (0x2 << CP0C0_K23) |
+                       (0x1 << CP0C0_AR) | (MMU_TYPE_FMT << CP0C0_MT),
+        .CP0_Config1 = MIPS_CONFIG1,
+        .CP0_Config2 = MIPS_CONFIG2,
+        .CP0_Config3 = MIPS_CONFIG3 | (0x2 << CP0C3_ISA) | (1 << CP0C3_VInt),
+        .CP0_LLAddr_rw_bitmask = 0,
+        .CP0_LLAddr_shift = 4,
+        .SYNCI_Step = 32,
+        .CCRes = 2,
+        .CP0_Status_rw_bitmask = 0x1258FF17,
+        .SEGBITS = 32,
+        .PABITS = 32,
+        .insn_flags = CPU_MIPS32R2 | ASE_MICROMIPS,
+        .mmu_type = MMU_TYPE_FMT,
+    },
+    {
+        .name = "M14Kc",
+        /* This is the TLB-based MMU core.  */
+        .CP0_PRid = 0x00019c00,
+        .CP0_Config0 = MIPS_CONFIG0 | (0x1 << CP0C0_AR) |
+                       (MMU_TYPE_R4000 << CP0C0_MT),
+        .CP0_Config1 = MIPS_CONFIG1 | (15 << CP0C1_MMU) |
+                       (0 << CP0C1_IS) | (3 << CP0C1_IL) | (1 << CP0C1_IA) |
+                       (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA),
+        .CP0_Config2 = MIPS_CONFIG2,
+        .CP0_Config3 = MIPS_CONFIG3 | (0x2 << CP0C3_ISA) | (0 << CP0C3_VInt),
+        .CP0_LLAddr_rw_bitmask = 0,
+        .CP0_LLAddr_shift = 4,
+        .SYNCI_Step = 32,
+        .CCRes = 2,
+        .CP0_Status_rw_bitmask = 0x1278FF17,
+        .SEGBITS = 32,
+        .PABITS = 32,
+        .insn_flags = CPU_MIPS32R2 | ASE_MICROMIPS,
+        .mmu_type = MMU_TYPE_R4000,
+    },
+    {
         /* A generic CPU providing MIPS32 Release 5 features.
            FIXME: Eventually this should be replaced by a real CPU model. */
         .name = "mips32r5-generic",
@@ -520,6 +561,51 @@
         .mmu_type = MMU_TYPE_R4000,
     },
     {
+        .name = "5KEc",
+        .CP0_PRid = 0x00018900,
+        .CP0_Config0 = MIPS_CONFIG0 | (0x1 << CP0C0_AR) | (0x2 << CP0C0_AT) |
+                       (MMU_TYPE_R4000 << CP0C0_MT),
+        .CP0_Config1 = MIPS_CONFIG1 | (31 << CP0C1_MMU) |
+                       (1 << CP0C1_IS) | (4 << CP0C1_IL) | (1 << CP0C1_IA) |
+                       (1 << CP0C1_DS) | (4 << CP0C1_DL) | (1 << CP0C1_DA) |
+                       (1 << CP0C1_PC) | (1 << CP0C1_WR) | (1 << CP0C1_EP),
+        .CP0_Config2 = MIPS_CONFIG2,
+        .CP0_Config3 = MIPS_CONFIG3,
+        .CP0_LLAddr_rw_bitmask = 0,
+        .CP0_LLAddr_shift = 4,
+        .SYNCI_Step = 32,
+        .CCRes = 2,
+        .CP0_Status_rw_bitmask = 0x32F8FFFF,
+        .SEGBITS = 42,
+        .PABITS = 36,
+        .insn_flags = CPU_MIPS64R2,
+        .mmu_type = MMU_TYPE_R4000,
+    },
+    {
+        .name = "5KEf",
+        .CP0_PRid = 0x00018900,
+        .CP0_Config0 = MIPS_CONFIG0 | (0x1 << CP0C0_AR) | (0x2 << CP0C0_AT) |
+                       (MMU_TYPE_R4000 << CP0C0_MT),
+        .CP0_Config1 = MIPS_CONFIG1 | (1 << CP0C1_FP) | (31 << CP0C1_MMU) |
+                       (1 << CP0C1_IS) | (4 << CP0C1_IL) | (1 << CP0C1_IA) |
+                       (1 << CP0C1_DS) | (4 << CP0C1_DL) | (1 << CP0C1_DA) |
+                       (1 << CP0C1_PC) | (1 << CP0C1_WR) | (1 << CP0C1_EP),
+        .CP0_Config2 = MIPS_CONFIG2,
+        .CP0_Config3 = MIPS_CONFIG3,
+        .CP0_LLAddr_rw_bitmask = 0,
+        .CP0_LLAddr_shift = 4,
+        .SYNCI_Step = 32,
+        .CCRes = 2,
+        .CP0_Status_rw_bitmask = 0x36F8FFFF,
+        .CP1_fcr0 = (1 << FCR0_F64) | (1 << FCR0_L) | (1 << FCR0_W) |
+                    (1 << FCR0_D) | (1 << FCR0_S) |
+                    (0x89 << FCR0_PRID) | (0x0 << FCR0_REV),
+        .SEGBITS = 42,
+        .PABITS = 36,
+        .insn_flags = CPU_MIPS64R2,
+        .mmu_type = MMU_TYPE_R4000,
+    },
+    {
         /* A generic CPU supporting MIPS64 Release 6 ISA.
            FIXME: Support IEEE 754-2008 FP and misaligned memory accesses.
                   Eventually this should be replaced by a real CPU model. */
@@ -559,10 +645,11 @@
     {
         .name = "Loongson-2E",
         .CP0_PRid = 0x6302,
-        /*64KB I-cache and d-cache. 4 way with 32 bit cache line size*/
-        .CP0_Config0 = (0x1<<17) | (0x1<<16) | (0x1<<11) | (0x1<<8) | (0x1<<5) |
-                       (0x1<<4) | (0x1<<1),
-        /* Note: Config1 is only used internally, Loongson-2E has only Config0. */
+        /* 64KB I-cache and d-cache. 4 way with 32 bit cache line size.  */
+        .CP0_Config0 = (0x1<<17) | (0x1<<16) | (0x1<<11) | (0x1<<8) |
+                       (0x1<<5) | (0x1<<4) | (0x1<<1),
+        /* Note: Config1 is only used internally,
+           Loongson-2E has only Config0.  */
         .CP0_Config1 = (1 << CP0C1_FP) | (47 << CP0C1_MMU),
         .SYNCI_Step = 16,
         .CCRes = 2,
@@ -574,21 +661,22 @@
         .mmu_type = MMU_TYPE_R4000,
     },
     {
-      .name = "Loongson-2F",
-      .CP0_PRid = 0x6303,
-      /*64KB I-cache and d-cache. 4 way with 32 bit cache line size*/
-      .CP0_Config0 = (0x1<<17) | (0x1<<16) | (0x1<<11) | (0x1<<8) | (0x1<<5) |
-                     (0x1<<4) | (0x1<<1),
-      /* Note: Config1 is only used internally, Loongson-2F has only Config0. */
-      .CP0_Config1 = (1 << CP0C1_FP) | (47 << CP0C1_MMU),
-      .SYNCI_Step = 16,
-      .CCRes = 2,
-      .CP0_Status_rw_bitmask = 0xF5D0FF1F,   /*bit5:7 not writable*/
-      .CP1_fcr0 = (0x5 << FCR0_PRID) | (0x1 << FCR0_REV),
-      .SEGBITS = 40,
-      .PABITS = 40,
-      .insn_flags = CPU_LOONGSON2F,
-      .mmu_type = MMU_TYPE_R4000,
+        .name = "Loongson-2F",
+        .CP0_PRid = 0x6303,
+        /* 64KB I-cache and d-cache. 4 way with 32 bit cache line size.  */
+        .CP0_Config0 = (0x1<<17) | (0x1<<16) | (0x1<<11) | (0x1<<8) |
+                       (0x1<<5) | (0x1<<4) | (0x1<<1),
+        /* Note: Config1 is only used internally,
+           Loongson-2F has only Config0.  */
+        .CP0_Config1 = (1 << CP0C1_FP) | (47 << CP0C1_MMU),
+        .SYNCI_Step = 16,
+        .CCRes = 2,
+        .CP0_Status_rw_bitmask = 0xF5D0FF1F,   /* Bits 7:5 not writable.  */
+        .CP1_fcr0 = (0x5 << FCR0_PRID) | (0x1 << FCR0_REV),
+        .SEGBITS = 40,
+        .PABITS = 40,
+        .insn_flags = CPU_LOONGSON2F,
+        .mmu_type = MMU_TYPE_R4000,
     },
     {
         /* A generic CPU providing MIPS64 ASE DSP 2 features.
diff --git a/target-ppc/mmu-hash32.c b/target-ppc/mmu-hash32.c
index 0a13a81..dfee358 100644
--- a/target-ppc/mmu-hash32.c
+++ b/target-ppc/mmu-hash32.c
@@ -28,10 +28,8 @@
 //#define DEBUG_BAT
 
 #ifdef DEBUG_MMU
-#  define LOG_MMU(...) qemu_log(__VA_ARGS__)
 #  define LOG_MMU_STATE(cpu) log_cpu_state((cpu), 0)
 #else
-#  define LOG_MMU(...) do { } while (0)
 #  define LOG_MMU_STATE(cpu) do { } while (0)
 #endif
 
@@ -225,7 +223,7 @@
     CPUState *cs = CPU(ppc_env_get_cpu(env));
     int key = !!(msr_pr ? (sr & SR32_KP) : (sr & SR32_KS));
 
-    LOG_MMU("direct store...\n");
+    qemu_log_mask(CPU_LOG_MMU, "direct store...\n");
 
     if ((sr & 0x1FF00000) >> 20 == 0x07f) {
         /* Memory-forced I/O controller interface access */
@@ -348,12 +346,13 @@
     ptem = (vsid << 7) | (pgidx >> 10);
 
     /* Page address translation */
-    LOG_MMU("htab_base " TARGET_FMT_plx " htab_mask " TARGET_FMT_plx
+    qemu_log_mask(CPU_LOG_MMU, "htab_base " TARGET_FMT_plx
+            " htab_mask " TARGET_FMT_plx
             " hash " TARGET_FMT_plx "\n",
             env->htab_base, env->htab_mask, hash);
 
     /* Primary PTEG lookup */
-    LOG_MMU("0 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx
+    qemu_log_mask(CPU_LOG_MMU, "0 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx
             " vsid=%" PRIx32 " ptem=%" PRIx32
             " hash=" TARGET_FMT_plx "\n",
             env->htab_base, env->htab_mask, vsid, ptem, hash);
@@ -361,7 +360,7 @@
     pte_offset = ppc_hash32_pteg_search(env, pteg_off, 0, ptem, pte);
     if (pte_offset == -1) {
         /* Secondary PTEG lookup */
-        LOG_MMU("1 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx
+        qemu_log_mask(CPU_LOG_MMU, "1 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx
                 " vsid=%" PRIx32 " api=%" PRIx32
                 " hash=" TARGET_FMT_plx "\n", env->htab_base,
                 env->htab_mask, vsid, ptem, ~hash);
@@ -476,7 +475,8 @@
 
         return 1;
     }
-    LOG_MMU("found PTE at offset %08" HWADDR_PRIx "\n", pte_offset);
+    qemu_log_mask(CPU_LOG_MMU,
+                "found PTE at offset %08" HWADDR_PRIx "\n", pte_offset);
 
     /* 7. Check access permissions */
 
@@ -484,7 +484,7 @@
 
     if (need_prot[rwx] & ~prot) {
         /* Access right violation */
-        LOG_MMU("PTE access rejected\n");
+        qemu_log_mask(CPU_LOG_MMU, "PTE access rejected\n");
         if (rwx == 2) {
             cs->exception_index = POWERPC_EXCP_ISI;
             env->error_code = 0x08000000;
@@ -501,7 +501,7 @@
         return 1;
     }
 
-    LOG_MMU("PTE access granted !\n");
+    qemu_log_mask(CPU_LOG_MMU, "PTE access granted !\n");
 
     /* 8. Update PTE referenced and changed bits if necessary */
 
diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
index c72198a..b0278c9 100644
--- a/target-ppc/mmu-hash64.c
+++ b/target-ppc/mmu-hash64.c
@@ -27,10 +27,8 @@
 //#define DEBUG_SLB
 
 #ifdef DEBUG_MMU
-#  define LOG_MMU(...) qemu_log(__VA_ARGS__)
 #  define LOG_MMU_STATE(cpu) log_cpu_state((cpu), 0)
 #else
-#  define LOG_MMU(...) do { } while (0)
 #  define LOG_MMU_STATE(cpu) do { } while (0)
 #endif
 
@@ -420,12 +418,14 @@
     ptem = (slb->vsid & SLB_VSID_PTEM) | ((epn >> 16) & HPTE64_V_AVPN);
 
     /* Page address translation */
-    LOG_MMU("htab_base " TARGET_FMT_plx " htab_mask " TARGET_FMT_plx
+    qemu_log_mask(CPU_LOG_MMU,
+            "htab_base " TARGET_FMT_plx " htab_mask " TARGET_FMT_plx
             " hash " TARGET_FMT_plx "\n",
             env->htab_base, env->htab_mask, hash);
 
     /* Primary PTEG lookup */
-    LOG_MMU("0 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx
+    qemu_log_mask(CPU_LOG_MMU,
+            "0 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx
             " vsid=" TARGET_FMT_lx " ptem=" TARGET_FMT_lx
             " hash=" TARGET_FMT_plx "\n",
             env->htab_base, env->htab_mask, vsid, ptem,  hash);
@@ -433,7 +433,8 @@
 
     if (pte_offset == -1) {
         /* Secondary PTEG lookup */
-        LOG_MMU("1 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx
+        qemu_log_mask(CPU_LOG_MMU,
+                "1 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx
                 " vsid=" TARGET_FMT_lx " api=" TARGET_FMT_lx
                 " hash=" TARGET_FMT_plx "\n", env->htab_base,
                 env->htab_mask, vsid, ptem, ~hash);
@@ -522,7 +523,8 @@
         }
         return 1;
     }
-    LOG_MMU("found PTE at offset %08" HWADDR_PRIx "\n", pte_offset);
+    qemu_log_mask(CPU_LOG_MMU,
+                "found PTE at offset %08" HWADDR_PRIx "\n", pte_offset);
 
     /* 5. Check access permissions */
 
@@ -532,7 +534,7 @@
 
     if ((need_prot[rwx] & ~prot) != 0) {
         /* Access right violation */
-        LOG_MMU("PTE access rejected\n");
+        qemu_log_mask(CPU_LOG_MMU, "PTE access rejected\n");
         if (rwx == 2) {
             cs->exception_index = POWERPC_EXCP_ISI;
             env->error_code = 0x08000000;
@@ -556,7 +558,7 @@
         return 1;
     }
 
-    LOG_MMU("PTE access granted !\n");
+    qemu_log_mask(CPU_LOG_MMU, "PTE access granted !\n");
 
     /* 6. Update PTE referenced and changed bits if necessary */
 
diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
index 4a34a73..660be7f 100644
--- a/target-ppc/mmu_helper.c
+++ b/target-ppc/mmu_helper.c
@@ -32,10 +32,8 @@
 //#define FLUSH_ALL_TLBS
 
 #ifdef DEBUG_MMU
-#  define LOG_MMU(...) qemu_log(__VA_ARGS__)
 #  define LOG_MMU_STATE(cpu) log_cpu_state((cpu), 0)
 #else
-#  define LOG_MMU(...) do { } while (0)
 #  define LOG_MMU_STATE(cpu) do { } while (0)
 #endif
 
@@ -176,10 +174,10 @@
             ret = check_prot(ctx->prot, rw, type);
             if (ret == 0) {
                 /* Access granted */
-                LOG_MMU("PTE access granted !\n");
+                qemu_log_mask(CPU_LOG_MMU, "PTE access granted !\n");
             } else {
                 /* Access right violation */
-                LOG_MMU("PTE access rejected\n");
+                qemu_log_mask(CPU_LOG_MMU, "PTE access rejected\n");
             }
         }
     }
@@ -480,8 +478,9 @@
     ctx->nx = sr & 0x10000000 ? 1 : 0;
     vsid = sr & 0x00FFFFFF;
     target_page_bits = TARGET_PAGE_BITS;
-    LOG_MMU("Check segment v=" TARGET_FMT_lx " %d " TARGET_FMT_lx " nip="
-            TARGET_FMT_lx " lr=" TARGET_FMT_lx
+    qemu_log_mask(CPU_LOG_MMU,
+            "Check segment v=" TARGET_FMT_lx " %d " TARGET_FMT_lx
+            " nip=" TARGET_FMT_lx " lr=" TARGET_FMT_lx
             " ir=%d dr=%d pr=%d %d t=%d\n",
             eaddr, (int)(eaddr >> 28), sr, env->nip, env->lr, (int)msr_ir,
             (int)msr_dr, pr != 0 ? 1 : 0, rw, type);
@@ -489,14 +488,16 @@
     hash = vsid ^ pgidx;
     ctx->ptem = (vsid << 7) | (pgidx >> 10);
 
-    LOG_MMU("pte segment: key=%d ds %d nx %d vsid " TARGET_FMT_lx "\n",
+    qemu_log_mask(CPU_LOG_MMU,
+            "pte segment: key=%d ds %d nx %d vsid " TARGET_FMT_lx "\n",
             ctx->key, ds, ctx->nx, vsid);
     ret = -1;
     if (!ds) {
         /* Check if instruction fetch is allowed, if needed */
         if (type != ACCESS_CODE || ctx->nx == 0) {
             /* Page address translation */
-            LOG_MMU("htab_base " TARGET_FMT_plx " htab_mask " TARGET_FMT_plx
+            qemu_log_mask(CPU_LOG_MMU, "htab_base " TARGET_FMT_plx
+                    " htab_mask " TARGET_FMT_plx
                     " hash " TARGET_FMT_plx "\n",
                     env->htab_base, env->htab_mask, hash);
             ctx->hash[0] = hash;
@@ -527,13 +528,13 @@
             }
 #endif
         } else {
-            LOG_MMU("No access allowed\n");
+            qemu_log_mask(CPU_LOG_MMU, "No access allowed\n");
             ret = -3;
         }
     } else {
         target_ulong sr;
 
-        LOG_MMU("direct store...\n");
+        qemu_log_mask(CPU_LOG_MMU, "direct store...\n");
         /* Direct-store segment : absolutely *BUGGY* for now */
 
         /* Direct-store implies a 32-bit MMU.
@@ -2037,7 +2038,7 @@
 {
     PowerPCCPU *cpu = ppc_env_get_cpu(env);
 
-    LOG_MMU("%s: " TARGET_FMT_lx "\n", __func__, value);
+    qemu_log_mask(CPU_LOG_MMU, "%s: " TARGET_FMT_lx "\n", __func__, value);
     assert(!env->external_htab);
     if (env->spr[SPR_SDR1] != value) {
         env->spr[SPR_SDR1] = value;
@@ -2079,7 +2080,8 @@
 {
     PowerPCCPU *cpu = ppc_env_get_cpu(env);
 
-    LOG_MMU("%s: reg=%d " TARGET_FMT_lx " " TARGET_FMT_lx "\n", __func__,
+    qemu_log_mask(CPU_LOG_MMU,
+            "%s: reg=%d " TARGET_FMT_lx " " TARGET_FMT_lx "\n", __func__,
             (int)srnum, value, env->sr[srnum]);
 #if defined(TARGET_PPC64)
     if (env->mmu_model & POWERPC_MMU_64) {
diff --git a/target-s390x/helper.c b/target-s390x/helper.c
index 96a4f22..5958343 100644
--- a/target-s390x/helper.c
+++ b/target-s390x/helper.c
@@ -461,8 +461,8 @@
         return 1;
     }
 
-    DPRINTF("%s: set tlb %" PRIx64 " -> %" PRIx64 " (%x)\n", __func__,
-            (uint64_t)vaddr, (uint64_t)raddr, prot);
+    qemu_log_mask(CPU_LOG_MMU, "%s: set tlb %" PRIx64 " -> %" PRIx64 " (%x)\n",
+            __func__, (uint64_t)vaddr, (uint64_t)raddr, prot);
 
     tlb_set_page(cs, orig_vaddr, raddr, prot,
                  mmu_idx, TARGET_PAGE_SIZE);
diff --git a/target-sparc/mmu_helper.c b/target-sparc/mmu_helper.c
index 61afbcf..2a0c6f0 100644
--- a/target-sparc/mmu_helper.c
+++ b/target-sparc/mmu_helper.c
@@ -213,10 +213,9 @@
                                       address, rw, mmu_idx, &page_size);
     vaddr = address;
     if (error_code == 0) {
-#ifdef DEBUG_MMU
-        printf("Translate at %" VADDR_PRIx " -> " TARGET_FMT_plx ", vaddr "
-               TARGET_FMT_lx "\n", address, paddr, vaddr);
-#endif
+        qemu_log_mask(CPU_LOG_MMU,
+                "Translate at %" VADDR_PRIx " -> " TARGET_FMT_plx ", vaddr "
+                TARGET_FMT_lx "\n", address, paddr, vaddr);
         tlb_set_page(cs, vaddr, paddr, prot, mmu_idx, page_size);
         return 0;
     }
diff --git a/target-xtensa/cpu.h b/target-xtensa/cpu.h
index ac463f2..a1bfbf7 100644
--- a/target-xtensa/cpu.h
+++ b/target-xtensa/cpu.h
@@ -497,6 +497,8 @@
 #define XTENSA_TBFLAG_CPENABLE_MASK 0x3fc0
 #define XTENSA_TBFLAG_CPENABLE_SHIFT 6
 #define XTENSA_TBFLAG_EXCEPTION 0x4000
+#define XTENSA_TBFLAG_WINDOW_MASK 0x18000
+#define XTENSA_TBFLAG_WINDOW_SHIFT 15
 
 static inline void cpu_get_tb_cpu_state(CPUXtensaState *env, target_ulong *pc,
         target_ulong *cs_base, int *flags)
@@ -528,6 +530,16 @@
     if (cs->singlestep_enabled && env->exception_taken) {
         *flags |= XTENSA_TBFLAG_EXCEPTION;
     }
+    if (xtensa_option_enabled(env->config, XTENSA_OPTION_WINDOWED_REGISTER) &&
+        (env->sregs[PS] & (PS_WOE | PS_EXCM)) == PS_WOE) {
+        uint32_t windowstart = xtensa_replicate_windowstart(env) >>
+            (env->sregs[WINDOW_BASE] + 1);
+        uint32_t w = ctz32(windowstart | 0x8);
+
+        *flags |= w << XTENSA_TBFLAG_WINDOW_SHIFT;
+    } else {
+        *flags |= 3 << XTENSA_TBFLAG_WINDOW_SHIFT;
+    }
 }
 
 #include "exec/cpu-all.h"
diff --git a/target-xtensa/helper.h b/target-xtensa/helper.h
index ed3af0b..5ea9c5b 100644
--- a/target-xtensa/helper.h
+++ b/target-xtensa/helper.h
@@ -9,7 +9,7 @@
 DEF_HELPER_4(entry, void, env, i32, i32, i32)
 DEF_HELPER_2(retw, i32, env, i32)
 DEF_HELPER_2(rotw, void, env, i32)
-DEF_HELPER_3(window_check, void, env, i32, i32)
+DEF_HELPER_3(window_check, noreturn, env, i32, i32)
 DEF_HELPER_1(restore_owb, void, env)
 DEF_HELPER_2(movsp, void, env, i32)
 DEF_HELPER_2(wsr_lbeg, void, env, i32)
diff --git a/target-xtensa/op_helper.c b/target-xtensa/op_helper.c
index 872e5a8..49e8634 100644
--- a/target-xtensa/op_helper.c
+++ b/target-xtensa/op_helper.c
@@ -251,34 +251,27 @@
 void HELPER(window_check)(CPUXtensaState *env, uint32_t pc, uint32_t w)
 {
     uint32_t windowbase = windowbase_bound(env->sregs[WINDOW_BASE], env);
-    uint32_t windowstart = env->sregs[WINDOW_START];
-    uint32_t m, n;
+    uint32_t windowstart = xtensa_replicate_windowstart(env) >>
+        (env->sregs[WINDOW_BASE] + 1);
+    uint32_t n = ctz32(windowstart) + 1;
 
-    if ((env->sregs[PS] & (PS_WOE | PS_EXCM)) ^ PS_WOE) {
-        return;
-    }
+    assert(n <= w);
 
-    for (n = 1; ; ++n) {
-        if (n > w) {
-            return;
-        }
-        if (windowstart & windowstart_bit(windowbase + n, env)) {
-            break;
-        }
-    }
-
-    m = windowbase_bound(windowbase + n, env);
     rotate_window(env, n);
     env->sregs[PS] = (env->sregs[PS] & ~PS_OWB) |
         (windowbase << PS_OWB_SHIFT) | PS_EXCM;
     env->sregs[EPC1] = env->pc = pc;
 
-    if (windowstart & windowstart_bit(m + 1, env)) {
+    switch (ctz32(windowstart >> n)) {
+    case 0:
         HELPER(exception)(env, EXC_WINDOW_OVERFLOW4);
-    } else if (windowstart & windowstart_bit(m + 2, env)) {
+        break;
+    case 1:
         HELPER(exception)(env, EXC_WINDOW_OVERFLOW8);
-    } else {
+        break;
+    default:
         HELPER(exception)(env, EXC_WINDOW_OVERFLOW12);
+        break;
     }
 }
 
diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index badca19..6500554 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -63,7 +63,7 @@
     TCGv_i32 sar_m32;
 
     uint32_t ccount_delta;
-    unsigned used_window;
+    unsigned window;
 
     bool debug;
     bool icount;
@@ -311,26 +311,16 @@
     tcg_temp_free(tmp);
 }
 
-static void gen_advance_ccount_cond(DisasContext *dc)
+static void gen_advance_ccount(DisasContext *dc)
 {
     if (dc->ccount_delta > 0) {
         TCGv_i32 tmp = tcg_const_i32(dc->ccount_delta);
         gen_helper_advance_ccount(cpu_env, tmp);
         tcg_temp_free(tmp);
     }
-}
-
-static void gen_advance_ccount(DisasContext *dc)
-{
-    gen_advance_ccount_cond(dc);
     dc->ccount_delta = 0;
 }
 
-static void reset_used_window(DisasContext *dc)
-{
-    dc->used_window = 0;
-}
-
 static void gen_exception(DisasContext *dc, int excp)
 {
     TCGv_i32 tmp = tcg_const_i32(excp);
@@ -377,21 +367,25 @@
     }
 }
 
-static void gen_check_privilege(DisasContext *dc)
+static bool gen_check_privilege(DisasContext *dc)
 {
     if (dc->cring) {
         gen_exception_cause(dc, PRIVILEGED_CAUSE);
         dc->is_jmp = DISAS_UPDATE;
+        return false;
     }
+    return true;
 }
 
-static void gen_check_cpenable(DisasContext *dc, unsigned cp)
+static bool gen_check_cpenable(DisasContext *dc, unsigned cp)
 {
     if (option_enabled(dc, XTENSA_OPTION_COPROCESSOR) &&
             !(dc->cpenable & (1 << cp))) {
         gen_exception_cause(dc, COPROCESSOR0_DISABLED + cp);
         dc->is_jmp = DISAS_UPDATE;
+        return false;
     }
+    return true;
 }
 
 static void gen_jump_slot(DisasContext *dc, TCGv dest, int slot)
@@ -597,13 +591,15 @@
 static void gen_wsr_windowbase(DisasContext *dc, uint32_t sr, TCGv_i32 v)
 {
     gen_helper_wsr_windowbase(cpu_env, v);
-    reset_used_window(dc);
+    /* This can change tb->flags, so exit tb */
+    gen_jumpi_check_loop_end(dc, -1);
 }
 
 static void gen_wsr_windowstart(DisasContext *dc, uint32_t sr, TCGv_i32 v)
 {
     tcg_gen_andi_i32(cpu_SR[sr], v, (1 << dc->config->nareg / 4) - 1);
-    reset_used_window(dc);
+    /* This can change tb->flags, so exit tb */
+    gen_jumpi_check_loop_end(dc, -1);
 }
 
 static void gen_wsr_ptevaddr(DisasContext *dc, uint32_t sr, TCGv_i32 v)
@@ -712,7 +708,6 @@
         mask |= PS_RING;
     }
     tcg_gen_andi_i32(cpu_SR[sr], v, mask);
-    reset_used_window(dc);
     gen_helper_check_interrupts(cpu_env);
     /* This can change mmu index and tb->flags, so exit tb */
     gen_jumpi_check_loop_end(dc, -1);
@@ -833,46 +828,29 @@
     tcg_temp_free(intlevel);
 }
 
-static void gen_window_check1(DisasContext *dc, unsigned r1)
+static bool gen_window_check1(DisasContext *dc, unsigned r1)
 {
-    if (dc->tb->flags & XTENSA_TBFLAG_EXCM) {
-        return;
+    if (r1 / 4 > dc->window) {
+        TCGv_i32 pc = tcg_const_i32(dc->pc);
+        TCGv_i32 w = tcg_const_i32(r1 / 4);
+
+        gen_advance_ccount(dc);
+        gen_helper_window_check(cpu_env, pc, w);
+        dc->is_jmp = DISAS_UPDATE;
+        return false;
     }
-    if (option_enabled(dc, XTENSA_OPTION_WINDOWED_REGISTER) &&
-            r1 / 4 > dc->used_window) {
-        int label = gen_new_label();
-        TCGv_i32 ws = tcg_temp_new_i32();
-
-        dc->used_window = r1 / 4;
-        tcg_gen_deposit_i32(ws, cpu_SR[WINDOW_START], cpu_SR[WINDOW_START],
-                dc->config->nareg / 4, dc->config->nareg / 4);
-        tcg_gen_shr_i32(ws, ws, cpu_SR[WINDOW_BASE]);
-        tcg_gen_andi_i32(ws, ws, (2 << (r1 / 4)) - 2);
-        tcg_gen_brcondi_i32(TCG_COND_EQ, ws, 0, label);
-        {
-            TCGv_i32 pc = tcg_const_i32(dc->pc);
-            TCGv_i32 w = tcg_const_i32(r1 / 4);
-
-            gen_advance_ccount_cond(dc);
-            gen_helper_window_check(cpu_env, pc, w);
-
-            tcg_temp_free(w);
-            tcg_temp_free(pc);
-        }
-        gen_set_label(label);
-        tcg_temp_free(ws);
-    }
+    return true;
 }
 
-static void gen_window_check2(DisasContext *dc, unsigned r1, unsigned r2)
+static bool gen_window_check2(DisasContext *dc, unsigned r1, unsigned r2)
 {
-    gen_window_check1(dc, r1 > r2 ? r1 : r2);
+    return gen_window_check1(dc, r1 > r2 ? r1 : r2);
 }
 
-static void gen_window_check3(DisasContext *dc, unsigned r1, unsigned r2,
+static bool gen_window_check3(DisasContext *dc, unsigned r1, unsigned r2,
         unsigned r3)
 {
-    gen_window_check2(dc, r1, r2 > r3 ? r2 : r3);
+    return gen_window_check2(dc, r1, r2 > r3 ? r2 : r3);
 }
 
 static TCGv_i32 gen_mac16_m(TCGv_i32 v, bool hi, bool is_unsigned)
@@ -887,6 +865,11 @@
     return m;
 }
 
+static inline unsigned xtensa_op0_insn_len(unsigned op0)
+{
+    return op0 >= 8 ? 2 : 3;
+}
+
 static void disas_xtensa_insn(CPUXtensaState *env, DisasContext *dc)
 {
 #define HAS_OPTION_BITS(opt) do { \
@@ -989,6 +972,7 @@
     uint8_t b0 = cpu_ldub_code(env, dc->pc);
     uint8_t b1 = cpu_ldub_code(env, dc->pc + 1);
     uint8_t b2 = 0;
+    unsigned len = xtensa_op0_insn_len(OP0);
 
     static const uint32_t B4CONST[] = {
         0xffffffff, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 16, 32, 64, 128, 256
@@ -998,13 +982,19 @@
         32768, 65536, 2, 3, 4, 5, 6, 7, 8, 10, 12, 16, 32, 64, 128, 256
     };
 
-    if (OP0 >= 8) {
-        dc->next_pc = dc->pc + 2;
+    switch (len) {
+    case 2:
         HAS_OPTION(XTENSA_OPTION_CODE_DENSITY);
-    } else {
-        dc->next_pc = dc->pc + 3;
+        break;
+
+    case 3:
         b2 = cpu_ldub_code(env, dc->pc + 2);
+        break;
+
+    default:
+        RESERVED();
     }
+    dc->next_pc = dc->pc + len;
 
     switch (OP0) {
     case 0: /*QRST*/
@@ -1031,8 +1021,9 @@
                         switch (CALLX_N) {
                         case 0: /*RET*/
                         case 2: /*JX*/
-                            gen_window_check1(dc, CALLX_S);
-                            gen_jump(dc, cpu_R[CALLX_S]);
+                            if (gen_window_check1(dc, CALLX_S)) {
+                                gen_jump(dc, cpu_R[CALLX_S]);
+                            }
                             break;
 
                         case 1: /*RETWw*/
@@ -1053,7 +1044,9 @@
                         break;
 
                     case 3: /*CALLX*/
-                        gen_window_check2(dc, CALLX_S, CALLX_N << 2);
+                        if (!gen_window_check2(dc, CALLX_S, CALLX_N << 2)) {
+                            break;
+                        }
                         switch (CALLX_N) {
                         case 0: /*CALLX0*/
                             {
@@ -1084,8 +1077,7 @@
 
                 case 1: /*MOVSPw*/
                     HAS_OPTION(XTENSA_OPTION_WINDOWED_REGISTER);
-                    gen_window_check2(dc, RRR_T, RRR_S);
-                    {
+                    if (gen_window_check2(dc, RRR_T, RRR_S)) {
                         TCGv_i32 pc = tcg_const_i32(dc->pc);
                         gen_advance_ccount(dc);
                         gen_helper_movsp(cpu_env, pc);
@@ -1133,10 +1125,11 @@
                         HAS_OPTION(XTENSA_OPTION_EXCEPTION);
                         switch (RRR_S) {
                         case 0: /*RFEx*/
-                            gen_check_privilege(dc);
-                            tcg_gen_andi_i32(cpu_SR[PS], cpu_SR[PS], ~PS_EXCM);
-                            gen_helper_check_interrupts(cpu_env);
-                            gen_jump(dc, cpu_SR[EPC1]);
+                            if (gen_check_privilege(dc)) {
+                                tcg_gen_andi_i32(cpu_SR[PS], cpu_SR[PS], ~PS_EXCM);
+                                gen_helper_check_interrupts(cpu_env);
+                                gen_jump(dc, cpu_SR[EPC1]);
+                            }
                             break;
 
                         case 1: /*RFUEx*/
@@ -1144,16 +1137,16 @@
                             break;
 
                         case 2: /*RFDEx*/
-                            gen_check_privilege(dc);
-                            gen_jump(dc, cpu_SR[
-                                    dc->config->ndepc ? DEPC : EPC1]);
+                            if (gen_check_privilege(dc)) {
+                                gen_jump(dc, cpu_SR[
+                                         dc->config->ndepc ? DEPC : EPC1]);
+                            }
                             break;
 
                         case 4: /*RFWOw*/
                         case 5: /*RFWUw*/
                             HAS_OPTION(XTENSA_OPTION_WINDOWED_REGISTER);
-                            gen_check_privilege(dc);
-                            {
+                            if (gen_check_privilege(dc)) {
                                 TCGv_i32 tmp = tcg_const_i32(1);
 
                                 tcg_gen_andi_i32(
@@ -1185,11 +1178,12 @@
                     case 1: /*RFIx*/
                         HAS_OPTION(XTENSA_OPTION_HIGH_PRIORITY_INTERRUPT);
                         if (RRR_S >= 2 && RRR_S <= dc->config->nlevel) {
-                            gen_check_privilege(dc);
-                            tcg_gen_mov_i32(cpu_SR[PS],
-                                    cpu_SR[EPS2 + RRR_S - 2]);
-                            gen_helper_check_interrupts(cpu_env);
-                            gen_jump(dc, cpu_SR[EPC1 + RRR_S - 1]);
+                            if (gen_check_privilege(dc)) {
+                                tcg_gen_mov_i32(cpu_SR[PS],
+                                                cpu_SR[EPS2 + RRR_S - 2]);
+                                gen_helper_check_interrupts(cpu_env);
+                                gen_jump(dc, cpu_SR[EPC1 + RRR_S - 1]);
+                            }
                         } else {
                             qemu_log("RFI %d is illegal\n", RRR_S);
                             gen_exception_cause(dc, ILLEGAL_INSTRUCTION_CAUSE);
@@ -1223,8 +1217,9 @@
 
                     case 1: /*SIMCALL*/
                         if (semihosting_enabled) {
-                            gen_check_privilege(dc);
-                            gen_helper_simcall(cpu_env);
+                            if (gen_check_privilege(dc)) {
+                                gen_helper_simcall(cpu_env);
+                            }
                         } else {
                             qemu_log("SIMCALL but semihosting is disabled\n");
                             gen_exception_cause(dc, ILLEGAL_INSTRUCTION_CAUSE);
@@ -1239,19 +1234,21 @@
 
                 case 6: /*RSILx*/
                     HAS_OPTION(XTENSA_OPTION_INTERRUPT);
-                    gen_check_privilege(dc);
-                    gen_window_check1(dc, RRR_T);
-                    tcg_gen_mov_i32(cpu_R[RRR_T], cpu_SR[PS]);
-                    tcg_gen_andi_i32(cpu_SR[PS], cpu_SR[PS], ~PS_INTLEVEL);
-                    tcg_gen_ori_i32(cpu_SR[PS], cpu_SR[PS], RRR_S);
-                    gen_helper_check_interrupts(cpu_env);
-                    gen_jumpi_check_loop_end(dc, 0);
+                    if (gen_check_privilege(dc) &&
+                        gen_window_check1(dc, RRR_T)) {
+                        tcg_gen_mov_i32(cpu_R[RRR_T], cpu_SR[PS]);
+                        tcg_gen_andi_i32(cpu_SR[PS], cpu_SR[PS], ~PS_INTLEVEL);
+                        tcg_gen_ori_i32(cpu_SR[PS], cpu_SR[PS], RRR_S);
+                        gen_helper_check_interrupts(cpu_env);
+                        gen_jumpi_check_loop_end(dc, 0);
+                    }
                     break;
 
                 case 7: /*WAITIx*/
                     HAS_OPTION(XTENSA_OPTION_INTERRUPT);
-                    gen_check_privilege(dc);
-                    gen_waiti(dc, RRR_S);
+                    if (gen_check_privilege(dc)) {
+                        gen_waiti(dc, RRR_S);
+                    }
                     break;
 
                 case 8: /*ANY4p*/
@@ -1287,35 +1284,39 @@
                 break;
 
             case 1: /*AND*/
-                gen_window_check3(dc, RRR_R, RRR_S, RRR_T);
-                tcg_gen_and_i32(cpu_R[RRR_R], cpu_R[RRR_S], cpu_R[RRR_T]);
+                if (gen_window_check3(dc, RRR_R, RRR_S, RRR_T)) {
+                    tcg_gen_and_i32(cpu_R[RRR_R], cpu_R[RRR_S], cpu_R[RRR_T]);
+                }
                 break;
 
             case 2: /*OR*/
-                gen_window_check3(dc, RRR_R, RRR_S, RRR_T);
-                tcg_gen_or_i32(cpu_R[RRR_R], cpu_R[RRR_S], cpu_R[RRR_T]);
+                if (gen_window_check3(dc, RRR_R, RRR_S, RRR_T)) {
+                    tcg_gen_or_i32(cpu_R[RRR_R], cpu_R[RRR_S], cpu_R[RRR_T]);
+                }
                 break;
 
             case 3: /*XOR*/
-                gen_window_check3(dc, RRR_R, RRR_S, RRR_T);
-                tcg_gen_xor_i32(cpu_R[RRR_R], cpu_R[RRR_S], cpu_R[RRR_T]);
+                if (gen_window_check3(dc, RRR_R, RRR_S, RRR_T)) {
+                    tcg_gen_xor_i32(cpu_R[RRR_R], cpu_R[RRR_S], cpu_R[RRR_T]);
+                }
                 break;
 
             case 4: /*ST1*/
                 switch (RRR_R) {
                 case 0: /*SSR*/
-                    gen_window_check1(dc, RRR_S);
-                    gen_right_shift_sar(dc, cpu_R[RRR_S]);
+                    if (gen_window_check1(dc, RRR_S)) {
+                        gen_right_shift_sar(dc, cpu_R[RRR_S]);
+                    }
                     break;
 
                 case 1: /*SSL*/
-                    gen_window_check1(dc, RRR_S);
-                    gen_left_shift_sar(dc, cpu_R[RRR_S]);
+                    if (gen_window_check1(dc, RRR_S)) {
+                        gen_left_shift_sar(dc, cpu_R[RRR_S]);
+                    }
                     break;
 
                 case 2: /*SSA8L*/
-                    gen_window_check1(dc, RRR_S);
-                    {
+                    if (gen_window_check1(dc, RRR_S)) {
                         TCGv_i32 tmp = tcg_temp_new_i32();
                         tcg_gen_shli_i32(tmp, cpu_R[RRR_S], 3);
                         gen_right_shift_sar(dc, tmp);
@@ -1324,8 +1325,7 @@
                     break;
 
                 case 3: /*SSA8B*/
-                    gen_window_check1(dc, RRR_S);
-                    {
+                    if (gen_window_check1(dc, RRR_S)) {
                         TCGv_i32 tmp = tcg_temp_new_i32();
                         tcg_gen_shli_i32(tmp, cpu_R[RRR_S], 3);
                         gen_left_shift_sar(dc, tmp);
@@ -1352,26 +1352,28 @@
 
                 case 8: /*ROTWw*/
                     HAS_OPTION(XTENSA_OPTION_WINDOWED_REGISTER);
-                    gen_check_privilege(dc);
-                    {
+                    if (gen_check_privilege(dc)) {
                         TCGv_i32 tmp = tcg_const_i32(
                                 RRR_T | ((RRR_T & 8) ? 0xfffffff0 : 0));
                         gen_helper_rotw(cpu_env, tmp);
                         tcg_temp_free(tmp);
-                        reset_used_window(dc);
+                        /* This can change tb->flags, so exit tb */
+                        gen_jumpi_check_loop_end(dc, -1);
                     }
                     break;
 
                 case 14: /*NSAu*/
                     HAS_OPTION(XTENSA_OPTION_MISC_OP_NSA);
-                    gen_window_check2(dc, RRR_S, RRR_T);
-                    gen_helper_nsa(cpu_R[RRR_T], cpu_R[RRR_S]);
+                    if (gen_window_check2(dc, RRR_S, RRR_T)) {
+                        gen_helper_nsa(cpu_R[RRR_T], cpu_R[RRR_S]);
+                    }
                     break;
 
                 case 15: /*NSAUu*/
                     HAS_OPTION(XTENSA_OPTION_MISC_OP_NSA);
-                    gen_window_check2(dc, RRR_S, RRR_T);
-                    gen_helper_nsau(cpu_R[RRR_T], cpu_R[RRR_S]);
+                    if (gen_window_check2(dc, RRR_S, RRR_T)) {
+                        gen_helper_nsau(cpu_R[RRR_T], cpu_R[RRR_S]);
+                    }
                     break;
 
                 default: /*reserved*/
@@ -1385,9 +1387,8 @@
                         XTENSA_OPTION_BIT(XTENSA_OPTION_MMU) |
                         XTENSA_OPTION_BIT(XTENSA_OPTION_REGION_PROTECTION) |
                         XTENSA_OPTION_BIT(XTENSA_OPTION_REGION_TRANSLATION));
-                gen_check_privilege(dc);
-                gen_window_check2(dc, RRR_S, RRR_T);
-                {
+                if (gen_check_privilege(dc) &&
+                    gen_window_check2(dc, RRR_S, RRR_T)) {
                     TCGv_i32 dtlb = tcg_const_i32((RRR_R & 8) != 0);
 
                     switch (RRR_R & 7) {
@@ -1430,7 +1431,9 @@
                 break;
 
             case 6: /*RT0*/
-                gen_window_check2(dc, RRR_R, RRR_T);
+                if (!gen_window_check2(dc, RRR_R, RRR_T)) {
+                    break;
+                }
                 switch (RRR_S) {
                 case 0: /*NEG*/
                     tcg_gen_neg_i32(cpu_R[RRR_R], cpu_R[RRR_T]);
@@ -1460,15 +1463,15 @@
                 break;
 
             case 8: /*ADD*/
-                gen_window_check3(dc, RRR_R, RRR_S, RRR_T);
-                tcg_gen_add_i32(cpu_R[RRR_R], cpu_R[RRR_S], cpu_R[RRR_T]);
+                if (gen_window_check3(dc, RRR_R, RRR_S, RRR_T)) {
+                    tcg_gen_add_i32(cpu_R[RRR_R], cpu_R[RRR_S], cpu_R[RRR_T]);
+                }
                 break;
 
             case 9: /*ADD**/
             case 10:
             case 11:
-                gen_window_check3(dc, RRR_R, RRR_S, RRR_T);
-                {
+                if (gen_window_check3(dc, RRR_R, RRR_S, RRR_T)) {
                     TCGv_i32 tmp = tcg_temp_new_i32();
                     tcg_gen_shli_i32(tmp, cpu_R[RRR_S], OP2 - 8);
                     tcg_gen_add_i32(cpu_R[RRR_R], tmp, cpu_R[RRR_T]);
@@ -1477,15 +1480,15 @@
                 break;
 
             case 12: /*SUB*/
-                gen_window_check3(dc, RRR_R, RRR_S, RRR_T);
-                tcg_gen_sub_i32(cpu_R[RRR_R], cpu_R[RRR_S], cpu_R[RRR_T]);
+                if (gen_window_check3(dc, RRR_R, RRR_S, RRR_T)) {
+                    tcg_gen_sub_i32(cpu_R[RRR_R], cpu_R[RRR_S], cpu_R[RRR_T]);
+                }
                 break;
 
             case 13: /*SUB**/
             case 14:
             case 15:
-                gen_window_check3(dc, RRR_R, RRR_S, RRR_T);
-                {
+                if (gen_window_check3(dc, RRR_R, RRR_S, RRR_T)) {
                     TCGv_i32 tmp = tcg_temp_new_i32();
                     tcg_gen_shli_i32(tmp, cpu_R[RRR_S], OP2 - 12);
                     tcg_gen_sub_i32(cpu_R[RRR_R], tmp, cpu_R[RRR_T]);
@@ -1499,31 +1502,32 @@
             switch (OP2) {
             case 0: /*SLLI*/
             case 1:
-                gen_window_check2(dc, RRR_R, RRR_S);
-                tcg_gen_shli_i32(cpu_R[RRR_R], cpu_R[RRR_S],
-                        32 - (RRR_T | ((OP2 & 1) << 4)));
+                if (gen_window_check2(dc, RRR_R, RRR_S)) {
+                    tcg_gen_shli_i32(cpu_R[RRR_R], cpu_R[RRR_S],
+                                     32 - (RRR_T | ((OP2 & 1) << 4)));
+                }
                 break;
 
             case 2: /*SRAI*/
             case 3:
-                gen_window_check2(dc, RRR_R, RRR_T);
-                tcg_gen_sari_i32(cpu_R[RRR_R], cpu_R[RRR_T],
-                        RRR_S | ((OP2 & 1) << 4));
+                if (gen_window_check2(dc, RRR_R, RRR_T)) {
+                    tcg_gen_sari_i32(cpu_R[RRR_R], cpu_R[RRR_T],
+                                     RRR_S | ((OP2 & 1) << 4));
+                }
                 break;
 
             case 4: /*SRLI*/
-                gen_window_check2(dc, RRR_R, RRR_T);
-                tcg_gen_shri_i32(cpu_R[RRR_R], cpu_R[RRR_T], RRR_S);
+                if (gen_window_check2(dc, RRR_R, RRR_T)) {
+                    tcg_gen_shri_i32(cpu_R[RRR_R], cpu_R[RRR_T], RRR_S);
+                }
                 break;
 
             case 6: /*XSR*/
-                if (gen_check_sr(dc, RSR_SR, SR_X)) {
+                if (gen_check_sr(dc, RSR_SR, SR_X) &&
+                    (RSR_SR < 64 || gen_check_privilege(dc)) &&
+                    gen_window_check1(dc, RRR_T)) {
                     TCGv_i32 tmp = tcg_temp_new_i32();
 
-                    if (RSR_SR >= 64) {
-                        gen_check_privilege(dc);
-                    }
-                    gen_window_check1(dc, RRR_T);
                     tcg_gen_mov_i32(tmp, cpu_R[RRR_T]);
                     gen_rsr(dc, cpu_R[RRR_T], RSR_SR);
                     gen_wsr(dc, RSR_SR, tmp);
@@ -1547,8 +1551,7 @@
 #define gen_shift(cmd) gen_shift_reg(cmd, cpu_SR[SAR])
 
             case 8: /*SRC*/
-                gen_window_check3(dc, RRR_R, RRR_S, RRR_T);
-                {
+                if (gen_window_check3(dc, RRR_R, RRR_S, RRR_T)) {
                     TCGv_i64 v = tcg_temp_new_i64();
                     tcg_gen_concat_i32_i64(v, cpu_R[RRR_T], cpu_R[RRR_S]);
                     gen_shift(shr);
@@ -1556,7 +1559,9 @@
                 break;
 
             case 9: /*SRL*/
-                gen_window_check2(dc, RRR_R, RRR_T);
+                if (!gen_window_check2(dc, RRR_R, RRR_T)) {
+                    break;
+                }
                 if (dc->sar_5bit) {
                     tcg_gen_shr_i32(cpu_R[RRR_R], cpu_R[RRR_T], cpu_SR[SAR]);
                 } else {
@@ -1567,7 +1572,9 @@
                 break;
 
             case 10: /*SLL*/
-                gen_window_check2(dc, RRR_R, RRR_S);
+                if (!gen_window_check2(dc, RRR_R, RRR_S)) {
+                    break;
+                }
                 if (dc->sar_m32_5bit) {
                     tcg_gen_shl_i32(cpu_R[RRR_R], cpu_R[RRR_S], dc->sar_m32);
                 } else {
@@ -1582,7 +1589,9 @@
                 break;
 
             case 11: /*SRA*/
-                gen_window_check2(dc, RRR_R, RRR_T);
+                if (!gen_window_check2(dc, RRR_R, RRR_T)) {
+                    break;
+                }
                 if (dc->sar_5bit) {
                     tcg_gen_sar_i32(cpu_R[RRR_R], cpu_R[RRR_T], cpu_SR[SAR]);
                 } else {
@@ -1596,8 +1605,7 @@
 
             case 12: /*MUL16U*/
                 HAS_OPTION(XTENSA_OPTION_16_BIT_IMUL);
-                gen_window_check3(dc, RRR_R, RRR_S, RRR_T);
-                {
+                if (gen_window_check3(dc, RRR_R, RRR_S, RRR_T)) {
                     TCGv_i32 v1 = tcg_temp_new_i32();
                     TCGv_i32 v2 = tcg_temp_new_i32();
                     tcg_gen_ext16u_i32(v1, cpu_R[RRR_S]);
@@ -1610,8 +1618,7 @@
 
             case 13: /*MUL16S*/
                 HAS_OPTION(XTENSA_OPTION_16_BIT_IMUL);
-                gen_window_check3(dc, RRR_R, RRR_S, RRR_T);
-                {
+                if (gen_window_check3(dc, RRR_R, RRR_S, RRR_T)) {
                     TCGv_i32 v1 = tcg_temp_new_i32();
                     TCGv_i32 v2 = tcg_temp_new_i32();
                     tcg_gen_ext16s_i32(v1, cpu_R[RRR_S]);
@@ -1629,8 +1636,8 @@
             break;
 
         case 2: /*RST2*/
-            if (OP2 >= 8) {
-                gen_window_check3(dc, RRR_R, RRR_S, RRR_T);
+            if (OP2 >= 8 && !gen_window_check3(dc, RRR_R, RRR_S, RRR_T)) {
+                break;
             }
 
             if (OP2 >= 12) {
@@ -1742,29 +1749,24 @@
         case 3: /*RST3*/
             switch (OP2) {
             case 0: /*RSR*/
-                if (gen_check_sr(dc, RSR_SR, SR_R)) {
-                    if (RSR_SR >= 64) {
-                        gen_check_privilege(dc);
-                    }
-                    gen_window_check1(dc, RRR_T);
+                if (gen_check_sr(dc, RSR_SR, SR_R) &&
+                    (RSR_SR < 64 || gen_check_privilege(dc)) &&
+                    gen_window_check1(dc, RRR_T)) {
                     gen_rsr(dc, cpu_R[RRR_T], RSR_SR);
                 }
                 break;
 
             case 1: /*WSR*/
-                if (gen_check_sr(dc, RSR_SR, SR_W)) {
-                    if (RSR_SR >= 64) {
-                        gen_check_privilege(dc);
-                    }
-                    gen_window_check1(dc, RRR_T);
+                if (gen_check_sr(dc, RSR_SR, SR_W) &&
+                    (RSR_SR < 64 || gen_check_privilege(dc)) &&
+                    gen_window_check1(dc, RRR_T)) {
                     gen_wsr(dc, RSR_SR, cpu_R[RRR_T]);
                 }
                 break;
 
             case 2: /*SEXTu*/
                 HAS_OPTION(XTENSA_OPTION_MISC_OP_SEXT);
-                gen_window_check2(dc, RRR_R, RRR_S);
-                {
+                if (gen_window_check2(dc, RRR_R, RRR_S)) {
                     int shift = 24 - RRR_T;
 
                     if (shift == 24) {
@@ -1782,8 +1784,7 @@
 
             case 3: /*CLAMPSu*/
                 HAS_OPTION(XTENSA_OPTION_MISC_OP_CLAMPS);
-                gen_window_check2(dc, RRR_R, RRR_S);
-                {
+                if (gen_window_check2(dc, RRR_R, RRR_S)) {
                     TCGv_i32 tmp1 = tcg_temp_new_i32();
                     TCGv_i32 tmp2 = tcg_temp_new_i32();
                     TCGv_i32 zero = tcg_const_i32(0);
@@ -1808,8 +1809,7 @@
             case 6: /*MINUu*/
             case 7: /*MAXUu*/
                 HAS_OPTION(XTENSA_OPTION_MISC_OP_MINMAX);
-                gen_window_check3(dc, RRR_R, RRR_S, RRR_T);
-                {
+                if (gen_window_check3(dc, RRR_R, RRR_S, RRR_T)) {
                     static const TCGCond cond[] = {
                         TCG_COND_LE,
                         TCG_COND_GE,
@@ -1826,8 +1826,7 @@
             case 9: /*MOVNEZ*/
             case 10: /*MOVLTZ*/
             case 11: /*MOVGEZ*/
-                gen_window_check3(dc, RRR_R, RRR_S, RRR_T);
-                {
+                if (gen_window_check3(dc, RRR_R, RRR_S, RRR_T)) {
                     static const TCGCond cond[] = {
                         TCG_COND_EQ,
                         TCG_COND_NE,
@@ -1845,8 +1844,7 @@
             case 12: /*MOVFp*/
             case 13: /*MOVTp*/
                 HAS_OPTION(XTENSA_OPTION_BOOLEAN);
-                gen_window_check2(dc, RRR_R, RRR_S);
-                {
+                if (gen_window_check2(dc, RRR_R, RRR_S)) {
                     TCGv_i32 zero = tcg_const_i32(0);
                     TCGv_i32 tmp = tcg_temp_new_i32();
 
@@ -1861,8 +1859,7 @@
                 break;
 
             case 14: /*RUR*/
-                gen_window_check1(dc, RRR_R);
-                {
+                if (gen_window_check1(dc, RRR_R)) {
                     int st = (RRR_S << 4) + RRR_T;
                     if (uregnames[st].name) {
                         tcg_gen_mov_i32(cpu_R[RRR_R], cpu_UR[st]);
@@ -1874,12 +1871,13 @@
                 break;
 
             case 15: /*WUR*/
-                gen_window_check1(dc, RRR_T);
-                if (uregnames[RSR_SR].name) {
-                    gen_wur(RSR_SR, cpu_R[RRR_T]);
-                } else {
-                    qemu_log("WUR %d not implemented, ", RSR_SR);
-                    TBD();
+                if (gen_window_check1(dc, RRR_T)) {
+                    if (uregnames[RSR_SR].name) {
+                        gen_wur(RSR_SR, cpu_R[RRR_T]);
+                    } else {
+                        qemu_log("WUR %d not implemented, ", RSR_SR);
+                        TBD();
+                    }
                 }
                 break;
 
@@ -1888,8 +1886,7 @@
 
         case 4: /*EXTUI*/
         case 5:
-            gen_window_check2(dc, RRR_R, RRR_T);
-            {
+            if (gen_window_check2(dc, RRR_R, RRR_T)) {
                 int shiftimm = RRR_S | ((OP1 & 1) << 4);
                 int maskimm = (1 << (OP2 + 1)) - 1;
 
@@ -1915,9 +1912,8 @@
             case 4: /*SSXf*/
             case 5: /*SSXUf*/
                 HAS_OPTION(XTENSA_OPTION_FP_COPROCESSOR);
-                gen_window_check2(dc, RRR_S, RRR_T);
-                gen_check_cpenable(dc, 0);
-                {
+                if (gen_window_check2(dc, RRR_S, RRR_T) &&
+                    gen_check_cpenable(dc, 0)) {
                     TCGv_i32 addr = tcg_temp_new_i32();
                     tcg_gen_add_i32(addr, cpu_R[RRR_S], cpu_R[RRR_T]);
                     gen_load_store_alignment(dc, 2, addr, false);
@@ -1940,12 +1936,13 @@
             break;
 
         case 9: /*LSC4*/
-            gen_window_check2(dc, RRR_S, RRR_T);
+            if (!gen_window_check2(dc, RRR_S, RRR_T)) {
+                break;
+            }
             switch (OP2) {
             case 0: /*L32E*/
                 HAS_OPTION(XTENSA_OPTION_WINDOWED_REGISTER);
-                gen_check_privilege(dc);
-                {
+                if (gen_check_privilege(dc)) {
                     TCGv_i32 addr = tcg_temp_new_i32();
                     tcg_gen_addi_i32(addr, cpu_R[RRR_S],
                             (0xffffffc0 | (RRR_R << 2)));
@@ -1956,8 +1953,7 @@
 
             case 4: /*S32E*/
                 HAS_OPTION(XTENSA_OPTION_WINDOWED_REGISTER);
-                gen_check_privilege(dc);
-                {
+                if (gen_check_privilege(dc)) {
                     TCGv_i32 addr = tcg_temp_new_i32();
                     tcg_gen_addi_i32(addr, cpu_R[RRR_S],
                             (0xffffffc0 | (RRR_R << 2)));
@@ -1976,33 +1972,40 @@
             HAS_OPTION(XTENSA_OPTION_FP_COPROCESSOR);
             switch (OP2) {
             case 0: /*ADD.Sf*/
-                gen_check_cpenable(dc, 0);
-                gen_helper_add_s(cpu_FR[RRR_R], cpu_env,
-                        cpu_FR[RRR_S], cpu_FR[RRR_T]);
+                if (gen_check_cpenable(dc, 0)) {
+                    gen_helper_add_s(cpu_FR[RRR_R], cpu_env,
+                                     cpu_FR[RRR_S], cpu_FR[RRR_T]);
+                }
                 break;
 
             case 1: /*SUB.Sf*/
-                gen_check_cpenable(dc, 0);
-                gen_helper_sub_s(cpu_FR[RRR_R], cpu_env,
-                        cpu_FR[RRR_S], cpu_FR[RRR_T]);
+                if (gen_check_cpenable(dc, 0)) {
+                    gen_helper_sub_s(cpu_FR[RRR_R], cpu_env,
+                                     cpu_FR[RRR_S], cpu_FR[RRR_T]);
+                }
                 break;
 
             case 2: /*MUL.Sf*/
-                gen_check_cpenable(dc, 0);
-                gen_helper_mul_s(cpu_FR[RRR_R], cpu_env,
-                        cpu_FR[RRR_S], cpu_FR[RRR_T]);
+                if (gen_check_cpenable(dc, 0)) {
+                    gen_helper_mul_s(cpu_FR[RRR_R], cpu_env,
+                                     cpu_FR[RRR_S], cpu_FR[RRR_T]);
+                }
                 break;
 
             case 4: /*MADD.Sf*/
-                gen_check_cpenable(dc, 0);
-                gen_helper_madd_s(cpu_FR[RRR_R], cpu_env,
-                        cpu_FR[RRR_R], cpu_FR[RRR_S], cpu_FR[RRR_T]);
+                if (gen_check_cpenable(dc, 0)) {
+                    gen_helper_madd_s(cpu_FR[RRR_R], cpu_env,
+                                      cpu_FR[RRR_R], cpu_FR[RRR_S],
+                                      cpu_FR[RRR_T]);
+                }
                 break;
 
             case 5: /*MSUB.Sf*/
-                gen_check_cpenable(dc, 0);
-                gen_helper_msub_s(cpu_FR[RRR_R], cpu_env,
-                        cpu_FR[RRR_R], cpu_FR[RRR_S], cpu_FR[RRR_T]);
+                if (gen_check_cpenable(dc, 0)) {
+                    gen_helper_msub_s(cpu_FR[RRR_R], cpu_env,
+                                      cpu_FR[RRR_R], cpu_FR[RRR_S],
+                                      cpu_FR[RRR_T]);
+                }
                 break;
 
             case 8: /*ROUND.Sf*/
@@ -2010,9 +2013,8 @@
             case 10: /*FLOOR.Sf*/
             case 11: /*CEIL.Sf*/
             case 14: /*UTRUNC.Sf*/
-                gen_window_check1(dc, RRR_R);
-                gen_check_cpenable(dc, 0);
-                {
+                if (gen_window_check1(dc, RRR_R) &&
+                    gen_check_cpenable(dc, 0)) {
                     static const unsigned rounding_mode_const[] = {
                         float_round_nearest_even,
                         float_round_to_zero,
@@ -2039,9 +2041,8 @@
 
             case 12: /*FLOAT.Sf*/
             case 13: /*UFLOAT.Sf*/
-                gen_window_check1(dc, RRR_S);
-                gen_check_cpenable(dc, 0);
-                {
+                if (gen_window_check1(dc, RRR_S) &&
+                    gen_check_cpenable(dc, 0)) {
                     TCGv_i32 scale = tcg_const_i32(-RRR_T);
 
                     if (OP2 == 13) {
@@ -2058,30 +2059,35 @@
             case 15: /*FP1OP*/
                 switch (RRR_T) {
                 case 0: /*MOV.Sf*/
-                    gen_check_cpenable(dc, 0);
-                    tcg_gen_mov_i32(cpu_FR[RRR_R], cpu_FR[RRR_S]);
+                    if (gen_check_cpenable(dc, 0)) {
+                        tcg_gen_mov_i32(cpu_FR[RRR_R], cpu_FR[RRR_S]);
+                    }
                     break;
 
                 case 1: /*ABS.Sf*/
-                    gen_check_cpenable(dc, 0);
-                    gen_helper_abs_s(cpu_FR[RRR_R], cpu_FR[RRR_S]);
+                    if (gen_check_cpenable(dc, 0)) {
+                        gen_helper_abs_s(cpu_FR[RRR_R], cpu_FR[RRR_S]);
+                    }
                     break;
 
                 case 4: /*RFRf*/
-                    gen_window_check1(dc, RRR_R);
-                    gen_check_cpenable(dc, 0);
-                    tcg_gen_mov_i32(cpu_R[RRR_R], cpu_FR[RRR_S]);
+                    if (gen_window_check1(dc, RRR_R) &&
+                        gen_check_cpenable(dc, 0)) {
+                        tcg_gen_mov_i32(cpu_R[RRR_R], cpu_FR[RRR_S]);
+                    }
                     break;
 
                 case 5: /*WFRf*/
-                    gen_window_check1(dc, RRR_S);
-                    gen_check_cpenable(dc, 0);
-                    tcg_gen_mov_i32(cpu_FR[RRR_R], cpu_R[RRR_S]);
+                    if (gen_window_check1(dc, RRR_S) &&
+                        gen_check_cpenable(dc, 0)) {
+                        tcg_gen_mov_i32(cpu_FR[RRR_R], cpu_R[RRR_S]);
+                    }
                     break;
 
                 case 6: /*NEG.Sf*/
-                    gen_check_cpenable(dc, 0);
-                    gen_helper_neg_s(cpu_FR[RRR_R], cpu_FR[RRR_S]);
+                    if (gen_check_cpenable(dc, 0)) {
+                        gen_helper_neg_s(cpu_FR[RRR_R], cpu_FR[RRR_S]);
+                    }
                     break;
 
                 default: /*reserved*/
@@ -2101,11 +2107,12 @@
 
 #define gen_compare(rel, br, a, b) \
     do { \
-        TCGv_i32 bit = tcg_const_i32(1 << br); \
-        \
-        gen_check_cpenable(dc, 0); \
-        gen_helper_##rel(cpu_env, bit, cpu_FR[a], cpu_FR[b]); \
-        tcg_temp_free(bit); \
+        if (gen_check_cpenable(dc, 0)) { \
+            TCGv_i32 bit = tcg_const_i32(1 << br); \
+            \
+            gen_helper_##rel(cpu_env, bit, cpu_FR[a], cpu_FR[b]); \
+            tcg_temp_free(bit); \
+        } \
     } while (0)
 
             switch (OP2) {
@@ -2143,9 +2150,8 @@
             case 9: /*MOVNEZ.Sf*/
             case 10: /*MOVLTZ.Sf*/
             case 11: /*MOVGEZ.Sf*/
-                gen_window_check1(dc, RRR_T);
-                gen_check_cpenable(dc, 0);
-                {
+                if (gen_window_check1(dc, RRR_T) &&
+                    gen_check_cpenable(dc, 0)) {
                     static const TCGCond cond[] = {
                         TCG_COND_EQ,
                         TCG_COND_NE,
@@ -2163,8 +2169,7 @@
             case 12: /*MOVF.Sf*/
             case 13: /*MOVT.Sf*/
                 HAS_OPTION(XTENSA_OPTION_BOOLEAN);
-                gen_check_cpenable(dc, 0);
-                {
+                if (gen_check_cpenable(dc, 0)) {
                     TCGv_i32 zero = tcg_const_i32(0);
                     TCGv_i32 tmp = tcg_temp_new_i32();
 
@@ -2191,8 +2196,7 @@
         break;
 
     case 1: /*L32R*/
-        gen_window_check1(dc, RRR_T);
-        {
+        if (gen_window_check1(dc, RRR_T)) {
             TCGv_i32 tmp = tcg_const_i32(
                     ((dc->tb->flags & XTENSA_TBFLAG_LITBASE) ?
                      0 : ((dc->pc + 3) & ~3)) +
@@ -2208,14 +2212,16 @@
 
     case 2: /*LSAI*/
 #define gen_load_store(type, shift) do { \
-            TCGv_i32 addr = tcg_temp_new_i32(); \
-            gen_window_check2(dc, RRI8_S, RRI8_T); \
-            tcg_gen_addi_i32(addr, cpu_R[RRI8_S], RRI8_IMM8 << shift); \
-            if (shift) { \
-                gen_load_store_alignment(dc, shift, addr, false); \
+            if (gen_window_check2(dc, RRI8_S, RRI8_T)) { \
+                TCGv_i32 addr = tcg_temp_new_i32(); \
+                \
+                tcg_gen_addi_i32(addr, cpu_R[RRI8_S], RRI8_IMM8 << shift); \
+                if (shift) { \
+                    gen_load_store_alignment(dc, shift, addr, false); \
+                } \
+                tcg_gen_qemu_##type(cpu_R[RRI8_T], addr, dc->cring); \
+                tcg_temp_free(addr); \
             } \
-            tcg_gen_qemu_##type(cpu_R[RRI8_T], addr, dc->cring); \
-            tcg_temp_free(addr); \
         } while (0)
 
         switch (RRI8_R) {
@@ -2244,14 +2250,15 @@
             break;
 
 #define gen_dcache_hit_test(w, shift) do { \
-            TCGv_i32 addr = tcg_temp_new_i32(); \
-            TCGv_i32 res = tcg_temp_new_i32(); \
-            gen_window_check1(dc, RRI##w##_S); \
-            tcg_gen_addi_i32(addr, cpu_R[RRI##w##_S], \
-                             RRI##w##_IMM##w << shift); \
-            tcg_gen_qemu_ld8u(res, addr, dc->cring); \
-            tcg_temp_free(addr); \
-            tcg_temp_free(res); \
+            if (gen_window_check1(dc, RRI##w##_S)) { \
+                TCGv_i32 addr = tcg_temp_new_i32(); \
+                TCGv_i32 res = tcg_temp_new_i32(); \
+                tcg_gen_addi_i32(addr, cpu_R[RRI##w##_S], \
+                                 RRI##w##_IMM##w << shift); \
+                tcg_gen_qemu_ld8u(res, addr, dc->cring); \
+                tcg_temp_free(addr); \
+                tcg_temp_free(res); \
+            } \
         } while (0)
 
 #define gen_dcache_hit_test4() gen_dcache_hit_test(4, 4)
@@ -2288,45 +2295,52 @@
                 break;
 
             case 6: /*DHIc*/
-                gen_check_privilege(dc);
-                gen_dcache_hit_test8();
+                if (gen_check_privilege(dc)) {
+                    gen_dcache_hit_test8();
+                }
                 break;
 
             case 7: /*DIIc*/
-                gen_check_privilege(dc);
-                gen_window_check1(dc, RRI8_S);
+                if (gen_check_privilege(dc)) {
+                    gen_window_check1(dc, RRI8_S);
+                }
                 break;
 
             case 8: /*DCEc*/
                 switch (OP1) {
                 case 0: /*DPFLl*/
                     HAS_OPTION(XTENSA_OPTION_DCACHE_INDEX_LOCK);
-                    gen_check_privilege(dc);
-                    gen_dcache_hit_test4();
+                    if (gen_check_privilege(dc)) {
+                        gen_dcache_hit_test4();
+                    }
                     break;
 
                 case 2: /*DHUl*/
                     HAS_OPTION(XTENSA_OPTION_DCACHE_INDEX_LOCK);
-                    gen_check_privilege(dc);
-                    gen_dcache_hit_test4();
+                    if (gen_check_privilege(dc)) {
+                        gen_dcache_hit_test4();
+                    }
                     break;
 
                 case 3: /*DIUl*/
                     HAS_OPTION(XTENSA_OPTION_DCACHE_INDEX_LOCK);
-                    gen_check_privilege(dc);
-                    gen_window_check1(dc, RRI4_S);
+                    if (gen_check_privilege(dc)) {
+                        gen_window_check1(dc, RRI4_S);
+                    }
                     break;
 
                 case 4: /*DIWBc*/
                     HAS_OPTION(XTENSA_OPTION_DCACHE);
-                    gen_check_privilege(dc);
-                    gen_window_check1(dc, RRI4_S);
+                    if (gen_check_privilege(dc)) {
+                        gen_window_check1(dc, RRI4_S);
+                    }
                     break;
 
                 case 5: /*DIWBIc*/
                     HAS_OPTION(XTENSA_OPTION_DCACHE);
-                    gen_check_privilege(dc);
-                    gen_window_check1(dc, RRI4_S);
+                    if (gen_check_privilege(dc)) {
+                        gen_window_check1(dc, RRI4_S);
+                    }
                     break;
 
                 default: /*reserved*/
@@ -2341,13 +2355,14 @@
 #undef gen_dcache_hit_test8
 
 #define gen_icache_hit_test(w, shift) do { \
-            TCGv_i32 addr = tcg_temp_new_i32(); \
-            gen_window_check1(dc, RRI##w##_S); \
-            tcg_gen_movi_i32(cpu_pc, dc->pc); \
-            tcg_gen_addi_i32(addr, cpu_R[RRI##w##_S], \
-                             RRI##w##_IMM##w << shift); \
-            gen_helper_itlb_hit_test(cpu_env, addr); \
-            tcg_temp_free(addr); \
+            if (gen_window_check1(dc, RRI##w##_S)) { \
+                TCGv_i32 addr = tcg_temp_new_i32(); \
+                tcg_gen_movi_i32(cpu_pc, dc->pc); \
+                tcg_gen_addi_i32(addr, cpu_R[RRI##w##_S], \
+                                 RRI##w##_IMM##w << shift); \
+                gen_helper_itlb_hit_test(cpu_env, addr); \
+                tcg_temp_free(addr); \
+            }\
         } while (0)
 
 #define gen_icache_hit_test4() gen_icache_hit_test(4, 4)
@@ -2362,20 +2377,23 @@
                 switch (OP1) {
                 case 0: /*IPFLl*/
                     HAS_OPTION(XTENSA_OPTION_ICACHE_INDEX_LOCK);
-                    gen_check_privilege(dc);
-                    gen_icache_hit_test4();
+                    if (gen_check_privilege(dc)) {
+                        gen_icache_hit_test4();
+                    }
                     break;
 
                 case 2: /*IHUl*/
                     HAS_OPTION(XTENSA_OPTION_ICACHE_INDEX_LOCK);
-                    gen_check_privilege(dc);
-                    gen_icache_hit_test4();
+                    if (gen_check_privilege(dc)) {
+                        gen_icache_hit_test4();
+                    }
                     break;
 
                 case 3: /*IIUl*/
                     HAS_OPTION(XTENSA_OPTION_ICACHE_INDEX_LOCK);
-                    gen_check_privilege(dc);
-                    gen_window_check1(dc, RRI4_S);
+                    if (gen_check_privilege(dc)) {
+                        gen_window_check1(dc, RRI4_S);
+                    }
                     break;
 
                 default: /*reserved*/
@@ -2391,8 +2409,9 @@
 
             case 15: /*IIIc*/
                 HAS_OPTION(XTENSA_OPTION_ICACHE);
-                gen_check_privilege(dc);
-                gen_window_check1(dc, RRI8_S);
+                if (gen_check_privilege(dc)) {
+                    gen_window_check1(dc, RRI8_S);
+                }
                 break;
 
             default: /*reserved*/
@@ -2411,19 +2430,21 @@
 #undef gen_load_store
 
         case 10: /*MOVI*/
-            gen_window_check1(dc, RRI8_T);
-            tcg_gen_movi_i32(cpu_R[RRI8_T],
-                    RRI8_IMM8 | (RRI8_S << 8) |
-                    ((RRI8_S & 0x8) ? 0xfffff000 : 0));
+            if (gen_window_check1(dc, RRI8_T)) {
+                tcg_gen_movi_i32(cpu_R[RRI8_T],
+                                 RRI8_IMM8 | (RRI8_S << 8) |
+                                 ((RRI8_S & 0x8) ? 0xfffff000 : 0));
+            }
             break;
 
 #define gen_load_store_no_hw_align(type) do { \
-            TCGv_i32 addr = tcg_temp_local_new_i32(); \
-            gen_window_check2(dc, RRI8_S, RRI8_T); \
-            tcg_gen_addi_i32(addr, cpu_R[RRI8_S], RRI8_IMM8 << 2); \
-            gen_load_store_alignment(dc, 2, addr, true); \
-            tcg_gen_qemu_##type(cpu_R[RRI8_T], addr, dc->cring); \
-            tcg_temp_free(addr); \
+            if (gen_window_check2(dc, RRI8_S, RRI8_T)) { \
+                TCGv_i32 addr = tcg_temp_local_new_i32(); \
+                tcg_gen_addi_i32(addr, cpu_R[RRI8_S], RRI8_IMM8 << 2); \
+                gen_load_store_alignment(dc, 2, addr, true); \
+                tcg_gen_qemu_##type(cpu_R[RRI8_T], addr, dc->cring); \
+                tcg_temp_free(addr); \
+            } \
         } while (0)
 
         case 11: /*L32AIy*/
@@ -2432,19 +2453,21 @@
             break;
 
         case 12: /*ADDI*/
-            gen_window_check2(dc, RRI8_S, RRI8_T);
-            tcg_gen_addi_i32(cpu_R[RRI8_T], cpu_R[RRI8_S], RRI8_IMM8_SE);
+            if (gen_window_check2(dc, RRI8_S, RRI8_T)) {
+                tcg_gen_addi_i32(cpu_R[RRI8_T], cpu_R[RRI8_S], RRI8_IMM8_SE);
+            }
             break;
 
         case 13: /*ADDMI*/
-            gen_window_check2(dc, RRI8_S, RRI8_T);
-            tcg_gen_addi_i32(cpu_R[RRI8_T], cpu_R[RRI8_S], RRI8_IMM8_SE << 8);
+            if (gen_window_check2(dc, RRI8_S, RRI8_T)) {
+                tcg_gen_addi_i32(cpu_R[RRI8_T], cpu_R[RRI8_S],
+                                 RRI8_IMM8_SE << 8);
+            }
             break;
 
         case 14: /*S32C1Iy*/
             HAS_OPTION(XTENSA_OPTION_CONDITIONAL_STORE);
-            gen_window_check2(dc, RRI8_S, RRI8_T);
-            {
+            if (gen_window_check2(dc, RRI8_S, RRI8_T)) {
                 int label = gen_new_label();
                 TCGv_i32 tmp = tcg_temp_local_new_i32();
                 TCGv_i32 addr = tcg_temp_local_new_i32();
@@ -2489,9 +2512,8 @@
         case 8: /*LSIUf*/
         case 12: /*SSIUf*/
             HAS_OPTION(XTENSA_OPTION_FP_COPROCESSOR);
-            gen_window_check1(dc, RRI8_S);
-            gen_check_cpenable(dc, 0);
-            {
+            if (gen_window_check1(dc, RRI8_S) &&
+                gen_check_cpenable(dc, 0)) {
                 TCGv_i32 addr = tcg_temp_new_i32();
                 tcg_gen_addi_i32(addr, cpu_R[RRI8_S], RRI8_IMM8 << 2);
                 gen_load_store_alignment(dc, 2, addr, false);
@@ -2555,20 +2577,23 @@
             }
 
             if (op != MAC16_NONE) {
-                if (!is_m1_sr) {
-                    gen_window_check1(dc, RRR_S);
+                if (!is_m1_sr && !gen_window_check1(dc, RRR_S)) {
+                    break;
                 }
-                if (!is_m2_sr) {
-                    gen_window_check1(dc, RRR_T);
+                if (!is_m2_sr && !gen_window_check1(dc, RRR_T)) {
+                    break;
                 }
             }
 
+            if (ld_offset && !gen_window_check1(dc, RRR_S)) {
+                break;
+            }
+
             {
                 TCGv_i32 vaddr = tcg_temp_new_i32();
                 TCGv_i32 mem32 = tcg_temp_new_i32();
 
                 if (ld_offset) {
-                    gen_window_check1(dc, RRR_S);
                     tcg_gen_addi_i32(vaddr, cpu_R[RRR_S], ld_offset);
                     gen_load_store_alignment(dc, 2, vaddr, false);
                     tcg_gen_qemu_ld32u(mem32, vaddr, dc->cring);
@@ -2632,9 +2657,10 @@
         case 2: /*CALL8w*/
         case 3: /*CALL12w*/
             HAS_OPTION(XTENSA_OPTION_WINDOWED_REGISTER);
-            gen_window_check1(dc, CALL_N << 2);
-            gen_callwi(dc, CALL_N,
-                    (dc->pc & ~3) + (CALL_OFFSET_SE << 2) + 4, 0);
+            if (gen_window_check1(dc, CALL_N << 2)) {
+                gen_callwi(dc, CALL_N,
+                           (dc->pc & ~3) + (CALL_OFFSET_SE << 2) + 4, 0);
+            }
             break;
         }
         break;
@@ -2646,8 +2672,7 @@
             break;
 
         case 1: /*BZ*/
-            gen_window_check1(dc, BRI12_S);
-            {
+            if (gen_window_check1(dc, BRI12_S)) {
                 static const TCGCond cond[] = {
                     TCG_COND_EQ, /*BEQZ*/
                     TCG_COND_NE, /*BNEZ*/
@@ -2661,8 +2686,7 @@
             break;
 
         case 2: /*BI0*/
-            gen_window_check1(dc, BRI8_S);
-            {
+            if (gen_window_check1(dc, BRI8_S)) {
                 static const TCGCond cond[] = {
                     TCG_COND_EQ, /*BEQI*/
                     TCG_COND_NE, /*BNEI*/
@@ -2688,7 +2712,8 @@
                     tcg_temp_free(imm);
                     tcg_temp_free(s);
                     tcg_temp_free(pc);
-                    reset_used_window(dc);
+                    /* This can change tb->flags, so exit tb */
+                    gen_jumpi_check_loop_end(dc, -1);
                 }
                 break;
 
@@ -2711,8 +2736,7 @@
                 case 9: /*LOOPNEZ*/
                 case 10: /*LOOPGTZ*/
                     HAS_OPTION(XTENSA_OPTION_LOOP);
-                    gen_window_check1(dc, RRI8_S);
-                    {
+                    if (gen_window_check1(dc, RRI8_S)) {
                         uint32_t lend = dc->pc + RRI8_IMM8 + 4;
                         TCGv_i32 tmp = tcg_const_i32(lend);
 
@@ -2743,9 +2767,11 @@
 
             case 2: /*BLTUI*/
             case 3: /*BGEUI*/
-                gen_window_check1(dc, BRI8_S);
-                gen_brcondi(dc, BRI8_M == 2 ? TCG_COND_LTU : TCG_COND_GEU,
-                        cpu_R[BRI8_S], B4CONSTU[BRI8_R], 4 + BRI8_IMM8_SE);
+                if (gen_window_check1(dc, BRI8_S)) {
+                    gen_brcondi(dc, BRI8_M == 2 ? TCG_COND_LTU : TCG_COND_GEU,
+                                cpu_R[BRI8_S], B4CONSTU[BRI8_R],
+                                4 + BRI8_IMM8_SE);
+                }
                 break;
             }
             break;
@@ -2759,8 +2785,7 @@
 
             switch (RRI8_R & 7) {
             case 0: /*BNONE*/ /*BANY*/
-                gen_window_check2(dc, RRI8_S, RRI8_T);
-                {
+                if (gen_window_check2(dc, RRI8_S, RRI8_T)) {
                     TCGv_i32 tmp = tcg_temp_new_i32();
                     tcg_gen_and_i32(tmp, cpu_R[RRI8_S], cpu_R[RRI8_T]);
                     gen_brcondi(dc, eq_ne, tmp, 0, 4 + RRI8_IMM8_SE);
@@ -2771,8 +2796,7 @@
             case 1: /*BEQ*/ /*BNE*/
             case 2: /*BLT*/ /*BGE*/
             case 3: /*BLTU*/ /*BGEU*/
-                gen_window_check2(dc, RRI8_S, RRI8_T);
-                {
+                if (gen_window_check2(dc, RRI8_S, RRI8_T)) {
                     static const TCGCond cond[] = {
                         [1] = TCG_COND_EQ,
                         [2] = TCG_COND_LT,
@@ -2787,8 +2811,7 @@
                 break;
 
             case 4: /*BALL*/ /*BNALL*/
-                gen_window_check2(dc, RRI8_S, RRI8_T);
-                {
+                if (gen_window_check2(dc, RRI8_S, RRI8_T)) {
                     TCGv_i32 tmp = tcg_temp_new_i32();
                     tcg_gen_and_i32(tmp, cpu_R[RRI8_S], cpu_R[RRI8_T]);
                     gen_brcond(dc, eq_ne, tmp, cpu_R[RRI8_T],
@@ -2798,8 +2821,7 @@
                 break;
 
             case 5: /*BBC*/ /*BBS*/
-                gen_window_check2(dc, RRI8_S, RRI8_T);
-                {
+                if (gen_window_check2(dc, RRI8_S, RRI8_T)) {
 #ifdef TARGET_WORDS_BIGENDIAN
                     TCGv_i32 bit = tcg_const_i32(0x80000000);
 #else
@@ -2821,8 +2843,7 @@
 
             case 6: /*BBCI*/ /*BBSI*/
             case 7:
-                gen_window_check1(dc, RRI8_S);
-                {
+                if (gen_window_check1(dc, RRI8_S)) {
                     TCGv_i32 tmp = tcg_temp_new_i32();
                     tcg_gen_andi_i32(tmp, cpu_R[RRI8_S],
 #ifdef TARGET_WORDS_BIGENDIAN
@@ -2840,12 +2861,13 @@
         break;
 
 #define gen_narrow_load_store(type) do { \
-            TCGv_i32 addr = tcg_temp_new_i32(); \
-            gen_window_check2(dc, RRRN_S, RRRN_T); \
-            tcg_gen_addi_i32(addr, cpu_R[RRRN_S], RRRN_R << 2); \
-            gen_load_store_alignment(dc, 2, addr, false); \
-            tcg_gen_qemu_##type(cpu_R[RRRN_T], addr, dc->cring); \
-            tcg_temp_free(addr); \
+            if (gen_window_check2(dc, RRRN_S, RRRN_T)) { \
+                TCGv_i32 addr = tcg_temp_new_i32(); \
+                tcg_gen_addi_i32(addr, cpu_R[RRRN_S], RRRN_R << 2); \
+                gen_load_store_alignment(dc, 2, addr, false); \
+                tcg_gen_qemu_##type(cpu_R[RRRN_T], addr, dc->cring); \
+                tcg_temp_free(addr); \
+            } \
         } while (0)
 
     case 8: /*L32I.Nn*/
@@ -2858,17 +2880,22 @@
 #undef gen_narrow_load_store
 
     case 10: /*ADD.Nn*/
-        gen_window_check3(dc, RRRN_R, RRRN_S, RRRN_T);
-        tcg_gen_add_i32(cpu_R[RRRN_R], cpu_R[RRRN_S], cpu_R[RRRN_T]);
+        if (gen_window_check3(dc, RRRN_R, RRRN_S, RRRN_T)) {
+            tcg_gen_add_i32(cpu_R[RRRN_R], cpu_R[RRRN_S], cpu_R[RRRN_T]);
+        }
         break;
 
     case 11: /*ADDI.Nn*/
-        gen_window_check2(dc, RRRN_R, RRRN_S);
-        tcg_gen_addi_i32(cpu_R[RRRN_R], cpu_R[RRRN_S], RRRN_T ? RRRN_T : -1);
+        if (gen_window_check2(dc, RRRN_R, RRRN_S)) {
+            tcg_gen_addi_i32(cpu_R[RRRN_R], cpu_R[RRRN_S],
+                             RRRN_T ? RRRN_T : -1);
+        }
         break;
 
     case 12: /*ST2n*/
-        gen_window_check1(dc, RRRN_S);
+        if (!gen_window_check1(dc, RRRN_S)) {
+            break;
+        }
         if (RRRN_T < 8) { /*MOVI.Nn*/
             tcg_gen_movi_i32(cpu_R[RRRN_S],
                     RRRN_R | (RRRN_T << 4) |
@@ -2884,8 +2911,9 @@
     case 13: /*ST3n*/
         switch (RRRN_R) {
         case 0: /*MOV.Nn*/
-            gen_window_check2(dc, RRRN_S, RRRN_T);
-            tcg_gen_mov_i32(cpu_R[RRRN_T], cpu_R[RRRN_S]);
+            if (gen_window_check2(dc, RRRN_S, RRRN_T)) {
+                tcg_gen_mov_i32(cpu_R[RRRN_T], cpu_R[RRRN_S]);
+            }
             break;
 
         case 15: /*S3*/
@@ -2949,6 +2977,12 @@
 #undef HAS_OPTION
 }
 
+static inline unsigned xtensa_insn_len(CPUXtensaState *env, DisasContext *dc)
+{
+    uint8_t b0 = cpu_ldub_code(env, dc->pc);
+    return xtensa_op0_insn_len(OP0);
+}
+
 static void check_breakpoint(CPUXtensaState *env, DisasContext *dc)
 {
     CPUState *cs = CPU(xtensa_env_get_cpu(env));
@@ -3011,10 +3045,11 @@
     dc.icount = tb->flags & XTENSA_TBFLAG_ICOUNT;
     dc.cpenable = (tb->flags & XTENSA_TBFLAG_CPENABLE_MASK) >>
         XTENSA_TBFLAG_CPENABLE_SHIFT;
+    dc.window = ((tb->flags & XTENSA_TBFLAG_WINDOW_MASK) >>
+                 XTENSA_TBFLAG_WINDOW_SHIFT);
 
     init_litbase(&dc);
     init_sar_tracker(&dc);
-    reset_used_window(&dc);
     if (dc.icount) {
         dc.next_icount = tcg_temp_local_new_i32();
     }
@@ -3081,6 +3116,7 @@
     } while (dc.is_jmp == DISAS_NEXT &&
             insn_count < max_insns &&
             dc.pc < next_page_start &&
+            dc.pc + xtensa_insn_len(env, &dc) <= next_page_start &&
             tcg_ctx.gen_opc_ptr < gen_opc_end);
 
     reset_litbase(&dc);
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 7a84b87..6ff8b51 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -2401,14 +2401,20 @@
 
 static int64_t tcg_table_op_count[NB_OPS];
 
-static void dump_op_count(void)
+void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
 {
     int i;
 
     for(i = INDEX_op_end; i < NB_OPS; i++) {
-        qemu_log("%s %" PRId64 "\n", tcg_op_defs[i].name, tcg_table_op_count[i]);
+        cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name,
+                    tcg_table_op_count[i]);
     }
 }
+#else
+void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
+{
+    cpu_fprintf(f, "[TCG profiler not compiled]\n");
+}
 #endif
 
 
@@ -2620,8 +2626,6 @@
                 s->restore_count);
     cpu_fprintf(f, "  avg cycles        %0.1f\n",
                 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
-
-    dump_op_count();
 }
 #else
 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 7285f71..944b877 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -610,6 +610,7 @@
 #endif
 
 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf);
+void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf);
 
 #define TCG_CT_ALIAS  0x80
 #define TCG_CT_IALIAS 0x40
diff --git a/tests/Makefile b/tests/Makefile
index 16f0e4c..e4ddb6a 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -48,8 +48,11 @@
 check-unit-y += tests/test-x86-cpuid$(EXESUF)
 # all code tested by test-x86-cpuid is inside topology.h
 gcov-files-test-x86-cpuid-y =
+ifeq ($(CONFIG_SOFTMMU),y)
 check-unit-y += tests/test-xbzrle$(EXESUF)
-gcov-files-test-xbzrle-y = xbzrle.c
+gcov-files-test-xbzrle-y = migration/xbzrle.c
+check-unit-$(CONFIG_POSIX) += tests/test-vmstate$(EXESUF)
+endif
 check-unit-y += tests/test-cutils$(EXESUF)
 gcov-files-test-cutils-y += util/cutils.c
 check-unit-y += tests/test-mul64$(EXESUF)
@@ -61,7 +64,6 @@
 check-unit-$(CONFIG_HAS_GLIB_SUBPROCESS_TESTS) += tests/test-qdev-global-props$(EXESUF)
 check-unit-y += tests/check-qom-interface$(EXESUF)
 gcov-files-check-qom-interface-y = qom/object.c
-check-unit-$(CONFIG_POSIX) += tests/test-vmstate$(EXESUF)
 check-unit-y += tests/test-qemu-opts$(EXESUF)
 gcov-files-test-qemu-opts-y = qom/test-qemu-opts.c
 
@@ -247,7 +249,7 @@
 tests/test-iov$(EXESUF): tests/test-iov.o libqemuutil.a
 tests/test-hbitmap$(EXESUF): tests/test-hbitmap.o libqemuutil.a libqemustub.a
 tests/test-x86-cpuid$(EXESUF): tests/test-x86-cpuid.o
-tests/test-xbzrle$(EXESUF): tests/test-xbzrle.o xbzrle.o page_cache.o libqemuutil.a
+tests/test-xbzrle$(EXESUF): tests/test-xbzrle.o migration/xbzrle.o page_cache.o libqemuutil.a
 tests/test-cutils$(EXESUF): tests/test-cutils.o util/cutils.o
 tests/test-int128$(EXESUF): tests/test-int128.o
 tests/test-qdev-global-props$(EXESUF): tests/test-qdev-global-props.o \
@@ -258,7 +260,8 @@
 	$(test-qapi-obj-y) \
 	libqemuutil.a libqemustub.a
 tests/test-vmstate$(EXESUF): tests/test-vmstate.o \
-	vmstate.o qemu-file.o qemu-file-unix.o \
+	migration/vmstate.o migration/qemu-file.o migration/qemu-file-buf.o \
+        migration/qemu-file-unix.o \
 	libqemuutil.a libqemustub.a
 
 tests/test-qapi-types.c tests/test-qapi-types.h :\
diff --git a/tests/tcg/xtensa/test_mmu.S b/tests/tcg/xtensa/test_mmu.S
index 58c5bca..a15316f 100644
--- a/tests/tcg/xtensa/test_mmu.S
+++ b/tests/tcg/xtensa/test_mmu.S
@@ -641,7 +641,7 @@
     witlb   a2, a3
     wdtlb   a2, a3
 
-    movi    a2, 0x00007ffd
+    movi    a2, 0x00007ffc
     movi    a3, 20f
     movi    a4, 21f
     sub     a4, a4, a3
@@ -651,7 +651,7 @@
     addi    a2, a2, 1
     addi    a3, a3, 1
 1:
-    movi    a2, 0x00007ffd
+    movi    a2, 0x00007ffc
     movi    a3, 0x00008000
     /* DTLB: OK, ITLB: OK */
     jx      a2
@@ -668,10 +668,10 @@
     movi    a3, 1
     assert  eq, a2, a3
     rsr     a2, epc1
-    movi    a3, 0x8000
+    movi    a3, 0x7fff
     assert  eq, a2, a3
     rsr     a2, excsave1
-    movi    a3, 0x00007ffd
+    movi    a3, 0x00007ffc
     assert  ne, a2, a3
 
     reset_ps
@@ -680,7 +680,7 @@
     movi    a2, 0x0400000c /* PPN */
     movi    a3, 0x00008000 /* VPN */
     wdtlb   a2, a3
-    movi    a2, 0x00007ffd
+    movi    a2, 0x00007ffc
     movi    a3, 0x00008000
     /* DTLB: FAIL, ITLB: OK */
     jx      a2
@@ -689,10 +689,10 @@
     movi    a3, 28
     assert  eq, a2, a3
     rsr     a2, epc1
-    movi    a3, 0x7ffd
+    movi    a3, 0x7ffc
     assert  eq, a2, a3
     rsr     a2, excsave1
-    movi    a3, 0x00007ffd
+    movi    a3, 0x00007ffc
     assert  eq, a2, a3
 
     reset_ps
@@ -703,7 +703,7 @@
     witlb   a2, a3
     movi    a2, 0x04000003 /* PPN */
     wdtlb   a2, a3
-    movi    a2, 0x00007ffd
+    movi    a2, 0x00007ffc
     movi    a3, 0x00008000
     /* DTLB: OK, ITLB: FAIL */
     jx      a2
@@ -712,10 +712,10 @@
     movi    a3, 20
     assert  eq, a2, a3
     rsr     a2, epc1
-    movi    a3, 0x8000
+    movi    a3, 0x7fff
     assert  eq, a2, a3
     rsr     a2, excsave1
-    movi    a3, 0x00007ffd
+    movi    a3, 0x00007ffc
     assert  ne, a2, a3
 
     reset_ps
@@ -724,7 +724,7 @@
     movi    a2, 0x0400000c /* PPN */
     movi    a3, 0x00008000 /* VPN */
     wdtlb   a2, a3
-    movi    a2, 0x00007ffd
+    movi    a2, 0x00007ffc
     movi    a3, 0x00008000
     /* DTLB: FAIL, ITLB: FAIL */
     jx      a2
@@ -733,10 +733,10 @@
     movi    a3, 28
     assert  eq, a2, a3
     rsr     a2, epc1
-    movi    a3, 0x7ffd
+    movi    a3, 0x7ffc
     assert  eq, a2, a3
     rsr     a2, excsave1
-    movi    a3, 0x00007ffd
+    movi    a3, 0x00007ffc
     assert  eq, a2, a3
 test_end
 
diff --git a/translate-all.c b/translate-all.c
index cf05472..c24cfe8 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -1540,7 +1540,7 @@
        branch.  */
 #if defined(TARGET_MIPS)
     if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
-        env->active_tc.PC -= 4;
+        env->active_tc.PC -= (env->hflags & MIPS_HFLAG_B16 ? 2 : 4);
         cpu->icount_decr.u16.low++;
         env->hflags &= ~MIPS_HFLAG_BMASK;
     }
@@ -1651,6 +1651,11 @@
     tcg_dump_info(f, cpu_fprintf);
 }
 
+void dump_opcount_info(FILE *f, fprintf_function cpu_fprintf)
+{
+    tcg_dump_op_count(f, cpu_fprintf);
+}
+
 #else /* CONFIG_USER_ONLY */
 
 void cpu_interrupt(CPUState *cpu, int mask)
diff --git a/ui/spice-core.c b/ui/spice-core.c
index 6467fa4..fe705c1 100644
--- a/ui/spice-core.c
+++ b/ui/spice-core.c
@@ -16,7 +16,6 @@
  */
 
 #include <spice.h>
-#include <spice-experimental.h>
 
 #include <netdb.h>
 #include "sysemu/sysemu.h"
@@ -386,10 +385,7 @@
         struct sockaddr *paddr;
         socklen_t plen;
 
-        if (!(item->info->flags & SPICE_CHANNEL_EVENT_FLAG_ADDR_EXT)) {
-            error_report("invalid channel event");
-            return NULL;
-        }
+        assert(item->info->flags & SPICE_CHANNEL_EVENT_FLAG_ADDR_EXT);
 
         chan = g_malloc0(sizeof(*chan));
         chan->value = g_malloc0(sizeof(*chan->value));
@@ -661,10 +657,6 @@
     }
     port = qemu_opt_get_number(opts, "port", 0);
     tls_port = qemu_opt_get_number(opts, "tls-port", 0);
-    if (!port && !tls_port) {
-        error_report("neither port nor tls-port specified for spice");
-        exit(1);
-    }
     if (port < 0 || port > 65535) {
         error_report("spice port is out of range");
         exit(1);
diff --git a/ui/spice-display.c b/ui/spice-display.c
index def7b52..d2e3793 100644
--- a/ui/spice-display.c
+++ b/ui/spice-display.c
@@ -207,12 +207,6 @@
         return;
     };
 
-    if (ssd->surface == NULL) {
-        ssd->surface = pixman_image_ref(ssd->ds->image);
-        ssd->mirror  = qemu_pixman_mirror_create(ssd->ds->format,
-                                                 ssd->ds->image);
-    }
-
     for (blk = 0; blk < blocks; blk++) {
         dirty_top[blk] = -1;
     }
@@ -409,7 +403,29 @@
     SimpleSpiceUpdate *update;
     bool need_destroy;
 
-    dprint(1, "%s/%d:\n", __func__, ssd->qxl.id);
+    if (surface && ssd->surface &&
+        surface_width(surface) == pixman_image_get_width(ssd->surface) &&
+        surface_height(surface) == pixman_image_get_height(ssd->surface)) {
+        /* no-resize fast path: just swap backing store */
+        dprint(1, "%s/%d: fast (%dx%d)\n", __func__, ssd->qxl.id,
+               surface_width(surface), surface_height(surface));
+        qemu_mutex_lock(&ssd->lock);
+        ssd->ds = surface;
+        pixman_image_unref(ssd->surface);
+        ssd->surface = pixman_image_ref(ssd->ds->image);
+        qemu_mutex_unlock(&ssd->lock);
+        qemu_spice_display_update(ssd, 0, 0,
+                                  surface_width(surface),
+                                  surface_height(surface));
+        return;
+    }
+
+    /* full mode switch */
+    dprint(1, "%s/%d: full (%dx%d -> %dx%d)\n", __func__, ssd->qxl.id,
+           ssd->surface ? pixman_image_get_width(ssd->surface)  : 0,
+           ssd->surface ? pixman_image_get_height(ssd->surface) : 0,
+           surface ? surface_width(surface)  : 0,
+           surface ? surface_height(surface) : 0);
 
     memset(&ssd->dirty, 0, sizeof(ssd->dirty));
     if (ssd->surface) {
@@ -422,6 +438,9 @@
     qemu_mutex_lock(&ssd->lock);
     need_destroy = (ssd->ds != NULL);
     ssd->ds = surface;
+    ssd->surface = pixman_image_ref(ssd->ds->image);
+    ssd->mirror  = qemu_pixman_mirror_create(ssd->ds->format,
+                                             ssd->ds->image);
     while ((update = QTAILQ_FIRST(&ssd->updates)) != NULL) {
         QTAILQ_REMOVE(&ssd->updates, update, next);
         qemu_spice_destroy_update(ssd, update);
@@ -438,7 +457,7 @@
     ssd->notify++;
 }
 
-void qemu_spice_cursor_refresh_unlocked(SimpleSpiceDisplay *ssd)
+static void qemu_spice_cursor_refresh_unlocked(SimpleSpiceDisplay *ssd)
 {
     if (ssd->cursor) {
         assert(ssd->dcl.con);
@@ -454,6 +473,15 @@
     }
 }
 
+void qemu_spice_cursor_refresh_bh(void *opaque)
+{
+    SimpleSpiceDisplay *ssd = opaque;
+
+    qemu_mutex_lock(&ssd->lock);
+    qemu_spice_cursor_refresh_unlocked(ssd);
+    qemu_mutex_unlock(&ssd->lock);
+}
+
 void qemu_spice_display_refresh(SimpleSpiceDisplay *ssd)
 {
     dprint(3, "%s/%d:\n", __func__, ssd->qxl.id);
@@ -464,7 +492,6 @@
         qemu_spice_create_update(ssd);
         ssd->notify++;
     }
-    qemu_spice_cursor_refresh_unlocked(ssd);
     qemu_mutex_unlock(&ssd->lock);
 
     if (ssd->notify) {