Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging

* SCSI fixes from Stefan and Fam
* vhost-scsi fix from Igor and Lu Lina
* a build system fix from Daniel
* two more multi-arch-related patches from Peter C.
* TCG patches from myself and Sergey Fedorov
* RCU improvement from Wen Congyang
* a few more simple cleanups

# gpg: Signature made Fri 14 Aug 2015 22:41:52 BST using RSA key ID 78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream:
  disas: Defeature print_target_address
  hw: fix mask for ColdFire UART command register
  scsi-generic: identify AIO callbacks more clearly
  scsi-disk: identify AIO callbacks more clearly
  scsi: create restart bottom half in the right AioContext
  configure: only add CONFIG_RDMA to config-host.h once
  qemu-nbd: remove unnecessary qemu_notify_event()
  vhost-scsi: Clarify vhost_virtqueue_mask argument
  exec: use macro ROUND_UP for alignment
  rcu: Allow calling rcu_(un)register_thread() during synchronize_rcu()
  exec: drop cpu_can_do_io, just read cpu->can_do_io
  cpu_defs: Simplify CPUTLB padding logic
  cpu-exec: Do not invalidate original TB in cpu_exec_nocache()
  vhost/scsi: call vhost_dev_cleanup() at unrealize() time
  virtio-scsi-test: Add test case for tail unaligned WRITE SAME
  scsi-disk: Fix assertion failure on WRITE SAME
  tests: virtio-scsi: clear unit attention after reset
  scsi-disk: fix cmd.mode field typo
  virtio-scsi: use virtqueue_map_sg() when loading requests

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/configure b/configure
index cd219d8..d06427f 100755
--- a/configure
+++ b/configure
@@ -5600,10 +5600,6 @@
   echo "config-host.h: subdir-pixman" >> $config_host_mak
 fi
 
-if test "$rdma" = "yes" ; then
-echo "CONFIG_RDMA=y" >> $config_host_mak
-fi
-
 if [ "$dtc_internal" = "yes" ]; then
   echo "config-host.h: subdir-dtc" >> $config_host_mak
 fi
diff --git a/cpu-exec.c b/cpu-exec.c
index 75694f3..713540f 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -196,7 +196,7 @@
     }
 #endif /* DEBUG_DISAS */
 
-    cpu->can_do_io = 0;
+    cpu->can_do_io = !use_icount;
     next_tb = tcg_qemu_tb_exec(env, tb_ptr);
     cpu->can_do_io = 1;
     trace_exec_tb_exit((void *) (next_tb & ~TB_EXIT_MASK),
@@ -231,19 +231,15 @@
                              TranslationBlock *orig_tb)
 {
     TranslationBlock *tb;
-    target_ulong pc = orig_tb->pc;
-    target_ulong cs_base = orig_tb->cs_base;
-    uint64_t flags = orig_tb->flags;
 
     /* Should never happen.
        We only end up here when an existing TB is too long.  */
     if (max_cycles > CF_COUNT_MASK)
         max_cycles = CF_COUNT_MASK;
 
-    /* tb_gen_code can flush our orig_tb, invalidate it now */
-    tb_phys_invalidate(orig_tb, -1);
-    tb = tb_gen_code(cpu, pc, cs_base, flags,
+    tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
                      max_cycles | CF_NOCACHE);
+    tb->orig_tb = tcg_ctx.tb_ctx.tb_invalidated_flag ? NULL : orig_tb;
     cpu->current_tb = tb;
     /* execute the generated code */
     trace_exec_tb_nocache(tb, tb->pc);
diff --git a/cpus.c b/cpus.c
index a822ce3..c1e74d9 100644
--- a/cpus.c
+++ b/cpus.c
@@ -145,7 +145,7 @@
 
     icount = timers_state.qemu_icount;
     if (cpu) {
-        if (!cpu_can_do_io(cpu)) {
+        if (!cpu->can_do_io) {
             fprintf(stderr, "Bad icount read\n");
             exit(1);
         }
diff --git a/disas.c b/disas.c
index 69a6066..0ae70c2 100644
--- a/disas.c
+++ b/disas.c
@@ -72,14 +72,6 @@
     (*info->fprintf_func) (info->stream, "0x%" PRIx64, addr);
 }
 
-/* Print address in hex, truncated to the width of a target virtual address. */
-static void
-generic_print_target_address(bfd_vma addr, struct disassemble_info *info)
-{
-    uint64_t mask = ~0ULL >> (64 - TARGET_VIRT_ADDR_SPACE_BITS);
-    generic_print_address(addr & mask, info);
-}
-
 /* Print address in hex, truncated to the width of a host virtual address. */
 static void
 generic_print_host_address(bfd_vma addr, struct disassemble_info *info)
@@ -201,7 +193,7 @@
     s.info.read_memory_func = target_read_memory;
     s.info.buffer_vma = code;
     s.info.buffer_length = size;
-    s.info.print_address_func = generic_print_target_address;
+    s.info.print_address_func = generic_print_address;
 
 #ifdef TARGET_WORDS_BIGENDIAN
     s.info.endian = BFD_ENDIAN_BIG;
@@ -424,7 +416,7 @@
     s.cpu = cpu;
     monitor_disas_is_physical = is_physical;
     s.info.read_memory_func = monitor_read_memory;
-    s.info.print_address_func = generic_print_target_address;
+    s.info.print_address_func = generic_print_address;
 
     s.info.buffer_vma = pc;
 
diff --git a/exec.c b/exec.c
index 0a4a0c5..54cd70a 100644
--- a/exec.c
+++ b/exec.c
@@ -1210,7 +1210,7 @@
     unlink(filename);
     g_free(filename);
 
-    memory = (memory+hpagesize-1) & ~(hpagesize-1);
+    memory = ROUND_UP(memory, hpagesize);
 
     /*
      * ftruncate is not supported by hugetlbfs in older
diff --git a/hw/char/mcf_uart.c b/hw/char/mcf_uart.c
index 98fd44e..cda22ee 100644
--- a/hw/char/mcf_uart.c
+++ b/hw/char/mcf_uart.c
@@ -126,7 +126,7 @@
 static void mcf_do_command(mcf_uart_state *s, uint8_t cmd)
 {
     /* Misc command.  */
-    switch ((cmd >> 4) & 3) {
+    switch ((cmd >> 4) & 7) {
     case 0: /* No-op.  */
         break;
     case 1: /* Reset mode register pointer.  */
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
index f0ae462..ffac8f4 100644
--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
@@ -136,7 +136,8 @@
         return;
     }
     if (!s->bh) {
-        s->bh = qemu_bh_new(scsi_dma_restart_bh, s);
+        AioContext *ctx = blk_get_aio_context(s->conf.blk);
+        s->bh = aio_bh_new(ctx, scsi_dma_restart_bh, s);
         qemu_bh_schedule(s->bh);
     }
 }
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 64f0694..bada9a7 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -217,6 +217,8 @@
 {
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
 
+    assert(r->req.aiocb == NULL);
+
     if (r->req.io_canceled) {
         scsi_req_cancel_complete(&r->req);
         goto done;
@@ -235,15 +237,10 @@
     scsi_req_unref(&r->req);
 }
 
-static void scsi_dma_complete_noio(void *opaque, int ret)
+static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret)
 {
-    SCSIDiskReq *r = (SCSIDiskReq *)opaque;
-    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+    assert(r->req.aiocb == NULL);
 
-    if (r->req.aiocb != NULL) {
-        r->req.aiocb = NULL;
-        block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
-    }
     if (r->req.io_canceled) {
         scsi_req_cancel_complete(&r->req);
         goto done;
@@ -271,9 +268,13 @@
 static void scsi_dma_complete(void *opaque, int ret)
 {
     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
+    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
 
     assert(r->req.aiocb != NULL);
-    scsi_dma_complete_noio(opaque, ret);
+    r->req.aiocb = NULL;
+
+    block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
+    scsi_dma_complete_noio(r, ret);
 }
 
 static void scsi_read_complete(void * opaque, int ret)
@@ -308,16 +309,13 @@
 }
 
 /* Actually issue a read to the block device.  */
-static void scsi_do_read(void *opaque, int ret)
+static void scsi_do_read(SCSIDiskReq *r, int ret)
 {
-    SCSIDiskReq *r = opaque;
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
     uint32_t n;
 
-    if (r->req.aiocb != NULL) {
-        r->req.aiocb = NULL;
-        block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
-    }
+    assert (r->req.aiocb == NULL);
+
     if (r->req.io_canceled) {
         scsi_req_cancel_complete(&r->req);
         goto done;
@@ -349,6 +347,18 @@
     scsi_req_unref(&r->req);
 }
 
+static void scsi_do_read_cb(void *opaque, int ret)
+{
+    SCSIDiskReq *r = (SCSIDiskReq *)opaque;
+    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+
+    assert (r->req.aiocb != NULL);
+    r->req.aiocb = NULL;
+
+    block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
+    scsi_do_read(opaque, ret);
+}
+
 /* Read more data from scsi device into buffer.  */
 static void scsi_read_data(SCSIRequest *req)
 {
@@ -384,7 +394,7 @@
     if (first && scsi_is_cmd_fua(&r->req.cmd)) {
         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
                          BLOCK_ACCT_FLUSH);
-        r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_do_read, r);
+        r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_do_read_cb, r);
     } else {
         scsi_do_read(r, 0);
     }
@@ -399,7 +409,7 @@
  */
 static int scsi_handle_rw_error(SCSIDiskReq *r, int error)
 {
-    bool is_read = (r->req.cmd.xfer == SCSI_XFER_FROM_DEV);
+    bool is_read = (r->req.cmd.mode == SCSI_XFER_FROM_DEV);
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
     BlockErrorAction action = blk_get_error_action(s->qdev.conf.blk,
                                                    is_read, error);
@@ -430,16 +440,12 @@
     return action != BLOCK_ERROR_ACTION_IGNORE;
 }
 
-static void scsi_write_complete(void * opaque, int ret)
+static void scsi_write_complete_noio(SCSIDiskReq *r, int ret)
 {
-    SCSIDiskReq *r = (SCSIDiskReq *)opaque;
-    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
     uint32_t n;
 
-    if (r->req.aiocb != NULL) {
-        r->req.aiocb = NULL;
-        block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
-    }
+    assert (r->req.aiocb == NULL);
+
     if (r->req.io_canceled) {
         scsi_req_cancel_complete(&r->req);
         goto done;
@@ -467,6 +473,18 @@
     scsi_req_unref(&r->req);
 }
 
+static void scsi_write_complete(void * opaque, int ret)
+{
+    SCSIDiskReq *r = (SCSIDiskReq *)opaque;
+    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+
+    assert (r->req.aiocb != NULL);
+    r->req.aiocb = NULL;
+
+    block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
+    scsi_write_complete_noio(r, ret);
+}
+
 static void scsi_write_data(SCSIRequest *req)
 {
     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
@@ -480,18 +498,18 @@
     scsi_req_ref(&r->req);
     if (r->req.cmd.mode != SCSI_XFER_TO_DEV) {
         DPRINTF("Data transfer direction invalid\n");
-        scsi_write_complete(r, -EINVAL);
+        scsi_write_complete_noio(r, -EINVAL);
         return;
     }
 
     if (!r->req.sg && !r->qiov.size) {
         /* Called for the first time.  Ask the driver to send us more data.  */
         r->started = true;
-        scsi_write_complete(r, 0);
+        scsi_write_complete_noio(r, 0);
         return;
     }
     if (s->tray_open) {
-        scsi_write_complete(r, -ENOMEDIUM);
+        scsi_write_complete_noio(r, -ENOMEDIUM);
         return;
     }
 
@@ -500,7 +518,7 @@
         if (r->req.sg) {
             scsi_dma_complete_noio(r, 0);
         } else {
-            scsi_write_complete(r, 0);
+            scsi_write_complete_noio(r, 0);
         }
         return;
     }
@@ -1557,15 +1575,17 @@
     int count;
 } UnmapCBData;
 
-static void scsi_unmap_complete(void *opaque, int ret)
+static void scsi_unmap_complete(void *opaque, int ret);
+
+static void scsi_unmap_complete_noio(UnmapCBData *data, int ret)
 {
-    UnmapCBData *data = opaque;
     SCSIDiskReq *r = data->r;
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
     uint64_t sector_num;
     uint32_t nb_sectors;
 
-    r->req.aiocb = NULL;
+    assert(r->req.aiocb == NULL);
+
     if (r->req.io_canceled) {
         scsi_req_cancel_complete(&r->req);
         goto done;
@@ -1601,6 +1621,17 @@
     g_free(data);
 }
 
+static void scsi_unmap_complete(void *opaque, int ret)
+{
+    UnmapCBData *data = opaque;
+    SCSIDiskReq *r = data->r;
+
+    assert(r->req.aiocb != NULL);
+    r->req.aiocb = NULL;
+
+    scsi_unmap_complete_noio(data, ret);
+}
+
 static void scsi_disk_emulate_unmap(SCSIDiskReq *r, uint8_t *inbuf)
 {
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
@@ -1638,7 +1669,7 @@
 
     /* The matching unref is in scsi_unmap_complete, before data is freed.  */
     scsi_req_ref(&r->req);
-    scsi_unmap_complete(data, 0);
+    scsi_unmap_complete_noio(data, 0);
     return;
 
 invalid_param_len:
@@ -1683,6 +1714,10 @@
     if (data->iov.iov_len) {
         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
                          data->iov.iov_len, BLOCK_ACCT_WRITE);
+        /* blk_aio_write doesn't like the qiov size being different from
+         * nb_sectors, make sure they match.
+         */
+        qemu_iovec_init_external(&data->qiov, &data->iov, 1);
         r->req.aiocb = blk_aio_writev(s->qdev.conf.blk, data->sector,
                                       &data->qiov, data->iov.iov_len / 512,
                                       scsi_write_same_complete, data);
diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
index e53470f..1b6350b 100644
--- a/hw/scsi/scsi-generic.c
+++ b/hw/scsi/scsi-generic.c
@@ -88,12 +88,12 @@
 }
 
 /* Helper function for command completion.  */
-static void scsi_command_complete(void *opaque, int ret)
+static void scsi_command_complete_noio(SCSIGenericReq *r, int ret)
 {
     int status;
-    SCSIGenericReq *r = (SCSIGenericReq *)opaque;
 
-    r->req.aiocb = NULL;
+    assert(r->req.aiocb == NULL);
+
     if (r->req.io_canceled) {
         scsi_req_cancel_complete(&r->req);
         goto done;
@@ -142,6 +142,15 @@
     scsi_req_unref(&r->req);
 }
 
+static void scsi_command_complete(void *opaque, int ret)
+{
+    SCSIGenericReq *r = (SCSIGenericReq *)opaque;
+
+    assert(r->req.aiocb != NULL);
+    r->req.aiocb = NULL;
+    scsi_command_complete_noio(r, ret);
+}
+
 static int execute_command(BlockBackend *blk,
                            SCSIGenericReq *r, int direction,
                            BlockCompletionFunc *complete)
@@ -172,33 +181,37 @@
     SCSIDevice *s = r->req.dev;
     int len;
 
+    assert(r->req.aiocb != NULL);
     r->req.aiocb = NULL;
+
     if (ret || r->req.io_canceled) {
-        scsi_command_complete(r, ret);
+        scsi_command_complete_noio(r, ret);
         return;
     }
+
     len = r->io_header.dxfer_len - r->io_header.resid;
     DPRINTF("Data ready tag=0x%x len=%d\n", r->req.tag, len);
 
     r->len = -1;
     if (len == 0) {
-        scsi_command_complete(r, 0);
-    } else {
-        /* Snoop READ CAPACITY output to set the blocksize.  */
-        if (r->req.cmd.buf[0] == READ_CAPACITY_10 &&
-            (ldl_be_p(&r->buf[0]) != 0xffffffffU || s->max_lba == 0)) {
-            s->blocksize = ldl_be_p(&r->buf[4]);
-            s->max_lba = ldl_be_p(&r->buf[0]) & 0xffffffffULL;
-        } else if (r->req.cmd.buf[0] == SERVICE_ACTION_IN_16 &&
-                   (r->req.cmd.buf[1] & 31) == SAI_READ_CAPACITY_16) {
-            s->blocksize = ldl_be_p(&r->buf[8]);
-            s->max_lba = ldq_be_p(&r->buf[0]);
-        }
-        blk_set_guest_block_size(s->conf.blk, s->blocksize);
-
-        scsi_req_data(&r->req, len);
-        scsi_req_unref(&r->req);
+        scsi_command_complete_noio(r, 0);
+        return;
     }
+
+    /* Snoop READ CAPACITY output to set the blocksize.  */
+    if (r->req.cmd.buf[0] == READ_CAPACITY_10 &&
+        (ldl_be_p(&r->buf[0]) != 0xffffffffU || s->max_lba == 0)) {
+        s->blocksize = ldl_be_p(&r->buf[4]);
+        s->max_lba = ldl_be_p(&r->buf[0]) & 0xffffffffULL;
+    } else if (r->req.cmd.buf[0] == SERVICE_ACTION_IN_16 &&
+               (r->req.cmd.buf[1] & 31) == SAI_READ_CAPACITY_16) {
+        s->blocksize = ldl_be_p(&r->buf[8]);
+        s->max_lba = ldq_be_p(&r->buf[0]);
+    }
+    blk_set_guest_block_size(s->conf.blk, s->blocksize);
+
+    scsi_req_data(&r->req, len);
+    scsi_req_unref(&r->req);
 }
 
 /* Read more data from scsi device into buffer.  */
@@ -213,14 +226,14 @@
     /* The request is used as the AIO opaque value, so add a ref.  */
     scsi_req_ref(&r->req);
     if (r->len == -1) {
-        scsi_command_complete(r, 0);
+        scsi_command_complete_noio(r, 0);
         return;
     }
 
     ret = execute_command(s->conf.blk, r, SG_DXFER_FROM_DEV,
                           scsi_read_complete);
     if (ret < 0) {
-        scsi_command_complete(r, ret);
+        scsi_command_complete_noio(r, ret);
     }
 }
 
@@ -230,9 +243,12 @@
     SCSIDevice *s = r->req.dev;
 
     DPRINTF("scsi_write_complete() ret = %d\n", ret);
+
+    assert(r->req.aiocb != NULL);
     r->req.aiocb = NULL;
+
     if (ret || r->req.io_canceled) {
-        scsi_command_complete(r, ret);
+        scsi_command_complete_noio(r, ret);
         return;
     }
 
@@ -242,7 +258,7 @@
         DPRINTF("block size %d\n", s->blocksize);
     }
 
-    scsi_command_complete(r, ret);
+    scsi_command_complete_noio(r, ret);
 }
 
 /* Write data to a scsi device.  Returns nonzero on failure.
@@ -264,7 +280,7 @@
     scsi_req_ref(&r->req);
     ret = execute_command(s->conf.blk, r, SG_DXFER_TO_DEV, scsi_write_complete);
     if (ret < 0) {
-        scsi_command_complete(r, ret);
+        scsi_command_complete_noio(r, ret);
     }
 }
 
@@ -306,7 +322,7 @@
         ret = execute_command(s->conf.blk, r, SG_DXFER_NONE,
                               scsi_command_complete);
         if (ret < 0) {
-            scsi_command_complete(r, ret);
+            scsi_command_complete_noio(r, ret);
             return 0;
         }
         return 0;
diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
index a69918b..7eacca9 100644
--- a/hw/scsi/vhost-scsi.c
+++ b/hw/scsi/vhost-scsi.c
@@ -118,7 +118,7 @@
      * enabling/disabling irqfd.
      */
     for (i = 0; i < s->dev.nvqs; i++) {
-        vhost_virtqueue_mask(&s->dev, vdev, i, false);
+        vhost_virtqueue_mask(&s->dev, vdev, s->dev.vq_index + i, false);
     }
 
     return ret;
@@ -277,6 +277,7 @@
     /* This will stop vhost backend. */
     vhost_scsi_set_status(vdev, 0);
 
+    vhost_dev_cleanup(&s->dev);
     g_free(s->dev.vqs);
 
     virtio_scsi_common_unrealize(dev, errp);
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 811c3da..a8bb1c6 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -217,6 +217,11 @@
     assert(req->elem.in_num <= ARRAY_SIZE(req->elem.in_sg));
     assert(req->elem.out_num <= ARRAY_SIZE(req->elem.out_sg));
 
+    virtqueue_map_sg(req->elem.in_sg, req->elem.in_addr,
+                     req->elem.in_num, 1);
+    virtqueue_map_sg(req->elem.out_sg, req->elem.out_addr,
+                     req->elem.out_num, 0);
+
     if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICmdReq) + vs->cdb_size,
                               sizeof(VirtIOSCSICmdResp) + vs->sense_size) < 0) {
         error_report("invalid SCSI request migration data");
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index 98b9cff..5093be2 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -105,17 +105,18 @@
        bit 3                      : indicates that the entry is invalid
        bit 2..0                   : zero
     */
-    target_ulong addr_read;
-    target_ulong addr_write;
-    target_ulong addr_code;
-    /* Addend to virtual address to get host address.  IO accesses
-       use the corresponding iotlb value.  */
-    uintptr_t addend;
-    /* padding to get a power of two size */
-    uint8_t dummy[(1 << CPU_TLB_ENTRY_BITS) -
-                  (sizeof(target_ulong) * 3 +
-                   ((-sizeof(target_ulong) * 3) & (sizeof(uintptr_t) - 1)) +
-                   sizeof(uintptr_t))];
+    union {
+        struct {
+            target_ulong addr_read;
+            target_ulong addr_write;
+            target_ulong addr_code;
+            /* Addend to virtual address to get host address.  IO accesses
+               use the corresponding iotlb value.  */
+            uintptr_t addend;
+        };
+        /* padding to get a power of two size */
+        uint8_t dummy[1 << CPU_TLB_ENTRY_BITS];
+    };
 } CPUTLBEntry;
 
 QEMU_BUILD_BUG_ON(sizeof(CPUTLBEntry) != (1 << CPU_TLB_ENTRY_BITS));
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index a6fce04..29775c0 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -155,6 +155,8 @@
     void *tc_ptr;    /* pointer to the translated code */
     /* next matching tb for physical address. */
     struct TranslationBlock *phys_hash_next;
+    /* original tb when cflags has CF_NOCACHE */
+    struct TranslationBlock *orig_tb;
     /* first and second physical page containing code. The lower bit
        of the pointer tells the index in page_next[] */
     struct TranslationBlock *page_next[2];
@@ -344,27 +346,6 @@
 /* cpu-exec.c */
 extern volatile sig_atomic_t exit_request;
 
-/**
- * cpu_can_do_io:
- * @cpu: The CPU for which to check IO.
- *
- * Deterministic execution requires that IO only be performed on the last
- * instruction of a TB so that interrupts take effect immediately.
- *
- * Returns: %true if memory-mapped IO is safe, %false otherwise.
- */
-static inline bool cpu_can_do_io(CPUState *cpu)
-{
-    if (!use_icount) {
-        return true;
-    }
-    /* If not executing code then assume we are ok.  */
-    if (cpu->current_tb == NULL) {
-        return true;
-    }
-    return cpu->can_do_io != 0;
-}
-
 #if !defined(CONFIG_USER_ONLY)
 void migration_bitmap_extend(ram_addr_t old, ram_addr_t new);
 #endif
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 20aabc9..39712ab 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -231,7 +231,9 @@
  * @icount_decr: Number of cycles left, with interrupt flag in high bit.
  * This allows a single read-compare-cbranch-write sequence to test
  * for both decrementer underflow and exceptions.
- * @can_do_io: Nonzero if memory-mapped IO is safe.
+ * @can_do_io: Nonzero if memory-mapped IO is safe. Deterministic execution
+ * requires that IO only be performed on the last instruction of a TB
+ * so that interrupts take effect immediately.
  * @env_ptr: Pointer to subclass-specific CPUArchState field.
  * @current_tb: Currently executing TB.
  * @gdb_regs: Additional GDB registers.
diff --git a/qemu-nbd.c b/qemu-nbd.c
index 5106b80..d9644b2 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -362,7 +362,6 @@
         state = TERMINATE;
     }
     nbd_update_server_fd_handler(server_fd);
-    qemu_notify_event();
     nbd_client_put(client);
 }
 
diff --git a/qom/cpu.c b/qom/cpu.c
index eb9cfec..62f4b5d 100644
--- a/qom/cpu.c
+++ b/qom/cpu.c
@@ -247,7 +247,7 @@
     cpu->mem_io_vaddr = 0;
     cpu->icount_extra = 0;
     cpu->icount_decr.u32 = 0;
-    cpu->can_do_io = 0;
+    cpu->can_do_io = 1;
     cpu->exception_index = -1;
     memset(cpu->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *));
 }
diff --git a/softmmu_template.h b/softmmu_template.h
index d42d89d..50dec1c 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -154,7 +154,7 @@
 
     physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
     cpu->mem_io_pc = retaddr;
-    if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu_can_do_io(cpu)) {
+    if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
         cpu_io_recompile(cpu, retaddr);
     }
 
@@ -374,7 +374,7 @@
     MemoryRegion *mr = iotlb_to_region(cpu, physaddr);
 
     physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
-    if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu_can_do_io(cpu)) {
+    if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
         cpu_io_recompile(cpu, retaddr);
     }
 
diff --git a/tests/virtio-scsi-test.c b/tests/virtio-scsi-test.c
index 11ccdd6..66d8491 100644
--- a/tests/virtio-scsi-test.c
+++ b/tests/virtio-scsi-test.c
@@ -13,6 +13,7 @@
 #include "libqtest.h"
 #include "qemu/osdep.h"
 #include <stdio.h>
+#include "block/scsi.h"
 #include "libqos/virtio.h"
 #include "libqos/virtio-pci.h"
 #include "libqos/pci-pc.h"
@@ -71,40 +72,6 @@
     qtest_end();
 }
 
-static QVirtIOSCSI *qvirtio_scsi_pci_init(int slot)
-{
-    QVirtIOSCSI *vs;
-    QVirtioPCIDevice *dev;
-    void *addr;
-    int i;
-
-    vs = g_new0(QVirtIOSCSI, 1);
-    vs->alloc = pc_alloc_init();
-    vs->bus = qpci_init_pc();
-
-    dev = qvirtio_pci_device_find(vs->bus, QVIRTIO_SCSI_DEVICE_ID);
-    vs->dev = (QVirtioDevice *)dev;
-    g_assert(dev != NULL);
-    g_assert_cmphex(vs->dev->device_type, ==, QVIRTIO_SCSI_DEVICE_ID);
-
-    qvirtio_pci_device_enable(dev);
-    qvirtio_reset(&qvirtio_pci, vs->dev);
-    qvirtio_set_acknowledge(&qvirtio_pci, vs->dev);
-    qvirtio_set_driver(&qvirtio_pci, vs->dev);
-
-    addr = dev->addr + QVIRTIO_PCI_DEVICE_SPECIFIC_NO_MSIX;
-    vs->num_queues = qvirtio_config_readl(&qvirtio_pci, vs->dev,
-                                          (uint64_t)(uintptr_t)addr);
-
-    g_assert_cmpint(vs->num_queues, <, MAX_NUM_QUEUES);
-
-    for (i = 0; i < vs->num_queues + 2; i++) {
-        vs->vq[i] = qvirtqueue_setup(&qvirtio_pci, vs->dev, vs->alloc, i);
-    }
-
-    return vs;
-}
-
 static void qvirtio_scsi_pci_free(QVirtIOSCSI *vs)
 {
     int i;
@@ -134,7 +101,8 @@
 static uint8_t virtio_scsi_do_command(QVirtIOSCSI *vs, const uint8_t *cdb,
                                       const uint8_t *data_in,
                                       size_t data_in_len,
-                                      uint8_t *data_out, size_t data_out_len)
+                                      uint8_t *data_out, size_t data_out_len,
+                                      QVirtIOSCSICmdResp *resp_out)
 {
     QVirtQueue *vq;
     QVirtIOSCSICmdReq req = { { 0 } };
@@ -174,6 +142,10 @@
 
     response = readb(resp_addr + offsetof(QVirtIOSCSICmdResp, response));
 
+    if (resp_out) {
+        memread(resp_addr, resp_out, sizeof(*resp_out));
+    }
+
     guest_free(vs->alloc, req_addr);
     guest_free(vs->alloc, resp_addr);
     guest_free(vs->alloc, data_in_addr);
@@ -181,6 +153,52 @@
     return response;
 }
 
+static QVirtIOSCSI *qvirtio_scsi_pci_init(int slot)
+{
+    const uint8_t test_unit_ready_cdb[CDB_SIZE] = {};
+    QVirtIOSCSI *vs;
+    QVirtioPCIDevice *dev;
+    QVirtIOSCSICmdResp resp;
+    void *addr;
+    int i;
+
+    vs = g_new0(QVirtIOSCSI, 1);
+    vs->alloc = pc_alloc_init();
+    vs->bus = qpci_init_pc();
+
+    dev = qvirtio_pci_device_find(vs->bus, QVIRTIO_SCSI_DEVICE_ID);
+    vs->dev = (QVirtioDevice *)dev;
+    g_assert(dev != NULL);
+    g_assert_cmphex(vs->dev->device_type, ==, QVIRTIO_SCSI_DEVICE_ID);
+
+    qvirtio_pci_device_enable(dev);
+    qvirtio_reset(&qvirtio_pci, vs->dev);
+    qvirtio_set_acknowledge(&qvirtio_pci, vs->dev);
+    qvirtio_set_driver(&qvirtio_pci, vs->dev);
+
+    addr = dev->addr + QVIRTIO_PCI_DEVICE_SPECIFIC_NO_MSIX;
+    vs->num_queues = qvirtio_config_readl(&qvirtio_pci, vs->dev,
+                                          (uint64_t)(uintptr_t)addr);
+
+    g_assert_cmpint(vs->num_queues, <, MAX_NUM_QUEUES);
+
+    for (i = 0; i < vs->num_queues + 2; i++) {
+        vs->vq[i] = qvirtqueue_setup(&qvirtio_pci, vs->dev, vs->alloc, i);
+    }
+
+    /* Clear the POWER ON OCCURRED unit attention */
+    g_assert_cmpint(virtio_scsi_do_command(vs, test_unit_ready_cdb,
+                                           NULL, 0, NULL, 0, &resp),
+                    ==, 0);
+    g_assert_cmpint(resp.status, ==, CHECK_CONDITION);
+    g_assert_cmpint(resp.sense[0], ==, 0x70); /* Fixed format sense buffer */
+    g_assert_cmpint(resp.sense[2], ==, UNIT_ATTENTION);
+    g_assert_cmpint(resp.sense[12], ==, 0x29); /* POWER ON */
+    g_assert_cmpint(resp.sense[13], ==, 0x00);
+
+    return vs;
+}
+
 /* Tests only initialization so far. TODO: Replace with functional tests */
 static void pci_nop(void)
 {
@@ -221,9 +239,12 @@
 static void test_unaligned_write_same(void)
 {
     QVirtIOSCSI *vs;
-    uint8_t buf[512] = { 0 };
-    const uint8_t write_same_cdb[CDB_SIZE] = { 0x41, 0x00, 0x00, 0x00, 0x00,
+    uint8_t buf1[512] = { 0 };
+    uint8_t buf2[512] = { 1 };
+    const uint8_t write_same_cdb_1[CDB_SIZE] = { 0x41, 0x00, 0x00, 0x00, 0x00,
                                                0x01, 0x00, 0x00, 0x02, 0x00 };
+    const uint8_t write_same_cdb_2[CDB_SIZE] = { 0x41, 0x00, 0x00, 0x00, 0x00,
+                                               0x01, 0x00, 0x33, 0x00, 0x00 };
 
     qvirtio_scsi_start("-drive file=blkdebug::null-co://,if=none,id=dr1"
                        ",format=raw,file.align=4k "
@@ -231,7 +252,10 @@
     vs = qvirtio_scsi_pci_init(PCI_SLOT);
 
     g_assert_cmphex(0, ==,
-        virtio_scsi_do_command(vs, write_same_cdb, NULL, 0, buf, 512));
+        virtio_scsi_do_command(vs, write_same_cdb_1, NULL, 0, buf1, 512, NULL));
+
+    g_assert_cmphex(0, ==,
+        virtio_scsi_do_command(vs, write_same_cdb_2, NULL, 0, buf2, 512, NULL));
 
     qvirtio_scsi_pci_free(vs);
     qvirtio_scsi_stop();
diff --git a/translate-all.c b/translate-all.c
index 60a3d8b..9c46ffa 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -222,6 +222,7 @@
     gen_intermediate_code_pc(env, tb);
 
     if (tb->cflags & CF_USE_ICOUNT) {
+        assert(use_icount);
         /* Reset the cycle counter to the start of the block.  */
         cpu->icount_decr.u16.low += tb->icount;
         /* Clear the IO flag.  */
@@ -1470,7 +1471,7 @@
 
     if (use_icount) {
         cpu->icount_decr.u16.high = 0xffff;
-        if (!cpu_can_do_io(cpu)
+        if (!cpu->can_do_io
             && (mask & ~old_mask) != 0) {
             cpu_abort(cpu, "Raised interrupt while not in I/O function");
         }
@@ -1533,6 +1534,14 @@
     cs_base = tb->cs_base;
     flags = tb->flags;
     tb_phys_invalidate(tb, -1);
+    if (tb->cflags & CF_NOCACHE) {
+        if (tb->orig_tb) {
+            /* Invalidate original TB if this TB was generated in
+             * cpu_exec_nocache() */
+            tb_phys_invalidate(tb->orig_tb, -1);
+        }
+        tb_free(tb);
+    }
     /* FIXME: In theory this could raise an exception.  In practice
        we have already translated the block once so it's probably ok.  */
     tb_gen_code(cpu, pc, cs_base, flags, cflags);
diff --git a/util/rcu.c b/util/rcu.c
index cdcad67..8ba304d 100644
--- a/util/rcu.c
+++ b/util/rcu.c
@@ -47,7 +47,8 @@
 unsigned long rcu_gp_ctr = RCU_GP_LOCKED;
 
 QemuEvent rcu_gp_event;
-static QemuMutex rcu_gp_lock;
+static QemuMutex rcu_registry_lock;
+static QemuMutex rcu_sync_lock;
 
 /*
  * Check whether a quiescent state was crossed between the beginning of
@@ -66,7 +67,7 @@
  */
 __thread struct rcu_reader_data rcu_reader;
 
-/* Protected by rcu_gp_lock.  */
+/* Protected by rcu_registry_lock.  */
 typedef QLIST_HEAD(, rcu_reader_data) ThreadList;
 static ThreadList registry = QLIST_HEAD_INITIALIZER(registry);
 
@@ -114,10 +115,26 @@
             break;
         }
 
-        /* Wait for one thread to report a quiescent state and
-         * try again.
+        /* Wait for one thread to report a quiescent state and try again.
+         * Release rcu_registry_lock, so rcu_(un)register_thread() doesn't
+         * wait too much time.
+         *
+         * rcu_register_thread() may add nodes to &registry; it will not
+         * wake up synchronize_rcu, but that is okay because at least another
+         * thread must exit its RCU read-side critical section before
+         * synchronize_rcu is done.  The next iteration of the loop will
+         * move the new thread's rcu_reader from &registry to &qsreaders,
+         * because rcu_gp_ongoing() will return false.
+         *
+         * rcu_unregister_thread() may remove nodes from &qsreaders instead
+         * of &registry if it runs during qemu_event_wait.  That's okay;
+         * the node then will not be added back to &registry by QLIST_SWAP
+         * below.  The invariant is that the node is part of one list when
+         * rcu_registry_lock is released.
          */
+        qemu_mutex_unlock(&rcu_registry_lock);
         qemu_event_wait(&rcu_gp_event);
+        qemu_mutex_lock(&rcu_registry_lock);
     }
 
     /* put back the reader list in the registry */
@@ -126,7 +143,8 @@
 
 void synchronize_rcu(void)
 {
-    qemu_mutex_lock(&rcu_gp_lock);
+    qemu_mutex_lock(&rcu_sync_lock);
+    qemu_mutex_lock(&rcu_registry_lock);
 
     if (!QLIST_EMPTY(&registry)) {
         /* In either case, the atomic_mb_set below blocks stores that free
@@ -149,7 +167,8 @@
         wait_for_readers();
     }
 
-    qemu_mutex_unlock(&rcu_gp_lock);
+    qemu_mutex_unlock(&rcu_registry_lock);
+    qemu_mutex_unlock(&rcu_sync_lock);
 }
 
 
@@ -273,23 +292,24 @@
 void rcu_register_thread(void)
 {
     assert(rcu_reader.ctr == 0);
-    qemu_mutex_lock(&rcu_gp_lock);
+    qemu_mutex_lock(&rcu_registry_lock);
     QLIST_INSERT_HEAD(&registry, &rcu_reader, node);
-    qemu_mutex_unlock(&rcu_gp_lock);
+    qemu_mutex_unlock(&rcu_registry_lock);
 }
 
 void rcu_unregister_thread(void)
 {
-    qemu_mutex_lock(&rcu_gp_lock);
+    qemu_mutex_lock(&rcu_registry_lock);
     QLIST_REMOVE(&rcu_reader, node);
-    qemu_mutex_unlock(&rcu_gp_lock);
+    qemu_mutex_unlock(&rcu_registry_lock);
 }
 
 static void rcu_init_complete(void)
 {
     QemuThread thread;
 
-    qemu_mutex_init(&rcu_gp_lock);
+    qemu_mutex_init(&rcu_registry_lock);
+    qemu_mutex_init(&rcu_sync_lock);
     qemu_event_init(&rcu_gp_event, true);
 
     qemu_event_init(&rcu_call_ready_event, false);
@@ -306,12 +326,14 @@
 #ifdef CONFIG_POSIX
 static void rcu_init_lock(void)
 {
-    qemu_mutex_lock(&rcu_gp_lock);
+    qemu_mutex_lock(&rcu_sync_lock);
+    qemu_mutex_lock(&rcu_registry_lock);
 }
 
 static void rcu_init_unlock(void)
 {
-    qemu_mutex_unlock(&rcu_gp_lock);
+    qemu_mutex_unlock(&rcu_registry_lock);
+    qemu_mutex_unlock(&rcu_sync_lock);
 }
 #endif