Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging

# gpg: Signature made Mon 12 Oct 2015 08:56:47 BST using RSA key ID 398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 215D 46F4 8246 689E C77F  3562 EF04 965B 398D 6211

* remotes/jasowang/tags/net-pull-request:
  tests: add test cases for netfilter object
  netfilter: add a netbuffer filter
  net/queue: export qemu_net_queue_append_iov
  netfilter: print filter info associated with the netdev
  netfilter: add an API to pass the packet to next filter
  net/queue: introduce NetQueueDeliverFunc
  net: merge qemu_deliver_packet and qemu_deliver_packet_iov
  netfilter: hook packets before net queue send
  init/cleanup of netfilter object
  vl.c: init delayed object after net_init_clients
  vmxnet3: Add support for VMXNET3_CMD_GET_ADAPTIVE_RING_INFO command
  e1000: use alias for default model
  vmxnet3: Support reading IMR registers on bar0
  net/vmxnet3: Refine l2 header validation

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/.dir-locals.el b/.dir-locals.el
new file mode 100644
index 0000000..3ac0cfc
--- /dev/null
+++ b/.dir-locals.el
@@ -0,0 +1,2 @@
+((c-mode . ((c-file-style . "stroustrup")
+	    (indent-tabs-mode . nil))))
diff --git a/.travis.yml b/.travis.yml
index 0ac170b..6ca0260 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -54,15 +54,7 @@
   include:
     # Make check target (we only do this once)
     - env:
-        - TARGETS=alpha-softmmu,arm-softmmu,aarch64-softmmu,cris-softmmu,
-                  i386-softmmu,x86_64-softmmu,m68k-softmmu,microblaze-softmmu,
-                  microblazeel-softmmu,mips-softmmu,mips64-softmmu,
-                  mips64el-softmmu,mipsel-softmmu,or32-softmmu,ppc-softmmu,
-                  ppc64-softmmu,ppcemb-softmmu,s390x-softmmu,sh4-softmmu,
-                  sh4eb-softmmu,sparc-softmmu,sparc64-softmmu,
-                  unicore32-softmmu,unicore32-linux-user,
-                  lm32-softmmu,moxie-softmmu,tricore-softmmu,xtensa-softmmu,
-                  xtensaeb-softmmu
+        - TARGETS=alpha-softmmu,arm-softmmu,aarch64-softmmu,cris-softmmu,i386-softmmu,x86_64-softmmu,m68k-softmmu,microblaze-softmmu,microblazeel-softmmu,mips-softmmu,mips64-softmmu,mips64el-softmmu,mipsel-softmmu,or32-softmmu,ppc-softmmu,ppc64-softmmu,ppcemb-softmmu,s390x-softmmu,sh4-softmmu,sh4eb-softmmu,sparc-softmmu,sparc64-softmmu,unicore32-softmmu,unicore32-linux-user,lm32-softmmu,moxie-softmmu,tricore-softmmu,xtensa-softmmu,xtensaeb-softmmu
           TEST_CMD="make check"
       compiler: gcc
     # Debug related options
diff --git a/MAINTAINERS b/MAINTAINERS
index 7603ea2..9bde832 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -275,6 +275,7 @@
 M: Stefan Weil <sw@weilnetz.de>
 S: Maintained
 F: *win32*
+F: qemu.nsi
 
 ARM Machines
 ------------
diff --git a/block/ssh.c b/block/ssh.c
index 8d06739..d35b51f 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -193,7 +193,7 @@
 static int parse_uri(const char *filename, QDict *options, Error **errp)
 {
     URI *uri = NULL;
-    QueryParams *qp = NULL;
+    QueryParams *qp;
     int i;
 
     uri = uri_parse(filename);
@@ -249,9 +249,6 @@
     return 0;
 
  err:
-    if (qp) {
-      query_params_free(qp);
-    }
     if (uri) {
       uri_free(uri);
     }
diff --git a/bsd-user/main.c b/bsd-user/main.c
index f0a1268..adf2de0 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -108,7 +108,7 @@
 
 uint64_t cpu_get_tsc(CPUX86State *env)
 {
-    return cpu_get_real_ticks();
+    return cpu_get_host_ticks();
 }
 
 static void write_dt(void *ptr, unsigned long addr, unsigned long limit,
diff --git a/configure b/configure
index f14454e..f08327e 100755
--- a/configure
+++ b/configure
@@ -328,9 +328,11 @@
 archipelago="no"
 gtk=""
 gtkabi=""
+gtk_gl="no"
 gnutls=""
 gnutls_hash=""
 vte=""
+virglrenderer=""
 tpm="yes"
 libssh2=""
 vhdx=""
@@ -1122,6 +1124,10 @@
   ;;
   --enable-vte) vte="yes"
   ;;
+  --disable-virglrenderer) virglrenderer="no"
+  ;;
+  --enable-virglrenderer) virglrenderer="yes"
+  ;;
   --disable-tpm) tpm="no"
   ;;
   --enable-tpm) tpm="yes"
@@ -3231,6 +3237,9 @@
     opengl_cflags="$($pkg_config --cflags $opengl_pkgs) $x11_cflags"
     opengl_libs="$($pkg_config --libs $opengl_pkgs) $x11_libs"
     opengl=yes
+    if test "$gtk" = "yes" && $pkg_config --exists "$gtkpackage >= 3.16"; then
+        gtk_gl="yes"
+    fi
   else
     if test "$opengl" = "yes" ; then
       feature_not_found "opengl" "Please install opengl (mesa) devel pkgs: $opengl_pkgs"
@@ -3967,6 +3976,27 @@
 fi
 
 ##########################################
+# virgl renderer probe
+
+if test "$virglrenderer" != "no" ; then
+  cat > $TMPC << EOF
+#include <virglrenderer.h>
+int main(void) { virgl_renderer_poll(); return 0; }
+EOF
+  virgl_cflags=$($pkg_config --cflags virglrenderer 2>/dev/null)
+  virgl_libs=$($pkg_config --libs virglrenderer 2>/dev/null)
+  if $pkg_config virglrenderer >/dev/null 2>&1 && \
+     compile_prog "$virgl_cflags" "$virgl_libs" ; then
+    virglrenderer="yes"
+  else
+    if test "$virglrenderer" = "yes" ; then
+      feature_not_found "virglrenderer"
+    fi
+    virglrenderer="no"
+  fi
+fi
+
+##########################################
 # check if we have fdatasync
 
 fdatasync=no
@@ -4576,6 +4606,7 @@
 echo "pixman            $pixman"
 echo "SDL support       $sdl"
 echo "GTK support       $gtk"
+echo "GTK GL support    $gtk_gl"
 echo "GNUTLS support    $gnutls"
 echo "GNUTLS hash       $gnutls_hash"
 echo "GNUTLS gcrypt     $gnutls_gcrypt"
@@ -4583,6 +4614,7 @@
 echo "libtasn1          $tasn1"
 echo "VTE support       $vte"
 echo "curses support    $curses"
+echo "virgl support     $virglrenderer"
 echo "curl support      $curl"
 echo "mingw32 support   $mingw32"
 echo "Audio drivers     $audio_drv_list"
@@ -4934,6 +4966,9 @@
   echo "CONFIG_GTK=y" >> $config_host_mak
   echo "CONFIG_GTKABI=$gtkabi" >> $config_host_mak
   echo "GTK_CFLAGS=$gtk_cflags" >> $config_host_mak
+  if test "$gtk_gl" = "yes" ; then
+    echo "CONFIG_GTK_GL=y" >> $config_host_mak
+  fi
 fi
 if test "$gnutls" = "yes" ; then
   echo "CONFIG_GNUTLS=y" >> $config_host_mak
@@ -4955,6 +4990,11 @@
   echo "CONFIG_VTE=y" >> $config_host_mak
   echo "VTE_CFLAGS=$vte_cflags" >> $config_host_mak
 fi
+if test "$virglrenderer" = "yes" ; then
+  echo "CONFIG_VIRGL=y" >> $config_host_mak
+  echo "VIRGL_CFLAGS=$virgl_cflags" >> $config_host_mak
+  echo "VIRGL_LIBS=$virgl_libs" >> $config_host_mak
+fi
 if test "$xen" = "yes" ; then
   echo "CONFIG_XEN_BACKEND=y" >> $config_host_mak
   echo "CONFIG_XEN_CTRL_INTERFACE_VERSION=$xen_ctrl_version" >> $config_host_mak
@@ -5457,7 +5497,7 @@
     echo "TARGET_ABI32=y" >> $config_target_mak
   ;;
   s390x)
-    gdb_xml_files="s390x-core64.xml s390-acr.xml s390-fpr.xml s390-vx.xml s390-cr.xml"
+    gdb_xml_files="s390x-core64.xml s390-acr.xml s390-fpr.xml s390-vx.xml s390-cr.xml s390-virt.xml"
   ;;
   tilegx)
   ;;
diff --git a/cpus.c b/cpus.c
index d44c0ed..d2e9e4f 100644
--- a/cpus.c
+++ b/cpus.c
@@ -199,7 +199,7 @@
 
     ticks = timers_state.cpu_ticks_offset;
     if (timers_state.cpu_ticks_enabled) {
-        ticks += cpu_get_real_ticks();
+        ticks += cpu_get_host_ticks();
     }
 
     if (timers_state.cpu_ticks_prev > ticks) {
@@ -247,7 +247,7 @@
     /* Here, the really thing protected by seqlock is cpu_clock_offset. */
     seqlock_write_lock(&timers_state.vm_clock_seqlock);
     if (!timers_state.cpu_ticks_enabled) {
-        timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
+        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
         timers_state.cpu_clock_offset -= get_clock();
         timers_state.cpu_ticks_enabled = 1;
     }
@@ -263,7 +263,7 @@
     /* Here, the really thing protected by seqlock is cpu_clock_offset. */
     seqlock_write_lock(&timers_state.vm_clock_seqlock);
     if (timers_state.cpu_ticks_enabled) {
-        timers_state.cpu_ticks_offset += cpu_get_real_ticks();
+        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
         timers_state.cpu_clock_offset = cpu_get_clock_locked();
         timers_state.cpu_ticks_enabled = 0;
     }
diff --git a/docs/tracing.txt b/docs/tracing.txt
index 7117c5e..3853a6a 100644
--- a/docs/tracing.txt
+++ b/docs/tracing.txt
@@ -258,11 +258,11 @@
 performed manually after a build in order to change the binary name in the .stp
 probes:
 
-    scripts/tracetool --dtrace --stap \
-                      --binary path/to/qemu-binary \
-                      --target-type system \
-                      --target-name x86_64 \
-                      <trace-events >qemu.stp
+    scripts/tracetool.py --backends=dtrace --format=stap \
+                         --binary path/to/qemu-binary \
+                         --target-type system \
+                         --target-name x86_64 \
+                         <trace-events >qemu.stp
 
 == Trace event properties ==
 
diff --git a/gdb-xml/s390-virt.xml b/gdb-xml/s390-virt.xml
new file mode 100644
index 0000000..e2e9a7a
--- /dev/null
+++ b/gdb-xml/s390-virt.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<!-- Copyright 2015 IBM Corp.
+
+     This work is licensed under the terms of the GNU GPL, version 2 or
+     (at your option) any later version. See the COPYING file in the
+     top-level directory. -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.s390.virt">
+  <reg name="ckc" bitsize="64" type="uint64" group="system"/>
+  <reg name="cputm" bitsize="64" type="uint64" group="system"/>
+  <reg name="last_break" bitsize="64" type="code_ptr" group="system"/>
+  <reg name="prefix" bitsize="64" type="data_ptr" group="system"/>
+  <reg name="pp" bitsize="64" type="uint64" group="system"/>
+  <reg name="pfault_token" bitsize="64" type="uint64" group="system"/>
+  <reg name="pfault_select" bitsize="64" type="uint64" group="system"/>
+  <reg name="pfault_compare" bitsize="64" type="uint64" group="system"/>
+</feature>
diff --git a/hw/arm/allwinner-a10.c b/hw/arm/allwinner-a10.c
index ff249af..43dc0a1 100644
--- a/hw/arm/allwinner-a10.c
+++ b/hw/arm/allwinner-a10.c
@@ -103,6 +103,12 @@
     DeviceClass *dc = DEVICE_CLASS(oc);
 
     dc->realize = aw_a10_realize;
+
+    /*
+     * Reason: creates an ARM CPU, thus use after free(), see
+     * arm_cpu_class_init()
+     */
+    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 static const TypeInfo aw_a10_type_info = {
diff --git a/hw/arm/digic.c b/hw/arm/digic.c
index ec8c330..90f8190 100644
--- a/hw/arm/digic.c
+++ b/hw/arm/digic.c
@@ -97,6 +97,12 @@
     DeviceClass *dc = DEVICE_CLASS(oc);
 
     dc->realize = digic_realize;
+
+    /*
+     * Reason: creates an ARM CPU, thus use after free(), see
+     * arm_cpu_class_init()
+     */
+    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 static const TypeInfo digic_type_info = {
diff --git a/hw/arm/fsl-imx25.c b/hw/arm/fsl-imx25.c
index 86fde42..e1cadac 100644
--- a/hw/arm/fsl-imx25.c
+++ b/hw/arm/fsl-imx25.c
@@ -284,6 +284,12 @@
     DeviceClass *dc = DEVICE_CLASS(oc);
 
     dc->realize = fsl_imx25_realize;
+
+    /*
+     * Reason: creates an ARM CPU, thus use after free(), see
+     * arm_cpu_class_init()
+     */
+    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 static const TypeInfo fsl_imx25_type_info = {
diff --git a/hw/arm/fsl-imx31.c b/hw/arm/fsl-imx31.c
index 8e1ed48..53d4473 100644
--- a/hw/arm/fsl-imx31.c
+++ b/hw/arm/fsl-imx31.c
@@ -258,6 +258,12 @@
     DeviceClass *dc = DEVICE_CLASS(oc);
 
     dc->realize = fsl_imx31_realize;
+
+    /*
+     * Reason: creates an ARM CPU, thus use after free(), see
+     * arm_cpu_class_init()
+     */
+    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 static const TypeInfo fsl_imx31_type_info = {
diff --git a/hw/arm/pxa2xx.c b/hw/arm/pxa2xx.c
index 164260a..79d22d9 100644
--- a/hw/arm/pxa2xx.c
+++ b/hw/arm/pxa2xx.c
@@ -1958,7 +1958,7 @@
     PXA2xxFIrState *s = PXA2XX_FIR(obj);
     SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
 
-    memory_region_init_io(&s->iomem, NULL, &pxa2xx_fir_ops, s,
+    memory_region_init_io(&s->iomem, obj, &pxa2xx_fir_ops, s,
                           "pxa2xx-fir", 0x1000);
     sysbus_init_mmio(sbd, &s->iomem);
     sysbus_init_irq(sbd, &s->irq);
diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c
index a9097f9..b36ca3d 100644
--- a/hw/arm/xlnx-zynqmp.c
+++ b/hw/arm/xlnx-zynqmp.c
@@ -271,6 +271,12 @@
 
     dc->props = xlnx_zynqmp_props;
     dc->realize = xlnx_zynqmp_realize;
+
+    /*
+     * Reason: creates an ARM CPU, thus use after free(), see
+     * arm_cpu_class_init()
+     */
+    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 static const TypeInfo xlnx_zynqmp_type_info = {
diff --git a/hw/char/etraxfs_ser.c b/hw/char/etraxfs_ser.c
index 857c136..562021e 100644
--- a/hw/char/etraxfs_ser.c
+++ b/hw/char/etraxfs_ser.c
@@ -182,15 +182,13 @@
 static int serial_can_receive(void *opaque)
 {
     ETRAXSerial *s = opaque;
-    int r;
 
     /* Is the receiver enabled?  */
     if (!(s->regs[RW_REC_CTRL] & (1 << 3))) {
         return 0;
     }
 
-    r = sizeof(s->rx_fifo) - s->rx_fifo_len;
-    return r;
+    return sizeof(s->rx_fifo) - s->rx_fifo_len;
 }
 
 static void serial_event(void *opaque, int event)
diff --git a/hw/char/imx_serial.c b/hw/char/imx_serial.c
index e8f32c4..f0c4c72 100644
--- a/hw/char/imx_serial.c
+++ b/hw/char/imx_serial.c
@@ -66,7 +66,9 @@
     uint32_t flags;
 
     flags = (s->usr1 & s->ucr1) & (USR1_TRDY|USR1_RRDY);
-    if (!(s->ucr1 & UCR1_TXMPTYEN)) {
+    if (s->ucr1 & UCR1_TXMPTYEN) {
+        flags |= (s->uts1 & UTS1_TXEMPTY);
+    } else {
         flags &= ~USR1_TRDY;
     }
 
diff --git a/hw/display/Makefile.objs b/hw/display/Makefile.objs
index dd8ea76..f0cf431 100644
--- a/hw/display/Makefile.objs
+++ b/hw/display/Makefile.objs
@@ -35,6 +35,10 @@
 
 common-obj-$(CONFIG_QXL) += qxl.o qxl-logger.o qxl-render.o
 
-obj-$(CONFIG_VIRTIO) += virtio-gpu.o
+obj-$(CONFIG_VIRTIO) += virtio-gpu.o virtio-gpu-3d.o
 obj-$(CONFIG_VIRTIO_PCI) += virtio-gpu-pci.o
 obj-$(CONFIG_VIRTIO_VGA) += virtio-vga.o
+virtio-gpu.o-cflags := $(VIRGL_CFLAGS)
+virtio-gpu.o-libs += $(VIRGL_LIBS)
+virtio-gpu-3d.o-cflags := $(VIRGL_CFLAGS)
+virtio-gpu-3d.o-libs += $(VIRGL_LIBS)
diff --git a/hw/display/cg3.c b/hw/display/cg3.c
index d2a0d97..e309fbe 100644
--- a/hw/display/cg3.c
+++ b/hw/display/cg3.c
@@ -280,12 +280,12 @@
     SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
     CG3State *s = CG3(obj);
 
-    memory_region_init_ram(&s->rom, NULL, "cg3.prom", FCODE_MAX_ROM_SIZE,
+    memory_region_init_ram(&s->rom, obj, "cg3.prom", FCODE_MAX_ROM_SIZE,
                            &error_fatal);
     memory_region_set_readonly(&s->rom, true);
     sysbus_init_mmio(sbd, &s->rom);
 
-    memory_region_init_io(&s->reg, NULL, &cg3_reg_ops, s, "cg3.reg",
+    memory_region_init_io(&s->reg, obj, &cg3_reg_ops, s, "cg3.reg",
                           CG3_REG_SIZE);
     sysbus_init_mmio(sbd, &s->reg);
 }
diff --git a/hw/display/tcx.c b/hw/display/tcx.c
index 4635800..bf119bc 100644
--- a/hw/display/tcx.c
+++ b/hw/display/tcx.c
@@ -944,7 +944,7 @@
     SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
     TCXState *s = TCX(obj);
 
-    memory_region_init_ram(&s->rom, NULL, "tcx.prom", FCODE_MAX_ROM_SIZE,
+    memory_region_init_ram(&s->rom, OBJECT(s), "tcx.prom", FCODE_MAX_ROM_SIZE,
                            &error_fatal);
     memory_region_set_readonly(&s->rom, true);
     sysbus_init_mmio(sbd, &s->rom);
diff --git a/hw/display/virtio-gpu-3d.c b/hw/display/virtio-gpu-3d.c
new file mode 100644
index 0000000..28dccfd
--- /dev/null
+++ b/hw/display/virtio-gpu-3d.c
@@ -0,0 +1,598 @@
+/*
+ * Virtio GPU Device
+ *
+ * Copyright Red Hat, Inc. 2013-2014
+ *
+ * Authors:
+ *     Dave Airlie <airlied@redhat.com>
+ *     Gerd Hoffmann <kraxel@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+#include "qemu/iov.h"
+#include "trace.h"
+#include "hw/virtio/virtio.h"
+#include "hw/virtio/virtio-gpu.h"
+
+#ifdef CONFIG_VIRGL
+
+#include "virglrenderer.h"
+
+static struct virgl_renderer_callbacks virtio_gpu_3d_cbs;
+
+static void virgl_cmd_create_resource_2d(VirtIOGPU *g,
+                                         struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_resource_create_2d c2d;
+    struct virgl_renderer_resource_create_args args;
+
+    VIRTIO_GPU_FILL_CMD(c2d);
+    trace_virtio_gpu_cmd_res_create_2d(c2d.resource_id, c2d.format,
+                                       c2d.width, c2d.height);
+
+    args.handle = c2d.resource_id;
+    args.target = 2;
+    args.format = c2d.format;
+    args.bind = (1 << 1);
+    args.width = c2d.width;
+    args.height = c2d.height;
+    args.depth = 1;
+    args.array_size = 1;
+    args.last_level = 0;
+    args.nr_samples = 0;
+    args.flags = VIRTIO_GPU_RESOURCE_FLAG_Y_0_TOP;
+    virgl_renderer_resource_create(&args, NULL, 0);
+}
+
+static void virgl_cmd_create_resource_3d(VirtIOGPU *g,
+                                         struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_resource_create_3d c3d;
+    struct virgl_renderer_resource_create_args args;
+
+    VIRTIO_GPU_FILL_CMD(c3d);
+    trace_virtio_gpu_cmd_res_create_3d(c3d.resource_id, c3d.format,
+                                       c3d.width, c3d.height, c3d.depth);
+
+    args.handle = c3d.resource_id;
+    args.target = c3d.target;
+    args.format = c3d.format;
+    args.bind = c3d.bind;
+    args.width = c3d.width;
+    args.height = c3d.height;
+    args.depth = c3d.depth;
+    args.array_size = c3d.array_size;
+    args.last_level = c3d.last_level;
+    args.nr_samples = c3d.nr_samples;
+    args.flags = c3d.flags;
+    virgl_renderer_resource_create(&args, NULL, 0);
+}
+
+static void virgl_cmd_resource_unref(VirtIOGPU *g,
+                                     struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_resource_unref unref;
+
+    VIRTIO_GPU_FILL_CMD(unref);
+    trace_virtio_gpu_cmd_res_unref(unref.resource_id);
+
+    virgl_renderer_resource_unref(unref.resource_id);
+}
+
+static void virgl_cmd_context_create(VirtIOGPU *g,
+                                     struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_ctx_create cc;
+
+    VIRTIO_GPU_FILL_CMD(cc);
+    trace_virtio_gpu_cmd_ctx_create(cc.hdr.ctx_id,
+                                    cc.debug_name);
+
+    virgl_renderer_context_create(cc.hdr.ctx_id, cc.nlen,
+                                  cc.debug_name);
+}
+
+static void virgl_cmd_context_destroy(VirtIOGPU *g,
+                                      struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_ctx_destroy cd;
+
+    VIRTIO_GPU_FILL_CMD(cd);
+    trace_virtio_gpu_cmd_ctx_destroy(cd.hdr.ctx_id);
+
+    virgl_renderer_context_destroy(cd.hdr.ctx_id);
+}
+
+static void virtio_gpu_rect_update(VirtIOGPU *g, int idx, int x, int y,
+                                int width, int height)
+{
+    if (!g->scanout[idx].con) {
+        return;
+    }
+
+    dpy_gl_update(g->scanout[idx].con, x, y, width, height);
+}
+
+static void virgl_cmd_resource_flush(VirtIOGPU *g,
+                                     struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_resource_flush rf;
+    int i;
+
+    VIRTIO_GPU_FILL_CMD(rf);
+    trace_virtio_gpu_cmd_res_flush(rf.resource_id,
+                                   rf.r.width, rf.r.height, rf.r.x, rf.r.y);
+
+    for (i = 0; i < VIRTIO_GPU_MAX_SCANOUT; i++) {
+        if (g->scanout[i].resource_id != rf.resource_id) {
+            continue;
+        }
+        virtio_gpu_rect_update(g, i, rf.r.x, rf.r.y, rf.r.width, rf.r.height);
+    }
+}
+
+static void virgl_cmd_set_scanout(VirtIOGPU *g,
+                                  struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_set_scanout ss;
+    struct virgl_renderer_resource_info info;
+    int ret;
+
+    VIRTIO_GPU_FILL_CMD(ss);
+    trace_virtio_gpu_cmd_set_scanout(ss.scanout_id, ss.resource_id,
+                                     ss.r.width, ss.r.height, ss.r.x, ss.r.y);
+
+    if (ss.scanout_id >= VIRTIO_GPU_MAX_SCANOUT) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal scanout id specified %d",
+                      __func__, ss.scanout_id);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID;
+        return;
+    }
+    g->enable = 1;
+
+    memset(&info, 0, sizeof(info));
+
+    if (ss.resource_id && ss.r.width && ss.r.height) {
+        ret = virgl_renderer_resource_get_info(ss.resource_id, &info);
+        if (ret == -1) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: illegal resource specified %d\n",
+                          __func__, ss.resource_id);
+            cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+            return;
+        }
+        qemu_console_resize(g->scanout[ss.scanout_id].con,
+                            ss.r.width, ss.r.height);
+        virgl_renderer_force_ctx_0();
+        dpy_gl_scanout(g->scanout[ss.scanout_id].con, info.tex_id,
+                       info.flags & 1 /* FIXME: Y_0_TOP */,
+                       ss.r.x, ss.r.y, ss.r.width, ss.r.height);
+    } else {
+        if (ss.scanout_id != 0) {
+            dpy_gfx_replace_surface(g->scanout[ss.scanout_id].con, NULL);
+        }
+        dpy_gl_scanout(g->scanout[ss.scanout_id].con, 0, false,
+                       0, 0, 0, 0);
+    }
+    g->scanout[ss.scanout_id].resource_id = ss.resource_id;
+}
+
+static void virgl_cmd_submit_3d(VirtIOGPU *g,
+                                struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_cmd_submit cs;
+    void *buf;
+    size_t s;
+
+    VIRTIO_GPU_FILL_CMD(cs);
+    trace_virtio_gpu_cmd_ctx_submit(cs.hdr.ctx_id, cs.size);
+
+    buf = g_malloc(cs.size);
+    s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num,
+                   sizeof(cs), buf, cs.size);
+    if (s != cs.size) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: size mismatch (%zd/%d)",
+                      __func__, s, cs.size);
+        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER;
+        return;
+    }
+
+    if (virtio_gpu_stats_enabled(g->conf)) {
+        g->stats.req_3d++;
+        g->stats.bytes_3d += cs.size;
+    }
+
+    virgl_renderer_submit_cmd(buf, cs.hdr.ctx_id, cs.size / 4);
+
+    g_free(buf);
+}
+
+static void virgl_cmd_transfer_to_host_2d(VirtIOGPU *g,
+                                          struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_transfer_to_host_2d t2d;
+    struct virtio_gpu_box box;
+
+    VIRTIO_GPU_FILL_CMD(t2d);
+    trace_virtio_gpu_cmd_res_xfer_toh_2d(t2d.resource_id);
+
+    box.x = t2d.r.x;
+    box.y = t2d.r.y;
+    box.z = 0;
+    box.w = t2d.r.width;
+    box.h = t2d.r.height;
+    box.d = 1;
+
+    virgl_renderer_transfer_write_iov(t2d.resource_id,
+                                      0,
+                                      0,
+                                      0,
+                                      0,
+                                      (struct virgl_box *)&box,
+                                      t2d.offset, NULL, 0);
+}
+
+static void virgl_cmd_transfer_to_host_3d(VirtIOGPU *g,
+                                          struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_transfer_host_3d t3d;
+
+    VIRTIO_GPU_FILL_CMD(t3d);
+    trace_virtio_gpu_cmd_res_xfer_toh_3d(t3d.resource_id);
+
+    virgl_renderer_transfer_write_iov(t3d.resource_id,
+                                      t3d.hdr.ctx_id,
+                                      t3d.level,
+                                      t3d.stride,
+                                      t3d.layer_stride,
+                                      (struct virgl_box *)&t3d.box,
+                                      t3d.offset, NULL, 0);
+}
+
+static void
+virgl_cmd_transfer_from_host_3d(VirtIOGPU *g,
+                                struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_transfer_host_3d tf3d;
+
+    VIRTIO_GPU_FILL_CMD(tf3d);
+    trace_virtio_gpu_cmd_res_xfer_fromh_3d(tf3d.resource_id);
+
+    virgl_renderer_transfer_read_iov(tf3d.resource_id,
+                                     tf3d.hdr.ctx_id,
+                                     tf3d.level,
+                                     tf3d.stride,
+                                     tf3d.layer_stride,
+                                     (struct virgl_box *)&tf3d.box,
+                                     tf3d.offset, NULL, 0);
+}
+
+
+static void virgl_resource_attach_backing(VirtIOGPU *g,
+                                          struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_resource_attach_backing att_rb;
+    struct iovec *res_iovs;
+    int ret;
+
+    VIRTIO_GPU_FILL_CMD(att_rb);
+    trace_virtio_gpu_cmd_res_back_attach(att_rb.resource_id);
+
+    ret = virtio_gpu_create_mapping_iov(&att_rb, cmd, &res_iovs);
+    if (ret != 0) {
+        cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
+        return;
+    }
+
+    virgl_renderer_resource_attach_iov(att_rb.resource_id,
+                                       res_iovs, att_rb.nr_entries);
+}
+
+static void virgl_resource_detach_backing(VirtIOGPU *g,
+                                          struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_resource_detach_backing detach_rb;
+    struct iovec *res_iovs = NULL;
+    int num_iovs = 0;
+
+    VIRTIO_GPU_FILL_CMD(detach_rb);
+    trace_virtio_gpu_cmd_res_back_detach(detach_rb.resource_id);
+
+    virgl_renderer_resource_detach_iov(detach_rb.resource_id,
+                                       &res_iovs,
+                                       &num_iovs);
+    if (res_iovs == NULL || num_iovs == 0) {
+        return;
+    }
+    virtio_gpu_cleanup_mapping_iov(res_iovs, num_iovs);
+}
+
+
+static void virgl_cmd_ctx_attach_resource(VirtIOGPU *g,
+                                          struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_ctx_resource att_res;
+
+    VIRTIO_GPU_FILL_CMD(att_res);
+    trace_virtio_gpu_cmd_ctx_res_attach(att_res.hdr.ctx_id,
+                                        att_res.resource_id);
+
+    virgl_renderer_ctx_attach_resource(att_res.hdr.ctx_id, att_res.resource_id);
+}
+
+static void virgl_cmd_ctx_detach_resource(VirtIOGPU *g,
+                                          struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_ctx_resource det_res;
+
+    VIRTIO_GPU_FILL_CMD(det_res);
+    trace_virtio_gpu_cmd_ctx_res_detach(det_res.hdr.ctx_id,
+                                        det_res.resource_id);
+
+    virgl_renderer_ctx_detach_resource(det_res.hdr.ctx_id, det_res.resource_id);
+}
+
+static void virgl_cmd_get_capset_info(VirtIOGPU *g,
+                                      struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_get_capset_info info;
+    struct virtio_gpu_resp_capset_info resp;
+
+    VIRTIO_GPU_FILL_CMD(info);
+
+    if (info.capset_index == 0) {
+        resp.capset_id = VIRTIO_GPU_CAPSET_VIRGL;
+        virgl_renderer_get_cap_set(resp.capset_id,
+                                   &resp.capset_max_version,
+                                   &resp.capset_max_size);
+    } else {
+        resp.capset_max_version = 0;
+        resp.capset_max_size = 0;
+    }
+    resp.hdr.type = VIRTIO_GPU_RESP_OK_CAPSET_INFO;
+    virtio_gpu_ctrl_response(g, cmd, &resp.hdr, sizeof(resp));
+}
+
+static void virgl_cmd_get_capset(VirtIOGPU *g,
+                                 struct virtio_gpu_ctrl_command *cmd)
+{
+    struct virtio_gpu_get_capset gc;
+    struct virtio_gpu_resp_capset *resp;
+    uint32_t max_ver, max_size;
+    VIRTIO_GPU_FILL_CMD(gc);
+
+    virgl_renderer_get_cap_set(gc.capset_id, &max_ver,
+                               &max_size);
+    resp = g_malloc(sizeof(*resp) + max_size);
+
+    resp->hdr.type = VIRTIO_GPU_RESP_OK_CAPSET;
+    virgl_renderer_fill_caps(gc.capset_id,
+                             gc.capset_version,
+                             (void *)resp->capset_data);
+    virtio_gpu_ctrl_response(g, cmd, &resp->hdr, sizeof(*resp) + max_size);
+    g_free(resp);
+}
+
+void virtio_gpu_virgl_process_cmd(VirtIOGPU *g,
+                                      struct virtio_gpu_ctrl_command *cmd)
+{
+    VIRTIO_GPU_FILL_CMD(cmd->cmd_hdr);
+
+    virgl_renderer_force_ctx_0();
+    switch (cmd->cmd_hdr.type) {
+    case VIRTIO_GPU_CMD_CTX_CREATE:
+        virgl_cmd_context_create(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_CTX_DESTROY:
+        virgl_cmd_context_destroy(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_RESOURCE_CREATE_2D:
+        virgl_cmd_create_resource_2d(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_RESOURCE_CREATE_3D:
+        virgl_cmd_create_resource_3d(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_SUBMIT_3D:
+        virgl_cmd_submit_3d(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D:
+        virgl_cmd_transfer_to_host_2d(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D:
+        virgl_cmd_transfer_to_host_3d(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D:
+        virgl_cmd_transfer_from_host_3d(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING:
+        virgl_resource_attach_backing(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING:
+        virgl_resource_detach_backing(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_SET_SCANOUT:
+        virgl_cmd_set_scanout(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_RESOURCE_FLUSH:
+        virgl_cmd_resource_flush(g, cmd);
+       break;
+    case VIRTIO_GPU_CMD_RESOURCE_UNREF:
+        virgl_cmd_resource_unref(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE:
+        /* TODO add security */
+        virgl_cmd_ctx_attach_resource(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE:
+        /* TODO add security */
+        virgl_cmd_ctx_detach_resource(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_GET_CAPSET_INFO:
+        virgl_cmd_get_capset_info(g, cmd);
+        break;
+    case VIRTIO_GPU_CMD_GET_CAPSET:
+        virgl_cmd_get_capset(g, cmd);
+        break;
+
+    case VIRTIO_GPU_CMD_GET_DISPLAY_INFO:
+        virtio_gpu_get_display_info(g, cmd);
+        break;
+    default:
+        cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
+        break;
+    }
+
+    if (cmd->finished) {
+        return;
+    }
+    if (cmd->error) {
+        fprintf(stderr, "%s: ctrl 0x%x, error 0x%x\n", __func__,
+                cmd->cmd_hdr.type, cmd->error);
+        virtio_gpu_ctrl_response_nodata(g, cmd, cmd->error);
+        return;
+    }
+    if (!(cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_FENCE)) {
+        virtio_gpu_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA);
+        return;
+    }
+
+    trace_virtio_gpu_fence_ctrl(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type);
+    virgl_renderer_create_fence(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type);
+}
+
+static void virgl_write_fence(void *opaque, uint32_t fence)
+{
+    VirtIOGPU *g = opaque;
+    struct virtio_gpu_ctrl_command *cmd, *tmp;
+
+    QTAILQ_FOREACH_SAFE(cmd, &g->fenceq, next, tmp) {
+        /*
+	 * the guest can end up emitting fences out of order
+	 * so we should check all fenced cmds not just the first one.
+	 */
+        if (cmd->cmd_hdr.fence_id > fence) {
+            continue;
+        }
+        trace_virtio_gpu_fence_resp(cmd->cmd_hdr.fence_id);
+        virtio_gpu_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA);
+        QTAILQ_REMOVE(&g->fenceq, cmd, next);
+        g_free(cmd);
+        g->inflight--;
+        if (virtio_gpu_stats_enabled(g->conf)) {
+            fprintf(stderr, "inflight: %3d (-)\r", g->inflight);
+        }
+    }
+}
+
+static virgl_renderer_gl_context
+virgl_create_context(void *opaque, int scanout_idx,
+                     struct virgl_renderer_gl_ctx_param *params)
+{
+    VirtIOGPU *g = opaque;
+    QEMUGLContext ctx;
+    QEMUGLParams qparams;
+
+    qparams.major_ver = params->major_ver;
+    qparams.minor_ver = params->minor_ver;
+
+    ctx = dpy_gl_ctx_create(g->scanout[scanout_idx].con, &qparams);
+    return (virgl_renderer_gl_context)ctx;
+}
+
+static void virgl_destroy_context(void *opaque, virgl_renderer_gl_context ctx)
+{
+    VirtIOGPU *g = opaque;
+    QEMUGLContext qctx = (QEMUGLContext)ctx;
+
+    dpy_gl_ctx_destroy(g->scanout[0].con, qctx);
+}
+
+static int virgl_make_context_current(void *opaque, int scanout_idx,
+                                      virgl_renderer_gl_context ctx)
+{
+    VirtIOGPU *g = opaque;
+    QEMUGLContext qctx = (QEMUGLContext)ctx;
+
+    return dpy_gl_ctx_make_current(g->scanout[scanout_idx].con, qctx);
+}
+
+static struct virgl_renderer_callbacks virtio_gpu_3d_cbs = {
+    .version             = 1,
+    .write_fence         = virgl_write_fence,
+    .create_gl_context   = virgl_create_context,
+    .destroy_gl_context  = virgl_destroy_context,
+    .make_current        = virgl_make_context_current,
+};
+
+static void virtio_gpu_print_stats(void *opaque)
+{
+    VirtIOGPU *g = opaque;
+
+    if (g->stats.requests) {
+        fprintf(stderr, "stats: vq req %4d, %3d -- 3D %4d (%5d)\n",
+                g->stats.requests,
+                g->stats.max_inflight,
+                g->stats.req_3d,
+                g->stats.bytes_3d);
+        g->stats.requests     = 0;
+        g->stats.max_inflight = 0;
+        g->stats.req_3d       = 0;
+        g->stats.bytes_3d     = 0;
+    } else {
+        fprintf(stderr, "stats: idle\r");
+    }
+    timer_mod(g->print_stats, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
+}
+
+static void virtio_gpu_fence_poll(void *opaque)
+{
+    VirtIOGPU *g = opaque;
+
+    virgl_renderer_poll();
+    if (g->inflight) {
+        timer_mod(g->fence_poll, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 10);
+    }
+}
+
+void virtio_gpu_virgl_fence_poll(VirtIOGPU *g)
+{
+    virtio_gpu_fence_poll(g);
+}
+
+void virtio_gpu_virgl_reset(VirtIOGPU *g)
+{
+    int i;
+
+    /* virgl_renderer_reset() ??? */
+    for (i = 0; i < g->conf.max_outputs; i++) {
+        if (i != 0) {
+            dpy_gfx_replace_surface(g->scanout[i].con, NULL);
+        }
+        dpy_gl_scanout(g->scanout[i].con, 0, false, 0, 0, 0, 0);
+    }
+}
+
+int virtio_gpu_virgl_init(VirtIOGPU *g)
+{
+    int ret;
+
+    ret = virgl_renderer_init(g, 0, &virtio_gpu_3d_cbs);
+    if (ret != 0) {
+        return ret;
+    }
+
+    g->fence_poll = timer_new_ms(QEMU_CLOCK_VIRTUAL,
+                                 virtio_gpu_fence_poll, g);
+
+    if (virtio_gpu_stats_enabled(g->conf)) {
+        g->print_stats = timer_new_ms(QEMU_CLOCK_VIRTUAL,
+                                      virtio_gpu_print_stats, g);
+        timer_mod(g->print_stats, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
+    }
+    return 0;
+}
+
+#endif /* CONFIG_VIRGL */
diff --git a/hw/display/virtio-gpu-pci.c b/hw/display/virtio-gpu-pci.c
index 5bc62cf..eef137f 100644
--- a/hw/display/virtio-gpu-pci.c
+++ b/hw/display/virtio-gpu-pci.c
@@ -6,8 +6,8 @@
  * Authors:
  *  Dave Airlie
  *
- * This work is licensed under the terms of the GNU GPL, version 2.  See
- * the COPYING file in the top-level directory.
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
  *
  */
 #include "hw/pci/pci.h"
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index a67d927..a836ce3 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -7,7 +7,7 @@
  *     Dave Airlie <airlied@redhat.com>
  *     Gerd Hoffmann <kraxel@redhat.com>
  *
- * This work is licensed under the terms of the GNU GPL, version 2.
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
  */
 
@@ -22,6 +22,23 @@
 static struct virtio_gpu_simple_resource*
 virtio_gpu_find_resource(VirtIOGPU *g, uint32_t resource_id);
 
+#ifdef CONFIG_VIRGL
+#include "virglrenderer.h"
+#define VIRGL(_g, _virgl, _simple, ...)                     \
+    do {                                                    \
+        if (_g->use_virgl_renderer) {                       \
+            _virgl(__VA_ARGS__);                            \
+        } else {                                            \
+            _simple(__VA_ARGS__);                           \
+        }                                                   \
+    } while (0)
+#else
+#define VIRGL(_g, _virgl, _simple, ...)                 \
+    do {                                                \
+        _simple(__VA_ARGS__);                           \
+    } while (0)
+#endif
+
 static void update_cursor_data_simple(VirtIOGPU *g,
                                       struct virtio_gpu_scanout *s,
                                       uint32_t resource_id)
@@ -45,16 +62,49 @@
            pixels * sizeof(uint32_t));
 }
 
+#ifdef CONFIG_VIRGL
+
+static void update_cursor_data_virgl(VirtIOGPU *g,
+                                     struct virtio_gpu_scanout *s,
+                                     uint32_t resource_id)
+{
+    uint32_t width, height;
+    uint32_t pixels, *data;
+
+    data = virgl_renderer_get_cursor_data(resource_id, &width, &height);
+    if (!data) {
+        return;
+    }
+
+    if (width != s->current_cursor->width ||
+        height != s->current_cursor->height) {
+        return;
+    }
+
+    pixels = s->current_cursor->width * s->current_cursor->height;
+    memcpy(s->current_cursor->data, data, pixels * sizeof(uint32_t));
+    free(data);
+}
+
+#endif
+
 static void update_cursor(VirtIOGPU *g, struct virtio_gpu_update_cursor *cursor)
 {
     struct virtio_gpu_scanout *s;
+    bool move = cursor->hdr.type != VIRTIO_GPU_CMD_MOVE_CURSOR;
 
     if (cursor->pos.scanout_id >= g->conf.max_outputs) {
         return;
     }
     s = &g->scanout[cursor->pos.scanout_id];
 
-    if (cursor->hdr.type != VIRTIO_GPU_CMD_MOVE_CURSOR) {
+    trace_virtio_gpu_update_cursor(cursor->pos.scanout_id,
+                                   cursor->pos.x,
+                                   cursor->pos.y,
+                                   move ? "move" : "update",
+                                   cursor->resource_id);
+
+    if (move) {
         if (!s->current_cursor) {
             s->current_cursor = cursor_alloc(64, 64);
         }
@@ -63,7 +113,8 @@
         s->current_cursor->hot_y = cursor->hot_y;
 
         if (cursor->resource_id > 0) {
-            update_cursor_data_simple(g, s, cursor->resource_id);
+            VIRGL(g, update_cursor_data_virgl, update_cursor_data_simple,
+                  g, s, cursor->resource_id);
         }
         dpy_cursor_define(s->con, s->current_cursor);
     }
@@ -92,9 +143,23 @@
 static uint64_t virtio_gpu_get_features(VirtIODevice *vdev, uint64_t features,
                                         Error **errp)
 {
+    VirtIOGPU *g = VIRTIO_GPU(vdev);
+
+    if (virtio_gpu_virgl_enabled(g->conf)) {
+        features |= (1 << VIRTIO_GPU_FEATURE_VIRGL);
+    }
     return features;
 }
 
+static void virtio_gpu_set_features(VirtIODevice *vdev, uint64_t features)
+{
+    static const uint32_t virgl = (1 << VIRTIO_GPU_FEATURE_VIRGL);
+    VirtIOGPU *g = VIRTIO_GPU(vdev);
+
+    g->use_virgl_renderer = ((features & virgl) == virgl);
+    trace_virtio_gpu_features(g->use_virgl_renderer);
+}
+
 static void virtio_gpu_notify_event(VirtIOGPU *g, uint32_t event_type)
 {
     g->virtio_config.events_read |= event_type;
@@ -563,7 +628,6 @@
                           __func__, ab->resource_id, i);
             virtio_gpu_cleanup_mapping_iov(*iov, i);
             g_free(ents);
-            g_free(*iov);
             *iov = NULL;
             return -1;
         }
@@ -580,12 +644,12 @@
         cpu_physical_memory_unmap(iov[i].iov_base, iov[i].iov_len, 1,
                                   iov[i].iov_len);
     }
+    g_free(iov);
 }
 
 static void virtio_gpu_cleanup_mapping(struct virtio_gpu_simple_resource *res)
 {
     virtio_gpu_cleanup_mapping_iov(res->iov, res->iov_cnt);
-    g_free(res->iov);
     res->iov = NULL;
     res->iov_cnt = 0;
 }
@@ -699,25 +763,43 @@
         return;
     }
 
+#ifdef CONFIG_VIRGL
+    if (!g->renderer_inited && g->use_virgl_renderer) {
+        virtio_gpu_virgl_init(g);
+        g->renderer_inited = true;
+    }
+#endif
+
     cmd = g_new(struct virtio_gpu_ctrl_command, 1);
     while (virtqueue_pop(vq, &cmd->elem)) {
         cmd->vq = vq;
         cmd->error = 0;
         cmd->finished = false;
-        g->stats.requests++;
+        if (virtio_gpu_stats_enabled(g->conf)) {
+            g->stats.requests++;
+        }
 
-        virtio_gpu_simple_process_cmd(g, cmd);
+        VIRGL(g, virtio_gpu_virgl_process_cmd, virtio_gpu_simple_process_cmd,
+              g, cmd);
         if (!cmd->finished) {
             QTAILQ_INSERT_TAIL(&g->fenceq, cmd, next);
-            g->stats.inflight++;
-            if (g->stats.max_inflight < g->stats.inflight) {
-                g->stats.max_inflight = g->stats.inflight;
+            g->inflight++;
+            if (virtio_gpu_stats_enabled(g->conf)) {
+                if (g->stats.max_inflight < g->inflight) {
+                    g->stats.max_inflight = g->inflight;
+                }
+                fprintf(stderr, "inflight: %3d (+)\r", g->inflight);
             }
-            fprintf(stderr, "inflight: %3d (+)\r", g->stats.inflight);
             cmd = g_new(struct virtio_gpu_ctrl_command, 1);
         }
     }
     g_free(cmd);
+
+#ifdef CONFIG_VIRGL
+    if (g->use_virgl_renderer) {
+        virtio_gpu_virgl_fence_poll(g);
+    }
+#endif
 }
 
 static void virtio_gpu_ctrl_bh(void *opaque)
@@ -804,6 +886,7 @@
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
     VirtIOGPU *g = VIRTIO_GPU(qdev);
+    bool have_virgl;
     int i;
 
     g->config_size = sizeof(struct virtio_gpu_config);
@@ -814,8 +897,25 @@
     g->req_state[0].width = 1024;
     g->req_state[0].height = 768;
 
-    g->ctrl_vq   = virtio_add_queue(vdev, 64, virtio_gpu_handle_ctrl_cb);
-    g->cursor_vq = virtio_add_queue(vdev, 16, virtio_gpu_handle_cursor_cb);
+    g->use_virgl_renderer = false;
+#if !defined(CONFIG_VIRGL) || defined(HOST_WORDS_BIGENDIAN)
+    have_virgl = false;
+#else
+    have_virgl = display_opengl;
+#endif
+    if (!have_virgl) {
+        g->conf.flags &= ~(1 << VIRTIO_GPU_FLAG_VIRGL_ENABLED);
+    }
+
+    if (virtio_gpu_virgl_enabled(g->conf)) {
+        /* use larger control queue in 3d mode */
+        g->ctrl_vq   = virtio_add_queue(vdev, 256, virtio_gpu_handle_ctrl_cb);
+        g->cursor_vq = virtio_add_queue(vdev, 16, virtio_gpu_handle_cursor_cb);
+        g->virtio_config.num_capsets = 1;
+    } else {
+        g->ctrl_vq   = virtio_add_queue(vdev, 64, virtio_gpu_handle_ctrl_cb);
+        g->cursor_vq = virtio_add_queue(vdev, 16, virtio_gpu_handle_cursor_cb);
+    }
 
     g->ctrl_bh = qemu_bh_new(virtio_gpu_ctrl_bh, g);
     g->cursor_bh = qemu_bh_new(virtio_gpu_cursor_bh, g);
@@ -869,10 +969,23 @@
         g->scanout[i].ds = NULL;
     }
     g->enabled_output_bitmask = 1;
+
+#ifdef CONFIG_VIRGL
+    if (g->use_virgl_renderer) {
+        virtio_gpu_virgl_reset(g);
+        g->use_virgl_renderer = 0;
+    }
+#endif
 }
 
 static Property virtio_gpu_properties[] = {
     DEFINE_PROP_UINT32("max_outputs", VirtIOGPU, conf.max_outputs, 1),
+#ifdef CONFIG_VIRGL
+    DEFINE_PROP_BIT("virgl", VirtIOGPU, conf.flags,
+                    VIRTIO_GPU_FLAG_VIRGL_ENABLED, true),
+    DEFINE_PROP_BIT("stats", VirtIOGPU, conf.flags,
+                    VIRTIO_GPU_FLAG_STATS_ENABLED, false),
+#endif
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -885,6 +998,7 @@
     vdc->get_config = virtio_gpu_get_config;
     vdc->set_config = virtio_gpu_set_config;
     vdc->get_features = virtio_gpu_get_features;
+    vdc->set_features = virtio_gpu_set_features;
 
     vdc->reset = virtio_gpu_reset;
 
@@ -917,3 +1031,14 @@
 QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_attach_backing) != 32);
 QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_detach_backing) != 32);
 QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resp_display_info)       != 408);
+
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_transfer_host_3d)        != 72);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_create_3d)      != 72);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_ctx_create)              != 96);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_ctx_destroy)             != 24);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_ctx_resource)            != 32);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_cmd_submit)              != 32);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_get_capset_info)         != 32);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resp_capset_info)        != 40);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_get_capset)              != 32);
+QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resp_capset)             != 24);
diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
index b1beaa6..44beee3 100644
--- a/hw/i386/kvm/pci-assign.c
+++ b/hw/i386/kvm/pci-assign.c
@@ -22,7 +22,6 @@
  */
 #include <stdio.h>
 #include <unistd.h>
-#include <sys/io.h>
 #include <sys/mman.h>
 #include <sys/types.h>
 #include <sys/stat.h>
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 9275297..682867a 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -985,6 +985,10 @@
         setup_size = 4;
     }
     setup_size = (setup_size+1)*512;
+    if (setup_size > kernel_size) {
+        fprintf(stderr, "qemu: invalid kernel header\n");
+        exit(1);
+    }
     kernel_size -= setup_size;
 
     setup  = g_malloc(setup_size);
diff --git a/hw/input/Makefile.objs b/hw/input/Makefile.objs
index 624ba7e..7715d72 100644
--- a/hw/input/Makefile.objs
+++ b/hw/input/Makefile.objs
@@ -8,9 +8,9 @@
 common-obj-$(CONFIG_TSC2005) += tsc2005.o
 common-obj-$(CONFIG_VMMOUSE) += vmmouse.o
 
-ifeq ($(CONFIG_LINUX),y)
 common-obj-$(CONFIG_VIRTIO) += virtio-input.o
 common-obj-$(CONFIG_VIRTIO) += virtio-input-hid.o
+ifeq ($(CONFIG_LINUX),y)
 common-obj-$(CONFIG_VIRTIO) += virtio-input-host.o
 endif
 
diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index 67881c7..9ff5796 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -848,7 +848,7 @@
     uint32_t xirr = icp_accept(ss);
 
     args[0] = xirr;
-    args[1] = cpu_get_real_ticks();
+    args[1] = cpu_get_host_ticks();
     return H_SUCCESS;
 }
 
diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
index 6cc6ac3..506fe0d 100644
--- a/hw/mem/pc-dimm.c
+++ b/hw/mem/pc-dimm.c
@@ -417,10 +417,11 @@
         error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property is not set");
         return;
     }
-    if ((nb_numa_nodes > 0) && (dimm->node >= nb_numa_nodes)) {
+    if (((nb_numa_nodes > 0) && (dimm->node >= nb_numa_nodes)) ||
+        (!nb_numa_nodes && dimm->node)) {
         error_setg(errp, "'DIMM property " PC_DIMM_NODE_PROP " has value %"
                    PRIu32 "' which exceeds the number of numa nodes: %d",
-                   dimm->node, nb_numa_nodes);
+                   dimm->node, nb_numa_nodes ? nb_numa_nodes : 1);
         return;
     }
 }
diff --git a/hw/misc/arm_integrator_debug.c b/hw/misc/arm_integrator_debug.c
index 99b720f..6d9dd74 100644
--- a/hw/misc/arm_integrator_debug.c
+++ b/hw/misc/arm_integrator_debug.c
@@ -79,7 +79,7 @@
     SysBusDevice *sd = SYS_BUS_DEVICE(obj);
     IntegratorDebugState *s = INTEGRATOR_DEBUG(obj);
 
-    memory_region_init_io(&s->iomem, NULL, &intdbg_control_ops,
+    memory_region_init_io(&s->iomem, obj, &intdbg_control_ops,
                           NULL, "dbg-leds", 0x1000000);
     sysbus_init_mmio(sd, &s->iomem);
 }
diff --git a/hw/misc/macio/cuda.c b/hw/misc/macio/cuda.c
index f3984e3..5d7043e 100644
--- a/hw/misc/macio/cuda.c
+++ b/hw/misc/macio/cuda.c
@@ -713,7 +713,7 @@
     CUDAState *s = CUDA(obj);
     int i;
 
-    memory_region_init_io(&s->mem, NULL, &cuda_ops, s, "cuda", 0x2000);
+    memory_region_init_io(&s->mem, obj, &cuda_ops, s, "cuda", 0x2000);
     sysbus_init_mmio(d, &s->mem);
     sysbus_init_irq(d, &s->irq);
 
diff --git a/hw/misc/macio/macio.c b/hw/misc/macio/macio.c
index e3c0242..c661f86 100644
--- a/hw/misc/macio/macio.c
+++ b/hw/misc/macio/macio.c
@@ -105,10 +105,10 @@
         0xF0, 0xE0,
     };
 
-    memory_region_init(escc_legacy, NULL, "escc-legacy", 256);
+    memory_region_init(escc_legacy, OBJECT(macio_state), "escc-legacy", 256);
     for (i = 0; i < ARRAY_SIZE(maps); i += 2) {
         MemoryRegion *port = g_new(MemoryRegion, 1);
-        memory_region_init_alias(port, NULL, "escc-legacy-port",
+        memory_region_init_alias(port, OBJECT(macio_state), "escc-legacy-port",
                                  macio_state->escc_mem, maps[i+1], 0x2);
         memory_region_add_subregion(escc_legacy, maps[i], port);
     }
@@ -131,6 +131,10 @@
     MacIOState *s = MACIO(d);
     SysBusDevice *sysbus_dev;
     Error *err = NULL;
+    MemoryRegion *dbdma_mem;
+
+    s->dbdma = DBDMA_init(&dbdma_mem);
+    memory_region_add_subregion(&s->bar, 0x08000, dbdma_mem);
 
     object_property_set_bool(OBJECT(&s->cuda), true, "realized", &err);
     if (err) {
@@ -328,16 +332,12 @@
 static void macio_instance_init(Object *obj)
 {
     MacIOState *s = MACIO(obj);
-    MemoryRegion *dbdma_mem;
 
-    memory_region_init(&s->bar, NULL, "macio", 0x80000);
+    memory_region_init(&s->bar, obj, "macio", 0x80000);
 
     object_initialize(&s->cuda, sizeof(s->cuda), TYPE_CUDA);
     qdev_set_parent_bus(DEVICE(&s->cuda), sysbus_get_default());
     object_property_add_child(obj, "cuda", OBJECT(&s->cuda), NULL);
-
-    s->dbdma = DBDMA_init(&dbdma_mem);
-    memory_region_add_subregion(&s->bar, 0x08000, dbdma_mem);
 }
 
 static const VMStateDescription vmstate_macio_oldworld = {
diff --git a/hw/net/rocker/rocker.c b/hw/net/rocker/rocker.c
index 7e7bda4..bb6fdc3 100644
--- a/hw/net/rocker/rocker.c
+++ b/hw/net/rocker/rocker.c
@@ -1362,7 +1362,7 @@
         r->fp_ports = ROCKER_FP_PORTS_MAX;
     }
 
-    r->rings = g_malloc(sizeof(DescRing *) * rocker_pci_ring_count(r));
+    r->rings = g_new(DescRing *, rocker_pci_ring_count(r));
     if (!r->rings) {
         goto err_rings_alloc;
     }
diff --git a/hw/net/rocker/rocker_desc.c b/hw/net/rocker/rocker_desc.c
index b5c0b4a..5e697b1 100644
--- a/hw/net/rocker/rocker_desc.c
+++ b/hw/net/rocker/rocker_desc.c
@@ -142,7 +142,7 @@
     ring->size = size;
     ring->head = ring->tail = 0;
 
-    ring->info = g_realloc(ring->info, size * sizeof(DescInfo));
+    ring->info = g_renew(DescInfo, ring->info, size);
     if (!ring->info) {
         return false;
     }
@@ -345,7 +345,7 @@
 {
     DescRing *ring;
 
-    ring = g_malloc0(sizeof(DescRing));
+    ring = g_new0(DescRing, 1);
     if (!ring) {
         return NULL;
     }
diff --git a/hw/net/rocker/rocker_fp.c b/hw/net/rocker/rocker_fp.c
index c693ae5..5906396 100644
--- a/hw/net/rocker/rocker_fp.c
+++ b/hw/net/rocker/rocker_fp.c
@@ -218,7 +218,7 @@
                       MACAddr *start_mac, unsigned int index,
                       NICPeers *peers)
 {
-    FpPort *port = g_malloc0(sizeof(FpPort));
+    FpPort *port = g_new0(FpPort, 1);
 
     if (!port) {
         return NULL;
diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c
index 874fb01..1ad2791 100644
--- a/hw/net/rocker/rocker_of_dpa.c
+++ b/hw/net/rocker/rocker_of_dpa.c
@@ -367,7 +367,7 @@
     OfDpaFlow *flow;
     int64_t now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) / 1000;
 
-    flow = g_malloc0(sizeof(OfDpaFlow));
+    flow = g_new0(OfDpaFlow, 1);
     if (!flow) {
         return NULL;
     }
@@ -811,7 +811,7 @@
 
 static OfDpaGroup *of_dpa_group_alloc(uint32_t id)
 {
-    OfDpaGroup *group = g_malloc0(sizeof(OfDpaGroup));
+    OfDpaGroup *group = g_new0(OfDpaGroup, 1);
 
     if (!group) {
         return NULL;
@@ -2039,15 +2039,14 @@
     group->l2_flood.group_count =
         rocker_tlv_get_le16(group_tlvs[ROCKER_TLV_OF_DPA_GROUP_COUNT]);
 
-    tlvs = g_malloc0((group->l2_flood.group_count + 1) *
-                     sizeof(RockerTlv *));
+    tlvs = g_new0(RockerTlv *, group->l2_flood.group_count + 1);
     if (!tlvs) {
         return -ROCKER_ENOMEM;
     }
 
     g_free(group->l2_flood.group_ids);
     group->l2_flood.group_ids =
-        g_malloc0(group->l2_flood.group_count * sizeof(uint32_t));
+        g_new0(uint32_t, group->l2_flood.group_count);
     if (!group->l2_flood.group_ids) {
         err = -ROCKER_ENOMEM;
         goto err_out;
diff --git a/hw/pci-host/versatile.c b/hw/pci-host/versatile.c
index 6d23553..7172b90 100644
--- a/hw/pci-host/versatile.c
+++ b/hw/pci-host/versatile.c
@@ -500,6 +500,8 @@
     dc->reset = pci_vpb_reset;
     dc->vmsd = &pci_vpb_vmstate;
     dc->props = pci_vpb_properties;
+    /* Reason: object_unref() hangs */
+    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 static const TypeInfo pci_vpb_info = {
@@ -521,10 +523,19 @@
     s->mem_win_size[2] = 0x08000000;
 }
 
+static void pci_realview_class_init(ObjectClass *class, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(class);
+
+    /* Reason: object_unref() hangs */
+    dc->cannot_destroy_with_object_finalize_yet = true;
+}
+
 static const TypeInfo pci_realview_info = {
     .name          = "realview_pci",
     .parent        = TYPE_VERSATILE_PCI,
     .instance_init = pci_realview_init,
+    .class_init    = pci_realview_class_init,
 };
 
 static void versatile_pci_register_types(void)
diff --git a/hw/pcmcia/pxa2xx.c b/hw/pcmcia/pxa2xx.c
index a7e1877..812716e 100644
--- a/hw/pcmcia/pxa2xx.c
+++ b/hw/pcmcia/pxa2xx.c
@@ -163,7 +163,7 @@
     sysbus_init_mmio(sbd, &s->container_mem);
 
     /* Socket I/O Memory Space */
-    memory_region_init_io(&s->iomem, NULL, &pxa2xx_pcmcia_io_ops, s,
+    memory_region_init_io(&s->iomem, obj, &pxa2xx_pcmcia_io_ops, s,
                           "pxa2xx-pcmcia-io", 0x04000000);
     memory_region_add_subregion(&s->container_mem, 0x00000000,
                                 &s->iomem);
@@ -171,13 +171,13 @@
     /* Then next 64 MB is reserved */
 
     /* Socket Attribute Memory Space */
-    memory_region_init_io(&s->attr_iomem, NULL, &pxa2xx_pcmcia_attr_ops, s,
+    memory_region_init_io(&s->attr_iomem, obj, &pxa2xx_pcmcia_attr_ops, s,
                           "pxa2xx-pcmcia-attribute", 0x04000000);
     memory_region_add_subregion(&s->container_mem, 0x08000000,
                                 &s->attr_iomem);
 
     /* Socket Common Memory Space */
-    memory_region_init_io(&s->common_iomem, NULL, &pxa2xx_pcmcia_common_ops, s,
+    memory_region_init_io(&s->common_iomem, obj, &pxa2xx_pcmcia_common_ops, s,
                           "pxa2xx-pcmcia-common", 0x04000000);
     memory_region_add_subregion(&s->container_mem, 0x0c000000,
                                 &s->common_iomem);
diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index b77e303..2c604ef 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -834,7 +834,7 @@
 static void timebase_pre_save(void *opaque)
 {
     PPCTimebase *tb = opaque;
-    uint64_t ticks = cpu_get_real_ticks();
+    uint64_t ticks = cpu_get_host_ticks();
     PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu);
 
     if (!first_ppc_cpu->env.tb_env) {
@@ -878,7 +878,7 @@
                                      NANOSECONDS_PER_SECOND);
     guest_tb = tb_remote->guest_timebase + MIN(0, migration_duration_tb);
 
-    tb_off_adj = guest_tb - cpu_get_real_ticks();
+    tb_off_adj = guest_tb - cpu_get_host_ticks();
 
     tb_off = first_ppc_cpu->env.tb_env->tb_offset;
     trace_ppc_tb_adjust(tb_off, tb_off_adj, tb_off_adj - tb_off,
diff --git a/hw/s390x/event-facility.c b/hw/s390x/event-facility.c
index ef2a051..907b485 100644
--- a/hw/s390x/event-facility.c
+++ b/hw/s390x/event-facility.c
@@ -27,8 +27,6 @@
 struct SCLPEventFacility {
     SysBusDevice parent_obj;
     SCLPEventsBus sbus;
-    SCLPEvent quiesce_event;
-    SCLPEvent cpu_hotplug_event;
     /* guest' receive mask */
     unsigned int receive_mask;
 };
@@ -347,19 +345,21 @@
 {
     SCLPEventFacility *event_facility = EVENT_FACILITY(obj);
     DeviceState *sdev = DEVICE(obj);
+    Object *new;
 
     /* Spawn a new bus for SCLP events */
     qbus_create_inplace(&event_facility->sbus, sizeof(event_facility->sbus),
                         TYPE_SCLP_EVENTS_BUS, sdev, NULL);
 
-    object_initialize(&event_facility->quiesce_event, sizeof(SCLPEvent),
-                      TYPE_SCLP_QUIESCE);
-    qdev_set_parent_bus(DEVICE(&event_facility->quiesce_event),
-                        &event_facility->sbus.qbus);
-    object_initialize(&event_facility->cpu_hotplug_event, sizeof(SCLPEvent),
-                      TYPE_SCLP_CPU_HOTPLUG);
-    qdev_set_parent_bus(DEVICE(&event_facility->cpu_hotplug_event),
-                        &event_facility->sbus.qbus);
+    new = object_new(TYPE_SCLP_QUIESCE);
+    object_property_add_child(obj, TYPE_SCLP_QUIESCE, new, NULL);
+    object_unref(new);
+    qdev_set_parent_bus(DEVICE(new), &event_facility->sbus.qbus);
+
+    new = object_new(TYPE_SCLP_CPU_HOTPLUG);
+    object_property_add_child(obj, TYPE_SCLP_CPU_HOTPLUG, new, NULL);
+    object_unref(new);
+    qdev_set_parent_bus(DEVICE(new), &event_facility->sbus.qbus);
     /* the facility will automatically realize the devices via the bus */
 }
 
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index c53ebc1..6195f13 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -35,7 +35,7 @@
     bool dea_key_wrap;
 } S390CcwMachineState;
 
-void io_subsystem_reset(void)
+void subsystem_reset(void)
 {
     DeviceState *css, *sclp, *flic, *diag288;
 
diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c
index 65304cf..8b0f9f0 100644
--- a/hw/sd/sdhci.c
+++ b/hw/sd/sdhci.c
@@ -22,6 +22,7 @@
  * with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <inttypes.h>
 #include "hw/hw.h"
 #include "sysemu/block-backend.h"
 #include "sysemu/blockdev.h"
@@ -36,24 +37,24 @@
 #define SDHC_DEBUG                        0
 #endif
 
-#if SDHC_DEBUG == 0
-    #define DPRINT_L1(fmt, args...)       do { } while (0)
-    #define DPRINT_L2(fmt, args...)       do { } while (0)
-    #define ERRPRINT(fmt, args...)        do { } while (0)
-#elif SDHC_DEBUG == 1
-    #define DPRINT_L1(fmt, args...)       \
-        do {fprintf(stderr, "QEMU SDHC: "fmt, ## args); } while (0)
-    #define DPRINT_L2(fmt, args...)       do { } while (0)
-    #define ERRPRINT(fmt, args...)        \
-        do {fprintf(stderr, "QEMU SDHC ERROR: "fmt, ## args); } while (0)
-#else
-    #define DPRINT_L1(fmt, args...)       \
-        do {fprintf(stderr, "QEMU SDHC: "fmt, ## args); } while (0)
-    #define DPRINT_L2(fmt, args...)       \
-        do {fprintf(stderr, "QEMU SDHC: "fmt, ## args); } while (0)
-    #define ERRPRINT(fmt, args...)        \
-        do {fprintf(stderr, "QEMU SDHC ERROR: "fmt, ## args); } while (0)
-#endif
+#define DPRINT_L1(fmt, args...) \
+    do { \
+        if (SDHC_DEBUG) { \
+            fprintf(stderr, "QEMU SDHC: " fmt, ## args); \
+        } \
+    } while (0)
+#define DPRINT_L2(fmt, args...) \
+    do { \
+        if (SDHC_DEBUG > 1) { \
+            fprintf(stderr, "QEMU SDHC: " fmt, ## args); \
+        } \
+    } while (0)
+#define ERRPRINT(fmt, args...) \
+    do { \
+        if (SDHC_DEBUG) { \
+            fprintf(stderr, "QEMU SDHC ERROR: " fmt, ## args); \
+        } \
+    } while (0)
 
 /* Default SD/MMC host controller features information, which will be
  * presented in CAPABILITIES register of generic SD host controller at reset.
@@ -719,7 +720,8 @@
             break;
         case SDHC_ADMA_ATTR_ACT_LINK:   /* link to next descriptor table */
             s->admasysaddr = dscr.addr;
-            DPRINT_L1("ADMA link: admasysaddr=0x%lx\n", s->admasysaddr);
+            DPRINT_L1("ADMA link: admasysaddr=0x%" PRIx64 "\n",
+                      s->admasysaddr);
             break;
         default:
             s->admasysaddr += dscr.incr;
@@ -727,7 +729,8 @@
         }
 
         if (dscr.attr & SDHC_ADMA_ATTR_INT) {
-            DPRINT_L1("ADMA interrupt: admasysaddr=0x%lx\n", s->admasysaddr);
+            DPRINT_L1("ADMA interrupt: admasysaddr=0x%" PRIx64 "\n",
+                      s->admasysaddr);
             if (s->norintstsen & SDHC_NISEN_DMA) {
                 s->norintsts |= SDHC_NIS_DMA;
             }
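A minimal standalone sketch of the same always-compiled debug-macro pattern used in the sdhci.c hunk above, assuming hypothetical names DEBUG_FOO and DPRINTF; because the condition is a compile-time constant, the format arguments are type-checked in every build and the dead branch is dropped by the compiler when DEBUG_FOO is 0.

#include <stdio.h>

#define DEBUG_FOO 0

#define DPRINTF(fmt, args...) \
    do { \
        if (DEBUG_FOO) { \
            fprintf(stderr, "foo: " fmt, ## args); \
        } \
    } while (0)

int main(void)
{
    /* Type-checked even though DEBUG_FOO is 0; no output is produced. */
    DPRINTF("value=%d\n", 42);
    return 0;
}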
diff --git a/hw/timer/m48t59.c b/hw/timer/m48t59.c
index 8ab683d..b3df8f9 100644
--- a/hw/timer/m48t59.c
+++ b/hw/timer/m48t59.c
@@ -590,10 +590,8 @@
 static uint32_t nvram_readb (void *opaque, hwaddr addr)
 {
     M48t59State *NVRAM = opaque;
-    uint32_t retval;
 
-    retval = m48t59_read(NVRAM, addr);
-    return retval;
+    return m48t59_read(NVRAM, addr);
 }
 
 static uint32_t nvram_readw (void *opaque, hwaddr addr)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 0d341a3..6797208 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -312,11 +312,15 @@
     rcu_read_unlock();
 }
 
+static hwaddr vfio_container_granularity(VFIOContainer *container)
+{
+    return (hwaddr)1 << ctz64(container->iova_pgsizes);
+}
+
 static void vfio_listener_region_add(MemoryListener *listener,
                                      MemoryRegionSection *section)
 {
-    VFIOContainer *container = container_of(listener, VFIOContainer,
-                                            iommu_data.type1.listener);
+    VFIOContainer *container = container_of(listener, VFIOContainer, listener);
     hwaddr iova, end;
     Int128 llend;
     void *vaddr;
@@ -344,14 +348,22 @@
     if (int128_ge(int128_make64(iova), llend)) {
         return;
     }
+    end = int128_get64(llend);
+
+    if ((iova < container->min_iova) || ((end - 1) > container->max_iova)) {
+        error_report("vfio: IOMMU container %p can't map guest IOVA region"
+                     " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx,
+                     container, iova, end - 1);
+        ret = -EFAULT;
+        goto fail;
+    }
 
     memory_region_ref(section->mr);
 
     if (memory_region_is_iommu(section->mr)) {
         VFIOGuestIOMMU *giommu;
 
-        trace_vfio_listener_region_add_iommu(iova,
-                    int128_get64(int128_sub(llend, int128_one())));
+        trace_vfio_listener_region_add_iommu(iova, end - 1);
         /*
          * FIXME: We should do some checking to see if the
          * capabilities of the host VFIO IOMMU are adequate to model
@@ -362,33 +374,22 @@
          * would be the right place to wire that up (tell the KVM
          * device emulation the VFIO iommu handles to use).
          */
-        /*
-         * This assumes that the guest IOMMU is empty of
-         * mappings at this point.
-         *
-         * One way of doing this is:
-         * 1. Avoid sharing IOMMUs between emulated devices or different
-         * IOMMU groups.
-         * 2. Implement VFIO_IOMMU_ENABLE in the host kernel to fail if
-         * there are some mappings in IOMMU.
-         *
-         * VFIO on SPAPR does that. Other IOMMU models may do that different,
-         * they must make sure there are no existing mappings or
-         * loop through existing mappings to map them into VFIO.
-         */
         giommu = g_malloc0(sizeof(*giommu));
         giommu->iommu = section->mr;
         giommu->container = container;
         giommu->n.notify = vfio_iommu_map_notify;
         QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
+
         memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
+        memory_region_iommu_replay(giommu->iommu, &giommu->n,
+                                   vfio_container_granularity(container),
+                                   false);
 
         return;
     }
 
     /* Here we assume that memory_region_is_ram(section->mr)==true */
 
-    end = int128_get64(llend);
     vaddr = memory_region_get_ram_ptr(section->mr) +
             section->offset_within_region +
             (iova - section->offset_within_address_space);
@@ -400,27 +401,30 @@
         error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
                      "0x%"HWADDR_PRIx", %p) = %d (%m)",
                      container, iova, end - iova, vaddr, ret);
+        goto fail;
+    }
 
-        /*
-         * On the initfn path, store the first error in the container so we
-         * can gracefully fail.  Runtime, there's not much we can do other
-         * than throw a hardware error.
-         */
-        if (!container->iommu_data.type1.initialized) {
-            if (!container->iommu_data.type1.error) {
-                container->iommu_data.type1.error = ret;
-            }
-        } else {
-            hw_error("vfio: DMA mapping failed, unable to continue");
+    return;
+
+fail:
+    /*
+     * On the initfn path, store the first error in the container so we
+     * can gracefully fail.  Runtime, there's not much we can do other
+     * than throw a hardware error.
+     */
+    if (!container->initialized) {
+        if (!container->error) {
+            container->error = ret;
         }
+    } else {
+        hw_error("vfio: DMA mapping failed, unable to continue");
     }
 }
 
 static void vfio_listener_region_del(MemoryListener *listener,
                                      MemoryRegionSection *section)
 {
-    VFIOContainer *container = container_of(listener, VFIOContainer,
-                                            iommu_data.type1.listener);
+    VFIOContainer *container = container_of(listener, VFIOContainer, listener);
     hwaddr iova, end;
     int ret;
 
@@ -485,7 +489,7 @@
 
 static void vfio_listener_release(VFIOContainer *container)
 {
-    memory_listener_unregister(&container->iommu_data.type1.listener);
+    memory_listener_unregister(&container->listener);
 }
 
 int vfio_mmap_region(Object *obj, VFIORegion *region,
@@ -668,6 +672,7 @@
     if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU) ||
         ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU)) {
         bool v2 = !!ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU);
+        struct vfio_iommu_type1_info info;
 
         ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
         if (ret) {
@@ -684,21 +689,27 @@
             goto free_container_exit;
         }
 
-        container->iommu_data.type1.listener = vfio_memory_listener;
-        container->iommu_data.release = vfio_listener_release;
+        /*
+         * FIXME: This assumes that a Type1 IOMMU can map any 64-bit
+         * IOVA whatsoever.  That's not actually true, but the current
+         * kernel interface doesn't tell us what it can map, and the
+         * existing Type1 IOMMUs generally support any IOVA we're
+         * going to actually try in practice.
+         */
+        container->min_iova = 0;
+        container->max_iova = (hwaddr)-1;
 
-        memory_listener_register(&container->iommu_data.type1.listener,
-                                 container->space->as);
-
-        if (container->iommu_data.type1.error) {
-            ret = container->iommu_data.type1.error;
-            error_report("vfio: memory listener initialization failed for container");
-            goto listener_release_exit;
+        /* Assume just 4K IOVA page size */
+        container->iova_pgsizes = 0x1000;
+        info.argsz = sizeof(info);
+        ret = ioctl(fd, VFIO_IOMMU_GET_INFO, &info);
+        /* Ignore errors */
+        if ((ret == 0) && (info.flags & VFIO_IOMMU_INFO_PGSIZES)) {
+            container->iova_pgsizes = info.iova_pgsizes;
         }
-
-        container->iommu_data.type1.initialized = true;
-
     } else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU)) {
+        struct vfio_iommu_spapr_tce_info info;
+
         ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
         if (ret) {
             error_report("vfio: failed to set group container: %m");
@@ -724,18 +735,41 @@
             goto free_container_exit;
         }
 
-        container->iommu_data.type1.listener = vfio_memory_listener;
-        container->iommu_data.release = vfio_listener_release;
+        /*
+         * This only considers the host IOMMU's 32-bit window.  At
+         * some point we need to add support for the optional 64-bit
+         * window and dynamic windows.
+         */
+        info.argsz = sizeof(info);
+        ret = ioctl(fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
+        if (ret) {
+            error_report("vfio: VFIO_IOMMU_SPAPR_TCE_GET_INFO failed: %m");
+            ret = -errno;
+            goto free_container_exit;
+        }
+        container->min_iova = info.dma32_window_start;
+        container->max_iova = container->min_iova + info.dma32_window_size - 1;
 
-        memory_listener_register(&container->iommu_data.type1.listener,
-                                 container->space->as);
-
+        /* Assume just 4K IOVA pages for now */
+        container->iova_pgsizes = 0x1000;
     } else {
         error_report("vfio: No available IOMMU models");
         ret = -EINVAL;
         goto free_container_exit;
     }
 
+    container->listener = vfio_memory_listener;
+
+    memory_listener_register(&container->listener, container->space->as);
+
+    if (container->error) {
+        ret = container->error;
+        error_report("vfio: memory listener initialization failed for container");
+        goto listener_release_exit;
+    }
+
+    container->initialized = true;
+
     QLIST_INIT(&container->group_list);
     QLIST_INSERT_HEAD(&space->containers, container, next);
 
@@ -774,9 +808,7 @@
         VFIOAddressSpace *space = container->space;
         VFIOGuestIOMMU *giommu, *tmp;
 
-        if (container->iommu_data.release) {
-            container->iommu_data.release(container);
-        }
+        vfio_listener_release(container);
         QLIST_REMOVE(container, next);
 
         QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) {
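vfio_container_granularity() above picks the replay granularity as the smallest IOVA page size the container supports, i.e. 1 << ctz64(iova_pgsizes). A standalone sketch of that computation, substituting GCC's __builtin_ctzll for QEMU's ctz64() and using a hypothetical smallest_pgsize() name:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Each set bit in the bitmap marks a supported page size; the lowest
 * set bit is the finest granularity the IOMMU can map. */
static uint64_t smallest_pgsize(uint64_t pgsize_bitmap)
{
    return UINT64_C(1) << __builtin_ctzll(pgsize_bitmap);
}

int main(void)
{
    /* 4K | 2M | 1G supported -> replay at 4K granularity */
    uint64_t bitmap = (UINT64_C(1) << 12) | (UINT64_C(1) << 21) | (UINT64_C(1) << 30);

    printf("0x%" PRIx64 "\n", smallest_pgsize(bitmap));   /* 0x1000 */
    return 0;
}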
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
index a6726cd..5c1156c 100644
--- a/hw/vfio/platform.c
+++ b/hw/vfio/platform.c
@@ -32,6 +32,11 @@
  * Functions used whatever the injection method
  */
 
+static inline bool vfio_irq_is_automasked(VFIOINTp *intp)
+{
+    return intp->flags & VFIO_IRQ_INFO_AUTOMASKED;
+}
+
 /**
  * vfio_init_intp - allocate, initialize the IRQ struct pointer
  * and add it into the list of IRQs
@@ -57,18 +62,25 @@
     sysbus_init_irq(sbdev, &intp->qemuirq);
 
     /* Get an eventfd for trigger */
-    ret = event_notifier_init(&intp->interrupt, 0);
+    intp->interrupt = g_malloc0(sizeof(EventNotifier));
+    ret = event_notifier_init(intp->interrupt, 0);
     if (ret) {
+        g_free(intp->interrupt);
         g_free(intp);
         error_report("vfio: Error: trigger event_notifier_init failed ");
         return NULL;
     }
-    /* Get an eventfd for resample/unmask */
-    ret = event_notifier_init(&intp->unmask, 0);
-    if (ret) {
-        g_free(intp);
-        error_report("vfio: Error: resamplefd event_notifier_init failed");
-        return NULL;
+    if (vfio_irq_is_automasked(intp)) {
+        /* Get an eventfd for resample/unmask */
+        intp->unmask = g_malloc0(sizeof(EventNotifier));
+        ret = event_notifier_init(intp->unmask, 0);
+        if (ret) {
+            g_free(intp->interrupt);
+            g_free(intp->unmask);
+            g_free(intp);
+            error_report("vfio: Error: resamplefd event_notifier_init failed");
+            return NULL;
+        }
     }
 
     QLIST_INSERT_HEAD(&vdev->intp_list, intp, next);
@@ -100,7 +112,7 @@
     irq_set->start = 0;
     irq_set->count = 1;
     pfd = (int32_t *)&irq_set->data;
-    *pfd = event_notifier_get_fd(&intp->interrupt);
+    *pfd = event_notifier_get_fd(intp->interrupt);
     qemu_set_fd_handler(*pfd, (IOHandler *)handler, NULL, intp);
     ret = ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
     g_free(irq_set);
@@ -182,7 +194,7 @@
 static void vfio_intp_inject_pending_lockheld(VFIOINTp *intp)
 {
     trace_vfio_platform_intp_inject_pending_lockheld(intp->pin,
-                              event_notifier_get_fd(&intp->interrupt));
+                              event_notifier_get_fd(intp->interrupt));
 
     intp->state = VFIO_IRQ_ACTIVE;
 
@@ -224,18 +236,18 @@
         trace_vfio_intp_interrupt_set_pending(intp->pin);
         QSIMPLEQ_INSERT_TAIL(&vdev->pending_intp_queue,
                              intp, pqnext);
-        ret = event_notifier_test_and_clear(&intp->interrupt);
+        ret = event_notifier_test_and_clear(intp->interrupt);
         qemu_mutex_unlock(&vdev->intp_mutex);
         return;
     }
 
     trace_vfio_platform_intp_interrupt(intp->pin,
-                              event_notifier_get_fd(&intp->interrupt));
+                              event_notifier_get_fd(intp->interrupt));
 
-    ret = event_notifier_test_and_clear(&intp->interrupt);
+    ret = event_notifier_test_and_clear(intp->interrupt);
     if (!ret) {
         error_report("Error when clearing fd=%d (ret = %d)",
-                     event_notifier_get_fd(&intp->interrupt), ret);
+                     event_notifier_get_fd(intp->interrupt), ret);
     }
 
     intp->state = VFIO_IRQ_ACTIVE;
@@ -283,13 +295,13 @@
     QLIST_FOREACH(intp, &vdev->intp_list, next) {
         if (intp->state == VFIO_IRQ_ACTIVE) {
             trace_vfio_platform_eoi(intp->pin,
-                                event_notifier_get_fd(&intp->interrupt));
+                                event_notifier_get_fd(intp->interrupt));
             intp->state = VFIO_IRQ_INACTIVE;
 
             /* deassert the virtual IRQ */
             qemu_set_irq(intp->qemuirq, 0);
 
-            if (intp->flags & VFIO_IRQ_INFO_AUTOMASKED) {
+            if (vfio_irq_is_automasked(intp)) {
                 /* unmasks the physical level-sensitive IRQ */
                 vfio_unmask_single_irqindex(vbasedev, intp->pin);
             }
@@ -310,18 +322,29 @@
 /**
  * vfio_start_eventfd_injection - starts the virtual IRQ injection using
  * user-side handled eventfds
- * @intp: the IRQ struct pointer
+ * @sbdev: the sysbus device handle
+ * @irq: the qemu irq handle
  */
 
-static int vfio_start_eventfd_injection(VFIOINTp *intp)
+static void vfio_start_eventfd_injection(SysBusDevice *sbdev, qemu_irq irq)
 {
     int ret;
+    VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
+    VFIOINTp *intp;
+
+    QLIST_FOREACH(intp, &vdev->intp_list, next) {
+        if (intp->qemuirq == irq) {
+            break;
+        }
+    }
+    assert(intp);
 
     ret = vfio_set_trigger_eventfd(intp, vfio_intp_interrupt);
     if (ret) {
-        error_report("vfio: Error: Failed to pass IRQ fd to the driver: %m");
+        error_report("vfio: failed to start eventfd signaling for IRQ %d: %m",
+                     intp->pin);
+        abort();
     }
-    return ret;
 }
 
 /*
@@ -349,7 +372,7 @@
     irq_set->start = 0;
     irq_set->count = 1;
     pfd = (int32_t *)&irq_set->data;
-    *pfd = event_notifier_get_fd(&intp->unmask);
+    *pfd = event_notifier_get_fd(intp->unmask);
     qemu_set_fd_handler(*pfd, NULL, NULL, NULL);
     ret = ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
     g_free(irq_set);
@@ -359,6 +382,15 @@
     return ret;
 }
 
+/**
+ * vfio_start_irqfd_injection - starts the virtual IRQ injection using
+ * irqfd
+ *
+ * @sbdev: the sysbus device handle
+ * @irq: the qemu irq handle
+ *
+ * In case the irqfd setup fails, we fall back to userspace-handled eventfd.
+ */
 static void vfio_start_irqfd_injection(SysBusDevice *sbdev, qemu_irq irq)
 {
     VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
@@ -366,7 +398,7 @@
 
     if (!kvm_irqfds_enabled() || !kvm_resamplefds_enabled() ||
         !vdev->irqfd_allowed) {
-        return;
+        goto fail_irqfd;
     }
 
     QLIST_FOREACH(intp, &vdev->intp_list, next) {
@@ -376,39 +408,36 @@
     }
     assert(intp);
 
-    /* Get to a known interrupt state */
-    qemu_set_fd_handler(event_notifier_get_fd(&intp->interrupt),
-                        NULL, NULL, vdev);
-
-    vfio_mask_single_irqindex(&vdev->vbasedev, intp->pin);
-    qemu_set_irq(intp->qemuirq, 0);
-
-    if (kvm_irqchip_add_irqfd_notifier(kvm_state, &intp->interrupt,
-                                   &intp->unmask, irq) < 0) {
+    if (kvm_irqchip_add_irqfd_notifier(kvm_state, intp->interrupt,
+                                   intp->unmask, irq) < 0) {
         goto fail_irqfd;
     }
 
     if (vfio_set_trigger_eventfd(intp, NULL) < 0) {
         goto fail_vfio;
     }
-    if (vfio_set_resample_eventfd(intp) < 0) {
-        goto fail_vfio;
+    if (vfio_irq_is_automasked(intp)) {
+        if (vfio_set_resample_eventfd(intp) < 0) {
+            goto fail_vfio;
+        }
+        trace_vfio_platform_start_level_irqfd_injection(intp->pin,
+                                    event_notifier_get_fd(intp->interrupt),
+                                    event_notifier_get_fd(intp->unmask));
+    } else {
+        trace_vfio_platform_start_edge_irqfd_injection(intp->pin,
+                                    event_notifier_get_fd(intp->interrupt));
     }
 
-    /* Let's resume injection with irqfd setup */
-    vfio_unmask_single_irqindex(&vdev->vbasedev, intp->pin);
-
     intp->kvm_accel = true;
 
-    trace_vfio_platform_start_irqfd_injection(intp->pin,
-                                     event_notifier_get_fd(&intp->interrupt),
-                                     event_notifier_get_fd(&intp->unmask));
     return;
 fail_vfio:
-    kvm_irqchip_remove_irqfd_notifier(kvm_state, &intp->interrupt, irq);
+    kvm_irqchip_remove_irqfd_notifier(kvm_state, intp->interrupt, irq);
+    error_report("vfio: failed to start eventfd signaling for IRQ %d: %m",
+                 intp->pin);
+    abort();
 fail_irqfd:
-    vfio_start_eventfd_injection(intp);
-    vfio_unmask_single_irqindex(&vdev->vbasedev, intp->pin);
+    vfio_start_eventfd_injection(sbdev, irq);
     return;
 }
 
@@ -646,7 +675,6 @@
     VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev);
     SysBusDevice *sbdev = SYS_BUS_DEVICE(dev);
     VFIODevice *vbasedev = &vdev->vbasedev;
-    VFIOINTp *intp;
     int i, ret;
 
     vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM;
@@ -665,10 +693,6 @@
         vfio_map_region(vdev, i);
         sysbus_init_mmio(sbdev, &vdev->regions[i]->mem);
     }
-
-    QLIST_FOREACH(intp, &vdev->intp_list, next) {
-        vfio_start_eventfd_injection(intp);
-    }
 }
 
 static const VMStateDescription vfio_platform_vmstate = {
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 6703806..e5c406d 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -2134,14 +2134,6 @@
                                 TYPE_VIRTIO_TABLET);
 }
 
-static void virtio_host_initfn(Object *obj)
-{
-    VirtIOInputHostPCI *dev = VIRTIO_INPUT_HOST_PCI(obj);
-
-    virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
-                                TYPE_VIRTIO_INPUT_HOST);
-}
-
 static const TypeInfo virtio_input_pci_info = {
     .name          = TYPE_VIRTIO_INPUT_PCI,
     .parent        = TYPE_VIRTIO_PCI,
@@ -2180,12 +2172,22 @@
     .instance_init = virtio_tablet_initfn,
 };
 
+#ifdef CONFIG_LINUX
+static void virtio_host_initfn(Object *obj)
+{
+    VirtIOInputHostPCI *dev = VIRTIO_INPUT_HOST_PCI(obj);
+
+    virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
+                                TYPE_VIRTIO_INPUT_HOST);
+}
+
 static const TypeInfo virtio_host_pci_info = {
     .name          = TYPE_VIRTIO_INPUT_HOST_PCI,
     .parent        = TYPE_VIRTIO_INPUT_PCI,
     .instance_size = sizeof(VirtIOInputHostPCI),
     .instance_init = virtio_host_initfn,
 };
+#endif
 
 /* virtio-pci-bus */
 
@@ -2233,7 +2235,9 @@
     type_register_static(&virtio_keyboard_pci_info);
     type_register_static(&virtio_mouse_pci_info);
     type_register_static(&virtio_tablet_pci_info);
+#ifdef CONFIG_LINUX
     type_register_static(&virtio_host_pci_info);
+#endif
     type_register_static(&virtio_pci_bus_info);
     type_register_static(&virtio_pci_info);
 #ifdef CONFIG_VIRTFS
diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
index b6c442f..801c23a 100644
--- a/hw/virtio/virtio-pci.h
+++ b/hw/virtio/virtio-pci.h
@@ -267,6 +267,8 @@
     VirtIOInputHID vdev;
 };
 
+#ifdef CONFIG_LINUX
+
 #define TYPE_VIRTIO_INPUT_HOST_PCI "virtio-input-host-pci"
 #define VIRTIO_INPUT_HOST_PCI(obj) \
         OBJECT_CHECK(VirtIOInputHostPCI, (obj), TYPE_VIRTIO_INPUT_HOST_PCI)
@@ -276,6 +278,8 @@
     VirtIOInputHost vdev;
 };
 
+#endif
+
 /*
  * virtio-gpu-pci: This extends VirtioPCIProxy.
  */
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index a3719b7..a63fd60 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -62,24 +62,15 @@
 #define OPC_BUF_SIZE 640
 #define OPC_MAX_SIZE (OPC_BUF_SIZE - MAX_OP_PER_INSTR)
 
-/* Maximum size a TCG op can expand to.  This is complicated because a
-   single op may require several host instructions and register reloads.
-   For now take a wild guess at 192 bytes, which should allow at least
-   a couple of fixup instructions per argument.  */
-#define TCG_MAX_OP_SIZE 192
-
 #define OPPARAM_BUF_SIZE (OPC_BUF_SIZE * MAX_OPC_PARAM)
 
 #include "qemu/log.h"
 
 void gen_intermediate_code(CPUArchState *env, struct TranslationBlock *tb);
-void gen_intermediate_code_pc(CPUArchState *env, struct TranslationBlock *tb);
 void restore_state_to_opc(CPUArchState *env, struct TranslationBlock *tb,
-                          int pc_pos);
+                          target_ulong *data);
 
 void cpu_gen_init(void);
-int cpu_gen_code(CPUArchState *env, struct TranslationBlock *tb,
-                 int *gen_code_size_ptr);
 bool cpu_restore_state(CPUState *cpu, uintptr_t searched_pc);
 void page_size_init(void);
 
@@ -170,13 +161,14 @@
 #define CODE_GEN_PHYS_HASH_BITS     15
 #define CODE_GEN_PHYS_HASH_SIZE     (1 << CODE_GEN_PHYS_HASH_BITS)
 
-/* estimated block size for TB allocation */
-/* XXX: use a per code average code fragment size and modulate it
-   according to the host CPU */
+/* Estimated block size for TB allocation.  */
+/* ??? The following is based on a 2015 survey of x86_64 host output.
+   Better would seem to be some sort of dynamically sized TB array,
+   adapting to the block sizes actually being produced.  */
 #if defined(CONFIG_SOFTMMU)
-#define CODE_GEN_AVG_BLOCK_SIZE 128
+#define CODE_GEN_AVG_BLOCK_SIZE 400
 #else
-#define CODE_GEN_AVG_BLOCK_SIZE 64
+#define CODE_GEN_AVG_BLOCK_SIZE 150
 #endif
 
 #if defined(__arm__) || defined(_ARCH_PPC) \
@@ -201,6 +193,7 @@
 #define CF_USE_ICOUNT  0x20000
 
     void *tc_ptr;    /* pointer to the translated code */
+    uint8_t *tc_search;  /* pointer to search data */
     /* next matching tb for physical address. */
     struct TranslationBlock *phys_hash_next;
     /* original tb when cflags has CF_NOCACHE */
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 5baaf48..0f07159 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -583,6 +583,19 @@
 void memory_region_register_iommu_notifier(MemoryRegion *mr, Notifier *n);
 
 /**
+ * memory_region_iommu_replay: replay existing IOMMU translations to
+ * a notifier
+ *
+ * @mr: the memory region to observe
+ * @n: the notifier to which to replay iommu mappings
+ * @granularity: Minimum page granularity to replay notifications for
+ * @is_write: Whether to treat the replay as a translate "write"
+ *     through the iommu
+ */
+void memory_region_iommu_replay(MemoryRegion *mr, Notifier *n,
+                                hwaddr granularity, bool is_write);
+
+/**
  * memory_region_unregister_iommu_notifier: unregister a notifier for
  * changes to IOMMU translation entries.
  *
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index 038b54d..8057aed 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -114,6 +114,19 @@
      * TODO remove once we're there
      */
     bool cannot_instantiate_with_device_add_yet;
+    /*
+     * Does this device model survive object_unref(object_new(TNAME))?
+     * All device models should, and this flag shouldn't exist.  Some
+     * devices crash in object_new(), some crash or hang in
+     * object_unref().  Makes introspecting properties with
+     * qmp_device_list_properties() dangerous.  Bad, because it's used
+     * by -device FOO,help.  This flag serves to protect that code.
+     * It should never be set without a comment explaining why it is
+     * set.
+     * TODO remove once we're there
+     */
+    bool cannot_destroy_with_object_finalize_yet;
+
     bool hotpluggable;
 
     /* callbacks */
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 9b9901f..f037f3c 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -59,22 +59,19 @@
 
 struct VFIOGroup;
 
-typedef struct VFIOType1 {
-    MemoryListener listener;
-    int error;
-    bool initialized;
-} VFIOType1;
-
 typedef struct VFIOContainer {
     VFIOAddressSpace *space;
     int fd; /* /dev/vfio/vfio, empowered by the attached groups */
-    struct {
-        /* enable abstraction to support various iommu backends */
-        union {
-            VFIOType1 type1;
-        };
-        void (*release)(struct VFIOContainer *);
-    } iommu_data;
+    MemoryListener listener;
+    int error;
+    bool initialized;
+    /*
+     * This assumes the host IOMMU can support only a single
+     * contiguous IOVA window.  We may need to generalize that in
+     * the future.
+     */
+    hwaddr min_iova, max_iova;
+    uint64_t iova_pgsizes;
     QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
     QLIST_HEAD(, VFIOGroup) group_list;
     QLIST_ENTRY(VFIOContainer) next;
diff --git a/include/hw/vfio/vfio-platform.h b/include/hw/vfio/vfio-platform.h
index c5cf1d7..b468f80 100644
--- a/include/hw/vfio/vfio-platform.h
+++ b/include/hw/vfio/vfio-platform.h
@@ -34,8 +34,8 @@
 typedef struct VFIOINTp {
     QLIST_ENTRY(VFIOINTp) next; /* entry for IRQ list */
     QSIMPLEQ_ENTRY(VFIOINTp) pqnext; /* entry for pending IRQ queue */
-    EventNotifier interrupt; /* eventfd triggered on interrupt */
-    EventNotifier unmask; /* eventfd for unmask on QEMU bypass */
+    EventNotifier *interrupt; /* eventfd triggered on interrupt */
+    EventNotifier *unmask; /* eventfd for unmask on QEMU bypass */
     qemu_irq qemuirq;
     struct VFIOPlatformDevice *vdev; /* back pointer to device */
     int state; /* inactive, pending, active */
diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
index 8896761..9b279d7 100644
--- a/include/hw/virtio/virtio-gpu.h
+++ b/include/hw/virtio/virtio-gpu.h
@@ -56,8 +56,19 @@
     int x, y;
 };
 
+enum virtio_gpu_conf_flags {
+    VIRTIO_GPU_FLAG_VIRGL_ENABLED = 1,
+    VIRTIO_GPU_FLAG_STATS_ENABLED,
+};
+
+#define virtio_gpu_virgl_enabled(_cfg) \
+    (_cfg.flags & (1 << VIRTIO_GPU_FLAG_VIRGL_ENABLED))
+#define virtio_gpu_stats_enabled(_cfg) \
+    (_cfg.flags & (1 << VIRTIO_GPU_FLAG_STATS_ENABLED))
+
 struct virtio_gpu_conf {
     uint32_t max_outputs;
+    uint32_t flags;
 };
 
 struct virtio_gpu_ctrl_command {
@@ -92,11 +103,13 @@
     int enabled_output_bitmask;
     struct virtio_gpu_config virtio_config;
 
+    bool use_virgl_renderer;
+    bool renderer_inited;
     QEMUTimer *fence_poll;
     QEMUTimer *print_stats;
 
+    uint32_t inflight;
     struct {
-        uint32_t inflight;
         uint32_t max_inflight;
         uint32_t requests;
         uint32_t req_3d;
@@ -139,4 +152,11 @@
                                   struct iovec **iov);
 void virtio_gpu_cleanup_mapping_iov(struct iovec *iov, uint32_t count);
 
+/* virtio-gpu-3d.c */
+void virtio_gpu_virgl_process_cmd(VirtIOGPU *g,
+                                  struct virtio_gpu_ctrl_command *cmd);
+void virtio_gpu_virgl_fence_poll(VirtIOGPU *g);
+void virtio_gpu_virgl_reset(VirtIOGPU *g);
+int virtio_gpu_virgl_init(VirtIOGPU *g);
+
 #endif
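The virtio-gpu.h hunk above introduces an enum of config flag bits plus test macros over virtio_gpu_conf.flags. A standalone sketch of the same enum-plus-bitmask pattern, with hypothetical FLAG_* and conf_* names:

#include <stdint.h>
#include <stdio.h>

enum conf_flags {
    FLAG_VIRGL_ENABLED = 1,
    FLAG_STATS_ENABLED,            /* = 2 */
};

struct conf {
    uint32_t flags;
};

/* Mirror the virtio_gpu_*_enabled() macros: test one bit per feature. */
#define conf_virgl_enabled(c)  ((c).flags & (1 << FLAG_VIRGL_ENABLED))
#define conf_stats_enabled(c)  ((c).flags & (1 << FLAG_STATS_ENABLED))

int main(void)
{
    struct conf c = { .flags = 1 << FLAG_VIRGL_ENABLED };

    printf("virgl=%d stats=%d\n",
           !!conf_virgl_enabled(c), !!conf_stats_enabled(c));   /* virgl=1 stats=0 */
    return 0;
}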
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index 9939246..d0946cb 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -857,7 +857,7 @@
 
 #if defined(_ARCH_PPC)
 
-static inline int64_t cpu_get_real_ticks(void)
+static inline int64_t cpu_get_host_ticks(void)
 {
     int64_t retval;
 #ifdef _ARCH_PPC64
@@ -883,7 +883,7 @@
 
 #elif defined(__i386__)
 
-static inline int64_t cpu_get_real_ticks(void)
+static inline int64_t cpu_get_host_ticks(void)
 {
     int64_t val;
     asm volatile ("rdtsc" : "=A" (val));
@@ -892,7 +892,7 @@
 
 #elif defined(__x86_64__)
 
-static inline int64_t cpu_get_real_ticks(void)
+static inline int64_t cpu_get_host_ticks(void)
 {
     uint32_t low,high;
     int64_t val;
@@ -905,7 +905,7 @@
 
 #elif defined(__hppa__)
 
-static inline int64_t cpu_get_real_ticks(void)
+static inline int64_t cpu_get_host_ticks(void)
 {
     int val;
     asm volatile ("mfctl %%cr16, %0" : "=r"(val));
@@ -914,7 +914,7 @@
 
 #elif defined(__ia64)
 
-static inline int64_t cpu_get_real_ticks(void)
+static inline int64_t cpu_get_host_ticks(void)
 {
     int64_t val;
     asm volatile ("mov %0 = ar.itc" : "=r"(val) :: "memory");
@@ -923,7 +923,7 @@
 
 #elif defined(__s390__)
 
-static inline int64_t cpu_get_real_ticks(void)
+static inline int64_t cpu_get_host_ticks(void)
 {
     int64_t val;
     asm volatile("stck 0(%1)" : "=m" (val) : "a" (&val) : "cc");
@@ -932,7 +932,7 @@
 
 #elif defined(__sparc__)
 
-static inline int64_t cpu_get_real_ticks (void)
+static inline int64_t cpu_get_host_ticks (void)
 {
 #if defined(_LP64)
     uint64_t        rval;
@@ -970,7 +970,7 @@
                               : "=r" (value));          \
     }
 
-static inline int64_t cpu_get_real_ticks(void)
+static inline int64_t cpu_get_host_ticks(void)
 {
     /* On kernels >= 2.6.25 rdhwr <reg>, $2 and $3 are emulated */
     uint32_t count;
@@ -986,7 +986,7 @@
 
 #elif defined(__alpha__)
 
-static inline int64_t cpu_get_real_ticks(void)
+static inline int64_t cpu_get_host_ticks(void)
 {
     uint64_t cc;
     uint32_t cur, ofs;
@@ -1001,7 +1001,7 @@
 /* The host CPU doesn't have an easily accessible cycle counter.
    Just return a monotonically increasing value.  This will be
    totally wrong, but hopefully better than nothing.  */
-static inline int64_t cpu_get_real_ticks (void)
+static inline int64_t cpu_get_host_ticks (void)
 {
     static int64_t ticks = 0;
     return ticks++;
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 9405554..b613ff0 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -721,6 +721,7 @@
 /* 0x08 currently unused */
 #define BP_GDB                0x10
 #define BP_CPU                0x20
+#define BP_ANY                (BP_GDB | BP_CPU)
 #define BP_WATCHPOINT_HIT_READ 0x40
 #define BP_WATCHPOINT_HIT_WRITE 0x80
 #define BP_WATCHPOINT_HIT (BP_WATCHPOINT_HIT_READ | BP_WATCHPOINT_HIT_WRITE)
@@ -731,6 +732,21 @@
 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint);
 void cpu_breakpoint_remove_all(CPUState *cpu, int mask);
 
+/* Return true if PC matches an installed breakpoint.  */
+static inline bool cpu_breakpoint_test(CPUState *cpu, vaddr pc, int mask)
+{
+    CPUBreakpoint *bp;
+
+    if (unlikely(!QTAILQ_EMPTY(&cpu->breakpoints))) {
+        QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
+            if (bp->pc == pc && (bp->flags & mask)) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                           int flags, CPUWatchpoint **watchpoint);
 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr,
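cpu_breakpoint_test() above scans cpu->breakpoints for a PC match whose flags intersect the caller's mask, and the new BP_ANY combines BP_GDB | BP_CPU so a single call covers breakpoints installed by either owner. A standalone sketch of that scan over a plain array, with hypothetical struct bp and bp_test() names:

#include <stdbool.h>
#include <stdio.h>

#define BP_GDB 0x10
#define BP_CPU 0x20
#define BP_ANY (BP_GDB | BP_CPU)

struct bp {
    unsigned long pc;
    int flags;
};

/* Match on PC first, then filter by owner mask. */
static bool bp_test(const struct bp *bps, int n, unsigned long pc, int mask)
{
    for (int i = 0; i < n; i++) {
        if (bps[i].pc == pc && (bps[i].flags & mask)) {
            return true;
        }
    }
    return false;
}

int main(void)
{
    struct bp bps[] = { { 0x1000, BP_CPU }, { 0x2000, BP_GDB } };

    printf("%d\n", bp_test(bps, 2, 0x1000, BP_ANY));   /* 1: BP_ANY matches either owner */
    printf("%d\n", bp_test(bps, 2, 0x1000, BP_GDB));   /* 0: only a BP_CPU breakpoint at 0x1000 */
    return 0;
}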
diff --git a/include/standard-headers/linux/input.h b/include/standard-headers/linux/input.h
index b003c67..43f1850 100644
--- a/include/standard-headers/linux/input.h
+++ b/include/standard-headers/linux/input.h
@@ -887,8 +887,8 @@
 #define SW_ROTATE_LOCK		0x0c  /* set = rotate locked/disabled */
 #define SW_LINEIN_INSERT	0x0d  /* set = inserted */
 #define SW_MUTE_DEVICE		0x0e  /* set = device disabled */
-#define SW_MAX			0x0f
-#define SW_CNT			(SW_MAX+1)
+#define SW_MAX_			0x0f
+#define SW_CNT			(SW_MAX_+1)
 
 /*
  * Misc events
diff --git a/include/standard-headers/linux/virtio_gpu.h b/include/standard-headers/linux/virtio_gpu.h
index 72ef815..76e5e52 100644
--- a/include/standard-headers/linux/virtio_gpu.h
+++ b/include/standard-headers/linux/virtio_gpu.h
@@ -40,6 +40,8 @@
 
 #include "standard-headers/linux/types.h"
 
+#define VIRTIO_GPU_FEATURE_VIRGL 0
+
 enum virtio_gpu_ctrl_type {
 	VIRTIO_GPU_UNDEFINED = 0,
 
@@ -52,6 +54,18 @@
 	VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D,
 	VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING,
 	VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING,
+	VIRTIO_GPU_CMD_GET_CAPSET_INFO,
+	VIRTIO_GPU_CMD_GET_CAPSET,
+
+	/* 3d commands */
+	VIRTIO_GPU_CMD_CTX_CREATE = 0x0200,
+	VIRTIO_GPU_CMD_CTX_DESTROY,
+	VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE,
+	VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE,
+	VIRTIO_GPU_CMD_RESOURCE_CREATE_3D,
+	VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D,
+	VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D,
+	VIRTIO_GPU_CMD_SUBMIT_3D,
 
 	/* cursor commands */
 	VIRTIO_GPU_CMD_UPDATE_CURSOR = 0x0300,
@@ -60,6 +74,8 @@
 	/* success responses */
 	VIRTIO_GPU_RESP_OK_NODATA = 0x1100,
 	VIRTIO_GPU_RESP_OK_DISPLAY_INFO,
+	VIRTIO_GPU_RESP_OK_CAPSET_INFO,
+	VIRTIO_GPU_RESP_OK_CAPSET,
 
 	/* error responses */
 	VIRTIO_GPU_RESP_ERR_UNSPEC = 0x1200,
@@ -180,13 +196,107 @@
 	} pmodes[VIRTIO_GPU_MAX_SCANOUTS];
 };
 
+/* data passed in the control vq, 3d related */
+
+struct virtio_gpu_box {
+	uint32_t x, y, z;
+	uint32_t w, h, d;
+};
+
+/* VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D, VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D */
+struct virtio_gpu_transfer_host_3d {
+	struct virtio_gpu_ctrl_hdr hdr;
+	struct virtio_gpu_box box;
+	uint64_t offset;
+	uint32_t resource_id;
+	uint32_t level;
+	uint32_t stride;
+	uint32_t layer_stride;
+};
+
+/* VIRTIO_GPU_CMD_RESOURCE_CREATE_3D */
+#define VIRTIO_GPU_RESOURCE_FLAG_Y_0_TOP (1 << 0)
+struct virtio_gpu_resource_create_3d {
+	struct virtio_gpu_ctrl_hdr hdr;
+	uint32_t resource_id;
+	uint32_t target;
+	uint32_t format;
+	uint32_t bind;
+	uint32_t width;
+	uint32_t height;
+	uint32_t depth;
+	uint32_t array_size;
+	uint32_t last_level;
+	uint32_t nr_samples;
+	uint32_t flags;
+	uint32_t padding;
+};
+
+/* VIRTIO_GPU_CMD_CTX_CREATE */
+struct virtio_gpu_ctx_create {
+	struct virtio_gpu_ctrl_hdr hdr;
+	uint32_t nlen;
+	uint32_t padding;
+	char debug_name[64];
+};
+
+/* VIRTIO_GPU_CMD_CTX_DESTROY */
+struct virtio_gpu_ctx_destroy {
+	struct virtio_gpu_ctrl_hdr hdr;
+};
+
+/* VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE, VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE */
+struct virtio_gpu_ctx_resource {
+	struct virtio_gpu_ctrl_hdr hdr;
+	uint32_t resource_id;
+	uint32_t padding;
+};
+
+/* VIRTIO_GPU_CMD_SUBMIT_3D */
+struct virtio_gpu_cmd_submit {
+	struct virtio_gpu_ctrl_hdr hdr;
+	uint32_t size;
+	uint32_t padding;
+};
+
+#define VIRTIO_GPU_CAPSET_VIRGL 1
+
+/* VIRTIO_GPU_CMD_GET_CAPSET_INFO */
+struct virtio_gpu_get_capset_info {
+	struct virtio_gpu_ctrl_hdr hdr;
+	uint32_t capset_index;
+	uint32_t padding;
+};
+
+/* VIRTIO_GPU_RESP_OK_CAPSET_INFO */
+struct virtio_gpu_resp_capset_info {
+	struct virtio_gpu_ctrl_hdr hdr;
+	uint32_t capset_id;
+	uint32_t capset_max_version;
+	uint32_t capset_max_size;
+	uint32_t padding;
+};
+
+/* VIRTIO_GPU_CMD_GET_CAPSET */
+struct virtio_gpu_get_capset {
+	struct virtio_gpu_ctrl_hdr hdr;
+	uint32_t capset_id;
+	uint32_t capset_version;
+};
+
+/* VIRTIO_GPU_RESP_OK_CAPSET */
+struct virtio_gpu_resp_capset {
+	struct virtio_gpu_ctrl_hdr hdr;
+	uint8_t capset_data[];
+};
+
 #define VIRTIO_GPU_EVENT_DISPLAY (1 << 0)
 
 struct virtio_gpu_config {
 	uint32_t events_read;
 	uint32_t events_clear;
 	uint32_t num_scanouts;
-	uint32_t reserved;
+	uint32_t num_capsets;
 };
 
 /* simple formats for fbcon/X use */
diff --git a/include/ui/console.h b/include/ui/console.h
index 047a2b4..d887f91 100644
--- a/include/ui/console.h
+++ b/include/ui/console.h
@@ -157,6 +157,14 @@
 void cursor_get_mono_image(QEMUCursor *c, int foreground, uint8_t *mask);
 void cursor_get_mono_mask(QEMUCursor *c, int transparent, uint8_t *mask);
 
+typedef void *QEMUGLContext;
+typedef struct QEMUGLParams QEMUGLParams;
+
+struct QEMUGLParams {
+    int major_ver;
+    int minor_ver;
+};
+
 typedef struct DisplayChangeListenerOps {
     const char *dpy_name;
 
@@ -183,6 +191,21 @@
                           int x, int y, int on);
     void (*dpy_cursor_define)(DisplayChangeListener *dcl,
                               QEMUCursor *cursor);
+
+    QEMUGLContext (*dpy_gl_ctx_create)(DisplayChangeListener *dcl,
+                                       QEMUGLParams *params);
+    void (*dpy_gl_ctx_destroy)(DisplayChangeListener *dcl,
+                               QEMUGLContext ctx);
+    int (*dpy_gl_ctx_make_current)(DisplayChangeListener *dcl,
+                                   QEMUGLContext ctx);
+    QEMUGLContext (*dpy_gl_ctx_get_current)(DisplayChangeListener *dcl);
+
+    void (*dpy_gl_scanout)(DisplayChangeListener *dcl,
+                           uint32_t backing_id, bool backing_y_0_top,
+                           uint32_t x, uint32_t y, uint32_t w, uint32_t h);
+    void (*dpy_gl_update)(DisplayChangeListener *dcl,
+                          uint32_t x, uint32_t y, uint32_t w, uint32_t h);
+
 } DisplayChangeListenerOps;
 
 struct DisplayChangeListener {
@@ -244,6 +267,20 @@
 bool dpy_gfx_check_format(QemuConsole *con,
                           pixman_format_code_t format);
 
+void dpy_gl_scanout(QemuConsole *con,
+                    uint32_t backing_id, bool backing_y_0_top,
+                    uint32_t x, uint32_t y, uint32_t w, uint32_t h);
+void dpy_gl_update(QemuConsole *con,
+                   uint32_t x, uint32_t y, uint32_t w, uint32_t h);
+
+QEMUGLContext dpy_gl_ctx_create(QemuConsole *con,
+                                QEMUGLParams *params);
+void dpy_gl_ctx_destroy(QemuConsole *con, QEMUGLContext ctx);
+int dpy_gl_ctx_make_current(QemuConsole *con, QEMUGLContext ctx);
+QEMUGLContext dpy_gl_ctx_get_current(QemuConsole *con);
+
+bool console_has_gl(QemuConsole *con);
+
 static inline int surface_stride(DisplaySurface *s)
 {
     return pixman_image_get_stride(s->image);
diff --git a/include/ui/egl-context.h b/include/ui/egl-context.h
new file mode 100644
index 0000000..f004ce1
--- /dev/null
+++ b/include/ui/egl-context.h
@@ -0,0 +1,14 @@
+#ifndef EGL_CONTEXT_H
+#define EGL_CONTEXT_H
+
+#include "ui/console.h"
+#include "ui/egl-helpers.h"
+
+QEMUGLContext qemu_egl_create_context(DisplayChangeListener *dcl,
+                                      QEMUGLParams *params);
+void qemu_egl_destroy_context(DisplayChangeListener *dcl, QEMUGLContext ctx);
+int qemu_egl_make_context_current(DisplayChangeListener *dcl,
+                                  QEMUGLContext ctx);
+QEMUGLContext qemu_egl_get_current_context(DisplayChangeListener *dcl);
+
+#endif /* EGL_CONTEXT_H */
diff --git a/include/ui/gtk.h b/include/ui/gtk.h
index 0359333..bf289cf 100644
--- a/include/ui/gtk.h
+++ b/include/ui/gtk.h
@@ -20,6 +20,7 @@
 
 #if defined(CONFIG_OPENGL)
 #include "ui/egl-helpers.h"
+#include "ui/egl-context.h"
 #endif
 
 /* Compatibility define to let us build on both Gtk2 and Gtk3 */
@@ -46,6 +47,11 @@
     EGLContext ectx;
     EGLSurface esurface;
     int glupdates;
+    int x, y, w, h;
+    GLuint tex_id;
+    GLuint fbo_id;
+    bool y0_top;
+    bool scanout_mode;
 #endif
 } VirtualGfxConsole;
 
@@ -90,6 +96,39 @@
 void gd_egl_refresh(DisplayChangeListener *dcl);
 void gd_egl_switch(DisplayChangeListener *dcl,
                    DisplaySurface *surface);
+QEMUGLContext gd_egl_create_context(DisplayChangeListener *dcl,
+                                    QEMUGLParams *params);
+void gd_egl_scanout(DisplayChangeListener *dcl,
+                    uint32_t backing_id, bool backing_y_0_top,
+                    uint32_t x, uint32_t y,
+                    uint32_t w, uint32_t h);
+void gd_egl_scanout_flush(DisplayChangeListener *dcl,
+                          uint32_t x, uint32_t y, uint32_t w, uint32_t h);
 void gtk_egl_init(void);
+int gd_egl_make_current(DisplayChangeListener *dcl,
+                        QEMUGLContext ctx);
+
+/* ui/gtk-gl-area.c */
+void gd_gl_area_init(VirtualConsole *vc);
+void gd_gl_area_draw(VirtualConsole *vc);
+void gd_gl_area_update(DisplayChangeListener *dcl,
+                       int x, int y, int w, int h);
+void gd_gl_area_refresh(DisplayChangeListener *dcl);
+void gd_gl_area_switch(DisplayChangeListener *dcl,
+                       DisplaySurface *surface);
+QEMUGLContext gd_gl_area_create_context(DisplayChangeListener *dcl,
+                                        QEMUGLParams *params);
+void gd_gl_area_destroy_context(DisplayChangeListener *dcl,
+                                QEMUGLContext ctx);
+void gd_gl_area_scanout(DisplayChangeListener *dcl,
+                        uint32_t backing_id, bool backing_y_0_top,
+                        uint32_t x, uint32_t y,
+                        uint32_t w, uint32_t h);
+void gd_gl_area_scanout_flush(DisplayChangeListener *dcl,
+                              uint32_t x, uint32_t y, uint32_t w, uint32_t h);
+void gtk_gl_area_init(void);
+QEMUGLContext gd_gl_area_get_current_context(DisplayChangeListener *dcl);
+int gd_gl_area_make_current(DisplayChangeListener *dcl,
+                            QEMUGLContext ctx);
 
 #endif /* UI_GTK_H */
diff --git a/include/ui/shader.h b/include/ui/shader.h
index 8509596..f7d8618 100644
--- a/include/ui/shader.h
+++ b/include/ui/shader.h
@@ -3,7 +3,9 @@
 
 #include <epoxy/gl.h>
 
-void qemu_gl_run_texture_blit(GLint texture_blit_prog);
+GLuint qemu_gl_init_texture_blit(GLint texture_blit_prog);
+void qemu_gl_run_texture_blit(GLint texture_blit_prog,
+                              GLint texture_blit_vao);
 
 GLuint qemu_gl_create_compile_shader(GLenum type, const GLchar *src);
 GLuint qemu_gl_create_link_program(GLuint vert, GLuint frag);
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index fdae6a6..d68f5a1 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -2853,7 +2853,7 @@
     TaskState *ts = (TaskState *)cpu->opaque;
     int i;
 
-    info->notes = g_malloc0(NUMNOTES * sizeof (struct memelfnote));
+    info->notes = g_new0(struct memelfnote, NUMNOTES);
     if (info->notes == NULL)
         return (-ENOMEM);
     info->prstatus = g_malloc0(sizeof (*info->prstatus));
diff --git a/linux-user/main.c b/linux-user/main.c
index 6599a41..8acfe0f 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -215,7 +215,7 @@
 
 uint64_t cpu_get_tsc(CPUX86State *env)
 {
-    return cpu_get_real_ticks();
+    return cpu_get_host_ticks();
 }
 
 static void write_dt(void *ptr, unsigned long addr, unsigned long limit,
@@ -1425,7 +1425,7 @@
 #ifdef TARGET_PPC
 static inline uint64_t cpu_ppc_get_tb(CPUPPCState *env)
 {
-    return cpu_get_real_ticks();
+    return cpu_get_host_ticks();
 }
 
 uint64_t cpu_ppc_load_tbl(CPUPPCState *env)
@@ -3414,17 +3414,6 @@
 
 #ifdef TARGET_TILEGX
 
-static void gen_sigsegv_maperr(CPUTLGState *env, target_ulong addr)
-{
-    target_siginfo_t info;
-
-    info.si_signo = TARGET_SIGSEGV;
-    info.si_errno = 0;
-    info.si_code = TARGET_SEGV_MAPERR;
-    info._sifields._sigfault._addr = addr;
-    queue_signal(env, info.si_signo, &info);
-}
-
 static void gen_sigill_reg(CPUTLGState *env)
 {
     target_siginfo_t info;
@@ -3436,6 +3425,36 @@
     queue_signal(env, info.si_signo, &info);
 }
 
+static void do_signal(CPUTLGState *env, int signo, int sigcode)
+{
+    target_siginfo_t info;
+
+    info.si_signo = signo;
+    info.si_errno = 0;
+    info._sifields._sigfault._addr = env->pc;
+
+    if (signo == TARGET_SIGSEGV) {
+        /* The passed in sigcode is a dummy; check for a page mapping
+           and pass either MAPERR or ACCERR.  */
+        target_ulong addr = env->excaddr;
+        info._sifields._sigfault._addr = addr;
+        if (page_check_range(addr, 1, PAGE_VALID) < 0) {
+            sigcode = TARGET_SEGV_MAPERR;
+        } else {
+            sigcode = TARGET_SEGV_ACCERR;
+        }
+    }
+    info.si_code = sigcode;
+
+    queue_signal(env, info.si_signo, &info);
+}
+
+static void gen_sigsegv_maperr(CPUTLGState *env, target_ulong addr)
+{
+    env->excaddr = addr;
+    do_signal(env, TARGET_SIGSEGV, 0);
+}
+
 static void set_regval(CPUTLGState *env, uint8_t reg, uint64_t val)
 {
     if (unlikely(reg >= TILEGX_R_COUNT)) {
@@ -3622,13 +3641,13 @@
         case TILEGX_EXCP_OPCODE_FETCHOR4:
             do_fetch(env, trapnr, false);
             break;
+        case TILEGX_EXCP_SIGNAL:
+            do_signal(env, env->signo, env->sigcode);
+            break;
         case TILEGX_EXCP_REG_IDN_ACCESS:
         case TILEGX_EXCP_REG_UDN_ACCESS:
             gen_sigill_reg(env);
             break;
-        case TILEGX_EXCP_SEGV:
-            gen_sigsegv_maperr(env, env->excaddr);
-            break;
         default:
             fprintf(stderr, "trapnr is %d[0x%x].\n", trapnr, trapnr);
             g_assert_not_reached();
@@ -4251,7 +4270,7 @@
     }
     target_argv[target_argc] = NULL;
 
-    ts = g_malloc0 (sizeof(TaskState));
+    ts = g_new0(TaskState, 1);
     init_task_state(ts);
     /* build Task State */
     ts->info = info;
diff --git a/linux-user/signal.c b/linux-user/signal.c
index ac82baa..9d62e02 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -5537,6 +5537,163 @@
     force_sig(TARGET_SIGSEGV);
 }
 
+#elif defined(TARGET_TILEGX)
+
+struct target_sigcontext {
+    union {
+        /* General-purpose registers.  */
+        abi_ulong gregs[56];
+        struct {
+            abi_ulong __gregs[53];
+            abi_ulong tp;        /* Aliases gregs[TREG_TP].  */
+            abi_ulong sp;        /* Aliases gregs[TREG_SP].  */
+            abi_ulong lr;        /* Aliases gregs[TREG_LR].  */
+        };
+    };
+    abi_ulong pc;        /* Program counter.  */
+    abi_ulong ics;       /* In Interrupt Critical Section?  */
+    abi_ulong faultnum;  /* Fault number.  */
+    abi_ulong pad[5];
+};
+
+struct target_ucontext {
+    abi_ulong tuc_flags;
+    abi_ulong tuc_link;
+    target_stack_t tuc_stack;
+    struct target_sigcontext tuc_mcontext;
+    target_sigset_t tuc_sigmask;   /* mask last for extensibility */
+};
+
+struct target_rt_sigframe {
+    unsigned char save_area[16]; /* caller save area */
+    struct target_siginfo info;
+    struct target_ucontext uc;
+};
+
+static void setup_sigcontext(struct target_sigcontext *sc,
+                             CPUArchState *env, int signo)
+{
+    int i;
+
+    for (i = 0; i < TILEGX_R_COUNT; ++i) {
+        __put_user(env->regs[i], &sc->gregs[i]);
+    }
+
+    __put_user(env->pc, &sc->pc);
+    __put_user(0, &sc->ics);
+    __put_user(signo, &sc->faultnum);
+}
+
+static void restore_sigcontext(CPUTLGState *env, struct target_sigcontext *sc)
+{
+    int i;
+
+    for (i = 0; i < TILEGX_R_COUNT; ++i) {
+        __get_user(env->regs[i], &sc->gregs[i]);
+    }
+
+    __get_user(env->pc, &sc->pc);
+}
+
+static abi_ulong get_sigframe(struct target_sigaction *ka, CPUArchState *env,
+                              size_t frame_size)
+{
+    unsigned long sp = env->regs[TILEGX_R_SP];
+
+    if (on_sig_stack(sp) && !likely(on_sig_stack(sp - frame_size))) {
+        return -1UL;
+    }
+
+    if ((ka->sa_flags & SA_ONSTACK) && !sas_ss_flags(sp)) {
+        sp = target_sigaltstack_used.ss_sp + target_sigaltstack_used.ss_size;
+    }
+
+    sp -= frame_size;
+    sp &= -16UL;
+    return sp;
+}
+
+static void setup_rt_frame(int sig, struct target_sigaction *ka,
+                           target_siginfo_t *info,
+                           target_sigset_t *set, CPUArchState *env)
+{
+    abi_ulong frame_addr;
+    struct target_rt_sigframe *frame;
+    unsigned long restorer;
+
+    frame_addr = get_sigframe(ka, env, sizeof(*frame));
+    if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) {
+        goto give_sigsegv;
+    }
+
+    /* Always write at least the signal number for the stack backtracer. */
+    if (ka->sa_flags & TARGET_SA_SIGINFO) {
+        /* At sigreturn time, restore the callee-save registers too. */
+        tswap_siginfo(&frame->info, info);
+        /* regs->flags |= PT_FLAGS_RESTORE_REGS; FIXME: we can skip it? */
+    } else {
+        __put_user(info->si_signo, &frame->info.si_signo);
+    }
+
+    /* Create the ucontext.  */
+    __put_user(0, &frame->uc.tuc_flags);
+    __put_user(0, &frame->uc.tuc_link);
+    __put_user(target_sigaltstack_used.ss_sp, &frame->uc.tuc_stack.ss_sp);
+    __put_user(sas_ss_flags(env->regs[TILEGX_R_SP]),
+               &frame->uc.tuc_stack.ss_flags);
+    __put_user(target_sigaltstack_used.ss_size, &frame->uc.tuc_stack.ss_size);
+    setup_sigcontext(&frame->uc.tuc_mcontext, env, info->si_signo);
+
+    restorer = (unsigned long) do_rt_sigreturn;
+    if (ka->sa_flags & TARGET_SA_RESTORER) {
+            restorer = (unsigned long) ka->sa_restorer;
+    }
+    env->pc = (unsigned long) ka->_sa_handler;
+    env->regs[TILEGX_R_SP] = (unsigned long) frame;
+    env->regs[TILEGX_R_LR] = restorer;
+    env->regs[0] = (unsigned long) sig;
+    env->regs[1] = (unsigned long) &frame->info;
+    env->regs[2] = (unsigned long) &frame->uc;
+    /* regs->flags |= PT_FLAGS_CALLER_SAVES; FIXME: we can skip it? */
+
+    unlock_user_struct(frame, frame_addr, 1);
+    return;
+
+give_sigsegv:
+    if (sig == TARGET_SIGSEGV) {
+        ka->_sa_handler = TARGET_SIG_DFL;
+    }
+    force_sig(TARGET_SIGSEGV /* , current */);
+}
+
+long do_rt_sigreturn(CPUTLGState *env)
+{
+    abi_ulong frame_addr = env->regs[TILEGX_R_SP];
+    struct target_rt_sigframe *frame;
+    sigset_t set;
+
+    if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1)) {
+        goto badframe;
+    }
+    target_to_host_sigset(&set, &frame->uc.tuc_sigmask);
+    do_sigprocmask(SIG_SETMASK, &set, NULL);
+
+    restore_sigcontext(env, &frame->uc.tuc_mcontext);
+    if (do_sigaltstack(frame_addr + offsetof(struct target_rt_sigframe,
+                                             uc.tuc_stack),
+                       0, env->regs[TILEGX_R_SP]) == -EFAULT) {
+        goto badframe;
+    }
+
+    unlock_user_struct(frame, frame_addr, 0);
+    return env->regs[TILEGX_R_RE];
+
+
+ badframe:
+    unlock_user_struct(frame, frame_addr, 0);
+    force_sig(TARGET_SIGSEGV);
+}
+
 #else
 
 static void setup_frame(int sig, struct target_sigaction *ka,
@@ -5657,7 +5814,7 @@
 #endif
         /* prepare the stack frame of the virtual CPU */
 #if defined(TARGET_ABI_MIPSN32) || defined(TARGET_ABI_MIPSN64) \
-    || defined(TARGET_OPENRISC)
+    || defined(TARGET_OPENRISC) || defined(TARGET_TILEGX)
         /* These targets do not have traditional signals.  */
         setup_rt_frame(sig, sa, &q->info, &target_old_set, cpu_env);
 #else
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 98b5766..8bfb24f 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -2728,8 +2728,9 @@
 }
 
 static inline abi_long do_semctl(int semid, int semnum, int cmd,
-                                 union target_semun target_su)
+                                 abi_ulong target_arg)
 {
+    union target_semun target_su = { .buf = target_arg };
     union semun arg;
     struct semid_ds dsarg;
     unsigned short *array = NULL;
@@ -3251,8 +3252,7 @@
          * ptr argument. */
         abi_ulong atptr;
         get_user_ual(atptr, ptr);
-        ret = do_semctl(first, second, third,
-                (union target_semun) atptr);
+        ret = do_semctl(first, second, third, atptr);
         break;
     }
 
@@ -4566,7 +4566,7 @@
         new_thread_info info;
         pthread_attr_t attr;
 
-        ts = g_malloc0(sizeof(TaskState));
+        ts = g_new0(TaskState, 1);
         init_task_state(ts);
         /* we create a new CPU instance. */
         new_env = cpu_copy(env);
@@ -7550,7 +7550,7 @@
 #endif
 #ifdef TARGET_NR_semctl
     case TARGET_NR_semctl:
-        ret = do_semctl(arg1, arg2, arg3, (union target_semun)(abi_ulong)arg4);
+        ret = do_semctl(arg1, arg2, arg3, arg4);
         break;
 #endif
 #ifdef TARGET_NR_msgctl
diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h
index 7ca33a6..f996acf 100644
--- a/linux-user/syscall_defs.h
+++ b/linux-user/syscall_defs.h
@@ -748,6 +748,10 @@
 #define TARGET_ILL_PRVREG	(6)	/* privileged register */
 #define TARGET_ILL_COPROC	(7)	/* coprocessor error */
 #define TARGET_ILL_BADSTK	(8)	/* internal stack error */
+#ifdef TARGET_TILEGX
+#define TARGET_ILL_DBLFLT       (9)     /* double fault */
+#define TARGET_ILL_HARDWALL     (10)    /* user networks hardwall violation */
+#endif
 
 /*
  * SIGFPE si_codes
@@ -767,6 +771,7 @@
  */
 #define TARGET_SEGV_MAPERR     (1)  /* address not mapped to object */
 #define TARGET_SEGV_ACCERR     (2)  /* invalid permissions for mapped object */
+#define TARGET_SEGV_BNDERR     (3)  /* failed address bound checks */
 
 /*
  * SIGBUS si_codes
@@ -774,12 +779,18 @@
 #define TARGET_BUS_ADRALN       (1)	/* invalid address alignment */
 #define TARGET_BUS_ADRERR       (2)	/* non-existent physical address */
 #define TARGET_BUS_OBJERR       (3)	/* object specific hardware error */
+/* hardware memory error consumed on a machine check: action required */
+#define TARGET_BUS_MCEERR_AR    (4)
+/* hardware memory error detected in process but not consumed: action optional*/
+#define TARGET_BUS_MCEERR_AO    (5)
 
 /*
  * SIGTRAP si_codes
  */
 #define TARGET_TRAP_BRKPT	(1)	/* process breakpoint */
 #define TARGET_TRAP_TRACE	(2)	/* process trace trap */
+#define TARGET_TRAP_BRANCH      (3)     /* process taken branch trap */
+#define TARGET_TRAP_HWBKPT      (4)     /* hardware breakpoint/watchpoint */
 
 #endif /* defined(TARGET_I386) || defined(TARGET_ARM) */
 
diff --git a/linux-user/tilegx/syscall.h b/linux-user/tilegx/syscall.h
index 653ece1..a938d4e 100644
--- a/linux-user/tilegx/syscall.h
+++ b/linux-user/tilegx/syscall.h
@@ -37,4 +37,7 @@
 #define TARGET_MLOCKALL_MCL_CURRENT 1
 #define TARGET_MLOCKALL_MCL_FUTURE  2
 
+/* For faultnum */
+#define TARGET_INT_SWINT_1            14
+
 #endif
diff --git a/memory.c b/memory.c
index ef87363..2eb1597 100644
--- a/memory.c
+++ b/memory.c
@@ -1304,7 +1304,22 @@
 {
     MemoryRegion *mr = MEMORY_REGION(obj);
 
-    assert(QTAILQ_EMPTY(&mr->subregions));
+    assert(!mr->container);
+
+    /* We know the region is not visible in any address space (it
+     * does not have a container and cannot be a root either because
+     * it has no references), so we can blindly clear mr->enabled.
+     * Calling memory_region_set_enabled() instead could trigger a
+     * transaction and cause an infinite loop.
+     */
+    mr->enabled = false;
+    memory_region_transaction_begin();
+    while (!QTAILQ_EMPTY(&mr->subregions)) {
+        MemoryRegion *subregion = QTAILQ_FIRST(&mr->subregions);
+        memory_region_del_subregion(mr, subregion);
+    }
+    memory_region_transaction_commit();
+
     mr->destructor(mr);
     memory_region_clear_coalescing(mr);
     g_free((char *)mr->name);
@@ -1403,6 +1418,26 @@
     notifier_list_add(&mr->iommu_notify, n);
 }
 
+void memory_region_iommu_replay(MemoryRegion *mr, Notifier *n,
+                                hwaddr granularity, bool is_write)
+{
+    hwaddr addr;
+    IOMMUTLBEntry iotlb;
+
+    for (addr = 0; addr < memory_region_size(mr); addr += granularity) {
+        iotlb = mr->iommu_ops->translate(mr, addr, is_write);
+        if (iotlb.perm != IOMMU_NONE) {
+            n->notify(n, &iotlb);
+        }
+
+        /* If (2^64 - MR size) < granularity, it's possible to get an
+         * infinite loop here.  This should catch such a wraparound. */
+        if ((addr + granularity) < addr) {
+            break;
+        }
+    }
+}
+
 void memory_region_unregister_iommu_notifier(Notifier *n)
 {
     notifier_remove(n);
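
The loop added in memory_region_iommu_replay() above guards against the address
stepping past the end of the address space and wrapping back to zero. A
self-contained sketch of that overflow check, assuming nothing beyond standard
C (not QEMU code; the sizes are made up):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t size = UINT64_MAX;        /* region almost as big as 2^64 */
        uint64_t step = UINT64_C(1) << 62; /* replay granularity */

        for (uint64_t addr = 0; addr < size; addr += step) {
            printf("translate at 0x%016llx\n", (unsigned long long)addr);
            if (addr + step < addr) {      /* unsigned add wrapped around */
                break;                     /* stop instead of looping forever */
            }
        }
        return 0;
    }
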
diff --git a/pc-bios/s390-ccw/Makefile b/pc-bios/s390-ccw/Makefile
index 746603a..15e4232 100644
--- a/pc-bios/s390-ccw/Makefile
+++ b/pc-bios/s390-ccw/Makefile
@@ -10,7 +10,8 @@
 .PHONY : all clean build-all
 
 OBJECTS = start.o main.o bootmap.o sclp-ascii.o virtio.o
-CFLAGS += -fPIE -fno-stack-protector -ffreestanding -fno-delete-null-pointer-checks
+CFLAGS += -fPIE -fno-stack-protector -ffreestanding
+CFLAGS += -fno-delete-null-pointer-checks -msoft-float
 LDFLAGS += -Wl,-pie -nostdlib
 
 build-all: s390-ccw.img
diff --git a/qapi/block.json b/qapi/block.json
index aad645c..84022f1 100644
--- a/qapi/block.json
+++ b/qapi/block.json
@@ -6,7 +6,7 @@
 { 'include': 'block-core.json' }
 
 ##
-# BiosAtaTranslation:
+# @BiosAtaTranslation:
 #
 # Policy that BIOS should use to interpret cylinder/head/sector
 # addresses.  Note that Bochs BIOS and SeaBIOS will not actually
diff --git a/qdev-monitor.c b/qdev-monitor.c
index 00f6303..a35098f 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -238,9 +238,12 @@
         return 0;
     }
 
-    qdev_get_device_class(&driver, &local_err);
-    if (local_err) {
-        goto error;
+    if (!object_class_by_name(driver)) {
+        const char *typename = find_typename_by_alias(driver);
+
+        if (typename) {
+            driver = typename;
+        }
     }
 
     prop_list = qmp_device_list_properties(driver, &local_err);
diff --git a/qmp.c b/qmp.c
index 057a7cb..d9ecede 100644
--- a/qmp.c
+++ b/qmp.c
@@ -515,6 +515,17 @@
         return NULL;
     }
 
+    if (object_class_is_abstract(klass)) {
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "name",
+                   "non-abstract device type");
+        return NULL;
+    }
+
+    if (DEVICE_CLASS(klass)->cannot_destroy_with_object_finalize_yet) {
+        error_setg(errp, "Can't list properties of device '%s'", typename);
+        return NULL;
+    }
+
     obj = object_new(typename);
 
     QTAILQ_FOREACH(prop, &obj->properties, node) {
diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh
index 11076191..457ef37 100755
--- a/scripts/update-linux-headers.sh
+++ b/scripts/update-linux-headers.sh
@@ -53,6 +53,7 @@
         -e 's/__attribute__((packed))/QEMU_PACKED/' \
         -e 's/__inline__/inline/' \
         -e '/sys\/ioctl.h/d' \
+        -e 's/SW_MAX/SW_MAX_/' \
         "$f" > "$to/$header";
 }
 
diff --git a/target-alpha/cpu.c b/target-alpha/cpu.c
index 421d7e5..ff1926a 100644
--- a/target-alpha/cpu.c
+++ b/target-alpha/cpu.c
@@ -298,6 +298,13 @@
     dc->vmsd = &vmstate_alpha_cpu;
 #endif
     cc->gdb_num_core_regs = 67;
+
+    /*
+     * Reason: alpha_cpu_initfn() calls cpu_exec_init(), which saves
+     * the object in cpus -> dangling pointer after final
+     * object_unref().
+     */
+    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 static const TypeInfo alpha_cpu_type_info = {
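
The "Reason:" comment above (repeated for several CPU classes later in this
series) describes an init function stashing the object in a global list, so a
plain finalize would leave that list entry dangling. A minimal standalone
illustration of the hazard, with hypothetical names throughout (not QEMU code):

    #include <stdio.h>
    #include <stdlib.h>

    struct demo_cpu { int id; };

    static struct demo_cpu *cpu_list[4];
    static int cpu_count;

    static void demo_cpu_init(struct demo_cpu *cpu, int id)
    {
        cpu->id = id;
        cpu_list[cpu_count++] = cpu;   /* like cpu_exec_init(): saved globally */
    }

    int main(void)
    {
        struct demo_cpu *cpu = malloc(sizeof(*cpu));
        demo_cpu_init(cpu, 0);
        free(cpu);                     /* "finalize" without unregistering */
        /* cpu_list[0] now dangles; dereferencing it would be undefined
         * behaviour, which is why these classes opt out of finalization. */
        printf("registered cpus: %d\n", cpu_count);
        return 0;
    }
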
diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h
index 097637e..bcd8076 100644
--- a/target-alpha/cpu.h
+++ b/target-alpha/cpu.h
@@ -287,7 +287,6 @@
 
 #define cpu_list alpha_cpu_list
 #define cpu_exec cpu_alpha_exec
-#define cpu_gen_code cpu_alpha_gen_code
 #define cpu_signal_handler cpu_alpha_signal_handler
 
 #include "exec/cpu-all.h"
diff --git a/target-alpha/sys_helper.c b/target-alpha/sys_helper.c
index 1f0e1a9..75c96c1 100644
--- a/target-alpha/sys_helper.c
+++ b/target-alpha/sys_helper.c
@@ -34,7 +34,7 @@
 #else
     /* In user-mode, QEMU_CLOCK_VIRTUAL doesn't exist.  Just pass through the host cpu
        clock ticks.  Also, don't bother taking PCC_OFS into account.  */
-    return (uint32_t)cpu_get_real_ticks();
+    return (uint32_t)cpu_get_host_ticks();
 #endif
 }
 
diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index 2ba5fb8..f936d1b 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -2858,18 +2858,14 @@
     return ret;
 }
 
-static inline void gen_intermediate_code_internal(AlphaCPU *cpu,
-                                                  TranslationBlock *tb,
-                                                  bool search_pc)
+void gen_intermediate_code(CPUAlphaState *env, struct TranslationBlock *tb)
 {
+    AlphaCPU *cpu = alpha_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPUAlphaState *env = &cpu->env;
     DisasContext ctx, *ctxp = &ctx;
     target_ulong pc_start;
     target_ulong pc_mask;
     uint32_t insn;
-    CPUBreakpoint *bp;
-    int j, lj = -1;
     ExitStatus ret;
     int num_insns;
     int max_insns;
@@ -2904,6 +2900,9 @@
     if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
     }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
 
     if (in_superpage(&ctx, pc_start)) {
         pc_mask = (1ULL << 41) - 1;
@@ -2913,35 +2912,17 @@
 
     gen_tb_start(tb);
     do {
-        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-                if (bp->pc == ctx.pc) {
-                    gen_excp(&ctx, EXCP_DEBUG, 0);
-                    break;
-                }
-            }
+        tcg_gen_insn_start(ctx.pc);
+        num_insns++;
+
+        if (unlikely(cpu_breakpoint_test(cs, ctx.pc, BP_ANY))) {
+            gen_excp(&ctx, EXCP_DEBUG, 0);
+            break;
         }
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j) {
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-                }
-            }
-            tcg_ctx.gen_opc_pc[lj] = ctx.pc;
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-            tcg_ctx.gen_opc_icount[lj] = num_insns;
-        }
-        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) {
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
         }
         insn = cpu_ldl_code(env, ctx.pc);
-        num_insns++;
-
-	if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-            tcg_gen_debug_insn_start(ctx.pc);
-        }
 
         TCGV_UNUSED_I64(ctx.zero);
         TCGV_UNUSED_I64(ctx.sink);
@@ -2997,16 +2978,8 @@
 
     gen_tb_end(tb, num_insns);
 
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j) {
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-        }
-    } else {
-        tb->size = ctx.pc - pc_start;
-        tb->icount = num_insns;
-    }
+    tb->size = ctx.pc - pc_start;
+    tb->icount = num_insns;
 
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
@@ -3017,17 +2990,8 @@
 #endif
 }
 
-void gen_intermediate_code (CPUAlphaState *env, struct TranslationBlock *tb)
+void restore_state_to_opc(CPUAlphaState *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    gen_intermediate_code_internal(alpha_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc (CPUAlphaState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(alpha_env_get_cpu(env), tb, true);
-}
-
-void restore_state_to_opc(CPUAlphaState *env, TranslationBlock *tb, int pc_pos)
-{
-    env->pc = tcg_ctx.gen_opc_pc[pc_pos];
+    env->pc = data[0];
 }
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index d7b4445..30739fc 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -1427,6 +1427,17 @@
     cc->debug_excp_handler = arm_debug_excp_handler;
 
     cc->disas_set_info = arm_disas_set_info;
+
+    /*
+     * Reason: arm_cpu_initfn() calls cpu_exec_init(), which saves
+     * the object in cpus -> dangling pointer after final
+     * object_unref().
+     *
+     * Once this is fixed, the devices that create ARM CPUs should be
+     * updated not to set cannot_destroy_with_object_finalize_yet,
+     * unless they still screw up something else.
+     */
+    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 static void cpu_register(const ARMCPUInfo *info)
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index cc1578c..493f9d0 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -95,6 +95,7 @@
 struct arm_boot_info;
 
 #define NB_MMU_MODES 7
+#define TARGET_INSN_START_EXTRA_WORDS 1
 
 /* We currently assume float and double are IEEE single and double
    precision respectively.
@@ -1600,7 +1601,6 @@
 #define cpu_init(cpu_model) CPU(cpu_arm_init(cpu_model))
 
 #define cpu_exec cpu_arm_exec
-#define cpu_gen_code cpu_arm_gen_code
 #define cpu_signal_handler cpu_arm_signal_handler
 #define cpu_list arm_cpu_list
 
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index ec0936c..e65e309 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -11000,15 +11000,11 @@
     free_tmp_a64(s);
 }
 
-void gen_intermediate_code_internal_a64(ARMCPU *cpu,
-                                        TranslationBlock *tb,
-                                        bool search_pc)
+void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)
 {
     CPUState *cs = CPU(cpu);
     CPUARMState *env = &cpu->env;
     DisasContext dc1, *dc = &dc1;
-    CPUBreakpoint *bp;
-    int j, lj;
     target_ulong pc_start;
     target_ulong next_page_start;
     int num_insns;
@@ -11067,19 +11063,25 @@
     init_tmp_a64_array(dc);
 
     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
-    lj = -1;
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
     if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
     }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
 
     gen_tb_start(tb);
 
     tcg_clear_temp_count();
 
     do {
+        tcg_gen_insn_start(dc->pc, 0);
+        num_insns++;
+
         if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
+            CPUBreakpoint *bp;
             QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
                 if (bp->pc == dc->pc) {
                     gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
@@ -11091,27 +11093,10 @@
             }
         }
 
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j) {
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-                }
-            }
-            tcg_ctx.gen_opc_pc[lj] = dc->pc;
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-            tcg_ctx.gen_opc_icount[lj] = num_insns;
-        }
-
-        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) {
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
         }
 
-        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-            tcg_gen_debug_insn_start(dc->pc);
-        }
-
         if (dc->ss_active && !dc->pstate_ss) {
             /* Singlestep state is Active-pending.
              * If we're in this state at the start of a TB then either
@@ -11123,7 +11108,7 @@
              * "did not step an insn" case, and so the syndrome ISV and EX
              * bits should be zero.
              */
-            assert(num_insns == 0);
+            assert(num_insns == 1);
             gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
                           default_exception_el(dc));
             dc->is_jmp = DISAS_EXC;
@@ -11142,7 +11127,6 @@
          * Also stop translation when a page boundary is reached.  This
          * ensures prefetch aborts occur at the right place.
          */
-        num_insns++;
     } while (!dc->is_jmp && !tcg_op_buf_full() &&
              !cs->singlestep_enabled &&
              !singlestep &&
@@ -11221,14 +11205,6 @@
         qemu_log("\n");
     }
 #endif
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j) {
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-        }
-    } else {
-        tb->size = dc->pc - pc_start;
-        tb->icount = num_insns;
-    }
+    tb->size = dc->pc - pc_start;
+    tb->icount = num_insns;
 }
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 84a21ac..22c3587 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -52,7 +52,6 @@
 #define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
 
 #include "translate.h"
-static uint32_t gen_opc_condexec_bits[OPC_BUF_SIZE];
 
 #if defined(CONFIG_USER_ONLY)
 #define IS_USER(s) 1
@@ -11168,17 +11167,12 @@
 }
 
 /* generate intermediate code in gen_opc_buf and gen_opparam_buf for
-   basic block 'tb'. If search_pc is TRUE, also generate PC
-   information for each intermediate instruction. */
-static inline void gen_intermediate_code_internal(ARMCPU *cpu,
-                                                  TranslationBlock *tb,
-                                                  bool search_pc)
+   basic block 'tb'.  */
+void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
 {
+    ARMCPU *cpu = arm_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPUARMState *env = &cpu->env;
     DisasContext dc1, *dc = &dc1;
-    CPUBreakpoint *bp;
-    int j, lj;
     target_ulong pc_start;
     target_ulong next_page_start;
     int num_insns;
@@ -11190,7 +11184,7 @@
      * the A32/T32 complexity to do with conditional execution/IT blocks/etc.
      */
     if (ARM_TBFLAG_AARCH64_STATE(tb->flags)) {
-        gen_intermediate_code_internal_a64(cpu, tb, search_pc);
+        gen_intermediate_code_a64(cpu, tb);
         return;
     }
 
@@ -11256,11 +11250,14 @@
     /* FIXME: cpu_M0 can probably be the same as cpu_V0.  */
     cpu_M0 = tcg_temp_new_i64();
     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
-    lj = -1;
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
-    if (max_insns == 0)
+    if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
+    }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
 
     gen_tb_start(tb);
 
@@ -11286,10 +11283,9 @@
      * (3) if we leave the TB unexpectedly (eg a data abort on a load)
      * then the CPUARMState will be wrong and we need to reset it.
      * This is handled in the same way as restoration of the
-     * PC in these situations: we will be called again with search_pc=1
-     * and generate a mapping of the condexec bits for each PC in
-     * gen_opc_condexec_bits[]. restore_state_to_opc() then uses
-     * this to restore the condexec bits.
+     * PC in these situations; we save the value of the condexec bits
+     * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
+     * then uses this to restore them after an exception.
      *
      * Note that there are no instructions which can read the condexec
      * bits, and none which can write non-static values to them, so
@@ -11306,6 +11302,10 @@
         store_cpu_field(tmp, condexec_bits);
       }
     do {
+        tcg_gen_insn_start(dc->pc,
+                           (dc->condexec_cond << 4) | (dc->condexec_mask >> 1));
+        num_insns++;
+
 #ifdef CONFIG_USER_ONLY
         /* Intercept jump to the magic kernel page.  */
         if (dc->pc >= 0xffff0000) {
@@ -11326,6 +11326,7 @@
 #endif
 
         if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
+            CPUBreakpoint *bp;
             QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
                 if (bp->pc == dc->pc) {
                     gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
@@ -11336,24 +11337,9 @@
                 }
             }
         }
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j)
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-            }
-            tcg_ctx.gen_opc_pc[lj] = dc->pc;
-            gen_opc_condexec_bits[lj] = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-            tcg_ctx.gen_opc_icount[lj] = num_insns;
-        }
 
-        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
-
-        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-            tcg_gen_debug_insn_start(dc->pc);
         }
 
         if (dc->ss_active && !dc->pstate_ss) {
@@ -11367,7 +11353,7 @@
              * "did not step an insn" case, and so the syndrome ISV and EX
              * bits should be zero.
              */
-            assert(num_insns == 0);
+            assert(num_insns == 1);
             gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
                           default_exception_el(dc));
             goto done_generating;
@@ -11403,7 +11389,6 @@
          * Otherwise the subsequent code could get translated several times.
          * Also stop translation when a page boundary is reached.  This
          * ensures prefetch aborts occur at the right place.  */
-        num_insns ++;
     } while (!dc->is_jmp && !tcg_op_buf_full() &&
              !cs->singlestep_enabled &&
              !singlestep &&
@@ -11533,25 +11518,8 @@
         qemu_log("\n");
     }
 #endif
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j)
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-    } else {
-        tb->size = dc->pc - pc_start;
-        tb->icount = num_insns;
-    }
-}
-
-void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(arm_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc(CPUARMState *env, TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(arm_env_get_cpu(env), tb, true);
+    tb->size = dc->pc - pc_start;
+    tb->icount = num_insns;
 }
 
 static const char *cpu_mode_names[16] = {
@@ -11608,13 +11576,14 @@
     }
 }
 
-void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb, int pc_pos)
+void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
     if (is_a64(env)) {
-        env->pc = tcg_ctx.gen_opc_pc[pc_pos];
+        env->pc = data[0];
         env->condexec_bits = 0;
     } else {
-        env->regs[15] = tcg_ctx.gen_opc_pc[pc_pos];
-        env->condexec_bits = gen_opc_condexec_bits[pc_pos];
+        env->regs[15] = data[0];
+        env->condexec_bits = data[1];
     }
 }
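
The condexec handling above packs the condition and mask into one extra
insn-start word and stores that word back verbatim in restore_state_to_opc().
A tiny standalone sketch of the round trip (not QEMU code; the values are
invented for illustration):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical IT-block state at some instruction boundary. */
        uint32_t condexec_cond = 0xA;
        uint32_t condexec_mask = 0x6;

        /* Word recorded per instruction by tcg_gen_insn_start() above. */
        uint32_t insn_start_word = (condexec_cond << 4) | (condexec_mask >> 1);

        /* restore_state_to_opc() stores that word straight back. */
        uint32_t condexec_bits = insn_start_word;

        printf("recorded 0x%02x, restored condexec_bits 0x%02x\n",
               insn_start_word, condexec_bits);
        return 0;
    }
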
diff --git a/target-arm/translate.h b/target-arm/translate.h
index b8fe37a..53ef971 100644
--- a/target-arm/translate.h
+++ b/target-arm/translate.h
@@ -122,9 +122,7 @@
 
 #ifdef TARGET_AARCH64
 void a64_translate_init(void);
-void gen_intermediate_code_internal_a64(ARMCPU *cpu,
-                                        TranslationBlock *tb,
-                                        bool search_pc);
+void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb);
 void gen_a64_set_pc_im(uint64_t val);
 void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
                             fprintf_function cpu_fprintf, int flags);
@@ -133,9 +131,7 @@
 {
 }
 
-static inline void gen_intermediate_code_internal_a64(ARMCPU *cpu,
-                                                      TranslationBlock *tb,
-                                                      bool search_pc)
+static inline void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)
 {
 }
 
diff --git a/target-cris/cpu.c b/target-cris/cpu.c
index d461e07..8eaf5a5 100644
--- a/target-cris/cpu.c
+++ b/target-cris/cpu.c
@@ -309,6 +309,13 @@
     cc->gdb_stop_before_watchpoint = true;
 
     cc->disas_set_info = cris_disas_set_info;
+
+    /*
+     * Reason: cris_cpu_initfn() calls cpu_exec_init(), which saves
+     * the object in cpus -> dangling pointer after final
+     * object_unref().
+     */
+    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 static const TypeInfo cris_cpu_type_info = {
diff --git a/target-cris/cpu.h b/target-cris/cpu.h
index d47fad4..3220460 100644
--- a/target-cris/cpu.h
+++ b/target-cris/cpu.h
@@ -223,7 +223,6 @@
 #define cpu_init(cpu_model) CPU(cpu_cris_init(cpu_model))
 
 #define cpu_exec cpu_cris_exec
-#define cpu_gen_code cpu_cris_gen_code
 #define cpu_signal_handler cpu_cris_signal_handler
 
 /* MMU modes definitions */
diff --git a/target-cris/translate.c b/target-cris/translate.c
index d5b54e1..964845c 100644
--- a/target-cris/translate.c
+++ b/target-cris/translate.c
@@ -2994,10 +2994,6 @@
     int insn_len = 2;
     int i;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-        tcg_gen_debug_insn_start(dc->pc);
-        }
-
     /* Load a halfword onto the instruction register.  */
         dc->ir = cris_fetch(env, dc, dc->pc, 2, 0);
 
@@ -3034,23 +3030,6 @@
     return insn_len;
 }
 
-static void check_breakpoint(CPUCRISState *env, DisasContext *dc)
-{
-    CPUState *cs = CPU(cris_env_get_cpu(env));
-    CPUBreakpoint *bp;
-
-    if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-        QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-            if (bp->pc == dc->pc) {
-                cris_evaluate_flags(dc);
-                tcg_gen_movi_tl(env_pc, dc->pc);
-                t_gen_raise_exception(EXCP_DEBUG);
-                dc->is_jmp = DISAS_UPDATE;
-            }
-        }
-    }
-}
-
 #include "translate_v10.c"
 
 /*
@@ -3088,15 +3067,12 @@
  */
 
 /* generate intermediate code for basic block 'tb'.  */
-static inline void
-gen_intermediate_code_internal(CRISCPU *cpu, TranslationBlock *tb,
-                               bool search_pc)
+void gen_intermediate_code(CPUCRISState *env, struct TranslationBlock *tb)
 {
+    CRISCPU *cpu = cris_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPUCRISState *env = &cpu->env;
     uint32_t pc_start;
     unsigned int insn_len;
-    int j, lj;
     struct DisasContext ctx;
     struct DisasContext *dc = &ctx;
     uint32_t next_page_start;
@@ -3148,13 +3124,13 @@
 
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
         qemu_log(
-                "srch=%d pc=%x %x flg=%" PRIx64 " bt=%x ds=%u ccs=%x\n"
+                "pc=%x %x flg=%" PRIx64 " bt=%x ds=%u ccs=%x\n"
                 "pid=%x usp=%x\n"
                 "%x.%x.%x.%x\n"
                 "%x.%x.%x.%x\n"
                 "%x.%x.%x.%x\n"
                 "%x.%x.%x.%x\n",
-                search_pc, dc->pc, dc->ppc,
+                dc->pc, dc->ppc,
                 (uint64_t)tb->flags,
                 env->btarget, (unsigned)tb->flags & 7,
                 env->pregs[PR_CCS],
@@ -3170,38 +3146,33 @@
     }
 
     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
-    lj = -1;
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
     if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
     }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
 
     gen_tb_start(tb);
     do {
-        check_breakpoint(env, dc);
+        tcg_gen_insn_start(dc->delayed_branch == 1
+                           ? dc->ppc | 1 : dc->pc);
+        num_insns++;
 
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j) {
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-                }
-            }
-            if (dc->delayed_branch == 1) {
-                tcg_ctx.gen_opc_pc[lj] = dc->ppc | 1;
-            } else {
-                tcg_ctx.gen_opc_pc[lj] = dc->pc;
-            }
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-            tcg_ctx.gen_opc_icount[lj] = num_insns;
+        if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) {
+            cris_evaluate_flags(dc);
+            tcg_gen_movi_tl(env_pc, dc->pc);
+            t_gen_raise_exception(EXCP_DEBUG);
+            dc->is_jmp = DISAS_UPDATE;
+            break;
         }
 
         /* Pretty disas.  */
         LOG_DIS("%8.8x:\t", dc->pc);
 
-        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) {
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
         }
         dc->clear_x = 1;
@@ -3213,7 +3184,6 @@
             cris_clear_x_flag(dc);
         }
 
-        num_insns++;
         /* Check for delayed branches here. If we do it before
            actually generating any host code, the simulator will just
            loop doing nothing for on this program location.  */
@@ -3318,16 +3288,8 @@
     }
     gen_tb_end(tb, num_insns);
 
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j) {
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-        }
-    } else {
-        tb->size = dc->pc - pc_start;
-        tb->icount = num_insns;
-    }
+    tb->size = dc->pc - pc_start;
+    tb->icount = num_insns;
 
 #ifdef DEBUG_DISAS
 #if !DISAS_CRIS
@@ -3341,16 +3303,6 @@
 #endif
 }
 
-void gen_intermediate_code (CPUCRISState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(cris_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc (CPUCRISState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(cris_env_get_cpu(env), tb, true);
-}
-
 void cris_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
                          int flags)
 {
@@ -3443,7 +3395,8 @@
     }
 }
 
-void restore_state_to_opc(CPUCRISState *env, TranslationBlock *tb, int pc_pos)
+void restore_state_to_opc(CPUCRISState *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    env->pc = tcg_ctx.gen_opc_pc[pc_pos];
+    env->pc = data[0];
 }
diff --git a/target-cris/translate_v10.c b/target-cris/translate_v10.c
index da0b2ca..3ab1c39 100644
--- a/target-cris/translate_v10.c
+++ b/target-cris/translate_v10.c
@@ -1199,9 +1199,6 @@
 {
     unsigned int insn_len = 2;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
-        tcg_gen_debug_insn_start(dc->pc);
-
     /* Load a halfword onto the instruction register.  */
     dc->ir = cpu_lduw_code(env, dc->pc);
 
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index c793812..05d7f26 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1453,6 +1453,8 @@
      */
 
     dc->props = host_x86_cpu_properties;
+    /* Reason: host_x86_cpu_initfn() dies when !kvm_enabled() */
+    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 static void host_x86_cpu_initfn(Object *obj)
@@ -3190,6 +3192,12 @@
 #endif
     cc->cpu_exec_enter = x86_cpu_exec_enter;
     cc->cpu_exec_exit = x86_cpu_exec_exit;
+
+    /*
+     * Reason: x86_cpu_initfn() calls cpu_exec_init(), which saves the
+     * object in cpus -> dangling pointer after final object_unref().
+     */
+    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 static const TypeInfo x86_cpu_type_info = {
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 8926780..54d9d50 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -794,6 +794,7 @@
 #define MAX_GP_COUNTERS    (MSR_IA32_PERF_STATUS - MSR_P6_EVNTSEL0)
 
 #define NB_MMU_MODES 3
+#define TARGET_INSN_START_EXTRA_WORDS 1
 
 #define NB_OPMASK_REGS 8
 
@@ -1188,7 +1189,6 @@
 #define cpu_init(cpu_model) CPU(cpu_x86_init(cpu_model))
 
 #define cpu_exec cpu_x86_exec
-#define cpu_gen_code cpu_x86_gen_code
 #define cpu_signal_handler cpu_x86_signal_handler
 #define cpu_list x86_cpu_list
 #define cpudef_setup x86_cpudef_setup
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 8b35de1..ef10e68 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -75,8 +75,6 @@
 static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
 static TCGv_i64 cpu_tmp1_i64;
 
-static uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
-
 #include "exec/gen-icount.h"
 
 #ifdef TARGET_X86_64
@@ -4401,9 +4399,6 @@
     target_ulong next_eip, tval;
     int rex_w, rex_r;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-        tcg_gen_debug_insn_start(pc_start);
-    }
     s->pc = pc_start;
     prefixes = 0;
     s->override = -1;
@@ -7842,18 +7837,13 @@
 }
 
 /* generate intermediate code in gen_opc_buf and gen_opparam_buf for
-   basic block 'tb'. If search_pc is TRUE, also generate PC
-   information for each intermediate instruction. */
-static inline void gen_intermediate_code_internal(X86CPU *cpu,
-                                                  TranslationBlock *tb,
-                                                  bool search_pc)
+   basic block 'tb'.  */
+void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb)
 {
+    X86CPU *cpu = x86_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPUX86State *env = &cpu->env;
     DisasContext dc1, *dc = &dc1;
     target_ulong pc_ptr;
-    CPUBreakpoint *bp;
-    int j, lj;
     uint64_t flags;
     target_ulong pc_start;
     target_ulong cs_base;
@@ -7933,40 +7923,32 @@
 
     dc->is_jmp = DISAS_NEXT;
     pc_ptr = pc_start;
-    lj = -1;
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
-    if (max_insns == 0)
+    if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
+    }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
 
     gen_tb_start(tb);
     for(;;) {
-        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-                if (bp->pc == pc_ptr &&
-                    !((bp->flags & BP_CPU) && (tb->flags & HF_RF_MASK))) {
-                    gen_debug(dc, pc_ptr - dc->cs_base);
-                    goto done_generating;
-                }
-            }
+        tcg_gen_insn_start(pc_ptr, dc->cc_op);
+        num_insns++;
+
+        /* If RF is set, suppress an internally generated breakpoint.  */
+        if (unlikely(cpu_breakpoint_test(cs, pc_ptr,
+                                         tb->flags & HF_RF_MASK
+                                         ? BP_GDB : BP_ANY))) {
+            gen_debug(dc, pc_ptr - dc->cs_base);
+            goto done_generating;
         }
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j)
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-            }
-            tcg_ctx.gen_opc_pc[lj] = pc_ptr;
-            gen_opc_cc_op[lj] = dc->cc_op;
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-            tcg_ctx.gen_opc_icount[lj] = num_insns;
-        }
-        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
+        }
 
         pc_ptr = disas_insn(env, dc, pc_ptr);
-        num_insns++;
         /* stop translation if indicated */
         if (dc->is_jmp)
             break;
@@ -8014,14 +7996,6 @@
 done_generating:
     gen_tb_end(tb, num_insns);
 
-    /* we don't forget to fill the last values */
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j)
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-    }
-
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
         int disas_flags;
@@ -8038,42 +8012,16 @@
     }
 #endif
 
-    if (!search_pc) {
-        tb->size = pc_ptr - pc_start;
-        tb->icount = num_insns;
-    }
+    tb->size = pc_ptr - pc_start;
+    tb->icount = num_insns;
 }
 
-void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb)
+void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    gen_intermediate_code_internal(x86_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc(CPUX86State *env, TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(x86_env_get_cpu(env), tb, true);
-}
-
-void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb, int pc_pos)
-{
-    int cc_op;
-#ifdef DEBUG_DISAS
-    if (qemu_loglevel_mask(CPU_LOG_TB_OP)) {
-        int i;
-        qemu_log("RESTORE:\n");
-        for(i = 0;i <= pc_pos; i++) {
-            if (tcg_ctx.gen_opc_instr_start[i]) {
-                qemu_log("0x%04x: " TARGET_FMT_lx "\n", i,
-                        tcg_ctx.gen_opc_pc[i]);
-            }
-        }
-        qemu_log("pc_pos=0x%x eip=" TARGET_FMT_lx " cs_base=%x\n",
-                pc_pos, tcg_ctx.gen_opc_pc[pc_pos] - tb->cs_base,
-                (uint32_t)tb->cs_base);
-    }
-#endif
-    env->eip = tcg_ctx.gen_opc_pc[pc_pos] - tb->cs_base;
-    cc_op = gen_opc_cc_op[pc_pos];
-    if (cc_op != CC_OP_DYNAMIC)
+    int cc_op = data[1];
+    env->eip = data[0] - tb->cs_base;
+    if (cc_op != CC_OP_DYNAMIC) {
         env->cc_op = cc_op;
+    }
 }
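
With TARGET_INSN_START_EXTRA_WORDS set to 1, the x86 restore path above
receives two words per instruction: the pc and the cc_op, the latter applied
only when it was statically known. A hedged standalone sketch of how those two
words are consumed, using hypothetical DEMO_* names (not QEMU code):

    #include <stdint.h>
    #include <stdio.h>

    enum { DEMO_CC_OP_DYNAMIC = 0 };   /* demo constant, not the real enum */

    struct demo_env { uint64_t eip; int cc_op; };

    static void demo_restore(struct demo_env *env, uint64_t cs_base,
                             const uint64_t *data)
    {
        env->eip = data[0] - cs_base;   /* data[0]: pc recorded at insn start */
        int cc_op = (int)data[1];       /* data[1]: extra word, here cc_op */
        if (cc_op != DEMO_CC_OP_DYNAMIC) {
            env->cc_op = cc_op;         /* dynamic value is left untouched */
        }
    }

    int main(void)
    {
        struct demo_env env = { 0, DEMO_CC_OP_DYNAMIC };
        uint64_t data[2] = { 0x401000, 7 };
        demo_restore(&env, 0x400000, data);
        printf("eip=0x%llx cc_op=%d\n", (unsigned long long)env.eip, env.cc_op);
        return 0;
    }
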
diff --git a/target-lm32/cpu.c b/target-lm32/cpu.c
index c2b77c6..d0ab278 100644
--- a/target-lm32/cpu.c
+++ b/target-lm32/cpu.c
@@ -275,6 +275,13 @@
     cc->gdb_num_core_regs = 32 + 7;
     cc->gdb_stop_before_watchpoint = true;
     cc->debug_excp_handler = lm32_debug_excp_handler;
+
+    /*
+     * Reason: lm32_cpu_initfn() calls cpu_exec_init(), which saves
+     * the object in cpus -> dangling pointer after final
+     * object_unref().
+     */
+    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 static void lm32_register_cpu_type(const LM32CPUInfo *info)
diff --git a/target-lm32/cpu.h b/target-lm32/cpu.h
index 3f874d5..2b7620c 100644
--- a/target-lm32/cpu.h
+++ b/target-lm32/cpu.h
@@ -219,7 +219,6 @@
 
 #define cpu_list lm32_cpu_list
 #define cpu_exec cpu_lm32_exec
-#define cpu_gen_code cpu_lm32_gen_code
 #define cpu_signal_handler cpu_lm32_signal_handler
 
 int lm32_cpu_handle_mmu_fault(CPUState *cpu, vaddr address, int rw,
diff --git a/target-lm32/translate.c b/target-lm32/translate.c
index cf7042e..c61ad0f 100644
--- a/target-lm32/translate.c
+++ b/target-lm32/translate.c
@@ -1005,10 +1005,6 @@
 
 static inline void decode(DisasContext *dc, uint32_t ir)
 {
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-        tcg_gen_debug_insn_start(dc->pc);
-    }
-
     dc->ir = ir;
     LOG_DIS("%8.8x\t", dc->ir);
 
@@ -1036,32 +1032,13 @@
     decinfo[dc->opcode](dc);
 }
 
-static void check_breakpoint(CPULM32State *env, DisasContext *dc)
-{
-    CPUState *cs = CPU(lm32_env_get_cpu(env));
-    CPUBreakpoint *bp;
-
-    if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-        QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-            if (bp->pc == dc->pc) {
-                tcg_gen_movi_tl(cpu_pc, dc->pc);
-                t_gen_raise_exception(dc, EXCP_DEBUG);
-                dc->is_jmp = DISAS_UPDATE;
-             }
-        }
-    }
-}
-
 /* generate intermediate code for basic block 'tb'.  */
-static inline
-void gen_intermediate_code_internal(LM32CPU *cpu,
-                                    TranslationBlock *tb, bool search_pc)
+void gen_intermediate_code(CPULM32State *env, struct TranslationBlock *tb)
 {
+    LM32CPU *cpu = lm32_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPULM32State *env = &cpu->env;
     struct DisasContext ctx, *dc = &ctx;
     uint32_t pc_start;
-    int j, lj;
     uint32_t next_page_start;
     int num_insns;
     int max_insns;
@@ -1083,41 +1060,36 @@
     }
 
     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
-    lj = -1;
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
     if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
     }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
 
     gen_tb_start(tb);
     do {
-        check_breakpoint(env, dc);
+        tcg_gen_insn_start(dc->pc);
+        num_insns++;
 
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j) {
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-                }
-            }
-            tcg_ctx.gen_opc_pc[lj] = dc->pc;
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-            tcg_ctx.gen_opc_icount[lj] = num_insns;
+        if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) {
+            tcg_gen_movi_tl(cpu_pc, dc->pc);
+            t_gen_raise_exception(dc, EXCP_DEBUG);
+            dc->is_jmp = DISAS_UPDATE;
+            break;
         }
 
         /* Pretty disas.  */
         LOG_DIS("%8.8x:\t", dc->pc);
 
-        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) {
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
         }
 
         decode(dc, cpu_ldl_code(env, dc->pc));
         dc->pc += 4;
-        num_insns++;
-
     } while (!dc->is_jmp
          && !tcg_op_buf_full()
          && !cs->singlestep_enabled
@@ -1154,16 +1126,8 @@
 
     gen_tb_end(tb, num_insns);
 
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j) {
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-        }
-    } else {
-        tb->size = dc->pc - pc_start;
-        tb->icount = num_insns;
-    }
+    tb->size = dc->pc - pc_start;
+    tb->icount = num_insns;
 
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
@@ -1175,16 +1139,6 @@
 #endif
 }
 
-void gen_intermediate_code(CPULM32State *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(lm32_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc(CPULM32State *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(lm32_env_get_cpu(env), tb, true);
-}
-
 void lm32_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
                          int flags)
 {
@@ -1219,9 +1173,10 @@
     cpu_fprintf(f, "\n\n");
 }
 
-void restore_state_to_opc(CPULM32State *env, TranslationBlock *tb, int pc_pos)
+void restore_state_to_opc(CPULM32State *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    env->pc = tcg_ctx.gen_opc_pc[pc_pos];
+    env->pc = data[0];
 }
 
 void lm32_translate_init(void)
diff --git a/target-m68k/cpu.c b/target-m68k/cpu.c
index 4f246da..97527ef 100644
--- a/target-m68k/cpu.c
+++ b/target-m68k/cpu.c
@@ -212,6 +212,13 @@
     dc->vmsd = &vmstate_m68k_cpu;
     cc->gdb_num_core_regs = 18;
     cc->gdb_core_xml_file = "cf-core.xml";
+
+    /*
+     * Reason: m68k_cpu_initfn() calls cpu_exec_init(), which saves
+     * the object in cpus -> dangling pointer after final
+     * object_unref().
+     */
+    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 static void register_cpu_type(const M68kCPUInfo *info)
diff --git a/target-m68k/cpu.h b/target-m68k/cpu.h
index d195706..224c169 100644
--- a/target-m68k/cpu.h
+++ b/target-m68k/cpu.h
@@ -213,7 +213,6 @@
 #define cpu_init(cpu_model) CPU(cpu_m68k_init(cpu_model))
 
 #define cpu_exec cpu_m68k_exec
-#define cpu_gen_code cpu_m68k_gen_code
 #define cpu_signal_handler cpu_m68k_signal_handler
 #define cpu_list m68k_cpu_list
 
diff --git a/target-m68k/translate.c b/target-m68k/translate.c
index 3cdf665..5995cce 100644
--- a/target-m68k/translate.c
+++ b/target-m68k/translate.c
@@ -2955,10 +2955,6 @@
 {
     uint16_t insn;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-        tcg_gen_debug_insn_start(s->pc);
-    }
-
     insn = cpu_lduw_code(env, s->pc);
     s->pc += 2;
 
@@ -2966,15 +2962,11 @@
 }
 
 /* generate intermediate code for basic block 'tb'.  */
-static inline void
-gen_intermediate_code_internal(M68kCPU *cpu, TranslationBlock *tb,
-                               bool search_pc)
+void gen_intermediate_code(CPUM68KState *env, TranslationBlock *tb)
 {
+    M68kCPU *cpu = m68k_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPUM68KState *env = &cpu->env;
     DisasContext dc1, *dc = &dc1;
-    CPUBreakpoint *bp;
-    int j, lj;
     target_ulong pc_start;
     int pc_offset;
     int num_insns;
@@ -2993,43 +2985,34 @@
     dc->fpcr = env->fpcr;
     dc->user = (env->sr & SR_S) == 0;
     dc->done_mac = 0;
-    lj = -1;
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
-    if (max_insns == 0)
+    if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
+    }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
 
     gen_tb_start(tb);
     do {
         pc_offset = dc->pc - pc_start;
         gen_throws_exception = NULL;
-        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-                if (bp->pc == dc->pc) {
-                    gen_exception(dc, dc->pc, EXCP_DEBUG);
-                    dc->is_jmp = DISAS_JUMP;
-                    break;
-                }
-            }
-            if (dc->is_jmp)
-                break;
+        tcg_gen_insn_start(dc->pc);
+        num_insns++;
+
+        if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) {
+            gen_exception(dc, dc->pc, EXCP_DEBUG);
+            dc->is_jmp = DISAS_JUMP;
+            break;
         }
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j)
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-            }
-            tcg_ctx.gen_opc_pc[lj] = dc->pc;
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-            tcg_ctx.gen_opc_icount[lj] = num_insns;
-        }
-        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
+        }
+
         dc->insn_pc = dc->pc;
 	disas_m68k_insn(env, dc);
-        num_insns++;
     } while (!dc->is_jmp && !tcg_op_buf_full() &&
              !cs->singlestep_enabled &&
              !singlestep &&
@@ -3073,28 +3056,8 @@
         qemu_log("\n");
     }
 #endif
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j)
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-    } else {
-        tb->size = dc->pc - pc_start;
-        tb->icount = num_insns;
-    }
-
-    //optimize_flags();
-    //expand_target_qops();
-}
-
-void gen_intermediate_code(CPUM68KState *env, TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(m68k_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc(CPUM68KState *env, TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(m68k_env_get_cpu(env), tb, true);
+    tb->size = dc->pc - pc_start;
+    tb->icount = num_insns;
 }
 
 void m68k_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
@@ -3120,7 +3083,8 @@
     cpu_fprintf (f, "FPRESULT = %12g\n", *(double *)&env->fp_result);
 }
 
-void restore_state_to_opc(CPUM68KState *env, TranslationBlock *tb, int pc_pos)
+void restore_state_to_opc(CPUM68KState *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    env->pc = tcg_ctx.gen_opc_pc[pc_pos];
+    env->pc = data[0];
 }
diff --git a/target-microblaze/cpu.c b/target-microblaze/cpu.c
index cbd84a2..52959e1 100644
--- a/target-microblaze/cpu.c
+++ b/target-microblaze/cpu.c
@@ -264,6 +264,12 @@
     cc->gdb_num_core_regs = 32 + 5;
 
     cc->disas_set_info = mb_disas_set_info;
+
+    /*
+     * Reason: mb_cpu_initfn() calls cpu_exec_init(), which saves the
+     * object in cpus -> dangling pointer after final object_unref().
+     */
+    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 static const TypeInfo mb_cpu_type_info = {
diff --git a/target-microblaze/cpu.h b/target-microblaze/cpu.h
index b707c71..6b212ab 100644
--- a/target-microblaze/cpu.h
+++ b/target-microblaze/cpu.h
@@ -295,7 +295,6 @@
 #define cpu_init(cpu_model) CPU(cpu_mb_init(cpu_model))
 
 #define cpu_exec cpu_mb_exec
-#define cpu_gen_code cpu_mb_gen_code
 #define cpu_signal_handler cpu_mb_signal_handler
 
 /* MMU modes definitions */
diff --git a/target-microblaze/op_helper.c b/target-microblaze/op_helper.c
index 092c4b5..d324347 100644
--- a/target-microblaze/op_helper.c
+++ b/target-microblaze/op_helper.c
@@ -150,9 +150,7 @@
 
 uint32_t helper_carry(uint32_t a, uint32_t b, uint32_t cf)
 {
-    uint32_t ncf;
-    ncf = compute_carry(a, b, cf);
-    return ncf;
+    return compute_carry(a, b, cf);
 }
 
 static inline int div_prepare(CPUMBState *env, uint32_t a, uint32_t b)
diff --git a/target-microblaze/translate.c b/target-microblaze/translate.c
index 3de8944..a9c5010 100644
--- a/target-microblaze/translate.c
+++ b/target-microblaze/translate.c
@@ -1588,10 +1588,6 @@
 {
     int i;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-        tcg_gen_debug_insn_start(dc->pc);
-    }
-
     dc->ir = ir;
     LOG_DIS("%8.8x\t", dc->ir);
 
@@ -1630,30 +1626,12 @@
     }
 }
 
-static void check_breakpoint(CPUMBState *env, DisasContext *dc)
-{
-    CPUState *cs = CPU(mb_env_get_cpu(env));
-    CPUBreakpoint *bp;
-
-    if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-        QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-            if (bp->pc == dc->pc) {
-                t_gen_raise_exception(dc, EXCP_DEBUG);
-                dc->is_jmp = DISAS_UPDATE;
-             }
-        }
-    }
-}
-
 /* generate intermediate code for basic block 'tb'.  */
-static inline void
-gen_intermediate_code_internal(MicroBlazeCPU *cpu, TranslationBlock *tb,
-                               bool search_pc)
+void gen_intermediate_code(CPUMBState *env, struct TranslationBlock *tb)
 {
+    MicroBlazeCPU *cpu = mb_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPUMBState *env = &cpu->env;
     uint32_t pc_start;
-    int j, lj;
     struct DisasContext ctx;
     struct DisasContext *dc = &ctx;
     uint32_t next_page_start, org_flags;
@@ -1690,47 +1668,46 @@
     }
 
     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
-    lj = -1;
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
-    if (max_insns == 0)
+    if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
+    }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
 
     gen_tb_start(tb);
     do
     {
+        tcg_gen_insn_start(dc->pc);
+        num_insns++;
+
 #if SIM_COMPAT
         if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
             tcg_gen_movi_tl(cpu_SR[SR_PC], dc->pc);
             gen_helper_debug();
         }
 #endif
-        check_breakpoint(env, dc);
 
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j)
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-            }
-            tcg_ctx.gen_opc_pc[lj] = dc->pc;
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-                        tcg_ctx.gen_opc_icount[lj] = num_insns;
+        if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) {
+            t_gen_raise_exception(dc, EXCP_DEBUG);
+            dc->is_jmp = DISAS_UPDATE;
+            break;
         }
 
         /* Pretty disas.  */
         LOG_DIS("%8.8x:\t", dc->pc);
 
-        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
+        }
 
         dc->clear_imm = 1;
         decode(dc, cpu_ldl_code(env, dc->pc));
         if (dc->clear_imm)
             dc->tb_flags &= ~IMM_FLAG;
         dc->pc += 4;
-        num_insns++;
 
         if (dc->delayed_branch) {
             dc->delayed_branch--;
@@ -1821,15 +1798,8 @@
     }
     gen_tb_end(tb, num_insns);
 
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j)
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-    } else {
-        tb->size = dc->pc - pc_start;
-                tb->icount = num_insns;
-    }
+    tb->size = dc->pc - pc_start;
+    tb->icount = num_insns;
 
 #ifdef DEBUG_DISAS
 #if !SIM_COMPAT
@@ -1846,16 +1816,6 @@
     assert(!dc->abort_at_next_insn);
 }
 
-void gen_intermediate_code (CPUMBState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(mb_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc (CPUMBState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(mb_env_get_cpu(env), tb, true);
-}
-
 void mb_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
                        int flags)
 {
@@ -1936,7 +1896,8 @@
     }
 }
 
-void restore_state_to_opc(CPUMBState *env, TranslationBlock *tb, int pc_pos)
+void restore_state_to_opc(CPUMBState *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    env->sregs[SR_PC] = tcg_ctx.gen_opc_pc[pc_pos];
+    env->sregs[SR_PC] = data[0];
 }
diff --git a/target-mips/cpu.c b/target-mips/cpu.c
index 4027d0f..7fe1f04 100644
--- a/target-mips/cpu.c
+++ b/target-mips/cpu.c
@@ -153,6 +153,13 @@
 
     cc->gdb_num_core_regs = 73;
     cc->gdb_stop_before_watchpoint = true;
+
+    /*
+     * Reason: mips_cpu_initfn() calls cpu_exec_init(), which saves
+     * the object in cpus -> dangling pointer after final
+     * object_unref().
+     */
+    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 static const TypeInfo mips_cpu_type_info = {
diff --git a/target-mips/cpu.h b/target-mips/cpu.h
index ec5f991..f32a0fd 100644
--- a/target-mips/cpu.h
+++ b/target-mips/cpu.h
@@ -130,6 +130,7 @@
 };
 
 #define NB_MMU_MODES 3
+#define TARGET_INSN_START_EXTRA_WORDS 2
 
 typedef struct CPUMIPSMVPContext CPUMIPSMVPContext;
 struct CPUMIPSMVPContext {
@@ -619,7 +620,6 @@
 void mips_cpu_list (FILE *f, fprintf_function cpu_fprintf);
 
 #define cpu_exec cpu_mips_exec
-#define cpu_gen_code cpu_mips_gen_code
 #define cpu_signal_handler cpu_mips_signal_handler
 #define cpu_list mips_cpu_list
 
diff --git a/target-mips/translate.c b/target-mips/translate.c
index 87d4959..897839c 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -1359,9 +1359,6 @@
 static TCGv_i64 fpu_f64[32];
 static TCGv_i64 msa_wr_d[64];
 
-static uint32_t gen_opc_hflags[OPC_BUF_SIZE];
-static target_ulong gen_opc_btarget[OPC_BUF_SIZE];
-
 #include "exec/gen-icount.h"
 
 #define gen_helper_0e0i(name, arg) do {                           \
@@ -18904,10 +18901,6 @@
         gen_set_label(l1);
     }
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-        tcg_gen_debug_insn_start(ctx->pc);
-    }
-
     op = MASK_OP_MAJOR(ctx->opcode);
     rs = (ctx->opcode >> 21) & 0x1f;
     rt = (ctx->opcode >> 16) & 0x1f;
@@ -19539,25 +19532,18 @@
     }
 }
 
-static inline void
-gen_intermediate_code_internal(MIPSCPU *cpu, TranslationBlock *tb,
-                               bool search_pc)
+void gen_intermediate_code(CPUMIPSState *env, struct TranslationBlock *tb)
 {
+    MIPSCPU *cpu = mips_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPUMIPSState *env = &cpu->env;
     DisasContext ctx;
     target_ulong pc_start;
     target_ulong next_page_start;
-    CPUBreakpoint *bp;
-    int j, lj = -1;
     int num_insns;
     int max_insns;
     int insn_bytes;
     int is_slot;
 
-    if (search_pc)
-        qemu_log("search pc %d\n", search_pc);
-
     pc_start = tb->pc;
     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
     ctx.pc = pc_start;
@@ -19567,6 +19553,7 @@
     ctx.CP0_Config1 = env->CP0_Config1;
     ctx.tb = tb;
     ctx.bstate = BS_NONE;
+    ctx.btarget = 0;
     ctx.kscrexist = (env->CP0_Config4 >> CP0C4_KScrExist) & 0xff;
     ctx.rxi = (env->CP0_Config3 >> CP0C3_RXI) & 1;
     ctx.ie = (env->CP0_Config4 >> CP0C4_IE) & 3;
@@ -19590,40 +19577,32 @@
                                  MO_UNALN : MO_ALIGN;
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
-    if (max_insns == 0)
+    if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
+    }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
+
     LOG_DISAS("\ntb %p idx %d hflags %04x\n", tb, ctx.mem_idx, ctx.hflags);
     gen_tb_start(tb);
     while (ctx.bstate == BS_NONE) {
-        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-                if (bp->pc == ctx.pc) {
-                    save_cpu_state(&ctx, 1);
-                    ctx.bstate = BS_BRANCH;
-                    gen_helper_raise_exception_debug(cpu_env);
-                    /* Include the breakpoint location or the tb won't
-                     * be flushed when it must be.  */
-                    ctx.pc += 4;
-                    goto done_generating;
-                }
-            }
+        tcg_gen_insn_start(ctx.pc, ctx.hflags & MIPS_HFLAG_BMASK, ctx.btarget);
+        num_insns++;
+
+        if (unlikely(cpu_breakpoint_test(cs, ctx.pc, BP_ANY))) {
+            save_cpu_state(&ctx, 1);
+            ctx.bstate = BS_BRANCH;
+            gen_helper_raise_exception_debug(cpu_env);
+            /* Include the breakpoint location or the tb won't
+             * be flushed when it must be.  */
+            ctx.pc += 4;
+            goto done_generating;
         }
 
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j)
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-            }
-            tcg_ctx.gen_opc_pc[lj] = ctx.pc;
-            gen_opc_hflags[lj] = ctx.hflags & MIPS_HFLAG_BMASK;
-            gen_opc_btarget[lj] = ctx.btarget;
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-            tcg_ctx.gen_opc_icount[lj] = num_insns;
-        }
-        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
+        }
 
         is_slot = ctx.hflags & MIPS_HFLAG_BMASK;
         if (!(ctx.hflags & MIPS_HFLAG_M16)) {
@@ -19660,8 +19639,6 @@
         }
         ctx.pc += insn_bytes;
 
-        num_insns++;
-
         /* Execute a branch and its delay slot as a single instruction.
            This is what GDB expects and is consistent with what the
            hardware does (e.g. if a delay slot instruction faults, the
@@ -19710,15 +19687,9 @@
 done_generating:
     gen_tb_end(tb, num_insns);
 
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j)
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-    } else {
-        tb->size = ctx.pc - pc_start;
-        tb->icount = num_insns;
-    }
+    tb->size = ctx.pc - pc_start;
+    tb->icount = num_insns;
+
 #ifdef DEBUG_DISAS
     LOG_DISAS("\n");
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
@@ -19729,16 +19700,6 @@
 #endif
 }
 
-void gen_intermediate_code (CPUMIPSState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(mips_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc (CPUMIPSState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(mips_env_get_cpu(env), tb, true);
-}
-
 static void fpu_dump_state(CPUMIPSState *env, FILE *f, fprintf_function fpu_fprintf,
                            int flags)
 {
@@ -20062,18 +20023,19 @@
     }
 }
 
-void restore_state_to_opc(CPUMIPSState *env, TranslationBlock *tb, int pc_pos)
+void restore_state_to_opc(CPUMIPSState *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    env->active_tc.PC = tcg_ctx.gen_opc_pc[pc_pos];
+    env->active_tc.PC = data[0];
     env->hflags &= ~MIPS_HFLAG_BMASK;
-    env->hflags |= gen_opc_hflags[pc_pos];
+    env->hflags |= data[1];
     switch (env->hflags & MIPS_HFLAG_BMASK_BASE) {
     case MIPS_HFLAG_BR:
         break;
     case MIPS_HFLAG_BC:
     case MIPS_HFLAG_BL:
     case MIPS_HFLAG_B:
-        env->btarget = gen_opc_btarget[pc_pos];
+        env->btarget = data[2];
         break;
     }
 }
diff --git a/target-moxie/cpu.c b/target-moxie/cpu.c
index 6b035aa..3af3779 100644
--- a/target-moxie/cpu.c
+++ b/target-moxie/cpu.c
@@ -114,6 +114,13 @@
     cc->get_phys_page_debug = moxie_cpu_get_phys_page_debug;
     cc->vmsd = &vmstate_moxie_cpu;
 #endif
+
+    /*
+     * Reason: moxie_cpu_initfn() calls cpu_exec_init(), which saves
+     * the object in cpus -> dangling pointer after final
+     * object_unref().
+     */
+    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 static void moxielite_initfn(Object *obj)
diff --git a/target-moxie/cpu.h b/target-moxie/cpu.h
index 2bac15b..a612744 100644
--- a/target-moxie/cpu.h
+++ b/target-moxie/cpu.h
@@ -122,7 +122,6 @@
 #define cpu_init(cpu_model) CPU(cpu_moxie_init(cpu_model))
 
 #define cpu_exec cpu_moxie_exec
-#define cpu_gen_code cpu_moxie_gen_code
 #define cpu_signal_handler cpu_moxie_signal_handler
 
 static inline int cpu_mmu_index(CPUMoxieState *env, bool ifetch)
diff --git a/target-moxie/translate.c b/target-moxie/translate.c
index cc77366..f84841e 100644
--- a/target-moxie/translate.c
+++ b/target-moxie/translate.c
@@ -153,10 +153,6 @@
     /* Set the default instruction length.  */
     int length = 2;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-        tcg_gen_debug_insn_start(ctx->pc);
-    }
-
     /* Examine the 16-bit opcode.  */
     opcode = ctx->opcode;
 
@@ -819,17 +815,13 @@
 }
 
 /* generate intermediate code for basic block 'tb'.  */
-static inline void
-gen_intermediate_code_internal(MoxieCPU *cpu, TranslationBlock *tb,
-                               bool search_pc)
+void gen_intermediate_code(CPUMoxieState *env, struct TranslationBlock *tb)
 {
+    MoxieCPU *cpu = moxie_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
     DisasContext ctx;
     target_ulong pc_start;
-    CPUBreakpoint *bp;
-    int j, lj = -1;
-    CPUMoxieState *env = &cpu->env;
-    int num_insns;
+    int num_insns, max_insns;
 
     pc_start = tb->pc;
     ctx.pc = pc_start;
@@ -839,40 +831,35 @@
     ctx.singlestep_enabled = 0;
     ctx.bstate = BS_NONE;
     num_insns = 0;
+    max_insns = tb->cflags & CF_COUNT_MASK;
+    if (max_insns == 0) {
+        max_insns = CF_COUNT_MASK;
+    }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
 
     gen_tb_start(tb);
     do {
-        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-                if (ctx.pc == bp->pc) {
-                    tcg_gen_movi_i32(cpu_pc, ctx.pc);
-                    gen_helper_debug(cpu_env);
-                    ctx.bstate = BS_EXCP;
-                    goto done_generating;
-                }
-            }
-        }
-
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j) {
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-                }
-            }
-            tcg_ctx.gen_opc_pc[lj] = ctx.pc;
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-            tcg_ctx.gen_opc_icount[lj] = num_insns;
-        }
-        ctx.opcode = cpu_lduw_code(env, ctx.pc);
-        ctx.pc += decode_opc(cpu, &ctx);
+        tcg_gen_insn_start(ctx.pc);
         num_insns++;
 
+        if (unlikely(cpu_breakpoint_test(cs, ctx.pc, BP_ANY))) {
+            tcg_gen_movi_i32(cpu_pc, ctx.pc);
+            gen_helper_debug(cpu_env);
+            ctx.bstate = BS_EXCP;
+            goto done_generating;
+        }
+
+        ctx.opcode = cpu_lduw_code(env, ctx.pc);
+        ctx.pc += decode_opc(cpu, &ctx);
+
+        if (num_insns >= max_insns) {
+            break;
+        }
         if (cs->singlestep_enabled) {
             break;
         }
-
         if ((ctx.pc & (TARGET_PAGE_SIZE - 1)) == 0) {
             break;
         }
@@ -898,29 +885,12 @@
  done_generating:
     gen_tb_end(tb, num_insns);
 
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j) {
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-        }
-    } else {
-        tb->size = ctx.pc - pc_start;
-        tb->icount = num_insns;
-    }
+    tb->size = ctx.pc - pc_start;
+    tb->icount = num_insns;
 }
 
-void gen_intermediate_code(CPUMoxieState *env, struct TranslationBlock *tb)
+void restore_state_to_opc(CPUMoxieState *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    gen_intermediate_code_internal(moxie_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc(CPUMoxieState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(moxie_env_get_cpu(env), tb, true);
-}
-
-void restore_state_to_opc(CPUMoxieState *env, TranslationBlock *tb, int pc_pos)
-{
-    env->pc = tcg_ctx.gen_opc_pc[pc_pos];
+    env->pc = data[0];
 }
diff --git a/target-openrisc/cpu.c b/target-openrisc/cpu.c
index d97f3c0..cc5e2d1 100644
--- a/target-openrisc/cpu.c
+++ b/target-openrisc/cpu.c
@@ -177,6 +177,13 @@
     dc->vmsd = &vmstate_openrisc_cpu;
 #endif
     cc->gdb_num_core_regs = 32 + 3;
+
+    /*
+     * Reason: openrisc_cpu_initfn() calls cpu_exec_init(), which saves
+     * the object in cpus -> dangling pointer after final
+     * object_unref().
+     */
+    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 static void cpu_register(const OpenRISCCPUInfo *info)
diff --git a/target-openrisc/cpu.h b/target-openrisc/cpu.h
index 1ff1c9e..eb71607 100644
--- a/target-openrisc/cpu.h
+++ b/target-openrisc/cpu.h
@@ -360,7 +360,6 @@
 
 #define cpu_list cpu_openrisc_list
 #define cpu_exec cpu_openrisc_exec
-#define cpu_gen_code cpu_openrisc_gen_code
 #define cpu_signal_handler cpu_openrisc_signal_handler
 
 #ifndef CONFIG_USER_ONLY
diff --git a/target-openrisc/translate.c b/target-openrisc/translate.c
index 473556e..b66fde1 100644
--- a/target-openrisc/translate.c
+++ b/target-openrisc/translate.c
@@ -1618,30 +1618,12 @@
     }
 }
 
-static void check_breakpoint(OpenRISCCPU *cpu, DisasContext *dc)
+void gen_intermediate_code(CPUOpenRISCState *env, struct TranslationBlock *tb)
 {
-    CPUState *cs = CPU(cpu);
-    CPUBreakpoint *bp;
-
-    if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-        QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-            if (bp->pc == dc->pc) {
-                tcg_gen_movi_tl(cpu_pc, dc->pc);
-                gen_exception(dc, EXCP_DEBUG);
-                dc->is_jmp = DISAS_UPDATE;
-            }
-        }
-    }
-}
-
-static inline void gen_intermediate_code_internal(OpenRISCCPU *cpu,
-                                                  TranslationBlock *tb,
-                                                  int search_pc)
-{
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
     struct DisasContext ctx, *dc = &ctx;
     uint32_t pc_start;
-    int j, k;
     uint32_t next_page_start;
     int num_insns;
     int max_insns;
@@ -1663,36 +1645,30 @@
     }
 
     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
-    k = -1;
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
 
     if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
     }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
 
     gen_tb_start(tb);
 
     do {
-        check_breakpoint(cpu, dc);
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (k < j) {
-                k++;
-                while (k < j) {
-                    tcg_ctx.gen_opc_instr_start[k++] = 0;
-                }
-            }
-            tcg_ctx.gen_opc_pc[k] = dc->pc;
-            tcg_ctx.gen_opc_instr_start[k] = 1;
-            tcg_ctx.gen_opc_icount[k] = num_insns;
+        tcg_gen_insn_start(dc->pc);
+        num_insns++;
+
+        if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) {
+            tcg_gen_movi_tl(cpu_pc, dc->pc);
+            gen_exception(dc, EXCP_DEBUG);
+            dc->is_jmp = DISAS_UPDATE;
+            break;
         }
 
-        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-            tcg_gen_debug_insn_start(dc->pc);
-        }
-
-        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) {
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
         }
         dc->ppc = dc->pc - 4;
@@ -1701,7 +1677,6 @@
         tcg_gen_movi_tl(cpu_npc, dc->npc);
         disas_openrisc_insn(dc, cpu);
         dc->pc = dc->npc;
-        num_insns++;
         /* delay slot */
         if (dc->delayed_branch) {
             dc->delayed_branch--;
@@ -1756,16 +1731,8 @@
 
     gen_tb_end(tb, num_insns);
 
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        k++;
-        while (k <= j) {
-            tcg_ctx.gen_opc_instr_start[k++] = 0;
-        }
-    } else {
-        tb->size = dc->pc - pc_start;
-        tb->icount = num_insns;
-    }
+    tb->size = dc->pc - pc_start;
+    tb->icount = num_insns;
 
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
@@ -1777,17 +1744,6 @@
 #endif
 }
 
-void gen_intermediate_code(CPUOpenRISCState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(openrisc_env_get_cpu(env), tb, 0);
-}
-
-void gen_intermediate_code_pc(CPUOpenRISCState *env,
-                              struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(openrisc_env_get_cpu(env), tb, 1);
-}
-
 void openrisc_cpu_dump_state(CPUState *cs, FILE *f,
                              fprintf_function cpu_fprintf,
                              int flags)
@@ -1804,7 +1760,7 @@
 }
 
 void restore_state_to_opc(CPUOpenRISCState *env, TranslationBlock *tb,
-                          int pc_pos)
+                          target_ulong *data)
 {
-    env->pc = tcg_ctx.gen_opc_pc[pc_pos];
+    env->pc = data[0];
 }
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index c6dbb38..98ce5a7 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1241,7 +1241,6 @@
 #define cpu_init(cpu_model) CPU(cpu_ppc_init(cpu_model))
 
 #define cpu_exec cpu_ppc_exec
-#define cpu_gen_code cpu_ppc_gen_code
 #define cpu_signal_handler cpu_ppc_signal_handler
 #define cpu_list ppc_cpu_list
 
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index e641680..7276299 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -1782,8 +1782,7 @@
 
     ns++;
 
-    retval = atoi(ns);
-    return retval;
+    return atoi(ns);
 }
 
 bool kvmppc_get_host_serial(char **value)
@@ -2193,6 +2192,7 @@
 
 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
 {
+    DeviceClass *dc = DEVICE_CLASS(oc);
     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
     uint32_t vmx = kvmppc_get_vmx();
     uint32_t dfp = kvmppc_get_dfp();
@@ -2219,6 +2219,9 @@
     if (icache_size != -1) {
         pcc->l1_icache_size = icache_size;
     }
+
+    /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
+    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 bool kvmppc_has_cap_epr(void)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index c0eed13..c2bc1a7 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -11409,17 +11409,13 @@
 }
 
 /*****************************************************************************/
-static inline void gen_intermediate_code_internal(PowerPCCPU *cpu,
-                                                  TranslationBlock *tb,
-                                                  bool search_pc)
+void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb)
 {
+    PowerPCCPU *cpu = ppc_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPUPPCState *env = &cpu->env;
     DisasContext ctx, *ctxp = &ctx;
     opc_handler_t **table, *handler;
     target_ulong pc_start;
-    CPUBreakpoint *bp;
-    int j, lj = -1;
     int num_insns;
     int max_insns;
 
@@ -11476,36 +11472,29 @@
 #endif
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
-    if (max_insns == 0)
+    if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
+    }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
 
     gen_tb_start(tb);
     tcg_clear_temp_count();
     /* Set env in case of segfault during code fetch */
     while (ctx.exception == POWERPC_EXCP_NONE && !tcg_op_buf_full()) {
-        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-                if (bp->pc == ctx.nip) {
-                    gen_debug_exception(ctxp);
-                    break;
-                }
-            }
+        tcg_gen_insn_start(ctx.nip);
+        num_insns++;
+
+        if (unlikely(cpu_breakpoint_test(cs, ctx.nip, BP_ANY))) {
+            gen_debug_exception(ctxp);
+            break;
         }
-        if (unlikely(search_pc)) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j)
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-            }
-            tcg_ctx.gen_opc_pc[lj] = ctx.nip;
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-            tcg_ctx.gen_opc_icount[lj] = num_insns;
-        }
+
         LOG_DISAS("----------------\n");
         LOG_DISAS("nip=" TARGET_FMT_lx " super=%d ir=%d\n",
                   ctx.nip, ctx.mem_idx, (int)msr_ir);
-        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO))
             gen_io_start();
         if (unlikely(need_byteswap(&ctx))) {
             ctx.opcode = bswap32(cpu_ldl_code(env, ctx.nip));
@@ -11515,12 +11504,8 @@
         LOG_DISAS("translate opcode %08x (%02x %02x %02x) (%s)\n",
                     ctx.opcode, opc1(ctx.opcode), opc2(ctx.opcode),
                     opc3(ctx.opcode), ctx.le_mode ? "little" : "big");
-        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-            tcg_gen_debug_insn_start(ctx.nip);
-        }
         ctx.nip += 4;
         table = env->opcodes;
-        num_insns++;
         handler = table[opc1(ctx.opcode)];
         if (is_indirect_opcode(handler)) {
             table = ind_table(handler);
@@ -11599,15 +11584,9 @@
     }
     gen_tb_end(tb, num_insns);
 
-    if (unlikely(search_pc)) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j)
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-    } else {
-        tb->size = ctx.nip - pc_start;
-        tb->icount = num_insns;
-    }
+    tb->size = ctx.nip - pc_start;
+    tb->icount = num_insns;
+
 #if defined(DEBUG_DISAS)
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
         int flags;
@@ -11620,17 +11599,8 @@
 #endif
 }
 
-void gen_intermediate_code (CPUPPCState *env, struct TranslationBlock *tb)
+void restore_state_to_opc(CPUPPCState *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    gen_intermediate_code_internal(ppc_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc (CPUPPCState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(ppc_env_get_cpu(env), tb, true);
-}
-
-void restore_state_to_opc(CPUPPCState *env, TranslationBlock *tb, int pc_pos)
-{
-    env->nip = tcg_ctx.gen_opc_pc[pc_pos];
+    env->nip = data[0];
 }
diff --git a/target-s390x/cpu.c b/target-s390x/cpu.c
index c3e21b4..ccfaa8a 100644
--- a/target-s390x/cpu.c
+++ b/target-s390x/cpu.c
@@ -353,6 +353,13 @@
 #endif
     cc->gdb_num_core_regs = S390_NUM_CORE_REGS;
     cc->gdb_core_xml_file = "s390x-core64.xml";
+
+    /*
+     * Reason: s390_cpu_initfn() calls cpu_exec_init(), which saves
+     * the object in cpus -> dangling pointer after final
+     * object_unref().
+     */
+    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 static const TypeInfo s390_cpu_type_info = {
diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h
index 5acd54c..e4de863 100644
--- a/target-s390x/cpu.h
+++ b/target-s390x/cpu.h
@@ -42,6 +42,7 @@
 #include "fpu/softfloat.h"
 
 #define NB_MMU_MODES 3
+#define TARGET_INSN_START_EXTRA_WORDS 1
 
 #define MMU_MODE0_SUFFIX _primary
 #define MMU_MODE1_SUFFIX _secondary
@@ -568,7 +569,7 @@
 typedef struct SubchDev SubchDev;
 
 #ifndef CONFIG_USER_ONLY
-extern void io_subsystem_reset(void);
+extern void subsystem_reset(void);
 SubchDev *css_find_subch(uint8_t m, uint8_t cssid, uint8_t ssid,
                          uint16_t schid);
 bool css_subch_visible(SubchDev *sch);
@@ -597,7 +598,6 @@
 
 #define cpu_init(model) CPU(cpu_s390x_init(model))
 #define cpu_exec cpu_s390x_exec
-#define cpu_gen_code cpu_s390x_gen_code
 #define cpu_signal_handler cpu_s390x_signal_handler
 
 void s390_cpu_list(FILE *f, fprintf_function cpu_fprintf);
diff --git a/target-s390x/gdbstub.c b/target-s390x/gdbstub.c
index 0c39a3c..a05d1cd 100644
--- a/target-s390x/gdbstub.c
+++ b/target-s390x/gdbstub.c
@@ -205,6 +205,82 @@
         return 0;
     }
 }
+
+/* the values represent the positions in s390-virt.xml */
+#define S390_VIRT_CKC_REGNUM    0
+#define S390_VIRT_CPUTM_REGNUM  1
+#define S390_VIRT_BEA_REGNUM    2
+#define S390_VIRT_PREFIX_REGNUM 3
+#define S390_VIRT_PP_REGNUM     4
+#define S390_VIRT_PFT_REGNUM    5
+#define S390_VIRT_PFS_REGNUM    6
+#define S390_VIRT_PFC_REGNUM    7
+/* total number of registers in s390-virt.xml */
+#define S390_NUM_VIRT_REGS 8
+
+static int cpu_read_virt_reg(CPUS390XState *env, uint8_t *mem_buf, int n)
+{
+    switch (n) {
+    case S390_VIRT_CKC_REGNUM:
+        return gdb_get_regl(mem_buf, env->ckc);
+    case S390_VIRT_CPUTM_REGNUM:
+        return gdb_get_regl(mem_buf, env->cputm);
+    case S390_VIRT_BEA_REGNUM:
+        return gdb_get_regl(mem_buf, env->gbea);
+    case S390_VIRT_PREFIX_REGNUM:
+        return gdb_get_regl(mem_buf, env->psa);
+    case S390_VIRT_PP_REGNUM:
+        return gdb_get_regl(mem_buf, env->pp);
+    case S390_VIRT_PFT_REGNUM:
+        return gdb_get_regl(mem_buf, env->pfault_token);
+    case S390_VIRT_PFS_REGNUM:
+        return gdb_get_regl(mem_buf, env->pfault_select);
+    case S390_VIRT_PFC_REGNUM:
+        return gdb_get_regl(mem_buf, env->pfault_compare);
+    default:
+        return 0;
+    }
+}
+
+static int cpu_write_virt_reg(CPUS390XState *env, uint8_t *mem_buf, int n)
+{
+    switch (n) {
+    case S390_VIRT_CKC_REGNUM:
+        env->ckc = ldtul_p(mem_buf);
+        cpu_synchronize_post_init(ENV_GET_CPU(env));
+        return 8;
+    case S390_VIRT_CPUTM_REGNUM:
+        env->cputm = ldtul_p(mem_buf);
+        cpu_synchronize_post_init(ENV_GET_CPU(env));
+        return 8;
+    case S390_VIRT_BEA_REGNUM:
+        env->gbea = ldtul_p(mem_buf);
+        cpu_synchronize_post_init(ENV_GET_CPU(env));
+        return 8;
+    case S390_VIRT_PREFIX_REGNUM:
+        env->psa = ldtul_p(mem_buf);
+        cpu_synchronize_post_init(ENV_GET_CPU(env));
+        return 8;
+    case S390_VIRT_PP_REGNUM:
+        env->pp = ldtul_p(mem_buf);
+        cpu_synchronize_post_init(ENV_GET_CPU(env));
+        return 8;
+    case S390_VIRT_PFT_REGNUM:
+        env->pfault_token = ldtul_p(mem_buf);
+        cpu_synchronize_post_init(ENV_GET_CPU(env));
+        return 8;
+    case S390_VIRT_PFS_REGNUM:
+        env->pfault_select = ldtul_p(mem_buf);
+        cpu_synchronize_post_init(ENV_GET_CPU(env));
+        return 8;
+    case S390_VIRT_PFC_REGNUM:
+        env->pfault_compare = ldtul_p(mem_buf);
+        cpu_synchronize_post_init(ENV_GET_CPU(env));
+        return 8;
+    default:
+        return 0;
+    }
+}
 #endif
 
 void s390_cpu_gdb_init(CPUState *cs)
@@ -225,5 +301,11 @@
     gdb_register_coprocessor(cs, cpu_read_c_reg,
                              cpu_write_c_reg,
                              S390_NUM_C_REGS, "s390-cr.xml", 0);
+
+    if (kvm_enabled()) {
+        gdb_register_coprocessor(cs, cpu_read_virt_reg,
+                                 cpu_write_virt_reg,
+                                 S390_NUM_VIRT_REGS, "s390-virt.xml", 0);
+    }
 #endif
 }
diff --git a/target-s390x/misc_helper.c b/target-s390x/misc_helper.c
index 8eac0e1..3a19e32 100644
--- a/target-s390x/misc_helper.c
+++ b/target-s390x/misc_helper.c
@@ -128,7 +128,7 @@
         run_on_cpu(t, s390_do_cpu_full_reset, t);
     }
     cmma_reset(cpu);
-    io_subsystem_reset();
+    subsystem_reset();
     scc->load_normal(CPU(cpu));
     cpu_synchronize_all_post_reset();
     resume_all_vcpus();
@@ -146,7 +146,7 @@
         run_on_cpu(t, s390_do_cpu_reset, t);
     }
     cmma_reset(cpu);
-    io_subsystem_reset();
+    subsystem_reset();
     scc->initial_cpu_reset(CPU(cpu));
     scc->load_normal(CPU(cpu));
     cpu_synchronize_all_post_reset();
diff --git a/target-s390x/translate.c b/target-s390x/translate.c
index 2bca33a..05d51fe 100644
--- a/target-s390x/translate.c
+++ b/target-s390x/translate.c
@@ -124,7 +124,7 @@
     for (i = 0; i < 32; i++) {
         cpu_fprintf(f, "V%02d=%016" PRIx64 "%016" PRIx64, i,
                     env->vregs[i][0].ll, env->vregs[i][1].ll);
-        cpu_fprintf(f, (i % 2) ? " " : "\n");
+        cpu_fprintf(f, (i % 2) ? "\n" : " ");
     }
 
 #ifndef CONFIG_USER_ONLY
@@ -161,8 +161,6 @@
 static TCGv_i64 regs[16];
 static TCGv_i64 fregs[16];
 
-static uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
-
 void s390x_translate_init(void)
 {
     int i;
@@ -5319,18 +5317,14 @@
     return ret;
 }
 
-static inline void gen_intermediate_code_internal(S390CPU *cpu,
-                                                  TranslationBlock *tb,
-                                                  bool search_pc)
+void gen_intermediate_code(CPUS390XState *env, struct TranslationBlock *tb)
 {
+    S390CPU *cpu = s390_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPUS390XState *env = &cpu->env;
     DisasContext dc;
     target_ulong pc_start;
     uint64_t next_page_start;
-    int j, lj = -1;
     int num_insns, max_insns;
-    CPUBreakpoint *bp;
     ExitStatus status;
     bool do_debug;
 
@@ -5353,41 +5347,27 @@
     if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
     }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
 
     gen_tb_start(tb);
 
     do {
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j) {
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-                }
-            }
-            tcg_ctx.gen_opc_pc[lj] = dc.pc;
-            gen_opc_cc_op[lj] = dc.cc_op;
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-            tcg_ctx.gen_opc_icount[lj] = num_insns;
+        tcg_gen_insn_start(dc.pc, dc.cc_op);
+        num_insns++;
+
+        if (unlikely(cpu_breakpoint_test(cs, dc.pc, BP_ANY))) {
+            status = EXIT_PC_STALE;
+            do_debug = true;
+            break;
         }
-        if (++num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
+
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
         }
 
-        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-            tcg_gen_debug_insn_start(dc.pc);
-        }
-
         status = NO_EXIT;
-        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-                if (bp->pc == dc.pc) {
-                    status = EXIT_PC_STALE;
-                    do_debug = true;
-                    break;
-                }
-            }
-        }
         if (status == NO_EXIT) {
             status = translate_one(env, &dc);
         }
@@ -5432,16 +5412,8 @@
 
     gen_tb_end(tb, num_insns);
 
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j) {
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-        }
-    } else {
-        tb->size = dc.pc - pc_start;
-        tb->icount = num_insns;
-    }
+    tb->size = dc.pc - pc_start;
+    tb->icount = num_insns;
 
 #if defined(S390X_DEBUG_DISAS)
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
@@ -5452,21 +5424,11 @@
 #endif
 }
 
-void gen_intermediate_code (CPUS390XState *env, struct TranslationBlock *tb)
+void restore_state_to_opc(CPUS390XState *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    gen_intermediate_code_internal(s390_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc (CPUS390XState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(s390_env_get_cpu(env), tb, true);
-}
-
-void restore_state_to_opc(CPUS390XState *env, TranslationBlock *tb, int pc_pos)
-{
-    int cc_op;
-    env->psw.addr = tcg_ctx.gen_opc_pc[pc_pos];
-    cc_op = gen_opc_cc_op[pc_pos];
+    int cc_op = data[1];
+    env->psw.addr = data[0];
     if ((cc_op != CC_OP_DYNAMIC) && (cc_op != CC_OP_STATIC)) {
         env->cc_op = cc_op;
     }
diff --git a/target-sh4/cpu.c b/target-sh4/cpu.c
index 5c65ab4..64e4467 100644
--- a/target-sh4/cpu.c
+++ b/target-sh4/cpu.c
@@ -290,6 +290,13 @@
 #endif
     dc->vmsd = &vmstate_sh_cpu;
     cc->gdb_num_core_regs = 59;
+
+    /*
+     * Reason: superh_cpu_initfn() calls cpu_exec_init(), which saves
+     * the object in cpus -> dangling pointer after final
+     * object_unref().
+     */
+    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 static const TypeInfo superh_cpu_type_info = {
diff --git a/target-sh4/cpu.h b/target-sh4/cpu.h
index 6fb6321..5b022c5 100644
--- a/target-sh4/cpu.h
+++ b/target-sh4/cpu.h
@@ -120,6 +120,7 @@
 #define ITLB_SIZE 4
 
 #define NB_MMU_MODES 2
+#define TARGET_INSN_START_EXTRA_WORDS 1
 
 enum sh_features {
     SH_FEATURE_SH4A = 1,
@@ -225,7 +226,6 @@
 #define cpu_init(cpu_model) CPU(cpu_sh4_init(cpu_model))
 
 #define cpu_exec cpu_sh4_exec
-#define cpu_gen_code cpu_sh4_gen_code
 #define cpu_signal_handler cpu_sh4_signal_handler
 #define cpu_list sh4_cpu_list
 
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index 724c0e7..f764bc2 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -70,8 +70,6 @@
 /* internal register indexes */
 static TCGv cpu_flags, cpu_delayed_pc;
 
-static uint32_t gen_opc_hflags[OPC_BUF_SIZE];
-
 #include "exec/gen-icount.h"
 
 void sh4_translate_init(void)
@@ -1790,10 +1788,6 @@
 {
     uint32_t old_flags = ctx->flags;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-        tcg_gen_debug_insn_start(ctx->pc);
-    }
-
     _decode_opc(ctx);
 
     if (old_flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
@@ -1820,16 +1814,12 @@
         gen_store_flags(ctx->flags);
 }
 
-static inline void
-gen_intermediate_code_internal(SuperHCPU *cpu, TranslationBlock *tb,
-                               bool search_pc)
+void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
 {
+    SuperHCPU *cpu = sh_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPUSH4State *env = &cpu->env;
     DisasContext ctx;
     target_ulong pc_start;
-    CPUBreakpoint *bp;
-    int i, ii;
     int num_insns;
     int max_insns;
 
@@ -1846,45 +1836,34 @@
     ctx.features = env->features;
     ctx.has_movcal = (ctx.flags & TB_FLAG_PENDING_MOVCA);
 
-    ii = -1;
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
-    if (max_insns == 0)
+    if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
+    }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
+
     gen_tb_start(tb);
     while (ctx.bstate == BS_NONE && !tcg_op_buf_full()) {
-        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-                if (ctx.pc == bp->pc) {
-		    /* We have hit a breakpoint - make sure PC is up-to-date */
-		    tcg_gen_movi_i32(cpu_pc, ctx.pc);
-                    gen_helper_debug(cpu_env);
-                    ctx.bstate = BS_BRANCH;
-		    break;
-		}
-	    }
-	}
-        if (search_pc) {
-            i = tcg_op_buf_count();
-            if (ii < i) {
-                ii++;
-                while (ii < i)
-                    tcg_ctx.gen_opc_instr_start[ii++] = 0;
-            }
-            tcg_ctx.gen_opc_pc[ii] = ctx.pc;
-            gen_opc_hflags[ii] = ctx.flags;
-            tcg_ctx.gen_opc_instr_start[ii] = 1;
-            tcg_ctx.gen_opc_icount[ii] = num_insns;
+        tcg_gen_insn_start(ctx.pc, ctx.flags);
+        num_insns++;
+
+        if (unlikely(cpu_breakpoint_test(cs, ctx.pc, BP_ANY))) {
+            /* We have hit a breakpoint - make sure PC is up-to-date */
+            tcg_gen_movi_i32(cpu_pc, ctx.pc);
+            gen_helper_debug(cpu_env);
+            ctx.bstate = BS_BRANCH;
+            break;
         }
-        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
-#if 0
-	fprintf(stderr, "Loading opcode at address 0x%08x\n", ctx.pc);
-	fflush(stderr);
-#endif
+        }
+
         ctx.opcode = cpu_lduw_code(env, ctx.pc);
 	decode_opc(&ctx);
-        num_insns++;
 	ctx.pc += 2;
 	if ((ctx.pc & (TARGET_PAGE_SIZE - 1)) == 0)
 	    break;
@@ -1924,15 +1903,8 @@
 
     gen_tb_end(tb, num_insns);
 
-    if (search_pc) {
-        i = tcg_op_buf_count();
-        ii++;
-        while (ii <= i)
-            tcg_ctx.gen_opc_instr_start[ii++] = 0;
-    } else {
-        tb->size = ctx.pc - pc_start;
-        tb->icount = num_insns;
-    }
+    tb->size = ctx.pc - pc_start;
+    tb->icount = num_insns;
 
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
@@ -1943,18 +1915,9 @@
 #endif
 }
 
-void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
+void restore_state_to_opc(CPUSH4State *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    gen_intermediate_code_internal(sh_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc(CPUSH4State * env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(sh_env_get_cpu(env), tb, true);
-}
-
-void restore_state_to_opc(CPUSH4State *env, TranslationBlock *tb, int pc_pos)
-{
-    env->pc = tcg_ctx.gen_opc_pc[pc_pos];
-    env->flags = gen_opc_hflags[pc_pos];
+    env->pc = data[0];
+    env->flags = data[1];
 }
diff --git a/target-sparc/cpu.c b/target-sparc/cpu.c
index 9528e3a..82bb72a 100644
--- a/target-sparc/cpu.c
+++ b/target-sparc/cpu.c
@@ -854,6 +854,13 @@
 #else
     cc->gdb_num_core_regs = 72;
 #endif
+
+    /*
+     * Reason: sparc_cpu_initfn() calls cpu_exec_init(), which saves
+     * the object in cpus -> dangling pointer after final
+     * object_unref().
+     */
+    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 static const TypeInfo sparc_cpu_type_info = {
diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h
index 053edd5..9fa770b 100644
--- a/target-sparc/cpu.h
+++ b/target-sparc/cpu.h
@@ -230,6 +230,7 @@
     uint32_t tt;
 } trap_state;
 #endif
+#define TARGET_INSN_START_EXTRA_WORDS 1
 
 typedef struct sparc_def_t {
     const char *name;
@@ -592,7 +593,6 @@
 #endif
 
 #define cpu_exec cpu_sparc_exec
-#define cpu_gen_code cpu_sparc_gen_code
 #define cpu_signal_handler cpu_sparc_signal_handler
 #define cpu_list sparc_cpu_list
 
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 4690b46..b59742a 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -64,9 +64,6 @@
 /* Floating point registers */
 static TCGv_i64 cpu_fpr[TARGET_DPREGS];
 
-static target_ulong gen_opc_npc[OPC_BUF_SIZE];
-static target_ulong gen_opc_jump_pc[2];
-
 #include "exec/gen-icount.h"
 
 typedef struct DisasContext {
@@ -955,17 +952,44 @@
     gen_goto_tb(dc, 1, pc2, pc2 + 4);
 }
 
-static inline void gen_branch_a(DisasContext *dc, target_ulong pc1,
-                                target_ulong pc2, TCGv r_cond)
+static void gen_branch_a(DisasContext *dc, target_ulong pc1)
 {
     TCGLabel *l1 = gen_new_label();
+    target_ulong npc = dc->npc;
 
-    tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond, 0, l1);
+    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_cond, 0, l1);
 
-    gen_goto_tb(dc, 0, pc2, pc1);
+    gen_goto_tb(dc, 0, npc, pc1);
 
     gen_set_label(l1);
-    gen_goto_tb(dc, 1, pc2 + 4, pc2 + 8);
+    gen_goto_tb(dc, 1, npc + 4, npc + 8);
+
+    dc->is_br = 1;
+}
+
+static void gen_branch_n(DisasContext *dc, target_ulong pc1)
+{
+    target_ulong npc = dc->npc;
+
+    if (likely(npc != DYNAMIC_PC)) {
+        dc->pc = npc;
+        dc->jump_pc[0] = pc1;
+        dc->jump_pc[1] = npc + 4;
+        dc->npc = JUMP_PC;
+    } else {
+        TCGv t, z;
+
+        tcg_gen_mov_tl(cpu_pc, cpu_npc);
+
+        tcg_gen_addi_tl(cpu_npc, cpu_npc, 4);
+        t = tcg_const_tl(pc1);
+        z = tcg_const_tl(0);
+        tcg_gen_movcond_tl(TCG_COND_NE, cpu_npc, cpu_cond, z, t, cpu_npc);
+        tcg_temp_free(t);
+        tcg_temp_free(z);
+
+        dc->pc = DYNAMIC_PC;
+    }
 }
 
 static inline void gen_generic_branch(DisasContext *dc)
@@ -1398,18 +1422,9 @@
         flush_cond(dc);
         gen_cond(cpu_cond, cc, cond, dc);
         if (a) {
-            gen_branch_a(dc, target, dc->npc, cpu_cond);
-            dc->is_br = 1;
+            gen_branch_a(dc, target);
         } else {
-            dc->pc = dc->npc;
-            dc->jump_pc[0] = target;
-            if (unlikely(dc->npc == DYNAMIC_PC)) {
-                dc->jump_pc[1] = DYNAMIC_PC;
-                tcg_gen_addi_tl(cpu_pc, cpu_npc, 4);
-            } else {
-                dc->jump_pc[1] = dc->npc + 4;
-                dc->npc = JUMP_PC;
-            }
+            gen_branch_n(dc, target);
         }
     }
 }
@@ -1447,18 +1462,9 @@
         flush_cond(dc);
         gen_fcond(cpu_cond, cc, cond);
         if (a) {
-            gen_branch_a(dc, target, dc->npc, cpu_cond);
-            dc->is_br = 1;
+            gen_branch_a(dc, target);
         } else {
-            dc->pc = dc->npc;
-            dc->jump_pc[0] = target;
-            if (unlikely(dc->npc == DYNAMIC_PC)) {
-                dc->jump_pc[1] = DYNAMIC_PC;
-                tcg_gen_addi_tl(cpu_pc, cpu_npc, 4);
-            } else {
-                dc->jump_pc[1] = dc->npc + 4;
-                dc->npc = JUMP_PC;
-            }
+            gen_branch_n(dc, target);
         }
     }
 }
@@ -1476,18 +1482,9 @@
     flush_cond(dc);
     gen_cond_reg(cpu_cond, cond, r_reg);
     if (a) {
-        gen_branch_a(dc, target, dc->npc, cpu_cond);
-        dc->is_br = 1;
+        gen_branch_a(dc, target);
     } else {
-        dc->pc = dc->npc;
-        dc->jump_pc[0] = target;
-        if (unlikely(dc->npc == DYNAMIC_PC)) {
-            dc->jump_pc[1] = DYNAMIC_PC;
-            tcg_gen_addi_tl(cpu_pc, cpu_npc, 4);
-        } else {
-            dc->jump_pc[1] = dc->npc + 4;
-            dc->npc = JUMP_PC;
-        }
+        gen_branch_n(dc, target);
     }
 }
 
@@ -2482,10 +2479,6 @@
     TCGv_i64 cpu_src1_64, cpu_src2_64, cpu_dst_64;
     target_long simm;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-        tcg_gen_debug_insn_start(dc->pc);
-    }
-
     opc = GET_FIELD(insn, 0, 1);
     rd = GET_FIELD(insn, 2, 6);
 
@@ -5213,16 +5206,12 @@
     }
 }
 
-static inline void gen_intermediate_code_internal(SPARCCPU *cpu,
-                                                  TranslationBlock *tb,
-                                                  bool spc)
+void gen_intermediate_code(CPUSPARCState * env, TranslationBlock * tb)
 {
+    SPARCCPU *cpu = sparc_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPUSPARCState *env = &cpu->env;
     target_ulong pc_start, last_pc;
     DisasContext dc1, *dc = &dc1;
-    CPUBreakpoint *bp;
-    int j, lj = -1;
     int num_insns;
     int max_insns;
     unsigned int insn;
@@ -5242,42 +5231,41 @@
 
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
-    if (max_insns == 0)
+    if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
+    }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
+
     gen_tb_start(tb);
     do {
-        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-                if (bp->pc == dc->pc) {
-                    if (dc->pc != pc_start)
-                        save_state(dc);
-                    gen_helper_debug(cpu_env);
-                    tcg_gen_exit_tb(0);
-                    dc->is_br = 1;
-                    goto exit_gen_loop;
-                }
-            }
+        if (dc->npc & JUMP_PC) {
+            assert(dc->jump_pc[1] == dc->pc + 4);
+            tcg_gen_insn_start(dc->pc, dc->jump_pc[0] | JUMP_PC);
+        } else {
+            tcg_gen_insn_start(dc->pc, dc->npc);
         }
-        if (spc) {
-            qemu_log("Search PC...\n");
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j)
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-                tcg_ctx.gen_opc_pc[lj] = dc->pc;
-                gen_opc_npc[lj] = dc->npc;
-                tcg_ctx.gen_opc_instr_start[lj] = 1;
-                tcg_ctx.gen_opc_icount[lj] = num_insns;
+        num_insns++;
+
+        if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) {
+            if (dc->pc != pc_start) {
+                save_state(dc);
             }
+            gen_helper_debug(cpu_env);
+            tcg_gen_exit_tb(0);
+            dc->is_br = 1;
+            goto exit_gen_loop;
         }
-        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
+        }
+
         last_pc = dc->pc;
         insn = cpu_ldl_code(env, dc->pc);
 
         disas_sparc_insn(dc, insn);
-        num_insns++;
 
         if (dc->is_br)
             break;
@@ -5316,20 +5304,9 @@
     }
     gen_tb_end(tb, num_insns);
 
-    if (spc) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j)
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-#if 0
-        log_page_dump();
-#endif
-        gen_opc_jump_pc[0] = dc->jump_pc[0];
-        gen_opc_jump_pc[1] = dc->jump_pc[1];
-    } else {
-        tb->size = last_pc + 4 - pc_start;
-        tb->icount = num_insns;
-    }
+    tb->size = last_pc + 4 - pc_start;
+    tb->icount = num_insns;
+
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
         qemu_log("--------------\n");
@@ -5340,16 +5317,6 @@
 #endif
 }
 
-void gen_intermediate_code(CPUSPARCState * env, TranslationBlock * tb)
-{
-    gen_intermediate_code_internal(sparc_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc(CPUSPARCState * env, TranslationBlock * tb)
-{
-    gen_intermediate_code_internal(sparc_env_get_cpu(env), tb, true);
-}
-
 void gen_intermediate_code_init(CPUSPARCState *env)
 {
     unsigned int i;
@@ -5451,19 +5418,21 @@
     }
 }
 
-void restore_state_to_opc(CPUSPARCState *env, TranslationBlock *tb, int pc_pos)
+void restore_state_to_opc(CPUSPARCState *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    target_ulong npc;
-    env->pc = tcg_ctx.gen_opc_pc[pc_pos];
-    npc = gen_opc_npc[pc_pos];
-    if (npc == 1) {
+    target_ulong pc = data[0];
+    target_ulong npc = data[1];
+
+    env->pc = pc;
+    if (npc == DYNAMIC_PC) {
         /* dynamic NPC: already stored */
-    } else if (npc == 2) {
+    } else if (npc & JUMP_PC) {
         /* jump PC: use 'cond' and the jump targets of the translation */
         if (env->cond) {
-            env->npc = gen_opc_jump_pc[0];
+            env->npc = npc & ~3;
         } else {
-            env->npc = gen_opc_jump_pc[1];
+            env->npc = pc + 4;
         }
     } else {
         env->npc = npc;
diff --git a/target-tilegx/cpu.c b/target-tilegx/cpu.c
index 78b73e4..c249704 100644
--- a/target-tilegx/cpu.c
+++ b/target-tilegx/cpu.c
@@ -22,6 +22,7 @@
 #include "qemu-common.h"
 #include "hw/qdev-properties.h"
 #include "migration/vmstate.h"
+#include "linux-user/syscall_defs.h"
 
 static void tilegx_cpu_dump_state(CPUState *cs, FILE *f,
                                   fprintf_function cpu_fprintf, int flags)
@@ -121,8 +122,12 @@
 {
     TileGXCPU *cpu = TILEGX_CPU(cs);
 
-    cs->exception_index = TILEGX_EXCP_SEGV;
+    /* The sigcode field will be filled in by do_signal in main.c.  */
+    cs->exception_index = TILEGX_EXCP_SIGNAL;
     cpu->env.excaddr = address;
+    cpu->env.signo = TARGET_SIGSEGV;
+    cpu->env.sigcode = 0;
+
     return 1;
 }
 
@@ -154,6 +159,13 @@
     cc->set_pc = tilegx_cpu_set_pc;
     cc->handle_mmu_fault = tilegx_cpu_handle_mmu_fault;
     cc->gdb_num_core_regs = 0;
+
+    /*
+     * Reason: tilegx_cpu_initfn() calls cpu_exec_init(), which saves
+     * the object in cpus -> dangling pointer after final
+     * object_unref().
+     */
+    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 static const TypeInfo tilegx_cpu_type_info = {
diff --git a/target-tilegx/cpu.h b/target-tilegx/cpu.h
index b9f5082..03df107 100644
--- a/target-tilegx/cpu.h
+++ b/target-tilegx/cpu.h
@@ -53,6 +53,8 @@
     TILEGX_SPR_CMPEXCH = 0,
     TILEGX_SPR_CRITICAL_SEC = 1,
     TILEGX_SPR_SIM_CONTROL = 2,
+    TILEGX_SPR_EX_CONTEXT_0_0 = 3,
+    TILEGX_SPR_EX_CONTEXT_0_1 = 4,
     TILEGX_SPR_COUNT
 };
 
@@ -60,7 +62,7 @@
 typedef enum {
     TILEGX_EXCP_NONE = 0,
     TILEGX_EXCP_SYSCALL = 1,
-    TILEGX_EXCP_SEGV = 2,
+    TILEGX_EXCP_SIGNAL = 2,
     TILEGX_EXCP_OPCODE_UNKNOWN = 0x101,
     TILEGX_EXCP_OPCODE_UNIMPLEMENTED = 0x102,
     TILEGX_EXCP_OPCODE_CMPEXCH = 0x103,
@@ -87,10 +89,12 @@
     uint64_t pc;                       /* Current pc */
 
 #if defined(CONFIG_USER_ONLY)
+    uint64_t excaddr;                  /* exception address */
     uint64_t atomic_srca;              /* Arguments to atomic "exceptions" */
     uint64_t atomic_srcb;
     uint32_t atomic_dstr;
-    uint64_t excaddr;                  /* exception address */
+    uint32_t signo;                    /* Signal number */
+    uint32_t sigcode;                  /* Signal code */
 #endif
 
     CPU_COMMON
@@ -163,7 +167,6 @@
 #define cpu_init(cpu_model) CPU(cpu_tilegx_init(cpu_model))
 
 #define cpu_exec cpu_tilegx_exec
-#define cpu_gen_code cpu_tilegx_gen_code
 #define cpu_signal_handler cpu_tilegx_signal_handler
 
 static inline void cpu_get_tb_cpu_state(CPUTLGState *env, target_ulong *pc,
diff --git a/target-tilegx/helper.c b/target-tilegx/helper.c
index a01bb8d..dda821f 100644
--- a/target-tilegx/helper.c
+++ b/target-tilegx/helper.c
@@ -21,6 +21,8 @@
 #include "cpu.h"
 #include "qemu-common.h"
 #include "exec/helper-proto.h"
+#include <zlib.h> /* For crc32 */
+#include "syscall_defs.h"
 
 void helper_exception(CPUTLGState *env, uint32_t excp)
 {
@@ -30,6 +32,27 @@
     cpu_loop_exit(cs);
 }
 
+void helper_ext01_ics(CPUTLGState *env)
+{
+    uint64_t val = env->spregs[TILEGX_SPR_EX_CONTEXT_0_1];
+
+    switch (val) {
+    case 0:
+    case 1:
+        env->spregs[TILEGX_SPR_CRITICAL_SEC] = val;
+        break;
+    default:
+#if defined(CONFIG_USER_ONLY)
+        env->signo = TARGET_SIGILL;
+        env->sigcode = TARGET_ILL_ILLOPC;
+        helper_exception(env, TILEGX_EXCP_SIGNAL);
+#else
+        helper_exception(env, TILEGX_EXCP_OPCODE_UNIMPLEMENTED);
+#endif
+        break;
+    }
+}
+
 uint64_t helper_cntlz(uint64_t arg)
 {
     return clz64(arg);
@@ -78,3 +101,61 @@
 
     return vdst;
 }
+
+uint64_t helper_crc32_8(uint64_t accum, uint64_t input)
+{
+    uint8_t buf = input;
+
+    /* zlib crc32 converts the accumulator and output to one's complement.  */
+    return crc32(accum ^ 0xffffffff, &buf, 1) ^ 0xffffffff;
+}
+
+uint64_t helper_crc32_32(uint64_t accum, uint64_t input)
+{
+    uint8_t buf[4];
+
+    stl_le_p(buf, input);
+
+    /* zlib crc32 converts the accumulator and output to one's complement.  */
+    return crc32(accum ^ 0xffffffff, buf, 4) ^ 0xffffffff;
+}
+
+uint64_t helper_cmula(uint64_t srcd, uint64_t srca, uint64_t srcb)
+{
+    uint32_t reala = (int16_t)srca;
+    uint32_t imaga = (int16_t)(srca >> 16);
+    uint32_t realb = (int16_t)srcb;
+    uint32_t imagb = (int16_t)(srcb >> 16);
+    uint32_t reald = srcd;
+    uint32_t imagd = srcd >> 32;
+    uint32_t realr = reala * realb - imaga * imagb + reald;
+    uint32_t imagr = reala * imagb + imaga * realb + imagd;
+
+    return deposit64(realr, 32, 32, imagr);
+}
+
+uint64_t helper_cmulaf(uint64_t srcd, uint64_t srca, uint64_t srcb)
+{
+    uint32_t reala = (int16_t)srca;
+    uint32_t imaga = (int16_t)(srca >> 16);
+    uint32_t realb = (int16_t)srcb;
+    uint32_t imagb = (int16_t)(srcb >> 16);
+    uint32_t reald = (int16_t)srcd;
+    uint32_t imagd = (int16_t)(srcd >> 16);
+    int32_t realr = reala * realb - imaga * imagb;
+    int32_t imagr = reala * imagb + imaga * realb;
+
+    return deposit32((realr >> 15) + reald, 16, 16, (imagr >> 15) + imagd);
+}
+
+uint64_t helper_cmul2(uint64_t srca, uint64_t srcb, int shift, int round)
+{
+    uint32_t reala = (int16_t)srca;
+    uint32_t imaga = (int16_t)(srca >> 16);
+    uint32_t realb = (int16_t)srcb;
+    uint32_t imagb = (int16_t)(srcb >> 16);
+    int32_t realr = reala * realb - imaga * imagb + round;
+    int32_t imagr = reala * imagb + imaga * realb + round;
+
+    return deposit32(realr >> shift, 16, 16, imagr >> shift);
+}
diff --git a/target-tilegx/helper.h b/target-tilegx/helper.h
index 766f5f2..9281d0f 100644
--- a/target-tilegx/helper.h
+++ b/target-tilegx/helper.h
@@ -1,10 +1,26 @@
 DEF_HELPER_2(exception, noreturn, env, i32)
+DEF_HELPER_1(ext01_ics, void, env)
 DEF_HELPER_FLAGS_1(cntlz, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_FLAGS_1(cnttz, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_FLAGS_1(pcnt, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_FLAGS_1(revbits, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_FLAGS_3(shufflebytes, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64)
+DEF_HELPER_FLAGS_2(crc32_8, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(crc32_32, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_3(cmula, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64)
+DEF_HELPER_FLAGS_3(cmulaf, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64)
+DEF_HELPER_FLAGS_4(cmul2, TCG_CALL_NO_RWG_SE, i64, i64, i64, int, int)
 
+DEF_HELPER_FLAGS_2(v1int_h, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(v1int_l, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(v2int_h, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(v2int_l, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_2(v1multu, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(v2mults, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(v1shl, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(v1shru, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(v1shrs, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(v2shl, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(v2shru, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(v2shrs, TCG_CALL_NO_RWG_SE, i64, i64, i64)
diff --git a/target-tilegx/simd_helper.c b/target-tilegx/simd_helper.c
index b931929..6d7bb5c 100644
--- a/target-tilegx/simd_helper.c
+++ b/target-tilegx/simd_helper.c
@@ -23,12 +23,54 @@
 #include "exec/helper-proto.h"
 
 
+/* Broadcast a value to all elements of a vector.  */
+#define V1(X)      (((X) & 0xff) * 0x0101010101010101ull)
+#define V2(X)      (((X) & 0xffff) * 0x0001000100010001ull)
+
+
+uint64_t helper_v1multu(uint64_t a, uint64_t b)
+{
+    uint64_t r = 0;
+    int i;
+
+    for (i = 0; i < 64; i += 8) {
+        unsigned ae = extract64(a, i, 8);
+        unsigned be = extract64(b, i, 8);
+        r = deposit64(r, i, 8, ae * be);
+    }
+    return r;
+}
+
+uint64_t helper_v2mults(uint64_t a, uint64_t b)
+{
+    uint64_t r = 0;
+    int i;
+
+    /* While the instruction talks about signed inputs, with a
+       truncated result the sign of the inputs doesn't matter.  */
+    for (i = 0; i < 64; i += 16) {
+        unsigned ae = extract64(a, i, 16);
+        unsigned be = extract64(b, i, 16);
+        r = deposit64(r, i, 16, ae * be);
+    }
+    return r;
+}
+
 uint64_t helper_v1shl(uint64_t a, uint64_t b)
 {
     uint64_t m;
 
     b &= 7;
-    m = 0x0101010101010101ULL * (0xff >> b);
+    m = V1(0xff >> b);
+    return (a & m) << b;
+}
+
+uint64_t helper_v2shl(uint64_t a, uint64_t b)
+{
+    uint64_t m;
+
+    b &= 15;
+    m = V2(0xffff >> b);
     return (a & m) << b;
 }
 
@@ -37,7 +79,16 @@
     uint64_t m;
 
     b &= 7;
-    m = 0x0101010101010101ULL * ((0xff << b) & 0xff);
+    m = V1(0xff << b);
+    return (a & m) >> b;
+}
+
+uint64_t helper_v2shru(uint64_t a, uint64_t b)
+{
+    uint64_t m;
+
+    b &= 15;
+    m = V2(0xffff << b);
     return (a & m) >> b;
 }
 
@@ -48,8 +99,67 @@
 
     b &= 7;
     for (i = 0; i < 64; i += 8) {
-        int64_t ae = (int8_t)(a >> i);
-        r |= ((ae >> b) & 0xff) << i;
+        r = deposit64(r, i, 8, sextract64(a, i + b, 8 - b));
+    }
+    return r;
+}
+
+uint64_t helper_v2shrs(uint64_t a, uint64_t b)
+{
+    uint64_t r = 0;
+    int i;
+
+    b &= 15;
+    for (i = 0; i < 64; i += 16) {
+        r = deposit64(r, i, 16, sextract64(a, i + b, 16 - b));
+    }
+    return r;
+}
+
+uint64_t helper_v1int_h(uint64_t a, uint64_t b)
+{
+    uint64_t r = 0;
+    int i;
+
+    for (i = 0; i < 32; i += 8) {
+        r = deposit64(r, 2 * i + 8, 8, extract64(a, i + 32, 8));
+        r = deposit64(r, 2 * i, 8, extract64(b, i + 32, 8));
+    }
+    return r;
+}
+
+uint64_t helper_v1int_l(uint64_t a, uint64_t b)
+{
+    uint64_t r = 0;
+    int i;
+
+    for (i = 0; i < 32; i += 8) {
+        r = deposit64(r, 2 * i + 8, 8, extract64(a, i, 8));
+        r = deposit64(r, 2 * i, 8, extract64(b, i, 8));
+    }
+    return r;
+}
+
+uint64_t helper_v2int_h(uint64_t a, uint64_t b)
+{
+    uint64_t r = 0;
+    int i;
+
+    for (i = 0; i < 32; i += 16) {
+        r = deposit64(r, 2 * i + 16, 16, extract64(a, i + 32, 16));
+        r = deposit64(r, 2 * i, 16, extract64(b, i + 32, 16));
+    }
+    return r;
+}
+
+uint64_t helper_v2int_l(uint64_t a, uint64_t b)
+{
+    uint64_t r = 0;
+    int i;
+
+    for (i = 0; i < 32; i += 16) {
+        r = deposit64(r, 2 * i + 16, 16, extract64(a, i, 16));
+        r = deposit64(r, 2 * i, 16, extract64(b, i, 16));
     }
     return r;
 }
diff --git a/target-tilegx/translate.c b/target-tilegx/translate.c
index e70c3e5..34d45f8 100644
--- a/target-tilegx/translate.c
+++ b/target-tilegx/translate.c
@@ -23,6 +23,8 @@
 #include "disas/disas.h"
 #include "tcg-op.h"
 #include "exec/cpu_ldst.h"
+#include "linux-user/syscall_defs.h"
+
 #include "opcode_tilegx.h"
 #include "spr_def_64.h"
 
@@ -96,6 +98,7 @@
 #define OE_SH(E,XY)    OE(SHIFT_OPCODE_##XY, E##_SHIFT_OPCODE_##XY, XY)
 
 #define V1_IMM(X)      (((X) & 0xff) * 0x0101010101010101ull)
+#define V2_IMM(X)      (((X) & 0xffff) * 0x0001000100010001ull)
 
 
 static void gen_exception(DisasContext *dc, TileExcp num)
@@ -275,6 +278,44 @@
     tcg_temp_free(t);
 }
 
+static void gen_cmul2(TCGv tdest, TCGv tsrca, TCGv tsrcb, int sh, int rd)
+{
+    TCGv_i32 tsh = tcg_const_i32(sh);
+    TCGv_i32 trd = tcg_const_i32(rd);
+    gen_helper_cmul2(tdest, tsrca, tsrcb, tsh, trd);
+    tcg_temp_free_i32(tsh);
+    tcg_temp_free_i32(trd);
+}
+
+static TileExcp gen_st_opcode(DisasContext *dc, unsigned dest, unsigned srca,
+                              unsigned srcb, TCGMemOp memop, const char *name)
+{
+    if (dest) {
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
+    }
+
+    tcg_gen_qemu_st_tl(load_gr(dc, srcb), load_gr(dc, srca),
+		       dc->mmuidx, memop);
+
+    qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s, %s", name,
+                  reg_names[srca], reg_names[srcb]);
+    return TILEGX_EXCP_NONE;
+}
+
+static TileExcp gen_st_add_opcode(DisasContext *dc, unsigned srca, unsigned srcb,
+                                  int imm, TCGMemOp memop, const char *name)
+{
+    TCGv tsrca = load_gr(dc, srca);
+    TCGv tsrcb = load_gr(dc, srcb);
+
+    tcg_gen_qemu_st_tl(tsrcb, tsrca, dc->mmuidx, memop);
+    tcg_gen_addi_tl(dest_gr(dc, srca), tsrca, imm);
+
+    qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s, %s, %d", name,
+                  reg_names[srca], reg_names[srcb], imm);
+    return TILEGX_EXCP_NONE;
+}
+
 /* Equality comparison with zero can be done quickly and efficiently.  */
 static void gen_v1cmpeq0(TCGv v)
 {
@@ -310,42 +351,152 @@
     tcg_temp_free(c);
 }
 
-static TileExcp gen_st_opcode(DisasContext *dc, unsigned dest, unsigned srca,
-                              unsigned srcb, TCGMemOp memop, const char *name)
+/* Vector addition can be performed via arithmetic plus masking.  It is
+   efficient this way only for 4 or more elements.  */
+static void gen_v12add(TCGv tdest, TCGv tsrca, TCGv tsrcb, uint64_t sign)
 {
-    if (dest) {
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
-    }
+    TCGv tmask = tcg_const_tl(~sign);
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
 
-    tcg_gen_qemu_st_tl(load_gr(dc, srcb), load_gr(dc, srca),
-		       dc->mmuidx, memop);
+    /* ((a & ~sign) + (b & ~sign)) ^ ((a ^ b) & sign).  */
+    tcg_gen_and_tl(t0, tsrca, tmask);
+    tcg_gen_and_tl(t1, tsrcb, tmask);
+    tcg_gen_add_tl(tdest, t0, t1);
+    tcg_gen_xor_tl(t0, tsrca, tsrcb);
+    tcg_gen_andc_tl(t0, t0, tmask);
+    tcg_gen_xor_tl(tdest, tdest, t0);
 
-    qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s, %s", name,
-                  reg_names[srca], reg_names[srcb]);
-    return TILEGX_EXCP_NONE;
+    tcg_temp_free(t1);
+    tcg_temp_free(t0);
+    tcg_temp_free(tmask);
 }
 
-static TileExcp gen_st_add_opcode(DisasContext *dc, unsigned srca, unsigned srcb,
-                                  int imm, TCGMemOp memop, const char *name)
+/* Similarly for vector subtraction.  */
+static void gen_v12sub(TCGv tdest, TCGv tsrca, TCGv tsrcb, uint64_t sign)
 {
-    TCGv tsrca = load_gr(dc, srca);
-    TCGv tsrcb = load_gr(dc, srcb);
+    TCGv tsign = tcg_const_tl(sign);
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
 
-    tcg_gen_qemu_st_tl(tsrcb, tsrca, dc->mmuidx, memop);
-    tcg_gen_addi_tl(dest_gr(dc, srca), tsrca, imm);
+    /* ((a | sign) - (b & ~sign)) ^ ((a ^ ~b) & sign).  */
+    tcg_gen_or_tl(t0, tsrca, tsign);
+    tcg_gen_andc_tl(t1, tsrcb, tsign);
+    tcg_gen_sub_tl(tdest, t0, t1);
+    tcg_gen_eqv_tl(t0, tsrca, tsrcb);
+    tcg_gen_and_tl(t0, t0, tsign);
+    tcg_gen_xor_tl(tdest, tdest, t0);
 
-    qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s, %s, %d", name,
-                  reg_names[srca], reg_names[srcb], imm);
-    return TILEGX_EXCP_NONE;
+    tcg_temp_free(t1);
+    tcg_temp_free(t0);
+    tcg_temp_free(tsign);
+}
+
+static void gen_v4sh(TCGv d64, TCGv a64, TCGv b64,
+                     void (*generate)(TCGv_i32, TCGv_i32, TCGv_i32))
+{
+    TCGv_i32 al = tcg_temp_new_i32();
+    TCGv_i32 ah = tcg_temp_new_i32();
+    TCGv_i32 bl = tcg_temp_new_i32();
+
+    tcg_gen_extr_i64_i32(al, ah, a64);
+    tcg_gen_extrl_i64_i32(bl, b64);
+    tcg_gen_andi_i32(bl, bl, 31);
+    generate(al, al, bl);
+    generate(ah, ah, bl);
+    tcg_gen_concat_i32_i64(d64, al, ah);
+
+    tcg_temp_free_i32(al);
+    tcg_temp_free_i32(ah);
+    tcg_temp_free_i32(bl);
+}
+
+static void gen_v4op(TCGv d64, TCGv a64, TCGv b64,
+                     void (*generate)(TCGv_i32, TCGv_i32, TCGv_i32))
+{
+    TCGv_i32 al = tcg_temp_new_i32();
+    TCGv_i32 ah = tcg_temp_new_i32();
+    TCGv_i32 bl = tcg_temp_new_i32();
+    TCGv_i32 bh = tcg_temp_new_i32();
+
+    tcg_gen_extr_i64_i32(al, ah, a64);
+    tcg_gen_extr_i64_i32(bl, bh, b64);
+    generate(al, al, bl);
+    generate(ah, ah, bh);
+    tcg_gen_concat_i32_i64(d64, al, ah);
+
+    tcg_temp_free_i32(al);
+    tcg_temp_free_i32(ah);
+    tcg_temp_free_i32(bl);
+    tcg_temp_free_i32(bh);
+}
+
+static TileExcp gen_signal(DisasContext *dc, int signo, int sigcode,
+                           const char *mnemonic)
+{
+    TCGv_i32 t0 = tcg_const_i32(signo);
+    TCGv_i32 t1 = tcg_const_i32(sigcode);
+
+    tcg_gen_st_i32(t0, cpu_env, offsetof(CPUTLGState, signo));
+    tcg_gen_st_i32(t1, cpu_env, offsetof(CPUTLGState, sigcode));
+
+    tcg_temp_free_i32(t1);
+    tcg_temp_free_i32(t0);
+
+    qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s", mnemonic);
+    return TILEGX_EXCP_SIGNAL;
+}
+
+static bool parse_from_addli(uint64_t bundle, int *signo, int *sigcode)
+{
+    int imm;
+
+    if ((get_Opcode_X0(bundle) != ADDLI_OPCODE_X0)
+        || (get_Dest_X0(bundle) != TILEGX_R_ZERO)
+        || (get_SrcA_X0(bundle) != TILEGX_R_ZERO)) {
+        return false;
+    }
+
+    imm = get_Imm16_X0(bundle);
+    *signo = imm & 0x3f;
+    *sigcode = (imm >> 6) & 0xf;
+
+    /* ??? The Linux kernel validates both signo and sigcode against the
+       known maximum for each signal.  Don't bother here.  */
+    return true;
+}
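As a worked example of this encoding (assuming the usual Linux numbering, SIGFPE = 8 and FPE_INTDIV = 1, neither of which appears in the patch itself): a guest "raise SIGFPE, FPE_INTDIV" would be emitted as an addli with destination and source both zero and an immediate of (1 << 6) | 8 = 0x48, which the code above decodes back into signo = 8 and sigcode = 1.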
+
+static TileExcp gen_specill(DisasContext *dc, unsigned dest, unsigned srca,
+                            uint64_t bundle)
+{
+    const char *mnemonic;
+    int signo;
+    int sigcode;
+
+    if (dest == 0x1c && srca == 0x25) {
+        signo = TARGET_SIGTRAP;
+        sigcode = TARGET_TRAP_BRKPT;
+        mnemonic = "bpt";
+    } else if (dest == 0x1d && srca == 0x25
+               && parse_from_addli(bundle, &signo, &sigcode)) {
+        mnemonic = "raise";
+    } else {
+        signo = TARGET_SIGILL;
+        sigcode = TARGET_ILL_ILLOPC;
+        mnemonic = "ill";
+    }
+
+    return gen_signal(dc, signo, sigcode, mnemonic);
 }
 
 static TileExcp gen_rr_opcode(DisasContext *dc, unsigned opext,
-                              unsigned dest, unsigned srca)
+                              unsigned dest, unsigned srca, uint64_t bundle)
 {
     TCGv tdest, tsrca;
     const char *mnemonic;
     TCGMemOp memop;
     TileExcp ret = TILEGX_EXCP_NONE;
+    bool prefetch_nofault = false;
 
     /* Eliminate instructions with no output before doing anything else.  */
     switch (opext) {
@@ -368,10 +519,9 @@
         mnemonic = "flushwb";
         goto done0;
     case OE_RR_X1(ILL):
+        return gen_specill(dc, dest, srca, bundle);
     case OE_RR_Y1(ILL):
-        mnemonic = (dest == 0x1c && srca == 0x25 ? "bpt" : "ill");
-        qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s", mnemonic);
-        return TILEGX_EXCP_OPCODE_UNKNOWN;
+        return gen_signal(dc, TARGET_SIGILL, TARGET_ILL_ILLOPC, "ill");
     case OE_RR_X1(MF):
         mnemonic = "mf";
         goto done0;
@@ -379,6 +529,15 @@
         /* ??? This should yield, especially in system mode.  */
         mnemonic = "nap";
         goto done0;
+    case OE_RR_X1(IRET):
+        gen_helper_ext01_ics(cpu_env);
+        dc->jmp.cond = TCG_COND_ALWAYS;
+        dc->jmp.dest = tcg_temp_new();
+        tcg_gen_ld_tl(dc->jmp.dest, cpu_env,
+                      offsetof(CPUTLGState, spregs[TILEGX_SPR_EX_CONTEXT_0_0]));
+        tcg_gen_andi_tl(dc->jmp.dest, dc->jmp.dest, ~7);
+        mnemonic = "iret";
+        goto done0;
     case OE_RR_X1(SWINT0):
     case OE_RR_X1(SWINT2):
     case OE_RR_X1(SWINT3):
@@ -388,7 +547,7 @@
         mnemonic = "swint1";
     done0:
         if (srca || dest) {
-            return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+            return TILEGX_EXCP_OPCODE_UNKNOWN;
         }
         qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s", mnemonic);
         return ret;
@@ -434,7 +593,7 @@
         tcg_gen_andi_tl(dc->jmp.dest, load_gr(dc, srca), ~7);
     done1:
         if (dest) {
-            return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+            return TILEGX_EXCP_OPCODE_UNKNOWN;
         }
         qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s", mnemonic, reg_names[srca]);
         return ret;
@@ -456,31 +615,33 @@
         break;
     case OE_RR_X0(FSINGLE_PACK1):
     case OE_RR_Y0(FSINGLE_PACK1):
-    case OE_RR_X1(IRET):
         return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RR_X1(LD1S):
         memop = MO_SB;
-        mnemonic = "ld1s";
+        mnemonic = "ld1s"; /* prefetch_l1_fault */
         goto do_load;
     case OE_RR_X1(LD1U):
         memop = MO_UB;
-        mnemonic = "ld1u";
+        mnemonic = "ld1u"; /* prefetch, prefetch_l1 */
+        prefetch_nofault = (dest == TILEGX_R_ZERO);
         goto do_load;
     case OE_RR_X1(LD2S):
         memop = MO_TESW;
-        mnemonic = "ld2s";
+        mnemonic = "ld2s"; /* prefetch_l2_fault */
         goto do_load;
     case OE_RR_X1(LD2U):
         memop = MO_TEUW;
-        mnemonic = "ld2u";
+        mnemonic = "ld2u"; /* prefetch_l2 */
+        prefetch_nofault = (dest == TILEGX_R_ZERO);
         goto do_load;
     case OE_RR_X1(LD4S):
         memop = MO_TESL;
-        mnemonic = "ld4s";
+        mnemonic = "ld4s"; /* prefetch_l3_fault */
         goto do_load;
     case OE_RR_X1(LD4U):
         memop = MO_TEUL;
-        mnemonic = "ld4u";
+        mnemonic = "ld4u"; /* prefetch_l3 */
+        prefetch_nofault = (dest == TILEGX_R_ZERO);
         goto do_load;
     case OE_RR_X1(LDNT1S):
         memop = MO_SB;
@@ -514,7 +675,9 @@
         memop = MO_TEQ;
         mnemonic = "ld";
     do_load:
-        tcg_gen_qemu_ld_tl(tdest, tsrca, dc->mmuidx, memop);
+        if (!prefetch_nofault) {
+            tcg_gen_qemu_ld_tl(tdest, tsrca, dc->mmuidx, memop);
+        }
         break;
     case OE_RR_X1(LDNA):
         tcg_gen_andi_tl(tdest, tsrca, ~7);
@@ -524,7 +687,7 @@
     case OE_RR_X1(LNK):
     case OE_RR_Y1(LNK):
         if (srca) {
-            return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+            return TILEGX_EXCP_OPCODE_UNKNOWN;
         }
         tcg_gen_movi_tl(tdest, dc->pc + TILEGX_BUNDLE_SIZE_IN_BYTES);
         mnemonic = "lnk";
@@ -546,14 +709,29 @@
         break;
     case OE_RR_X0(TBLIDXB0):
     case OE_RR_Y0(TBLIDXB0):
+        tcg_gen_deposit_tl(tdest, load_gr(dc, dest), tsrca, 2, 8);
+        mnemonic = "tblidxb0";
+        break;
     case OE_RR_X0(TBLIDXB1):
     case OE_RR_Y0(TBLIDXB1):
+        tcg_gen_shri_tl(tdest, tsrca, 8);
+        tcg_gen_deposit_tl(tdest, load_gr(dc, dest), tdest, 2, 8);
+        mnemonic = "tblidxb1";
+        break;
     case OE_RR_X0(TBLIDXB2):
     case OE_RR_Y0(TBLIDXB2):
+        tcg_gen_shri_tl(tdest, tsrca, 16);
+        tcg_gen_deposit_tl(tdest, load_gr(dc, dest), tdest, 2, 8);
+        mnemonic = "tblidxb2";
+        break;
     case OE_RR_X0(TBLIDXB3):
     case OE_RR_Y0(TBLIDXB3):
+        tcg_gen_shri_tl(tdest, tsrca, 24);
+        tcg_gen_deposit_tl(tdest, load_gr(dc, dest), tdest, 2, 8);
+        mnemonic = "tblidxb3";
+        break;
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 
     qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s, %s", mnemonic,
@@ -663,15 +841,41 @@
         mnemonic = "cmpne";
         break;
     case OE_RRR(CMULAF, 0, X0):
+        gen_helper_cmulaf(tdest, load_gr(dc, dest), tsrca, tsrcb);
+        mnemonic = "cmulaf";
+        break;
     case OE_RRR(CMULA, 0, X0):
+        gen_helper_cmula(tdest, load_gr(dc, dest), tsrca, tsrcb);
+        mnemonic = "cmula";
+        break;
     case OE_RRR(CMULFR, 0, X0):
+        gen_cmul2(tdest, tsrca, tsrcb, 15, 1 << 14);
+        mnemonic = "cmulfr";
+        break;
     case OE_RRR(CMULF, 0, X0):
+        gen_cmul2(tdest, tsrca, tsrcb, 15, 0);
+        mnemonic = "cmulf";
+        break;
     case OE_RRR(CMULHR, 0, X0):
+        gen_cmul2(tdest, tsrca, tsrcb, 16, 1 << 15);
+        mnemonic = "cmulhr";
+        break;
     case OE_RRR(CMULH, 0, X0):
+        gen_cmul2(tdest, tsrca, tsrcb, 16, 0);
+        mnemonic = "cmulh";
+        break;
     case OE_RRR(CMUL, 0, X0):
+        gen_helper_cmula(tdest, load_zero(dc), tsrca, tsrcb);
+        mnemonic = "cmul";
+        break;
     case OE_RRR(CRC32_32, 0, X0):
+        gen_helper_crc32_32(tdest, tsrca, tsrcb);
+        mnemonic = "crc32_32";
+        break;
     case OE_RRR(CRC32_8, 0, X0):
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        gen_helper_crc32_8(tdest, tsrca, tsrcb);
+        mnemonic = "crc32_8";
+        break;
     case OE_RRR(DBLALIGN2, 0, X0):
     case OE_RRR(DBLALIGN2, 0, X1):
         gen_dblaligni(tdest, tsrca, tsrcb, 16);
@@ -1024,8 +1228,12 @@
         break;
     case OE_RRR(V1ADDUC, 0, X0):
     case OE_RRR(V1ADDUC, 0, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V1ADD, 0, X0):
     case OE_RRR(V1ADD, 0, X1):
+        gen_v12add(tdest, tsrca, tsrcb, V1_IMM(0x80));
+        mnemonic = "v1add";
+        break;
     case OE_RRR(V1ADIFFU, 0, X0):
     case OE_RRR(V1AVGU, 0, X0):
         return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
@@ -1060,17 +1268,28 @@
     case OE_RRR(V1DOTPUS, 0, X0):
     case OE_RRR(V1DOTPU, 0, X0):
     case OE_RRR(V1DOTP, 0, X0):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V1INT_H, 0, X0):
     case OE_RRR(V1INT_H, 0, X1):
+        gen_helper_v1int_h(tdest, tsrca, tsrcb);
+        mnemonic = "v1int_h";
+        break;
     case OE_RRR(V1INT_L, 0, X0):
     case OE_RRR(V1INT_L, 0, X1):
+        gen_helper_v1int_l(tdest, tsrca, tsrcb);
+        mnemonic = "v1int_l";
+        break;
     case OE_RRR(V1MAXU, 0, X0):
     case OE_RRR(V1MAXU, 0, X1):
     case OE_RRR(V1MINU, 0, X0):
     case OE_RRR(V1MINU, 0, X1):
     case OE_RRR(V1MNZ, 0, X0):
     case OE_RRR(V1MNZ, 0, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V1MULTU, 0, X0):
+        gen_helper_v1multu(tdest, tsrca, tsrcb);
+        mnemonic = "v1multu";
+        break;
     case OE_RRR(V1MULUS, 0, X0):
     case OE_RRR(V1MULU, 0, X0):
     case OE_RRR(V1MZ, 0, X0):
@@ -1095,12 +1314,20 @@
         break;
     case OE_RRR(V1SUBUC, 0, X0):
     case OE_RRR(V1SUBUC, 0, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V1SUB, 0, X0):
     case OE_RRR(V1SUB, 0, X1):
+        gen_v12sub(tdest, tsrca, tsrcb, V1_IMM(0x80));
+        mnemonic = "v1sub";
+        break;
     case OE_RRR(V2ADDSC, 0, X0):
     case OE_RRR(V2ADDSC, 0, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V2ADD, 0, X0):
     case OE_RRR(V2ADD, 0, X1):
+        gen_v12add(tdest, tsrca, tsrcb, V2_IMM(0x8000));
+        mnemonic = "v2add";
+        break;
     case OE_RRR(V2ADIFFS, 0, X0):
     case OE_RRR(V2AVGS, 0, X0):
     case OE_RRR(V2CMPEQ, 0, X0):
@@ -1117,10 +1344,17 @@
     case OE_RRR(V2CMPNE, 0, X1):
     case OE_RRR(V2DOTPA, 0, X0):
     case OE_RRR(V2DOTP, 0, X0):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V2INT_H, 0, X0):
     case OE_RRR(V2INT_H, 0, X1):
+        gen_helper_v2int_h(tdest, tsrca, tsrcb);
+        mnemonic = "v2int_h";
+        break;
     case OE_RRR(V2INT_L, 0, X0):
     case OE_RRR(V2INT_L, 0, X1):
+        gen_helper_v2int_l(tdest, tsrca, tsrcb);
+        mnemonic = "v2int_l";
+        break;
     case OE_RRR(V2MAXS, 0, X0):
     case OE_RRR(V2MAXS, 0, X1):
     case OE_RRR(V2MINS, 0, X0):
@@ -1129,7 +1363,11 @@
     case OE_RRR(V2MNZ, 0, X1):
     case OE_RRR(V2MULFSC, 0, X0):
     case OE_RRR(V2MULS, 0, X0):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V2MULTS, 0, X0):
+        gen_helper_v2mults(tdest, tsrca, tsrcb);
+        mnemonic = "v2mults";
+        break;
     case OE_RRR(V2MZ, 0, X0):
     case OE_RRR(V2MZ, 0, X1):
     case OE_RRR(V2PACKH, 0, X0):
@@ -1144,21 +1382,38 @@
     case OE_RRR(V2SADU, 0, X0):
     case OE_RRR(V2SHLSC, 0, X0):
     case OE_RRR(V2SHLSC, 0, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V2SHL, 0, X0):
     case OE_RRR(V2SHL, 0, X1):
+        gen_helper_v2shl(tdest, tsrca, tsrcb);
+        mnemonic = "v2shl";
+        break;
     case OE_RRR(V2SHRS, 0, X0):
     case OE_RRR(V2SHRS, 0, X1):
+        gen_helper_v2shrs(tdest, tsrca, tsrcb);
+        mnemonic = "v2shrs";
+        break;
     case OE_RRR(V2SHRU, 0, X0):
     case OE_RRR(V2SHRU, 0, X1):
+        gen_helper_v2shru(tdest, tsrca, tsrcb);
+        mnemonic = "v2shru";
+        break;
     case OE_RRR(V2SUBSC, 0, X0):
     case OE_RRR(V2SUBSC, 0, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V2SUB, 0, X0):
     case OE_RRR(V2SUB, 0, X1):
+        gen_v12sub(tdest, tsrca, tsrcb, V2_IMM(0x8000));
+        mnemonic = "v2sub";
+        break;
     case OE_RRR(V4ADDSC, 0, X0):
     case OE_RRR(V4ADDSC, 0, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V4ADD, 0, X0):
     case OE_RRR(V4ADD, 0, X1):
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        gen_v4op(tdest, tsrca, tsrcb, tcg_gen_add_i32);
+        mnemonic = "v4add";
+        break;
     case OE_RRR(V4INT_H, 0, X0):
     case OE_RRR(V4INT_H, 0, X1):
         tcg_gen_shri_tl(tdest, tsrcb, 32);
@@ -1174,17 +1429,30 @@
     case OE_RRR(V4PACKSC, 0, X1):
     case OE_RRR(V4SHLSC, 0, X0):
     case OE_RRR(V4SHLSC, 0, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V4SHL, 0, X0):
     case OE_RRR(V4SHL, 0, X1):
+        gen_v4sh(tdest, tsrca, tsrcb, tcg_gen_shl_i32);
+        mnemonic = "v4shl";
+        break;
     case OE_RRR(V4SHRS, 0, X0):
     case OE_RRR(V4SHRS, 0, X1):
+        gen_v4sh(tdest, tsrca, tsrcb, tcg_gen_sar_i32);
+        mnemonic = "v4shrs";
+        break;
     case OE_RRR(V4SHRU, 0, X0):
     case OE_RRR(V4SHRU, 0, X1):
+        gen_v4sh(tdest, tsrca, tsrcb, tcg_gen_shr_i32);
+        mnemonic = "v4shru";
+        break;
     case OE_RRR(V4SUBSC, 0, X0):
     case OE_RRR(V4SUBSC, 0, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V4SUB, 0, X0):
     case OE_RRR(V4SUB, 0, X1):
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        gen_v4op(tdest, tsrca, tsrcb, tcg_gen_sub_i32);
+        mnemonic = "v4sub";
+        break;
     case OE_RRR(XOR, 0, X0):
     case OE_RRR(XOR, 0, X1):
     case OE_RRR(XOR, 5, Y0):
@@ -1193,7 +1461,7 @@
         mnemonic = "xor";
         break;
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 
     qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s, %s, %s", mnemonic,
@@ -1206,6 +1474,7 @@
 {
     TCGv tdest = dest_gr(dc, dest);
     TCGv tsrca = load_gr(dc, srca);
+    bool prefetch_nofault = false;
     const char *mnemonic;
     TCGMemOp memop;
     int i2, i3;
@@ -1255,27 +1524,30 @@
         break;
     case OE_IM(LD1S_ADD, X1):
         memop = MO_SB;
-        mnemonic = "ld1s_add";
+        mnemonic = "ld1s_add"; /* prefetch_add_l1_fault */
         goto do_load_add;
     case OE_IM(LD1U_ADD, X1):
         memop = MO_UB;
-        mnemonic = "ld1u_add";
+        mnemonic = "ld1u_add"; /* prefetch_add_l1 */
+        prefetch_nofault = (dest == TILEGX_R_ZERO);
         goto do_load_add;
     case OE_IM(LD2S_ADD, X1):
         memop = MO_TESW;
-        mnemonic = "ld2s_add";
+        mnemonic = "ld2s_add"; /* prefetch_add_l2_fault */
         goto do_load_add;
     case OE_IM(LD2U_ADD, X1):
         memop = MO_TEUW;
-        mnemonic = "ld2u_add";
+        mnemonic = "ld2u_add"; /* prefetch_add_l2 */
+        prefetch_nofault = (dest == TILEGX_R_ZERO);
         goto do_load_add;
     case OE_IM(LD4S_ADD, X1):
         memop = MO_TESL;
-        mnemonic = "ld4s_add";
+        mnemonic = "ld4s_add"; /* prefetch_add_l3_fault */
         goto do_load_add;
     case OE_IM(LD4U_ADD, X1):
         memop = MO_TEUL;
-        mnemonic = "ld4u_add";
+        mnemonic = "ld4u_add"; /* prefetch_add_l3 */
+        prefetch_nofault = (dest == TILEGX_R_ZERO);
         goto do_load_add;
     case OE_IM(LDNT1S_ADD, X1):
         memop = MO_SB;
@@ -1307,9 +1579,11 @@
         goto do_load_add;
     case OE_IM(LD_ADD, X1):
         memop = MO_TEQ;
-        mnemonic = "ldnt_add";
+        mnemonic = "ld_add";
     do_load_add:
-        tcg_gen_qemu_ld_tl(tdest, tsrca, dc->mmuidx, memop);
+        if (!prefetch_nofault) {
+            tcg_gen_qemu_ld_tl(tdest, tsrca, dc->mmuidx, memop);
+        }
         tcg_gen_addi_tl(dest_gr(dc, srca), tsrca, imm);
         break;
     case OE_IM(LDNA_ADD, X1):
@@ -1325,6 +1599,11 @@
         break;
     case OE_IM(V1ADDI, X0):
     case OE_IM(V1ADDI, X1):
+        t0 = tcg_const_tl(V1_IMM(imm));
+        gen_v12add(tdest, tsrca, t0, V1_IMM(0x80));
+        tcg_temp_free(t0);
+        mnemonic = "v1addi";
+        break;
     case OE_IM(V1CMPEQI, X0):
     case OE_IM(V1CMPEQI, X1):
         tcg_gen_xori_tl(tdest, tsrca, V1_IMM(imm));
@@ -1339,8 +1618,14 @@
     case OE_IM(V1MAXUI, X1):
     case OE_IM(V1MINUI, X0):
     case OE_IM(V1MINUI, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_IM(V2ADDI, X0):
     case OE_IM(V2ADDI, X1):
+        t0 = tcg_const_tl(V2_IMM(imm));
+        gen_v12add(tdest, tsrca, t0, V2_IMM(0x8000));
+        tcg_temp_free(t0);
+        mnemonic = "v2addi";
+        break;
     case OE_IM(V2CMPEQI, X0):
     case OE_IM(V2CMPEQI, X1):
     case OE_IM(V2CMPLTSI, X0):
@@ -1427,11 +1712,27 @@
         break;
     case OE_SH(V2SHLI, X0):
     case OE_SH(V2SHLI, X1):
+        i2 = imm & 15;
+        i3 = 0xffff >> i2;
+        tcg_gen_andi_tl(tdest, tsrca, V2_IMM(i3));
+        tcg_gen_shli_tl(tdest, tdest, i2);
+        mnemonic = "v2shli";
+        break;
     case OE_SH(V2SHRSI, X0):
     case OE_SH(V2SHRSI, X1):
+        t0 = tcg_const_tl(imm & 15);
+        gen_helper_v2shrs(tdest, tsrca, t0);
+        tcg_temp_free(t0);
+        mnemonic = "v2shrsi";
+        break;
     case OE_SH(V2SHRUI, X0):
     case OE_SH(V2SHRUI, X1):
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        i2 = imm & 15;
+        i3 = (0xffff << i2) & 0xffff;
+        tcg_gen_andi_tl(tdest, tsrca, V2_IMM(i3));
+        tcg_gen_shri_tl(tdest, tdest, i2);
+        mnemonic = "v2shrui";
+        break;
 
     case OE(ADDLI_OPCODE_X0, 0, X0):
     case OE(ADDLI_OPCODE_X1, 0, X1):
@@ -1452,7 +1753,7 @@
         break;
 
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 
     qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s, %s, %d", mnemonic,
@@ -1546,7 +1847,7 @@
         break;
 
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 
     qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s, %s, %u, %u", mnemonic,
@@ -1602,7 +1903,7 @@
         mnemonic = "blbs";
         break;
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
@@ -1654,6 +1955,10 @@
       offsetof(CPUTLGState, spregs[TILEGX_SPR_CRITICAL_SEC]), 0, 0)
     D(SIM_CONTROL,
       offsetof(CPUTLGState, spregs[TILEGX_SPR_SIM_CONTROL]), 0, 0)
+    D(EX_CONTEXT_0_0,
+      offsetof(CPUTLGState, spregs[TILEGX_SPR_EX_CONTEXT_0_0]), 0, 0)
+    D(EX_CONTEXT_0_1,
+      offsetof(CPUTLGState, spregs[TILEGX_SPR_EX_CONTEXT_0_1]), 0, 0)
     }
 
 #undef D
@@ -1669,7 +1974,7 @@
 
     if (def == NULL) {
         qemu_log_mask(CPU_LOG_TB_IN_ASM, "mtspr spr[%u], %s", spr, reg_names[srca]);
-        return TILEGX_EXCP_OPCODE_UNKNOWN;
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     }
 
     tsrca = load_gr(dc, srca);
@@ -1689,7 +1994,7 @@
 
     if (def == NULL) {
         qemu_log_mask(CPU_LOG_TB_IN_ASM, "mfspr %s, spr[%u]", reg_names[dest], spr);
-        return TILEGX_EXCP_OPCODE_UNKNOWN;
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     }
 
     tdest = dest_gr(dc, dest);
@@ -1715,7 +2020,7 @@
     case RRR_1_OPCODE_Y0:
         if (ext == UNARY_RRR_1_OPCODE_Y0) {
             ext = get_UnaryOpcodeExtension_Y0(bundle);
-            return gen_rr_opcode(dc, OE(opc, ext, Y0), dest, srca);
+            return gen_rr_opcode(dc, OE(opc, ext, Y0), dest, srca, bundle);
         }
         /* fallthru */
     case RRR_0_OPCODE_Y0:
@@ -1744,7 +2049,7 @@
         return gen_rri_opcode(dc, OE(opc, 0, Y0), dest, srca, imm);
 
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 }
 
@@ -1761,7 +2066,7 @@
     case RRR_1_OPCODE_Y1:
         if (ext == UNARY_RRR_1_OPCODE_Y0) {
             ext = get_UnaryOpcodeExtension_Y1(bundle);
-            return gen_rr_opcode(dc, OE(opc, ext, Y1), dest, srca);
+            return gen_rr_opcode(dc, OE(opc, ext, Y1), dest, srca, bundle);
         }
         /* fallthru */
     case RRR_0_OPCODE_Y1:
@@ -1788,7 +2093,7 @@
         return gen_rri_opcode(dc, OE(opc, 0, Y1), dest, srca, imm);
 
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 }
 
@@ -1846,7 +2151,7 @@
         return gen_st_opcode(dc, 0, srca, srcbdest, MO_TEQ, "st");
 
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 }
 
@@ -1863,7 +2168,7 @@
         ext = get_RRROpcodeExtension_X0(bundle);
         if (ext == UNARY_RRR_0_OPCODE_X0) {
             ext = get_UnaryOpcodeExtension_X0(bundle);
-            return gen_rr_opcode(dc, OE(opc, ext, X0), dest, srca);
+            return gen_rr_opcode(dc, OE(opc, ext, X0), dest, srca, bundle);
         }
         srcb = get_SrcB_X0(bundle);
         return gen_rrr_opcode(dc, OE(opc, ext, X0), dest, srca, srcb);
@@ -1891,7 +2196,7 @@
         return gen_rri_opcode(dc, OE(opc, 0, X0), dest, srca, imm);
 
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 }
 
@@ -1910,7 +2215,7 @@
         switch (ext) {
         case UNARY_RRR_0_OPCODE_X1:
             ext = get_UnaryOpcodeExtension_X1(bundle);
-            return gen_rr_opcode(dc, OE(opc, ext, X1), dest, srca);
+            return gen_rr_opcode(dc, OE(opc, ext, X1), dest, srca, bundle);
         case ST1_RRR_0_OPCODE_X1:
             return gen_st_opcode(dc, dest, srca, srcb, MO_UB, "st1");
         case ST2_RRR_0_OPCODE_X1:
@@ -1981,7 +2286,7 @@
         return gen_rri_opcode(dc, OE(opc, 0, X1), dest, srca, imm);
 
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 }
 
@@ -1992,8 +2297,15 @@
         return;
     }
     gen_exception(dc, excp);
-    if (excp == TILEGX_EXCP_OPCODE_UNIMPLEMENTED) {
+    switch (excp) {
+    case TILEGX_EXCP_OPCODE_UNIMPLEMENTED:
         qemu_log_mask(LOG_UNIMP, "UNIMP %s, [" FMT64X "]\n", type, bundle);
+        break;
+    case TILEGX_EXCP_OPCODE_UNKNOWN:
+        qemu_log_mask(LOG_UNIMP, "UNKNOWN %s, [" FMT64X "]\n", type, bundle);
+        break;
+    default:
+        break;
     }
 }
 
@@ -2008,10 +2320,6 @@
     }
     dc->num_wb = 0;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-        tcg_gen_debug_insn_start(dc->pc);
-    }
-
     qemu_log_mask(CPU_LOG_TB_IN_ASM, "  %" PRIx64 ":  { ", dc->pc);
     if (get_Mode(bundle)) {
         notice_excp(dc, bundle, "y0", decode_y0(dc, bundle));
@@ -2053,17 +2361,14 @@
     }
 }
 
-static inline void gen_intermediate_code_internal(TileGXCPU *cpu,
-                                                  TranslationBlock *tb,
-                                                  bool search_pc)
+void gen_intermediate_code(CPUTLGState *env, struct TranslationBlock *tb)
 {
+    TileGXCPU *cpu = tilegx_env_get_cpu(env);
     DisasContext ctx;
     DisasContext *dc = &ctx;
     CPUState *cs = CPU(cpu);
-    CPUTLGState *env = &cpu->env;
     uint64_t pc_start = tb->pc;
     uint64_t next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
-    int j, lj = -1;
     int num_insns = 0;
     int max_insns = tb->cflags & CF_COUNT_MASK;
 
@@ -2085,21 +2390,15 @@
     if (cs->singlestep_enabled || singlestep) {
         max_insns = 1;
     }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
     gen_tb_start(tb);
 
     while (1) {
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j) {
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-                }
-            }
-            tcg_ctx.gen_opc_pc[lj] = dc->pc;
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-            tcg_ctx.gen_opc_icount[lj] = num_insns;
-        }
+        tcg_gen_insn_start(dc->pc);
+        num_insns++;
+
         translate_one_bundle(dc, cpu_ldq_data(env, dc->pc));
 
         if (dc->exit_tb) {
@@ -2107,7 +2406,7 @@
             break;
         }
         dc->pc += TILEGX_BUNDLE_SIZE_IN_BYTES;
-        if (++num_insns >= max_insns
+        if (num_insns >= max_insns
             || dc->pc >= next_page_start
             || tcg_op_buf_full()) {
             /* Ending the TB due to TB size or page boundary.  Set PC.  */
@@ -2118,33 +2417,16 @@
     }
 
     gen_tb_end(tb, num_insns);
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j) {
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-        }
-    } else {
-        tb->size = dc->pc - pc_start;
-        tb->icount = num_insns;
-    }
+    tb->size = dc->pc - pc_start;
+    tb->icount = num_insns;
 
     qemu_log_mask(CPU_LOG_TB_IN_ASM, "\n");
 }
 
-void gen_intermediate_code(CPUTLGState *env, struct TranslationBlock *tb)
+void restore_state_to_opc(CPUTLGState *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    gen_intermediate_code_internal(tilegx_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc(CPUTLGState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(tilegx_env_get_cpu(env), tb, true);
-}
-
-void restore_state_to_opc(CPUTLGState *env, TranslationBlock *tb, int pc_pos)
-{
-    env->pc = tcg_ctx.gen_opc_pc[pc_pos];
+    env->pc = data[0];
 }
 
 void tilegx_tcg_init(void)
diff --git a/target-tricore/cpu.c b/target-tricore/cpu.c
index 2029ef6..ed8b030 100644
--- a/target-tricore/cpu.c
+++ b/target-tricore/cpu.c
@@ -170,6 +170,12 @@
     cc->set_pc = tricore_cpu_set_pc;
     cc->synchronize_from_tb = tricore_cpu_synchronize_from_tb;
 
+    /*
+     * Reason: tricore_cpu_initfn() calls cpu_exec_init(), which saves
+     * the object in the cpus list, leaving a dangling pointer behind
+     * after the final object_unref().
+     */
+    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 static void cpu_register(const TriCoreCPUInfo *info)
diff --git a/target-tricore/translate.c b/target-tricore/translate.c
index 440f30a..135c583 100644
--- a/target-tricore/translate.c
+++ b/target-tricore/translate.c
@@ -8266,21 +8266,26 @@
     }
 }
 
-static inline void
-gen_intermediate_code_internal(TriCoreCPU *cpu, struct TranslationBlock *tb,
-                              int search_pc)
+void gen_intermediate_code(CPUTriCoreState *env, struct TranslationBlock *tb)
 {
+    TriCoreCPU *cpu = tricore_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPUTriCoreState *env = &cpu->env;
     DisasContext ctx;
     target_ulong pc_start;
-    int num_insns;
-
-    if (search_pc) {
-        qemu_log("search pc %d\n", search_pc);
-    }
+    int num_insns, max_insns;
 
     num_insns = 0;
+    max_insns = tb->cflags & CF_COUNT_MASK;
+    if (max_insns == 0) {
+        max_insns = CF_COUNT_MASK;
+    }
+    if (singlestep) {
+        max_insns = 1;
+    }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
+
     pc_start = tb->pc;
     ctx.pc = pc_start;
     ctx.saved_pc = -1;
@@ -8292,17 +8297,13 @@
     tcg_clear_temp_count();
     gen_tb_start(tb);
     while (ctx.bstate == BS_NONE) {
+        tcg_gen_insn_start(ctx.pc);
+        num_insns++;
+
         ctx.opcode = cpu_ldl_code(env, ctx.pc);
         decode_opc(env, &ctx, 0);
 
-        num_insns++;
-
-        if (tcg_op_buf_full()) {
-            gen_save_pc(ctx.next_pc);
-            tcg_gen_exit_tb(0);
-            break;
-        }
-        if (singlestep) {
+        if (num_insns >= max_insns || tcg_op_buf_full()) {
             gen_save_pc(ctx.next_pc);
             tcg_gen_exit_tb(0);
             break;
@@ -8311,12 +8312,9 @@
     }
 
     gen_tb_end(tb, num_insns);
-    if (search_pc) {
-        printf("done_generating search pc\n");
-    } else {
-        tb->size = ctx.pc - pc_start;
-        tb->icount = num_insns;
-    }
+    tb->size = ctx.pc - pc_start;
+    tb->icount = num_insns;
+
     if (tcg_check_temp_count()) {
         printf("LEAK at %08x\n", env->PC);
     }
@@ -8331,21 +8329,10 @@
 }
 
 void
-gen_intermediate_code(CPUTriCoreState *env, struct TranslationBlock *tb)
+restore_state_to_opc(CPUTriCoreState *env, TranslationBlock *tb,
+                     target_ulong *data)
 {
-    gen_intermediate_code_internal(tricore_env_get_cpu(env), tb, false);
-}
-
-void
-gen_intermediate_code_pc(CPUTriCoreState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(tricore_env_get_cpu(env), tb, true);
-}
-
-void
-restore_state_to_opc(CPUTriCoreState *env, TranslationBlock *tb, int pc_pos)
-{
-    env->PC = tcg_ctx.gen_opc_pc[pc_pos];
+    env->PC = data[0];
 }
 /*
  *
diff --git a/target-unicore32/cpu.c b/target-unicore32/cpu.c
index fc451a1..e5252eb 100644
--- a/target-unicore32/cpu.c
+++ b/target-unicore32/cpu.c
@@ -155,6 +155,13 @@
     cc->get_phys_page_debug = uc32_cpu_get_phys_page_debug;
 #endif
     dc->vmsd = &vmstate_uc32_cpu;
+
+    /*
+     * Reason: uc32_cpu_initfn() calls cpu_exec_init(), which saves
+     * the object in the cpus list, leaving a dangling pointer behind
+     * after the final object_unref().
+     */
+    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 static void uc32_register_cpu_type(const UniCore32CPUInfo *info)
diff --git a/target-unicore32/translate.c b/target-unicore32/translate.c
index 2fc78e6..48f89fb 100644
--- a/target-unicore32/translate.c
+++ b/target-unicore32/translate.c
@@ -1794,10 +1794,6 @@
     UniCore32CPU *cpu = uc32_env_get_cpu(env);
     unsigned int insn;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-        tcg_gen_debug_insn_start(s->pc);
-    }
-
     insn = cpu_ldl_code(env, s->pc);
     s->pc += 4;
 
@@ -1868,16 +1864,12 @@
 }
 
 /* generate intermediate code in gen_opc_buf and gen_opparam_buf for
-   basic block 'tb'. If search_pc is TRUE, also generate PC
-   information for each intermediate instruction. */
-static inline void gen_intermediate_code_internal(UniCore32CPU *cpu,
-        TranslationBlock *tb, bool search_pc)
+   basic block 'tb'.  */
+void gen_intermediate_code(CPUUniCore32State *env, TranslationBlock *tb)
 {
+    UniCore32CPU *cpu = uc32_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPUUniCore32State *env = &cpu->env;
     DisasContext dc1, *dc = &dc1;
-    CPUBreakpoint *bp;
-    int j, lj;
     target_ulong pc_start;
     uint32_t next_page_start;
     int num_insns;
@@ -1899,12 +1891,14 @@
     cpu_F0d = tcg_temp_new_i64();
     cpu_F1d = tcg_temp_new_i64();
     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
-    lj = -1;
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
     if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
     }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
 
 #ifndef CONFIG_USER_ONLY
     if ((env->uncached_asr & ASR_M) == ASR_MODE_USER) {
@@ -1916,33 +1910,20 @@
 
     gen_tb_start(tb);
     do {
-        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-                if (bp->pc == dc->pc) {
-                    gen_set_pc_im(dc->pc);
-                    gen_exception(EXCP_DEBUG);
-                    dc->is_jmp = DISAS_JUMP;
-                    /* Advance PC so that clearing the breakpoint will
-                       invalidate this TB.  */
-                    dc->pc += 2; /* FIXME */
-                    goto done_generating;
-                }
-            }
-        }
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j) {
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-                }
-            }
-            tcg_ctx.gen_opc_pc[lj] = dc->pc;
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-            tcg_ctx.gen_opc_icount[lj] = num_insns;
+        tcg_gen_insn_start(dc->pc);
+        num_insns++;
+
+        if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) {
+            gen_set_pc_im(dc->pc);
+            gen_exception(EXCP_DEBUG);
+            dc->is_jmp = DISAS_JUMP;
+            /* Advance PC so that clearing the breakpoint will
+               invalidate this TB.  */
+            dc->pc += 2; /* FIXME */
+            goto done_generating;
         }
 
-        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) {
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
         }
 
@@ -1961,7 +1942,6 @@
          * Otherwise the subsequent code could get translated several times.
          * Also stop translation when a page boundary is reached.  This
          * ensures prefetch aborts occur at the right place.  */
-        num_insns++;
     } while (!dc->is_jmp && !tcg_op_buf_full() &&
              !cs->singlestep_enabled &&
              !singlestep &&
@@ -2043,26 +2023,8 @@
         qemu_log("\n");
     }
 #endif
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j) {
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-        }
-    } else {
-        tb->size = dc->pc - pc_start;
-        tb->icount = num_insns;
-    }
-}
-
-void gen_intermediate_code(CPUUniCore32State *env, TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(uc32_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc(CPUUniCore32State *env, TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(uc32_env_get_cpu(env), tb, true);
+    tb->size = dc->pc - pc_start;
+    tb->icount = num_insns;
 }
 
 static const char *cpu_mode_names[16] = {
@@ -2133,7 +2095,8 @@
     cpu_dump_state_ucf64(env, f, cpu_fprintf, flags);
 }
 
-void restore_state_to_opc(CPUUniCore32State *env, TranslationBlock *tb, int pc_pos)
+void restore_state_to_opc(CPUUniCore32State *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    env->regs[31] = tcg_ctx.gen_opc_pc[pc_pos];
+    env->regs[31] = data[0];
 }
diff --git a/target-xtensa/cpu.c b/target-xtensa/cpu.c
index da8129d..4e49bee 100644
--- a/target-xtensa/cpu.c
+++ b/target-xtensa/cpu.c
@@ -155,6 +155,13 @@
 #endif
     cc->debug_excp_handler = xtensa_breakpoint_handler;
     dc->vmsd = &vmstate_xtensa_cpu;
+
+    /*
+     * Reason: xtensa_cpu_initfn() calls cpu_exec_init(), which saves
+     * the object in the cpus list, leaving a dangling pointer behind
+     * after the final object_unref().
+     */
+    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 static const TypeInfo xtensa_cpu_type_info = {
diff --git a/target-xtensa/cpu.h b/target-xtensa/cpu.h
index 148a0f8..006bcb7 100644
--- a/target-xtensa/cpu.h
+++ b/target-xtensa/cpu.h
@@ -382,7 +382,6 @@
 #include "cpu-qom.h"
 
 #define cpu_exec cpu_xtensa_exec
-#define cpu_gen_code cpu_xtensa_gen_code
 #define cpu_signal_handler cpu_xtensa_signal_handler
 #define cpu_list xtensa_cpu_list
 
diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index a29b3e6..fda91b7 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -2984,22 +2984,6 @@
     return xtensa_op0_insn_len(OP0);
 }
 
-static void check_breakpoint(CPUXtensaState *env, DisasContext *dc)
-{
-    CPUState *cs = CPU(xtensa_env_get_cpu(env));
-    CPUBreakpoint *bp;
-
-    if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-        QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-            if (bp->pc == dc->pc) {
-                tcg_gen_movi_i32(cpu_pc, dc->pc);
-                gen_exception(dc, EXCP_DEBUG);
-                dc->is_jmp = DISAS_UPDATE;
-             }
-        }
-    }
-}
-
 static void gen_ibreak_check(CPUXtensaState *env, DisasContext *dc)
 {
     unsigned i;
@@ -3013,15 +2997,12 @@
     }
 }
 
-static inline
-void gen_intermediate_code_internal(XtensaCPU *cpu,
-                                    TranslationBlock *tb, bool search_pc)
+void gen_intermediate_code(CPUXtensaState *env, TranslationBlock *tb)
 {
+    XtensaCPU *cpu = xtensa_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPUXtensaState *env = &cpu->env;
     DisasContext dc;
     int insn_count = 0;
-    int j, lj = -1;
     int max_insns = tb->cflags & CF_COUNT_MASK;
     uint32_t pc_start = tb->pc;
     uint32_t next_page_start =
@@ -3030,6 +3011,9 @@
     if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
     }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
 
     dc.config = env->config;
     dc.singlestep_enabled = cs->singlestep_enabled;
@@ -3062,28 +3046,19 @@
     }
 
     do {
-        check_breakpoint(env, &dc);
-
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j) {
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-                }
-            }
-            tcg_ctx.gen_opc_pc[lj] = dc.pc;
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-            tcg_ctx.gen_opc_icount[lj] = insn_count;
-        }
-
-        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-            tcg_gen_debug_insn_start(dc.pc);
-        }
+        tcg_gen_insn_start(dc.pc);
+        ++insn_count;
 
         ++dc.ccount_delta;
 
-        if (insn_count + 1 == max_insns && (tb->cflags & CF_LAST_IO)) {
+        if (unlikely(cpu_breakpoint_test(cs, dc.pc, BP_ANY))) {
+            tcg_gen_movi_i32(cpu_pc, dc.pc);
+            gen_exception(&dc, EXCP_DEBUG);
+            dc.is_jmp = DISAS_UPDATE;
+            break;
+        }
+
+        if (insn_count == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
         }
 
@@ -3104,7 +3079,6 @@
         }
 
         disas_xtensa_insn(env, &dc);
-        ++insn_count;
         if (dc.icount) {
             tcg_gen_mov_i32(cpu_SR[ICOUNT], dc.next_icount);
         }
@@ -3142,24 +3116,8 @@
         qemu_log("\n");
     }
 #endif
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        memset(tcg_ctx.gen_opc_instr_start + lj + 1, 0,
-                (j - lj) * sizeof(tcg_ctx.gen_opc_instr_start[0]));
-    } else {
-        tb->size = dc.pc - pc_start;
-        tb->icount = insn_count;
-    }
-}
-
-void gen_intermediate_code(CPUXtensaState *env, TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(xtensa_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc(CPUXtensaState *env, TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(xtensa_env_get_cpu(env), tb, true);
+    tb->size = dc.pc - pc_start;
+    tb->icount = insn_count;
 }
 
 void xtensa_cpu_dump_state(CPUState *cs, FILE *f,
@@ -3213,7 +3171,8 @@
     }
 }
 
-void restore_state_to_opc(CPUXtensaState *env, TranslationBlock *tb, int pc_pos)
+void restore_state_to_opc(CPUXtensaState *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    env->pc = tcg_ctx.gen_opc_pc[pc_pos];
+    env->pc = data[0];
 }
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 6da083a..4e20dc1 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -700,17 +700,53 @@
 #error must include QEMU headers
 #endif
 
-/* debug info: write the PC of the corresponding QEMU CPU instruction */
-static inline void tcg_gen_debug_insn_start(uint64_t pc)
+#if TARGET_INSN_START_WORDS == 1
+# if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
+static inline void tcg_gen_insn_start(target_ulong pc)
 {
-    /* XXX: must really use a 32 bit size for TCGArg in all cases */
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-    tcg_gen_op2ii(INDEX_op_debug_insn_start,
-                  (uint32_t)(pc), (uint32_t)(pc >> 32));
-#else
-    tcg_gen_op1i(INDEX_op_debug_insn_start, pc);
-#endif
+    tcg_gen_op1(&tcg_ctx, INDEX_op_insn_start, pc);
 }
+# else
+static inline void tcg_gen_insn_start(target_ulong pc)
+{
+    tcg_gen_op2(&tcg_ctx, INDEX_op_insn_start,
+                (uint32_t)pc, (uint32_t)(pc >> 32));
+}
+# endif
+#elif TARGET_INSN_START_WORDS == 2
+# if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
+static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1)
+{
+    tcg_gen_op2(&tcg_ctx, INDEX_op_insn_start, pc, a1);
+}
+# else
+static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1)
+{
+    tcg_gen_op4(&tcg_ctx, INDEX_op_insn_start,
+                (uint32_t)pc, (uint32_t)(pc >> 32),
+                (uint32_t)a1, (uint32_t)(a1 >> 32));
+}
+# endif
+#elif TARGET_INSN_START_WORDS == 3
+# if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
+static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1,
+                                      target_ulong a2)
+{
+    tcg_gen_op3(&tcg_ctx, INDEX_op_insn_start, pc, a1, a2);
+}
+# else
+static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1,
+                                      target_ulong a2)
+{
+    tcg_gen_op6(&tcg_ctx, INDEX_op_insn_start,
+                (uint32_t)pc, (uint32_t)(pc >> 32),
+                (uint32_t)a1, (uint32_t)(a1 >> 32),
+                (uint32_t)a2, (uint32_t)(a2 >> 32));
+}
+# endif
+#else
+# error "Unhandled number of operands to insn_start"
+#endif
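To illustrate how a target is expected to use this pairing (hypothetical target and made-up field names; only the tcg_gen_insn_start()/restore_state_to_opc() protocol is from the patch): with TARGET_INSN_START_EXTRA_WORDS defined as 1, the translator records two words per guest instruction and the restore hook reads them back in the same order:

    /* In the target's translation loop: */
    tcg_gen_insn_start(dc->pc, dc->flags);   /* pc plus one extra word */

    /* In the target's restore_state_to_opc(): */
    env->pc    = data[0];
    env->flags = data[1];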
 
 static inline void tcg_gen_exit_tb(uintptr_t val)
 {
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index 02bbf30..c6f9570 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -175,9 +175,9 @@
 
 /* QEMU specific */
 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-DEF(debug_insn_start, 0, 0, 2, TCG_OPF_NOT_PRESENT)
+DEF(insn_start, 0, 0, 2 * TARGET_INSN_START_WORDS, TCG_OPF_NOT_PRESENT)
 #else
-DEF(debug_insn_start, 0, 0, 1, TCG_OPF_NOT_PRESENT)
+DEF(insn_start, 0, 0, TARGET_INSN_START_WORDS, TCG_OPF_NOT_PRESENT)
 #endif
 DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END)
 DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index a2cb027..682af8a 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -363,17 +363,39 @@
 
 void tcg_prologue_init(TCGContext *s)
 {
-    /* init global prologue and epilogue */
-    s->code_buf = s->code_gen_prologue;
-    s->code_ptr = s->code_buf;
+    size_t prologue_size, total_size;
+    void *buf0, *buf1;
+
+    /* Put the prologue at the beginning of code_gen_buffer.  */
+    buf0 = s->code_gen_buffer;
+    s->code_ptr = buf0;
+    s->code_buf = buf0;
+    s->code_gen_prologue = buf0;
+
+    /* Generate the prologue.  */
     tcg_target_qemu_prologue(s);
-    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
+    buf1 = s->code_ptr;
+    flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
+
+    /* Deduct the prologue from the buffer.  */
+    prologue_size = tcg_current_code_size(s);
+    s->code_gen_ptr = buf1;
+    s->code_gen_buffer = buf1;
+    s->code_buf = buf1;
+    total_size = s->code_gen_buffer_size - prologue_size;
+    s->code_gen_buffer_size = total_size;
+
+    /* Compute a high-water mark, at which we voluntarily flush the buffer
+       and start over.  The size here is arbitrary, significantly larger
+       than we expect the code generation for any one opcode to require.  */
+    s->code_gen_highwater = s->code_gen_buffer + (total_size - 1024);
+
+    tcg_register_jit(s->code_gen_buffer, total_size);
 
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
-        size_t size = tcg_current_code_size(s);
-        qemu_log("PROLOGUE: [size=%zu]\n", size);
-        log_disas(s->code_buf, size);
+        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
+        log_disas(buf0, prologue_size);
         qemu_log("\n");
         qemu_log_flush();
     }
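The high-water mark makes overflow handling cheap: as long as no single opcode can emit more than the reserved headroom (1024 bytes here), it is enough to test the output pointer once per operation after emitting, and to restart translation once it has crossed the mark.  A minimal sketch of the pattern, using stand-in types rather than the QEMU ones:

    #include <stddef.h>
    #include <stdint.h>

    struct codebuf {
        uint8_t *ptr;        /* next free byte */
        uint8_t *highwater;  /* end of buffer minus reserved headroom */
    };

    /* Emit one op without bounds checks; report overflow afterwards so
       the caller can flush the buffer and retry the translation.  */
    static int emit_op(struct codebuf *cb, const uint8_t *bytes, size_t len)
    {
        for (size_t i = 0; i < len; i++) {
            cb->ptr[i] = bytes[i];
        }
        cb->ptr += len;
        return cb->ptr > cb->highwater ? -1 : 0;
    }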
@@ -990,17 +1012,18 @@
         def = &tcg_op_defs[c];
         args = &s->gen_opparam_buf[op->args];
 
-        if (c == INDEX_op_debug_insn_start) {
-            uint64_t pc;
+        if (c == INDEX_op_insn_start) {
+            qemu_log("%s ----", oi != s->gen_first_op_idx ? "\n" : "");
+
+            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
+                target_ulong a;
 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-            pc = ((uint64_t)args[1] << 32) | args[0];
+                a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2];
 #else
-            pc = args[0];
+                a = args[i];
 #endif
-            if (oi != s->gen_first_op_idx) {
-                qemu_log("\n");
+                qemu_log(" " TARGET_FMT_lx, a);
             }
-            qemu_log(" ---- 0x%" PRIx64, pc);
         } else if (c == INDEX_op_call) {
             /* variable number of arguments */
             nb_oargs = op->callo;
@@ -1400,7 +1423,7 @@
                 }
             }
             break;
-        case INDEX_op_debug_insn_start:
+        case INDEX_op_insn_start:
             break;
         case INDEX_op_discard:
             /* mark the temporary as dead */
@@ -2289,11 +2312,27 @@
 #endif
 
 
-static inline int tcg_gen_code_common(TCGContext *s,
-                                      tcg_insn_unit *gen_code_buf,
-                                      long search_pc)
+int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf)
 {
-    int oi, oi_next;
+    int i, oi, oi_next, num_insns;
+
+#ifdef CONFIG_PROFILER
+    {
+        int n;
+
+        n = s->gen_last_op_idx + 1;
+        s->op_count += n;
+        if (n > s->op_count_max) {
+            s->op_count_max = n;
+        }
+
+        n = s->nb_temps;
+        s->temp_count += n;
+        if (n > s->temp_count_max) {
+            s->temp_count_max = n;
+        }
+    }
+#endif
 
 #ifdef DEBUG_DISAS
     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
@@ -2337,6 +2376,7 @@
 
     tcg_out_tb_init(s);
 
+    num_insns = -1;
     for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
         TCGOp * const op = &s->gen_op_buf[oi];
         TCGArg * const args = &s->gen_opparam_buf[op->args];
@@ -2359,7 +2399,20 @@
         case INDEX_op_movi_i64:
             tcg_reg_alloc_movi(s, args, dead_args, sync_args);
             break;
-        case INDEX_op_debug_insn_start:
+        case INDEX_op_insn_start:
+            if (num_insns >= 0) {
+                s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
+            }
+            num_insns++;
+            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
+                target_ulong a;
+#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
+                a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2];
+#else
+                a = args[i];
+#endif
+                s->gen_insn_data[num_insns][i] = a;
+            }
             break;
         case INDEX_op_discard:
             temp_dead(s, args[0]);
@@ -2383,40 +2436,22 @@
             tcg_reg_alloc_op(s, def, opc, args, dead_args, sync_args);
             break;
         }
-        if (search_pc >= 0 && search_pc < tcg_current_code_size(s)) {
-            return oi;
-        }
 #ifndef NDEBUG
         check_regs(s);
 #endif
+        /* Test for (pending) buffer overflow.  The assumption is that any
+           one operation beginning below the high water mark cannot overrun
+           the buffer completely.  Thus we can test for overflow after
+           generating code without having to check during generation.  */
+        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
+            return -1;
+        }
     }
+    tcg_debug_assert(num_insns >= 0);
+    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
 
     /* Generate TB finalization at the end of block */
     tcg_out_tb_finalize(s);
-    return -1;
-}
-
-int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf)
-{
-#ifdef CONFIG_PROFILER
-    {
-        int n;
-
-        n = s->gen_last_op_idx + 1;
-        s->op_count += n;
-        if (n > s->op_count_max) {
-            s->op_count_max = n;
-        }
-
-        n = s->nb_temps;
-        s->temp_count += n;
-        if (n > s->temp_count_max) {
-            s->temp_count_max = n;
-        }
-    }
-#endif
-
-    tcg_gen_code_common(s, gen_code_buf, -1);
 
     /* flush instruction cache */
     flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
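With gen_insn_end_off[] and gen_insn_data[][] filled in while code is generated, recovering guest state for a host-code offset becomes a search over per-instruction records instead of a second translation pass.  A simplified sketch of that lookup, with generic types standing in for the TCGContext fields (the real restore path also has to read the data stored alongside the finished TB):

    #include <stddef.h>
    #include <stdint.h>

    /* Find the guest instruction whose generated code covers host byte
       offset 'off' and return its recorded insn-start words, or NULL.
       Entry i of insn_end_off holds the code size at the end of insn i,
       so insn i covers [insn_end_off[i-1], insn_end_off[i]).  */
    static const uint64_t *find_insn_data(const uint16_t *insn_end_off,
                                          const uint64_t (*insn_data)[1],
                                          int num_insns, size_t off)
    {
        for (int i = 0; i < num_insns; i++) {
            if (off < insn_end_off[i]) {
                return insn_data[i];
            }
        }
        return NULL;
    }

A target's restore_state_to_opc() then consumes the returned words, e.g. env->pc = data[0] for tilegx above.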
@@ -2424,38 +2459,30 @@
     return tcg_current_code_size(s);
 }
 
-/* Return the index of the micro operation such as the pc after is <
-   offset bytes from the start of the TB.  The contents of gen_code_buf must
-   not be changed, though writing the same values is ok.
-   Return -1 if not found. */
-int tcg_gen_code_search_pc(TCGContext *s, tcg_insn_unit *gen_code_buf,
-                           long offset)
-{
-    return tcg_gen_code_common(s, gen_code_buf, offset);
-}
-
 #ifdef CONFIG_PROFILER
 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
 {
     TCGContext *s = &tcg_ctx;
-    int64_t tot;
+    int64_t tb_count = s->tb_count;
+    int64_t tb_div_count = tb_count ? tb_count : 1;
+    int64_t tot = s->interm_time + s->code_time;
 
-    tot = s->interm_time + s->code_time;
     cpu_fprintf(f, "JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
                 tot, tot / 2.4e9);
     cpu_fprintf(f, "translated TBs      %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n", 
-                s->tb_count, 
-                s->tb_count1 - s->tb_count,
-                s->tb_count1 ? (double)(s->tb_count1 - s->tb_count) / s->tb_count1 * 100.0 : 0);
+                tb_count, s->tb_count1 - tb_count,
+                (double)(s->tb_count1 - tb_count)
+                / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
     cpu_fprintf(f, "avg ops/TB          %0.1f max=%d\n", 
-                s->tb_count ? (double)s->op_count / s->tb_count : 0, s->op_count_max);
+                (double)s->op_count / tb_div_count, s->op_count_max);
     cpu_fprintf(f, "deleted ops/TB      %0.2f\n",
-                s->tb_count ? 
-                (double)s->del_op_count / s->tb_count : 0);
+                (double)s->del_op_count / tb_div_count);
     cpu_fprintf(f, "avg temps/TB        %0.2f max=%d\n",
-                s->tb_count ? 
-                (double)s->temp_count / s->tb_count : 0,
-                s->temp_count_max);
+                (double)s->temp_count / tb_div_count, s->temp_count_max);
+    cpu_fprintf(f, "avg host code/TB    %0.1f\n",
+                (double)s->code_out_len / tb_div_count);
+    cpu_fprintf(f, "avg search data/TB  %0.1f\n",
+                (double)s->search_out_len / tb_div_count);
     
     cpu_fprintf(f, "cycles/op           %0.1f\n", 
                 s->op_count ? (double)tot / s->op_count : 0);
@@ -2463,8 +2490,11 @@
                 s->code_in_len ? (double)tot / s->code_in_len : 0);
     cpu_fprintf(f, "cycles/out byte     %0.1f\n", 
                 s->code_out_len ? (double)tot / s->code_out_len : 0);
-    if (tot == 0)
+    cpu_fprintf(f, "cycles/search byte     %0.1f\n",
+                s->search_out_len ? (double)tot / s->search_out_len : 0);
+    if (tot == 0) {
         tot = 1;
+    }
     cpu_fprintf(f, "  gen_interm time   %0.1f%%\n", 
                 (double)s->interm_time / tot * 100.0);
     cpu_fprintf(f, "  gen_code time     %0.1f%%\n", 
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 879a665..a696922 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -129,6 +129,12 @@
 # error "Missing unsigned widening multiply"
 #endif
 
+#ifndef TARGET_INSN_START_EXTRA_WORDS
+# define TARGET_INSN_START_WORDS 1
+#else
+# define TARGET_INSN_START_WORDS (1 + TARGET_INSN_START_EXTRA_WORDS)
+#endif
+
 typedef enum TCGOpcode {
 #define DEF(name, oargs, iargs, cargs, flags) INDEX_op_ ## name,
 #include "tcg-opc.h"
@@ -188,6 +194,7 @@
 #define TCG_POOL_CHUNK_SIZE 32768
 
 #define TCG_MAX_TEMPS 512
+#define TCG_MAX_INSNS 512
 
 /* when the size of the arguments of a called function is smaller than
    this value, they are statically allocated in the TB stack frame */
@@ -525,6 +532,7 @@
     int64_t del_op_count;
     int64_t code_in_len;
     int64_t code_out_len;
+    int64_t search_out_len;
     int64_t interm_time;
     int64_t code_time;
     int64_t la_time;
@@ -551,10 +559,11 @@
     void *code_gen_prologue;
     void *code_gen_buffer;
     size_t code_gen_buffer_size;
-    /* threshold to flush the translated code buffer */
-    size_t code_gen_buffer_max_size;
     void *code_gen_ptr;
 
+    /* Threshold to flush the translated code buffer.  */
+    void *code_gen_highwater;
+
     TBContext tb_ctx;
 
     /* The TCGBackendData structure is private to tcg-target.c.  */
@@ -570,9 +579,8 @@
     TCGOp gen_op_buf[OPC_BUF_SIZE];
     TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE];
 
-    target_ulong gen_opc_pc[OPC_BUF_SIZE];
-    uint16_t gen_opc_icount[OPC_BUF_SIZE];
-    uint8_t gen_opc_instr_start[OPC_BUF_SIZE];
+    uint16_t gen_insn_end_off[TCG_MAX_INSNS];
+    target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS];
 };
 
 extern TCGContext tcg_ctx;
@@ -619,8 +627,6 @@
 void tcg_func_start(TCGContext *s);
 
 int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf);
-int tcg_gen_code_search_pc(TCGContext *s, tcg_insn_unit *gen_code_buf,
-                           long offset);
 
 void tcg_set_frame(TCGContext *s, int reg, intptr_t start, intptr_t size);
 
diff --git a/tci.c b/tci.c
index 70eaab2..b5ed7b1 100644
--- a/tci.c
+++ b/tci.c
@@ -1081,15 +1081,6 @@
 
             /* QEMU specific operations. */
 
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-        case INDEX_op_debug_insn_start:
-            TODO();
-            break;
-#else
-        case INDEX_op_debug_insn_start:
-            TODO();
-            break;
-#endif
         case INDEX_op_exit_tb:
             next_tb = *(uint64_t *)tb_ptr;
             goto exit;
diff --git a/tests/Makefile b/tests/Makefile
index 78d5d0a..dbd32a6 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -86,6 +86,9 @@
 # All QTests for now are POSIX-only, but the dependencies are
 # really in libqtest, not in the testcases themselves.
 
+check-qtest-generic-y = tests/device-introspect-test$(EXESUF)
+gcov-files-generic-y = qdev-monitor.c qmp.c
+
 gcov-files-ipack-y += hw/ipack/ipack.c
 check-qtest-ipack-y += tests/ipoctal232-test$(EXESUF)
 gcov-files-ipack-y += hw/char/ipoctal232.c
@@ -219,10 +222,7 @@
 check-qtest-microblazeel-y = $(check-qtest-microblaze-y)
 check-qtest-xtensaeb-y = $(check-qtest-xtensa-y)
 
-# qom-test works for all sysemu architectures:
-$(foreach target,$(SYSEMU_TARGET_LIST), \
-	$(if $(findstring tests/qom-test$(EXESUF), $(check-qtest-$(target)-y)),, \
-		$(eval check-qtest-$(target)-y += tests/qom-test$(EXESUF))))
+check-qtest-generic-y += tests/qom-test$(EXESUF)
 
 check-qapi-schema-y := $(addprefix tests/qapi-schema/, \
 	comments.json empty.json enum-empty.json enum-missing-data.json \
@@ -385,6 +385,7 @@
 libqos-usb-obj-y = $(libqos-pc-obj-y) tests/libqos/usb.o
 libqos-virtio-obj-y = $(libqos-pc-obj-y) tests/libqos/virtio.o tests/libqos/virtio-pci.o tests/libqos/virtio-mmio.o tests/libqos/malloc-generic.o
 
+tests/device-introspect-test$(EXESUF): tests/device-introspect-test.o
 tests/rtc-test$(EXESUF): tests/rtc-test.o
 tests/m48t59-test$(EXESUF): tests/m48t59-test.o
 tests/endianness-test$(EXESUF): tests/endianness-test.o
@@ -450,8 +451,11 @@
 
 TARGETS=$(patsubst %-softmmu,%, $(filter %-softmmu,$(TARGET_DIRS)))
 ifeq ($(CONFIG_POSIX),y)
-QTEST_TARGETS=$(foreach TARGET,$(TARGETS), $(if $(check-qtest-$(TARGET)-y), $(TARGET),))
+QTEST_TARGETS = $(TARGETS)
 check-qtest-y=$(foreach TARGET,$(TARGETS), $(check-qtest-$(TARGET)-y))
+check-qtest-y += $(check-qtest-generic-y)
+else
+QTEST_TARGETS =
 endif
 
 qtest-obj-y = tests/libqtest.o $(test-util-obj-y)
@@ -489,8 +493,8 @@
 	$(call quiet-command,QTEST_QEMU_BINARY=$*-softmmu/qemu-system-$* \
 		QTEST_QEMU_IMG=qemu-img$(EXESUF) \
 		MALLOC_PERTURB_=$${MALLOC_PERTURB_:-$$((RANDOM % 255 + 1))} \
-		gtester $(GTESTER_OPTIONS) -m=$(SPEED) $(check-qtest-$*-y),"GTESTER $@")
-	$(if $(CONFIG_GCOV),@for f in $(gcov-files-$*-y); do \
+		gtester $(GTESTER_OPTIONS) -m=$(SPEED) $(check-qtest-$*-y) $(check-qtest-generic-y),"GTESTER $@")
+	$(if $(CONFIG_GCOV),@for f in $(gcov-files-$*-y) $(gcov-files-generic-y); do \
 	  echo Gcov report for $$f:;\
 	  $(GCOV) $(GCOV_OPTIONS) $$f -o `dirname $$f`; \
 	done,)
@@ -501,7 +505,7 @@
 	$(call quiet-command, \
 		MALLOC_PERTURB_=$${MALLOC_PERTURB_:-$$((RANDOM % 255 + 1))} \
 		gtester $(GTESTER_OPTIONS) -m=$(SPEED) $*,"GTESTER $*")
-	$(if $(CONFIG_GCOV),@for f in $(gcov-files-$(subst tests/,,$*)-y); do \
+	$(if $(CONFIG_GCOV),@for f in $(gcov-files-$(subst tests/,,$*)-y) $(gcov-files-generic-y); do \
 	  echo Gcov report for $$f:;\
 	  $(GCOV) $(GCOV_OPTIONS) $$f -o `dirname $$f`; \
 	done,)
diff --git a/tests/device-introspect-test.c b/tests/device-introspect-test.c
new file mode 100644
index 0000000..11d5fea
--- /dev/null
+++ b/tests/device-introspect-test.c
@@ -0,0 +1,124 @@
+/*
+ * Device introspection test cases
+ *
+ * Copyright (c) 2015 Red Hat Inc.
+ *
+ * Authors:
+ *  Markus Armbruster <armbru@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+/*
+ * Covers QMP device-list-properties and HMP device_add help.  We
+ * currently don't check that their output makes sense, only that QEMU
+ * survives.  Useful since we've had an astounding number of crash
+ * bugs around here.
+ */
+
+#include <glib.h>
+#include <stdarg.h>
+#include "qemu-common.h"
+#include "qapi/qmp/qstring.h"
+#include "libqtest.h"
+
+const char common_args[] = "-nodefaults -machine none";
+
+static QList *device_type_list(bool abstract)
+{
+    QDict *resp;
+    QList *ret;
+
+    resp = qmp("{'execute': 'qom-list-types',"
+               " 'arguments': {'implements': 'device', 'abstract': %i}}",
+               abstract);
+    g_assert(qdict_haskey(resp, "return"));
+    ret = qdict_get_qlist(resp, "return");
+    QINCREF(ret);
+    QDECREF(resp);
+    return ret;
+}
+
+static void test_one_device(const char *type)
+{
+    QDict *resp;
+    char *help, *qom_tree;
+
+    resp = qmp("{'execute': 'device-list-properties',"
+               " 'arguments': {'typename': %s}}",
+               type);
+    QDECREF(resp);
+
+    help = hmp("device_add \"%s,help\"", type);
+    g_free(help);
+
+    /*
+     * Some devices leave dangling pointers in QOM behind.
+     * "info qom-tree" has a good chance at crashing then
+     */
+    qom_tree = hmp("info qom-tree");
+    g_free(qom_tree);
+}
+
+static void test_device_intro_list(void)
+{
+    QList *types;
+    char *help;
+
+    qtest_start(common_args);
+
+    types = device_type_list(true);
+    QDECREF(types);
+
+    help = hmp("device_add help");
+    g_free(help);
+
+    qtest_end();
+}
+
+static void test_device_intro_none(void)
+{
+    qtest_start(common_args);
+    test_one_device("nonexistent");
+    qtest_end();
+}
+
+static void test_device_intro_abstract(void)
+{
+    qtest_start(common_args);
+    test_one_device("device");
+    qtest_end();
+}
+
+static void test_device_intro_concrete(void)
+{
+    QList *types;
+    QListEntry *entry;
+    const char *type;
+
+    qtest_start(common_args);
+    types = device_type_list(false);
+
+    QLIST_FOREACH_ENTRY(types, entry) {
+        type = qdict_get_try_str(qobject_to_qdict(qlist_entry_obj(entry)),
+                                "name");
+        g_assert(type);
+        test_one_device(type);
+    }
+
+    QDECREF(types);
+    qtest_end();
+}
+
+int main(int argc, char **argv)
+{
+    g_test_init(&argc, &argv, NULL);
+
+    qtest_add_func("device/introspect/list", test_device_intro_list);
+    qtest_add_func("device/introspect/none", test_device_intro_none);
+    qtest_add_func("device/introspect/abstract", test_device_intro_abstract);
+    qtest_add_func("device/introspect/concrete", test_device_intro_concrete);
+
+    return g_test_run();
+}
diff --git a/tests/drive_del-test.c b/tests/drive_del-test.c
index 8951f6f..3390946 100644
--- a/tests/drive_del-test.c
+++ b/tests/drive_del-test.c
@@ -16,28 +16,18 @@
 
 static void drive_add(void)
 {
-    QDict *response;
+    char *resp = hmp("drive_add 0 if=none,id=drive0");
 
-    response = qmp("{'execute': 'human-monitor-command',"
-                   " 'arguments': {"
-                   "   'command-line': 'drive_add 0 if=none,id=drive0'"
-                   "}}");
-    g_assert(response);
-    g_assert_cmpstr(qdict_get_try_str(response, "return"), ==, "OK\r\n");
-    QDECREF(response);
+    g_assert_cmpstr(resp, ==, "OK\r\n");
+    g_free(resp);
 }
 
 static void drive_del(void)
 {
-    QDict *response;
+    char *resp = hmp("drive_del drive0");
 
-    response = qmp("{'execute': 'human-monitor-command',"
-                   " 'arguments': {"
-                   "   'command-line': 'drive_del drive0'"
-                   "}}");
-    g_assert(response);
-    g_assert_cmpstr(qdict_get_try_str(response, "return"), ==, "");
-    QDECREF(response);
+    g_assert_cmpstr(resp, ==, "");
+    g_free(resp);
 }
 
 static void device_del(void)
diff --git a/tests/ide-test.c b/tests/ide-test.c
index b6e9e1a..d1014bb 100644
--- a/tests/ide-test.c
+++ b/tests/ide-test.c
@@ -510,9 +510,7 @@
         tmp_path);
 
     /* Delay the completion of the flush request until we explicitly do it */
-    qmp_discard_response("{'execute':'human-monitor-command', 'arguments': {"
-                         " 'command-line':"
-                         " 'qemu-io ide0-hd0 \"break flush_to_os A\"'} }");
+    g_free(hmp("qemu-io ide0-hd0 \"break flush_to_os A\""));
 
     /* FLUSH CACHE command on device 0*/
     outb(IDE_BASE + reg_device, 0);
@@ -524,9 +522,7 @@
     assert_bit_clear(data, DF | ERR | DRQ);
 
     /* Complete the command */
-    qmp_discard_response("{'execute':'human-monitor-command', 'arguments': {"
-                         " 'command-line':"
-                         " 'qemu-io ide0-hd0 \"resume A\"'} }");
+    g_free(hmp("qemu-io ide0-hd0 \"resume A\""));
 
     /* Check registers */
     data = inb(IDE_BASE + reg_device);
diff --git a/tests/libqtest.c b/tests/libqtest.c
index e5188e0..2a396ba 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -46,7 +46,6 @@
     bool irq_level[MAX_IRQ];
     GString *rx;
     pid_t qemu_pid;  /* our child QEMU process */
-    struct sigaction sigact_old; /* restored on exit */
 };
 
 static GList *qtest_instances;
@@ -484,6 +483,33 @@
     }
 }
 
+char *qtest_hmpv(QTestState *s, const char *fmt, va_list ap)
+{
+    char *cmd;
+    QDict *resp;
+    char *ret;
+
+    cmd = g_strdup_vprintf(fmt, ap);
+    resp = qtest_qmp(s, "{'execute': 'human-monitor-command',"
+                     " 'arguments': {'command-line': %s}}",
+                     cmd);
+    ret = g_strdup(qdict_get_try_str(resp, "return"));
+    g_assert(ret);
+    QDECREF(resp);
+    g_free(cmd);
+    return ret;
+}
+
+char *qtest_hmp(QTestState *s, const char *fmt, ...)
+{
+    va_list ap;
+    char *ret;
+
+    va_start(ap, fmt);
+    ret = qtest_hmpv(s, fmt, ap);
+    va_end(ap);
+    return ret;
+}
 
 const char *qtest_get_arch(void)
 {
@@ -775,6 +801,16 @@
     qtest_qmpv_discard_response(global_qtest, fmt, ap);
     va_end(ap);
 }
+char *hmp(const char *fmt, ...)
+{
+    va_list ap;
+    char *ret;
+
+    va_start(ap, fmt);
+    ret = qtest_hmpv(global_qtest, fmt, ap);
+    va_end(ap);
+    return ret;
+}
 
 bool qtest_big_endian(void)
 {
diff --git a/tests/libqtest.h b/tests/libqtest.h
index ec42031..55bccbf 100644
--- a/tests/libqtest.h
+++ b/tests/libqtest.h
@@ -120,6 +120,29 @@
 void qtest_qmp_eventwait(QTestState *s, const char *event);
 
 /**
+ * qtest_hmp:
+ * @s: #QTestState instance to operate on.
+ * @fmt...: HMP command to send to QEMU
+ *
+ * Send HMP command to QEMU via QMP's human-monitor-command.
+ *
+ * Returns: the command's output.  The caller should g_free() it.
+ */
+char *qtest_hmp(QTestState *s, const char *fmt, ...);
+
+/**
+ * qtest_hmpv:
+ * @s: #QTestState instance to operate on.
+ * @fmt: HMP command to send to QEMU
+ * @ap: HMP command arguments
+ *
+ * Send HMP command to QEMU via QMP's human-monitor-command.
+ *
+ * Returns: the command's output.  The caller should g_free() it.
+ */
+char *qtest_hmpv(QTestState *s, const char *fmt, va_list ap);
+
+/**
  * qtest_get_irq:
  * @s: #QTestState instance to operate on.
  * @num: Interrupt to observe.
@@ -499,6 +522,16 @@
 }
 
 /**
+ * hmp:
+ * @fmt...: HMP command to send to QEMU
+ *
+ * Send HMP command to QEMU via QMP's human-monitor-command.
+ *
+ * Returns: the command's output.  The caller should g_free() it.
+ */
+char *hmp(const char *fmt, ...);
+
+/**
  * get_irq:
  * @num: Interrupt to observe.
  *
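
As a usage note for the helpers declared above, here is a hedged sketch of how a qtest might call hmp() once it has started QEMU. The test function name and the command strings are illustrative, not part of this series; the ownership rule (the caller frees the returned string) is from the doc comments above.

    static void test_hmp_usage_sketch(void)    /* hypothetical test, for illustration */
    {
        char *out;

        qtest_start("-nodefaults -machine none");

        /* hmp() wraps QMP's human-monitor-command and returns the reply text */
        out = hmp("info qom-tree");
        g_assert(out != NULL);
        g_free(out);                           /* caller owns the returned string */

        /* the format string is expanded with g_strdup_vprintf(), so plain
         * printf-style conversions are fine here */
        out = hmp("device_add %s,help", "e1000");
        g_free(out);

        qtest_end();
    }
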
diff --git a/tests/test-string-output-visitor.c b/tests/test-string-output-visitor.c
index 101fb27..fd5e67b 100644
--- a/tests/test-string-output-visitor.c
+++ b/tests/test-string-output-visitor.c
@@ -248,39 +248,39 @@
 
     output_visitor_test_add("/string-visitor/output/int",
                             &out_visitor_data, test_visitor_out_int, false);
-    output_visitor_test_add("/string-visitor/output/int",
+    output_visitor_test_add("/string-visitor/output/int-human",
                             &out_visitor_data, test_visitor_out_int, true);
     output_visitor_test_add("/string-visitor/output/bool",
                             &out_visitor_data, test_visitor_out_bool, false);
-    output_visitor_test_add("/string-visitor/output/bool",
+    output_visitor_test_add("/string-visitor/output/bool-human",
                             &out_visitor_data, test_visitor_out_bool, true);
     output_visitor_test_add("/string-visitor/output/number",
                             &out_visitor_data, test_visitor_out_number, false);
-    output_visitor_test_add("/string-visitor/output/number",
+    output_visitor_test_add("/string-visitor/output/number-human",
                             &out_visitor_data, test_visitor_out_number, true);
     output_visitor_test_add("/string-visitor/output/string",
                             &out_visitor_data, test_visitor_out_string, false);
-    output_visitor_test_add("/string-visitor/output/string",
+    output_visitor_test_add("/string-visitor/output/string-human",
                             &out_visitor_data, test_visitor_out_string, true);
     output_visitor_test_add("/string-visitor/output/no-string",
                             &out_visitor_data, test_visitor_out_no_string,
                             false);
-    output_visitor_test_add("/string-visitor/output/no-string",
+    output_visitor_test_add("/string-visitor/output/no-string-human",
                             &out_visitor_data, test_visitor_out_no_string,
                             true);
     output_visitor_test_add("/string-visitor/output/enum",
                             &out_visitor_data, test_visitor_out_enum, false);
-    output_visitor_test_add("/string-visitor/output/enum",
+    output_visitor_test_add("/string-visitor/output/enum-human",
                             &out_visitor_data, test_visitor_out_enum, true);
     output_visitor_test_add("/string-visitor/output/enum-errors",
                             &out_visitor_data, test_visitor_out_enum_errors,
                             false);
-    output_visitor_test_add("/string-visitor/output/enum-errors",
+    output_visitor_test_add("/string-visitor/output/enum-errors-human",
                             &out_visitor_data, test_visitor_out_enum_errors,
                             true);
     output_visitor_test_add("/string-visitor/output/intList",
                             &out_visitor_data, test_visitor_out_intList, false);
-    output_visitor_test_add("/string-visitor/output/intList",
+    output_visitor_test_add("/string-visitor/output/intList-human",
                             &out_visitor_data, test_visitor_out_intList, true);
 
     g_test_run();
diff --git a/trace-events b/trace-events
index 36db793..a0ddc6b 100644
--- a/trace-events
+++ b/trace-events
@@ -603,9 +603,6 @@
 vm_state_notify(int running, int reason) "running %d reason %d"
 load_file(const char *name, const char *path) "name %s location %s"
 runstate_set(int new_state) "new state %d"
-g_malloc(size_t size, void *ptr) "size %zu ptr %p"
-g_realloc(void *ptr, size_t size, void *newptr) "ptr %p size %zu newptr %p"
-g_free(void *ptr) "ptr %p"
 system_wakeup_request(int reason) "reason=%d"
 qemu_system_shutdown_request(void) ""
 qemu_system_powerdown_request(void) ""
@@ -1181,6 +1178,7 @@
 vmware_setmode(uint32_t w, uint32_t h, uint32_t bpp) "%dx%d @ %d bpp"
 
 # hw/display/virtio-gpu.c
+virtio_gpu_features(bool virgl) "virgl %d"
 virtio_gpu_cmd_get_display_info(void) ""
 virtio_gpu_cmd_get_caps(void) ""
 virtio_gpu_cmd_set_scanout(uint32_t id, uint32_t res, uint32_t w, uint32_t h, uint32_t x, uint32_t y) "id %d, res 0x%x, w %d, h %d, x %d, y %d"
@@ -1190,7 +1188,15 @@
 virtio_gpu_cmd_res_back_attach(uint32_t res) "res 0x%x"
 virtio_gpu_cmd_res_back_detach(uint32_t res) "res 0x%x"
 virtio_gpu_cmd_res_xfer_toh_2d(uint32_t res) "res 0x%x"
+virtio_gpu_cmd_res_xfer_toh_3d(uint32_t res) "res 0x%x"
+virtio_gpu_cmd_res_xfer_fromh_3d(uint32_t res) "res 0x%x"
 virtio_gpu_cmd_res_flush(uint32_t res, uint32_t w, uint32_t h, uint32_t x, uint32_t y) "res 0x%x, w %d, h %d, x %d, y %d"
+virtio_gpu_cmd_ctx_create(uint32_t ctx, const char *name) "ctx 0x%x, name %s"
+virtio_gpu_cmd_ctx_destroy(uint32_t ctx) "ctx 0x%x"
+virtio_gpu_cmd_ctx_res_attach(uint32_t ctx, uint32_t res) "ctx 0x%x, res 0x%x"
+virtio_gpu_cmd_ctx_res_detach(uint32_t ctx, uint32_t res) "ctx 0x%x, res 0x%x"
+virtio_gpu_cmd_ctx_submit(uint32_t ctx, uint32_t size) "ctx 0x%x, size %d"
+virtio_gpu_update_cursor(uint32_t scanout, uint32_t x, uint32_t y, const char *type, uint32_t res) "scanout %d, x %d, y %d, %s, res 0x%x"
 virtio_gpu_fence_ctrl(uint64_t fence, uint32_t type) "fence 0x%" PRIx64 ", type 0x%x"
 virtio_gpu_fence_resp(uint64_t fence) "fence 0x%" PRIx64
 
@@ -1624,7 +1630,9 @@
 vfio_platform_intp_inject_pending_lockheld(int pin, int fd) "Inject pending IRQ #%d (fd = %d)"
 vfio_platform_populate_interrupts(int pin, int count, int flags) "- IRQ index %d: count %d, flags=0x%x"
 vfio_intp_interrupt_set_pending(int index) "irq %d is set PENDING"
-vfio_platform_start_irqfd_injection(int index, int fd, int resamplefd) "IRQ index=%d, fd = %d, resamplefd = %d"
+vfio_platform_start_level_irqfd_injection(int index, int fd, int resamplefd) "IRQ index=%d, fd = %d, resamplefd = %d"
+vfio_platform_start_edge_irqfd_injection(int index, int fd) "IRQ index=%d, fd = %d"
+
 
 #hw/acpi/memory_hotplug.c
 mhp_acpi_invalid_slot_selected(uint32_t slot) "0x%"PRIx32
diff --git a/translate-all.c b/translate-all.c
index 4a9ee33..333eba4 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -168,127 +168,137 @@
     tcg_context_init(&tcg_ctx); 
 }
 
-/* return non zero if the very first instruction is invalid so that
- * the virtual CPU can trigger an exception.
- *
- * '*gen_code_size_ptr' contains the size of the generated code (host
- * code).
- *
- * Called with mmap_lock held for user-mode emulation.
- */
-int cpu_gen_code(CPUArchState *env, TranslationBlock *tb, int *gen_code_size_ptr)
+/* Encode VAL as a signed leb128 sequence at P.
+   Return P incremented past the encoded value.  */
+static uint8_t *encode_sleb128(uint8_t *p, target_long val)
 {
-    TCGContext *s = &tcg_ctx;
-    tcg_insn_unit *gen_code_buf;
-    int gen_code_size;
-#ifdef CONFIG_PROFILER
-    int64_t ti;
-#endif
+    int more, byte;
 
-#ifdef CONFIG_PROFILER
-    s->tb_count1++; /* includes aborted translations because of
-                       exceptions */
-    ti = profile_getclock();
-#endif
-    tcg_func_start(s);
+    do {
+        byte = val & 0x7f;
+        val >>= 7;
+        more = !((val == 0 && (byte & 0x40) == 0)
+                 || (val == -1 && (byte & 0x40) != 0));
+        if (more) {
+            byte |= 0x80;
+        }
+        *p++ = byte;
+    } while (more);
 
-    gen_intermediate_code(env, tb);
-
-    trace_translate_block(tb, tb->pc, tb->tc_ptr);
-
-    /* generate machine code */
-    gen_code_buf = tb->tc_ptr;
-    tb->tb_next_offset[0] = 0xffff;
-    tb->tb_next_offset[1] = 0xffff;
-    s->tb_next_offset = tb->tb_next_offset;
-#ifdef USE_DIRECT_JUMP
-    s->tb_jmp_offset = tb->tb_jmp_offset;
-    s->tb_next = NULL;
-#else
-    s->tb_jmp_offset = NULL;
-    s->tb_next = tb->tb_next;
-#endif
-
-#ifdef CONFIG_PROFILER
-    s->tb_count++;
-    s->interm_time += profile_getclock() - ti;
-    s->code_time -= profile_getclock();
-#endif
-    gen_code_size = tcg_gen_code(s, gen_code_buf);
-    *gen_code_size_ptr = gen_code_size;
-#ifdef CONFIG_PROFILER
-    s->code_time += profile_getclock();
-    s->code_in_len += tb->size;
-    s->code_out_len += gen_code_size;
-#endif
-
-#ifdef DEBUG_DISAS
-    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
-        qemu_log("OUT: [size=%d]\n", gen_code_size);
-        log_disas(tb->tc_ptr, gen_code_size);
-        qemu_log("\n");
-        qemu_log_flush();
-    }
-#endif
-    return 0;
+    return p;
 }
 
-/* The cpu state corresponding to 'searched_pc' is restored.
- */
+/* Decode a signed leb128 sequence at *PP; increment *PP past the
+   decoded value.  Return the decoded value.  */
+static target_long decode_sleb128(uint8_t **pp)
+{
+    uint8_t *p = *pp;
+    target_long val = 0;
+    int byte, shift = 0;
+
+    do {
+        byte = *p++;
+        val |= (target_ulong)(byte & 0x7f) << shift;
+        shift += 7;
+    } while (byte & 0x80);
+    if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
+        val |= -(target_ulong)1 << shift;
+    }
+
+    *pp = p;
+    return val;
+}
+
+/* Encode the data collected about the instructions while compiling TB.
+   Place the data at BLOCK, and return the number of bytes consumed.
+
+   The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
+   which come from the target's insn_start data, followed by a uintptr_t
+   which comes from the host pc of the end of the code implementing the insn.
+
+   Each line of the table is encoded as sleb128 deltas from the previous
+   line.  The seed for the first line is { tb->pc, 0..., tb->tc_ptr }.
+   That is, the first column is seeded with the guest pc, the last column
+   with the host pc, and the middle columns with zeros.  */
+
+static int encode_search(TranslationBlock *tb, uint8_t *block)
+{
+    uint8_t *highwater = tcg_ctx.code_gen_highwater;
+    uint8_t *p = block;
+    int i, j, n;
+
+    tb->tc_search = block;
+
+    for (i = 0, n = tb->icount; i < n; ++i) {
+        target_ulong prev;
+
+        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
+            if (i == 0) {
+                prev = (j == 0 ? tb->pc : 0);
+            } else {
+                prev = tcg_ctx.gen_insn_data[i - 1][j];
+            }
+            p = encode_sleb128(p, tcg_ctx.gen_insn_data[i][j] - prev);
+        }
+        prev = (i == 0 ? 0 : tcg_ctx.gen_insn_end_off[i - 1]);
+        p = encode_sleb128(p, tcg_ctx.gen_insn_end_off[i] - prev);
+
+        /* Test for (pending) buffer overflow.  The assumption is that any
+           one row beginning below the high water mark cannot overrun
+           the buffer completely.  Thus we can test for overflow after
+           encoding a row without having to check during encoding.  */
+        if (unlikely(p > highwater)) {
+            return -1;
+        }
+    }
+
+    return p - block;
+}
+
+/* The cpu state corresponding to 'searched_pc' is restored.  */
 static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
                                      uintptr_t searched_pc)
 {
+    target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc };
+    uintptr_t host_pc = (uintptr_t)tb->tc_ptr;
     CPUArchState *env = cpu->env_ptr;
-    TCGContext *s = &tcg_ctx;
-    int j;
-    uintptr_t tc_ptr;
+    uint8_t *p = tb->tc_search;
+    int i, j, num_insns = tb->icount;
 #ifdef CONFIG_PROFILER
-    int64_t ti;
+    int64_t ti = profile_getclock();
 #endif
 
-#ifdef CONFIG_PROFILER
-    ti = profile_getclock();
-#endif
-    tcg_func_start(s);
+    if (searched_pc < host_pc) {
+        return -1;
+    }
 
-    gen_intermediate_code_pc(env, tb);
+    /* Reconstruct the stored insn data while looking for the point at
+       which the end of the insn exceeds the searched_pc.  */
+    for (i = 0; i < num_insns; ++i) {
+        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
+            data[j] += decode_sleb128(&p);
+        }
+        host_pc += decode_sleb128(&p);
+        if (host_pc > searched_pc) {
+            goto found;
+        }
+    }
+    return -1;
 
+ found:
     if (tb->cflags & CF_USE_ICOUNT) {
         assert(use_icount);
         /* Reset the cycle counter to the start of the block.  */
-        cpu->icount_decr.u16.low += tb->icount;
+        cpu->icount_decr.u16.low += num_insns;
         /* Clear the IO flag.  */
         cpu->can_do_io = 0;
     }
-
-    /* find opc index corresponding to search_pc */
-    tc_ptr = (uintptr_t)tb->tc_ptr;
-    if (searched_pc < tc_ptr)
-        return -1;
-
-    s->tb_next_offset = tb->tb_next_offset;
-#ifdef USE_DIRECT_JUMP
-    s->tb_jmp_offset = tb->tb_jmp_offset;
-    s->tb_next = NULL;
-#else
-    s->tb_jmp_offset = NULL;
-    s->tb_next = tb->tb_next;
-#endif
-    j = tcg_gen_code_search_pc(s, (tcg_insn_unit *)tc_ptr,
-                               searched_pc - tc_ptr);
-    if (j < 0)
-        return -1;
-    /* now find start of instruction before */
-    while (s->gen_opc_instr_start[j] == 0) {
-        j--;
-    }
-    cpu->icount_decr.u16.low -= s->gen_opc_icount[j];
-
-    restore_state_to_opc(env, tb, j);
+    cpu->icount_decr.u16.low -= i;
+    restore_state_to_opc(env, tb, data);
 
 #ifdef CONFIG_PROFILER
-    s->restore_time += profile_getclock() - ti;
-    s->restore_count++;
+    tcg_ctx.restore_time += profile_getclock() - ti;
+    tcg_ctx.restore_count++;
 #endif
     return 0;
 }
@@ -311,31 +321,6 @@
     return false;
 }
 
-#ifdef _WIN32
-static __attribute__((unused)) void map_exec(void *addr, long size)
-{
-    DWORD old_protect;
-    VirtualProtect(addr, size,
-                   PAGE_EXECUTE_READWRITE, &old_protect);
-}
-#else
-static __attribute__((unused)) void map_exec(void *addr, long size)
-{
-    unsigned long start, end, page_size;
-
-    page_size = getpagesize();
-    start = (unsigned long)addr;
-    start &= ~(page_size - 1);
-
-    end = (unsigned long)addr + size;
-    end += page_size - 1;
-    end &= ~(page_size - 1);
-
-    mprotect((void *)start, end - start,
-             PROT_READ | PROT_WRITE | PROT_EXEC);
-}
-#endif
-
 void page_size_init(void)
 {
     /* NOTE: we can always suppose that qemu_host_page_size >=
@@ -472,14 +457,6 @@
 #define USE_STATIC_CODE_GEN_BUFFER
 #endif
 
-/* ??? Should configure for this, not list operating systems here.  */
-#if (defined(__linux__) \
-    || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
-    || defined(__DragonFly__) || defined(__OpenBSD__) \
-    || defined(__NetBSD__))
-# define USE_MMAP
-#endif
-
 /* Minimum size of the code gen buffer.  This number is randomly chosen,
    but not so small that we can't have a fair number of TB's live.  */
 #define MIN_CODE_GEN_BUFFER_SIZE     (1024u * 1024)
@@ -567,22 +544,102 @@
 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
     __attribute__((aligned(CODE_GEN_ALIGN)));
 
+# ifdef _WIN32
+static inline void do_protect(void *addr, long size, int prot)
+{
+    DWORD old_protect;
+    VirtualProtect(addr, size, prot, &old_protect);
+}
+
+static inline void map_exec(void *addr, long size)
+{
+    do_protect(addr, size, PAGE_EXECUTE_READWRITE);
+}
+
+static inline void map_none(void *addr, long size)
+{
+    do_protect(addr, size, PAGE_NOACCESS);
+}
+# else
+static inline void do_protect(void *addr, long size, int prot)
+{
+    uintptr_t start, end;
+
+    start = (uintptr_t)addr;
+    start &= qemu_real_host_page_mask;
+
+    end = (uintptr_t)addr + size;
+    end = ROUND_UP(end, qemu_real_host_page_size);
+
+    mprotect((void *)start, end - start, prot);
+}
+
+static inline void map_exec(void *addr, long size)
+{
+    do_protect(addr, size, PROT_READ | PROT_WRITE | PROT_EXEC);
+}
+
+static inline void map_none(void *addr, long size)
+{
+    do_protect(addr, size, PROT_NONE);
+}
+# endif /* WIN32 */
+
 static inline void *alloc_code_gen_buffer(void)
 {
     void *buf = static_code_gen_buffer;
+    size_t full_size, size;
+
+    /* The size of the buffer, rounded down to end on a page boundary.  */
+    full_size = (((uintptr_t)buf + sizeof(static_code_gen_buffer))
+                 & qemu_real_host_page_mask) - (uintptr_t)buf;
+
+    /* Reserve a guard page.  */
+    size = full_size - qemu_real_host_page_size;
+
+    /* Honor a command-line option limiting the size of the buffer.  */
+    if (size > tcg_ctx.code_gen_buffer_size) {
+        size = (((uintptr_t)buf + tcg_ctx.code_gen_buffer_size)
+                & qemu_real_host_page_mask) - (uintptr_t)buf;
+    }
+    tcg_ctx.code_gen_buffer_size = size;
+
 #ifdef __mips__
-    if (cross_256mb(buf, tcg_ctx.code_gen_buffer_size)) {
-        buf = split_cross_256mb(buf, tcg_ctx.code_gen_buffer_size);
+    if (cross_256mb(buf, size)) {
+        buf = split_cross_256mb(buf, size);
+        size = tcg_ctx.code_gen_buffer_size;
     }
 #endif
-    map_exec(buf, tcg_ctx.code_gen_buffer_size);
+
+    map_exec(buf, size);
+    map_none(buf + size, qemu_real_host_page_size);
+    qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
+
     return buf;
 }
-#elif defined(USE_MMAP)
+#elif defined(_WIN32)
+static inline void *alloc_code_gen_buffer(void)
+{
+    size_t size = tcg_ctx.code_gen_buffer_size;
+    void *buf1, *buf2;
+
+    /* Perform the allocation in two steps, so that the guard page
+       is reserved but uncommitted.  */
+    buf1 = VirtualAlloc(NULL, size + qemu_real_host_page_size,
+                        MEM_RESERVE, PAGE_NOACCESS);
+    if (buf1 != NULL) {
+        buf2 = VirtualAlloc(buf1, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
+        assert(buf1 == buf2);
+    }
+
+    return buf1;
+}
+#else
 static inline void *alloc_code_gen_buffer(void)
 {
     int flags = MAP_PRIVATE | MAP_ANONYMOUS;
     uintptr_t start = 0;
+    size_t size = tcg_ctx.code_gen_buffer_size;
     void *buf;
 
     /* Constrain the position of the buffer based on the host cpu.
@@ -598,86 +655,70 @@
        Leave the choice of exact location with the kernel.  */
     flags |= MAP_32BIT;
     /* Cannot expect to map more than 800MB in low memory.  */
-    if (tcg_ctx.code_gen_buffer_size > 800u * 1024 * 1024) {
-        tcg_ctx.code_gen_buffer_size = 800u * 1024 * 1024;
+    if (size > 800u * 1024 * 1024) {
+        tcg_ctx.code_gen_buffer_size = size = 800u * 1024 * 1024;
     }
 # elif defined(__sparc__)
     start = 0x40000000ul;
 # elif defined(__s390x__)
     start = 0x90000000ul;
 # elif defined(__mips__)
-    /* ??? We ought to more explicitly manage layout for softmmu too.  */
-#  ifdef CONFIG_USER_ONLY
-    start = 0x68000000ul;
-#  elif _MIPS_SIM == _ABI64
+#  if _MIPS_SIM == _ABI64
     start = 0x128000000ul;
 #  else
     start = 0x08000000ul;
 #  endif
 # endif
 
-    buf = mmap((void *)start, tcg_ctx.code_gen_buffer_size,
-               PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
+    buf = mmap((void *)start, size + qemu_real_host_page_size,
+               PROT_NONE, flags, -1, 0);
     if (buf == MAP_FAILED) {
         return NULL;
     }
 
 #ifdef __mips__
-    if (cross_256mb(buf, tcg_ctx.code_gen_buffer_size)) {
+    if (cross_256mb(buf, size)) {
         /* Try again, with the original still mapped, to avoid re-acquiring
            that 256mb crossing.  This time don't specify an address.  */
-        size_t size2, size1 = tcg_ctx.code_gen_buffer_size;
-        void *buf2 = mmap(NULL, size1, PROT_WRITE | PROT_READ | PROT_EXEC,
-                          flags, -1, 0);
-        if (buf2 != MAP_FAILED) {
-            if (!cross_256mb(buf2, size1)) {
+        size_t size2;
+        void *buf2 = mmap(NULL, size + qemu_real_host_page_size,
+                          PROT_NONE, flags, -1, 0);
+        switch (buf2 != MAP_FAILED) {
+        case 1:
+            if (!cross_256mb(buf2, size)) {
                 /* Success!  Use the new buffer.  */
-                munmap(buf, size1);
-                return buf2;
+                munmap(buf, size);
+                break;
             }
             /* Failure.  Work with what we had.  */
-            munmap(buf2, size1);
+            munmap(buf2, size);
+            /* fallthru */
+        default:
+            /* Split the original buffer.  Free the smaller half.  */
+            buf2 = split_cross_256mb(buf, size);
+            size2 = tcg_ctx.code_gen_buffer_size;
+            if (buf == buf2) {
+                munmap(buf + size2 + qemu_real_host_page_size, size - size2);
+            } else {
+                munmap(buf, size - size2);
+            }
+            size = size2;
+            break;
         }
-
-        /* Split the original buffer.  Free the smaller half.  */
-        buf2 = split_cross_256mb(buf, size1);
-        size2 = tcg_ctx.code_gen_buffer_size;
-        munmap(buf + (buf == buf2 ? size2 : 0), size1 - size2);
-        return buf2;
+        buf = buf2;
     }
 #endif
 
+    /* Make the final buffer accessible.  The guard page at the end
+       will remain inaccessible with PROT_NONE.  */
+    mprotect(buf, size, PROT_WRITE | PROT_READ | PROT_EXEC);
+
+    /* Request large pages for the buffer.  */
+    qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
+
     return buf;
 }
-#else
-static inline void *alloc_code_gen_buffer(void)
-{
-    void *buf = g_try_malloc(tcg_ctx.code_gen_buffer_size);
-
-    if (buf == NULL) {
-        return NULL;
-    }
-
-#ifdef __mips__
-    if (cross_256mb(buf, tcg_ctx.code_gen_buffer_size)) {
-        void *buf2 = g_malloc(tcg_ctx.code_gen_buffer_size);
-        if (buf2 != NULL && !cross_256mb(buf2, size1)) {
-            /* Success!  Use the new buffer.  */
-            free(buf);
-            buf = buf2;
-        } else {
-            /* Failure.  Work with what we had.  Since this is malloc
-               and not mmap, we can't free the other half.  */
-            free(buf2);
-            buf = split_cross_256mb(buf, tcg_ctx.code_gen_buffer_size);
-        }
-    }
-#endif
-
-    map_exec(buf, tcg_ctx.code_gen_buffer_size);
-    return buf;
-}
-#endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
+#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */
 
 static inline void code_gen_alloc(size_t tb_size)
 {
@@ -688,24 +729,13 @@
         exit(1);
     }
 
-    qemu_madvise(tcg_ctx.code_gen_buffer, tcg_ctx.code_gen_buffer_size,
-            QEMU_MADV_HUGEPAGE);
+    /* Estimate a good size for the number of TBs we can support.  We
+       still haven't deducted the prologue from the buffer size here,
+       but that's minimal and won't affect the estimate much.  */
+    tcg_ctx.code_gen_max_blocks
+        = tcg_ctx.code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
+    tcg_ctx.tb_ctx.tbs = g_new(TranslationBlock, tcg_ctx.code_gen_max_blocks);
 
-    /* Steal room for the prologue at the end of the buffer.  This ensures
-       (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
-       from TB's to the prologue are going to be in range.  It also means
-       that we don't need to mark (additional) portions of the data segment
-       as executable.  */
-    tcg_ctx.code_gen_prologue = tcg_ctx.code_gen_buffer +
-            tcg_ctx.code_gen_buffer_size - 1024;
-    tcg_ctx.code_gen_buffer_size -= 1024;
-
-    tcg_ctx.code_gen_buffer_max_size = tcg_ctx.code_gen_buffer_size -
-        (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
-    tcg_ctx.code_gen_max_blocks = tcg_ctx.code_gen_buffer_size /
-            CODE_GEN_AVG_BLOCK_SIZE;
-    tcg_ctx.tb_ctx.tbs =
-            g_malloc(tcg_ctx.code_gen_max_blocks * sizeof(TranslationBlock));
     qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
 }
 
@@ -715,10 +745,8 @@
 void tcg_exec_init(unsigned long tb_size)
 {
     cpu_gen_init();
-    code_gen_alloc(tb_size);
-    tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer;
-    tcg_register_jit(tcg_ctx.code_gen_buffer, tcg_ctx.code_gen_buffer_size);
     page_init();
+    code_gen_alloc(tb_size);
 #if defined(CONFIG_SOFTMMU)
     /* There's no guest base to take into account, so go ahead and
        initialize the prologue now.  */
@@ -737,9 +765,7 @@
 {
     TranslationBlock *tb;
 
-    if (tcg_ctx.tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks ||
-        (tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) >=
-         tcg_ctx.code_gen_buffer_max_size) {
+    if (tcg_ctx.tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks) {
         return NULL;
     }
     tb = &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs++];
@@ -1034,28 +1060,98 @@
     TranslationBlock *tb;
     tb_page_addr_t phys_pc, phys_page2;
     target_ulong virt_page2;
-    int code_gen_size;
+    tcg_insn_unit *gen_code_buf;
+    int gen_code_size, search_size;
+#ifdef CONFIG_PROFILER
+    int64_t ti;
+#endif
 
     phys_pc = get_page_addr_code(env, pc);
     if (use_icount) {
         cflags |= CF_USE_ICOUNT;
     }
+
     tb = tb_alloc(pc);
-    if (!tb) {
+    if (unlikely(!tb)) {
+ buffer_overflow:
         /* flush must be done */
         tb_flush(cpu);
         /* cannot fail at this point */
         tb = tb_alloc(pc);
+        assert(tb != NULL);
         /* Don't forget to invalidate previous TB info.  */
         tcg_ctx.tb_ctx.tb_invalidated_flag = 1;
     }
-    tb->tc_ptr = tcg_ctx.code_gen_ptr;
+
+    gen_code_buf = tcg_ctx.code_gen_ptr;
+    tb->tc_ptr = gen_code_buf;
     tb->cs_base = cs_base;
     tb->flags = flags;
     tb->cflags = cflags;
-    cpu_gen_code(env, tb, &code_gen_size);
-    tcg_ctx.code_gen_ptr = (void *)(((uintptr_t)tcg_ctx.code_gen_ptr +
-            code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
+
+#ifdef CONFIG_PROFILER
+    tcg_ctx.tb_count1++; /* includes aborted translations because of
+                       exceptions */
+    ti = profile_getclock();
+#endif
+
+    tcg_func_start(&tcg_ctx);
+
+    gen_intermediate_code(env, tb);
+
+    trace_translate_block(tb, tb->pc, tb->tc_ptr);
+
+    /* generate machine code */
+    tb->tb_next_offset[0] = 0xffff;
+    tb->tb_next_offset[1] = 0xffff;
+    tcg_ctx.tb_next_offset = tb->tb_next_offset;
+#ifdef USE_DIRECT_JUMP
+    tcg_ctx.tb_jmp_offset = tb->tb_jmp_offset;
+    tcg_ctx.tb_next = NULL;
+#else
+    tcg_ctx.tb_jmp_offset = NULL;
+    tcg_ctx.tb_next = tb->tb_next;
+#endif
+
+#ifdef CONFIG_PROFILER
+    tcg_ctx.tb_count++;
+    tcg_ctx.interm_time += profile_getclock() - ti;
+    tcg_ctx.code_time -= profile_getclock();
+#endif
+
+    /* ??? Overflow could be handled better here.  In particular, we
+       don't need to re-do gen_intermediate_code, nor should we re-do
+       the tcg optimization currently hidden inside tcg_gen_code.  All
+       that should be required is to flush the TBs, allocate a new TB,
+       re-initialize it per above, and re-do the actual code generation.  */
+    gen_code_size = tcg_gen_code(&tcg_ctx, gen_code_buf);
+    if (unlikely(gen_code_size < 0)) {
+        goto buffer_overflow;
+    }
+    search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
+    if (unlikely(search_size < 0)) {
+        goto buffer_overflow;
+    }
+
+#ifdef CONFIG_PROFILER
+    tcg_ctx.code_time += profile_getclock();
+    tcg_ctx.code_in_len += tb->size;
+    tcg_ctx.code_out_len += gen_code_size;
+    tcg_ctx.search_out_len += search_size;
+#endif
+
+#ifdef DEBUG_DISAS
+    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
+        qemu_log("OUT: [size=%d]\n", gen_code_size);
+        log_disas(tb->tc_ptr, gen_code_size);
+        qemu_log("\n");
+        qemu_log_flush();
+    }
+#endif
+
+    tcg_ctx.code_gen_ptr = (void *)
+        ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
+                 CODE_GEN_ALIGN);
 
     /* check next page if needed */
     virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
@@ -1606,7 +1702,7 @@
     cpu_fprintf(f, "Translation buffer state:\n");
     cpu_fprintf(f, "gen code size       %td/%zd\n",
                 tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer,
-                tcg_ctx.code_gen_buffer_max_size);
+                tcg_ctx.code_gen_highwater - tcg_ctx.code_gen_buffer);
     cpu_fprintf(f, "TB count            %d/%d\n",
             tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.code_gen_max_blocks);
     cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
diff --git a/ui/Makefile.objs b/ui/Makefile.objs
index 0034fbb..728393c 100644
--- a/ui/Makefile.objs
+++ b/ui/Makefile.objs
@@ -31,11 +31,17 @@
 common-obj-y += shader.o
 common-obj-y += console-gl.o
 common-obj-y += egl-helpers.o
+common-obj-y += egl-context.o
+ifeq ($(CONFIG_GTK_GL),y)
+common-obj-$(CONFIG_GTK) += gtk-gl-area.o
+else
 common-obj-$(CONFIG_GTK) += gtk-egl.o
 endif
+endif
 
 gtk.o-cflags := $(GTK_CFLAGS) $(VTE_CFLAGS)
 gtk-egl.o-cflags := $(GTK_CFLAGS) $(VTE_CFLAGS) $(OPENGL_CFLAGS)
+gtk-gl-area.o-cflags := $(GTK_CFLAGS) $(VTE_CFLAGS) $(OPENGL_CFLAGS)
 shader.o-cflags += $(OPENGL_CFLAGS)
 console-gl.o-cflags += $(OPENGL_CFLAGS)
 egl-helpers.o-cflags += $(OPENGL_CFLAGS)
diff --git a/ui/console-gl.c b/ui/console-gl.c
index cb45cf8..baf397b 100644
--- a/ui/console-gl.c
+++ b/ui/console-gl.c
@@ -33,6 +33,7 @@
 
 struct ConsoleGLState {
     GLint texture_blit_prog;
+    GLint texture_blit_vao;
 };
 
 /* ---------------------------------------------------------------------- */
@@ -47,6 +48,9 @@
         exit(1);
     }
 
+    gls->texture_blit_vao =
+        qemu_gl_init_texture_blit(gls->texture_blit_prog);
+
     return gls;
 }
 
@@ -131,7 +135,8 @@
     glClearColor(0.1f, 0.1f, 0.1f, 0.0f);
     glClear(GL_COLOR_BUFFER_BIT);
 
-    qemu_gl_run_texture_blit(gls->texture_blit_prog);
+    qemu_gl_run_texture_blit(gls->texture_blit_prog,
+                             gls->texture_blit_vao);
 }
 
 void surface_gl_destroy_texture(ConsoleGLState *gls,
diff --git a/ui/console.c b/ui/console.c
index 75fc492..31f0d35 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -121,6 +121,7 @@
     DisplayState *ds;
     DisplaySurface *surface;
     int dcls;
+    DisplayChangeListener *gl;
 
     /* Graphic console state.  */
     Object *device;
@@ -1332,6 +1333,11 @@
     g_free(surface);
 }
 
+bool console_has_gl(QemuConsole *con)
+{
+    return con->gl != NULL;
+}
+
 void register_displaychangelistener(DisplayChangeListener *dcl)
 {
     static const char nodev[] =
@@ -1339,6 +1345,17 @@
     static DisplaySurface *dummy;
     QemuConsole *con;
 
+    if (dcl->ops->dpy_gl_ctx_create) {
+        /* display has opengl support */
+        assert(dcl->con);
+        if (dcl->con->gl) {
+            fprintf(stderr, "can't register two opengl displays (%s, %s)\n",
+                    dcl->ops->dpy_name, dcl->con->gl->ops->dpy_name);
+            exit(1);
+        }
+        dcl->con->gl = dcl;
+    }
+
     trace_displaychangelistener_register(dcl, dcl->ops->dpy_name);
     dcl->ds = get_alloc_displaystate();
     QLIST_INSERT_HEAD(&dcl->ds->listeners, dcl, next);
@@ -1417,9 +1434,13 @@
 {
     DisplayState *s = con->ds;
     DisplayChangeListener *dcl;
-    int width = surface_width(con->surface);
-    int height = surface_height(con->surface);
+    int width = w;
+    int height = h;
 
+    if (con->surface) {
+        width = surface_width(con->surface);
+        height = surface_height(con->surface);
+    }
     x = MAX(x, 0);
     y = MAX(y, 0);
     x = MIN(x, width);
@@ -1619,6 +1640,48 @@
     return false;
 }
 
+QEMUGLContext dpy_gl_ctx_create(QemuConsole *con,
+                                struct QEMUGLParams *qparams)
+{
+    assert(con->gl);
+    return con->gl->ops->dpy_gl_ctx_create(con->gl, qparams);
+}
+
+void dpy_gl_ctx_destroy(QemuConsole *con, QEMUGLContext ctx)
+{
+    assert(con->gl);
+    con->gl->ops->dpy_gl_ctx_destroy(con->gl, ctx);
+}
+
+int dpy_gl_ctx_make_current(QemuConsole *con, QEMUGLContext ctx)
+{
+    assert(con->gl);
+    return con->gl->ops->dpy_gl_ctx_make_current(con->gl, ctx);
+}
+
+QEMUGLContext dpy_gl_ctx_get_current(QemuConsole *con)
+{
+    assert(con->gl);
+    return con->gl->ops->dpy_gl_ctx_get_current(con->gl);
+}
+
+void dpy_gl_scanout(QemuConsole *con,
+                    uint32_t backing_id, bool backing_y_0_top,
+                    uint32_t x, uint32_t y, uint32_t width, uint32_t height)
+{
+    assert(con->gl);
+    con->gl->ops->dpy_gl_scanout(con->gl, backing_id,
+                                 backing_y_0_top,
+                                 x, y, width, height);
+}
+
+void dpy_gl_update(QemuConsole *con,
+                   uint32_t x, uint32_t y, uint32_t w, uint32_t h)
+{
+    assert(con->gl);
+    con->gl->ops->dpy_gl_update(con->gl, x, y, w, h);
+}
+
 /***********************************************************/
 /* register display */
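
For orientation, a hedged sketch of how a device model might drive the per-console GL hooks added above. The helper function and its arguments are hypothetical; the dpy_gl_*() calls, the console_has_gl() check, and the QEMUGLParams fields match what this patch introduces, and within QEMU the code would need "ui/console.h".

    /* Hypothetical device-side helper, for illustration only. */
    static void present_frame_sketch(QemuConsole *con, uint32_t tex_id,
                                     uint32_t width, uint32_t height)
    {
        QEMUGLParams params = { .major_ver = 3, .minor_ver = 0 }; /* versions illustrative */
        QEMUGLContext ctx;

        if (!console_has_gl(con)) {
            return;    /* the display backend did not register GL ops */
        }

        ctx = dpy_gl_ctx_create(con, &params);
        dpy_gl_ctx_make_current(con, ctx);

        /* point the console's scanout at our texture, then flush the region */
        dpy_gl_scanout(con, tex_id, false, 0, 0, width, height);
        dpy_gl_update(con, 0, 0, width, height);

        dpy_gl_ctx_destroy(con, ctx);
    }
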
 
diff --git a/ui/egl-context.c b/ui/egl-context.c
new file mode 100644
index 0000000..40102e3
--- /dev/null
+++ b/ui/egl-context.c
@@ -0,0 +1,34 @@
+#include "qemu-common.h"
+#include "ui/egl-context.h"
+
+QEMUGLContext qemu_egl_create_context(DisplayChangeListener *dcl,
+                                      QEMUGLParams *params)
+{
+   EGLContext ctx;
+   EGLint ctx_att[] = {
+      EGL_CONTEXT_CLIENT_VERSION, params->major_ver,
+      EGL_CONTEXT_MINOR_VERSION_KHR, params->minor_ver,
+      EGL_NONE
+   };
+
+   ctx = eglCreateContext(qemu_egl_display, qemu_egl_config,
+                          eglGetCurrentContext(), ctx_att);
+   return ctx;
+}
+
+void qemu_egl_destroy_context(DisplayChangeListener *dcl, QEMUGLContext ctx)
+{
+    eglDestroyContext(qemu_egl_display, ctx);
+}
+
+int qemu_egl_make_context_current(DisplayChangeListener *dcl,
+                                  QEMUGLContext ctx)
+{
+   return eglMakeCurrent(qemu_egl_display,
+                         EGL_NO_SURFACE, EGL_NO_SURFACE, ctx);
+}
+
+QEMUGLContext qemu_egl_get_current_context(DisplayChangeListener *dcl)
+{
+    return eglGetCurrentContext();
+}
diff --git a/ui/gtk-egl.c b/ui/gtk-egl.c
index 15b41f2..500c42c 100644
--- a/ui/gtk-egl.c
+++ b/ui/gtk-egl.c
@@ -21,6 +21,29 @@
 
 #include "sysemu/sysemu.h"
 
+static void gtk_egl_set_scanout_mode(VirtualConsole *vc, bool scanout)
+{
+    if (vc->gfx.scanout_mode == scanout) {
+        return;
+    }
+
+    vc->gfx.scanout_mode = scanout;
+    if (!vc->gfx.scanout_mode) {
+        if (vc->gfx.fbo_id) {
+            glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
+                                      GL_COLOR_ATTACHMENT0_EXT,
+                                      GL_TEXTURE_2D, 0, 0);
+            glBindFramebuffer(GL_FRAMEBUFFER_EXT, 0);
+            glDeleteFramebuffers(1, &vc->gfx.fbo_id);
+            vc->gfx.fbo_id = 0;
+        }
+        if (vc->gfx.surface) {
+            surface_gl_destroy_texture(vc->gfx.gls, vc->gfx.ds);
+            surface_gl_create_texture(vc->gfx.gls, vc->gfx.ds);
+        }
+    }
+}
+
 /** DisplayState Callbacks (opengl version) **/
 
 void gd_egl_init(VirtualConsole *vc)
@@ -50,19 +73,26 @@
     GdkWindow *window;
     int ww, wh;
 
-    if (!vc->gfx.gls || !vc->gfx.ds) {
+    if (!vc->gfx.gls) {
         return;
     }
 
-    eglMakeCurrent(qemu_egl_display, vc->gfx.esurface,
-                   vc->gfx.esurface, vc->gfx.ectx);
+    if (vc->gfx.scanout_mode) {
+        gd_egl_scanout_flush(&vc->gfx.dcl, 0, 0, vc->gfx.w, vc->gfx.h);
+    } else {
+        if (!vc->gfx.ds) {
+            return;
+        }
+        eglMakeCurrent(qemu_egl_display, vc->gfx.esurface,
+                       vc->gfx.esurface, vc->gfx.ectx);
 
-    window = gtk_widget_get_window(vc->gfx.drawing_area);
-    gdk_drawable_get_size(window, &ww, &wh);
-    surface_gl_setup_viewport(vc->gfx.gls, vc->gfx.ds, ww, wh);
-    surface_gl_render_texture(vc->gfx.gls, vc->gfx.ds);
+        window = gtk_widget_get_window(vc->gfx.drawing_area);
+        gdk_drawable_get_size(window, &ww, &wh);
+        surface_gl_setup_viewport(vc->gfx.gls, vc->gfx.ds, ww, wh);
+        surface_gl_render_texture(vc->gfx.gls, vc->gfx.ds);
 
-    eglSwapBuffers(qemu_egl_display, vc->gfx.esurface);
+        eglSwapBuffers(qemu_egl_display, vc->gfx.esurface);
+    }
 }
 
 void gd_egl_update(DisplayChangeListener *dcl,
@@ -99,6 +129,7 @@
 
     if (vc->gfx.glupdates) {
         vc->gfx.glupdates = 0;
+        gtk_egl_set_scanout_mode(vc, false);
         gd_egl_draw(vc);
     }
 }
@@ -128,6 +159,81 @@
     }
 }
 
+QEMUGLContext gd_egl_create_context(DisplayChangeListener *dcl,
+                                    QEMUGLParams *params)
+{
+    VirtualConsole *vc = container_of(dcl, VirtualConsole, gfx.dcl);
+
+    eglMakeCurrent(qemu_egl_display, vc->gfx.esurface,
+                   vc->gfx.esurface, vc->gfx.ectx);
+    return qemu_egl_create_context(dcl, params);
+}
+
+void gd_egl_scanout(DisplayChangeListener *dcl,
+                    uint32_t backing_id, bool backing_y_0_top,
+                    uint32_t x, uint32_t y,
+                    uint32_t w, uint32_t h)
+{
+    VirtualConsole *vc = container_of(dcl, VirtualConsole, gfx.dcl);
+
+    vc->gfx.x = x;
+    vc->gfx.y = y;
+    vc->gfx.w = w;
+    vc->gfx.h = h;
+    vc->gfx.tex_id = backing_id;
+    vc->gfx.y0_top = backing_y_0_top;
+
+    eglMakeCurrent(qemu_egl_display, vc->gfx.esurface,
+                   vc->gfx.esurface, vc->gfx.ectx);
+
+    if (vc->gfx.tex_id == 0 || vc->gfx.w == 0 || vc->gfx.h == 0) {
+        gtk_egl_set_scanout_mode(vc, false);
+        return;
+    }
+
+    gtk_egl_set_scanout_mode(vc, true);
+    if (!vc->gfx.fbo_id) {
+        glGenFramebuffers(1, &vc->gfx.fbo_id);
+    }
+
+    glBindFramebuffer(GL_FRAMEBUFFER_EXT, vc->gfx.fbo_id);
+    glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT,
+                              GL_TEXTURE_2D, vc->gfx.tex_id, 0);
+}
+
+void gd_egl_scanout_flush(DisplayChangeListener *dcl,
+                          uint32_t x, uint32_t y, uint32_t w, uint32_t h)
+{
+    VirtualConsole *vc = container_of(dcl, VirtualConsole, gfx.dcl);
+    GdkWindow *window;
+    int ww, wh, y1, y2;
+
+    if (!vc->gfx.scanout_mode) {
+        return;
+    }
+    if (!vc->gfx.fbo_id) {
+        return;
+    }
+
+    eglMakeCurrent(qemu_egl_display, vc->gfx.esurface,
+                   vc->gfx.esurface, vc->gfx.ectx);
+
+    glBindFramebuffer(GL_READ_FRAMEBUFFER, vc->gfx.fbo_id);
+    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
+
+    window = gtk_widget_get_window(vc->gfx.drawing_area);
+    gdk_drawable_get_size(window, &ww, &wh);
+    glViewport(0, 0, ww, wh);
+    y1 = vc->gfx.y0_top ? 0 : vc->gfx.h;
+    y2 = vc->gfx.y0_top ? vc->gfx.h : 0;
+    glBlitFramebuffer(0, y1, vc->gfx.w, y2,
+                      0, 0, ww, wh,
+                      GL_COLOR_BUFFER_BIT, GL_NEAREST);
+    glBindFramebuffer(GL_FRAMEBUFFER_EXT, vc->gfx.fbo_id);
+
+    eglSwapBuffers(qemu_egl_display, vc->gfx.esurface);
+}
+
 void gtk_egl_init(void)
 {
     GdkDisplay *gdk_display = gdk_display_get_default();
@@ -139,3 +245,12 @@
 
     display_opengl = 1;
 }
+
+int gd_egl_make_current(DisplayChangeListener *dcl,
+                        QEMUGLContext ctx)
+{
+    VirtualConsole *vc = container_of(dcl, VirtualConsole, gfx.dcl);
+
+    return eglMakeCurrent(qemu_egl_display, vc->gfx.esurface,
+                          vc->gfx.esurface, ctx);
+}
diff --git a/ui/gtk-gl-area.c b/ui/gtk-gl-area.c
new file mode 100644
index 0000000..dec3edb
--- /dev/null
+++ b/ui/gtk-gl-area.c
@@ -0,0 +1,223 @@
+/*
+ * GTK UI -- glarea opengl code.
+ *
+ * Requires 3.16+ (GtkGLArea widget).
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+
+#include "trace.h"
+
+#include "ui/console.h"
+#include "ui/gtk.h"
+#include "ui/egl-helpers.h"
+
+#include "sysemu/sysemu.h"
+
+static void gtk_gl_area_set_scanout_mode(VirtualConsole *vc, bool scanout)
+{
+    if (vc->gfx.scanout_mode == scanout) {
+        return;
+    }
+
+    vc->gfx.scanout_mode = scanout;
+    if (!vc->gfx.scanout_mode) {
+        if (vc->gfx.fbo_id) {
+            glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
+                                      GL_COLOR_ATTACHMENT0_EXT,
+                                      GL_TEXTURE_2D, 0, 0);
+            glBindFramebuffer(GL_FRAMEBUFFER_EXT, 0);
+            glDeleteFramebuffers(1, &vc->gfx.fbo_id);
+            vc->gfx.fbo_id = 0;
+        }
+        if (vc->gfx.surface) {
+            surface_gl_destroy_texture(vc->gfx.gls, vc->gfx.ds);
+            surface_gl_create_texture(vc->gfx.gls, vc->gfx.ds);
+        }
+    }
+}
+
+/** DisplayState Callbacks (opengl version) **/
+
+void gd_gl_area_draw(VirtualConsole *vc)
+{
+    int ww, wh, y1, y2;
+
+    if (!vc->gfx.gls) {
+        return;
+    }
+
+    gtk_gl_area_make_current(GTK_GL_AREA(vc->gfx.drawing_area));
+    ww = gtk_widget_get_allocated_width(vc->gfx.drawing_area);
+    wh = gtk_widget_get_allocated_height(vc->gfx.drawing_area);
+
+    if (vc->gfx.scanout_mode) {
+        if (!vc->gfx.fbo_id) {
+            return;
+        }
+
+        glBindFramebuffer(GL_READ_FRAMEBUFFER, vc->gfx.fbo_id);
+        /* GtkGLArea sets GL_DRAW_FRAMEBUFFER for us */
+
+        glViewport(0, 0, ww, wh);
+        y1 = vc->gfx.y0_top ? 0 : vc->gfx.h;
+        y2 = vc->gfx.y0_top ? vc->gfx.h : 0;
+        glBlitFramebuffer(0, y1, vc->gfx.w, y2,
+                          0, 0, ww, wh,
+                          GL_COLOR_BUFFER_BIT, GL_NEAREST);
+    } else {
+        if (!vc->gfx.ds) {
+            return;
+        }
+        gtk_gl_area_make_current(GTK_GL_AREA(vc->gfx.drawing_area));
+
+        surface_gl_setup_viewport(vc->gfx.gls, vc->gfx.ds, ww, wh);
+        surface_gl_render_texture(vc->gfx.gls, vc->gfx.ds);
+    }
+}
+
+void gd_gl_area_update(DisplayChangeListener *dcl,
+                   int x, int y, int w, int h)
+{
+    VirtualConsole *vc = container_of(dcl, VirtualConsole, gfx.dcl);
+
+    if (!vc->gfx.gls || !vc->gfx.ds) {
+        return;
+    }
+
+    gtk_gl_area_make_current(GTK_GL_AREA(vc->gfx.drawing_area));
+    surface_gl_update_texture(vc->gfx.gls, vc->gfx.ds, x, y, w, h);
+    vc->gfx.glupdates++;
+}
+
+void gd_gl_area_refresh(DisplayChangeListener *dcl)
+{
+    VirtualConsole *vc = container_of(dcl, VirtualConsole, gfx.dcl);
+
+    if (!vc->gfx.gls) {
+        if (!gtk_widget_get_realized(vc->gfx.drawing_area)) {
+            return;
+        }
+        gtk_gl_area_make_current(GTK_GL_AREA(vc->gfx.drawing_area));
+        vc->gfx.gls = console_gl_init_context();
+        if (vc->gfx.ds) {
+            surface_gl_create_texture(vc->gfx.gls, vc->gfx.ds);
+        }
+    }
+
+    graphic_hw_update(dcl->con);
+
+    if (vc->gfx.glupdates) {
+        vc->gfx.glupdates = 0;
+        gtk_gl_area_set_scanout_mode(vc, false);
+        gtk_gl_area_queue_render(GTK_GL_AREA(vc->gfx.drawing_area));
+    }
+}
+
+void gd_gl_area_switch(DisplayChangeListener *dcl,
+                       DisplaySurface *surface)
+{
+    VirtualConsole *vc = container_of(dcl, VirtualConsole, gfx.dcl);
+    bool resized = true;
+
+    trace_gd_switch(vc->label, surface_width(surface), surface_height(surface));
+
+    if (vc->gfx.ds &&
+        surface_width(vc->gfx.ds) == surface_width(surface) &&
+        surface_height(vc->gfx.ds) == surface_height(surface)) {
+        resized = false;
+    }
+
+    if (vc->gfx.gls) {
+        gtk_gl_area_make_current(GTK_GL_AREA(vc->gfx.drawing_area));
+        surface_gl_destroy_texture(vc->gfx.gls, vc->gfx.ds);
+        surface_gl_create_texture(vc->gfx.gls, surface);
+    }
+    vc->gfx.ds = surface;
+
+    if (resized) {
+        gd_update_windowsize(vc);
+    }
+}
+
+QEMUGLContext gd_gl_area_create_context(DisplayChangeListener *dcl,
+                                        QEMUGLParams *params)
+{
+    VirtualConsole *vc = container_of(dcl, VirtualConsole, gfx.dcl);
+    GdkWindow *window;
+    GdkGLContext *ctx;
+    GError *err = NULL;
+
+    gtk_gl_area_make_current(GTK_GL_AREA(vc->gfx.drawing_area));
+    window = gtk_widget_get_window(vc->gfx.drawing_area);
+    ctx = gdk_window_create_gl_context(window, &err);
+    gdk_gl_context_set_required_version(ctx,
+                                        params->major_ver,
+                                        params->minor_ver);
+    gdk_gl_context_realize(ctx, &err);
+    return ctx;
+}
+
+void gd_gl_area_destroy_context(DisplayChangeListener *dcl, QEMUGLContext ctx)
+{
+    /* FIXME */
+}
+
+void gd_gl_area_scanout(DisplayChangeListener *dcl,
+                        uint32_t backing_id, bool backing_y_0_top,
+                        uint32_t x, uint32_t y,
+                        uint32_t w, uint32_t h)
+{
+    VirtualConsole *vc = container_of(dcl, VirtualConsole, gfx.dcl);
+
+    vc->gfx.x = x;
+    vc->gfx.y = y;
+    vc->gfx.w = w;
+    vc->gfx.h = h;
+    vc->gfx.tex_id = backing_id;
+    vc->gfx.y0_top = backing_y_0_top;
+
+    gtk_gl_area_make_current(GTK_GL_AREA(vc->gfx.drawing_area));
+
+    if (vc->gfx.tex_id == 0 || vc->gfx.w == 0 || vc->gfx.h == 0) {
+        gtk_gl_area_set_scanout_mode(vc, false);
+        return;
+    }
+
+    gtk_gl_area_set_scanout_mode(vc, true);
+    if (!vc->gfx.fbo_id) {
+        glGenFramebuffers(1, &vc->gfx.fbo_id);
+    }
+
+    glBindFramebuffer(GL_FRAMEBUFFER_EXT, vc->gfx.fbo_id);
+    glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT,
+                              GL_TEXTURE_2D, vc->gfx.tex_id, 0);
+}
+
+void gd_gl_area_scanout_flush(DisplayChangeListener *dcl,
+                          uint32_t x, uint32_t y, uint32_t w, uint32_t h)
+{
+    VirtualConsole *vc = container_of(dcl, VirtualConsole, gfx.dcl);
+
+    gtk_gl_area_queue_render(GTK_GL_AREA(vc->gfx.drawing_area));
+}
+
+void gtk_gl_area_init(void)
+{
+    display_opengl = 1;
+}
+
+QEMUGLContext gd_gl_area_get_current_context(DisplayChangeListener *dcl)
+{
+    return gdk_gl_context_get_current();
+}
+
+int gd_gl_area_make_current(DisplayChangeListener *dcl,
+                            QEMUGLContext ctx)
+{
+    gdk_gl_context_make_current(ctx);
+    return 0;
+}
diff --git a/ui/gtk.c b/ui/gtk.c
index 3057cdc..2947838 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -367,6 +367,12 @@
     GtkWidget *area = vc->gfx.drawing_area;
     int ww, wh;
     gdk_drawable_get_size(gtk_widget_get_window(area), &ww, &wh);
+#if defined(CONFIG_GTK_GL)
+    if (vc->gfx.gls) {
+        gtk_gl_area_queue_render(GTK_GL_AREA(vc->gfx.drawing_area));
+        return;
+    }
+#endif
     gtk_widget_queue_draw_area(area, 0, 0, ww, wh);
 }
 
@@ -607,6 +613,27 @@
 
 /** DisplayState Callbacks (opengl version) **/
 
+#if defined(CONFIG_GTK_GL)
+
+static const DisplayChangeListenerOps dcl_gl_area_ops = {
+    .dpy_name             = "gtk-egl",
+    .dpy_gfx_update       = gd_gl_area_update,
+    .dpy_gfx_switch       = gd_gl_area_switch,
+    .dpy_gfx_check_format = console_gl_check_format,
+    .dpy_refresh          = gd_gl_area_refresh,
+    .dpy_mouse_set        = gd_mouse_set,
+    .dpy_cursor_define    = gd_cursor_define,
+
+    .dpy_gl_ctx_create       = gd_gl_area_create_context,
+    .dpy_gl_ctx_destroy      = gd_gl_area_destroy_context,
+    .dpy_gl_ctx_make_current = gd_gl_area_make_current,
+    .dpy_gl_ctx_get_current  = gd_gl_area_get_current_context,
+    .dpy_gl_scanout          = gd_gl_area_scanout,
+    .dpy_gl_update           = gd_gl_area_scanout_flush,
+};
+
+#else
+
 static const DisplayChangeListenerOps dcl_egl_ops = {
     .dpy_name             = "gtk-egl",
     .dpy_gfx_update       = gd_egl_update,
@@ -615,9 +642,17 @@
     .dpy_refresh          = gd_egl_refresh,
     .dpy_mouse_set        = gd_mouse_set,
     .dpy_cursor_define    = gd_cursor_define,
+
+    .dpy_gl_ctx_create       = gd_egl_create_context,
+    .dpy_gl_ctx_destroy      = qemu_egl_destroy_context,
+    .dpy_gl_ctx_make_current = gd_egl_make_current,
+    .dpy_gl_ctx_get_current  = qemu_egl_get_current_context,
+    .dpy_gl_scanout          = gd_egl_scanout,
+    .dpy_gl_update           = gd_egl_scanout_flush,
 };
 
-#endif
+#endif /* CONFIG_GTK_GL */
+#endif /* CONFIG_OPENGL */
 
 /** QEMU Events **/
 
@@ -667,6 +702,39 @@
     return TRUE;
 }
 
+static void gd_set_ui_info(VirtualConsole *vc, gint width, gint height)
+{
+    QemuUIInfo info;
+
+    memset(&info, 0, sizeof(info));
+    info.width = width;
+    info.height = height;
+    dpy_set_ui_info(vc->gfx.dcl.con, &info);
+}
+
+#if defined(CONFIG_GTK_GL)
+
+static gboolean gd_render_event(GtkGLArea *area, GdkGLContext *context,
+                                void *opaque)
+{
+    VirtualConsole *vc = opaque;
+
+    if (vc->gfx.gls) {
+        gd_gl_area_draw(vc);
+    }
+    return TRUE;
+}
+
+static void gd_resize_event(GtkGLArea *area,
+                            gint width, gint height, gpointer *opaque)
+{
+    VirtualConsole *vc = (void *)opaque;
+
+    gd_set_ui_info(vc, width, height);
+}
+
+#endif
+
 static gboolean gd_draw_event(GtkWidget *widget, cairo_t *cr, void *opaque)
 {
     VirtualConsole *vc = opaque;
@@ -677,8 +745,13 @@
 
 #if defined(CONFIG_OPENGL)
     if (vc->gfx.gls) {
+#if defined(CONFIG_GTK_GL)
+        /* invoke render callback please */
+        return FALSE;
+#else
         gd_egl_draw(vc);
         return TRUE;
+#endif
     }
 #endif
 
@@ -1466,12 +1539,8 @@
                              GdkEventConfigure *cfg, gpointer opaque)
 {
     VirtualConsole *vc = opaque;
-    QemuUIInfo info;
 
-    memset(&info, 0, sizeof(info));
-    info.width = cfg->width;
-    info.height = cfg->height;
-    dpy_set_ui_info(vc->gfx.dcl.con, &info);
+    gd_set_ui_info(vc, cfg->width, cfg->height);
     return FALSE;
 }
 
@@ -1628,6 +1697,15 @@
 #if GTK_CHECK_VERSION(3, 0, 0)
     g_signal_connect(vc->gfx.drawing_area, "draw",
                      G_CALLBACK(gd_draw_event), vc);
+#if defined(CONFIG_GTK_GL)
+    if (display_opengl) {
+        /* wire up GtkGlArea events */
+        g_signal_connect(vc->gfx.drawing_area, "render",
+                         G_CALLBACK(gd_render_event), vc);
+        g_signal_connect(vc->gfx.drawing_area, "resize",
+                         G_CALLBACK(gd_resize_event), vc);
+    }
+#endif
 #else
     g_signal_connect(vc->gfx.drawing_area, "expose-event",
                      G_CALLBACK(gd_expose_event), vc);
@@ -1736,7 +1814,37 @@
     vc->gfx.scale_x = 1.0;
     vc->gfx.scale_y = 1.0;
 
-    vc->gfx.drawing_area = gtk_drawing_area_new();
+#if defined(CONFIG_OPENGL)
+    if (display_opengl) {
+#if defined(CONFIG_GTK_GL)
+        vc->gfx.drawing_area = gtk_gl_area_new();
+        vc->gfx.dcl.ops = &dcl_gl_area_ops;
+#else
+        vc->gfx.drawing_area = gtk_drawing_area_new();
+        /*
+         * gtk_widget_set_double_buffered() was deprecated in 3.14.
+         * It is required for opengl rendering on X11 though.  A
+         * proper replacement (native opengl support) is only
+         * available in 3.16+.  Silence the warning if possible.
+         */
+#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#endif
+        gtk_widget_set_double_buffered(vc->gfx.drawing_area, FALSE);
+#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
+#pragma GCC diagnostic pop
+#endif
+        vc->gfx.dcl.ops = &dcl_egl_ops;
+#endif /* CONFIG_GTK_GL */
+    } else
+#endif
+    {
+        vc->gfx.drawing_area = gtk_drawing_area_new();
+        vc->gfx.dcl.ops = &dcl_ops;
+    }
+
+
     gtk_widget_add_events(vc->gfx.drawing_area,
                           GDK_POINTER_MOTION_MASK |
                           GDK_BUTTON_PRESS_MASK |
@@ -1754,29 +1862,6 @@
     gtk_notebook_append_page(GTK_NOTEBOOK(s->notebook),
                              vc->tab_item, gtk_label_new(vc->label));
 
-#if defined(CONFIG_OPENGL)
-    if (display_opengl) {
-        /*
-         * gtk_widget_set_double_buffered() was deprecated in 3.14.
-         * It is required for opengl rendering on X11 though.  A
-         * proper replacement (native opengl support) is only
-         * available in 3.16+.  Silence the warning if possible.
-         */
-#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-#endif
-        gtk_widget_set_double_buffered(vc->gfx.drawing_area, FALSE);
-#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
-#pragma GCC diagnostic pop
-#endif
-        vc->gfx.dcl.ops = &dcl_egl_ops;
-    } else
-#endif
-    {
-        vc->gfx.dcl.ops = &dcl_ops;
-    }
-
     vc->gfx.dcl.con = con;
     register_displaychangelistener(&vc->gfx.dcl);
 
@@ -2059,8 +2144,12 @@
         break;
     case 1: /* on */
 #if defined(CONFIG_OPENGL)
+#if defined(CONFIG_GTK_GL)
+        gtk_gl_area_init();
+#else
         gtk_egl_init();
 #endif
+#endif
         break;
     default:
         g_assert_not_reached();
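
Editor's note: the gtk.c hunks above route GL drawing through GtkGLArea when
CONFIG_GTK_GL is set: the cairo "draw" handler returns FALSE so GTK's default
handler runs and emits the "render" signal, where the actual GL drawing
happens.  The following is a minimal standalone sketch of that control flow,
using hypothetical example_* names; it is an illustration under those
assumptions, not code from this series.

#include <gtk/gtk.h>        /* GtkGLArea needs GTK+ 3.16 or newer */
#include <epoxy/gl.h>       /* GL entry points, as used elsewhere in ui/ */

/* Returning FALSE from the widget's "draw" handler lets GtkGLArea's default
 * handler run, which makes the GL context current and emits "render". */
static gboolean example_draw(GtkWidget *widget, cairo_t *cr, gpointer opaque)
{
    return FALSE;           /* fall through to GtkGLArea's GL rendering */
}

static gboolean example_render(GtkGLArea *area, GdkGLContext *ctx,
                               gpointer opaque)
{
    /* the GL context is already current here; issue the draw calls */
    glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
    glClear(GL_COLOR_BUFFER_BIT);
    return TRUE;            /* rendering handled */
}

static void example_wire_up(GtkWidget *gl_area, gpointer user_data)
{
    g_signal_connect(gl_area, "draw",   G_CALLBACK(example_draw),   user_data);
    g_signal_connect(gl_area, "render", G_CALLBACK(example_render), user_data);
}
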
diff --git a/ui/sdl2-2d.c b/ui/sdl2-2d.c
index d0b340f..191ee3b 100644
--- a/ui/sdl2-2d.c
+++ b/ui/sdl2-2d.c
@@ -45,10 +45,23 @@
         return;
     }
 
+    /*
+     * SDL2 seems to do some double-buffering, and updating only the
+     * changed areas results in only one of the two buffers being
+     * updated, which flickers a lot.  So let's not try to be clever
+     * and simply do a full update every time ...
+     */
+#if 0
     rect.x = x;
     rect.y = y;
     rect.w = w;
     rect.h = h;
+#else
+    rect.x = 0;
+    rect.y = 0;
+    rect.w = surface_width(surf);
+    rect.h = surface_height(surf);
+#endif
 
     SDL_UpdateTexture(scon->texture, NULL, surface_data(surf),
                       surface_stride(surf));
diff --git a/ui/shader.c b/ui/shader.c
index 52a4632..0588655 100644
--- a/ui/shader.c
+++ b/ui/shader.c
@@ -29,21 +29,42 @@
 
 /* ---------------------------------------------------------------------- */
 
-void qemu_gl_run_texture_blit(GLint texture_blit_prog)
+GLuint qemu_gl_init_texture_blit(GLint texture_blit_prog)
 {
-    GLfloat in_position[] = {
+    static const GLfloat in_position[] = {
         -1, -1,
         1,  -1,
         -1,  1,
         1,   1,
     };
     GLint l_position;
+    GLuint vao, buffer;
 
-    glUseProgram(texture_blit_prog);
+    glGenVertexArrays(1, &vao);
+    glBindVertexArray(vao);
+
+    /* this is the VBO that holds the vertex data */
+    glGenBuffers(1, &buffer);
+    glBindBuffer(GL_ARRAY_BUFFER, buffer);
+    glBufferData(GL_ARRAY_BUFFER, sizeof(in_position), in_position,
+                 GL_STATIC_DRAW);
+
     l_position = glGetAttribLocation(texture_blit_prog, "in_position");
-    glVertexAttribPointer(l_position, 2, GL_FLOAT, GL_FALSE, 0, in_position);
+    glVertexAttribPointer(l_position, 2, GL_FLOAT, GL_FALSE, 0, 0);
     glEnableVertexAttribArray(l_position);
-    glDrawArrays(GL_TRIANGLE_STRIP, l_position, 4);
+
+    glBindBuffer(GL_ARRAY_BUFFER, 0);
+    glBindVertexArray(0);
+
+    return vao;
+}
+
+void qemu_gl_run_texture_blit(GLint texture_blit_prog,
+                              GLint texture_blit_vao)
+{
+    glUseProgram(texture_blit_prog);
+    glBindVertexArray(texture_blit_vao);
+    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
 }
 
 /* ---------------------------------------------------------------------- */
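
Editor's note: the shader.c change splits the old one-shot blit helper into an
init step that builds a VAO/VBO once and a per-frame call that only binds and
draws.  Below is a hedged sketch of how a caller might use the two-step API;
the ExampleGLState struct and example_gl_* names are assumptions for
illustration, not code from this series, and it assumes ui/shader.h declares
the updated prototypes.

#include <epoxy/gl.h>       /* assumed GL loader, as used by ui/shader.c */
#include "ui/shader.h"

typedef struct ExampleGLState {
    GLint  texture_blit_prog;   /* compiled + linked blit shader program */
    GLuint texture_blit_vao;    /* fullscreen-quad vertex array object   */
} ExampleGLState;

static void example_gl_setup(ExampleGLState *gls)
{
    /* one-time setup: build the vertex array for the fullscreen quad */
    gls->texture_blit_vao =
        qemu_gl_init_texture_blit(gls->texture_blit_prog);
}

static void example_gl_draw_frame(ExampleGLState *gls)
{
    /* per frame: bind program and VAO, then draw the GL_TRIANGLE_STRIP quad */
    qemu_gl_run_texture_blit(gls->texture_blit_prog,
                             gls->texture_blit_vao);
}
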
diff --git a/vl.c b/vl.c
index 6fbfd4d..7c806a2 100644
--- a/vl.c
+++ b/vl.c
@@ -2703,26 +2703,6 @@
     return popt;
 }
 
-static gpointer malloc_and_trace(gsize n_bytes)
-{
-    void *ptr = malloc(n_bytes);
-    trace_g_malloc(n_bytes, ptr);
-    return ptr;
-}
-
-static gpointer realloc_and_trace(gpointer mem, gsize n_bytes)
-{
-    void *ptr = realloc(mem, n_bytes);
-    trace_g_realloc(mem, n_bytes, ptr);
-    return ptr;
-}
-
-static void free_and_trace(gpointer mem)
-{
-    trace_g_free(mem);
-    free(mem);
-}
-
 static int machine_set_property(void *opaque,
                                 const char *name, const char *value,
                                 Error **errp)
@@ -2955,11 +2935,6 @@
     bool userconfig = true;
     const char *log_mask = NULL;
     const char *log_file = NULL;
-    GMemVTable mem_trace = {
-        .malloc = malloc_and_trace,
-        .realloc = realloc_and_trace,
-        .free = free_and_trace,
-    };
     const char *trace_events = NULL;
     const char *trace_file = NULL;
     ram_addr_t maxram_size;
@@ -2975,8 +2950,6 @@
     error_set_progname(argv[0]);
     qemu_init_exec_dir(argv[0]);
 
-    g_mem_set_vtable(&mem_trace);
-
     module_call_init(MODULE_INIT_QOM);
 
     qemu_add_opts(&qemu_drive_opts);