Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging

acpi,pc,test bug fixes

More small fixes: the issues annoy developers so
I thought they are worth fixing quickly.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Tue 11 Mar 2014 11:27:44 GMT using RSA key ID D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream:
  acpi-test: update expected SSDT files
  acpi-build: don't access unaligned addresses
  q35: Correct typo BRDIGE -> BRIDGE
  configure: don't modify .status on error
  pc: avoid duplicate names for ROM MRs
  loader: rename in_ram/has_mr

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/MAINTAINERS b/MAINTAINERS
index 62e7683..7d17f83 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -175,9 +175,12 @@
 F: target-ppc/kvm.c
 
 S390
+M: Christian Borntraeger <borntraeger@de.ibm.com>
+M: Cornelia Huck <cornelia.huck@de.ibm.com>
 M: Alexander Graf <agraf@suse.de>
 S: Maintained
 F: target-s390x/kvm.c
+F: hw/intc/s390_flic.[hc]
 
 X86
 M: Marcelo Tosatti <mtosatti@redhat.com>
@@ -493,10 +496,13 @@
 
 S390 Virtio-ccw
 M: Cornelia Huck <cornelia.huck@de.ibm.com>
+M: Christian Borntraeger <borntraeger@de.ibm.com>
 M: Alexander Graf <agraf@suse.de>
 S: Supported
 F: hw/s390x/s390-virtio-ccw.c
 F: hw/s390x/css.[hc]
+F: hw/s390x/sclp*.[hc]
+F: hw/s390x/ipl*.[hc]
 T: git git://github.com/cohuck/qemu virtio-ccw-upstr
 
 UniCore32 Machines
@@ -627,6 +633,7 @@
 
 virtio-ccw
 M: Cornelia Huck <cornelia.huck@de.ibm.com>
+M: Christian Borntraeger <borntraeger@de.ibm.com>
 S: Supported
 F: hw/s390x/virtio-ccw.[hc]
 T: git git://github.com/cohuck/qemu virtio-ccw-upstr
diff --git a/arch_init.c b/arch_init.c
index fe17279..60c975d 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -164,8 +164,9 @@
     uint8_t *encoded_buf;
     /* buffer for storing page content */
     uint8_t *current_buf;
-    /* Cache for XBZRLE */
+    /* Cache for XBZRLE, Protected by lock. */
     PageCache *cache;
+    QemuMutex lock;
 } XBZRLE = {
     .encoded_buf = NULL,
     .current_buf = NULL,
@@ -174,16 +175,52 @@
 /* buffer used for XBZRLE decoding */
 static uint8_t *xbzrle_decoded_buf;
 
+static void XBZRLE_cache_lock(void)
+{
+    if (migrate_use_xbzrle())
+        qemu_mutex_lock(&XBZRLE.lock);
+}
+
+static void XBZRLE_cache_unlock(void)
+{
+    if (migrate_use_xbzrle())
+        qemu_mutex_unlock(&XBZRLE.lock);
+}
+
 int64_t xbzrle_cache_resize(int64_t new_size)
 {
+    PageCache *new_cache, *cache_to_free;
+
     if (new_size < TARGET_PAGE_SIZE) {
         return -1;
     }
 
+    /* no need to lock, the current thread holds qemu big lock */
     if (XBZRLE.cache != NULL) {
-        return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) *
-            TARGET_PAGE_SIZE;
+        /* check XBZRLE.cache again later */
+        if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
+            return pow2floor(new_size);
+        }
+        new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
+                                        TARGET_PAGE_SIZE);
+        if (!new_cache) {
+            DPRINTF("Error creating cache\n");
+            return -1;
+        }
+
+        XBZRLE_cache_lock();
+        /* the XBZRLE.cache may have be destroyed, check it again */
+        if (XBZRLE.cache != NULL) {
+            cache_to_free = XBZRLE.cache;
+            XBZRLE.cache = new_cache;
+        } else {
+            cache_to_free = new_cache;
+        }
+        XBZRLE_cache_unlock();
+
+        cache_fini(cache_to_free);
     }
+
     return pow2floor(new_size);
 }
 
@@ -539,6 +576,8 @@
             ret = ram_control_save_page(f, block->offset,
                                offset, TARGET_PAGE_SIZE, &bytes_sent);
 
+            XBZRLE_cache_lock();
+
             current_addr = block->offset + offset;
             if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
                 if (ret != RAM_SAVE_CONTROL_DELAYED) {
@@ -587,6 +626,7 @@
                 acct_info.norm_pages++;
             }
 
+            XBZRLE_cache_unlock();
             /* if page is unmodified, continue to the next */
             if (bytes_sent > 0) {
                 last_sent_block = block;
@@ -654,6 +694,7 @@
         migration_bitmap = NULL;
     }
 
+    XBZRLE_cache_lock();
     if (XBZRLE.cache) {
         cache_fini(XBZRLE.cache);
         g_free(XBZRLE.cache);
@@ -663,6 +704,7 @@
         XBZRLE.encoded_buf = NULL;
         XBZRLE.current_buf = NULL;
     }
+    XBZRLE_cache_unlock();
 }
 
 static void ram_migration_cancel(void *opaque)
@@ -693,13 +735,17 @@
     dirty_rate_high_cnt = 0;
 
     if (migrate_use_xbzrle()) {
+        qemu_mutex_lock_iothread();
         XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
                                   TARGET_PAGE_SIZE,
                                   TARGET_PAGE_SIZE);
         if (!XBZRLE.cache) {
+            qemu_mutex_unlock_iothread();
             DPRINTF("Error creating cache\n");
             return -1;
         }
+        qemu_mutex_init(&XBZRLE.lock);
+        qemu_mutex_unlock_iothread();
 
         /* We prefer not to abort if there is no memory */
         XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
diff --git a/configure b/configure
index 3ae57d7..8c2838e 100755
--- a/configure
+++ b/configure
@@ -4094,7 +4094,11 @@
 echo "vhost-scsi support $vhost_scsi"
 echo "Trace backend     $trace_backend"
 echo "Trace output file $trace_file-<pid>"
+if test "$spice" = "yes"; then
 echo "spice support     $spice ($spice_protocol_version/$spice_server_version)"
+else
+echo "spice support     $spice"
+fi
 echo "rbd support       $rbd"
 echo "xfsctl support    $xfs"
 echo "nss used          $smartcard_nss"
@@ -4955,6 +4959,12 @@
     echo "CONFIG_ALPHA_DIS=y"  >> $config_target_mak
     echo "CONFIG_ALPHA_DIS=y"  >> config-all-disas.mak
   ;;
+  aarch64)
+    if test -n "${cxx}"; then
+      echo "CONFIG_ARM_A64_DIS=y"  >> $config_target_mak
+      echo "CONFIG_ARM_A64_DIS=y"  >> config-all-disas.mak
+    fi
+  ;;
   arm)
     echo "CONFIG_ARM_DIS=y"  >> $config_target_mak
     echo "CONFIG_ARM_DIS=y"  >> config-all-disas.mak
diff --git a/disas/libvixl/a64/disasm-a64.cc b/disas/libvixl/a64/disasm-a64.cc
index 5c6b898..5f172da 100644
--- a/disas/libvixl/a64/disasm-a64.cc
+++ b/disas/libvixl/a64/disasm-a64.cc
@@ -1342,7 +1342,7 @@
         ASSERT(format[5] == 'L');
         AppendToOutput("#0x%" PRIx64, instr->ImmMoveWide());
         if (instr->ShiftMoveWide() > 0) {
-          AppendToOutput(", lsl #%d", 16 * instr->ShiftMoveWide());
+          AppendToOutput(", lsl #%" PRId64, 16 * instr->ShiftMoveWide());
         }
       }
       return 8;
@@ -1391,7 +1391,7 @@
     }
     case 'F': {  // IFPSingle, IFPDouble or IFPFBits.
       if (format[3] == 'F') {  // IFPFbits.
-        AppendToOutput("#%d", 64 - instr->FPScale());
+        AppendToOutput("#%" PRId64, 64 - instr->FPScale());
         return 8;
       } else {
         AppendToOutput("#0x%" PRIx64 " (%.4f)", instr->ImmFP(),
@@ -1412,23 +1412,23 @@
       return 5;
     }
     case 'P': {  // IP - Conditional compare.
-      AppendToOutput("#%d", instr->ImmCondCmp());
+      AppendToOutput("#%" PRId64, instr->ImmCondCmp());
       return 2;
     }
     case 'B': {  // Bitfields.
       return SubstituteBitfieldImmediateField(instr, format);
     }
     case 'E': {  // IExtract.
-      AppendToOutput("#%d", instr->ImmS());
+      AppendToOutput("#%" PRId64, instr->ImmS());
       return 8;
     }
     case 'S': {  // IS - Test and branch bit.
-      AppendToOutput("#%d", (instr->ImmTestBranchBit5() << 5) |
-                            instr->ImmTestBranchBit40());
+      AppendToOutput("#%" PRId64, (instr->ImmTestBranchBit5() << 5) |
+                                  instr->ImmTestBranchBit40());
       return 2;
     }
     case 'D': {  // IDebug - HLT and BRK instructions.
-      AppendToOutput("#0x%x", instr->ImmException());
+      AppendToOutput("#0x%" PRIx64, instr->ImmException());
       return 6;
     }
     default: {
@@ -1598,12 +1598,12 @@
       (((instr->ExtendMode() == UXTW) && (instr->SixtyFourBits() == 0)) ||
        (instr->ExtendMode() == UXTX))) {
     if (instr->ImmExtendShift() > 0) {
-      AppendToOutput(", lsl #%d", instr->ImmExtendShift());
+      AppendToOutput(", lsl #%" PRId64, instr->ImmExtendShift());
     }
   } else {
     AppendToOutput(", %s", extend_mode[instr->ExtendMode()]);
     if (instr->ImmExtendShift() > 0) {
-      AppendToOutput(" #%d", instr->ImmExtendShift());
+      AppendToOutput(" #%" PRId64, instr->ImmExtendShift());
     }
   }
   return 3;
@@ -1632,7 +1632,7 @@
   if (!((ext == UXTX) && (shift == 0))) {
     AppendToOutput(", %s", extend_mode[ext]);
     if (shift != 0) {
-      AppendToOutput(" #%d", instr->SizeLS());
+      AppendToOutput(" #%" PRId64, instr->SizeLS());
     }
   }
   return 9;
diff --git a/exec.c b/exec.c
index 680268a..31ed375 100644
--- a/exec.c
+++ b/exec.c
@@ -1029,7 +1029,7 @@
 
     hpagesize = gethugepagesize(path);
     if (!hpagesize) {
-        return NULL;
+        goto error;
     }
 
     if (memory < hpagesize) {
@@ -1038,7 +1038,7 @@
 
     if (kvm_enabled() && !kvm_has_sync_mmu()) {
         fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
-        return NULL;
+        goto error;
     }
 
     /* Make name safe to use with mkstemp by replacing '/' with '_'. */
@@ -1056,7 +1056,7 @@
     if (fd < 0) {
         perror("unable to create backing store for hugepages");
         g_free(filename);
-        return NULL;
+        goto error;
     }
     unlink(filename);
     g_free(filename);
@@ -1076,7 +1076,7 @@
     if (area == MAP_FAILED) {
         perror("file_ram_alloc: can't mmap RAM pages");
         close(fd);
-        return (NULL);
+        goto error;
     }
 
     if (mem_prealloc) {
@@ -1120,6 +1120,12 @@
 
     block->fd = fd;
     return area;
+
+error:
+    if (mem_prealloc) {
+        exit(1);
+    }
+    return NULL;
 }
 #else
 static void *file_ram_alloc(RAMBlock *block,
diff --git a/hw/arm/musicpal.c b/hw/arm/musicpal.c
index d10b5db..de54201 100644
--- a/hw/arm/musicpal.c
+++ b/hw/arm/musicpal.c
@@ -110,10 +110,10 @@
 #define MP_PHY_88E3015          0x01410E20
 
 /* TX descriptor status */
-#define MP_ETH_TX_OWN           (1 << 31)
+#define MP_ETH_TX_OWN           (1U << 31)
 
 /* RX descriptor status */
-#define MP_ETH_RX_OWN           (1 << 31)
+#define MP_ETH_RX_OWN           (1U << 31)
 
 /* Interrupt cause/mask bits */
 #define MP_ETH_IRQ_RX_BIT       0
diff --git a/hw/arm/omap1.c b/hw/arm/omap1.c
index 47511d2..b433748 100644
--- a/hw/arm/omap1.c
+++ b/hw/arm/omap1.c
@@ -809,22 +809,26 @@
                 uint32_t diff, uint32_t value)
 {
     if (s->compat1509) {
-        if (diff & (1 << 31))			/* MCBSP3_CLK_HIZ_DI */
-            omap_clk_onoff(omap_findclk(s, "mcbsp3.clkx"),
-                            (value >> 31) & 1);
-        if (diff & (1 << 1))			/* CLK32K */
-            omap_clk_onoff(omap_findclk(s, "clk32k_out"),
-                            (~value >> 1) & 1);
+        if (diff & (1U << 31)) {
+            /* MCBSP3_CLK_HIZ_DI */
+            omap_clk_onoff(omap_findclk(s, "mcbsp3.clkx"), (value >> 31) & 1);
+        }
+        if (diff & (1 << 1)) {
+            /* CLK32K */
+            omap_clk_onoff(omap_findclk(s, "clk32k_out"), (~value >> 1) & 1);
+        }
     }
 }
 
 static inline void omap_pin_modconf1_update(struct omap_mpu_state_s *s,
                 uint32_t diff, uint32_t value)
 {
-    if (diff & (1 << 31))			/* CONF_MOD_UART3_CLK_MODE_R */
-         omap_clk_reparent(omap_findclk(s, "uart3_ck"),
-                         omap_findclk(s, ((value >> 31) & 1) ?
-                                 "ck_48m" : "armper_ck"));
+    if (diff & (1U << 31)) {
+        /* CONF_MOD_UART3_CLK_MODE_R */
+        omap_clk_reparent(omap_findclk(s, "uart3_ck"),
+                          omap_findclk(s, ((value >> 31) & 1) ?
+                                       "ck_48m" : "armper_ck"));
+    }
     if (diff & (1 << 30))			/* CONF_MOD_UART2_CLK_MODE_R */
          omap_clk_reparent(omap_findclk(s, "uart2_ck"),
                          omap_findclk(s, ((value >> 30) & 1) ?
diff --git a/hw/arm/pxa2xx.c b/hw/arm/pxa2xx.c
index 904277a..0429148 100644
--- a/hw/arm/pxa2xx.c
+++ b/hw/arm/pxa2xx.c
@@ -259,7 +259,7 @@
 
     case 1:
         /* Idle */
-        if (!(s->cm_regs[CCCR >> 2] & (1 << 31))) { /* CPDIS */
+        if (!(s->cm_regs[CCCR >> 2] & (1U << 31))) { /* CPDIS */
             cpu_interrupt(CPU(s->cpu), CPU_INTERRUPT_HALT);
             break;
         }
@@ -496,7 +496,7 @@
 #define SSCR0_SSE	(1 << 7)
 #define SSCR0_RIM	(1 << 22)
 #define SSCR0_TIM	(1 << 23)
-#define SSCR0_MOD	(1 << 31)
+#define SSCR0_MOD       (1U << 31)
 #define SSCR0_DSS(x)	(((((x) >> 16) & 0x10) | ((x) & 0xf)) + 1)
 #define SSCR1_RIE	(1 << 0)
 #define SSCR1_TIE	(1 << 1)
@@ -1006,7 +1006,7 @@
 
     switch (addr) {
     case RTTR:
-        if (!(s->rttr & (1 << 31))) {
+        if (!(s->rttr & (1U << 31))) {
             pxa2xx_rtc_hzupdate(s);
             s->rttr = value;
             pxa2xx_rtc_alarm_update(s, s->rtsr);
diff --git a/hw/arm/pxa2xx_gpio.c b/hw/arm/pxa2xx_gpio.c
index ca77f56..0727428 100644
--- a/hw/arm/pxa2xx_gpio.c
+++ b/hw/arm/pxa2xx_gpio.c
@@ -110,7 +110,7 @@
     }
 
     bank = line >> 5;
-    mask = 1 << (line & 31);
+    mask = 1U << (line & 31);
 
     if (level) {
         s->status[bank] |= s->rising[bank] & mask &
diff --git a/hw/arm/pxa2xx_pic.c b/hw/arm/pxa2xx_pic.c
index 345fa4a..d37fb54 100644
--- a/hw/arm/pxa2xx_pic.c
+++ b/hw/arm/pxa2xx_pic.c
@@ -105,7 +105,7 @@
 
     for (i = PXA2XX_PIC_SRCS - 1; i >= 0; i --) {
         irq = s->priority[i] & 0x3f;
-        if ((s->priority[i] & (1 << 31)) && irq < PXA2XX_PIC_SRCS) {
+        if ((s->priority[i] & (1U << 31)) && irq < PXA2XX_PIC_SRCS) {
             /* Source peripheral ID is valid.  */
             bit = 1 << (irq & 31);
             int_set = (irq >= 32);
@@ -119,7 +119,7 @@
             if (mask[int_set] & bit & ~s->is_fiq[int_set]) {
                 /* IRQ asserted */
                 ichp &= 0x0000ffff;
-                ichp |= (1 << 31) | (irq << 16);
+                ichp |= (1U << 31) | (irq << 16);
             }
         }
     }
diff --git a/hw/ssi/xilinx_spips.c b/hw/ssi/xilinx_spips.c
index 6a28746..8977243 100644
--- a/hw/ssi/xilinx_spips.c
+++ b/hw/ssi/xilinx_spips.c
@@ -43,7 +43,7 @@
 
 /* config register */
 #define R_CONFIG            (0x00 / 4)
-#define IFMODE              (1 << 31)
+#define IFMODE              (1U << 31)
 #define ENDIAN              (1 << 26)
 #define MODEFAIL_GEN_EN     (1 << 17)
 #define MAN_START_COM       (1 << 16)
@@ -87,7 +87,7 @@
 
 #define R_LQSPI_CFG         (0xa0 / 4)
 #define R_LQSPI_CFG_RESET       0x03A002EB
-#define LQSPI_CFG_LQ_MODE       (1 << 31)
+#define LQSPI_CFG_LQ_MODE       (1U << 31)
 #define LQSPI_CFG_TWO_MEM       (1 << 30)
 #define LQSPI_CFG_SEP_BUS       (1 << 30)
 #define LQSPI_CFG_U_PAGE        (1 << 28)
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index 01cd8c7..66a3d46 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -59,6 +59,7 @@
 #define EXCP_HLT        0x10001 /* hlt instruction reached */
 #define EXCP_DEBUG      0x10002 /* cpu stopped after a breakpoint or singlestep */
 #define EXCP_HALTED     0x10003 /* cpu is halted (waiting for external event) */
+#define EXCP_YIELD      0x10004 /* cpu wants to yield timeslice to another */
 
 #define TB_JMP_CACHE_BITS 12
 #define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)
diff --git a/kvm-all.c b/kvm-all.c
index 87fe482..82a9119 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1423,11 +1423,10 @@
                     nc->name, nc->num, soft_vcpus_limit);
 
             if (nc->num > hard_vcpus_limit) {
-                ret = -EINVAL;
                 fprintf(stderr, "Number of %s cpus requested (%d) exceeds "
                         "the maximum cpus supported by KVM (%d)\n",
                         nc->name, nc->num, hard_vcpus_limit);
-                goto err;
+                exit(1);
             }
         }
         nc++;
diff --git a/linux-user/alpha/syscall.h b/linux-user/alpha/syscall.h
index 15a0100..ed13d9a 100644
--- a/linux-user/alpha/syscall.h
+++ b/linux-user/alpha/syscall.h
@@ -39,6 +39,7 @@
 };
 
 #define UNAME_MACHINE "alpha"
+#define UNAME_MINIMUM_RELEASE "2.6.32"
 
 #undef TARGET_EDEADLK
 #define TARGET_EDEADLK		11
diff --git a/linux-user/arm/syscall.h b/linux-user/arm/syscall.h
index 73f2931..ce2c2a8 100644
--- a/linux-user/arm/syscall.h
+++ b/linux-user/arm/syscall.h
@@ -40,5 +40,6 @@
 #else
 #define UNAME_MACHINE "armv5tel"
 #endif
+#define UNAME_MINIMUM_RELEASE "2.6.32"
 
 #define TARGET_CLONE_BACKWARDS
diff --git a/linux-user/cris/syscall.h b/linux-user/cris/syscall.h
index 832ee64..f5783c0 100644
--- a/linux-user/cris/syscall.h
+++ b/linux-user/cris/syscall.h
@@ -1,8 +1,8 @@
 #ifndef CRIS_SYSCALL_H
 #define CRIS_SYSCALL_H 1
 
-
 #define UNAME_MACHINE "cris"
+#define UNAME_MINIMUM_RELEASE "2.6.32"
 
 /* pt_regs not only specifices the format in the user-struct during
  * ptrace but is also the frame format used in the kernel prologue/epilogues
diff --git a/linux-user/i386/syscall.h b/linux-user/i386/syscall.h
index 12b8c3b..9bfc1ad 100644
--- a/linux-user/i386/syscall.h
+++ b/linux-user/i386/syscall.h
@@ -144,5 +144,6 @@
 };
 
 #define UNAME_MACHINE "i686"
+#define UNAME_MINIMUM_RELEASE "2.6.32"
 
 #define TARGET_CLONE_BACKWARDS
diff --git a/linux-user/m68k/syscall.h b/linux-user/m68k/syscall.h
index 2618793..889eaf7 100644
--- a/linux-user/m68k/syscall.h
+++ b/linux-user/m68k/syscall.h
@@ -15,7 +15,7 @@
     uint16_t __fill;
 };
 
-
 #define UNAME_MACHINE "m68k"
+#define UNAME_MINIMUM_RELEASE "2.6.32"
 
 void do_m68k_simcall(CPUM68KState *, int);
diff --git a/linux-user/main.c b/linux-user/main.c
index be9491b..dee1084 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -2400,6 +2400,10 @@
         ret = 0;
         break;
     default:
+        info->si_signo = TARGET_SIGTRAP;
+        info->si_errno = 0;
+        queue_signal(env, info->si_signo, &*info);
+        ret = 0;
         break;
     }
 
diff --git a/linux-user/microblaze/syscall.h b/linux-user/microblaze/syscall.h
index d550989..5b5f6b4 100644
--- a/linux-user/microblaze/syscall.h
+++ b/linux-user/microblaze/syscall.h
@@ -1,8 +1,8 @@
 #ifndef MICROBLAZE_SYSCALLS_H
 #define MICROBLAZE_SYSCALLS_H 1
 
-
 #define UNAME_MACHINE "microblaze"
+#define UNAME_MINIMUM_RELEASE "2.6.32"
 
 /* We use microblaze_reg_t to keep things similar to the kernel sources.  */
 typedef uint32_t microblaze_reg_t;
diff --git a/linux-user/mips/syscall.h b/linux-user/mips/syscall.h
index 9d437d9..5bc5696 100644
--- a/linux-user/mips/syscall.h
+++ b/linux-user/mips/syscall.h
@@ -225,5 +225,6 @@
 #define TARGET_QEMU_ESIGRETURN 255
 
 #define UNAME_MACHINE "mips"
+#define UNAME_MINIMUM_RELEASE "2.6.32"
 
 #define TARGET_CLONE_BACKWARDS
diff --git a/linux-user/mips64/syscall.h b/linux-user/mips64/syscall.h
index 1710f76..a7f5a58 100644
--- a/linux-user/mips64/syscall.h
+++ b/linux-user/mips64/syscall.h
@@ -222,5 +222,6 @@
 #define TARGET_QEMU_ESIGRETURN 255
 
 #define UNAME_MACHINE "mips64"
+#define UNAME_MINIMUM_RELEASE "2.6.32"
 
 #define TARGET_CLONE_BACKWARDS
diff --git a/linux-user/openrisc/syscall.h b/linux-user/openrisc/syscall.h
index bdbb577..c3b36da 100644
--- a/linux-user/openrisc/syscall.h
+++ b/linux-user/openrisc/syscall.h
@@ -22,3 +22,4 @@
 };
 
 #define UNAME_MACHINE "openrisc"
+#define UNAME_MINIMUM_RELEASE "2.6.32"
diff --git a/linux-user/ppc/syscall.h b/linux-user/ppc/syscall.h
index ba36acb..6514c63 100644
--- a/linux-user/ppc/syscall.h
+++ b/linux-user/ppc/syscall.h
@@ -62,5 +62,6 @@
 #else
 #define UNAME_MACHINE "ppc"
 #endif
+#define UNAME_MINIMUM_RELEASE "2.6.32"
 
 #define TARGET_CLONE_BACKWARDS
diff --git a/linux-user/s390x/syscall.h b/linux-user/s390x/syscall.h
index e5ce30b..aaad512 100644
--- a/linux-user/s390x/syscall.h
+++ b/linux-user/s390x/syscall.h
@@ -21,5 +21,6 @@
 };
 
 #define UNAME_MACHINE "s390x"
+#define UNAME_MINIMUM_RELEASE "2.6.32"
 
 #define TARGET_CLONE_BACKWARDS2
diff --git a/linux-user/sh4/syscall.h b/linux-user/sh4/syscall.h
index 014bf58..ccd2216 100644
--- a/linux-user/sh4/syscall.h
+++ b/linux-user/sh4/syscall.h
@@ -10,3 +10,4 @@
 };
 
 #define UNAME_MACHINE "sh4"
+#define UNAME_MINIMUM_RELEASE "2.6.32"
diff --git a/linux-user/signal.c b/linux-user/signal.c
index 04638e2..c8a1da0 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -1233,8 +1233,14 @@
         return 1;
     }
 
-    for (i = 0; i < 32 * 2; i++) {
-        __get_user(env->vfp.regs[i], &aux->fpsimd.vregs[i]);
+    for (i = 0; i < 32; i++) {
+#ifdef TARGET_WORDS_BIGENDIAN
+        __get_user(env->vfp.regs[i * 2], &aux->fpsimd.vregs[i * 2 + 1]);
+        __get_user(env->vfp.regs[i * 2 + 1], &aux->fpsimd.vregs[i * 2]);
+#else
+        __get_user(env->vfp.regs[i * 2], &aux->fpsimd.vregs[i * 2]);
+        __get_user(env->vfp.regs[i * 2 + 1], &aux->fpsimd.vregs[i * 2 + 1]);
+#endif
     }
     __get_user(fpsr, &aux->fpsimd.fpsr);
     vfp_set_fpsr(env, fpsr);
@@ -1267,7 +1273,7 @@
                                CPUARMState *env)
 {
     struct target_rt_sigframe *frame;
-    abi_ulong frame_addr;
+    abi_ulong frame_addr, return_addr;
 
     frame_addr = get_sigframe(ka, env);
     if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) {
@@ -1284,15 +1290,19 @@
     __put_user(target_sigaltstack_used.ss_size,
                       &frame->uc.tuc_stack.ss_size);
     target_setup_sigframe(frame, env, set);
-    /* mov x8,#__NR_rt_sigreturn; svc #0 */
-    __put_user(0xd2801168, &frame->tramp[0]);
-    __put_user(0xd4000001, &frame->tramp[1]);
+    if (ka->sa_flags & TARGET_SA_RESTORER) {
+        return_addr = ka->sa_restorer;
+    } else {
+        /* mov x8,#__NR_rt_sigreturn; svc #0 */
+        __put_user(0xd2801168, &frame->tramp[0]);
+        __put_user(0xd4000001, &frame->tramp[1]);
+        return_addr = frame_addr + offsetof(struct target_rt_sigframe, tramp);
+    }
     env->xregs[0] = usig;
     env->xregs[31] = frame_addr;
     env->xregs[29] = env->xregs[31] + offsetof(struct target_rt_sigframe, fp);
     env->pc = ka->_sa_handler;
-    env->xregs[30] = env->xregs[31] +
-        offsetof(struct target_rt_sigframe, tramp);
+    env->xregs[30] = return_addr;
     if (info) {
         if (copy_siginfo_to_user(&frame->info, info)) {
             goto give_sigsegv;
diff --git a/linux-user/sparc/syscall.h b/linux-user/sparc/syscall.h
index 4cd64bf..9549ea0 100644
--- a/linux-user/sparc/syscall.h
+++ b/linux-user/sparc/syscall.h
@@ -7,6 +7,7 @@
 };
 
 #define UNAME_MACHINE "sun4"
+#define UNAME_MINIMUM_RELEASE "2.6.32"
 
 /* SPARC kernels don't define this in their Kconfig, but they have the
  * same ABI as if they did, implemented by sparc-specific code which fishes
diff --git a/linux-user/sparc64/syscall.h b/linux-user/sparc64/syscall.h
index e60bf31..82b1680 100644
--- a/linux-user/sparc64/syscall.h
+++ b/linux-user/sparc64/syscall.h
@@ -8,6 +8,7 @@
 };
 
 #define UNAME_MACHINE "sun4u"
+#define UNAME_MINIMUM_RELEASE "2.6.32"
 
 /* SPARC kernels don't define this in their Kconfig, but they have the
  * same ABI as if they did, implemented by sparc-specific code which fishes
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 1407b7a..e2c10cc 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -1904,23 +1904,16 @@
     return get_errno(connect(sockfd, addr, addrlen));
 }
 
-/* do_sendrecvmsg() Must return target values and target errnos. */
-static abi_long do_sendrecvmsg(int fd, abi_ulong target_msg,
-                               int flags, int send)
+/* do_sendrecvmsg_locked() Must return target values and target errnos. */
+static abi_long do_sendrecvmsg_locked(int fd, struct target_msghdr *msgp,
+                                      int flags, int send)
 {
     abi_long ret, len;
-    struct target_msghdr *msgp;
     struct msghdr msg;
     int count;
     struct iovec *vec;
     abi_ulong target_vec;
 
-    /* FIXME */
-    if (!lock_user_struct(send ? VERIFY_READ : VERIFY_WRITE,
-                          msgp,
-                          target_msg,
-                          send ? 1 : 0))
-        return -TARGET_EFAULT;
     if (msgp->msg_name) {
         msg.msg_namelen = tswap32(msgp->msg_namelen);
         msg.msg_name = alloca(msg.msg_namelen);
@@ -1975,10 +1968,75 @@
 out:
     unlock_iovec(vec, target_vec, count, !send);
 out2:
+    return ret;
+}
+
+static abi_long do_sendrecvmsg(int fd, abi_ulong target_msg,
+                               int flags, int send)
+{
+    abi_long ret;
+    struct target_msghdr *msgp;
+
+    if (!lock_user_struct(send ? VERIFY_READ : VERIFY_WRITE,
+                          msgp,
+                          target_msg,
+                          send ? 1 : 0)) {
+        return -TARGET_EFAULT;
+    }
+    ret = do_sendrecvmsg_locked(fd, msgp, flags, send);
     unlock_user_struct(msgp, target_msg, send ? 0 : 1);
     return ret;
 }
 
+#ifdef TARGET_NR_sendmmsg
+/* We don't rely on the C library to have sendmmsg/recvmmsg support,
+ * so it might not have this *mmsg-specific flag either.
+ */
+#ifndef MSG_WAITFORONE
+#define MSG_WAITFORONE 0x10000
+#endif
+
+static abi_long do_sendrecvmmsg(int fd, abi_ulong target_msgvec,
+                                unsigned int vlen, unsigned int flags,
+                                int send)
+{
+    struct target_mmsghdr *mmsgp;
+    abi_long ret = 0;
+    int i;
+
+    if (vlen > UIO_MAXIOV) {
+        vlen = UIO_MAXIOV;
+    }
+
+    mmsgp = lock_user(VERIFY_WRITE, target_msgvec, sizeof(*mmsgp) * vlen, 1);
+    if (!mmsgp) {
+        return -TARGET_EFAULT;
+    }
+
+    for (i = 0; i < vlen; i++) {
+        ret = do_sendrecvmsg_locked(fd, &mmsgp[i].msg_hdr, flags, send);
+        if (is_error(ret)) {
+            break;
+        }
+        mmsgp[i].msg_len = tswap32(ret);
+        /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
+        if (flags & MSG_WAITFORONE) {
+            flags |= MSG_DONTWAIT;
+        }
+    }
+
+    unlock_user(mmsgp, target_msgvec, sizeof(*mmsgp) * i);
+
+    /* Return number of datagrams sent if we sent any at all;
+     * otherwise return the error.
+     */
+    if (i) {
+        return i;
+    }
+    return ret;
+}
+#endif
+
 /* If we don't have a system accept4() then just call accept.
  * The callsites to do_accept4() will ensure that they don't
  * pass a non-zero flags argument in this config.
@@ -4528,6 +4586,9 @@
 {
     return tswap16(id);
 }
+
+#define put_user_id(x, gaddr) put_user_u16(x, gaddr)
+
 #else /* !USE_UID16 */
 static inline int high2lowuid(int uid)
 {
@@ -4549,6 +4610,9 @@
 {
     return tswap32(id);
 }
+
+#define put_user_id(x, gaddr) put_user_u32(x, gaddr)
+
 #endif /* USE_UID16 */
 
 void syscall_init(void)
@@ -6121,11 +6185,17 @@
                 puts = NULL;
             }
             ret = get_errno(sigtimedwait(&set, &uinfo, puts));
-            if (!is_error(ret) && arg2) {
-                if (!(p = lock_user(VERIFY_WRITE, arg2, sizeof(target_siginfo_t), 0)))
-                    goto efault;
-                host_to_target_siginfo(p, &uinfo);
-                unlock_user(p, arg2, sizeof(target_siginfo_t));
+            if (!is_error(ret)) {
+                if (arg2) {
+                    p = lock_user(VERIFY_WRITE, arg2, sizeof(target_siginfo_t),
+                                  0);
+                    if (!p) {
+                        goto efault;
+                    }
+                    host_to_target_siginfo(p, &uinfo);
+                    unlock_user(p, arg2, sizeof(target_siginfo_t));
+                }
+                ret = host_to_target_signal(ret);
             }
         }
         break;
@@ -6710,6 +6780,14 @@
         ret = do_sendrecvmsg(arg1, arg2, arg3, 1);
         break;
 #endif
+#ifdef TARGET_NR_sendmmsg
+    case TARGET_NR_sendmmsg:
+        ret = do_sendrecvmmsg(arg1, arg2, arg3, arg4, 1);
+        break;
+    case TARGET_NR_recvmmsg:
+        ret = do_sendrecvmmsg(arg1, arg2, arg3, arg4, 0);
+        break;
+#endif
 #ifdef TARGET_NR_sendto
     case TARGET_NR_sendto:
         ret = do_sendto(arg1, arg2, arg3, arg4, arg5, arg6);
@@ -7805,9 +7883,9 @@
             uid_t ruid, euid, suid;
             ret = get_errno(getresuid(&ruid, &euid, &suid));
             if (!is_error(ret)) {
-                if (put_user_u16(high2lowuid(ruid), arg1)
-                    || put_user_u16(high2lowuid(euid), arg2)
-                    || put_user_u16(high2lowuid(suid), arg3))
+                if (put_user_id(high2lowuid(ruid), arg1)
+                    || put_user_id(high2lowuid(euid), arg2)
+                    || put_user_id(high2lowuid(suid), arg3))
                     goto efault;
             }
         }
@@ -7826,9 +7904,9 @@
             gid_t rgid, egid, sgid;
             ret = get_errno(getresgid(&rgid, &egid, &sgid));
             if (!is_error(ret)) {
-                if (put_user_u16(high2lowgid(rgid), arg1)
-                    || put_user_u16(high2lowgid(egid), arg2)
-                    || put_user_u16(high2lowgid(sgid), arg3))
+                if (put_user_id(high2lowgid(rgid), arg1)
+                    || put_user_id(high2lowgid(egid), arg2)
+                    || put_user_id(high2lowgid(sgid), arg3))
                     goto efault;
             }
         }
diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h
index 3c8869e..732c9e3 100644
--- a/linux-user/syscall_defs.h
+++ b/linux-user/syscall_defs.h
@@ -53,7 +53,8 @@
 #define TARGET_IOC_NRBITS	8
 #define TARGET_IOC_TYPEBITS	8
 
-#if defined(TARGET_I386) || defined(TARGET_ARM) || defined(TARGET_SPARC) \
+#if defined(TARGET_I386) || (defined(TARGET_ARM) && defined(TARGET_ABI32)) \
+    || defined(TARGET_SPARC) \
     || defined(TARGET_M68K) || defined(TARGET_SH4) || defined(TARGET_CRIS)
     /* 16 bit uid wrappers emulation */
 #define USE_UID16
@@ -239,6 +240,10 @@
   return __cmsg;
 }
 
+struct target_mmsghdr {
+    struct target_msghdr msg_hdr;              /* Message header */
+    unsigned int         msg_len;              /* Number of bytes transmitted */
+};
 
 struct  target_rusage {
         struct target_timeval ru_utime;        /* user time used */
diff --git a/linux-user/unicore32/syscall.h b/linux-user/unicore32/syscall.h
index 010cdd8..f7e5525 100644
--- a/linux-user/unicore32/syscall.h
+++ b/linux-user/unicore32/syscall.h
@@ -51,5 +51,6 @@
 #define UC32_SYSCALL_NR_set_tls         (UC32_SYSCALL_ARCH_BASE + 5)
 
 #define UNAME_MACHINE "UniCore-II"
+#define UNAME_MINIMUM_RELEASE "2.6.32"
 
 #endif /* __UC32_SYSCALL_H__ */
diff --git a/linux-user/x86_64/syscall.h b/linux-user/x86_64/syscall.h
index 81314cf..e03b5a0 100644
--- a/linux-user/x86_64/syscall.h
+++ b/linux-user/x86_64/syscall.h
@@ -91,6 +91,7 @@
 };
 
 #define UNAME_MACHINE "x86_64"
+#define UNAME_MINIMUM_RELEASE "2.6.32"
 
 #define TARGET_ARCH_SET_GS 0x1001
 #define TARGET_ARCH_SET_FS 0x1002
diff --git a/pc-bios/README b/pc-bios/README
index 5914200..2bb6357 100644
--- a/pc-bios/README
+++ b/pc-bios/README
@@ -12,7 +12,7 @@
   1275-1994 (referred to as Open Firmware) compliant firmware.
   The included images for PowerPC (for 32 and 64 bit PPC CPUs),
   Sparc32 (including QEMU,tcx.bin and QEMU,cgthree.bin) and Sparc64 are built
-  from OpenBIOS SVN revision 1246.
+  from OpenBIOS SVN revision 1280.
 
 - SLOF (Slimline Open Firmware) is a free IEEE 1275 Open Firmware
   implementation for certain IBM POWER hardware.  The sources are at
diff --git a/pc-bios/openbios-ppc b/pc-bios/openbios-ppc
index f4a3a39..8a21389 100644
--- a/pc-bios/openbios-ppc
+++ b/pc-bios/openbios-ppc
Binary files differ
diff --git a/pc-bios/openbios-sparc32 b/pc-bios/openbios-sparc32
index bb7cdfb..d4d00e5 100644
--- a/pc-bios/openbios-sparc32
+++ b/pc-bios/openbios-sparc32
Binary files differ
diff --git a/pc-bios/openbios-sparc64 b/pc-bios/openbios-sparc64
index 46b4fdd..4182052 100644
--- a/pc-bios/openbios-sparc64
+++ b/pc-bios/openbios-sparc64
Binary files differ
diff --git a/qemu-file.c b/qemu-file.c
index f074af1..e5ec798 100644
--- a/qemu-file.c
+++ b/qemu-file.c
@@ -105,7 +105,7 @@
     res = fwrite(buf, 1, size, s->stdio_file);
 
     if (res != size) {
-        return -EIO;	/* fake errno value */
+        return -errno;
     }
     return res;
 }
diff --git a/roms/openbios b/roms/openbios
index 8881262..1ac3fb9 160000
--- a/roms/openbios
+++ b/roms/openbios
@@ -1 +1 @@
-Subproject commit 888126272f92294b0da45158393f1b862742cf6b
+Subproject commit 1ac3fb92c109f5545d373a0576b87750c53cce19
diff --git a/savevm.c b/savevm.c
index 7329fc5..d094fbb 100644
--- a/savevm.c
+++ b/savevm.c
@@ -527,13 +527,13 @@
         if (qemu_file_rate_limit(f)) {
             return 0;
         }
-        trace_savevm_section_start();
+        trace_savevm_section_start(se->idstr, se->section_id);
         /* Section type */
         qemu_put_byte(f, QEMU_VM_SECTION_PART);
         qemu_put_be32(f, se->section_id);
 
         ret = se->ops->save_live_iterate(f, se->opaque);
-        trace_savevm_section_end(se->section_id);
+        trace_savevm_section_end(se->idstr, se->section_id);
 
         if (ret < 0) {
             qemu_file_set_error(f, ret);
@@ -565,13 +565,13 @@
                 continue;
             }
         }
-        trace_savevm_section_start();
+        trace_savevm_section_start(se->idstr, se->section_id);
         /* Section type */
         qemu_put_byte(f, QEMU_VM_SECTION_END);
         qemu_put_be32(f, se->section_id);
 
         ret = se->ops->save_live_complete(f, se->opaque);
-        trace_savevm_section_end(se->section_id);
+        trace_savevm_section_end(se->idstr, se->section_id);
         if (ret < 0) {
             qemu_file_set_error(f, ret);
             return;
@@ -584,7 +584,7 @@
         if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
             continue;
         }
-        trace_savevm_section_start();
+        trace_savevm_section_start(se->idstr, se->section_id);
         /* Section type */
         qemu_put_byte(f, QEMU_VM_SECTION_FULL);
         qemu_put_be32(f, se->section_id);
@@ -598,7 +598,7 @@
         qemu_put_be32(f, se->version_id);
 
         vmstate_save(f, se);
-        trace_savevm_section_end(se->section_id);
+        trace_savevm_section_end(se->idstr, se->section_id);
     }
 
     qemu_put_byte(f, QEMU_VM_EOF);
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 49fef3f..0a7edfe 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -222,6 +222,10 @@
         uint64_t dbgbcr[16]; /* breakpoint control registers */
         uint64_t dbgwvr[16]; /* watchpoint value registers */
         uint64_t dbgwcr[16]; /* watchpoint control registers */
+        /* If the counter is enabled, this stores the last time the counter
+         * was reset. Otherwise it stores the counter value
+         */
+        uint32_t c15_ccnt;
     } cp15;
 
     struct {
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 90f85f1..f65cbac 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -13,6 +13,11 @@
                                 int access_type, int is_user,
                                 hwaddr *phys_ptr, int *prot,
                                 target_ulong *page_size);
+
+/* Definitions for the PMCCNTR and PMCR registers */
+#define PMCRD   0x8
+#define PMCRC   0x4
+#define PMCRE   0x1
 #endif
 
 static int vfp_gdb_get_reg(CPUARMState *env, uint8_t *buf, int reg)
@@ -478,14 +483,85 @@
     return CP_ACCESS_OK;
 }
 
+#ifndef CONFIG_USER_ONLY
 static void pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                        uint64_t value)
 {
+    /* Don't computer the number of ticks in user mode */
+    uint32_t temp_ticks;
+
+    temp_ticks = qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) *
+                  get_ticks_per_sec() / 1000000;
+
+    if (env->cp15.c9_pmcr & PMCRE) {
+        /* If the counter is enabled */
+        if (env->cp15.c9_pmcr & PMCRD) {
+            /* Increment once every 64 processor clock cycles */
+            env->cp15.c15_ccnt = (temp_ticks/64) - env->cp15.c15_ccnt;
+        } else {
+            env->cp15.c15_ccnt = temp_ticks - env->cp15.c15_ccnt;
+        }
+    }
+
+    if (value & PMCRC) {
+        /* The counter has been reset */
+        env->cp15.c15_ccnt = 0;
+    }
+
     /* only the DP, X, D and E bits are writable */
     env->cp15.c9_pmcr &= ~0x39;
     env->cp15.c9_pmcr |= (value & 0x39);
+
+    if (env->cp15.c9_pmcr & PMCRE) {
+        if (env->cp15.c9_pmcr & PMCRD) {
+            /* Increment once every 64 processor clock cycles */
+            temp_ticks /= 64;
+        }
+        env->cp15.c15_ccnt = temp_ticks - env->cp15.c15_ccnt;
+    }
 }
 
+static uint64_t pmccntr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    uint32_t total_ticks;
+
+    if (!(env->cp15.c9_pmcr & PMCRE)) {
+        /* Counter is disabled, do not change value */
+        return env->cp15.c15_ccnt;
+    }
+
+    total_ticks = qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) *
+                  get_ticks_per_sec() / 1000000;
+
+    if (env->cp15.c9_pmcr & PMCRD) {
+        /* Increment once every 64 processor clock cycles */
+        total_ticks /= 64;
+    }
+    return total_ticks - env->cp15.c15_ccnt;
+}
+
+static void pmccntr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                        uint64_t value)
+{
+    uint32_t total_ticks;
+
+    if (!(env->cp15.c9_pmcr & PMCRE)) {
+        /* Counter is disabled, set the absolute value */
+        env->cp15.c15_ccnt = value;
+        return;
+    }
+
+    total_ticks = qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) *
+                  get_ticks_per_sec() / 1000000;
+
+    if (env->cp15.c9_pmcr & PMCRD) {
+        /* Increment once every 64 processor clock cycles */
+        total_ticks /= 64;
+    }
+    env->cp15.c15_ccnt = total_ticks - value;
+}
+#endif
+
 static void pmcntenset_write(CPUARMState *env, const ARMCPRegInfo *ri,
                             uint64_t value)
 {
@@ -604,10 +680,12 @@
     { .name = "PMSELR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 5,
       .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0,
       .accessfn = pmreg_access },
-    /* Unimplemented, RAZ/WI. */
+#ifndef CONFIG_USER_ONLY
     { .name = "PMCCNTR", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 0,
-      .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+      .access = PL0_RW, .resetvalue = 0, .type = ARM_CP_IO,
+      .readfn = pmccntr_read, .writefn = pmccntr_write,
       .accessfn = pmreg_access },
+#endif
     { .name = "PMXEVTYPER", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 1,
       .access = PL0_RW,
       .fieldoffset = offsetof(CPUARMState, cp15.c9_pmxevtyper),
@@ -1873,8 +1951,10 @@
     }
     if (arm_feature(env, ARM_FEATURE_V7)) {
         /* v7 performance monitor control register: same implementor
-         * field as main ID register, and we implement no event counters.
+         * field as main ID register, and we implement only the cycle
+         * count register.
          */
+#ifndef CONFIG_USER_ONLY
         ARMCPRegInfo pmcr = {
             .name = "PMCR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 0,
             .access = PL0_RW, .resetvalue = cpu->midr & 0xff000000,
@@ -1882,12 +1962,13 @@
             .accessfn = pmreg_access, .writefn = pmcr_write,
             .raw_writefn = raw_write,
         };
+        define_one_arm_cp_reg(cpu, &pmcr);
+#endif
         ARMCPRegInfo clidr = {
             .name = "CLIDR", .state = ARM_CP_STATE_BOTH,
             .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 1,
             .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->clidr
         };
-        define_one_arm_cp_reg(cpu, &pmcr);
         define_one_arm_cp_reg(cpu, &clidr);
         define_arm_cp_regs(cpu, v7_cp_reginfo);
     } else {
@@ -2478,7 +2559,7 @@
         (env->CF << 29) | ((env->VF & 0x80000000) >> 3) | (env->QF << 27)
         | (env->thumb << 5) | ((env->condexec_bits & 3) << 25)
         | ((env->condexec_bits & 0xfc) << 8)
-        | (env->GE << 16) | env->daif;
+        | (env->GE << 16) | (env->daif & CPSR_AIF);
 }
 
 void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask)
diff --git a/target-arm/helper.h b/target-arm/helper.h
index 276f3a9..8923f8a 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -50,6 +50,7 @@
                    i32, i32, i32, i32)
 DEF_HELPER_2(exception, void, env, i32)
 DEF_HELPER_1(wfi, void, env)
+DEF_HELPER_1(wfe, void, env)
 
 DEF_HELPER_3(cpsr_write, void, env, i32, i32)
 DEF_HELPER_1(cpsr_read, i32, env)
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index 7d06d2f..5851e04 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -225,6 +225,15 @@
     cpu_loop_exit(env);
 }
 
+void HELPER(wfe)(CPUARMState *env)
+{
+    /* Don't actually halt the CPU, just yield back to top
+     * level loop
+     */
+    env->exception_index = EXCP_YIELD;
+    cpu_loop_exit(env);
+}
+
 void HELPER(exception)(CPUARMState *env, uint32_t excp)
 {
     env->exception_index = excp;
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 08ac659..37e05e8 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -210,7 +210,7 @@
     if (use_goto_tb(s, n, dest)) {
         tcg_gen_goto_tb(n);
         gen_a64_set_pc_im(dest);
-        tcg_gen_exit_tb((tcg_target_long)tb + n);
+        tcg_gen_exit_tb((intptr_t)tb + n);
         s->is_jmp = DISAS_TB_JUMP;
     } else {
         gen_a64_set_pc_im(dest);
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 253d2a1..df259de 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -3939,6 +3939,9 @@
         s->is_jmp = DISAS_WFI;
         break;
     case 2: /* wfe */
+        gen_set_pc_im(s, s->pc);
+        s->is_jmp = DISAS_WFE;
+        break;
     case 4: /* sev */
     case 5: /* sevl */
         /* TODO: Implement SEV, SEVL and WFE.  May help SMP performance.  */
@@ -10857,6 +10860,9 @@
         case DISAS_WFI:
             gen_helper_wfi(cpu_env);
             break;
+        case DISAS_WFE:
+            gen_helper_wfe(cpu_env);
+            break;
         case DISAS_SWI:
             gen_exception(EXCP_SWI);
             break;
diff --git a/target-arm/translate.h b/target-arm/translate.h
index 67da699..2f491f9 100644
--- a/target-arm/translate.h
+++ b/target-arm/translate.h
@@ -44,6 +44,8 @@
  * emitting unreachable code at the end of the TB in the A64 decoder
  */
 #define DISAS_EXC 6
+/* WFE */
+#define DISAS_WFE 7
 
 #ifdef TARGET_AARCH64
 void a64_translate_init(void);
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 0e8812a..9f69d7e 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -339,7 +339,7 @@
     [3] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX,
             .offset = 0x3c0, .size = 0x40  },
     [4] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX,
-            .offset = 0x400, .size = 0x10  },
+            .offset = 0x400, .size = 0x40  },
 };
 
 const char *get_register_name_32(unsigned int reg)
diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 04d7ae3..f43eb67 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -13,6 +13,11 @@
 #include "tcg-be-ldst.h"
 #include "qemu/bitops.h"
 
+/* We're going to re-use TCGType in setting of the SF bit, which controls
+   the size of the operation performed.  If we know the values match, it
+   makes things much cleaner.  */
+QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
+
 #ifndef NDEBUG
 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
     "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
@@ -66,24 +71,22 @@
 # endif
 #endif
 
-static inline void reloc_pc26(void *code_ptr, tcg_target_long target)
+static inline void reloc_pc26(void *code_ptr, intptr_t target)
 {
-    tcg_target_long offset; uint32_t insn;
-    offset = (target - (tcg_target_long)code_ptr) / 4;
+    intptr_t offset = (target - (intptr_t)code_ptr) / 4;
     /* read instruction, mask away previous PC_REL26 parameter contents,
        set the proper offset, then write back the instruction. */
-    insn = *(uint32_t *)code_ptr;
+    uint32_t insn = *(uint32_t *)code_ptr;
     insn = deposit32(insn, 0, 26, offset);
     *(uint32_t *)code_ptr = insn;
 }
 
-static inline void reloc_pc19(void *code_ptr, tcg_target_long target)
+static inline void reloc_pc19(void *code_ptr, intptr_t target)
 {
-    tcg_target_long offset; uint32_t insn;
-    offset = (target - (tcg_target_long)code_ptr) / 4;
+    intptr_t offset = (target - (intptr_t)code_ptr) / 4;
     /* read instruction, mask away previous PC_REL19 parameter contents,
        set the proper offset, then write back the instruction. */
-    insn = *(uint32_t *)code_ptr;
+    uint32_t insn = *(uint32_t *)code_ptr;
     insn = deposit32(insn, 5, 19, offset);
     *(uint32_t *)code_ptr = insn;
 }
@@ -302,18 +305,8 @@
                                   TCGReg rd, TCGReg rn, tcg_target_long offset)
 {
     /* use LDUR with BASE register with 9bit signed unscaled offset */
-    unsigned int mod, off;
-
-    if (offset < 0) {
-        off = (256 + offset);
-        mod = 0x1;
-    } else {
-        off = offset;
-        mod = 0x0;
-    }
-
-    mod |= op_type;
-    tcg_out32(s, op_data << 24 | mod << 20 | off << 12 | rn << 5 | rd);
+    tcg_out32(s, op_data << 24 | op_type << 20
+              | (offset & 0x1ff) << 12 | rn << 5 | rd);
 }
 
 /* tcg_out_ldst_12 expects a scaled unsigned immediate offset */
@@ -327,7 +320,8 @@
               | op_type << 20 | scaled_uimm << 10 | rn << 5 | rd);
 }
 
-static inline void tcg_out_movr(TCGContext *s, int ext, TCGReg rd, TCGReg src)
+static inline void tcg_out_movr(TCGContext *s, TCGType ext,
+                                TCGReg rd, TCGReg src)
 {
     /* register to register move using MOV (shifted register with no shift) */
     /* using MOV 0x2a0003e0 | (shift).. */
@@ -408,7 +402,8 @@
 }
 
 /* mov alias implemented with add immediate, useful to move to/from SP */
-static inline void tcg_out_movr_sp(TCGContext *s, int ext, TCGReg rd, TCGReg rn)
+static inline void tcg_out_movr_sp(TCGContext *s, TCGType ext,
+                                   TCGReg rd, TCGReg rn)
 {
     /* using ADD 0x11000000 | (ext) | rn << 5 | rd */
     unsigned int base = ext ? 0x91000000 : 0x11000000;
@@ -438,7 +433,7 @@
 }
 
 static inline void tcg_out_arith(TCGContext *s, enum aarch64_arith_opc opc,
-                                 int ext, TCGReg rd, TCGReg rn, TCGReg rm,
+                                 TCGType ext, TCGReg rd, TCGReg rn, TCGReg rm,
                                  int shift_imm)
 {
     /* Using shifted register arithmetic operations */
@@ -454,7 +449,7 @@
     tcg_out32(s, base | rm << 16 | shift | rn << 5 | rd);
 }
 
-static inline void tcg_out_mul(TCGContext *s, int ext,
+static inline void tcg_out_mul(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, TCGReg rm)
 {
     /* Using MADD 0x1b000000 with Ra = wzr alias MUL 0x1b007c00 */
@@ -463,7 +458,7 @@
 }
 
 static inline void tcg_out_shiftrot_reg(TCGContext *s,
-                                        enum aarch64_srr_opc opc, int ext,
+                                        enum aarch64_srr_opc opc, TCGType ext,
                                         TCGReg rd, TCGReg rn, TCGReg rm)
 {
     /* using 2-source data processing instructions 0x1ac02000 */
@@ -471,23 +466,23 @@
     tcg_out32(s, base | rm << 16 | opc << 8 | rn << 5 | rd);
 }
 
-static inline void tcg_out_ubfm(TCGContext *s, int ext, TCGReg rd, TCGReg rn,
-                                unsigned int a, unsigned int b)
+static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
+                                TCGReg rn, unsigned int a, unsigned int b)
 {
     /* Using UBFM 0x53000000 Wd, Wn, a, b */
     unsigned int base = ext ? 0xd3400000 : 0x53000000;
     tcg_out32(s, base | a << 16 | b << 10 | rn << 5 | rd);
 }
 
-static inline void tcg_out_sbfm(TCGContext *s, int ext, TCGReg rd, TCGReg rn,
-                                unsigned int a, unsigned int b)
+static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
+                                TCGReg rn, unsigned int a, unsigned int b)
 {
     /* Using SBFM 0x13000000 Wd, Wn, a, b */
     unsigned int base = ext ? 0x93400000 : 0x13000000;
     tcg_out32(s, base | a << 16 | b << 10 | rn << 5 | rd);
 }
 
-static inline void tcg_out_extr(TCGContext *s, int ext, TCGReg rd,
+static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                 TCGReg rn, TCGReg rm, unsigned int a)
 {
     /* Using EXTR 0x13800000 Wd, Wn, Wm, a */
@@ -495,7 +490,7 @@
     tcg_out32(s, base | rm << 16 | a << 10 | rn << 5 | rd);
 }
 
-static inline void tcg_out_shl(TCGContext *s, int ext,
+static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
 {
     int bits, max;
@@ -504,28 +499,28 @@
     tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
 }
 
-static inline void tcg_out_shr(TCGContext *s, int ext,
+static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
 {
     int max = ext ? 63 : 31;
     tcg_out_ubfm(s, ext, rd, rn, m & max, max);
 }
 
-static inline void tcg_out_sar(TCGContext *s, int ext,
+static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
 {
     int max = ext ? 63 : 31;
     tcg_out_sbfm(s, ext, rd, rn, m & max, max);
 }
 
-static inline void tcg_out_rotr(TCGContext *s, int ext,
+static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                 TCGReg rd, TCGReg rn, unsigned int m)
 {
     int max = ext ? 63 : 31;
     tcg_out_extr(s, ext, rd, rn, rn, m & max);
 }
 
-static inline void tcg_out_rotl(TCGContext *s, int ext,
+static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                 TCGReg rd, TCGReg rn, unsigned int m)
 {
     int bits, max;
@@ -534,24 +529,23 @@
     tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
 }
 
-static inline void tcg_out_cmp(TCGContext *s, int ext, TCGReg rn, TCGReg rm,
-                               int shift_imm)
+static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg rn, TCGReg rm)
 {
     /* Using CMP alias SUBS wzr, Wn, Wm */
-    tcg_out_arith(s, ARITH_SUBS, ext, TCG_REG_XZR, rn, rm, shift_imm);
+    tcg_out_arith(s, ARITH_SUBS, ext, TCG_REG_XZR, rn, rm, 0);
 }
 
-static inline void tcg_out_cset(TCGContext *s, int ext, TCGReg rd, TCGCond c)
+static inline void tcg_out_cset(TCGContext *s, TCGType ext,
+                                TCGReg rd, TCGCond c)
 {
     /* Using CSET alias of CSINC 0x1a800400 Xd, XZR, XZR, invert(cond) */
     unsigned int base = ext ? 0x9a9f07e0 : 0x1a9f07e0;
     tcg_out32(s, base | tcg_cond_to_aarch64[tcg_invert_cond(c)] << 12 | rd);
 }
 
-static inline void tcg_out_goto(TCGContext *s, tcg_target_long target)
+static inline void tcg_out_goto(TCGContext *s, intptr_t target)
 {
-    tcg_target_long offset;
-    offset = (target - (tcg_target_long)s->code_ptr) / 4;
+    intptr_t offset = (target - (intptr_t)s->code_ptr) / 4;
 
     if (offset < -0x02000000 || offset >= 0x02000000) {
         /* out of 26bit range */
@@ -582,11 +576,9 @@
     tcg_out32(s, insn);
 }
 
-static inline void tcg_out_goto_cond(TCGContext *s, TCGCond c,
-                                     tcg_target_long target)
+static inline void tcg_out_goto_cond(TCGContext *s, TCGCond c, intptr_t target)
 {
-    tcg_target_long offset;
-    offset = (target - (tcg_target_long)s->code_ptr) / 4;
+    intptr_t offset = (target - (intptr_t)s->code_ptr) / 4;
 
     if (offset < -0x40000 || offset >= 0x40000) {
         /* out of 19bit range */
@@ -607,11 +599,9 @@
     tcg_out32(s, 0xd61f0000 | reg << 5);
 }
 
-static inline void tcg_out_call(TCGContext *s, tcg_target_long target)
+static inline void tcg_out_call(TCGContext *s, intptr_t target)
 {
-    tcg_target_long offset;
-
-    offset = (target - (tcg_target_long)s->code_ptr) / 4;
+    intptr_t offset = (target - (intptr_t)s->code_ptr) / 4;
 
     if (offset < -0x02000000 || offset >= 0x02000000) { /* out of 26bit rng */
         tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, target);
@@ -638,7 +628,7 @@
    to test a 32bit reg against 0xff000000, pass M = 8,  R = 8.
    to test a 32bit reg against 0xff0000ff, pass M = 16, R = 8.
  */
-static inline void tcg_out_tst(TCGContext *s, int ext, TCGReg rn,
+static inline void tcg_out_tst(TCGContext *s, TCGType ext, TCGReg rn,
                                unsigned int m, unsigned int r)
 {
     /* using TST alias of ANDS XZR, Xn,#bimm64 0x7200001f */
@@ -647,8 +637,8 @@
 }
 
 /* and a register with a bit pattern, similarly to TST, no flags change */
-static inline void tcg_out_andi(TCGContext *s, int ext, TCGReg rd, TCGReg rn,
-                                unsigned int m, unsigned int r)
+static inline void tcg_out_andi(TCGContext *s, TCGType ext, TCGReg rd,
+                                TCGReg rn, unsigned int m, unsigned int r)
 {
     /* using AND 0x12000000 */
     unsigned int base = ext ? 0x92400000 : 0x12000000;
@@ -663,9 +653,8 @@
 
 void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
 {
-    tcg_target_long target, offset;
-    target = (tcg_target_long)addr;
-    offset = (target - (tcg_target_long)jmp_addr) / 4;
+    intptr_t target = addr;
+    intptr_t offset = (target - (intptr_t)jmp_addr) / 4;
 
     if (offset < -0x02000000 || offset >= 0x02000000) {
         /* out of 26bit range */
@@ -701,21 +690,23 @@
     }
 }
 
-static inline void tcg_out_rev(TCGContext *s, int ext, TCGReg rd, TCGReg rm)
+static inline void tcg_out_rev(TCGContext *s, TCGType ext,
+                               TCGReg rd, TCGReg rm)
 {
     /* using REV 0x5ac00800 */
     unsigned int base = ext ? 0xdac00c00 : 0x5ac00800;
     tcg_out32(s, base | rm << 5 | rd);
 }
 
-static inline void tcg_out_rev16(TCGContext *s, int ext, TCGReg rd, TCGReg rm)
+static inline void tcg_out_rev16(TCGContext *s, TCGType ext,
+                                 TCGReg rd, TCGReg rm)
 {
     /* using REV16 0x5ac00400 */
     unsigned int base = ext ? 0xdac00400 : 0x5ac00400;
     tcg_out32(s, base | rm << 5 | rd);
 }
 
-static inline void tcg_out_sxt(TCGContext *s, int ext, int s_bits,
+static inline void tcg_out_sxt(TCGContext *s, TCGType ext, int s_bits,
                                TCGReg rd, TCGReg rn)
 {
     /* using ALIASes SXTB 0x13001c00, SXTH 0x13003c00, SXTW 0x93407c00
@@ -733,7 +724,7 @@
     tcg_out_ubfm(s, 0, rd, rn, 0, bits);
 }
 
-static inline void tcg_out_addi(TCGContext *s, int ext,
+static inline void tcg_out_addi(TCGContext *s, TCGType ext,
                                 TCGReg rd, TCGReg rn, unsigned int aimm)
 {
     /* add immediate aimm unsigned 12bit value (with LSL 0 or 12) */
@@ -753,7 +744,7 @@
     tcg_out32(s, base | aimm | (rn << 5) | rd);
 }
 
-static inline void tcg_out_subi(TCGContext *s, int ext,
+static inline void tcg_out_subi(TCGContext *s, TCGType ext,
                                 TCGReg rd, TCGReg rn, unsigned int aimm)
 {
     /* sub immediate aimm unsigned 12bit value (with LSL 0 or 12) */
@@ -773,11 +764,6 @@
     tcg_out32(s, base | aimm | (rn << 5) | rd);
 }
 
-static inline void tcg_out_nop(TCGContext *s)
-{
-    tcg_out32(s, 0xd503201f);
-}
-
 #ifdef CONFIG_SOFTMMU
 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
  *                                     int mmu_idx, uintptr_t ra)
@@ -801,7 +787,8 @@
 
 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 {
-    reloc_pc19(lb->label_ptr[0], (tcg_target_long)s->code_ptr);
+    reloc_pc19(lb->label_ptr[0], (intptr_t)s->code_ptr);
+
     tcg_out_movr(s, 1, TCG_REG_X0, TCG_AREG0);
     tcg_out_movr(s, (TARGET_LONG_BITS == 64), TCG_REG_X1, lb->addrlo_reg);
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, lb->mem_index);
@@ -815,23 +802,21 @@
         tcg_out_movr(s, 1, lb->datalo_reg, TCG_REG_X0);
     }
 
-    tcg_out_goto(s, (tcg_target_long)lb->raddr);
+    tcg_out_goto(s, (intptr_t)lb->raddr);
 }
 
 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 {
-    reloc_pc19(lb->label_ptr[0], (tcg_target_long)s->code_ptr);
+    reloc_pc19(lb->label_ptr[0], (intptr_t)s->code_ptr);
 
     tcg_out_movr(s, 1, TCG_REG_X0, TCG_AREG0);
     tcg_out_movr(s, (TARGET_LONG_BITS == 64), TCG_REG_X1, lb->addrlo_reg);
     tcg_out_movr(s, 1, TCG_REG_X2, lb->datalo_reg);
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, lb->mem_index);
-    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_X4, (tcg_target_long)lb->raddr);
+    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_X4, (intptr_t)lb->raddr);
     tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP,
-                 (tcg_target_long)qemu_st_helpers[lb->opc & 3]);
+                 (intptr_t)qemu_st_helpers[lb->opc & 3]);
     tcg_out_callr(s, TCG_REG_TMP);
-
-    tcg_out_nop(s);
     tcg_out_goto(s, (tcg_target_long)lb->raddr);
 }
 
@@ -893,7 +878,7 @@
                  (is_read ? offsetof(CPUTLBEntry, addr_read)
                   : offsetof(CPUTLBEntry, addr_write)));
     /* Perform the address comparison. */
-    tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
+    tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3);
     *label_ptr = s->code_ptr;
     /* If not equal, we jump to the slow path. */
     tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
@@ -1088,16 +1073,23 @@
 }
 
 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
-                       const TCGArg *args, const int *const_args)
+                       const TCGArg args[TCG_MAX_OP_ARGS],
+                       const int const_args[TCG_MAX_OP_ARGS])
 {
-    /* ext will be set in the switch below, which will fall through to the
-       common code. It triggers the use of extended regs where appropriate. */
-    int ext = 0;
+    /* 99% of the time, we can signal the use of extension registers
+       by looking to see if the opcode handles 64-bit data.  */
+    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
+
+    /* Hoist the loads of the most common arguments.  */
+    TCGArg a0 = args[0];
+    TCGArg a1 = args[1];
+    TCGArg a2 = args[2];
+    int c2 = const_args[2];
 
     switch (opc) {
     case INDEX_op_exit_tb:
-        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, args[0]);
-        tcg_out_goto(s, (tcg_target_long)tb_ret_addr);
+        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
+        tcg_out_goto(s, (intptr_t)tb_ret_addr);
         break;
 
     case INDEX_op_goto_tb:
@@ -1105,23 +1097,23 @@
 #error "USE_DIRECT_JUMP required for aarch64"
 #endif
         assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */
-        s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
+        s->tb_jmp_offset[a0] = s->code_ptr - s->code_buf;
         /* actual branch destination will be patched by
            aarch64_tb_set_jmp_target later, beware retranslation. */
         tcg_out_goto_noaddr(s);
-        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
+        s->tb_next_offset[a0] = s->code_ptr - s->code_buf;
         break;
 
     case INDEX_op_call:
         if (const_args[0]) {
-            tcg_out_call(s, args[0]);
+            tcg_out_call(s, a0);
         } else {
-            tcg_out_callr(s, args[0]);
+            tcg_out_callr(s, a0);
         }
         break;
 
     case INDEX_op_br:
-        tcg_out_goto_label(s, args[0]);
+        tcg_out_goto_label(s, a0);
         break;
 
     case INDEX_op_ld_i32:
@@ -1144,123 +1136,95 @@
     case INDEX_op_st16_i64:
     case INDEX_op_st32_i64:
         tcg_out_ldst(s, aarch64_ldst_get_data(opc), aarch64_ldst_get_type(opc),
-                     args[0], args[1], args[2]);
-        break;
-
-    case INDEX_op_mov_i64:
-        ext = 1; /* fall through */
-    case INDEX_op_mov_i32:
-        tcg_out_movr(s, ext, args[0], args[1]);
-        break;
-
-    case INDEX_op_movi_i64:
-        tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
-        break;
-    case INDEX_op_movi_i32:
-        tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
+                     a0, a1, a2);
         break;
 
     case INDEX_op_add_i64:
-        ext = 1; /* fall through */
     case INDEX_op_add_i32:
-        tcg_out_arith(s, ARITH_ADD, ext, args[0], args[1], args[2], 0);
+        tcg_out_arith(s, ARITH_ADD, ext, a0, a1, a2, 0);
         break;
 
     case INDEX_op_sub_i64:
-        ext = 1; /* fall through */
     case INDEX_op_sub_i32:
-        tcg_out_arith(s, ARITH_SUB, ext, args[0], args[1], args[2], 0);
+        tcg_out_arith(s, ARITH_SUB, ext, a0, a1, a2, 0);
         break;
 
     case INDEX_op_and_i64:
-        ext = 1; /* fall through */
     case INDEX_op_and_i32:
-        tcg_out_arith(s, ARITH_AND, ext, args[0], args[1], args[2], 0);
+        tcg_out_arith(s, ARITH_AND, ext, a0, a1, a2, 0);
         break;
 
     case INDEX_op_or_i64:
-        ext = 1; /* fall through */
     case INDEX_op_or_i32:
-        tcg_out_arith(s, ARITH_OR, ext, args[0], args[1], args[2], 0);
+        tcg_out_arith(s, ARITH_OR, ext, a0, a1, a2, 0);
         break;
 
     case INDEX_op_xor_i64:
-        ext = 1; /* fall through */
     case INDEX_op_xor_i32:
-        tcg_out_arith(s, ARITH_XOR, ext, args[0], args[1], args[2], 0);
+        tcg_out_arith(s, ARITH_XOR, ext, a0, a1, a2, 0);
         break;
 
     case INDEX_op_mul_i64:
-        ext = 1; /* fall through */
     case INDEX_op_mul_i32:
-        tcg_out_mul(s, ext, args[0], args[1], args[2]);
+        tcg_out_mul(s, ext, a0, a1, a2);
         break;
 
     case INDEX_op_shl_i64:
-        ext = 1; /* fall through */
     case INDEX_op_shl_i32:
-        if (const_args[2]) {    /* LSL / UBFM Wd, Wn, (32 - m) */
-            tcg_out_shl(s, ext, args[0], args[1], args[2]);
+        if (c2) {    /* LSL / UBFM Wd, Wn, (32 - m) */
+            tcg_out_shl(s, ext, a0, a1, a2);
         } else {                /* LSL / LSLV */
-            tcg_out_shiftrot_reg(s, SRR_SHL, ext, args[0], args[1], args[2]);
+            tcg_out_shiftrot_reg(s, SRR_SHL, ext, a0, a1, a2);
         }
         break;
 
     case INDEX_op_shr_i64:
-        ext = 1; /* fall through */
     case INDEX_op_shr_i32:
-        if (const_args[2]) {    /* LSR / UBFM Wd, Wn, m, 31 */
-            tcg_out_shr(s, ext, args[0], args[1], args[2]);
+        if (c2) {    /* LSR / UBFM Wd, Wn, m, 31 */
+            tcg_out_shr(s, ext, a0, a1, a2);
         } else {                /* LSR / LSRV */
-            tcg_out_shiftrot_reg(s, SRR_SHR, ext, args[0], args[1], args[2]);
+            tcg_out_shiftrot_reg(s, SRR_SHR, ext, a0, a1, a2);
         }
         break;
 
     case INDEX_op_sar_i64:
-        ext = 1; /* fall through */
     case INDEX_op_sar_i32:
-        if (const_args[2]) {    /* ASR / SBFM Wd, Wn, m, 31 */
-            tcg_out_sar(s, ext, args[0], args[1], args[2]);
+        if (c2) {    /* ASR / SBFM Wd, Wn, m, 31 */
+            tcg_out_sar(s, ext, a0, a1, a2);
         } else {                /* ASR / ASRV */
-            tcg_out_shiftrot_reg(s, SRR_SAR, ext, args[0], args[1], args[2]);
+            tcg_out_shiftrot_reg(s, SRR_SAR, ext, a0, a1, a2);
         }
         break;
 
     case INDEX_op_rotr_i64:
-        ext = 1; /* fall through */
     case INDEX_op_rotr_i32:
-        if (const_args[2]) {    /* ROR / EXTR Wd, Wm, Wm, m */
-            tcg_out_rotr(s, ext, args[0], args[1], args[2]);
+        if (c2) {    /* ROR / EXTR Wd, Wm, Wm, m */
+            tcg_out_rotr(s, ext, a0, a1, a2);
         } else {                /* ROR / RORV */
-            tcg_out_shiftrot_reg(s, SRR_ROR, ext, args[0], args[1], args[2]);
+            tcg_out_shiftrot_reg(s, SRR_ROR, ext, a0, a1, a2);
         }
         break;
 
     case INDEX_op_rotl_i64:
-        ext = 1; /* fall through */
     case INDEX_op_rotl_i32:     /* same as rotate right by (32 - m) */
-        if (const_args[2]) {    /* ROR / EXTR Wd, Wm, Wm, 32 - m */
-            tcg_out_rotl(s, ext, args[0], args[1], args[2]);
+        if (c2) {    /* ROR / EXTR Wd, Wm, Wm, 32 - m */
+            tcg_out_rotl(s, ext, a0, a1, a2);
         } else {
-            tcg_out_arith(s, ARITH_SUB, 0,
-                          TCG_REG_TMP, TCG_REG_XZR, args[2], 0);
-            tcg_out_shiftrot_reg(s, SRR_ROR, ext,
-                                 args[0], args[1], TCG_REG_TMP);
+            tcg_out_arith(s, ARITH_SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2, 0);
+            tcg_out_shiftrot_reg(s, SRR_ROR, ext, a0, a1, TCG_REG_TMP);
         }
         break;
 
     case INDEX_op_brcond_i64:
-        ext = 1; /* fall through */
-    case INDEX_op_brcond_i32: /* CMP 0, 1, cond(2), label 3 */
-        tcg_out_cmp(s, ext, args[0], args[1], 0);
-        tcg_out_goto_label_cond(s, args[2], args[3]);
+    case INDEX_op_brcond_i32:
+        tcg_out_cmp(s, ext, a0, a1);
+        tcg_out_goto_label_cond(s, a2, args[3]);
         break;
 
     case INDEX_op_setcond_i64:
-        ext = 1; /* fall through */
     case INDEX_op_setcond_i32:
-        tcg_out_cmp(s, ext, args[1], args[2], 0);
-        tcg_out_cset(s, 0, args[0], args[3]);
+        tcg_out_cmp(s, ext, a1, a2);
+        tcg_out_cset(s, 0, a0, args[3]);
         break;
 
     case INDEX_op_qemu_ld8u:
@@ -1300,44 +1264,50 @@
         tcg_out_qemu_st(s, args, 3);
         break;
 
-    case INDEX_op_bswap64_i64:
-        ext = 1; /* fall through */
     case INDEX_op_bswap32_i64:
+        /* Despite the _i64, this is a 32-bit bswap.  */
+        ext = 0;
+        /* FALLTHRU */
+    case INDEX_op_bswap64_i64:
     case INDEX_op_bswap32_i32:
-        tcg_out_rev(s, ext, args[0], args[1]);
+        tcg_out_rev(s, ext, a0, a1);
         break;
     case INDEX_op_bswap16_i64:
     case INDEX_op_bswap16_i32:
-        tcg_out_rev16(s, 0, args[0], args[1]);
+        tcg_out_rev16(s, 0, a0, a1);
         break;
 
     case INDEX_op_ext8s_i64:
-        ext = 1; /* fall through */
     case INDEX_op_ext8s_i32:
-        tcg_out_sxt(s, ext, 0, args[0], args[1]);
+        tcg_out_sxt(s, ext, 0, a0, a1);
         break;
     case INDEX_op_ext16s_i64:
-        ext = 1; /* fall through */
     case INDEX_op_ext16s_i32:
-        tcg_out_sxt(s, ext, 1, args[0], args[1]);
+        tcg_out_sxt(s, ext, 1, a0, a1);
         break;
     case INDEX_op_ext32s_i64:
-        tcg_out_sxt(s, 1, 2, args[0], args[1]);
+        tcg_out_sxt(s, 1, 2, a0, a1);
         break;
     case INDEX_op_ext8u_i64:
     case INDEX_op_ext8u_i32:
-        tcg_out_uxt(s, 0, args[0], args[1]);
+        tcg_out_uxt(s, 0, a0, a1);
         break;
     case INDEX_op_ext16u_i64:
     case INDEX_op_ext16u_i32:
-        tcg_out_uxt(s, 1, args[0], args[1]);
+        tcg_out_uxt(s, 1, a0, a1);
         break;
     case INDEX_op_ext32u_i64:
-        tcg_out_movr(s, 0, args[0], args[1]);
+        tcg_out_movr(s, 0, a0, a1);
         break;
 
+    case INDEX_op_mov_i64:
+    case INDEX_op_mov_i32:
+    case INDEX_op_movi_i64:
+    case INDEX_op_movi_i32:
+        /* Always implemented with tcg_out_mov/i, never with tcg_out_op.  */
     default:
-        tcg_abort(); /* opcode not implemented */
+        /* Opcode not implemented.  */
+        tcg_abort();
     }
 }
 
@@ -1441,12 +1411,6 @@
 
 static void tcg_target_init(TCGContext *s)
 {
-#if !defined(CONFIG_USER_ONLY)
-    /* fail safe */
-    if ((1ULL << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry)) {
-        tcg_abort();
-    }
-#endif
     tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
     tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
 
diff --git a/trace-events b/trace-events
index aec4202..002c260 100644
--- a/trace-events
+++ b/trace-events
@@ -486,6 +486,7 @@
 g_malloc(size_t size, void *ptr) "size %zu ptr %p"
 g_realloc(void *ptr, size_t size, void *newptr) "ptr %p size %zu newptr %p"
 g_free(void *ptr) "ptr %p"
+system_wakeup_request(int reason) "reason=%d"
 
 # block/qcow2.c
 qcow2_writev_start_req(void *co, int64_t sector, int nb_sectors) "co %p sector %" PRIx64 " nb_sectors %d"
@@ -1039,8 +1040,8 @@
 vmware_setmode(uint32_t w, uint32_t h, uint32_t bpp) "%dx%d @ %d bpp"
 
 # savevm.c
-savevm_section_start(void) ""
-savevm_section_end(unsigned int section_id) "section_id %u"
+savevm_section_start(const char *id, unsigned int section_id) "%s, section_id %u"
+savevm_section_end(const char *id, unsigned int section_id) "%s, section_id %u"
 
 # arch_init.c
 migration_bitmap_sync_start(void) ""
diff --git a/ui/input-legacy.c b/ui/input-legacy.c
index f38984b..7dc486b 100644
--- a/ui/input-legacy.c
+++ b/ui/input-legacy.c
@@ -359,6 +359,20 @@
         } else {
             s->buttons &= ~bmap[evt->btn->button];
         }
+        if (evt->btn->down && evt->btn->button == INPUT_BUTTON_WHEEL_UP) {
+            s->qemu_put_mouse_event(s->qemu_put_mouse_event_opaque,
+                                    s->axis[INPUT_AXIS_X],
+                                    s->axis[INPUT_AXIS_Y],
+                                    -1,
+                                    s->buttons);
+        }
+        if (evt->btn->down && evt->btn->button == INPUT_BUTTON_WHEEL_DOWN) {
+            s->qemu_put_mouse_event(s->qemu_put_mouse_event_opaque,
+                                    s->axis[INPUT_AXIS_X],
+                                    s->axis[INPUT_AXIS_Y],
+                                    1,
+                                    s->buttons);
+        }
         break;
     case INPUT_EVENT_KIND_ABS:
         s->axis[evt->abs->axis] = evt->abs->value;
diff --git a/ui/sdl.c b/ui/sdl.c
index c1a16be..4e7f920 100644
--- a/ui/sdl.c
+++ b/ui/sdl.c
@@ -455,13 +455,17 @@
                              real_screen->w);
         qemu_input_queue_abs(dcl->con, INPUT_AXIS_Y, y,
                              real_screen->h);
-    } else if (guest_cursor) {
-        x -= guest_x;
-        y -= guest_y;
-        guest_x += x;
-        guest_y += y;
-        qemu_input_queue_rel(dcl->con, INPUT_AXIS_X, x);
-        qemu_input_queue_rel(dcl->con, INPUT_AXIS_Y, y);
+    } else {
+        if (guest_cursor) {
+            x -= guest_x;
+            y -= guest_y;
+            guest_x += x;
+            guest_y += y;
+            dx = x;
+            dy = y;
+        }
+        qemu_input_queue_rel(dcl->con, INPUT_AXIS_X, dx);
+        qemu_input_queue_rel(dcl->con, INPUT_AXIS_Y, dy);
     }
     qemu_input_event_sync();
 }
diff --git a/ui/spice-display.c b/ui/spice-display.c
index 9bb42f1..e28698c 100644
--- a/ui/spice-display.c
+++ b/ui/spice-display.c
@@ -354,6 +354,7 @@
                                DisplaySurface *surface)
 {
     SimpleSpiceUpdate *update;
+    bool need_destroy;
 
     dprint(1, "%s/%d:\n", __func__, ssd->qxl.id);
 
@@ -366,14 +367,19 @@
     }
 
     qemu_mutex_lock(&ssd->lock);
+    need_destroy = (ssd->ds != NULL);
     ssd->ds = surface;
     while ((update = QTAILQ_FIRST(&ssd->updates)) != NULL) {
         QTAILQ_REMOVE(&ssd->updates, update, next);
         qemu_spice_destroy_update(ssd, update);
     }
     qemu_mutex_unlock(&ssd->lock);
-    qemu_spice_destroy_host_primary(ssd);
-    qemu_spice_create_host_primary(ssd);
+    if (need_destroy) {
+        qemu_spice_destroy_host_primary(ssd);
+    }
+    if (ssd->ds) {
+        qemu_spice_create_host_primary(ssd);
+    }
 
     memset(&ssd->dirty, 0, sizeof(ssd->dirty));
     ssd->notify++;
@@ -537,10 +543,29 @@
 }
 
 static int interface_client_monitors_config(QXLInstance *sin,
-                                        VDAgentMonitorsConfig *monitors_config)
+                                            VDAgentMonitorsConfig *mc)
 {
-    dprint(3, "%s:\n", __func__);
-    return 0; /* == not supported by guest */
+    SimpleSpiceDisplay *ssd = container_of(sin, SimpleSpiceDisplay, qxl);
+    QemuUIInfo info;
+    int rc;
+
+    /*
+     * FIXME: multihead is tricky due to the way
+     * spice has multihead implemented.
+     */
+    memset(&info, 0, sizeof(info));
+    if (mc->num_of_monitors > 0) {
+        info.width  = mc->monitors[0].width;
+        info.height = mc->monitors[0].height;
+    }
+    rc = dpy_set_ui_info(ssd->dcl.con, &info);
+    dprint(1, "%s/%d: size %dx%d, rc %d   <---   ==========================\n",
+           __func__, ssd->qxl.id, info.width, info.height, rc);
+    if (rc != 0) {
+        return 0; /* == not supported by guest */
+    } else {
+        return 1;
+    }
 }
 
 static const QXLInterface dpy_interface = {
@@ -610,8 +635,6 @@
     ssd->dcl.ops = &display_listener_ops;
     ssd->dcl.con = con;
     register_displaychangelistener(&ssd->dcl);
-
-    qemu_spice_create_host_primary(ssd);
 }
 
 void qemu_spice_display_init(void)
diff --git a/ui/vnc-enc-tight.c b/ui/vnc-enc-tight.c
index e6966ae..59b59c0 100644
--- a/ui/vnc-enc-tight.c
+++ b/ui/vnc-enc-tight.c
@@ -330,7 +330,7 @@
     } else {
         errors = tight_detect_smooth_image16(vs, w, h);
     }
-    if (quality != -1) {
+    if (quality != (uint8_t)-1) {
         return (errors < tight_conf[quality].jpeg_threshold);
     }
     return (errors < tight_conf[compression].gradient_threshold);
diff --git a/ui/vnc-jobs.c b/ui/vnc-jobs.c
index 3f3c47b..68f3d77 100644
--- a/ui/vnc-jobs.c
+++ b/ui/vnc-jobs.c
@@ -252,6 +252,8 @@
 
         if (job->vs->csock == -1) {
             vnc_unlock_display(job->vs->vd);
+            /* Copy persistent encoding data */
+            vnc_async_encoding_end(job->vs, &vs);
             goto disconnected;
         }
 
@@ -278,6 +280,9 @@
         vnc_async_encoding_end(job->vs, &vs);
 
 	qemu_bh_schedule(job->vs->bh);
+    }  else {
+        /* Copy persistent encoding data */
+        vnc_async_encoding_end(job->vs, &vs);
     }
     vnc_unlock_output(job->vs);
 
diff --git a/ui/vnc.c b/ui/vnc.c
index 7dfc94a..9c84b3e 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -417,8 +417,7 @@
    3) resolutions > 1024
 */
 
-static int vnc_update_client(VncState *vs, int has_dirty);
-static int vnc_update_client_sync(VncState *vs, int has_dirty);
+static int vnc_update_client(VncState *vs, int has_dirty, bool sync);
 static void vnc_disconnect_start(VncState *vs);
 
 static void vnc_colordepth(VncState *vs);
@@ -431,29 +430,25 @@
 static void vnc_dpy_update(DisplayChangeListener *dcl,
                            int x, int y, int w, int h)
 {
-    int i;
     VncDisplay *vd = container_of(dcl, VncDisplay, dcl);
     struct VncSurface *s = &vd->guest;
     int width = surface_width(vd->ds);
     int height = surface_height(vd->ds);
 
-    h += y;
-
-    /* round x down to ensure the loop only spans one 16-pixel block per,
-       iteration.  otherwise, if (x % 16) != 0, the last iteration may span
-       two 16-pixel blocks but we only mark the first as dirty
-    */
-    w += (x % 16);
-    x -= (x % 16);
+    /* this is needed this to ensure we updated all affected
+     * blocks if x % VNC_DIRTY_PIXELS_PER_BIT != 0 */
+    w += (x % VNC_DIRTY_PIXELS_PER_BIT);
+    x -= (x % VNC_DIRTY_PIXELS_PER_BIT);
 
     x = MIN(x, width);
     y = MIN(y, height);
     w = MIN(x + w, width) - x;
-    h = MIN(h, height);
+    h = MIN(y + h, height);
 
-    for (; y < h; y++)
-        for (i = 0; i < w; i += 16)
-            set_bit((x + i) / 16, s->dirty[y]);
+    for (; y < h; y++) {
+        bitmap_set(s->dirty[y], x / VNC_DIRTY_PIXELS_PER_BIT,
+                   DIV_ROUND_UP(w, VNC_DIRTY_PIXELS_PER_BIT));
+    }
 }
 
 void vnc_framebuffer_update(VncState *vs, int x, int y, int w, int h,
@@ -571,6 +566,15 @@
     ptr += x * VNC_SERVER_FB_BYTES;
     return ptr;
 }
+/* this sets only the visible pixels of a dirty bitmap */
+#define VNC_SET_VISIBLE_PIXELS_DIRTY(bitmap, w, h) {\
+        int y;\
+        memset(bitmap, 0x00, sizeof(bitmap));\
+        for (y = 0; y < h; y++) {\
+            bitmap_set(bitmap[y], 0,\
+                       DIV_ROUND_UP(w, VNC_DIRTY_PIXELS_PER_BIT));\
+        } \
+    }
 
 static void vnc_dpy_switch(DisplayChangeListener *dcl,
                            DisplaySurface *surface)
@@ -596,7 +600,9 @@
     qemu_pixman_image_unref(vd->guest.fb);
     vd->guest.fb = pixman_image_ref(surface->image);
     vd->guest.format = surface->format;
-    memset(vd->guest.dirty, 0xFF, sizeof(vd->guest.dirty));
+    VNC_SET_VISIBLE_PIXELS_DIRTY(vd->guest.dirty,
+                                 surface_width(vd->ds),
+                                 surface_height(vd->ds));
 
     QTAILQ_FOREACH(vs, &vd->clients, next) {
         vnc_colordepth(vs);
@@ -604,7 +610,9 @@
         if (vs->vd->cursor) {
             vnc_cursor_define(vs);
         }
-        memset(vs->dirty, 0xFF, sizeof(vs->dirty));
+        VNC_SET_VISIBLE_PIXELS_DIRTY(vs->dirty,
+                                     surface_width(vd->ds),
+                                     surface_height(vd->ds));
     }
 }
 
@@ -751,7 +759,7 @@
     QTAILQ_FOREACH_SAFE(vs, &vd->clients, next, vn) {
         if (vnc_has_feature(vs, VNC_FEATURE_COPYRECT)) {
             vs->force_update = 1;
-            vnc_update_client_sync(vs, 1);
+            vnc_update_client(vs, 1, true);
             /* vs might be free()ed here */
         }
     }
@@ -770,11 +778,12 @@
         y = dst_y + h - 1;
         inc = -1;
     }
-    w_lim = w - (16 - (dst_x % 16));
-    if (w_lim < 0)
+    w_lim = w - (VNC_DIRTY_PIXELS_PER_BIT - (dst_x % VNC_DIRTY_PIXELS_PER_BIT));
+    if (w_lim < 0) {
         w_lim = w;
-    else
-        w_lim = w - (w_lim % 16);
+    } else {
+        w_lim = w - (w_lim % VNC_DIRTY_PIXELS_PER_BIT);
+    }
     for (i = 0; i < h; i++) {
         for (x = 0; x <= w_lim;
                 x += s, src_row += cmp_bytes, dst_row += cmp_bytes) {
@@ -782,10 +791,11 @@
                 if ((s = w - w_lim) == 0)
                     break;
             } else if (!x) {
-                s = (16 - (dst_x % 16));
+                s = (VNC_DIRTY_PIXELS_PER_BIT -
+                    (dst_x % VNC_DIRTY_PIXELS_PER_BIT));
                 s = MIN(s, w_lim);
             } else {
-                s = 16;
+                s = VNC_DIRTY_PIXELS_PER_BIT;
             }
             cmp_bytes = s * VNC_SERVER_FB_BYTES;
             if (memcmp(src_row, dst_row, cmp_bytes) == 0)
@@ -793,7 +803,8 @@
             memmove(dst_row, src_row, cmp_bytes);
             QTAILQ_FOREACH(vs, &vd->clients, next) {
                 if (!vnc_has_feature(vs, VNC_FEATURE_COPYRECT)) {
-                    set_bit(((x + dst_x) / 16), vs->dirty[y]);
+                    set_bit(((x + dst_x) / VNC_DIRTY_PIXELS_PER_BIT),
+                            vs->dirty[y]);
                 }
             }
         }
@@ -862,35 +873,24 @@
     int h;
 
     for (h = 1; h < (height - y); h++) {
-        int tmp_x;
         if (!test_bit(last_x, vs->dirty[y + h])) {
             break;
         }
-        for (tmp_x = last_x; tmp_x < x; tmp_x++) {
-            clear_bit(tmp_x, vs->dirty[y + h]);
-        }
+        bitmap_clear(vs->dirty[y + h], last_x, x - last_x);
     }
 
     return h;
 }
 
-static int vnc_update_client_sync(VncState *vs, int has_dirty)
-{
-    int ret = vnc_update_client(vs, has_dirty);
-    vnc_jobs_join(vs);
-    return ret;
-}
-
-static int vnc_update_client(VncState *vs, int has_dirty)
+static int vnc_update_client(VncState *vs, int has_dirty, bool sync)
 {
     if (vs->need_update && vs->csock != -1) {
         VncDisplay *vd = vs->vd;
         VncJob *job;
         int y;
-        int width, height;
+        int height;
         int n = 0;
 
-
         if (vs->output.offset && !vs->audio_cap && !vs->force_update)
             /* kernel send buffers are full -> drop frames to throttle */
             return 0;
@@ -906,33 +906,27 @@
          */
         job = vnc_job_new(vs);
 
-        width = MIN(pixman_image_get_width(vd->server), vs->client_width);
         height = MIN(pixman_image_get_height(vd->server), vs->client_height);
 
-        for (y = 0; y < height; y++) {
-            int x;
-            int last_x = -1;
-            for (x = 0; x < width / 16; x++) {
-                if (test_and_clear_bit(x, vs->dirty[y])) {
-                    if (last_x == -1) {
-                        last_x = x;
-                    }
-                } else {
-                    if (last_x != -1) {
-                        int h = find_and_clear_dirty_height(vs, y, last_x, x,
-                                                            height);
-
-                        n += vnc_job_add_rect(job, last_x * 16, y,
-                                              (x - last_x) * 16, h);
-                    }
-                    last_x = -1;
-                }
+        y = 0;
+        for (;;) {
+            int x, h;
+            unsigned long x2;
+            unsigned long offset = find_next_bit((unsigned long *) &vs->dirty,
+                                                 height * VNC_DIRTY_BPL(vs),
+                                                 y * VNC_DIRTY_BPL(vs));
+            if (offset == height * VNC_DIRTY_BPL(vs)) {
+                /* no more dirty bits */
+                break;
             }
-            if (last_x != -1) {
-                int h = find_and_clear_dirty_height(vs, y, last_x, x, height);
-                n += vnc_job_add_rect(job, last_x * 16, y,
-                                      (x - last_x) * 16, h);
-            }
+            y = offset / VNC_DIRTY_BPL(vs);
+            x = offset % VNC_DIRTY_BPL(vs);
+            x2 = find_next_zero_bit((unsigned long *) &vs->dirty[y],
+                                    VNC_DIRTY_BPL(vs), x);
+            bitmap_clear(vs->dirty[y], x, x2 - x);
+            h = find_and_clear_dirty_height(vs, y, x, x2, height);
+            n += vnc_job_add_rect(job, x * VNC_DIRTY_PIXELS_PER_BIT, y,
+                                  (x2 - x) * VNC_DIRTY_PIXELS_PER_BIT, h);
         }
 
         vnc_job_push(job);
@@ -940,8 +934,11 @@
         return n;
     }
 
-    if (vs->csock == -1)
+    if (vs->csock == -1) {
         vnc_disconnect_finish(vs);
+    } else if (sync) {
+        vnc_jobs_join(vs);
+    }
 
     return 0;
 }
@@ -1846,7 +1843,7 @@
                                        int w, int h)
 {
     int i;
-    const size_t width = surface_width(vs->vd->ds) / 16;
+    const size_t width = surface_width(vs->vd->ds) / VNC_DIRTY_PIXELS_PER_BIT;
     const size_t height = surface_height(vs->vd->ds);
 
     if (y_position > height) {
@@ -2548,7 +2545,9 @@
 
         vs->lossy_rect[sty][stx] = 0;
         for (j = 0; j < VNC_STAT_RECT; ++j) {
-            bitmap_set(vs->dirty[y + j], x / 16, VNC_STAT_RECT / 16);
+            bitmap_set(vs->dirty[y + j],
+                       x / VNC_DIRTY_PIXELS_PER_BIT,
+                       VNC_STAT_RECT / VNC_DIRTY_PIXELS_PER_BIT);
         }
         has_dirty++;
     }
@@ -2652,8 +2651,8 @@
     int width = pixman_image_get_width(vd->guest.fb);
     int height = pixman_image_get_height(vd->guest.fb);
     int y;
-    uint8_t *guest_row;
-    uint8_t *server_row;
+    uint8_t *guest_row0 = NULL, *server_row0;
+    int guest_stride = 0, server_stride;
     int cmp_bytes;
     VncState *vs;
     int has_dirty = 0;
@@ -2671,47 +2670,64 @@
      * Check and copy modified bits from guest to server surface.
      * Update server dirty map.
      */
-    cmp_bytes = 64;
+    cmp_bytes = VNC_DIRTY_PIXELS_PER_BIT * VNC_SERVER_FB_BYTES;
     if (cmp_bytes > vnc_server_fb_stride(vd)) {
         cmp_bytes = vnc_server_fb_stride(vd);
     }
     if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
         int width = pixman_image_get_width(vd->server);
         tmpbuf = qemu_pixman_linebuf_create(VNC_SERVER_FB_FORMAT, width);
+    } else {
+        guest_row0 = (uint8_t *)pixman_image_get_data(vd->guest.fb);
+        guest_stride = pixman_image_get_stride(vd->guest.fb);
     }
-    guest_row = (uint8_t *)pixman_image_get_data(vd->guest.fb);
-    server_row = (uint8_t *)pixman_image_get_data(vd->server);
-    for (y = 0; y < height; y++) {
-        if (!bitmap_empty(vd->guest.dirty[y], VNC_DIRTY_BITS)) {
-            int x;
-            uint8_t *guest_ptr;
-            uint8_t *server_ptr;
+    server_row0 = (uint8_t *)pixman_image_get_data(vd->server);
+    server_stride = pixman_image_get_stride(vd->server);
 
-            if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
-                qemu_pixman_linebuf_fill(tmpbuf, vd->guest.fb, width, 0, y);
-                guest_ptr = (uint8_t *)pixman_image_get_data(tmpbuf);
-            } else {
-                guest_ptr = guest_row;
-            }
-            server_ptr = server_row;
-
-            for (x = 0; x + 15 < width;
-                    x += 16, guest_ptr += cmp_bytes, server_ptr += cmp_bytes) {
-                if (!test_and_clear_bit((x / 16), vd->guest.dirty[y]))
-                    continue;
-                if (memcmp(server_ptr, guest_ptr, cmp_bytes) == 0)
-                    continue;
-                memcpy(server_ptr, guest_ptr, cmp_bytes);
-                if (!vd->non_adaptive)
-                    vnc_rect_updated(vd, x, y, &tv);
-                QTAILQ_FOREACH(vs, &vd->clients, next) {
-                    set_bit((x / 16), vs->dirty[y]);
-                }
-                has_dirty++;
-            }
+    y = 0;
+    for (;;) {
+        int x;
+        uint8_t *guest_ptr, *server_ptr;
+        unsigned long offset = find_next_bit((unsigned long *) &vd->guest.dirty,
+                                             height * VNC_DIRTY_BPL(&vd->guest),
+                                             y * VNC_DIRTY_BPL(&vd->guest));
+        if (offset == height * VNC_DIRTY_BPL(&vd->guest)) {
+            /* no more dirty bits */
+            break;
         }
-        guest_row  += pixman_image_get_stride(vd->guest.fb);
-        server_row += pixman_image_get_stride(vd->server);
+        y = offset / VNC_DIRTY_BPL(&vd->guest);
+        x = offset % VNC_DIRTY_BPL(&vd->guest);
+
+        server_ptr = server_row0 + y * server_stride + x * cmp_bytes;
+
+        if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
+            qemu_pixman_linebuf_fill(tmpbuf, vd->guest.fb, width, 0, y);
+            guest_ptr = (uint8_t *)pixman_image_get_data(tmpbuf);
+        } else {
+            guest_ptr = guest_row0 + y * guest_stride;
+        }
+        guest_ptr += x * cmp_bytes;
+
+        for (; x < DIV_ROUND_UP(width, VNC_DIRTY_PIXELS_PER_BIT);
+             x++, guest_ptr += cmp_bytes, server_ptr += cmp_bytes) {
+            if (!test_and_clear_bit(x, vd->guest.dirty[y])) {
+                continue;
+            }
+            if (memcmp(server_ptr, guest_ptr, cmp_bytes) == 0) {
+                continue;
+            }
+            memcpy(server_ptr, guest_ptr, cmp_bytes);
+            if (!vd->non_adaptive) {
+                vnc_rect_updated(vd, x * VNC_DIRTY_PIXELS_PER_BIT,
+                                 y, &tv);
+            }
+            QTAILQ_FOREACH(vs, &vd->clients, next) {
+                set_bit(x, vs->dirty[y]);
+            }
+            has_dirty++;
+        }
+
+        y++;
     }
     qemu_pixman_image_unref(tmpbuf);
     return has_dirty;
@@ -2734,7 +2750,7 @@
     vnc_unlock_display(vd);
 
     QTAILQ_FOREACH_SAFE(vs, &vd->clients, next, vn) {
-        rects += vnc_update_client(vs, has_dirty);
+        rects += vnc_update_client(vs, has_dirty, false);
         /* vs might be free()ed here */
     }
 
@@ -3134,7 +3150,9 @@
             acl = 1;
 #endif
         } else if (strncmp(options, "lossy", 5) == 0) {
+#ifdef CONFIG_VNC_JPEG
             vs->lossy = true;
+#endif
         } else if (strncmp(options, "non-adaptive", 12) == 0) {
             vs->non_adaptive = true;
         } else if (strncmp(options, "share=", 6) == 0) {
@@ -3151,6 +3169,13 @@
         }
     }
 
+    /* adaptive updates are only used with tight encoding and
+     * if lossy updates are enabled so we can disable all the
+     * calculations otherwise */
+    if (!vs->lossy) {
+        vs->non_adaptive = true;
+    }
+
 #ifdef CONFIG_VNC_TLS
     if (acl && x509 && vs->tls.x509verify) {
         if (!(vs->tls.acl = qemu_acl_init("vnc.x509dname"))) {
diff --git a/ui/vnc.h b/ui/vnc.h
index e63c142..8da81b8 100644
--- a/ui/vnc.h
+++ b/ui/vnc.h
@@ -81,8 +81,16 @@
 #define VNC_MAX_WIDTH 2560
 #define VNC_MAX_HEIGHT 2048
 
+/* VNC_DIRTY_PIXELS_PER_BIT is the number of dirty pixels represented
+ * by one bit in the dirty bitmap */
+#define VNC_DIRTY_PIXELS_PER_BIT 16
+
 /* VNC_DIRTY_BITS is the number of bits in the dirty bitmap. */
-#define VNC_DIRTY_BITS (VNC_MAX_WIDTH / 16)
+#define VNC_DIRTY_BITS (VNC_MAX_WIDTH / VNC_DIRTY_PIXELS_PER_BIT)
+
+/* VNC_DIRTY_BPL (BPL = bits per line) might be greater than
+ * VNC_DIRTY_BITS due to alignment */
+#define VNC_DIRTY_BPL(x) (sizeof((x)->dirty) / VNC_MAX_HEIGHT * BITS_PER_BYTE)
 
 #define VNC_STAT_RECT  64
 #define VNC_STAT_COLS (VNC_MAX_WIDTH / VNC_STAT_RECT)
diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
index 45113b4..960d7f5 100644
--- a/util/qemu-thread-posix.c
+++ b/util/qemu-thread-posix.c
@@ -420,7 +420,7 @@
     if (err)
         error_exit(err, __func__);
 
-#ifdef _GNU_SOURCE
+#if defined(__GLIBC__) && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 12))
     if (name_threads) {
         pthread_setname_np(thread->thread, name);
     }
diff --git a/vl.c b/vl.c
index c8a5bfa..bca5c95 100644
--- a/vl.c
+++ b/vl.c
@@ -1879,6 +1879,8 @@
 
 void qemu_system_wakeup_request(WakeupReason reason)
 {
+    trace_system_wakeup_request(reason);
+
     if (!runstate_check(RUN_STATE_SUSPENDED)) {
         return;
     }