diff --git a/CODING_STYLE b/CODING_STYLE
index d46cfa5..3c6978f 100644
--- a/CODING_STYLE
+++ b/CODING_STYLE
@@ -87,10 +87,15 @@
 
 5. Declarations
 
-Mixed declarations (interleaving statements and declarations within blocks)
-are not allowed; declarations should be at the beginning of blocks.  In other
-words, the code should not generate warnings if using GCC's
--Wdeclaration-after-statement option.
+Mixed declarations (interleaving statements and declarations within
+blocks) are generally not allowed; declarations should be at the beginning
+of blocks.
+
+Every now and then, an exception is made for declarations inside a
+#ifdef or #ifndef block: if the code looks nicer, such declarations can
+be placed at the top of the block even if there are statements above.
+On the other hand, however, it's often best to move that #ifdef/#ifndef
+block to a separate function altogether.
 
 6. Conditional statements
 
diff --git a/Makefile.target b/Makefile.target
index 3e7aafd..dc32294 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -7,7 +7,7 @@
 include config-devices.mak
 include $(SRC_PATH)/rules.mak
 
-$(call set-vpath, $(SRC_PATH))
+$(call set-vpath, $(SRC_PATH):$(BUILD_DIR))
 ifdef CONFIG_LINUX
 QEMU_CFLAGS += -I../linux-headers
 endif
diff --git a/block/iscsi.c b/block/iscsi.c
index 5002916..93f1ee4 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1214,6 +1214,10 @@
 
     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
         error_setg(errp, "iSCSI: failed to send readcapacity10 command.");
+    } else if (!iscsilun->block_size ||
+               iscsilun->block_size % BDRV_SECTOR_SIZE) {
+        error_setg(errp, "iSCSI: the target returned an invalid "
+                   "block size of %d.", iscsilun->block_size);
     }
     if (task) {
         scsi_free_scsi_task(task);
diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h
index 21cc602..735cb40 100644
--- a/bsd-user/qemu.h
+++ b/bsd-user/qemu.h
@@ -211,8 +211,6 @@
                        abi_ulong new_addr);
 int target_msync(abi_ulong start, abi_ulong len, int flags);
 extern unsigned long last_brk;
-void mmap_lock(void);
-void mmap_unlock(void);
 void cpu_list_lock(void);
 void cpu_list_unlock(void);
 #if defined(CONFIG_USE_NPTL)
diff --git a/configure b/configure
index 5c06663..d7c24cd 100755
--- a/configure
+++ b/configure
@@ -337,6 +337,7 @@
 vhdx=""
 numa=""
 tcmalloc="no"
+jemalloc="no"
 
 # parse CC options first
 for opt do
@@ -1143,6 +1144,10 @@
   ;;
   --enable-tcmalloc) tcmalloc="yes"
   ;;
+  --disable-jemalloc) jemalloc="no"
+  ;;
+  --enable-jemalloc) jemalloc="yes"
+  ;;
   *)
       echo "ERROR: unknown option $opt"
       echo "Try '$0 --help' for more information"
@@ -1367,6 +1372,7 @@
   vhdx            support for the Microsoft VHDX image format
   numa            libnuma support
   tcmalloc        tcmalloc support
+  jemalloc        jemalloc support
 
 NOTE: The object files are built at the place where configure is launched
 EOF
@@ -3395,6 +3401,11 @@
   fi
 fi
 
+if test "$tcmalloc" = "yes" && test "$jemalloc" = "yes" ; then
+    echo "ERROR: tcmalloc && jemalloc can't be used at the same time"
+    exit 1
+fi
+
 ##########################################
 # tcmalloc probe
 
@@ -3412,6 +3423,22 @@
 fi
 
 ##########################################
+# jemalloc probe
+
+if test "$jemalloc" = "yes" ; then
+  cat > $TMPC << EOF
+#include <stdlib.h>
+int main(void) { malloc(1); return 0; }
+EOF
+
+  if compile_prog "" "-ljemalloc" ; then
+    LIBS="-ljemalloc $LIBS"
+  else
+    feature_not_found "jemalloc" "install jemalloc devel"
+  fi
+fi
+
+##########################################
 # signalfd probe
 signalfd="no"
 cat > $TMPC << EOF
@@ -4629,6 +4656,7 @@
 echo "bzip2 support     $bzip2"
 echo "NUMA host support $numa"
 echo "tcmalloc support  $tcmalloc"
+echo "jemalloc support  $jemalloc"
 
 if test "$sdl_too_old" = "yes"; then
 echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -5519,91 +5547,76 @@
 cflags=""
 ldflags=""
 
+disas_config() {
+  echo "CONFIG_${1}_DIS=y" >> $config_target_mak
+  echo "CONFIG_${1}_DIS=y" >> config-all-disas.mak
+}
+
 for i in $ARCH $TARGET_BASE_ARCH ; do
   case "$i" in
   alpha)
-    echo "CONFIG_ALPHA_DIS=y"  >> $config_target_mak
-    echo "CONFIG_ALPHA_DIS=y"  >> config-all-disas.mak
+    disas_config "ALPHA"
   ;;
   aarch64)
     if test -n "${cxx}"; then
-      echo "CONFIG_ARM_A64_DIS=y"  >> $config_target_mak
-      echo "CONFIG_ARM_A64_DIS=y"  >> config-all-disas.mak
+      disas_config "ARM_A64"
     fi
   ;;
   arm)
-    echo "CONFIG_ARM_DIS=y"  >> $config_target_mak
-    echo "CONFIG_ARM_DIS=y"  >> config-all-disas.mak
+    disas_config "ARM"
     if test -n "${cxx}"; then
-      echo "CONFIG_ARM_A64_DIS=y"  >> $config_target_mak
-      echo "CONFIG_ARM_A64_DIS=y"  >> config-all-disas.mak
+      disas_config "ARM_A64"
     fi
   ;;
   cris)
-    echo "CONFIG_CRIS_DIS=y"  >> $config_target_mak
-    echo "CONFIG_CRIS_DIS=y"  >> config-all-disas.mak
+    disas_config "CRIS"
   ;;
   hppa)
-    echo "CONFIG_HPPA_DIS=y"  >> $config_target_mak
-    echo "CONFIG_HPPA_DIS=y"  >> config-all-disas.mak
+    disas_config "HPPA"
   ;;
   i386|x86_64|x32)
-    echo "CONFIG_I386_DIS=y"  >> $config_target_mak
-    echo "CONFIG_I386_DIS=y"  >> config-all-disas.mak
+    disas_config "I386"
   ;;
   ia64*)
-    echo "CONFIG_IA64_DIS=y"  >> $config_target_mak
-    echo "CONFIG_IA64_DIS=y"  >> config-all-disas.mak
+    disas_config "IA64"
   ;;
   lm32)
-    echo "CONFIG_LM32_DIS=y"  >> $config_target_mak
-    echo "CONFIG_LM32_DIS=y"  >> config-all-disas.mak
+    disas_config "LM32"
   ;;
   m68k)
-    echo "CONFIG_M68K_DIS=y"  >> $config_target_mak
-    echo "CONFIG_M68K_DIS=y"  >> config-all-disas.mak
+    disas_config "M68K"
   ;;
   microblaze*)
-    echo "CONFIG_MICROBLAZE_DIS=y"  >> $config_target_mak
-    echo "CONFIG_MICROBLAZE_DIS=y"  >> config-all-disas.mak
+    disas_config "MICROBLAZE"
   ;;
   mips*)
-    echo "CONFIG_MIPS_DIS=y"  >> $config_target_mak
-    echo "CONFIG_MIPS_DIS=y"  >> config-all-disas.mak
+    disas_config "MIPS"
   ;;
   moxie*)
-    echo "CONFIG_MOXIE_DIS=y"  >> $config_target_mak
-    echo "CONFIG_MOXIE_DIS=y"  >> config-all-disas.mak
+    disas_config "MOXIE"
   ;;
   or32)
-    echo "CONFIG_OPENRISC_DIS=y"  >> $config_target_mak
-    echo "CONFIG_OPENRISC_DIS=y"  >> config-all-disas.mak
+    disas_config "OPENRISC"
   ;;
   ppc*)
-    echo "CONFIG_PPC_DIS=y"  >> $config_target_mak
-    echo "CONFIG_PPC_DIS=y"  >> config-all-disas.mak
+    disas_config "PPC"
   ;;
   s390*)
-    echo "CONFIG_S390_DIS=y"  >> $config_target_mak
-    echo "CONFIG_S390_DIS=y"  >> config-all-disas.mak
+    disas_config "S390"
   ;;
   sh4)
-    echo "CONFIG_SH4_DIS=y"  >> $config_target_mak
-    echo "CONFIG_SH4_DIS=y"  >> config-all-disas.mak
+    disas_config "SH4"
   ;;
   sparc*)
-    echo "CONFIG_SPARC_DIS=y"  >> $config_target_mak
-    echo "CONFIG_SPARC_DIS=y"  >> config-all-disas.mak
+    disas_config "SPARC"
   ;;
   xtensa*)
-    echo "CONFIG_XTENSA_DIS=y"  >> $config_target_mak
-    echo "CONFIG_XTENSA_DIS=y"  >> config-all-disas.mak
+    disas_config "XTENSA"
   ;;
   esac
 done
 if test "$tcg_interpreter" = "yes" ; then
-  echo "CONFIG_TCI_DIS=y"  >> $config_target_mak
-  echo "CONFIG_TCI_DIS=y"  >> config-all-disas.mak
+  disas_config "TCI"
 fi
 
 case "$ARCH" in
diff --git a/cpu-exec.c b/cpu-exec.c
index 6b6942d..8945533 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -258,10 +258,10 @@
     tb_free(tb);
 }
 
-static TranslationBlock *tb_find_slow(CPUState *cpu,
-                                      target_ulong pc,
-                                      target_ulong cs_base,
-                                      uint64_t flags)
+static TranslationBlock *tb_find_physical(CPUState *cpu,
+                                          target_ulong pc,
+                                          target_ulong cs_base,
+                                          uint64_t flags)
 {
     CPUArchState *env = (CPUArchState *)cpu->env_ptr;
     TranslationBlock *tb, **ptb1;
@@ -278,8 +278,9 @@
     ptb1 = &tcg_ctx.tb_ctx.tb_phys_hash[h];
     for(;;) {
         tb = *ptb1;
-        if (!tb)
-            goto not_found;
+        if (!tb) {
+            return NULL;
+        }
         if (tb->pc == pc &&
             tb->page_addr[0] == phys_page1 &&
             tb->cs_base == cs_base &&
@@ -291,25 +292,59 @@
                 virt_page2 = (pc & TARGET_PAGE_MASK) +
                     TARGET_PAGE_SIZE;
                 phys_page2 = get_page_addr_code(env, virt_page2);
-                if (tb->page_addr[1] == phys_page2)
-                    goto found;
+                if (tb->page_addr[1] == phys_page2) {
+                    break;
+                }
             } else {
-                goto found;
+                break;
             }
         }
         ptb1 = &tb->phys_hash_next;
     }
- not_found:
-   /* if no translated code available, then translate it now */
+
+    /* Move the TB to the head of the list */
+    *ptb1 = tb->phys_hash_next;
+    tb->phys_hash_next = tcg_ctx.tb_ctx.tb_phys_hash[h];
+    tcg_ctx.tb_ctx.tb_phys_hash[h] = tb;
+    return tb;
+}
+
+static TranslationBlock *tb_find_slow(CPUState *cpu,
+                                      target_ulong pc,
+                                      target_ulong cs_base,
+                                      uint64_t flags)
+{
+    TranslationBlock *tb;
+
+    tb = tb_find_physical(cpu, pc, cs_base, flags);
+    if (tb) {
+        goto found;
+    }
+
+#ifdef CONFIG_USER_ONLY
+    /* mmap_lock is needed by tb_gen_code, and mmap_lock must be
+     * taken outside tb_lock.  Since we're momentarily dropping
+     * tb_lock, there's a chance that our desired tb has been
+     * translated.
+     */
+    tb_unlock();
+    mmap_lock();
+    tb_lock();
+    tb = tb_find_physical(cpu, pc, cs_base, flags);
+    if (tb) {
+        mmap_unlock();
+        goto found;
+    }
+#endif
+
+    /* if no translated code available, then translate it now */
     tb = tb_gen_code(cpu, pc, cs_base, flags, 0);
 
- found:
-    /* Move the last found TB to the head of the list */
-    if (likely(*ptb1)) {
-        *ptb1 = tb->phys_hash_next;
-        tb->phys_hash_next = tcg_ctx.tb_ctx.tb_phys_hash[h];
-        tcg_ctx.tb_ctx.tb_phys_hash[h] = tb;
-    }
+#ifdef CONFIG_USER_ONLY
+    mmap_unlock();
+#endif
+
+found:
     /* we add the TB in the virtual pc hash table */
     cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
     return tb;
@@ -350,7 +385,8 @@
 
 /* main execution loop */
 
-volatile sig_atomic_t exit_request;
+bool exit_request;
+CPUState *tcg_current_cpu;
 
 int cpu_exec(CPUState *cpu)
 {
@@ -365,9 +401,6 @@
     uintptr_t next_tb;
     SyncClocks sc;
 
-    /* This must be volatile so it is not trashed by longjmp() */
-    volatile bool have_tb_lock = false;
-
     if (cpu->halted) {
         if (!cpu_has_work(cpu)) {
             return EXCP_HALTED;
@@ -377,18 +410,10 @@
     }
 
     current_cpu = cpu;
-
-    /* As long as current_cpu is null, up to the assignment just above,
-     * requests by other threads to exit the execution loop are expected to
-     * be issued using the exit_request global. We must make sure that our
-     * evaluation of the global value is performed past the current_cpu
-     * value transition point, which requires a memory barrier as well as
-     * an instruction scheduling constraint on modern architectures.  */
-    smp_mb();
-
+    atomic_mb_set(&tcg_current_cpu, cpu);
     rcu_read_lock();
 
-    if (unlikely(exit_request)) {
+    if (unlikely(atomic_mb_read(&exit_request))) {
         cpu->exit_request = 1;
     }
 
@@ -484,8 +509,7 @@
                     cpu->exception_index = EXCP_INTERRUPT;
                     cpu_loop_exit(cpu);
                 }
-                spin_lock(&tcg_ctx.tb_ctx.tb_lock);
-                have_tb_lock = true;
+                tb_lock();
                 tb = tb_find_fast(cpu);
                 /* Note: we do it here to avoid a gcc bug on Mac OS X when
                    doing it in tb_find_slow */
@@ -507,20 +531,14 @@
                     tb_add_jump((TranslationBlock *)(next_tb & ~TB_EXIT_MASK),
                                 next_tb & TB_EXIT_MASK, tb);
                 }
-                have_tb_lock = false;
-                spin_unlock(&tcg_ctx.tb_ctx.tb_lock);
-
-                /* cpu_interrupt might be called while translating the
-                   TB, but before it is linked into a potentially
-                   infinite loop and becomes env->current_tb. Avoid
-                   starting execution if there is a pending interrupt. */
-                cpu->current_tb = tb;
-                barrier();
+                tb_unlock();
                 if (likely(!cpu->exit_request)) {
                     trace_exec_tb(tb, tb->pc);
                     tc_ptr = tb->tc_ptr;
                     /* execute the generated code */
+                    cpu->current_tb = tb;
                     next_tb = cpu_tb_exec(cpu, tc_ptr);
+                    cpu->current_tb = NULL;
                     switch (next_tb & TB_EXIT_MASK) {
                     case TB_EXIT_REQUESTED:
                         /* Something asked us to stop executing
@@ -528,8 +546,12 @@
                          * loop. Whatever requested the exit will also
                          * have set something else (eg exit_request or
                          * interrupt_request) which we will handle
-                         * next time around the loop.
+                         * next time around the loop.  But we need to
+                         * ensure the tcg_exit_req read in generated code
+                         * comes before the next read of cpu->exit_request
+                         * or cpu->interrupt_request.
                          */
+                        smp_rmb();
                         next_tb = 0;
                         break;
                     case TB_EXIT_ICOUNT_EXPIRED:
@@ -559,7 +581,6 @@
                         break;
                     }
                 }
-                cpu->current_tb = NULL;
                 /* Try to align the host and virtual clocks
                    if the guest is in advance */
                 align_clocks(&sc, cpu);
@@ -576,10 +597,7 @@
             x86_cpu = X86_CPU(cpu);
             env = &x86_cpu->env;
 #endif
-            if (have_tb_lock) {
-                spin_unlock(&tcg_ctx.tb_ctx.tb_lock);
-                have_tb_lock = false;
-            }
+            tb_lock_reset();
         }
     } /* for(;;) */
 
@@ -588,5 +606,8 @@
 
     /* fail safe : never use current_cpu outside cpu_exec() */
     current_cpu = NULL;
+
+    /* Does not need atomic_mb_set because a spurious wakeup is okay.  */
+    atomic_set(&tcg_current_cpu, NULL);
     return ret;
 }
diff --git a/cpus.c b/cpus.c
index c1e74d9..dddd056 100644
--- a/cpus.c
+++ b/cpus.c
@@ -661,14 +661,6 @@
     cpu->stopped = true;
 }
 
-static void cpu_signal(int sig)
-{
-    if (current_cpu) {
-        cpu_exit(current_cpu);
-    }
-    exit_request = 1;
-}
-
 #ifdef CONFIG_LINUX
 static void sigbus_reraise(void)
 {
@@ -781,29 +773,11 @@
     }
 }
 
-static void qemu_tcg_init_cpu_signals(void)
-{
-    sigset_t set;
-    struct sigaction sigact;
-
-    memset(&sigact, 0, sizeof(sigact));
-    sigact.sa_handler = cpu_signal;
-    sigaction(SIG_IPI, &sigact, NULL);
-
-    sigemptyset(&set);
-    sigaddset(&set, SIG_IPI);
-    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
-}
-
 #else /* _WIN32 */
 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
 {
     abort();
 }
-
-static void qemu_tcg_init_cpu_signals(void)
-{
-}
 #endif /* _WIN32 */
 
 static QemuMutex qemu_global_mutex;
@@ -812,9 +786,6 @@
 
 static QemuThread io_thread;
 
-static QemuThread *tcg_cpu_thread;
-static QemuCond *tcg_halt_cond;
-
 /* cpu creation */
 static QemuCond qemu_cpu_cond;
 /* system init */
@@ -845,6 +816,8 @@
     wi.func = func;
     wi.data = data;
     wi.free = false;
+
+    qemu_mutex_lock(&cpu->work_mutex);
     if (cpu->queued_work_first == NULL) {
         cpu->queued_work_first = &wi;
     } else {
@@ -853,9 +826,10 @@
     cpu->queued_work_last = &wi;
     wi.next = NULL;
     wi.done = false;
+    qemu_mutex_unlock(&cpu->work_mutex);
 
     qemu_cpu_kick(cpu);
-    while (!wi.done) {
+    while (!atomic_mb_read(&wi.done)) {
         CPUState *self_cpu = current_cpu;
 
         qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
@@ -876,6 +850,8 @@
     wi->func = func;
     wi->data = data;
     wi->free = true;
+
+    qemu_mutex_lock(&cpu->work_mutex);
     if (cpu->queued_work_first == NULL) {
         cpu->queued_work_first = wi;
     } else {
@@ -884,6 +860,7 @@
     cpu->queued_work_last = wi;
     wi->next = NULL;
     wi->done = false;
+    qemu_mutex_unlock(&cpu->work_mutex);
 
     qemu_cpu_kick(cpu);
 }
@@ -896,15 +873,23 @@
         return;
     }
 
-    while ((wi = cpu->queued_work_first)) {
+    qemu_mutex_lock(&cpu->work_mutex);
+    while (cpu->queued_work_first != NULL) {
+        wi = cpu->queued_work_first;
         cpu->queued_work_first = wi->next;
+        if (!cpu->queued_work_first) {
+            cpu->queued_work_last = NULL;
+        }
+        qemu_mutex_unlock(&cpu->work_mutex);
         wi->func(wi->data);
-        wi->done = true;
+        qemu_mutex_lock(&cpu->work_mutex);
         if (wi->free) {
             g_free(wi);
+        } else {
+            atomic_mb_set(&wi->done, true);
         }
     }
-    cpu->queued_work_last = NULL;
+    qemu_mutex_unlock(&cpu->work_mutex);
     qemu_cond_broadcast(&qemu_work_cond);
 }
 
@@ -919,15 +904,13 @@
     cpu->thread_kicked = false;
 }
 
-static void qemu_tcg_wait_io_event(void)
+static void qemu_tcg_wait_io_event(CPUState *cpu)
 {
-    CPUState *cpu;
-
     while (all_cpu_threads_idle()) {
        /* Start accounting real time to the virtual clock if the CPUs
           are idle.  */
         qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
-        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
+        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
     }
 
     while (iothread_requesting_mutex) {
@@ -1041,7 +1024,6 @@
     rcu_register_thread();
 
     qemu_mutex_lock_iothread();
-    qemu_tcg_init_cpu_signals();
     qemu_thread_get_self(cpu->thread);
 
     CPU_FOREACH(cpu) {
@@ -1053,7 +1035,7 @@
 
     /* wait for initial kick-off after machine start */
     while (first_cpu->stopped) {
-        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
+        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
 
         /* process any pending work */
         CPU_FOREACH(cpu) {
@@ -1062,7 +1044,7 @@
     }
 
     /* process any pending work */
-    exit_request = 1;
+    atomic_mb_set(&exit_request, 1);
 
     while (1) {
         tcg_exec_all();
@@ -1074,7 +1056,7 @@
                 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
             }
         }
-        qemu_tcg_wait_io_event();
+        qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
     }
 
     return NULL;
@@ -1085,61 +1067,47 @@
 #ifndef _WIN32
     int err;
 
+    if (cpu->thread_kicked) {
+        return;
+    }
+    cpu->thread_kicked = true;
     err = pthread_kill(cpu->thread->thread, SIG_IPI);
     if (err) {
         fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
         exit(1);
     }
 #else /* _WIN32 */
-    if (!qemu_cpu_is_self(cpu)) {
-        CONTEXT tcgContext;
-
-        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
-            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
-                    GetLastError());
-            exit(1);
-        }
-
-        /* On multi-core systems, we are not sure that the thread is actually
-         * suspended until we can get the context.
-         */
-        tcgContext.ContextFlags = CONTEXT_CONTROL;
-        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
-            continue;
-        }
-
-        cpu_signal(0);
-
-        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
-            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
-                    GetLastError());
-            exit(1);
-        }
-    }
+    abort();
 #endif
 }
 
+static void qemu_cpu_kick_no_halt(void)
+{
+    CPUState *cpu;
+    /* Ensure whatever caused the exit has reached the CPU threads before
+     * writing exit_request.
+     */
+    atomic_mb_set(&exit_request, 1);
+    cpu = atomic_mb_read(&tcg_current_cpu);
+    if (cpu) {
+        cpu_exit(cpu);
+    }
+}
+
 void qemu_cpu_kick(CPUState *cpu)
 {
     qemu_cond_broadcast(cpu->halt_cond);
-    if (!tcg_enabled() && !cpu->thread_kicked) {
+    if (tcg_enabled()) {
+        qemu_cpu_kick_no_halt();
+    } else {
         qemu_cpu_kick_thread(cpu);
-        cpu->thread_kicked = true;
     }
 }
 
 void qemu_cpu_kick_self(void)
 {
-#ifndef _WIN32
     assert(current_cpu);
-
-    if (!current_cpu->thread_kicked) {
-        qemu_cpu_kick_thread(current_cpu);
-        current_cpu->thread_kicked = true;
-    }
-#else
-    abort();
-#endif
+    qemu_cpu_kick_thread(current_cpu);
 }
 
 bool qemu_cpu_is_self(CPUState *cpu)
@@ -1166,12 +1134,12 @@
      * TCG code execution.
      */
     if (!tcg_enabled() || qemu_in_vcpu_thread() ||
-        !first_cpu || !first_cpu->thread) {
+        !first_cpu || !first_cpu->created) {
         qemu_mutex_lock(&qemu_global_mutex);
         atomic_dec(&iothread_requesting_mutex);
     } else {
         if (qemu_mutex_trylock(&qemu_global_mutex)) {
-            qemu_cpu_kick_thread(first_cpu);
+            qemu_cpu_kick_no_halt();
             qemu_mutex_lock(&qemu_global_mutex);
         }
         atomic_dec(&iothread_requesting_mutex);
@@ -1251,6 +1219,8 @@
 static void qemu_tcg_init_vcpu(CPUState *cpu)
 {
     char thread_name[VCPU_THREAD_NAME_SIZE];
+    static QemuCond *tcg_halt_cond;
+    static QemuThread *tcg_cpu_thread;
 
     tcg_cpu_address_space_init(cpu, cpu->as);
 
@@ -1440,7 +1410,9 @@
             break;
         }
     }
-    exit_request = 0;
+
+    /* Pairs with smp_wmb in qemu_cpu_kick.  */
+    atomic_mb_set(&exit_request, 0);
 }
 
 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
diff --git a/exec.c b/exec.c
index 31d2dc7..b61126c 100644
--- a/exec.c
+++ b/exec.c
@@ -90,7 +90,7 @@
 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
 /* current CPU in the current thread. It is only valid inside
    cpu_exec() */
-DEFINE_TLS(CPUState *, current_cpu);
+__thread CPUState *current_cpu;
 /* 0 = Do not count executed instructions.
    1 = Precise instruction counting.
    2 = Adaptive rate instruction counting.  */
diff --git a/gdbstub.c b/gdbstub.c
index eee9b25..d2c95b5 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -1359,7 +1359,7 @@
        is still in the running state, which can cause packets to be dropped
        and state transition 'T' packets to be sent while the syscall is still
        being processed.  */
-    cpu_exit(s->c_cpu);
+    qemu_cpu_kick(s->c_cpu);
 #endif
 }
 
diff --git a/hw/arm/fsl-imx25.c b/hw/arm/fsl-imx25.c
index 6d157c9..86fde42 100644
--- a/hw/arm/fsl-imx25.c
+++ b/hw/arm/fsl-imx25.c
@@ -63,6 +63,11 @@
         object_initialize(&s->i2c[i], sizeof(s->i2c[i]), TYPE_IMX_I2C);
         qdev_set_parent_bus(DEVICE(&s->i2c[i]), sysbus_get_default());
     }
+
+    for (i = 0; i < FSL_IMX25_NUM_GPIOS; i++) {
+        object_initialize(&s->gpio[i], sizeof(s->gpio[i]), TYPE_IMX_GPIO);
+        qdev_set_parent_bus(DEVICE(&s->gpio[i]), sysbus_get_default());
+    }
 }
 
 static void fsl_imx25_realize(DeviceState *dev, Error **errp)
@@ -214,6 +219,30 @@
                                             i2c_table[i].irq));
     }
 
+    /* Initialize all GPIOs */
+    for (i = 0; i < FSL_IMX25_NUM_GPIOS; i++) {
+        static const struct {
+            hwaddr addr;
+            unsigned int irq;
+        } gpio_table[FSL_IMX25_NUM_GPIOS] = {
+            { FSL_IMX25_GPIO1_ADDR, FSL_IMX25_GPIO1_IRQ },
+            { FSL_IMX25_GPIO2_ADDR, FSL_IMX25_GPIO2_IRQ },
+            { FSL_IMX25_GPIO3_ADDR, FSL_IMX25_GPIO3_IRQ },
+            { FSL_IMX25_GPIO4_ADDR, FSL_IMX25_GPIO4_IRQ }
+        };
+
+        object_property_set_bool(OBJECT(&s->gpio[i]), true, "realized", &err);
+        if (err) {
+            error_propagate(errp, err);
+            return;
+        }
+        sysbus_mmio_map(SYS_BUS_DEVICE(&s->gpio[i]), 0, gpio_table[i].addr);
+        /* Connect GPIO IRQ to PIC */
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->gpio[i]), 0,
+                           qdev_get_gpio_in(DEVICE(&s->avic),
+                                            gpio_table[i].irq));
+    }
+
     /* initialize 2 x 16 KB ROM */
     memory_region_init_rom_device(&s->rom[0], NULL, NULL, NULL,
                                   "imx25.rom0", FSL_IMX25_ROM0_SIZE, &err);
diff --git a/hw/arm/fsl-imx31.c b/hw/arm/fsl-imx31.c
index 87548c8..8e1ed48 100644
--- a/hw/arm/fsl-imx31.c
+++ b/hw/arm/fsl-imx31.c
@@ -55,6 +55,11 @@
         object_initialize(&s->i2c[i], sizeof(s->i2c[i]), TYPE_IMX_I2C);
         qdev_set_parent_bus(DEVICE(&s->i2c[i]), sysbus_get_default());
     }
+
+    for (i = 0; i < FSL_IMX31_NUM_GPIOS; i++) {
+        object_initialize(&s->gpio[i], sizeof(s->gpio[i]), TYPE_IMX_GPIO);
+        qdev_set_parent_bus(DEVICE(&s->gpio[i]), sysbus_get_default());
+    }
 }
 
 static void fsl_imx31_realize(DeviceState *dev, Error **errp)
@@ -184,6 +189,31 @@
                                             i2c_table[i].irq));
     }
 
+    /* Initialize all GPIOs */
+    for (i = 0; i < FSL_IMX31_NUM_GPIOS; i++) {
+        static const struct {
+            hwaddr addr;
+            unsigned int irq;
+        } gpio_table[FSL_IMX31_NUM_GPIOS] = {
+            { FSL_IMX31_GPIO1_ADDR, FSL_IMX31_GPIO1_IRQ },
+            { FSL_IMX31_GPIO2_ADDR, FSL_IMX31_GPIO2_IRQ },
+            { FSL_IMX31_GPIO3_ADDR, FSL_IMX31_GPIO3_IRQ }
+        };
+
+        object_property_set_bool(OBJECT(&s->gpio[i]), false, "has-edge-sel",
+                                 &error_abort);
+        object_property_set_bool(OBJECT(&s->gpio[i]), true, "realized", &err);
+        if (err) {
+            error_propagate(errp, err);
+            return;
+        }
+        sysbus_mmio_map(SYS_BUS_DEVICE(&s->gpio[i]), 0, gpio_table[i].addr);
+        /* Connect GPIO IRQ to PIC */
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->gpio[i]), 0,
+                           qdev_get_gpio_in(DEVICE(&s->avic),
+                                            gpio_table[i].irq));
+    }
+
     /* On a real system, the first 16k is a `secure boot rom' */
     memory_region_init_rom_device(&s->secure_rom, NULL, NULL, NULL,
                                   "imx31.secure_rom",
diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c
index 2955f3b..2185542 100644
--- a/hw/arm/xlnx-zynqmp.c
+++ b/hw/arm/xlnx-zynqmp.c
@@ -128,7 +128,7 @@
     qdev_prop_set_uint32(DEVICE(&s->gic), "num-cpu", XLNX_ZYNQMP_NUM_APU_CPUS);
     object_property_set_bool(OBJECT(&s->gic), true, "realized", &err);
     if (err) {
-        error_propagate((errp), (err));
+        error_propagate(errp, err);
         return;
     }
     assert(ARRAY_SIZE(xlnx_zynqmp_gic_regions) == XLNX_ZYNQMP_GIC_REGIONS);
@@ -173,7 +173,7 @@
         object_property_set_bool(OBJECT(&s->apu_cpu[i]), true, "realized",
                                  &err);
         if (err) {
-            error_propagate((errp), (err));
+            error_propagate(errp, err);
             return;
         }
 
@@ -206,7 +206,7 @@
         object_property_set_bool(OBJECT(&s->rpu_cpu[i]), true, "realized",
                                  &err);
         if (err) {
-            error_propagate((errp), (err));
+            error_propagate(errp, err);
             return;
         }
     }
@@ -229,7 +229,7 @@
         }
         object_property_set_bool(OBJECT(&s->gem[i]), true, "realized", &err);
         if (err) {
-            error_propagate((errp), (err));
+            error_propagate(errp, err);
             return;
         }
         sysbus_mmio_map(SYS_BUS_DEVICE(&s->gem[i]), 0, gem_addr[i]);
@@ -240,7 +240,7 @@
     for (i = 0; i < XLNX_ZYNQMP_NUM_UARTS; i++) {
         object_property_set_bool(OBJECT(&s->uart[i]), true, "realized", &err);
         if (err) {
-            error_propagate((errp), (err));
+            error_propagate(errp, err);
             return;
         }
         sysbus_mmio_map(SYS_BUS_DEVICE(&s->uart[i]), 0, uart_addr[i]);
diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 5e1b67e..6686a72 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -1417,7 +1417,7 @@
                  * recall us...
                  */
                 DMA_hold_DREQ(fdctrl->dma_chann);
-                DMA_schedule(fdctrl->dma_chann);
+                DMA_schedule();
             } else {
                 /* Start transfer */
                 fdctrl_transfer_handler(fdctrl, fdctrl->dma_chann, 0,
diff --git a/hw/cpu/a15mpcore.c b/hw/cpu/a15mpcore.c
index 4ef8db1..94e8cc1 100644
--- a/hw/cpu/a15mpcore.c
+++ b/hw/cpu/a15mpcore.c
@@ -64,7 +64,7 @@
          * either all the CPUs have TZ, or none do.
          */
         cpuobj = OBJECT(qemu_get_cpu(0));
-        has_el3 = object_property_find(cpuobj, "has_el3", &error_abort) &&
+        has_el3 = object_property_find(cpuobj, "has_el3", NULL) &&
             object_property_get_bool(cpuobj, "has_el3", &error_abort);
         qdev_prop_set_bit(gicdev, "has-security-extensions", has_el3);
     }
diff --git a/hw/cpu/a9mpcore.c b/hw/cpu/a9mpcore.c
index 7046246..869818c 100644
--- a/hw/cpu/a9mpcore.c
+++ b/hw/cpu/a9mpcore.c
@@ -69,7 +69,7 @@
      * either all the CPUs have TZ, or none do.
      */
     cpuobj = OBJECT(qemu_get_cpu(0));
-    has_el3 = object_property_find(cpuobj, "has_el3", &error_abort) &&
+    has_el3 = object_property_find(cpuobj, "has_el3", NULL) &&
         object_property_get_bool(cpuobj, "has_el3", &error_abort);
     qdev_prop_set_bit(gicdev, "has-security-extensions", has_el3);
 
diff --git a/hw/dma/i82374.c b/hw/dma/i82374.c
index b8ad2e6..f630971 100644
--- a/hw/dma/i82374.c
+++ b/hw/dma/i82374.c
@@ -38,7 +38,6 @@
 
 typedef struct I82374State {
     uint8_t commands[8];
-    qemu_irq out;
     PortioList port_list;
 } I82374State;
 
@@ -101,7 +100,7 @@
 
 static void i82374_realize(I82374State *s, Error **errp)
 {
-    DMA_init(1, &s->out);
+    DMA_init(1);
     memset(s->commands, 0, sizeof(s->commands));
 }
 
@@ -145,8 +144,6 @@
                     isa->iobase);
 
     i82374_realize(s, errp);
-
-    qdev_init_gpio_out(dev, &s->out, 1);
 }
 
 static Property i82374_properties[] = {
diff --git a/hw/dma/i8257.c b/hw/dma/i8257.c
index a414029..1398424 100644
--- a/hw/dma/i8257.c
+++ b/hw/dma/i8257.c
@@ -59,7 +59,6 @@
     uint8_t flip_flop;
     int dshift;
     struct dma_regs regs[4];
-    qemu_irq *cpu_request_exit;
     MemoryRegion channel_io;
     MemoryRegion cont_io;
 } dma_controllers[2];
@@ -358,6 +357,7 @@
 }
 
 static QEMUBH *dma_bh;
+static bool dma_bh_scheduled;
 
 static void DMA_run (void)
 {
@@ -390,12 +390,15 @@
 
     running = 0;
 out:
-    if (rearm)
+    if (rearm) {
         qemu_bh_schedule_idle(dma_bh);
+        dma_bh_scheduled = true;
+    }
 }
 
 static void DMA_run_bh(void *unused)
 {
+    dma_bh_scheduled = false;
     DMA_run();
 }
 
@@ -458,12 +461,14 @@
     return len;
 }
 
-/* request the emulator to transfer a new DMA memory block ASAP */
-void DMA_schedule(int nchan)
+/* request the emulator to transfer a new DMA memory block ASAP (even
+ * if the idle bottom half would not have exited the iothread yet).
+ */
+void DMA_schedule(void)
 {
-    struct dma_cont *d = &dma_controllers[nchan > 3];
-
-    qemu_irq_pulse(*d->cpu_request_exit);
+    if (dma_bh_scheduled) {
+        qemu_notify_event();
+    }
 }
 
 static void dma_reset(void *opaque)
@@ -515,13 +520,11 @@
 
 /* dshift = 0: 8 bit DMA, 1 = 16 bit DMA */
 static void dma_init2(struct dma_cont *d, int base, int dshift,
-                      int page_base, int pageh_base,
-                      qemu_irq *cpu_request_exit)
+                      int page_base, int pageh_base)
 {
     int i;
 
     d->dshift = dshift;
-    d->cpu_request_exit = cpu_request_exit;
 
     memory_region_init_io(&d->channel_io, NULL, &channel_io_ops, d,
                           "dma-chan", 8 << d->dshift);
@@ -585,12 +588,10 @@
     }
 };
 
-void DMA_init(int high_page_enable, qemu_irq *cpu_request_exit)
+void DMA_init(int high_page_enable)
 {
-    dma_init2(&dma_controllers[0], 0x00, 0, 0x80,
-              high_page_enable ? 0x480 : -1, cpu_request_exit);
-    dma_init2(&dma_controllers[1], 0xc0, 1, 0x88,
-              high_page_enable ? 0x488 : -1, cpu_request_exit);
+    dma_init2(&dma_controllers[0], 0x00, 0, 0x80, high_page_enable ? 0x480 : -1);
+    dma_init2(&dma_controllers[1], 0xc0, 1, 0x88, high_page_enable ? 0x488 : -1);
     vmstate_register (NULL, 0, &vmstate_dma, &dma_controllers[0]);
     vmstate_register (NULL, 1, &vmstate_dma, &dma_controllers[1]);
 
diff --git a/hw/gpio/Makefile.objs b/hw/gpio/Makefile.objs
index 1abcf17..52233f7 100644
--- a/hw/gpio/Makefile.objs
+++ b/hw/gpio/Makefile.objs
@@ -5,3 +5,4 @@
 common-obj-$(CONFIG_E500) += mpc8xxx.o
 
 obj-$(CONFIG_OMAP) += omap_gpio.o
+obj-$(CONFIG_IMX) += imx_gpio.o
diff --git a/hw/gpio/imx_gpio.c b/hw/gpio/imx_gpio.c
new file mode 100644
index 0000000..d56ffcd
--- /dev/null
+++ b/hw/gpio/imx_gpio.c
@@ -0,0 +1,340 @@
+/*
+ * i.MX processors GPIO emulation.
+ *
+ * Copyright (C) 2015 Jean-Christophe Dubois <jcd@tribudubois.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/gpio/imx_gpio.h"
+
+#ifndef DEBUG_IMX_GPIO
+#define DEBUG_IMX_GPIO 0
+#endif
+
+typedef enum IMXGPIOLevel {
+    IMX_GPIO_LEVEL_LOW = 0,
+    IMX_GPIO_LEVEL_HIGH = 1,
+} IMXGPIOLevel;
+
+#define DPRINTF(fmt, args...) \
+          do { \
+              if (DEBUG_IMX_GPIO) { \
+                  fprintf(stderr, "%s: " fmt , __func__, ##args); \
+              } \
+          } while (0)
+
+static const char *imx_gpio_reg_name(uint32_t reg)
+{
+    switch (reg) {
+    case DR_ADDR:
+        return "DR";
+    case GDIR_ADDR:
+        return "GDIR";
+    case PSR_ADDR:
+        return "PSR";
+    case ICR1_ADDR:
+        return "ICR1";
+    case ICR2_ADDR:
+        return "ICR2";
+    case IMR_ADDR:
+        return "IMR";
+    case ISR_ADDR:
+        return "ISR";
+    case EDGE_SEL_ADDR:
+        return "EDGE_SEL";
+    default:
+        return "[?]";
+    }
+}
+
+static void imx_gpio_update_int(IMXGPIOState *s)
+{
+    qemu_set_irq(s->irq, (s->isr & s->imr) ? 1 : 0);
+}
+
+static void imx_gpio_set_int_line(IMXGPIOState *s, int line, IMXGPIOLevel level)
+{
+    /* if this signal isn't configured as an input signal, nothing to do */
+    if (!extract32(s->gdir, line, 1)) {
+        return;
+    }
+
+    /* When set, EDGE_SEL overrides the ICR config */
+    if (extract32(s->edge_sel, line, 1)) {
+        /* we detect interrupt on rising and falling edge */
+        if (extract32(s->psr, line, 1) != level) {
+            /* level changed */
+            s->isr = deposit32(s->isr, line, 1, 1);
+        }
+    } else if (extract64(s->icr, 2*line + 1, 1)) {
+        /* interrupt is edge sensitive */
+        if (extract32(s->psr, line, 1) != level) {
+            /* level changed */
+            if (extract64(s->icr, 2*line, 1) != level) {
+                s->isr = deposit32(s->isr, line, 1, 1);
+            }
+        }
+    } else {
+        /* interrupt is level sensitive */
+        if (extract64(s->icr, 2*line, 1) == level) {
+            s->isr = deposit32(s->isr, line, 1, 1);
+        }
+    }
+}
+
+static void imx_gpio_set(void *opaque, int line, int level)
+{
+    IMXGPIOState *s = IMX_GPIO(opaque);
+    IMXGPIOLevel imx_level = level ? IMX_GPIO_LEVEL_HIGH : IMX_GPIO_LEVEL_LOW;
+
+    imx_gpio_set_int_line(s, line, imx_level);
+
+    /* this is an input signal, so set PSR */
+    s->psr = deposit32(s->psr, line, 1, imx_level);
+
+    imx_gpio_update_int(s);
+}
+
+static void imx_gpio_set_all_int_lines(IMXGPIOState *s)
+{
+    int i;
+
+    for (i = 0; i < IMX_GPIO_PIN_COUNT; i++) {
+        IMXGPIOLevel imx_level = extract32(s->psr, i, 1);
+        imx_gpio_set_int_line(s, i, imx_level);
+    }
+
+    imx_gpio_update_int(s);
+}
+
+static inline void imx_gpio_set_all_output_lines(IMXGPIOState *s)
+{
+    int i;
+
+    for (i = 0; i < IMX_GPIO_PIN_COUNT; i++) {
+        /*
+         * if the line is set as output, then forward the line
+         * level to its user.
+         */
+        if (extract32(s->gdir, i, 1) && s->output[i]) {
+            qemu_set_irq(s->output[i], extract32(s->dr, i, 1));
+        }
+    }
+}
+
+static uint64_t imx_gpio_read(void *opaque, hwaddr offset, unsigned size)
+{
+    IMXGPIOState *s = IMX_GPIO(opaque);
+    uint32_t reg_value = 0;
+
+    switch (offset) {
+    case DR_ADDR:
+        /*
+         * depending on the "line" configuration, the bit values
+         * are coming either from DR or PSR
+         */
+        reg_value = (s->dr & s->gdir) | (s->psr & ~s->gdir);
+        break;
+
+    case GDIR_ADDR:
+        reg_value = s->gdir;
+        break;
+
+    case PSR_ADDR:
+        reg_value = s->psr & ~s->gdir;
+        break;
+
+    case ICR1_ADDR:
+        reg_value = extract64(s->icr, 0, 32);
+        break;
+
+    case ICR2_ADDR:
+        reg_value = extract64(s->icr, 32, 32);
+        break;
+
+    case IMR_ADDR:
+        reg_value = s->imr;
+        break;
+
+    case ISR_ADDR:
+        reg_value = s->isr;
+        break;
+
+    case EDGE_SEL_ADDR:
+        if (s->has_edge_sel) {
+            reg_value = s->edge_sel;
+        } else {
+            qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: EDGE_SEL register not "
+                          "present on this version of GPIO device\n",
+                          TYPE_IMX_GPIO, __func__);
+        }
+        break;
+
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad register at offset %d\n",
+                      TYPE_IMX_GPIO, __func__, (int)offset);
+        break;
+    }
+
+    DPRINTF("(%s) = 0x%"PRIx32"\n", imx_gpio_reg_name(offset), reg_value);
+
+    return reg_value;
+}
+
+static void imx_gpio_write(void *opaque, hwaddr offset, uint64_t value,
+                           unsigned size)
+{
+    IMXGPIOState *s = IMX_GPIO(opaque);
+
+    DPRINTF("(%s, value = 0x%"PRIx32")\n", imx_gpio_reg_name(offset),
+            (uint32_t)value);
+
+    switch (offset) {
+    case DR_ADDR:
+        s->dr = value;
+        imx_gpio_set_all_output_lines(s);
+        break;
+
+    case GDIR_ADDR:
+        s->gdir = value;
+        imx_gpio_set_all_output_lines(s);
+        imx_gpio_set_all_int_lines(s);
+        break;
+
+    case ICR1_ADDR:
+        s->icr = deposit64(s->icr, 0, 32, value);
+        imx_gpio_set_all_int_lines(s);
+        break;
+
+    case ICR2_ADDR:
+        s->icr = deposit64(s->icr, 32, 32, value);
+        imx_gpio_set_all_int_lines(s);
+        break;
+
+    case IMR_ADDR:
+        s->imr = value;
+        imx_gpio_update_int(s);
+        break;
+
+    case ISR_ADDR:
+        s->isr |= ~value;
+        imx_gpio_set_all_int_lines(s);
+        break;
+
+    case EDGE_SEL_ADDR:
+        if (s->has_edge_sel) {
+            s->edge_sel = value;
+            imx_gpio_set_all_int_lines(s);
+        } else {
+            qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: EDGE_SEL register not "
+                          "present on this version of GPIO device\n",
+                          TYPE_IMX_GPIO, __func__);
+        }
+        break;
+
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad register at offset %d\n",
+                      TYPE_IMX_GPIO, __func__, (int)offset);
+        break;
+    }
+
+    return;
+}
+
+static const MemoryRegionOps imx_gpio_ops = {
+    .read = imx_gpio_read,
+    .write = imx_gpio_write,
+    .valid.min_access_size = 4,
+    .valid.max_access_size = 4,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static const VMStateDescription vmstate_imx_gpio = {
+    .name = TYPE_IMX_GPIO,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(dr, IMXGPIOState),
+        VMSTATE_UINT32(gdir, IMXGPIOState),
+        VMSTATE_UINT32(psr, IMXGPIOState),
+        VMSTATE_UINT64(icr, IMXGPIOState),
+        VMSTATE_UINT32(imr, IMXGPIOState),
+        VMSTATE_UINT32(isr, IMXGPIOState),
+        VMSTATE_BOOL(has_edge_sel, IMXGPIOState),
+        VMSTATE_UINT32(edge_sel, IMXGPIOState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static Property imx_gpio_properties[] = {
+    DEFINE_PROP_BOOL("has-edge-sel", IMXGPIOState, has_edge_sel, true),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void imx_gpio_reset(DeviceState *dev)
+{
+    IMXGPIOState *s = IMX_GPIO(dev);
+
+    s->dr       = 0;
+    s->gdir     = 0;
+    s->psr      = 0;
+    s->icr      = 0;
+    s->imr      = 0;
+    s->isr      = 0;
+    s->edge_sel = 0;
+
+    imx_gpio_set_all_output_lines(s);
+    imx_gpio_update_int(s);
+}
+
+static void imx_gpio_realize(DeviceState *dev, Error **errp)
+{
+    IMXGPIOState *s = IMX_GPIO(dev);
+
+    memory_region_init_io(&s->iomem, OBJECT(s), &imx_gpio_ops, s,
+                          TYPE_IMX_GPIO, IMX_GPIO_MEM_SIZE);
+
+    qdev_init_gpio_in(DEVICE(s), imx_gpio_set, IMX_GPIO_PIN_COUNT);
+    qdev_init_gpio_out(DEVICE(s), s->output, IMX_GPIO_PIN_COUNT);
+
+    sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->irq);
+    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->iomem);
+}
+
+static void imx_gpio_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->realize = imx_gpio_realize;
+    dc->reset = imx_gpio_reset;
+    dc->props = imx_gpio_properties;
+    dc->vmsd = &vmstate_imx_gpio;
+    dc->desc = "i.MX GPIO controller";
+}
+
+static const TypeInfo imx_gpio_info = {
+    .name = TYPE_IMX_GPIO,
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(IMXGPIOState),
+    .class_init = imx_gpio_class_init,
+};
+
+static void imx_gpio_register_types(void)
+{
+    type_register_static(&imx_gpio_info);
+}
+
+type_init(imx_gpio_register_types)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index b5107f7..56aecce 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1452,15 +1452,6 @@
     return dev;
 }
 
-static void cpu_request_exit(void *opaque, int irq, int level)
-{
-    CPUState *cpu = current_cpu;
-
-    if (cpu && level) {
-        cpu_exit(cpu);
-    }
-}
-
 static const MemoryRegionOps ioport80_io_ops = {
     .write = ioport80_write,
     .read = ioport80_read,
@@ -1495,7 +1486,6 @@
     qemu_irq rtc_irq = NULL;
     qemu_irq *a20_line;
     ISADevice *i8042, *port92, *vmmouse, *pit = NULL;
-    qemu_irq *cpu_exit_irq;
     MemoryRegion *ioport80_io = g_new(MemoryRegion, 1);
     MemoryRegion *ioportF0_io = g_new(MemoryRegion, 1);
 
@@ -1572,8 +1562,7 @@
     port92 = isa_create_simple(isa_bus, "port92");
     port92_init(port92, &a20_line[1]);
 
-    cpu_exit_irq = qemu_allocate_irqs(cpu_request_exit, NULL, 1);
-    DMA_init(0, cpu_exit_irq);
+    DMA_init(0);
 
     for(i = 0; i < MAX_FD; i++) {
         fd[i] = drive_get(IF_FLOPPY, 0, i);
diff --git a/hw/isa/i82378.c b/hw/isa/i82378.c
index fcf97d8..d4c8306 100644
--- a/hw/isa/i82378.c
+++ b/hw/isa/i82378.c
@@ -100,7 +100,6 @@
 
     /* 2 82C37 (dma) */
     isa = isa_create_simple(isabus, "i82374");
-    qdev_connect_gpio_out(DEVICE(isa), 0, s->out[1]);
 
     /* timer */
     isa_create_simple(isabus, "mc146818rtc");
@@ -111,7 +110,7 @@
     DeviceState *dev = DEVICE(obj);
     I82378State *s = I82378(obj);
 
-    qdev_init_gpio_out(dev, s->out, 2);
+    qdev_init_gpio_out(dev, s->out, 1);
     qdev_init_gpio_in(dev, i82378_request_pic_irq, 16);
 }
 
diff --git a/hw/mips/mips_fulong2e.c b/hw/mips/mips_fulong2e.c
index dea941a..6d2ea30 100644
--- a/hw/mips/mips_fulong2e.c
+++ b/hw/mips/mips_fulong2e.c
@@ -251,15 +251,6 @@
     }
 }
 
-static void cpu_request_exit(void *opaque, int irq, int level)
-{
-    CPUState *cpu = current_cpu;
-
-    if (cpu && level) {
-        cpu_exit(cpu);
-    }
-}
-
 static void mips_fulong2e_init(MachineState *machine)
 {
     ram_addr_t ram_size = machine->ram_size;
@@ -274,7 +265,6 @@
     long bios_size;
     int64_t kernel_entry;
     qemu_irq *i8259;
-    qemu_irq *cpu_exit_irq;
     PCIBus *pci_bus;
     ISABus *isa_bus;
     I2CBus *smbus;
@@ -375,8 +365,7 @@
 
     /* init other devices */
     pit = pit_init(isa_bus, 0x40, 0, NULL);
-    cpu_exit_irq = qemu_allocate_irqs(cpu_request_exit, NULL, 1);
-    DMA_init(0, cpu_exit_irq);
+    DMA_init(0);
 
     /* Super I/O */
     isa_create_simple(isa_bus, "i8042");
diff --git a/hw/mips/mips_jazz.c b/hw/mips/mips_jazz.c
index 9d60633..3906016 100644
--- a/hw/mips/mips_jazz.c
+++ b/hw/mips/mips_jazz.c
@@ -104,15 +104,6 @@
 #define MAGNUM_BIOS_SIZE_MAX 0x7e000
 #define MAGNUM_BIOS_SIZE (BIOS_SIZE < MAGNUM_BIOS_SIZE_MAX ? BIOS_SIZE : MAGNUM_BIOS_SIZE_MAX)
 
-static void cpu_request_exit(void *opaque, int irq, int level)
-{
-    CPUState *cpu = current_cpu;
-
-    if (cpu && level) {
-        cpu_exit(cpu);
-    }
-}
-
 static CPUUnassignedAccess real_do_unassigned_access;
 static void mips_jazz_do_unassigned_access(CPUState *cpu, hwaddr addr,
                                            bool is_write, bool is_exec,
@@ -150,7 +141,6 @@
     ISADevice *pit;
     DriveInfo *fds[MAX_FD];
     qemu_irq esp_reset, dma_enable;
-    qemu_irq *cpu_exit_irq;
     MemoryRegion *ram = g_new(MemoryRegion, 1);
     MemoryRegion *bios = g_new(MemoryRegion, 1);
     MemoryRegion *bios2 = g_new(MemoryRegion, 1);
@@ -234,8 +224,7 @@
     /* ISA devices */
     i8259 = i8259_init(isa_bus, env->irq[4]);
     isa_bus_irqs(isa_bus, i8259);
-    cpu_exit_irq = qemu_allocate_irqs(cpu_request_exit, NULL, 1);
-    DMA_init(0, cpu_exit_irq);
+    DMA_init(0);
     pit = pit_init(isa_bus, 0x40, 0, NULL);
     pcspk_init(isa_bus, pit);
 
diff --git a/hw/mips/mips_malta.c b/hw/mips/mips_malta.c
index 3082e75..23b6fc3 100644
--- a/hw/mips/mips_malta.c
+++ b/hw/mips/mips_malta.c
@@ -905,15 +905,6 @@
     }
 }
 
-static void cpu_request_exit(void *opaque, int irq, int level)
-{
-    CPUState *cpu = current_cpu;
-
-    if (cpu && level) {
-        cpu_exit(cpu);
-    }
-}
-
 static
 void mips_malta_init(MachineState *machine)
 {
@@ -939,7 +930,6 @@
     MIPSCPU *cpu;
     CPUMIPSState *env;
     qemu_irq *isa_irq;
-    qemu_irq *cpu_exit_irq;
     int piix4_devfn;
     I2CBus *smbus;
     int i;
@@ -1175,8 +1165,7 @@
     smbus_eeprom_init(smbus, 8, smbus_eeprom_buf, smbus_eeprom_size);
     g_free(smbus_eeprom_buf);
     pit = pit_init(isa_bus, 0x40, 0, NULL);
-    cpu_exit_irq = qemu_allocate_irqs(cpu_request_exit, NULL, 1);
-    DMA_init(0, cpu_exit_irq);
+    DMA_init(0);
 
     /* Super I/O */
     isa_create_simple(isa_bus, "i8042");
diff --git a/hw/misc/pvpanic.c b/hw/misc/pvpanic.c
index 994f8af..3709488 100644
--- a/hw/misc/pvpanic.c
+++ b/hw/misc/pvpanic.c
@@ -41,8 +41,7 @@
     }
 
     if (event & PVPANIC_PANICKED) {
-        qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_PAUSE, &error_abort);
-        vm_stop(RUN_STATE_GUEST_PANICKED);
+        qemu_system_guest_panicked();
         return;
     }
 }
diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c
index 45b5f62..81f0838 100644
--- a/hw/ppc/prep.c
+++ b/hw/ppc/prep.c
@@ -336,15 +336,6 @@
 
 #define NVRAM_SIZE        0x2000
 
-static void cpu_request_exit(void *opaque, int irq, int level)
-{
-    CPUState *cpu = current_cpu;
-
-    if (cpu && level) {
-        cpu_exit(cpu);
-    }
-}
-
 static void ppc_prep_reset(void *opaque)
 {
     PowerPCCPU *cpu = opaque;
@@ -626,8 +617,6 @@
     cpu = POWERPC_CPU(first_cpu);
     qdev_connect_gpio_out(&pci->qdev, 0,
                           cpu->env.irq_inputs[PPC6xx_INPUT_INT]);
-    qdev_connect_gpio_out(&pci->qdev, 1,
-                          qemu_allocate_irq(cpu_request_exit, NULL, 0));
     sysbus_connect_irq(&pcihost->busdev, 0, qdev_get_gpio_in(&pci->qdev, 9));
     sysbus_connect_irq(&pcihost->busdev, 1, qdev_get_gpio_in(&pci->qdev, 11));
     sysbus_connect_irq(&pcihost->busdev, 2, qdev_get_gpio_in(&pci->qdev, 9));
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 2986f94..9869bc9 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -214,7 +214,7 @@
     CPUPPCState *env = &cpu->env;
 
     cs->halted = 1;
-    cpu_exit(cs);
+    qemu_cpu_kick(cs);
     /*
      * While stopping a CPU, the guest calls H_CPPR which
      * effectively disables interrupts on XICS level.
diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
index 7eacca9..bac9ddb 100644
--- a/hw/scsi/vhost-scsi.c
+++ b/hw/scsi/vhost-scsi.c
@@ -292,7 +292,7 @@
 {
     VHostSCSI *s = VHOST_SCSI(dev);
     /* format: channel@channel/vhost-scsi@target,lun */
-    return g_strdup_printf("channel@%x/%s@%x,%x", s->channel,
+    return g_strdup_printf("/channel@%x/%s@%x,%x", s->channel,
                            qdev_fw_name(dev), s->target, s->lun);
 }
 
diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c
index 68ac4d8..b5db8b7 100644
--- a/hw/sparc/sun4m.c
+++ b/hw/sparc/sun4m.c
@@ -109,9 +109,9 @@
 }
 void DMA_hold_DREQ (int nchan) {}
 void DMA_release_DREQ (int nchan) {}
-void DMA_schedule(int nchan) {}
+void DMA_schedule(void) {}
 
-void DMA_init(int high_page_enable, qemu_irq *cpu_request_exit)
+void DMA_init(int high_page_enable)
 {
 }
 
diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c
index 30cfa0e..a887a86 100644
--- a/hw/sparc64/sun4u.c
+++ b/hw/sparc64/sun4u.c
@@ -112,9 +112,9 @@
 }
 void DMA_hold_DREQ (int nchan) {}
 void DMA_release_DREQ (int nchan) {}
-void DMA_schedule(int nchan) {}
+void DMA_schedule(void) {}
 
-void DMA_init(int high_page_enable, qemu_irq *cpu_request_exit)
+void DMA_init(int high_page_enable)
 {
 }
 
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 89db792..f9998b9 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -266,44 +266,6 @@
 
 #if !defined(CONFIG_USER_ONLY)
 
-/* memory API */
-
-typedef struct RAMBlock RAMBlock;
-
-struct RAMBlock {
-    struct rcu_head rcu;
-    struct MemoryRegion *mr;
-    uint8_t *host;
-    ram_addr_t offset;
-    ram_addr_t used_length;
-    ram_addr_t max_length;
-    void (*resized)(const char*, uint64_t length, void *host);
-    uint32_t flags;
-    /* Protected by iothread lock.  */
-    char idstr[256];
-    /* RCU-enabled, writes protected by the ramlist lock */
-    QLIST_ENTRY(RAMBlock) next;
-    int fd;
-};
-
-static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
-{
-    assert(offset < block->used_length);
-    assert(block->host);
-    return (char *)block->host + offset;
-}
-
-typedef struct RAMList {
-    QemuMutex mutex;
-    /* Protected by the iothread lock.  */
-    unsigned long *dirty_memory[DIRTY_MEMORY_NUM];
-    RAMBlock *mru_block;
-    /* RCU-enabled, writes protected by the ramlist lock. */
-    QLIST_HEAD(, RAMBlock) blocks;
-    uint32_t version;
-} RAMList;
-extern RAMList ram_list;
-
 /* Flags stored in the low bits of the TLB virtual address.  These are
    defined so that fast path ram access is all zeros.  */
 /* Zero if TLB entry is valid.  */
@@ -316,9 +278,6 @@
 
 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf);
 void dump_opcount_info(FILE *f, fprintf_function cpu_fprintf);
-ram_addr_t last_ram_offset(void);
-void qemu_mutex_lock_ramlist(void);
-void qemu_mutex_unlock_ramlist(void);
 #endif /* !CONFIG_USER_ONLY */
 
 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index b5fadf7..72d4012 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -226,7 +226,7 @@
     struct TranslationBlock *jmp_first;
 };
 
-#include "exec/spinlock.h"
+#include "qemu/thread.h"
 
 typedef struct TBContext TBContext;
 
@@ -236,7 +236,7 @@
     TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
     int nb_tbs;
     /* any access to the tbs or the page table must use this lock */
-    spinlock_t tb_lock;
+    QemuMutex tb_lock;
 
     /* statistics */
     int tb_flush_count;
@@ -375,11 +375,17 @@
 #endif
 
 #if defined(CONFIG_USER_ONLY)
+void mmap_lock(void);
+void mmap_unlock(void);
+
 static inline tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
 {
     return addr;
 }
 #else
+static inline void mmap_lock(void) {}
+static inline void mmap_unlock(void) {}
+
 /* cputlb.c */
 tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr);
 #endif
@@ -387,8 +393,9 @@
 /* vl.c */
 extern int singlestep;
 
-/* cpu-exec.c */
-extern volatile sig_atomic_t exit_request;
+/* cpu-exec.c, accessed with atomic_mb_read/atomic_mb_set */
+extern CPUState *tcg_current_cpu;
+extern bool exit_request;
 
 #if !defined(CONFIG_USER_ONLY)
 void migration_bitmap_extend(ram_addr_t old, ram_addr_t new);
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index c113f21..c400a75 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -22,6 +22,46 @@
 #ifndef CONFIG_USER_ONLY
 #include "hw/xen/xen.h"
 
+typedef struct RAMBlock RAMBlock;
+
+struct RAMBlock {
+    struct rcu_head rcu;
+    struct MemoryRegion *mr;
+    uint8_t *host;
+    ram_addr_t offset;
+    ram_addr_t used_length;
+    ram_addr_t max_length;
+    void (*resized)(const char*, uint64_t length, void *host);
+    uint32_t flags;
+    /* Protected by iothread lock.  */
+    char idstr[256];
+    /* RCU-enabled, writes protected by the ramlist lock */
+    QLIST_ENTRY(RAMBlock) next;
+    int fd;
+};
+
+static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
+{
+    assert(offset < block->used_length);
+    assert(block->host);
+    return (char *)block->host + offset;
+}
+
+typedef struct RAMList {
+    QemuMutex mutex;
+    /* Protected by the iothread lock.  */
+    unsigned long *dirty_memory[DIRTY_MEMORY_NUM];
+    RAMBlock *mru_block;
+    /* RCU-enabled, writes protected by the ramlist lock. */
+    QLIST_HEAD(, RAMBlock) blocks;
+    uint32_t version;
+} RAMList;
+extern RAMList ram_list;
+
+ram_addr_t last_ram_offset(void);
+void qemu_mutex_lock_ramlist(void);
+void qemu_mutex_unlock_ramlist(void);
+
 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                     bool share, const char *mem_path,
                                     Error **errp);
diff --git a/include/exec/spinlock.h b/include/exec/spinlock.h
deleted file mode 100644
index a72edda..0000000
--- a/include/exec/spinlock.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- *  Copyright (c) 2003 Fabrice Bellard
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>
- */
-
-/* configure guarantees us that we have pthreads on any host except
- * mingw32, which doesn't support any of the user-only targets.
- * So we can simply assume we have pthread mutexes here.
- */
-#if defined(CONFIG_USER_ONLY)
-
-#include <pthread.h>
-#define spin_lock pthread_mutex_lock
-#define spin_unlock pthread_mutex_unlock
-#define spinlock_t pthread_mutex_t
-#define SPIN_LOCK_UNLOCKED PTHREAD_MUTEX_INITIALIZER
-
-#else
-
-/* Empty implementations, on the theory that system mode emulation
- * is single-threaded. This means that these functions should only
- * be used from code run in the TCG cpu thread, and cannot protect
- * data structures which might also be accessed from the IO thread
- * or from signal handlers.
- */
-typedef int spinlock_t;
-#define SPIN_LOCK_UNLOCKED 0
-
-static inline void spin_lock(spinlock_t *lock)
-{
-}
-
-static inline void spin_unlock(spinlock_t *lock)
-{
-}
-
-#endif
diff --git a/include/hw/arm/fsl-imx25.h b/include/hw/arm/fsl-imx25.h
index 7f6bb64..73f50c6 100644
--- a/include/hw/arm/fsl-imx25.h
+++ b/include/hw/arm/fsl-imx25.h
@@ -25,6 +25,7 @@
 #include "hw/timer/imx_epit.h"
 #include "hw/net/imx_fec.h"
 #include "hw/i2c/imx_i2c.h"
+#include "hw/gpio/imx_gpio.h"
 #include "exec/memory.h"
 
 #define TYPE_FSL_IMX25 "fsl,imx25"
@@ -34,6 +35,7 @@
 #define FSL_IMX25_NUM_GPTS 4
 #define FSL_IMX25_NUM_EPITS 2
 #define FSL_IMX25_NUM_I2CS 3
+#define FSL_IMX25_NUM_GPIOS 4
 
 typedef struct FslIMX25State {
     /*< private >*/
@@ -48,6 +50,7 @@
     IMXEPITState   epit[FSL_IMX25_NUM_EPITS];
     IMXFECState    fec;
     IMXI2CState    i2c[FSL_IMX25_NUM_I2CS];
+    IMXGPIOState   gpio[FSL_IMX25_NUM_GPIOS];
     MemoryRegion   rom[2];
     MemoryRegion   iram;
     MemoryRegion   iram_alias;
@@ -204,6 +207,14 @@
 #define FSL_IMX25_EPIT1_SIZE    0x4000
 #define FSL_IMX25_EPIT2_ADDR    0x53F98000
 #define FSL_IMX25_EPIT2_SIZE    0x4000
+#define FSL_IMX25_GPIO4_ADDR    0x53F9C000
+#define FSL_IMX25_GPIO4_SIZE    0x4000
+#define FSL_IMX25_GPIO3_ADDR    0x53FA4000
+#define FSL_IMX25_GPIO3_SIZE    0x4000
+#define FSL_IMX25_GPIO1_ADDR    0x53FCC000
+#define FSL_IMX25_GPIO1_SIZE    0x4000
+#define FSL_IMX25_GPIO2_ADDR    0x53FD0000
+#define FSL_IMX25_GPIO2_SIZE    0x4000
 #define FSL_IMX25_AVIC_ADDR     0x68000000
 #define FSL_IMX25_AVIC_SIZE     0x4000
 #define FSL_IMX25_IRAM_ADDR     0x78000000
@@ -230,5 +241,9 @@
 #define FSL_IMX25_I2C1_IRQ      3
 #define FSL_IMX25_I2C2_IRQ      4
 #define FSL_IMX25_I2C3_IRQ      10
+#define FSL_IMX25_GPIO1_IRQ     52
+#define FSL_IMX25_GPIO2_IRQ     51
+#define FSL_IMX25_GPIO3_IRQ     16
+#define FSL_IMX25_GPIO4_IRQ     23
 
 #endif /* FSL_IMX25_H */
diff --git a/include/hw/arm/fsl-imx31.h b/include/hw/arm/fsl-imx31.h
index 891166f..5e8f795 100644
--- a/include/hw/arm/fsl-imx31.h
+++ b/include/hw/arm/fsl-imx31.h
@@ -24,6 +24,7 @@
 #include "hw/timer/imx_gpt.h"
 #include "hw/timer/imx_epit.h"
 #include "hw/i2c/imx_i2c.h"
+#include "hw/gpio/imx_gpio.h"
 #include "exec/memory.h"
 
 #define TYPE_FSL_IMX31 "fsl,imx31"
@@ -32,6 +33,7 @@
 #define FSL_IMX31_NUM_UARTS 2
 #define FSL_IMX31_NUM_EPITS 2
 #define FSL_IMX31_NUM_I2CS 3
+#define FSL_IMX31_NUM_GPIOS 3
 
 typedef struct FslIMX31State {
     /*< private >*/
@@ -45,6 +47,7 @@
     IMXGPTState    gpt;
     IMXEPITState   epit[FSL_IMX31_NUM_EPITS];
     IMXI2CState    i2c[FSL_IMX31_NUM_I2CS];
+    IMXGPIOState   gpio[FSL_IMX31_NUM_GPIOS];
     MemoryRegion   secure_rom;
     MemoryRegion   rom;
     MemoryRegion   iram;
@@ -77,6 +80,12 @@
 #define FSL_IMX31_EPIT1_SIZE            0x4000
 #define FSL_IMX31_EPIT2_ADDR            0x53F98000
 #define FSL_IMX31_EPIT2_SIZE            0x4000
+#define FSL_IMX31_GPIO3_ADDR            0x53FA4000
+#define FSL_IMX31_GPIO3_SIZE            0x4000
+#define FSL_IMX31_GPIO1_ADDR            0x53FCC000
+#define FSL_IMX31_GPIO1_SIZE            0x4000
+#define FSL_IMX31_GPIO2_ADDR            0x53FD0000
+#define FSL_IMX31_GPIO2_SIZE            0x4000
 #define FSL_IMX31_AVIC_ADDR             0x68000000
 #define FSL_IMX31_AVIC_SIZE             0x100
 #define FSL_IMX31_SDRAM0_ADDR           0x80000000
@@ -106,5 +115,8 @@
 #define FSL_IMX31_I2C1_IRQ              10
 #define FSL_IMX31_I2C2_IRQ              4
 #define FSL_IMX31_I2C3_IRQ              3
+#define FSL_IMX31_GPIO1_IRQ             52
+#define FSL_IMX31_GPIO2_IRQ             51
+#define FSL_IMX31_GPIO3_IRQ             56
 
 #endif /* FSL_IMX31_H */
diff --git a/include/hw/arm/xlnx-zynqmp.h b/include/hw/arm/xlnx-zynqmp.h
index 97622ec..4005a99 100644
--- a/include/hw/arm/xlnx-zynqmp.h
+++ b/include/hw/arm/xlnx-zynqmp.h
@@ -46,7 +46,7 @@
  * number of memory region aliases.
  */
 
-#define XLNX_ZYNQMP_GIC_REGION_SIZE 0x4000
+#define XLNX_ZYNQMP_GIC_REGION_SIZE 0x1000
 #define XLNX_ZYNQMP_GIC_ALIASES     (0x10000 / XLNX_ZYNQMP_GIC_REGION_SIZE - 1)
 
 typedef struct XlnxZynqMPState {
diff --git a/include/hw/gpio/imx_gpio.h b/include/hw/gpio/imx_gpio.h
new file mode 100644
index 0000000..517b261
--- /dev/null
+++ b/include/hw/gpio/imx_gpio.h
@@ -0,0 +1,62 @@
+/*
+ * i.MX processors GPIO registers definition.
+ *
+ * Copyright (C) 2015 Jean-Christophe Dubois <jcd@tribudubois.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __IMX_GPIO_H_
+#define __IMX_GPIO_H_
+
+#include <hw/sysbus.h>
+
+#define TYPE_IMX_GPIO "imx.gpio"
+#define IMX_GPIO(obj) OBJECT_CHECK(IMXGPIOState, (obj), TYPE_IMX_GPIO)
+
+#define IMX_GPIO_MEM_SIZE 0x20
+
+/* i.MX GPIO memory map */
+#define DR_ADDR             0x00 /* DATA REGISTER */
+#define GDIR_ADDR           0x04 /* DIRECTION REGISTER */
+#define PSR_ADDR            0x08 /* PAD STATUS REGISTER */
+#define ICR1_ADDR           0x0c /* INTERRUPT CONFIGURATION REGISTER 1 */
+#define ICR2_ADDR           0x10 /* INTERRUPT CONFIGURATION REGISTER 2 */
+#define IMR_ADDR            0x14 /* INTERRUPT MASK REGISTER */
+#define ISR_ADDR            0x18 /* INTERRUPT STATUS REGISTER */
+#define EDGE_SEL_ADDR       0x1c /* EDGE SEL REGISTER */
+
+#define IMX_GPIO_PIN_COUNT 32
+
+typedef struct IMXGPIOState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+
+    uint32_t dr;
+    uint32_t gdir;
+    uint32_t psr;
+    uint64_t icr;
+    uint32_t imr;
+    uint32_t isr;
+    bool has_edge_sel;
+    uint32_t edge_sel;
+
+    qemu_irq irq;
+    qemu_irq output[IMX_GPIO_PIN_COUNT];
+} IMXGPIOState;
+
+#endif /* __IMX_GPIO_H_ */
diff --git a/include/hw/isa/isa.h b/include/hw/isa/isa.h
index f21ceaa..d758b39 100644
--- a/include/hw/isa/isa.h
+++ b/include/hw/isa/isa.h
@@ -112,8 +112,8 @@
 int DMA_write_memory (int nchan, void *buf, int pos, int size);
 void DMA_hold_DREQ (int nchan);
 void DMA_release_DREQ (int nchan);
-void DMA_schedule(int nchan);
-void DMA_init(int high_page_enable, qemu_irq *cpu_request_exit);
+void DMA_schedule(void);
+void DMA_init(int high_page_enable);
 void DMA_register_channel (int nchan,
                            DMA_transfer_handler transfer_handler,
                            void *opaque);
diff --git a/include/qemu-common.h b/include/qemu-common.h
index efaf919..01d29dd 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -203,6 +203,14 @@
 int qemu_fdatasync(int fd);
 int fcntl_setfl(int fd, int flag);
 int qemu_parse_fd(const char *param);
+int qemu_strtol(const char *nptr, const char **endptr, int base,
+                long *result);
+int qemu_strtoul(const char *nptr, const char **endptr, int base,
+                 unsigned long *result);
+int qemu_strtoll(const char *nptr, const char **endptr, int base,
+                 int64_t *result);
+int qemu_strtoull(const char *nptr, const char **endptr, int base,
+                  uint64_t *result);
 
 int parse_uint(const char *s, unsigned long long *value, char **endptr,
                int base);
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
index bc18ca3..9976909 100644
--- a/include/qemu/main-loop.h
+++ b/include/qemu/main-loop.h
@@ -203,6 +203,7 @@
                          IOHandler *fd_write,
                          void *opaque);
 
+GSource *iohandler_get_g_source(void);
 #ifdef CONFIG_POSIX
 /**
  * qemu_add_child_watch: Register a child process for reaping.
@@ -265,8 +266,6 @@
 /* internal interfaces */
 
 void qemu_fd_register(int fd);
-void qemu_iohandler_fill(GArray *pollfds);
-void qemu_iohandler_poll(GArray *pollfds, int rc);
 
 QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque);
 void qemu_bh_schedule_idle(QEMUBH *bh);
diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h
index 7df1e86..f6d1d56 100644
--- a/include/qemu/rcu.h
+++ b/include/qemu/rcu.h
@@ -71,7 +71,7 @@
     /* Data used by reader only */
     unsigned depth;
 
-    /* Data used for registry, protected by rcu_gp_lock */
+    /* Data used for registry, protected by rcu_registry_lock */
     QLIST_ENTRY(rcu_reader_data) node;
 };
 
diff --git a/include/qemu/seqlock.h b/include/qemu/seqlock.h
index 3ff118a..70b01fd 100644
--- a/include/qemu/seqlock.h
+++ b/include/qemu/seqlock.h
@@ -55,18 +55,18 @@
 static inline unsigned seqlock_read_begin(QemuSeqLock *sl)
 {
     /* Always fail if a write is in progress.  */
-    unsigned ret = sl->sequence & ~1;
+    unsigned ret = atomic_read(&sl->sequence);
 
     /* Read sequence before reading other fields.  */
     smp_rmb();
-    return ret;
+    return ret & ~1;
 }
 
-static int seqlock_read_retry(const QemuSeqLock *sl, unsigned start)
+static inline int seqlock_read_retry(const QemuSeqLock *sl, unsigned start)
 {
     /* Read other fields before reading final sequence.  */
     smp_rmb();
-    return unlikely(sl->sequence != start);
+    return unlikely(atomic_read(&sl->sequence) != start);
 }
 
 #endif
diff --git a/include/qemu/tls.h b/include/qemu/tls.h
deleted file mode 100644
index b92ea9d..0000000
--- a/include/qemu/tls.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Abstraction layer for defining and using TLS variables
- *
- * Copyright (c) 2011 Red Hat, Inc
- * Copyright (c) 2011 Linaro Limited
- *
- * Authors:
- *  Paolo Bonzini <pbonzini@redhat.com>
- *  Peter Maydell <peter.maydell@linaro.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef QEMU_TLS_H
-#define QEMU_TLS_H
-
-/* Per-thread variables. Note that we only have implementations
- * which are really thread-local on Linux; the dummy implementations
- * define plain global variables.
- *
- * This means that for the moment use should be restricted to
- * per-VCPU variables, which are OK because:
- *  - the only -user mode supporting multiple VCPU threads is linux-user
- *  - TCG system mode is single-threaded regarding VCPUs
- *  - KVM system mode is multi-threaded but limited to Linux
- *
- * TODO: proper implementations via Win32 .tls sections and
- * POSIX pthread_getspecific.
- */
-#ifdef __linux__
-#define DECLARE_TLS(type, x) extern DEFINE_TLS(type, x)
-#define DEFINE_TLS(type, x)  __thread __typeof__(type) tls__##x
-#define tls_var(x)           tls__##x
-#else
-/* Dummy implementations which define plain global variables */
-#define DECLARE_TLS(type, x) extern DEFINE_TLS(type, x)
-#define DEFINE_TLS(type, x)  __typeof__(type) tls__##x
-#define tls_var(x)           tls__##x
-#endif
-
-#endif
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 39712ab..c3d610b 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -28,7 +28,6 @@
 #include "exec/memattrs.h"
 #include "qemu/queue.h"
 #include "qemu/thread.h"
-#include "qemu/tls.h"
 #include "qemu/typedefs.h"
 
 typedef int (*WriteCoreDumpFunction)(const void *buf, size_t size,
@@ -244,6 +243,8 @@
  * @mem_io_pc: Host Program Counter at which the memory was accessed.
  * @mem_io_vaddr: Target virtual address at which the memory was accessed.
  * @kvm_fd: vCPU file descriptor for KVM.
+ * @work_mutex: Lock to prevent multiple access to queued_work_*.
+ * @queued_work_first: First asynchronous work pending.
  *
  * State of one CPU core or thread.
  */
@@ -264,17 +265,19 @@
     uint32_t host_tid;
     bool running;
     struct QemuCond *halt_cond;
-    struct qemu_work_item *queued_work_first, *queued_work_last;
     bool thread_kicked;
     bool created;
     bool stop;
     bool stopped;
-    volatile sig_atomic_t exit_request;
+    bool exit_request;
     uint32_t interrupt_request;
     int singlestep_enabled;
     int64_t icount_extra;
     sigjmp_buf jmp_env;
 
+    QemuMutex work_mutex;
+    struct qemu_work_item *queued_work_first, *queued_work_last;
+
     AddressSpace *as;
     struct AddressSpaceDispatch *memory_dispatch;
     MemoryListener *tcg_as_listener;
@@ -320,7 +323,7 @@
        offset from AREG0.  Leave this field at the end so as to make the
        (absolute value) offset as small as possible.  This reduces code
        size, especially for hosts without large memory offsets.  */
-    volatile sig_atomic_t tcg_exit_req;
+    uint32_t tcg_exit_req;
 };
 
 QTAILQ_HEAD(CPUTailQ, CPUState);
@@ -333,8 +336,7 @@
     QTAILQ_FOREACH_REVERSE(cpu, &cpus, CPUTailQ, node)
 #define first_cpu QTAILQ_FIRST(&cpus)
 
-DECLARE_TLS(CPUState *, current_cpu);
-#define current_cpu tls_var(current_cpu)
+extern __thread CPUState *current_cpu;
 
 /**
  * cpu_paging_enabled:
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 44570d1..1f6ff8f 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -69,6 +69,7 @@
 void qemu_system_killed(int signal, pid_t pid);
 void qemu_devices_reset(void);
 void qemu_system_reset(bool report);
+void qemu_system_guest_panicked(void);
 
 void qemu_add_exit_notifier(Notifier *notify);
 void qemu_remove_exit_notifier(Notifier *notify);
diff --git a/iohandler.c b/iohandler.c
index 826f713..55f8501 100644
--- a/iohandler.c
+++ b/iohandler.c
@@ -32,111 +32,30 @@
 #include <sys/wait.h>
 #endif
 
-typedef struct IOHandlerRecord {
-    IOHandler *fd_read;
-    IOHandler *fd_write;
-    void *opaque;
-    QLIST_ENTRY(IOHandlerRecord) next;
-    int fd;
-    int pollfds_idx;
-    bool deleted;
-} IOHandlerRecord;
+/* This context runs on top of main loop. We can't reuse qemu_aio_context
+ * because iohandlers mustn't be polled by aio_poll(qemu_aio_context). */
+static AioContext *iohandler_ctx;
 
-static QLIST_HEAD(, IOHandlerRecord) io_handlers =
-    QLIST_HEAD_INITIALIZER(io_handlers);
+static void iohandler_init(void)
+{
+    if (!iohandler_ctx) {
+        iohandler_ctx = aio_context_new(&error_abort);
+    }
+}
+
+GSource *iohandler_get_g_source(void)
+{
+    iohandler_init();
+    return aio_get_g_source(iohandler_ctx);
+}
 
 void qemu_set_fd_handler(int fd,
                          IOHandler *fd_read,
                          IOHandler *fd_write,
                          void *opaque)
 {
-    IOHandlerRecord *ioh;
-
-    assert(fd >= 0);
-
-    if (!fd_read && !fd_write) {
-        QLIST_FOREACH(ioh, &io_handlers, next) {
-            if (ioh->fd == fd) {
-                ioh->deleted = 1;
-                break;
-            }
-        }
-    } else {
-        QLIST_FOREACH(ioh, &io_handlers, next) {
-            if (ioh->fd == fd)
-                goto found;
-        }
-        ioh = g_malloc0(sizeof(IOHandlerRecord));
-        QLIST_INSERT_HEAD(&io_handlers, ioh, next);
-    found:
-        ioh->fd = fd;
-        ioh->fd_read = fd_read;
-        ioh->fd_write = fd_write;
-        ioh->opaque = opaque;
-        ioh->pollfds_idx = -1;
-        ioh->deleted = 0;
-        qemu_notify_event();
-    }
-}
-
-void qemu_iohandler_fill(GArray *pollfds)
-{
-    IOHandlerRecord *ioh;
-
-    QLIST_FOREACH(ioh, &io_handlers, next) {
-        int events = 0;
-
-        if (ioh->deleted)
-            continue;
-        if (ioh->fd_read) {
-            events |= G_IO_IN | G_IO_HUP | G_IO_ERR;
-        }
-        if (ioh->fd_write) {
-            events |= G_IO_OUT | G_IO_ERR;
-        }
-        if (events) {
-            GPollFD pfd = {
-                .fd = ioh->fd,
-                .events = events,
-            };
-            ioh->pollfds_idx = pollfds->len;
-            g_array_append_val(pollfds, pfd);
-        } else {
-            ioh->pollfds_idx = -1;
-        }
-    }
-}
-
-void qemu_iohandler_poll(GArray *pollfds, int ret)
-{
-    if (ret > 0) {
-        IOHandlerRecord *pioh, *ioh;
-
-        QLIST_FOREACH_SAFE(ioh, &io_handlers, next, pioh) {
-            int revents = 0;
-
-            if (!ioh->deleted && ioh->pollfds_idx != -1) {
-                GPollFD *pfd = &g_array_index(pollfds, GPollFD,
-                                              ioh->pollfds_idx);
-                revents = pfd->revents;
-            }
-
-            if (!ioh->deleted && ioh->fd_read &&
-                (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR))) {
-                ioh->fd_read(ioh->opaque);
-            }
-            if (!ioh->deleted && ioh->fd_write &&
-                (revents & (G_IO_OUT | G_IO_ERR))) {
-                ioh->fd_write(ioh->opaque);
-            }
-
-            /* Do this last in case read/write handlers marked it for deletion */
-            if (ioh->deleted) {
-                QLIST_REMOVE(ioh, next);
-                g_free(ioh);
-            }
-        }
-    }
+    iohandler_init();
+    aio_set_fd_handler(iohandler_ctx, fd, fd_read, fd_write, opaque);
 }
 
 /* reaping of zombies.  right now we're not passing the status to
diff --git a/linux-user/main.c b/linux-user/main.c
index 829ee64..4ba7228 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -105,7 +105,7 @@
 /* Make sure everything is in a consistent state for calling fork().  */
 void fork_start(void)
 {
-    pthread_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
+    qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
     pthread_mutex_lock(&exclusive_lock);
     mmap_fork_start();
 }
@@ -127,11 +127,11 @@
         pthread_mutex_init(&cpu_list_mutex, NULL);
         pthread_cond_init(&exclusive_cond, NULL);
         pthread_cond_init(&exclusive_resume, NULL);
-        pthread_mutex_init(&tcg_ctx.tb_ctx.tb_lock, NULL);
+        qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
         gdbserver_fork(thread_cpu);
     } else {
         pthread_mutex_unlock(&exclusive_lock);
-        pthread_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
+        qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
     }
 }
 
diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 8012cc2..e8606b2 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -261,8 +261,6 @@
 int target_msync(abi_ulong start, abi_ulong len, int flags);
 extern unsigned long last_brk;
 extern abi_ulong mmap_next_start;
-void mmap_lock(void);
-void mmap_unlock(void);
 abi_ulong mmap_find_vma(abi_ulong, abi_ulong);
 void cpu_list_lock(void);
 void cpu_list_unlock(void);
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index c0fabf7..973cc2f 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -4512,6 +4512,7 @@
     CPUState *cpu;
     TaskState *ts;
 
+    rcu_register_thread();
     env = info->env;
     cpu = ENV_GET_CPU(env);
     thread_cpu = cpu;
@@ -5613,6 +5614,7 @@
             thread_cpu = NULL;
             object_unref(OBJECT(cpu));
             g_free(ts);
+            rcu_unregister_thread();
             pthread_exit(NULL);
         }
 #ifdef TARGET_GPROF
diff --git a/main-loop.c b/main-loop.c
index 3997043..db600a3 100644
--- a/main-loop.c
+++ b/main-loop.c
@@ -161,6 +161,9 @@
     src = aio_get_g_source(qemu_aio_context);
     g_source_attach(src, NULL);
     g_source_unref(src);
+    src = iohandler_get_g_source();
+    g_source_attach(src, NULL);
+    g_source_unref(src);
     return 0;
 }
 
@@ -487,7 +490,6 @@
 #ifdef CONFIG_SLIRP
     slirp_pollfds_fill(gpollfds, &timeout);
 #endif
-    qemu_iohandler_fill(gpollfds);
 
     if (timeout == UINT32_MAX) {
         timeout_ns = -1;
@@ -500,7 +502,6 @@
                                           &main_loop_tlg));
 
     ret = os_host_main_loop_wait(timeout_ns);
-    qemu_iohandler_poll(gpollfds, ret);
 #ifdef CONFIG_SLIRP
     slirp_pollfds_poll(gpollfds, (ret < 0));
 #endif
diff --git a/qmp.c b/qmp.c
index 403805a..9623c80 100644
--- a/qmp.c
+++ b/qmp.c
@@ -49,14 +49,20 @@
 {
     VersionInfo *info = g_new0(VersionInfo, 1);
     const char *version = QEMU_VERSION;
-    char *tmp;
+    const char *tmp;
+    int err;
 
     info->qemu = g_new0(VersionTriple, 1);
-    info->qemu->major = strtol(version, &tmp, 10);
+    err = qemu_strtoll(version, &tmp, 10, &info->qemu->major);
+    assert(err == 0);
     tmp++;
-    info->qemu->minor = strtol(tmp, &tmp, 10);
+
+    err = qemu_strtoll(tmp, &tmp, 10, &info->qemu->minor);
+    assert(err == 0);
     tmp++;
-    info->qemu->micro = strtol(tmp, &tmp, 10);
+
+    err = qemu_strtoll(tmp, &tmp, 10, &info->qemu->micro);
+    assert(err == 0);
     info->package = g_strdup(QEMU_PKGVERSION);
 
     return info;
diff --git a/qom/cpu.c b/qom/cpu.c
index 62f4b5d..3841f0d 100644
--- a/qom/cpu.c
+++ b/qom/cpu.c
@@ -114,6 +114,8 @@
 void cpu_exit(CPUState *cpu)
 {
     cpu->exit_request = 1;
+    /* Ensure cpu_exec will see the exit request after TCG has exited.  */
+    smp_wmb();
     cpu->tcg_exit_req = 1;
 }
 
@@ -314,6 +316,7 @@
 
     cpu->cpu_index = -1;
     cpu->gdb_num_regs = cpu->gdb_num_g_regs = cc->gdb_num_core_regs;
+    qemu_mutex_init(&cpu->work_mutex);
     QTAILQ_INIT(&cpu->breakpoints);
     QTAILQ_INIT(&cpu->watchpoints);
 }
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 7f0aae9..4ac00a9 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -141,44 +141,22 @@
 		}x;
 our $Storage	= qr{extern|static|asmlinkage};
 our $Sparse	= qr{
-			__user|
-			__kernel|
-			__force|
-			__iomem|
-			__must_check|
-			__init_refok|
-			__kprobes|
-			__ref
+			__force
 		}x;
 
 # Notes to $Attribute:
-# We need \b after 'init' otherwise 'initconst' will cause a false positive in a check
 our $Attribute	= qr{
 			const|
-			__percpu|
-			__nocast|
-			__safe|
-			__bitwise__|
-			__packed__|
-			__packed2__|
-			__naked|
-			__maybe_unused|
-			__always_unused|
-			__noreturn|
-			__used|
-			__cold|
-			__noclone|
-			__deprecated|
-			__read_mostly|
-			__kprobes|
-			__(?:mem|cpu|dev|)(?:initdata|initconst|init\b)|
-			____cacheline_aligned|
-			____cacheline_aligned_in_smp|
-			____cacheline_internodealigned_in_smp|
-			__weak
+			volatile|
+			QEMU_NORETURN|
+			QEMU_WARN_UNUSED_RESULT|
+			QEMU_SENTINEL|
+			QEMU_ARTIFICIAL|
+			QEMU_PACKED|
+			GCC_FMT_ATTR
 		  }x;
 our $Modifier;
-our $Inline	= qr{inline|__always_inline|noinline};
+our $Inline	= qr{inline};
 our $Member	= qr{->$Ident|\.$Ident|\[[^]]*\]};
 our $Lval	= qr{$Ident(?:$Member)*};
 
@@ -215,14 +193,6 @@
         | QEMUBH                            # all uppercase
 )};
 
-our $logFunctions = qr{(?x:
-	printk|
-	pr_(debug|dbg|vdbg|devel|info|warning|err|notice|alert|crit|emerg|cont)|
-	(dev|netdev|netif)_(printk|dbg|vdbg|info|warn|err|notice|alert|crit|emerg|WARN)|
-	WARN|
-	panic
-)};
-
 our @typeList = (
 	qr{void},
 	qr{(?:unsigned\s+)?char},
@@ -243,20 +213,20 @@
 	qr{${Ident}_handler},
 	qr{${Ident}_handler_fn},
 );
+
+# This can be modified by sub possible.  Since it can be empty, be careful
+# about regexes that always match, because they can cause infinite loops.
 our @modifierList = (
-	qr{fastcall},
 );
 
-our $allowed_asm_includes = qr{(?x:
-	irq|
-	memory
-)};
-# memory.h: ARM has a custom one
-
 sub build_types {
-	my $mods = "(?x:  \n" . join("|\n  ", @modifierList) . "\n)";
 	my $all = "(?x:  \n" . join("|\n  ", @typeList) . "\n)";
-	$Modifier	= qr{(?:$Attribute|$Sparse|$mods)};
+	if (@modifierList > 0) {
+		my $mods = "(?x:  \n" . join("|\n  ", @modifierList) . "\n)";
+		$Modifier = qr{(?:$Attribute|$Sparse|$mods)};
+	} else {
+		$Modifier = qr{(?:$Attribute|$Sparse)};
+	}
 	$NonptrType	= qr{
 			(?:$Modifier\s+|const\s+)*
 			(?:
@@ -277,27 +247,6 @@
 
 $chk_signoff = 0 if ($file);
 
-my @dep_includes = ();
-my @dep_functions = ();
-my $removal = "Documentation/feature-removal-schedule.txt";
-if ($tree && -f "$root/$removal") {
-	open(my $REMOVE, '<', "$root/$removal") ||
-				die "$P: $removal: open failed - $!\n";
-	while (<$REMOVE>) {
-		if (/^Check:\s+(.*\S)/) {
-			for my $entry (split(/[, ]+/, $1)) {
-				if ($entry =~ m@include/(.*)@) {
-					push(@dep_includes, $1);
-
-				} elsif ($entry !~ m@/@) {
-					push(@dep_functions, $entry);
-				}
-			}
-		}
-	}
-	close($REMOVE);
-}
-
 my @rawlines = ();
 my @lines = ();
 my $vname;
@@ -1127,33 +1076,6 @@
 	}
 }
 
-sub check_absolute_file {
-	my ($absolute, $herecurr) = @_;
-	my $file = $absolute;
-
-	##print "absolute<$absolute>\n";
-
-	# See if any suffix of this path is a path within the tree.
-	while ($file =~ s@^[^/]*/@@) {
-		if (-f "$root/$file") {
-			##print "file<$file>\n";
-			last;
-		}
-	}
-	if (! -f _)  {
-		return 0;
-	}
-
-	# It is, so see if the prefix is acceptable.
-	my $prefix = $absolute;
-	substr($prefix, -length($file)) = '';
-
-	##print "prefix<$prefix>\n";
-	if ($prefix ne ".../") {
-		WARN("use relative pathname instead of absolute in changelog text\n" . $herecurr);
-	}
-}
-
 sub process {
 	my $filename = shift;
 
@@ -1196,10 +1118,6 @@
 	my %suppress_export;
 
 	# Pre-scan the patch sanitizing the lines.
-	# Pre-scan the patch looking for any __setup documentation.
-	#
-	my @setup_docs = ();
-	my $setup_docs = 0;
 
 	sanitise_line_reset();
 	my $line;
@@ -1207,13 +1125,6 @@
 		$linenr++;
 		$line = $rawline;
 
-		if ($rawline=~/^\+\+\+\s+(\S+)/) {
-			$setup_docs = 0;
-			if ($1 =~ m@Documentation/kernel-parameters.txt$@) {
-				$setup_docs = 1;
-			}
-			#next;
-		}
 		if ($rawline=~/^\@\@ -\d+(?:,\d+)? \+(\d+)(,(\d+))? \@\@/) {
 			$realline=$1-1;
 			if (defined $2) {
@@ -1272,10 +1183,6 @@
 
 		#print "==>$rawline\n";
 		#print "-->$line\n";
-
-		if ($setup_docs && $line =~ /^\+/) {
-			push(@setup_docs, $line);
-		}
 	}
 
 	$prefix = '';
@@ -1350,9 +1257,6 @@
 				WARN("patch prefix '$p1_prefix' exists, appears to be a -p0 patch\n");
 			}
 
-			if ($realfile =~ m@^include/asm/@) {
-				ERROR("do not modify files in include/asm, change architecture specific files in include/asm-<architecture>\n" . "$here$rawline\n");
-			}
 			next;
 		}
 
@@ -1367,7 +1271,7 @@
 # Check for incorrect file permissions
 		if ($line =~ /^new (file )?mode.*[7531]\d{0,2}$/) {
 			my $permhere = $here . "FILE: $realfile\n";
-			if ($realfile =~ /(Makefile|Kconfig|\.c|\.cpp|\.h|\.S|\.tmpl)$/) {
+			if ($realfile =~ /(\bMakefile(?:\.objs)?|\.c|\.cc|\.cpp|\.h|\.mak|\.[sS])$/) {
 				ERROR("do not set execute permissions for source files\n" . $permhere);
 			}
 		}
@@ -1392,20 +1296,6 @@
 				$herecurr) if (!$emitted_corrupt++);
 		}
 
-# Check for absolute kernel paths.
-		if ($tree) {
-			while ($line =~ m{(?:^|\s)(/\S*)}g) {
-				my $file = $1;
-
-				if ($file =~ m{^(.*?)(?::\d+)+:?$} &&
-				    check_absolute_file($1, $herecurr)) {
-					#
-				} else {
-					check_absolute_file($file, $herecurr);
-				}
-			}
-		}
-
 # UTF-8 regex found at http://www.w3.org/International/questions/qa-forms-utf-8.en.php
 		if (($realfile =~ /^$/ || $line =~ /^\+/) &&
 		    $rawline !~ m/^$UTF8*$/) {
@@ -1432,45 +1322,12 @@
 			$rpt_cleaners = 1;
 		}
 
-# check for Kconfig help text having a real description
-# Only applies when adding the entry originally, after that we do not have
-# sufficient context to determine whether it is indeed long enough.
-		if ($realfile =~ /Kconfig/ &&
-		    $line =~ /\+\s*(?:---)?help(?:---)?$/) {
-			my $length = 0;
-			my $cnt = $realcnt;
-			my $ln = $linenr + 1;
-			my $f;
-			my $is_end = 0;
-			while ($cnt > 0 && defined $lines[$ln - 1]) {
-				$f = $lines[$ln - 1];
-				$cnt-- if ($lines[$ln - 1] !~ /^-/);
-				$is_end = $lines[$ln - 1] =~ /^\+/;
-				$ln++;
-
-				next if ($f =~ /^-/);
-				$f =~ s/^.//;
-				$f =~ s/#.*//;
-				$f =~ s/^\s+//;
-				next if ($f =~ /^$/);
-				if ($f =~ /^\s*config\s/) {
-					$is_end = 1;
-					last;
-				}
-				$length++;
-			}
-			WARN("please write a paragraph that describes the config symbol fully\n" . $herecurr) if ($is_end && $length < 4);
-			#print "is_end<$is_end> length<$length>\n";
-		}
-
 # check we are in a valid source file if not then ignore this hunk
 		next if ($realfile !~ /\.(h|c|cpp|s|S|pl|sh)$/);
 
 #80 column limit
-		if ($line =~ /^\+/ && $prevrawline !~ /\/\*\*/ &&
-		    $rawline !~ /^.\s*\*\s*\@$Ident\s/ &&
-		    !($line =~ /^\+\s*$logFunctions\s*\(\s*(?:(KERN_\S+\s*|[^"]*))?"[X\t]*"\s*(?:,|\)\s*;)\s*$/ ||
-		    $line =~ /^\+\s*"[^"]*"\s*(?:\s*|,|\)\s*;)\s*$/) &&
+		if ($line =~ /^\+/ &&
+		    !($line =~ /^\+\s*"[^"]*"\s*(?:\s*|,|\)\s*;)\s*$/) &&
 		    $length > 80)
 		{
 			WARN("line over 80 characters\n" . $herecurr);
@@ -1486,18 +1343,6 @@
 			WARN("adding a line without newline at end of file\n" . $herecurr);
 		}
 
-# Blackfin: use hi/lo macros
-		if ($realfile =~ m@arch/blackfin/.*\.S$@) {
-			if ($line =~ /\.[lL][[:space:]]*=.*&[[:space:]]*0x[fF][fF][fF][fF]/) {
-				my $herevet = "$here\n" . cat_vet($line) . "\n";
-				ERROR("use the LO() macro, not (... & 0xFFFF)\n" . $herevet);
-			}
-			if ($line =~ /\.[hH][[:space:]]*=.*>>[[:space:]]*16/) {
-				my $herevet = "$here\n" . cat_vet($line) . "\n";
-				ERROR("use the HI() macro, not (... >> 16)\n" . $herevet);
-			}
-		}
-
 # check we are in a valid source file C or perl if not then ignore this hunk
 		next if ($realfile !~ /\.(h|c|cpp|pl)$/);
 
@@ -1516,16 +1361,6 @@
 			WARN("CVS style keyword markers, these will _not_ be updated\n". $herecurr);
 		}
 
-# Blackfin: don't use __builtin_bfin_[cs]sync
-		if ($line =~ /__builtin_bfin_csync/) {
-			my $herevet = "$here\n" . cat_vet($line) . "\n";
-			ERROR("use the CSYNC() macro in asm/blackfin.h\n" . $herevet);
-		}
-		if ($line =~ /__builtin_bfin_ssync/) {
-			my $herevet = "$here\n" . cat_vet($line) . "\n";
-			ERROR("use the SSYNC() macro in asm/blackfin.h\n" . $herevet);
-		}
-
 # Check for potential 'bare' types
 		my ($stat, $cond, $line_nr_next, $remain_next, $off_next,
 		    $realline_next);
@@ -1809,50 +1644,6 @@
 		$line =~ s@//.*@@;
 		$opline =~ s@//.*@@;
 
-# EXPORT_SYMBOL should immediately follow the thing it is exporting, consider
-# the whole statement.
-#print "APW <$lines[$realline_next - 1]>\n";
-		if (defined $realline_next &&
-		    exists $lines[$realline_next - 1] &&
-		    !defined $suppress_export{$realline_next} &&
-		    ($lines[$realline_next - 1] =~ /EXPORT_SYMBOL.*\((.*)\)/ ||
-		     $lines[$realline_next - 1] =~ /EXPORT_UNUSED_SYMBOL.*\((.*)\)/)) {
-			# Handle definitions which produce identifiers with
-			# a prefix:
-			#   XXX(foo);
-			#   EXPORT_SYMBOL(something_foo);
-			my $name = $1;
-			if ($stat =~ /^.([A-Z_]+)\s*\(\s*($Ident)/ &&
-			    $name =~ /^${Ident}_$2/) {
-#print "FOO C name<$name>\n";
-				$suppress_export{$realline_next} = 1;
-
-			} elsif ($stat !~ /(?:
-				\n.}\s*$|
-				^.DEFINE_$Ident\(\Q$name\E\)|
-				^.DECLARE_$Ident\(\Q$name\E\)|
-				^.LIST_HEAD\(\Q$name\E\)|
-				^.(?:$Storage\s+)?$Type\s*\(\s*\*\s*\Q$name\E\s*\)\s*\(|
-				\b\Q$name\E(?:\s+$Attribute)*\s*(?:;|=|\[|\()
-			    )/x) {
-#print "FOO A<$lines[$realline_next - 1]> stat<$stat> name<$name>\n";
-				$suppress_export{$realline_next} = 2;
-			} else {
-				$suppress_export{$realline_next} = 1;
-			}
-		}
-		if (!defined $suppress_export{$linenr} &&
-		    $prevline =~ /^.\s*$/ &&
-		    ($line =~ /EXPORT_SYMBOL.*\((.*)\)/ ||
-		     $line =~ /EXPORT_UNUSED_SYMBOL.*\((.*)\)/)) {
-#print "FOO B <$lines[$linenr - 1]>\n";
-			$suppress_export{$linenr} = 2;
-		}
-		if (defined $suppress_export{$linenr} &&
-		    $suppress_export{$linenr} == 2) {
-			WARN("EXPORT_SYMBOL(foo); should immediately follow its function/variable\n" . $herecurr);
-		}
-
 # check for global initialisers.
 		if ($line =~ /^.$Type\s*$Ident\s*(?:\s+$Modifier)*\s*=\s*(0|NULL|false)\s*;/) {
 			ERROR("do not initialise globals to 0 or NULL\n" .
@@ -1900,40 +1691,6 @@
 			}
 		}
 
-# # no BUG() or BUG_ON()
-# 		if ($line =~ /\b(BUG|BUG_ON)\b/) {
-# 			print "Try to use WARN_ON & Recovery code rather than BUG() or BUG_ON()\n";
-# 			print "$herecurr";
-# 			$clean = 0;
-# 		}
-
-		if ($line =~ /\bLINUX_VERSION_CODE\b/) {
-			WARN("LINUX_VERSION_CODE should be avoided, code should be for the version to which it is merged\n" . $herecurr);
-		}
-
-# printk should use KERN_* levels.  Note that follow on printk's on the
-# same line do not need a level, so we use the current block context
-# to try and find and validate the current printk.  In summary the current
-# printk includes all preceding printk's which have no newline on the end.
-# we assume the first bad printk is the one to report.
-		if ($line =~ /\bprintk\((?!KERN_)\s*"/) {
-			my $ok = 0;
-			for (my $ln = $linenr - 1; $ln >= $first_line; $ln--) {
-				#print "CHECK<$lines[$ln - 1]\n";
-				# we have a preceding printk if it ends
-				# with "\n" ignore it, else it is to blame
-				if ($lines[$ln - 1] =~ m{\bprintk\(}) {
-					if ($rawlines[$ln - 1] !~ m{\\n"}) {
-						$ok = 1;
-					}
-					last;
-				}
-			}
-			if ($ok == 0) {
-				WARN("printk() should include KERN_ facility level\n" . $herecurr);
-			}
-		}
-
 # function brace can't be on same line, except for #defines of do while,
 # or if closed on same line
 		if (($line=~/$Type\s*$Ident\(.*\).*\s{/) and
@@ -1947,9 +1704,14 @@
 			ERROR("open brace '{' following $1 go on the same line\n" . $hereprev);
 		}
 
+# ... however, open braces on typedef lines should be avoided.
+		if ($line =~ /^.\s*typedef\s+(enum|union|struct)(?:\s+$Ident\b)?.*[^;]$/) {
+			ERROR("typedefs should be separate from struct declaration\n" . $herecurr);
+		}
+
 # missing space after union, struct or enum definition
 		if ($line =~ /^.\s*(?:typedef\s+)?(enum|union|struct)(?:\s+$Ident)?(?:\s+$Ident)?[=\{]/) {
-		    WARN("missing space after $1 definition\n" . $herecurr);
+		    ERROR("missing space after $1 definition\n" . $herecurr);
 		}
 
 # check for spacing round square brackets; allowed:
@@ -2190,26 +1952,6 @@
 			}
 		}
 
-# check for multiple assignments
-		if ($line =~ /^.\s*$Lval\s*=\s*$Lval\s*=(?!=)/) {
-			CHK("multiple assignments should be avoided\n" . $herecurr);
-		}
-
-## # check for multiple declarations, allowing for a function declaration
-## # continuation.
-## 		if ($line =~ /^.\s*$Type\s+$Ident(?:\s*=[^,{]*)?\s*,\s*$Ident.*/ &&
-## 		    $line !~ /^.\s*$Type\s+$Ident(?:\s*=[^,{]*)?\s*,\s*$Type\s*$Ident.*/) {
-##
-## 			# Remove any bracketed sections to ensure we do not
-## 			# falsly report the parameters of functions.
-## 			my $ln = $line;
-## 			while ($ln =~ s/\([^\(\)]*\)//g) {
-## 			}
-## 			if ($ln =~ /,/) {
-## 				WARN("declaring multiple variables together should be avoided\n" . $herecurr);
-## 			}
-## 		}
-
 #need space before brace following if, while, etc
 		if (($line =~ /\(.*\){/ && $line !~ /\($Type\){/) ||
 		    $line =~ /do{/) {
@@ -2267,7 +2009,7 @@
 		if ($line =~ /^.\s*return\s*(E[A-Z]*)\s*;/) {
 			my $name = $1;
 			if ($name ne 'EOF' && $name ne 'ERROR') {
-				CHK("return of an errno should typically be -ve (return -$1)\n" . $herecurr);
+				WARN("return of an errno should typically be -ve (return -$1)\n" . $herecurr);
 			}
 		}
 
@@ -2398,22 +2140,6 @@
 			WARN("Whitepspace after \\ makes next lines useless\n" . $herecurr);
 		}
 
-#warn if <asm/foo.h> is #included and <linux/foo.h> is available (uses RAW line)
-		if ($tree && $rawline =~ m{^.\s*\#\s*include\s*\<asm\/(.*)\.h\>}) {
-			my $file = "$1.h";
-			my $checkfile = "include/linux/$file";
-			if (-f "$root/$checkfile" &&
-			    $realfile ne $checkfile &&
-			    $1 !~ /$allowed_asm_includes/)
-			{
-				if ($realfile =~ m{^arch/}) {
-					CHK("Consider using #include <linux/$file> instead of <asm/$file>\n" . $herecurr);
-				} else {
-					WARN("Use #include <linux/$file> instead of <asm/$file>\n" . $herecurr);
-				}
-			}
-		}
-
 # multi-statement macros should be enclosed in a do while loop, grab the
 # first statement and ensure its the whole macro if its not enclosed
 # in a known good container
@@ -2508,15 +2234,6 @@
 			}
 		}
 
-# make sure symbols are always wrapped with VMLINUX_SYMBOL() ...
-# all assignments may have only one of the following with an assignment:
-#	.
-#	ALIGN(...)
-#	VMLINUX_SYMBOL(...)
-		if ($realfile eq 'vmlinux.lds.h' && $line =~ /(?:(?:^|\s)$Ident\s*=|=\s*$Ident(?:\s|$))/) {
-			WARN("vmlinux.lds.h needs VMLINUX_SYMBOL() around C-visible symbols\n" . $herecurr);
-		}
-
 # check for missing bracing round if etc
 		if ($line =~ /(^.*)\bif\b/ && $line !~ /\#\s*if/) {
 			my ($level, $endln, @chunks) =
@@ -2644,64 +2361,23 @@
 			}
 		}
 
-# don't include deprecated include files (uses RAW line)
-		for my $inc (@dep_includes) {
-			if ($rawline =~ m@^.\s*\#\s*include\s*\<$inc>@) {
-				ERROR("Don't use <$inc>: see Documentation/feature-removal-schedule.txt\n" . $herecurr);
-			}
-		}
-
-# don't use deprecated functions
-		for my $func (@dep_functions) {
-			if ($line =~ /\b$func\b/) {
-				ERROR("Don't use $func(): see Documentation/feature-removal-schedule.txt\n" . $herecurr);
-			}
-		}
-
 # no volatiles please
 		my $asm_volatile = qr{\b(__asm__|asm)\s+(__volatile__|volatile)\b};
 		if ($line =~ /\bvolatile\b/ && $line !~ /$asm_volatile/) {
 			WARN("Use of volatile is usually wrong: see Documentation/volatile-considered-harmful.txt\n" . $herecurr);
 		}
 
-# SPIN_LOCK_UNLOCKED & RW_LOCK_UNLOCKED are deprecated
-		if ($line =~ /\b(SPIN_LOCK_UNLOCKED|RW_LOCK_UNLOCKED)/) {
-			ERROR("Use of $1 is deprecated: see Documentation/spinlocks.txt\n" . $herecurr);
-		}
-
 # warn about #if 0
 		if ($line =~ /^.\s*\#\s*if\s+0\b/) {
-			CHK("if this code is redundant consider removing it\n" .
+			WARN("if this code is redundant consider removing it\n" .
 				$herecurr);
 		}
 
-# check for needless kfree() checks
+# check for needless g_free() checks
 		if ($prevline =~ /\bif\s*\(([^\)]*)\)/) {
 			my $expr = $1;
-			if ($line =~ /\bkfree\(\Q$expr\E\);/) {
-				WARN("kfree(NULL) is safe this check is probably not required\n" . $hereprev);
-			}
-		}
-# check for needless usb_free_urb() checks
-		if ($prevline =~ /\bif\s*\(([^\)]*)\)/) {
-			my $expr = $1;
-			if ($line =~ /\busb_free_urb\(\Q$expr\E\);/) {
-				WARN("usb_free_urb(NULL) is safe this check is probably not required\n" . $hereprev);
-			}
-		}
-
-# prefer usleep_range over udelay
-		if ($line =~ /\budelay\s*\(\s*(\w+)\s*\)/) {
-			# ignore udelay's < 10, however
-			if (! (($1 =~ /(\d+)/) && ($1 < 10)) ) {
-				CHK("usleep_range is preferred over udelay; see Documentation/timers/timers-howto.txt\n" . $line);
-			}
-		}
-
-# warn about unexpectedly long msleep's
-		if ($line =~ /\bmsleep\s*\((\d+)\);/) {
-			if ($1 < 20) {
-				WARN("msleep < 20ms can sleep for up to 20ms; see Documentation/timers/timers-howto.txt\n" . $line);
+			if ($line =~ /\bg_free\(\Q$expr\E\);/) {
+				WARN("g_free(NULL) is safe this check is probably not required\n" . $hereprev);
 			}
 		}
 
@@ -2716,24 +2392,17 @@
 		if ($line =~ /^.\s*\#\s*(ifdef|ifndef|elif)\s\s+/) {
 			ERROR("exactly one space required after that #$1\n" . $herecurr);
 		}
-
-# check for spinlock_t definitions without a comment.
-		if ($line =~ /^.\s*(struct\s+mutex|spinlock_t)\s+\S+;/ ||
-		    $line =~ /^.\s*(DEFINE_MUTEX)\s*\(/) {
-			my $which = $1;
-			if (!ctx_has_comment($first_line, $linenr)) {
-				CHK("$1 definition without comment\n" . $herecurr);
-			}
-		}
 # check for memory barriers without a comment.
-		if ($line =~ /\b(mb|rmb|wmb|read_barrier_depends|smp_mb|smp_rmb|smp_wmb|smp_read_barrier_depends)\(/) {
+		if ($line =~ /\b(smp_mb|smp_rmb|smp_wmb|smp_read_barrier_depends)\(/) {
 			if (!ctx_has_comment($first_line, $linenr)) {
-				CHK("memory barrier without comment\n" . $herecurr);
+				WARN("memory barrier without comment\n" . $herecurr);
 			}
 		}
 # check of hardware specific defines
-		if ($line =~ m@^.\s*\#\s*if.*\b(__i386__|__powerpc64__|__sun__|__s390x__)\b@ && $realfile !~ m@include/asm-@) {
-			CHK("architecture specific defines should be avoided\n" .  $herecurr);
+# we have e.g. CONFIG_LINUX and CONFIG_WIN32 for common cases
+# where they might be necessary.
+		if ($line =~ m@^.\s*\#\s*if.*\b__@) {
+			WARN("architecture specific defines should be avoided\n" .  $herecurr);
 		}
 
 # Check that the storage class is at the beginning of a declaration
@@ -2748,11 +2417,6 @@
 			ERROR("inline keyword should sit between storage class and type\n" . $herecurr);
 		}
 
-# Check for __inline__ and __inline, prefer inline
-		if ($line =~ /\b(__inline__|__inline)\b/) {
-			WARN("plain inline is preferred over $1\n" . $herecurr);
-		}
-
 # check for sizeof(&)
 		if ($line =~ /\bsizeof\s*\(\s*\&/) {
 			WARN("sizeof(& should be avoided\n" . $herecurr);
@@ -2785,98 +2449,55 @@
 			WARN("externs should be avoided in .c files\n" .  $herecurr);
 		}
 
-# checks for new __setup's
-		if ($rawline =~ /\b__setup\("([^"]*)"/) {
-			my $name = $1;
-
-			if (!grep(/$name/, @setup_docs)) {
-				CHK("__setup appears un-documented -- check Documentation/kernel-parameters.txt\n" . $herecurr);
+# check for pointless casting of g_malloc return
+		if ($line =~ /\*\s*\)\s*g_(try)?(m|re)alloc(0?)(_n)?\b/) {
+			if ($2 == 'm') {
+				WARN("unnecessary cast may hide bugs, use g_$1new$3 instead\n" . $herecurr);
+			} else {
+				WARN("unnecessary cast may hide bugs, use g_$1renew$3 instead\n" . $herecurr);
 			}
 		}
 
-# check for pointless casting of kmalloc return
-		if ($line =~ /\*\s*\)\s*k[czm]alloc\b/) {
-			WARN("unnecessary cast may hide bugs, see http://c-faq.com/malloc/mallocnocast.html\n" . $herecurr);
-		}
-
 # check for gcc specific __FUNCTION__
 		if ($line =~ /__FUNCTION__/) {
 			WARN("__func__ should be used instead of gcc specific __FUNCTION__\n"  . $herecurr);
 		}
 
-# check for semaphores used as mutexes
-		if ($line =~ /^.\s*(DECLARE_MUTEX|init_MUTEX)\s*\(/) {
-			WARN("mutexes are preferred for single holder semaphores\n" . $herecurr);
+# recommend qemu_strto* over strto*
+		if ($line =~ /\b(strto.*?)\s*\(/) {
+			WARN("consider using qemu_$1 in preference to $1\n" . $herecurr);
 		}
-# check for semaphores used as mutexes
-		if ($line =~ /^.\s*init_MUTEX_LOCKED\s*\(/) {
-			WARN("consider using a completion\n" . $herecurr);
-
-		}
-# recommend strict_strto* over simple_strto*
-		if ($line =~ /\bsimple_(strto.*?)\s*\(/) {
-			WARN("consider using strict_$1 in preference to simple_$1\n" . $herecurr);
-		}
-# check for __initcall(), use device_initcall() explicitly please
-		if ($line =~ /^.\s*__initcall\s*\(/) {
-			WARN("please use device_initcall() instead of __initcall()\n" . $herecurr);
+# check for module_init(), use category-specific init macros explicitly please
+		if ($line =~ /^module_init\s*\(/) {
+			WARN("please use block_init(), type_init() etc. instead of module_init()\n" . $herecurr);
 		}
 # check for various ops structs, ensure they are const.
-		my $struct_ops = qr{acpi_dock_ops|
-				address_space_operations|
-				backlight_ops|
-				block_device_operations|
-				dentry_operations|
-				dev_pm_ops|
-				dma_map_ops|
-				extent_io_ops|
-				file_lock_operations|
-				file_operations|
-				hv_ops|
-				ide_dma_ops|
-				intel_dvo_dev_ops|
-				item_operations|
-				iwl_ops|
-				kgdb_arch|
-				kgdb_io|
-				kset_uevent_ops|
-				lock_manager_operations|
-				microcode_ops|
-				mtrr_ops|
-				neigh_ops|
-				nlmsvc_binding|
-				pci_raw_ops|
-				pipe_buf_operations|
-				platform_hibernation_ops|
-				platform_suspend_ops|
-				proto_ops|
-				rpc_pipe_ops|
-				seq_operations|
-				snd_ac97_build_ops|
-				soc_pcmcia_socket_ops|
-				stacktrace_ops|
-				sysfs_ops|
-				tty_operations|
-				usb_mon_operations|
-				wd_ops}x;
+		my $struct_ops = qr{AIOCBInfo|
+				BdrvActionOps|
+				BlockDevOps|
+				BlockJobDriver|
+				DisplayChangeListenerOps|
+				GraphicHwOps|
+				IDEDMAOps|
+				KVMCapabilityInfo|
+				MemoryRegionIOMMUOps|
+				MemoryRegionOps|
+				MemoryRegionPortio|
+				QEMUFileOps|
+				SCSIBusInfo|
+				SCSIReqOps|
+				Spice[A-Z][a-zA-Z0-9]*Interface|
+				TPMDriverOps|
+				USBDesc[A-Z][a-zA-Z0-9]*|
+				VhostOps|
+				VMStateDescription|
+				VMStateInfo}x;
 		if ($line !~ /\bconst\b/ &&
-		    $line =~ /\bstruct\s+($struct_ops)\b/) {
+		    $line =~ /\b($struct_ops)\b/) {
 			WARN("struct $1 should normally be const\n" .
 				$herecurr);
 		}
 
-# use of NR_CPUS is usually wrong
-# ignore definitions of NR_CPUS and usage to define arrays as likely right
-		if ($line =~ /\bNR_CPUS\b/ &&
-		    $line !~ /^.\s*\s*#\s*if\b.*\bNR_CPUS\b/ &&
-		    $line !~ /^.\s*\s*#\s*define\b.*\bNR_CPUS\b/ &&
-		    $line !~ /^.\s*$Declare\s.*\[[^\]]*NR_CPUS[^\]]*\]/ &&
-		    $line !~ /\[[^\]]*\.\.\.[^\]]*NR_CPUS[^\]]*\]/ &&
-		    $line !~ /\[[^\]]*NR_CPUS[^\]]*\.\.\.[^\]]*\]/)
-		{
-			WARN("usage of NR_CPUS is often wrong - consider using cpu_possible(), num_possible_cpus(), for_each_possible_cpu(), etc\n" . $herecurr);
-		}
-
 # check for %L{u,d,i} in strings
 		my $string;
 		while ($line =~ /(?:^|")([X\t]*)(?:"|$)/g) {
@@ -2888,25 +2509,6 @@
 			}
 		}
 
-# whine mightly about in_atomic
-		if ($line =~ /\bin_atomic\s*\(/) {
-			if ($realfile =~ m@^drivers/@) {
-				ERROR("do not use in_atomic in drivers\n" . $herecurr);
-			} elsif ($realfile !~ m@^kernel/@) {
-				WARN("use of in_atomic() is incorrect outside core kernel code\n" . $herecurr);
-			}
-		}
-
-# check for lockdep_set_novalidate_class
-		if ($line =~ /^.\s*lockdep_set_novalidate_class\s*\(/ ||
-		    $line =~ /__lockdep_no_validate__\s*\)/ ) {
-			if ($realfile !~ m@^kernel/lockdep@ &&
-			    $realfile !~ m@^include/linux/lockdep@ &&
-			    $realfile !~ m@^drivers/base/core@) {
-				ERROR("lockdep_no_validate class is reserved for device->mutex.\n" . $herecurr);
-			}
-		}
-
 # QEMU specific tests
 		if ($rawline =~ /\b(?:Qemu|QEmu)\b/) {
 			WARN("use QEMU instead of Qemu or QEmu\n" . $herecurr);
diff --git a/scripts/cocci-macro-file.h b/scripts/cocci-macro-file.h
new file mode 100644
index 0000000..eceb4be
--- /dev/null
+++ b/scripts/cocci-macro-file.h
@@ -0,0 +1,119 @@
+/* Macro file for Coccinelle
+ *
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ * Authors:
+ *  Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or, at your
+ * option, any later version.  See the COPYING file in the top-level directory.
+ */
+
+/* Coccinelle only does limited parsing of headers, and chokes on some idioms
+ * defined in compiler.h and queue.h.  Macros that Coccinelle must know about
+ * in order to parse .c files must be in a separate macro file---which is
+ * exactly what you're staring at now.
+ *
+ * To use this file, add the "--macro-file scripts/cocci-macro-file.h" to the
+ * Coccinelle command line.
+ */
+
+/* From qemu/compiler.h */
+#define QEMU_GNUC_PREREQ(maj, min) 1
+#define QEMU_NORETURN __attribute__ ((__noreturn__))
+#define QEMU_WARN_UNUSED_RESULT __attribute__((warn_unused_result))
+#define QEMU_SENTINEL __attribute__((sentinel))
+#define QEMU_ARTIFICIAL __attribute__((always_inline, artificial))
+#define QEMU_PACKED __attribute__((gcc_struct, packed))
+
+#define cat(x,y) x ## y
+#define cat2(x,y) cat(x,y)
+#define QEMU_BUILD_BUG_ON(x) \
+    typedef char cat2(qemu_build_bug_on__,__LINE__)[(x)?-1:1] __attribute__((unused));
+
+#define GCC_FMT_ATTR(n, m) __attribute__((format(gnu_printf, n, m)))
+
+#define xglue(x, y) x ## y
+#define glue(x, y) xglue(x, y)
+#define stringify(s)	tostring(s)
+#define tostring(s)	#s
+
+#define typeof_field(type, field) typeof(((type *)0)->field)
+#define type_check(t1,t2) ((t1*)0 - (t2*)0)
+
+/* From qemu/queue.h */
+
+#define QLIST_HEAD(name, type)                                          \
+struct name {                                                           \
+        struct type *lh_first;  /* first element */                     \
+}
+
+#define QLIST_HEAD_INITIALIZER(head)                                    \
+        { NULL }
+
+#define QLIST_ENTRY(type)                                               \
+struct {                                                                \
+        struct type *le_next;   /* next element */                      \
+        struct type **le_prev;  /* address of previous next element */  \
+}
+
+/*
+ * Singly-linked List definitions.
+ */
+#define QSLIST_HEAD(name, type)                                          \
+struct name {                                                           \
+        struct type *slh_first; /* first element */                     \
+}
+
+#define QSLIST_HEAD_INITIALIZER(head)                                    \
+        { NULL }
+
+#define QSLIST_ENTRY(type)                                               \
+struct {                                                                \
+        struct type *sle_next;  /* next element */                      \
+}
+
+/*
+ * Simple queue definitions.
+ */
+#define QSIMPLEQ_HEAD(name, type)                                       \
+struct name {                                                           \
+    struct type *sqh_first;    /* first element */                      \
+    struct type **sqh_last;    /* addr of last next element */          \
+}
+
+#define QSIMPLEQ_HEAD_INITIALIZER(head)                                 \
+    { NULL, &(head).sqh_first }
+
+#define QSIMPLEQ_ENTRY(type)                                            \
+struct {                                                                \
+    struct type *sqe_next;    /* next element */                        \
+}
+
+/*
+ * Tail queue definitions.
+ */
+#define Q_TAILQ_HEAD(name, type, qual)                                  \
+struct name {                                                           \
+        qual type *tqh_first;           /* first element */             \
+        qual type *qual *tqh_last;      /* addr of last next element */ \
+}
+#define QTAILQ_HEAD(name, type)                                         \
+struct name {                                                           \
+        type *tqh_first;      /* first element */                       \
+        type **tqh_last;      /* addr of last next element */           \
+}
+
+#define QTAILQ_HEAD_INITIALIZER(head)                                   \
+        { NULL, &(head).tqh_first }
+
+#define Q_TAILQ_ENTRY(type, qual)                                       \
+struct {                                                                \
+        qual type *tqe_next;            /* next element */              \
+        qual type *qual *tqe_prev;      /* address of previous next element */\
+}
+#define QTAILQ_ENTRY(type)                                              \
+struct {                                                                \
+        type *tqe_next;       /* next element */                        \
+        type **tqe_prev;      /* address of previous next element */    \
+}
diff --git a/scripts/dump-guest-memory.py b/scripts/dump-guest-memory.py
index dc8e44a..08796ff 100644
--- a/scripts/dump-guest-memory.py
+++ b/scripts/dump-guest-memory.py
@@ -118,7 +118,7 @@
     def qemu_get_ram_block(self, ram_addr):
         ram_blocks = gdb.parse_and_eval("ram_list.blocks")
         for block in self.qlist_foreach(ram_blocks, "next"):
-            if (ram_addr - block["offset"] < block["length"]):
+            if (ram_addr - block["offset"] < block["used_length"]):
                 return block
         raise gdb.GdbError("Bad ram offset %x" % ram_addr)
 
diff --git a/scripts/qapi.py b/scripts/qapi.py
index 817d824..88fa073 100644
--- a/scripts/qapi.py
+++ b/scripts/qapi.py
@@ -944,7 +944,9 @@
     raw = code % kwds
     if indent_level:
         indent = genindent(indent_level)
-        raw = re.subn("^.", indent + r'\g<0>', raw, 0, re.MULTILINE)
+        # re.subn() lacks flags support before Python 2.7, use re.compile()
+        raw = re.subn(re.compile("^.", re.MULTILINE),
+                      indent + r'\g<0>', raw)
         raw = raw[0]
     return re.sub(re.escape(eatspace) + ' *', '', raw)
 
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index b9068c9..1b80516 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -222,8 +222,10 @@
             };
             uint64_t ttbr1_el[4];
         };
+        uint64_t vttbr_el2; /* Virtualization Translation Table Base.  */
         /* MMU translation table base control. */
         TCR tcr_el[4];
+        TCR vtcr_el2; /* Virtualization Translation Control.  */
         uint32_t c2_data; /* MPU data cacheable bits.  */
         uint32_t c2_insn; /* MPU instruction cacheable bits.  */
         union { /* MMU domain access control register
@@ -383,6 +385,8 @@
          */
         uint64_t c15_ccnt;
         uint64_t pmccfiltr_el0; /* Performance Monitor Filter Register */
+        uint64_t vpidr_el2; /* Virtualization Processor ID Register */
+        uint64_t vmpidr_el2; /* Virtualization Multiprocessor ID Register */
     } cp15;
 
     struct {
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 2c6ec9d..65b9ff5 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -325,6 +325,34 @@
     g_list_free(keys);
 }
 
+/*
+ * Some registers are not accessible if EL3.NS=0 and EL3 is using AArch32 but
+ * they are accessible when EL3 is using AArch64 regardless of EL3.NS.
+ *
+ * access_el3_aa32ns: Used to check AArch32 register views.
+ * access_el3_aa32ns_aa64any: Used to check both AArch32/64 register views.
+ */
+static CPAccessResult access_el3_aa32ns(CPUARMState *env,
+                                        const ARMCPRegInfo *ri)
+{
+    bool secure = arm_is_secure_below_el3(env);
+
+    assert(!arm_el_is_aa64(env, 3));
+    if (secure) {
+        return CP_ACCESS_TRAP_UNCATEGORIZED;
+    }
+    return CP_ACCESS_OK;
+}
+
+static CPAccessResult access_el3_aa32ns_aa64any(CPUARMState *env,
+                                                const ARMCPRegInfo *ri)
+{
+    if (!arm_el_is_aa64(env, 3)) {
+        return access_el3_aa32ns(env, ri);
+    }
+    return CP_ACCESS_OK;
+}
+
 static void dacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 {
     ARMCPU *cpu = arm_env_get_cpu(env);
@@ -2185,6 +2213,20 @@
     raw_write(env, ri, value);
 }
 
+static void vttbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                        uint64_t value)
+{
+    ARMCPU *cpu = arm_env_get_cpu(env);
+    CPUState *cs = CPU(cpu);
+
+    /* Accesses to VTTBR may change the VMID so we must flush the TLB.  */
+    if (raw_read(env, ri) != value) {
+        tlb_flush_by_mmuidx(cs, ARMMMUIdx_S12NSE1, ARMMMUIdx_S12NSE0,
+                            ARMMMUIdx_S2NS, -1);
+        raw_write(env, ri, value);
+    }
+}
+
 static const ARMCPRegInfo vmsa_pmsa_cp_reginfo[] = {
     { .name = "DFSR", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW, .type = ARM_CP_ALIAS,
@@ -2403,7 +2445,19 @@
     REGINFO_SENTINEL
 };
 
-static uint64_t mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+static uint64_t midr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    ARMCPU *cpu = arm_env_get_cpu(env);
+    unsigned int cur_el = arm_current_el(env);
+    bool secure = arm_is_secure(env);
+
+    if (arm_feature(&cpu->env, ARM_FEATURE_EL2) && !secure && cur_el == 1) {
+        return env->cp15.vpidr_el2;
+    }
+    return raw_read(env, ri);
+}
+
+static uint64_t mpidr_read_val(CPUARMState *env)
 {
     ARMCPU *cpu = ARM_CPU(arm_env_get_cpu(env));
     uint64_t mpidr = cpu->mp_affinity;
@@ -2421,6 +2475,17 @@
     return mpidr;
 }
 
+static uint64_t mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    unsigned int cur_el = arm_current_el(env);
+    bool secure = arm_is_secure(env);
+
+    if (arm_feature(env, ARM_FEATURE_EL2) && !secure && cur_el == 1) {
+        return env->cp15.vmpidr_el2;
+    }
+    return mpidr_read_val(env);
+}
+
 static const ARMCPRegInfo mpidr_cp_reginfo[] = {
     { .name = "MPIDR", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 5,
@@ -3112,6 +3177,17 @@
     { .name = "TCR_EL2", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 2,
       .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "VTCR_EL2", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 2,
+      .access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any,
+      .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "VTTBR", .state = ARM_CP_STATE_AA32,
+      .cp = 15, .opc1 = 6, .crm = 2,
+      .access = PL2_RW, .accessfn = access_el3_aa32ns,
+      .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 },
+    { .name = "VTTBR_EL2", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 0,
+      .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
     { .name = "SCTLR_EL2", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 0, .opc2 = 0,
       .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
@@ -3246,6 +3322,24 @@
       .access = PL2_RW, .writefn = vmsa_tcr_el1_write,
       .resetfn = vmsa_ttbcr_reset, .raw_writefn = raw_write,
       .fieldoffset = offsetof(CPUARMState, cp15.tcr_el[2]) },
+    { .name = "VTCR", .state = ARM_CP_STATE_AA32,
+      .cp = 15, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 2,
+      .access = PL2_RW, .accessfn = access_el3_aa32ns,
+      .fieldoffset = offsetof(CPUARMState, cp15.vtcr_el2) },
+    { .name = "VTCR_EL2", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 2,
+      .access = PL2_RW, .type = ARM_CP_ALIAS,
+      .fieldoffset = offsetof(CPUARMState, cp15.vtcr_el2) },
+    { .name = "VTTBR", .state = ARM_CP_STATE_AA32,
+      .cp = 15, .opc1 = 6, .crm = 2,
+      .type = ARM_CP_64BIT | ARM_CP_ALIAS,
+      .access = PL2_RW, .accessfn = access_el3_aa32ns,
+      .fieldoffset = offsetof(CPUARMState, cp15.vttbr_el2),
+      .writefn = vttbr_write },
+    { .name = "VTTBR_EL2", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 0,
+      .access = PL2_RW, .writefn = vttbr_write,
+      .fieldoffset = offsetof(CPUARMState, cp15.vttbr_el2) },
     { .name = "SCTLR_EL2", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 0, .opc2 = 0,
       .access = PL2_RW, .raw_writefn = raw_write, .writefn = sctlr_write,
@@ -4050,6 +4144,30 @@
         define_arm_cp_regs(cpu, v8_cp_reginfo);
     }
     if (arm_feature(env, ARM_FEATURE_EL2)) {
+        uint64_t vmpidr_def = mpidr_read_val(env);
+        ARMCPRegInfo vpidr_regs[] = {
+            { .name = "VPIDR", .state = ARM_CP_STATE_AA32,
+              .cp = 15, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 0,
+              .access = PL2_RW, .accessfn = access_el3_aa32ns,
+              .resetvalue = cpu->midr,
+              .fieldoffset = offsetof(CPUARMState, cp15.vpidr_el2) },
+            { .name = "VPIDR_EL2", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 0,
+              .access = PL2_RW, .resetvalue = cpu->midr,
+              .fieldoffset = offsetof(CPUARMState, cp15.vpidr_el2) },
+            { .name = "VMPIDR", .state = ARM_CP_STATE_AA32,
+              .cp = 15, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 5,
+              .access = PL2_RW, .accessfn = access_el3_aa32ns,
+              .resetvalue = vmpidr_def,
+              .fieldoffset = offsetof(CPUARMState, cp15.vmpidr_el2) },
+            { .name = "VMPIDR_EL2", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 5,
+              .access = PL2_RW,
+              .resetvalue = vmpidr_def,
+              .fieldoffset = offsetof(CPUARMState, cp15.vmpidr_el2) },
+            REGINFO_SENTINEL
+        };
+        define_arm_cp_regs(cpu, vpidr_regs);
         define_arm_cp_regs(cpu, el2_cp_reginfo);
         /* RVBAR_EL2 is only implemented if EL2 is the highest EL */
         if (!arm_feature(env, ARM_FEATURE_EL3)) {
@@ -4065,6 +4183,23 @@
          * register the no_el2 reginfos.
          */
         if (arm_feature(env, ARM_FEATURE_EL3)) {
+            /* When EL3 exists but not EL2, VPIDR and VMPIDR take the value
+             * of MIDR_EL1 and MPIDR_EL1.
+             */
+            ARMCPRegInfo vpidr_regs[] = {
+                { .name = "VPIDR_EL2", .state = ARM_CP_STATE_BOTH,
+                  .opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 0,
+                  .access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any,
+                  .type = ARM_CP_CONST, .resetvalue = cpu->midr,
+                  .fieldoffset = offsetof(CPUARMState, cp15.vpidr_el2) },
+                { .name = "VMPIDR_EL2", .state = ARM_CP_STATE_BOTH,
+                  .opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 5,
+                  .access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any,
+                  .type = ARM_CP_NO_RAW,
+                  .writefn = arm_cp_write_ignore, .readfn = mpidr_read },
+                REGINFO_SENTINEL
+            };
+            define_arm_cp_regs(cpu, vpidr_regs);
             define_arm_cp_regs(cpu, el3_no_el2_cp_reginfo);
         }
     }
@@ -4142,6 +4277,7 @@
               .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = CP_ANY,
               .access = PL1_R, .resetvalue = cpu->midr,
               .writefn = arm_cp_write_ignore, .raw_writefn = raw_write,
+              .readfn = midr_read,
               .fieldoffset = offsetof(CPUARMState, cp15.c0_cpuid),
               .type = ARM_CP_OVERRIDE },
             /* crn = 0 op1 = 0 crm = 3..7 : currently unassigned; we RAZ. */
@@ -4165,7 +4301,9 @@
         ARMCPRegInfo id_v8_midr_cp_reginfo[] = {
             { .name = "MIDR_EL1", .state = ARM_CP_STATE_BOTH,
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 0, .opc2 = 0,
-              .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->midr },
+              .access = PL1_R, .type = ARM_CP_NO_RAW, .resetvalue = cpu->midr,
+              .fieldoffset = offsetof(CPUARMState, cp15.c0_cpuid),
+              .readfn = midr_read },
             /* crn = 0 op1 = 0 crm = 0 op2 = 4,7 : AArch32 aliases of MIDR */
             { .name = "MIDR", .type = ARM_CP_ALIAS | ARM_CP_CONST,
               .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 4,
@@ -5741,8 +5879,7 @@
 static inline TCR *regime_tcr(CPUARMState *env, ARMMMUIdx mmu_idx)
 {
     if (mmu_idx == ARMMMUIdx_S2NS) {
-        /* TODO: return VTCR_EL2 */
-        g_assert_not_reached();
+        return &env->cp15.vtcr_el2;
     }
     return &env->cp15.tcr_el[regime_el(env, mmu_idx)];
 }
@@ -5752,8 +5889,7 @@
                                    int ttbrn)
 {
     if (mmu_idx == ARMMMUIdx_S2NS) {
-        /* TODO: return VTTBR_EL2 */
-        g_assert_not_reached();
+        return env->cp15.vttbr_el2;
     }
     if (ttbrn == 0) {
         return env->cp15.ttbr0_el[regime_el(env, mmu_idx)];
@@ -6275,7 +6411,7 @@
     /* Read an LPAE long-descriptor translation table. */
     MMUFaultType fault_type = translation_fault;
     uint32_t level = 1;
-    uint32_t epd;
+    uint32_t epd = 0;
     int32_t tsz;
     uint32_t tg;
     uint64_t ttbr;
@@ -6301,7 +6437,9 @@
     if (arm_el_is_aa64(env, el)) {
         va_size = 64;
         if (el > 1) {
-            tbi = extract64(tcr->raw_tcr, 20, 1);
+            if (mmu_idx != ARMMMUIdx_S2NS) {
+                tbi = extract64(tcr->raw_tcr, 20, 1);
+            }
         } else {
             if (extract64(address, 55, 1)) {
                 tbi = extract64(tcr->raw_tcr, 38, 1);
@@ -6367,7 +6505,9 @@
      */
     if (ttbr_select == 0) {
         ttbr = regime_ttbr(env, mmu_idx, 0);
-        epd = extract32(tcr->raw_tcr, 7, 1);
+        if (el < 2) {
+            epd = extract32(tcr->raw_tcr, 7, 1);
+        }
         tsz = t0sz;
 
         tg = extract32(tcr->raw_tcr, 14, 2);
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index faece2c..ec0936c 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -40,16 +40,9 @@
 
 static TCGv_i64 cpu_X[32];
 static TCGv_i64 cpu_pc;
-static TCGv_i32 cpu_NF, cpu_ZF, cpu_CF, cpu_VF;
 
 /* Load/store exclusive handling */
-static TCGv_i64 cpu_exclusive_addr;
-static TCGv_i64 cpu_exclusive_val;
 static TCGv_i64 cpu_exclusive_high;
-#ifdef CONFIG_USER_ONLY
-static TCGv_i64 cpu_exclusive_test;
-static TCGv_i32 cpu_exclusive_info;
-#endif
 
 static const char *regnames[] = {
     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
@@ -105,23 +98,8 @@
                                           regnames[i]);
     }
 
-    cpu_NF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, NF), "NF");
-    cpu_ZF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, ZF), "ZF");
-    cpu_CF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, CF), "CF");
-    cpu_VF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, VF), "VF");
-
-    cpu_exclusive_addr = tcg_global_mem_new_i64(TCG_AREG0,
-        offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
-    cpu_exclusive_val = tcg_global_mem_new_i64(TCG_AREG0,
-        offsetof(CPUARMState, exclusive_val), "exclusive_val");
     cpu_exclusive_high = tcg_global_mem_new_i64(TCG_AREG0,
         offsetof(CPUARMState, exclusive_high), "exclusive_high");
-#ifdef CONFIG_USER_ONLY
-    cpu_exclusive_test = tcg_global_mem_new_i64(TCG_AREG0,
-        offsetof(CPUARMState, exclusive_test), "exclusive_test");
-    cpu_exclusive_info = tcg_global_mem_new_i32(TCG_AREG0,
-        offsetof(CPUARMState, exclusive_info), "exclusive_info");
-#endif
 }
 
 static inline ARMMMUIdx get_a64_user_mem_index(DisasContext *s)
@@ -189,6 +167,31 @@
     tcg_gen_movi_i64(cpu_pc, val);
 }
 
+typedef struct DisasCompare64 {
+    TCGCond cond;
+    TCGv_i64 value;
+} DisasCompare64;
+
+static void a64_test_cc(DisasCompare64 *c64, int cc)
+{
+    DisasCompare c32;
+
+    arm_test_cc(&c32, cc);
+
+    /* Sign-extend the 32-bit value so that the GE/LT comparisons work
+       * properly.  The NE/EQ comparisons are also fine with this choice.  */
+    c64->cond = c32.cond;
+    c64->value = tcg_temp_new_i64();
+    tcg_gen_ext_i32_i64(c64->value, c32.value);
+
+    arm_free_cc(&c32);
+}
+
+static void a64_free_cc(DisasCompare64 *c64)
+{
+    tcg_temp_free_i64(c64->value);
+}
+
 static void gen_exception_internal(int excp)
 {
     TCGv_i32 tcg_excp = tcg_const_i32(excp);
@@ -526,13 +529,8 @@
  */
 static inline void gen_set_NZ64(TCGv_i64 result)
 {
-    TCGv_i64 flag = tcg_temp_new_i64();
-
-    tcg_gen_setcondi_i64(TCG_COND_NE, flag, result, 0);
-    tcg_gen_extrl_i64_i32(cpu_ZF, flag);
-    tcg_gen_shri_i64(flag, result, 32);
-    tcg_gen_extrl_i64_i32(cpu_NF, flag);
-    tcg_temp_free_i64(flag);
+    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
+    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
 }
 
 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
@@ -542,7 +540,7 @@
         gen_set_NZ64(result);
     } else {
         tcg_gen_extrl_i64_i32(cpu_ZF, result);
-        tcg_gen_extrl_i64_i32(cpu_NF, result);
+        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
     }
     tcg_gen_movi_i32(cpu_CF, 0);
     tcg_gen_movi_i32(cpu_VF, 0);
@@ -568,8 +566,7 @@
         tcg_gen_xor_i64(tmp, t0, t1);
         tcg_gen_andc_i64(flag, flag, tmp);
         tcg_temp_free_i64(tmp);
-        tcg_gen_shri_i64(flag, flag, 32);
-        tcg_gen_extrl_i64_i32(cpu_VF, flag);
+        tcg_gen_extrh_i64_i32(cpu_VF, flag);
 
         tcg_gen_mov_i64(dest, result);
         tcg_temp_free_i64(result);
@@ -617,8 +614,7 @@
         tcg_gen_xor_i64(tmp, t0, t1);
         tcg_gen_and_i64(flag, flag, tmp);
         tcg_temp_free_i64(tmp);
-        tcg_gen_shri_i64(flag, flag, 32);
-        tcg_gen_extrl_i64_i32(cpu_VF, flag);
+        tcg_gen_extrh_i64_i32(cpu_VF, flag);
         tcg_gen_mov_i64(dest, result);
         tcg_temp_free_i64(flag);
         tcg_temp_free_i64(result);
@@ -677,8 +673,7 @@
         tcg_gen_xor_i64(vf_64, result, t0);
         tcg_gen_xor_i64(tmp, t0, t1);
         tcg_gen_andc_i64(vf_64, vf_64, tmp);
-        tcg_gen_shri_i64(vf_64, vf_64, 32);
-        tcg_gen_extrl_i64_i32(cpu_VF, vf_64);
+        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
 
         tcg_gen_mov_i64(dest, result);
 
@@ -3012,9 +3007,51 @@
     }
 
     tcg_rd = cpu_reg(s, rd);
-    tcg_tmp = read_cpu_reg(s, rn, sf);
 
-    /* OPTME: probably worth recognizing common cases of ext{8,16,32}{u,s} */
+    /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
+       to be smaller than bitsize, we'll never reference data outside the
+       low 32-bits anyway.  */
+    tcg_tmp = read_cpu_reg(s, rn, 1);
+
+    /* Recognize the common aliases.  */
+    if (opc == 0) { /* SBFM */
+        if (ri == 0) {
+            if (si == 7) { /* SXTB */
+                tcg_gen_ext8s_i64(tcg_rd, tcg_tmp);
+                goto done;
+            } else if (si == 15) { /* SXTH */
+                tcg_gen_ext16s_i64(tcg_rd, tcg_tmp);
+                goto done;
+            } else if (si == 31) { /* SXTW */
+                tcg_gen_ext32s_i64(tcg_rd, tcg_tmp);
+                goto done;
+            }
+        }
+        if (si == 63 || (si == 31 && ri <= si)) { /* ASR */
+            if (si == 31) {
+                tcg_gen_ext32s_i64(tcg_tmp, tcg_tmp);
+            }
+            tcg_gen_sari_i64(tcg_rd, tcg_tmp, ri);
+            goto done;
+        }
+    } else if (opc == 2) { /* UBFM */
+        if (ri == 0) { /* UXTB, UXTH, plus non-canonical AND */
+            tcg_gen_andi_i64(tcg_rd, tcg_tmp, bitmask64(si + 1));
+            return;
+        }
+        if (si == 63 || (si == 31 && ri <= si)) { /* LSR */
+            if (si == 31) {
+                tcg_gen_ext32u_i64(tcg_tmp, tcg_tmp);
+            }
+            tcg_gen_shri_i64(tcg_rd, tcg_tmp, ri);
+            return;
+        }
+        if (si + 1 == ri && si != bitsize - 1) { /* LSL */
+            int shift = bitsize - 1 - si;
+            tcg_gen_shli_i64(tcg_rd, tcg_tmp, shift);
+            goto done;
+        }
+    }
 
     if (opc != 1) { /* SBFM or UBFM */
         tcg_gen_movi_i64(tcg_rd, 0);
@@ -3039,6 +3076,7 @@
         tcg_gen_sari_i64(tcg_rd, tcg_rd, 64 - (pos + len));
     }
 
+ done:
     if (!sf) { /* zero extend final result */
         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
     }
@@ -3071,17 +3109,7 @@
 
         tcg_rd = cpu_reg(s, rd);
 
-        if (imm) {
-            /* OPTME: we can special case rm==rn as a rotate */
-            tcg_rm = read_cpu_reg(s, rm, sf);
-            tcg_rn = read_cpu_reg(s, rn, sf);
-            tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
-            tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
-            tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
-            if (!sf) {
-                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
-            }
-        } else {
+        if (unlikely(imm == 0)) {
             /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
              * so an extract from bit 0 is a special case.
              */
@@ -3090,8 +3118,27 @@
             } else {
                 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
             }
+        } else if (rm == rn) { /* ROR */
+            tcg_rm = cpu_reg(s, rm);
+            if (sf) {
+                tcg_gen_rotri_i64(tcg_rd, tcg_rm, imm);
+            } else {
+                TCGv_i32 tmp = tcg_temp_new_i32();
+                tcg_gen_extrl_i64_i32(tmp, tcg_rm);
+                tcg_gen_rotri_i32(tmp, tmp, imm);
+                tcg_gen_extu_i32_i64(tcg_rd, tmp);
+                tcg_temp_free_i32(tmp);
+            }
+        } else {
+            tcg_rm = read_cpu_reg(s, rm, sf);
+            tcg_rn = read_cpu_reg(s, rn, sf);
+            tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
+            tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
+            tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
+            if (!sf) {
+                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
+            }
         }
-
     }
 }
 
@@ -3567,8 +3614,9 @@
 static void disas_cc(DisasContext *s, uint32_t insn)
 {
     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
-    TCGLabel *label_continue = NULL;
+    TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
+    DisasCompare c;
 
     if (!extract32(insn, 29, 1)) {
         unallocated_encoding(s);
@@ -3586,19 +3634,13 @@
     rn = extract32(insn, 5, 5);
     nzcv = extract32(insn, 0, 4);
 
-    if (cond < 0x0e) { /* not always */
-        TCGLabel *label_match = gen_new_label();
-        label_continue = gen_new_label();
-        arm_gen_test_cc(cond, label_match);
-        /* nomatch: */
-        tcg_tmp = tcg_temp_new_i64();
-        tcg_gen_movi_i64(tcg_tmp, nzcv << 28);
-        gen_set_nzcv(tcg_tmp);
-        tcg_temp_free_i64(tcg_tmp);
-        tcg_gen_br(label_continue);
-        gen_set_label(label_match);
-    }
-    /* match, or condition is always */
+    /* Set T0 = !COND.  */
+    tcg_t0 = tcg_temp_new_i32();
+    arm_test_cc(&c, cond);
+    tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
+    arm_free_cc(&c);
+
+    /* Load the arguments for the new comparison.  */
     if (is_imm) {
         tcg_y = new_tmp_a64(s);
         tcg_gen_movi_i64(tcg_y, y);
@@ -3607,6 +3649,7 @@
     }
     tcg_rn = cpu_reg(s, rn);
 
+    /* Set the flags for the new comparison.  */
     tcg_tmp = tcg_temp_new_i64();
     if (op) {
         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
@@ -3615,9 +3658,55 @@
     }
     tcg_temp_free_i64(tcg_tmp);
 
-    if (cond < 0x0e) { /* continue */
-        gen_set_label(label_continue);
+    /* If COND was false, force the flags to #nzcv.  Compute two masks
+     * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
+     * For tcg hosts that support ANDC, we can make do with just T1.
+     * In either case, allow the tcg optimizer to delete any unused mask.
+     */
+    tcg_t1 = tcg_temp_new_i32();
+    tcg_t2 = tcg_temp_new_i32();
+    tcg_gen_neg_i32(tcg_t1, tcg_t0);
+    tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
+
+    if (nzcv & 8) { /* N */
+        tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
+    } else {
+        if (TCG_TARGET_HAS_andc_i32) {
+            tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
+        } else {
+            tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
+        }
     }
+    if (nzcv & 4) { /* Z */
+        if (TCG_TARGET_HAS_andc_i32) {
+            tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
+        } else {
+            tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
+        }
+    } else {
+        tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
+    }
+    if (nzcv & 2) { /* C */
+        tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
+    } else {
+        if (TCG_TARGET_HAS_andc_i32) {
+            tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
+        } else {
+            tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
+        }
+    }
+    if (nzcv & 1) { /* V */
+        tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
+    } else {
+        if (TCG_TARGET_HAS_andc_i32) {
+            tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
+        } else {
+            tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
+        }
+    }
+    tcg_temp_free_i32(tcg_t0);
+    tcg_temp_free_i32(tcg_t1);
+    tcg_temp_free_i32(tcg_t2);
 }
 
 /* C3.5.6 Conditional select
@@ -3629,7 +3718,8 @@
 static void disas_cond_select(DisasContext *s, uint32_t insn)
 {
     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
-    TCGv_i64 tcg_rd, tcg_src;
+    TCGv_i64 tcg_rd, zero;
+    DisasCompare64 c;
 
     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
         /* S == 1 or op2<1> == 1 */
@@ -3644,48 +3734,35 @@
     rn = extract32(insn, 5, 5);
     rd = extract32(insn, 0, 5);
 
-    if (rd == 31) {
-        /* silly no-op write; until we use movcond we must special-case
-         * this to avoid a dead temporary across basic blocks.
-         */
-        return;
-    }
-
     tcg_rd = cpu_reg(s, rd);
 
-    if (cond >= 0x0e) { /* condition "always" */
-        tcg_src = read_cpu_reg(s, rn, sf);
-        tcg_gen_mov_i64(tcg_rd, tcg_src);
+    a64_test_cc(&c, cond);
+    zero = tcg_const_i64(0);
+
+    if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
+        /* CSET & CSETM.  */
+        tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
+        if (else_inv) {
+            tcg_gen_neg_i64(tcg_rd, tcg_rd);
+        }
     } else {
-        /* OPTME: we could use movcond here, at the cost of duplicating
-         * a lot of the arm_gen_test_cc() logic.
-         */
-        TCGLabel *label_match = gen_new_label();
-        TCGLabel *label_continue = gen_new_label();
-
-        arm_gen_test_cc(cond, label_match);
-        /* nomatch: */
-        tcg_src = cpu_reg(s, rm);
-
+        TCGv_i64 t_true = cpu_reg(s, rn);
+        TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
         if (else_inv && else_inc) {
-            tcg_gen_neg_i64(tcg_rd, tcg_src);
+            tcg_gen_neg_i64(t_false, t_false);
         } else if (else_inv) {
-            tcg_gen_not_i64(tcg_rd, tcg_src);
+            tcg_gen_not_i64(t_false, t_false);
         } else if (else_inc) {
-            tcg_gen_addi_i64(tcg_rd, tcg_src, 1);
-        } else {
-            tcg_gen_mov_i64(tcg_rd, tcg_src);
+            tcg_gen_addi_i64(t_false, t_false, 1);
         }
-        if (!sf) {
-            tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
-        }
-        tcg_gen_br(label_continue);
-        /* match: */
-        gen_set_label(label_match);
-        tcg_src = read_cpu_reg(s, rn, sf);
-        tcg_gen_mov_i64(tcg_rd, tcg_src);
-        /* continue: */
-        gen_set_label(label_continue);
+        tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
+    }
+
+    tcg_temp_free_i64(zero);
+    a64_free_cc(&c);
+
+    if (!sf) {
+        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
     }
 }
 
@@ -4172,20 +4249,6 @@
     }
 }
 
-/* copy src FP register to dst FP register; type specifies single or double */
-static void gen_mov_fp2fp(DisasContext *s, int type, int dst, int src)
-{
-    if (type) {
-        TCGv_i64 v = read_fp_dreg(s, src);
-        write_fp_dreg(s, dst, v);
-        tcg_temp_free_i64(v);
-    } else {
-        TCGv_i32 v = read_fp_sreg(s, src);
-        write_fp_sreg(s, dst, v);
-        tcg_temp_free_i32(v);
-    }
-}
-
 /* C3.6.24 Floating point conditional select
  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
  * +---+---+---+-----------+------+---+------+------+-----+------+------+
@@ -4195,7 +4258,8 @@
 static void disas_fp_csel(DisasContext *s, uint32_t insn)
 {
     unsigned int mos, type, rm, cond, rn, rd;
-    TCGLabel *label_continue = NULL;
+    TCGv_i64 t_true, t_false, t_zero;
+    DisasCompare64 c;
 
     mos = extract32(insn, 29, 3);
     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
@@ -4213,21 +4277,23 @@
         return;
     }
 
-    if (cond < 0x0e) { /* not always */
-        TCGLabel *label_match = gen_new_label();
-        label_continue = gen_new_label();
-        arm_gen_test_cc(cond, label_match);
-        /* nomatch: */
-        gen_mov_fp2fp(s, type, rd, rm);
-        tcg_gen_br(label_continue);
-        gen_set_label(label_match);
-    }
+    /* Zero extend sreg inputs to 64 bits now.  */
+    t_true = tcg_temp_new_i64();
+    t_false = tcg_temp_new_i64();
+    read_vec_element(s, t_true, rn, 0, type ? MO_64 : MO_32);
+    read_vec_element(s, t_false, rm, 0, type ? MO_64 : MO_32);
 
-    gen_mov_fp2fp(s, type, rd, rn);
+    a64_test_cc(&c, cond);
+    t_zero = tcg_const_i64(0);
+    tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
+    tcg_temp_free_i64(t_zero);
+    tcg_temp_free_i64(t_false);
+    a64_free_cc(&c);
 
-    if (cond < 0x0e) { /* continue */
-        gen_set_label(label_continue);
-    }
+    /* Note that sregs write back zeros to the high bits,
+       and we've already done the zero-extension.  */
+    write_fp_dreg(s, rd, t_true);
+    tcg_temp_free_i64(t_true);
 }
 
 /* C3.6.25 Floating-point data-processing (1 source) - single precision */
@@ -7701,10 +7767,8 @@
             } else {
                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
-                tcg_gen_extrl_i64_i32(tcg_lo, tcg_op);
+                tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env);
-                tcg_gen_shri_i64(tcg_op, tcg_op, 32);
-                tcg_gen_extrl_i64_i32(tcg_hi, tcg_op);
                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env);
                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
                 tcg_temp_free_i32(tcg_lo);
@@ -8610,16 +8674,10 @@
     }
 }
 
-static void do_narrow_high_u32(TCGv_i32 res, TCGv_i64 in)
-{
-    tcg_gen_shri_i64(in, in, 32);
-    tcg_gen_extrl_i64_i32(res, in);
-}
-
 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
 {
     tcg_gen_addi_i64(in, in, 1U << 31);
-    do_narrow_high_u32(res, in);
+    tcg_gen_extrh_i64_i32(res, in);
 }
 
 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
@@ -8638,7 +8696,7 @@
               gen_helper_neon_narrow_round_high_u8 },
             { gen_helper_neon_narrow_high_u16,
               gen_helper_neon_narrow_round_high_u16 },
-            { do_narrow_high_u32, do_narrow_round_high_u32 },
+            { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
         };
         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
 
diff --git a/target-arm/translate.c b/target-arm/translate.c
index ae70577..84a21ac 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -64,12 +64,12 @@
 /* We reuse the same 64-bit temporaries for efficiency.  */
 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
 static TCGv_i32 cpu_R[16];
-static TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
-static TCGv_i64 cpu_exclusive_addr;
-static TCGv_i64 cpu_exclusive_val;
+TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
+TCGv_i64 cpu_exclusive_addr;
+TCGv_i64 cpu_exclusive_val;
 #ifdef CONFIG_USER_ONLY
-static TCGv_i64 cpu_exclusive_test;
-static TCGv_i32 cpu_exclusive_info;
+TCGv_i64 cpu_exclusive_test;
+TCGv_i32 cpu_exclusive_info;
 #endif
 
 /* FIXME:  These should be removed.  */
@@ -738,81 +738,113 @@
 #undef PAS_OP
 
 /*
- * generate a conditional branch based on ARM condition code cc.
+ * Generate a conditional based on ARM condition code cc.
  * This is common between ARM and Aarch64 targets.
  */
-void arm_gen_test_cc(int cc, TCGLabel *label)
+void arm_test_cc(DisasCompare *cmp, int cc)
 {
-    TCGv_i32 tmp;
-    TCGLabel *inv;
+    TCGv_i32 value;
+    TCGCond cond;
+    bool global = true;
 
     switch (cc) {
     case 0: /* eq: Z */
-        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label);
-        break;
     case 1: /* ne: !Z */
-        tcg_gen_brcondi_i32(TCG_COND_NE, cpu_ZF, 0, label);
+        cond = TCG_COND_EQ;
+        value = cpu_ZF;
         break;
+
     case 2: /* cs: C */
-        tcg_gen_brcondi_i32(TCG_COND_NE, cpu_CF, 0, label);
-        break;
     case 3: /* cc: !C */
-        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, label);
+        cond = TCG_COND_NE;
+        value = cpu_CF;
         break;
+
     case 4: /* mi: N */
-        tcg_gen_brcondi_i32(TCG_COND_LT, cpu_NF, 0, label);
-        break;
     case 5: /* pl: !N */
-        tcg_gen_brcondi_i32(TCG_COND_GE, cpu_NF, 0, label);
+        cond = TCG_COND_LT;
+        value = cpu_NF;
         break;
+
     case 6: /* vs: V */
-        tcg_gen_brcondi_i32(TCG_COND_LT, cpu_VF, 0, label);
-        break;
     case 7: /* vc: !V */
-        tcg_gen_brcondi_i32(TCG_COND_GE, cpu_VF, 0, label);
+        cond = TCG_COND_LT;
+        value = cpu_VF;
         break;
+
     case 8: /* hi: C && !Z */
-        inv = gen_new_label();
-        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, inv);
-        tcg_gen_brcondi_i32(TCG_COND_NE, cpu_ZF, 0, label);
-        gen_set_label(inv);
+    case 9: /* ls: !C || Z -> !(C && !Z) */
+        cond = TCG_COND_NE;
+        value = tcg_temp_new_i32();
+        global = false;
+        /* CF is 1 for C, so -CF is an all-bits-set mask for C;
+           ZF is non-zero for !Z; so AND the two subexpressions.  */
+        tcg_gen_neg_i32(value, cpu_CF);
+        tcg_gen_and_i32(value, value, cpu_ZF);
         break;
-    case 9: /* ls: !C || Z */
-        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, label);
-        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label);
-        break;
+
     case 10: /* ge: N == V -> N ^ V == 0 */
-        tmp = tcg_temp_new_i32();
-        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
-        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
-        tcg_temp_free_i32(tmp);
-        break;
     case 11: /* lt: N != V -> N ^ V != 0 */
-        tmp = tcg_temp_new_i32();
-        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
-        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
-        tcg_temp_free_i32(tmp);
+        /* Since we're only interested in the sign bit, == 0 is >= 0.  */
+        cond = TCG_COND_GE;
+        value = tcg_temp_new_i32();
+        global = false;
+        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
         break;
+
     case 12: /* gt: !Z && N == V */
-        inv = gen_new_label();
-        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, inv);
-        tmp = tcg_temp_new_i32();
-        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
-        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
-        tcg_temp_free_i32(tmp);
-        gen_set_label(inv);
-        break;
     case 13: /* le: Z || N != V */
-        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label);
-        tmp = tcg_temp_new_i32();
-        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
-        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
-        tcg_temp_free_i32(tmp);
+        cond = TCG_COND_NE;
+        value = tcg_temp_new_i32();
+        global = false;
+        /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
+         * the sign bit then AND with ZF to yield the result.  */
+        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
+        tcg_gen_sari_i32(value, value, 31);
+        tcg_gen_andc_i32(value, cpu_ZF, value);
         break;
+
+    case 14: /* always */
+    case 15: /* always */
+        /* Use the ALWAYS condition, which will fold early.
+         * It doesn't matter what we use for the value.  */
+        cond = TCG_COND_ALWAYS;
+        value = cpu_ZF;
+        goto no_invert;
+
     default:
         fprintf(stderr, "Bad condition code 0x%x\n", cc);
         abort();
     }
+
+    if (cc & 1) {
+        cond = tcg_invert_cond(cond);
+    }
+
+ no_invert:
+    cmp->cond = cond;
+    cmp->value = value;
+    cmp->value_global = global;
+}
+
+void arm_free_cc(DisasCompare *cmp)
+{
+    if (!cmp->value_global) {
+        tcg_temp_free_i32(cmp->value);
+    }
+}
+
+void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
+{
+    tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
+}
+
+void arm_gen_test_cc(int cc, TCGLabel *label)
+{
+    DisasCompare cmp;
+    arm_test_cc(&cmp, cc);
+    arm_jump_cc(&cmp, label);
+    arm_free_cc(&cmp);
 }
 
 static const uint8_t table_logic_cc[16] = {
diff --git a/target-arm/translate.h b/target-arm/translate.h
index 4b618a4..b8fe37a 100644
--- a/target-arm/translate.h
+++ b/target-arm/translate.h
@@ -63,7 +63,21 @@
     TCGv_i64 tmp_a64[TMP_A64_MAX];
 } DisasContext;
 
+typedef struct DisasCompare {
+    TCGCond cond;
+    TCGv_i32 value;
+    bool value_global;
+} DisasCompare;
+
+/* Share the TCG temporaries common between 32 and 64 bit modes.  */
 extern TCGv_ptr cpu_env;
+extern TCGv_i32 cpu_NF, cpu_ZF, cpu_CF, cpu_VF;
+extern TCGv_i64 cpu_exclusive_addr;
+extern TCGv_i64 cpu_exclusive_val;
+#ifdef CONFIG_USER_ONLY
+extern TCGv_i64 cpu_exclusive_test;
+extern TCGv_i32 cpu_exclusive_info;
+#endif
 
 static inline int arm_dc_feature(DisasContext *dc, int feature)
 {
@@ -136,6 +150,9 @@
 }
 #endif
 
+void arm_test_cc(DisasCompare *cmp, int cc);
+void arm_free_cc(DisasCompare *cmp);
+void arm_jump_cc(DisasCompare *cmp, TCGLabel *label);
 void arm_gen_test_cc(int cc, TCGLabel *label);
 
 #endif /* TARGET_ARM_TRANSLATE_H */
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 151f1b9..af97772 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -1318,6 +1318,9 @@
 void cpu_set_mxcsr(CPUX86State *env, uint32_t val);
 void cpu_set_fpuc(CPUX86State *env, uint16_t val);
 
+/* mem_helper.c */
+void helper_lock_init(void);
+
 /* svm_helper.c */
 void cpu_svm_check_intercept_param(CPUX86State *env1, uint32_t type,
                                    uint64_t param);
diff --git a/target-i386/mem_helper.c b/target-i386/mem_helper.c
index 1aec8a5..8bf0da2 100644
--- a/target-i386/mem_helper.c
+++ b/target-i386/mem_helper.c
@@ -23,18 +23,37 @@
 
 /* broken thread support */
 
-static spinlock_t global_cpu_lock = SPIN_LOCK_UNLOCKED;
+#if defined(CONFIG_USER_ONLY)
+QemuMutex global_cpu_lock;
 
 void helper_lock(void)
 {
-    spin_lock(&global_cpu_lock);
+    qemu_mutex_lock(&global_cpu_lock);
 }
 
 void helper_unlock(void)
 {
-    spin_unlock(&global_cpu_lock);
+    qemu_mutex_unlock(&global_cpu_lock);
 }
 
+void helper_lock_init(void)
+{
+    qemu_mutex_init(&global_cpu_lock);
+}
+#else
+void helper_lock(void)
+{
+}
+
+void helper_unlock(void)
+{
+}
+
+void helper_lock_init(void)
+{
+}
+#endif
+
 void helper_cmpxchg8b(CPUX86State *env, target_ulong a0)
 {
     uint64_t d;
diff --git a/target-i386/translate.c b/target-i386/translate.c
index d72fa46..3ce9c65 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -7898,6 +7898,8 @@
                                          offsetof(CPUX86State, regs[i]),
                                          reg_names[i]);
     }
+
+    helper_lock_init();
 }
 
 /* generate intermediate code in gen_opc_buf and gen_opparam_buf for
diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index 7e39045..5fdee1b 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -1796,13 +1796,6 @@
     return cs->kvm_run->psw_addr == 0xfffUL;
 }
 
-static void guest_panicked(void)
-{
-    qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_PAUSE,
-                                   &error_abort);
-    vm_stop(RUN_STATE_GUEST_PANICKED);
-}
-
 static void unmanageable_intercept(S390CPU *cpu, const char *str, int pswoffset)
 {
     CPUState *cs = CPU(cpu);
@@ -1811,7 +1804,7 @@
                  str, cs->cpu_index, ldq_phys(cs->as, cpu->env.psa + pswoffset),
                  ldq_phys(cs->as, cpu->env.psa + pswoffset + 8));
     s390_cpu_halt(cpu);
-    guest_panicked();
+    qemu_system_guest_panicked();
 }
 
 static int handle_intercept(S390CPU *cpu)
@@ -1844,7 +1837,7 @@
                 if (is_special_wait_psw(cs)) {
                     qemu_system_shutdown_request();
                 } else {
-                    guest_panicked();
+                    qemu_system_guest_panicked();
                 }
             }
             r = EXCP_HALTED;
diff --git a/tcg/tcg.h b/tcg/tcg.h
index ea5d0ee..879a665 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -595,6 +595,10 @@
 void tcg_pool_reset(TCGContext *s);
 void tcg_pool_delete(TCGContext *s);
 
+void tb_lock(void);
+void tb_unlock(void);
+void tb_lock_reset(void);
+
 static inline void *tcg_malloc(int size)
 {
     TCGContext *s = &tcg_ctx;
diff --git a/tests/test-cutils.c b/tests/test-cutils.c
index 2a4556d..0046c61 100644
--- a/tests/test-cutils.c
+++ b/tests/test-cutils.c
@@ -226,6 +226,1132 @@
     g_assert_cmpint(i, ==, 123);
 }
 
+static void test_qemu_strtol_correct(void)
+{
+    const char *str = "12345 foo";
+    char f = 'X';
+    const char *endptr = &f;
+    long res = 999;
+    int err;
+
+    err = qemu_strtol(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 12345);
+    g_assert(endptr == str + 5);
+}
+
+static void test_qemu_strtol_null(void)
+{
+    char f = 'X';
+    const char *endptr = &f;
+    long res = 999;
+    int err;
+
+    err = qemu_strtol(NULL, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, -EINVAL);
+    g_assert(endptr == NULL);
+}
+
+static void test_qemu_strtol_empty(void)
+{
+    const char *str = "";
+    char f = 'X';
+    const char *endptr = &f;
+    long res = 999;
+    int err;
+
+    err = qemu_strtol(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, -EINVAL);
+}
+
+static void test_qemu_strtol_whitespace(void)
+{
+    const char *str = "  \t  ";
+    char f = 'X';
+    const char *endptr = &f;
+    long res = 999;
+    int err;
+
+    err = qemu_strtol(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, -EINVAL);
+}
+
+static void test_qemu_strtol_invalid(void)
+{
+    const char *str = "   xxxx  \t abc";
+    char f = 'X';
+    const char *endptr = &f;
+    long res = 999;
+    int err;
+
+    err = qemu_strtol(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, -EINVAL);
+}
+
+static void test_qemu_strtol_trailing(void)
+{
+    const char *str = "123xxx";
+    char f = 'X';
+    const char *endptr = &f;
+    long res = 999;
+    int err;
+
+    err = qemu_strtol(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 123);
+    g_assert(endptr == str + 3);
+}
+
+static void test_qemu_strtol_octal(void)
+{
+    const char *str = "0123";
+    char f = 'X';
+    const char *endptr = &f;
+    long res = 999;
+    int err;
+
+    err = qemu_strtol(str, &endptr, 8, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 0123);
+    g_assert(endptr == str + strlen(str));
+
+    res = 999;
+    endptr = &f;
+    err = qemu_strtol(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 0123);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_qemu_strtol_decimal(void)
+{
+    const char *str = "0123";
+    char f = 'X';
+    const char *endptr = &f;
+    long res = 999;
+    int err;
+
+    err = qemu_strtol(str, &endptr, 10, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 123);
+    g_assert(endptr == str + strlen(str));
+
+    str = "123";
+    res = 999;
+    endptr = &f;
+    err = qemu_strtol(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 123);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_qemu_strtol_hex(void)
+{
+    const char *str = "0123";
+    char f = 'X';
+    const char *endptr = &f;
+    long res = 999;
+    int err;
+
+    err = qemu_strtol(str, &endptr, 16, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 0x123);
+    g_assert(endptr == str + strlen(str));
+
+    str = "0x123";
+    res = 999;
+    endptr = &f;
+    err = qemu_strtol(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 0x123);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_qemu_strtol_max(void)
+{
+    const char *str = g_strdup_printf("%ld", LONG_MAX);
+    char f = 'X';
+    const char *endptr = &f;
+    long res = 999;
+    int err;
+
+    err = qemu_strtol(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, LONG_MAX);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_qemu_strtol_overflow(void)
+{
+    const char *str = "99999999999999999999999999999999999999999999";
+    char f = 'X';
+    const char *endptr = &f;
+    long res = 999;
+    int err;
+
+    err = qemu_strtol(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, -ERANGE);
+    g_assert_cmpint(res, ==, LONG_MAX);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_qemu_strtol_underflow(void)
+{
+    const char *str = "-99999999999999999999999999999999999999999999";
+    char f = 'X';
+    const char *endptr = &f;
+    long res = 999;
+    int err;
+
+    err  = qemu_strtol(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, -ERANGE);
+    g_assert_cmpint(res, ==, LONG_MIN);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_qemu_strtol_negative(void)
+{
+    const char *str = "  \t -321";
+    char f = 'X';
+    const char *endptr = &f;
+    long res = 999;
+    int err;
+
+    err = qemu_strtol(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, -321);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_qemu_strtol_full_correct(void)
+{
+    const char *str = "123";
+    long res = 999;
+    int err;
+
+    err = qemu_strtol(str, NULL, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 123);
+}
+
+static void test_qemu_strtol_full_null(void)
+{
+    char f = 'X';
+    const char *endptr = &f;
+    long res = 999;
+    int err;
+
+    err = qemu_strtol(NULL, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, -EINVAL);
+    g_assert(endptr == NULL);
+}
+
+static void test_qemu_strtol_full_empty(void)
+{
+    const char *str = "";
+    long res = 999L;
+    int err;
+
+    err =  qemu_strtol(str, NULL, 0, &res);
+
+    g_assert_cmpint(err, ==, -EINVAL);
+}
+
+static void test_qemu_strtol_full_negative(void)
+{
+    const char *str = " \t -321";
+    long res = 999;
+    int err;
+
+    err = qemu_strtol(str, NULL, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, -321);
+}
+
+static void test_qemu_strtol_full_trailing(void)
+{
+    const char *str = "123xxx";
+    long res;
+    int err;
+
+    err = qemu_strtol(str, NULL, 0, &res);
+
+    g_assert_cmpint(err, ==, -EINVAL);
+}
+
+static void test_qemu_strtol_full_max(void)
+{
+    const char *str = g_strdup_printf("%ld", LONG_MAX);
+    long res;
+    int err;
+
+    err = qemu_strtol(str, NULL, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, LONG_MAX);
+}
+
+static void test_qemu_strtoul_correct(void)
+{
+    const char *str = "12345 foo";
+    char f = 'X';
+    const char *endptr = &f;
+    unsigned long res = 999;
+    int err;
+
+    err = qemu_strtoul(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 12345);
+    g_assert(endptr == str + 5);
+}
+
+static void test_qemu_strtoul_null(void)
+{
+    char f = 'X';
+    const char *endptr = &f;
+    unsigned long res = 999;
+    int err;
+
+    err = qemu_strtoul(NULL, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, -EINVAL);
+    g_assert(endptr == NULL);
+}
+
+static void test_qemu_strtoul_empty(void)
+{
+    const char *str = "";
+    char f = 'X';
+    const char *endptr = &f;
+    unsigned long res = 999;
+    int err;
+
+    err = qemu_strtoul(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, -EINVAL);
+}
+
+static void test_qemu_strtoul_whitespace(void)
+{
+    const char *str = "  \t  ";
+    char f = 'X';
+    const char *endptr = &f;
+    unsigned long res = 999;
+    int err;
+
+    err = qemu_strtoul(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, -EINVAL);
+}
+
+static void test_qemu_strtoul_invalid(void)
+{
+    const char *str = "   xxxx  \t abc";
+    char f = 'X';
+    const char *endptr = &f;
+    unsigned long res = 999;
+    int err;
+
+    err = qemu_strtoul(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, -EINVAL);
+}
+
+static void test_qemu_strtoul_trailing(void)
+{
+    const char *str = "123xxx";
+    char f = 'X';
+    const char *endptr = &f;
+    unsigned long res = 999;
+    int err;
+
+    err = qemu_strtoul(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 123);
+    g_assert(endptr == str + 3);
+}
+
+static void test_qemu_strtoul_octal(void)
+{
+    const char *str = "0123";
+    char f = 'X';
+    const char *endptr = &f;
+    unsigned long res = 999;
+    int err;
+
+    err = qemu_strtoul(str, &endptr, 8, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 0123);
+    g_assert(endptr == str + strlen(str));
+
+    res = 999;
+    endptr = &f;
+    err = qemu_strtoul(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 0123);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_qemu_strtoul_decimal(void)
+{
+    const char *str = "0123";
+    char f = 'X';
+    const char *endptr = &f;
+    unsigned long res = 999;
+    int err;
+
+    err = qemu_strtoul(str, &endptr, 10, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 123);
+    g_assert(endptr == str + strlen(str));
+
+    str = "123";
+    res = 999;
+    endptr = &f;
+    err = qemu_strtoul(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 123);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_qemu_strtoul_hex(void)
+{
+    const char *str = "0123";
+    char f = 'X';
+    const char *endptr = &f;
+    unsigned long res = 999;
+    int err;
+
+    err = qemu_strtoul(str, &endptr, 16, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 0x123);
+    g_assert(endptr == str + strlen(str));
+
+    str = "0x123";
+    res = 999;
+    endptr = &f;
+    err = qemu_strtoul(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 0x123);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_qemu_strtoul_max(void)
+{
+    const char *str = g_strdup_printf("%lu", ULONG_MAX);
+    char f = 'X';
+    const char *endptr = &f;
+    unsigned long res = 999;
+    int err;
+
+    err = qemu_strtoul(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, ULONG_MAX);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_qemu_strtoul_overflow(void)
+{
+    const char *str = "99999999999999999999999999999999999999999999";
+    char f = 'X';
+    const char *endptr = &f;
+    unsigned long res = 999;
+    int err;
+
+    err = qemu_strtoul(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, -ERANGE);
+    g_assert_cmpint(res, ==, ULONG_MAX);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_qemu_strtoul_underflow(void)
+{
+    const char *str = "-99999999999999999999999999999999999999999999";
+    char f = 'X';
+    const char *endptr = &f;
+    unsigned long res = 999;
+    int err;
+
+    err  = qemu_strtoul(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, -ERANGE);
+    g_assert_cmpint(res, ==, -1ul);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_qemu_strtoul_negative(void)
+{
+    const char *str = "  \t -321";
+    char f = 'X';
+    const char *endptr = &f;
+    unsigned long res = 999;
+    int err;
+
+    err = qemu_strtoul(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, -321ul);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_qemu_strtoul_full_correct(void)
+{
+    const char *str = "123";
+    unsigned long res = 999;
+    int err;
+
+    err = qemu_strtoul(str, NULL, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 123);
+}
+
+static void test_qemu_strtoul_full_null(void)
+{
+    unsigned long res = 999;
+    int err;
+
+    err = qemu_strtoul(NULL, NULL, 0, &res);
+
+    g_assert_cmpint(err, ==, -EINVAL);
+}
+
+static void test_qemu_strtoul_full_empty(void)
+{
+    const char *str = "";
+    unsigned long res = 999;
+    int err;
+
+    err = qemu_strtoul(str, NULL, 0, &res);
+
+    g_assert_cmpint(err, ==, -EINVAL);
+}
+static void test_qemu_strtoul_full_negative(void)
+{
+    const char *str = " \t -321";
+    unsigned long res = 999;
+    int err;
+
+    err = qemu_strtoul(str, NULL, 0, &res);
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, -321ul);
+}
+
+static void test_qemu_strtoul_full_trailing(void)
+{
+    const char *str = "123xxx";
+    unsigned long res;
+    int err;
+
+    err = qemu_strtoul(str, NULL, 0, &res);
+
+    g_assert_cmpint(err, ==, -EINVAL);
+}
+
+static void test_qemu_strtoul_full_max(void)
+{
+    const char *str = g_strdup_printf("%lu", ULONG_MAX);
+    unsigned long res = 999;
+    int err;
+
+    err = qemu_strtoul(str, NULL, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, ULONG_MAX);
+}
+
+static void test_qemu_strtoll_correct(void)
+{
+    const char *str = "12345 foo";
+    char f = 'X';
+    const char *endptr = &f;
+    int64_t res = 999;
+    int err;
+
+    err = qemu_strtoll(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 12345);
+    g_assert(endptr == str + 5);
+}
+
+static void test_qemu_strtoll_null(void)
+{
+    char f = 'X';
+    const char *endptr = &f;
+    int64_t res = 999;
+    int err;
+
+    err = qemu_strtoll(NULL, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, -EINVAL);
+    g_assert(endptr == NULL);
+}
+
+static void test_qemu_strtoll_empty(void)
+{
+    const char *str = "";
+    char f = 'X';
+    const char *endptr = &f;
+    int64_t res = 999;
+    int err;
+
+    err = qemu_strtoll(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, -EINVAL);
+}
+
+static void test_qemu_strtoll_whitespace(void)
+{
+    const char *str = "  \t  ";
+    char f = 'X';
+    const char *endptr = &f;
+    int64_t res = 999;
+    int err;
+
+    err = qemu_strtoll(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, -EINVAL);
+}
+
+static void test_qemu_strtoll_invalid(void)
+{
+    const char *str = "   xxxx  \t abc";
+    char f = 'X';
+    const char *endptr = &f;
+    int64_t res = 999;
+    int err;
+
+    err = qemu_strtoll(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, -EINVAL);
+}
+
+static void test_qemu_strtoll_trailing(void)
+{
+    const char *str = "123xxx";
+    char f = 'X';
+    const char *endptr = &f;
+    int64_t res = 999;
+    int err;
+
+    err = qemu_strtoll(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 123);
+    g_assert(endptr == str + 3);
+}
+
+static void test_qemu_strtoll_octal(void)
+{
+    const char *str = "0123";
+    char f = 'X';
+    const char *endptr = &f;
+    int64_t res = 999;
+    int err;
+
+    err = qemu_strtoll(str, &endptr, 8, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 0123);
+    g_assert(endptr == str + strlen(str));
+
+    endptr = &f;
+    res = 999;
+    err = qemu_strtoll(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 0123);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_qemu_strtoll_decimal(void)
+{
+    const char *str = "0123";
+    char f = 'X';
+    const char *endptr = &f;
+    int64_t res = 999;
+    int err;
+
+    err = qemu_strtoll(str, &endptr, 10, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 123);
+    g_assert(endptr == str + strlen(str));
+
+    str = "123";
+    endptr = &f;
+    res = 999;
+    err = qemu_strtoll(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 123);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_qemu_strtoll_hex(void)
+{
+    const char *str = "0123";
+    char f = 'X';
+    const char *endptr = &f;
+    int64_t res = 999;
+    int err;
+
+    err = qemu_strtoll(str, &endptr, 16, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 0x123);
+    g_assert(endptr == str + strlen(str));
+
+    str = "0x123";
+    endptr = &f;
+    res = 999;
+    err = qemu_strtoll(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 0x123);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_qemu_strtoll_max(void)
+{
+    const char *str = g_strdup_printf("%lld", LLONG_MAX);
+    char f = 'X';
+    const char *endptr = &f;
+    int64_t res = 999;
+    int err;
+
+    err = qemu_strtoll(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, LLONG_MAX);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_qemu_strtoll_overflow(void)
+{
+    const char *str = "99999999999999999999999999999999999999999999";
+    char f = 'X';
+    const char *endptr = &f;
+    int64_t res = 999;
+    int err;
+
+    err = qemu_strtoll(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, -ERANGE);
+    g_assert_cmpint(res, ==, LLONG_MAX);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_qemu_strtoll_underflow(void)
+{
+    const char *str = "-99999999999999999999999999999999999999999999";
+    char f = 'X';
+    const char *endptr = &f;
+    int64_t res = 999;
+    int err;
+
+    err  = qemu_strtoll(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, -ERANGE);
+    g_assert_cmpint(res, ==, LLONG_MIN);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_qemu_strtoll_negative(void)
+{
+    const char *str = "  \t -321";
+    char f = 'X';
+    const char *endptr = &f;
+    int64_t res = 999;
+    int err;
+
+    err = qemu_strtoll(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, -321);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_qemu_strtoll_full_correct(void)
+{
+    const char *str = "123";
+    int64_t res = 999;
+    int err;
+
+    err = qemu_strtoll(str, NULL, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 123);
+}
+
+static void test_qemu_strtoll_full_null(void)
+{
+    int64_t res = 999;
+    int err;
+
+    err = qemu_strtoll(NULL, NULL, 0, &res);
+
+    g_assert_cmpint(err, ==, -EINVAL);
+}
+
+static void test_qemu_strtoll_full_empty(void)
+{
+    const char *str = "";
+    int64_t res = 999;
+    int err;
+
+    err = qemu_strtoll(str, NULL, 0, &res);
+
+    g_assert_cmpint(err, ==, -EINVAL);
+}
+
+static void test_qemu_strtoll_full_negative(void)
+{
+    const char *str = " \t -321";
+    int64_t res = 999;
+    int err;
+
+    err = qemu_strtoll(str, NULL, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, -321);
+}
+
+static void test_qemu_strtoll_full_trailing(void)
+{
+    const char *str = "123xxx";
+    int64_t res = 999;
+    int err;
+
+    err = qemu_strtoll(str, NULL, 0, &res);
+
+    g_assert_cmpint(err, ==, -EINVAL);
+}
+
+static void test_qemu_strtoll_full_max(void)
+{
+
+    const char *str = g_strdup_printf("%lld", LLONG_MAX);
+    int64_t res;
+    int err;
+
+    err = qemu_strtoll(str, NULL, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, LLONG_MAX);
+}
+
+static void test_qemu_strtoull_correct(void)
+{
+    const char *str = "12345 foo";
+    char f = 'X';
+    const char *endptr = &f;
+    uint64_t res = 999;
+    int err;
+
+    err = qemu_strtoull(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 12345);
+    g_assert(endptr == str + 5);
+}
+
+static void test_qemu_strtoull_null(void)
+{
+    char f = 'X';
+    const char *endptr = &f;
+    uint64_t res = 999;
+    int err;
+
+    err = qemu_strtoull(NULL, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, -EINVAL);
+    g_assert(endptr == NULL);
+}
+
+static void test_qemu_strtoull_empty(void)
+{
+    const char *str = "";
+    char f = 'X';
+    const char *endptr = &f;
+    uint64_t res = 999;
+    int err;
+
+    err = qemu_strtoull(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, -EINVAL);
+}
+
+static void test_qemu_strtoull_whitespace(void)
+{
+    const char *str = "  \t  ";
+    char f = 'X';
+    const char *endptr = &f;
+    uint64_t res = 999;
+    int err;
+
+    err = qemu_strtoull(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, -EINVAL);
+}
+
+static void test_qemu_strtoull_invalid(void)
+{
+    const char *str = "   xxxx  \t abc";
+    char f = 'X';
+    const char *endptr = &f;
+    uint64_t res = 999;
+    int err;
+
+    err = qemu_strtoull(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, -EINVAL);
+}
+
+static void test_qemu_strtoull_trailing(void)
+{
+    const char *str = "123xxx";
+    char f = 'X';
+    const char *endptr = &f;
+    uint64_t res = 999;
+    int err;
+
+    err = qemu_strtoull(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 123);
+    g_assert(endptr == str + 3);
+}
+
+static void test_qemu_strtoull_octal(void)
+{
+    const char *str = "0123";
+    char f = 'X';
+    const char *endptr = &f;
+    uint64_t res = 999;
+    int err;
+
+    err = qemu_strtoull(str, &endptr, 8, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 0123);
+    g_assert(endptr == str + strlen(str));
+
+    endptr = &f;
+    res = 999;
+    err = qemu_strtoull(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 0123);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_qemu_strtoull_decimal(void)
+{
+    const char *str = "0123";
+    char f = 'X';
+    const char *endptr = &f;
+    uint64_t res = 999;
+    int err;
+
+    err = qemu_strtoull(str, &endptr, 10, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 123);
+    g_assert(endptr == str + strlen(str));
+
+    str = "123";
+    endptr = &f;
+    res = 999;
+    err = qemu_strtoull(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 123);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_qemu_strtoull_hex(void)
+{
+    const char *str = "0123";
+    char f = 'X';
+    const char *endptr = &f;
+    uint64_t res = 999;
+    int err;
+
+    err = qemu_strtoull(str, &endptr, 16, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 0x123);
+    g_assert(endptr == str + strlen(str));
+
+    str = "0x123";
+    endptr = &f;
+    res = 999;
+    err = qemu_strtoull(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 0x123);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_qemu_strtoull_max(void)
+{
+    const char *str = g_strdup_printf("%llu", ULLONG_MAX);
+    char f = 'X';
+    const char *endptr = &f;
+    uint64_t res = 999;
+    int err;
+
+    err = qemu_strtoull(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, ULLONG_MAX);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_qemu_strtoull_overflow(void)
+{
+    const char *str = "99999999999999999999999999999999999999999999";
+    char f = 'X';
+    const char *endptr = &f;
+    uint64_t res = 999;
+    int err;
+
+    err = qemu_strtoull(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, -ERANGE);
+    g_assert_cmpint(res, ==, ULLONG_MAX);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_qemu_strtoull_underflow(void)
+{
+    const char *str = "-99999999999999999999999999999999999999999999";
+    char f = 'X';
+    const char *endptr = &f;
+    uint64_t res = 999;
+    int err;
+
+    err  = qemu_strtoull(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, -ERANGE);
+    g_assert_cmpint(res, ==, -1);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_qemu_strtoull_negative(void)
+{
+    const char *str = "  \t -321";
+    char f = 'X';
+    const char *endptr = &f;
+    uint64_t res = 999;
+    int err;
+
+    err = qemu_strtoull(str, &endptr, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, -321);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_qemu_strtoull_full_correct(void)
+{
+    const char *str = "18446744073709551614";
+    uint64_t res = 999;
+    int err;
+
+    err = qemu_strtoull(str, NULL, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 18446744073709551614LLU);
+}
+
+static void test_qemu_strtoull_full_null(void)
+{
+    uint64_t res = 999;
+    int err;
+
+    err = qemu_strtoull(NULL, NULL, 0, &res);
+
+    g_assert_cmpint(err, ==, -EINVAL);
+}
+
+static void test_qemu_strtoull_full_empty(void)
+{
+    const char *str = "";
+    uint64_t res = 999;
+    int err;
+
+    err = qemu_strtoull(str, NULL, 0, &res);
+
+    g_assert_cmpint(err, ==, -EINVAL);
+}
+
+static void test_qemu_strtoull_full_negative(void)
+{
+    const char *str = " \t -321";
+    uint64_t res = 999;
+    int err;
+
+    err = qemu_strtoull(str, NULL, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, 18446744073709551295LLU);
+}
+
+static void test_qemu_strtoull_full_trailing(void)
+{
+    const char *str = "18446744073709551614xxxxxx";
+    uint64_t res = 999;
+    int err;
+
+    err = qemu_strtoull(str, NULL, 0, &res);
+
+    g_assert_cmpint(err, ==, -EINVAL);
+}
+
+static void test_qemu_strtoull_full_max(void)
+{
+    const char *str = g_strdup_printf("%lld", ULLONG_MAX);
+    uint64_t res = 999;
+    int err;
+
+    err = qemu_strtoull(str, NULL, 0, &res);
+
+    g_assert_cmpint(err, ==, 0);
+    g_assert_cmpint(res, ==, ULLONG_MAX);
+}
+
 int main(int argc, char **argv)
 {
     g_test_init(&argc, &argv, NULL);
@@ -247,5 +1373,134 @@
     g_test_add_func("/cutils/parse_uint_full/correct",
                     test_parse_uint_full_correct);
 
+    /* qemu_strtol() tests */
+    g_test_add_func("/cutils/qemu_strtol/correct", test_qemu_strtol_correct);
+    g_test_add_func("/cutils/qemu_strtol/null", test_qemu_strtol_null);
+    g_test_add_func("/cutils/qemu_strtol/empty", test_qemu_strtol_empty);
+    g_test_add_func("/cutils/qemu_strtol/whitespace",
+                    test_qemu_strtol_whitespace);
+    g_test_add_func("/cutils/qemu_strtol/invalid", test_qemu_strtol_invalid);
+    g_test_add_func("/cutils/qemu_strtol/trailing", test_qemu_strtol_trailing);
+    g_test_add_func("/cutils/qemu_strtol/octal", test_qemu_strtol_octal);
+    g_test_add_func("/cutils/qemu_strtol/decimal", test_qemu_strtol_decimal);
+    g_test_add_func("/cutils/qemu_strtol/hex", test_qemu_strtol_hex);
+    g_test_add_func("/cutils/qemu_strtol/max", test_qemu_strtol_max);
+    g_test_add_func("/cutils/qemu_strtol/overflow", test_qemu_strtol_overflow);
+    g_test_add_func("/cutils/qemu_strtol/underflow",
+                    test_qemu_strtol_underflow);
+    g_test_add_func("/cutils/qemu_strtol/negative", test_qemu_strtol_negative);
+    g_test_add_func("/cutils/qemu_strtol_full/correct",
+                    test_qemu_strtol_full_correct);
+    g_test_add_func("/cutils/qemu_strtol_full/null",
+                    test_qemu_strtol_full_null);
+    g_test_add_func("/cutils/qemu_strtol_full/empty",
+                    test_qemu_strtol_full_empty);
+    g_test_add_func("/cutils/qemu_strtol_full/negative",
+                    test_qemu_strtol_full_negative);
+    g_test_add_func("/cutils/qemu_strtol_full/trailing",
+                    test_qemu_strtol_full_trailing);
+    g_test_add_func("/cutils/qemu_strtol_full/max",
+                    test_qemu_strtol_full_max);
+
+    /* qemu_strtoul() tests */
+    g_test_add_func("/cutils/qemu_strtoul/correct", test_qemu_strtoul_correct);
+    g_test_add_func("/cutils/qemu_strtoul/null", test_qemu_strtoul_null);
+    g_test_add_func("/cutils/qemu_strtoul/empty", test_qemu_strtoul_empty);
+    g_test_add_func("/cutils/qemu_strtoul/whitespace",
+                    test_qemu_strtoul_whitespace);
+    g_test_add_func("/cutils/qemu_strtoul/invalid", test_qemu_strtoul_invalid);
+    g_test_add_func("/cutils/qemu_strtoul/trailing",
+                    test_qemu_strtoul_trailing);
+    g_test_add_func("/cutils/qemu_strtoul/octal", test_qemu_strtoul_octal);
+    g_test_add_func("/cutils/qemu_strtoul/decimal", test_qemu_strtoul_decimal);
+    g_test_add_func("/cutils/qemu_strtoul/hex", test_qemu_strtoul_hex);
+    g_test_add_func("/cutils/qemu_strtoul/max", test_qemu_strtoul_max);
+    g_test_add_func("/cutils/qemu_strtoul/overflow",
+                    test_qemu_strtoul_overflow);
+    g_test_add_func("/cutils/qemu_strtoul/underflow",
+                    test_qemu_strtoul_underflow);
+    g_test_add_func("/cutils/qemu_strtoul/negative",
+                    test_qemu_strtoul_negative);
+    g_test_add_func("/cutils/qemu_strtoul_full/correct",
+                    test_qemu_strtoul_full_correct);
+    g_test_add_func("/cutils/qemu_strtoul_full/null",
+                    test_qemu_strtoul_full_null);
+    g_test_add_func("/cutils/qemu_strtoul_full/empty",
+                    test_qemu_strtoul_full_empty);
+    g_test_add_func("/cutils/qemu_strtoul_full/negative",
+                    test_qemu_strtoul_full_negative);
+    g_test_add_func("/cutils/qemu_strtoul_full/trailing",
+                    test_qemu_strtoul_full_trailing);
+    g_test_add_func("/cutils/qemu_strtoul_full/max",
+                    test_qemu_strtoul_full_max);
+
+    /* qemu_strtoll() tests */
+    g_test_add_func("/cutils/qemu_strtoll/correct", test_qemu_strtoll_correct);
+    g_test_add_func("/cutils/qemu_strtoll/null", test_qemu_strtoll_null);
+    g_test_add_func("/cutils/qemu_strtoll/empty", test_qemu_strtoll_empty);
+    g_test_add_func("/cutils/qemu_strtoll/whitespace",
+                    test_qemu_strtoll_whitespace);
+    g_test_add_func("/cutils/qemu_strtoll/invalid", test_qemu_strtoll_invalid);
+    g_test_add_func("/cutils/qemu_strtoll/trailing",
+                    test_qemu_strtoll_trailing);
+    g_test_add_func("/cutils/qemu_strtoll/octal", test_qemu_strtoll_octal);
+    g_test_add_func("/cutils/qemu_strtoll/decimal", test_qemu_strtoll_decimal);
+    g_test_add_func("/cutils/qemu_strtoll/hex", test_qemu_strtoll_hex);
+    g_test_add_func("/cutils/qemu_strtoll/max", test_qemu_strtoll_max);
+    g_test_add_func("/cutils/qemu_strtoll/overflow",
+                    test_qemu_strtoll_overflow);
+    g_test_add_func("/cutils/qemu_strtoll/underflow",
+                    test_qemu_strtoll_underflow);
+    g_test_add_func("/cutils/qemu_strtoll/negative",
+                    test_qemu_strtoll_negative);
+    g_test_add_func("/cutils/qemu_strtoll_full/correct",
+                    test_qemu_strtoll_full_correct);
+    g_test_add_func("/cutils/qemu_strtoll_full/null",
+                    test_qemu_strtoll_full_null);
+    g_test_add_func("/cutils/qemu_strtoll_full/empty",
+                    test_qemu_strtoll_full_empty);
+    g_test_add_func("/cutils/qemu_strtoll_full/negative",
+                    test_qemu_strtoll_full_negative);
+    g_test_add_func("/cutils/qemu_strtoll_full/trailing",
+                    test_qemu_strtoll_full_trailing);
+    g_test_add_func("/cutils/qemu_strtoll_full/max",
+                    test_qemu_strtoll_full_max);
+
+    /* qemu_strtoull() tests */
+    g_test_add_func("/cutils/qemu_strtoull/correct",
+                    test_qemu_strtoull_correct);
+    g_test_add_func("/cutils/qemu_strtoull/null",
+                    test_qemu_strtoull_null);
+    g_test_add_func("/cutils/qemu_strtoull/empty", test_qemu_strtoull_empty);
+    g_test_add_func("/cutils/qemu_strtoull/whitespace",
+                    test_qemu_strtoull_whitespace);
+    g_test_add_func("/cutils/qemu_strtoull/invalid",
+                    test_qemu_strtoull_invalid);
+    g_test_add_func("/cutils/qemu_strtoull/trailing",
+                    test_qemu_strtoull_trailing);
+    g_test_add_func("/cutils/qemu_strtoull/octal", test_qemu_strtoull_octal);
+    g_test_add_func("/cutils/qemu_strtoull/decimal",
+                    test_qemu_strtoull_decimal);
+    g_test_add_func("/cutils/qemu_strtoull/hex", test_qemu_strtoull_hex);
+    g_test_add_func("/cutils/qemu_strtoull/max", test_qemu_strtoull_max);
+    g_test_add_func("/cutils/qemu_strtoull/overflow",
+                    test_qemu_strtoull_overflow);
+    g_test_add_func("/cutils/qemu_strtoull/underflow",
+                    test_qemu_strtoull_underflow);
+    g_test_add_func("/cutils/qemu_strtoull/negative",
+                    test_qemu_strtoull_negative);
+    g_test_add_func("/cutils/qemu_strtoull_full/correct",
+                    test_qemu_strtoull_full_correct);
+    g_test_add_func("/cutils/qemu_strtoull_full/null",
+                    test_qemu_strtoull_full_null);
+    g_test_add_func("/cutils/qemu_strtoull_full/empty",
+                    test_qemu_strtoull_full_empty);
+    g_test_add_func("/cutils/qemu_strtoull_full/negative",
+                    test_qemu_strtoull_full_negative);
+    g_test_add_func("/cutils/qemu_strtoull_full/trailing",
+                    test_qemu_strtoull_full_trailing);
+    g_test_add_func("/cutils/qemu_strtoull_full/max",
+                    test_qemu_strtoull_full_max);
+
     return g_test_run();
 }
diff --git a/translate-all.c b/translate-all.c
index 8eb4512..0b8b34b 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -122,13 +122,45 @@
 uintptr_t qemu_host_page_size;
 uintptr_t qemu_host_page_mask;
 
-/* This is a multi-level map on the virtual address space.
-   The bottom level has pointers to PageDesc.  */
+/* The bottom level has pointers to PageDesc */
 static void *l1_map[V_L1_SIZE];
 
 /* code generation context */
 TCGContext tcg_ctx;
 
+/* translation block context */
+#ifdef CONFIG_USER_ONLY
+__thread int have_tb_lock;
+#endif
+
+void tb_lock(void)
+{
+#ifdef CONFIG_USER_ONLY
+    assert(!have_tb_lock);
+    qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
+    have_tb_lock++;
+#endif
+}
+
+void tb_unlock(void)
+{
+#ifdef CONFIG_USER_ONLY
+    assert(have_tb_lock);
+    have_tb_lock--;
+    qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
+#endif
+}
+
+void tb_lock_reset(void)
+{
+#ifdef CONFIG_USER_ONLY
+    if (have_tb_lock) {
+        qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
+        have_tb_lock = 0;
+    }
+#endif
+}
+
 static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
                          tb_page_addr_t phys_page2);
 static TranslationBlock *tb_find_pc(uintptr_t tc_ptr);
@@ -139,11 +171,13 @@
 }
 
 /* return non zero if the very first instruction is invalid so that
-   the virtual CPU can trigger an exception.
-
-   '*gen_code_size_ptr' contains the size of the generated code (host
-   code).
-*/
+ * the virtual CPU can trigger an exception.
+ *
+ * '*gen_code_size_ptr' contains the size of the generated code (host
+ * code).
+ *
+ * Called with mmap_lock held for user-mode emulation.
+ */
 int cpu_gen_code(CPUArchState *env, TranslationBlock *tb, int *gen_code_size_ptr)
 {
     TCGContext *s = &tcg_ctx;
@@ -388,6 +422,9 @@
 #endif
 }
 
+/* If alloc=1:
+ * Called with mmap_lock held for user-mode emulation.
+ */
 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
 {
     PageDesc *pd;
@@ -399,26 +436,26 @@
 
     /* Level 2..N-1.  */
     for (i = V_L1_SHIFT / V_L2_BITS - 1; i > 0; i--) {
-        void **p = *lp;
+        void **p = atomic_rcu_read(lp);
 
         if (p == NULL) {
             if (!alloc) {
                 return NULL;
             }
             p = g_new0(void *, V_L2_SIZE);
-            *lp = p;
+            atomic_rcu_set(lp, p);
         }
 
         lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
     }
 
-    pd = *lp;
+    pd = atomic_rcu_read(lp);
     if (pd == NULL) {
         if (!alloc) {
             return NULL;
         }
         pd = g_new0(PageDesc, V_L2_SIZE);
-        *lp = pd;
+        atomic_rcu_set(lp, pd);
     }
 
     return pd + (index & (V_L2_SIZE - 1));
@@ -429,11 +466,6 @@
     return page_find_alloc(index, 0);
 }
 
-#if !defined(CONFIG_USER_ONLY)
-#define mmap_lock() do { } while (0)
-#define mmap_unlock() do { } while (0)
-#endif
-
 #if defined(CONFIG_USER_ONLY)
 /* Currently it is not recommended to allocate big chunks of data in
    user mode. It will change when a dedicated libc will be used.  */
@@ -676,6 +708,7 @@
             CODE_GEN_AVG_BLOCK_SIZE;
     tcg_ctx.tb_ctx.tbs =
             g_malloc(tcg_ctx.code_gen_max_blocks * sizeof(TranslationBlock));
+    qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
 }
 
 /* Must be called before using the QEMU cpus. 'tb_size' is the size
@@ -994,6 +1027,7 @@
     }
 }
 
+/* Called with mmap_lock held for user mode emulation.  */
 TranslationBlock *tb_gen_code(CPUState *cpu,
                               target_ulong pc, target_ulong cs_base,
                               int flags, int cflags)
@@ -1041,6 +1075,8 @@
  * 'is_cpu_write_access' should be true if called from a real cpu write
  * access: the virtual CPU will exit the current TB if code is modified inside
  * this TB.
+ *
+ * Called with mmap_lock held for user-mode emulation
  */
 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end)
 {
@@ -1057,6 +1093,8 @@
  * 'is_cpu_write_access' should be true if called from a real cpu write
  * access: the virtual CPU will exit the current TB if code is modified inside
  * this TB.
+ *
+ * Called with mmap_lock held for user-mode emulation
  */
 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
                                    int is_cpu_write_access)
@@ -1205,6 +1243,7 @@
 }
 
 #if !defined(CONFIG_SOFTMMU)
+/* Called with mmap_lock held.  */
 static void tb_invalidate_phys_page(tb_page_addr_t addr,
                                     uintptr_t pc, void *puc,
                                     bool locked)
@@ -1274,7 +1313,10 @@
 }
 #endif
 
-/* add the tb in the target page and protect it if necessary */
+/* add the tb in the target page and protect it if necessary
+ *
+ * Called with mmap_lock held for user-mode emulation.
+ */
 static inline void tb_alloc_page(TranslationBlock *tb,
                                  unsigned int n, tb_page_addr_t page_addr)
 {
@@ -1330,16 +1372,16 @@
 }
 
 /* add a new TB and link it to the physical page tables. phys_page2 is
-   (-1) to indicate that only one page contains the TB. */
+ * (-1) to indicate that only one page contains the TB.
+ *
+ * Called with mmap_lock held for user-mode emulation.
+ */
 static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
                          tb_page_addr_t phys_page2)
 {
     unsigned int h;
     TranslationBlock **ptb;
 
-    /* Grab the mmap lock to stop another thread invalidating this TB
-       before we are done.  */
-    mmap_lock();
     /* add in the physical hash table */
     h = tb_phys_hash_func(phys_pc);
     ptb = &tcg_ctx.tb_ctx.tb_phys_hash[h];
@@ -1369,7 +1411,6 @@
 #ifdef DEBUG_TB_CHECK
     tb_page_check();
 #endif
-    mmap_unlock();
 }
 
 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
diff --git a/util/cutils.c b/util/cutils.c
index 9234452..ae35198 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -354,6 +354,156 @@
 }
 
 /**
+ * Helper function for qemu_strto*l() functions.
+ */
+static int check_strtox_error(const char *p, char *endptr, const char **next,
+                              int err)
+{
+    /* If no conversion was performed, prefer BSD behavior over glibc
+     * behavior.
+     */
+    if (err == 0 && endptr == p) {
+        err = EINVAL;
+    }
+    if (!next && *endptr) {
+        return -EINVAL;
+    }
+    if (next) {
+        *next = endptr;
+    }
+    return -err;
+}
+
+/**
+ * QEMU wrappers for strtol(), strtoll(), strtoul(), strotull() C functions.
+ *
+ * Convert ASCII string @nptr to a long integer value
+ * from the given @base. Parameters @nptr, @endptr, @base
+ * follows same semantics as strtol() C function.
+ *
+ * Unlike from strtol() function, if @endptr is not NULL, this
+ * function will return -EINVAL whenever it cannot fully convert
+ * the string in @nptr with given @base to a long. This function returns
+ * the result of the conversion only through the @result parameter.
+ *
+ * If NULL is passed in @endptr, then the whole string in @ntpr
+ * is a number otherwise it returns -EINVAL.
+ *
+ * RETURN VALUE
+ * Unlike from strtol() function, this wrapper returns either
+ * -EINVAL or the errno set by strtol() function (e.g -ERANGE).
+ * If the conversion overflows, -ERANGE is returned, and @result
+ * is set to the max value of the desired type
+ * (e.g. LONG_MAX, LLONG_MAX, ULONG_MAX, ULLONG_MAX). If the case
+ * of underflow, -ERANGE is returned, and @result is set to the min
+ * value of the desired type. For strtol(), strtoll(), @result is set to
+ * LONG_MIN, LLONG_MIN, respectively, and for strtoul(), strtoull() it
+ * is set to 0.
+ */
+int qemu_strtol(const char *nptr, const char **endptr, int base,
+                long *result)
+{
+    char *p;
+    int err = 0;
+    if (!nptr) {
+        if (endptr) {
+            *endptr = nptr;
+        }
+        err = -EINVAL;
+    } else {
+        errno = 0;
+        *result = strtol(nptr, &p, base);
+        err = check_strtox_error(nptr, p, endptr, errno);
+    }
+    return err;
+}
+
+/**
+ * Converts ASCII string to an unsigned long integer.
+ *
+ * If string contains a negative number, value will be converted to
+ * the unsigned representation of the signed value, unless the original
+ * (nonnegated) value would overflow, in this case, it will set @result
+ * to ULONG_MAX, and return ERANGE.
+ *
+ * The same behavior holds, for qemu_strtoull() but sets @result to
+ * ULLONG_MAX instead of ULONG_MAX.
+ *
+ * See qemu_strtol() documentation for more info.
+ */
+int qemu_strtoul(const char *nptr, const char **endptr, int base,
+                 unsigned long *result)
+{
+    char *p;
+    int err = 0;
+    if (!nptr) {
+        if (endptr) {
+            *endptr = nptr;
+        }
+        err = -EINVAL;
+    } else {
+        errno = 0;
+        *result = strtoul(nptr, &p, base);
+        /* Windows returns 1 for negative out-of-range values.  */
+        if (errno == ERANGE) {
+            *result = -1;
+        }
+        err = check_strtox_error(nptr, p, endptr, errno);
+    }
+    return err;
+}
+
+/**
+ * Converts ASCII string to a long long integer.
+ *
+ * See qemu_strtol() documentation for more info.
+ */
+int qemu_strtoll(const char *nptr, const char **endptr, int base,
+                 int64_t *result)
+{
+    char *p;
+    int err = 0;
+    if (!nptr) {
+        if (endptr) {
+            *endptr = nptr;
+        }
+        err = -EINVAL;
+    } else {
+        errno = 0;
+        *result = strtoll(nptr, &p, base);
+        err = check_strtox_error(nptr, p, endptr, errno);
+    }
+    return err;
+}
+
+/**
+ * Converts ASCII string to an unsigned long long integer.
+ *
+ * See qemu_strtol() documentation for more info.
+ */
+int qemu_strtoull(const char *nptr, const char **endptr, int base,
+                  uint64_t *result)
+{
+    char *p;
+    int err = 0;
+    if (!nptr) {
+        if (endptr) {
+            *endptr = nptr;
+        }
+        err = -EINVAL;
+    } else {
+        errno = 0;
+        *result = strtoull(nptr, &p, base);
+        /* Windows returns 1 for negative out-of-range values.  */
+        if (errno == ERANGE) {
+            *result = -1;
+        }
+        err = check_strtox_error(nptr, p, endptr, errno);
+    }
+    return err;
+}
+
+/**
  * parse_uint:
  *
  * @s: String to parse
diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
index b1abe97..dbd8094 100644
--- a/util/qemu-thread-posix.c
+++ b/util/qemu-thread-posix.c
@@ -298,7 +298,16 @@
 
 static inline void futex_wait(QemuEvent *ev, unsigned val)
 {
-    futex(ev, FUTEX_WAIT, (int) val, NULL, NULL, 0);
+    while (futex(ev, FUTEX_WAIT, (int) val, NULL, NULL, 0)) {
+        switch (errno) {
+        case EWOULDBLOCK:
+            return;
+        case EINTR:
+            break; /* get out of switch and retry */
+        default:
+            abort();
+        }
+    }
 }
 #else
 static inline void futex_wake(QemuEvent *ev, int n)
diff --git a/util/rcu.c b/util/rcu.c
index 8ba304d..47c2bce 100644
--- a/util/rcu.c
+++ b/util/rcu.c
@@ -335,6 +335,11 @@
     qemu_mutex_unlock(&rcu_registry_lock);
     qemu_mutex_unlock(&rcu_sync_lock);
 }
+
+static void rcu_init_child(void)
+{
+    qemu_mutex_init(&rcu_registry_lock);
+}
 #endif
 
 void rcu_after_fork(void)
@@ -346,7 +351,7 @@
 static void __attribute__((__constructor__)) rcu_init(void)
 {
 #ifdef CONFIG_POSIX
-    pthread_atfork(rcu_init_lock, rcu_init_unlock, rcu_init_unlock);
+    pthread_atfork(rcu_init_lock, rcu_init_unlock, rcu_init_child);
 #endif
     rcu_init_complete();
 }
diff --git a/vl.c b/vl.c
index 1c8b28d..066a080 100644
--- a/vl.c
+++ b/vl.c
@@ -1745,6 +1745,12 @@
     cpu_synchronize_all_post_reset();
 }
 
+void qemu_system_guest_panicked(void)
+{
+    qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_PAUSE, &error_abort);
+    vm_stop(RUN_STATE_GUEST_PANICKED);
+}
+
 void qemu_system_reset_request(void)
 {
     if (no_reboot) {
