Merge remote-tracking branch 'remotes/juanquintela/tags/migration/20140515' into staging

migration/next for 20140515

# gpg: Signature made Thu 15 May 2014 02:32:25 BST using RSA key ID 5872D723
# gpg: Can't check signature: public key not found

* remotes/juanquintela/tags/migration/20140515:
  usb: fix up post load checks
  migration: show average throughput when migration finishes
  savevm: Remove all the unneeded version_minimum_id_old (rest)
  savevm: Remove all the unneeded version_minimum_id_old (usb)
  Split ram_save_block
  arch_init: Simplify code for load_xbzrle()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/bsd-user/main.c b/bsd-user/main.c
index 0f9169d..4ba61da 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -1003,8 +1003,6 @@
     cpu->opaque = ts;
 
 #if defined(TARGET_I386)
-    cpu_x86_set_cpl(env, 3);
-
     env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
     env->hflags |= HF_PE_MASK;
     if (env->features[FEAT_1_EDX] & CPUID_SSE) {
diff --git a/configure b/configure
index 6adfa72..e565e59 100755
--- a/configure
+++ b/configure
@@ -4768,12 +4768,6 @@
   echo "GCOV=$gcov_tool" >> $config_host_mak
 fi
 
-iotests_common_env="tests/qemu-iotests/common.env"
-
-echo "# Automatically generated by configure - do not modify" > $iotests_common_env
-echo >> $iotests_common_env
-echo "PYTHON='$python'" >> $iotests_common_env
-
 # use included Linux headers
 if test "$linux" = "yes" ; then
   mkdir -p linux-headers
diff --git a/cpu-exec.c b/cpu-exec.c
index 2f54054..38e5f02 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -336,19 +336,25 @@
                     }
 #endif
 #if defined(TARGET_I386)
+                    if (interrupt_request & CPU_INTERRUPT_INIT) {
+                        cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0);
+                        do_cpu_init(x86_cpu);
+                        cpu->exception_index = EXCP_HALTED;
+                        cpu_loop_exit(cpu);
+                    }
+#else
+                    if (interrupt_request & CPU_INTERRUPT_RESET) {
+                        cpu_reset(cpu);
+                    }
+#endif
+#if defined(TARGET_I386)
 #if !defined(CONFIG_USER_ONLY)
                     if (interrupt_request & CPU_INTERRUPT_POLL) {
                         cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
                         apic_poll_irq(x86_cpu->apic_state);
                     }
 #endif
-                    if (interrupt_request & CPU_INTERRUPT_INIT) {
-                            cpu_svm_check_intercept_param(env, SVM_EXIT_INIT,
-                                                          0);
-                            do_cpu_init(x86_cpu);
-                            cpu->exception_index = EXCP_HALTED;
-                            cpu_loop_exit(cpu);
-                    } else if (interrupt_request & CPU_INTERRUPT_SIPI) {
+                    if (interrupt_request & CPU_INTERRUPT_SIPI) {
                             do_cpu_sipi(x86_cpu);
                     } else if (env->hflags2 & HF2_GIF_MASK) {
                         if ((interrupt_request & CPU_INTERRUPT_SMI) &&
@@ -405,9 +411,6 @@
                         }
                     }
 #elif defined(TARGET_PPC)
-                    if ((interrupt_request & CPU_INTERRUPT_RESET)) {
-                        cpu_reset(cpu);
-                    }
                     if (interrupt_request & CPU_INTERRUPT_HARD) {
                         ppc_hw_interrupt(env);
                         if (env->pending_interrupts == 0) {
diff --git a/disas/libvixl/a64/assembler-a64.h b/disas/libvixl/a64/assembler-a64.h
index 93b3011..1e2947b 100644
--- a/disas/libvixl/a64/assembler-a64.h
+++ b/disas/libvixl/a64/assembler-a64.h
@@ -38,6 +38,7 @@
 typedef uint64_t RegList;
 static const int kRegListSizeInBits = sizeof(RegList) * 8;
 
+
 // Registers.
 
 // Some CPURegister methods can return Register and FPRegister types, so we
@@ -58,62 +59,62 @@
   };
 
   CPURegister() : code_(0), size_(0), type_(kNoRegister) {
-    ASSERT(!IsValid());
-    ASSERT(IsNone());
+    VIXL_ASSERT(!IsValid());
+    VIXL_ASSERT(IsNone());
   }
 
   CPURegister(unsigned code, unsigned size, RegisterType type)
       : code_(code), size_(size), type_(type) {
-    ASSERT(IsValidOrNone());
+    VIXL_ASSERT(IsValidOrNone());
   }
 
   unsigned code() const {
-    ASSERT(IsValid());
+    VIXL_ASSERT(IsValid());
     return code_;
   }
 
   RegisterType type() const {
-    ASSERT(IsValidOrNone());
+    VIXL_ASSERT(IsValidOrNone());
     return type_;
   }
 
   RegList Bit() const {
-    ASSERT(code_ < (sizeof(RegList) * 8));
+    VIXL_ASSERT(code_ < (sizeof(RegList) * 8));
     return IsValid() ? (static_cast<RegList>(1) << code_) : 0;
   }
 
   unsigned size() const {
-    ASSERT(IsValid());
+    VIXL_ASSERT(IsValid());
     return size_;
   }
 
   int SizeInBytes() const {
-    ASSERT(IsValid());
-    ASSERT(size() % 8 == 0);
+    VIXL_ASSERT(IsValid());
+    VIXL_ASSERT(size() % 8 == 0);
     return size_ / 8;
   }
 
   int SizeInBits() const {
-    ASSERT(IsValid());
+    VIXL_ASSERT(IsValid());
     return size_;
   }
 
   bool Is32Bits() const {
-    ASSERT(IsValid());
+    VIXL_ASSERT(IsValid());
     return size_ == 32;
   }
 
   bool Is64Bits() const {
-    ASSERT(IsValid());
+    VIXL_ASSERT(IsValid());
     return size_ == 64;
   }
 
   bool IsValid() const {
     if (IsValidRegister() || IsValidFPRegister()) {
-      ASSERT(!IsNone());
+      VIXL_ASSERT(!IsNone());
       return true;
     } else {
-      ASSERT(IsNone());
+      VIXL_ASSERT(IsNone());
       return false;
     }
   }
@@ -132,25 +133,29 @@
 
   bool IsNone() const {
     // kNoRegister types should always have size 0 and code 0.
-    ASSERT((type_ != kNoRegister) || (code_ == 0));
-    ASSERT((type_ != kNoRegister) || (size_ == 0));
+    VIXL_ASSERT((type_ != kNoRegister) || (code_ == 0));
+    VIXL_ASSERT((type_ != kNoRegister) || (size_ == 0));
 
     return type_ == kNoRegister;
   }
 
+  bool Aliases(const CPURegister& other) const {
+    VIXL_ASSERT(IsValidOrNone() && other.IsValidOrNone());
+    return (code_ == other.code_) && (type_ == other.type_);
+  }
+
   bool Is(const CPURegister& other) const {
-    ASSERT(IsValidOrNone() && other.IsValidOrNone());
-    return (code_ == other.code_) && (size_ == other.size_) &&
-           (type_ == other.type_);
+    VIXL_ASSERT(IsValidOrNone() && other.IsValidOrNone());
+    return Aliases(other) && (size_ == other.size_);
   }
 
   inline bool IsZero() const {
-    ASSERT(IsValid());
+    VIXL_ASSERT(IsValid());
     return IsRegister() && (code_ == kZeroRegCode);
   }
 
   inline bool IsSP() const {
-    ASSERT(IsValid());
+    VIXL_ASSERT(IsValid());
     return IsRegister() && (code_ == kSPRegInternalCode);
   }
 
@@ -188,13 +193,13 @@
   explicit Register() : CPURegister() {}
   inline explicit Register(const CPURegister& other)
       : CPURegister(other.code(), other.size(), other.type()) {
-    ASSERT(IsValidRegister());
+    VIXL_ASSERT(IsValidRegister());
   }
   explicit Register(unsigned code, unsigned size)
       : CPURegister(code, size, kRegister) {}
 
   bool IsValid() const {
-    ASSERT(IsRegister() || IsNone());
+    VIXL_ASSERT(IsRegister() || IsNone());
     return IsValidRegister();
   }
 
@@ -216,13 +221,13 @@
   inline FPRegister() : CPURegister() {}
   inline explicit FPRegister(const CPURegister& other)
       : CPURegister(other.code(), other.size(), other.type()) {
-    ASSERT(IsValidFPRegister());
+    VIXL_ASSERT(IsValidFPRegister());
   }
   inline FPRegister(unsigned code, unsigned size)
       : CPURegister(code, size, kFPRegister) {}
 
   bool IsValid() const {
-    ASSERT(IsFPRegister() || IsNone());
+    VIXL_ASSERT(IsFPRegister() || IsNone());
     return IsValidFPRegister();
   }
 
@@ -306,30 +311,30 @@
                              CPURegister reg4 = NoCPUReg)
       : list_(reg1.Bit() | reg2.Bit() | reg3.Bit() | reg4.Bit()),
         size_(reg1.size()), type_(reg1.type()) {
-    ASSERT(AreSameSizeAndType(reg1, reg2, reg3, reg4));
-    ASSERT(IsValid());
+    VIXL_ASSERT(AreSameSizeAndType(reg1, reg2, reg3, reg4));
+    VIXL_ASSERT(IsValid());
   }
 
   inline CPURegList(CPURegister::RegisterType type, unsigned size, RegList list)
       : list_(list), size_(size), type_(type) {
-    ASSERT(IsValid());
+    VIXL_ASSERT(IsValid());
   }
 
   inline CPURegList(CPURegister::RegisterType type, unsigned size,
                     unsigned first_reg, unsigned last_reg)
       : size_(size), type_(type) {
-    ASSERT(((type == CPURegister::kRegister) &&
-            (last_reg < kNumberOfRegisters)) ||
-           ((type == CPURegister::kFPRegister) &&
-            (last_reg < kNumberOfFPRegisters)));
-    ASSERT(last_reg >= first_reg);
-    list_ = (1UL << (last_reg + 1)) - 1;
-    list_ &= ~((1UL << first_reg) - 1);
-    ASSERT(IsValid());
+    VIXL_ASSERT(((type == CPURegister::kRegister) &&
+                 (last_reg < kNumberOfRegisters)) ||
+                ((type == CPURegister::kFPRegister) &&
+                 (last_reg < kNumberOfFPRegisters)));
+    VIXL_ASSERT(last_reg >= first_reg);
+    list_ = (UINT64_C(1) << (last_reg + 1)) - 1;
+    list_ &= ~((UINT64_C(1) << first_reg) - 1);
+    VIXL_ASSERT(IsValid());
   }
 
   inline CPURegister::RegisterType type() const {
-    ASSERT(IsValid());
+    VIXL_ASSERT(IsValid());
     return type_;
   }
 
@@ -337,9 +342,9 @@
   // this list are left unchanged. The type and size of the registers in the
   // 'other' list must match those in this list.
   void Combine(const CPURegList& other) {
-    ASSERT(IsValid());
-    ASSERT(other.type() == type_);
-    ASSERT(other.RegisterSizeInBits() == size_);
+    VIXL_ASSERT(IsValid());
+    VIXL_ASSERT(other.type() == type_);
+    VIXL_ASSERT(other.RegisterSizeInBits() == size_);
     list_ |= other.list();
   }
 
@@ -347,44 +352,49 @@
   // do not exist in this list are ignored. The type and size of the registers
   // in the 'other' list must match those in this list.
   void Remove(const CPURegList& other) {
-    ASSERT(IsValid());
-    ASSERT(other.type() == type_);
-    ASSERT(other.RegisterSizeInBits() == size_);
+    VIXL_ASSERT(IsValid());
+    VIXL_ASSERT(other.type() == type_);
+    VIXL_ASSERT(other.RegisterSizeInBits() == size_);
     list_ &= ~other.list();
   }
 
   // Variants of Combine and Remove which take a single register.
   inline void Combine(const CPURegister& other) {
-    ASSERT(other.type() == type_);
-    ASSERT(other.size() == size_);
+    VIXL_ASSERT(other.type() == type_);
+    VIXL_ASSERT(other.size() == size_);
     Combine(other.code());
   }
 
   inline void Remove(const CPURegister& other) {
-    ASSERT(other.type() == type_);
-    ASSERT(other.size() == size_);
+    VIXL_ASSERT(other.type() == type_);
+    VIXL_ASSERT(other.size() == size_);
     Remove(other.code());
   }
 
   // Variants of Combine and Remove which take a single register by its code;
   // the type and size of the register is inferred from this list.
   inline void Combine(int code) {
-    ASSERT(IsValid());
-    ASSERT(CPURegister(code, size_, type_).IsValid());
-    list_ |= (1UL << code);
+    VIXL_ASSERT(IsValid());
+    VIXL_ASSERT(CPURegister(code, size_, type_).IsValid());
+    list_ |= (UINT64_C(1) << code);
   }
 
   inline void Remove(int code) {
-    ASSERT(IsValid());
-    ASSERT(CPURegister(code, size_, type_).IsValid());
-    list_ &= ~(1UL << code);
+    VIXL_ASSERT(IsValid());
+    VIXL_ASSERT(CPURegister(code, size_, type_).IsValid());
+    list_ &= ~(UINT64_C(1) << code);
   }
 
   inline RegList list() const {
-    ASSERT(IsValid());
+    VIXL_ASSERT(IsValid());
     return list_;
   }
 
+  inline void set_list(RegList new_list) {
+    VIXL_ASSERT(IsValid());
+    list_ = new_list;
+  }
+
   // Remove all callee-saved registers from the list. This can be useful when
   // preparing registers for an AAPCS64 function call, for example.
   void RemoveCalleeSaved();
@@ -401,31 +411,41 @@
   static CPURegList GetCallerSavedFP(unsigned size = kDRegSize);
 
   inline bool IsEmpty() const {
-    ASSERT(IsValid());
+    VIXL_ASSERT(IsValid());
     return list_ == 0;
   }
 
   inline bool IncludesAliasOf(const CPURegister& other) const {
-    ASSERT(IsValid());
-    return (type_ == other.type()) && (other.Bit() & list_);
+    VIXL_ASSERT(IsValid());
+    return (type_ == other.type()) && ((other.Bit() & list_) != 0);
+  }
+
+  inline bool IncludesAliasOf(int code) const {
+    VIXL_ASSERT(IsValid());
+    return (((UINT64_C(1) << code) & list_) != 0);  // Test bit 'code'.
+  }
 
   inline int Count() const {
-    ASSERT(IsValid());
+    VIXL_ASSERT(IsValid());
     return CountSetBits(list_, kRegListSizeInBits);
   }
 
   inline unsigned RegisterSizeInBits() const {
-    ASSERT(IsValid());
+    VIXL_ASSERT(IsValid());
     return size_;
   }
 
   inline unsigned RegisterSizeInBytes() const {
     int size_in_bits = RegisterSizeInBits();
-    ASSERT((size_in_bits % 8) == 0);
+    VIXL_ASSERT((size_in_bits % 8) == 0);
     return size_in_bits / 8;
   }
 
+  inline unsigned TotalSizeInBytes() const {
+    VIXL_ASSERT(IsValid());
+    return RegisterSizeInBytes() * Count();
+  }
+
  private:
   RegList list_;
   unsigned size_;
@@ -471,33 +491,34 @@
   bool IsImmediate() const;
   bool IsShiftedRegister() const;
   bool IsExtendedRegister() const;
+  bool IsZero() const;
 
   // This returns an LSL shift (<= 4) operand as an equivalent extend operand,
   // which helps in the encoding of instructions that use the stack pointer.
   Operand ToExtendedRegister() const;
 
   int64_t immediate() const {
-    ASSERT(IsImmediate());
+    VIXL_ASSERT(IsImmediate());
     return immediate_;
   }
 
   Register reg() const {
-    ASSERT(IsShiftedRegister() || IsExtendedRegister());
+    VIXL_ASSERT(IsShiftedRegister() || IsExtendedRegister());
     return reg_;
   }
 
   Shift shift() const {
-    ASSERT(IsShiftedRegister());
+    VIXL_ASSERT(IsShiftedRegister());
     return shift_;
   }
 
   Extend extend() const {
-    ASSERT(IsExtendedRegister());
+    VIXL_ASSERT(IsExtendedRegister());
     return extend_;
   }
 
   unsigned shift_amount() const {
-    ASSERT(IsShiftedRegister() || IsExtendedRegister());
+    VIXL_ASSERT(IsShiftedRegister() || IsExtendedRegister());
     return shift_amount_;
   }
 
@@ -556,7 +577,7 @@
   Label() : is_bound_(false), link_(NULL), target_(NULL) {}
   ~Label() {
     // If the label has been linked to, it needs to be bound to a target.
-    ASSERT(!IsLinked() || IsBound());
+    VIXL_ASSERT(!IsLinked() || IsBound());
   }
 
   inline Instruction* link() const { return link_; }
@@ -643,7 +664,7 @@
   void bind(Label* label);
   int UpdateAndGetByteOffsetTo(Label* label);
   inline int UpdateAndGetInstructionOffsetTo(Label* label) {
-    ASSERT(Label::kEndOfChain == 0);
+    VIXL_ASSERT(Label::kEndOfChain == 0);
     return UpdateAndGetByteOffsetTo(label) >> kInstructionSizeLog2;
   }
 
@@ -716,8 +737,12 @@
   // Add.
   void add(const Register& rd,
            const Register& rn,
-           const Operand& operand,
-           FlagsUpdate S = LeaveFlags);
+           const Operand& operand);
+
+  // Add and update status flags.
+  void adds(const Register& rd,
+            const Register& rn,
+            const Operand& operand);
 
   // Compare negative.
   void cmn(const Register& rn, const Operand& operand);
@@ -725,40 +750,62 @@
   // Subtract.
   void sub(const Register& rd,
            const Register& rn,
-           const Operand& operand,
-           FlagsUpdate S = LeaveFlags);
+           const Operand& operand);
+
+  // Subtract and update status flags.
+  void subs(const Register& rd,
+            const Register& rn,
+            const Operand& operand);
 
   // Compare.
   void cmp(const Register& rn, const Operand& operand);
 
   // Negate.
   void neg(const Register& rd,
-           const Operand& operand,
-           FlagsUpdate S = LeaveFlags);
+           const Operand& operand);
+
+  // Negate and update status flags.
+  void negs(const Register& rd,
+            const Operand& operand);
 
   // Add with carry bit.
   void adc(const Register& rd,
            const Register& rn,
-           const Operand& operand,
-           FlagsUpdate S = LeaveFlags);
+           const Operand& operand);
+
+  // Add with carry bit and update status flags.
+  void adcs(const Register& rd,
+            const Register& rn,
+            const Operand& operand);
 
   // Subtract with carry bit.
   void sbc(const Register& rd,
            const Register& rn,
-           const Operand& operand,
-           FlagsUpdate S = LeaveFlags);
+           const Operand& operand);
+
+  // Subtract with carry bit and update status flags.
+  void sbcs(const Register& rd,
+            const Register& rn,
+            const Operand& operand);
 
   // Negate with carry bit.
   void ngc(const Register& rd,
-           const Operand& operand,
-           FlagsUpdate S = LeaveFlags);
+           const Operand& operand);
+
+  // Negate with carry bit and update status flags.
+  void ngcs(const Register& rd,
+            const Operand& operand);
 
   // Logical instructions.
   // Bitwise and (A & B).
   void and_(const Register& rd,
             const Register& rn,
-            const Operand& operand,
-            FlagsUpdate S = LeaveFlags);
+            const Operand& operand);
+
+  // Bitwise and (A & B) and update status flags.
+  void ands(const Register& rd,
+            const Register& rn,
+            const Operand& operand);
 
   // Bit test and set flags.
   void tst(const Register& rn, const Operand& operand);
@@ -766,8 +813,12 @@
   // Bit clear (A & ~B).
   void bic(const Register& rd,
            const Register& rn,
-           const Operand& operand,
-           FlagsUpdate S = LeaveFlags);
+           const Operand& operand);
+
+  // Bit clear (A & ~B) and update status flags.
+  void bics(const Register& rd,
+            const Register& rn,
+            const Operand& operand);
 
   // Bitwise or (A | B).
   void orr(const Register& rd, const Register& rn, const Operand& operand);
@@ -818,8 +869,8 @@
                   const Register& rn,
                   unsigned lsb,
                   unsigned width) {
-    ASSERT(width >= 1);
-    ASSERT(lsb + width <= rn.size());
+    VIXL_ASSERT(width >= 1);
+    VIXL_ASSERT(lsb + width <= rn.size());
     bfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
   }
 
@@ -828,15 +879,15 @@
                     const Register& rn,
                     unsigned lsb,
                     unsigned width) {
-    ASSERT(width >= 1);
-    ASSERT(lsb + width <= rn.size());
+    VIXL_ASSERT(width >= 1);
+    VIXL_ASSERT(lsb + width <= rn.size());
     bfm(rd, rn, lsb, lsb + width - 1);
   }
 
   // Sbfm aliases.
   // Arithmetic shift right.
   inline void asr(const Register& rd, const Register& rn, unsigned shift) {
-    ASSERT(shift < rd.size());
+    VIXL_ASSERT(shift < rd.size());
     sbfm(rd, rn, shift, rd.size() - 1);
   }
 
@@ -845,8 +896,8 @@
                     const Register& rn,
                     unsigned lsb,
                     unsigned width) {
-    ASSERT(width >= 1);
-    ASSERT(lsb + width <= rn.size());
+    VIXL_ASSERT(width >= 1);
+    VIXL_ASSERT(lsb + width <= rn.size());
     sbfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
   }
 
@@ -855,8 +906,8 @@
                    const Register& rn,
                    unsigned lsb,
                    unsigned width) {
-    ASSERT(width >= 1);
-    ASSERT(lsb + width <= rn.size());
+    VIXL_ASSERT(width >= 1);
+    VIXL_ASSERT(lsb + width <= rn.size());
     sbfm(rd, rn, lsb, lsb + width - 1);
   }
 
@@ -879,13 +930,13 @@
   // Logical shift left.
   inline void lsl(const Register& rd, const Register& rn, unsigned shift) {
     unsigned reg_size = rd.size();
-    ASSERT(shift < reg_size);
+    VIXL_ASSERT(shift < reg_size);
     ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1);
   }
 
   // Logical shift right.
   inline void lsr(const Register& rd, const Register& rn, unsigned shift) {
-    ASSERT(shift < rd.size());
+    VIXL_ASSERT(shift < rd.size());
     ubfm(rd, rn, shift, rd.size() - 1);
   }
 
@@ -894,8 +945,8 @@
                     const Register& rn,
                     unsigned lsb,
                     unsigned width) {
-    ASSERT(width >= 1);
-    ASSERT(lsb + width <= rn.size());
+    VIXL_ASSERT(width >= 1);
+    VIXL_ASSERT(lsb + width <= rn.size());
     ubfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
   }
 
@@ -904,8 +955,8 @@
                    const Register& rn,
                    unsigned lsb,
                    unsigned width) {
-    ASSERT(width >= 1);
-    ASSERT(lsb + width <= rn.size());
+    VIXL_ASSERT(width >= 1);
+    VIXL_ASSERT(lsb + width <= rn.size());
     ubfm(rd, rn, lsb, lsb + width - 1);
   }
 
@@ -1109,9 +1160,12 @@
   // Load literal to register.
   void ldr(const Register& rt, uint64_t imm);
 
-  // Load literal to FP register.
+  // Load double precision floating point literal to FP register.
   void ldr(const FPRegister& ft, double imm);
 
+  // Load single precision floating point literal to FP register.
+  void ldr(const FPRegister& ft, float imm);
+
   // Move instructions. The default shift of -1 indicates that the move
   // instruction will calculate an appropriate 16-bit immediate and left shift
   // that is equal to the 64-bit immediate argument. If an explicit left shift
@@ -1160,6 +1214,15 @@
   // System hint.
   void hint(SystemHint code);
 
+  // Data memory barrier.
+  void dmb(BarrierDomain domain, BarrierType type);
+
+  // Data synchronization barrier.
+  void dsb(BarrierDomain domain, BarrierType type);
+
+  // Instruction synchronization barrier.
+  void isb();
+
   // Alias for system instructions.
   // No-op.
   void nop() {
@@ -1167,17 +1230,20 @@
   }
 
   // FP instructions.
-  // Move immediate to FP register.
-  void fmov(FPRegister fd, double imm);
+  // Move double precision immediate to FP register.
+  void fmov(const FPRegister& fd, double imm);
+
+  // Move single precision immediate to FP register.
+  void fmov(const FPRegister& fd, float imm);
 
   // Move FP register to register.
-  void fmov(Register rd, FPRegister fn);
+  void fmov(const Register& rd, const FPRegister& fn);
 
   // Move register to FP register.
-  void fmov(FPRegister fd, Register rn);
+  void fmov(const FPRegister& fd, const Register& rn);
 
   // Move FP register to FP register.
-  void fmov(FPRegister fd, FPRegister fn);
+  void fmov(const FPRegister& fd, const FPRegister& fn);
 
   // FP add.
   void fadd(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm);
@@ -1188,12 +1254,30 @@
   // FP multiply.
   void fmul(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm);
 
-  // FP multiply and subtract.
+  // FP fused multiply and add.
+  void fmadd(const FPRegister& fd,
+             const FPRegister& fn,
+             const FPRegister& fm,
+             const FPRegister& fa);
+
+  // FP fused multiply and subtract.
   void fmsub(const FPRegister& fd,
              const FPRegister& fn,
              const FPRegister& fm,
              const FPRegister& fa);
 
+  // FP fused multiply, add and negate.
+  void fnmadd(const FPRegister& fd,
+              const FPRegister& fn,
+              const FPRegister& fm,
+              const FPRegister& fa);
+
+  // FP fused multiply, subtract and negate.
+  void fnmsub(const FPRegister& fd,
+              const FPRegister& fn,
+              const FPRegister& fm,
+              const FPRegister& fa);
+
   // FP divide.
   void fdiv(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm);
 
@@ -1203,6 +1287,12 @@
   // FP minimum.
   void fmin(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm);
 
+  // FP maximum number.
+  void fmaxnm(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm);
+
+  // FP minimum number.
+  void fminnm(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm);
+
   // FP absolute.
   void fabs(const FPRegister& fd, const FPRegister& fn);
 
@@ -1212,6 +1302,12 @@
   // FP square root.
   void fsqrt(const FPRegister& fd, const FPRegister& fn);
 
+  // FP round to integer (nearest with ties to away).
+  void frinta(const FPRegister& fd, const FPRegister& fn);
+
+  // FP round to integer (toward minus infinity).
+  void frintm(const FPRegister& fd, const FPRegister& fn);
+
   // FP round to integer (nearest with ties to even).
   void frintn(const FPRegister& fd, const FPRegister& fn);
 
@@ -1244,24 +1340,30 @@
   // FP convert between single and double precision.
   void fcvt(const FPRegister& fd, const FPRegister& fn);
 
-  // Convert FP to unsigned integer (round towards -infinity).
-  void fcvtmu(const Register& rd, const FPRegister& fn);
+  // Convert FP to signed integer (nearest with ties to away).
+  void fcvtas(const Register& rd, const FPRegister& fn);
+
+  // Convert FP to unsigned integer (nearest with ties to away).
+  void fcvtau(const Register& rd, const FPRegister& fn);
 
   // Convert FP to signed integer (round towards -infinity).
   void fcvtms(const Register& rd, const FPRegister& fn);
 
-  // Convert FP to unsigned integer (nearest with ties to even).
-  void fcvtnu(const Register& rd, const FPRegister& fn);
+  // Convert FP to unsigned integer (round towards -infinity).
+  void fcvtmu(const Register& rd, const FPRegister& fn);
 
   // Convert FP to signed integer (nearest with ties to even).
   void fcvtns(const Register& rd, const FPRegister& fn);
 
-  // Convert FP to unsigned integer (round towards zero).
-  void fcvtzu(const Register& rd, const FPRegister& fn);
+  // Convert FP to unsigned integer (nearest with ties to even).
+  void fcvtnu(const Register& rd, const FPRegister& fn);
 
   // Convert FP to signed integer (round towards zero).
   void fcvtzs(const Register& rd, const FPRegister& fn);
 
+  // Convert FP to unsigned integer (round towards zero).
+  void fcvtzu(const Register& rd, const FPRegister& fn);
+
   // Convert signed integer or fixed point to FP.
   void scvtf(const FPRegister& fd, const Register& rn, unsigned fbits = 0);
 
@@ -1282,14 +1384,14 @@
   // character. The instruction pointer (pc_) is then aligned correctly for
   // subsequent instructions.
   void EmitStringData(const char * string) {
-    ASSERT(string != NULL);
+    VIXL_ASSERT(string != NULL);
 
     size_t len = strlen(string) + 1;
     EmitData(string, len);
 
     // Pad with NULL characters until pc_ is aligned.
     const char pad[] = {'\0', '\0', '\0', '\0'};
-    ASSERT(sizeof(pad) == kInstructionSize);
+    VIXL_STATIC_ASSERT(sizeof(pad) == kInstructionSize);
     Instruction* next_pc = AlignUp(pc_, kInstructionSize);
     EmitData(&pad, next_pc - pc_);
   }
@@ -1298,44 +1400,44 @@
 
   // Register encoding.
   static Instr Rd(CPURegister rd) {
-    ASSERT(rd.code() != kSPRegInternalCode);
+    VIXL_ASSERT(rd.code() != kSPRegInternalCode);
     return rd.code() << Rd_offset;
   }
 
   static Instr Rn(CPURegister rn) {
-    ASSERT(rn.code() != kSPRegInternalCode);
+    VIXL_ASSERT(rn.code() != kSPRegInternalCode);
     return rn.code() << Rn_offset;
   }
 
   static Instr Rm(CPURegister rm) {
-    ASSERT(rm.code() != kSPRegInternalCode);
+    VIXL_ASSERT(rm.code() != kSPRegInternalCode);
     return rm.code() << Rm_offset;
   }
 
   static Instr Ra(CPURegister ra) {
-    ASSERT(ra.code() != kSPRegInternalCode);
+    VIXL_ASSERT(ra.code() != kSPRegInternalCode);
     return ra.code() << Ra_offset;
   }
 
   static Instr Rt(CPURegister rt) {
-    ASSERT(rt.code() != kSPRegInternalCode);
+    VIXL_ASSERT(rt.code() != kSPRegInternalCode);
     return rt.code() << Rt_offset;
   }
 
   static Instr Rt2(CPURegister rt2) {
-    ASSERT(rt2.code() != kSPRegInternalCode);
+    VIXL_ASSERT(rt2.code() != kSPRegInternalCode);
     return rt2.code() << Rt2_offset;
   }
 
   // These encoding functions allow the stack pointer to be encoded, and
   // disallow the zero register.
   static Instr RdSP(Register rd) {
-    ASSERT(!rd.IsZero());
+    VIXL_ASSERT(!rd.IsZero());
     return (rd.code() & kRegCodeMask) << Rd_offset;
   }
 
   static Instr RnSP(Register rn) {
-    ASSERT(!rn.IsZero());
+    VIXL_ASSERT(!rn.IsZero());
     return (rn.code() & kRegCodeMask) << Rn_offset;
   }
 
@@ -1346,7 +1448,7 @@
     } else if (S == LeaveFlags) {
       return 0 << FlagsUpdate_offset;
     }
-    UNREACHABLE();
+    VIXL_UNREACHABLE();
     return 0;
   }
 
@@ -1356,7 +1458,7 @@
 
   // PC-relative address encoding.
   static Instr ImmPCRelAddress(int imm21) {
-    ASSERT(is_int21(imm21));
+    VIXL_ASSERT(is_int21(imm21));
     Instr imm = static_cast<Instr>(truncate_to_int21(imm21));
     Instr immhi = (imm >> ImmPCRelLo_width) << ImmPCRelHi_offset;
     Instr immlo = imm << ImmPCRelLo_offset;
@@ -1365,27 +1467,27 @@
 
   // Branch encoding.
   static Instr ImmUncondBranch(int imm26) {
-    ASSERT(is_int26(imm26));
+    VIXL_ASSERT(is_int26(imm26));
     return truncate_to_int26(imm26) << ImmUncondBranch_offset;
   }
 
   static Instr ImmCondBranch(int imm19) {
-    ASSERT(is_int19(imm19));
+    VIXL_ASSERT(is_int19(imm19));
     return truncate_to_int19(imm19) << ImmCondBranch_offset;
   }
 
   static Instr ImmCmpBranch(int imm19) {
-    ASSERT(is_int19(imm19));
+    VIXL_ASSERT(is_int19(imm19));
     return truncate_to_int19(imm19) << ImmCmpBranch_offset;
   }
 
   static Instr ImmTestBranch(int imm14) {
-    ASSERT(is_int14(imm14));
+    VIXL_ASSERT(is_int14(imm14));
     return truncate_to_int14(imm14) << ImmTestBranch_offset;
   }
 
   static Instr ImmTestBranchBit(unsigned bit_pos) {
-    ASSERT(is_uint6(bit_pos));
+    VIXL_ASSERT(is_uint6(bit_pos));
     // Subtract five from the shift offset, as we need bit 5 from bit_pos.
     unsigned b5 = bit_pos << (ImmTestBranchBit5_offset - 5);
     unsigned b40 = bit_pos << ImmTestBranchBit40_offset;
@@ -1400,7 +1502,7 @@
   }
 
   static Instr ImmAddSub(int64_t imm) {
-    ASSERT(IsImmAddSub(imm));
+    VIXL_ASSERT(IsImmAddSub(imm));
     if (is_uint12(imm)) {  // No shift required.
       return imm << ImmAddSub_offset;
     } else {
@@ -1409,55 +1511,55 @@
   }
 
   static inline Instr ImmS(unsigned imms, unsigned reg_size) {
-    ASSERT(((reg_size == kXRegSize) && is_uint6(imms)) ||
+    VIXL_ASSERT(((reg_size == kXRegSize) && is_uint6(imms)) ||
            ((reg_size == kWRegSize) && is_uint5(imms)));
     USE(reg_size);
     return imms << ImmS_offset;
   }
 
   static inline Instr ImmR(unsigned immr, unsigned reg_size) {
-    ASSERT(((reg_size == kXRegSize) && is_uint6(immr)) ||
+    VIXL_ASSERT(((reg_size == kXRegSize) && is_uint6(immr)) ||
            ((reg_size == kWRegSize) && is_uint5(immr)));
     USE(reg_size);
-    ASSERT(is_uint6(immr));
+    VIXL_ASSERT(is_uint6(immr));
     return immr << ImmR_offset;
   }
 
   static inline Instr ImmSetBits(unsigned imms, unsigned reg_size) {
-    ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
-    ASSERT(is_uint6(imms));
-    ASSERT((reg_size == kXRegSize) || is_uint6(imms + 3));
+    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
+    VIXL_ASSERT(is_uint6(imms));
+    VIXL_ASSERT((reg_size == kXRegSize) || is_uint6(imms + 3));
     USE(reg_size);
     return imms << ImmSetBits_offset;
   }
 
   static inline Instr ImmRotate(unsigned immr, unsigned reg_size) {
-    ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
-    ASSERT(((reg_size == kXRegSize) && is_uint6(immr)) ||
+    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
+    VIXL_ASSERT(((reg_size == kXRegSize) && is_uint6(immr)) ||
            ((reg_size == kWRegSize) && is_uint5(immr)));
     USE(reg_size);
     return immr << ImmRotate_offset;
   }
 
   static inline Instr ImmLLiteral(int imm19) {
-    ASSERT(is_int19(imm19));
+    VIXL_ASSERT(is_int19(imm19));
     return truncate_to_int19(imm19) << ImmLLiteral_offset;
   }
 
   static inline Instr BitN(unsigned bitn, unsigned reg_size) {
-    ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
-    ASSERT((reg_size == kXRegSize) || (bitn == 0));
+    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
+    VIXL_ASSERT((reg_size == kXRegSize) || (bitn == 0));
     USE(reg_size);
     return bitn << BitN_offset;
   }
 
   static Instr ShiftDP(Shift shift) {
-    ASSERT(shift == LSL || shift == LSR || shift == ASR || shift == ROR);
+    VIXL_ASSERT(shift == LSL || shift == LSR || shift == ASR || shift == ROR);
     return shift << ShiftDP_offset;
   }
 
   static Instr ImmDPShift(unsigned amount) {
-    ASSERT(is_uint6(amount));
+    VIXL_ASSERT(is_uint6(amount));
     return amount << ImmDPShift_offset;
   }
 
@@ -1466,12 +1568,12 @@
   }
 
   static Instr ImmExtendShift(unsigned left_shift) {
-    ASSERT(left_shift <= 4);
+    VIXL_ASSERT(left_shift <= 4);
     return left_shift << ImmExtendShift_offset;
   }
 
   static Instr ImmCondCmp(unsigned imm) {
-    ASSERT(is_uint5(imm));
+    VIXL_ASSERT(is_uint5(imm));
     return imm << ImmCondCmp_offset;
   }
 
@@ -1481,55 +1583,65 @@
 
   // MemOperand offset encoding.
   static Instr ImmLSUnsigned(int imm12) {
-    ASSERT(is_uint12(imm12));
+    VIXL_ASSERT(is_uint12(imm12));
     return imm12 << ImmLSUnsigned_offset;
   }
 
   static Instr ImmLS(int imm9) {
-    ASSERT(is_int9(imm9));
+    VIXL_ASSERT(is_int9(imm9));
     return truncate_to_int9(imm9) << ImmLS_offset;
   }
 
   static Instr ImmLSPair(int imm7, LSDataSize size) {
-    ASSERT(((imm7 >> size) << size) == imm7);
+    VIXL_ASSERT(((imm7 >> size) << size) == imm7);
     int scaled_imm7 = imm7 >> size;
-    ASSERT(is_int7(scaled_imm7));
+    VIXL_ASSERT(is_int7(scaled_imm7));
     return truncate_to_int7(scaled_imm7) << ImmLSPair_offset;
   }
 
   static Instr ImmShiftLS(unsigned shift_amount) {
-    ASSERT(is_uint1(shift_amount));
+    VIXL_ASSERT(is_uint1(shift_amount));
     return shift_amount << ImmShiftLS_offset;
   }
 
   static Instr ImmException(int imm16) {
-    ASSERT(is_uint16(imm16));
+    VIXL_ASSERT(is_uint16(imm16));
     return imm16 << ImmException_offset;
   }
 
   static Instr ImmSystemRegister(int imm15) {
-    ASSERT(is_uint15(imm15));
+    VIXL_ASSERT(is_uint15(imm15));
     return imm15 << ImmSystemRegister_offset;
   }
 
   static Instr ImmHint(int imm7) {
-    ASSERT(is_uint7(imm7));
+    VIXL_ASSERT(is_uint7(imm7));
     return imm7 << ImmHint_offset;
   }
 
+  static Instr ImmBarrierDomain(int imm2) {
+    VIXL_ASSERT(is_uint2(imm2));
+    return imm2 << ImmBarrierDomain_offset;
+  }
+
+  static Instr ImmBarrierType(int imm2) {
+    VIXL_ASSERT(is_uint2(imm2));
+    return imm2 << ImmBarrierType_offset;
+  }
+
   static LSDataSize CalcLSDataSize(LoadStoreOp op) {
-    ASSERT((SizeLS_offset + SizeLS_width) == (kInstructionSize * 8));
+    VIXL_ASSERT((SizeLS_offset + SizeLS_width) == (kInstructionSize * 8));
     return static_cast<LSDataSize>(op >> SizeLS_offset);
   }
 
   // Move immediates encoding.
   static Instr ImmMoveWide(uint64_t imm) {
-    ASSERT(is_uint16(imm));
+    VIXL_ASSERT(is_uint16(imm));
     return imm << ImmMoveWide_offset;
   }
 
   static Instr ShiftMoveWide(int64_t shift) {
-    ASSERT(is_uint2(shift));
+    VIXL_ASSERT(is_uint2(shift));
     return shift << ShiftMoveWide_offset;
   }
 
@@ -1543,20 +1655,20 @@
   }
 
   static Instr FPScale(unsigned scale) {
-    ASSERT(is_uint6(scale));
+    VIXL_ASSERT(is_uint6(scale));
     return scale << FPScale_offset;
   }
 
   // Size of the code generated in bytes
   uint64_t SizeOfCodeGenerated() const {
-    ASSERT((pc_ >= buffer_) && (pc_ < (buffer_ + buffer_size_)));
+    VIXL_ASSERT((pc_ >= buffer_) && (pc_ < (buffer_ + buffer_size_)));
     return pc_ - buffer_;
   }
 
   // Size of the code generated since label to the current position.
   uint64_t SizeOfCodeGeneratedSince(Label* label) const {
-    ASSERT(label->IsBound());
-    ASSERT((pc_ >= label->target()) && (pc_ < (buffer_ + buffer_size_)));
+    VIXL_ASSERT(label->IsBound());
+    VIXL_ASSERT((pc_ >= label->target()) && (pc_ < (buffer_ + buffer_size_)));
     return pc_ - label->target();
   }
 
@@ -1568,7 +1680,7 @@
   inline void ReleaseLiteralPool() {
     if (--literal_pool_monitor_ == 0) {
       // Has the literal pool been blocked for too long?
-      ASSERT(literals_.empty() ||
+      VIXL_ASSERT(literals_.empty() ||
              (pc_ < (literals_.back()->pc_ + kMaxLoadLiteralRange)));
     }
   }
@@ -1622,6 +1734,9 @@
                        FlagsUpdate S,
                        AddSubWithCarryOp op);
 
+  static bool IsImmFP32(float imm);
+  static bool IsImmFP64(double imm);
+
   // Functions for emulating operands not directly supported by the instruction
   // set.
   void EmitShift(const Register& rd,
@@ -1706,17 +1821,13 @@
                                const FPRegister& fa,
                                FPDataProcessing3SourceOp op);
 
-  // Encoding helpers.
-  static bool IsImmFP32(float imm);
-  static bool IsImmFP64(double imm);
-
   void RecordLiteral(int64_t imm, unsigned size);
 
   // Emit the instruction at pc_.
   void Emit(Instr instruction) {
-    ASSERT(sizeof(*pc_) == 1);
-    ASSERT(sizeof(instruction) == kInstructionSize);
-    ASSERT((pc_ + sizeof(instruction)) <= (buffer_ + buffer_size_));
+    VIXL_STATIC_ASSERT(sizeof(*pc_) == 1);
+    VIXL_STATIC_ASSERT(sizeof(instruction) == kInstructionSize);
+    VIXL_ASSERT((pc_ + sizeof(instruction)) <= (buffer_ + buffer_size_));
 
 #ifdef DEBUG
     finalized_ = false;
@@ -1729,8 +1840,8 @@
 
   // Emit data inline in the instruction stream.
   void EmitData(void const * data, unsigned size) {
-    ASSERT(sizeof(*pc_) == 1);
-    ASSERT((pc_ + size) <= (buffer_ + buffer_size_));
+    VIXL_STATIC_ASSERT(sizeof(*pc_) == 1);
+    VIXL_ASSERT((pc_ + size) <= (buffer_ + buffer_size_));
 
 #ifdef DEBUG
     finalized_ = false;
@@ -1744,7 +1855,7 @@
   }
 
   inline void CheckBufferSpace() {
-    ASSERT(pc_ < (buffer_ + buffer_size_));
+    VIXL_ASSERT(pc_ < (buffer_ + buffer_size_));
     if (pc_ > next_literal_pool_check_) {
       CheckLiteralPool();
     }
diff --git a/disas/libvixl/a64/constants-a64.h b/disas/libvixl/a64/constants-a64.h
index 2e0336d..99677c1 100644
--- a/disas/libvixl/a64/constants-a64.h
+++ b/disas/libvixl/a64/constants-a64.h
@@ -116,6 +116,8 @@
 V_(ImmLLiteral, 23, 5, SignedBits)                                             \
 V_(ImmException, 20, 5, Bits)                                                  \
 V_(ImmHint, 11, 5, Bits)                                                       \
+V_(ImmBarrierDomain, 11, 10, Bits)                                             \
+V_(ImmBarrierType, 9, 8, Bits)                                                 \
                                                                                \
 /* System (MRS, MSR) */                                                        \
 V_(ImmSystemRegister, 19, 5, Bits)                                             \
@@ -181,7 +183,7 @@
 inline Condition InvertCondition(Condition cond) {
   // Conditions al and nv behave identically, as "always true". They can't be
   // inverted, because there is no "always false" condition.
-  ASSERT((cond != al) && (cond != nv));
+  VIXL_ASSERT((cond != al) && (cond != nv));
   return static_cast<Condition>(cond ^ 1);
 }
 
@@ -246,6 +248,20 @@
   SEVL  = 5
 };
 
+enum BarrierDomain {
+  OuterShareable = 0,
+  NonShareable   = 1,
+  InnerShareable = 2,
+  FullSystem     = 3
+};
+
+enum BarrierType {
+  BarrierOther  = 0,
+  BarrierReads  = 1,
+  BarrierWrites = 2,
+  BarrierAll    = 3
+};
+
 // System/special register names.
 // This information is not encoded as one field but as the concatenation of
 // multiple fields (Op0<0>, Op1, Crn, Crm, Op2).
@@ -274,7 +290,7 @@
 //
 // The enumerations can be used like this:
 //
-// ASSERT(instr->Mask(PCRelAddressingFMask) == PCRelAddressingFixed);
+// VIXL_ASSERT(instr->Mask(PCRelAddressingFMask) == PCRelAddressingFixed);
 // switch(instr->Mask(PCRelAddressingMask)) {
 //   case ADR:  Format("adr 'Xd, 'AddrPCRelByte"); break;
 //   case ADRP: Format("adrp 'Xd, 'AddrPCRelPage"); break;
@@ -560,6 +576,15 @@
   DCPS3          = ExceptionFixed | 0x00A00003
 };
 
+enum MemBarrierOp {
+  MemBarrierFixed = 0xD503309F,
+  MemBarrierFMask = 0xFFFFF09F,
+  MemBarrierMask  = 0xFFFFF0FF,
+  DSB             = MemBarrierFixed | 0x00000000,
+  DMB             = MemBarrierFixed | 0x00000020,
+  ISB             = MemBarrierFixed | 0x00000040
+};
+
 // Any load or store.
 enum LoadStoreAnyOp {
   LoadStoreAnyFMask = 0x0a000000,
@@ -927,17 +952,22 @@
   FRINTN   = FRINTN_s,
   FRINTP_s = FPDataProcessing1SourceFixed | 0x00048000,
   FRINTP_d = FPDataProcessing1SourceFixed | FP64 | 0x00048000,
+  FRINTP   = FRINTP_s,
   FRINTM_s = FPDataProcessing1SourceFixed | 0x00050000,
   FRINTM_d = FPDataProcessing1SourceFixed | FP64 | 0x00050000,
+  FRINTM   = FRINTM_s,
   FRINTZ_s = FPDataProcessing1SourceFixed | 0x00058000,
   FRINTZ_d = FPDataProcessing1SourceFixed | FP64 | 0x00058000,
   FRINTZ   = FRINTZ_s,
   FRINTA_s = FPDataProcessing1SourceFixed | 0x00060000,
   FRINTA_d = FPDataProcessing1SourceFixed | FP64 | 0x00060000,
+  FRINTA   = FRINTA_s,
   FRINTX_s = FPDataProcessing1SourceFixed | 0x00070000,
   FRINTX_d = FPDataProcessing1SourceFixed | FP64 | 0x00070000,
+  FRINTX   = FRINTX_s,
   FRINTI_s = FPDataProcessing1SourceFixed | 0x00078000,
-  FRINTI_d = FPDataProcessing1SourceFixed | FP64 | 0x00078000
+  FRINTI_d = FPDataProcessing1SourceFixed | FP64 | 0x00078000,
+  FRINTI   = FRINTI_s
 };
 
 // Floating point data processing 2 source.
diff --git a/disas/libvixl/a64/decoder-a64.cc b/disas/libvixl/a64/decoder-a64.cc
index 9e9033c..8450eb3 100644
--- a/disas/libvixl/a64/decoder-a64.cc
+++ b/disas/libvixl/a64/decoder-a64.cc
@@ -132,7 +132,7 @@
   }
   // We reached the end of the list. The last element must be
   // registered_visitor.
-  ASSERT(*it == registered_visitor);
+  VIXL_ASSERT(*it == registered_visitor);
   visitors_.insert(it, new_visitor);
 }
 
@@ -150,7 +150,7 @@
   }
   // We reached the end of the list. The last element must be
   // registered_visitor.
-  ASSERT(*it == registered_visitor);
+  VIXL_ASSERT(*it == registered_visitor);
   visitors_.push_back(new_visitor);
 }
 
@@ -161,16 +161,16 @@
 
 
 void Decoder::DecodePCRelAddressing(Instruction* instr) {
-  ASSERT(instr->Bits(27, 24) == 0x0);
+  VIXL_ASSERT(instr->Bits(27, 24) == 0x0);
   // We know bit 28 is set, as <b28:b27> = 0 is filtered out at the top level
   // decode.
-  ASSERT(instr->Bit(28) == 0x1);
+  VIXL_ASSERT(instr->Bit(28) == 0x1);
   VisitPCRelAddressing(instr);
 }
 
 
 void Decoder::DecodeBranchSystemException(Instruction* instr) {
-  ASSERT((instr->Bits(27, 24) == 0x4) ||
+  VIXL_ASSERT((instr->Bits(27, 24) == 0x4) ||
          (instr->Bits(27, 24) == 0x5) ||
          (instr->Bits(27, 24) == 0x6) ||
          (instr->Bits(27, 24) == 0x7) );
@@ -271,7 +271,7 @@
 
 
 void Decoder::DecodeLoadStore(Instruction* instr) {
-  ASSERT((instr->Bits(27, 24) == 0x8) ||
+  VIXL_ASSERT((instr->Bits(27, 24) == 0x8) ||
          (instr->Bits(27, 24) == 0x9) ||
          (instr->Bits(27, 24) == 0xC) ||
          (instr->Bits(27, 24) == 0xD) );
@@ -390,7 +390,7 @@
 
 
 void Decoder::DecodeLogical(Instruction* instr) {
-  ASSERT(instr->Bits(27, 24) == 0x2);
+  VIXL_ASSERT(instr->Bits(27, 24) == 0x2);
 
   if (instr->Mask(0x80400000) == 0x00400000) {
     VisitUnallocated(instr);
@@ -409,7 +409,7 @@
 
 
 void Decoder::DecodeBitfieldExtract(Instruction* instr) {
-  ASSERT(instr->Bits(27, 24) == 0x3);
+  VIXL_ASSERT(instr->Bits(27, 24) == 0x3);
 
   if ((instr->Mask(0x80400000) == 0x80000000) ||
       (instr->Mask(0x80400000) == 0x00400000) ||
@@ -434,7 +434,7 @@
 
 
 void Decoder::DecodeAddSubImmediate(Instruction* instr) {
-  ASSERT(instr->Bits(27, 24) == 0x1);
+  VIXL_ASSERT(instr->Bits(27, 24) == 0x1);
   if (instr->Bit(23) == 1) {
     VisitUnallocated(instr);
   } else {
@@ -444,8 +444,8 @@
 
 
 void Decoder::DecodeDataProcessing(Instruction* instr) {
-  ASSERT((instr->Bits(27, 24) == 0xA) ||
-         (instr->Bits(27, 24) == 0xB) );
+  VIXL_ASSERT((instr->Bits(27, 24) == 0xA) ||
+              (instr->Bits(27, 24) == 0xB));
 
   if (instr->Bit(24) == 0) {
     if (instr->Bit(28) == 0) {
@@ -559,8 +559,8 @@
 
 
 void Decoder::DecodeFP(Instruction* instr) {
-  ASSERT((instr->Bits(27, 24) == 0xE) ||
-         (instr->Bits(27, 24) == 0xF) );
+  VIXL_ASSERT((instr->Bits(27, 24) == 0xE) ||
+              (instr->Bits(27, 24) == 0xF));
 
   if (instr->Bit(28) == 0) {
     DecodeAdvSIMDDataProcessing(instr);
@@ -665,14 +665,14 @@
                     VisitFPConditionalSelect(instr);
                     break;
                   }
-                  default: UNREACHABLE();
+                  default: VIXL_UNREACHABLE();
                 }
               }
             }
           }
         } else {
           // Bit 30 == 1 has been handled earlier.
-          ASSERT(instr->Bit(30) == 0);
+          VIXL_ASSERT(instr->Bit(30) == 0);
           if (instr->Mask(0xA0800000) != 0) {
             VisitUnallocated(instr);
           } else {
@@ -687,21 +687,21 @@
 
 void Decoder::DecodeAdvSIMDLoadStore(Instruction* instr) {
   // TODO: Implement Advanced SIMD load/store instruction decode.
-  ASSERT(instr->Bits(29, 25) == 0x6);
+  VIXL_ASSERT(instr->Bits(29, 25) == 0x6);
   VisitUnimplemented(instr);
 }
 
 
 void Decoder::DecodeAdvSIMDDataProcessing(Instruction* instr) {
   // TODO: Implement Advanced SIMD data processing instruction decode.
-  ASSERT(instr->Bits(27, 25) == 0x7);
+  VIXL_ASSERT(instr->Bits(27, 25) == 0x7);
   VisitUnimplemented(instr);
 }
 
 
 #define DEFINE_VISITOR_CALLERS(A)                                              \
   void Decoder::Visit##A(Instruction *instr) {                                 \
-    ASSERT(instr->Mask(A##FMask) == A##Fixed);                                 \
+    VIXL_ASSERT(instr->Mask(A##FMask) == A##Fixed);                            \
     std::list<DecoderVisitor*>::iterator it;                                   \
     for (it = visitors_.begin(); it != visitors_.end(); it++) {                \
       (*it)->Visit##A(instr);                                                  \
diff --git a/disas/libvixl/a64/disasm-a64.cc b/disas/libvixl/a64/disasm-a64.cc
index 5f172da..aa133a9 100644
--- a/disas/libvixl/a64/disasm-a64.cc
+++ b/disas/libvixl/a64/disasm-a64.cc
@@ -95,7 +95,7 @@
       }
       break;
     }
-    default: UNREACHABLE();
+    default: VIXL_UNREACHABLE();
   }
   Format(instr, mnemonic, form);
 }
@@ -142,7 +142,7 @@
       }
       break;
     }
-    default: UNREACHABLE();
+    default: VIXL_UNREACHABLE();
   }
   Format(instr, mnemonic, form);
 }
@@ -180,7 +180,7 @@
       }
       break;
     }
-    default: UNREACHABLE();
+    default: VIXL_UNREACHABLE();
   }
   Format(instr, mnemonic, form);
 }
@@ -215,7 +215,7 @@
       }
       break;
     }
-    default: UNREACHABLE();
+    default: VIXL_UNREACHABLE();
   }
   Format(instr, mnemonic, form);
 }
@@ -258,30 +258,30 @@
       }
       break;
     }
-    default: UNREACHABLE();
+    default: VIXL_UNREACHABLE();
   }
   Format(instr, mnemonic, form);
 }
 
 
 bool Disassembler::IsMovzMovnImm(unsigned reg_size, uint64_t value) {
-  ASSERT((reg_size == kXRegSize) ||
-         ((reg_size == kWRegSize) && (value <= 0xffffffff)));
+  VIXL_ASSERT((reg_size == kXRegSize) ||
+              ((reg_size == kWRegSize) && (value <= 0xffffffff)));
 
   // Test for movz: 16 bits set at positions 0, 16, 32 or 48.
-  if (((value & 0xffffffffffff0000ULL) == 0ULL) ||
-      ((value & 0xffffffff0000ffffULL) == 0ULL) ||
-      ((value & 0xffff0000ffffffffULL) == 0ULL) ||
-      ((value & 0x0000ffffffffffffULL) == 0ULL)) {
+  if (((value & UINT64_C(0xffffffffffff0000)) == 0) ||
+      ((value & UINT64_C(0xffffffff0000ffff)) == 0) ||
+      ((value & UINT64_C(0xffff0000ffffffff)) == 0) ||
+      ((value & UINT64_C(0x0000ffffffffffff)) == 0)) {
     return true;
   }
 
   // Test for movn: NOT(16 bits set at positions 0, 16, 32 or 48).
   if ((reg_size == kXRegSize) &&
-      (((value & 0xffffffffffff0000ULL) == 0xffffffffffff0000ULL) ||
-       ((value & 0xffffffff0000ffffULL) == 0xffffffff0000ffffULL) ||
-       ((value & 0xffff0000ffffffffULL) == 0xffff0000ffffffffULL) ||
-       ((value & 0x0000ffffffffffffULL) == 0x0000ffffffffffffULL))) {
+      (((~value & UINT64_C(0xffffffffffff0000)) == 0) ||
+       ((~value & UINT64_C(0xffffffff0000ffff)) == 0) ||
+       ((~value & UINT64_C(0xffff0000ffffffff)) == 0) ||
+       ((~value & UINT64_C(0x0000ffffffffffff)) == 0))) {
     return true;
   }
   if ((reg_size == kWRegSize) &&
@@ -337,7 +337,7 @@
       }
       break;
     }
-    default: UNREACHABLE();
+    default: VIXL_UNREACHABLE();
   }
 
   Format(instr, mnemonic, form);
@@ -353,7 +353,7 @@
     case CCMN_x: mnemonic = "ccmn"; break;
     case CCMP_w:
     case CCMP_x: mnemonic = "ccmp"; break;
-    default: UNREACHABLE();
+    default: VIXL_UNREACHABLE();
   }
   Format(instr, mnemonic, form);
 }
@@ -368,7 +368,7 @@
     case CCMN_x_imm: mnemonic = "ccmn"; break;
     case CCMP_w_imm:
     case CCMP_x_imm: mnemonic = "ccmp"; break;
-    default: UNREACHABLE();
+    default: VIXL_UNREACHABLE();
   }
   Format(instr, mnemonic, form);
 }
@@ -421,7 +421,7 @@
       }
       break;
     }
-    default: UNREACHABLE();
+    default: VIXL_UNREACHABLE();
   }
   Format(instr, mnemonic, form);
 }
@@ -520,7 +520,7 @@
       }
       break;
     }
-    default: UNREACHABLE();
+    default: VIXL_UNREACHABLE();
   }
   Format(instr, mnemonic, form);
 }
@@ -538,7 +538,7 @@
 void Disassembler::VisitConditionalBranch(Instruction* instr) {
   switch (instr->Mask(ConditionalBranchMask)) {
     case B_cond: Format(instr, "b.'CBrn", "'BImmCond"); break;
-    default: UNREACHABLE();
+    default: VIXL_UNREACHABLE();
   }
 }
 
@@ -570,7 +570,7 @@
   switch (instr->Mask(UnconditionalBranchMask)) {
     case B: mnemonic = "b"; break;
     case BL: mnemonic = "bl"; break;
-    default: UNREACHABLE();
+    default: VIXL_UNREACHABLE();
   }
   Format(instr, mnemonic, form);
 }
@@ -591,7 +591,7 @@
     FORMAT(CLS, "cls");
     #undef FORMAT
     case REV32_x: mnemonic = "rev32"; break;
-    default: UNREACHABLE();
+    default: VIXL_UNREACHABLE();
   }
   Format(instr, mnemonic, form);
 }
@@ -690,7 +690,7 @@
       form = form_xxx;
       break;
     }
-    default: UNREACHABLE();
+    default: VIXL_UNREACHABLE();
   }
   Format(instr, mnemonic, form);
 }
@@ -705,7 +705,7 @@
     case CBZ_x: mnemonic = "cbz"; break;
     case CBNZ_w:
     case CBNZ_x: mnemonic = "cbnz"; break;
-    default: UNREACHABLE();
+    default: VIXL_UNREACHABLE();
   }
   Format(instr, mnemonic, form);
 }
@@ -722,7 +722,7 @@
   switch (instr->Mask(TestBranchMask)) {
     case TBZ: mnemonic = "tbz"; break;
     case TBNZ: mnemonic = "tbnz"; break;
-    default: UNREACHABLE();
+    default: VIXL_UNREACHABLE();
   }
   Format(instr, mnemonic, form);
 }
@@ -742,7 +742,7 @@
     case MOVZ_x: mnemonic = "movz"; break;
     case MOVK_w:
     case MOVK_x: mnemonic = "movk"; form = "'Rd, 'IMoveLSL"; break;
-    default: UNREACHABLE();
+    default: VIXL_UNREACHABLE();
   }
   Format(instr, mnemonic, form);
 }
@@ -981,7 +981,7 @@
   switch (instr->Mask(FPConditionalSelectMask)) {
     case FCSEL_s:
     case FCSEL_d: mnemonic = "fcsel"; break;
-    default: UNREACHABLE();
+    default: VIXL_UNREACHABLE();
   }
   Format(instr, mnemonic, form);
 }
@@ -1033,7 +1033,7 @@
     FORMAT(FMINNM, "fminnm");
     FORMAT(FNMUL, "fnmul");
     #undef FORMAT
-    default: UNREACHABLE();
+    default: VIXL_UNREACHABLE();
   }
   Format(instr, mnemonic, form);
 }
@@ -1052,7 +1052,7 @@
     FORMAT(FNMADD, "fnmadd");
     FORMAT(FNMSUB, "fnmsub");
     #undef FORMAT
-    default: UNREACHABLE();
+    default: VIXL_UNREACHABLE();
   }
   Format(instr, mnemonic, form);
 }
@@ -1065,7 +1065,7 @@
   switch (instr->Mask(FPImmediateMask)) {
     case FMOV_s_imm: mnemonic = "fmov"; form = "'Sd, 'IFPSingle"; break;
     case FMOV_d_imm: mnemonic = "fmov"; form = "'Dd, 'IFPDouble"; break;
-    default: UNREACHABLE();
+    default: VIXL_UNREACHABLE();
   }
   Format(instr, mnemonic, form);
 }
@@ -1082,6 +1082,14 @@
     case FMOV_xd: mnemonic = "fmov"; form = form_rf; break;
     case FMOV_sw:
     case FMOV_dx: mnemonic = "fmov"; form = form_fr; break;
+    case FCVTAS_ws:
+    case FCVTAS_xs:
+    case FCVTAS_wd:
+    case FCVTAS_xd: mnemonic = "fcvtas"; form = form_rf; break;
+    case FCVTAU_ws:
+    case FCVTAU_xs:
+    case FCVTAU_wd:
+    case FCVTAU_xd: mnemonic = "fcvtau"; form = form_rf; break;
     case FCVTMS_ws:
     case FCVTMS_xs:
     case FCVTMS_wd:
@@ -1141,7 +1149,7 @@
     case UCVTF_sx_fixed:
     case UCVTF_dw_fixed:
     case UCVTF_dx_fixed: mnemonic = "ucvtf"; form = form_fr; break;
-    default: UNREACHABLE();
+    default: VIXL_UNREACHABLE();
   }
   Format(instr, mnemonic, form);
 }
@@ -1176,7 +1184,7 @@
       }
     }
   } else if (instr->Mask(SystemHintFMask) == SystemHintFixed) {
-    ASSERT(instr->Mask(SystemHintMask) == HINT);
+    VIXL_ASSERT(instr->Mask(SystemHintMask) == HINT);
     switch (instr->ImmHint()) {
       case NOP: {
         mnemonic = "nop";
@@ -1184,6 +1192,24 @@
         break;
       }
     }
+  } else if (instr->Mask(MemBarrierFMask) == MemBarrierFixed) {
+    switch (instr->Mask(MemBarrierMask)) {
+      case DMB: {
+        mnemonic = "dmb";
+        form = "'M";
+        break;
+      }
+      case DSB: {
+        mnemonic = "dsb";
+        form = "'M";
+        break;
+      }
+      case ISB: {
+        mnemonic = "isb";
+        form = NULL;
+        break;
+      }
+    }
   }
 
   Format(instr, mnemonic, form);
@@ -1226,7 +1252,7 @@
 
 void Disassembler::Format(Instruction* instr, const char* mnemonic,
                           const char* format) {
-  ASSERT(mnemonic != NULL);
+  VIXL_ASSERT(mnemonic != NULL);
   ResetOutput();
   Substitute(instr, mnemonic);
   if (format != NULL) {
@@ -1268,8 +1294,9 @@
     case 'A': return SubstitutePCRelAddressField(instr, format);
     case 'B': return SubstituteBranchTargetField(instr, format);
     case 'O': return SubstituteLSRegOffsetField(instr, format);
+    case 'M': return SubstituteBarrierField(instr, format);
     default: {
-      UNREACHABLE();
+      VIXL_UNREACHABLE();
       return 1;
     }
   }
@@ -1294,7 +1321,7 @@
       }
       break;
     }
-    default: UNREACHABLE();
+    default: VIXL_UNREACHABLE();
   }
 
   // Increase field length for registers tagged as stack.
@@ -1331,7 +1358,7 @@
 
 int Disassembler::SubstituteImmediateField(Instruction* instr,
                                            const char* format) {
-  ASSERT(format[0] == 'I');
+  VIXL_ASSERT(format[0] == 'I');
 
   switch (format[1]) {
     case 'M': {  // IMoveImm or IMoveLSL.
@@ -1339,10 +1366,10 @@
         uint64_t imm = instr->ImmMoveWide() << (16 * instr->ShiftMoveWide());
         AppendToOutput("#0x%" PRIx64, imm);
       } else {
-        ASSERT(format[5] == 'L');
+        VIXL_ASSERT(format[5] == 'L');
         AppendToOutput("#0x%" PRIx64, instr->ImmMoveWide());
         if (instr->ShiftMoveWide() > 0) {
-          AppendToOutput(", lsl #%" PRId64, 16 * instr->ShiftMoveWide());
+          AppendToOutput(", lsl #%d", 16 * instr->ShiftMoveWide());
         }
       }
       return 8;
@@ -1384,14 +1411,14 @@
       return 6;
     }
     case 'A': {  // IAddSub.
-      ASSERT(instr->ShiftAddSub() <= 1);
+      VIXL_ASSERT(instr->ShiftAddSub() <= 1);
       int64_t imm = instr->ImmAddSub() << (12 * instr->ShiftAddSub());
       AppendToOutput("#0x%" PRIx64 " (%" PRId64 ")", imm, imm);
       return 7;
     }
     case 'F': {  // IFPSingle, IFPDouble or IFPFBits.
       if (format[3] == 'F') {  // IFPFbits.
-        AppendToOutput("#%" PRId64, 64 - instr->FPScale());
+        AppendToOutput("#%d", 64 - instr->FPScale());
         return 8;
       } else {
         AppendToOutput("#0x%" PRIx64 " (%.4f)", instr->ImmFP(),
@@ -1412,27 +1439,27 @@
       return 5;
     }
     case 'P': {  // IP - Conditional compare.
-      AppendToOutput("#%" PRId64, instr->ImmCondCmp());
+      AppendToOutput("#%d", instr->ImmCondCmp());
       return 2;
     }
     case 'B': {  // Bitfields.
       return SubstituteBitfieldImmediateField(instr, format);
     }
     case 'E': {  // IExtract.
-      AppendToOutput("#%" PRId64, instr->ImmS());
+      AppendToOutput("#%d", instr->ImmS());
       return 8;
     }
     case 'S': {  // IS - Test and branch bit.
-      AppendToOutput("#%" PRId64, (instr->ImmTestBranchBit5() << 5) |
-                                  instr->ImmTestBranchBit40());
+      AppendToOutput("#%d", (instr->ImmTestBranchBit5() << 5) |
+                            instr->ImmTestBranchBit40());
       return 2;
     }
     case 'D': {  // IDebug - HLT and BRK instructions.
-      AppendToOutput("#0x%" PRIx64, instr->ImmException());
+      AppendToOutput("#0x%x", instr->ImmException());
       return 6;
     }
     default: {
-      UNIMPLEMENTED();
+      VIXL_UNIMPLEMENTED();
       return 0;
     }
   }
@@ -1441,7 +1468,7 @@
 
 int Disassembler::SubstituteBitfieldImmediateField(Instruction* instr,
                                                    const char* format) {
-  ASSERT((format[0] == 'I') && (format[1] == 'B'));
+  VIXL_ASSERT((format[0] == 'I') && (format[1] == 'B'));
   unsigned r = instr->ImmR();
   unsigned s = instr->ImmS();
 
@@ -1455,19 +1482,19 @@
         AppendToOutput("#%d", s + 1);
         return 5;
       } else {
-        ASSERT(format[3] == '-');
+        VIXL_ASSERT(format[3] == '-');
         AppendToOutput("#%d", s - r + 1);
         return 7;
       }
     }
     case 'Z': {  // IBZ-r.
-      ASSERT((format[3] == '-') && (format[4] == 'r'));
+      VIXL_ASSERT((format[3] == '-') && (format[4] == 'r'));
       unsigned reg_size = (instr->SixtyFourBits() == 1) ? kXRegSize : kWRegSize;
       AppendToOutput("#%d", reg_size - r);
       return 5;
     }
     default: {
-      UNREACHABLE();
+      VIXL_UNREACHABLE();
       return 0;
     }
   }
@@ -1476,7 +1503,7 @@
 
 int Disassembler::SubstituteLiteralField(Instruction* instr,
                                          const char* format) {
-  ASSERT(strncmp(format, "LValue", 6) == 0);
+  VIXL_ASSERT(strncmp(format, "LValue", 6) == 0);
   USE(format);
 
   switch (instr->Mask(LoadLiteralMask)) {
@@ -1484,7 +1511,7 @@
     case LDR_x_lit:
     case LDR_s_lit:
     case LDR_d_lit: AppendToOutput("(addr %p)", instr->LiteralAddress()); break;
-    default: UNREACHABLE();
+    default: VIXL_UNREACHABLE();
   }
 
   return 6;
@@ -1492,12 +1519,12 @@
 
 
 int Disassembler::SubstituteShiftField(Instruction* instr, const char* format) {
-  ASSERT(format[0] == 'H');
-  ASSERT(instr->ShiftDP() <= 0x3);
+  VIXL_ASSERT(format[0] == 'H');
+  VIXL_ASSERT(instr->ShiftDP() <= 0x3);
 
   switch (format[1]) {
     case 'D': {  // HDP.
-      ASSERT(instr->ShiftDP() != ROR);
+      VIXL_ASSERT(instr->ShiftDP() != ROR);
     }  // Fall through.
     case 'L': {  // HLo.
       if (instr->ImmDPShift() != 0) {
@@ -1508,7 +1535,7 @@
       return 3;
     }
     default:
-      UNIMPLEMENTED();
+      VIXL_UNIMPLEMENTED();
       return 0;
   }
 }
@@ -1516,7 +1543,7 @@
 
 int Disassembler::SubstituteConditionField(Instruction* instr,
                                            const char* format) {
-  ASSERT(format[0] == 'C');
+  VIXL_ASSERT(format[0] == 'C');
   const char* condition_code[] = { "eq", "ne", "hs", "lo",
                                    "mi", "pl", "vs", "vc",
                                    "hi", "ls", "ge", "lt",
@@ -1538,27 +1565,27 @@
 int Disassembler::SubstitutePCRelAddressField(Instruction* instr,
                                               const char* format) {
   USE(format);
-  ASSERT(strncmp(format, "AddrPCRel", 9) == 0);
+  VIXL_ASSERT(strncmp(format, "AddrPCRel", 9) == 0);
 
   int offset = instr->ImmPCRel();
 
   // Only ADR (AddrPCRelByte) is supported.
-  ASSERT(strcmp(format, "AddrPCRelByte") == 0);
+  VIXL_ASSERT(strcmp(format, "AddrPCRelByte") == 0);
 
   char sign = '+';
   if (offset < 0) {
-    offset = -offset;
     sign = '-';
   }
-  // TODO: Extend this to support printing the target address.
-  AppendToOutput("#%c0x%x", sign, offset);
+  VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
+  int distance = (offset < 0) ? -offset : offset;  // Keep offset signed.
+  AppendToOutput("#%c0x%x (addr %p)", sign, distance, instr + offset);
   return 13;
 }
 
 
 int Disassembler::SubstituteBranchTargetField(Instruction* instr,
                                               const char* format) {
-  ASSERT(strncmp(format, "BImm", 4) == 0);
+  VIXL_ASSERT(strncmp(format, "BImm", 4) == 0);
 
   int64_t offset = 0;
   switch (format[5]) {
@@ -1570,7 +1597,7 @@
     case 'm': offset = instr->ImmCmpBranch(); break;
     // BImmTest - test and branch immediate.
     case 'e': offset = instr->ImmTestBranch(); break;
-    default: UNIMPLEMENTED();
+    default: VIXL_UNIMPLEMENTED();
   }
   offset <<= kInstructionSizeLog2;
   char sign = '+';
@@ -1578,15 +1605,16 @@
     offset = -offset;
     sign = '-';
   }
-  AppendToOutput("#%c0x%" PRIx64, sign, offset);
+  VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
+  AppendToOutput("#%c0x%" PRIx64 " (addr %p)", sign, offset, instr + offset);
   return 8;
 }
 
 
 int Disassembler::SubstituteExtendField(Instruction* instr,
                                         const char* format) {
-  ASSERT(strncmp(format, "Ext", 3) == 0);
-  ASSERT(instr->ExtendMode() <= 7);
+  VIXL_ASSERT(strncmp(format, "Ext", 3) == 0);
+  VIXL_ASSERT(instr->ExtendMode() <= 7);
   USE(format);
 
   const char* extend_mode[] = { "uxtb", "uxth", "uxtw", "uxtx",
@@ -1598,12 +1626,12 @@
       (((instr->ExtendMode() == UXTW) && (instr->SixtyFourBits() == 0)) ||
        (instr->ExtendMode() == UXTX))) {
     if (instr->ImmExtendShift() > 0) {
-      AppendToOutput(", lsl #%" PRId64, instr->ImmExtendShift());
+      AppendToOutput(", lsl #%d", instr->ImmExtendShift());
     }
   } else {
     AppendToOutput(", %s", extend_mode[instr->ExtendMode()]);
     if (instr->ImmExtendShift() > 0) {
-      AppendToOutput(" #%" PRId64, instr->ImmExtendShift());
+      AppendToOutput(" #%d", instr->ImmExtendShift());
     }
   }
   return 3;
@@ -1612,7 +1640,7 @@
 
 int Disassembler::SubstituteLSRegOffsetField(Instruction* instr,
                                              const char* format) {
-  ASSERT(strncmp(format, "Offsetreg", 9) == 0);
+  VIXL_ASSERT(strncmp(format, "Offsetreg", 9) == 0);
   const char* extend_mode[] = { "undefined", "undefined", "uxtw", "lsl",
                                 "undefined", "undefined", "sxtw", "sxtx" };
   USE(format);
@@ -1632,7 +1660,7 @@
   if (!((ext == UXTX) && (shift == 0))) {
     AppendToOutput(", %s", extend_mode[ext]);
     if (shift != 0) {
-      AppendToOutput(" #%" PRId64, instr->SizeLS());
+      AppendToOutput(" #%d", instr->SizeLS());
     }
   }
   return 9;
@@ -1641,7 +1669,7 @@
 
 int Disassembler::SubstitutePrefetchField(Instruction* instr,
                                           const char* format) {
-  ASSERT(format[0] == 'P');
+  VIXL_ASSERT(format[0] == 'P');
   USE(format);
 
   int prefetch_mode = instr->PrefetchMode();
@@ -1654,6 +1682,23 @@
   return 6;
 }
 
+int Disassembler::SubstituteBarrierField(Instruction* instr,
+                                         const char* format) {
+  VIXL_ASSERT(format[0] == 'M');
+  USE(format);
+
+  static const char* options[4][4] = {
+    { "sy (0b0000)", "oshld", "oshst", "osh" },
+    { "sy (0b0100)", "nshld", "nshst", "nsh" },
+    { "sy (0b1000)", "ishld", "ishst", "ish" },
+    { "sy (0b1100)", "ld", "st", "sy" }
+  };
+  int domain = instr->ImmBarrierDomain();
+  int type = instr->ImmBarrierType();
+
+  AppendToOutput("%s", options[domain][type]);
+  return 1;
+}
 
 void Disassembler::ResetOutput() {
   buffer_pos_ = 0;
diff --git a/disas/libvixl/a64/disasm-a64.h b/disas/libvixl/a64/disasm-a64.h
index 857a5ac..3a56e15 100644
--- a/disas/libvixl/a64/disasm-a64.h
+++ b/disas/libvixl/a64/disasm-a64.h
@@ -64,6 +64,7 @@
   int SubstituteBranchTargetField(Instruction* instr, const char* format);
   int SubstituteLSRegOffsetField(Instruction* instr, const char* format);
   int SubstitutePrefetchField(Instruction* instr, const char* format);
+  int SubstituteBarrierField(Instruction* instr, const char* format);
 
   inline bool RdIsZROrSP(Instruction* instr) const {
     return (instr->Rd() == kZeroRegCode);
diff --git a/disas/libvixl/a64/instructions-a64.cc b/disas/libvixl/a64/instructions-a64.cc
index e87fa3a..c4eb7c4 100644
--- a/disas/libvixl/a64/instructions-a64.cc
+++ b/disas/libvixl/a64/instructions-a64.cc
@@ -33,20 +33,20 @@
 static uint64_t RotateRight(uint64_t value,
                             unsigned int rotate,
                             unsigned int width) {
-  ASSERT(width <= 64);
+  VIXL_ASSERT(width <= 64);
   rotate &= 63;
-  return ((value & ((1UL << rotate) - 1UL)) << (width - rotate)) |
-         (value >> rotate);
+  return ((value & ((UINT64_C(1) << rotate) - 1)) <<
+          (width - rotate)) | (value >> rotate);
 }
 
 
 static uint64_t RepeatBitsAcrossReg(unsigned reg_size,
                                     uint64_t value,
                                     unsigned width) {
-  ASSERT((width == 2) || (width == 4) || (width == 8) || (width == 16) ||
-         (width == 32));
-  ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
-  uint64_t result = value & ((1UL << width) - 1UL);
+  VIXL_ASSERT((width == 2) || (width == 4) || (width == 8) || (width == 16) ||
+              (width == 32));
+  VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
+  uint64_t result = value & ((UINT64_C(1) << width) - 1);
   for (unsigned i = width; i < reg_size; i *= 2) {
     result |= (result << i);
   }
@@ -84,7 +84,7 @@
     if (imm_s == 0x3F) {
       return 0;
     }
-    uint64_t bits = (1UL << (imm_s + 1)) - 1;
+    uint64_t bits = (UINT64_C(1) << (imm_s + 1)) - 1;
     return RotateRight(bits, imm_r, 64);
   } else {
     if ((imm_s >> 1) == 0x1F) {
@@ -96,14 +96,14 @@
         if ((imm_s & mask) == mask) {
           return 0;
         }
-        uint64_t bits = (1UL << ((imm_s & mask) + 1)) - 1;
+        uint64_t bits = (UINT64_C(1) << ((imm_s & mask) + 1)) - 1;
         return RepeatBitsAcrossReg(reg_size,
                                    RotateRight(bits, imm_r & mask, width),
                                    width);
       }
     }
   }
-  UNREACHABLE();
+  VIXL_UNREACHABLE();
   return 0;
 }
 
@@ -155,7 +155,7 @@
     offset = ImmPCRel();
   } else {
     // All PC-relative branches.
-    ASSERT(BranchType() != UnknownBranchType);
+    VIXL_ASSERT(BranchType() != UnknownBranchType);
     // Relative branch offsets are instruction-size-aligned.
     offset = ImmBranch() << kInstructionSizeLog2;
   }
@@ -169,7 +169,7 @@
     case UncondBranchType: return ImmUncondBranch();
     case CompareBranchType: return ImmCmpBranch();
     case TestBranchType: return ImmTestBranch();
-    default: UNREACHABLE();
+    default: VIXL_UNREACHABLE();
   }
   return 0;
 }
@@ -186,7 +186,7 @@
 
 void Instruction::SetPCRelImmTarget(Instruction* target) {
   // ADRP is not supported, so 'this' must point to an ADR instruction.
-  ASSERT(Mask(PCRelAddressingMask) == ADR);
+  VIXL_ASSERT(Mask(PCRelAddressingMask) == ADR);
 
   Instr imm = Assembler::ImmPCRelAddress(target - this);
 
@@ -195,7 +195,7 @@
 
 
 void Instruction::SetBranchImmTarget(Instruction* target) {
-  ASSERT(((target - this) & 3) == 0);
+  VIXL_ASSERT(((target - this) & 3) == 0);
   Instr branch_imm = 0;
   uint32_t imm_mask = 0;
   int offset = (target - this) >> kInstructionSizeLog2;
@@ -220,14 +220,14 @@
       imm_mask = ImmTestBranch_mask;
       break;
     }
-    default: UNREACHABLE();
+    default: VIXL_UNREACHABLE();
   }
   SetInstructionBits(Mask(~imm_mask) | branch_imm);
 }
 
 
 void Instruction::SetImmLLiteral(Instruction* source) {
-  ASSERT(((source - this) & 3) == 0);
+  VIXL_ASSERT(((source - this) & 3) == 0);
   int offset = (source - this) >> kLiteralEntrySizeLog2;
   Instr imm = Assembler::ImmLLiteral(offset);
   Instr mask = ImmLLiteral_mask;
diff --git a/disas/libvixl/a64/instructions-a64.h b/disas/libvixl/a64/instructions-a64.h
index ba9068c..a4240d7 100644
--- a/disas/libvixl/a64/instructions-a64.h
+++ b/disas/libvixl/a64/instructions-a64.h
@@ -44,30 +44,36 @@
 const unsigned kWRegSize = 32;
 const unsigned kWRegSizeLog2 = 5;
 const unsigned kWRegSizeInBytes = kWRegSize / 8;
+const unsigned kWRegSizeInBytesLog2 = kWRegSizeLog2 - 3;
 const unsigned kXRegSize = 64;
 const unsigned kXRegSizeLog2 = 6;
 const unsigned kXRegSizeInBytes = kXRegSize / 8;
+const unsigned kXRegSizeInBytesLog2 = kXRegSizeLog2 - 3;
 const unsigned kSRegSize = 32;
 const unsigned kSRegSizeLog2 = 5;
 const unsigned kSRegSizeInBytes = kSRegSize / 8;
+const unsigned kSRegSizeInBytesLog2 = kSRegSizeLog2 - 3;
 const unsigned kDRegSize = 64;
 const unsigned kDRegSizeLog2 = 6;
 const unsigned kDRegSizeInBytes = kDRegSize / 8;
-const int64_t kWRegMask = 0x00000000ffffffffLL;
-const int64_t kXRegMask = 0xffffffffffffffffLL;
-const int64_t kSRegMask = 0x00000000ffffffffLL;
-const int64_t kDRegMask = 0xffffffffffffffffLL;
-const int64_t kXSignMask = 0x1LL << 63;
-const int64_t kWSignMask = 0x1LL << 31;
-const int64_t kByteMask = 0xffL;
-const int64_t kHalfWordMask = 0xffffL;
-const int64_t kWordMask = 0xffffffffLL;
-const uint64_t kXMaxUInt = 0xffffffffffffffffULL;
-const uint64_t kWMaxUInt = 0xffffffffULL;
-const int64_t kXMaxInt = 0x7fffffffffffffffLL;
-const int64_t kXMinInt = 0x8000000000000000LL;
-const int32_t kWMaxInt = 0x7fffffff;
-const int32_t kWMinInt = 0x80000000;
+const unsigned kDRegSizeInBytesLog2 = kDRegSizeLog2 - 3;
+const uint64_t kWRegMask = UINT64_C(0xffffffff);
+const uint64_t kXRegMask = UINT64_C(0xffffffffffffffff);
+const uint64_t kSRegMask = UINT64_C(0xffffffff);
+const uint64_t kDRegMask = UINT64_C(0xffffffffffffffff);
+const uint64_t kSSignMask = UINT64_C(0x80000000);
+const uint64_t kDSignMask = UINT64_C(0x8000000000000000);
+const uint64_t kWSignMask = UINT64_C(0x80000000);
+const uint64_t kXSignMask = UINT64_C(0x8000000000000000);
+const uint64_t kByteMask = UINT64_C(0xff);
+const uint64_t kHalfWordMask = UINT64_C(0xffff);
+const uint64_t kWordMask = UINT64_C(0xffffffff);
+const uint64_t kXMaxUInt = UINT64_C(0xffffffffffffffff);
+const uint64_t kWMaxUInt = UINT64_C(0xffffffff);
+const int64_t kXMaxInt = INT64_C(0x7fffffffffffffff);
+const int64_t kXMinInt = INT64_C(0x8000000000000000);
+const int32_t kWMaxInt = INT32_C(0x7fffffff);
+const int32_t kWMinInt = INT32_C(0x80000000);
 const unsigned kLinkRegCode = 30;
 const unsigned kZeroRegCode = 31;
 const unsigned kSPRegInternalCode = 63;
@@ -81,18 +87,28 @@
 
 const float kFP32PositiveInfinity = rawbits_to_float(0x7f800000);
 const float kFP32NegativeInfinity = rawbits_to_float(0xff800000);
-const double kFP64PositiveInfinity = rawbits_to_double(0x7ff0000000000000ULL);
-const double kFP64NegativeInfinity = rawbits_to_double(0xfff0000000000000ULL);
+const double kFP64PositiveInfinity =
+    rawbits_to_double(UINT64_C(0x7ff0000000000000));
+const double kFP64NegativeInfinity =
+    rawbits_to_double(UINT64_C(0xfff0000000000000));
 
 // This value is a signalling NaN as both a double and as a float (taking the
 // least-significant word).
-static const double kFP64SignallingNaN = rawbits_to_double(0x7ff000007f800001ULL);
+static const double kFP64SignallingNaN =
+    rawbits_to_double(UINT64_C(0x7ff000007f800001));
 static const float kFP32SignallingNaN = rawbits_to_float(0x7f800001);
 
 // A similar value, but as a quiet NaN.
-static const double kFP64QuietNaN = rawbits_to_double(0x7ff800007fc00001ULL);
+static const double kFP64QuietNaN =
+    rawbits_to_double(UINT64_C(0x7ff800007fc00001));
 static const float kFP32QuietNaN = rawbits_to_float(0x7fc00001);
 
+// The default NaN values (for FPCR.DN=1).
+static const double kFP64DefaultNaN =
+    rawbits_to_double(UINT64_C(0x7ff8000000000000));
+static const float kFP32DefaultNaN = rawbits_to_float(0x7fc00000);
+
+
 enum LSDataSize {
   LSByte        = 0,
   LSHalfword    = 1,
@@ -325,7 +341,7 @@
   }
 
   inline Instruction* InstructionAtOffset(int64_t offset) {
-    ASSERT(IsWordAligned(this + offset));
+    VIXL_ASSERT(IsWordAligned(this + offset));
     return this + offset;
   }
 
diff --git a/disas/libvixl/globals.h b/disas/libvixl/globals.h
index a6a3fcc..e28dc66 100644
--- a/disas/libvixl/globals.h
+++ b/disas/libvixl/globals.h
@@ -27,8 +27,20 @@
 #ifndef VIXL_GLOBALS_H
 #define VIXL_GLOBALS_H
 
-// Get the standard printf format macros for C99 stdint types.
+// Get standard C99 macros for integer types.
+#ifndef __STDC_CONSTANT_MACROS
+#define __STDC_CONSTANT_MACROS
+#endif
+
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS
+#endif
+
+#ifndef __STDC_FORMAT_MACROS
 #define __STDC_FORMAT_MACROS
+#endif
+
+#include <stdint.h>
 #include <inttypes.h>
 
 #include <assert.h>
@@ -45,21 +57,29 @@
 const int KBytes = 1024;
 const int MBytes = 1024 * KBytes;
 
-  #define ABORT() printf("in %s, line %i", __FILE__, __LINE__); abort()
+#define VIXL_ABORT() printf("in %s, line %i", __FILE__, __LINE__); abort()
 #ifdef DEBUG
-  #define ASSERT(condition) assert(condition)
-  #define CHECK(condition) ASSERT(condition)
-  #define UNIMPLEMENTED() printf("UNIMPLEMENTED\t"); ABORT()
-  #define UNREACHABLE() printf("UNREACHABLE\t"); ABORT()
+  #define VIXL_ASSERT(condition) assert(condition)
+  #define VIXL_CHECK(condition) VIXL_ASSERT(condition)
+  #define VIXL_UNIMPLEMENTED() printf("UNIMPLEMENTED\t"); VIXL_ABORT()
+  #define VIXL_UNREACHABLE() printf("UNREACHABLE\t"); VIXL_ABORT()
 #else
-  #define ASSERT(condition) ((void) 0)
-  #define CHECK(condition) assert(condition)
-  #define UNIMPLEMENTED() ((void) 0)
-  #define UNREACHABLE() ((void) 0)
+  #define VIXL_ASSERT(condition) ((void) 0)
+  #define VIXL_CHECK(condition) assert(condition)
+  #define VIXL_UNIMPLEMENTED() ((void) 0)
+  #define VIXL_UNREACHABLE() ((void) 0)
 #endif
+// This is not as powerful as template based assertions, but it is simple.
+// It assumes that the descriptions are unique. If this starts being a problem,
+// we can switch to a different implementation.
+#define VIXL_CONCAT(a, b) a##b
+#define VIXL_STATIC_ASSERT_LINE(line, condition) \
+  typedef char VIXL_CONCAT(STATIC_ASSERT_LINE_, line)[(condition) ? 1 : -1] \
+  __attribute__((unused))
+#define VIXL_STATIC_ASSERT(condition) VIXL_STATIC_ASSERT_LINE(__LINE__, condition) //NOLINT
 
 template <typename T> inline void USE(T) {}
 
-#define ALIGNMENT_EXCEPTION() printf("ALIGNMENT EXCEPTION\t"); ABORT()
+#define VIXL_ALIGNMENT_EXCEPTION() printf("ALIGNMENT EXCEPTION\t"); VIXL_ABORT()
 
 #endif  // VIXL_GLOBALS_H
diff --git a/disas/libvixl/platform.h b/disas/libvixl/platform.h
index a2600f3..b5c2085 100644
--- a/disas/libvixl/platform.h
+++ b/disas/libvixl/platform.h
@@ -34,9 +34,7 @@
 // Currently we assume running the simulator implies running on x86 hardware.
 inline void HostBreakpoint() { asm("int3"); }
 #else
-inline void HostBreakpoint() {
-  // TODO: Implement HostBreakpoint on a64.
-}
+inline void HostBreakpoint() { asm("brk"); }
 #endif
 }  // namespace vixl
 
diff --git a/disas/libvixl/utils.cc b/disas/libvixl/utils.cc
index a45fb95..c9c05d1 100644
--- a/disas/libvixl/utils.cc
+++ b/disas/libvixl/utils.cc
@@ -58,9 +58,9 @@
 
 
 int CountLeadingZeros(uint64_t value, int width) {
-  ASSERT((width == 32) || (width == 64));
+  VIXL_ASSERT((width == 32) || (width == 64));
   int count = 0;
-  uint64_t bit_test = 1UL << (width - 1);
+  uint64_t bit_test = UINT64_C(1) << (width - 1);
   while ((count < width) && ((bit_test & value) == 0)) {
     count++;
     bit_test >>= 1;
@@ -70,7 +70,7 @@
 
 
 int CountLeadingSignBits(int64_t value, int width) {
-  ASSERT((width == 32) || (width == 64));
+  VIXL_ASSERT((width == 32) || (width == 64));
   if (value >= 0) {
     return CountLeadingZeros(value, width) - 1;
   } else {
@@ -80,7 +80,7 @@
 
 
 int CountTrailingZeros(uint64_t value, int width) {
-  ASSERT((width == 32) || (width == 64));
+  VIXL_ASSERT((width == 32) || (width == 64));
   int count = 0;
   while ((count < width) && (((value >> count) & 1) == 0)) {
     count++;
@@ -92,10 +92,10 @@
 int CountSetBits(uint64_t value, int width) {
   // TODO: Other widths could be added here, as the implementation already
   // supports them.
-  ASSERT((width == 32) || (width == 64));
+  VIXL_ASSERT((width == 32) || (width == 64));
 
   // Mask out unused bits to ensure that they are not counted.
-  value &= (0xffffffffffffffffULL >> (64-width));
+  value &= (UINT64_C(0xffffffffffffffff) >> (64-width));
 
   // Add up the set bits.
   // The algorithm works by adding pairs of bit fields together iteratively,
@@ -108,18 +108,19 @@
   // value =   h+g+f+e     d+c+b+a
   //                  \          |
   // value =       h+g+f+e+d+c+b+a
-  value = ((value >> 1) & 0x5555555555555555ULL) +
-           (value & 0x5555555555555555ULL);
-  value = ((value >> 2) & 0x3333333333333333ULL) +
-           (value & 0x3333333333333333ULL);
-  value = ((value >> 4) & 0x0f0f0f0f0f0f0f0fULL) +
-           (value & 0x0f0f0f0f0f0f0f0fULL);
-  value = ((value >> 8) & 0x00ff00ff00ff00ffULL) +
-           (value & 0x00ff00ff00ff00ffULL);
-  value = ((value >> 16) & 0x0000ffff0000ffffULL) +
-           (value & 0x0000ffff0000ffffULL);
-  value = ((value >> 32) & 0x00000000ffffffffULL) +
-           (value & 0x00000000ffffffffULL);
+  const uint64_t kMasks[] = {
+    UINT64_C(0x5555555555555555),
+    UINT64_C(0x3333333333333333),
+    UINT64_C(0x0f0f0f0f0f0f0f0f),
+    UINT64_C(0x00ff00ff00ff00ff),
+    UINT64_C(0x0000ffff0000ffff),
+    UINT64_C(0x00000000ffffffff),
+  };
+
+  for (unsigned i = 0; i < (sizeof(kMasks) / sizeof(kMasks[0])); i++) {
+    int shift = 1 << i;
+    value = ((value >> shift) & kMasks[i]) + (value & kMasks[i]);
+  }
 
   return value;
 }
diff --git a/disas/libvixl/utils.h b/disas/libvixl/utils.h
index 029341e..83c928c 100644
--- a/disas/libvixl/utils.h
+++ b/disas/libvixl/utils.h
@@ -27,7 +27,7 @@
 #ifndef VIXL_UTILS_H
 #define VIXL_UTILS_H
 
-
+#include <math.h>
 #include <string.h>
 #include "globals.h"
 
@@ -35,19 +35,19 @@
 
 // Check number width.
 inline bool is_intn(unsigned n, int64_t x) {
-  ASSERT((0 < n) && (n < 64));
-  int64_t limit = 1ULL << (n - 1);
+  VIXL_ASSERT((0 < n) && (n < 64));
+  int64_t limit = INT64_C(1) << (n - 1);
   return (-limit <= x) && (x < limit);
 }
 
 inline bool is_uintn(unsigned n, int64_t x) {
-  ASSERT((0 < n) && (n < 64));
+  VIXL_ASSERT((0 < n) && (n < 64));
   return !(x >> n);
 }
 
 inline unsigned truncate_to_intn(unsigned n, int64_t x) {
-  ASSERT((0 < n) && (n < 64));
-  return (x & ((1ULL << n) - 1));
+  VIXL_ASSERT((0 < n) && (n < 64));
+  return (x & ((INT64_C(1) << n) - 1));
 }
 
 #define INT_1_TO_63_LIST(V)                                                    \
@@ -90,13 +90,67 @@
   return (x << (63 - msb)) >> (lsb + 63 - msb);
 }
 
-// floating point representation
+// Floating point representation.
 uint32_t float_to_rawbits(float value);
 uint64_t double_to_rawbits(double value);
 float rawbits_to_float(uint32_t bits);
 double rawbits_to_double(uint64_t bits);
 
-// Bits counting.
+
+// NaN tests: classify a floating-point value as a signalling or quiet NaN.
+inline bool IsSignallingNaN(double num) {
+  const uint64_t kFP64QuietNaNMask = UINT64_C(0x0008000000000000);  // Bit 51 of the raw bits.
+  uint64_t raw = double_to_rawbits(num);
+  if (isnan(num) && ((raw & kFP64QuietNaNMask) == 0)) {  // NaN with the quiet bit clear.
+    return true;
+  }
+  return false;  // Not a NaN, or a NaN with the quiet bit set.
+}
+
+
+inline bool IsSignallingNaN(float num) {  // Single-precision overload.
+  const uint32_t kFP32QuietNaNMask = 0x00400000;  // Bit 22 of the raw bits.
+  uint32_t raw = float_to_rawbits(num);
+  if (isnan(num) && ((raw & kFP32QuietNaNMask) == 0)) {  // NaN with the quiet bit clear.
+    return true;
+  }
+  return false;  // Not a NaN, or a NaN with the quiet bit set.
+}
+
+
+template <typename T>
+inline bool IsQuietNaN(T num) {
+  return isnan(num) && !IsSignallingNaN(num);  // True for any NaN that is not signalling.
+}
+
+
+// Convert the NaN in 'num' to a quiet NaN by setting its quiet bit.
+inline double ToQuietNaN(double num) {
+  const uint64_t kFP64QuietNaNMask = UINT64_C(0x0008000000000000);  // Bit 51 of the raw bits.
+  VIXL_ASSERT(isnan(num));  // 'num' is expected to already be a NaN.
+  return rawbits_to_double(double_to_rawbits(num) | kFP64QuietNaNMask);  // All other bits are kept.
+}
+
+
+inline float ToQuietNaN(float num) {  // Single-precision overload.
+  const uint32_t kFP32QuietNaNMask = 0x00400000;  // Bit 22 of the raw bits.
+  VIXL_ASSERT(isnan(num));  // 'num' is expected to already be a NaN.
+  return rawbits_to_float(float_to_rawbits(num) | kFP32QuietNaNMask);  // All other bits are kept.
+}
+
+
+// Fused multiply-add: returns (op1 * op2) + a as computed by the C library.
+inline double FusedMultiplyAdd(double op1, double op2, double a) {
+  return fma(op1, op2, a);  // Double-precision variant from <math.h>.
+}
+
+
+inline float FusedMultiplyAdd(float op1, float op2, float a) {  // Single-precision overload.
+  return fmaf(op1, op2, a);  // (op1 * op2) + a via the float variant from <math.h>.
+}
+
+
+// Bit counting.
 int CountLeadingZeros(uint64_t value, int width);
 int CountLeadingSignBits(int64_t value, int width);
 int CountTrailingZeros(uint64_t value, int width);
@@ -106,20 +160,30 @@
 // TODO: rename/refactor to make it specific to instructions.
 template<typename T>
 bool IsWordAligned(T pointer) {
-  ASSERT(sizeof(pointer) == sizeof(intptr_t));   // NOLINT(runtime/sizeof)
+  VIXL_ASSERT(sizeof(pointer) == sizeof(intptr_t));   // NOLINT(runtime/sizeof)
   return (reinterpret_cast<intptr_t>(pointer) & 3) == 0;
 }
 
 // Increment a pointer until it has the specified alignment.
 template<class T>
 T AlignUp(T pointer, size_t alignment) {
-  ASSERT(sizeof(pointer) == sizeof(uintptr_t));
+  VIXL_STATIC_ASSERT(sizeof(pointer) == sizeof(uintptr_t));
   uintptr_t pointer_raw = reinterpret_cast<uintptr_t>(pointer);
   size_t align_step = (alignment - pointer_raw) % alignment;
-  ASSERT((pointer_raw + align_step) % alignment == 0);
+  VIXL_ASSERT((pointer_raw + align_step) % alignment == 0);
   return reinterpret_cast<T>(pointer_raw + align_step);
 }
 
+// Decrement a pointer until it has the specified alignment.
+template<class T>
+T AlignDown(T pointer, size_t alignment) {
+  VIXL_STATIC_ASSERT(sizeof(pointer) == sizeof(uintptr_t));  // T must be pointer-sized.
+  uintptr_t pointer_raw = reinterpret_cast<uintptr_t>(pointer);
+  size_t align_step = pointer_raw % alignment;  // Distance past the previous multiple; 'alignment' must be non-zero.
+  VIXL_ASSERT((pointer_raw - align_step) % alignment == 0);
+  return reinterpret_cast<T>(pointer_raw - align_step);  // Unchanged when already aligned.
+}
+
 
 }  // namespace vixl
 
diff --git a/hw/arm/highbank.c b/hw/arm/highbank.c
index 46b9f1e..24231e5 100644
--- a/hw/arm/highbank.c
+++ b/hw/arm/highbank.c
@@ -134,7 +134,6 @@
     .name = "highbank-regs",
     .version_id = 0,
     .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32_ARRAY(regs, HighbankRegsState, NUM_REGS),
         VMSTATE_END_OF_LIST(),
diff --git a/hw/arm/musicpal.c b/hw/arm/musicpal.c
index de54201..2a27a19d 100644
--- a/hw/arm/musicpal.c
+++ b/hw/arm/musicpal.c
@@ -405,7 +405,6 @@
     .name = "mv88w8618_eth",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(smir, mv88w8618_eth_state),
         VMSTATE_UINT32(icr, mv88w8618_eth_state),
@@ -642,7 +641,6 @@
     .name = "musicpal_lcd",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(brightness, musicpal_lcd_state),
         VMSTATE_UINT32(mode, musicpal_lcd_state),
@@ -769,7 +767,6 @@
     .name = "mv88w8618_pic",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(level, mv88w8618_pic_state),
         VMSTATE_UINT32(enabled, mv88w8618_pic_state),
@@ -940,7 +937,6 @@
     .name = "timer",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_PTIMER(ptimer, mv88w8618_timer_state),
         VMSTATE_UINT32(limit, mv88w8618_timer_state),
@@ -952,7 +948,6 @@
     .name = "mv88w8618_pit",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_STRUCT_ARRAY(timer, mv88w8618_pit_state, 4, 1,
                              mv88w8618_timer_vmsd, mv88w8618_timer_state),
@@ -1041,7 +1036,6 @@
     .name = "mv88w8618_flashcfg",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(cfgr0, mv88w8618_flashcfg_state),
         VMSTATE_END_OF_LIST()
@@ -1381,7 +1375,6 @@
     .name = "musicpal_gpio",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(lcd_brightness, musicpal_gpio_state),
         VMSTATE_UINT32(out_state, musicpal_gpio_state),
@@ -1548,7 +1541,6 @@
     .name = "musicpal_key",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(kbd_extended, musicpal_key_state),
         VMSTATE_UINT32(pressed_keys, musicpal_key_state),
diff --git a/hw/arm/omap1.c b/hw/arm/omap1.c
index b433748..b28e052 100644
--- a/hw/arm/omap1.c
+++ b/hw/arm/omap1.c
@@ -2709,8 +2709,8 @@
             s->ti += ti[1];
         } else {
             /* A less accurate version */
-            s->ti -= (s->current_tm.tm_year % 100) * 31536000;
-            s->ti += from_bcd(value) * 31536000;
+            s->ti -= (time_t)(s->current_tm.tm_year % 100) * 31536000;
+            s->ti += (time_t)from_bcd(value) * 31536000;
         }
         return;
 
diff --git a/hw/arm/pxa2xx.c b/hw/arm/pxa2xx.c
index e0cd847..2d28a11 100644
--- a/hw/arm/pxa2xx.c
+++ b/hw/arm/pxa2xx.c
@@ -148,8 +148,7 @@
     .name = "pxa2xx_pm",
     .version_id = 0,
     .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT32_ARRAY(pm_regs, PXA2xxState, 0x40),
         VMSTATE_END_OF_LIST()
     }
@@ -215,8 +214,7 @@
     .name = "pxa2xx_cm",
     .version_id = 0,
     .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT32_ARRAY(cm_regs, PXA2xxState, 4),
         VMSTATE_UINT32(clkcfg, PXA2xxState),
         VMSTATE_UINT32(pmnc, PXA2xxState),
@@ -440,8 +438,7 @@
     .name = "pxa2xx_mm",
     .version_id = 0,
     .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT32_ARRAY(mm_regs, PXA2xxState, 0x1a),
         VMSTATE_END_OF_LIST()
     }
@@ -1172,7 +1169,6 @@
     .name = "pxa2xx_rtc",
     .version_id = 0,
     .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
     .pre_save = pxa2xx_rtc_pre_save,
     .post_load = pxa2xx_rtc_post_load,
     .fields = (VMStateField[]) {
@@ -1436,8 +1432,7 @@
     .name = "pxa2xx_i2c_slave",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
-    .fields      = (VMStateField []) {
+    .fields = (VMStateField[]) {
         VMSTATE_I2C_SLAVE(parent_obj, PXA2xxI2CSlaveState),
         VMSTATE_END_OF_LIST()
     }
@@ -1447,8 +1442,7 @@
     .name = "pxa2xx_i2c",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
-    .fields      = (VMStateField []) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT16(control, PXA2xxI2CState),
         VMSTATE_UINT16(status, PXA2xxI2CState),
         VMSTATE_UINT8(ibmr, PXA2xxI2CState),
@@ -1705,8 +1699,7 @@
     .name = "pxa2xx_i2s",
     .version_id = 0,
     .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT32_ARRAY(control, PXA2xxI2SState, 2),
         VMSTATE_UINT32(status, PXA2xxI2SState),
         VMSTATE_UINT32(mask, PXA2xxI2SState),
diff --git a/hw/arm/pxa2xx_gpio.c b/hw/arm/pxa2xx_gpio.c
index 0727428..7f75f05 100644
--- a/hw/arm/pxa2xx_gpio.c
+++ b/hw/arm/pxa2xx_gpio.c
@@ -313,8 +313,7 @@
     .name = "pxa2xx-gpio",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
-    .fields = (VMStateField []) {
+    .fields = (VMStateField[]) {
         VMSTATE_INT32(lines, PXA2xxGPIOInfo),
         VMSTATE_UINT32_ARRAY(ilevel, PXA2xxGPIOInfo, PXA2XX_GPIO_BANKS),
         VMSTATE_UINT32_ARRAY(olevel, PXA2xxGPIOInfo, PXA2XX_GPIO_BANKS),
diff --git a/hw/arm/pxa2xx_pic.c b/hw/arm/pxa2xx_pic.c
index d37fb54..9cfc714 100644
--- a/hw/arm/pxa2xx_pic.c
+++ b/hw/arm/pxa2xx_pic.c
@@ -296,7 +296,6 @@
     .name = "pxa2xx_pic",
     .version_id = 0,
     .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
     .post_load = pxa2xx_pic_post_load,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32_ARRAY(int_enabled, PXA2xxPICState, 2),
diff --git a/hw/arm/spitz.c b/hw/arm/spitz.c
index 392ca84..a179c1d 100644
--- a/hw/arm/spitz.c
+++ b/hw/arm/spitz.c
@@ -1006,8 +1006,7 @@
     .name = "sl-nand",
     .version_id = 0,
     .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
-    .fields = (VMStateField []) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT8(ctl, SLNANDState),
         VMSTATE_STRUCT(ecc, SLNANDState, 0, vmstate_ecc_state, ECCState),
         VMSTATE_END_OF_LIST(),
@@ -1041,9 +1040,8 @@
     .name = "spitz-keyboard",
     .version_id = 1,
     .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
     .post_load = spitz_keyboard_post_load,
-    .fields = (VMStateField []) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT16(sense_state, SpitzKeyboardState),
         VMSTATE_UINT16(strobe_state, SpitzKeyboardState),
         VMSTATE_UNUSED_TEST(is_version_0, 5),
@@ -1076,8 +1074,7 @@
     .name = "corgi-ssp",
     .version_id = 2,
     .minimum_version_id = 2,
-    .minimum_version_id_old = 2,
-    .fields = (VMStateField []) {
+    .fields = (VMStateField[]) {
         VMSTATE_SSI_SLAVE(ssidev, CorgiSSPState),
         VMSTATE_UINT32_ARRAY(enable, CorgiSSPState, 3),
         VMSTATE_END_OF_LIST(),
@@ -1105,8 +1102,7 @@
     .name = "spitz-lcdtg",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
-    .fields = (VMStateField []) {
+    .fields = (VMStateField[]) {
         VMSTATE_SSI_SLAVE(ssidev, SpitzLCDTG),
         VMSTATE_UINT32(bl_intensity, SpitzLCDTG),
         VMSTATE_UINT32(bl_power, SpitzLCDTG),
diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c
index d6cc77b..a2095c0 100644
--- a/hw/arm/stellaris.c
+++ b/hw/arm/stellaris.c
@@ -185,12 +185,19 @@
     case 0x44: /* TBPMR */
         return s->match_prescale[1];
     case 0x48: /* TAR */
-        if (s->control == 1)
+        if (s->config == 1) {
             return s->rtc;
+        }
+        qemu_log_mask(LOG_UNIMP,
+                      "GPTM: read of TAR but timer read not supported");
+        return 0;
     case 0x4c: /* TBR */
-        hw_error("TODO: Timer value read\n");
+        qemu_log_mask(LOG_UNIMP,
+                      "GPTM: read of TBR but timer read not supported");
+        return 0;
     default:
-        hw_error("gptm_read: Bad offset 0x%x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "GPTM: read at bad offset 0x%x\n", (int)offset);
         return 0;
     }
 }
@@ -286,8 +293,7 @@
     .name = "stellaris_gptm",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT32(config, gptm_state),
         VMSTATE_UINT32_ARRAY(mode, gptm_state, 2),
         VMSTATE_UINT32(control, gptm_state),
@@ -643,9 +649,8 @@
     .name = "stellaris_sys",
     .version_id = 2,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .post_load = stellaris_sys_post_load,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT32(pborctl, ssys_state),
         VMSTATE_UINT32(ldopctl, ssys_state),
         VMSTATE_UINT32(int_mask, ssys_state),
@@ -851,8 +856,7 @@
     .name = "stellaris_i2c",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT32(msa, stellaris_i2c_state),
         VMSTATE_UINT32(mcs, stellaris_i2c_state),
         VMSTATE_UINT32(mdr, stellaris_i2c_state),
@@ -1121,8 +1125,7 @@
     .name = "stellaris_adc",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT32(actss, stellaris_adc_state),
         VMSTATE_UINT32(ris, stellaris_adc_state),
         VMSTATE_UINT32(im, stellaris_adc_state),
diff --git a/hw/arm/strongarm.c b/hw/arm/strongarm.c
index 170d0ce..0da9015 100644
--- a/hw/arm/strongarm.c
+++ b/hw/arm/strongarm.c
@@ -199,7 +199,6 @@
     .name = "strongarm_pic",
     .version_id = 0,
     .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
     .post_load = strongarm_pic_post_load,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(pending, StrongARMPICState),
@@ -424,7 +423,6 @@
     .name = "strongarm-rtc",
     .version_id = 0,
     .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
     .pre_save = strongarm_rtc_pre_save,
     .post_load = strongarm_rtc_post_load,
     .fields = (VMStateField[]) {
@@ -670,7 +668,6 @@
     .name = "strongarm-gpio",
     .version_id = 0,
     .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(ilevel, StrongARMGPIOInfo),
         VMSTATE_UINT32(olevel, StrongARMGPIOInfo),
@@ -842,7 +839,6 @@
     .name = "strongarm-ppc",
     .version_id = 0,
     .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(ilevel, StrongARMPPCInfo),
         VMSTATE_UINT32(olevel, StrongARMPPCInfo),
@@ -1293,7 +1289,6 @@
     .name = "strongarm-uart",
     .version_id = 0,
     .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
     .post_load = strongarm_uart_post_load,
     .fields = (VMStateField[]) {
         VMSTATE_UINT8(utcr0, StrongARMUARTState),
@@ -1553,7 +1548,6 @@
     .name = "strongarm-ssp",
     .version_id = 0,
     .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
     .post_load = strongarm_ssp_post_load,
     .fields = (VMStateField[]) {
         VMSTATE_UINT16_ARRAY(sscr, StrongARMSSPState, 2),
diff --git a/hw/arm/z2.c b/hw/arm/z2.c
index 67c1be8..5df014b 100644
--- a/hw/arm/z2.c
+++ b/hw/arm/z2.c
@@ -164,7 +164,6 @@
     .name = "zipit-lcd",
     .version_id = 2,
     .minimum_version_id = 2,
-    .minimum_version_id_old = 2,
     .fields = (VMStateField[]) {
         VMSTATE_SSI_SLAVE(ssidev, ZipitLCD),
         VMSTATE_INT32(selected, ZipitLCD),
@@ -275,7 +274,6 @@
     .name = "aer915",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_INT32(len, AER915State),
         VMSTATE_BUFFER(buf, AER915State),
diff --git a/hw/audio/lm4549.c b/hw/audio/lm4549.c
index d75f7ec..380ef60 100644
--- a/hw/audio/lm4549.c
+++ b/hw/audio/lm4549.c
@@ -324,9 +324,8 @@
     .name = "lm4549_state",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
-    .post_load = &lm4549_post_load,
-    .fields      = (VMStateField[]) {
+    .post_load = lm4549_post_load,
+    .fields = (VMStateField[]) {
         VMSTATE_UINT32(voice_is_active, lm4549_state),
         VMSTATE_UINT16_ARRAY(regfile, lm4549_state, 128),
         VMSTATE_UINT16_ARRAY(buffer, lm4549_state, LM4549_BUFFER_SIZE),
diff --git a/hw/audio/marvell_88w8618.c b/hw/audio/marvell_88w8618.c
index cdce238..8699267 100644
--- a/hw/audio/marvell_88w8618.c
+++ b/hw/audio/marvell_88w8618.c
@@ -259,7 +259,6 @@
     .name = "mv88w8618_audio",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(playback_mode, mv88w8618_audio_state),
         VMSTATE_UINT32(status, mv88w8618_audio_state),
diff --git a/hw/audio/pl041.c b/hw/audio/pl041.c
index ed82be5..19982f2 100644
--- a/hw/audio/pl041.c
+++ b/hw/audio/pl041.c
@@ -561,8 +561,7 @@
     .name = "pl041_regfile",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
 #define REGISTER(name, offset) VMSTATE_UINT32(name, pl041_regfile),
         #include "pl041.hx"
 #undef REGISTER
@@ -574,8 +573,7 @@
     .name = "pl041_fifo",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT32(level, pl041_fifo),
         VMSTATE_UINT32_ARRAY(data, pl041_fifo, MAX_FIFO_DEPTH),
         VMSTATE_END_OF_LIST()
@@ -586,8 +584,7 @@
     .name = "pl041_channel",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_STRUCT(tx_fifo, pl041_channel, 0,
                        vmstate_pl041_fifo, pl041_fifo),
         VMSTATE_UINT8(tx_enabled, pl041_channel),
diff --git a/hw/audio/wm8750.c b/hw/audio/wm8750.c
index c18f245..b50b331 100644
--- a/hw/audio/wm8750.c
+++ b/hw/audio/wm8750.c
@@ -583,10 +583,9 @@
     .name = CODEC,
     .version_id = 0,
     .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
     .pre_save = wm8750_pre_save,
     .post_load = wm8750_post_load,
-    .fields      = (VMStateField []) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT8_ARRAY(i2c_data, WM8750State, 2),
         VMSTATE_INT32(i2c_len, WM8750State),
         VMSTATE_INT32(enable, WM8750State),
diff --git a/hw/block/ecc.c b/hw/block/ecc.c
index 8c888cc..10bb233 100644
--- a/hw/block/ecc.c
+++ b/hw/block/ecc.c
@@ -81,8 +81,7 @@
     .name = "ecc-state",
     .version_id = 0,
     .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
-    .fields = (VMStateField []) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT8(cp, ECCState),
         VMSTATE_UINT16_ARRAY(lp, ECCState, 2),
         VMSTATE_UINT16(count, ECCState),
diff --git a/hw/block/nand.c b/hw/block/nand.c
index 6d7c804..38eefd4 100644
--- a/hw/block/nand.c
+++ b/hw/block/nand.c
@@ -346,10 +346,9 @@
     .name = "nand",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .pre_save = nand_pre_save,
     .post_load = nand_post_load,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT8(cle, NANDFlashState),
         VMSTATE_UINT8(ale, NANDFlashState),
         VMSTATE_UINT8(ce, NANDFlashState),
diff --git a/hw/block/onenand.c b/hw/block/onenand.c
index aae9ee7..60d5311 100644
--- a/hw/block/onenand.c
+++ b/hw/block/onenand.c
@@ -169,7 +169,6 @@
     .name = "onenand",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .pre_save = onenand_pre_save,
     .post_load = onenand_post_load,
     .fields = (VMStateField[]) {
diff --git a/hw/char/cadence_uart.c b/hw/char/cadence_uart.c
index 1012f1a..bf0c853 100644
--- a/hw/char/cadence_uart.c
+++ b/hw/char/cadence_uart.c
@@ -504,7 +504,6 @@
     .name = "cadence_uart",
     .version_id = 2,
     .minimum_version_id = 2,
-    .minimum_version_id_old = 2,
     .post_load = cadence_uart_post_load,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32_ARRAY(r, UartState, R_MAX),
diff --git a/hw/char/digic-uart.c b/hw/char/digic-uart.c
index fd8e077..8abe944 100644
--- a/hw/char/digic-uart.c
+++ b/hw/char/digic-uart.c
@@ -162,7 +162,6 @@
     .name = "digic-uart",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(reg_rx, DigicUartState),
         VMSTATE_UINT32(reg_st, DigicUartState),
diff --git a/hw/char/exynos4210_uart.c b/hw/char/exynos4210_uart.c
index 19b59cc..7614e58 100644
--- a/hw/char/exynos4210_uart.c
+++ b/hw/char/exynos4210_uart.c
@@ -560,7 +560,6 @@
     .name = "exynos4210.uart.fifo",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(sp, Exynos4210UartFIFO),
         VMSTATE_UINT32(rp, Exynos4210UartFIFO),
@@ -573,7 +572,6 @@
     .name = "exynos4210.uart",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_STRUCT(rx, Exynos4210UartState, 1,
                        vmstate_exynos4210_uart_fifo, Exynos4210UartFIFO),
diff --git a/hw/char/imx_serial.c b/hw/char/imx_serial.c
index 7f16835..f3fbc77 100644
--- a/hw/char/imx_serial.c
+++ b/hw/char/imx_serial.c
@@ -76,7 +76,6 @@
     .name = "imx-serial",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_INT32(readbuff, IMXSerialState),
         VMSTATE_UINT32(usr1, IMXSerialState),
diff --git a/hw/char/pl011.c b/hw/char/pl011.c
index 644aad7..0a45115 100644
--- a/hw/char/pl011.c
+++ b/hw/char/pl011.c
@@ -251,8 +251,7 @@
     .name = "pl011",
     .version_id = 2,
     .minimum_version_id = 2,
-    .minimum_version_id_old = 2,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT32(readbuff, PL011State),
         VMSTATE_UINT32(flags, PL011State),
         VMSTATE_UINT32(lcr, PL011State),
diff --git a/hw/display/ads7846.c b/hw/display/ads7846.c
index 85252a2..3f35369 100644
--- a/hw/display/ads7846.c
+++ b/hw/display/ads7846.c
@@ -121,9 +121,8 @@
     .name = "ads7846",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .post_load = ads7856_post_load,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_SSI_SLAVE(ssidev, ADS7846State),
         VMSTATE_INT32_ARRAY(input, ADS7846State, 8),
         VMSTATE_INT32(noise, ADS7846State),
diff --git a/hw/display/exynos4210_fimd.c b/hw/display/exynos4210_fimd.c
index 9750330..45c62af 100644
--- a/hw/display/exynos4210_fimd.c
+++ b/hw/display/exynos4210_fimd.c
@@ -1845,7 +1845,7 @@
     .name = "exynos4210.fimd_window",
     .version_id = 1,
     .minimum_version_id = 1,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT32(wincon, Exynos4210fimdWindow),
         VMSTATE_UINT32_ARRAY(buf_start, Exynos4210fimdWindow, 3),
         VMSTATE_UINT32_ARRAY(buf_end, Exynos4210fimdWindow, 3),
@@ -1875,7 +1875,7 @@
     .version_id = 1,
     .minimum_version_id = 1,
     .post_load = exynos4210_fimd_load,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT32_ARRAY(vidcon, Exynos4210fimdState, 4),
         VMSTATE_UINT32_ARRAY(vidtcon, Exynos4210fimdState, 4),
         VMSTATE_UINT32(shadowcon, Exynos4210fimdState),
diff --git a/hw/display/pxa2xx_lcd.c b/hw/display/pxa2xx_lcd.c
index 09cdf17..80edb70 100644
--- a/hw/display/pxa2xx_lcd.c
+++ b/hw/display/pxa2xx_lcd.c
@@ -932,8 +932,7 @@
     .name = "dma_channel",
     .version_id = 0,
     .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT32(branch, struct DMAChannel),
         VMSTATE_UINT8(up, struct DMAChannel),
         VMSTATE_BUFFER(pbuffer, struct DMAChannel),
@@ -959,9 +958,8 @@
     .name = "pxa2xx_lcdc",
     .version_id = 0,
     .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
     .post_load = pxa2xx_lcdc_post_load,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_INT32(irqlevel, PXA2xxLCDState),
         VMSTATE_INT32(transp, PXA2xxLCDState),
         VMSTATE_UINT32_ARRAY(control, PXA2xxLCDState, 6),
diff --git a/hw/display/ssd0303.c b/hw/display/ssd0303.c
index c2eea04..f6804fb 100644
--- a/hw/display/ssd0303.c
+++ b/hw/display/ssd0303.c
@@ -272,8 +272,7 @@
     .name = "ssd0303_oled",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
-    .fields      = (VMStateField []) {
+    .fields = (VMStateField[]) {
         VMSTATE_INT32(row, ssd0303_state),
         VMSTATE_INT32(col, ssd0303_state),
         VMSTATE_INT32(start_line, ssd0303_state),
diff --git a/hw/dma/omap_dma.c b/hw/dma/omap_dma.c
index 0e8cccd..0f35c42 100644
--- a/hw/dma/omap_dma.c
+++ b/hw/dma/omap_dma.c
@@ -973,7 +973,7 @@
 
     case 0x22:	/* DMA_COLOR_U */
         ch->color &= 0xffff;
-        ch->color |= value << 16;
+        ch->color |= (uint32_t)value << 16;
         break;
 
     case 0x24:	/* DMA_CCR2 */
@@ -1043,7 +1043,7 @@
 
     case 0xbca:	/* TOP_B1_U */
         s->src_f1_top &= 0x0000ffff;
-        s->src_f1_top |= value << 16;
+        s->src_f1_top |= (uint32_t)value << 16;
         break;
 
     case 0xbcc:	/* BOT_B1_L */
@@ -1265,7 +1265,7 @@
 
     case 0x304:	/* SYS_DMA_LCD_TOP_F1_U */
         s->src_f1_top &= 0x0000ffff;
-        s->src_f1_top |= value << 16;
+        s->src_f1_top |= (uint32_t)value << 16;
         break;
 
     case 0x306:	/* SYS_DMA_LCD_BOT_F1_L */
@@ -1275,7 +1275,7 @@
 
     case 0x308:	/* SYS_DMA_LCD_BOT_F1_U */
         s->src_f1_bottom &= 0x0000ffff;
-        s->src_f1_bottom |= value << 16;
+        s->src_f1_bottom |= (uint32_t)value << 16;
         break;
 
     case 0x30a:	/* SYS_DMA_LCD_TOP_F2_L */
@@ -1285,7 +1285,7 @@
 
     case 0x30c:	/* SYS_DMA_LCD_TOP_F2_U */
         s->src_f2_top &= 0x0000ffff;
-        s->src_f2_top |= value << 16;
+        s->src_f2_top |= (uint32_t)value << 16;
         break;
 
     case 0x30e:	/* SYS_DMA_LCD_BOT_F2_L */
@@ -1295,7 +1295,7 @@
 
     case 0x310:	/* SYS_DMA_LCD_BOT_F2_U */
         s->src_f2_bottom &= 0x0000ffff;
-        s->src_f2_bottom |= value << 16;
+        s->src_f2_bottom |= (uint32_t)value << 16;
         break;
 
     default:
diff --git a/hw/dma/pl330.c b/hw/dma/pl330.c
index 608a58c..6b6eaae 100644
--- a/hw/dma/pl330.c
+++ b/hw/dma/pl330.c
@@ -138,7 +138,6 @@
     .name = "pl330_chan",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(src, PL330Chan),
         VMSTATE_UINT32(dst, PL330Chan),
@@ -170,7 +169,6 @@
     .name = "pl330_chan",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_VBUFFER_UINT32(buf, PL330Fifo, 1, NULL, 0, buf_size),
         VMSTATE_VBUFFER_UINT32(tag, PL330Fifo, 1, NULL, 0, buf_size),
@@ -195,7 +193,6 @@
     .name = "pl330_queue_entry",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(addr, PL330QueueEntry),
         VMSTATE_UINT32(len, PL330QueueEntry),
@@ -218,7 +215,6 @@
     .name = "pl330_queue",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_STRUCT_VARRAY_UINT32(queue, PL330Queue, queue_size, 1,
                                  vmstate_pl330_queue_entry, PL330QueueEntry),
@@ -279,7 +275,6 @@
     .name = "pl330",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_STRUCT(manager, PL330State, 0, vmstate_pl330_chan, PL330Chan),
         VMSTATE_STRUCT_VARRAY_UINT32(chan, PL330State, num_chnls, 0,
diff --git a/hw/dma/pxa2xx_dma.c b/hw/dma/pxa2xx_dma.c
index c013abb..d4501fb 100644
--- a/hw/dma/pxa2xx_dma.c
+++ b/hw/dma/pxa2xx_dma.c
@@ -514,7 +514,6 @@
     .name = "pxa2xx_dma_chan",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(descr, PXA2xxDMAChannel),
         VMSTATE_UINT32(src, PXA2xxDMAChannel),
@@ -530,7 +529,6 @@
     .name = "pxa2xx_dma",
     .version_id = 1,
     .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
     .fields = (VMStateField[]) {
         VMSTATE_UNUSED_TEST(is_version_0, 4),
         VMSTATE_UINT32(stopintr, PXA2xxDMAState),
diff --git a/hw/gpio/max7310.c b/hw/gpio/max7310.c
index cfcd89c..7fbf313 100644
--- a/hw/gpio/max7310.c
+++ b/hw/gpio/max7310.c
@@ -152,8 +152,7 @@
     .name = "max7310",
     .version_id = 0,
     .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
-    .fields      = (VMStateField []) {
+    .fields = (VMStateField[]) {
         VMSTATE_INT32(i2c_command_byte, MAX7310State),
         VMSTATE_INT32(len, MAX7310State),
         VMSTATE_UINT8(level, MAX7310State),
diff --git a/hw/gpio/zaurus.c b/hw/gpio/zaurus.c
index 8e2ce04..9408342 100644
--- a/hw/gpio/zaurus.c
+++ b/hw/gpio/zaurus.c
@@ -216,9 +216,8 @@
     .name = "scoop",
     .version_id = 1,
     .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
     .post_load = scoop_post_load,
-    .fields = (VMStateField []) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT16(status, ScoopInfo),
         VMSTATE_UINT16(power, ScoopInfo),
         VMSTATE_UINT32(gpio_level, ScoopInfo),
diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
index e55421a..de33657 100644
--- a/hw/i386/kvm/pci-assign.c
+++ b/hw/i386/kvm/pci-assign.c
@@ -1300,6 +1300,7 @@
     if (pos != 0 && kvm_device_msix_supported(kvm_state)) {
         int bar_nr;
         uint32_t msix_table_entry;
+        uint16_t msix_max;
 
         verify_irqchip_in_kernel(&local_err);
         if (local_err) {
@@ -1315,9 +1316,10 @@
         }
         pci_dev->msix_cap = pos;
 
-        pci_set_word(pci_dev->config + pos + PCI_MSIX_FLAGS,
-                     pci_get_word(pci_dev->config + pos + PCI_MSIX_FLAGS) &
-                     PCI_MSIX_FLAGS_QSIZE);
+        msix_max = (pci_get_word(pci_dev->config + pos + PCI_MSIX_FLAGS) &
+                    PCI_MSIX_FLAGS_QSIZE) + 1;
+        msix_max = MIN(msix_max, KVM_MAX_MSIX_PER_DEV);
+        pci_set_word(pci_dev->config + pos + PCI_MSIX_FLAGS, msix_max - 1);
 
         /* Only enable and function mask bits are writable */
         pci_set_word(pci_dev->wmask + pos + PCI_MSIX_FLAGS,
@@ -1327,9 +1329,7 @@
         bar_nr = msix_table_entry & PCI_MSIX_FLAGS_BIRMASK;
         msix_table_entry &= ~PCI_MSIX_FLAGS_BIRMASK;
         dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry;
-        dev->msix_max = pci_get_word(pci_dev->config + pos + PCI_MSIX_FLAGS);
-        dev->msix_max &= PCI_MSIX_FLAGS_QSIZE;
-        dev->msix_max += 1;
+        dev->msix_max = msix_max;
     }
 
     /* Minimal PM support, nothing writable, device appears to NAK changes */
@@ -1664,6 +1664,7 @@
                            MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
     if (dev->msix_table == MAP_FAILED) {
         error_setg_errno(errp, errno, "failed to allocate msix_table");
+        dev->msix_table = NULL;
         return;
     }
 
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 07de238..e6369d5 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -471,11 +471,12 @@
                          unsigned size)
 {
     Port92State *s = opaque;
+    int oldval = s->outport;
 
     DPRINTF("port92: write 0x%02x\n", val);
     s->outport = val;
     qemu_set_irq(*s->a20_out, (val >> 1) & 1);
-    if (val & 1) {
+    if ((val & 1) && !(oldval & 1)) {
         qemu_system_reset_request();
     }
 }
diff --git a/hw/input/lm832x.c b/hw/input/lm832x.c
index 4ae1cd9..9eb68e8 100644
--- a/hw/input/lm832x.c
+++ b/hw/input/lm832x.c
@@ -432,9 +432,8 @@
     .name = "LM8323",
     .version_id = 0,
     .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
     .post_load = lm_kbd_post_load,
-    .fields      = (VMStateField []) {
+    .fields = (VMStateField[]) {
         VMSTATE_I2C_SLAVE(parent_obj, LM823KbdState),
         VMSTATE_UINT8(i2c_dir, LM823KbdState),
         VMSTATE_UINT8(i2c_cycle, LM823KbdState),
diff --git a/hw/input/pxa2xx_keypad.c b/hw/input/pxa2xx_keypad.c
index b90b0ba..8501114 100644
--- a/hw/input/pxa2xx_keypad.c
+++ b/hw/input/pxa2xx_keypad.c
@@ -291,8 +291,7 @@
     .name = "pxa2xx_keypad",
     .version_id = 0,
     .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT32(kpc, PXA2xxKeyPadState),
         VMSTATE_UINT32(kpdk, PXA2xxKeyPadState),
         VMSTATE_UINT32(kprec, PXA2xxKeyPadState),
diff --git a/hw/input/stellaris_input.c b/hw/input/stellaris_input.c
index 4e40792..0609e80 100644
--- a/hw/input/stellaris_input.c
+++ b/hw/input/stellaris_input.c
@@ -51,8 +51,7 @@
     .name = "stellaris_button",
     .version_id = 0,
     .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT8(pressed, gamepad_button),
         VMSTATE_END_OF_LIST()
     }
@@ -62,8 +61,7 @@
     .name = "stellaris_gamepad",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_INT32(extension, gamepad_state),
         VMSTATE_STRUCT_VARRAY_INT32(buttons, gamepad_state, num_buttons, 0,
                               vmstate_stellaris_button, gamepad_button),
diff --git a/hw/intc/allwinner-a10-pic.c b/hw/intc/allwinner-a10-pic.c
index 0924d98..de820b9 100644
--- a/hw/intc/allwinner-a10-pic.c
+++ b/hw/intc/allwinner-a10-pic.c
@@ -97,6 +97,7 @@
     switch (offset) {
     case AW_A10_PIC_BASE_ADDR:
         s->base_addr = value & ~0x3;
+        break;
     case AW_A10_PIC_PROTECT:
         s->protect = value;
         break;
@@ -141,7 +142,6 @@
     .name = "a10.pic",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(vector, AwA10PICState),
         VMSTATE_UINT32(base_addr, AwA10PICState),
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index 7137653..ce3d903 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -200,7 +200,7 @@
     s->initial_count = 0;
     s->initial_count_load_time = 0;
     s->next_time = 0;
-    s->wait_for_sipi = 1;
+    s->wait_for_sipi = !cpu_is_bsp(s->cpu);
 
     if (s->timer) {
         timer_del(s->timer);
diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c
index 9aa8ab2..75d9c6e 100644
--- a/hw/intc/armv7m_nvic.c
+++ b/hw/intc/armv7m_nvic.c
@@ -443,8 +443,7 @@
     .name = "armv7m_nvic",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT32(systick.control, nvic_state),
         VMSTATE_UINT32(systick.reload, nvic_state),
         VMSTATE_INT64(systick.tick, nvic_state),
diff --git a/hw/intc/exynos4210_combiner.c b/hw/intc/exynos4210_combiner.c
index 3287479..a6b7028 100644
--- a/hw/intc/exynos4210_combiner.c
+++ b/hw/intc/exynos4210_combiner.c
@@ -77,7 +77,6 @@
     .name = "exynos4210.combiner.groupstate",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT8(src_mask, CombinerGroupState),
         VMSTATE_UINT8(src_pending, CombinerGroupState),
@@ -89,7 +88,6 @@
     .name = "exynos4210.combiner",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_STRUCT_ARRAY(group, Exynos4210CombinerState, IIC_NGRP, 0,
                 vmstate_exynos4210_combiner_group_state, CombinerGroupState),
diff --git a/hw/intc/exynos4210_gic.c b/hw/intc/exynos4210_gic.c
index 5b913f7..0590d5d 100644
--- a/hw/intc/exynos4210_gic.c
+++ b/hw/intc/exynos4210_gic.c
@@ -394,7 +394,6 @@
     .name = "exynos4210.irq_gate",
     .version_id = 2,
     .minimum_version_id = 2,
-    .minimum_version_id_old = 2,
     .fields = (VMStateField[]) {
         VMSTATE_VBUFFER_UINT32(level, Exynos4210IRQGateState, 1, NULL, 0, n_in),
         VMSTATE_END_OF_LIST()
diff --git a/hw/intc/imx_avic.c b/hw/intc/imx_avic.c
index fb00e91..ec5f9ad 100644
--- a/hw/intc/imx_avic.c
+++ b/hw/intc/imx_avic.c
@@ -77,7 +77,6 @@
     .name = "imx-avic",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(pending, IMXAVICState),
         VMSTATE_UINT64(enabled, IMXAVICState),
diff --git a/hw/misc/exynos4210_pmu.c b/hw/misc/exynos4210_pmu.c
index 5ec14d1..2b118c7 100644
--- a/hw/misc/exynos4210_pmu.c
+++ b/hw/misc/exynos4210_pmu.c
@@ -471,7 +471,7 @@
     .name = "exynos4210.pmu",
     .version_id = 1,
     .minimum_version_id = 1,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT32_ARRAY(reg, Exynos4210PmuState, PMU_NUM_OF_REGISTERS),
         VMSTATE_END_OF_LIST()
     }
diff --git a/hw/misc/imx_ccm.c b/hw/misc/imx_ccm.c
index 63e33a4..750b906 100644
--- a/hw/misc/imx_ccm.c
+++ b/hw/misc/imx_ccm.c
@@ -57,7 +57,6 @@
     .name = "imx-ccm",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(ccmr, IMXCCMState),
         VMSTATE_UINT32(pdr0, IMXCCMState),
diff --git a/hw/misc/max111x.c b/hw/misc/max111x.c
index bba87c2..bef3651 100644
--- a/hw/misc/max111x.c
+++ b/hw/misc/max111x.c
@@ -110,8 +110,7 @@
     .name = "max111x",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_SSI_SLAVE(parent_obj, MAX111xState),
         VMSTATE_UINT8(tb1, MAX111xState),
         VMSTATE_UINT8(rb2, MAX111xState),
diff --git a/hw/misc/mst_fpga.c b/hw/misc/mst_fpga.c
index c96810f..d509079 100644
--- a/hw/misc/mst_fpga.c
+++ b/hw/misc/mst_fpga.c
@@ -219,12 +219,11 @@
 }
 
 static VMStateDescription vmstate_mst_fpga_regs = {
-	.name = "mainstone_fpga",
-	.version_id = 0,
-	.minimum_version_id = 0,
-	.minimum_version_id_old = 0,
-	.post_load = mst_fpga_post_load,
-	.fields = (VMStateField []) {
+    .name = "mainstone_fpga",
+    .version_id = 0,
+    .minimum_version_id = 0,
+    .post_load = mst_fpga_post_load,
+    .fields = (VMStateField[]) {
 		VMSTATE_UINT32(prev_level, mst_irq_state),
 		VMSTATE_UINT32(leddat1, mst_irq_state),
 		VMSTATE_UINT32(leddat2, mst_irq_state),
diff --git a/hw/misc/omap_gpmc.c b/hw/misc/omap_gpmc.c
index 2047274..cddea24 100644
--- a/hw/misc/omap_gpmc.c
+++ b/hw/misc/omap_gpmc.c
@@ -242,6 +242,10 @@
     if (bytes > s->prefetch.count) {
         bytes = s->prefetch.count;
     }
+    if (is16bit) {
+        bytes &= ~1;
+    }
+
     s->prefetch.count -= bytes;
     s->prefetch.fifopointer += bytes;
     fptr = 64 - s->prefetch.fifopointer;
diff --git a/hw/misc/tmp105.c b/hw/misc/tmp105.c
index 636ee97..f3fe8b8 100644
--- a/hw/misc/tmp105.c
+++ b/hw/misc/tmp105.c
@@ -199,9 +199,8 @@
     .name = "TMP105",
     .version_id = 0,
     .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
     .post_load = tmp105_post_load,
-    .fields      = (VMStateField []) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT8(len, TMP105State),
         VMSTATE_UINT8_ARRAY(buf, TMP105State, 2),
         VMSTATE_UINT8(pointer, TMP105State),
diff --git a/hw/misc/zynq_slcr.c b/hw/misc/zynq_slcr.c
index 2e53a2e..964f253 100644
--- a/hw/misc/zynq_slcr.c
+++ b/hw/misc/zynq_slcr.c
@@ -428,8 +428,7 @@
     .name = "zynq_slcr",
     .version_id = 2,
     .minimum_version_id = 2,
-    .minimum_version_id_old = 2,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT32_ARRAY(regs, ZynqSLCRState, ZYNQ_SLCR_NUM_REGS),
         VMSTATE_END_OF_LIST()
     }
diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c
index cdb1825..47e7038 100644
--- a/hw/net/cadence_gem.c
+++ b/hw/net/cadence_gem.c
@@ -717,7 +717,6 @@
         rxbuf_ptr = (void *)buf;
     } else {
         unsigned crc_val;
-        int      crc_offset;
 
         /* The application wants the FCS field, which QEMU does not provide.
          * We must try and caclculate one.
@@ -727,12 +726,7 @@
         memset(rxbuf + size, 0, sizeof(rxbuf) - size);
         rxbuf_ptr = rxbuf;
         crc_val = cpu_to_le32(crc32(0, rxbuf, MAX(size, 60)));
-        if (size < 60) {
-            crc_offset = 60;
-        } else {
-            crc_offset = size;
-        }
-        memcpy(rxbuf + crc_offset, &crc_val, sizeof(crc_val));
+        memcpy(rxbuf + size, &crc_val, sizeof(crc_val));
 
         bytes_to_copy += 4;
         size += 4;
@@ -1257,8 +1251,7 @@
     .name = "cadence_gem",
     .version_id = 2,
     .minimum_version_id = 2,
-    .minimum_version_id_old = 2,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT32_ARRAY(regs, GemState, GEM_MAXREG),
         VMSTATE_UINT16_ARRAY(phy_regs, GemState, 32),
         VMSTATE_UINT8(phy_loop, GemState),
diff --git a/hw/net/smc91c111.c b/hw/net/smc91c111.c
index a8e29b3..d1dca8f 100644
--- a/hw/net/smc91c111.c
+++ b/hw/net/smc91c111.c
@@ -54,7 +54,7 @@
     .name = "smc91c111",
     .version_id = 1,
     .minimum_version_id = 1,
-    .fields      = (VMStateField []) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT16(tcr, smc91c111_state),
         VMSTATE_UINT16(rcr, smc91c111_state),
         VMSTATE_UINT16(cr, smc91c111_state),
diff --git a/hw/net/stellaris_enet.c b/hw/net/stellaris_enet.c
index d04e6a4..c9ee5d3 100644
--- a/hw/net/stellaris_enet.c
+++ b/hw/net/stellaris_enet.c
@@ -47,6 +47,11 @@
     OBJECT_CHECK(stellaris_enet_state, (obj), TYPE_STELLARIS_ENET)
 
 typedef struct {
+    uint8_t data[2048];
+    uint32_t len;
+} StellarisEnetRxFrame;
+
+typedef struct {
     SysBusDevice parent_obj;
 
     uint32_t ris;
@@ -59,29 +64,159 @@
     uint32_t mtxd;
     uint32_t mrxd;
     uint32_t np;
-    int tx_frame_len;
-    int tx_fifo_len;
+    uint32_t tx_fifo_len;
     uint8_t tx_fifo[2048];
     /* Real hardware has a 2k fifo, which works out to be at most 31 packets.
        We implement a full 31 packet fifo.  */
-    struct {
-        uint8_t data[2048];
-        int len;
-    } rx[31];
-    uint8_t *rx_fifo;
-    int rx_fifo_len;
-    int next_packet;
+    StellarisEnetRxFrame rx[31];
+    uint32_t rx_fifo_offset;
+    uint32_t next_packet;
     NICState *nic;
     NICConf conf;
     qemu_irq irq;
     MemoryRegion mmio;
 } stellaris_enet_state;
 
+static const VMStateDescription vmstate_rx_frame = {
+    .name = "stellaris_enet/rx_frame",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8_ARRAY(data, StellarisEnetRxFrame, 2048),
+        VMSTATE_UINT32(len, StellarisEnetRxFrame),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static int stellaris_enet_post_load(void *opaque, int version_id)
+{
+    stellaris_enet_state *s = opaque;
+    int i;
+
+    /* Sanitize inbound state. Note that next_packet is an index but
+     * np is a size; hence their valid upper bounds differ.
+     */
+    if (s->next_packet >= ARRAY_SIZE(s->rx)) {
+        return -1;
+    }
+
+    if (s->np > ARRAY_SIZE(s->rx)) {
+        return -1;
+    }
+
+    for (i = 0; i < ARRAY_SIZE(s->rx); i++) {
+        if (s->rx[i].len > ARRAY_SIZE(s->rx[i].data)) {
+            return -1;
+        }
+    }
+
+    if (s->rx_fifo_offset > ARRAY_SIZE(s->rx[0].data) - 4) {
+        return -1;
+    }
+
+    if (s->tx_fifo_len > ARRAY_SIZE(s->tx_fifo)) {
+        return -1;
+    }
+
+    return 0;
+}
+
+static const VMStateDescription vmstate_stellaris_enet = {
+    .name = "stellaris_enet",
+    .version_id = 2,
+    .minimum_version_id = 2,
+    .post_load = stellaris_enet_post_load,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(ris, stellaris_enet_state),
+        VMSTATE_UINT32(im, stellaris_enet_state),
+        VMSTATE_UINT32(rctl, stellaris_enet_state),
+        VMSTATE_UINT32(tctl, stellaris_enet_state),
+        VMSTATE_UINT32(thr, stellaris_enet_state),
+        VMSTATE_UINT32(mctl, stellaris_enet_state),
+        VMSTATE_UINT32(mdv, stellaris_enet_state),
+        VMSTATE_UINT32(mtxd, stellaris_enet_state),
+        VMSTATE_UINT32(mrxd, stellaris_enet_state),
+        VMSTATE_UINT32(np, stellaris_enet_state),
+        VMSTATE_UINT32(tx_fifo_len, stellaris_enet_state),
+        VMSTATE_UINT8_ARRAY(tx_fifo, stellaris_enet_state, 2048),
+        VMSTATE_STRUCT_ARRAY(rx, stellaris_enet_state, 31, 1,
+                             vmstate_rx_frame, StellarisEnetRxFrame),
+        VMSTATE_UINT32(rx_fifo_offset, stellaris_enet_state),
+        VMSTATE_UINT32(next_packet, stellaris_enet_state),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static void stellaris_enet_update(stellaris_enet_state *s)
 {
     qemu_set_irq(s->irq, (s->ris & s->im) != 0);
 }
 
+/* Return the data length of the packet currently being assembled
+ * in the TX fifo.
+ */
+static inline int stellaris_txpacket_datalen(stellaris_enet_state *s)
+{
+    return s->tx_fifo[0] | (s->tx_fifo[1] << 8);
+}
+
+/* Return true if the packet currently in the TX FIFO is complete,
+ * i.e. the FIFO holds enough bytes for the data length, ethernet header,
+ * payload and optionally CRC.
+ */
+static inline bool stellaris_txpacket_complete(stellaris_enet_state *s)
+{
+    int framelen = stellaris_txpacket_datalen(s);
+    framelen += 16;
+    if (!(s->tctl & SE_TCTL_CRC)) {
+        framelen += 4;
+    }
+    /* Cover the corner case of a 2032 byte payload with auto-CRC disabled:
+     * this requires more bytes than will fit in the FIFO. It's not totally
+     * clear how the h/w handles this, but if using threshold-based TX
+     * it will definitely try to transmit something.
+     */
+    framelen = MIN(framelen, ARRAY_SIZE(s->tx_fifo));
+    return s->tx_fifo_len >= framelen;
+}
+
+/* Return true if the TX FIFO threshold is enabled and the FIFO
+ * has filled enough to reach it.
+ */
+static inline bool stellaris_tx_thr_reached(stellaris_enet_state *s)
+{
+    return (s->thr < 0x3f &&
+            (s->tx_fifo_len >= 4 * (s->thr * 8 + 1)));
+}
+
+/* Send the packet currently in the TX FIFO */
+static void stellaris_enet_send(stellaris_enet_state *s)
+{
+    int framelen = stellaris_txpacket_datalen(s);
+
+    /* Ethernet header is in the FIFO but not in the datacount.
+     * We don't implement explicit CRC, so just ignore any
+     * CRC value in the FIFO.
+     */
+    framelen += 14;
+    if ((s->tctl & SE_TCTL_PADEN) && framelen < 60) {
+        memset(&s->tx_fifo[framelen + 2], 0, 60 - framelen);
+        framelen = 60;
+    }
+    /* This MIN will have no effect unless the FIFO data is corrupt
+     * (e.g. bad data from an incoming migration); otherwise the check
+     * on the datalen at the start of writing the data into the FIFO
+     * will have caught this. Silently write a corrupt half-packet,
+     * which is what the hardware does in FIFO underrun situations.
+     */
+    framelen = MIN(framelen, ARRAY_SIZE(s->tx_fifo) - 2);
+    qemu_send_packet(qemu_get_queue(s->nic), s->tx_fifo + 2, framelen);
+    s->tx_fifo_len = 0;
+    s->ris |= SE_INT_TXEMP;
+    stellaris_enet_update(s);
+    DPRINTF("Done TX\n");
+}
+
 /* TODO: Implement MAC address filtering.  */
 static ssize_t stellaris_enet_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 {
@@ -97,7 +232,7 @@
         return -1;
     }
 
-    DPRINTF("Received packet len=%d\n", size);
+    DPRINTF("Received packet len=%zu\n", size);
     n = s->next_packet + s->np;
     if (n >= 31)
         n -= 31;
@@ -152,21 +287,21 @@
     case 0x0c: /* TCTL */
         return s->tctl;
     case 0x10: /* DATA */
-        if (s->rx_fifo_len == 0) {
-            if (s->np == 0) {
-                BADF("RX underflow\n");
-                return 0;
-            }
-            s->rx_fifo_len = s->rx[s->next_packet].len;
-            s->rx_fifo = s->rx[s->next_packet].data;
-            DPRINTF("RX FIFO start packet len=%d\n", s->rx_fifo_len);
+    {
+        uint8_t *rx_fifo;
+
+        if (s->np == 0) {
+            BADF("RX underflow\n");
+            return 0;
         }
-        val = s->rx_fifo[0] | (s->rx_fifo[1] << 8) | (s->rx_fifo[2] << 16)
-              | (s->rx_fifo[3] << 24);
-        s->rx_fifo += 4;
-        s->rx_fifo_len -= 4;
-        if (s->rx_fifo_len <= 0) {
-            s->rx_fifo_len = 0;
+
+        rx_fifo = s->rx[s->next_packet].data + s->rx_fifo_offset;
+
+        val = rx_fifo[0] | (rx_fifo[1] << 8) | (rx_fifo[2] << 16)
+              | (rx_fifo[3] << 24);
+        s->rx_fifo_offset += 4;
+        if (s->rx_fifo_offset >= s->rx[s->next_packet].len) {
+            s->rx_fifo_offset = 0;
             s->next_packet++;
             if (s->next_packet >= 31)
                 s->next_packet = 0;
@@ -174,6 +309,7 @@
             DPRINTF("RX done np=%d\n", s->np);
         }
         return val;
+    }
     case 0x14: /* IA0 */
         return s->conf.macaddr.a[0] | (s->conf.macaddr.a[1] << 8)
             | (s->conf.macaddr.a[2] << 16)
@@ -212,22 +348,23 @@
     switch (offset) {
     case 0x00: /* IACK */
         s->ris &= ~value;
-        DPRINTF("IRQ ack %02x/%02x\n", value, s->ris);
+        DPRINTF("IRQ ack %02" PRIx64 "/%02x\n", value, s->ris);
         stellaris_enet_update(s);
         /* Clearing TXER also resets the TX fifo.  */
-        if (value & SE_INT_TXER)
-            s->tx_frame_len = -1;
+        if (value & SE_INT_TXER) {
+            s->tx_fifo_len = 0;
+        }
         break;
     case 0x04: /* IM */
-        DPRINTF("IRQ mask %02x/%02x\n", value, s->ris);
+        DPRINTF("IRQ mask %02" PRIx64 "/%02x\n", value, s->ris);
         s->im = value;
         stellaris_enet_update(s);
         break;
     case 0x08: /* RCTL */
         s->rctl = value;
         if (value & SE_RCTL_RSTFIFO) {
-            s->rx_fifo_len = 0;
             s->np = 0;
+            s->rx_fifo_offset = 0;
             stellaris_enet_update(s);
         }
         break;
@@ -235,43 +372,26 @@
         s->tctl = value;
         break;
     case 0x10: /* DATA */
-        if (s->tx_frame_len == -1) {
-            s->tx_frame_len = value & 0xffff;
-            if (s->tx_frame_len > 2032) {
-                DPRINTF("TX frame too long (%d)\n", s->tx_frame_len);
-                s->tx_frame_len = 0;
+        if (s->tx_fifo_len == 0) {
+            /* The first word is special, it contains the data length */
+            int framelen = value & 0xffff;
+            if (framelen > 2032) {
+                DPRINTF("TX frame too long (%d)\n", framelen);
                 s->ris |= SE_INT_TXER;
                 stellaris_enet_update(s);
-            } else {
-                DPRINTF("Start TX frame len=%d\n", s->tx_frame_len);
-                /* The value written does not include the ethernet header.  */
-                s->tx_frame_len += 14;
-                if ((s->tctl & SE_TCTL_CRC) == 0)
-                    s->tx_frame_len += 4;
-                s->tx_fifo_len = 0;
-                s->tx_fifo[s->tx_fifo_len++] = value >> 16;
-                s->tx_fifo[s->tx_fifo_len++] = value >> 24;
+                break;
             }
-        } else {
+        }
+
+        if (s->tx_fifo_len + 4 <= ARRAY_SIZE(s->tx_fifo)) {
             s->tx_fifo[s->tx_fifo_len++] = value;
             s->tx_fifo[s->tx_fifo_len++] = value >> 8;
             s->tx_fifo[s->tx_fifo_len++] = value >> 16;
             s->tx_fifo[s->tx_fifo_len++] = value >> 24;
-            if (s->tx_fifo_len >= s->tx_frame_len) {
-                /* We don't implement explicit CRC, so just chop it off.  */
-                if ((s->tctl & SE_TCTL_CRC) == 0)
-                    s->tx_frame_len -= 4;
-                if ((s->tctl & SE_TCTL_PADEN) && s->tx_frame_len < 60) {
-                    memset(&s->tx_fifo[s->tx_frame_len], 0, 60 - s->tx_frame_len);
-                    s->tx_fifo_len = 60;
-                }
-                qemu_send_packet(qemu_get_queue(s->nic), s->tx_fifo,
-                                 s->tx_frame_len);
-                s->tx_frame_len = -1;
-                s->ris |= SE_INT_TXEMP;
-                stellaris_enet_update(s);
-                DPRINTF("Done TX\n");
-            }
+        }
+
+        if (stellaris_tx_thr_reached(s) && stellaris_txpacket_complete(s)) {
+            stellaris_enet_send(s);
         }
         break;
     case 0x14: /* IA0 */
@@ -299,9 +419,13 @@
     case 0x2c: /* MTXD */
         s->mtxd = value & 0xff;
         break;
+    case 0x38: /* TR */
+        if (value & 1) {
+            stellaris_enet_send(s);
+        }
+        break;
     case 0x30: /* MRXD */
     case 0x34: /* NP */
-    case 0x38: /* TR */
         /* Ignored.  */
     case 0x3c: /* Undocuented: Timestamp? */
         /* Ignored.  */
@@ -324,68 +448,7 @@
     s->im = SE_INT_PHY | SE_INT_MD | SE_INT_RXER | SE_INT_FOV | SE_INT_TXEMP
             | SE_INT_TXER | SE_INT_RX;
     s->thr = 0x3f;
-    s->tx_frame_len = -1;
-}
-
-static void stellaris_enet_save(QEMUFile *f, void *opaque)
-{
-    stellaris_enet_state *s = (stellaris_enet_state *)opaque;
-    int i;
-
-    qemu_put_be32(f, s->ris);
-    qemu_put_be32(f, s->im);
-    qemu_put_be32(f, s->rctl);
-    qemu_put_be32(f, s->tctl);
-    qemu_put_be32(f, s->thr);
-    qemu_put_be32(f, s->mctl);
-    qemu_put_be32(f, s->mdv);
-    qemu_put_be32(f, s->mtxd);
-    qemu_put_be32(f, s->mrxd);
-    qemu_put_be32(f, s->np);
-    qemu_put_be32(f, s->tx_frame_len);
-    qemu_put_be32(f, s->tx_fifo_len);
-    qemu_put_buffer(f, s->tx_fifo, sizeof(s->tx_fifo));
-    for (i = 0; i < 31; i++) {
-        qemu_put_be32(f, s->rx[i].len);
-        qemu_put_buffer(f, s->rx[i].data, sizeof(s->rx[i].data));
-
-    }
-    qemu_put_be32(f, s->next_packet);
-    qemu_put_be32(f, s->rx_fifo - s->rx[s->next_packet].data);
-    qemu_put_be32(f, s->rx_fifo_len);
-}
-
-static int stellaris_enet_load(QEMUFile *f, void *opaque, int version_id)
-{
-    stellaris_enet_state *s = (stellaris_enet_state *)opaque;
-    int i;
-
-    if (version_id != 1)
-        return -EINVAL;
-
-    s->ris = qemu_get_be32(f);
-    s->im = qemu_get_be32(f);
-    s->rctl = qemu_get_be32(f);
-    s->tctl = qemu_get_be32(f);
-    s->thr = qemu_get_be32(f);
-    s->mctl = qemu_get_be32(f);
-    s->mdv = qemu_get_be32(f);
-    s->mtxd = qemu_get_be32(f);
-    s->mrxd = qemu_get_be32(f);
-    s->np = qemu_get_be32(f);
-    s->tx_frame_len = qemu_get_be32(f);
-    s->tx_fifo_len = qemu_get_be32(f);
-    qemu_get_buffer(f, s->tx_fifo, sizeof(s->tx_fifo));
-    for (i = 0; i < 31; i++) {
-        s->rx[i].len = qemu_get_be32(f);
-        qemu_get_buffer(f, s->rx[i].data, sizeof(s->rx[i].data));
-
-    }
-    s->next_packet = qemu_get_be32(f);
-    s->rx_fifo = s->rx[s->next_packet].data + qemu_get_be32(f);
-    s->rx_fifo_len = qemu_get_be32(f);
-
-    return 0;
+    s->tx_fifo_len = 0;
 }
 
 static void stellaris_enet_cleanup(NetClientState *nc)
@@ -419,8 +482,6 @@
     qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
 
     stellaris_enet_reset(s);
-    register_savevm(dev, "stellaris_enet", -1, 1,
-                    stellaris_enet_save, stellaris_enet_load, s);
     return 0;
 }
 
@@ -428,8 +489,6 @@
 {
     stellaris_enet_state *s = STELLARIS_ENET(dev);
 
-    unregister_savevm(DEVICE(s), "stellaris_enet", s);
-
     memory_region_destroy(&s->mmio);
 }
 
@@ -446,6 +505,7 @@
     k->init = stellaris_enet_init;
     dc->unrealize = stellaris_enet_unrealize;
     dc->props = stellaris_enet_properties;
+    dc->vmsd = &vmstate_stellaris_enet;
 }
 
 static const TypeInfo stellaris_enet_info = {
diff --git a/hw/net/xgmac.c b/hw/net/xgmac.c
index 88349ac..aeffcb5 100644
--- a/hw/net/xgmac.c
+++ b/hw/net/xgmac.c
@@ -156,7 +156,7 @@
     .name = "xgmac_stats",
     .version_id = 1,
     .minimum_version_id = 1,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT64(rx_bytes, RxTxStats),
         VMSTATE_UINT64(tx_bytes, RxTxStats),
         VMSTATE_UINT64(rx, RxTxStats),
diff --git a/hw/ssi/pl022.c b/hw/ssi/pl022.c
index b19bc71..61d568f 100644
--- a/hw/ssi/pl022.c
+++ b/hw/ssi/pl022.c
@@ -257,9 +257,8 @@
     .name = "pl022_ssp",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .post_load = pl022_post_load,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT32(cr0, PL022State),
         VMSTATE_UINT32(cr1, PL022State),
         VMSTATE_UINT32(bitmask, PL022State),
diff --git a/hw/ssi/ssi.c b/hw/ssi/ssi.c
index 017f022..1c82a93 100644
--- a/hw/ssi/ssi.c
+++ b/hw/ssi/ssi.c
@@ -126,8 +126,7 @@
     .name = "SSISlave",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_BOOL(cs, SSISlave),
         VMSTATE_END_OF_LIST()
     }
diff --git a/hw/ssi/xilinx_spi.c b/hw/ssi/xilinx_spi.c
index d44caae..207f47a 100644
--- a/hw/ssi/xilinx_spi.c
+++ b/hw/ssi/xilinx_spi.c
@@ -351,7 +351,6 @@
     .name = "xilinx_spi",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_FIFO8(tx_fifo, XilinxSPI),
         VMSTATE_FIFO8(rx_fifo, XilinxSPI),
diff --git a/hw/ssi/xilinx_spips.c b/hw/ssi/xilinx_spips.c
index 8977243..0910f54 100644
--- a/hw/ssi/xilinx_spips.c
+++ b/hw/ssi/xilinx_spips.c
@@ -704,7 +704,6 @@
     .name = "xilinx_spips",
     .version_id = 2,
     .minimum_version_id = 2,
-    .minimum_version_id_old = 2,
     .post_load = xilinx_spips_post_load,
     .fields = (VMStateField[]) {
         VMSTATE_FIFO8(tx_fifo, XilinxSPIPS),
diff --git a/hw/timer/allwinner-a10-pit.c b/hw/timer/allwinner-a10-pit.c
index d3c02ea..34124fe 100644
--- a/hw/timer/allwinner-a10-pit.c
+++ b/hw/timer/allwinner-a10-pit.c
@@ -190,7 +190,6 @@
     .name = "a10.pit",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(irq_enable, AwA10PITState),
         VMSTATE_UINT32(irq_status, AwA10PITState),
diff --git a/hw/timer/arm_timer.c b/hw/timer/arm_timer.c
index fb0a45c..1452910 100644
--- a/hw/timer/arm_timer.c
+++ b/hw/timer/arm_timer.c
@@ -150,8 +150,7 @@
     .name = "arm_timer",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT32(control, arm_timer_state),
         VMSTATE_UINT32(limit, arm_timer_state),
         VMSTATE_INT32(int_level, arm_timer_state),
@@ -271,8 +270,7 @@
     .name = "sp804",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_INT32_ARRAY(level, SP804State, 2),
         VMSTATE_END_OF_LIST()
     }
diff --git a/hw/timer/cadence_ttc.c b/hw/timer/cadence_ttc.c
index 28cb328..52bbbbc 100644
--- a/hw/timer/cadence_ttc.c
+++ b/hw/timer/cadence_ttc.c
@@ -443,7 +443,6 @@
     .name = "cadence_timer",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .pre_save = cadence_timer_pre_save,
     .post_load = cadence_timer_post_load,
     .fields = (VMStateField[]) {
@@ -464,7 +463,6 @@
     .name = "cadence_TTC",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_STRUCT_ARRAY(timer, CadenceTTCState, 3, 0,
                             vmstate_cadence_timer,
diff --git a/hw/timer/digic-timer.c b/hw/timer/digic-timer.c
index 1fde22c..7e28e7e 100644
--- a/hw/timer/digic-timer.c
+++ b/hw/timer/digic-timer.c
@@ -36,7 +36,6 @@
     .name = "digic.timer",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_PTIMER(ptimer, DigicTimerState),
         VMSTATE_UINT32(control, DigicTimerState),
diff --git a/hw/timer/ds1338.c b/hw/timer/ds1338.c
index bb2f8ee..ec6dbee 100644
--- a/hw/timer/ds1338.c
+++ b/hw/timer/ds1338.c
@@ -40,7 +40,6 @@
     .name = "ds1338",
     .version_id = 2,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_I2C_SLAVE(parent_obj, DS1338State),
         VMSTATE_INT64(offset, DS1338State),
diff --git a/hw/timer/exynos4210_mct.c b/hw/timer/exynos4210_mct.c
index 86f4fcd..015bbaf 100644
--- a/hw/timer/exynos4210_mct.c
+++ b/hw/timer/exynos4210_mct.c
@@ -264,7 +264,6 @@
     .name = "exynos4210.mct.tick_timer",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(cnt_run, struct tick_timer),
         VMSTATE_UINT32(int_run, struct tick_timer),
@@ -284,7 +283,6 @@
     .name = "exynos4210.mct.lregs",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32_ARRAY(cnt, struct lregs, L_REG_CNT_AMOUNT),
         VMSTATE_UINT32(tcon, struct lregs),
@@ -299,7 +297,6 @@
     .name = "exynos4210.mct.lt",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_INT32(id, Exynos4210MCTLT),
         VMSTATE_STRUCT(tick_timer, Exynos4210MCTLT, 0,
@@ -317,7 +314,6 @@
     .name = "exynos4210.mct.lregs",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(cnt, struct gregs),
         VMSTATE_UINT32(cnt_wstat, struct gregs),
@@ -336,7 +332,6 @@
     .name = "exynos4210.mct.lt",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_STRUCT(reg, Exynos4210MCTGT, 0, vmstate_gregs,
                 struct gregs),
@@ -351,7 +346,6 @@
     .name = "exynos4210.mct",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(reg_mct_cfg, Exynos4210MCTState),
         VMSTATE_STRUCT_ARRAY(l_timer, Exynos4210MCTState, 2, 0,
@@ -824,14 +818,14 @@
          */
 
         if (s->last_tcnto) {
-            to_count = s->last_tcnto * s->last_icnto;
+            to_count = (uint64_t)s->last_tcnto * s->last_icnto;
         } else {
             to_count = s->last_icnto;
         }
     } else {
         /* distance is passed, recalculate with tcnto * icnto */
         if (s->icntb) {
-            s->distance = s->tcntb * s->icntb;
+            s->distance = (uint64_t)s->tcntb * s->icntb;
         } else {
             s->distance = s->tcntb;
         }
diff --git a/hw/timer/exynos4210_pwm.c b/hw/timer/exynos4210_pwm.c
index 1aa8f4d..1c1a2b8 100644
--- a/hw/timer/exynos4210_pwm.c
+++ b/hw/timer/exynos4210_pwm.c
@@ -120,7 +120,6 @@
     .name = "exynos4210.pwm.pwm",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(id, Exynos4210PWM),
         VMSTATE_UINT32(freq, Exynos4210PWM),
@@ -135,7 +134,6 @@
     .name = "exynos4210.pwm",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32_ARRAY(reg_tcfg, Exynos4210PWMState, 2),
         VMSTATE_UINT32(reg_tcon, Exynos4210PWMState),
diff --git a/hw/timer/exynos4210_rtc.c b/hw/timer/exynos4210_rtc.c
index 026f81a..bf2ee9f 100644
--- a/hw/timer/exynos4210_rtc.c
+++ b/hw/timer/exynos4210_rtc.c
@@ -118,7 +118,6 @@
     .name = "exynos4210.rtc",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(reg_intp, Exynos4210RTCState),
         VMSTATE_UINT32(reg_rtccon, Exynos4210RTCState),
diff --git a/hw/timer/imx_epit.c b/hw/timer/imx_epit.c
index 0dbe15c..c855eba 100644
--- a/hw/timer/imx_epit.c
+++ b/hw/timer/imx_epit.c
@@ -353,8 +353,7 @@
     .name = "imx.epit",
     .version_id = 2,
     .minimum_version_id = 2,
-    .minimum_version_id_old = 2,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT32(cr, IMXEPITState),
         VMSTATE_UINT32(sr, IMXEPITState),
         VMSTATE_UINT32(lr, IMXEPITState),
diff --git a/hw/timer/imx_gpt.c b/hw/timer/imx_gpt.c
index f2d1975..56ee4db 100644
--- a/hw/timer/imx_gpt.c
+++ b/hw/timer/imx_gpt.c
@@ -146,8 +146,7 @@
     .name = "imx.gpt",
     .version_id = 3,
     .minimum_version_id = 3,
-    .minimum_version_id_old = 3,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT32(cr, IMXGPTState),
         VMSTATE_UINT32(pr, IMXGPTState),
         VMSTATE_UINT32(sr, IMXGPTState),
diff --git a/hw/timer/pxa2xx_timer.c b/hw/timer/pxa2xx_timer.c
index 0f546c4..130e9dc 100644
--- a/hw/timer/pxa2xx_timer.c
+++ b/hw/timer/pxa2xx_timer.c
@@ -476,7 +476,6 @@
     .name = "pxa2xx_timer0",
     .version_id = 2,
     .minimum_version_id = 2,
-    .minimum_version_id_old = 2,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(value, PXA2xxTimer0),
         VMSTATE_END_OF_LIST(),
@@ -487,7 +486,6 @@
     .name = "pxa2xx_timer4",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_STRUCT(tm, PXA2xxTimer4, 1,
                         vmstate_pxa2xx_timer0_regs, PXA2xxTimer0),
@@ -509,7 +507,6 @@
     .name = "pxa2xx_timer",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .post_load = pxa25x_timer_post_load,
     .fields = (VMStateField[]) {
         VMSTATE_INT32(clock, PXA2xxTimerInfo),
diff --git a/hw/timer/twl92230.c b/hw/timer/twl92230.c
index 85d5990..7ded4ba 100644
--- a/hw/timer/twl92230.c
+++ b/hw/timer/twl92230.c
@@ -772,8 +772,7 @@
     .name = "menelaus_tm",
     .version_id = 0,
     .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
-    .fields      = (VMStateField []) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT16_HACK(tm_sec, struct tm),
         VMSTATE_UINT16_HACK(tm_min, struct tm),
         VMSTATE_UINT16_HACK(tm_hour, struct tm),
@@ -811,10 +810,9 @@
     .name = "menelaus",
     .version_id = 0,
     .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
     .pre_save = menelaus_pre_save,
     .post_load = menelaus_post_load,
-    .fields      = (VMStateField []) {
+    .fields = (VMStateField[]) {
         VMSTATE_INT32(firstbyte, MenelausState),
         VMSTATE_UINT8(reg, MenelausState),
         VMSTATE_UINT8_ARRAY(vcore, MenelausState, 5),
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index fb649a4..9cab592 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -381,6 +381,9 @@
 /* Debug event pending.  */
 #define CPU_INTERRUPT_DEBUG       0x0080
 
+/* Reset signal.  */
+#define CPU_INTERRUPT_RESET       0x0400
+
 /* Several target-specific external hardware interrupts.  Each target/cpu.h
    should define proper names based on these defines.  */
 #define CPU_INTERRUPT_TGT_EXT_0   0x0008
@@ -395,9 +398,8 @@
    instruction being executed.  These, therefore, are not masked while
    single-stepping within the debugger.  */
 #define CPU_INTERRUPT_TGT_INT_0   0x0100
-#define CPU_INTERRUPT_TGT_INT_1   0x0400
-#define CPU_INTERRUPT_TGT_INT_2   0x0800
-#define CPU_INTERRUPT_TGT_INT_3   0x2000
+#define CPU_INTERRUPT_TGT_INT_1   0x0800
+#define CPU_INTERRUPT_TGT_INT_2   0x2000
 
 /* First unused bit: 0x4000.  */
 
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 5ad4e0e..e7ad9d1 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -245,8 +245,6 @@
 /* Returns VCPU ID to be used on KVM_CREATE_VCPU ioctl() */
 unsigned long kvm_arch_vcpu_id(CPUState *cpu);
 
-void kvm_arch_reset_vcpu(CPUState *cpu);
-
 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr);
 int kvm_arch_on_sigbus(int code, void *addr);
 
@@ -383,4 +381,24 @@
  *          > 0: irq chip was created
  */
 int kvm_arch_irqchip_create(KVMState *s);
+
+/**
+ * kvm_set_one_reg - set a register value in KVM via KVM_SET_ONE_REG ioctl
+ * @id: The register ID
+ * @source: The pointer to the value to be set. It must point to a variable
+ *          of the correct type/size for the register being accessed.
+ *
+ * Returns: 0 on success, or a negative errno on failure.
+ */
+int kvm_set_one_reg(CPUState *cs, uint64_t id, void *source);
+
+/**
+ * kvm_get_one_reg - get a register value from KVM via KVM_GET_ONE_REG ioctl
+ * @id: The register ID
+ * @target: The pointer where the value is to be stored. It must point to a
+ *          variable of the correct type/size for the register being accessed.
+ *
+ * Returns: 0 on success, or a negative errno on failure.
+ */
+int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target);
 #endif
diff --git a/kvm-all.c b/kvm-all.c
index 5cb7f26..a343ede 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -223,13 +223,6 @@
     return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
 }
 
-static void kvm_reset_vcpu(void *opaque)
-{
-    CPUState *cpu = opaque;
-
-    kvm_arch_reset_vcpu(cpu);
-}
-
 int kvm_init_vcpu(CPUState *cpu)
 {
     KVMState *s = kvm_state;
@@ -269,10 +262,6 @@
     }
 
     ret = kvm_arch_init_vcpu(cpu);
-    if (ret == 0) {
-        qemu_register_reset(kvm_reset_vcpu, cpu);
-        kvm_arch_reset_vcpu(cpu);
-    }
 err:
     return ret;
 }
@@ -2114,3 +2103,31 @@
 
     return test ? 0 : create_dev.fd;
 }
+
+int kvm_set_one_reg(CPUState *cs, uint64_t id, void *source)
+{
+    struct kvm_one_reg reg;
+    int r;
+
+    reg.id = id;
+    reg.addr = (uintptr_t) source;
+    r = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
+    if (r) {
+        trace_kvm_failed_reg_set(id, strerror(-r)); /* r is -errno */
+    }
+    return r;
+}
+
+int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target)
+{
+    struct kvm_one_reg reg;
+    int r;
+
+    reg.id = id;
+    reg.addr = (uintptr_t) target;
+    r = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
+    if (r) {
+        trace_kvm_failed_reg_get(id, strerror(-r)); /* r is -errno */
+    }
+    return r;
+}
diff --git a/linux-user/main.c b/linux-user/main.c
index c38fecf..882186e 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -4051,8 +4051,6 @@
 #endif
 
 #if defined(TARGET_I386)
-    cpu_x86_set_cpl(env, 3);
-
     env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
     env->hflags |= HF_PE_MASK;
     if (env->features[FEAT_1_EDX] & CPUID_SSE) {
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index c0ddc3e..6c6f2b3 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -29,6 +29,7 @@
 #include "hw/arm/arm.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/kvm.h"
+#include "kvm_arm.h"
 
 static void arm_cpu_set_pc(CPUState *cs, vaddr value)
 {
@@ -165,6 +166,12 @@
      * tb_flush().
      */
     tb_flush(env);
+
+#ifndef CONFIG_USER_ONLY
+    if (kvm_enabled()) {
+        kvm_arm_reset_vcpu(cpu);
+    }
+#endif
 }
 
 #ifndef CONFIG_USER_ONLY
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 3be917c..417161e 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -2081,6 +2081,13 @@
 {
     ARMCPU *cpu = arm_env_get_cpu(env);
 
+    if (env->cp15.c1_sys == value) {
+        /* Skip the TLB flush if nothing actually changed; Linux likes
+         * to do a lot of pointless SCTLR writes.
+         */
+        return;
+    }
+
     env->cp15.c1_sys = value;
     /* ??? Lots of these bits are not implemented.  */
     /* This may enable/disable the MMU, so do a TLB flush.  */
diff --git a/target-arm/kvm32.c b/target-arm/kvm32.c
index a690d99..b79750c 100644
--- a/target-arm/kvm32.c
+++ b/target-arm/kvm32.c
@@ -510,11 +510,9 @@
     return 0;
 }
 
-void kvm_arch_reset_vcpu(CPUState *cs)
+void kvm_arm_reset_vcpu(ARMCPU *cpu)
 {
     /* Feed the kernel back its initial register state */
-    ARMCPU *cpu = ARM_CPU(cs);
-
     memmove(cpu->cpreg_values, cpu->cpreg_reset_values,
             cpu->cpreg_array_len * sizeof(cpu->cpreg_values[0]));
 
diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c
index e115879..c729b9e 100644
--- a/target-arm/kvm64.c
+++ b/target-arm/kvm64.c
@@ -260,6 +260,6 @@
     return ret;
 }
 
-void kvm_arch_reset_vcpu(CPUState *cs)
+void kvm_arm_reset_vcpu(ARMCPU *cpu)
 {
 }
diff --git a/target-arm/kvm_arm.h b/target-arm/kvm_arm.h
index 137c567..dc4e233 100644
--- a/target-arm/kvm_arm.h
+++ b/target-arm/kvm_arm.h
@@ -67,6 +67,14 @@
  */
 bool write_kvmstate_to_list(ARMCPU *cpu);
 
+/**
+ * kvm_arm_reset_vcpu:
+ * @cpu: ARMCPU
+ *
+ * Called at reset time to set kernel registers to their initial values.
+ */
+void kvm_arm_reset_vcpu(ARMCPU *cpu);
+
 #ifdef CONFIG_KVM
 /**
  * kvm_arm_create_scratch_host_vcpu:
diff --git a/target-arm/machine.c b/target-arm/machine.c
index 810ba27..5092dcd 100644
--- a/target-arm/machine.c
+++ b/target-arm/machine.c
@@ -39,7 +39,6 @@
     .name = "cpu/vfp",
     .version_id = 3,
     .minimum_version_id = 3,
-    .minimum_version_id_old = 3,
     .fields = (VMStateField[]) {
         VMSTATE_FLOAT64_ARRAY(env.vfp.regs, ARMCPU, 64),
         /* The xregs array is a little awkward because element 1 (FPSCR)
@@ -72,7 +71,6 @@
     .name = "cpu/iwmmxt",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64_ARRAY(env.iwmmxt.regs, ARMCPU, 16),
         VMSTATE_UINT32_ARRAY(env.iwmmxt.cregs, ARMCPU, 16),
@@ -92,7 +90,6 @@
     .name = "cpu/m",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(env.v7m.other_sp, ARMCPU),
         VMSTATE_UINT32(env.v7m.vecbase, ARMCPU),
@@ -116,7 +113,6 @@
     .name = "cpu/thumb2ee",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(env.teecr, ARMCPU),
         VMSTATE_UINT32(env.teehbr, ARMCPU),
@@ -224,7 +220,6 @@
     .name = "cpu",
     .version_id = 17,
     .minimum_version_id = 17,
-    .minimum_version_id_old = 17,
     .pre_save = cpu_pre_save,
     .post_load = cpu_post_load,
     .fields = (VMStateField[]) {
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 8f193a9..042a48d 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -24,6 +24,7 @@
 #include "cpu.h"
 #include "sysemu/kvm.h"
 #include "sysemu/cpus.h"
+#include "kvm_i386.h"
 #include "topology.h"
 
 #include "qemu/option.h"
@@ -2417,8 +2418,7 @@
 
     xcc->parent_reset(s);
 
-
-    memset(env, 0, offsetof(CPUX86State, pat));
+    memset(env, 0, offsetof(CPUX86State, cpuid_level));
 
     tlb_flush(s, 1);
 
@@ -2484,8 +2484,7 @@
     cpu_breakpoint_remove_all(s, BP_CPU);
     cpu_watchpoint_remove_all(s, BP_CPU);
 
-    env->tsc_adjust = 0;
-    env->tsc = 0;
+    env->xcr0 = 1;
 
 #if !defined(CONFIG_USER_ONLY)
     /* We hard-wire the BSP to the first CPU. */
@@ -2494,6 +2493,10 @@
     }
 
     s->halted = !cpu_is_bsp(cpu);
+
+    if (kvm_enabled()) {
+        kvm_arch_reset_vcpu(cpu);
+    }
 #endif
 }
 
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 2a22a7d..e9cbdab 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -124,9 +124,9 @@
 #define ID_MASK                 0x00200000
 
 /* hidden flags - used internally by qemu to represent additional cpu
-   states. Only the CPL, INHIBIT_IRQ, SMM and SVMI are not
-   redundant. We avoid using the IOPL_MASK, TF_MASK, VM_MASK and AC_MASK
-   bit positions to ease oring with eflags. */
+   states. Only the INHIBIT_IRQ, SMM and SVMI are not redundant. We
+   avoid using the IOPL_MASK, TF_MASK, VM_MASK and AC_MASK bit
+   positions to ease oring with eflags. */
 /* current cpl */
 #define HF_CPL_SHIFT         0
 /* true if soft mmu is being used */
@@ -606,10 +606,11 @@
 #define CPU_INTERRUPT_NMI       CPU_INTERRUPT_TGT_EXT_3
 #define CPU_INTERRUPT_MCE       CPU_INTERRUPT_TGT_EXT_4
 #define CPU_INTERRUPT_VIRQ      CPU_INTERRUPT_TGT_INT_0
-#define CPU_INTERRUPT_INIT      CPU_INTERRUPT_TGT_INT_1
-#define CPU_INTERRUPT_SIPI      CPU_INTERRUPT_TGT_INT_2
-#define CPU_INTERRUPT_TPR       CPU_INTERRUPT_TGT_INT_3
+#define CPU_INTERRUPT_SIPI      CPU_INTERRUPT_TGT_INT_1
+#define CPU_INTERRUPT_TPR       CPU_INTERRUPT_TGT_INT_2
 
+/* Use a clearer name for this.  */
+#define CPU_INTERRUPT_INIT      CPU_INTERRUPT_RESET
 
 typedef enum {
     CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */
@@ -797,6 +798,13 @@
     target_ulong cr[5]; /* NOTE: cr1 is unused */
     int32_t a20_mask;
 
+    BNDReg bnd_regs[4];
+    BNDCSReg bndcs_regs;
+    uint64_t msr_bndcfgs;
+
+    /* Beginning of state preserved by INIT (dummy marker).  */
+    struct {} start_init_save;
+
     /* FPU state */
     unsigned int fpstt; /* top of stack index */
     uint16_t fpus;
@@ -819,6 +827,8 @@
     XMMReg xmm_t0;
     MMXReg mmx_t0;
 
+    XMMReg ymmh_regs[CPU_NB_REGS];
+
     /* sysenter registers */
     uint32_t sysenter_cs;
     target_ulong sysenter_esp;
@@ -827,15 +837,6 @@
     uint64_t star;
 
     uint64_t vm_hsave;
-    uint64_t vm_vmcb;
-    uint64_t tsc_offset;
-    uint64_t intercept;
-    uint16_t intercept_cr_read;
-    uint16_t intercept_cr_write;
-    uint16_t intercept_dr_read;
-    uint16_t intercept_dr_write;
-    uint32_t intercept_exceptions;
-    uint8_t v_tpr;
 
 #ifdef TARGET_X86_64
     target_ulong lstar;
@@ -843,11 +844,6 @@
     target_ulong fmask;
     target_ulong kernelgsbase;
 #endif
-    uint64_t system_time_msr;
-    uint64_t wall_clock_msr;
-    uint64_t steal_time_msr;
-    uint64_t async_pf_en_msr;
-    uint64_t pv_eoi_en_msr;
 
     uint64_t tsc;
     uint64_t tsc_adjust;
@@ -864,6 +860,19 @@
     uint64_t msr_fixed_counters[MAX_FIXED_COUNTERS];
     uint64_t msr_gp_counters[MAX_GP_COUNTERS];
     uint64_t msr_gp_evtsel[MAX_GP_COUNTERS];
+
+    uint64_t pat;
+    uint32_t smbase;
+
+    /* End of state preserved by INIT (dummy marker).  */
+    struct {} end_init_save;
+
+    uint64_t system_time_msr;
+    uint64_t wall_clock_msr;
+    uint64_t steal_time_msr;
+    uint64_t async_pf_en_msr;
+    uint64_t pv_eoi_en_msr;
+
     uint64_t msr_hv_hypercall;
     uint64_t msr_hv_guest_os_id;
     uint64_t msr_hv_vapic;
@@ -878,9 +887,18 @@
         struct CPUBreakpoint *cpu_breakpoint[4];
         struct CPUWatchpoint *cpu_watchpoint[4];
     }; /* break/watchpoints for dr[0..3] */
-    uint32_t smbase;
     int old_exception;  /* exception in flight */
 
+    uint64_t vm_vmcb;
+    uint64_t tsc_offset;
+    uint64_t intercept;
+    uint16_t intercept_cr_read;
+    uint16_t intercept_cr_write;
+    uint16_t intercept_dr_read;
+    uint16_t intercept_dr_write;
+    uint32_t intercept_exceptions;
+    uint8_t v_tpr;
+
     /* KVM states, automatically cleared on reset */
     uint8_t nmi_injected;
     uint8_t nmi_pending;
@@ -888,7 +906,6 @@
     CPU_COMMON
 
     /* Fields from here on are preserved across CPU reset. */
-    uint64_t pat;
 
     /* processor features (e.g. for CPUID insn) */
     uint32_t cpuid_level;
@@ -928,12 +945,7 @@
     uint16_t fpus_vmstate;
     uint16_t fptag_vmstate;
     uint16_t fpregs_format_vmstate;
-
     uint64_t xstate_bv;
-    XMMReg ymmh_regs[CPU_NB_REGS];
-    BNDReg bnd_regs[4];
-    BNDCSReg bndcs_regs;
-    uint64_t msr_bndcfgs;
 
     uint64_t xcr0;
 
@@ -974,6 +986,7 @@
     /* update the hidden flags */
     {
         if (seg_reg == R_CS) {
+            int cpl = selector & 3;
 #ifdef TARGET_X86_64
             if ((env->hflags & HF_LMA_MASK) && (flags & DESC_L_MASK)) {
                 /* long mode */
@@ -983,11 +996,19 @@
 #endif
             {
                 /* legacy / compatibility case */
+                if (!(env->cr[0] & CR0_PE_MASK))
+                    cpl = 0;
+                else if (env->eflags & VM_MASK)
+                    cpl = 3;
                 new_hflags = (env->segs[R_CS].flags & DESC_B_MASK)
                     >> (DESC_B_SHIFT - HF_CS32_SHIFT);
                 env->hflags = (env->hflags & ~(HF_CS32_MASK | HF_CS64_MASK)) |
                     new_hflags;
             }
+#if HF_CPL_MASK != 3
+#error HF_CPL_MASK is hardcoded
+#endif
+            env->hflags = (env->hflags & ~HF_CPL_MASK) | cpl;
         }
         new_hflags = (env->segs[R_SS].flags & DESC_B_MASK)
             >> (DESC_B_SHIFT - HF_SS32_SHIFT);
@@ -1031,16 +1052,6 @@
                             target_ulong *base, unsigned int *limit,
                             unsigned int *flags);
 
-/* wrapper, just in case memory mappings must be changed */
-static inline void cpu_x86_set_cpl(CPUX86State *s, int cpl)
-{
-#if HF_CPL_MASK == 3
-    s->hflags = (s->hflags & ~HF_CPL_MASK) | cpl;
-#else
-#error HF_CPL_MASK is hardcoded
-#endif
-}
-
 /* op_helper.c */
 /* used for debug or cpu save/restore */
 void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f);
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 372f0e3..46d20e4 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -19,6 +19,7 @@
 
 #include "cpu.h"
 #include "sysemu/kvm.h"
+#include "kvm_i386.h"
 #ifndef CONFIG_USER_ONLY
 #include "sysemu/sysemu.h"
 #include "monitor/monitor.h"
@@ -1329,12 +1330,21 @@
 {
     CPUState *cs = CPU(cpu);
     CPUX86State *env = &cpu->env;
+    CPUX86State *save = g_new(CPUX86State, 1);
     int sipi = cs->interrupt_request & CPU_INTERRUPT_SIPI;
-    uint64_t pat = env->pat;
+
+    *save = *env;
 
     cpu_reset(cs);
     cs->interrupt_request = sipi;
-    env->pat = pat;
+    memcpy(&env->start_init_save, &save->start_init_save,
+           offsetof(CPUX86State, end_init_save) -
+           offsetof(CPUX86State, start_init_save));
+    g_free(save);
+
+    if (kvm_enabled()) {
+        kvm_arch_do_init_vcpu(cpu);
+    }
     apic_init_reset(cpu->apic_state);
 }
 
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 4389959..0d894ef 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -30,6 +30,8 @@
 #include "qemu/config-file.h"
 #include "hw/i386/pc.h"
 #include "hw/i386/apic.h"
+#include "hw/i386/apic_internal.h"
+#include "hw/i386/apic-msidef.h"
 #include "exec/ioport.h"
 #include <asm/hyperv.h>
 #include "hw/pci/pci.h"
@@ -130,14 +132,13 @@
     { KVM_CAP_NOP_IO_DELAY, KVM_FEATURE_NOP_IO_DELAY },
     { KVM_CAP_PV_MMU, KVM_FEATURE_MMU_OP },
     { KVM_CAP_ASYNC_PF, KVM_FEATURE_ASYNC_PF },
-    { -1, -1 }
 };
 
 static int get_para_features(KVMState *s)
 {
     int i, features = 0;
 
-    for (i = 0; i < ARRAY_SIZE(para_features) - 1; i++) {
+    for (i = 0; i < ARRAY_SIZE(para_features); i++) {
         if (kvm_check_extension(s, para_features[i].cap)) {
             features |= (1 << para_features[i].feature);
         }
@@ -724,9 +725,8 @@
     return 0;
 }
 
-void kvm_arch_reset_vcpu(CPUState *cs)
+void kvm_arch_reset_vcpu(X86CPU *cpu)
 {
-    X86CPU *cpu = X86_CPU(cs);
     CPUX86State *env = &cpu->env;
 
     env->exception_injected = -1;
@@ -740,6 +740,16 @@
     }
 }
 
+void kvm_arch_do_init_vcpu(X86CPU *cpu)
+{
+    CPUX86State *env = &cpu->env;
+
+    /* APs get directly into wait-for-SIPI state.  */
+    if (env->mp_state == KVM_MP_STATE_UNINITIALIZED) {
+        env->mp_state = KVM_MP_STATE_INIT_RECEIVED;
+    }
+}
+
 static int kvm_get_supported_msrs(KVMState *s)
 {
     static int kvm_supported_msrs;
@@ -2005,14 +2015,15 @@
         }
     }
 
-    if (!kvm_irqchip_in_kernel()) {
-        /* Force the VCPU out of its inner loop to process any INIT requests
-         * or pending TPR access reports. */
-        if (cpu->interrupt_request &
-            (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
-            cpu->exit_request = 1;
-        }
+    /* Force the VCPU out of its inner loop to process any INIT requests
+     * or (for userspace APIC, but it is cheap to combine the checks here)
+     * pending TPR access reports.
+     */
+    if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
+        cpu->exit_request = 1;
+    }
 
+    if (!kvm_irqchip_in_kernel()) {
         /* Try to inject an interrupt if the guest can accept it */
         if (run->ready_for_interrupt_injection &&
             (cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
@@ -2092,6 +2103,11 @@
         }
     }
 
+    if (cs->interrupt_request & CPU_INTERRUPT_INIT) {
+        kvm_cpu_synchronize_state(cs);
+        do_cpu_init(cpu);
+    }
+
     if (kvm_irqchip_in_kernel()) {
         return 0;
     }
@@ -2105,10 +2121,6 @@
         (cs->interrupt_request & CPU_INTERRUPT_NMI)) {
         cs->halted = 0;
     }
-    if (cs->interrupt_request & CPU_INTERRUPT_INIT) {
-        kvm_cpu_synchronize_state(cs);
-        do_cpu_init(cpu);
-    }
     if (cs->interrupt_request & CPU_INTERRUPT_SIPI) {
         kvm_cpu_synchronize_state(cs);
         do_cpu_sipi(cpu);
diff --git a/target-i386/kvm_i386.h b/target-i386/kvm_i386.h
index 4392ab4..cac30fd 100644
--- a/target-i386/kvm_i386.h
+++ b/target-i386/kvm_i386.h
@@ -14,6 +14,8 @@
 #include "sysemu/kvm.h"
 
 bool kvm_allows_irq0_override(void);
+void kvm_arch_reset_vcpu(X86CPU *cs);
+void kvm_arch_do_init_vcpu(X86CPU *cs);
 
 int kvm_device_pci_assign(KVMState *s, PCIHostDeviceAddress *dev_addr,
                           uint32_t flags, uint32_t *dev_id);
diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c
index 8c3f92c..3cf862e 100644
--- a/target-i386/seg_helper.c
+++ b/target-i386/seg_helper.c
@@ -409,11 +409,7 @@
         for (i = 0; i < 6; i++) {
             load_seg_vm(env, i, new_segs[i]);
         }
-        /* in vm86, CPL is always 3 */
-        cpu_x86_set_cpl(env, 3);
     } else {
-        /* CPL is set the RPL of CS */
-        cpu_x86_set_cpl(env, new_segs[R_CS] & 3);
         /* first just selectors as the rest may trigger exceptions */
         for (i = 0; i < 6; i++) {
             cpu_x86_load_seg_cache(env, i, new_segs[i], 0, 0, 0);
@@ -739,6 +735,12 @@
         }
     }
 
+    /* interrupt gate clear IF mask */
+    if ((type & 1) == 0) {
+        env->eflags &= ~IF_MASK;
+    }
+    env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK);
+
     if (new_stack) {
         if (env->eflags & VM_MASK) {
             cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0, 0);
@@ -757,14 +759,7 @@
                    get_seg_base(e1, e2),
                    get_seg_limit(e1, e2),
                    e2);
-    cpu_x86_set_cpl(env, dpl);
     env->eip = offset;
-
-    /* interrupt gate clear IF mask */
-    if ((type & 1) == 0) {
-        env->eflags &= ~IF_MASK;
-    }
-    env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK);
 }
 
 #ifdef TARGET_X86_64
@@ -911,6 +906,12 @@
         PUSHQ(esp, error_code);
     }
 
+    /* interrupt gate clear IF mask */
+    if ((type & 1) == 0) {
+        env->eflags &= ~IF_MASK;
+    }
+    env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK);
+
     if (new_stack) {
         ss = 0 | dpl;
         cpu_x86_load_seg_cache(env, R_SS, ss, 0, 0, 0);
@@ -922,14 +923,7 @@
                    get_seg_base(e1, e2),
                    get_seg_limit(e1, e2),
                    e2);
-    cpu_x86_set_cpl(env, dpl);
     env->eip = offset;
-
-    /* interrupt gate clear IF mask */
-    if ((type & 1) == 0) {
-        env->eflags &= ~IF_MASK;
-    }
-    env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK);
 }
 #endif
 
@@ -960,7 +954,8 @@
 
         code64 = env->hflags & HF_CS64_MASK;
 
-        cpu_x86_set_cpl(env, 0);
+        env->eflags &= ~env->fmask;
+        cpu_load_eflags(env, env->eflags, 0);
         cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
                            0, 0xffffffff,
                                DESC_G_MASK | DESC_P_MASK |
@@ -972,8 +967,6 @@
                                DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                                DESC_S_MASK |
                                DESC_W_MASK | DESC_A_MASK);
-        env->eflags &= ~env->fmask;
-        cpu_load_eflags(env, env->eflags, 0);
         if (code64) {
             env->eip = env->lstar;
         } else {
@@ -982,7 +975,7 @@
     } else {
         env->regs[R_ECX] = (uint32_t)(env->eip + next_eip_addend);
 
-        cpu_x86_set_cpl(env, 0);
+        env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK);
         cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
                            0, 0xffffffff,
                                DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
@@ -993,7 +986,6 @@
                                DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                                DESC_S_MASK |
                                DESC_W_MASK | DESC_A_MASK);
-        env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK);
         env->eip = (uint32_t)env->star;
     }
 }
@@ -1014,6 +1006,9 @@
     }
     selector = (env->star >> 48) & 0xffff;
     if (env->hflags & HF_LMA_MASK) {
+        cpu_load_eflags(env, (uint32_t)(env->regs[11]), TF_MASK | AC_MASK
+                        | ID_MASK | IF_MASK | IOPL_MASK | VM_MASK | RF_MASK |
+                        NT_MASK);
         if (dflag == 2) {
             cpu_x86_load_seg_cache(env, R_CS, (selector + 16) | 3,
                                    0, 0xffffffff,
@@ -1035,11 +1030,8 @@
                                DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                                DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
                                DESC_W_MASK | DESC_A_MASK);
-        cpu_load_eflags(env, (uint32_t)(env->regs[11]), TF_MASK | AC_MASK
-                        | ID_MASK | IF_MASK | IOPL_MASK | VM_MASK | RF_MASK |
-                        NT_MASK);
-        cpu_x86_set_cpl(env, 3);
     } else {
+        env->eflags |= IF_MASK;
         cpu_x86_load_seg_cache(env, R_CS, selector | 3,
                                0, 0xffffffff,
                                DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
@@ -1051,8 +1043,6 @@
                                DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                                DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
                                DESC_W_MASK | DESC_A_MASK);
-        env->eflags |= IF_MASK;
-        cpu_x86_set_cpl(env, 3);
     }
 }
 #endif
@@ -1905,7 +1895,6 @@
                        get_seg_base(e1, e2),
                        get_seg_limit(e1, e2),
                        e2);
-        cpu_x86_set_cpl(env, dpl);
         SET_ESP(sp, sp_mask);
         env->eip = offset;
     }
@@ -2134,7 +2123,6 @@
                        get_seg_base(e1, e2),
                        get_seg_limit(e1, e2),
                        e2);
-        cpu_x86_set_cpl(env, rpl);
         sp = new_esp;
 #ifdef TARGET_X86_64
         if (env->hflags & HF_CS64_MASK) {
@@ -2185,7 +2173,6 @@
                     IF_MASK | IOPL_MASK | VM_MASK | NT_MASK | VIF_MASK |
                     VIP_MASK);
     load_seg_vm(env, R_CS, new_cs & 0xffff);
-    cpu_x86_set_cpl(env, 3);
     load_seg_vm(env, R_SS, new_ss & 0xffff);
     load_seg_vm(env, R_ES, new_es & 0xffff);
     load_seg_vm(env, R_DS, new_ds & 0xffff);
@@ -2238,7 +2225,6 @@
         raise_exception_err(env, EXCP0D_GPF, 0);
     }
     env->eflags &= ~(VM_MASK | IF_MASK | RF_MASK);
-    cpu_x86_set_cpl(env, 0);
 
 #ifdef TARGET_X86_64
     if (env->hflags & HF_LMA_MASK) {
@@ -2274,7 +2260,6 @@
     if (env->sysenter_cs == 0 || cpl != 0) {
         raise_exception_err(env, EXCP0D_GPF, 0);
     }
-    cpu_x86_set_cpl(env, 3);
 #ifdef TARGET_X86_64
     if (dflag == 2) {
         cpu_x86_load_seg_cache(env, R_CS, ((env->sysenter_cs + 32) & 0xfffc) |
diff --git a/target-i386/smm_helper.c b/target-i386/smm_helper.c
index 35901c9..4841d53 100644
--- a/target-i386/smm_helper.c
+++ b/target-i386/smm_helper.c
@@ -163,6 +163,13 @@
     cpu_load_eflags(env, 0, ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C |
                               DF_MASK));
     env->eip = 0x00008000;
+    cpu_x86_update_cr0(env,
+                       env->cr[0] & ~(CR0_PE_MASK | CR0_EM_MASK | CR0_TS_MASK |
+                                      CR0_PG_MASK));
+    cpu_x86_update_cr4(env, 0);
+    env->dr[7] = 0x00000400;
+    CC_OP = CC_OP_EFLAGS;
+
     cpu_x86_load_seg_cache(env, R_CS, (env->smbase >> 4) & 0xffff, env->smbase,
                            0xffffffff, 0);
     cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffffffff, 0);
@@ -170,13 +177,6 @@
     cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffffffff, 0);
     cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffffffff, 0);
     cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffffffff, 0);
-
-    cpu_x86_update_cr0(env,
-                       env->cr[0] & ~(CR0_PE_MASK | CR0_EM_MASK | CR0_TS_MASK |
-                                      CR0_PG_MASK));
-    cpu_x86_update_cr4(env, 0);
-    env->dr[7] = 0x00000400;
-    CC_OP = CC_OP_EFLAGS;
 }
 
 void helper_rsm(CPUX86State *env)
@@ -191,16 +191,6 @@
 #ifdef TARGET_X86_64
     cpu_load_efer(env, ldq_phys(cs->as, sm_state + 0x7ed0));
 
-    for (i = 0; i < 6; i++) {
-        offset = 0x7e00 + i * 16;
-        cpu_x86_load_seg_cache(env, i,
-                               lduw_phys(cs->as, sm_state + offset),
-                               ldq_phys(cs->as, sm_state + offset + 8),
-                               ldl_phys(cs->as, sm_state + offset + 4),
-                               (lduw_phys(cs->as, sm_state + offset + 2) &
-                                0xf0ff) << 8);
-    }
-
     env->gdt.base = ldq_phys(cs->as, sm_state + 0x7e68);
     env->gdt.limit = ldl_phys(cs->as, sm_state + 0x7e64);
 
@@ -238,6 +228,16 @@
     cpu_x86_update_cr3(env, ldl_phys(cs->as, sm_state + 0x7f50));
     cpu_x86_update_cr0(env, ldl_phys(cs->as, sm_state + 0x7f58));
 
+    for (i = 0; i < 6; i++) {
+        offset = 0x7e00 + i * 16;
+        cpu_x86_load_seg_cache(env, i,
+                               lduw_phys(cs->as, sm_state + offset),
+                               ldq_phys(cs->as, sm_state + offset + 8),
+                               ldl_phys(cs->as, sm_state + offset + 4),
+                               (lduw_phys(cs->as, sm_state + offset + 2) &
+                                0xf0ff) << 8);
+    }
+
     val = ldl_phys(cs->as, sm_state + 0x7efc); /* revision ID */
     if (val & 0x20000) {
         env->smbase = ldl_phys(cs->as, sm_state + 0x7f00) & ~0x7fff;
diff --git a/target-i386/svm_helper.c b/target-i386/svm_helper.c
index aa17ecd..846eaa5 100644
--- a/target-i386/svm_helper.c
+++ b/target-i386/svm_helper.c
@@ -282,9 +282,6 @@
                           env->vm_vmcb + offsetof(struct vmcb, save.dr7));
     env->dr[6] = ldq_phys(cs->as,
                           env->vm_vmcb + offsetof(struct vmcb, save.dr6));
-    cpu_x86_set_cpl(env, ldub_phys(cs->as,
-                                   env->vm_vmcb + offsetof(struct vmcb,
-                                                           save.cpl)));
 
     /* FIXME: guest state consistency checks */
 
@@ -703,7 +700,8 @@
     cpu_load_eflags(env, ldq_phys(cs->as,
                                   env->vm_hsave + offsetof(struct vmcb,
                                                            save.rflags)),
-                    ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
+                    ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK |
+                      VM_MASK));
     CC_OP = CC_OP_EFLAGS;
 
     svm_load_seg_cache(env, env->vm_hsave + offsetof(struct vmcb, save.es),
@@ -728,7 +726,6 @@
                           env->vm_hsave + offsetof(struct vmcb, save.dr7));
 
     /* other setups */
-    cpu_x86_set_cpl(env, 0);
     stq_phys(cs->as, env->vm_vmcb + offsetof(struct vmcb, control.exit_code),
              exit_code);
     stq_phys(cs->as, env->vm_vmcb + offsetof(struct vmcb, control.exit_info_1),
@@ -756,10 +753,6 @@
        from the page table indicated the host's CR3. If the PDPEs contain
        illegal state, the processor causes a shutdown. */
 
-    /* Forces CR0.PE = 1, RFLAGS.VM = 0. */
-    env->cr[0] |= CR0_PE_MASK;
-    env->eflags &= ~VM_MASK;
-
     /* Disables all breakpoints in the host DR7 register. */
 
     /* Checks the reloaded host state for consistency. */
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 02625e3..032b0fd 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -6708,41 +6708,63 @@
         }
     bt_op:
         tcg_gen_andi_tl(cpu_T[1], cpu_T[1], (1 << (3 + ot)) - 1);
+        tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
         switch(op) {
         case 0:
-            tcg_gen_shr_tl(cpu_cc_src, cpu_T[0], cpu_T[1]);
-            tcg_gen_movi_tl(cpu_cc_dst, 0);
             break;
         case 1:
-            tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
             tcg_gen_movi_tl(cpu_tmp0, 1);
             tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
             tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
             break;
         case 2:
-            tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
             tcg_gen_movi_tl(cpu_tmp0, 1);
             tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
-            tcg_gen_not_tl(cpu_tmp0, cpu_tmp0);
-            tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+            tcg_gen_andc_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
             break;
         default:
         case 3:
-            tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
             tcg_gen_movi_tl(cpu_tmp0, 1);
             tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
             tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
             break;
         }
-        set_cc_op(s, CC_OP_SARB + ot);
         if (op != 0) {
             if (mod != 3) {
                 gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
             } else {
                 gen_op_mov_reg_v(ot, rm, cpu_T[0]);
             }
+        }
+
+        /* Delay all CC updates until after the store above.  Note that
+           C is the result of the test, Z is unchanged, and the others
+           are all undefined.  */
+        switch (s->cc_op) {
+        case CC_OP_MULB ... CC_OP_MULQ:
+        case CC_OP_ADDB ... CC_OP_ADDQ:
+        case CC_OP_ADCB ... CC_OP_ADCQ:
+        case CC_OP_SUBB ... CC_OP_SUBQ:
+        case CC_OP_SBBB ... CC_OP_SBBQ:
+        case CC_OP_LOGICB ... CC_OP_LOGICQ:
+        case CC_OP_INCB ... CC_OP_INCQ:
+        case CC_OP_DECB ... CC_OP_DECQ:
+        case CC_OP_SHLB ... CC_OP_SHLQ:
+        case CC_OP_SARB ... CC_OP_SARQ:
+        case CC_OP_BMILGB ... CC_OP_BMILGQ:
+            /* Z was going to be computed from the non-zero status of CC_DST.
+               We can get that same Z value (and the new C value) by leaving
+               CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
+               same width.  */
             tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
-            tcg_gen_movi_tl(cpu_cc_dst, 0);
+            set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
+            break;
+        default:
+            /* Otherwise, generate EFLAGS and replace the C bit.  */
+            gen_compute_eflags(s);
+            tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, cpu_tmp4,
+                               ctz32(CC_C), 1);
+            break;
         }
         break;
     case 0x1bc: /* bsf / tzcnt */
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index d498340..75ed5fa 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -2042,9 +2042,6 @@
     PPC_INTERRUPT_PERFM,          /* Performance monitor interrupt        */
 };
 
-/* CPU should be reset next, restart from scratch afterwards */
-#define CPU_INTERRUPT_RESET       CPU_INTERRUPT_TGT_INT_0
-
 /*****************************************************************************/
 
 static inline target_ulong cpu_read_xer(CPUPPCState *env)
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 4b81e5f..8ff1777 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -430,10 +430,6 @@
     return ret;
 }
 
-void kvm_arch_reset_vcpu(CPUState *cpu)
-{
-}
-
 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 {
     CPUPPCState *env = &cpu->env;
diff --git a/target-s390x/cpu.c b/target-s390x/cpu.c
index dfd83e8..c3082b7 100644
--- a/target-s390x/cpu.c
+++ b/target-s390x/cpu.c
@@ -152,6 +152,10 @@
      * after incrementing the cpu counter */
 #if !defined(CONFIG_USER_ONLY)
     s->halted = 1;
+
+    if (kvm_enabled()) {
+        kvm_s390_reset_vcpu(cpu);
+    }
 #endif
     tlb_flush(s, 1);
 }
diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h
index aad277a..06454d6 100644
--- a/target-s390x/cpu.h
+++ b/target-s390x/cpu.h
@@ -359,11 +359,16 @@
 int s390_virtio_hypercall(CPUS390XState *env);
 
 #ifdef CONFIG_KVM
+void kvm_s390_reset_vcpu(S390CPU *cpu);
 void kvm_s390_interrupt(S390CPU *cpu, int type, uint32_t code);
 void kvm_s390_virtio_irq(S390CPU *cpu, int config_change, uint64_t token);
 void kvm_s390_interrupt_internal(S390CPU *cpu, int type, uint32_t parm,
                                  uint64_t parm64, int vm);
 #else
+static inline void kvm_s390_reset_vcpu(S390CPU *cpu)
+{
+}
+
 static inline void kvm_s390_interrupt(S390CPU *cpu, int type, uint32_t code)
 {
 }
diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index b7b0edc..56179af 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -117,47 +117,20 @@
     return 0;
 }
 
-void kvm_arch_reset_vcpu(CPUState *cpu)
+void kvm_s390_reset_vcpu(S390CPU *cpu)
 {
+    CPUState *cs = CPU(cpu);
+
     /* The initial reset call is needed here to reset in-kernel
      * vcpu data that we can't access directly from QEMU
      * (i.e. with older kernels which don't support sync_regs/ONE_REG).
      * Before this ioctl cpu_synchronize_state() is called in common kvm
      * code (kvm-all) */
-    if (kvm_vcpu_ioctl(cpu, KVM_S390_INITIAL_RESET, NULL)) {
+    if (kvm_vcpu_ioctl(cs, KVM_S390_INITIAL_RESET, NULL)) {
         perror("Can't reset vcpu\n");
     }
 }
 
-static int kvm_set_one_reg(CPUState *cs, uint64_t id, void *source)
-{
-    struct kvm_one_reg reg;
-    int r;
-
-    reg.id = id;
-    reg.addr = (uint64_t) source;
-    r = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
-    if (r) {
-        trace_kvm_failed_reg_set(id, strerror(errno));
-    }
-    return r;
-}
-
-static int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target)
-{
-    struct kvm_one_reg reg;
-    int r;
-
-    reg.id = id;
-    reg.addr = (uint64_t) target;
-    r = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
-    if (r) {
-        trace_kvm_failed_reg_get(id, strerror(errno));
-    }
-    return r;
-}
-
-
 int kvm_arch_put_registers(CPUState *cs, int level)
 {
     S390CPU *cpu = S390_CPU(cs);
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 0670aff..ea8aa70 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -2096,12 +2096,15 @@
 {
     TCGRegSet allocated_regs;
     TCGTemp *ts, *ots;
-    TCGType type;
+    TCGType otype, itype;
 
     tcg_regset_set(allocated_regs, s->reserved_regs);
     ots = &s->temps[args[0]];
     ts = &s->temps[args[1]];
-    type = ots->type;
+
+    /* Note that otype != itype for no-op truncation.  */
+    otype = ots->type;
+    itype = ts->type;
 
     /* If the source value is not in a register, and we're going to be
        forced to have it in a register in order to perform the copy,
@@ -2109,13 +2112,13 @@
        we don't have to reload SOURCE the next time it is used. */
     if (((NEED_SYNC_ARG(0) || ots->fixed_reg) && ts->val_type != TEMP_VAL_REG)
         || ts->val_type == TEMP_VAL_MEM) {
-        ts->reg = tcg_reg_alloc(s, tcg_target_available_regs[type],
+        ts->reg = tcg_reg_alloc(s, tcg_target_available_regs[itype],
                                 allocated_regs);
         if (ts->val_type == TEMP_VAL_MEM) {
-            tcg_out_ld(s, type, ts->reg, ts->mem_reg, ts->mem_offset);
+            tcg_out_ld(s, itype, ts->reg, ts->mem_reg, ts->mem_offset);
             ts->mem_coherent = 1;
         } else if (ts->val_type == TEMP_VAL_CONST) {
-            tcg_out_movi(s, type, ts->reg, ts->val);
+            tcg_out_movi(s, itype, ts->reg, ts->val);
         }
         s->reg_to_temp[ts->reg] = args[1];
         ts->val_type = TEMP_VAL_REG;
@@ -2130,7 +2133,7 @@
         if (!ots->mem_allocated) {
             temp_allocate_frame(s, args[0]);
         }
-        tcg_out_st(s, type, ts->reg, ots->mem_reg, ots->mem_offset);
+        tcg_out_st(s, otype, ts->reg, ots->mem_reg, ots->mem_offset);
         if (IS_DEAD_ARG(1)) {
             temp_dead(s, args[1]);
         }
@@ -2158,10 +2161,10 @@
                 /* When allocating a new register, make sure to not spill the
                    input one. */
                 tcg_regset_set_reg(allocated_regs, ts->reg);
-                ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[type],
+                ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
                                          allocated_regs);
             }
-            tcg_out_mov(s, type, ots->reg, ts->reg);
+            tcg_out_mov(s, otype, ots->reg, ts->reg);
         }
         ots->val_type = TEMP_VAL_REG;
         ots->mem_coherent = 0;
diff --git a/tests/qemu-iotests/031 b/tests/qemu-iotests/031
index 5aefb88..1d920ea 100755
--- a/tests/qemu-iotests/031
+++ b/tests/qemu-iotests/031
@@ -35,7 +35,6 @@
 trap "_cleanup; exit \$status" 0 1 2 3 15
 
 # get standard environment, filters and checks
-. ./common.env
 . ./common.rc
 . ./common.filter
 . ./common.pattern
@@ -57,22 +56,22 @@
     echo === Create image with unknown header extension ===
     echo
     _make_test_img 64M
-    $PYTHON qcow2.py "$TEST_IMG" add-header-ext 0x12345678 "This is a test header extension"
-    $PYTHON qcow2.py "$TEST_IMG" dump-header
+    ./qcow2.py "$TEST_IMG" add-header-ext 0x12345678 "This is a test header extension"
+    ./qcow2.py "$TEST_IMG" dump-header
     _check_test_img
 
     echo
     echo === Rewrite header with no backing file ===
     echo
     $QEMU_IMG rebase -u -b "" "$TEST_IMG"
-    $PYTHON qcow2.py "$TEST_IMG" dump-header
+    ./qcow2.py "$TEST_IMG" dump-header
     _check_test_img
 
     echo
     echo === Add a backing file and format ===
     echo
     $QEMU_IMG rebase -u -b "/some/backing/file/path" -F host_device "$TEST_IMG"
-    $PYTHON qcow2.py "$TEST_IMG" dump-header
+    ./qcow2.py "$TEST_IMG" dump-header
 done
 
 # success, all done
diff --git a/tests/qemu-iotests/036 b/tests/qemu-iotests/036
index 29c35d1..03b6aa9 100755
--- a/tests/qemu-iotests/036
+++ b/tests/qemu-iotests/036
@@ -38,7 +38,6 @@
 trap "_cleanup; exit \$status" 0 1 2 3 15
 
 # get standard environment, filters and checks
-. ./common.env
 . ./common.rc
 . ./common.filter
 . ./common.pattern
@@ -54,15 +53,15 @@
 echo === Create image with unknown autoclear feature bit ===
 echo
 _make_test_img 64M
-$PYTHON qcow2.py "$TEST_IMG" set-feature-bit autoclear 63
-$PYTHON qcow2.py "$TEST_IMG" dump-header
+./qcow2.py "$TEST_IMG" set-feature-bit autoclear 63
+./qcow2.py "$TEST_IMG" dump-header
 
 echo
 echo === Repair image ===
 echo
 _check_test_img -r all
 
-$PYTHON qcow2.py "$TEST_IMG" dump-header
+./qcow2.py "$TEST_IMG" dump-header
 
 # success, all done
 echo "*** done"
diff --git a/tests/qemu-iotests/039 b/tests/qemu-iotests/039
index b7b7030..b9cbe99 100755
--- a/tests/qemu-iotests/039
+++ b/tests/qemu-iotests/039
@@ -38,7 +38,6 @@
 trap "_cleanup; exit \$status" 0 1 2 3 15
 
 # get standard environment, filters and checks
-. ./common.env
 . ./common.rc
 . ./common.filter
 
@@ -59,7 +58,7 @@
 $QEMU_IO -c "write -P 0x5a 0 512" "$TEST_IMG" | _filter_qemu_io
 
 # The dirty bit must not be set
-$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 _check_test_img
 
 echo
@@ -74,7 +73,7 @@
 ulimit -c "$old_ulimit"
 
 # The dirty bit must be set
-$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 _check_test_img
 
 echo
@@ -83,7 +82,7 @@
 $QEMU_IO -r -c "read -P 0x5a 0 512" "$TEST_IMG" | _filter_qemu_io
 
 # The dirty bit must be set
-$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 
 echo
 echo "== Repairing the image file must succeed =="
@@ -91,7 +90,7 @@
 _check_test_img -r all
 
 # The dirty bit must not be set
-$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 
 echo
 echo "== Data should still be accessible after repair =="
@@ -110,12 +109,12 @@
 ulimit -c "$old_ulimit"
 
 # The dirty bit must be set
-$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 
 $QEMU_IO -c "write 0 512" "$TEST_IMG" | _filter_qemu_io
 
 # The dirty bit must not be set
-$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 
 echo
 echo "== Creating an image file with lazy_refcounts=off =="
@@ -129,7 +128,7 @@
 ulimit -c "$old_ulimit"
 
 # The dirty bit must not be set since lazy_refcounts=off
-$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 _check_test_img
 
 echo
@@ -145,8 +144,8 @@
 $QEMU_IMG commit "$TEST_IMG"
 
 # The dirty bit must not be set
-$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
-$PYTHON qcow2.py "$TEST_IMG".base dump-header | grep incompatible_features
+./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+./qcow2.py "$TEST_IMG".base dump-header | grep incompatible_features
 
 _check_test_img
 TEST_IMG="$TEST_IMG".base _check_test_img
diff --git a/tests/qemu-iotests/054 b/tests/qemu-iotests/054
index a5ebf99..c8b7082 100755
--- a/tests/qemu-iotests/054
+++ b/tests/qemu-iotests/054
@@ -35,7 +35,6 @@
 trap "_cleanup; exit \$status" 0 1 2 3 15
 
 # get standard environment, filters and checks
-. ./common.env
 . ./common.rc
 . ./common.filter
 
@@ -50,7 +49,7 @@
 echo
 echo "creating too large image (1 EB) using qcow2.py"
 _make_test_img 4G
-$PYTHON qcow2.py "$TEST_IMG" set-header size $((1024 ** 6))
+./qcow2.py "$TEST_IMG" set-header size $((1024 ** 6))
 _check_test_img
 
 # success, all done
diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060
index 5447b27..f0116aa 100755
--- a/tests/qemu-iotests/060
+++ b/tests/qemu-iotests/060
@@ -35,7 +35,6 @@
 trap "_cleanup; exit \$status" 0 1 2 3 15
 
 # get standard environment, filters and checks
-. ./common.env
 . ./common.rc
 . ./common.filter
 
@@ -69,13 +68,13 @@
 _check_test_img
 
 # The corrupt bit should not be set anyway
-$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 
 # Try to write something, thereby forcing the corrupt bit to be set
 $QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
 
 # The corrupt bit must now be set
-$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 
 # Try to open the image R/W (which should fail)
 $QEMU_IO -c "$OPEN_RW" -c "read 0 512" 2>&1 | _filter_qemu_io \
@@ -100,19 +99,19 @@
 # Redirect new data cluster onto refcount block
 poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x02\x00\x00"
 _check_test_img
-$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 $QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
-$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 
 # Try to fix it
 _check_test_img -r all
 
 # The corrupt bit should be cleared
-$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 
 # Look if it's really really fixed
 $QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
-$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 
 echo
 echo "=== Testing cluster data reference into inactive L2 table ==="
@@ -125,13 +124,13 @@
 poke_file "$TEST_IMG" "$l2_offset_after_snapshot" \
                       "\x80\x00\x00\x00\x00\x04\x00\x00"
 _check_test_img
-$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 $QEMU_IO -c "$OPEN_RW" -c "write -P 3 0 512" | _filter_qemu_io
-$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 _check_test_img -r all
-$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 $QEMU_IO -c "$OPEN_RW" -c "write -P 4 0 512" | _filter_qemu_io
-$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 
 # Check data
 $QEMU_IO -c "$OPEN_RO" -c "read -P 4 0 512" | _filter_qemu_io
diff --git a/tests/qemu-iotests/061 b/tests/qemu-iotests/061
index 0de7897..d3a6b38 100755
--- a/tests/qemu-iotests/061
+++ b/tests/qemu-iotests/061
@@ -35,7 +35,6 @@
 trap "_cleanup; exit \$status" 0 1 2 3 15
 
 # get standard environment, filters and checks
-. ./common.env
 . ./common.rc
 . ./common.filter
 
@@ -49,9 +48,9 @@
 echo
 IMGOPTS="compat=1.1,lazy_refcounts=on" _make_test_img 64M
 $QEMU_IO -c "write -z 0 128k" "$TEST_IMG" | _filter_qemu_io
-$PYTHON qcow2.py "$TEST_IMG" dump-header
+./qcow2.py "$TEST_IMG" dump-header
 $QEMU_IMG amend -o "compat=0.10" "$TEST_IMG"
-$PYTHON qcow2.py "$TEST_IMG" dump-header
+./qcow2.py "$TEST_IMG" dump-header
 $QEMU_IO -c "read -P 0 0 128k" "$TEST_IMG" | _filter_qemu_io
 _check_test_img
 
@@ -60,9 +59,9 @@
 echo
 IMGOPTS="compat=1.1,lazy_refcounts=on" _make_test_img 64M
 $QEMU_IO -c "write -P 0x2a 0 128k" -c flush -c abort "$TEST_IMG" | _filter_qemu_io
-$PYTHON qcow2.py "$TEST_IMG" dump-header
+./qcow2.py "$TEST_IMG" dump-header
 $QEMU_IMG amend -o "compat=0.10" "$TEST_IMG"
-$PYTHON qcow2.py "$TEST_IMG" dump-header
+./qcow2.py "$TEST_IMG" dump-header
 $QEMU_IO -c "read -P 0x2a 0 128k" "$TEST_IMG" | _filter_qemu_io
 _check_test_img
 
@@ -70,11 +69,11 @@
 echo "=== Testing version downgrade with unknown compat/autoclear flags ==="
 echo
 IMGOPTS="compat=1.1" _make_test_img 64M
-$PYTHON qcow2.py "$TEST_IMG" set-feature-bit compatible 42
-$PYTHON qcow2.py "$TEST_IMG" set-feature-bit autoclear 42
-$PYTHON qcow2.py "$TEST_IMG" dump-header
+./qcow2.py "$TEST_IMG" set-feature-bit compatible 42
+./qcow2.py "$TEST_IMG" set-feature-bit autoclear 42
+./qcow2.py "$TEST_IMG" dump-header
 $QEMU_IMG amend -o "compat=0.10" "$TEST_IMG"
-$PYTHON qcow2.py "$TEST_IMG" dump-header
+./qcow2.py "$TEST_IMG" dump-header
 _check_test_img
 
 echo
@@ -82,9 +81,9 @@
 echo
 IMGOPTS="compat=0.10" _make_test_img 64M
 $QEMU_IO -c "write -P 0x2a 42M 64k" "$TEST_IMG" | _filter_qemu_io
-$PYTHON qcow2.py "$TEST_IMG" dump-header
+./qcow2.py "$TEST_IMG" dump-header
 $QEMU_IMG amend -o "compat=1.1,lazy_refcounts=on,size=128M" "$TEST_IMG"
-$PYTHON qcow2.py "$TEST_IMG" dump-header
+./qcow2.py "$TEST_IMG" dump-header
 $QEMU_IO -c "read -P 0x2a 42M 64k" "$TEST_IMG" | _filter_qemu_io
 _check_test_img
 
@@ -93,9 +92,9 @@
 echo
 IMGOPTS="compat=1.1,lazy_refcounts=on" _make_test_img 64M
 $QEMU_IO -c "write -P 0x2a 0 128k" -c flush -c abort "$TEST_IMG" | _filter_qemu_io
-$PYTHON qcow2.py "$TEST_IMG" dump-header
+./qcow2.py "$TEST_IMG" dump-header
 $QEMU_IMG amend -o "lazy_refcounts=off" "$TEST_IMG"
-$PYTHON qcow2.py "$TEST_IMG" dump-header
+./qcow2.py "$TEST_IMG" dump-header
 $QEMU_IO -c "read -P 0x2a 0 128k" "$TEST_IMG" | _filter_qemu_io
 _check_test_img
 
diff --git a/tests/qemu-iotests/065 b/tests/qemu-iotests/065
index e89b61d..ab5445f 100755
--- a/tests/qemu-iotests/065
+++ b/tests/qemu-iotests/065
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python2
 #
 # Test for additional information emitted by qemu-img info on qcow2
 # images
diff --git a/tests/qemu-iotests/083 b/tests/qemu-iotests/083
index 6a52c96..f764534 100755
--- a/tests/qemu-iotests/083
+++ b/tests/qemu-iotests/083
@@ -29,7 +29,6 @@
 status=1	# failure is the default!
 
 # get standard environment, filters and checks
-. ./common.env
 . ./common.rc
 . ./common.filter
 
@@ -82,7 +81,7 @@
 		nbd_url="nbd:127.0.0.1:$port:exportname=foo"
 	fi
 
-	$PYTHON nbd-fault-injector.py $extra_args "127.0.0.1:$port" "$TEST_DIR/nbd-fault-injector.conf" 2>&1 >/dev/null &
+	./nbd-fault-injector.py $extra_args "127.0.0.1:$port" "$TEST_DIR/nbd-fault-injector.conf" 2>&1 >/dev/null &
 	wait_for_tcp_port "127.0.0.1:$port"
 	$QEMU_IO -c "read 0 512" "$nbd_url" 2>&1 | _filter_qemu_io | filter_nbd
 
diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check
index ca2ee43..e2ed5a9 100755
--- a/tests/qemu-iotests/check
+++ b/tests/qemu-iotests/check
@@ -34,13 +34,6 @@
 # generic initialization
 iam=check
 
-# we need common.env
-if ! . ./common.env
-then
-    echo "$iam: failed to source common.env"
-    exit 1
-fi
-
 # we need common.config
 if ! . ./common.config
 then
@@ -222,16 +215,9 @@
 
         start=`_wallclock`
         $timestamp && echo -n "        ["`date "+%T"`"]"
-
-        if [ "$(head -n 1 $seq)" == "#!/usr/bin/env python" ]; then
-            run_command="$PYTHON $seq"
-        else
-            [ ! -x $seq ] && chmod u+x $seq # ensure we can run it
-            run_command="./$seq"
-        fi
-
+        [ ! -x $seq ] && chmod u+x $seq # ensure we can run it
         MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(($RANDOM % 255 + 1))} \
-                $run_command >$tmp.out 2>&1
+                ./$seq >$tmp.out 2>&1
         sts=$?
         $timestamp && _timestamp
         stop=`_wallclock`
diff --git a/trace-events b/trace-events
index af4449d..2c5b307 100644
--- a/trace-events
+++ b/trace-events
@@ -1230,6 +1230,8 @@
 kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p"
 kvm_failed_spr_set(int str, const char *msg) "Warning: Unable to set SPR %d to KVM: %s"
 kvm_failed_spr_get(int str, const char *msg) "Warning: Unable to retrieve SPR %d from KVM: %s"
+kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s"
+kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s"
 
 # memory.c
 memory_region_ops_read(void *mr, uint64_t addr, uint64_t value, unsigned size) "mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
@@ -1246,7 +1248,3 @@
 # hw/pci/pci_host.c
 pci_cfg_read(const char *dev, unsigned devid, unsigned fnid, unsigned offs, unsigned val) "%s %02u:%u @0x%x -> 0x%x"
 pci_cfg_write(const char *dev, unsigned devid, unsigned fnid, unsigned offs, unsigned val) "%s %02u:%u @0x%x <- 0x%x"
-
-# target-s390/kvm.c
-kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s"
-kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s"
diff --git a/util/fifo8.c b/util/fifo8.c
index 6a43482..0ea5ad9 100644
--- a/util/fifo8.c
+++ b/util/fifo8.c
@@ -116,8 +116,7 @@
     .name = "Fifo8",
     .version_id = 1,
     .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
-    .fields      = (VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_VBUFFER_UINT32(data, Fifo8, 1, NULL, 0, capacity),
         VMSTATE_UINT32(head, Fifo8),
         VMSTATE_UINT32(num, Fifo8),