Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20141024' into staging

target-arm queue:
 * remove pointless 'info pcmcia' and a lot of now-dead code
 * register ARM cpu reset handlers even if not using -kernel
 * update to libvixl 1.6
 * various minor code cleanups
 * support PSCI under TCG ('virt' machine can now be shut down,
   SMP configurations work)
 * correct the sense of the AArch64 DCZID DZP bit
 * report a valid L1Ip field in CTR_EL0 for CPU type "any"
 * correctly UNDEF writes to FPINST/FPINST2 from EL0
 * more preparatory code refactoring for EL2/EL3 support

# gpg: Signature made Fri 24 Oct 2014 12:35:52 BST using RSA key ID 14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"

* remotes/pmaydell/tags/pull-target-arm-20141024: (23 commits)
  target-arm: A32: Emulate the SMC instruction
  target-arm: make arm_current_el() return EL3
  target-arm: rename arm_current_pl to arm_current_el
  target-arm: reject switching to monitor mode
  target-arm: add arm_is_secure() function
  target-arm: increase arrays of registers R13 & R14
  target-arm: correctly UNDEF writes to FPINST/FPINST2 from EL0
  target-arm: Report a valid L1Ip field in CTR_EL0 for CPU type "any"
  target-arm: Correct sense of the DCZID DZP bit
  arm/virt: enable PSCI emulation support for system emulation
  target-arm: add emulation of PSCI calls for system emulation
  target-arm: Add support for A32 and T32 HVC and SMC insns
  target-arm: Handle SMC/HVC undef-if-no-ELx in pre_* helpers
  target-arm: add missing PSCI constants needed for PSCI emulation
  target-arm: do not set do_interrupt handlers for ARM and AArch64 user modes
  target-arm: add powered off cpu state
  omap_gpmc.c: Remove duplicate assignment
  disas/libvixl/a64/instructions-a64.h: Remove unused constants
  arm_gic: remove unused parameter.
  disas/libvixl: Update to libvixl 1.6
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/disas/arm-a64.cc b/disas/arm-a64.cc
index 162be0c..ca29f6f 100644
--- a/disas/arm-a64.cc
+++ b/disas/arm-a64.cc
@@ -39,7 +39,7 @@
     ~QEMUDisassembler() { }
 
 protected:
-    void ProcessOutput(Instruction *instr) {
+    virtual void ProcessOutput(const Instruction *instr) {
         fprintf(stream_, "%08" PRIx32 "      %s",
                 instr->InstructionBits(), GetOutput());
     }
diff --git a/disas/libvixl/README b/disas/libvixl/README
index 8301996..cba31b4 100644
--- a/disas/libvixl/README
+++ b/disas/libvixl/README
@@ -2,7 +2,7 @@
 The code in this directory is a subset of libvixl:
  https://github.com/armvixl/vixl
 (specifically, it is the set of files needed for disassembly only,
-taken from libvixl 1.5).
+taken from libvixl 1.6).
 Bugfixes should preferably be sent upstream initially.
 
 The disassembler does not currently support the entire A64 instruction
diff --git a/disas/libvixl/a64/assembler-a64.h b/disas/libvixl/a64/assembler-a64.h
index cc0b758..16a704b 100644
--- a/disas/libvixl/a64/assembler-a64.h
+++ b/disas/libvixl/a64/assembler-a64.h
@@ -32,6 +32,7 @@
 
 #include "globals.h"
 #include "utils.h"
+#include "code-buffer.h"
 #include "a64/instructions-a64.h"
 
 namespace vixl {
@@ -168,6 +169,11 @@
     return type_ == kFPRegister;
   }
 
+  bool IsW() const { return IsValidRegister() && Is32Bits(); }
+  bool IsX() const { return IsValidRegister() && Is64Bits(); }
+  bool IsS() const { return IsValidFPRegister() && Is32Bits(); }
+  bool IsD() const { return IsValidFPRegister() && Is64Bits(); }
+
   const Register& W() const;
   const Register& X() const;
   const FPRegister& S() const;
@@ -191,12 +197,12 @@
 
 class Register : public CPURegister {
  public:
-  explicit Register() : CPURegister() {}
+  Register() : CPURegister() {}
   inline explicit Register(const CPURegister& other)
       : CPURegister(other.code(), other.size(), other.type()) {
     VIXL_ASSERT(IsValidRegister());
   }
-  explicit Register(unsigned code, unsigned size)
+  Register(unsigned code, unsigned size)
       : CPURegister(code, size, kRegister) {}
 
   bool IsValid() const {
@@ -536,7 +542,7 @@
 class MemOperand {
  public:
   explicit MemOperand(Register base,
-                      ptrdiff_t offset = 0,
+                      int64_t offset = 0,
                       AddrMode addrmode = Offset);
   explicit MemOperand(Register base,
                       Register regoffset,
@@ -552,7 +558,7 @@
 
   const Register& base() const { return base_; }
   const Register& regoffset() const { return regoffset_; }
-  ptrdiff_t offset() const { return offset_; }
+  int64_t offset() const { return offset_; }
   AddrMode addrmode() const { return addrmode_; }
   Shift shift() const { return shift_; }
   Extend extend() const { return extend_; }
@@ -565,7 +571,7 @@
  private:
   Register base_;
   Register regoffset_;
-  ptrdiff_t offset_;
+  int64_t offset_;
   AddrMode addrmode_;
   Shift shift_;
   Extend extend_;
@@ -680,32 +686,80 @@
 };
 
 
-// TODO: Obtain better values for these, based on real-world data.
-const int kLiteralPoolCheckInterval = 4 * KBytes;
-const int kRecommendedLiteralPoolRange = 2 * kLiteralPoolCheckInterval;
+// A literal is a 32-bit or 64-bit piece of data stored in the instruction
+// stream and loaded through a pc relative load. The same literal can be
+// referred to by multiple instructions but a literal can only reside at one
+// place in memory. A literal can be used by a load before or after being
+// placed in memory.
+//
+// Internally an offset of 0 is associated with a literal which has been
+// neither used nor placed. Then two possibilities arise:
+//  1) the label is placed, the offset (stored as offset + 1) is used to
+//     resolve any subsequent load using the label.
+//  2) the label is not placed and offset is the offset of the last load using
+//     the literal (stored as -offset -1). If multiple loads refer to this
+//     literal then the last load holds the offset of the preceding load and
+//     all loads form a chain. Once the offset is placed all the loads in the
+//     chain are resolved and future loads fall back to possibility 1.
+class RawLiteral {
+ public:
+  RawLiteral() : size_(0), offset_(0), raw_value_(0) {}
 
+  size_t size() {
+    VIXL_STATIC_ASSERT(kDRegSizeInBytes == kXRegSizeInBytes);
+    VIXL_STATIC_ASSERT(kSRegSizeInBytes == kWRegSizeInBytes);
+    VIXL_ASSERT((size_ == kXRegSizeInBytes) || (size_ == kWRegSizeInBytes));
+    return size_;
+  }
+  uint64_t raw_value64() {
+    VIXL_ASSERT(size_ == kXRegSizeInBytes);
+    return raw_value_;
+  }
+  uint32_t raw_value32() {
+    VIXL_ASSERT(size_ == kWRegSizeInBytes);
+    VIXL_ASSERT(is_uint32(raw_value_) || is_int32(raw_value_));
+    return static_cast<uint32_t>(raw_value_);
+  }
+  bool IsUsed() { return offset_ < 0; }
+  bool IsPlaced() { return offset_ > 0; }
 
-// Control whether a branch over the literal pool should also be emitted. This
-// is needed if the literal pool has to be emitted in the middle of the JITted
-// code.
-enum LiteralPoolEmitOption {
-  JumpRequired,
-  NoJumpRequired
+ protected:
+  ptrdiff_t offset() {
+    VIXL_ASSERT(IsPlaced());
+    return offset_ - 1;
+  }
+  void set_offset(ptrdiff_t offset) {
+    VIXL_ASSERT(offset >= 0);
+    VIXL_ASSERT(IsWordAligned(offset));
+    VIXL_ASSERT(!IsPlaced());
+    offset_ = offset + 1;
+  }
+  ptrdiff_t last_use() {
+    VIXL_ASSERT(IsUsed());
+    return -offset_ - 1;
+  }
+  void set_last_use(ptrdiff_t offset) {
+    VIXL_ASSERT(offset >= 0);
+    VIXL_ASSERT(IsWordAligned(offset));
+    VIXL_ASSERT(!IsPlaced());
+    offset_ = -offset - 1;
+  }
+
+  size_t size_;
+  ptrdiff_t offset_;
+  uint64_t raw_value_;
+
+  friend class Assembler;
 };
 
 
-// Literal pool entry.
-class Literal {
+template <typename T>
+class Literal : public RawLiteral {
  public:
-  Literal(Instruction* pc, uint64_t imm, unsigned size)
-      : pc_(pc), value_(imm), size_(size) {}
-
- private:
-  Instruction* pc_;
-  int64_t value_;
-  unsigned size_;
-
-  friend class Assembler;
+  explicit Literal(T value) {
+    size_ = sizeof(value);
+    memcpy(&raw_value_, &value, sizeof(value));
+  }
 };
 
 
@@ -750,7 +804,9 @@
 // Assembler.
 class Assembler {
  public:
-  Assembler(byte* buffer, unsigned buffer_size,
+  Assembler(size_t capacity,
+            PositionIndependentCodeOption pic = PositionIndependentCode);
+  Assembler(byte* buffer, size_t capacity,
             PositionIndependentCodeOption pic = PositionIndependentCode);
 
   // The destructor asserts that one of the following is true:
@@ -763,9 +819,6 @@
 
   // Start generating code from the beginning of the buffer, discarding any code
   // and data that has already been emitted into the buffer.
-  //
-  // In order to avoid any accidental transfer of state, Reset ASSERTs that the
-  // constant pool is not blocked.
   void Reset();
 
   // Finalize a code buffer of generated instructions. This function must be
@@ -776,13 +829,47 @@
   // Bind a label to the current PC.
   void bind(Label* label);
 
+  // Bind a label to a specified offset from the start of the buffer.
+  void BindToOffset(Label* label, ptrdiff_t offset);
+
+  // Place a literal at the current PC.
+  void place(RawLiteral* literal);
+
+  ptrdiff_t CursorOffset() const {
+    return buffer_->CursorOffset();
+  }
+
+  ptrdiff_t BufferEndOffset() const {
+    return static_cast<ptrdiff_t>(buffer_->capacity());
+  }
+
+  // Return the address of an offset in the buffer.
+  template <typename T>
+  inline T GetOffsetAddress(ptrdiff_t offset) {
+    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
+    return buffer_->GetOffsetAddress<T>(offset);
+  }
+
   // Return the address of a bound label.
   template <typename T>
   inline T GetLabelAddress(const Label * label) {
     VIXL_ASSERT(label->IsBound());
     VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
-    VIXL_STATIC_ASSERT(sizeof(*buffer_) == 1);
-    return reinterpret_cast<T>(buffer_ + label->location());
+    return GetOffsetAddress<T>(label->location());
+  }
+
+  // Return the address of the cursor.
+  template <typename T>
+  inline T GetCursorAddress() {
+    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
+    return GetOffsetAddress<T>(CursorOffset());
+  }
+
+  // Return the address of the start of the buffer.
+  template <typename T>
+  inline T GetStartAddress() {
+    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
+    return GetOffsetAddress<T>(0);
   }
 
   // Instruction set functions.
@@ -1324,14 +1411,17 @@
   void stnp(const CPURegister& rt, const CPURegister& rt2,
             const MemOperand& dst);
 
-  // Load literal to register.
-  void ldr(const Register& rt, uint64_t imm);
+  // Load integer or FP register from literal pool.
+  void ldr(const CPURegister& rt, RawLiteral* literal);
 
-  // Load double precision floating point literal to FP register.
-  void ldr(const FPRegister& ft, double imm);
+  // Load word with sign extension from literal pool.
+  void ldrsw(const Register& rt, RawLiteral* literal);
 
-  // Load single precision floating point literal to FP register.
-  void ldr(const FPRegister& ft, float imm);
+  // Load integer or FP register from pc + imm19 << 2.
+  void ldr(const CPURegister& rt, int imm19);
+
+  // Load word with sign extension from pc + imm19 << 2.
+  void ldrsw(const Register& rt, int imm19);
 
   // Store exclusive byte.
   void stxrb(const Register& rs, const Register& rt, const MemOperand& dst);
@@ -1618,25 +1708,26 @@
   inline void dci(Instr raw_inst) { Emit(raw_inst); }
 
   // Emit 32 bits of data into the instruction stream.
-  inline void dc32(uint32_t data) { EmitData(&data, sizeof(data)); }
+  inline void dc32(uint32_t data) {
+    VIXL_ASSERT(buffer_monitor_ > 0);
+    buffer_->Emit32(data);
+  }
 
   // Emit 64 bits of data into the instruction stream.
-  inline void dc64(uint64_t data) { EmitData(&data, sizeof(data)); }
+  inline void dc64(uint64_t data) {
+    VIXL_ASSERT(buffer_monitor_ > 0);
+    buffer_->Emit64(data);
+  }
 
   // Copy a string into the instruction stream, including the terminating NULL
-  // character. The instruction pointer (pc_) is then aligned correctly for
+  // character. The instruction pointer is then aligned correctly for
   // subsequent instructions.
-  void EmitStringData(const char * string) {
+  void EmitString(const char * string) {
     VIXL_ASSERT(string != NULL);
+    VIXL_ASSERT(buffer_monitor_ > 0);
 
-    size_t len = strlen(string) + 1;
-    EmitData(string, len);
-
-    // Pad with NULL characters until pc_ is aligned.
-    const char pad[] = {'\0', '\0', '\0', '\0'};
-    VIXL_STATIC_ASSERT(sizeof(pad) == kInstructionSize);
-    Instruction* next_pc = AlignUp(pc_, kInstructionSize);
-    EmitData(&pad, next_pc - pc_);
+    buffer_->EmitString(string);
+    buffer_->Align();
   }
 
   // Code generation helpers.
@@ -1912,43 +2003,39 @@
     return scale << FPScale_offset;
   }
 
-  // Size of the code generated in bytes
-  size_t SizeOfCodeGenerated() const {
-    VIXL_ASSERT((pc_ >= buffer_) && (pc_ < (buffer_ + buffer_size_)));
-    return pc_ - buffer_;
-  }
-
   // Size of the code generated since label to the current position.
   size_t SizeOfCodeGeneratedSince(Label* label) const {
-    size_t pc_offset = SizeOfCodeGenerated();
-
     VIXL_ASSERT(label->IsBound());
-    VIXL_ASSERT(pc_offset >= static_cast<size_t>(label->location()));
-    VIXL_ASSERT(pc_offset < buffer_size_);
-
-    return pc_offset - label->location();
+    return buffer_->OffsetFrom(label->location());
   }
 
+  size_t BufferCapacity() const { return buffer_->capacity(); }
 
-  inline void BlockLiteralPool() {
-    literal_pool_monitor_++;
-  }
+  size_t RemainingBufferSpace() const { return buffer_->RemainingBytes(); }
 
-  inline void ReleaseLiteralPool() {
-    if (--literal_pool_monitor_ == 0) {
-      // Has the literal pool been blocked for too long?
-      VIXL_ASSERT(literals_.empty() ||
-             (pc_ < (literals_.back()->pc_ + kMaxLoadLiteralRange)));
+  void EnsureSpaceFor(size_t amount) {
+    if (buffer_->RemainingBytes() < amount) {
+      size_t capacity = buffer_->capacity();
+      size_t size = buffer_->CursorOffset();
+      do {
+        // TODO(all): refine.
+        capacity *= 2;
+      } while ((capacity - size) <  amount);
+      buffer_->Grow(capacity);
     }
   }
 
-  inline bool IsLiteralPoolBlocked() {
-    return literal_pool_monitor_ != 0;
+#ifdef DEBUG
+  void AcquireBuffer() {
+    VIXL_ASSERT(buffer_monitor_ >= 0);
+    buffer_monitor_++;
   }
 
-  void CheckLiteralPool(LiteralPoolEmitOption option = JumpRequired);
-  void EmitLiteralPool(LiteralPoolEmitOption option = NoJumpRequired);
-  size_t LiteralPoolSize();
+  void ReleaseBuffer() {
+    buffer_monitor_--;
+    VIXL_ASSERT(buffer_monitor_ >= 0);
+  }
+#endif
 
   inline PositionIndependentCodeOption pic() {
     return pic_;
@@ -1959,22 +2046,30 @@
            (pic() == PositionDependentCode);
   }
 
- protected:
-  inline const Register& AppropriateZeroRegFor(const CPURegister& reg) const {
+  static inline const Register& AppropriateZeroRegFor(const CPURegister& reg) {
     return reg.Is64Bits() ? xzr : wzr;
   }
 
 
+ protected:
   void LoadStore(const CPURegister& rt,
                  const MemOperand& addr,
                  LoadStoreOp op,
                  LoadStoreScalingOption option = PreferScaledOffset);
-  static bool IsImmLSUnscaled(ptrdiff_t offset);
-  static bool IsImmLSScaled(ptrdiff_t offset, LSDataSize size);
+  static bool IsImmLSUnscaled(int64_t offset);
+  static bool IsImmLSScaled(int64_t offset, LSDataSize size);
 
+  void LoadStorePair(const CPURegister& rt,
+                     const CPURegister& rt2,
+                     const MemOperand& addr,
+                     LoadStorePairOp op);
+  static bool IsImmLSPair(int64_t offset, LSDataSize size);
+
+  // TODO(all): The third parameter should be passed by reference but gcc 4.8.2
+  // reports a bogus uninitialised warning then.
   void Logical(const Register& rd,
                const Register& rn,
-               const Operand& operand,
+               const Operand operand,
                LogicalOp op);
   void LogicalImmediate(const Register& rd,
                         const Register& rn,
@@ -2035,6 +2130,7 @@
     const CPURegister& rt, const CPURegister& rt2);
   static LoadStorePairNonTemporalOp StorePairNonTemporalOpFor(
     const CPURegister& rt, const CPURegister& rt2);
+  static LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);
 
 
  private:
@@ -2053,10 +2149,6 @@
                                 const Operand& operand,
                                 FlagsUpdate S,
                                 Instr op);
-  void LoadStorePair(const CPURegister& rt,
-                     const CPURegister& rt2,
-                     const MemOperand& addr,
-                     LoadStorePairOp op);
   void LoadStorePairNonTemporal(const CPURegister& rt,
                                 const CPURegister& rt2,
                                 const MemOperand& addr,
@@ -2088,8 +2180,6 @@
                                const FPRegister& fa,
                                FPDataProcessing3SourceOp op);
 
-  void RecordLiteral(int64_t imm, unsigned size);
-
   // Link the current (not-yet-emitted) instruction to the specified label, then
   // return an offset to be encoded in the instruction. If the label is not yet
   // bound, an offset of 0 is returned.
@@ -2098,79 +2188,102 @@
   ptrdiff_t LinkAndGetPageOffsetTo(Label * label);
 
   // A common implementation for the LinkAndGet<Type>OffsetTo helpers.
-  template <int element_size>
+  template <int element_shift>
   ptrdiff_t LinkAndGetOffsetTo(Label* label);
 
-  // Emit the instruction at pc_.
+  // Literal load offset are in words (32-bit).
+  ptrdiff_t LinkAndGetWordOffsetTo(RawLiteral* literal);
+
+  // Emit the instruction in buffer_.
   void Emit(Instr instruction) {
-    VIXL_STATIC_ASSERT(sizeof(*pc_) == 1);
     VIXL_STATIC_ASSERT(sizeof(instruction) == kInstructionSize);
-    VIXL_ASSERT((pc_ + sizeof(instruction)) <= (buffer_ + buffer_size_));
-
-#ifdef DEBUG
-    finalized_ = false;
-#endif
-
-    memcpy(pc_, &instruction, sizeof(instruction));
-    pc_ += sizeof(instruction);
-    CheckBufferSpace();
+    VIXL_ASSERT(buffer_monitor_ > 0);
+    buffer_->Emit32(instruction);
   }
 
-  // Emit data inline in the instruction stream.
-  void EmitData(void const * data, unsigned size) {
-    VIXL_STATIC_ASSERT(sizeof(*pc_) == 1);
-    VIXL_ASSERT((pc_ + size) <= (buffer_ + buffer_size_));
-
-#ifdef DEBUG
-    finalized_ = false;
-#endif
-
-    // TODO: Record this 'instruction' as data, so that it can be disassembled
-    // correctly.
-    memcpy(pc_, data, size);
-    pc_ += size;
-    CheckBufferSpace();
-  }
-
-  inline void CheckBufferSpace() {
-    VIXL_ASSERT(pc_ < (buffer_ + buffer_size_));
-    if (pc_ > next_literal_pool_check_) {
-      CheckLiteralPool();
-    }
-  }
-
-  // The buffer into which code and relocation info are generated.
-  Instruction* buffer_;
-  // Buffer size, in bytes.
-  size_t buffer_size_;
-  Instruction* pc_;
-  std::list<Literal*> literals_;
-  Instruction* next_literal_pool_check_;
-  unsigned literal_pool_monitor_;
-
+  // Buffer where the code is emitted.
+  CodeBuffer* buffer_;
   PositionIndependentCodeOption pic_;
 
-  friend class Label;
-  friend class BlockLiteralPoolScope;
-
 #ifdef DEBUG
-  bool finalized_;
+  int64_t buffer_monitor_;
 #endif
 };
 
-class BlockLiteralPoolScope {
+
+// All Assembler emits MUST acquire/release the underlying code buffer. The
+// helper scope below will do so and optionally ensure the buffer is big enough
+// to receive the emit. It is possible to request the scope not to perform any
+// checks (kNoCheck) if for example it is known in advance the buffer size is
+// adequate or there is some other size checking mechanism in place.
+class CodeBufferCheckScope {
  public:
-  explicit BlockLiteralPoolScope(Assembler* assm) : assm_(assm) {
-    assm_->BlockLiteralPool();
+  // Tell whether or not the scope needs to ensure the associated CodeBuffer
+  // has enough space for the requested size.
+  enum CheckPolicy {
+    kNoCheck,
+    kCheck
+  };
+
+  // Tell whether or not the scope should assert the amount of code emitted
+  // within the scope is consistent with the requested amount.
+  enum AssertPolicy {
+    kNoAssert,    // No assert required.
+    kExactSize,   // The code emitted must be exactly size bytes.
+    kMaximumSize  // The code emitted must be at most size bytes.
+  };
+
+  CodeBufferCheckScope(Assembler* assm,
+                       size_t size,
+                       CheckPolicy check_policy = kCheck,
+                       AssertPolicy assert_policy = kMaximumSize)
+      : assm_(assm) {
+    if (check_policy == kCheck) assm->EnsureSpaceFor(size);
+#ifdef DEBUG
+    assm->bind(&start_);
+    size_ = size;
+    assert_policy_ = assert_policy;
+    assm->AcquireBuffer();
+#else
+    USE(assert_policy);
+#endif
   }
 
-  ~BlockLiteralPoolScope() {
-    assm_->ReleaseLiteralPool();
+  // This is a shortcut for CodeBufferCheckScope(assm, 0, kNoCheck, kNoAssert).
+  explicit CodeBufferCheckScope(Assembler* assm) : assm_(assm) {
+#ifdef DEBUG
+    size_ = 0;
+    assert_policy_ = kNoAssert;
+    assm->AcquireBuffer();
+#endif
   }
 
- private:
+  ~CodeBufferCheckScope() {
+#ifdef DEBUG
+    assm_->ReleaseBuffer();
+    switch (assert_policy_) {
+      case kNoAssert: break;
+      case kExactSize:
+        VIXL_ASSERT(assm_->SizeOfCodeGeneratedSince(&start_) == size_);
+        break;
+      case kMaximumSize:
+        VIXL_ASSERT(assm_->SizeOfCodeGeneratedSince(&start_) <= size_);
+        break;
+      default:
+        VIXL_UNREACHABLE();
+    }
+#endif
+  }
+
+ protected:
   Assembler* assm_;
+#ifdef DEBUG
+  Label start_;
+  size_t size_;
+  AssertPolicy assert_policy_;
+#endif
 };
+
 }  // namespace vixl
 
 #endif  // VIXL_A64_ASSEMBLER_A64_H_
diff --git a/disas/libvixl/a64/decoder-a64.cc b/disas/libvixl/a64/decoder-a64.cc
index 5831b73..82591ca 100644
--- a/disas/libvixl/a64/decoder-a64.cc
+++ b/disas/libvixl/a64/decoder-a64.cc
@@ -29,8 +29,8 @@
 #include "a64/decoder-a64.h"
 
 namespace vixl {
-// Top-level instruction decode function.
-void Decoder::Decode(Instruction *instr) {
+
+void Decoder::DecodeInstruction(const Instruction *instr) {
   if (instr->Bits(28, 27) == 0) {
     VisitUnallocated(instr);
   } else {
@@ -109,20 +109,17 @@
 }
 
 void Decoder::AppendVisitor(DecoderVisitor* new_visitor) {
-  visitors_.remove(new_visitor);
-  visitors_.push_front(new_visitor);
+  visitors_.push_back(new_visitor);
 }
 
 
 void Decoder::PrependVisitor(DecoderVisitor* new_visitor) {
-  visitors_.remove(new_visitor);
-  visitors_.push_back(new_visitor);
+  visitors_.push_front(new_visitor);
 }
 
 
 void Decoder::InsertVisitorBefore(DecoderVisitor* new_visitor,
                                   DecoderVisitor* registered_visitor) {
-  visitors_.remove(new_visitor);
   std::list<DecoderVisitor*>::iterator it;
   for (it = visitors_.begin(); it != visitors_.end(); it++) {
     if (*it == registered_visitor) {
@@ -139,7 +136,6 @@
 
 void Decoder::InsertVisitorAfter(DecoderVisitor* new_visitor,
                                  DecoderVisitor* registered_visitor) {
-  visitors_.remove(new_visitor);
   std::list<DecoderVisitor*>::iterator it;
   for (it = visitors_.begin(); it != visitors_.end(); it++) {
     if (*it == registered_visitor) {
@@ -160,7 +156,7 @@
 }
 
 
-void Decoder::DecodePCRelAddressing(Instruction* instr) {
+void Decoder::DecodePCRelAddressing(const Instruction* instr) {
   VIXL_ASSERT(instr->Bits(27, 24) == 0x0);
   // We know bit 28 is set, as <b28:b27> = 0 is filtered out at the top level
   // decode.
@@ -169,7 +165,7 @@
 }
 
 
-void Decoder::DecodeBranchSystemException(Instruction* instr) {
+void Decoder::DecodeBranchSystemException(const Instruction* instr) {
   VIXL_ASSERT((instr->Bits(27, 24) == 0x4) ||
               (instr->Bits(27, 24) == 0x5) ||
               (instr->Bits(27, 24) == 0x6) ||
@@ -270,7 +266,7 @@
 }
 
 
-void Decoder::DecodeLoadStore(Instruction* instr) {
+void Decoder::DecodeLoadStore(const Instruction* instr) {
   VIXL_ASSERT((instr->Bits(27, 24) == 0x8) ||
               (instr->Bits(27, 24) == 0x9) ||
               (instr->Bits(27, 24) == 0xC) ||
@@ -388,7 +384,7 @@
 }
 
 
-void Decoder::DecodeLogical(Instruction* instr) {
+void Decoder::DecodeLogical(const Instruction* instr) {
   VIXL_ASSERT(instr->Bits(27, 24) == 0x2);
 
   if (instr->Mask(0x80400000) == 0x00400000) {
@@ -407,7 +403,7 @@
 }
 
 
-void Decoder::DecodeBitfieldExtract(Instruction* instr) {
+void Decoder::DecodeBitfieldExtract(const Instruction* instr) {
   VIXL_ASSERT(instr->Bits(27, 24) == 0x3);
 
   if ((instr->Mask(0x80400000) == 0x80000000) ||
@@ -432,7 +428,7 @@
 }
 
 
-void Decoder::DecodeAddSubImmediate(Instruction* instr) {
+void Decoder::DecodeAddSubImmediate(const Instruction* instr) {
   VIXL_ASSERT(instr->Bits(27, 24) == 0x1);
   if (instr->Bit(23) == 1) {
     VisitUnallocated(instr);
@@ -442,7 +438,7 @@
 }
 
 
-void Decoder::DecodeDataProcessing(Instruction* instr) {
+void Decoder::DecodeDataProcessing(const Instruction* instr) {
   VIXL_ASSERT((instr->Bits(27, 24) == 0xA) ||
               (instr->Bits(27, 24) == 0xB));
 
@@ -557,7 +553,7 @@
 }
 
 
-void Decoder::DecodeFP(Instruction* instr) {
+void Decoder::DecodeFP(const Instruction* instr) {
   VIXL_ASSERT((instr->Bits(27, 24) == 0xE) ||
               (instr->Bits(27, 24) == 0xF));
 
@@ -684,14 +680,14 @@
 }
 
 
-void Decoder::DecodeAdvSIMDLoadStore(Instruction* instr) {
+void Decoder::DecodeAdvSIMDLoadStore(const Instruction* instr) {
   // TODO: Implement Advanced SIMD load/store instruction decode.
   VIXL_ASSERT(instr->Bits(29, 25) == 0x6);
   VisitUnimplemented(instr);
 }
 
 
-void Decoder::DecodeAdvSIMDDataProcessing(Instruction* instr) {
+void Decoder::DecodeAdvSIMDDataProcessing(const Instruction* instr) {
   // TODO: Implement Advanced SIMD data processing instruction decode.
   VIXL_ASSERT(instr->Bits(27, 25) == 0x7);
   VisitUnimplemented(instr);
@@ -699,7 +695,7 @@
 
 
 #define DEFINE_VISITOR_CALLERS(A)                                              \
-  void Decoder::Visit##A(Instruction *instr) {                                 \
+  void Decoder::Visit##A(const Instruction *instr) {                           \
     VIXL_ASSERT(instr->Mask(A##FMask) == A##Fixed);                            \
     std::list<DecoderVisitor*>::iterator it;                                   \
     for (it = visitors_.begin(); it != visitors_.end(); it++) {                \
diff --git a/disas/libvixl/a64/decoder-a64.h b/disas/libvixl/a64/decoder-a64.h
index 72c1519..172594c 100644
--- a/disas/libvixl/a64/decoder-a64.h
+++ b/disas/libvixl/a64/decoder-a64.h
@@ -88,112 +88,152 @@
 // must provide implementations for all of these functions.
 class DecoderVisitor {
  public:
-  #define DECLARE(A) virtual void Visit##A(Instruction* instr) = 0;
-  VISITOR_LIST(DECLARE)
-  #undef DECLARE
+  enum VisitorConstness {
+    kConstVisitor,
+    kNonConstVisitor
+  };
+  explicit DecoderVisitor(VisitorConstness constness = kConstVisitor)
+      : constness_(constness) {}
 
   virtual ~DecoderVisitor() {}
 
- private:
-  // Visitors are registered in a list.
-  std::list<DecoderVisitor*> visitors_;
+  #define DECLARE(A) virtual void Visit##A(const Instruction* instr) = 0;
+  VISITOR_LIST(DECLARE)
+  #undef DECLARE
 
-  friend class Decoder;
+  bool IsConstVisitor() const { return constness_ == kConstVisitor; }
+  Instruction* MutableInstruction(const Instruction* instr) {
+    VIXL_ASSERT(!IsConstVisitor());
+    return const_cast<Instruction*>(instr);
+  }
+
+ private:
+  VisitorConstness constness_;
 };
 
 
-class Decoder: public DecoderVisitor {
+class Decoder {
  public:
   Decoder() {}
 
-  // Top-level instruction decoder function. Decodes an instruction and calls
-  // the visitor functions registered with the Decoder class.
-  void Decode(Instruction *instr);
+  // Top-level wrappers around the actual decoding function.
+  void Decode(const Instruction* instr) {
+    std::list<DecoderVisitor*>::iterator it;
+    for (it = visitors_.begin(); it != visitors_.end(); it++) {
+      VIXL_ASSERT((*it)->IsConstVisitor());
+    }
+    DecodeInstruction(instr);
+  }
+  void Decode(Instruction* instr) {
+    DecodeInstruction(const_cast<const Instruction*>(instr));
+  }
 
   // Register a new visitor class with the decoder.
   // Decode() will call the corresponding visitor method from all registered
   // visitor classes when decoding reaches the leaf node of the instruction
   // decode tree.
-  // Visitors are called in the order.
-  // A visitor can only be registered once.
-  // Registering an already registered visitor will update its position.
+  // Visitors are called in order.
+  // A visitor can be registered multiple times.
   //
   //   d.AppendVisitor(V1);
   //   d.AppendVisitor(V2);
-  //   d.PrependVisitor(V2);            // Move V2 at the start of the list.
-  //   d.InsertVisitorBefore(V3, V2);
-  //   d.AppendVisitor(V4);
-  //   d.AppendVisitor(V4);             // No effect.
+  //   d.PrependVisitor(V2);
+  //   d.AppendVisitor(V3);
   //
   //   d.Decode(i);
   //
-  // will call in order visitor methods in V3, V2, V1, V4.
+  // will call in order visitor methods in V2, V1, V2, V3.
   void AppendVisitor(DecoderVisitor* visitor);
   void PrependVisitor(DecoderVisitor* visitor);
+  // These helpers register `new_visitor` before or after the first instance of
+  // `registered_visiter` in the list.
+  // So if
+  //   V1, V2, V1, V2
+  // are registered in this order in the decoder, calls to
+  //   d.InsertVisitorAfter(V3, V1);
+  //   d.InsertVisitorBefore(V4, V2);
+  // will yield the order
+  //   V1, V3, V4, V2, V1, V2
+  //
+  // For more complex modifications of the order of registered visitors, one can
+  // directly access and modify the list of visitors via the `visitors()'
+  // accessor.
   void InsertVisitorBefore(DecoderVisitor* new_visitor,
                            DecoderVisitor* registered_visitor);
   void InsertVisitorAfter(DecoderVisitor* new_visitor,
                           DecoderVisitor* registered_visitor);
 
-  // Remove a previously registered visitor class from the list of visitors
-  // stored by the decoder.
+  // Remove all instances of a previously registered visitor class from the list
+  // of visitors stored by the decoder.
   void RemoveVisitor(DecoderVisitor* visitor);
 
-  #define DECLARE(A) void Visit##A(Instruction* instr);
+  #define DECLARE(A) void Visit##A(const Instruction* instr);
   VISITOR_LIST(DECLARE)
   #undef DECLARE
 
+
+  std::list<DecoderVisitor*>* visitors() { return &visitors_; }
+
  private:
+  // Decodes an instruction and calls the visitor functions registered with the
+  // Decoder class.
+  void DecodeInstruction(const Instruction* instr);
+
   // Decode the PC relative addressing instruction, and call the corresponding
   // visitors.
   // On entry, instruction bits 27:24 = 0x0.
-  void DecodePCRelAddressing(Instruction* instr);
+  void DecodePCRelAddressing(const Instruction* instr);
 
   // Decode the add/subtract immediate instruction, and call the correspoding
   // visitors.
   // On entry, instruction bits 27:24 = 0x1.
-  void DecodeAddSubImmediate(Instruction* instr);
+  void DecodeAddSubImmediate(const Instruction* instr);
 
   // Decode the branch, system command, and exception generation parts of
   // the instruction tree, and call the corresponding visitors.
   // On entry, instruction bits 27:24 = {0x4, 0x5, 0x6, 0x7}.
-  void DecodeBranchSystemException(Instruction* instr);
+  void DecodeBranchSystemException(const Instruction* instr);
 
   // Decode the load and store parts of the instruction tree, and call
   // the corresponding visitors.
   // On entry, instruction bits 27:24 = {0x8, 0x9, 0xC, 0xD}.
-  void DecodeLoadStore(Instruction* instr);
+  void DecodeLoadStore(const Instruction* instr);
 
   // Decode the logical immediate and move wide immediate parts of the
   // instruction tree, and call the corresponding visitors.
   // On entry, instruction bits 27:24 = 0x2.
-  void DecodeLogical(Instruction* instr);
+  void DecodeLogical(const Instruction* instr);
 
   // Decode the bitfield and extraction parts of the instruction tree,
   // and call the corresponding visitors.
   // On entry, instruction bits 27:24 = 0x3.
-  void DecodeBitfieldExtract(Instruction* instr);
+  void DecodeBitfieldExtract(const Instruction* instr);
 
   // Decode the data processing parts of the instruction tree, and call the
   // corresponding visitors.
   // On entry, instruction bits 27:24 = {0x1, 0xA, 0xB}.
-  void DecodeDataProcessing(Instruction* instr);
+  void DecodeDataProcessing(const Instruction* instr);
 
   // Decode the floating point parts of the instruction tree, and call the
   // corresponding visitors.
   // On entry, instruction bits 27:24 = {0xE, 0xF}.
-  void DecodeFP(Instruction* instr);
+  void DecodeFP(const Instruction* instr);
 
   // Decode the Advanced SIMD (NEON) load/store part of the instruction tree,
   // and call the corresponding visitors.
   // On entry, instruction bits 29:25 = 0x6.
-  void DecodeAdvSIMDLoadStore(Instruction* instr);
+  void DecodeAdvSIMDLoadStore(const Instruction* instr);
 
   // Decode the Advanced SIMD (NEON) data processing part of the instruction
   // tree, and call the corresponding visitors.
   // On entry, instruction bits 27:25 = 0x7.
-  void DecodeAdvSIMDDataProcessing(Instruction* instr);
+  void DecodeAdvSIMDDataProcessing(const Instruction* instr);
+
+ private:
+  // Visitors are registered in a list.
+  std::list<DecoderVisitor*> visitors_;
 };
+
 }  // namespace vixl
 
 #endif  // VIXL_A64_DECODER_A64_H_
diff --git a/disas/libvixl/a64/disasm-a64.cc b/disas/libvixl/a64/disasm-a64.cc
index 248ebfd..e4a74aa 100644
--- a/disas/libvixl/a64/disasm-a64.cc
+++ b/disas/libvixl/a64/disasm-a64.cc
@@ -57,7 +57,7 @@
 }
 
 
-void Disassembler::VisitAddSubImmediate(Instruction* instr) {
+void Disassembler::VisitAddSubImmediate(const Instruction* instr) {
   bool rd_is_zr = RdIsZROrSP(instr);
   bool stack_op = (rd_is_zr || RnIsZROrSP(instr)) &&
                   (instr->ImmAddSub() == 0) ? true : false;
@@ -102,7 +102,7 @@
 }
 
 
-void Disassembler::VisitAddSubShifted(Instruction* instr) {
+void Disassembler::VisitAddSubShifted(const Instruction* instr) {
   bool rd_is_zr = RdIsZROrSP(instr);
   bool rn_is_zr = RnIsZROrSP(instr);
   const char *mnemonic = "";
@@ -149,7 +149,7 @@
 }
 
 
-void Disassembler::VisitAddSubExtended(Instruction* instr) {
+void Disassembler::VisitAddSubExtended(const Instruction* instr) {
   bool rd_is_zr = RdIsZROrSP(instr);
   const char *mnemonic = "";
   Extend mode = static_cast<Extend>(instr->ExtendMode());
@@ -187,7 +187,7 @@
 }
 
 
-void Disassembler::VisitAddSubWithCarry(Instruction* instr) {
+void Disassembler::VisitAddSubWithCarry(const Instruction* instr) {
   bool rn_is_zr = RnIsZROrSP(instr);
   const char *mnemonic = "";
   const char *form = "'Rd, 'Rn, 'Rm";
@@ -222,7 +222,7 @@
 }
 
 
-void Disassembler::VisitLogicalImmediate(Instruction* instr) {
+void Disassembler::VisitLogicalImmediate(const Instruction* instr) {
   bool rd_is_zr = RdIsZROrSP(instr);
   bool rn_is_zr = RnIsZROrSP(instr);
   const char *mnemonic = "";
@@ -294,7 +294,7 @@
 }
 
 
-void Disassembler::VisitLogicalShifted(Instruction* instr) {
+void Disassembler::VisitLogicalShifted(const Instruction* instr) {
   bool rd_is_zr = RdIsZROrSP(instr);
   bool rn_is_zr = RnIsZROrSP(instr);
   const char *mnemonic = "";
@@ -345,7 +345,7 @@
 }
 
 
-void Disassembler::VisitConditionalCompareRegister(Instruction* instr) {
+void Disassembler::VisitConditionalCompareRegister(const Instruction* instr) {
   const char *mnemonic = "";
   const char *form = "'Rn, 'Rm, 'INzcv, 'Cond";
 
@@ -360,7 +360,7 @@
 }
 
 
-void Disassembler::VisitConditionalCompareImmediate(Instruction* instr) {
+void Disassembler::VisitConditionalCompareImmediate(const Instruction* instr) {
   const char *mnemonic = "";
   const char *form = "'Rn, 'IP, 'INzcv, 'Cond";
 
@@ -375,7 +375,7 @@
 }
 
 
-void Disassembler::VisitConditionalSelect(Instruction* instr) {
+void Disassembler::VisitConditionalSelect(const Instruction* instr) {
   bool rnm_is_zr = (RnIsZROrSP(instr) && RmIsZROrSP(instr));
   bool rn_is_rm = (instr->Rn() == instr->Rm());
   const char *mnemonic = "";
@@ -428,7 +428,7 @@
 }
 
 
-void Disassembler::VisitBitfield(Instruction* instr) {
+void Disassembler::VisitBitfield(const Instruction* instr) {
   unsigned s = instr->ImmS();
   unsigned r = instr->ImmR();
   unsigned rd_size_minus_1 =
@@ -506,7 +506,7 @@
 }
 
 
-void Disassembler::VisitExtract(Instruction* instr) {
+void Disassembler::VisitExtract(const Instruction* instr) {
   const char *mnemonic = "";
   const char *form = "'Rd, 'Rn, 'Rm, 'IExtract";
 
@@ -527,7 +527,7 @@
 }
 
 
-void Disassembler::VisitPCRelAddressing(Instruction* instr) {
+void Disassembler::VisitPCRelAddressing(const Instruction* instr) {
   switch (instr->Mask(PCRelAddressingMask)) {
     case ADR: Format(instr, "adr", "'Xd, 'AddrPCRelByte"); break;
     case ADRP: Format(instr, "adrp", "'Xd, 'AddrPCRelPage"); break;
@@ -536,7 +536,7 @@
 }
 
 
-void Disassembler::VisitConditionalBranch(Instruction* instr) {
+void Disassembler::VisitConditionalBranch(const Instruction* instr) {
   switch (instr->Mask(ConditionalBranchMask)) {
     case B_cond: Format(instr, "b.'CBrn", "'BImmCond"); break;
     default: VIXL_UNREACHABLE();
@@ -544,7 +544,8 @@
 }
 
 
-void Disassembler::VisitUnconditionalBranchToRegister(Instruction* instr) {
+void Disassembler::VisitUnconditionalBranchToRegister(
+    const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "'Xn";
 
@@ -564,7 +565,7 @@
 }
 
 
-void Disassembler::VisitUnconditionalBranch(Instruction* instr) {
+void Disassembler::VisitUnconditionalBranch(const Instruction* instr) {
   const char *mnemonic = "";
   const char *form = "'BImmUncn";
 
@@ -577,7 +578,7 @@
 }
 
 
-void Disassembler::VisitDataProcessing1Source(Instruction* instr) {
+void Disassembler::VisitDataProcessing1Source(const Instruction* instr) {
   const char *mnemonic = "";
   const char *form = "'Rd, 'Rn";
 
@@ -598,7 +599,7 @@
 }
 
 
-void Disassembler::VisitDataProcessing2Source(Instruction* instr) {
+void Disassembler::VisitDataProcessing2Source(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "'Rd, 'Rn, 'Rm";
 
@@ -619,7 +620,7 @@
 }
 
 
-void Disassembler::VisitDataProcessing3Source(Instruction* instr) {
+void Disassembler::VisitDataProcessing3Source(const Instruction* instr) {
   bool ra_is_zr = RaIsZROrSP(instr);
   const char *mnemonic = "";
   const char *form = "'Xd, 'Wn, 'Wm, 'Xa";
@@ -697,7 +698,7 @@
 }
 
 
-void Disassembler::VisitCompareBranch(Instruction* instr) {
+void Disassembler::VisitCompareBranch(const Instruction* instr) {
   const char *mnemonic = "";
   const char *form = "'Rt, 'BImmCmpa";
 
@@ -712,7 +713,7 @@
 }
 
 
-void Disassembler::VisitTestBranch(Instruction* instr) {
+void Disassembler::VisitTestBranch(const Instruction* instr) {
   const char *mnemonic = "";
   // If the top bit of the immediate is clear, the tested register is
   // disassembled as Wt, otherwise Xt. As the top bit of the immediate is
@@ -729,7 +730,7 @@
 }
 
 
-void Disassembler::VisitMoveWideImmediate(Instruction* instr) {
+void Disassembler::VisitMoveWideImmediate(const Instruction* instr) {
   const char *mnemonic = "";
   const char *form = "'Rd, 'IMoveImm";
 
@@ -768,7 +769,7 @@
   V(LDR_s, "ldr", "'St")      \
   V(LDR_d, "ldr", "'Dt")
 
-void Disassembler::VisitLoadStorePreIndex(Instruction* instr) {
+void Disassembler::VisitLoadStorePreIndex(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "(LoadStorePreIndex)";
 
@@ -782,7 +783,7 @@
 }
 
 
-void Disassembler::VisitLoadStorePostIndex(Instruction* instr) {
+void Disassembler::VisitLoadStorePostIndex(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "(LoadStorePostIndex)";
 
@@ -796,7 +797,7 @@
 }
 
 
-void Disassembler::VisitLoadStoreUnsignedOffset(Instruction* instr) {
+void Disassembler::VisitLoadStoreUnsignedOffset(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "(LoadStoreUnsignedOffset)";
 
@@ -811,7 +812,7 @@
 }
 
 
-void Disassembler::VisitLoadStoreRegisterOffset(Instruction* instr) {
+void Disassembler::VisitLoadStoreRegisterOffset(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "(LoadStoreRegisterOffset)";
 
@@ -826,7 +827,7 @@
 }
 
 
-void Disassembler::VisitLoadStoreUnscaledOffset(Instruction* instr) {
+void Disassembler::VisitLoadStoreUnscaledOffset(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "'Wt, ['Xns'ILS]";
   const char *form_x = "'Xt, ['Xns'ILS]";
@@ -857,7 +858,7 @@
 }
 
 
-void Disassembler::VisitLoadLiteral(Instruction* instr) {
+void Disassembler::VisitLoadLiteral(const Instruction* instr) {
   const char *mnemonic = "ldr";
   const char *form = "(LoadLiteral)";
 
@@ -866,6 +867,11 @@
     case LDR_x_lit: form = "'Xt, 'ILLiteral 'LValue"; break;
     case LDR_s_lit: form = "'St, 'ILLiteral 'LValue"; break;
     case LDR_d_lit: form = "'Dt, 'ILLiteral 'LValue"; break;
+    case LDRSW_x_lit: {
+      mnemonic = "ldrsw";
+      form = "'Xt, 'ILLiteral 'LValue";
+      break;
+    }
     default: mnemonic = "unimplemented";
   }
   Format(instr, mnemonic, form);
@@ -883,7 +889,7 @@
   V(STP_d, "stp", "'Dt, 'Dt2", "8")     \
   V(LDP_d, "ldp", "'Dt, 'Dt2", "8")
 
-void Disassembler::VisitLoadStorePairPostIndex(Instruction* instr) {
+void Disassembler::VisitLoadStorePairPostIndex(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "(LoadStorePairPostIndex)";
 
@@ -897,7 +903,7 @@
 }
 
 
-void Disassembler::VisitLoadStorePairPreIndex(Instruction* instr) {
+void Disassembler::VisitLoadStorePairPreIndex(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "(LoadStorePairPreIndex)";
 
@@ -911,7 +917,7 @@
 }
 
 
-void Disassembler::VisitLoadStorePairOffset(Instruction* instr) {
+void Disassembler::VisitLoadStorePairOffset(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "(LoadStorePairOffset)";
 
@@ -925,7 +931,7 @@
 }
 
 
-void Disassembler::VisitLoadStorePairNonTemporal(Instruction* instr) {
+void Disassembler::VisitLoadStorePairNonTemporal(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form;
 
@@ -944,7 +950,7 @@
 }
 
 
-void Disassembler::VisitLoadStoreExclusive(Instruction* instr) {
+void Disassembler::VisitLoadStoreExclusive(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form;
 
@@ -987,7 +993,7 @@
 }
 
 
-void Disassembler::VisitFPCompare(Instruction* instr) {
+void Disassembler::VisitFPCompare(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "'Fn, 'Fm";
   const char *form_zero = "'Fn, #0.0";
@@ -1003,7 +1009,7 @@
 }
 
 
-void Disassembler::VisitFPConditionalCompare(Instruction* instr) {
+void Disassembler::VisitFPConditionalCompare(const Instruction* instr) {
   const char *mnemonic = "unmplemented";
   const char *form = "'Fn, 'Fm, 'INzcv, 'Cond";
 
@@ -1018,7 +1024,7 @@
 }
 
 
-void Disassembler::VisitFPConditionalSelect(Instruction* instr) {
+void Disassembler::VisitFPConditionalSelect(const Instruction* instr) {
   const char *mnemonic = "";
   const char *form = "'Fd, 'Fn, 'Fm, 'Cond";
 
@@ -1031,7 +1037,7 @@
 }
 
 
-void Disassembler::VisitFPDataProcessing1Source(Instruction* instr) {
+void Disassembler::VisitFPDataProcessing1Source(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "'Fd, 'Fn";
 
@@ -1059,7 +1065,7 @@
 }
 
 
-void Disassembler::VisitFPDataProcessing2Source(Instruction* instr) {
+void Disassembler::VisitFPDataProcessing2Source(const Instruction* instr) {
   const char *mnemonic = "";
   const char *form = "'Fd, 'Fn, 'Fm";
 
@@ -1083,7 +1089,7 @@
 }
 
 
-void Disassembler::VisitFPDataProcessing3Source(Instruction* instr) {
+void Disassembler::VisitFPDataProcessing3Source(const Instruction* instr) {
   const char *mnemonic = "";
   const char *form = "'Fd, 'Fn, 'Fm, 'Fa";
 
@@ -1102,7 +1108,7 @@
 }
 
 
-void Disassembler::VisitFPImmediate(Instruction* instr) {
+void Disassembler::VisitFPImmediate(const Instruction* instr) {
   const char *mnemonic = "";
   const char *form = "(FPImmediate)";
 
@@ -1115,7 +1121,7 @@
 }
 
 
-void Disassembler::VisitFPIntegerConvert(Instruction* instr) {
+void Disassembler::VisitFPIntegerConvert(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "(FPIntegerConvert)";
   const char *form_rf = "'Rd, 'Fn";
@@ -1171,7 +1177,7 @@
 }
 
 
-void Disassembler::VisitFPFixedPointConvert(Instruction* instr) {
+void Disassembler::VisitFPFixedPointConvert(const Instruction* instr) {
   const char *mnemonic = "";
   const char *form = "'Rd, 'Fn, 'IFPFBits";
   const char *form_fr = "'Fd, 'Rn, 'IFPFBits";
@@ -1199,7 +1205,7 @@
 }
 
 
-void Disassembler::VisitSystem(Instruction* instr) {
+void Disassembler::VisitSystem(const Instruction* instr) {
   // Some system instructions hijack their Op and Cp fields to represent a
   // range of immediates instead of indicating a different instruction. This
   // makes the decoding tricky.
@@ -1267,7 +1273,7 @@
 }
 
 
-void Disassembler::VisitException(Instruction* instr) {
+void Disassembler::VisitException(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "'IDebug";
 
@@ -1286,22 +1292,75 @@
 }
 
 
-void Disassembler::VisitUnimplemented(Instruction* instr) {
+void Disassembler::VisitUnimplemented(const Instruction* instr) {
   Format(instr, "unimplemented", "(Unimplemented)");
 }
 
 
-void Disassembler::VisitUnallocated(Instruction* instr) {
+void Disassembler::VisitUnallocated(const Instruction* instr) {
   Format(instr, "unallocated", "(Unallocated)");
 }
 
 
-void Disassembler::ProcessOutput(Instruction* /*instr*/) {
+void Disassembler::ProcessOutput(const Instruction* /*instr*/) {
   // The base disasm does nothing more than disassembling into a buffer.
 }
 
 
-void Disassembler::Format(Instruction* instr, const char* mnemonic,
+void Disassembler::AppendRegisterNameToOutput(const Instruction* instr,
+                                              const CPURegister& reg) {
+  USE(instr);
+  VIXL_ASSERT(reg.IsValid());
+  char reg_char;
+
+  if (reg.IsRegister()) {
+    reg_char = reg.Is64Bits() ? 'x' : 'w';
+  } else {
+    VIXL_ASSERT(reg.IsFPRegister());
+    reg_char = reg.Is64Bits() ? 'd' : 's';
+  }
+
+  if (reg.IsFPRegister() || !(reg.Aliases(sp) || reg.Aliases(xzr))) {
+    // A normal register: w0 - w30, x0 - x30, s0 - s31, d0 - d31.
+    AppendToOutput("%c%d", reg_char, reg.code());
+  } else if (reg.Aliases(sp)) {
+    // Disassemble w31/x31 as stack pointer wsp/sp.
+    AppendToOutput("%s", reg.Is64Bits() ? "sp" : "wsp");
+  } else {
+    // Disassemble w31/x31 as zero register wzr/xzr.
+    AppendToOutput("%czr", reg_char);
+  }
+}
+
+
+void Disassembler::AppendPCRelativeOffsetToOutput(const Instruction* instr,
+                                                  int64_t offset) {
+  USE(instr);
+  char sign = (offset < 0) ? '-' : '+';
+  AppendToOutput("#%c0x%" PRIx64, sign, std::abs(offset));
+}
+
+
+void Disassembler::AppendAddressToOutput(const Instruction* instr,
+                                         const void* addr) {
+  USE(instr);
+  AppendToOutput("(addr %p)", addr);
+}
+
+
+void Disassembler::AppendCodeAddressToOutput(const Instruction* instr,
+                                             const void* addr) {
+  AppendAddressToOutput(instr, addr);
+}
+
+
+void Disassembler::AppendDataAddressToOutput(const Instruction* instr,
+                                             const void* addr) {
+  AppendAddressToOutput(instr, addr);
+}
+
+
+void Disassembler::Format(const Instruction* instr, const char* mnemonic,
                           const char* format) {
   VIXL_ASSERT(mnemonic != NULL);
   ResetOutput();
@@ -1315,7 +1374,7 @@
 }
 
 
-void Disassembler::Substitute(Instruction* instr, const char* string) {
+void Disassembler::Substitute(const Instruction* instr, const char* string) {
   char chr = *string++;
   while (chr != '\0') {
     if (chr == '\'') {
@@ -1328,7 +1387,8 @@
 }
 
 
-int Disassembler::SubstituteField(Instruction* instr, const char* format) {
+int Disassembler::SubstituteField(const Instruction* instr,
+                                  const char* format) {
   switch (format[0]) {
     case 'R':  // Register. X or W, selected by sf bit.
     case 'F':  // FP Register. S or D, selected by type field.
@@ -1354,7 +1414,7 @@
 }
 
 
-int Disassembler::SubstituteRegisterField(Instruction* instr,
+int Disassembler::SubstituteRegisterField(const Instruction* instr,
                                           const char* format) {
   unsigned reg_num = 0;
   unsigned field_len = 2;
@@ -1381,34 +1441,47 @@
     field_len = 3;
   }
 
-  char reg_type;
+  CPURegister::RegisterType reg_type;
+  unsigned reg_size;
+
   if (format[0] == 'R') {
     // Register type is R: use sf bit to choose X and W.
-    reg_type = instr->SixtyFourBits() ? 'x' : 'w';
+    reg_type = CPURegister::kRegister;
+    reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
   } else if (format[0] == 'F') {
     // Floating-point register: use type field to choose S or D.
-    reg_type = ((instr->FPType() & 1) == 0) ? 's' : 'd';
+    reg_type = CPURegister::kFPRegister;
+    reg_size = ((instr->FPType() & 1) == 0) ? kSRegSize : kDRegSize;
   } else {
-    // Register type is specified. Make it lower case.
-    reg_type = format[0] + 0x20;
+    // The register type is specified.
+    switch (format[0]) {
+      case 'W':
+        reg_type = CPURegister::kRegister; reg_size = kWRegSize; break;
+      case 'X':
+        reg_type = CPURegister::kRegister; reg_size = kXRegSize; break;
+      case 'S':
+        reg_type = CPURegister::kFPRegister; reg_size = kSRegSize; break;
+      case 'D':
+        reg_type = CPURegister::kFPRegister; reg_size = kDRegSize; break;
+      default:
+        VIXL_UNREACHABLE();
+        reg_type = CPURegister::kRegister;
+        reg_size = kXRegSize;
+    }
   }
 
-  if ((reg_num != kZeroRegCode) || (reg_type == 's') || (reg_type == 'd')) {
-    // A normal register: w0 - w30, x0 - x30, s0 - s31, d0 - d31.
-    AppendToOutput("%c%d", reg_type, reg_num);
-  } else if (format[2] == 's') {
-    // Disassemble w31/x31 as stack pointer wsp/sp.
-    AppendToOutput("%s", (reg_type == 'w') ? "wsp" : "sp");
-  } else {
-    // Disassemble w31/x31 as zero register wzr/xzr.
-    AppendToOutput("%czr", reg_type);
+  if ((reg_type == CPURegister::kRegister) &&
+      (reg_num == kZeroRegCode) && (format[2] == 's')) {
+    reg_num = kSPRegInternalCode;
   }
 
+  AppendRegisterNameToOutput(instr, CPURegister(reg_num, reg_size, reg_type));
+
   return field_len;
 }
 
 
-int Disassembler::SubstituteImmediateField(Instruction* instr,
+int Disassembler::SubstituteImmediateField(const Instruction* instr,
                                            const char* format) {
   VIXL_ASSERT(format[0] == 'I');
 
@@ -1458,8 +1531,7 @@
     }
     case 'C': {  // ICondB - Immediate Conditional Branch.
       int64_t offset = instr->ImmCondBranch() << 2;
-      char sign = (offset >= 0) ? '+' : '-';
-      AppendToOutput("#%c0x%" PRIx64, sign, offset);
+      AppendPCRelativeOffsetToOutput(instr, offset);
       return 6;
     }
     case 'A': {  // IAddSub.
@@ -1522,7 +1594,7 @@
 }
 
 
-int Disassembler::SubstituteBitfieldImmediateField(Instruction* instr,
+int Disassembler::SubstituteBitfieldImmediateField(const Instruction* instr,
                                                    const char* format) {
   VIXL_ASSERT((format[0] == 'I') && (format[1] == 'B'));
   unsigned r = instr->ImmR();
@@ -1557,7 +1629,7 @@
 }
 
 
-int Disassembler::SubstituteLiteralField(Instruction* instr,
+int Disassembler::SubstituteLiteralField(const Instruction* instr,
                                          const char* format) {
   VIXL_ASSERT(strncmp(format, "LValue", 6) == 0);
   USE(format);
@@ -1565,16 +1637,21 @@
   switch (instr->Mask(LoadLiteralMask)) {
     case LDR_w_lit:
     case LDR_x_lit:
+    case LDRSW_x_lit:
     case LDR_s_lit:
-    case LDR_d_lit: AppendToOutput("(addr %p)", instr->LiteralAddress()); break;
-    default: VIXL_UNREACHABLE();
+    case LDR_d_lit:
+      AppendDataAddressToOutput(instr, instr->LiteralAddress());
+      break;
+    default:
+      VIXL_UNREACHABLE();
   }
 
   return 6;
 }
 
 
-int Disassembler::SubstituteShiftField(Instruction* instr, const char* format) {
+int Disassembler::SubstituteShiftField(const Instruction* instr,
+                                       const char* format) {
   VIXL_ASSERT(format[0] == 'H');
   VIXL_ASSERT(instr->ShiftDP() <= 0x3);
 
@@ -1597,7 +1674,7 @@
 }
 
 
-int Disassembler::SubstituteConditionField(Instruction* instr,
+int Disassembler::SubstituteConditionField(const Instruction* instr,
                                            const char* format) {
   VIXL_ASSERT(format[0] == 'C');
   const char* condition_code[] = { "eq", "ne", "hs", "lo",
@@ -1618,27 +1695,28 @@
 }
 
 
-int Disassembler::SubstitutePCRelAddressField(Instruction* instr,
+int Disassembler::SubstitutePCRelAddressField(const Instruction* instr,
                                               const char* format) {
   VIXL_ASSERT((strcmp(format, "AddrPCRelByte") == 0) ||   // Used by `adr`.
               (strcmp(format, "AddrPCRelPage") == 0));    // Used by `adrp`.
 
   int64_t offset = instr->ImmPCRel();
-  Instruction * base = instr;
+  const Instruction * base = instr;
 
   if (format[9] == 'P') {
     offset *= kPageSize;
     base = AlignDown(base, kPageSize);
   }
 
-  char sign = (offset < 0) ? '-' : '+';
-  void * target = reinterpret_cast<void *>(base + offset);
-  AppendToOutput("#%c0x%" PRIx64 " (addr %p)", sign, std::abs(offset), target);
+  const void* target = reinterpret_cast<const void*>(base + offset);
+  AppendPCRelativeOffsetToOutput(instr, offset);
+  AppendToOutput(" ");
+  AppendAddressToOutput(instr, target);
   return 13;
 }
 
 
-int Disassembler::SubstituteBranchTargetField(Instruction* instr,
+int Disassembler::SubstituteBranchTargetField(const Instruction* instr,
                                               const char* format) {
   VIXL_ASSERT(strncmp(format, "BImm", 4) == 0);
 
@@ -1655,19 +1733,18 @@
     default: VIXL_UNIMPLEMENTED();
   }
   offset <<= kInstructionSizeLog2;
-  char sign = '+';
-  if (offset < 0) {
-    offset = -offset;
-    sign = '-';
-  }
+  const void* target_address = reinterpret_cast<const void*>(instr + offset);
   VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
-  void * address = reinterpret_cast<void *>(instr + offset);
-  AppendToOutput("#%c0x%" PRIx64 " (addr %p)", sign, offset, address);
+
+  AppendPCRelativeOffsetToOutput(instr, offset);
+  AppendToOutput(" ");
+  AppendCodeAddressToOutput(instr, target_address);
+
   return 8;
 }
 
 
-int Disassembler::SubstituteExtendField(Instruction* instr,
+int Disassembler::SubstituteExtendField(const Instruction* instr,
                                         const char* format) {
   VIXL_ASSERT(strncmp(format, "Ext", 3) == 0);
   VIXL_ASSERT(instr->ExtendMode() <= 7);
@@ -1694,7 +1771,7 @@
 }
 
 
-int Disassembler::SubstituteLSRegOffsetField(Instruction* instr,
+int Disassembler::SubstituteLSRegOffsetField(const Instruction* instr,
                                              const char* format) {
   VIXL_ASSERT(strncmp(format, "Offsetreg", 9) == 0);
   const char* extend_mode[] = { "undefined", "undefined", "uxtw", "lsl",
@@ -1723,7 +1800,7 @@
 }
 
 
-int Disassembler::SubstitutePrefetchField(Instruction* instr,
+int Disassembler::SubstitutePrefetchField(const Instruction* instr,
                                           const char* format) {
   VIXL_ASSERT(format[0] == 'P');
   USE(format);
@@ -1738,7 +1815,7 @@
   return 6;
 }
 
-int Disassembler::SubstituteBarrierField(Instruction* instr,
+int Disassembler::SubstituteBarrierField(const Instruction* instr,
                                          const char* format) {
   VIXL_ASSERT(format[0] == 'M');
   USE(format);
@@ -1770,7 +1847,7 @@
 }
 
 
-void PrintDisassembler::ProcessOutput(Instruction* instr) {
+void PrintDisassembler::ProcessOutput(const Instruction* instr) {
   fprintf(stream_, "0x%016" PRIx64 "  %08" PRIx32 "\t\t%s\n",
           reinterpret_cast<uint64_t>(instr),
           instr->InstructionBits(),
diff --git a/disas/libvixl/a64/disasm-a64.h b/disas/libvixl/a64/disasm-a64.h
index 06ee43f..db04337 100644
--- a/disas/libvixl/a64/disasm-a64.h
+++ b/disas/libvixl/a64/disasm-a64.h
@@ -31,6 +31,7 @@
 #include "utils.h"
 #include "instructions-a64.h"
 #include "decoder-a64.h"
+#include "assembler-a64.h"
 
 namespace vixl {
 
@@ -42,48 +43,83 @@
   char* GetOutput();
 
   // Declare all Visitor functions.
-  #define DECLARE(A)  void Visit##A(Instruction* instr);
+  #define DECLARE(A)  void Visit##A(const Instruction* instr);
   VISITOR_LIST(DECLARE)
   #undef DECLARE
 
  protected:
-  virtual void ProcessOutput(Instruction* instr);
+  virtual void ProcessOutput(const Instruction* instr);
+
+  // Default output functions.  The functions below implement a default way of
+  // printing elements in the disassembly. A sub-class can override these to
+  // customize the disassembly output.
+
+  // Prints the name of a register.
+  virtual void AppendRegisterNameToOutput(const Instruction* instr,
+                                          const CPURegister& reg);
+
+  // Prints a PC-relative offset. This is used for example when disassembling
+  // branches to immediate offsets.
+  virtual void AppendPCRelativeOffsetToOutput(const Instruction* instr,
+                                              int64_t offset);
+
+  // Prints an address, in the general case. It can be code or data. This is
+  // used for example to print the target address of an ADR instruction.
+  virtual void AppendAddressToOutput(const Instruction* instr,
+                                     const void* addr);
+
+  // Prints the address of some code.
+  // This is used for example to print the target address of a branch to an
+  // immediate offset.
+  // A sub-class can for example override this method to lookup the address and
+  // print an appropriate name.
+  virtual void AppendCodeAddressToOutput(const Instruction* instr,
+                                         const void* addr);
+
+  // Prints the address of some data.
+  // This is used for example to print the source address of a load literal
+  // instruction.
+  virtual void AppendDataAddressToOutput(const Instruction* instr,
+                                         const void* addr);
 
  private:
-  void Format(Instruction* instr, const char* mnemonic, const char* format);
-  void Substitute(Instruction* instr, const char* string);
-  int SubstituteField(Instruction* instr, const char* format);
-  int SubstituteRegisterField(Instruction* instr, const char* format);
-  int SubstituteImmediateField(Instruction* instr, const char* format);
-  int SubstituteLiteralField(Instruction* instr, const char* format);
-  int SubstituteBitfieldImmediateField(Instruction* instr, const char* format);
-  int SubstituteShiftField(Instruction* instr, const char* format);
-  int SubstituteExtendField(Instruction* instr, const char* format);
-  int SubstituteConditionField(Instruction* instr, const char* format);
-  int SubstitutePCRelAddressField(Instruction* instr, const char* format);
-  int SubstituteBranchTargetField(Instruction* instr, const char* format);
-  int SubstituteLSRegOffsetField(Instruction* instr, const char* format);
-  int SubstitutePrefetchField(Instruction* instr, const char* format);
-  int SubstituteBarrierField(Instruction* instr, const char* format);
+  void Format(
+      const Instruction* instr, const char* mnemonic, const char* format);
+  void Substitute(const Instruction* instr, const char* string);
+  int SubstituteField(const Instruction* instr, const char* format);
+  int SubstituteRegisterField(const Instruction* instr, const char* format);
+  int SubstituteImmediateField(const Instruction* instr, const char* format);
+  int SubstituteLiteralField(const Instruction* instr, const char* format);
+  int SubstituteBitfieldImmediateField(
+      const Instruction* instr, const char* format);
+  int SubstituteShiftField(const Instruction* instr, const char* format);
+  int SubstituteExtendField(const Instruction* instr, const char* format);
+  int SubstituteConditionField(const Instruction* instr, const char* format);
+  int SubstitutePCRelAddressField(const Instruction* instr, const char* format);
+  int SubstituteBranchTargetField(const Instruction* instr, const char* format);
+  int SubstituteLSRegOffsetField(const Instruction* instr, const char* format);
+  int SubstitutePrefetchField(const Instruction* instr, const char* format);
+  int SubstituteBarrierField(const Instruction* instr, const char* format);
 
-  inline bool RdIsZROrSP(Instruction* instr) const {
+  inline bool RdIsZROrSP(const Instruction* instr) const {
     return (instr->Rd() == kZeroRegCode);
   }
 
-  inline bool RnIsZROrSP(Instruction* instr) const {
+  inline bool RnIsZROrSP(const Instruction* instr) const {
     return (instr->Rn() == kZeroRegCode);
   }
 
-  inline bool RmIsZROrSP(Instruction* instr) const {
+  inline bool RmIsZROrSP(const Instruction* instr) const {
     return (instr->Rm() == kZeroRegCode);
   }
 
-  inline bool RaIsZROrSP(Instruction* instr) const {
+  inline bool RaIsZROrSP(const Instruction* instr) const {
     return (instr->Ra() == kZeroRegCode);
   }
 
   bool IsMovzMovnImm(unsigned reg_size, uint64_t value);
 
+ protected:
   void ResetOutput();
   void AppendToOutput(const char* string, ...) PRINTF_CHECK(2, 3);
 
@@ -97,10 +133,10 @@
 class PrintDisassembler: public Disassembler {
  public:
   explicit PrintDisassembler(FILE* stream) : stream_(stream) { }
-  ~PrintDisassembler() { }
+  virtual ~PrintDisassembler() { }
 
  protected:
-  virtual void ProcessOutput(Instruction* instr);
+  virtual void ProcessOutput(const Instruction* instr);
 
  private:
   FILE *stream_;
diff --git a/disas/libvixl/a64/instructions-a64.cc b/disas/libvixl/a64/instructions-a64.cc
index e9caceb..1f08c78 100644
--- a/disas/libvixl/a64/instructions-a64.cc
+++ b/disas/libvixl/a64/instructions-a64.cc
@@ -57,7 +57,7 @@
 // Logical immediates can't encode zero, so a return value of zero is used to
 // indicate a failure case. Specifically, where the constraints on imm_s are
 // not met.
-uint64_t Instruction::ImmLogical() {
+uint64_t Instruction::ImmLogical() const {
   unsigned reg_size = SixtyFourBits() ? kXRegSize : kWRegSize;
   int64_t n = BitN();
   int64_t imm_s = ImmSetBits();
@@ -108,7 +108,7 @@
 }
 
 
-float Instruction::ImmFP32() {
+float Instruction::ImmFP32() const {
   //  ImmFP: abcdefgh (8 bits)
   // Single: aBbb.bbbc.defg.h000.0000.0000.0000.0000 (32 bits)
   // where B is b ^ 1
@@ -122,7 +122,7 @@
 }
 
 
-double Instruction::ImmFP64() {
+double Instruction::ImmFP64() const {
   //  ImmFP: abcdefgh (8 bits)
   // Double: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
   //         0000.0000.0000.0000.0000.0000.0000.0000 (64 bits)
@@ -148,8 +148,8 @@
 }
 
 
-Instruction* Instruction::ImmPCOffsetTarget() {
-  Instruction * base = this;
+const Instruction* Instruction::ImmPCOffsetTarget() const {
+  const Instruction * base = this;
   ptrdiff_t offset;
   if (IsPCRelAddressing()) {
     // ADR and ADRP.
@@ -182,7 +182,7 @@
 }
 
 
-void Instruction::SetImmPCOffsetTarget(Instruction* target) {
+void Instruction::SetImmPCOffsetTarget(const Instruction* target) {
   if (IsPCRelAddressing()) {
     SetPCRelImmTarget(target);
   } else {
@@ -191,7 +191,7 @@
 }
 
 
-void Instruction::SetPCRelImmTarget(Instruction* target) {
+void Instruction::SetPCRelImmTarget(const Instruction* target) {
   int32_t imm21;
   if ((Mask(PCRelAddressingMask) == ADR)) {
     imm21 = target - this;
@@ -207,7 +207,7 @@
 }
 
 
-void Instruction::SetBranchImmTarget(Instruction* target) {
+void Instruction::SetBranchImmTarget(const Instruction* target) {
   VIXL_ASSERT(((target - this) & 3) == 0);
   Instr branch_imm = 0;
   uint32_t imm_mask = 0;
@@ -239,9 +239,9 @@
 }
 
 
-void Instruction::SetImmLLiteral(Instruction* source) {
-  VIXL_ASSERT(((source - this) & 3) == 0);
-  int offset = (source - this) >> kLiteralEntrySizeLog2;
+void Instruction::SetImmLLiteral(const Instruction* source) {
+  VIXL_ASSERT(IsWordAligned(source));
+  ptrdiff_t offset = (source - this) >> kLiteralEntrySizeLog2;
   Instr imm = Assembler::ImmLLiteral(offset);
   Instr mask = ImmLLiteral_mask;
 
diff --git a/disas/libvixl/a64/instructions-a64.h b/disas/libvixl/a64/instructions-a64.h
index d5b90c5..29f9722 100644
--- a/disas/libvixl/a64/instructions-a64.h
+++ b/disas/libvixl/a64/instructions-a64.h
@@ -44,6 +44,7 @@
 // This is the nominal page size (as used by the adrp instruction); the actual
 // size of the memory pages allocated by the kernel is likely to differ.
 const unsigned kPageSize = 4 * KBytes;
+const unsigned kPageSizeLog2 = 12;
 
 const unsigned kWRegSize = 32;
 const unsigned kWRegSizeLog2 = 5;
@@ -95,30 +96,6 @@
 const unsigned kFloatMantissaBits = 23;
 const unsigned kFloatExponentBits = 8;
 
-const float kFP32PositiveInfinity = rawbits_to_float(0x7f800000);
-const float kFP32NegativeInfinity = rawbits_to_float(0xff800000);
-const double kFP64PositiveInfinity =
-    rawbits_to_double(UINT64_C(0x7ff0000000000000));
-const double kFP64NegativeInfinity =
-    rawbits_to_double(UINT64_C(0xfff0000000000000));
-
-// This value is a signalling NaN as both a double and as a float (taking the
-// least-significant word).
-static const double kFP64SignallingNaN =
-    rawbits_to_double(UINT64_C(0x7ff000007f800001));
-static const float kFP32SignallingNaN = rawbits_to_float(0x7f800001);
-
-// A similar value, but as a quiet NaN.
-static const double kFP64QuietNaN =
-    rawbits_to_double(UINT64_C(0x7ff800007fc00001));
-static const float kFP32QuietNaN = rawbits_to_float(0x7fc00001);
-
-// The default NaN values (for FPCR.DN=1).
-static const double kFP64DefaultNaN =
-    rawbits_to_double(UINT64_C(0x7ff8000000000000));
-static const float kFP32DefaultNaN = rawbits_to_float(0x7fc00000);
-
-
 enum LSDataSize {
   LSByte        = 0,
   LSHalfword    = 1,
@@ -201,9 +178,9 @@
     return signed_bitextract_32(width-1, 0, offset);
   }
 
-  uint64_t ImmLogical();
-  float ImmFP32();
-  double ImmFP64();
+  uint64_t ImmLogical() const;
+  float ImmFP32() const;
+  double ImmFP64() const;
 
   inline LSDataSize SizeLSPair() const {
     return CalcLSPairDataSize(
@@ -311,46 +288,49 @@
 
   // Find the target of this instruction. 'this' may be a branch or a
   // PC-relative addressing instruction.
-  Instruction* ImmPCOffsetTarget();
+  const Instruction* ImmPCOffsetTarget() const;
 
   // Patch a PC-relative offset to refer to 'target'. 'this' may be a branch or
   // a PC-relative addressing instruction.
-  void SetImmPCOffsetTarget(Instruction* target);
+  void SetImmPCOffsetTarget(const Instruction* target);
   // Patch a literal load instruction to load from 'source'.
-  void SetImmLLiteral(Instruction* source);
+  void SetImmLLiteral(const Instruction* source);
 
-  inline uint8_t* LiteralAddress() {
+  inline uint8_t* LiteralAddress() const {
     int offset = ImmLLiteral() << kLiteralEntrySizeLog2;
-    return reinterpret_cast<uint8_t*>(this) + offset;
+    const uint8_t* address = reinterpret_cast<const uint8_t*>(this) + offset;
+    // Note that the result is safely mutable only if the backing buffer is
+    // safely mutable.
+    return const_cast<uint8_t*>(address);
   }
 
-  inline uint32_t Literal32() {
+  inline uint32_t Literal32() const {
     uint32_t literal;
     memcpy(&literal, LiteralAddress(), sizeof(literal));
 
     return literal;
   }
 
-  inline uint64_t Literal64() {
+  inline uint64_t Literal64() const {
     uint64_t literal;
     memcpy(&literal, LiteralAddress(), sizeof(literal));
 
     return literal;
   }
 
-  inline float LiteralFP32() {
+  inline float LiteralFP32() const {
     return rawbits_to_float(Literal32());
   }
 
-  inline double LiteralFP64() {
+  inline double LiteralFP64() const {
     return rawbits_to_double(Literal64());
   }
 
-  inline Instruction* NextInstruction() {
+  inline const Instruction* NextInstruction() const {
     return this + kInstructionSize;
   }
 
-  inline Instruction* InstructionAtOffset(int64_t offset) {
+  inline const Instruction* InstructionAtOffset(int64_t offset) const {
     VIXL_ASSERT(IsWordAligned(this + offset));
     return this + offset;
   }
@@ -359,11 +339,15 @@
     return reinterpret_cast<Instruction*>(src);
   }
 
+  template<typename T> static inline const Instruction* CastConst(T src) {
+    return reinterpret_cast<const Instruction*>(src);
+  }
+
  private:
   inline int ImmBranch() const;
 
-  void SetPCRelImmTarget(Instruction* target);
-  void SetBranchImmTarget(Instruction* target);
+  void SetPCRelImmTarget(const Instruction* target);
+  void SetBranchImmTarget(const Instruction* target);
 };
 }  // namespace vixl
 
diff --git a/disas/libvixl/code-buffer.h b/disas/libvixl/code-buffer.h
new file mode 100644
index 0000000..da6233d
--- /dev/null
+++ b/disas/libvixl/code-buffer.h
@@ -0,0 +1,113 @@
+// Copyright 2014, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_CODE_BUFFER_H
+#define VIXL_CODE_BUFFER_H
+
+#include <string.h>
+#include "globals.h"
+
+namespace vixl {
+
+class CodeBuffer {
+ public:
+  explicit CodeBuffer(size_t capacity = 4 * KBytes);
+  CodeBuffer(void* buffer, size_t capacity);
+  ~CodeBuffer();
+
+  void Reset();
+
+  ptrdiff_t OffsetFrom(ptrdiff_t offset) const {
+    ptrdiff_t cursor_offset = cursor_ - buffer_;
+    VIXL_ASSERT((offset >= 0) && (offset <= cursor_offset));
+    return cursor_offset - offset;
+  }
+
+  ptrdiff_t CursorOffset() const {
+    return OffsetFrom(0);
+  }
+
+  template <typename T>
+  T GetOffsetAddress(ptrdiff_t offset) const {
+    VIXL_ASSERT((offset >= 0) && (offset <= (cursor_ - buffer_)));
+    return reinterpret_cast<T>(buffer_ + offset);
+  }
+
+  size_t RemainingBytes() const {
+    VIXL_ASSERT((cursor_ >= buffer_) && (cursor_ <= (buffer_ + capacity_)));
+    return (buffer_ + capacity_) - cursor_;
+  }
+
+  // A code buffer can emit:
+  //  * 32-bit data: instruction and constant.
+  //  * 64-bit data: constant.
+  //  * string: debug info.
+  void Emit32(uint32_t data) { Emit(data); }
+
+  void Emit64(uint64_t data) { Emit(data); }
+
+  void EmitString(const char* string);
+
+  // Align to kInstructionSize.
+  void Align();
+
+  size_t capacity() const { return capacity_; }
+
+  bool IsManaged() const { return managed_; }
+
+  void Grow(size_t new_capacity);
+
+  bool IsDirty() const { return dirty_; }
+
+  void SetClean() { dirty_ = false; }
+
+ private:
+  template <typename T>
+  void Emit(T value) {
+    VIXL_ASSERT(RemainingBytes() >= sizeof(value));
+    dirty_ = true;
+    memcpy(cursor_, &value, sizeof(value));
+    cursor_ += sizeof(value);
+  }
+
+  // Backing store of the buffer.
+  byte* buffer_;
+  // If true the backing store is allocated and deallocated by the buffer. The
+  // backing store can then grow on demand. If false the backing store is
+  // provided by the user and cannot be resized internally.
+  bool managed_;
+  // Pointer to the next location to be written.
+  byte* cursor_;
+  // True if there has been any write since the buffer was created or cleaned.
+  bool dirty_;
+  // Capacity in bytes of the backing store.
+  size_t capacity_;
+};
+
+}  // namespace vixl
+
+#endif  // VIXL_CODE_BUFFER_H
+
diff --git a/disas/libvixl/utils.cc b/disas/libvixl/utils.cc
index 4d4fcbd..21965d7 100644
--- a/disas/libvixl/utils.cc
+++ b/disas/libvixl/utils.cc
@@ -134,4 +134,5 @@
 bool IsPowerOf2(int64_t value) {
   return (value != 0) && ((value & (value - 1)) == 0);
 }
+
 }  // namespace vixl
diff --git a/disas/libvixl/utils.h b/disas/libvixl/utils.h
index b472f0e..1540c30 100644
--- a/disas/libvixl/utils.h
+++ b/disas/libvixl/utils.h
@@ -171,7 +171,7 @@
 template<typename T>
 bool IsWordAligned(T pointer) {
   VIXL_ASSERT(sizeof(pointer) == sizeof(intptr_t));   // NOLINT(runtime/sizeof)
-  return (reinterpret_cast<intptr_t>(pointer) & 3) == 0;
+  return ((intptr_t)(pointer) & 3) == 0;
 }
 
 // Increment a pointer until it has the specified alignment.
@@ -204,7 +204,6 @@
   return (T)(pointer_raw - align_step);
 }
 
-
 }  // namespace vixl
 
 #endif  // VIXL_UTILS_H
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 0b1a4f7..e37bc8b 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1748,8 +1748,6 @@
 show list of VM snapshots
 @item info status
 show the current VM status (running|paused)
-@item info pcmcia
-show guest PCMCIA status
 @item info mice
 show which guest mouse is receiving events
 @item info vnc
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index c8dc34f..bffbea5 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -478,7 +478,7 @@
 
 void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info)
 {
-    CPUState *cs = CPU(cpu);
+    CPUState *cs;
     int kernel_size;
     int initrd_size;
     int is_linux = 0;
@@ -488,6 +488,15 @@
     int big_endian;
     static const ARMInsnFixup *primary_loader;
 
+    /* CPU objects (unlike devices) are not automatically reset on system
+     * reset, so we must always register a handler to do so. If we're
+     * actually loading a kernel, the handler is also responsible for
+     * arranging that we start it correctly.
+     */
+    for (cs = CPU(cpu); cs; cs = CPU_NEXT(cs)) {
+        qemu_register_reset(do_cpu_reset, ARM_CPU(cs));
+    }
+
     /* Load the kernel.  */
     if (!info->kernel_filename) {
 
@@ -652,9 +661,7 @@
     }
     info->is_linux = is_linux;
 
-    for (; cs; cs = CPU_NEXT(cs)) {
-        cpu = ARM_CPU(cs);
-        cpu->env.boot_info = info;
-        qemu_register_reset(do_cpu_reset, cpu);
+    for (cs = CPU(cpu); cs; cs = CPU_NEXT(cs)) {
+        ARM_CPU(cs)->env.boot_info = info;
     }
 }
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 8ea592e..78f618d 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -191,47 +191,48 @@
 
 static void fdt_add_psci_node(const VirtBoardInfo *vbi)
 {
+    uint32_t cpu_suspend_fn;
+    uint32_t cpu_off_fn;
+    uint32_t cpu_on_fn;
+    uint32_t migrate_fn;
     void *fdt = vbi->fdt;
     ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(0));
 
-    /* No PSCI for TCG yet */
-    if (kvm_enabled()) {
-        uint32_t cpu_suspend_fn;
-        uint32_t cpu_off_fn;
-        uint32_t cpu_on_fn;
-        uint32_t migrate_fn;
+    qemu_fdt_add_subnode(fdt, "/psci");
+    if (armcpu->psci_version == 2) {
+        const char comp[] = "arm,psci-0.2\0arm,psci";
+        qemu_fdt_setprop(fdt, "/psci", "compatible", comp, sizeof(comp));
 
-        qemu_fdt_add_subnode(fdt, "/psci");
-        if (armcpu->psci_version == 2) {
-            const char comp[] = "arm,psci-0.2\0arm,psci";
-            qemu_fdt_setprop(fdt, "/psci", "compatible", comp, sizeof(comp));
-
-            cpu_off_fn = QEMU_PSCI_0_2_FN_CPU_OFF;
-            if (arm_feature(&armcpu->env, ARM_FEATURE_AARCH64)) {
-                cpu_suspend_fn = QEMU_PSCI_0_2_FN64_CPU_SUSPEND;
-                cpu_on_fn = QEMU_PSCI_0_2_FN64_CPU_ON;
-                migrate_fn = QEMU_PSCI_0_2_FN64_MIGRATE;
-            } else {
-                cpu_suspend_fn = QEMU_PSCI_0_2_FN_CPU_SUSPEND;
-                cpu_on_fn = QEMU_PSCI_0_2_FN_CPU_ON;
-                migrate_fn = QEMU_PSCI_0_2_FN_MIGRATE;
-            }
+        cpu_off_fn = QEMU_PSCI_0_2_FN_CPU_OFF;
+        if (arm_feature(&armcpu->env, ARM_FEATURE_AARCH64)) {
+            cpu_suspend_fn = QEMU_PSCI_0_2_FN64_CPU_SUSPEND;
+            cpu_on_fn = QEMU_PSCI_0_2_FN64_CPU_ON;
+            migrate_fn = QEMU_PSCI_0_2_FN64_MIGRATE;
         } else {
-            qemu_fdt_setprop_string(fdt, "/psci", "compatible", "arm,psci");
-
-            cpu_suspend_fn = QEMU_PSCI_0_1_FN_CPU_SUSPEND;
-            cpu_off_fn = QEMU_PSCI_0_1_FN_CPU_OFF;
-            cpu_on_fn = QEMU_PSCI_0_1_FN_CPU_ON;
-            migrate_fn = QEMU_PSCI_0_1_FN_MIGRATE;
+            cpu_suspend_fn = QEMU_PSCI_0_2_FN_CPU_SUSPEND;
+            cpu_on_fn = QEMU_PSCI_0_2_FN_CPU_ON;
+            migrate_fn = QEMU_PSCI_0_2_FN_MIGRATE;
         }
+    } else {
+        qemu_fdt_setprop_string(fdt, "/psci", "compatible", "arm,psci");
 
-        qemu_fdt_setprop_string(fdt, "/psci", "method", "hvc");
-
-        qemu_fdt_setprop_cell(fdt, "/psci", "cpu_suspend", cpu_suspend_fn);
-        qemu_fdt_setprop_cell(fdt, "/psci", "cpu_off", cpu_off_fn);
-        qemu_fdt_setprop_cell(fdt, "/psci", "cpu_on", cpu_on_fn);
-        qemu_fdt_setprop_cell(fdt, "/psci", "migrate", migrate_fn);
+        cpu_suspend_fn = QEMU_PSCI_0_1_FN_CPU_SUSPEND;
+        cpu_off_fn = QEMU_PSCI_0_1_FN_CPU_OFF;
+        cpu_on_fn = QEMU_PSCI_0_1_FN_CPU_ON;
+        migrate_fn = QEMU_PSCI_0_1_FN_MIGRATE;
     }
+
+    /* We adopt the PSCI spec's nomenclature, and use 'conduit' to refer
+     * to the instruction that should be used to invoke PSCI functions.
+     * However, the device tree binding uses 'method' instead, so that is
+     * what we should use here.
+     */
+    qemu_fdt_setprop_string(fdt, "/psci", "method", "hvc");
+
+    qemu_fdt_setprop_cell(fdt, "/psci", "cpu_suspend", cpu_suspend_fn);
+    qemu_fdt_setprop_cell(fdt, "/psci", "cpu_off", cpu_off_fn);
+    qemu_fdt_setprop_cell(fdt, "/psci", "cpu_on", cpu_on_fn);
+    qemu_fdt_setprop_cell(fdt, "/psci", "migrate", migrate_fn);
 }
 
 static void fdt_add_timer_nodes(const VirtBoardInfo *vbi)
@@ -240,14 +241,23 @@
      * but for the GIC implementation provided by both QEMU and KVM
      * they are edge-triggered.
      */
+    ARMCPU *armcpu;
     uint32_t irqflags = GIC_FDT_IRQ_FLAGS_EDGE_LO_HI;
 
     irqflags = deposit32(irqflags, GIC_FDT_IRQ_PPI_CPU_START,
                          GIC_FDT_IRQ_PPI_CPU_WIDTH, (1 << vbi->smp_cpus) - 1);
 
     qemu_fdt_add_subnode(vbi->fdt, "/timer");
-    qemu_fdt_setprop_string(vbi->fdt, "/timer",
-                                "compatible", "arm,armv7-timer");
+
+    armcpu = ARM_CPU(qemu_get_cpu(0));
+    if (arm_feature(&armcpu->env, ARM_FEATURE_V8)) {
+        const char compat[] = "arm,armv8-timer\0arm,armv7-timer";
+        qemu_fdt_setprop(vbi->fdt, "/timer", "compatible",
+                         compat, sizeof(compat));
+    } else {
+        qemu_fdt_setprop_string(vbi->fdt, "/timer", "compatible",
+                                "arm,armv7-timer");
+    }
     qemu_fdt_setprop_cells(vbi->fdt, "/timer", "interrupts",
                                GIC_FDT_IRQ_TYPE_PPI, 13, irqflags,
                                GIC_FDT_IRQ_TYPE_PPI, 14, irqflags,
@@ -539,23 +549,12 @@
 
     vbi->smp_cpus = smp_cpus;
 
-    /*
-     * Only supported method of starting secondary CPUs is PSCI and
-     * PSCI is not yet supported with TCG, so limit smp_cpus to 1
-     * if we're not using KVM.
-     */
-    if (!kvm_enabled() && smp_cpus > 1) {
-        error_report("mach-virt: must enable KVM to use multiple CPUs");
-        exit(1);
-    }
-
     if (machine->ram_size > vbi->memmap[VIRT_MEM].size) {
         error_report("mach-virt: cannot model more than 30GB RAM");
         exit(1);
     }
 
     create_fdt(vbi);
-    fdt_add_timer_nodes(vbi);
 
     for (n = 0; n < smp_cpus; n++) {
         ObjectClass *oc = cpu_class_by_name(TYPE_ARM_CPU, cpu_model);
@@ -567,6 +566,9 @@
         }
         cpuobj = object_new(object_class_get_name(oc));
 
+        object_property_set_int(cpuobj, QEMU_PSCI_CONDUIT_HVC, "psci-conduit",
+                                NULL);
+
         /* Secondary CPUs start in PSCI powered-down state */
         if (n > 0) {
             object_property_set_bool(cpuobj, true, "start-powered-off", NULL);
@@ -579,6 +581,7 @@
 
         object_property_set_bool(cpuobj, true, "realized", NULL);
     }
+    fdt_add_timer_nodes(vbi);
     fdt_add_cpu_nodes(vbi);
     fdt_add_psci_node(vbi);
 
diff --git a/hw/ide/microdrive.c b/hw/ide/microdrive.c
index ed85185..6639dd4 100644
--- a/hw/ide/microdrive.c
+++ b/hw/ide/microdrive.c
@@ -543,7 +543,6 @@
     device_reset(DEVICE(md));
     md_interrupt_update(md);
 
-    card->slot->card_string = "DSCM-1xxxx Hitachi Microdrive";
     return 0;
 }
 
diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c
index db9110c..270ce05 100644
--- a/hw/intc/arm_gic.c
+++ b/hw/intc/arm_gic.c
@@ -769,7 +769,7 @@
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-void gic_init_irqs_and_distributor(GICState *s, int num_irq)
+void gic_init_irqs_and_distributor(GICState *s)
 {
     SysBusDevice *sbd = SYS_BUS_DEVICE(s);
     int i;
@@ -808,7 +808,7 @@
         return;
     }
 
-    gic_init_irqs_and_distributor(s, s->num_irq);
+    gic_init_irqs_and_distributor(s);
 
     /* Memory regions for the CPU interfaces (NVIC doesn't have these):
      * a region for "CPU interface for this core", then a region for
diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c
index 1a7af45..d0543d4 100644
--- a/hw/intc/armv7m_nvic.c
+++ b/hw/intc/armv7m_nvic.c
@@ -488,7 +488,7 @@
         error_propagate(errp, local_err);
         return;
     }
-    gic_init_irqs_and_distributor(&s->gic, s->num_irq);
+    gic_init_irqs_and_distributor(&s->gic);
     /* The NVIC and system controller register area looks like this:
      *  0..0xff : system control registers, including systick
      *  0x100..0xcff : GIC-like registers
diff --git a/hw/intc/gic_internal.h b/hw/intc/gic_internal.h
index 48a58d7..e87ef36 100644
--- a/hw/intc/gic_internal.h
+++ b/hw/intc/gic_internal.h
@@ -59,7 +59,7 @@
 uint32_t gic_acknowledge_irq(GICState *s, int cpu);
 void gic_complete_irq(GICState *s, int cpu, int irq);
 void gic_update(GICState *s);
-void gic_init_irqs_and_distributor(GICState *s, int num_irq);
+void gic_init_irqs_and_distributor(GICState *s);
 void gic_set_priority(GICState *s, int cpu, int irq, uint8_t val);
 
 static inline bool gic_test_pending(GICState *s, int irq, int cm)
diff --git a/hw/misc/omap_gpmc.c b/hw/misc/omap_gpmc.c
index fbbe2ff..a0de52f 100644
--- a/hw/misc/omap_gpmc.c
+++ b/hw/misc/omap_gpmc.c
@@ -466,8 +466,6 @@
         s->cs_file[i].config[3] = 0x10031003;
         s->cs_file[i].config[4] = 0x10f1111;
         s->cs_file[i].config[5] = 0;
-        s->cs_file[i].config[6] = 0xf00 | (i ? 0 : 1 << 6);
-
         s->cs_file[i].config[6] = 0xf00;
         /* In theory we could probe attached devices for some CFG1
          * bits here, but we just retain them across resets as they
diff --git a/hw/pcmcia/pxa2xx.c b/hw/pcmcia/pxa2xx.c
index 55e8a2a..a7e1877 100644
--- a/hw/pcmcia/pxa2xx.c
+++ b/hw/pcmcia/pxa2xx.c
@@ -149,24 +149,11 @@
     sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base);
     s = PXA2XX_PCMCIA(dev);
 
-    if (base == 0x30000000) {
-        s->slot.slot_string = "PXA PC Card Socket 1";
-    } else {
-        s->slot.slot_string = "PXA PC Card Socket 0";
-    }
-
     qdev_init_nofail(dev);
 
     return s;
 }
 
-static void pxa2xx_pcmcia_realize(DeviceState *dev, Error **errp)
-{
-    PXA2xxPCMCIAState *s = PXA2XX_PCMCIA(dev);
-
-    pcmcia_socket_register(&s->slot);
-}
-
 static void pxa2xx_pcmcia_initfn(Object *obj)
 {
     SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
@@ -262,19 +249,11 @@
     s->cd_irq = cd_irq;
 }
 
-static void pxa2xx_pcmcia_class_init(ObjectClass *oc, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(oc);
-
-    dc->realize = pxa2xx_pcmcia_realize;
-}
-
 static const TypeInfo pxa2xx_pcmcia_type_info = {
     .name = TYPE_PXA2XX_PCMCIA,
     .parent = TYPE_SYS_BUS_DEVICE,
     .instance_size = sizeof(PXA2xxPCMCIAState),
     .instance_init = pxa2xx_pcmcia_initfn,
-    .class_init = pxa2xx_pcmcia_class_init,
 };
 
 static void pxa2xx_pcmcia_register_types(void)
diff --git a/include/hw/pcmcia.h b/include/hw/pcmcia.h
index 2695d3c..98406ff 100644
--- a/include/hw/pcmcia.h
+++ b/include/hw/pcmcia.h
@@ -8,14 +8,8 @@
 typedef struct PCMCIASocket {
     qemu_irq irq;
     bool attached;
-    const char *slot_string;
-    const char *card_string;
 } PCMCIASocket;
 
-void pcmcia_socket_register(PCMCIASocket *socket);
-void pcmcia_socket_unregister(PCMCIASocket *socket);
-void pcmcia_info(Monitor *mon, const QDict *qdict);
-
 #define TYPE_PCMCIA_CARD "pcmcia-card"
 #define PCMCIA_CARD(obj) \
     OBJECT_CHECK(PCMCIACardState, (obj), TYPE_PCMCIA_CARD)
diff --git a/monitor.c b/monitor.c
index 07fb36e..1fc201a 100644
--- a/monitor.c
+++ b/monitor.c
@@ -25,7 +25,6 @@
 #include "hw/hw.h"
 #include "monitor/qdev.h"
 #include "hw/usb.h"
-#include "hw/pcmcia.h"
 #include "hw/i386/pc.h"
 #include "hw/pci/pci.h"
 #include "sysemu/watchdog.h"
@@ -2792,13 +2791,6 @@
         .mhandler.cmd = hmp_info_status,
     },
     {
-        .name       = "pcmcia",
-        .args_type  = "",
-        .params     = "",
-        .help       = "show guest PCMCIA status",
-        .mhandler.cmd = pcmcia_info,
-    },
-    {
         .name       = "mice",
         .args_type  = "",
         .params     = "",
diff --git a/target-arm/Makefile.objs b/target-arm/Makefile.objs
index dcd167e..9460b40 100644
--- a/target-arm/Makefile.objs
+++ b/target-arm/Makefile.objs
@@ -7,5 +7,6 @@
 obj-y += translate.o op_helper.o helper.o cpu.o
 obj-y += neon_helper.o iwmmxt_helper.o
 obj-y += gdbstub.o
+obj-$(CONFIG_SOFTMMU) += psci.o
 obj-$(TARGET_AARCH64) += cpu64.o translate-a64.o helper-a64.o gdbstub64.o
 obj-y += crypto_helper.o
diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h
index 96a3da9..dcfda7d 100644
--- a/target-arm/cpu-qom.h
+++ b/target-arm/cpu-qom.h
@@ -98,6 +98,13 @@
 
     /* Should CPU start in PSCI powered-off state? */
     bool start_powered_off;
+    /* CPU currently in PSCI powered-off state */
+    bool powered_off;
+
+    /* PSCI conduit used to invoke PSCI methods
+     * 0 - disabled, 1 - smc, 2 - hvc
+     */
+    uint32_t psci_conduit;
 
     /* [QEMU_]KVM_ARM_TARGET_* constant for this CPU, or
      * QEMU_KVM_ARM_TARGET_NONE if the kernel doesn't support this CPU type.
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index edfd586..e0b82a6 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -40,7 +40,10 @@
 
 static bool arm_cpu_has_work(CPUState *cs)
 {
-    return cs->interrupt_request &
+    ARMCPU *cpu = ARM_CPU(cs);
+
+    return !cpu->powered_off
+        && cs->interrupt_request &
         (CPU_INTERRUPT_FIQ | CPU_INTERRUPT_HARD
          | CPU_INTERRUPT_VFIQ | CPU_INTERRUPT_VIRQ
          | CPU_INTERRUPT_EXITTB);
@@ -93,6 +96,9 @@
     env->vfp.xregs[ARM_VFP_MVFR1] = cpu->mvfr1;
     env->vfp.xregs[ARM_VFP_MVFR2] = cpu->mvfr2;
 
+    cpu->powered_off = cpu->start_powered_off;
+    s->halted = cpu->start_powered_off;
+
     if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
         env->iwmmxt.cregs[ARM_IWMMXT_wCID] = 0x69051000 | 'Q';
     }
@@ -102,8 +108,8 @@
         env->aarch64 = 1;
 #if defined(CONFIG_USER_ONLY)
         env->pstate = PSTATE_MODE_EL0t;
-        /* Userspace expects access to CTL_EL0 and the cache ops */
-        env->cp15.c1_sys |= SCTLR_UCT | SCTLR_UCI;
+        /* Userspace expects access to DC ZVA, CTL_EL0 and the cache ops */
+        env->cp15.c1_sys |= SCTLR_UCT | SCTLR_UCI | SCTLR_DZE;
         /* and to the FP/Neon instructions */
         env->cp15.c1_coproc = deposit64(env->cp15.c1_coproc, 20, 2, 3);
 #else
@@ -328,9 +334,12 @@
     cpu->psci_version = 1; /* By default assume PSCI v0.1 */
     cpu->kvm_target = QEMU_KVM_ARM_TARGET_NONE;
 
-    if (tcg_enabled() && !inited) {
-        inited = true;
-        arm_translate_init();
+    if (tcg_enabled()) {
+        cpu->psci_version = 2; /* TCG implements PSCI 0.2 */
+        if (!inited) {
+            inited = true;
+            arm_translate_init();
+        }
     }
 }
 
@@ -1084,6 +1093,7 @@
 
 static Property arm_cpu_properties[] = {
     DEFINE_PROP_BOOL("start-powered-off", ARMCPU, start_powered_off, false),
+    DEFINE_PROP_UINT32("psci-conduit", ARMCPU, psci_conduit, 0),
     DEFINE_PROP_UINT32("midr", ARMCPU, midr, 0),
     DEFINE_PROP_END_OF_LIST()
 };
@@ -1103,7 +1113,6 @@
 
     cc->class_by_name = arm_cpu_class_by_name;
     cc->has_work = arm_cpu_has_work;
-    cc->do_interrupt = arm_cpu_do_interrupt;
     cc->cpu_exec_interrupt = arm_cpu_exec_interrupt;
     cc->dump_state = arm_cpu_dump_state;
     cc->set_pc = arm_cpu_set_pc;
@@ -1112,6 +1121,7 @@
 #ifdef CONFIG_USER_ONLY
     cc->handle_mmu_fault = arm_cpu_handle_mmu_fault;
 #else
+    cc->do_interrupt = arm_cpu_do_interrupt;
     cc->get_phys_page_debug = arm_cpu_get_phys_page_debug;
     cc->vmsd = &vmstate_arm_cpu;
 #endif
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 65a3417..cb6ec5c 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -100,7 +100,7 @@
 
 struct arm_boot_info;
 
-#define NB_MMU_MODES 2
+#define NB_MMU_MODES 4
 
 /* We currently assume float and double are IEEE single and double
    precision respectively.
@@ -153,8 +153,8 @@
 
     /* Banked registers.  */
     uint64_t banked_spsr[8];
-    uint32_t banked_r13[6];
-    uint32_t banked_r14[6];
+    uint32_t banked_r13[8];
+    uint32_t banked_r14[8];
 
     /* These hold r8-r12.  */
     uint32_t usr_regs[5];
@@ -753,14 +753,62 @@
     return (env->features & (1ULL << feature)) != 0;
 }
 
+#if !defined(CONFIG_USER_ONLY)
+/* Return true if exception levels below EL3 are in secure state,
+ * or would be following an exception return to that level.
+ * Unlike arm_is_secure() (which is always a question about the
+ * _current_ state of the CPU) this doesn't care about the current
+ * EL or mode.
+ */
+static inline bool arm_is_secure_below_el3(CPUARMState *env)
+{
+    if (arm_feature(env, ARM_FEATURE_EL3)) {
+        return !(env->cp15.scr_el3 & SCR_NS);
+    } else {
+        /* If EL2 is not supported then the secure state is implementation
+         * defined, in which case QEMU defaults to non-secure.
+         */
+        return false;
+    }
+}
+
+/* Return true if the processor is in secure state */
+static inline bool arm_is_secure(CPUARMState *env)
+{
+    if (arm_feature(env, ARM_FEATURE_EL3)) {
+        if (is_a64(env) && extract32(env->pstate, 2, 2) == 3) {
+            /* CPU currently in AArch64 state and EL3 */
+            return true;
+        } else if (!is_a64(env) &&
+                (env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_MON) {
+            /* CPU currently in AArch32 state and monitor mode */
+            return true;
+        }
+    }
+    return arm_is_secure_below_el3(env);
+}
+
+#else
+static inline bool arm_is_secure_below_el3(CPUARMState *env)
+{
+    return false;
+}
+
+static inline bool arm_is_secure(CPUARMState *env)
+{
+    return false;
+}
+#endif
+
 /* Return true if the specified exception level is running in AArch64 state. */
 static inline bool arm_el_is_aa64(CPUARMState *env, int el)
 {
-    /* We don't currently support EL2 or EL3, and this isn't valid for EL0
+    /* We don't currently support EL2, and this isn't valid for EL0
      * (if we're in EL0, is_a64() is what you want, and if we're not in EL0
      * then the state of EL0 isn't well defined.)
      */
-    assert(el == 1);
+    assert(el == 1 || el == 3);
+
     /* AArch64-capable CPUs always run with EL1 in AArch64 mode. This
      * is a QEMU-imposed simplification which we may wish to change later.
      * If we in future support EL2 and/or EL3, then the state of lower
@@ -944,19 +992,32 @@
 #define PL1_RW (PL1_R | PL1_W)
 #define PL0_RW (PL0_R | PL0_W)
 
-static inline int arm_current_pl(CPUARMState *env)
+/* Return the current Exception Level (as per ARMv8; note that this differs
+ * from the ARMv7 Privilege Level).
+ */
+static inline int arm_current_el(CPUARMState *env)
 {
-    if (env->aarch64) {
+    if (is_a64(env)) {
         return extract32(env->pstate, 2, 2);
     }
 
-    if ((env->uncached_cpsr & 0x1f) == ARM_CPU_MODE_USR) {
+    switch (env->uncached_cpsr & 0x1f) {
+    case ARM_CPU_MODE_USR:
         return 0;
+    case ARM_CPU_MODE_HYP:
+        return 2;
+    case ARM_CPU_MODE_MON:
+        return 3;
+    default:
+        if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) {
+            /* If EL3 is 32-bit then all secure privileged modes run in
+             * EL3
+             */
+            return 3;
+        }
+
+        return 1;
     }
-    /* We don't currently implement the Virtualization or TrustZone
-     * extensions, so PL2 and PL3 don't exist for us.
-     */
-    return 1;
 }
 
 typedef struct ARMCPRegInfo ARMCPRegInfo;
@@ -1117,10 +1178,10 @@
     return (ri->state == ARM_CP_STATE_AA64) || (ri->type & ARM_CP_64BIT);
 }
 
-static inline bool cp_access_ok(int current_pl,
+static inline bool cp_access_ok(int current_el,
                                 const ARMCPRegInfo *ri, int isread)
 {
-    return (ri->access >> ((current_pl * 2) + isread)) & 1;
+    return (ri->access >> ((current_el * 2) + isread)) & 1;
 }
 
 /**
@@ -1184,7 +1245,7 @@
 static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx)
 {
     CPUARMState *env = cs->env_ptr;
-    unsigned int cur_el = arm_current_pl(env);
+    unsigned int cur_el = arm_current_el(env);
     unsigned int target_el = arm_excp_target_el(cs, excp_idx);
     /* FIXME: Use actual secure state.  */
     bool secure = false;
@@ -1256,7 +1317,7 @@
 #define MMU_USER_IDX 0
 static inline int cpu_mmu_index (CPUARMState *env)
 {
-    return arm_current_pl(env);
+    return arm_current_el(env);
 }
 
 /* Return the Exception Level targeted by debug exceptions;
@@ -1269,7 +1330,7 @@
 
 static inline bool aa64_generate_debug_exceptions(CPUARMState *env)
 {
-    if (arm_current_pl(env) == arm_debug_target_el(env)) {
+    if (arm_current_el(env) == arm_debug_target_el(env)) {
         if ((extract32(env->cp15.mdscr_el1, 13, 1) == 0)
             || (env->daif & PSTATE_D)) {
             return false;
@@ -1280,10 +1341,10 @@
 
 static inline bool aa32_generate_debug_exceptions(CPUARMState *env)
 {
-    if (arm_current_pl(env) == 0 && arm_el_is_aa64(env, 1)) {
+    if (arm_current_el(env) == 0 && arm_el_is_aa64(env, 1)) {
         return aa64_generate_debug_exceptions(env);
     }
-    return arm_current_pl(env) != 2;
+    return arm_current_el(env) != 2;
 }
 
 /* Return true if debugging exceptions are currently enabled.
@@ -1413,8 +1474,8 @@
     if (is_a64(env)) {
         *pc = env->pc;
         *flags = ARM_TBFLAG_AARCH64_STATE_MASK
-            | (arm_current_pl(env) << ARM_TBFLAG_AA64_EL_SHIFT);
-        if (fpen == 3 || (fpen == 1 && arm_current_pl(env) != 0)) {
+            | (arm_current_el(env) << ARM_TBFLAG_AA64_EL_SHIFT);
+        if (fpen == 3 || (fpen == 1 && arm_current_el(env) != 0)) {
             *flags |= ARM_TBFLAG_AA64_FPEN_MASK;
         }
         /* The SS_ACTIVE and PSTATE_SS bits correspond to the state machine
@@ -1450,7 +1511,7 @@
             || arm_el_is_aa64(env, 1)) {
             *flags |= ARM_TBFLAG_VFPEN_MASK;
         }
-        if (fpen == 3 || (fpen == 1 && arm_current_pl(env) != 0)) {
+        if (fpen == 3 || (fpen == 1 && arm_current_el(env) != 0)) {
             *flags |= ARM_TBFLAG_CPACR_FPEN_MASK;
         }
         /* The SS_ACTIVE and PSTATE_SS bits correspond to the state machine
@@ -1484,4 +1545,10 @@
     }
 }
 
+enum {
+    QEMU_PSCI_CONDUIT_DISABLED = 0,
+    QEMU_PSCI_CONDUIT_SMC = 1,
+    QEMU_PSCI_CONDUIT_HVC = 2,
+};
+
 #endif
diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c
index c30f47e..bb778b3 100644
--- a/target-arm/cpu64.c
+++ b/target-arm/cpu64.c
@@ -151,7 +151,7 @@
     set_feature(&cpu->env, ARM_FEATURE_V8_SHA256);
     set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
     set_feature(&cpu->env, ARM_FEATURE_CRC);
-    cpu->ctr = 0x80030003; /* 32 byte I and D cacheline size, VIPT icache */
+    cpu->ctr = 0x80038003; /* 32 byte I and D cacheline size, VIPT icache */
     cpu->dcz_blocksize = 7; /*  512 bytes */
 }
 #endif
@@ -196,7 +196,9 @@
 {
     CPUClass *cc = CPU_CLASS(oc);
 
+#if !defined(CONFIG_USER_ONLY)
     cc->do_interrupt = aarch64_cpu_do_interrupt;
+#endif
     cc->cpu_exec_interrupt = arm_cpu_exec_interrupt;
     cc->set_pc = aarch64_cpu_set_pc;
     cc->gdb_read_register = aarch64_cpu_gdb_read_register;
diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c
index 8228e29..81066ca 100644
--- a/target-arm/helper-a64.c
+++ b/target-arm/helper-a64.c
@@ -438,6 +438,8 @@
     return crc32c(acc, buf, bytes) ^ 0xffffffff;
 }
 
+#if !defined(CONFIG_USER_ONLY)
+
 /* Handle a CPU exception.  */
 void aarch64_cpu_do_interrupt(CPUState *cs)
 {
@@ -448,7 +450,7 @@
     unsigned int new_mode = aarch64_pstate_mode(new_el, true);
     int i;
 
-    if (arm_current_pl(env) < new_el) {
+    if (arm_current_el(env) < new_el) {
         if (env->aarch64) {
             addr += 0x400;
         } else {
@@ -459,13 +461,19 @@
     }
 
     arm_log_exception(cs->exception_index);
-    qemu_log_mask(CPU_LOG_INT, "...from EL%d\n", arm_current_pl(env));
+    qemu_log_mask(CPU_LOG_INT, "...from EL%d\n", arm_current_el(env));
     if (qemu_loglevel_mask(CPU_LOG_INT)
         && !excp_is_internal(cs->exception_index)) {
         qemu_log_mask(CPU_LOG_INT, "...with ESR 0x%" PRIx32 "\n",
                       env->exception.syndrome);
     }
 
+    if (arm_is_psci_call(cpu, cs->exception_index)) {
+        arm_handle_psci_call(cpu);
+        qemu_log_mask(CPU_LOG_INT, "...handled as PSCI call\n");
+        return;
+    }
+
     switch (cs->exception_index) {
     case EXCP_PREFETCH_ABORT:
     case EXCP_DATA_ABORT:
@@ -495,7 +503,7 @@
 
     if (is_a64(env)) {
         env->banked_spsr[aarch64_banked_spsr_index(new_el)] = pstate_read(env);
-        aarch64_save_sp(env, arm_current_pl(env));
+        aarch64_save_sp(env, arm_current_el(env));
         env->elr_el[new_el] = env->pc;
     } else {
         env->banked_spsr[0] = cpsr_read(env);
@@ -518,3 +526,4 @@
     env->pc = addr;
     cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
 }
+#endif
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 2669e15..c47487a 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -571,7 +571,7 @@
     /* Performance monitor registers user accessibility is controlled
      * by PMUSERENR.
      */
-    if (arm_current_pl(env) == 0 && !env->cp15.c9_pmuserenr) {
+    if (arm_current_el(env) == 0 && !env->cp15.c9_pmuserenr) {
         return CP_ACCESS_TRAP;
     }
     return CP_ACCESS_OK;
@@ -996,7 +996,7 @@
 
 static CPAccessResult teehbr_access(CPUARMState *env, const ARMCPRegInfo *ri)
 {
-    if (arm_current_pl(env) == 0 && (env->teecr & 1)) {
+    if (arm_current_el(env) == 0 && (env->teecr & 1)) {
         return CP_ACCESS_TRAP;
     }
     return CP_ACCESS_OK;
@@ -1042,7 +1042,7 @@
 static CPAccessResult gt_cntfrq_access(CPUARMState *env, const ARMCPRegInfo *ri)
 {
     /* CNTFRQ: not visible from PL0 if both PL0PCTEN and PL0VCTEN are zero */
-    if (arm_current_pl(env) == 0 && !extract32(env->cp15.c14_cntkctl, 0, 2)) {
+    if (arm_current_el(env) == 0 && !extract32(env->cp15.c14_cntkctl, 0, 2)) {
         return CP_ACCESS_TRAP;
     }
     return CP_ACCESS_OK;
@@ -1051,7 +1051,7 @@
 static CPAccessResult gt_counter_access(CPUARMState *env, int timeridx)
 {
     /* CNT[PV]CT: not visible from PL0 if ELO[PV]CTEN is zero */
-    if (arm_current_pl(env) == 0 &&
+    if (arm_current_el(env) == 0 &&
         !extract32(env->cp15.c14_cntkctl, timeridx, 1)) {
         return CP_ACCESS_TRAP;
     }
@@ -1063,7 +1063,7 @@
     /* CNT[PV]_CVAL, CNT[PV]_CTL, CNT[PV]_TVAL: not visible from PL0 if
      * EL0[PV]TEN is zero.
      */
-    if (arm_current_pl(env) == 0 &&
+    if (arm_current_el(env) == 0 &&
         !extract32(env->cp15.c14_cntkctl, 9 - timeridx, 1)) {
         return CP_ACCESS_TRAP;
     }
@@ -1911,7 +1911,7 @@
 
 static CPAccessResult aa64_daif_access(CPUARMState *env, const ARMCPRegInfo *ri)
 {
-    if (arm_current_pl(env) == 0 && !(env->cp15.c1_sys & SCTLR_UMA)) {
+    if (arm_current_el(env) == 0 && !(env->cp15.c1_sys & SCTLR_UMA)) {
         return CP_ACCESS_TRAP;
     }
     return CP_ACCESS_OK;
@@ -1929,7 +1929,7 @@
     /* Cache invalidate/clean: NOP, but EL0 must UNDEF unless
      * SCTLR_EL1.UCI is set.
      */
-    if (arm_current_pl(env) == 0 && !(env->cp15.c1_sys & SCTLR_UCI)) {
+    if (arm_current_el(env) == 0 && !(env->cp15.c1_sys & SCTLR_UCI)) {
         return CP_ACCESS_TRAP;
     }
     return CP_ACCESS_OK;
@@ -2006,7 +2006,7 @@
     /* We don't implement EL2, so the only control on DC ZVA is the
      * bit in the SCTLR which can prohibit access for EL0.
      */
-    if (arm_current_pl(env) == 0 && !(env->cp15.c1_sys & SCTLR_DZE)) {
+    if (arm_current_el(env) == 0 && !(env->cp15.c1_sys & SCTLR_DZE)) {
         return CP_ACCESS_TRAP;
     }
     return CP_ACCESS_OK;
@@ -2018,7 +2018,7 @@
     int dzp_bit = 1 << 4;
 
     /* DZP indicates whether DC ZVA access is allowed */
-    if (aa64_zva_access(env, NULL) != CP_ACCESS_OK) {
+    if (aa64_zva_access(env, NULL) == CP_ACCESS_OK) {
         dzp_bit = 0;
     }
     return cpu->dcz_blocksize | dzp_bit;
@@ -2366,7 +2366,7 @@
     /* Only accessible in EL0 if SCTLR.UCT is set (and only in AArch64,
      * but the AArch32 CTR has its own reginfo struct)
      */
-    if (arm_current_pl(env) == 0 && !(env->cp15.c1_sys & SCTLR_UCT)) {
+    if (arm_current_el(env) == 0 && !(env->cp15.c1_sys & SCTLR_UCT)) {
         return CP_ACCESS_TRAP;
     }
     return CP_ACCESS_OK;
@@ -3531,6 +3531,8 @@
     case ARM_CPU_MODE_IRQ:
     case ARM_CPU_MODE_FIQ:
         return 0;
+    case ARM_CPU_MODE_MON:
+        return !arm_is_secure(env);
     default:
         return 1;
     }
@@ -3644,11 +3646,6 @@
 
 #if defined(CONFIG_USER_ONLY)
 
-void arm_cpu_do_interrupt(CPUState *cs)
-{
-    cs->exception_index = -1;
-}
-
 int arm_cpu_handle_mmu_fault(CPUState *cs, vaddr address, int rw,
                              int mmu_idx)
 {
@@ -3771,7 +3768,7 @@
 {
     ARMCPU *cpu = ARM_CPU(cs);
     CPUARMState *env = &cpu->env;
-    unsigned int cur_el = arm_current_pl(env);
+    unsigned int cur_el = arm_current_el(env);
     unsigned int target_el;
     /* FIXME: Use actual secure state.  */
     bool secure = false;
@@ -3975,6 +3972,12 @@
 
     arm_log_exception(cs->exception_index);
 
+    if (arm_is_psci_call(cpu, cs->exception_index)) {
+        arm_handle_psci_call(cpu);
+        qemu_log_mask(CPU_LOG_INT, "...handled as PSCI call\n");
+        return;
+    }
+
     /* If this is a debug exception we must update the DBGDSCR.MOE bits */
     switch (env->exception.syndrome >> ARM_EL_EC_SHIFT) {
     case EC_BREAKPOINT:
@@ -4088,6 +4091,12 @@
         mask = CPSR_A | CPSR_I | CPSR_F;
         offset = 4;
         break;
+    case EXCP_SMC:
+        new_mode = ARM_CPU_MODE_MON;
+        addr = 0x08;
+        mask = CPSR_A | CPSR_I | CPSR_F;
+        offset = 0;
+        break;
     default:
         cpu_abort(cs, "Unhandled exception 0x%x\n", cs->exception_index);
         return; /* Never happens.  Keep compiler happy.  */
@@ -4106,6 +4115,11 @@
          */
         addr += env->cp15.vbar_el[1];
     }
+
+    if ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_MON) {
+        env->cp15.scr_el3 &= ~SCR_NS;
+    }
+
     switch_mode (env, new_mode);
     /* For exceptions taken to AArch32 we must clear the SS bit in both
      * PSTATE and in the old-state value we save to SPSR_<mode>, so zero it now.
@@ -4767,7 +4781,7 @@
     int prot;
     int ret, is_user;
     uint32_t syn;
-    bool same_el = (arm_current_pl(env) != 0);
+    bool same_el = (arm_current_el(env) != 0);
 
     is_user = mmu_idx == MMU_USER_IDX;
     ret = get_phys_addr(env, address, access_type, is_user, &phys_addr, &prot,
diff --git a/target-arm/internals.h b/target-arm/internals.h
index b7547bb..2dff4ff 100644
--- a/target-arm/internals.h
+++ b/target-arm/internals.h
@@ -130,7 +130,7 @@
 
 static inline void update_spsel(CPUARMState *env, uint32_t imm)
 {
-    unsigned int cur_el = arm_current_pl(env);
+    unsigned int cur_el = arm_current_el(env);
     /* Update PSTATE SPSel bit; this requires us to update the
      * working stack pointer in xregs[31].
      */
@@ -236,6 +236,16 @@
         | (is_thumb ? 0 : ARM_EL_IL);
 }
 
+static inline uint32_t syn_aa32_hvc(uint32_t imm16)
+{
+    return (EC_AA32_HVC << ARM_EL_EC_SHIFT) | ARM_EL_IL | (imm16 & 0xffff);
+}
+
+static inline uint32_t syn_aa32_smc(void)
+{
+    return (EC_AA32_SMC << ARM_EL_EC_SHIFT) | ARM_EL_IL;
+}
+
 static inline uint32_t syn_aa64_bkpt(uint32_t imm16)
 {
     return (EC_AA64_BKPT << ARM_EL_EC_SHIFT) | ARM_EL_IL | (imm16 & 0xffff);
@@ -356,4 +366,16 @@
 /* Callback function for when a watchpoint or breakpoint triggers. */
 void arm_debug_excp_handler(CPUState *cs);
 
+#ifdef CONFIG_USER_ONLY
+static inline bool arm_is_psci_call(ARMCPU *cpu, int excp_type)
+{
+    return false;
+}
+#else
+/* Return true if the r0/x0 value indicates that this SMC/HVC is a PSCI call. */
+bool arm_is_psci_call(ARMCPU *cpu, int excp_type);
+/* Actually handle a PSCI call */
+void arm_handle_psci_call(ARMCPU *cpu);
+#endif
+
 #endif
diff --git a/target-arm/kvm-consts.h b/target-arm/kvm-consts.h
index 091c126..aea12f1 100644
--- a/target-arm/kvm-consts.h
+++ b/target-arm/kvm-consts.h
@@ -59,14 +59,21 @@
         (QEMU_PSCI_0_2_FN_BASE + QEMU_PSCI_0_2_64BIT)
 #define QEMU_PSCI_0_2_FN64(n) (QEMU_PSCI_0_2_FN64_BASE + (n))
 
+#define QEMU_PSCI_0_2_FN_PSCI_VERSION QEMU_PSCI_0_2_FN(0)
 #define QEMU_PSCI_0_2_FN_CPU_SUSPEND QEMU_PSCI_0_2_FN(1)
 #define QEMU_PSCI_0_2_FN_CPU_OFF QEMU_PSCI_0_2_FN(2)
 #define QEMU_PSCI_0_2_FN_CPU_ON QEMU_PSCI_0_2_FN(3)
+#define QEMU_PSCI_0_2_FN_AFFINITY_INFO QEMU_PSCI_0_2_FN(4)
 #define QEMU_PSCI_0_2_FN_MIGRATE QEMU_PSCI_0_2_FN(5)
+#define QEMU_PSCI_0_2_FN_MIGRATE_INFO_TYPE QEMU_PSCI_0_2_FN(6)
+#define QEMU_PSCI_0_2_FN_MIGRATE_INFO_UP_CPU QEMU_PSCI_0_2_FN(7)
+#define QEMU_PSCI_0_2_FN_SYSTEM_OFF QEMU_PSCI_0_2_FN(8)
+#define QEMU_PSCI_0_2_FN_SYSTEM_RESET QEMU_PSCI_0_2_FN(9)
 
 #define QEMU_PSCI_0_2_FN64_CPU_SUSPEND QEMU_PSCI_0_2_FN64(1)
 #define QEMU_PSCI_0_2_FN64_CPU_OFF QEMU_PSCI_0_2_FN64(2)
 #define QEMU_PSCI_0_2_FN64_CPU_ON QEMU_PSCI_0_2_FN64(3)
+#define QEMU_PSCI_0_2_FN64_AFFINITY_INFO QEMU_PSCI_0_2_FN64(4)
 #define QEMU_PSCI_0_2_FN64_MIGRATE QEMU_PSCI_0_2_FN64(5)
 
 MISMATCH_CHECK(QEMU_PSCI_0_2_FN_CPU_SUSPEND, PSCI_0_2_FN_CPU_SUSPEND)
@@ -77,6 +84,39 @@
 MISMATCH_CHECK(QEMU_PSCI_0_2_FN64_CPU_ON, PSCI_0_2_FN64_CPU_ON)
 MISMATCH_CHECK(QEMU_PSCI_0_2_FN64_MIGRATE, PSCI_0_2_FN64_MIGRATE)
 
+/* PSCI v0.2 return values used by TCG emulation of PSCI */
+
+/* No Trusted OS migration to worry about when offlining CPUs */
+#define QEMU_PSCI_0_2_RET_TOS_MIGRATION_NOT_REQUIRED        2
+
+/* We implement version 0.2 only */
+#define QEMU_PSCI_0_2_RET_VERSION_0_2                       2
+
+MISMATCH_CHECK(QEMU_PSCI_0_2_RET_TOS_MIGRATION_NOT_REQUIRED, PSCI_0_2_TOS_MP)
+MISMATCH_CHECK(QEMU_PSCI_0_2_RET_VERSION_0_2,
+               (PSCI_VERSION_MAJOR(0) | PSCI_VERSION_MINOR(2)))
+
+/* PSCI return values (inclusive of all PSCI versions) */
+#define QEMU_PSCI_RET_SUCCESS                     0
+#define QEMU_PSCI_RET_NOT_SUPPORTED               -1
+#define QEMU_PSCI_RET_INVALID_PARAMS              -2
+#define QEMU_PSCI_RET_DENIED                      -3
+#define QEMU_PSCI_RET_ALREADY_ON                  -4
+#define QEMU_PSCI_RET_ON_PENDING                  -5
+#define QEMU_PSCI_RET_INTERNAL_FAILURE            -6
+#define QEMU_PSCI_RET_NOT_PRESENT                 -7
+#define QEMU_PSCI_RET_DISABLED                    -8
+
+MISMATCH_CHECK(QEMU_PSCI_RET_SUCCESS, PSCI_RET_SUCCESS)
+MISMATCH_CHECK(QEMU_PSCI_RET_NOT_SUPPORTED, PSCI_RET_NOT_SUPPORTED)
+MISMATCH_CHECK(QEMU_PSCI_RET_INVALID_PARAMS, PSCI_RET_INVALID_PARAMS)
+MISMATCH_CHECK(QEMU_PSCI_RET_DENIED, PSCI_RET_DENIED)
+MISMATCH_CHECK(QEMU_PSCI_RET_ALREADY_ON, PSCI_RET_ALREADY_ON)
+MISMATCH_CHECK(QEMU_PSCI_RET_ON_PENDING, PSCI_RET_ON_PENDING)
+MISMATCH_CHECK(QEMU_PSCI_RET_INTERNAL_FAILURE, PSCI_RET_INTERNAL_FAILURE)
+MISMATCH_CHECK(QEMU_PSCI_RET_NOT_PRESENT, PSCI_RET_NOT_PRESENT)
+MISMATCH_CHECK(QEMU_PSCI_RET_DISABLED, PSCI_RET_DISABLED)
+
 /* Note that KVM uses overlapping values for AArch32 and AArch64
  * target CPU numbers. AArch32 targets:
  */
diff --git a/target-arm/machine.c b/target-arm/machine.c
index ddb7d05..6437690 100644
--- a/target-arm/machine.c
+++ b/target-arm/machine.c
@@ -222,8 +222,8 @@
 
 const VMStateDescription vmstate_arm_cpu = {
     .name = "cpu",
-    .version_id = 20,
-    .minimum_version_id = 20,
+    .version_id = 21,
+    .minimum_version_id = 21,
     .pre_save = cpu_pre_save,
     .post_load = cpu_post_load,
     .fields = (VMStateField[]) {
@@ -238,8 +238,8 @@
         },
         VMSTATE_UINT32(env.spsr, ARMCPU),
         VMSTATE_UINT64_ARRAY(env.banked_spsr, ARMCPU, 8),
-        VMSTATE_UINT32_ARRAY(env.banked_r13, ARMCPU, 6),
-        VMSTATE_UINT32_ARRAY(env.banked_r14, ARMCPU, 6),
+        VMSTATE_UINT32_ARRAY(env.banked_r13, ARMCPU, 8),
+        VMSTATE_UINT32_ARRAY(env.banked_r14, ARMCPU, 8),
         VMSTATE_UINT32_ARRAY(env.usr_regs, ARMCPU, 5),
         VMSTATE_UINT32_ARRAY(env.fiq_regs, ARMCPU, 5),
         VMSTATE_UINT64_ARRAY(env.elr_el, ARMCPU, 4),
@@ -263,6 +263,7 @@
         VMSTATE_UINT64(env.exception.vaddress, ARMCPU),
         VMSTATE_TIMER(gt_timer[GTIMER_PHYS], ARMCPU),
         VMSTATE_TIMER(gt_timer[GTIMER_VIRT], ARMCPU),
+        VMSTATE_BOOL(powered_off, ARMCPU),
         VMSTATE_END_OF_LIST()
     },
     .subsections = (VMStateSubsection[]) {
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index 03ac92a..62012c3 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -361,7 +361,7 @@
      * Note that SPSel is never OK from EL0; we rely on handle_msr_i()
      * to catch that case at translate time.
      */
-    if (arm_current_pl(env) == 0 && !(env->cp15.c1_sys & SCTLR_UMA)) {
+    if (arm_current_el(env) == 0 && !(env->cp15.c1_sys & SCTLR_UMA)) {
         raise_exception(env, EXCP_UDEF);
     }
 
@@ -387,15 +387,24 @@
 
 void HELPER(pre_hvc)(CPUARMState *env)
 {
-    int cur_el = arm_current_pl(env);
+    ARMCPU *cpu = arm_env_get_cpu(env);
+    int cur_el = arm_current_el(env);
     /* FIXME: Use actual secure state.  */
     bool secure = false;
     bool undef;
 
-    /* We've already checked that EL2 exists at translation time.
-     * EL3.HCE has priority over EL2.HCD.
-     */
-    if (arm_feature(env, ARM_FEATURE_EL3)) {
+    if (arm_is_psci_call(cpu, EXCP_HVC)) {
+        /* If PSCI is enabled and this looks like a valid PSCI call then
+         * that overrides the architecturally mandated HVC behaviour.
+         */
+        return;
+    }
+
+    if (!arm_feature(env, ARM_FEATURE_EL2)) {
+        /* If EL2 doesn't exist, HVC always UNDEFs */
+        undef = true;
+    } else if (arm_feature(env, ARM_FEATURE_EL3)) {
+        /* EL3.HCE has priority over EL2.HCD. */
         undef = !(env->cp15.scr_el3 & SCR_HCE);
     } else {
         undef = env->cp15.hcr_el2 & HCR_HCD;
@@ -418,9 +427,9 @@
 
 void HELPER(pre_smc)(CPUARMState *env, uint32_t syndrome)
 {
-    int cur_el = arm_current_pl(env);
-    /* FIXME: Use real secure state.  */
-    bool secure = false;
+    ARMCPU *cpu = arm_env_get_cpu(env);
+    int cur_el = arm_current_el(env);
+    bool secure = arm_is_secure(env);
     bool smd = env->cp15.scr_el3 & SCR_SMD;
     /* On ARMv8 AArch32, SMD only applies to NS state.
      * On ARMv7 SMD only applies to NS state and only if EL2 is available.
@@ -429,13 +438,22 @@
      */
     bool undef = is_a64(env) ? smd : (!secure && smd);
 
-    /* In NS EL1, HCR controlled routing to EL2 has priority over SMD.  */
-    if (!secure && cur_el == 1 && (env->cp15.hcr_el2 & HCR_TSC)) {
+    if (arm_is_psci_call(cpu, EXCP_SMC)) {
+        /* If PSCI is enabled and this looks like a valid PSCI call then
+         * that overrides the architecturally mandated SMC behaviour.
+         */
+        return;
+    }
+
+    if (!arm_feature(env, ARM_FEATURE_EL3)) {
+        /* If we have no EL3 then SMC always UNDEFs */
+        undef = true;
+    } else if (!secure && cur_el == 1 && (env->cp15.hcr_el2 & HCR_TSC)) {
+        /* In NS EL1, HCR controlled routing to EL2 has priority over SMD. */
         env->exception.syndrome = syndrome;
         raise_exception(env, EXCP_HYP_TRAP);
     }
 
-    /* We've already checked that EL3 exists at translation time.  */
     if (undef) {
         env->exception.syndrome = syn_uncategorized();
         raise_exception(env, EXCP_UDEF);
@@ -444,7 +462,7 @@
 
 void HELPER(exception_return)(CPUARMState *env)
 {
-    int cur_el = arm_current_pl(env);
+    int cur_el = arm_current_el(env);
     unsigned int spsr_idx = aarch64_banked_spsr_index(cur_el);
     uint32_t spsr = env->banked_spsr[spsr_idx];
     int new_el, i;
@@ -561,7 +579,7 @@
 
     switch (bt) {
     case 3: /* linked context ID match */
-        if (arm_current_pl(env) > 1) {
+        if (arm_current_el(env) > 1) {
             /* Context matches never fire in EL2 or (AArch64) EL3 */
             return false;
         }
@@ -641,7 +659,7 @@
      * rely on this behaviour currently.
      * For breakpoints we do want to use the current CPU state.
      */
-    switch (arm_current_pl(env)) {
+    switch (arm_current_el(env)) {
     case 3:
     case 2:
         if (!hmc) {
@@ -728,7 +746,7 @@
             cs->watchpoint_hit = NULL;
             if (check_watchpoints(cpu)) {
                 bool wnr = (wp_hit->flags & BP_WATCHPOINT_HIT_WRITE) != 0;
-                bool same_el = arm_debug_target_el(env) == arm_current_pl(env);
+                bool same_el = arm_debug_target_el(env) == arm_current_el(env);
 
                 env->exception.syndrome = syn_watchpoint(same_el, 0, wnr);
                 if (extended_addresses_enabled(env)) {
@@ -744,7 +762,7 @@
         }
     } else {
         if (check_breakpoints(cpu)) {
-            bool same_el = (arm_debug_target_el(env) == arm_current_pl(env));
+            bool same_el = (arm_debug_target_el(env) == arm_current_el(env));
             env->exception.syndrome = syn_breakpoint(same_el);
             if (extended_addresses_enabled(env)) {
                 env->exception.fsr = (1 << 9) | 0x22;
diff --git a/target-arm/psci.c b/target-arm/psci.c
new file mode 100644
index 0000000..d8fafab
--- /dev/null
+++ b/target-arm/psci.c
@@ -0,0 +1,242 @@
+/*
+ * Copyright (C) 2014 - Linaro
+ * Author: Rob Herring <rob.herring@linaro.org>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <cpu.h>
+#include <cpu-qom.h>
+#include <exec/helper-proto.h>
+#include <kvm-consts.h>
+#include <sysemu/sysemu.h>
+#include "internals.h"
+
+bool arm_is_psci_call(ARMCPU *cpu, int excp_type)
+{
+    /* Return true if the r0/x0 value indicates a PSCI call and
+     * the exception type matches the configured PSCI conduit. This is
+     * called before the SMC/HVC instruction is executed, to decide whether
+     * we should treat it as a PSCI call or with the architecturally
+     * defined behaviour for an SMC or HVC (which might be UNDEF or trap
+     * to EL2 or to EL3).
+     */
+    CPUARMState *env = &cpu->env;
+    uint64_t param = is_a64(env) ? env->xregs[0] : env->regs[0];
+
+    switch (excp_type) {
+    case EXCP_HVC:
+        if (cpu->psci_conduit != QEMU_PSCI_CONDUIT_HVC) {
+            return false;
+        }
+        break;
+    case EXCP_SMC:
+        if (cpu->psci_conduit != QEMU_PSCI_CONDUIT_SMC) {
+            return false;
+        }
+        break;
+    default:
+        return false;
+    }
+
+    switch (param) {
+    case QEMU_PSCI_0_2_FN_PSCI_VERSION:
+    case QEMU_PSCI_0_2_FN_MIGRATE_INFO_TYPE:
+    case QEMU_PSCI_0_2_FN_AFFINITY_INFO:
+    case QEMU_PSCI_0_2_FN64_AFFINITY_INFO:
+    case QEMU_PSCI_0_2_FN_SYSTEM_RESET:
+    case QEMU_PSCI_0_2_FN_SYSTEM_OFF:
+    case QEMU_PSCI_0_1_FN_CPU_ON:
+    case QEMU_PSCI_0_2_FN_CPU_ON:
+    case QEMU_PSCI_0_2_FN64_CPU_ON:
+    case QEMU_PSCI_0_1_FN_CPU_OFF:
+    case QEMU_PSCI_0_2_FN_CPU_OFF:
+    case QEMU_PSCI_0_1_FN_CPU_SUSPEND:
+    case QEMU_PSCI_0_2_FN_CPU_SUSPEND:
+    case QEMU_PSCI_0_2_FN64_CPU_SUSPEND:
+    case QEMU_PSCI_0_1_FN_MIGRATE:
+    case QEMU_PSCI_0_2_FN_MIGRATE:
+        return true;
+    default:
+        return false;
+    }
+}
+
+void arm_handle_psci_call(ARMCPU *cpu)
+{
+    /*
+     * This function partially implements the logic for dispatching Power State
+     * Coordination Interface (PSCI) calls (as described in ARM DEN 0022B.b),
+     * to the extent required for bringing up and taking down secondary cores,
+     * and for handling reset and poweroff requests.
+     * Additional information about the calling convention used is available in
+     * the document 'SMC Calling Convention' (ARM DEN 0028)
+     */
+    CPUState *cs = CPU(cpu);
+    CPUARMState *env = &cpu->env;
+    uint64_t param[4];
+    uint64_t context_id, mpidr;
+    target_ulong entry;
+    int32_t ret = 0;
+    int i;
+
+    for (i = 0; i < 4; i++) {
+        /*
+         * All PSCI functions take explicit 32-bit or native int sized
+         * arguments so we can simply zero-extend all arguments regardless
+         * of which exact function we are about to call.
+         */
+        param[i] = is_a64(env) ? env->xregs[i] : env->regs[i];
+    }
+
+    if ((param[0] & QEMU_PSCI_0_2_64BIT) && !is_a64(env)) {
+        ret = QEMU_PSCI_RET_INVALID_PARAMS;
+        goto err;
+    }
+
+    switch (param[0]) {
+        CPUState *target_cpu_state;
+        ARMCPU *target_cpu;
+        CPUClass *target_cpu_class;
+
+    case QEMU_PSCI_0_2_FN_PSCI_VERSION:
+        ret = QEMU_PSCI_0_2_RET_VERSION_0_2;
+        break;
+    case QEMU_PSCI_0_2_FN_MIGRATE_INFO_TYPE:
+        ret = QEMU_PSCI_0_2_RET_TOS_MIGRATION_NOT_REQUIRED; /* No trusted OS */
+        break;
+    case QEMU_PSCI_0_2_FN_AFFINITY_INFO:
+    case QEMU_PSCI_0_2_FN64_AFFINITY_INFO:
+        mpidr = param[1];
+
+        switch (param[2]) {
+        case 0:
+            target_cpu_state = qemu_get_cpu(mpidr & 0xff);
+            if (!target_cpu_state) {
+                ret = QEMU_PSCI_RET_INVALID_PARAMS;
+                break;
+            }
+            target_cpu = ARM_CPU(target_cpu_state);
+            ret = target_cpu->powered_off ? 1 : 0;
+            break;
+        default:
+            /* Everything above affinity level 0 is always on. */
+            ret = 0;
+        }
+        break;
+    case QEMU_PSCI_0_2_FN_SYSTEM_RESET:
+        qemu_system_reset_request();
+        /* QEMU reset and shutdown are async requests, but PSCI
+         * mandates that we never return from the reset/shutdown
+         * call, so power the CPU off now so it doesn't execute
+         * anything further.
+         */
+        goto cpu_off;
+    case QEMU_PSCI_0_2_FN_SYSTEM_OFF:
+        qemu_system_shutdown_request();
+        goto cpu_off;
+    case QEMU_PSCI_0_1_FN_CPU_ON:
+    case QEMU_PSCI_0_2_FN_CPU_ON:
+    case QEMU_PSCI_0_2_FN64_CPU_ON:
+        mpidr = param[1];
+        entry = param[2];
+        context_id = param[3];
+
+        /* change to the cpu we are powering up */
+        target_cpu_state = qemu_get_cpu(mpidr & 0xff);
+        if (!target_cpu_state) {
+            ret = QEMU_PSCI_RET_INVALID_PARAMS;
+            break;
+        }
+        target_cpu = ARM_CPU(target_cpu_state);
+        if (!target_cpu->powered_off) {
+            ret = QEMU_PSCI_RET_ALREADY_ON;
+            break;
+        }
+        target_cpu_class = CPU_GET_CLASS(target_cpu);
+
+        /* Initialize the cpu we are turning on */
+        cpu_reset(target_cpu_state);
+        target_cpu->powered_off = false;
+        target_cpu_state->halted = 0;
+
+        /*
+         * The PSCI spec mandates that newly brought up CPUs enter the
+         * exception level of the caller in the same execution mode as
+         * the caller, with context_id in x0/r0, respectively.
+         *
+         * For now, it is sufficient to assert() that CPUs come out of
+         * reset in the same mode as the calling CPU, since we only
+         * implement EL1, which means that
+         * (a) there is no EL2 for the calling CPU to trap into to change
+         *     its state
+         * (b) the newly brought up CPU enters EL1 immediately after coming
+         *     out of reset in the default state
+         */
+        assert(is_a64(env) == is_a64(&target_cpu->env));
+        if (is_a64(env)) {
+            if (entry & 1) {
+                ret = QEMU_PSCI_RET_INVALID_PARAMS;
+                break;
+            }
+            target_cpu->env.xregs[0] = context_id;
+        } else {
+            target_cpu->env.regs[0] = context_id;
+            target_cpu->env.thumb = entry & 1;
+        }
+        target_cpu_class->set_pc(target_cpu_state, entry);
+
+        ret = 0;
+        break;
+    case QEMU_PSCI_0_1_FN_CPU_OFF:
+    case QEMU_PSCI_0_2_FN_CPU_OFF:
+        goto cpu_off;
+    case QEMU_PSCI_0_1_FN_CPU_SUSPEND:
+    case QEMU_PSCI_0_2_FN_CPU_SUSPEND:
+    case QEMU_PSCI_0_2_FN64_CPU_SUSPEND:
+        /* Affinity levels are not supported in QEMU */
+        if (param[1] & 0xfffe0000) {
+            ret = QEMU_PSCI_RET_INVALID_PARAMS;
+            break;
+        }
+        /* Powerdown is not supported, we always go into WFI */
+        if (is_a64(env)) {
+            env->xregs[0] = 0;
+        } else {
+            env->regs[0] = 0;
+        }
+        helper_wfi(env);
+        break;
+    case QEMU_PSCI_0_1_FN_MIGRATE:
+    case QEMU_PSCI_0_2_FN_MIGRATE:
+        ret = QEMU_PSCI_RET_NOT_SUPPORTED;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+err:
+    if (is_a64(env)) {
+        env->xregs[0] = ret;
+    } else {
+        env->regs[0] = ret;
+    }
+    return;
+
+cpu_off:
+    cpu->powered_off = true;
+    cs->halted = 1;
+    cs->exception_index = EXCP_HLT;
+    cpu_loop_exit(cs);
+    /* notreached */
+}
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 35ae3ea..3a3c48a 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -1226,7 +1226,7 @@
     int op = op1 << 3 | op2;
     switch (op) {
     case 0x05: /* SPSel */
-        if (s->current_pl == 0) {
+        if (s->current_el == 0) {
             unallocated_encoding(s);
             return;
         }
@@ -1323,7 +1323,7 @@
     }
 
     /* Check access permissions */
-    if (!cp_access_ok(s->current_pl, ri, isread)) {
+    if (!cp_access_ok(s->current_el, ri, isread)) {
         unallocated_encoding(s);
         return;
     }
@@ -1362,7 +1362,7 @@
          * guaranteed to be constant by the tb flags.
          */
         tcg_rt = cpu_reg(s, rt);
-        tcg_gen_movi_i64(tcg_rt, s->current_pl << 2);
+        tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
         return;
     case ARM_CP_DC_ZVA:
         /* Writes clear the aligned block of memory which rt points into. */
@@ -1485,7 +1485,7 @@
             gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16));
             break;
         case 2:
-            if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_pl == 0) {
+            if (s->current_el == 0) {
                 unallocated_encoding(s);
                 break;
             }
@@ -1498,7 +1498,7 @@
             gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16));
             break;
         case 3:
-            if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->current_pl == 0) {
+            if (s->current_el == 0) {
                 unallocated_encoding(s);
                 break;
             }
@@ -1575,7 +1575,7 @@
         tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
         break;
     case 4: /* ERET */
-        if (s->current_pl == 0) {
+        if (s->current_el == 0) {
             unallocated_encoding(s);
             return;
         }
@@ -10930,7 +10930,7 @@
     dc->vec_len = 0;
     dc->vec_stride = 0;
     dc->cp_regs = cpu->cp_regs;
-    dc->current_pl = arm_current_pl(env);
+    dc->current_el = arm_current_el(env);
     dc->features = env->features;
 
     /* Single step state. The code-generation logic here is:
@@ -10951,7 +10951,7 @@
     dc->ss_active = ARM_TBFLAG_AA64_SS_ACTIVE(tb->flags);
     dc->pstate_ss = ARM_TBFLAG_AA64_PSTATE_SS(tb->flags);
     dc->is_ldex = false;
-    dc->ss_same_el = (arm_debug_target_el(env) == dc->current_pl);
+    dc->ss_same_el = (arm_debug_target_el(env) == dc->current_el);
 
     init_tmp_a64_array(dc);
 
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 8a2994f..1d52e47 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -941,6 +941,39 @@
     tcg_gen_movi_i32(cpu_R[15], val);
 }
 
+static inline void gen_hvc(DisasContext *s, int imm16)
+{
+    /* The pre HVC helper handles cases when HVC gets trapped
+     * as an undefined insn by runtime configuration (ie before
+     * the insn really executes).
+     */
+    gen_set_pc_im(s, s->pc - 4);
+    gen_helper_pre_hvc(cpu_env);
+    /* Otherwise we will treat this as a real exception which
+     * happens after execution of the insn. (The distinction matters
+     * for the PC value reported to the exception handler and also
+     * for single stepping.)
+     */
+    s->svc_imm = imm16;
+    gen_set_pc_im(s, s->pc);
+    s->is_jmp = DISAS_HVC;
+}
+
+static inline void gen_smc(DisasContext *s)
+{
+    /* As with HVC, we may take an exception either before or after
+     * the insn executes.
+     */
+    TCGv_i32 tmp;
+
+    gen_set_pc_im(s, s->pc - 4);
+    tmp = tcg_const_i32(syn_aa32_smc());
+    gen_helper_pre_smc(cpu_env, tmp);
+    tcg_temp_free_i32(tmp);
+    gen_set_pc_im(s, s->pc);
+    s->is_jmp = DISAS_SMC;
+}
+
 static inline void
 gen_set_condexec (DisasContext *s)
 {
@@ -3199,6 +3232,9 @@
                             break;
                         case ARM_VFP_FPINST:
                         case ARM_VFP_FPINST2:
+                            if (IS_USER(s)) {
+                                return 1;
+                            }
                             tmp = load_reg(s, rd);
                             store_cpu_field(tmp, vfp.xregs[rn]);
                             break;
@@ -7041,7 +7077,7 @@
                             ENCODE_CP_REG(cpnum, is64, crn, crm, opc1, opc2));
     if (ri) {
         /* Check access permissions */
-        if (!cp_access_ok(s->current_pl, ri, isread)) {
+        if (!cp_access_ok(s->current_el, ri, isread)) {
             return 1;
         }
 
@@ -7872,15 +7908,32 @@
         case 7:
         {
             int imm16 = extract32(insn, 0, 4) | (extract32(insn, 8, 12) << 4);
-            /* SMC instruction (op1 == 3)
-               and undefined instructions (op1 == 0 || op1 == 2)
-               will trap */
-            if (op1 != 1) {
+            switch (op1) {
+            case 1:
+                /* bkpt */
+                ARCH(5);
+                gen_exception_insn(s, 4, EXCP_BKPT,
+                                   syn_aa32_bkpt(imm16, false));
+                break;
+            case 2:
+                /* Hypervisor call (v7) */
+                ARCH(7);
+                if (IS_USER(s)) {
+                    goto illegal_op;
+                }
+                gen_hvc(s, imm16);
+                break;
+            case 3:
+                /* Secure monitor call (v6+) */
+                ARCH(6K);
+                if (IS_USER(s)) {
+                    goto illegal_op;
+                }
+                gen_smc(s);
+                break;
+            default:
                 goto illegal_op;
             }
-            /* bkpt */
-            ARCH(5);
-            gen_exception_insn(s, 4, EXCP_BKPT, syn_aa32_bkpt(imm16, false));
             break;
         }
         case 0x8: /* signed multiply */
@@ -9710,10 +9763,23 @@
                     goto illegal_op;
 
                 if (insn & (1 << 26)) {
-                    /* Secure monitor call (v6Z) */
-                    qemu_log_mask(LOG_UNIMP,
-                                  "arm: unimplemented secure monitor call\n");
-                    goto illegal_op; /* not implemented.  */
+                    if (!(insn & (1 << 20))) {
+                        /* Hypervisor call (v7) */
+                        int imm16 = extract32(insn, 16, 4) << 12
+                            | extract32(insn, 0, 12);
+                        ARCH(7);
+                        if (IS_USER(s)) {
+                            goto illegal_op;
+                        }
+                        gen_hvc(s, imm16);
+                    } else {
+                        /* Secure monitor call (v6+) */
+                        ARCH(6K);
+                        if (IS_USER(s)) {
+                            goto illegal_op;
+                        }
+                        gen_smc(s);
+                    }
                 } else {
                     op = (insn >> 20) & 7;
                     switch (op) {
@@ -10945,7 +11011,7 @@
     dc->vec_stride = ARM_TBFLAG_VECSTRIDE(tb->flags);
     dc->c15_cpar = ARM_TBFLAG_XSCALE_CPAR(tb->flags);
     dc->cp_regs = cpu->cp_regs;
-    dc->current_pl = arm_current_pl(env);
+    dc->current_el = arm_current_el(env);
     dc->features = env->features;
 
     /* Single step state. The code-generation logic here is:
@@ -11148,6 +11214,12 @@
             if (dc->is_jmp == DISAS_SWI) {
                 gen_ss_advance(dc);
                 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
+            } else if (dc->is_jmp == DISAS_HVC) {
+                gen_ss_advance(dc);
+                gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm));
+            } else if (dc->is_jmp == DISAS_SMC) {
+                gen_ss_advance(dc);
+                gen_exception(EXCP_SMC, syn_aa32_smc());
             } else if (dc->ss_active) {
                 gen_step_complete_exception(dc);
             } else {
@@ -11163,6 +11235,12 @@
         if (dc->is_jmp == DISAS_SWI && !dc->condjmp) {
             gen_ss_advance(dc);
             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
+        } else if (dc->is_jmp == DISAS_HVC && !dc->condjmp) {
+            gen_ss_advance(dc);
+            gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm));
+        } else if (dc->is_jmp == DISAS_SMC && !dc->condjmp) {
+            gen_ss_advance(dc);
+            gen_exception(EXCP_SMC, syn_aa32_smc());
         } else if (dc->ss_active) {
             gen_step_complete_exception(dc);
         } else {
@@ -11202,6 +11280,12 @@
         case DISAS_SWI:
             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
             break;
+        case DISAS_HVC:
+            gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm));
+            break;
+        case DISAS_SMC:
+            gen_exception(EXCP_SMC, syn_aa32_smc());
+            break;
         }
         if (dc->condjmp) {
             gen_set_label(dc->condlabel);
diff --git a/target-arm/translate.h b/target-arm/translate.h
index 85c6f9d..41a9071 100644
--- a/target-arm/translate.h
+++ b/target-arm/translate.h
@@ -29,7 +29,7 @@
      */
     uint32_t svc_imm;
     int aarch64;
-    int current_pl;
+    int current_el;
     GHashTable *cp_regs;
     uint64_t features; /* CPU features bits */
     /* Because unallocated encodings generate different exception syndrome
@@ -68,7 +68,7 @@
 
 static inline int get_mem_index(DisasContext *s)
 {
-    return s->current_pl;
+    return s->current_el;
 }
 
 /* target-specific extra values for is_jmp */
@@ -84,6 +84,8 @@
 #define DISAS_EXC 6
 /* WFE */
 #define DISAS_WFE 7
+#define DISAS_HVC 8
+#define DISAS_SMC 9
 
 #ifdef TARGET_AARCH64
 void a64_translate_init(void);
diff --git a/vl.c b/vl.c
index aee73e1..2f81384 100644
--- a/vl.c
+++ b/vl.c
@@ -63,7 +63,6 @@
 #include "hw/boards.h"
 #include "sysemu/accel.h"
 #include "hw/usb.h"
-#include "hw/pcmcia.h"
 #include "hw/i386/pc.h"
 #include "hw/isa/isa.h"
 #include "hw/bt.h"
@@ -1408,49 +1407,6 @@
 }
 
 /***********************************************************/
-/* PCMCIA/Cardbus */
-
-static struct pcmcia_socket_entry_s {
-    PCMCIASocket *socket;
-    struct pcmcia_socket_entry_s *next;
-} *pcmcia_sockets = 0;
-
-void pcmcia_socket_register(PCMCIASocket *socket)
-{
-    struct pcmcia_socket_entry_s *entry;
-
-    entry = g_malloc(sizeof(struct pcmcia_socket_entry_s));
-    entry->socket = socket;
-    entry->next = pcmcia_sockets;
-    pcmcia_sockets = entry;
-}
-
-void pcmcia_socket_unregister(PCMCIASocket *socket)
-{
-    struct pcmcia_socket_entry_s *entry, **ptr;
-
-    ptr = &pcmcia_sockets;
-    for (entry = *ptr; entry; ptr = &entry->next, entry = *ptr)
-        if (entry->socket == socket) {
-            *ptr = entry->next;
-            g_free(entry);
-        }
-}
-
-void pcmcia_info(Monitor *mon, const QDict *qdict)
-{
-    struct pcmcia_socket_entry_s *iter;
-
-    if (!pcmcia_sockets)
-        monitor_printf(mon, "No PCMCIA sockets\n");
-
-    for (iter = pcmcia_sockets; iter; iter = iter->next)
-        monitor_printf(mon, "%s: %s\n", iter->socket->slot_string,
-                       iter->socket->attached ? iter->socket->card_string :
-                       "Empty");
-}
-
-/***********************************************************/
 /* machine registration */
 
 MachineState *current_machine;