Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20141211' into staging

target-arm queue:
 * pass semihosting exit code out to system
 * more TrustZone support code (still not enabled yet)
 * allow user to direct semihosting to gdb or native explicitly
   rather than always auto-guessing the destination
 * fix memory leak in realview_init
 * fix coverity warning in hw/arm/boot
 * get state migration working for AArch64 CPUs
 * check errors in kvm_arm_reset_vcpu

# gpg: Signature made Thu 11 Dec 2014 12:16:19 GMT using RSA key ID 14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"

* remotes/pmaydell/tags/pull-target-arm-20141211: (33 commits)
  target-arm: Check error conditions on kvm_arm_reset_vcpu
  target-arm: Support save/load for 64 bit CPUs
  target-arm/kvm: make reg sync code common between kvm32/64
  arm_gic_kvm: Tell kernel about number of IRQs
  hw/arm/boot: fix uninitialized scalar variable warning reported by coverity
  hw/arm/realview.c: Fix memory leak in realview_init()
  target-arm: make MAIR0/1 banked
  target-arm: make c13 cp regs banked (FCSEIDR, ...)
  target-arm: make VBAR banked
  target-arm: make PAR banked
  target-arm: make IFAR/DFAR banked
  target-arm: make DFSR banked
  target-arm: make IFSR banked
  target-arm: make DACR banked
  target-arm: make TTBCR banked
  target-arm: make TTBR0/1 banked
  target-arm: make CSSELR banked
  target-arm: respect SCR.FW, SCR.AW and SCTLR.NMFI
  target-arm: add SCTLR_EL3 and make SCTLR banked
  target-arm: add MVBAR support
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/gdbstub.c b/gdbstub.c
index 0faca56..e4a1a79 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -317,6 +317,8 @@
 
 bool gdb_has_xml;
 
+int semihosting_target = SEMIHOSTING_TARGET_AUTO;
+
 #ifdef CONFIG_USER_ONLY
 /* XXX: This is not thread safe.  Do we care?  */
 static int gdbserver_fd = -1;
@@ -351,10 +353,19 @@
     GDB_SYS_DISABLED,
 } gdb_syscall_mode;
 
-/* If gdb is connected when the first semihosting syscall occurs then use
-   remote gdb syscalls.  Otherwise use native file IO.  */
+/* Decide if either remote gdb syscalls or native file IO should be used. */
 int use_gdb_syscalls(void)
 {
+    if (semihosting_target == SEMIHOSTING_TARGET_NATIVE) {
+        /* -semihosting-config target=native */
+        return false;
+    } else if (semihosting_target == SEMIHOSTING_TARGET_GDB) {
+        /* -semihosting-config target=gdb */
+        return true;
+    }
+
+    /* -semihosting-config target=auto */
+    /* On the first call check if gdb is connected and remember. */
     if (gdb_syscall_mode == GDB_SYS_UNKNOWN) {
         gdb_syscall_mode = (gdbserver_state ? GDB_SYS_ENABLED
                                             : GDB_SYS_DISABLED);
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index 0014c34..e6a3c5b 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -329,6 +329,8 @@
  * Returns: the size of the device tree image on success,
  *          0 if the image size exceeds the limit,
  *          -1 on errors.
+ *
+ * Note: Must not be called unless have_dtb(binfo) is true.
  */
 static int load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
                     hwaddr addr_limit)
@@ -352,7 +354,7 @@
             goto fail;
         }
         g_free(filename);
-    } else if (binfo->get_dtb) {
+    } else {
         fdt = binfo->get_dtb(binfo, &size);
         if (!fdt) {
             fprintf(stderr, "Board was unable to create a dtb blob\n");
diff --git a/hw/arm/pxa2xx.c b/hw/arm/pxa2xx.c
index 693dfec..8967cc4 100644
--- a/hw/arm/pxa2xx.c
+++ b/hw/arm/pxa2xx.c
@@ -273,10 +273,10 @@
     case 3:
         s->cpu->env.uncached_cpsr = ARM_CPU_MODE_SVC;
         s->cpu->env.daif = PSTATE_A | PSTATE_F | PSTATE_I;
-        s->cpu->env.cp15.c1_sys = 0;
+        s->cpu->env.cp15.sctlr_ns = 0;
         s->cpu->env.cp15.c1_coproc = 0;
-        s->cpu->env.cp15.ttbr0_el1 = 0;
-        s->cpu->env.cp15.c3 = 0;
+        s->cpu->env.cp15.ttbr0_el[1] = 0;
+        s->cpu->env.cp15.dacr_ns = 0;
         s->pm_regs[PSSR >> 2] |= 0x8; /* Set STS */
         s->pm_regs[RCSR >> 2] |= 0x8; /* Set GPR */
 
diff --git a/hw/arm/realview.c b/hw/arm/realview.c
index af65aa4..d41ec97 100644
--- a/hw/arm/realview.c
+++ b/hw/arm/realview.c
@@ -52,7 +52,7 @@
     CPUARMState *env;
     ObjectClass *cpu_oc;
     MemoryRegion *sysmem = get_system_memory();
-    MemoryRegion *ram_lo = g_new(MemoryRegion, 1);
+    MemoryRegion *ram_lo;
     MemoryRegion *ram_hi = g_new(MemoryRegion, 1);
     MemoryRegion *ram_alias = g_new(MemoryRegion, 1);
     MemoryRegion *ram_hack = g_new(MemoryRegion, 1);
@@ -135,6 +135,7 @@
 
     if (is_pb && ram_size > 0x20000000) {
         /* Core tile RAM.  */
+        ram_lo = g_new(MemoryRegion, 1);
         low_ram_size = ram_size - 0x20000000;
         ram_size = 0x20000000;
         memory_region_init_ram(ram_lo, NULL, "realview.lowmem", low_ram_size,
diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c
index 5038885..1ad3eb0 100644
--- a/hw/intc/arm_gic_kvm.c
+++ b/hw/intc/arm_gic_kvm.c
@@ -92,6 +92,21 @@
     return s->dev_fd >= 0;
 }
 
+static bool kvm_gic_supports_attr(GICState *s, int group, int attrnum)
+{
+    struct kvm_device_attr attr = {
+        .group = group,
+        .attr = attrnum,
+        .flags = 0,
+    };
+
+    if (s->dev_fd == -1) {
+        return false;
+    }
+
+    return kvm_device_ioctl(s->dev_fd, KVM_HAS_DEVICE_ATTR, &attr) == 0;
+}
+
 static void kvm_gic_access(GICState *s, int group, int offset,
                                    int cpu, uint32_t *val, bool write)
 {
@@ -553,6 +568,11 @@
         return;
     }
 
+    if (kvm_gic_supports_attr(s, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0)) {
+        uint32_t numirqs = s->num_irq;
+        kvm_gic_access(s, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, 0, &numirqs, 1);
+    }
+
     /* Distributor */
     memory_region_init_reservation(&s->iomem, OBJECT(s),
                                    "kvm-gic_dist", 0x1000);
diff --git a/include/exec/gdbstub.h b/include/exec/gdbstub.h
index a608a26..c633248 100644
--- a/include/exec/gdbstub.h
+++ b/include/exec/gdbstub.h
@@ -95,4 +95,10 @@
 /* in gdbstub-xml.c, generated by scripts/feature_to_c.sh */
 extern const char *const xml_builtin[][2];
 
+/* Command line option defining whether semihosting should go via gdb or not */
+extern int semihosting_target;
+#define SEMIHOSTING_TARGET_AUTO     0
+#define SEMIHOSTING_TARGET_NATIVE   1
+#define SEMIHOSTING_TARGET_GDB      2
+
 #endif
diff --git a/linux-user/aarch64/target_cpu.h b/linux-user/aarch64/target_cpu.h
index 21560ef..b5593dc 100644
--- a/linux-user/aarch64/target_cpu.h
+++ b/linux-user/aarch64/target_cpu.h
@@ -32,7 +32,7 @@
     /* Note that AArch64 Linux keeps the TLS pointer in TPIDR; this is
      * different from AArch32 Linux, which uses TPIDRRO.
      */
-    env->cp15.tpidr_el0 = newtls;
+    env->cp15.tpidr_el[0] = newtls;
 }
 
 #endif
diff --git a/linux-user/arm/target_cpu.h b/linux-user/arm/target_cpu.h
index 39d65b6..d8a534d 100644
--- a/linux-user/arm/target_cpu.h
+++ b/linux-user/arm/target_cpu.h
@@ -29,7 +29,7 @@
 
 static inline void cpu_set_tls(CPUARMState *env, target_ulong newtls)
 {
-    env->cp15.tpidrro_el0 = newtls;
+    env->cp15.tpidrro_el[0] = newtls;
 }
 
 #endif
diff --git a/linux-user/main.c b/linux-user/main.c
index 5c14c1e..186ee4d 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -564,7 +564,7 @@
         end_exclusive();
         break;
     case 0xffff0fe0: /* __kernel_get_tls */
-        env->regs[0] = env->cp15.tpidrro_el0;
+        env->regs[0] = env->cp15.tpidrro_el[0];
         break;
     case 0xffff0f60: /* __kernel_cmpxchg64 */
         arm_kernel_cmpxchg64_helper(env);
diff --git a/qemu-options.hx b/qemu-options.hx
index 6f273ee..10b9568 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -3226,7 +3226,17 @@
 STEXI
 @item -semihosting
 @findex -semihosting
-Semihosting mode (ARM, M68K, Xtensa only).
+Enable semihosting mode (ARM, M68K, Xtensa only).
+ETEXI
+DEF("semihosting-config", HAS_ARG, QEMU_OPTION_semihosting_config,
+    "-semihosting-config [enable=on|off,]target=native|gdb|auto   semihosting configuration\n",
+QEMU_ARCH_ARM | QEMU_ARCH_M68K | QEMU_ARCH_XTENSA | QEMU_ARCH_LM32)
+STEXI
+@item -semihosting-config [enable=on|off,]target=native|gdb|auto
+@findex -semihosting-config
+Enable semihosting and define where the semihosting calls will be addressed,
+to QEMU (@code{native}) or to GDB (@code{gdb}). The default is @code{auto}, which means
+@code{gdb} during debug sessions and @code{native} otherwise (ARM, M68K, Xtensa only).
 ETEXI
 DEF("old-param", 0, QEMU_OPTION_old_param,
     "-old-param      old param mode\n", QEMU_ARCH_ARM)
diff --git a/target-arm/arm-semi.c b/target-arm/arm-semi.c
index ebb5235..a8b83e6 100644
--- a/target-arm/arm-semi.c
+++ b/target-arm/arm-semi.c
@@ -58,6 +58,10 @@
 #define TARGET_SYS_HEAPINFO    0x16
 #define TARGET_SYS_EXIT        0x18
 
+/* ADP_Stopped_ApplicationExit is used for exit(0),
+ * anything else is implemented as exit(1) */
+#define ADP_Stopped_ApplicationExit     (0x20026)
+
 #ifndef O_BINARY
 #define O_BINARY 0
 #endif
@@ -551,8 +555,11 @@
             return 0;
         }
     case TARGET_SYS_EXIT:
-        gdb_exit(env, 0);
-        exit(0);
+        /* ARM specifies only Stopped_ApplicationExit as normal
+         * exit, everything else is considered an error */
+        ret = (args == ADP_Stopped_ApplicationExit) ? 0 : 1;
+        gdb_exit(env, ret);
+        exit(ret);
     default:
         fprintf(stderr, "qemu: Unsupported SemiHosting SWI 0x%02x\n", nr);
         cpu_dump_state(cs, stderr, fprintf, 0);
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 5ce7350..d3db279 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -109,7 +109,7 @@
 #if defined(CONFIG_USER_ONLY)
         env->pstate = PSTATE_MODE_EL0t;
         /* Userspace expects access to DC ZVA, CTL_EL0 and the cache ops */
-        env->cp15.c1_sys |= SCTLR_UCT | SCTLR_UCI | SCTLR_DZE;
+        env->cp15.sctlr_el[1] |= SCTLR_UCT | SCTLR_UCI | SCTLR_DZE;
         /* and to the FP/Neon instructions */
         env->cp15.c1_coproc = deposit64(env->cp15.c1_coproc, 20, 2, 3);
 #else
@@ -167,7 +167,11 @@
         env->thumb = initial_pc & 1;
     }
 
-    if (env->cp15.c1_sys & SCTLR_V) {
+    /* AArch32 has a hard highvec setting of 0xFFFF0000.  If we are currently
+     * executing as AArch32 then check if highvecs are enabled and
+     * adjust the PC accordingly.
+     */
+    if (A32_BANKED_CURRENT_REG_GET(env, sctlr) & SCTLR_V) {
         env->regs[15] = 0xFFFF0000;
     }
 
@@ -548,7 +552,7 @@
         ARMCPRegInfo ifar = {
             .name = "IFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 1,
             .access = PL1_RW,
-            .fieldoffset = offsetofhigh32(CPUARMState, cp15.far_el[1]),
+            .fieldoffset = offsetof(CPUARMState, cp15.ifar_ns),
             .resetvalue = 0
         };
         define_one_arm_cp_reg(cpu, &ifar);
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 7f80090..7ba55f0 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -120,6 +120,12 @@
 #define GTIMER_VIRT 1
 #define NUM_GTIMERS 2
 
+typedef struct {
+    uint64_t raw_tcr;
+    uint32_t mask;
+    uint32_t base_mask;
+} TCR;
+
 typedef struct CPUARMState {
     /* Regs for current mode.  */
     uint32_t regs[16];
@@ -177,28 +183,111 @@
     /* System control coprocessor (cp15) */
     struct {
         uint32_t c0_cpuid;
-        uint64_t c0_cssel; /* Cache size selection.  */
-        uint64_t c1_sys; /* System control register.  */
+        union { /* Cache size selection */
+            struct {
+                uint64_t _unused_csselr0;
+                uint64_t csselr_ns;
+                uint64_t _unused_csselr1;
+                uint64_t csselr_s;
+            };
+            uint64_t csselr_el[4];
+        };
+        union { /* System control register. */
+            struct {
+                uint64_t _unused_sctlr;
+                uint64_t sctlr_ns;
+                uint64_t hsctlr;
+                uint64_t sctlr_s;
+            };
+            uint64_t sctlr_el[4];
+        };
         uint64_t c1_coproc; /* Coprocessor access register.  */
         uint32_t c1_xscaleauxcr; /* XScale auxiliary control register.  */
-        uint64_t ttbr0_el1; /* MMU translation table base 0. */
-        uint64_t ttbr1_el1; /* MMU translation table base 1. */
-        uint64_t c2_control; /* MMU translation table base control.  */
-        uint32_t c2_mask; /* MMU translation table base selection mask.  */
-        uint32_t c2_base_mask; /* MMU translation table base 0 mask. */
+        uint64_t sder; /* Secure debug enable register. */
+        uint32_t nsacr; /* Non-secure access control register. */
+        union { /* MMU translation table base 0. */
+            struct {
+                uint64_t _unused_ttbr0_0;
+                uint64_t ttbr0_ns;
+                uint64_t _unused_ttbr0_1;
+                uint64_t ttbr0_s;
+            };
+            uint64_t ttbr0_el[4];
+        };
+        union { /* MMU translation table base 1. */
+            struct {
+                uint64_t _unused_ttbr1_0;
+                uint64_t ttbr1_ns;
+                uint64_t _unused_ttbr1_1;
+                uint64_t ttbr1_s;
+            };
+            uint64_t ttbr1_el[4];
+        };
+        /* MMU translation table base control. */
+        TCR tcr_el[4];
         uint32_t c2_data; /* MPU data cachable bits.  */
         uint32_t c2_insn; /* MPU instruction cachable bits.  */
-        uint32_t c3; /* MMU domain access control register
-                        MPU write buffer control.  */
+        union { /* MMU domain access control register
+                 * MPU write buffer control.
+                 */
+            struct {
+                uint64_t dacr_ns;
+                uint64_t dacr_s;
+            };
+            struct {
+                uint64_t dacr32_el2;
+            };
+        };
         uint32_t pmsav5_data_ap; /* PMSAv5 MPU data access permissions */
         uint32_t pmsav5_insn_ap; /* PMSAv5 MPU insn access permissions */
         uint64_t hcr_el2; /* Hypervisor configuration register */
         uint64_t scr_el3; /* Secure configuration register.  */
-        uint32_t ifsr_el2; /* Fault status registers.  */
-        uint64_t esr_el[4];
+        union { /* Fault status registers.  */
+            struct {
+                uint64_t ifsr_ns;
+                uint64_t ifsr_s;
+            };
+            struct {
+                uint64_t ifsr32_el2;
+            };
+        };
+        union {
+            struct {
+                uint64_t _unused_dfsr;
+                uint64_t dfsr_ns;
+                uint64_t hsr;
+                uint64_t dfsr_s;
+            };
+            uint64_t esr_el[4];
+        };
         uint32_t c6_region[8]; /* MPU base/size registers.  */
-        uint64_t far_el[4]; /* Fault address registers.  */
-        uint64_t par_el1;  /* Translation result. */
+        union { /* Fault address registers. */
+            struct {
+                uint64_t _unused_far0;
+#ifdef HOST_WORDS_BIGENDIAN
+                uint32_t ifar_ns;
+                uint32_t dfar_ns;
+                uint32_t ifar_s;
+                uint32_t dfar_s;
+#else
+                uint32_t dfar_ns;
+                uint32_t ifar_ns;
+                uint32_t dfar_s;
+                uint32_t ifar_s;
+#endif
+                uint64_t _unused_far3;
+            };
+            uint64_t far_el[4];
+        };
+        union { /* Translation result. */
+            struct {
+                uint64_t _unused_par_0;
+                uint64_t par_ns;
+                uint64_t _unused_par_1;
+                uint64_t par_s;
+            };
+            uint64_t par_el[4];
+        };
         uint32_t c9_insn; /* Cache lockdown registers.  */
         uint32_t c9_data;
         uint64_t c9_pmcr; /* performance monitor control register */
@@ -207,13 +296,67 @@
         uint32_t c9_pmxevtyper; /* perf monitor event type */
         uint32_t c9_pmuserenr; /* perf monitor user enable */
         uint32_t c9_pminten; /* perf monitor interrupt enables */
-        uint64_t mair_el1;
-        uint64_t vbar_el[4]; /* vector base address register */
-        uint32_t c13_fcse; /* FCSE PID.  */
-        uint64_t contextidr_el1; /* Context ID.  */
-        uint64_t tpidr_el0; /* User RW Thread register.  */
-        uint64_t tpidrro_el0; /* User RO Thread register.  */
-        uint64_t tpidr_el1; /* Privileged Thread register.  */
+        union { /* Memory attribute redirection */
+            struct {
+#ifdef HOST_WORDS_BIGENDIAN
+                uint64_t _unused_mair_0;
+                uint32_t mair1_ns;
+                uint32_t mair0_ns;
+                uint64_t _unused_mair_1;
+                uint32_t mair1_s;
+                uint32_t mair0_s;
+#else
+                uint64_t _unused_mair_0;
+                uint32_t mair0_ns;
+                uint32_t mair1_ns;
+                uint64_t _unused_mair_1;
+                uint32_t mair0_s;
+                uint32_t mair1_s;
+#endif
+            };
+            uint64_t mair_el[4];
+        };
+        union { /* vector base address register */
+            struct {
+                uint64_t _unused_vbar;
+                uint64_t vbar_ns;
+                uint64_t hvbar;
+                uint64_t vbar_s;
+            };
+            uint64_t vbar_el[4];
+        };
+        uint32_t mvbar; /* (monitor) vector base address register */
+        struct { /* FCSE PID. */
+            uint32_t fcseidr_ns;
+            uint32_t fcseidr_s;
+        };
+        union { /* Context ID. */
+            struct {
+                uint64_t _unused_contextidr_0;
+                uint64_t contextidr_ns;
+                uint64_t _unused_contextidr_1;
+                uint64_t contextidr_s;
+            };
+            uint64_t contextidr_el[4];
+        };
+        union { /* User RW Thread register. */
+            struct {
+                uint64_t tpidrurw_ns;
+                uint64_t tpidrprw_ns;
+                uint64_t htpidr;
+                uint64_t _tpidr_el3;
+            };
+            uint64_t tpidr_el[4];
+        };
+        /* The secure banks of these registers don't map anywhere */
+        uint64_t tpidrurw_s;
+        uint64_t tpidrprw_s;
+        uint64_t tpidruro_s;
+
+        union { /* User RO Thread register. */
+            uint64_t tpidruro_ns;
+            uint64_t tpidrro_el[1];
+        };
         uint64_t c14_cntfrq; /* Counter Frequency register */
         uint64_t c14_cntkctl; /* Timer Control register */
         ARMGenericTimer c14_timer[NUM_GTIMERS];
@@ -817,6 +960,49 @@
     return arm_feature(env, ARM_FEATURE_AARCH64);
 }
 
+/* Function for determing whether guest cp register reads and writes should
+ * access the secure or non-secure bank of a cp register.  When EL3 is
+ * operating in AArch32 state, the NS-bit determines whether the secure
+ * instance of a cp register should be used. When EL3 is AArch64 (or if
+ * it doesn't exist at all) then there is no register banking, and all
+ * accesses are to the non-secure version.
+ */
+static inline bool access_secure_reg(CPUARMState *env)
+{
+    bool ret = (arm_feature(env, ARM_FEATURE_EL3) &&
+                !arm_el_is_aa64(env, 3) &&
+                !(env->cp15.scr_el3 & SCR_NS));
+
+    return ret;
+}
+
+/* Macros for accessing a specified CP register bank */
+#define A32_BANKED_REG_GET(_env, _regname, _secure)    \
+    ((_secure) ? (_env)->cp15._regname##_s : (_env)->cp15._regname##_ns)
+
+#define A32_BANKED_REG_SET(_env, _regname, _secure, _val)   \
+    do {                                                \
+        if (_secure) {                                   \
+            (_env)->cp15._regname##_s = (_val);            \
+        } else {                                        \
+            (_env)->cp15._regname##_ns = (_val);           \
+        }                                               \
+    } while (0)
+
+/* Macros for automatically accessing a specific CP register bank depending on
+ * the current secure state of the system.  These macros are not intended for
+ * supporting instruction translation reads/writes as these are dependent
+ * solely on the SCR.NS bit and not the mode.
+ */
+#define A32_BANKED_CURRENT_REG_GET(_env, _regname)        \
+    A32_BANKED_REG_GET((_env), _regname,                \
+                       ((!arm_el_is_aa64((_env), 3) && arm_is_secure(_env))))
+
+#define A32_BANKED_CURRENT_REG_SET(_env, _regname, _val)                       \
+    A32_BANKED_REG_SET((_env), _regname,                                    \
+                       ((!arm_el_is_aa64((_env), 3) && arm_is_secure(_env))),  \
+                       (_val))
+
 void arm_cpu_list(FILE *f, fprintf_function cpu_fprintf);
 unsigned int arm_excp_target_el(CPUState *cs, unsigned int excp_idx);
 
@@ -836,6 +1022,7 @@
  *  Crn, Crm, opc1, opc2 fields
  *  32 or 64 bit register (ie is it accessed via MRC/MCR
  *    or via MRRC/MCRR?)
+ *  non-secure/secure bank (AArch32 only)
  * We allow 4 bits for opc1 because MRRC/MCRR have a 4 bit field.
  * (In this case crn and opc2 should be zero.)
  * For AArch64, there is no 32/64 bit size distinction;
@@ -853,9 +1040,16 @@
 #define CP_REG_AA64_SHIFT 28
 #define CP_REG_AA64_MASK (1 << CP_REG_AA64_SHIFT)
 
-#define ENCODE_CP_REG(cp, is64, crn, crm, opc1, opc2)   \
-    (((cp) << 16) | ((is64) << 15) | ((crn) << 11) |    \
-     ((crm) << 7) | ((opc1) << 3) | (opc2))
+/* To enable banking of coprocessor registers depending on ns-bit we
+ * add a bit to distinguish between secure and non-secure cpregs in the
+ * hashtable.
+ */
+#define CP_REG_NS_SHIFT 29
+#define CP_REG_NS_MASK (1 << CP_REG_NS_SHIFT)
+
+#define ENCODE_CP_REG(cp, is64, ns, crn, crm, opc1, opc2)   \
+    ((ns) << CP_REG_NS_SHIFT | ((cp) << 16) | ((is64) << 15) |   \
+     ((crn) << 11) | ((crm) << 7) | ((opc1) << 3) | (opc2))
 
 #define ENCODE_AA64_CP_REG(cp, crn, crm, op0, op1, op2) \
     (CP_REG_AA64_MASK |                                 \
@@ -874,8 +1068,15 @@
     uint32_t cpregid = kvmid;
     if ((kvmid & CP_REG_ARCH_MASK) == CP_REG_ARM64) {
         cpregid |= CP_REG_AA64_MASK;
-    } else if ((kvmid & CP_REG_SIZE_MASK) == CP_REG_SIZE_U64) {
-        cpregid |= (1 << 15);
+    } else {
+        if ((kvmid & CP_REG_SIZE_MASK) == CP_REG_SIZE_U64) {
+            cpregid |= (1 << 15);
+        }
+
+        /* KVM is always non-secure so add the NS flag on AArch32 register
+         * entries.
+         */
+         cpregid |= 1 << CP_REG_NS_SHIFT;
     }
     return cpregid;
 }
@@ -950,6 +1151,21 @@
     ARM_CP_STATE_BOTH = 2,
 };
 
+/* ARM CP register secure state flags.  These flags identify security state
+ * attributes for a given CP register entry.
+ * The existence of both or neither secure and non-secure flags indicates that
+ * the register has both a secure and non-secure hash entry.  A single one of
+ * these flags causes the register to only be hashed for the specified
+ * security state.
+ * Although definitions may have any combination of the S/NS bits, each
+ * registered entry will only have one to identify whether the entry is secure
+ * or non-secure.
+ */
+enum {
+    ARM_CP_SECSTATE_S =   (1 << 0), /* bit[0]: Secure state register */
+    ARM_CP_SECSTATE_NS =  (1 << 1), /* bit[1]: Non-secure state register */
+};
+
 /* Return true if cptype is a valid type field. This is used to try to
  * catch errors where the sentinel has been accidentally left off the end
  * of a list of registers.
@@ -1084,6 +1300,8 @@
     int type;
     /* Access rights: PL*_[RW] */
     int access;
+    /* Security state: ARM_CP_SECSTATE_* bits/values */
+    int secure;
     /* The opaque pointer passed to define_arm_cp_regs_with_opaque() when
      * this register was defined: can be used to hand data through to the
      * register read/write functions, since they are passed the ARMCPRegInfo*.
@@ -1093,12 +1311,27 @@
      * fieldoffset is non-zero, the reset value of the register.
      */
     uint64_t resetvalue;
-    /* Offset of the field in CPUARMState for this register. This is not
-     * needed if either:
+    /* Offset of the field in CPUARMState for this register.
+     *
+     * This is not needed if either:
      *  1. type is ARM_CP_CONST or one of the ARM_CP_SPECIALs
      *  2. both readfn and writefn are specified
      */
     ptrdiff_t fieldoffset; /* offsetof(CPUARMState, field) */
+
+    /* Offsets of the secure and non-secure fields in CPUARMState for the
+     * register if it is banked.  These fields are only used during the static
+     * registration of a register.  During hashing the bank associated
+     * with a given security state is copied to fieldoffset which is used from
+     * there on out.
+     *
+     * It is expected that register definitions use either fieldoffset or
+     * bank_fieldoffsets in the definition but not both.  It is also expected
+     * that both bank offsets are set when defining a banked register.  This
+     * use indicates that a register is banked.
+     */
+    ptrdiff_t bank_fieldoffsets[2];
+
     /* Function for making any access checks for this register in addition to
      * those specified by the 'access' permissions bits. If NULL, no extra
      * checks required. The access check is performed at runtime, not at
@@ -1247,27 +1480,50 @@
     CPUARMState *env = cs->env_ptr;
     unsigned int cur_el = arm_current_el(env);
     unsigned int target_el = arm_excp_target_el(cs, excp_idx);
-    /* FIXME: Use actual secure state.  */
-    bool secure = false;
-    /* If in EL1/0, Physical IRQ routing to EL2 only happens from NS state.  */
-    bool irq_can_hyp = !secure && cur_el < 2 && target_el == 2;
+    bool secure = arm_is_secure(env);
+    uint32_t scr;
+    uint32_t hcr;
+    bool pstate_unmasked;
+    int8_t unmasked = 0;
 
-    /* Don't take exceptions if they target a lower EL.  */
+    /* Don't take exceptions if they target a lower EL.
+     * This check should catch any exceptions that would not be taken but left
+     * pending.
+     */
     if (cur_el > target_el) {
         return false;
     }
 
     switch (excp_idx) {
     case EXCP_FIQ:
-        if (irq_can_hyp && (env->cp15.hcr_el2 & HCR_FMO)) {
-            return true;
-        }
-        return !(env->daif & PSTATE_F);
+        /* If FIQs are routed to EL3 or EL2 then there are cases where we
+         * override the CPSR.F in determining if the exception is masked or
+         * not.  If neither of these are set then we fall back to the CPSR.F
+         * setting otherwise we further assess the state below.
+         */
+        hcr = (env->cp15.hcr_el2 & HCR_FMO);
+        scr = (env->cp15.scr_el3 & SCR_FIQ);
+
+        /* When EL3 is 32-bit, the SCR.FW bit controls whether the CPSR.F bit
+         * masks FIQ interrupts when taken in non-secure state.  If SCR.FW is
+         * set then FIQs can be masked by CPSR.F when non-secure but only
+         * when FIQs are only routed to EL3.
+         */
+        scr &= !((env->cp15.scr_el3 & SCR_FW) && !hcr);
+        pstate_unmasked = !(env->daif & PSTATE_F);
+        break;
+
     case EXCP_IRQ:
-        if (irq_can_hyp && (env->cp15.hcr_el2 & HCR_IMO)) {
-            return true;
-        }
-        return !(env->daif & PSTATE_I);
+        /* When EL3 execution state is 32-bit, if HCR.IMO is set then we may
+         * override the CPSR.I masking when in non-secure state.  The SCR.IRQ
+         * setting has already been taken into consideration when setting the
+         * target EL, so it does not have a further affect here.
+         */
+        hcr = (env->cp15.hcr_el2 & HCR_IMO);
+        scr = false;
+        pstate_unmasked = !(env->daif & PSTATE_I);
+        break;
+
     case EXCP_VFIQ:
         if (secure || !(env->cp15.hcr_el2 & HCR_FMO)) {
             /* VFIQs are only taken when hypervized and non-secure.  */
@@ -1283,6 +1539,21 @@
     default:
         g_assert_not_reached();
     }
+
+    /* Use the target EL, current execution state and SCR/HCR settings to
+     * determine whether the corresponding CPSR bit is used to mask the
+     * interrupt.
+     */
+    if ((target_el > cur_el) && (target_el != 1)) {
+        if (arm_el_is_aa64(env, 3) || ((scr || hcr) && (!secure))) {
+            unmasked = 1;
+        }
+    }
+
+    /* The PSTATE bits only mask the interrupt if we have not overriden the
+     * ability above.
+     */
+    return unmasked || pstate_unmasked;
 }
 
 static inline CPUARMState *cpu_init(const char *cpu_model)
@@ -1402,6 +1673,12 @@
  */
 #define ARM_TBFLAG_XSCALE_CPAR_SHIFT 20
 #define ARM_TBFLAG_XSCALE_CPAR_MASK (3 << ARM_TBFLAG_XSCALE_CPAR_SHIFT)
+/* Indicates whether cp register reads and writes by guest code should access
+ * the secure or nonsecure bank of banked registers; note that this is not
+ * the same thing as the current security state of the processor!
+ */
+#define ARM_TBFLAG_NS_SHIFT         22
+#define ARM_TBFLAG_NS_MASK          (1 << ARM_TBFLAG_NS_SHIFT)
 
 /* Bit usage when in AArch64 state */
 #define ARM_TBFLAG_AA64_EL_SHIFT    0
@@ -1446,6 +1723,8 @@
     (((F) & ARM_TBFLAG_AA64_SS_ACTIVE_MASK) >> ARM_TBFLAG_AA64_SS_ACTIVE_SHIFT)
 #define ARM_TBFLAG_AA64_PSTATE_SS(F) \
     (((F) & ARM_TBFLAG_AA64_PSTATE_SS_MASK) >> ARM_TBFLAG_AA64_PSTATE_SS_SHIFT)
+#define ARM_TBFLAG_NS(F) \
+    (((F) & ARM_TBFLAG_NS_MASK) >> ARM_TBFLAG_NS_SHIFT)
 
 static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
                                         target_ulong *cs_base, int *flags)
@@ -1495,6 +1774,9 @@
         if (privmode) {
             *flags |= ARM_TBFLAG_PRIV_MASK;
         }
+        if (!(access_secure_reg(env))) {
+            *flags |= ARM_TBFLAG_NS_MASK;
+        }
         if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)
             || arm_el_is_aa64(env, 1)) {
             *flags |= ARM_TBFLAG_VFPEN_MASK;
diff --git a/target-arm/helper.c b/target-arm/helper.c
index b74d348..96abbed 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -136,6 +136,11 @@
     }
 }
 
+static void *raw_ptr(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    return (char *)env + ri->fieldoffset;
+}
+
 static uint64_t read_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri)
 {
     /* Raw read of a coprocessor register (as needed for migration, etc). */
@@ -419,13 +424,36 @@
 }
 
 static const ARMCPRegInfo cp_reginfo[] = {
-    { .name = "FCSEIDR", .cp = 15, .crn = 13, .crm = 0, .opc1 = 0, .opc2 = 0,
-      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c13_fcse),
+    /* Define the secure and non-secure FCSE identifier CP registers
+     * separately because there is no secure bank in V8 (no _EL3).  This allows
+     * the secure register to be properly reset and migrated. There is also no
+     * v8 EL1 version of the register so the non-secure instance stands alone.
+     */
+    { .name = "FCSEIDR(NS)",
+      .cp = 15, .opc1 = 0, .crn = 13, .crm = 0, .opc2 = 0,
+      .access = PL1_RW, .secure = ARM_CP_SECSTATE_NS,
+      .fieldoffset = offsetof(CPUARMState, cp15.fcseidr_ns),
       .resetvalue = 0, .writefn = fcse_write, .raw_writefn = raw_write, },
-    { .name = "CONTEXTIDR", .state = ARM_CP_STATE_BOTH,
+    { .name = "FCSEIDR(S)",
+      .cp = 15, .opc1 = 0, .crn = 13, .crm = 0, .opc2 = 0,
+      .access = PL1_RW, .secure = ARM_CP_SECSTATE_S,
+      .fieldoffset = offsetof(CPUARMState, cp15.fcseidr_s),
+      .resetvalue = 0, .writefn = fcse_write, .raw_writefn = raw_write, },
+    /* Define the secure and non-secure context identifier CP registers
+     * separately because there is no secure bank in V8 (no _EL3).  This allows
+     * the secure register to be properly reset and migrated.  In the
+     * non-secure case, the 32-bit register will have reset and migration
+     * disabled during registration as it is handled by the 64-bit instance.
+     */
+    { .name = "CONTEXTIDR_EL1", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .opc1 = 0, .crn = 13, .crm = 0, .opc2 = 1,
-      .access = PL1_RW,
-      .fieldoffset = offsetof(CPUARMState, cp15.contextidr_el1),
+      .access = PL1_RW, .secure = ARM_CP_SECSTATE_NS,
+      .fieldoffset = offsetof(CPUARMState, cp15.contextidr_el[1]),
+      .resetvalue = 0, .writefn = contextidr_write, .raw_writefn = raw_write, },
+    { .name = "CONTEXTIDR(S)", .state = ARM_CP_STATE_AA32,
+      .cp = 15, .opc1 = 0, .crn = 13, .crm = 0, .opc2 = 1,
+      .access = PL1_RW, .secure = ARM_CP_SECSTATE_S,
+      .fieldoffset = offsetof(CPUARMState, cp15.contextidr_s),
       .resetvalue = 0, .writefn = contextidr_write, .raw_writefn = raw_write, },
     REGINFO_SENTINEL
 };
@@ -435,10 +463,12 @@
      * definitions that don't use CP_ANY wildcards (mostly in v8_cp_reginfo[]).
      */
     /* MMU Domain access control / MPU write buffer control */
-    { .name = "DACR", .cp = 15,
-      .crn = 3, .crm = CP_ANY, .opc1 = CP_ANY, .opc2 = CP_ANY,
-      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c3),
-      .resetvalue = 0, .writefn = dacr_write, .raw_writefn = raw_write, },
+    { .name = "DACR",
+      .cp = 15, .opc1 = CP_ANY, .crn = 3, .crm = CP_ANY, .opc2 = CP_ANY,
+      .access = PL1_RW, .resetvalue = 0,
+      .writefn = dacr_write, .raw_writefn = raw_write,
+      .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.dacr_s),
+                             offsetoflow32(CPUARMState, cp15.dacr_ns) } },
     /* ??? This covers not just the impdef TLB lockdown registers but also
      * some v7VMSA registers relating to TEX remap, so it is overly broad.
      */
@@ -552,7 +582,8 @@
       .access = PL0_W, .type = ARM_CP_NOP },
     { .name = "IFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 2,
       .access = PL1_RW,
-      .fieldoffset = offsetofhigh32(CPUARMState, cp15.far_el[1]),
+      .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ifar_s),
+                             offsetof(CPUARMState, cp15.ifar_ns) },
       .resetvalue = 0, },
     /* Watchpoint Fault Address Register : should actually only be present
      * for 1136, 1176, 11MPCore.
@@ -776,7 +807,14 @@
 static uint64_t ccsidr_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
     ARMCPU *cpu = arm_env_get_cpu(env);
-    return cpu->ccsidr[env->cp15.c0_cssel];
+
+    /* Acquire the CSSELR index from the bank corresponding to the CCSIDR
+     * bank
+     */
+    uint32_t index = A32_BANKED_REG_GET(env, csselr,
+                                        ri->secure & ARM_CP_SECSTATE_S);
+
+    return cpu->ccsidr[index];
 }
 
 static void csselr_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -896,18 +934,17 @@
     { .name = "VBAR", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .crn = 12, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW, .writefn = vbar_write,
-      .fieldoffset = offsetof(CPUARMState, cp15.vbar_el[1]),
+      .bank_fieldoffsets = { offsetof(CPUARMState, cp15.vbar_s),
+                             offsetof(CPUARMState, cp15.vbar_ns) },
       .resetvalue = 0 },
-    { .name = "SCR", .cp = 15, .crn = 1, .crm = 1, .opc1 = 0, .opc2 = 0,
-      .access = PL1_RW, .fieldoffset = offsetoflow32(CPUARMState, cp15.scr_el3),
-      .resetvalue = 0, .writefn = scr_write },
     { .name = "CCSIDR", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 0,
       .access = PL1_R, .readfn = ccsidr_read, .type = ARM_CP_NO_MIGRATE },
     { .name = "CSSELR", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 2, .opc2 = 0,
-      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c0_cssel),
-      .writefn = csselr_write, .resetvalue = 0 },
+      .access = PL1_RW, .writefn = csselr_write, .resetvalue = 0,
+      .bank_fieldoffsets = { offsetof(CPUARMState, cp15.csselr_s),
+                             offsetof(CPUARMState, cp15.csselr_ns) } },
     /* Auxiliary ID register: this actually has an IMPDEF value but for now
      * just RAZ for all cores:
      */
@@ -928,20 +965,26 @@
      */
     { .name = "MAIR_EL1", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 0, .crn = 10, .crm = 2, .opc2 = 0,
-      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.mair_el1),
+      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.mair_el[1]),
       .resetvalue = 0 },
     /* For non-long-descriptor page tables these are PRRR and NMRR;
      * regardless they still act as reads-as-written for QEMU.
      * The override is necessary because of the overly-broad TLB_LOCKDOWN
      * definition.
      */
+     /* MAIR0/1 are defined seperately from their 64-bit counterpart which
+      * allows them to assign the correct fieldoffset based on the endianness
+      * handled in the field definitions.
+      */
     { .name = "MAIR0", .state = ARM_CP_STATE_AA32, .type = ARM_CP_OVERRIDE,
       .cp = 15, .opc1 = 0, .crn = 10, .crm = 2, .opc2 = 0, .access = PL1_RW,
-      .fieldoffset = offsetoflow32(CPUARMState, cp15.mair_el1),
+      .bank_fieldoffsets = { offsetof(CPUARMState, cp15.mair0_s),
+                             offsetof(CPUARMState, cp15.mair0_ns) },
       .resetfn = arm_cp_reset_ignore },
     { .name = "MAIR1", .state = ARM_CP_STATE_AA32, .type = ARM_CP_OVERRIDE,
       .cp = 15, .opc1 = 0, .crn = 10, .crm = 2, .opc2 = 1, .access = PL1_RW,
-      .fieldoffset = offsetofhigh32(CPUARMState, cp15.mair_el1),
+      .bank_fieldoffsets = { offsetof(CPUARMState, cp15.mair1_s),
+                             offsetof(CPUARMState, cp15.mair1_ns) },
       .resetfn = arm_cp_reset_ignore },
     { .name = "ISR_EL1", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 1, .opc2 = 0,
@@ -1017,23 +1060,31 @@
     { .name = "TPIDR_EL0", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 3, .opc2 = 2, .crn = 13, .crm = 0,
       .access = PL0_RW,
-      .fieldoffset = offsetof(CPUARMState, cp15.tpidr_el0), .resetvalue = 0 },
+      .fieldoffset = offsetof(CPUARMState, cp15.tpidr_el[0]), .resetvalue = 0 },
     { .name = "TPIDRURW", .cp = 15, .crn = 13, .crm = 0, .opc1 = 0, .opc2 = 2,
       .access = PL0_RW,
-      .fieldoffset = offsetoflow32(CPUARMState, cp15.tpidr_el0),
+      .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.tpidrurw_s),
+                             offsetoflow32(CPUARMState, cp15.tpidrurw_ns) },
       .resetfn = arm_cp_reset_ignore },
     { .name = "TPIDRRO_EL0", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 3, .opc2 = 3, .crn = 13, .crm = 0,
       .access = PL0_R|PL1_W,
-      .fieldoffset = offsetof(CPUARMState, cp15.tpidrro_el0), .resetvalue = 0 },
+      .fieldoffset = offsetof(CPUARMState, cp15.tpidrro_el[0]),
+      .resetvalue = 0},
     { .name = "TPIDRURO", .cp = 15, .crn = 13, .crm = 0, .opc1 = 0, .opc2 = 3,
       .access = PL0_R|PL1_W,
-      .fieldoffset = offsetoflow32(CPUARMState, cp15.tpidrro_el0),
+      .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.tpidruro_s),
+                             offsetoflow32(CPUARMState, cp15.tpidruro_ns) },
       .resetfn = arm_cp_reset_ignore },
-    { .name = "TPIDR_EL1", .state = ARM_CP_STATE_BOTH,
+    { .name = "TPIDR_EL1", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 0, .opc2 = 4, .crn = 13, .crm = 0,
       .access = PL1_RW,
-      .fieldoffset = offsetof(CPUARMState, cp15.tpidr_el1), .resetvalue = 0 },
+      .fieldoffset = offsetof(CPUARMState, cp15.tpidr_el[1]), .resetvalue = 0 },
+    { .name = "TPIDRPRW", .opc1 = 0, .cp = 15, .crn = 13, .crm = 0, .opc2 = 4,
+      .access = PL1_RW,
+      .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.tpidrprw_s),
+                             offsetoflow32(CPUARMState, cp15.tpidrprw_ns) },
+      .resetvalue = 0 },
     REGINFO_SENTINEL
 };
 
@@ -1391,6 +1442,7 @@
     int prot;
     int ret, is_user = ri->opc2 & 2;
     int access_type = ri->opc2 & 1;
+    uint64_t par64;
 
     ret = get_phys_addr(env, value, access_type, is_user,
                         &phys_addr, &prot, &page_size);
@@ -1399,7 +1451,7 @@
          * translation table format, but with WnR always clear.
          * Convert it to a 64-bit PAR.
          */
-        uint64_t par64 = (1 << 11); /* LPAE bit always set */
+        par64 = (1 << 11); /* LPAE bit always set */
         if (ret == 0) {
             par64 |= phys_addr & ~0xfffULL;
             /* We don't set the ATTR or SH fields in the PAR. */
@@ -1411,7 +1463,6 @@
              * fault.
              */
         }
-        env->cp15.par_el1 = par64;
     } else {
         /* ret is a DFSR/IFSR value for the short descriptor
          * translation table format (with WnR always clear).
@@ -1421,23 +1472,25 @@
             /* We do not set any attribute bits in the PAR */
             if (page_size == (1 << 24)
                 && arm_feature(env, ARM_FEATURE_V7)) {
-                env->cp15.par_el1 = (phys_addr & 0xff000000) | 1 << 1;
+                par64 = (phys_addr & 0xff000000) | (1 << 1);
             } else {
-                env->cp15.par_el1 = phys_addr & 0xfffff000;
+                par64 = phys_addr & 0xfffff000;
             }
         } else {
-            env->cp15.par_el1 = ((ret & (1 << 10)) >> 5) |
-                ((ret & (1 << 12)) >> 6) |
-                ((ret & 0xf) << 1) | 1;
+            par64 = ((ret & (1 << 10)) >> 5) | ((ret & (1 << 12)) >> 6) |
+                    ((ret & 0xf) << 1) | 1;
         }
     }
+
+    A32_BANKED_CURRENT_REG_SET(env, par, par64);
 }
 #endif
 
 static const ARMCPRegInfo vapa_cp_reginfo[] = {
     { .name = "PAR", .cp = 15, .crn = 7, .crm = 4, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW, .resetvalue = 0,
-      .fieldoffset = offsetoflow32(CPUARMState, cp15.par_el1),
+      .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.par_s),
+                             offsetoflow32(CPUARMState, cp15.par_ns) },
       .writefn = par_write },
 #ifndef CONFIG_USER_ONLY
     { .name = "ATS", .cp = 15, .crn = 7, .crm = 8, .opc1 = 0, .opc2 = CP_ANY,
@@ -1555,6 +1608,7 @@
 static void vmsa_ttbcr_raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                  uint64_t value)
 {
+    TCR *tcr = raw_ptr(env, ri);
     int maskshift = extract32(value, 0, 3);
 
     if (!arm_feature(env, ARM_FEATURE_V8)) {
@@ -1573,14 +1627,15 @@
         }
     }
 
-    /* Note that we always calculate c2_mask and c2_base_mask, but
+    /* Update the masks corresponding to the the TCR bank being written
+     * Note that we always calculate mask and base_mask, but
      * they are only used for short-descriptor tables (ie if EAE is 0);
-     * for long-descriptor tables the TTBCR fields are used differently
-     * and the c2_mask and c2_base_mask values are meaningless.
+     * for long-descriptor tables the TCR fields are used differently
+     * and the mask and base_mask values are meaningless.
      */
-    raw_write(env, ri, value);
-    env->cp15.c2_mask = ~(((uint32_t)0xffffffffu) >> maskshift);
-    env->cp15.c2_base_mask = ~((uint32_t)0x3fffu >> maskshift);
+    tcr->raw_tcr = value;
+    tcr->mask = ~(((uint32_t)0xffffffffu) >> maskshift);
+    tcr->base_mask = ~((uint32_t)0x3fffu >> maskshift);
 }
 
 static void vmsa_ttbcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -1599,19 +1654,25 @@
 
 static void vmsa_ttbcr_reset(CPUARMState *env, const ARMCPRegInfo *ri)
 {
-    env->cp15.c2_base_mask = 0xffffc000u;
-    raw_write(env, ri, 0);
-    env->cp15.c2_mask = 0;
+    TCR *tcr = raw_ptr(env, ri);
+
+    /* Reset both the TCR as well as the masks corresponding to the bank of
+     * the TCR being reset.
+     */
+    tcr->raw_tcr = 0;
+    tcr->mask = 0;
+    tcr->base_mask = 0xffffc000u;
 }
 
 static void vmsa_tcr_el1_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                uint64_t value)
 {
     ARMCPU *cpu = arm_env_get_cpu(env);
+    TCR *tcr = raw_ptr(env, ri);
 
     /* For AArch64 the A1 bit could result in a change of ASID, so TLB flush. */
     tlb_flush(CPU(cpu), 1);
-    raw_write(env, ri, value);
+    tcr->raw_tcr = value;
 }
 
 static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -1631,37 +1692,45 @@
 static const ARMCPRegInfo vmsa_cp_reginfo[] = {
     { .name = "DFSR", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW, .type = ARM_CP_NO_MIGRATE,
-      .fieldoffset = offsetoflow32(CPUARMState, cp15.esr_el[1]),
+      .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.dfsr_s),
+                             offsetoflow32(CPUARMState, cp15.dfsr_ns) },
       .resetfn = arm_cp_reset_ignore, },
     { .name = "IFSR", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 1,
-      .access = PL1_RW,
-      .fieldoffset = offsetof(CPUARMState, cp15.ifsr_el2), .resetvalue = 0, },
+      .access = PL1_RW, .resetvalue = 0,
+      .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.ifsr_s),
+                             offsetoflow32(CPUARMState, cp15.ifsr_ns) } },
     { .name = "ESR_EL1", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .crn = 5, .crm = 2, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW,
       .fieldoffset = offsetof(CPUARMState, cp15.esr_el[1]), .resetvalue = 0, },
     { .name = "TTBR0_EL1", .state = ARM_CP_STATE_BOTH,
-      .opc0 = 3, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 0,
-      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el1),
-      .writefn = vmsa_ttbr_write, .resetvalue = 0 },
+      .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 0, .opc2 = 0,
+      .access = PL1_RW, .writefn = vmsa_ttbr_write, .resetvalue = 0,
+      .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr0_s),
+                             offsetof(CPUARMState, cp15.ttbr0_ns) } },
     { .name = "TTBR1_EL1", .state = ARM_CP_STATE_BOTH,
-      .opc0 = 3, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 1,
-      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.ttbr1_el1),
-      .writefn = vmsa_ttbr_write, .resetvalue = 0 },
+      .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 0, .opc2 = 1,
+      .access = PL1_RW, .writefn = vmsa_ttbr_write, .resetvalue = 0,
+      .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr1_s),
+                             offsetof(CPUARMState, cp15.ttbr1_ns) } },
     { .name = "TCR_EL1", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 2,
       .access = PL1_RW, .writefn = vmsa_tcr_el1_write,
       .resetfn = vmsa_ttbcr_reset, .raw_writefn = raw_write,
-      .fieldoffset = offsetof(CPUARMState, cp15.c2_control) },
+      .fieldoffset = offsetof(CPUARMState, cp15.tcr_el[1]) },
     { .name = "TTBCR", .cp = 15, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 2,
       .access = PL1_RW, .type = ARM_CP_NO_MIGRATE, .writefn = vmsa_ttbcr_write,
       .resetfn = arm_cp_reset_ignore, .raw_writefn = vmsa_ttbcr_raw_write,
-      .fieldoffset = offsetoflow32(CPUARMState, cp15.c2_control) },
-    /* 64-bit FAR; this entry also gives us the AArch32 DFAR */
-    { .name = "FAR_EL1", .state = ARM_CP_STATE_BOTH,
+      .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.tcr_el[3]),
+                             offsetoflow32(CPUARMState, cp15.tcr_el[1])} },
+    { .name = "FAR_EL1", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.far_el[1]),
       .resetvalue = 0, },
+    { .name = "DFAR", .cp = 15, .opc1 = 0, .crn = 6, .crm = 0, .opc2 = 0,
+      .access = PL1_RW, .resetvalue = 0,
+      .bank_fieldoffsets = { offsetof(CPUARMState, cp15.dfar_s),
+                             offsetof(CPUARMState, cp15.dfar_ns) } },
     REGINFO_SENTINEL
 };
 
@@ -1874,15 +1943,18 @@
       .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_OVERRIDE,
       .resetvalue = 0 },
     { .name = "PAR", .cp = 15, .crm = 7, .opc1 = 0,
-      .access = PL1_RW, .type = ARM_CP_64BIT,
-      .fieldoffset = offsetof(CPUARMState, cp15.par_el1), .resetvalue = 0 },
+      .access = PL1_RW, .type = ARM_CP_64BIT, .resetvalue = 0,
+      .bank_fieldoffsets = { offsetof(CPUARMState, cp15.par_s),
+                             offsetof(CPUARMState, cp15.par_ns)} },
     { .name = "TTBR0", .cp = 15, .crm = 2, .opc1 = 0,
       .access = PL1_RW, .type = ARM_CP_64BIT | ARM_CP_NO_MIGRATE,
-      .fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el1),
+      .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr0_s),
+                             offsetof(CPUARMState, cp15.ttbr0_ns) },
       .writefn = vmsa_ttbr_write, .resetfn = arm_cp_reset_ignore },
     { .name = "TTBR1", .cp = 15, .crm = 2, .opc1 = 1,
       .access = PL1_RW, .type = ARM_CP_64BIT | ARM_CP_NO_MIGRATE,
-      .fieldoffset = offsetof(CPUARMState, cp15.ttbr1_el1),
+      .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr1_s),
+                             offsetof(CPUARMState, cp15.ttbr1_ns) },
       .writefn = vmsa_ttbr_write, .resetfn = arm_cp_reset_ignore },
     REGINFO_SENTINEL
 };
@@ -1911,7 +1983,7 @@
 
 static CPAccessResult aa64_daif_access(CPUARMState *env, const ARMCPRegInfo *ri)
 {
-    if (arm_current_el(env) == 0 && !(env->cp15.c1_sys & SCTLR_UMA)) {
+    if (arm_current_el(env) == 0 && !(env->cp15.sctlr_el[1] & SCTLR_UMA)) {
         return CP_ACCESS_TRAP;
     }
     return CP_ACCESS_OK;
@@ -1929,7 +2001,7 @@
     /* Cache invalidate/clean: NOP, but EL0 must UNDEF unless
      * SCTLR_EL1.UCI is set.
      */
-    if (arm_current_el(env) == 0 && !(env->cp15.c1_sys & SCTLR_UCI)) {
+    if (arm_current_el(env) == 0 && !(env->cp15.sctlr_el[1] & SCTLR_UCI)) {
         return CP_ACCESS_TRAP;
     }
     return CP_ACCESS_OK;
@@ -2006,7 +2078,7 @@
     /* We don't implement EL2, so the only control on DC ZVA is the
      * bit in the SCTLR which can prohibit access for EL0.
      */
-    if (arm_current_el(env) == 0 && !(env->cp15.c1_sys & SCTLR_DZE)) {
+    if (arm_current_el(env) == 0 && !(env->cp15.sctlr_el[1] & SCTLR_DZE)) {
         return CP_ACCESS_TRAP;
     }
     return CP_ACCESS_OK;
@@ -2045,6 +2117,24 @@
     update_spsel(env, val);
 }
 
+static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                        uint64_t value)
+{
+    ARMCPU *cpu = arm_env_get_cpu(env);
+
+    if (raw_read(env, ri) == value) {
+        /* Skip the TLB flush if nothing actually changed; Linux likes
+         * to do a lot of pointless SCTLR writes.
+         */
+        return;
+    }
+
+    raw_write(env, ri, value);
+    /* ??? Lots of these bits are not implemented.  */
+    /* This may enable/disable the MMU, so do a TLB flush.  */
+    tlb_flush(CPU(cpu), 1);
+}
+
 static const ARMCPRegInfo v8_cp_reginfo[] = {
     /* Minimal set of EL0-visible registers. This will need to be expanded
      * significantly for system emulation of AArch64 CPUs.
@@ -2216,10 +2306,11 @@
     { .name = "DCCISW", .cp = 15, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 2,
       .type = ARM_CP_NOP, .access = PL1_W },
     /* MMU Domain access control / MPU write buffer control */
-    { .name = "DACR", .cp = 15,
-      .opc1 = 0, .crn = 3, .crm = 0, .opc2 = 0,
-      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c3),
-      .resetvalue = 0, .writefn = dacr_write, .raw_writefn = raw_write, },
+    { .name = "DACR", .cp = 15, .opc1 = 0, .crn = 3, .crm = 0, .opc2 = 0,
+      .access = PL1_RW, .resetvalue = 0,
+      .writefn = dacr_write, .raw_writefn = raw_write,
+      .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.dacr_s),
+                             offsetoflow32(CPUARMState, cp15.dacr_ns) } },
     { .name = "ELR_EL1", .state = ARM_CP_STATE_AA64,
       .type = ARM_CP_NO_MIGRATE,
       .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 0, .opc2 = 1,
@@ -2289,6 +2380,11 @@
       .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0,
       .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2),
       .writefn = hcr_write },
+    { .name = "DACR32_EL2", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 4, .crn = 3, .crm = 0, .opc2 = 0,
+      .access = PL2_RW, .resetvalue = 0,
+      .writefn = dacr_write, .raw_writefn = raw_write,
+      .fieldoffset = offsetof(CPUARMState, cp15.dacr32_el2) },
     { .name = "ELR_EL2", .state = ARM_CP_STATE_AA64,
       .type = ARM_CP_NO_MIGRATE,
       .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 1,
@@ -2298,6 +2394,10 @@
       .type = ARM_CP_NO_MIGRATE,
       .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 2, .opc2 = 0,
       .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.esr_el[2]) },
+    { .name = "IFSR32_EL2", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 0, .opc2 = 1,
+      .access = PL2_RW, .resetvalue = 0,
+      .fieldoffset = offsetof(CPUARMState, cp15.ifsr32_el2) },
     { .name = "FAR_EL2", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 0,
       .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.far_el[2]) },
@@ -2314,6 +2414,19 @@
 };
 
 static const ARMCPRegInfo v8_el3_cp_reginfo[] = {
+    { .name = "SCTLR_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 1, .crm = 0, .opc2 = 0,
+      .access = PL3_RW, .raw_writefn = raw_write, .writefn = sctlr_write,
+      .fieldoffset = offsetof(CPUARMState, cp15.sctlr_el[3]) },
+    { .name = "TTBR0_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 2, .crm = 0, .opc2 = 0,
+      .access = PL3_RW, .writefn = vmsa_ttbr_write, .resetvalue = 0,
+      .fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el[3]) },
+    { .name = "TCR_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 2, .crm = 0, .opc2 = 2,
+      .access = PL3_RW, .writefn = vmsa_tcr_el1_write,
+      .resetfn = vmsa_ttbcr_reset, .raw_writefn = raw_write,
+      .fieldoffset = offsetof(CPUARMState, cp15.tcr_el[3]) },
     { .name = "ELR_EL3", .state = ARM_CP_STATE_AA64,
       .type = ARM_CP_NO_MIGRATE,
       .opc0 = 3, .opc1 = 6, .crn = 4, .crm = 0, .opc2 = 1,
@@ -2335,38 +2448,42 @@
       .access = PL3_RW, .writefn = vbar_write,
       .fieldoffset = offsetof(CPUARMState, cp15.vbar_el[3]),
       .resetvalue = 0 },
-    { .name = "SCR_EL3", .state = ARM_CP_STATE_AA64,
-      .type = ARM_CP_NO_MIGRATE,
-      .opc0 = 3, .opc1 = 6, .crn = 1, .crm = 1, .opc2 = 0,
-      .access = PL3_RW, .fieldoffset = offsetof(CPUARMState, cp15.scr_el3),
-      .writefn = scr_write },
     REGINFO_SENTINEL
 };
 
-static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                        uint64_t value)
-{
-    ARMCPU *cpu = arm_env_get_cpu(env);
-
-    if (raw_read(env, ri) == value) {
-        /* Skip the TLB flush if nothing actually changed; Linux likes
-         * to do a lot of pointless SCTLR writes.
-         */
-        return;
-    }
-
-    raw_write(env, ri, value);
-    /* ??? Lots of these bits are not implemented.  */
-    /* This may enable/disable the MMU, so do a TLB flush.  */
-    tlb_flush(CPU(cpu), 1);
-}
+static const ARMCPRegInfo el3_cp_reginfo[] = {
+    { .name = "SCR_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 1, .crm = 1, .opc2 = 0,
+      .access = PL3_RW, .fieldoffset = offsetof(CPUARMState, cp15.scr_el3),
+      .resetvalue = 0, .writefn = scr_write },
+    { .name = "SCR",  .type = ARM_CP_NO_MIGRATE,
+      .cp = 15, .opc1 = 0, .crn = 1, .crm = 1, .opc2 = 0,
+      .access = PL3_RW, .fieldoffset = offsetoflow32(CPUARMState, cp15.scr_el3),
+      .resetfn = arm_cp_reset_ignore, .writefn = scr_write },
+    { .name = "SDER32_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 1, .crm = 1, .opc2 = 1,
+      .access = PL3_RW, .resetvalue = 0,
+      .fieldoffset = offsetof(CPUARMState, cp15.sder) },
+    { .name = "SDER",
+      .cp = 15, .opc1 = 0, .crn = 1, .crm = 1, .opc2 = 1,
+      .access = PL3_RW, .resetvalue = 0,
+      .fieldoffset = offsetoflow32(CPUARMState, cp15.sder) },
+      /* TODO: Implement NSACR trapping of secure EL1 accesses to EL3 */
+    { .name = "NSACR", .cp = 15, .opc1 = 0, .crn = 1, .crm = 1, .opc2 = 2,
+      .access = PL3_W | PL1_R, .resetvalue = 0,
+      .fieldoffset = offsetof(CPUARMState, cp15.nsacr) },
+    { .name = "MVBAR", .cp = 15, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 1,
+      .access = PL3_RW, .writefn = vbar_write, .resetvalue = 0,
+      .fieldoffset = offsetof(CPUARMState, cp15.mvbar) },
+    REGINFO_SENTINEL
+};
 
 static CPAccessResult ctr_el0_access(CPUARMState *env, const ARMCPRegInfo *ri)
 {
     /* Only accessible in EL0 if SCTLR.UCT is set (and only in AArch64,
      * but the AArch32 CTR has its own reginfo struct)
      */
-    if (arm_current_el(env) == 0 && !(env->cp15.c1_sys & SCTLR_UCT)) {
+    if (arm_current_el(env) == 0 && !(env->cp15.sctlr_el[1] & SCTLR_UCT)) {
         return CP_ACCESS_TRAP;
     }
     return CP_ACCESS_OK;
@@ -2960,7 +3077,10 @@
         }
     }
     if (arm_feature(env, ARM_FEATURE_EL3)) {
-        define_arm_cp_regs(cpu, v8_el3_cp_reginfo);
+        if (arm_feature(env, ARM_FEATURE_V8)) {
+            define_arm_cp_regs(cpu, v8_el3_cp_reginfo);
+        }
+        define_arm_cp_regs(cpu, el3_cp_reginfo);
     }
     if (arm_feature(env, ARM_FEATURE_MPU)) {
         /* These are the MPU registers prior to PMSAv6. Any new
@@ -3160,8 +3280,10 @@
     {
         ARMCPRegInfo sctlr = {
             .name = "SCTLR", .state = ARM_CP_STATE_BOTH,
-            .opc0 = 3, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 0,
-            .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c1_sys),
+            .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 0,
+            .access = PL1_RW,
+            .bank_fieldoffsets = { offsetof(CPUARMState, cp15.sctlr_s),
+                                   offsetof(CPUARMState, cp15.sctlr_ns) },
             .writefn = sctlr_write, .resetvalue = cpu->reset_sctlr,
             .raw_writefn = raw_write,
         };
@@ -3287,7 +3409,7 @@
 }
 
 static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r,
-                                   void *opaque, int state,
+                                   void *opaque, int state, int secstate,
                                    int crm, int opc1, int opc2)
 {
     /* Private utility function for define_one_arm_cp_reg_with_opaque():
@@ -3296,22 +3418,59 @@
     uint32_t *key = g_new(uint32_t, 1);
     ARMCPRegInfo *r2 = g_memdup(r, sizeof(ARMCPRegInfo));
     int is64 = (r->type & ARM_CP_64BIT) ? 1 : 0;
-    if (r->state == ARM_CP_STATE_BOTH && state == ARM_CP_STATE_AA32) {
-        /* The AArch32 view of a shared register sees the lower 32 bits
-         * of a 64 bit backing field. It is not migratable as the AArch64
-         * view handles that. AArch64 also handles reset.
-         * We assume it is a cp15 register if the .cp field is left unset.
+    int ns = (secstate & ARM_CP_SECSTATE_NS) ? 1 : 0;
+
+    /* Reset the secure state to the specific incoming state.  This is
+     * necessary as the register may have been defined with both states.
+     */
+    r2->secure = secstate;
+
+    if (r->bank_fieldoffsets[0] && r->bank_fieldoffsets[1]) {
+        /* Register is banked (using both entries in array).
+         * Overwriting fieldoffset as the array is only used to define
+         * banked registers but later only fieldoffset is used.
          */
-        if (r2->cp == 0) {
-            r2->cp = 15;
+        r2->fieldoffset = r->bank_fieldoffsets[ns];
+    }
+
+    if (state == ARM_CP_STATE_AA32) {
+        if (r->bank_fieldoffsets[0] && r->bank_fieldoffsets[1]) {
+            /* If the register is banked then we don't need to migrate or
+             * reset the 32-bit instance in certain cases:
+             *
+             * 1) If the register has both 32-bit and 64-bit instances then we
+             *    can count on the 64-bit instance taking care of the
+             *    non-secure bank.
+             * 2) If ARMv8 is enabled then we can count on a 64-bit version
+             *    taking care of the secure bank.  This requires that separate
+             *    32 and 64-bit definitions are provided.
+             */
+            if ((r->state == ARM_CP_STATE_BOTH && ns) ||
+                (arm_feature(&cpu->env, ARM_FEATURE_V8) && !ns)) {
+                r2->type |= ARM_CP_NO_MIGRATE;
+                r2->resetfn = arm_cp_reset_ignore;
+            }
+        } else if ((secstate != r->secure) && !ns) {
+            /* The register is not banked so we only want to allow migration of
+             * the non-secure instance.
+             */
+            r2->type |= ARM_CP_NO_MIGRATE;
+            r2->resetfn = arm_cp_reset_ignore;
         }
-        r2->type |= ARM_CP_NO_MIGRATE;
-        r2->resetfn = arm_cp_reset_ignore;
+
+        if (r->state == ARM_CP_STATE_BOTH) {
+            /* We assume it is a cp15 register if the .cp field is left unset.
+             */
+            if (r2->cp == 0) {
+                r2->cp = 15;
+            }
+
 #ifdef HOST_WORDS_BIGENDIAN
-        if (r2->fieldoffset) {
-            r2->fieldoffset += sizeof(uint32_t);
-        }
+            if (r2->fieldoffset) {
+                r2->fieldoffset += sizeof(uint32_t);
+            }
 #endif
+        }
     }
     if (state == ARM_CP_STATE_AA64) {
         /* To allow abbreviation of ARMCPRegInfo
@@ -3327,7 +3486,7 @@
         *key = ENCODE_AA64_CP_REG(r2->cp, r2->crn, crm,
                                   r2->opc0, opc1, opc2);
     } else {
-        *key = ENCODE_CP_REG(r2->cp, is64, r2->crn, crm, opc1, opc2);
+        *key = ENCODE_CP_REG(r2->cp, is64, ns, r2->crn, crm, opc1, opc2);
     }
     if (opaque) {
         r2->opaque = opaque;
@@ -3460,10 +3619,14 @@
      */
     if (!(r->type & (ARM_CP_SPECIAL|ARM_CP_CONST))) {
         if (r->access & PL3_R) {
-            assert(r->fieldoffset || r->readfn);
+            assert((r->fieldoffset ||
+                   (r->bank_fieldoffsets[0] && r->bank_fieldoffsets[1])) ||
+                   r->readfn);
         }
         if (r->access & PL3_W) {
-            assert(r->fieldoffset || r->writefn);
+            assert((r->fieldoffset ||
+                   (r->bank_fieldoffsets[0] && r->bank_fieldoffsets[1])) ||
+                   r->writefn);
         }
     }
     /* Bad type field probably means missing sentinel at end of reg list */
@@ -3476,8 +3639,32 @@
                     if (r->state != state && r->state != ARM_CP_STATE_BOTH) {
                         continue;
                     }
-                    add_cpreg_to_hashtable(cpu, r, opaque, state,
-                                           crm, opc1, opc2);
+                    if (state == ARM_CP_STATE_AA32) {
+                        /* Under AArch32 CP registers can be common
+                         * (same for secure and non-secure world) or banked.
+                         */
+                        switch (r->secure) {
+                        case ARM_CP_SECSTATE_S:
+                        case ARM_CP_SECSTATE_NS:
+                            add_cpreg_to_hashtable(cpu, r, opaque, state,
+                                                   r->secure, crm, opc1, opc2);
+                            break;
+                        default:
+                            add_cpreg_to_hashtable(cpu, r, opaque, state,
+                                                   ARM_CP_SECSTATE_S,
+                                                   crm, opc1, opc2);
+                            add_cpreg_to_hashtable(cpu, r, opaque, state,
+                                                   ARM_CP_SECSTATE_NS,
+                                                   crm, opc1, opc2);
+                            break;
+                        }
+                    } else {
+                        /* AArch64 registers get mapped to non-secure instance
+                         * of AArch32 */
+                        add_cpreg_to_hashtable(cpu, r, opaque, state,
+                                               ARM_CP_SECSTATE_NS,
+                                               crm, opc1, opc2);
+                    }
                 }
             }
         }
@@ -3551,6 +3738,8 @@
 
 void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask)
 {
+    uint32_t changed_daif;
+
     if (mask & CPSR_NZCV) {
         env->ZF = (~val) & CPSR_Z;
         env->NF = val;
@@ -3573,6 +3762,58 @@
         env->GE = (val >> 16) & 0xf;
     }
 
+    /* In a V7 implementation that includes the security extensions but does
+     * not include Virtualization Extensions the SCR.FW and SCR.AW bits control
+     * whether non-secure software is allowed to change the CPSR_F and CPSR_A
+     * bits respectively.
+     *
+     * In a V8 implementation, it is permitted for privileged software to
+     * change the CPSR A/F bits regardless of the SCR.AW/FW bits.
+     */
+    if (!arm_feature(env, ARM_FEATURE_V8) &&
+        arm_feature(env, ARM_FEATURE_EL3) &&
+        !arm_feature(env, ARM_FEATURE_EL2) &&
+        !arm_is_secure(env)) {
+
+        changed_daif = (env->daif ^ val) & mask;
+
+        if (changed_daif & CPSR_A) {
+            /* Check to see if we are allowed to change the masking of async
+             * abort exceptions from a non-secure state.
+             */
+            if (!(env->cp15.scr_el3 & SCR_AW)) {
+                qemu_log_mask(LOG_GUEST_ERROR,
+                              "Ignoring attempt to switch CPSR_A flag from "
+                              "non-secure world with SCR.AW bit clear\n");
+                mask &= ~CPSR_A;
+            }
+        }
+
+        if (changed_daif & CPSR_F) {
+            /* Check to see if we are allowed to change the masking of FIQ
+             * exceptions from a non-secure state.
+             */
+            if (!(env->cp15.scr_el3 & SCR_FW)) {
+                qemu_log_mask(LOG_GUEST_ERROR,
+                              "Ignoring attempt to switch CPSR_F flag from "
+                              "non-secure world with SCR.FW bit clear\n");
+                mask &= ~CPSR_F;
+            }
+
+            /* Check whether non-maskable FIQ (NMFI) support is enabled.
+             * If this bit is set software is not allowed to mask
+             * FIQs, but is allowed to set CPSR_F to 0.
+             */
+            if ((A32_BANKED_CURRENT_REG_GET(env, sctlr) & SCTLR_NMFI) &&
+                (val & CPSR_F)) {
+                qemu_log_mask(LOG_GUEST_ERROR,
+                              "Ignoring attempt to enable CPSR_F flag "
+                              "(non-maskable FIQ [NMFI] support enabled)\n");
+                mask &= ~CPSR_F;
+            }
+        }
+    }
+
     env->daif &= ~(CPSR_AIF & mask);
     env->daif |= val & CPSR_AIF & mask;
 
@@ -3761,6 +4002,101 @@
     env->spsr = env->banked_spsr[i];
 }
 
+/* Physical Interrupt Target EL Lookup Table
+ *
+ * [ From ARM ARM section G1.13.4 (Table G1-15) ]
+ *
+ * The below multi-dimensional table is used for looking up the target
+ * exception level given numerous condition criteria.  Specifically, the
+ * target EL is based on SCR and HCR routing controls as well as the
+ * currently executing EL and secure state.
+ *
+ *    Dimensions:
+ *    target_el_table[2][2][2][2][2][4]
+ *                    |  |  |  |  |  +--- Current EL
+ *                    |  |  |  |  +------ Non-secure(0)/Secure(1)
+ *                    |  |  |  +--------- HCR mask override
+ *                    |  |  +------------ SCR exec state control
+ *                    |  +--------------- SCR mask override
+ *                    +------------------ 32-bit(0)/64-bit(1) EL3
+ *
+ *    The table values are as such:
+ *    0-3 = EL0-EL3
+ *     -1 = Cannot occur
+ *
+ * The ARM ARM target EL table includes entries indicating that an "exception
+ * is not taken".  The two cases where this is applicable are:
+ *    1) An exception is taken from EL3 but the SCR does not have the exception
+ *    routed to EL3.
+ *    2) An exception is taken from EL2 but the HCR does not have the exception
+ *    routed to EL2.
+ * In these two cases, the below table contain a target of EL1.  This value is
+ * returned as it is expected that the consumer of the table data will check
+ * for "target EL >= current EL" to ensure the exception is not taken.
+ *
+ *            SCR     HCR
+ *         64  EA     AMO                 From
+ *        BIT IRQ     IMO      Non-secure         Secure
+ *        EL3 FIQ  RW FMO   EL0 EL1 EL2 EL3   EL0 EL1 EL2 EL3
+ */
+const int8_t target_el_table[2][2][2][2][2][4] = {
+    {{{{/* 0   0   0   0 */{ 1,  1,  2, -1 },{ 3, -1, -1,  3 },},
+       {/* 0   0   0   1 */{ 2,  2,  2, -1 },{ 3, -1, -1,  3 },},},
+      {{/* 0   0   1   0 */{ 1,  1,  2, -1 },{ 3, -1, -1,  3 },},
+       {/* 0   0   1   1 */{ 2,  2,  2, -1 },{ 3, -1, -1,  3 },},},},
+     {{{/* 0   1   0   0 */{ 3,  3,  3, -1 },{ 3, -1, -1,  3 },},
+       {/* 0   1   0   1 */{ 3,  3,  3, -1 },{ 3, -1, -1,  3 },},},
+      {{/* 0   1   1   0 */{ 3,  3,  3, -1 },{ 3, -1, -1,  3 },},
+       {/* 0   1   1   1 */{ 3,  3,  3, -1 },{ 3, -1, -1,  3 },},},},},
+    {{{{/* 1   0   0   0 */{ 1,  1,  2, -1 },{ 1,  1, -1,  1 },},
+       {/* 1   0   0   1 */{ 2,  2,  2, -1 },{ 1,  1, -1,  1 },},},
+      {{/* 1   0   1   0 */{ 1,  1,  1, -1 },{ 1,  1, -1,  1 },},
+       {/* 1   0   1   1 */{ 2,  2,  2, -1 },{ 1,  1, -1,  1 },},},},
+     {{{/* 1   1   0   0 */{ 3,  3,  3, -1 },{ 3,  3, -1,  3 },},
+       {/* 1   1   0   1 */{ 3,  3,  3, -1 },{ 3,  3, -1,  3 },},},
+      {{/* 1   1   1   0 */{ 3,  3,  3, -1 },{ 3,  3, -1,  3 },},
+       {/* 1   1   1   1 */{ 3,  3,  3, -1 },{ 3,  3, -1,  3 },},},},},
+};
+
+/*
+ * Determine the target EL for physical exceptions
+ */
+static inline uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx,
+                                        uint32_t cur_el, bool secure)
+{
+    CPUARMState *env = cs->env_ptr;
+    int rw = ((env->cp15.scr_el3 & SCR_RW) == SCR_RW);
+    int scr;
+    int hcr;
+    int target_el;
+    int is64 = arm_el_is_aa64(env, 3);
+
+    switch (excp_idx) {
+    case EXCP_IRQ:
+        scr = ((env->cp15.scr_el3 & SCR_IRQ) == SCR_IRQ);
+        hcr = ((env->cp15.hcr_el2 & HCR_IMO) == HCR_IMO);
+        break;
+    case EXCP_FIQ:
+        scr = ((env->cp15.scr_el3 & SCR_FIQ) == SCR_FIQ);
+        hcr = ((env->cp15.hcr_el2 & HCR_FMO) == HCR_FMO);
+        break;
+    default:
+        scr = ((env->cp15.scr_el3 & SCR_EA) == SCR_EA);
+        hcr = ((env->cp15.hcr_el2 & HCR_AMO) == HCR_AMO);
+        break;
+    };
+
+    /* If HCR.TGE is set then HCR is treated as being 1 */
+    hcr |= ((env->cp15.hcr_el2 & HCR_TGE) == HCR_TGE);
+
+    /* Perform a table-lookup for the target EL given the current state */
+    target_el = target_el_table[is64][scr][rw][hcr][secure][cur_el];
+
+    assert(target_el > 0);
+
+    return target_el;
+}
+
 /*
  * Determine the target EL for a given exception type.
  */
@@ -3770,13 +4106,7 @@
     CPUARMState *env = &cpu->env;
     unsigned int cur_el = arm_current_el(env);
     unsigned int target_el;
-    /* FIXME: Use actual secure state.  */
-    bool secure = false;
-
-    if (!env->aarch64) {
-        /* TODO: Add EL2 and 3 exception handling for AArch32.  */
-        return 1;
-    }
+    bool secure = arm_is_secure(env);
 
     switch (excp_idx) {
     case EXCP_HVC:
@@ -3788,19 +4118,8 @@
         break;
     case EXCP_FIQ:
     case EXCP_IRQ:
-    {
-        const uint64_t hcr_mask = excp_idx == EXCP_FIQ ? HCR_FMO : HCR_IMO;
-        const uint32_t scr_mask = excp_idx == EXCP_FIQ ? SCR_FIQ : SCR_IRQ;
-
-        target_el = 1;
-        if (!secure && (env->cp15.hcr_el2 & hcr_mask)) {
-            target_el = 2;
-        }
-        if (env->cp15.scr_el3 & scr_mask) {
-            target_el = 3;
-        }
+        target_el = arm_phys_excp_target_el(cs, excp_idx, cur_el, secure);
         break;
-    }
     case EXCP_VIRQ:
     case EXCP_VFIQ:
         target_el = 1;
@@ -4055,22 +4374,20 @@
         env->exception.fsr = 2;
         /* Fall through to prefetch abort.  */
     case EXCP_PREFETCH_ABORT:
-        env->cp15.ifsr_el2 = env->exception.fsr;
-        env->cp15.far_el[1] = deposit64(env->cp15.far_el[1], 32, 32,
-                                        env->exception.vaddress);
+        A32_BANKED_CURRENT_REG_SET(env, ifsr, env->exception.fsr);
+        A32_BANKED_CURRENT_REG_SET(env, ifar, env->exception.vaddress);
         qemu_log_mask(CPU_LOG_INT, "...with IFSR 0x%x IFAR 0x%x\n",
-                      env->cp15.ifsr_el2, (uint32_t)env->exception.vaddress);
+                      env->exception.fsr, (uint32_t)env->exception.vaddress);
         new_mode = ARM_CPU_MODE_ABT;
         addr = 0x0c;
         mask = CPSR_A | CPSR_I;
         offset = 4;
         break;
     case EXCP_DATA_ABORT:
-        env->cp15.esr_el[1] = env->exception.fsr;
-        env->cp15.far_el[1] = deposit64(env->cp15.far_el[1], 0, 32,
-                                        env->exception.vaddress);
+        A32_BANKED_CURRENT_REG_SET(env, dfsr, env->exception.fsr);
+        A32_BANKED_CURRENT_REG_SET(env, dfar, env->exception.vaddress);
         qemu_log_mask(CPU_LOG_INT, "...with DFSR 0x%x DFAR 0x%x\n",
-                      (uint32_t)env->cp15.esr_el[1],
+                      env->exception.fsr,
                       (uint32_t)env->exception.vaddress);
         new_mode = ARM_CPU_MODE_ABT;
         addr = 0x10;
@@ -4083,12 +4400,21 @@
         /* Disable IRQ and imprecise data aborts.  */
         mask = CPSR_A | CPSR_I;
         offset = 4;
+        if (env->cp15.scr_el3 & SCR_IRQ) {
+            /* IRQ routed to monitor mode */
+            new_mode = ARM_CPU_MODE_MON;
+            mask |= CPSR_F;
+        }
         break;
     case EXCP_FIQ:
         new_mode = ARM_CPU_MODE_FIQ;
         addr = 0x1c;
         /* Disable FIQ, IRQ and imprecise data aborts.  */
         mask = CPSR_A | CPSR_I | CPSR_F;
+        if (env->cp15.scr_el3 & SCR_FIQ) {
+            /* FIQ routed to monitor mode */
+            new_mode = ARM_CPU_MODE_MON;
+        }
         offset = 4;
         break;
     case EXCP_SMC:
@@ -4101,19 +4427,19 @@
         cpu_abort(cs, "Unhandled exception 0x%x\n", cs->exception_index);
         return; /* Never happens.  Keep compiler happy.  */
     }
-    /* High vectors.  */
-    if (env->cp15.c1_sys & SCTLR_V) {
-        /* when enabled, base address cannot be remapped.  */
+
+    if (new_mode == ARM_CPU_MODE_MON) {
+        addr += env->cp15.mvbar;
+    } else if (A32_BANKED_CURRENT_REG_GET(env, sctlr) & SCTLR_V) {
+        /* High vectors. When enabled, base address cannot be remapped. */
         addr += 0xffff0000;
     } else {
         /* ARM v7 architectures provide a vector base address register to remap
          * the interrupt vector table.
-         * This register is only followed in non-monitor mode, and has a secure
-         * and un-secure copy. Since the cpu is always in a un-secure operation
-         * and is never in monitor mode this feature is always active.
+         * This register is only followed in non-monitor mode, and is banked.
          * Note: only bits 31:5 are valid.
          */
-        addr += env->cp15.vbar_el[1];
+        addr += A32_BANKED_CURRENT_REG_GET(env, vbar);
     }
 
     if ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_MON) {
@@ -4134,7 +4460,7 @@
     /* this is a lie, as the was no c1_sys on V4T/V5, but who cares
      * and we should just guard the thumb mode on V4 */
     if (arm_feature(env, ARM_FEATURE_V4T)) {
-        env->thumb = (env->cp15.c1_sys & SCTLR_TE) != 0;
+        env->thumb = (A32_BANKED_CURRENT_REG_GET(env, sctlr) & SCTLR_TE) != 0;
     }
     env->regs[14] = env->regs[15] + offset;
     env->regs[15] = addr;
@@ -4165,7 +4491,7 @@
       }
       if (access_type == 1)
           return 0;
-      switch (env->cp15.c1_sys & (SCTLR_S | SCTLR_R)) {
+      switch (A32_BANKED_CURRENT_REG_GET(env, sctlr) & (SCTLR_S | SCTLR_R)) {
       case SCTLR_S:
           return is_user ? 0 : PAGE_READ;
       case SCTLR_R:
@@ -4200,18 +4526,25 @@
 static bool get_level1_table_address(CPUARMState *env, uint32_t *table,
                                          uint32_t address)
 {
-    if (address & env->cp15.c2_mask) {
-        if ((env->cp15.c2_control & TTBCR_PD1)) {
+    /* Get the TCR bank based on our security state */
+    TCR *tcr = &env->cp15.tcr_el[arm_is_secure(env) ? 3 : 1];
+
+    /* We only get here if EL1 is running in AArch32. If EL3 is running in
+     * AArch32 there is a secure and non-secure instance of the translation
+     * table registers.
+     */
+    if (address & tcr->mask) {
+        if (tcr->raw_tcr & TTBCR_PD1) {
             /* Translation table walk disabled for TTBR1 */
             return false;
         }
-        *table = env->cp15.ttbr1_el1 & 0xffffc000;
+        *table = A32_BANKED_CURRENT_REG_GET(env, ttbr1) & 0xffffc000;
     } else {
-        if ((env->cp15.c2_control & TTBCR_PD0)) {
+        if (tcr->raw_tcr & TTBCR_PD0) {
             /* Translation table walk disabled for TTBR0 */
             return false;
         }
-        *table = env->cp15.ttbr0_el1 & env->cp15.c2_base_mask;
+        *table = A32_BANKED_CURRENT_REG_GET(env, ttbr0) & tcr->base_mask;
     }
     *table |= (address >> 18) & 0x3ffc;
     return true;
@@ -4241,7 +4574,7 @@
     desc = ldl_phys(cs->as, table);
     type = (desc & 3);
     domain = (desc >> 5) & 0x0f;
-    domain_prot = (env->cp15.c3 >> (domain * 2)) & 3;
+    domain_prot = (A32_BANKED_CURRENT_REG_GET(env, dacr) >> (domain * 2)) & 3;
     if (type == 0) {
         /* Section translation fault.  */
         code = 5;
@@ -4353,7 +4686,7 @@
         /* Page or Section.  */
         domain = (desc >> 5) & 0x0f;
     }
-    domain_prot = (env->cp15.c3 >> (domain * 2)) & 3;
+    domain_prot = (A32_BANKED_CURRENT_REG_GET(env, dacr) >> (domain * 2)) & 3;
     if (domain_prot == 0 || domain_prot == 2) {
         if (type != 1) {
             code = 9; /* Section domain fault.  */
@@ -4414,7 +4747,8 @@
             goto do_fault;
 
         /* The simplified model uses AP[0] as an access control bit.  */
-        if ((env->cp15.c1_sys & SCTLR_AFE) && (ap & 1) == 0) {
+        if ((A32_BANKED_CURRENT_REG_GET(env, sctlr) & SCTLR_AFE)
+                && (ap & 1) == 0) {
             /* Access flag fault.  */
             code = (code == 15) ? 6 : 3;
             goto do_fault;
@@ -4464,13 +4798,14 @@
     int32_t granule_sz = 9;
     int32_t va_size = 32;
     int32_t tbi = 0;
+    TCR *tcr = &env->cp15.tcr_el[arm_is_secure(env) ? 3 : 1];
 
     if (arm_el_is_aa64(env, 1)) {
         va_size = 64;
         if (extract64(address, 55, 1))
-            tbi = extract64(env->cp15.c2_control, 38, 1);
+            tbi = extract64(tcr->raw_tcr, 38, 1);
         else
-            tbi = extract64(env->cp15.c2_control, 37, 1);
+            tbi = extract64(tcr->raw_tcr, 37, 1);
         tbi *= 8;
     }
 
@@ -4479,12 +4814,12 @@
      * This is a Non-secure PL0/1 stage 1 translation, so controlled by
      * TTBCR/TTBR0/TTBR1 in accordance with ARM ARM DDI0406C table B-32:
      */
-    uint32_t t0sz = extract32(env->cp15.c2_control, 0, 6);
+    uint32_t t0sz = extract32(tcr->raw_tcr, 0, 6);
     if (arm_el_is_aa64(env, 1)) {
         t0sz = MIN(t0sz, 39);
         t0sz = MAX(t0sz, 16);
     }
-    uint32_t t1sz = extract32(env->cp15.c2_control, 16, 6);
+    uint32_t t1sz = extract32(tcr->raw_tcr, 16, 6);
     if (arm_el_is_aa64(env, 1)) {
         t1sz = MIN(t1sz, 39);
         t1sz = MAX(t1sz, 16);
@@ -4515,11 +4850,11 @@
      * we will always flush the TLB any time the ASID is changed).
      */
     if (ttbr_select == 0) {
-        ttbr = env->cp15.ttbr0_el1;
-        epd = extract32(env->cp15.c2_control, 7, 1);
+        ttbr = A32_BANKED_CURRENT_REG_GET(env, ttbr0);
+        epd = extract32(tcr->raw_tcr, 7, 1);
         tsz = t0sz;
 
-        tg = extract32(env->cp15.c2_control, 14, 2);
+        tg = extract32(tcr->raw_tcr, 14, 2);
         if (tg == 1) { /* 64KB pages */
             granule_sz = 13;
         }
@@ -4527,11 +4862,11 @@
             granule_sz = 11;
         }
     } else {
-        ttbr = env->cp15.ttbr1_el1;
-        epd = extract32(env->cp15.c2_control, 23, 1);
+        ttbr = A32_BANKED_CURRENT_REG_GET(env, ttbr1);
+        epd = extract32(tcr->raw_tcr, 23, 1);
         tsz = t1sz;
 
-        tg = extract32(env->cp15.c2_control, 30, 2);
+        tg = extract32(tcr->raw_tcr, 30, 2);
         if (tg == 3)  { /* 64KB pages */
             granule_sz = 13;
         }
@@ -4747,11 +5082,17 @@
                                 hwaddr *phys_ptr, int *prot,
                                 target_ulong *page_size)
 {
-    /* Fast Context Switch Extension.  */
-    if (address < 0x02000000)
-        address += env->cp15.c13_fcse;
+    /* This is not entirely correct as get_phys_addr() can also be called
+     * from ats_write() for an address translation of a specific regime.
+     */
+    uint32_t sctlr = A32_BANKED_CURRENT_REG_GET(env, sctlr);
 
-    if ((env->cp15.c1_sys & SCTLR_M) == 0) {
+    /* Fast Context Switch Extension.  */
+    if (address < 0x02000000) {
+        address += A32_BANKED_CURRENT_REG_GET(env, fcseidr);
+    }
+
+    if ((sctlr & SCTLR_M) == 0) {
         /* MMU/MPU disabled.  */
         *phys_ptr = address;
         *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
@@ -4764,7 +5105,7 @@
     } else if (extended_addresses_enabled(env)) {
         return get_phys_addr_lpae(env, address, access_type, is_user, phys_ptr,
                                   prot, page_size);
-    } else if (env->cp15.c1_sys & SCTLR_XP) {
+    } else if (sctlr & SCTLR_XP) {
         return get_phys_addr_v6(env, address, access_type, is_user, phys_ptr,
                                 prot, page_size);
     } else {
diff --git a/target-arm/internals.h b/target-arm/internals.h
index 2dff4ff..bb171a7 100644
--- a/target-arm/internals.h
+++ b/target-arm/internals.h
@@ -153,9 +153,9 @@
  */
 static inline bool extended_addresses_enabled(CPUARMState *env)
 {
-    return arm_el_is_aa64(env, 1)
-        || ((arm_feature(env, ARM_FEATURE_LPAE)
-             && (env->cp15.c2_control & TTBCR_EAE)));
+    TCR *tcr = &env->cp15.tcr_el[arm_is_secure(env) ? 3 : 1];
+    return arm_el_is_aa64(env, 1) ||
+           (arm_feature(env, ARM_FEATURE_LPAE) && (tcr->raw_tcr & TTBCR_EAE));
 }
 
 /* Valid Syndrome Register EC field values */
diff --git a/target-arm/kvm.c b/target-arm/kvm.c
index 319784d..4d81f3d 100644
--- a/target-arm/kvm.c
+++ b/target-arm/kvm.c
@@ -21,6 +21,7 @@
 #include "sysemu/kvm.h"
 #include "kvm_arm.h"
 #include "cpu.h"
+#include "internals.h"
 #include "hw/arm/arm.h"
 
 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
@@ -279,6 +280,94 @@
     memory_region_ref(kd->mr);
 }
 
+static int compare_u64(const void *a, const void *b)
+{
+    if (*(uint64_t *)a > *(uint64_t *)b) {
+        return 1;
+    }
+    if (*(uint64_t *)a < *(uint64_t *)b) {
+        return -1;
+    }
+    return 0;
+}
+
+/* Initialize the CPUState's cpreg list according to the kernel's
+ * definition of what CPU registers it knows about (and throw away
+ * the previous TCG-created cpreg list).
+ */
+int kvm_arm_init_cpreg_list(ARMCPU *cpu)
+{
+    struct kvm_reg_list rl;
+    struct kvm_reg_list *rlp;
+    int i, ret, arraylen;
+    CPUState *cs = CPU(cpu);
+
+    rl.n = 0;
+    ret = kvm_vcpu_ioctl(cs, KVM_GET_REG_LIST, &rl);
+    if (ret != -E2BIG) {
+        return ret;
+    }
+    rlp = g_malloc(sizeof(struct kvm_reg_list) + rl.n * sizeof(uint64_t));
+    rlp->n = rl.n;
+    ret = kvm_vcpu_ioctl(cs, KVM_GET_REG_LIST, rlp);
+    if (ret) {
+        goto out;
+    }
+    /* Sort the list we get back from the kernel, since cpreg_tuples
+     * must be in strictly ascending order.
+     */
+    qsort(&rlp->reg, rlp->n, sizeof(rlp->reg[0]), compare_u64);
+
+    for (i = 0, arraylen = 0; i < rlp->n; i++) {
+        if (!kvm_arm_reg_syncs_via_cpreg_list(rlp->reg[i])) {
+            continue;
+        }
+        switch (rlp->reg[i] & KVM_REG_SIZE_MASK) {
+        case KVM_REG_SIZE_U32:
+        case KVM_REG_SIZE_U64:
+            break;
+        default:
+            fprintf(stderr, "Can't handle size of register in kernel list\n");
+            ret = -EINVAL;
+            goto out;
+        }
+
+        arraylen++;
+    }
+
+    cpu->cpreg_indexes = g_renew(uint64_t, cpu->cpreg_indexes, arraylen);
+    cpu->cpreg_values = g_renew(uint64_t, cpu->cpreg_values, arraylen);
+    cpu->cpreg_vmstate_indexes = g_renew(uint64_t, cpu->cpreg_vmstate_indexes,
+                                         arraylen);
+    cpu->cpreg_vmstate_values = g_renew(uint64_t, cpu->cpreg_vmstate_values,
+                                        arraylen);
+    cpu->cpreg_array_len = arraylen;
+    cpu->cpreg_vmstate_array_len = arraylen;
+
+    for (i = 0, arraylen = 0; i < rlp->n; i++) {
+        uint64_t regidx = rlp->reg[i];
+        if (!kvm_arm_reg_syncs_via_cpreg_list(regidx)) {
+            continue;
+        }
+        cpu->cpreg_indexes[arraylen] = regidx;
+        arraylen++;
+    }
+    assert(cpu->cpreg_array_len == arraylen);
+
+    if (!write_kvmstate_to_list(cpu)) {
+        /* Shouldn't happen unless kernel is inconsistent about
+         * what registers exist.
+         */
+        fprintf(stderr, "Initial read of kernel register state failed\n");
+        ret = -EINVAL;
+        goto out;
+    }
+
+out:
+    g_free(rlp);
+    return ret;
+}
+
 bool write_kvmstate_to_list(ARMCPU *cpu)
 {
     CPUState *cs = CPU(cpu);
@@ -351,6 +440,24 @@
     return ok;
 }
 
+void kvm_arm_reset_vcpu(ARMCPU *cpu)
+{
+    int ret;
+
+    /* Re-init VCPU so that all registers are set to
+     * their respective reset values.
+     */
+    ret = kvm_arm_vcpu_init(CPU(cpu));
+    if (ret < 0) {
+        fprintf(stderr, "kvm_arm_vcpu_init failed: %s\n", strerror(-ret));
+        abort();
+    }
+    if (!write_kvmstate_to_list(cpu)) {
+        fprintf(stderr, "write_kvmstate_to_list failed\n");
+        abort();
+    }
+}
+
 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
 {
 }
diff --git a/target-arm/kvm32.c b/target-arm/kvm32.c
index 5ec4eb1..94030d1 100644
--- a/target-arm/kvm32.c
+++ b/target-arm/kvm32.c
@@ -51,17 +51,17 @@
     struct kvm_one_reg idregs[] = {
         {
             .id = KVM_REG_ARM | KVM_REG_SIZE_U32
-            | ENCODE_CP_REG(15, 0, 0, 0, 0, 0),
+            | ENCODE_CP_REG(15, 0, 0, 0, 0, 0, 0),
             .addr = (uintptr_t)&midr,
         },
         {
             .id = KVM_REG_ARM | KVM_REG_SIZE_U32
-            | ENCODE_CP_REG(15, 0, 0, 1, 0, 0),
+            | ENCODE_CP_REG(15, 0, 0, 0, 1, 0, 0),
             .addr = (uintptr_t)&id_pfr0,
         },
         {
             .id = KVM_REG_ARM | KVM_REG_SIZE_U32
-            | ENCODE_CP_REG(15, 0, 0, 2, 0, 0),
+            | ENCODE_CP_REG(15, 0, 0, 0, 2, 0, 0),
             .addr = (uintptr_t)&id_isar0,
         },
         {
@@ -138,7 +138,7 @@
     return true;
 }
 
-static bool reg_syncs_via_tuple_list(uint64_t regidx)
+bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx)
 {
     /* Return true if the regidx is a register we should synchronize
      * via the cpreg_tuples array (ie is not a core reg we sync by
@@ -153,24 +153,11 @@
     }
 }
 
-static int compare_u64(const void *a, const void *b)
-{
-    if (*(uint64_t *)a > *(uint64_t *)b) {
-        return 1;
-    }
-    if (*(uint64_t *)a < *(uint64_t *)b) {
-        return -1;
-    }
-    return 0;
-}
-
 int kvm_arch_init_vcpu(CPUState *cs)
 {
-    int i, ret, arraylen;
+    int ret;
     uint64_t v;
     struct kvm_one_reg r;
-    struct kvm_reg_list rl;
-    struct kvm_reg_list *rlp;
     ARMCPU *cpu = ARM_CPU(cs);
 
     if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE) {
@@ -206,73 +193,7 @@
         return -EINVAL;
     }
 
-    /* Populate the cpreg list based on the kernel's idea
-     * of what registers exist (and throw away the TCG-created list).
-     */
-    rl.n = 0;
-    ret = kvm_vcpu_ioctl(cs, KVM_GET_REG_LIST, &rl);
-    if (ret != -E2BIG) {
-        return ret;
-    }
-    rlp = g_malloc(sizeof(struct kvm_reg_list) + rl.n * sizeof(uint64_t));
-    rlp->n = rl.n;
-    ret = kvm_vcpu_ioctl(cs, KVM_GET_REG_LIST, rlp);
-    if (ret) {
-        goto out;
-    }
-    /* Sort the list we get back from the kernel, since cpreg_tuples
-     * must be in strictly ascending order.
-     */
-    qsort(&rlp->reg, rlp->n, sizeof(rlp->reg[0]), compare_u64);
-
-    for (i = 0, arraylen = 0; i < rlp->n; i++) {
-        if (!reg_syncs_via_tuple_list(rlp->reg[i])) {
-            continue;
-        }
-        switch (rlp->reg[i] & KVM_REG_SIZE_MASK) {
-        case KVM_REG_SIZE_U32:
-        case KVM_REG_SIZE_U64:
-            break;
-        default:
-            fprintf(stderr, "Can't handle size of register in kernel list\n");
-            ret = -EINVAL;
-            goto out;
-        }
-
-        arraylen++;
-    }
-
-    cpu->cpreg_indexes = g_renew(uint64_t, cpu->cpreg_indexes, arraylen);
-    cpu->cpreg_values = g_renew(uint64_t, cpu->cpreg_values, arraylen);
-    cpu->cpreg_vmstate_indexes = g_renew(uint64_t, cpu->cpreg_vmstate_indexes,
-                                         arraylen);
-    cpu->cpreg_vmstate_values = g_renew(uint64_t, cpu->cpreg_vmstate_values,
-                                        arraylen);
-    cpu->cpreg_array_len = arraylen;
-    cpu->cpreg_vmstate_array_len = arraylen;
-
-    for (i = 0, arraylen = 0; i < rlp->n; i++) {
-        uint64_t regidx = rlp->reg[i];
-        if (!reg_syncs_via_tuple_list(regidx)) {
-            continue;
-        }
-        cpu->cpreg_indexes[arraylen] = regidx;
-        arraylen++;
-    }
-    assert(cpu->cpreg_array_len == arraylen);
-
-    if (!write_kvmstate_to_list(cpu)) {
-        /* Shouldn't happen unless kernel is inconsistent about
-         * what registers exist.
-         */
-        fprintf(stderr, "Initial read of kernel register state failed\n");
-        ret = -EINVAL;
-        goto out;
-    }
-
-out:
-    g_free(rlp);
-    return ret;
+    return kvm_arm_init_cpreg_list(cpu);
 }
 
 typedef struct Reg {
@@ -508,12 +429,3 @@
 
     return 0;
 }
-
-void kvm_arm_reset_vcpu(ARMCPU *cpu)
-{
-    /* Re-init VCPU so that all registers are set to
-     * their respective reset values.
-     */
-    kvm_arm_vcpu_init(CPU(cpu));
-    write_kvmstate_to_list(cpu);
-}
diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c
index c615286..ba16821 100644
--- a/target-arm/kvm64.c
+++ b/target-arm/kvm64.c
@@ -103,9 +103,21 @@
         return ret;
     }
 
-    /* TODO : support for save/restore/reset of system regs via tuple list */
+    return kvm_arm_init_cpreg_list(cpu);
+}
 
-    return 0;
+bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx)
+{
+    /* Return true if the regidx is a register we should synchronize
+     * via the cpreg_tuples array (ie is not a core reg we sync by
+     * hand in kvm_arch_get/put_registers())
+     */
+    switch (regidx & KVM_REG_ARM_COPROC_MASK) {
+    case KVM_REG_ARM_CORE:
+        return false;
+    default:
+        return true;
+    }
 }
 
 #define AARCH64_CORE_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
@@ -260,11 +272,3 @@
     /* TODO: other registers */
     return ret;
 }
-
-void kvm_arm_reset_vcpu(ARMCPU *cpu)
-{
-    /* Re-init VCPU so that all registers are set to
-     * their respective reset values.
-     */
-    kvm_arm_vcpu_init(CPU(cpu));
-}
diff --git a/target-arm/kvm_arm.h b/target-arm/kvm_arm.h
index af93105..455dea3 100644
--- a/target-arm/kvm_arm.h
+++ b/target-arm/kvm_arm.h
@@ -47,6 +47,28 @@
                              uint64_t attr, int dev_fd);
 
 /**
+ * kvm_arm_init_cpreg_list:
+ * @cs: CPUState
+ *
+ * Initialize the CPUState's cpreg list according to the kernel's
+ * definition of what CPU registers it knows about (and throw away
+ * the previous TCG-created cpreg list).
+ *
+ * Returns: 0 if success, else < 0 error code
+ */
+int kvm_arm_init_cpreg_list(ARMCPU *cpu);
+
+/**
+ * kvm_arm_reg_syncs_via_cpreg_list
+ * regidx: KVM register index
+ *
+ * Return true if this KVM register should be synchronized via the
+ * cpreg list of arbitrary system registers, false if it is synchronized
+ * by hand using code in kvm_arch_get/put_registers().
+ */
+bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx);
+
+/**
  * write_list_to_kvmstate:
  * @cpu: ARMCPU
  *
diff --git a/target-arm/machine.c b/target-arm/machine.c
index 6437690..c29e7a2 100644
--- a/target-arm/machine.c
+++ b/target-arm/machine.c
@@ -127,6 +127,13 @@
     CPUARMState *env = &cpu->env;
     uint32_t val = qemu_get_be32(f);
 
+    env->aarch64 = ((val & PSTATE_nRW) == 0);
+
+    if (is_a64(env)) {
+        pstate_write(env, val);
+        return 0;
+    }
+
     /* Avoid mode switch when restoring CPSR */
     env->uncached_cpsr = val & CPSR_M;
     cpsr_write(env, val, 0xffffffff);
@@ -137,8 +144,15 @@
 {
     ARMCPU *cpu = opaque;
     CPUARMState *env = &cpu->env;
+    uint32_t val;
 
-    qemu_put_be32(f, cpsr_read(env));
+    if (is_a64(env)) {
+        val = pstate_read(env);
+    } else {
+        val = cpsr_read(env);
+    }
+
+    qemu_put_be32(f, val);
 }
 
 static const VMStateInfo vmstate_cpsr = {
@@ -222,12 +236,14 @@
 
 const VMStateDescription vmstate_arm_cpu = {
     .name = "cpu",
-    .version_id = 21,
-    .minimum_version_id = 21,
+    .version_id = 22,
+    .minimum_version_id = 22,
     .pre_save = cpu_pre_save,
     .post_load = cpu_post_load,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32_ARRAY(env.regs, ARMCPU, 16),
+        VMSTATE_UINT64_ARRAY(env.xregs, ARMCPU, 32),
+        VMSTATE_UINT64(env.pc, ARMCPU),
         {
             .name = "cpsr",
             .version_id = 0,
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index 62012c3..2bed914 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -361,7 +361,7 @@
      * Note that SPSel is never OK from EL0; we rely on handle_msr_i()
      * to catch that case at translate time.
      */
-    if (arm_current_el(env) == 0 && !(env->cp15.c1_sys & SCTLR_UMA)) {
+    if (arm_current_el(env) == 0 && !(env->cp15.sctlr_el[1] & SCTLR_UMA)) {
         raise_exception(env, EXCP_UDEF);
     }
 
@@ -575,7 +575,7 @@
      * short descriptor format (in which case it holds both PROCID and ASID),
      * since we don't implement the optional v7 context ID masking.
      */
-    contextidr = extract64(env->cp15.contextidr_el1, 0, 32);
+    contextidr = extract64(env->cp15.contextidr_el[1], 0, 32);
 
     switch (bt) {
     case 3: /* linked context ID match */
diff --git a/target-arm/translate.c b/target-arm/translate.c
index af51568..b52c758 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -7091,7 +7091,7 @@
     rt = (insn >> 12) & 0xf;
 
     ri = get_arm_cp_reginfo(s->cp_regs,
-                            ENCODE_CP_REG(cpnum, is64, crn, crm, opc1, opc2));
+            ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
     if (ri) {
         /* Check access permissions */
         if (!cp_access_ok(s->current_el, ri, isread)) {
@@ -7281,12 +7281,16 @@
      */
     if (is64) {
         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
-                      "64 bit system register cp:%d opc1: %d crm:%d\n",
-                      isread ? "read" : "write", cpnum, opc1, crm);
+                      "64 bit system register cp:%d opc1: %d crm:%d "
+                      "(%s)\n",
+                      isread ? "read" : "write", cpnum, opc1, crm,
+                      s->ns ? "non-secure" : "secure");
     } else {
         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
-                      "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d\n",
-                      isread ? "read" : "write", cpnum, opc1, crn, crm, opc2);
+                      "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
+                      "(%s)\n",
+                      isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
+                      s->ns ? "non-secure" : "secure");
     }
 
     return 1;
@@ -11031,6 +11035,7 @@
 #if !defined(CONFIG_USER_ONLY)
     dc->user = (ARM_TBFLAG_PRIV(tb->flags) == 0);
 #endif
+    dc->ns = ARM_TBFLAG_NS(tb->flags);
     dc->cpacr_fpen = ARM_TBFLAG_CPACR_FPEN(tb->flags);
     dc->vfp_enabled = ARM_TBFLAG_VFPEN(tb->flags);
     dc->vec_len = ARM_TBFLAG_VECLEN(tb->flags);
diff --git a/target-arm/translate.h b/target-arm/translate.h
index 41a9071..f6ee789 100644
--- a/target-arm/translate.h
+++ b/target-arm/translate.h
@@ -20,6 +20,7 @@
 #if !defined(CONFIG_USER_ONLY)
     int user;
 #endif
+    bool ns;        /* Use non-secure CPREG bank on access */
     bool cpacr_fpen; /* FP enabled via CPACR.FPEN */
     bool vfp_enabled; /* FP enabled via FPSCR.EN */
     int vec_len;
diff --git a/vl.c b/vl.c
index f1a5d66..113e98e 100644
--- a/vl.c
+++ b/vl.c
@@ -554,6 +554,22 @@
     },
 };
 
+static QemuOptsList qemu_semihosting_config_opts = {
+    .name = "semihosting-config",
+    .implied_opt_name = "enable",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_semihosting_config_opts.head),
+    .desc = {
+        {
+            .name = "enable",
+            .type = QEMU_OPT_BOOL,
+        }, {
+            .name = "target",
+            .type = QEMU_OPT_STRING,
+        },
+        { /* end of list */ }
+    },
+};
+
 /**
  * Get machine options
  *
@@ -2812,6 +2828,7 @@
     qemu_add_opts(&qemu_name_opts);
     qemu_add_opts(&qemu_numa_opts);
     qemu_add_opts(&qemu_icount_opts);
+    qemu_add_opts(&qemu_semihosting_config_opts);
 
     runstate_init();
 
@@ -3623,6 +3640,37 @@
 		break;
             case QEMU_OPTION_semihosting:
                 semihosting_enabled = 1;
+                semihosting_target = SEMIHOSTING_TARGET_AUTO;
+                break;
+            case QEMU_OPTION_semihosting_config:
+                semihosting_enabled = 1;
+                opts = qemu_opts_parse(qemu_find_opts("semihosting-config"),
+                                           optarg, 0);
+                if (opts != NULL) {
+                    semihosting_enabled = qemu_opt_get_bool(opts, "enable",
+                                                            true);
+                    const char *target = qemu_opt_get(opts, "target");
+                    if (target != NULL) {
+                        if (strcmp("native", target) == 0) {
+                            semihosting_target = SEMIHOSTING_TARGET_NATIVE;
+                        } else if (strcmp("gdb", target) == 0) {
+                            semihosting_target = SEMIHOSTING_TARGET_GDB;
+                        } else  if (strcmp("auto", target) == 0) {
+                            semihosting_target = SEMIHOSTING_TARGET_AUTO;
+                        } else {
+                            fprintf(stderr, "Unsupported semihosting-config"
+                                    " %s\n",
+                                optarg);
+                            exit(1);
+                        }
+                    } else {
+                        semihosting_target = SEMIHOSTING_TARGET_AUTO;
+                    }
+                } else {
+                    fprintf(stderr, "Unsupported semihosting-config %s\n",
+                            optarg);
+                    exit(1);
+                }
                 break;
             case QEMU_OPTION_tdf:
                 fprintf(stderr, "Warning: user space PIT time drift fix "