Merge remote-tracking branch 'remotes/rth/tags/tcg-ia64-pull-20140421' into staging

Pull for 20140421

# gpg: Signature made Mon 21 Apr 2014 17:57:24 BST using RSA key ID 4DD0279B
# gpg: Can't check signature: public key not found

* remotes/rth/tags/tcg-ia64-pull-20140421:
  tcg-ia64: Convert to new ldst opcodes
  tcg-ia64: Move part of softmmu slow path out of line
  tcg-ia64: Convert to new ldst helpers
  tcg-ia64: Reduce code duplication in tcg_out_qemu_ld
  tcg-ia64: Move tlb addend load into tlb read
  tcg-ia64: Move bswap for store into tlb load
  tcg-ia64: Re-bundle the tlb load
  tcg-ia64: Optimize small arguments to exit_tb

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/configure b/configure
index 69b9f56..b08afc3 100755
--- a/configure
+++ b/configure
@@ -1217,8 +1217,8 @@
   --enable-modules         enable modules support
   --enable-debug-tcg       enable TCG debugging
   --disable-debug-tcg      disable TCG debugging (default)
-  --enable-debug-info       enable debugging information (default)
-  --disable-debug-info      disable debugging information
+  --enable-debug-info      enable debugging information (default)
+  --disable-debug-info     disable debugging information
   --enable-debug           enable common debug build options
   --enable-sparse          enable sparse checker
   --disable-sparse         disable sparse checker (default)
@@ -4095,7 +4095,6 @@
 echo "strip binaries    $strip_opt"
 echo "profiler          $profiler"
 echo "static build      $static"
-echo "-Werror enabled   $werror"
 if test "$darwin" = "yes" ; then
     echo "Cocoa support     $cocoa"
 fi
diff --git a/hw/arm/cubieboard.c b/hw/arm/cubieboard.c
index d95a7f3..9d158c7 100644
--- a/hw/arm/cubieboard.c
+++ b/hw/arm/cubieboard.c
@@ -43,6 +43,19 @@
         exit(1);
     }
 
+    object_property_set_int(OBJECT(&s->a10->timer), 32768, "clk0-freq", &err);
+    if (err != NULL) {
+        error_report("Couldn't set clk0 frequency: %s", error_get_pretty(err));
+        exit(1);
+    }
+
+    object_property_set_int(OBJECT(&s->a10->timer), 24000000, "clk1-freq",
+                            &err);
+    if (err != NULL) {
+        error_report("Couldn't set clk1 frequency: %s", error_get_pretty(err));
+        exit(1);
+    }
+
     object_property_set_bool(OBJECT(s->a10), true, "realized", &err);
     if (err != NULL) {
         error_report("Couldn't realize Allwinner A10: %s",
diff --git a/hw/i2c/smbus_eeprom.c b/hw/i2c/smbus_eeprom.c
index 86f35c1..72c09cb 100644
--- a/hw/i2c/smbus_eeprom.c
+++ b/hw/i2c/smbus_eeprom.c
@@ -71,7 +71,7 @@
     printf("eeprom_write_byte: addr=0x%02x cmd=0x%02x val=0x%02x\n",
            dev->i2c.address, cmd, buf[0]);
 #endif
-    /* An page write operation is not a valid SMBus command.
+    /* A page write operation is not a valid SMBus command.
        It is a block write without a length byte.  Fortunately we
        get the full block anyway.  */
     /* TODO: Should this set the current location?  */
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index bfe633f..50327ff 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -438,9 +438,9 @@
 
     if ((pr->cmd & PORT_CMD_START) && pr->cmd_issue) {
         for (slot = 0; (slot < 32) && pr->cmd_issue; slot++) {
-            if ((pr->cmd_issue & (1 << slot)) &&
+            if ((pr->cmd_issue & (1U << slot)) &&
                 !handle_cmd(s, port, slot)) {
-                pr->cmd_issue &= ~(1 << slot);
+                pr->cmd_issue &= ~(1U << slot);
             }
         }
     }
diff --git a/hw/intc/allwinner-a10-pic.c b/hw/intc/allwinner-a10-pic.c
index 407d563..0924d98 100644
--- a/hw/intc/allwinner-a10-pic.c
+++ b/hw/intc/allwinner-a10-pic.c
@@ -23,11 +23,20 @@
 static void aw_a10_pic_update(AwA10PICState *s)
 {
     uint8_t i;
-    int irq = 0, fiq = 0;
+    int irq = 0, fiq = 0, pending;
+
+    s->vector = 0;
 
     for (i = 0; i < AW_A10_PIC_REG_NUM; i++) {
         irq |= s->irq_pending[i] & ~s->mask[i];
         fiq |= s->select[i] & s->irq_pending[i] & ~s->mask[i];
+
+        if (!s->vector) {
+            pending = ffs(s->irq_pending[i] & ~s->mask[i]);
+            if (pending) {
+                s->vector = (i * 32 + pending - 1) * 4;
+            }
+        }
     }
 
     qemu_set_irq(s->parent_irq, !!irq);
@@ -40,6 +49,8 @@
 
     if (level) {
         set_bit(irq % 32, (void *)&s->irq_pending[irq / 32]);
+    } else {
+        clear_bit(irq % 32, (void *)&s->irq_pending[irq / 32]);
     }
     aw_a10_pic_update(s);
 }
@@ -84,9 +95,6 @@
     uint8_t index = (offset & 0xc) / 4;
 
     switch (offset) {
-    case AW_A10_PIC_VECTOR:
-        s->vector = value & ~0x3;
-        break;
     case AW_A10_PIC_BASE_ADDR:
         s->base_addr = value & ~0x3;
     case AW_A10_PIC_PROTECT:
@@ -96,7 +104,11 @@
         s->nmi = value;
         break;
     case AW_A10_PIC_IRQ_PENDING ... AW_A10_PIC_IRQ_PENDING + 8:
-        s->irq_pending[index] &= ~value;
+        /*
+         * The register is read-only; nevertheless, Linux (including
+         * the version originally shipped by Allwinner) pretends to
+         * write to the register. Just ignore it.
+         */
         break;
     case AW_A10_PIC_FIQ_PENDING ... AW_A10_PIC_FIQ_PENDING + 8:
         s->fiq_pending[index] &= ~value;
diff --git a/hw/misc/zynq_slcr.c b/hw/misc/zynq_slcr.c
index d1cc233..2e53a2e 100644
--- a/hw/misc/zynq_slcr.c
+++ b/hw/misc/zynq_slcr.c
@@ -19,102 +19,155 @@
 #include "hw/sysbus.h"
 #include "sysemu/sysemu.h"
 
-#ifdef ZYNQ_ARM_SLCR_ERR_DEBUG
-#define DB_PRINT(...) do { \
-    fprintf(stderr,  ": %s: ", __func__); \
-    fprintf(stderr, ## __VA_ARGS__); \
-    } while (0);
-#else
-    #define DB_PRINT(...)
+#ifndef ZYNQ_SLCR_ERR_DEBUG
+#define ZYNQ_SLCR_ERR_DEBUG 0
 #endif
 
+#define DB_PRINT(...) do { \
+        if (ZYNQ_SLCR_ERR_DEBUG) { \
+            fprintf(stderr,  ": %s: ", __func__); \
+            fprintf(stderr, ## __VA_ARGS__); \
+        } \
+    } while (0);
+
 #define XILINX_LOCK_KEY 0x767b
 #define XILINX_UNLOCK_KEY 0xdf0d
 
 #define R_PSS_RST_CTRL_SOFT_RST 0x1
 
-typedef enum {
-  ARM_PLL_CTRL,
-  DDR_PLL_CTRL,
-  IO_PLL_CTRL,
-  PLL_STATUS,
-  ARM_PPL_CFG,
-  DDR_PLL_CFG,
-  IO_PLL_CFG,
-  PLL_BG_CTRL,
-  PLL_MAX
-} PLLValues;
+enum {
+    SCL             = 0x000 / 4,
+    LOCK,
+    UNLOCK,
+    LOCKSTA,
 
-typedef enum {
-  ARM_CLK_CTRL,
-  DDR_CLK_CTRL,
-  DCI_CLK_CTRL,
-  APER_CLK_CTRL,
-  USB0_CLK_CTRL,
-  USB1_CLK_CTRL,
-  GEM0_RCLK_CTRL,
-  GEM1_RCLK_CTRL,
-  GEM0_CLK_CTRL,
-  GEM1_CLK_CTRL,
-  SMC_CLK_CTRL,
-  LQSPI_CLK_CTRL,
-  SDIO_CLK_CTRL,
-  UART_CLK_CTRL,
-  SPI_CLK_CTRL,
-  CAN_CLK_CTRL,
-  CAN_MIOCLK_CTRL,
-  DBG_CLK_CTRL,
-  PCAP_CLK_CTRL,
-  TOPSW_CLK_CTRL,
-  CLK_MAX
-} ClkValues;
+    ARM_PLL_CTRL    = 0x100 / 4,
+    DDR_PLL_CTRL,
+    IO_PLL_CTRL,
+    PLL_STATUS,
+    ARM_PLL_CFG,
+    DDR_PLL_CFG,
+    IO_PLL_CFG,
 
-typedef enum {
-  CLK_CTRL,
-  THR_CTRL,
-  THR_CNT,
-  THR_STA,
-  FPGA_MAX
-} FPGAValues;
+    ARM_CLK_CTRL    = 0x120 / 4,
+    DDR_CLK_CTRL,
+    DCI_CLK_CTRL,
+    APER_CLK_CTRL,
+    USB0_CLK_CTRL,
+    USB1_CLK_CTRL,
+    GEM0_RCLK_CTRL,
+    GEM1_RCLK_CTRL,
+    GEM0_CLK_CTRL,
+    GEM1_CLK_CTRL,
+    SMC_CLK_CTRL,
+    LQSPI_CLK_CTRL,
+    SDIO_CLK_CTRL,
+    UART_CLK_CTRL,
+    SPI_CLK_CTRL,
+    CAN_CLK_CTRL,
+    CAN_MIOCLK_CTRL,
+    DBG_CLK_CTRL,
+    PCAP_CLK_CTRL,
+    TOPSW_CLK_CTRL,
 
-typedef enum {
-  SYNC_CTRL,
-  SYNC_STATUS,
-  BANDGAP_TRIP,
-  CC_TEST,
-  PLL_PREDIVISOR,
-  CLK_621_TRUE,
-  PICTURE_DBG,
-  PICTURE_DBG_UCNT,
-  PICTURE_DBG_LCNT,
-  MISC_MAX
-} MiscValues;
+#define FPGA_CTRL_REGS(n, start) \
+    FPGA ## n ## _CLK_CTRL = (start) / 4, \
+    FPGA ## n ## _THR_CTRL, \
+    FPGA ## n ## _THR_CNT, \
+    FPGA ## n ## _THR_STA,
+    FPGA_CTRL_REGS(0, 0x170)
+    FPGA_CTRL_REGS(1, 0x180)
+    FPGA_CTRL_REGS(2, 0x190)
+    FPGA_CTRL_REGS(3, 0x1a0)
 
-typedef enum {
-  PSS,
-  DDDR,
-  DMAC = 3,
-  USB,
-  GEM,
-  SDIO,
-  SPI,
-  CAN,
-  I2C,
-  UART,
-  GPIO,
-  LQSPI,
-  SMC,
-  OCM,
-  DEVCI,
-  FPGA,
-  A9_CPU,
-  RS_AWDT,
-  RST_REASON,
-  RST_REASON_CLR,
-  REBOOT_STATUS,
-  BOOT_MODE,
-  RESET_MAX
-} ResetValues;
+    BANDGAP_TRIP    = 0x1b8 / 4,
+    PLL_PREDIVISOR  = 0x1c0 / 4,
+    CLK_621_TRUE,
+
+    PSS_RST_CTRL    = 0x200 / 4,
+    DDR_RST_CTRL,
+    TOPSW_RESET_CTRL,
+    DMAC_RST_CTRL,
+    USB_RST_CTRL,
+    GEM_RST_CTRL,
+    SDIO_RST_CTRL,
+    SPI_RST_CTRL,
+    CAN_RST_CTRL,
+    I2C_RST_CTRL,
+    UART_RST_CTRL,
+    GPIO_RST_CTRL,
+    LQSPI_RST_CTRL,
+    SMC_RST_CTRL,
+    OCM_RST_CTRL,
+    FPGA_RST_CTRL   = 0x240 / 4,
+    A9_CPU_RST_CTRL,
+
+    RS_AWDT_CTRL    = 0x24c / 4,
+    RST_REASON,
+
+    REBOOT_STATUS   = 0x258 / 4,
+    BOOT_MODE,
+
+    APU_CTRL        = 0x300 / 4,
+    WDT_CLK_SEL,
+
+    TZ_DMA_NS       = 0x440 / 4,
+    TZ_DMA_IRQ_NS,
+    TZ_DMA_PERIPH_NS,
+
+    PSS_IDCODE      = 0x530 / 4,
+
+    DDR_URGENT      = 0x600 / 4,
+    DDR_CAL_START   = 0x60c / 4,
+    DDR_REF_START   = 0x614 / 4,
+    DDR_CMD_STA,
+    DDR_URGENT_SEL,
+    DDR_DFI_STATUS,
+
+    MIO             = 0x700 / 4,
+#define MIO_LENGTH 54
+
+    MIO_LOOPBACK    = 0x804 / 4,
+    MIO_MST_TRI0,
+    MIO_MST_TRI1,
+
+    SD0_WP_CD_SEL   = 0x830 / 4,
+    SD1_WP_CD_SEL,
+
+    LVL_SHFTR_EN    = 0x900 / 4,
+    OCM_CFG         = 0x910 / 4,
+
+    CPU_RAM         = 0xa00 / 4,
+
+    IOU             = 0xa30 / 4,
+
+    DMAC_RAM        = 0xa50 / 4,
+
+    AFI0            = 0xa60 / 4,
+    AFI1 = AFI0 + 3,
+    AFI2 = AFI1 + 3,
+    AFI3 = AFI2 + 3,
+#define AFI_LENGTH 3
+
+    OCM             = 0xa90 / 4,
+
+    DEVCI_RAM       = 0xaa0 / 4,
+
+    CSG_RAM         = 0xab0 / 4,
+
+    GPIOB_CTRL      = 0xb00 / 4,
+    GPIOB_CFG_CMOS18,
+    GPIOB_CFG_CMOS25,
+    GPIOB_CFG_CMOS33,
+    GPIOB_CFG_HSTL  = 0xb14 / 4,
+    GPIOB_DRVR_BIAS_CTRL,
+
+    DDRIOB          = 0xb40 / 4,
+#define DDRIOB_LENGTH 14
+};
+
+#define ZYNQ_SLCR_MMIO_SIZE     0x1000
+#define ZYNQ_SLCR_NUM_REGS      (ZYNQ_SLCR_MMIO_SIZE / 4)
 
 #define TYPE_ZYNQ_SLCR "xilinx,zynq_slcr"
 #define ZYNQ_SLCR(obj) OBJECT_CHECK(ZynqSLCRState, (obj), TYPE_ZYNQ_SLCR)
@@ -124,42 +177,7 @@
 
     MemoryRegion iomem;
 
-    union {
-        struct {
-            uint16_t scl;
-            uint16_t lockval;
-            uint32_t pll[PLL_MAX]; /* 0x100 - 0x11C */
-            uint32_t clk[CLK_MAX]; /* 0x120 - 0x16C */
-            uint32_t fpga[4][FPGA_MAX]; /* 0x170 - 0x1AC */
-            uint32_t misc[MISC_MAX]; /* 0x1B0 - 0x1D8 */
-            uint32_t reset[RESET_MAX]; /* 0x200 - 0x25C */
-            uint32_t apu_ctrl; /* 0x300 */
-            uint32_t wdt_clk_sel; /* 0x304 */
-            uint32_t tz_ocm[3]; /* 0x400 - 0x408 */
-            uint32_t tz_ddr; /* 0x430 */
-            uint32_t tz_dma[3]; /* 0x440 - 0x448 */
-            uint32_t tz_misc[3]; /* 0x450 - 0x458 */
-            uint32_t tz_fpga[2]; /* 0x484 - 0x488 */
-            uint32_t dbg_ctrl; /* 0x500 */
-            uint32_t pss_idcode; /* 0x530 */
-            uint32_t ddr[8]; /* 0x600 - 0x620 - 0x604-missing */
-            uint32_t mio[54]; /* 0x700 - 0x7D4 */
-            uint32_t mio_func[4]; /* 0x800 - 0x810 */
-            uint32_t sd[2]; /* 0x830 - 0x834 */
-            uint32_t lvl_shftr_en; /* 0x900 */
-            uint32_t ocm_cfg; /* 0x910 */
-            uint32_t cpu_ram[8]; /* 0xA00 - 0xA1C */
-            uint32_t iou[7]; /* 0xA30 - 0xA48 */
-            uint32_t dmac_ram; /* 0xA50 */
-            uint32_t afi[4][3]; /* 0xA60 - 0xA8C */
-            uint32_t ocm[3]; /* 0xA90 - 0xA98 */
-            uint32_t devci_ram; /* 0xAA0 */
-            uint32_t csg_ram; /* 0xAB0 */
-            uint32_t gpiob[12]; /* 0xB00 - 0xB2C */
-            uint32_t ddriob[14]; /* 0xB40 - 0xB74 */
-        };
-        uint8_t data[0x1000];
-    };
+    uint32_t regs[ZYNQ_SLCR_NUM_REGS];
 } ZynqSLCRState;
 
 static void zynq_slcr_reset(DeviceState *d)
@@ -169,177 +187,169 @@
 
     DB_PRINT("RESET\n");
 
-    s->lockval = 1;
+    s->regs[LOCKSTA] = 1;
     /* 0x100 - 0x11C */
-    s->pll[ARM_PLL_CTRL] = 0x0001A008;
-    s->pll[DDR_PLL_CTRL] = 0x0001A008;
-    s->pll[IO_PLL_CTRL] = 0x0001A008;
-    s->pll[PLL_STATUS] = 0x0000003F;
-    s->pll[ARM_PPL_CFG] = 0x00014000;
-    s->pll[DDR_PLL_CFG] = 0x00014000;
-    s->pll[IO_PLL_CFG] = 0x00014000;
+    s->regs[ARM_PLL_CTRL]   = 0x0001A008;
+    s->regs[DDR_PLL_CTRL]   = 0x0001A008;
+    s->regs[IO_PLL_CTRL]    = 0x0001A008;
+    s->regs[PLL_STATUS]     = 0x0000003F;
+    s->regs[ARM_PLL_CFG]    = 0x00014000;
+    s->regs[DDR_PLL_CFG]    = 0x00014000;
+    s->regs[IO_PLL_CFG]     = 0x00014000;
 
     /* 0x120 - 0x16C */
-    s->clk[ARM_CLK_CTRL] = 0x1F000400;
-    s->clk[DDR_CLK_CTRL] = 0x18400003;
-    s->clk[DCI_CLK_CTRL] = 0x01E03201;
-    s->clk[APER_CLK_CTRL] = 0x01FFCCCD;
-    s->clk[USB0_CLK_CTRL] = s->clk[USB1_CLK_CTRL] = 0x00101941;
-    s->clk[GEM0_RCLK_CTRL] = s->clk[GEM1_RCLK_CTRL] = 0x00000001;
-    s->clk[GEM0_CLK_CTRL] = s->clk[GEM1_CLK_CTRL] = 0x00003C01;
-    s->clk[SMC_CLK_CTRL] = 0x00003C01;
-    s->clk[LQSPI_CLK_CTRL] = 0x00002821;
-    s->clk[SDIO_CLK_CTRL] = 0x00001E03;
-    s->clk[UART_CLK_CTRL] = 0x00003F03;
-    s->clk[SPI_CLK_CTRL] = 0x00003F03;
-    s->clk[CAN_CLK_CTRL] = 0x00501903;
-    s->clk[DBG_CLK_CTRL] = 0x00000F03;
-    s->clk[PCAP_CLK_CTRL] = 0x00000F01;
+    s->regs[ARM_CLK_CTRL]   = 0x1F000400;
+    s->regs[DDR_CLK_CTRL]   = 0x18400003;
+    s->regs[DCI_CLK_CTRL]   = 0x01E03201;
+    s->regs[APER_CLK_CTRL]  = 0x01FFCCCD;
+    s->regs[USB0_CLK_CTRL]  = s->regs[USB1_CLK_CTRL]    = 0x00101941;
+    s->regs[GEM0_RCLK_CTRL] = s->regs[GEM1_RCLK_CTRL]   = 0x00000001;
+    s->regs[GEM0_CLK_CTRL]  = s->regs[GEM1_CLK_CTRL]    = 0x00003C01;
+    s->regs[SMC_CLK_CTRL]   = 0x00003C01;
+    s->regs[LQSPI_CLK_CTRL] = 0x00002821;
+    s->regs[SDIO_CLK_CTRL]  = 0x00001E03;
+    s->regs[UART_CLK_CTRL]  = 0x00003F03;
+    s->regs[SPI_CLK_CTRL]   = 0x00003F03;
+    s->regs[CAN_CLK_CTRL]   = 0x00501903;
+    s->regs[DBG_CLK_CTRL]   = 0x00000F03;
+    s->regs[PCAP_CLK_CTRL]  = 0x00000F01;
 
     /* 0x170 - 0x1AC */
-    s->fpga[0][CLK_CTRL] = s->fpga[1][CLK_CTRL] = s->fpga[2][CLK_CTRL] =
-            s->fpga[3][CLK_CTRL] = 0x00101800;
-    s->fpga[0][THR_STA] = s->fpga[1][THR_STA] = s->fpga[2][THR_STA] =
-            s->fpga[3][THR_STA] = 0x00010000;
+    s->regs[FPGA0_CLK_CTRL] = s->regs[FPGA1_CLK_CTRL] = s->regs[FPGA2_CLK_CTRL]
+                            = s->regs[FPGA3_CLK_CTRL] = 0x00101800;
+    s->regs[FPGA0_THR_STA] = s->regs[FPGA1_THR_STA] = s->regs[FPGA2_THR_STA]
+                           = s->regs[FPGA3_THR_STA] = 0x00010000;
 
     /* 0x1B0 - 0x1D8 */
-    s->misc[BANDGAP_TRIP] = 0x0000001F;
-    s->misc[PLL_PREDIVISOR] = 0x00000001;
-    s->misc[CLK_621_TRUE] = 0x00000001;
+    s->regs[BANDGAP_TRIP]   = 0x0000001F;
+    s->regs[PLL_PREDIVISOR] = 0x00000001;
+    s->regs[CLK_621_TRUE]   = 0x00000001;
 
     /* 0x200 - 0x25C */
-    s->reset[FPGA] = 0x01F33F0F;
-    s->reset[RST_REASON] = 0x00000040;
+    s->regs[FPGA_RST_CTRL]  = 0x01F33F0F;
+    s->regs[RST_REASON]     = 0x00000040;
+
+    s->regs[BOOT_MODE]      = 0x00000001;
 
     /* 0x700 - 0x7D4 */
     for (i = 0; i < 54; i++) {
-        s->mio[i] = 0x00001601;
+        s->regs[MIO + i] = 0x00001601;
     }
     for (i = 2; i <= 8; i++) {
-        s->mio[i] = 0x00000601;
+        s->regs[MIO + i] = 0x00000601;
     }
 
-    /* MIO_MST_TRI0, MIO_MST_TRI1 */
-    s->mio_func[2] = s->mio_func[3] = 0xFFFFFFFF;
+    s->regs[MIO_MST_TRI0] = s->regs[MIO_MST_TRI1] = 0xFFFFFFFF;
 
-    s->cpu_ram[0] = s->cpu_ram[1] = s->cpu_ram[3] =
-            s->cpu_ram[4] = s->cpu_ram[7] = 0x00010101;
-    s->cpu_ram[2] = s->cpu_ram[5] = 0x01010101;
-    s->cpu_ram[6] = 0x00000001;
+    s->regs[CPU_RAM + 0] = s->regs[CPU_RAM + 1] = s->regs[CPU_RAM + 3]
+                         = s->regs[CPU_RAM + 4] = s->regs[CPU_RAM + 7]
+                         = 0x00010101;
+    s->regs[CPU_RAM + 2] = s->regs[CPU_RAM + 5] = 0x01010101;
+    s->regs[CPU_RAM + 6] = 0x00000001;
 
-    s->iou[0] = s->iou[1] = s->iou[2] = s->iou[3] = 0x09090909;
-    s->iou[4] = s->iou[5] = 0x00090909;
-    s->iou[6] = 0x00000909;
+    s->regs[IOU + 0] = s->regs[IOU + 1] = s->regs[IOU + 2] = s->regs[IOU + 3]
+                     = 0x09090909;
+    s->regs[IOU + 4] = s->regs[IOU + 5] = 0x00090909;
+    s->regs[IOU + 6] = 0x00000909;
 
-    s->dmac_ram = 0x00000009;
+    s->regs[DMAC_RAM] = 0x00000009;
 
-    s->afi[0][0] = s->afi[0][1] = 0x09090909;
-    s->afi[1][0] = s->afi[1][1] = 0x09090909;
-    s->afi[2][0] = s->afi[2][1] = 0x09090909;
-    s->afi[3][0] = s->afi[3][1] = 0x09090909;
-    s->afi[0][2] = s->afi[1][2] = s->afi[2][2] = s->afi[3][2] = 0x00000909;
+    s->regs[AFI0 + 0] = s->regs[AFI0 + 1] = 0x09090909;
+    s->regs[AFI1 + 0] = s->regs[AFI1 + 1] = 0x09090909;
+    s->regs[AFI2 + 0] = s->regs[AFI2 + 1] = 0x09090909;
+    s->regs[AFI3 + 0] = s->regs[AFI3 + 1] = 0x09090909;
+    s->regs[AFI0 + 2] = s->regs[AFI1 + 2] = s->regs[AFI2 + 2]
+                      = s->regs[AFI3 + 2] = 0x00000909;
 
-    s->ocm[0] = 0x01010101;
-    s->ocm[1] = s->ocm[2] = 0x09090909;
+    s->regs[OCM + 0]    = 0x01010101;
+    s->regs[OCM + 1]    = s->regs[OCM + 2] = 0x09090909;
 
-    s->devci_ram = 0x00000909;
-    s->csg_ram = 0x00000001;
+    s->regs[DEVCI_RAM]  = 0x00000909;
+    s->regs[CSG_RAM]    = 0x00000001;
 
-    s->ddriob[0] = s->ddriob[1] = s->ddriob[2] = s->ddriob[3] = 0x00000e00;
-    s->ddriob[4] = s->ddriob[5] = s->ddriob[6] = 0x00000e00;
-    s->ddriob[12] = 0x00000021;
+    s->regs[DDRIOB + 0] = s->regs[DDRIOB + 1] = s->regs[DDRIOB + 2]
+                        = s->regs[DDRIOB + 3] = 0x00000e00;
+    s->regs[DDRIOB + 4] = s->regs[DDRIOB + 5] = s->regs[DDRIOB + 6]
+                        = 0x00000e00;
+    s->regs[DDRIOB + 12] = 0x00000021;
 }
 
-static inline uint32_t zynq_slcr_read_imp(void *opaque,
-    hwaddr offset)
-{
-    ZynqSLCRState *s = (ZynqSLCRState *)opaque;
 
+static bool zynq_slcr_check_offset(hwaddr offset, bool rnw)
+{
     switch (offset) {
-    case 0x0: /* SCL */
-        return s->scl;
-    case 0x4: /* LOCK */
-    case 0x8: /* UNLOCK */
-        DB_PRINT("Reading SCLR_LOCK/UNLOCK is not enabled\n");
-        return 0;
-    case 0x0C: /* LOCKSTA */
-        return s->lockval;
-    case 0x100 ... 0x11C:
-        return s->pll[(offset - 0x100) / 4];
-    case 0x120 ... 0x16C:
-        return s->clk[(offset - 0x120) / 4];
-    case 0x170 ... 0x1AC:
-        return s->fpga[0][(offset - 0x170) / 4];
-    case 0x1B0 ... 0x1D8:
-        return s->misc[(offset - 0x1B0) / 4];
-    case 0x200 ... 0x258:
-        return s->reset[(offset - 0x200) / 4];
-    case 0x25c:
-        return 1;
-    case 0x300:
-        return s->apu_ctrl;
-    case 0x304:
-        return s->wdt_clk_sel;
-    case 0x400 ... 0x408:
-        return s->tz_ocm[(offset - 0x400) / 4];
-    case 0x430:
-        return s->tz_ddr;
-    case 0x440 ... 0x448:
-        return s->tz_dma[(offset - 0x440) / 4];
-    case 0x450 ... 0x458:
-        return s->tz_misc[(offset - 0x450) / 4];
-    case 0x484 ... 0x488:
-        return s->tz_fpga[(offset - 0x484) / 4];
-    case 0x500:
-        return s->dbg_ctrl;
-    case 0x530:
-        return s->pss_idcode;
-    case 0x600 ... 0x620:
-        if (offset == 0x604) {
-            goto bad_reg;
-        }
-        return s->ddr[(offset - 0x600) / 4];
-    case 0x700 ... 0x7D4:
-        return s->mio[(offset - 0x700) / 4];
-    case 0x800 ... 0x810:
-        return s->mio_func[(offset - 0x800) / 4];
-    case 0x830 ... 0x834:
-        return s->sd[(offset - 0x830) / 4];
-    case 0x900:
-        return s->lvl_shftr_en;
-    case 0x910:
-        return s->ocm_cfg;
-    case 0xA00 ... 0xA1C:
-        return s->cpu_ram[(offset - 0xA00) / 4];
-    case 0xA30 ... 0xA48:
-        return s->iou[(offset - 0xA30) / 4];
-    case 0xA50:
-        return s->dmac_ram;
-    case 0xA60 ... 0xA8C:
-        return s->afi[0][(offset - 0xA60) / 4];
-    case 0xA90 ... 0xA98:
-        return s->ocm[(offset - 0xA90) / 4];
-    case 0xAA0:
-        return s->devci_ram;
-    case 0xAB0:
-        return s->csg_ram;
-    case 0xB00 ... 0xB2C:
-        return s->gpiob[(offset - 0xB00) / 4];
-    case 0xB40 ... 0xB74:
-        return s->ddriob[(offset - 0xB40) / 4];
+    case LOCK:
+    case UNLOCK:
+    case DDR_CAL_START:
+    case DDR_REF_START:
+        return !rnw; /* Write only */
+    case LOCKSTA:
+    case FPGA0_THR_STA:
+    case FPGA1_THR_STA:
+    case FPGA2_THR_STA:
+    case FPGA3_THR_STA:
+    case BOOT_MODE:
+    case PSS_IDCODE:
+    case DDR_CMD_STA:
+    case DDR_DFI_STATUS:
+    case PLL_STATUS:
+        return rnw;/* read only */
+    case SCL:
+    case ARM_PLL_CTRL ... IO_PLL_CTRL:
+    case ARM_PLL_CFG ... IO_PLL_CFG:
+    case ARM_CLK_CTRL ... TOPSW_CLK_CTRL:
+    case FPGA0_CLK_CTRL ... FPGA0_THR_CNT:
+    case FPGA1_CLK_CTRL ... FPGA1_THR_CNT:
+    case FPGA2_CLK_CTRL ... FPGA2_THR_CNT:
+    case FPGA3_CLK_CTRL ... FPGA3_THR_CNT:
+    case BANDGAP_TRIP:
+    case PLL_PREDIVISOR:
+    case CLK_621_TRUE:
+    case PSS_RST_CTRL ... A9_CPU_RST_CTRL:
+    case RS_AWDT_CTRL:
+    case RST_REASON:
+    case REBOOT_STATUS:
+    case APU_CTRL:
+    case WDT_CLK_SEL:
+    case TZ_DMA_NS ... TZ_DMA_PERIPH_NS:
+    case DDR_URGENT:
+    case DDR_URGENT_SEL:
+    case MIO ... MIO + MIO_LENGTH - 1:
+    case MIO_LOOPBACK ... MIO_MST_TRI1:
+    case SD0_WP_CD_SEL:
+    case SD1_WP_CD_SEL:
+    case LVL_SHFTR_EN:
+    case OCM_CFG:
+    case CPU_RAM:
+    case IOU:
+    case DMAC_RAM:
+    case AFI0 ... AFI3 + AFI_LENGTH - 1:
+    case OCM:
+    case DEVCI_RAM:
+    case CSG_RAM:
+    case GPIOB_CTRL ... GPIOB_CFG_CMOS33:
+    case GPIOB_CFG_HSTL:
+    case GPIOB_DRVR_BIAS_CTRL:
+    case DDRIOB ... DDRIOB + DDRIOB_LENGTH - 1:
+        return true;
     default:
-    bad_reg:
-        DB_PRINT("Bad register offset 0x%x\n", (int)offset);
-        return 0;
+        return false;
     }
 }
 
 static uint64_t zynq_slcr_read(void *opaque, hwaddr offset,
     unsigned size)
 {
-    uint32_t ret = zynq_slcr_read_imp(opaque, offset);
+    ZynqSLCRState *s = opaque;
+    offset /= 4;
+    uint32_t ret = s->regs[offset];
 
-    DB_PRINT("addr: %08x data: %08x\n", (unsigned)offset, (unsigned)ret);
+    if (!zynq_slcr_check_offset(offset, true)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "zynq_slcr: Invalid read access to "
+                      " addr %" HWADDR_PRIx "\n", offset * 4);
+    }
+
+    DB_PRINT("addr: %08" HWADDR_PRIx " data: %08" PRIx32 "\n", offset * 4, ret);
     return ret;
 }
 
@@ -347,148 +357,55 @@
                           uint64_t val, unsigned size)
 {
     ZynqSLCRState *s = (ZynqSLCRState *)opaque;
+    offset /= 4;
 
-    DB_PRINT("offset: %08x data: %08x\n", (unsigned)offset, (unsigned)val);
+    DB_PRINT("addr: %08" HWADDR_PRIx " data: %08" PRIx64 "\n", offset * 4, val);
+
+    if (!zynq_slcr_check_offset(offset, false)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "zynq_slcr: Invalid write access to "
+                      "addr %" HWADDR_PRIx "\n", offset * 4);
+        return;
+    }
 
     switch (offset) {
-    case 0x00: /* SCL */
-        s->scl = val & 0x1;
-    return;
-    case 0x4: /* SLCR_LOCK */
+    case SCL:
+        s->regs[SCL] = val & 0x1;
+        return;
+    case LOCK:
         if ((val & 0xFFFF) == XILINX_LOCK_KEY) {
             DB_PRINT("XILINX LOCK 0xF8000000 + 0x%x <= 0x%x\n", (int)offset,
                 (unsigned)val & 0xFFFF);
-            s->lockval = 1;
+            s->regs[LOCKSTA] = 1;
         } else {
             DB_PRINT("WRONG XILINX LOCK KEY 0xF8000000 + 0x%x <= 0x%x\n",
                 (int)offset, (unsigned)val & 0xFFFF);
         }
         return;
-    case 0x8: /* SLCR_UNLOCK */
+    case UNLOCK:
         if ((val & 0xFFFF) == XILINX_UNLOCK_KEY) {
             DB_PRINT("XILINX UNLOCK 0xF8000000 + 0x%x <= 0x%x\n", (int)offset,
                 (unsigned)val & 0xFFFF);
-            s->lockval = 0;
+            s->regs[LOCKSTA] = 0;
         } else {
             DB_PRINT("WRONG XILINX UNLOCK KEY 0xF8000000 + 0x%x <= 0x%x\n",
                 (int)offset, (unsigned)val & 0xFFFF);
         }
         return;
-    case 0xc: /* LOCKSTA */
-        DB_PRINT("Writing SCLR_LOCKSTA is not enabled\n");
+    }
+
+    if (!s->regs[LOCKSTA]) {
+        s->regs[offset / 4] = val;
+    } else {
+        DB_PRINT("SCLR registers are locked. Unlock them first\n");
         return;
     }
 
-    if (!s->lockval) {
-        switch (offset) {
-        case 0x100 ... 0x11C:
-            if (offset == 0x10C) {
-                goto bad_reg;
-            }
-            s->pll[(offset - 0x100) / 4] = val;
-            break;
-        case 0x120 ... 0x16C:
-            s->clk[(offset - 0x120) / 4] = val;
-            break;
-        case 0x170 ... 0x1AC:
-            s->fpga[0][(offset - 0x170) / 4] = val;
-            break;
-        case 0x1B0 ... 0x1D8:
-            s->misc[(offset - 0x1B0) / 4] = val;
-            break;
-        case 0x200 ... 0x25C:
-            if (offset == 0x250) {
-                goto bad_reg;
-            }
-            s->reset[(offset - 0x200) / 4] = val;
-            if (offset == 0x200 && (val & R_PSS_RST_CTRL_SOFT_RST)) {
-                qemu_system_reset_request();
-            }
-            break;
-        case 0x300:
-            s->apu_ctrl = val;
-            break;
-        case 0x304:
-            s->wdt_clk_sel = val;
-            break;
-        case 0x400 ... 0x408:
-            s->tz_ocm[(offset - 0x400) / 4] = val;
-            break;
-        case 0x430:
-            s->tz_ddr = val;
-            break;
-        case 0x440 ... 0x448:
-            s->tz_dma[(offset - 0x440) / 4] = val;
-            break;
-        case 0x450 ... 0x458:
-            s->tz_misc[(offset - 0x450) / 4] = val;
-            break;
-        case 0x484 ... 0x488:
-            s->tz_fpga[(offset - 0x484) / 4] = val;
-            break;
-        case 0x500:
-            s->dbg_ctrl = val;
-            break;
-        case 0x530:
-            s->pss_idcode = val;
-            break;
-        case 0x600 ... 0x620:
-            if (offset == 0x604) {
-                goto bad_reg;
-            }
-            s->ddr[(offset - 0x600) / 4] = val;
-            break;
-        case 0x700 ... 0x7D4:
-            s->mio[(offset - 0x700) / 4] = val;
-            break;
-        case 0x800 ... 0x810:
-            s->mio_func[(offset - 0x800) / 4] = val;
-            break;
-        case 0x830 ... 0x834:
-            s->sd[(offset - 0x830) / 4] = val;
-            break;
-        case 0x900:
-            s->lvl_shftr_en = val;
-            break;
-        case 0x910:
-            break;
-        case 0xA00 ... 0xA1C:
-            s->cpu_ram[(offset - 0xA00) / 4] = val;
-            break;
-        case 0xA30 ... 0xA48:
-            s->iou[(offset - 0xA30) / 4] = val;
-            break;
-        case 0xA50:
-            s->dmac_ram = val;
-            break;
-        case 0xA60 ... 0xA8C:
-            s->afi[0][(offset - 0xA60) / 4] = val;
-            break;
-        case 0xA90:
-            s->ocm[0] = val;
-            break;
-        case 0xAA0:
-            s->devci_ram = val;
-            break;
-        case 0xAB0:
-            s->csg_ram = val;
-            break;
-        case 0xB00 ... 0xB2C:
-            if (offset == 0xB20 || offset == 0xB2C) {
-                goto bad_reg;
-            }
-            s->gpiob[(offset - 0xB00) / 4] = val;
-            break;
-        case 0xB40 ... 0xB74:
-            s->ddriob[(offset - 0xB40) / 4] = val;
-            break;
-        default:
-        bad_reg:
-            DB_PRINT("Bad register write %x <= %08x\n", (int)offset,
-                     (unsigned)val);
+    switch (offset) {
+    case PSS_RST_CTRL:
+        if (val & R_PSS_RST_CTRL_SOFT_RST) {
+            qemu_system_reset_request();
         }
-    } else {
-        DB_PRINT("SCLR registers are locked. Unlock them first\n");
+        break;
     }
 }
 
@@ -498,23 +415,22 @@
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static int zynq_slcr_init(SysBusDevice *dev)
+static void zynq_slcr_init(Object *obj)
 {
-    ZynqSLCRState *s = ZYNQ_SLCR(dev);
+    ZynqSLCRState *s = ZYNQ_SLCR(obj);
 
-    memory_region_init_io(&s->iomem, OBJECT(s), &slcr_ops, s, "slcr", 0x1000);
-    sysbus_init_mmio(dev, &s->iomem);
-
-    return 0;
+    memory_region_init_io(&s->iomem, obj, &slcr_ops, s, "slcr",
+                          ZYNQ_SLCR_MMIO_SIZE);
+    sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->iomem);
 }
 
 static const VMStateDescription vmstate_zynq_slcr = {
     .name = "zynq_slcr",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
+    .version_id = 2,
+    .minimum_version_id = 2,
+    .minimum_version_id_old = 2,
     .fields      = (VMStateField[]) {
-        VMSTATE_UINT8_ARRAY(data, ZynqSLCRState, 0x1000),
+        VMSTATE_UINT32_ARRAY(regs, ZynqSLCRState, ZYNQ_SLCR_NUM_REGS),
         VMSTATE_END_OF_LIST()
     }
 };
@@ -522,9 +438,7 @@
 static void zynq_slcr_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
-    SysBusDeviceClass *sdc = SYS_BUS_DEVICE_CLASS(klass);
 
-    sdc->init = zynq_slcr_init;
     dc->vmsd = &vmstate_zynq_slcr;
     dc->reset = zynq_slcr_reset;
 }
@@ -534,6 +448,7 @@
     .name  = TYPE_ZYNQ_SLCR,
     .parent = TYPE_SYS_BUS_DEVICE,
     .instance_size  = sizeof(ZynqSLCRState),
+    .instance_init = zynq_slcr_init,
 };
 
 static void zynq_slcr_register_types(void)
diff --git a/hw/net/allwinner_emac.c b/hw/net/allwinner_emac.c
index 469f2f0..d780ba0 100644
--- a/hw/net/allwinner_emac.c
+++ b/hw/net/allwinner_emac.c
@@ -27,11 +27,11 @@
 static void mii_set_link(RTL8201CPState *mii, bool link_ok)
 {
     if (link_ok) {
-        mii->bmsr |= MII_BMSR_LINK_ST;
+        mii->bmsr |= MII_BMSR_LINK_ST | MII_BMSR_AN_COMP;
         mii->anlpar |= MII_ANAR_TXFD | MII_ANAR_10FD | MII_ANAR_10 |
                        MII_ANAR_CSMACD;
     } else {
-        mii->bmsr &= ~MII_BMSR_LINK_ST;
+        mii->bmsr &= ~(MII_BMSR_LINK_ST | MII_BMSR_AN_COMP);
         mii->anlpar = MII_ANAR_TX;
     }
 }
@@ -391,9 +391,11 @@
         break;
     case EMAC_INT_CTL_REG:
         s->int_ctl = value;
+        aw_emac_update_irq(s);
         break;
     case EMAC_INT_STA_REG:
         s->int_sta &= ~value;
+        aw_emac_update_irq(s);
         break;
     case EMAC_MAC_MADR_REG:
         s->phy_target = value;
diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c
index 92dc2f2..e34b25e 100644
--- a/hw/net/cadence_gem.c
+++ b/hw/net/cadence_gem.c
@@ -1093,7 +1093,7 @@
             uint32_t phy_addr, reg_num;
 
             phy_addr = (retval & GEM_PHYMNTNC_ADDR) >> GEM_PHYMNTNC_ADDR_SHFT;
-            if (phy_addr == BOARD_PHY_ADDRESS) {
+            if (phy_addr == BOARD_PHY_ADDRESS || phy_addr == 0) {
                 reg_num = (retval & GEM_PHYMNTNC_REG) >> GEM_PHYMNTNC_REG_SHIFT;
                 retval &= 0xFFFF0000;
                 retval |= gem_phy_read(s, reg_num);
@@ -1193,7 +1193,7 @@
             uint32_t phy_addr, reg_num;
 
             phy_addr = (val & GEM_PHYMNTNC_ADDR) >> GEM_PHYMNTNC_ADDR_SHFT;
-            if (phy_addr == BOARD_PHY_ADDRESS) {
+            if (phy_addr == BOARD_PHY_ADDRESS || phy_addr == 0) {
                 reg_num = (val & GEM_PHYMNTNC_REG) >> GEM_PHYMNTNC_REG_SHIFT;
                 gem_phy_write(s, reg_num, val);
             }
diff --git a/hw/timer/allwinner-a10-pit.c b/hw/timer/allwinner-a10-pit.c
index b27fce8..d3c02ea 100644
--- a/hw/timer/allwinner-a10-pit.c
+++ b/hw/timer/allwinner-a10-pit.c
@@ -19,6 +19,15 @@
 #include "sysemu/sysemu.h"
 #include "hw/timer/allwinner-a10-pit.h"
 
+static void a10_pit_update_irq(AwA10PITState *s)
+{
+    int i;
+
+    for (i = 0; i < AW_A10_PIT_TIMER_NR; i++) {
+        qemu_set_irq(s->irq[i], !!(s->irq_status & s->irq_enable & (1 << i)));
+    }
+}
+
 static uint64_t a10_pit_read(void *opaque, hwaddr offset, unsigned size)
 {
     AwA10PITState *s = AW_A10_PIT(opaque);
@@ -65,6 +74,22 @@
     return 0;
 }
 
+static void a10_pit_set_freq(AwA10PITState *s, int index)
+{
+    uint32_t prescaler, source, source_freq;
+
+    prescaler = 1 << extract32(s->control[index], 4, 3);
+    source = extract32(s->control[index], 2, 2);
+    source_freq = s->clk_freq[source];
+
+    if (source_freq) {
+        ptimer_set_freq(s->timer[index], source_freq / prescaler);
+    } else {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Invalid clock source %u\n",
+                      __func__, source);
+    }
+}
+
 static void a10_pit_write(void *opaque, hwaddr offset, uint64_t value,
                             unsigned size)
 {
@@ -74,9 +99,11 @@
     switch (offset) {
     case AW_A10_PIT_TIMER_IRQ_EN:
         s->irq_enable = value;
+        a10_pit_update_irq(s);
         break;
     case AW_A10_PIT_TIMER_IRQ_ST:
         s->irq_status &= ~value;
+        a10_pit_update_irq(s);
         break;
     case AW_A10_PIT_TIMER_BASE ... AW_A10_PIT_TIMER_BASE_END:
         index = offset & 0xf0;
@@ -85,6 +112,7 @@
         switch (offset & 0x0f) {
         case AW_A10_PIT_TIMER_CONTROL:
             s->control[index] = value;
+            a10_pit_set_freq(s, index);
             if (s->control[index] & AW_A10_PIT_TIMER_RELOAD) {
                 ptimer_set_count(s->timer[index], s->interval[index]);
             }
@@ -150,6 +178,14 @@
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
+static Property a10_pit_properties[] = {
+    DEFINE_PROP_UINT32("clk0-freq", AwA10PITState, clk_freq[0], 0),
+    DEFINE_PROP_UINT32("clk1-freq", AwA10PITState, clk_freq[1], 0),
+    DEFINE_PROP_UINT32("clk2-freq", AwA10PITState, clk_freq[2], 0),
+    DEFINE_PROP_UINT32("clk3-freq", AwA10PITState, clk_freq[3], 0),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static const VMStateDescription vmstate_a10_pit = {
     .name = "a10.pit",
     .version_id = 1,
@@ -178,11 +214,14 @@
 
     s->irq_enable = 0;
     s->irq_status = 0;
+    a10_pit_update_irq(s);
+
     for (i = 0; i < 6; i++) {
         s->control[i] = AW_A10_PIT_DEFAULT_CLOCK;
         s->interval[i] = 0;
         s->count[i] = 0;
         ptimer_stop(s->timer[i]);
+        a10_pit_set_freq(s, i);
     }
     s->watch_dog_mode = 0;
     s->watch_dog_control = 0;
@@ -193,18 +232,17 @@
 
 static void a10_pit_timer_cb(void *opaque)
 {
-    AwA10PITState *s = AW_A10_PIT(opaque);
-    uint8_t i;
+    AwA10TimerContext *tc = opaque;
+    AwA10PITState *s = tc->container;
+    uint8_t i = tc->index;
 
-    for (i = 0; i < AW_A10_PIT_TIMER_NR; i++) {
-        if (s->control[i] & AW_A10_PIT_TIMER_EN) {
-            s->irq_status |= 1 << i;
-            if (s->control[i] & AW_A10_PIT_TIMER_MODE) {
-                ptimer_stop(s->timer[i]);
-                s->control[i] &= ~AW_A10_PIT_TIMER_EN;
-            }
-            qemu_irq_pulse(s->irq[i]);
+    if (s->control[i] & AW_A10_PIT_TIMER_EN) {
+        s->irq_status |= 1 << i;
+        if (s->control[i] & AW_A10_PIT_TIMER_MODE) {
+            ptimer_stop(s->timer[i]);
+            s->control[i] &= ~AW_A10_PIT_TIMER_EN;
         }
+        a10_pit_update_irq(s);
     }
 }
 
@@ -223,9 +261,12 @@
     sysbus_init_mmio(sbd, &s->iomem);
 
     for (i = 0; i < AW_A10_PIT_TIMER_NR; i++) {
-        bh[i] = qemu_bh_new(a10_pit_timer_cb, s);
+        AwA10TimerContext *tc = &s->timer_context[i];
+
+        tc->container = s;
+        tc->index = i;
+        bh[i] = qemu_bh_new(a10_pit_timer_cb, tc);
         s->timer[i] = ptimer_init(bh[i]);
-        ptimer_set_freq(s->timer[i], 240000);
     }
 }
 
@@ -234,6 +275,7 @@
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->reset = a10_pit_reset;
+    dc->props = a10_pit_properties;
     dc->desc = "allwinner a10 timer";
     dc->vmsd = &vmstate_a10_pit;
 }
diff --git a/hw/timer/cadence_ttc.c b/hw/timer/cadence_ttc.c
index a279bce..28cb328 100644
--- a/hw/timer/cadence_ttc.c
+++ b/hw/timer/cadence_ttc.c
@@ -346,11 +346,13 @@
     case 0x34:
     case 0x38:
         s->reg_match[0] = value & 0xffff;
+        break;
 
     case 0x3c: /* match register */
     case 0x40:
     case 0x44:
         s->reg_match[1] = value & 0xffff;
+        break;
 
     case 0x48: /* match register */
     case 0x4c:
diff --git a/include/exec/softmmu_exec.h b/include/exec/softmmu_exec.h
index 6fde154..470db20 100644
--- a/include/exec/softmmu_exec.h
+++ b/include/exec/softmmu_exec.h
@@ -162,3 +162,55 @@
 #define stw(p, v) stw_data(p, v)
 #define stl(p, v) stl_data(p, v)
 #define stq(p, v) stq_data(p, v)
+
+/**
+ * tlb_vaddr_to_host:
+ * @env: CPUArchState
+ * @addr: guest virtual address to look up
+ * @access_type: 0 for read, 1 for write, 2 for execute
+ * @mmu_idx: MMU index to use for lookup
+ *
+ * Look up the specified guest virtual index in the TCG softmmu TLB.
+ * If the TLB contains a host virtual address suitable for direct RAM
+ * access, then return it. Otherwise (TLB miss, TLB entry is for an
+ * I/O access, etc) return NULL.
+ *
+ * This is the equivalent of the initial fast-path code used by
+ * TCG backends for guest load and store accesses.
+ */
+static inline void *tlb_vaddr_to_host(CPUArchState *env, target_ulong addr,
+                                      int access_type, int mmu_idx)
+{
+    int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    CPUTLBEntry *tlbentry = &env->tlb_table[mmu_idx][index];
+    target_ulong tlb_addr;
+    uintptr_t haddr;
+
+    switch (access_type) {
+    case 0:
+        tlb_addr = tlbentry->addr_read;
+        break;
+    case 1:
+        tlb_addr = tlbentry->addr_write;
+        break;
+    case 2:
+        tlb_addr = tlbentry->addr_code;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    if ((addr & TARGET_PAGE_MASK)
+        != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+        /* TLB entry is for a different page */
+        return NULL;
+    }
+
+    if (tlb_addr & ~TARGET_PAGE_MASK) {
+        /* IO access */
+        return NULL;
+    }
+
+    haddr = addr + env->tlb_table[mmu_idx][index].addend;
+    return (void *)haddr;
+}
diff --git a/include/hw/net/allwinner_emac.h b/include/hw/net/allwinner_emac.h
index a5e944a..5ae7717 100644
--- a/include/hw/net/allwinner_emac.h
+++ b/include/hw/net/allwinner_emac.h
@@ -144,6 +144,7 @@
 #define MII_BMSR_10T_FD     (1 << 12)
 #define MII_BMSR_10T_HD     (1 << 11)
 #define MII_BMSR_MFPS       (1 << 6)
+#define MII_BMSR_AN_COMP    (1 << 5)
 #define MII_BMSR_AUTONEG    (1 << 3)
 #define MII_BMSR_LINK_ST    (1 << 2)
 
diff --git a/include/hw/timer/allwinner-a10-pit.h b/include/hw/timer/allwinner-a10-pit.h
index 15efab8..770bdc0 100644
--- a/include/hw/timer/allwinner-a10-pit.h
+++ b/include/hw/timer/allwinner-a10-pit.h
@@ -35,13 +35,22 @@
 
 #define AW_A10_PIT_DEFAULT_CLOCK   0x4
 
-typedef struct AwA10PITState {
+typedef struct AwA10PITState AwA10PITState;
+
+typedef struct AwA10TimerContext {
+    AwA10PITState *container;
+    int index;
+} AwA10TimerContext;
+
+struct AwA10PITState {
     /*< private >*/
     SysBusDevice parent_obj;
     /*< public >*/
     qemu_irq irq[AW_A10_PIT_TIMER_NR];
     ptimer_state * timer[AW_A10_PIT_TIMER_NR];
+    AwA10TimerContext timer_context[AW_A10_PIT_TIMER_NR];
     MemoryRegion iomem;
+    uint32_t clk_freq[4];
 
     uint32_t irq_enable;
     uint32_t irq_status;
@@ -53,6 +62,6 @@
     uint32_t count_lo;
     uint32_t count_hi;
     uint32_t count_ctl;
-} AwA10PITState;
+};
 
 #endif
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
index 9ed47aa..f597031 100644
--- a/include/qemu/int128.h
+++ b/include/qemu/int128.h
@@ -53,7 +53,7 @@
     if (n >= 64) {
         return (Int128) { h, h >> 63 };
     } else {
-        return (Int128) { (a.lo >> n) | (a.hi << (64 - n)), h };
+        return (Int128) { (a.lo >> n) | ((uint64_t)a.hi << (64 - n)), h };
     }
 }
 
@@ -78,7 +78,7 @@
 
 static inline Int128 int128_sub(Int128 a, Int128 b)
 {
-    return (Int128){ a.lo - b.lo, a.hi - b.hi - (a.lo < b.lo) };
+    return (Int128){ a.lo - b.lo, (uint64_t)a.hi - b.hi - (a.lo < b.lo) };
 }
 
 static inline bool int128_nonneg(Int128 a)
diff --git a/linux-user/main.c b/linux-user/main.c
index af924dc..947358a 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -483,17 +483,17 @@
     addr = env->regs[2];
 
     if (get_user_u64(oldval, env->regs[0])) {
-        env->cp15.c6_data = env->regs[0];
+        env->exception.vaddress = env->regs[0];
         goto segv;
     };
 
     if (get_user_u64(newval, env->regs[1])) {
-        env->cp15.c6_data = env->regs[1];
+        env->exception.vaddress = env->regs[1];
         goto segv;
     };
 
     if (get_user_u64(val, addr)) {
-        env->cp15.c6_data = addr;
+        env->exception.vaddress = addr;
         goto segv;
     }
 
@@ -501,7 +501,7 @@
         val = newval;
 
         if (put_user_u64(val, addr)) {
-            env->cp15.c6_data = addr;
+            env->exception.vaddress = addr;
             goto segv;
         };
 
@@ -523,7 +523,7 @@
     info.si_errno = 0;
     /* XXX: check env->error_code */
     info.si_code = TARGET_SEGV_MAPERR;
-    info._sifields._sigfault._addr = env->cp15.c6_data;
+    info._sifields._sigfault._addr = env->exception.vaddress;
     queue_signal(env, info.si_signo, &info);
 
     end_exclusive();
@@ -620,14 +620,14 @@
         abort();
     }
     if (segv) {
-        env->cp15.c6_data = addr;
+        env->exception.vaddress = addr;
         goto done;
     }
     if (size == 3) {
         uint32_t valhi;
         segv = get_user_u32(valhi, addr + 4);
         if (segv) {
-            env->cp15.c6_data = addr + 4;
+            env->exception.vaddress = addr + 4;
             goto done;
         }
         val = deposit64(val, 32, 32, valhi);
@@ -650,14 +650,14 @@
         break;
     }
     if (segv) {
-        env->cp15.c6_data = addr;
+        env->exception.vaddress = addr;
         goto done;
     }
     if (size == 3) {
         val = env->regs[(env->exclusive_info >> 12) & 0xf];
         segv = put_user_u32(val, addr + 4);
         if (segv) {
-            env->cp15.c6_data = addr + 4;
+            env->exception.vaddress = addr + 4;
             goto done;
         }
     }
@@ -832,12 +832,14 @@
         case EXCP_INTERRUPT:
             /* just indicate that signals should be handled asap */
             break;
+        case EXCP_STREX:
+            if (!do_strex(env)) {
+                break;
+            }
+            /* fall through for segv */
         case EXCP_PREFETCH_ABORT:
-            addr = env->cp15.c6_insn;
-            goto do_segv;
         case EXCP_DATA_ABORT:
-            addr = env->cp15.c6_data;
-        do_segv:
+            addr = env->exception.vaddress;
             {
                 info.si_signo = SIGSEGV;
                 info.si_errno = 0;
@@ -865,12 +867,6 @@
             if (do_kernel_trap(env))
               goto error;
             break;
-        case EXCP_STREX:
-            if (do_strex(env)) {
-                addr = env->cp15.c6_data;
-                goto do_segv;
-            }
-            break;
         default:
         error:
             fprintf(stderr, "qemu: unhandled CPU exception 0x%x - aborting\n",
@@ -933,7 +929,7 @@
         abort();
     }
     if (segv) {
-        env->cp15.c6_data = addr;
+        env->exception.vaddress = addr;
         goto error;
     }
     if (val != env->exclusive_val) {
@@ -946,7 +942,7 @@
             segv = get_user_u64(val, addr + 8);
         }
         if (segv) {
-            env->cp15.c6_data = addr + (size == 2 ? 4 : 8);
+            env->exception.vaddress = addr + (size == 2 ? 4 : 8);
             goto error;
         }
         if (val != env->exclusive_high) {
@@ -981,7 +977,7 @@
             segv = put_user_u64(val, addr + 8);
         }
         if (segv) {
-            env->cp15.c6_data = addr + (size == 2 ? 4 : 8);
+            env->exception.vaddress = addr + (size == 2 ? 4 : 8);
             goto error;
         }
     }
@@ -1037,12 +1033,14 @@
             info._sifields._sigfault._addr = env->pc;
             queue_signal(env, info.si_signo, &info);
             break;
+        case EXCP_STREX:
+            if (!do_strex_a64(env)) {
+                break;
+            }
+            /* fall through for segv */
         case EXCP_PREFETCH_ABORT:
-            addr = env->cp15.c6_insn;
-            goto do_segv;
         case EXCP_DATA_ABORT:
-            addr = env->cp15.c6_data;
-        do_segv:
+            addr = env->exception.vaddress;
             info.si_signo = SIGSEGV;
             info.si_errno = 0;
             /* XXX: check env->error_code */
@@ -1060,12 +1058,6 @@
                 queue_signal(env, info.si_signo, &info);
             }
             break;
-        case EXCP_STREX:
-            if (do_strex_a64(env)) {
-                addr = env->cp15.c6_data;
-                goto do_segv;
-            }
-            break;
         default:
             fprintf(stderr, "qemu: unhandled CPU exception 0x%x - aborting\n",
                     trapnr);
diff --git a/net/net.c b/net/net.c
index e3ef1e4..a4aadff 100644
--- a/net/net.c
+++ b/net/net.c
@@ -952,10 +952,12 @@
 
     nc = net_hub_find_client_by_name(vlan_id, device);
     if (!nc) {
+        error_report("Host network device '%s' on hub '%d' not found",
+                     device, vlan_id);
         return;
     }
     if (!net_host_check_device(nc->model)) {
-        monitor_printf(mon, "invalid host network device %s\n", device);
+        error_report("invalid host network device '%s'", device);
         return;
     }
     qemu_del_net_client(nc);
diff --git a/qemu-doc.texi b/qemu-doc.texi
index e6e20eb..88ec9bb 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi
@@ -823,7 +823,7 @@
 qemu-nbd --socket=/tmp/my_socket my_disk.qcow2
 @end example
 
-The use of qemu-nbd allows to share a disk between several guests:
+The use of qemu-nbd allows sharing of a disk between several guests:
 @example
 qemu-nbd --socket=/tmp/my_socket --share=2 my_disk.qcow2
 @end example
diff --git a/qemu-options.hx b/qemu-options.hx
index 2d33815..6457034 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -444,7 +444,8 @@
 @item cyls=@var{c},heads=@var{h},secs=@var{s}[,trans=@var{t}]
 These options have the same definition as they have in @option{-hdachs}.
 @item snapshot=@var{snapshot}
-@var{snapshot} is "on" or "off" and allows to enable snapshot for given drive (see @option{-snapshot}).
+@var{snapshot} is "on" or "off" and controls snapshot mode for the given drive
+(see @option{-snapshot}).
 @item cache=@var{cache}
 @var{cache} is "none", "writeback", "unsafe", "directsync" or "writethrough" and controls how the host cache is used to access block data.
 @item aio=@var{aio}
@@ -1242,7 +1243,7 @@
 An adaptive encoding will try to detect frequently updated screen regions,
 and send updates in these regions using a lossy encoding (like JPEG).
 This can be really helpful to save bandwidth when playing videos. Disabling
-adaptive encodings allows to restore the original static behavior of encodings
+adaptive encodings restores the original static behavior of encodings
 like Tight.
 
 @item share=[allow-exclusive|force-shared|ignore]
@@ -2805,7 +2806,7 @@
 MS-DOS or Windows. To start at a specific point in time, provide @var{date} in the
 format @code{2006-06-17T16:01:21} or @code{2006-06-17}. The default base is UTC.
 
-By default the RTC is driven by the host system time. This allows to use the
+By default the RTC is driven by the host system time. This allows using of the
 RTC as accurate reference clock inside the guest, specifically if the host
 time is smoothly following an accurate external reference clock, e.g. via NTP.
 If you want to isolate the guest time from the host, you can set @option{clock}
diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index 6b5f11f..935a4ec 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -171,7 +171,7 @@
     /* Now, if user has passed a time to set and the system time is set, we
      * just need to synchronize the hardware clock. However, if no time was
      * passed, user is requesting the opposite: set the system time from the
-     * hardware clock. */
+     * hardware clock (RTC). */
     pid = fork();
     if (pid == 0) {
         setsid();
diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json
index 80edca1..a8cdcb3 100644
--- a/qga/qapi-schema.json
+++ b/qga/qapi-schema.json
@@ -96,8 +96,8 @@
 ##
 # @guest-get-time:
 #
-# Get the information about guest time relative to the Epoch
-# of 1970-01-01 in UTC.
+# Get the information about guest's System Time relative to
+# the Epoch of 1970-01-01 in UTC.
 #
 # Returns: Time in nanoseconds.
 #
@@ -117,11 +117,11 @@
 # gap was, NTP might not be able to resynchronize the
 # guest.
 #
-# This command tries to set guest time to the given value,
-# then sets the Hardware Clock to the current System Time.
-# This will make it easier for a guest to resynchronize
-# without waiting for NTP. If no @time is specified, then
-# the time to set is read from RTC.
+# This command tries to set guest's System Time to the
+# given value, then sets the Hardware Clock (RTC) to the
+# current System Time. This will make it easier for a guest
+# to resynchronize without waiting for NTP. If no @time is
+# specified, then the time to set is read from RTC.
 #
 # @time: #optional time of nanoseconds, relative to the Epoch
 #        of 1970-01-01 in UTC.
diff --git a/scripts/coverity-model.c b/scripts/coverity-model.c
new file mode 100644
index 0000000..4c99a85
--- /dev/null
+++ b/scripts/coverity-model.c
@@ -0,0 +1,183 @@
+/* Coverity Scan model
+ *
+ * Copyright (C) 2014 Red Hat, Inc.
+ *
+ * Authors:
+ *  Markus Armbruster <armbru@redhat.com>
+ *  Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or, at your
+ * option, any later version.  See the COPYING file in the top-level directory.
+ */
+
+
+/*
+ * This is the source code for our Coverity user model file.  The
+ * purpose of user models is to increase scanning accuracy by explaining
+ * code Coverity can't see (out of tree libraries) or doesn't
+ * sufficiently understand.  Better accuracy means both fewer false
+ * positives and more true defects.  Memory leaks in particular.
+ *
+ * - A model file can't import any header files.  Some built-in primitives are
+ *   available but not wchar_t, NULL etc.
+ * - Modeling doesn't need full structs and typedefs. Rudimentary structs
+ *   and similar types are sufficient.
+ * - An uninitialized local variable signifies that the variable could be
+ *   any value.
+ *
+ * The model file must be uploaded by an admin in the analysis settings of
+ * http://scan.coverity.com/projects/378
+ */
+
+#define NULL ((void *)0)
+
+typedef unsigned char uint8_t;
+typedef char int8_t;
+typedef unsigned int uint32_t;
+typedef int int32_t;
+typedef long ssize_t;
+typedef unsigned long long uint64_t;
+typedef long long int64_t;
+typedef _Bool bool;
+
+/* exec.c */
+
+typedef struct AddressSpace AddressSpace;
+typedef uint64_t hwaddr;
+
+static void __write(uint8_t *buf, ssize_t len)
+{
+    int first, last;
+    __coverity_negative_sink__(len);
+    if (len == 0) return;
+    buf[0] = first;
+    buf[len-1] = last;
+    __coverity_writeall__(buf);
+}
+
+static void __read(uint8_t *buf, ssize_t len)
+{
+    __coverity_negative_sink__(len);
+    if (len == 0) return;
+    int first = buf[0];
+    int last = buf[len-1];
+}
+
+bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
+                      int len, bool is_write)
+{
+    bool result;
+
+    // TODO: investigate impact of treating reads as producing
+    // tainted data, with __coverity_tainted_data_argument__(buf).
+    if (is_write) __write(buf, len); else __read(buf, len);
+
+    return result;
+}
+
+/* Tainting */
+
+typedef struct {} name2keysym_t;
+static int get_keysym(const name2keysym_t *table,
+                      const char *name)
+{
+    int result;
+    if (result > 0) {
+        __coverity_tainted_string_sanitize_content__(name);
+        return result;
+    } else {
+        return 0;
+    }
+}
+
+/* glib memory allocation functions.
+ *
+ * Note that we ignore the fact that g_malloc of 0 bytes returns NULL,
+ * and g_realloc of 0 bytes frees the pointer.
+ *
+ * Modeling this would result in Coverity flagging a lot of memory
+ * allocations as potentially returning NULL, and asking us to check
+ * whether the result of the allocation is NULL or not.  However, the
+ * resulting pointer should never be dereferenced anyway, and in fact
+ * it is not in the vast majority of cases.
+ *
+ * If a dereference did happen, this would suppress a defect report
+ * for an actual null pointer dereference.  But it's too unlikely to
+ * be worth wading through the false positives, and with some luck
+ * we'll get a buffer overflow reported anyway.
+ */
+
+void *malloc(size_t);
+void *calloc(size_t, size_t);
+void *realloc(void *, size_t);
+void free(void *);
+
+void *
+g_malloc(size_t n_bytes)
+{
+    void *mem;
+    __coverity_negative_sink__(n_bytes);
+    mem = malloc(n_bytes == 0 ? 1 : n_bytes);
+    if (!mem) __coverity_panic__();
+    return mem;
+}
+
+void *
+g_malloc0(size_t n_bytes)
+{
+    void *mem;
+    __coverity_negative_sink__(n_bytes);
+    mem = calloc(1, n_bytes == 0 ? 1 : n_bytes);
+    if (!mem) __coverity_panic__();
+    return mem;
+}
+
+void g_free(void *mem)
+{
+    free(mem);
+}
+
+void *g_realloc(void * mem, size_t n_bytes)
+{
+    __coverity_negative_sink__(n_bytes);
+    mem = realloc(mem, n_bytes == 0 ? 1 : n_bytes);
+    if (!mem) __coverity_panic__();
+    return mem;
+}
+
+void *g_try_malloc(size_t n_bytes)
+{
+    __coverity_negative_sink__(n_bytes);
+    return malloc(n_bytes == 0 ? 1 : n_bytes);
+}
+
+void *g_try_malloc0(size_t n_bytes)
+{
+    __coverity_negative_sink__(n_bytes);
+    return calloc(1, n_bytes == 0 ? 1 : n_bytes);
+}
+
+void *g_try_realloc(void *mem, size_t n_bytes)
+{
+    __coverity_negative_sink__(n_bytes);
+    return realloc(mem, n_bytes == 0 ? 1 : n_bytes);
+}
+
+/* Other glib functions */
+
+typedef struct _GIOChannel GIOChannel;
+GIOChannel *g_io_channel_unix_new(int fd)
+{
+    GIOChannel *c = g_malloc0(sizeof(GIOChannel));
+    __coverity_escape__(fd);
+    return c;
+}
+
+void g_assertion_message_expr(const char     *domain,
+                              const char     *file,
+                              int             line,
+                              const char     *func,
+                              const char     *expr)
+{
+    __coverity_panic__();
+}
diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h
index 00234e1..edc7f26 100644
--- a/target-arm/cpu-qom.h
+++ b/target-arm/cpu-qom.h
@@ -116,6 +116,7 @@
     uint32_t reset_fpsid;
     uint32_t mvfr0;
     uint32_t mvfr1;
+    uint32_t mvfr2;
     uint32_t ctr;
     uint32_t reset_sctlr;
     uint32_t id_pfr0;
@@ -147,9 +148,12 @@
      * in the order L1DCache, L1ICache, L2DCache, L2ICache, etc.
      */
     uint32_t ccsidr[16];
-    uint32_t reset_cbar;
+    uint64_t reset_cbar;
     uint32_t reset_auxcr;
     bool reset_hivecs;
+    /* DCZ blocksize, in log_2(words), ie low 4 bits of DCZID_EL0 */
+    uint32_t dcz_blocksize;
+    uint64_t rvbar;
 } ARMCPU;
 
 #define TYPE_AARCH64_CPU "aarch64-cpu"
@@ -196,10 +200,10 @@
 void arm_gt_vtimer_cb(void *opaque);
 
 #ifdef TARGET_AARCH64
-void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
-                            fprintf_function cpu_fprintf, int flags);
 int aarch64_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg);
 int aarch64_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
+
+void aarch64_cpu_do_interrupt(CPUState *cs);
 #endif
 
 #endif
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index c32d8c4..c0ddc3e 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -19,6 +19,7 @@
  */
 
 #include "cpu.h"
+#include "internals.h"
 #include "qemu-common.h"
 #include "hw/qdev-properties.h"
 #include "qapi/qmp/qerror.h"
@@ -87,6 +88,7 @@
     env->vfp.xregs[ARM_VFP_FPSID] = cpu->reset_fpsid;
     env->vfp.xregs[ARM_VFP_MVFR0] = cpu->mvfr0;
     env->vfp.xregs[ARM_VFP_MVFR1] = cpu->mvfr1;
+    env->vfp.xregs[ARM_VFP_MVFR2] = cpu->mvfr2;
 
     if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
         env->iwmmxt.cregs[ARM_IWMMXT_wCID] = 0x69051000 | 'Q';
@@ -99,8 +101,16 @@
         env->pstate = PSTATE_MODE_EL0t;
         /* Userspace expects access to CTL_EL0 and the cache ops */
         env->cp15.c1_sys |= SCTLR_UCT | SCTLR_UCI;
+        /* and to the FP/Neon instructions */
+        env->cp15.c1_coproc = deposit64(env->cp15.c1_coproc, 20, 2, 3);
 #else
         env->pstate = PSTATE_MODE_EL1h;
+        env->pc = cpu->rvbar;
+#endif
+    } else {
+#if defined(CONFIG_USER_ONLY)
+        /* Userspace expects access to cp10 and cp11 for FP/Neon */
+        env->cp15.c1_coproc = deposit64(env->cp15.c1_coproc, 20, 4, 0xf);
 #endif
     }
 
@@ -252,16 +262,20 @@
 }
 
 static Property arm_cpu_reset_cbar_property =
-            DEFINE_PROP_UINT32("reset-cbar", ARMCPU, reset_cbar, 0);
+            DEFINE_PROP_UINT64("reset-cbar", ARMCPU, reset_cbar, 0);
 
 static Property arm_cpu_reset_hivecs_property =
             DEFINE_PROP_BOOL("reset-hivecs", ARMCPU, reset_hivecs, false);
 
+static Property arm_cpu_rvbar_property =
+            DEFINE_PROP_UINT64("rvbar", ARMCPU, rvbar, 0);
+
 static void arm_cpu_post_init(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);
 
-    if (arm_feature(&cpu->env, ARM_FEATURE_CBAR)) {
+    if (arm_feature(&cpu->env, ARM_FEATURE_CBAR) ||
+        arm_feature(&cpu->env, ARM_FEATURE_CBAR_RO)) {
         qdev_property_add_static(DEVICE(obj), &arm_cpu_reset_cbar_property,
                                  &error_abort);
     }
@@ -270,6 +284,11 @@
         qdev_property_add_static(DEVICE(obj), &arm_cpu_reset_hivecs_property,
                                  &error_abort);
     }
+
+    if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
+        qdev_property_add_static(DEVICE(obj), &arm_cpu_rvbar_property,
+                                 &error_abort);
+    }
 }
 
 static void arm_cpu_finalizefn(Object *obj)
@@ -331,6 +350,9 @@
         set_feature(env, ARM_FEATURE_V7MP);
         set_feature(env, ARM_FEATURE_PXN);
     }
+    if (arm_feature(env, ARM_FEATURE_CBAR_RO)) {
+        set_feature(env, ARM_FEATURE_CBAR);
+    }
 
     if (cpu->reset_hivecs) {
             cpu->reset_sctlr |= (1 << 13);
@@ -417,7 +439,7 @@
         ARMCPRegInfo ifar = {
             .name = "IFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 1,
             .access = PL1_RW,
-            .fieldoffset = offsetof(CPUARMState, cp15.c6_insn),
+            .fieldoffset = offsetofhigh32(CPUARMState, cp15.far_el1),
             .resetvalue = 0
         };
         define_one_arm_cp_reg(cpu, &ifar);
@@ -722,7 +744,7 @@
     set_feature(&cpu->env, ARM_FEATURE_ARM_DIV);
     set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
     set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
-    set_feature(&cpu->env, ARM_FEATURE_CBAR);
+    set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
     set_feature(&cpu->env, ARM_FEATURE_LPAE);
     cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A15;
     cpu->midr = 0x412fc0f1;
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index bf37cd6..c83f249 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -111,11 +111,6 @@
 #define GTIMER_VIRT 1
 #define NUM_GTIMERS 2
 
-/* Scale factor for generic timers, ie number of ns per tick.
- * This gives a 62.5MHz timer.
- */
-#define GTIMER_SCALE 16
-
 typedef struct CPUARMState {
     /* Regs for current mode.  */
     uint32_t regs[16];
@@ -148,7 +143,7 @@
     uint32_t spsr;
 
     /* Banked registers.  */
-    uint32_t banked_spsr[6];
+    uint64_t banked_spsr[6];
     uint32_t banked_r13[6];
     uint32_t banked_r14[6];
 
@@ -165,7 +160,10 @@
     uint32_t GE; /* cpsr[19:16] */
     uint32_t thumb; /* cpsr[5]. 0 = arm mode, 1 = thumb mode. */
     uint32_t condexec_bits; /* IT bits.  cpsr[15:10,26:25].  */
-    uint32_t daif; /* exception masks, in the bits they are in in PSTATE */
+    uint64_t daif; /* exception masks, in the bits they are in in PSTATE */
+
+    uint64_t elr_el1; /* AArch64 ELR_EL1 */
+    uint64_t sp_el[2]; /* AArch64 banked stack pointers */
 
     /* System control coprocessor (cp15) */
     struct {
@@ -184,13 +182,13 @@
         uint32_t c2_insn; /* MPU instruction cachable bits.  */
         uint32_t c3; /* MMU domain access control register
                         MPU write buffer control.  */
-        uint32_t c5_insn; /* Fault status registers.  */
-        uint32_t c5_data;
+        uint32_t pmsav5_data_ap; /* PMSAv5 MPU data access permissions */
+        uint32_t pmsav5_insn_ap; /* PMSAv5 MPU insn access permissions */
+        uint32_t ifsr_el2; /* Fault status registers.  */
+        uint64_t esr_el1;
         uint32_t c6_region[8]; /* MPU base/size registers.  */
-        uint32_t c6_insn; /* Fault address registers.  */
-        uint32_t c6_data;
-        uint32_t c7_par;  /* Translation result. */
-        uint32_t c7_par_hi;  /* Translation result, high 32 bits */
+        uint64_t far_el1; /* Fault address registers.  */
+        uint64_t par_el1;  /* Translation result. */
         uint32_t c9_insn; /* Cache lockdown registers.  */
         uint32_t c9_data;
         uint32_t c9_pmcr; /* performance monitor control register */
@@ -202,7 +200,7 @@
         uint64_t mair_el1;
         uint64_t c12_vbar; /* vector base address register */
         uint32_t c13_fcse; /* FCSE PID.  */
-        uint32_t c13_context; /* Context ID.  */
+        uint64_t contextidr_el1; /* Context ID.  */
         uint64_t tpidr_el0; /* User RW Thread register.  */
         uint64_t tpidrro_el0; /* User RO Thread register.  */
         uint64_t tpidr_el1; /* Privileged Thread register.  */
@@ -238,6 +236,21 @@
         int pending_exception;
     } v7m;
 
+    /* Information associated with an exception about to be taken:
+     * code which raises an exception must set cs->exception_index and
+     * the relevant parts of this structure; the cpu_do_interrupt function
+     * will then set the guest-visible registers as part of the exception
+     * entry process.
+     */
+    struct {
+        uint32_t syndrome; /* AArch64 format syndrome register */
+        uint32_t fsr; /* AArch32 format fault status register info */
+        uint64_t vaddress; /* virtual addr associated with exception, if any */
+        /* If we implement EL2 we will also need to store information
+         * about the intermediate physical address for stage 2 faults.
+         */
+    } exception;
+
     /* Thumb-2 EE state.  */
     uint32_t teecr;
     uint32_t teehbr;
@@ -322,11 +335,7 @@
 #include "cpu-qom.h"
 
 ARMCPU *cpu_arm_init(const char *cpu_model);
-void arm_translate_init(void);
-void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu);
 int cpu_arm_exec(CPUARMState *s);
-int bank_number(int mode);
-void switch_mode(CPUARMState *, int);
 uint32_t do_arm_semihosting(CPUARMState *env);
 
 static inline bool is_a64(CPUARMState *env)
@@ -425,6 +434,7 @@
  * Only these are valid when in AArch64 mode; in
  * AArch32 mode SPSRs are basically CPSR-format.
  */
+#define PSTATE_SP (1U)
 #define PSTATE_M (0xFU)
 #define PSTATE_nRW (1U << 4)
 #define PSTATE_F (1U << 6)
@@ -548,17 +558,6 @@
     vfp_set_fpscr(env, new_fpscr);
 }
 
-enum arm_fprounding {
-    FPROUNDING_TIEEVEN,
-    FPROUNDING_POSINF,
-    FPROUNDING_NEGINF,
-    FPROUNDING_ZERO,
-    FPROUNDING_TIEAWAY,
-    FPROUNDING_ODD
-};
-
-int arm_rmode_to_sf(int rmode);
-
 enum arm_cpu_mode {
   ARM_CPU_MODE_USR = 0x10,
   ARM_CPU_MODE_FIQ = 0x11,
@@ -572,6 +571,7 @@
 /* VFP system registers.  */
 #define ARM_VFP_FPSID   0
 #define ARM_VFP_FPSCR   1
+#define ARM_VFP_MVFR2   5
 #define ARM_VFP_MVFR1   6
 #define ARM_VFP_MVFR0   7
 #define ARM_VFP_FPEXC   8
@@ -630,6 +630,7 @@
     ARM_FEATURE_V8_AES, /* implements AES part of v8 Crypto Extensions */
     ARM_FEATURE_CBAR, /* has cp15 CBAR */
     ARM_FEATURE_CRC, /* ARMv8 CRC instructions */
+    ARM_FEATURE_CBAR_RO, /* has cp15 CBAR and it is read-only */
 };
 
 static inline int arm_feature(CPUARMState *env, int feature)
@@ -763,7 +764,8 @@
 #define ARM_CP_WFI (ARM_CP_SPECIAL | (2 << 8))
 #define ARM_CP_NZCV (ARM_CP_SPECIAL | (3 << 8))
 #define ARM_CP_CURRENTEL (ARM_CP_SPECIAL | (4 << 8))
-#define ARM_LAST_SPECIAL ARM_CP_CURRENTEL
+#define ARM_CP_DC_ZVA (ARM_CP_SPECIAL | (5 << 8))
+#define ARM_LAST_SPECIAL ARM_CP_DC_ZVA
 /* Used only as a terminator for ARMCPRegInfo lists */
 #define ARM_CP_SENTINEL 0xffff
 /* Mask of only the flag bits in a type field */
@@ -1109,10 +1111,14 @@
 #define ARM_TBFLAG_CONDEXEC_MASK    (0xff << ARM_TBFLAG_CONDEXEC_SHIFT)
 #define ARM_TBFLAG_BSWAP_CODE_SHIFT 16
 #define ARM_TBFLAG_BSWAP_CODE_MASK  (1 << ARM_TBFLAG_BSWAP_CODE_SHIFT)
+#define ARM_TBFLAG_CPACR_FPEN_SHIFT 17
+#define ARM_TBFLAG_CPACR_FPEN_MASK  (1 << ARM_TBFLAG_CPACR_FPEN_SHIFT)
 
 /* Bit usage when in AArch64 state */
 #define ARM_TBFLAG_AA64_EL_SHIFT    0
 #define ARM_TBFLAG_AA64_EL_MASK     (0x3 << ARM_TBFLAG_AA64_EL_SHIFT)
+#define ARM_TBFLAG_AA64_FPEN_SHIFT  2
+#define ARM_TBFLAG_AA64_FPEN_MASK   (1 << ARM_TBFLAG_AA64_FPEN_SHIFT)
 
 /* some convenience accessor macros */
 #define ARM_TBFLAG_AARCH64_STATE(F) \
@@ -1131,16 +1137,25 @@
     (((F) & ARM_TBFLAG_CONDEXEC_MASK) >> ARM_TBFLAG_CONDEXEC_SHIFT)
 #define ARM_TBFLAG_BSWAP_CODE(F) \
     (((F) & ARM_TBFLAG_BSWAP_CODE_MASK) >> ARM_TBFLAG_BSWAP_CODE_SHIFT)
+#define ARM_TBFLAG_CPACR_FPEN(F) \
+    (((F) & ARM_TBFLAG_CPACR_FPEN_MASK) >> ARM_TBFLAG_CPACR_FPEN_SHIFT)
 #define ARM_TBFLAG_AA64_EL(F) \
     (((F) & ARM_TBFLAG_AA64_EL_MASK) >> ARM_TBFLAG_AA64_EL_SHIFT)
+#define ARM_TBFLAG_AA64_FPEN(F) \
+    (((F) & ARM_TBFLAG_AA64_FPEN_MASK) >> ARM_TBFLAG_AA64_FPEN_SHIFT)
 
 static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
                                         target_ulong *cs_base, int *flags)
 {
+    int fpen = extract32(env->cp15.c1_coproc, 20, 2);
+
     if (is_a64(env)) {
         *pc = env->pc;
         *flags = ARM_TBFLAG_AARCH64_STATE_MASK
             | (arm_current_pl(env) << ARM_TBFLAG_AA64_EL_SHIFT);
+        if (fpen == 3 || (fpen == 1 && arm_current_pl(env) != 0)) {
+            *flags |= ARM_TBFLAG_AA64_FPEN_MASK;
+        }
     } else {
         int privmode;
         *pc = env->regs[15];
@@ -1157,9 +1172,13 @@
         if (privmode) {
             *flags |= ARM_TBFLAG_PRIV_MASK;
         }
-        if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) {
+        if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)
+            || arm_el_is_aa64(env, 1)) {
             *flags |= ARM_TBFLAG_VFPEN_MASK;
         }
+        if (fpen == 3 || (fpen == 1 && arm_current_pl(env) != 0)) {
+            *flags |= ARM_TBFLAG_CPACR_FPEN_MASK;
+        }
     }
 
     *cs_base = 0;
diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c
index 8426bf1..8daa622 100644
--- a/target-arm/cpu64.c
+++ b/target-arm/cpu64.c
@@ -32,6 +32,104 @@
     env->features |= 1ULL << feature;
 }
 
+#ifndef CONFIG_USER_ONLY
+static uint64_t a57_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    /* Number of processors is in [25:24]; otherwise we RAZ */
+    return (smp_cpus - 1) << 24;
+}
+#endif
+
+static const ARMCPRegInfo cortexa57_cp_reginfo[] = {
+#ifndef CONFIG_USER_ONLY
+    { .name = "L2CTLR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 1, .crn = 11, .crm = 0, .opc2 = 2,
+      .access = PL1_RW, .readfn = a57_l2ctlr_read,
+      .writefn = arm_cp_write_ignore },
+    { .name = "L2CTLR",
+      .cp = 15, .opc1 = 1, .crn = 9, .crm = 0, .opc2 = 2,
+      .access = PL1_RW, .readfn = a57_l2ctlr_read,
+      .writefn = arm_cp_write_ignore },
+#endif
+    { .name = "L2ECTLR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 1, .crn = 11, .crm = 0, .opc2 = 3,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "L2ECTLR",
+      .cp = 15, .opc1 = 1, .crn = 9, .crm = 0, .opc2 = 3,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "L2ACTLR", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 0, .opc2 = 0,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "CPUACTLR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 2, .opc2 = 0,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "CPUACTLR",
+      .cp = 15, .opc1 = 0, .crm = 15,
+      .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 },
+    { .name = "CPUECTLR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 2, .opc2 = 1,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "CPUECTLR",
+      .cp = 15, .opc1 = 1, .crm = 15,
+      .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 },
+    { .name = "CPUMERRSR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 2, .opc2 = 2,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "CPUMERRSR",
+      .cp = 15, .opc1 = 2, .crm = 15,
+      .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 },
+    { .name = "L2MERRSR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 2, .opc2 = 3,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "L2MERRSR",
+      .cp = 15, .opc1 = 3, .crm = 15,
+      .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 },
+    REGINFO_SENTINEL
+};
+
+static void aarch64_a57_initfn(Object *obj)
+{
+    ARMCPU *cpu = ARM_CPU(obj);
+
+    set_feature(&cpu->env, ARM_FEATURE_V8);
+    set_feature(&cpu->env, ARM_FEATURE_VFP4);
+    set_feature(&cpu->env, ARM_FEATURE_VFP_FP16);
+    set_feature(&cpu->env, ARM_FEATURE_NEON);
+    set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
+    set_feature(&cpu->env, ARM_FEATURE_AARCH64);
+    set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
+    cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A57;
+    cpu->midr = 0x411fd070;
+    cpu->reset_fpsid = 0x41034070;
+    cpu->mvfr0 = 0x10110222;
+    cpu->mvfr1 = 0x12111111;
+    cpu->mvfr2 = 0x00000043;
+    cpu->ctr = 0x8444c004;
+    cpu->reset_sctlr = 0x00c50838;
+    cpu->id_pfr0 = 0x00000131;
+    cpu->id_pfr1 = 0x00011011;
+    cpu->id_dfr0 = 0x03010066;
+    cpu->id_afr0 = 0x00000000;
+    cpu->id_mmfr0 = 0x10101105;
+    cpu->id_mmfr1 = 0x40000000;
+    cpu->id_mmfr2 = 0x01260000;
+    cpu->id_mmfr3 = 0x02102211;
+    cpu->id_isar0 = 0x02101110;
+    cpu->id_isar1 = 0x13112111;
+    cpu->id_isar2 = 0x21232042;
+    cpu->id_isar3 = 0x01112131;
+    cpu->id_isar4 = 0x00011142;
+    cpu->id_aa64pfr0 = 0x00002222;
+    cpu->id_aa64dfr0 = 0x10305106;
+    cpu->id_aa64isar0 = 0x00010000;
+    cpu->id_aa64mmfr0 = 0x00001124;
+    cpu->clidr = 0x0a200023;
+    cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */
+    cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */
+    cpu->ccsidr[2] = 0x70ffe07a; /* 2048KB L2 cache */
+    cpu->dcz_blocksize = 4; /* 64 bytes */
+}
+
 #ifdef CONFIG_USER_ONLY
 static void aarch64_any_initfn(Object *obj)
 {
@@ -41,11 +139,11 @@
     set_feature(&cpu->env, ARM_FEATURE_VFP4);
     set_feature(&cpu->env, ARM_FEATURE_VFP_FP16);
     set_feature(&cpu->env, ARM_FEATURE_NEON);
-    set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
     set_feature(&cpu->env, ARM_FEATURE_ARM_DIV);
     set_feature(&cpu->env, ARM_FEATURE_V7MP);
     set_feature(&cpu->env, ARM_FEATURE_AARCH64);
     cpu->ctr = 0x80030003; /* 32 byte I and D cacheline size, VIPT icache */
+    cpu->dcz_blocksize = 7; /*  512 bytes */
 }
 #endif
 
@@ -56,6 +154,7 @@
 } ARMCPUInfo;
 
 static const ARMCPUInfo aarch64_cpus[] = {
+    { .name = "cortex-a57",         .initfn = aarch64_a57_initfn },
 #ifdef CONFIG_USER_ONLY
     { .name = "any",         .initfn = aarch64_any_initfn },
 #endif
@@ -73,18 +172,22 @@
 static void aarch64_cpu_set_pc(CPUState *cs, vaddr value)
 {
     ARMCPU *cpu = ARM_CPU(cs);
-    /*
-     * TODO: this will need updating for system emulation,
-     * when the core may be in AArch32 mode.
+    /* It's OK to look at env for the current mode here, because it's
+     * never possible for an AArch64 TB to chain to an AArch32 TB.
+     * (Otherwise we would need to use synchronize_from_tb instead.)
      */
-    cpu->env.pc = value;
+    if (is_a64(&cpu->env)) {
+        cpu->env.pc = value;
+    } else {
+        cpu->env.regs[15] = value;
+    }
 }
 
 static void aarch64_cpu_class_init(ObjectClass *oc, void *data)
 {
     CPUClass *cc = CPU_CLASS(oc);
 
-    cc->dump_state = aarch64_cpu_dump_state;
+    cc->do_interrupt = aarch64_cpu_do_interrupt;
     cc->set_pc = aarch64_cpu_set_pc;
     cc->gdb_read_register = aarch64_cpu_gdb_read_register;
     cc->gdb_write_register = aarch64_cpu_gdb_write_register;
diff --git a/target-arm/gdbstub64.c b/target-arm/gdbstub64.c
index e8a8295..8f3b8d1 100644
--- a/target-arm/gdbstub64.c
+++ b/target-arm/gdbstub64.c
@@ -32,10 +32,8 @@
     switch (n) {
     case 31:
         return gdb_get_reg64(mem_buf, env->xregs[31]);
-        break;
     case 32:
         return gdb_get_reg64(mem_buf, env->pc);
-        break;
     case 33:
         return gdb_get_reg32(mem_buf, pstate_read(env));
     }
diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c
index ec02582..bf921cc 100644
--- a/target-arm/helper-a64.c
+++ b/target-arm/helper-a64.c
@@ -23,6 +23,7 @@
 #include "qemu/host-utils.h"
 #include "sysemu/sysemu.h"
 #include "qemu/bitops.h"
+#include "internals.h"
 
 /* C2.4.7 Multiply and divide */
 /* special cases for 0 and LLONG_MIN are mandated by the standard */
@@ -436,3 +437,78 @@
     set_float_exception_flags(exflags, fpst);
     return r;
 }
+
+/* Handle a CPU exception.  */
+void aarch64_cpu_do_interrupt(CPUState *cs)
+{
+    ARMCPU *cpu = ARM_CPU(cs);
+    CPUARMState *env = &cpu->env;
+    target_ulong addr = env->cp15.c12_vbar;
+    int i;
+
+    if (arm_current_pl(env) == 0) {
+        if (env->aarch64) {
+            addr += 0x400;
+        } else {
+            addr += 0x600;
+        }
+    } else if (pstate_read(env) & PSTATE_SP) {
+        addr += 0x200;
+    }
+
+    arm_log_exception(cs->exception_index);
+    qemu_log_mask(CPU_LOG_INT, "...from EL%d\n", arm_current_pl(env));
+    if (qemu_loglevel_mask(CPU_LOG_INT)
+        && !excp_is_internal(cs->exception_index)) {
+        qemu_log_mask(CPU_LOG_INT, "...with ESR 0x%" PRIx32 "\n",
+                      env->exception.syndrome);
+    }
+
+    env->cp15.esr_el1 = env->exception.syndrome;
+    env->cp15.far_el1 = env->exception.vaddress;
+
+    switch (cs->exception_index) {
+    case EXCP_PREFETCH_ABORT:
+    case EXCP_DATA_ABORT:
+        qemu_log_mask(CPU_LOG_INT, "...with FAR 0x%" PRIx64 "\n",
+                      env->cp15.far_el1);
+        break;
+    case EXCP_BKPT:
+    case EXCP_UDEF:
+    case EXCP_SWI:
+        break;
+    case EXCP_IRQ:
+        addr += 0x80;
+        break;
+    case EXCP_FIQ:
+        addr += 0x100;
+        break;
+    default:
+        cpu_abort(cs, "Unhandled exception 0x%x\n", cs->exception_index);
+    }
+
+    if (is_a64(env)) {
+        env->banked_spsr[0] = pstate_read(env);
+        env->sp_el[arm_current_pl(env)] = env->xregs[31];
+        env->xregs[31] = env->sp_el[1];
+        env->elr_el1 = env->pc;
+    } else {
+        env->banked_spsr[0] = cpsr_read(env);
+        if (!env->thumb) {
+            env->cp15.esr_el1 |= 1 << 25;
+        }
+        env->elr_el1 = env->regs[15];
+
+        for (i = 0; i < 15; i++) {
+            env->xregs[i] = env->regs[i];
+        }
+
+        env->condexec_bits = 0;
+    }
+
+    pstate_write(env, PSTATE_DAIF | PSTATE_MODE_EL1h);
+    env->aarch64 = 1;
+
+    env->pc = addr;
+    cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
+}
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 55077ed..43c1b4f 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -1,4 +1,5 @@
 #include "cpu.h"
+#include "internals.h"
 #include "exec/gdbstub.h"
 #include "helper.h"
 #include "qemu/host-utils.h"
@@ -9,7 +10,9 @@
 #include <zlib.h> /* For crc32 */
 
 #ifndef CONFIG_USER_ONLY
-static inline int get_phys_addr(CPUARMState *env, uint32_t address,
+#include "exec/softmmu_exec.h"
+
+static inline int get_phys_addr(CPUARMState *env, target_ulong address,
                                 int access_type, int is_user,
                                 hwaddr *phys_ptr, int *prot,
                                 target_ulong *page_size);
@@ -301,6 +304,17 @@
     g_list_free(keys);
 }
 
+/* Return true if extended addresses are enabled.
+ * This is always the case if our translation regime is 64 bit,
+ * but depends on TTBCR.EAE for 32 bit.
+ */
+static inline bool extended_addresses_enabled(CPUARMState *env)
+{
+    return arm_el_is_aa64(env, 1)
+        || ((arm_feature(env, ARM_FEATURE_LPAE)
+             && (env->cp15.c2_control & (1U << 31))));
+}
+
 static void dacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 {
     ARMCPU *cpu = arm_env_get_cpu(env);
@@ -327,14 +341,15 @@
 {
     ARMCPU *cpu = arm_env_get_cpu(env);
 
-    if (env->cp15.c13_context != value && !arm_feature(env, ARM_FEATURE_MPU)) {
+    if (env->cp15.contextidr_el1 != value && !arm_feature(env, ARM_FEATURE_MPU)
+        && !extended_addresses_enabled(env)) {
         /* For VMSA (when not using the LPAE long descriptor page table
          * format) this register includes the ASID, so do a TLB flush.
          * For PMSA it is purely a process ID and no action is needed.
          */
         tlb_flush(CPU(cpu), 1);
     }
-    env->cp15.c13_context = value;
+    env->cp15.contextidr_el1 = value;
 }
 
 static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -380,17 +395,26 @@
      */
     { .name = "DBGDIDR", .cp = 14, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL0_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "FCSEIDR", .cp = 15, .crn = 13, .crm = 0, .opc1 = 0, .opc2 = 0,
+      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c13_fcse),
+      .resetvalue = 0, .writefn = fcse_write, .raw_writefn = raw_write, },
+    { .name = "CONTEXTIDR", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .opc1 = 0, .crn = 13, .crm = 0, .opc2 = 1,
+      .access = PL1_RW,
+      .fieldoffset = offsetof(CPUARMState, cp15.contextidr_el1),
+      .resetvalue = 0, .writefn = contextidr_write, .raw_writefn = raw_write, },
+    REGINFO_SENTINEL
+};
+
+static const ARMCPRegInfo not_v8_cp_reginfo[] = {
+    /* NB: Some of these registers exist in v8 but with more precise
+     * definitions that don't use CP_ANY wildcards (mostly in v8_cp_reginfo[]).
+     */
     /* MMU Domain access control / MPU write buffer control */
     { .name = "DACR", .cp = 15,
       .crn = 3, .crm = CP_ANY, .opc1 = CP_ANY, .opc2 = CP_ANY,
       .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c3),
       .resetvalue = 0, .writefn = dacr_write, .raw_writefn = raw_write, },
-    { .name = "FCSEIDR", .cp = 15, .crn = 13, .crm = 0, .opc1 = 0, .opc2 = 0,
-      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c13_fcse),
-      .resetvalue = 0, .writefn = fcse_write, .raw_writefn = raw_write, },
-    { .name = "CONTEXTIDR", .cp = 15, .crn = 13, .crm = 0, .opc1 = 0, .opc2 = 1,
-      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c13_context),
-      .resetvalue = 0, .writefn = contextidr_write, .raw_writefn = raw_write, },
     /* ??? This covers not just the impdef TLB lockdown registers but also
      * some v7VMSA registers relating to TEX remap, so it is overly broad.
      */
@@ -472,7 +496,8 @@
     { .name = "DMB", .cp = 15, .crn = 7, .crm = 10, .opc1 = 0, .opc2 = 5,
       .access = PL0_W, .type = ARM_CP_NOP },
     { .name = "IFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 2,
-      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c6_insn),
+      .access = PL1_RW,
+      .fieldoffset = offsetofhigh32(CPUARMState, cp15.far_el1),
       .resetvalue = 0, },
     /* Watchpoint Fault Address Register : should actually only be present
      * for 1136, 1176, 11MPCore.
@@ -647,6 +672,21 @@
     env->cp15.c0_cssel = value & 0xf;
 }
 
+static uint64_t isr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    CPUState *cs = ENV_GET_CPU(env);
+    uint64_t ret = 0;
+
+    if (cs->interrupt_request & CPU_INTERRUPT_HARD) {
+        ret |= CPSR_I;
+    }
+    if (cs->interrupt_request & CPU_INTERRUPT_FIQ) {
+        ret |= CPSR_F;
+    }
+    /* External aborts are not possible in QEMU so A bit is always clear */
+    return ret;
+}
+
 static const ARMCPRegInfo v7_cp_reginfo[] = {
     /* DBGDRAR, DBGDSAR: always RAZ since we don't implement memory mapped
      * debug components
@@ -741,8 +781,18 @@
     /* Auxiliary ID register: this actually has an IMPDEF value but for now
      * just RAZ for all cores:
      */
-    { .name = "AIDR", .cp = 15, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 7,
+    { .name = "AIDR", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .opc1 = 1, .crn = 0, .crm = 0, .opc2 = 7,
       .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+    /* Auxiliary fault status registers: these also are IMPDEF, and we
+     * choose to RAZ/WI for all cores.
+     */
+    { .name = "AFSR0_EL1", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 1, .opc2 = 0,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "AFSR1_EL1", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 1, .opc2 = 1,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
     /* MAIR can just read-as-written because we don't implement caches
      * and so don't need to care about memory attributes.
      */
@@ -763,6 +813,9 @@
       .cp = 15, .opc1 = 0, .crn = 10, .crm = 2, .opc2 = 1, .access = PL1_RW,
       .fieldoffset = offsetofhigh32(CPUARMState, cp15.mair_el1),
       .resetfn = arm_cp_reset_ignore },
+    { .name = "ISR_EL1", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 1, .opc2 = 0,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_R, .readfn = isr_read },
     REGINFO_SENTINEL
 };
 
@@ -1139,27 +1192,17 @@
 static void par_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 {
     if (arm_feature(env, ARM_FEATURE_LPAE)) {
-        env->cp15.c7_par = value;
+        env->cp15.par_el1 = value;
     } else if (arm_feature(env, ARM_FEATURE_V7)) {
-        env->cp15.c7_par = value & 0xfffff6ff;
+        env->cp15.par_el1 = value & 0xfffff6ff;
     } else {
-        env->cp15.c7_par = value & 0xfffff1ff;
+        env->cp15.par_el1 = value & 0xfffff1ff;
     }
 }
 
 #ifndef CONFIG_USER_ONLY
 /* get_phys_addr() isn't present for user-mode-only targets */
 
-/* Return true if extended addresses are enabled, ie this is an
- * LPAE implementation and we are using the long-descriptor translation
- * table format because the TTBCR EAE bit is set.
- */
-static inline bool extended_addresses_enabled(CPUARMState *env)
-{
-    return arm_feature(env, ARM_FEATURE_LPAE)
-        && (env->cp15.c2_control & (1U << 31));
-}
-
 static CPAccessResult ats_access(CPUARMState *env, const ARMCPRegInfo *ri)
 {
     if (ri->opc2 & 4) {
@@ -1200,8 +1243,7 @@
              * fault.
              */
         }
-        env->cp15.c7_par = par64;
-        env->cp15.c7_par_hi = par64 >> 32;
+        env->cp15.par_el1 = par64;
     } else {
         /* ret is a DFSR/IFSR value for the short descriptor
          * translation table format (with WnR always clear).
@@ -1211,16 +1253,15 @@
             /* We do not set any attribute bits in the PAR */
             if (page_size == (1 << 24)
                 && arm_feature(env, ARM_FEATURE_V7)) {
-                env->cp15.c7_par = (phys_addr & 0xff000000) | 1 << 1;
+                env->cp15.par_el1 = (phys_addr & 0xff000000) | 1 << 1;
             } else {
-                env->cp15.c7_par = phys_addr & 0xfffff000;
+                env->cp15.par_el1 = phys_addr & 0xfffff000;
             }
         } else {
-            env->cp15.c7_par = ((ret & (1 << 10)) >> 5) |
+            env->cp15.par_el1 = ((ret & (1 << 10)) >> 5) |
                 ((ret & (1 << 12)) >> 6) |
                 ((ret & 0xf) << 1) | 1;
         }
-        env->cp15.c7_par_hi = 0;
     }
 }
 #endif
@@ -1228,7 +1269,7 @@
 static const ARMCPRegInfo vapa_cp_reginfo[] = {
     { .name = "PAR", .cp = 15, .crn = 7, .crm = 4, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW, .resetvalue = 0,
-      .fieldoffset = offsetof(CPUARMState, cp15.c7_par),
+      .fieldoffset = offsetoflow32(CPUARMState, cp15.par_el1),
       .writefn = par_write },
 #ifndef CONFIG_USER_ONLY
     { .name = "ATS", .cp = 15, .crn = 7, .crm = 8, .opc1 = 0, .opc2 = CP_ANY,
@@ -1271,40 +1312,44 @@
 static void pmsav5_data_ap_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                  uint64_t value)
 {
-    env->cp15.c5_data = extended_mpu_ap_bits(value);
+    env->cp15.pmsav5_data_ap = extended_mpu_ap_bits(value);
 }
 
 static uint64_t pmsav5_data_ap_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
-    return simple_mpu_ap_bits(env->cp15.c5_data);
+    return simple_mpu_ap_bits(env->cp15.pmsav5_data_ap);
 }
 
 static void pmsav5_insn_ap_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                  uint64_t value)
 {
-    env->cp15.c5_insn = extended_mpu_ap_bits(value);
+    env->cp15.pmsav5_insn_ap = extended_mpu_ap_bits(value);
 }
 
 static uint64_t pmsav5_insn_ap_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
-    return simple_mpu_ap_bits(env->cp15.c5_insn);
+    return simple_mpu_ap_bits(env->cp15.pmsav5_insn_ap);
 }
 
 static const ARMCPRegInfo pmsav5_cp_reginfo[] = {
     { .name = "DATA_AP", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW, .type = ARM_CP_NO_MIGRATE,
-      .fieldoffset = offsetof(CPUARMState, cp15.c5_data), .resetvalue = 0,
+      .fieldoffset = offsetof(CPUARMState, cp15.pmsav5_data_ap),
+      .resetvalue = 0,
       .readfn = pmsav5_data_ap_read, .writefn = pmsav5_data_ap_write, },
     { .name = "INSN_AP", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 1,
       .access = PL1_RW, .type = ARM_CP_NO_MIGRATE,
-      .fieldoffset = offsetof(CPUARMState, cp15.c5_insn), .resetvalue = 0,
+      .fieldoffset = offsetof(CPUARMState, cp15.pmsav5_insn_ap),
+      .resetvalue = 0,
       .readfn = pmsav5_insn_ap_read, .writefn = pmsav5_insn_ap_write, },
     { .name = "DATA_EXT_AP", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 2,
       .access = PL1_RW,
-      .fieldoffset = offsetof(CPUARMState, cp15.c5_data), .resetvalue = 0, },
+      .fieldoffset = offsetof(CPUARMState, cp15.pmsav5_data_ap),
+      .resetvalue = 0, },
     { .name = "INSN_EXT_AP", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 3,
       .access = PL1_RW,
-      .fieldoffset = offsetof(CPUARMState, cp15.c5_insn), .resetvalue = 0, },
+      .fieldoffset = offsetof(CPUARMState, cp15.pmsav5_insn_ap),
+      .resetvalue = 0, },
     { .name = "DCACHE_CFG", .cp = 15, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW,
       .fieldoffset = offsetof(CPUARMState, cp15.c2_data), .resetvalue = 0, },
@@ -1406,11 +1451,16 @@
 
 static const ARMCPRegInfo vmsa_cp_reginfo[] = {
     { .name = "DFSR", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 0,
-      .access = PL1_RW,
-      .fieldoffset = offsetof(CPUARMState, cp15.c5_data), .resetvalue = 0, },
+      .access = PL1_RW, .type = ARM_CP_NO_MIGRATE,
+      .fieldoffset = offsetoflow32(CPUARMState, cp15.esr_el1),
+      .resetfn = arm_cp_reset_ignore, },
     { .name = "IFSR", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 1,
       .access = PL1_RW,
-      .fieldoffset = offsetof(CPUARMState, cp15.c5_insn), .resetvalue = 0, },
+      .fieldoffset = offsetof(CPUARMState, cp15.ifsr_el2), .resetvalue = 0, },
+    { .name = "ESR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .crn = 5, .crm = 2, .opc1 = 0, .opc2 = 0,
+      .access = PL1_RW,
+      .fieldoffset = offsetof(CPUARMState, cp15.esr_el1), .resetvalue = 0, },
     { .name = "TTBR0_EL1", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el1),
@@ -1428,8 +1478,10 @@
       .access = PL1_RW, .type = ARM_CP_NO_MIGRATE, .writefn = vmsa_ttbcr_write,
       .resetfn = arm_cp_reset_ignore, .raw_writefn = vmsa_ttbcr_raw_write,
       .fieldoffset = offsetoflow32(CPUARMState, cp15.c2_control) },
-    { .name = "DFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 0,
-      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c6_data),
+    /* 64-bit FAR; this entry also gives us the AArch32 DFAR */
+    { .name = "FAR_EL1", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 0,
+      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.far_el1),
       .resetvalue = 0, },
     REGINFO_SENTINEL
 };
@@ -1469,7 +1521,8 @@
 static const ARMCPRegInfo omap_cp_reginfo[] = {
     { .name = "DFSR", .cp = 15, .crn = 5, .crm = CP_ANY,
       .opc1 = CP_ANY, .opc2 = CP_ANY, .access = PL1_RW, .type = ARM_CP_OVERRIDE,
-      .fieldoffset = offsetof(CPUARMState, cp15.c5_data), .resetvalue = 0, },
+      .fieldoffset = offsetoflow32(CPUARMState, cp15.esr_el1),
+      .resetvalue = 0, },
     { .name = "", .cp = 15, .crn = 15, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW, .type = ARM_CP_NOP },
     { .name = "TICONFIG", .cp = 15, .crn = 15, .crm = 1, .opc1 = 0, .opc2 = 0,
@@ -1619,24 +1672,6 @@
     REGINFO_SENTINEL
 };
 
-static uint64_t par64_read(CPUARMState *env, const ARMCPRegInfo *ri)
-{
-    return ((uint64_t)env->cp15.c7_par_hi << 32) | env->cp15.c7_par;
-}
-
-static void par64_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                        uint64_t value)
-{
-    env->cp15.c7_par_hi = value >> 32;
-    env->cp15.c7_par = value;
-}
-
-static void par64_reset(CPUARMState *env, const ARMCPRegInfo *ri)
-{
-    env->cp15.c7_par_hi = 0;
-    env->cp15.c7_par = 0;
-}
-
 static const ARMCPRegInfo lpae_cp_reginfo[] = {
     /* NOP AMAIR0/1: the override is because these clash with the rather
      * broadly specified TLB_LOCKDOWN entry in the generic cp_reginfo.
@@ -1656,7 +1691,7 @@
       .access = PL0_R, .type = ARM_CP_CONST|ARM_CP_64BIT, .resetvalue = 0 },
     { .name = "PAR", .cp = 15, .crm = 7, .opc1 = 0,
       .access = PL1_RW, .type = ARM_CP_64BIT,
-      .readfn = par64_read, .writefn = par64_write, .resetfn = par64_reset },
+      .fieldoffset = offsetof(CPUARMState, cp15.par_el1), .resetvalue = 0 },
     { .name = "TTBR0", .cp = 15, .crm = 2, .opc1 = 0,
       .access = PL1_RW, .type = ARM_CP_64BIT | ARM_CP_NO_MIGRATE,
       .fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el1),
@@ -1690,6 +1725,20 @@
     vfp_set_fpsr(env, value);
 }
 
+static CPAccessResult aa64_daif_access(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    if (arm_current_pl(env) == 0 && !(env->cp15.c1_sys & SCTLR_UMA)) {
+        return CP_ACCESS_TRAP;
+    }
+    return CP_ACCESS_OK;
+}
+
+static void aa64_daif_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                            uint64_t value)
+{
+    env->daif = value & PSTATE_DAIF;
+}
+
 static CPAccessResult aa64_cacheop_access(CPUARMState *env,
                                           const ARMCPRegInfo *ri)
 {
@@ -1729,6 +1778,50 @@
     tlb_flush(CPU(cpu), asid == 0);
 }
 
+static CPAccessResult aa64_zva_access(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    /* We don't implement EL2, so the only control on DC ZVA is the
+     * bit in the SCTLR which can prohibit access for EL0.
+     */
+    if (arm_current_pl(env) == 0 && !(env->cp15.c1_sys & SCTLR_DZE)) {
+        return CP_ACCESS_TRAP;
+    }
+    return CP_ACCESS_OK;
+}
+
+static uint64_t aa64_dczid_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    ARMCPU *cpu = arm_env_get_cpu(env);
+    int dzp_bit = 1 << 4;
+
+    /* DZP indicates whether DC ZVA access is allowed */
+    if (aa64_zva_access(env, NULL) != CP_ACCESS_OK) {
+        dzp_bit = 0;
+    }
+    return cpu->dcz_blocksize | dzp_bit;
+}
+
+static CPAccessResult sp_el0_access(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    if (!env->pstate & PSTATE_SP) {
+        /* Access to SP_EL0 is undefined if it's being used as
+         * the stack pointer.
+         */
+        return CP_ACCESS_TRAP_UNCATEGORIZED;
+    }
+    return CP_ACCESS_OK;
+}
+
+static uint64_t spsel_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    return env->pstate & PSTATE_SP;
+}
+
+static void spsel_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t val)
+{
+    update_spsel(env, val);
+}
+
 static const ARMCPRegInfo v8_cp_reginfo[] = {
     /* Minimal set of EL0-visible registers. This will need to be expanded
      * significantly for system emulation of AArch64 CPUs.
@@ -1736,19 +1829,30 @@
     { .name = "NZCV", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 3, .opc2 = 0, .crn = 4, .crm = 2,
       .access = PL0_RW, .type = ARM_CP_NZCV },
+    { .name = "DAIF", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 4, .crm = 2,
+      .type = ARM_CP_NO_MIGRATE,
+      .access = PL0_RW, .accessfn = aa64_daif_access,
+      .fieldoffset = offsetof(CPUARMState, daif),
+      .writefn = aa64_daif_write, .resetfn = arm_cp_reset_ignore },
     { .name = "FPCR", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 3, .opc2 = 0, .crn = 4, .crm = 4,
       .access = PL0_RW, .readfn = aa64_fpcr_read, .writefn = aa64_fpcr_write },
     { .name = "FPSR", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 4, .crm = 4,
       .access = PL0_RW, .readfn = aa64_fpsr_read, .writefn = aa64_fpsr_write },
-    /* Prohibit use of DC ZVA. OPTME: implement DC ZVA and allow its use.
-     * For system mode the DZP bit here will need to be computed, not constant.
-     */
     { .name = "DCZID_EL0", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 3, .opc2 = 7, .crn = 0, .crm = 0,
-      .access = PL0_R, .type = ARM_CP_CONST,
-      .resetvalue = 0x10 },
+      .access = PL0_R, .type = ARM_CP_NO_MIGRATE,
+      .readfn = aa64_dczid_read },
+    { .name = "DC_ZVA", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 4, .opc2 = 1,
+      .access = PL0_W, .type = ARM_CP_DC_ZVA,
+#ifndef CONFIG_USER_ONLY
+      /* Avoid overhead of an access check that always passes in user-mode */
+      .accessfn = aa64_zva_access,
+#endif
+    },
     { .name = "CURRENTEL", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 0, .opc2 = 2, .crn = 4, .crm = 2,
       .access = PL1_R, .type = ARM_CP_CURRENTEL },
@@ -1836,6 +1940,93 @@
       .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 7, .opc2 = 7,
       .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
       .writefn = tlbi_aa64_vaa_write },
+#ifndef CONFIG_USER_ONLY
+    /* 64 bit address translation operations */
+    { .name = "AT_S1E1R", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 0,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE, .writefn = ats_write },
+    { .name = "AT_S1E1W", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 1,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE, .writefn = ats_write },
+    { .name = "AT_S1E0R", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 2,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE, .writefn = ats_write },
+    { .name = "AT_S1E0W", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 3,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE, .writefn = ats_write },
+#endif
+    /* 32 bit TLB invalidates, Inner Shareable */
+    { .name = "TLBIALLIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 0,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiall_write },
+    { .name = "TLBIMVAIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 1,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimva_write },
+    { .name = "TLBIASIDIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 2,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiasid_write },
+    { .name = "TLBIMVAAIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 3,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimvaa_write },
+    { .name = "TLBIMVALIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 5,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimva_write },
+    { .name = "TLBIMVAALIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 7,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimvaa_write },
+    /* 32 bit ITLB invalidates */
+    { .name = "ITLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 0,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiall_write },
+    { .name = "ITLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 1,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimva_write },
+    { .name = "ITLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 2,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiasid_write },
+    /* 32 bit DTLB invalidates */
+    { .name = "DTLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 0,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiall_write },
+    { .name = "DTLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 1,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimva_write },
+    { .name = "DTLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 2,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiasid_write },
+    /* 32 bit TLB invalidates */
+    { .name = "TLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 0,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiall_write },
+    { .name = "TLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 1,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimva_write },
+    { .name = "TLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 2,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiasid_write },
+    { .name = "TLBIMVAA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 3,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimvaa_write },
+    { .name = "TLBIMVAL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 5,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimva_write },
+    { .name = "TLBIMVAAL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 7,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimvaa_write },
+    /* 32 bit cache operations */
+    { .name = "ICIALLUIS", .cp = 15, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 0,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "BPIALLUIS", .cp = 15, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 6,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "ICIALLU", .cp = 15, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 0,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "ICIMVAU", .cp = 15, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 1,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "BPIALL", .cp = 15, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 6,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "BPIMVA", .cp = 15, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 7,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "DCIMVAC", .cp = 15, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 1,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "DCISW", .cp = 15, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 2,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "DCCMVAC", .cp = 15, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 1,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "DCCSW", .cp = 15, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 2,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "DCCMVAU", .cp = 15, .opc1 = 0, .crn = 7, .crm = 11, .opc2 = 1,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "DCCIMVAC", .cp = 15, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 1,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "DCCISW", .cp = 15, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 2,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    /* MMU Domain access control / MPU write buffer control */
+    { .name = "DACR", .cp = 15,
+      .opc1 = 0, .crn = 3, .crm = 0, .opc2 = 0,
+      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c3),
+      .resetvalue = 0, .writefn = dacr_write, .raw_writefn = raw_write, },
     /* Dummy implementation of monitor debug system control register:
      * we don't support debug.
      */
@@ -1846,6 +2037,27 @@
     { .name = "OSLAR_EL1", .state = ARM_CP_STATE_AA64,
       .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 4,
       .access = PL1_W, .type = ARM_CP_NOP },
+    { .name = "ELR_EL1", .state = ARM_CP_STATE_AA64,
+      .type = ARM_CP_NO_MIGRATE,
+      .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 0, .opc2 = 1,
+      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, elr_el1) },
+    { .name = "SPSR_EL1", .state = ARM_CP_STATE_AA64,
+      .type = ARM_CP_NO_MIGRATE,
+      .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 0, .opc2 = 0,
+      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[0]) },
+    /* We rely on the access checks not allowing the guest to write to the
+     * state field when SPSel indicates that it's being used as the stack
+     * pointer.
+     */
+    { .name = "SP_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 1, .opc2 = 0,
+      .access = PL1_RW, .accessfn = sp_el0_access,
+      .type = ARM_CP_NO_MIGRATE,
+      .fieldoffset = offsetof(CPUARMState, sp_el[0]) },
+    { .name = "SPSel", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 2, .opc2 = 0,
+      .type = ARM_CP_NO_MIGRATE,
+      .access = PL1_RW, .readfn = spsel_read, .writefn = spsel_write },
     REGINFO_SENTINEL
 };
 
@@ -1912,50 +2124,71 @@
     }
 
     define_arm_cp_regs(cpu, cp_reginfo);
+    if (!arm_feature(env, ARM_FEATURE_V8)) {
+        /* Must go early as it is full of wildcards that may be
+         * overridden by later definitions.
+         */
+        define_arm_cp_regs(cpu, not_v8_cp_reginfo);
+    }
+
     if (arm_feature(env, ARM_FEATURE_V6)) {
         /* The ID registers all have impdef reset values */
         ARMCPRegInfo v6_idregs[] = {
-            { .name = "ID_PFR0", .cp = 15, .crn = 0, .crm = 1,
-              .opc1 = 0, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST,
+            { .name = "ID_PFR0", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 0,
+              .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_pfr0 },
-            { .name = "ID_PFR1", .cp = 15, .crn = 0, .crm = 1,
-              .opc1 = 0, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST,
+            { .name = "ID_PFR1", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 1,
+              .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_pfr1 },
-            { .name = "ID_DFR0", .cp = 15, .crn = 0, .crm = 1,
-              .opc1 = 0, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST,
+            { .name = "ID_DFR0", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 2,
+              .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_dfr0 },
-            { .name = "ID_AFR0", .cp = 15, .crn = 0, .crm = 1,
-              .opc1 = 0, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST,
+            { .name = "ID_AFR0", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 3,
+              .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_afr0 },
-            { .name = "ID_MMFR0", .cp = 15, .crn = 0, .crm = 1,
-              .opc1 = 0, .opc2 = 4, .access = PL1_R, .type = ARM_CP_CONST,
+            { .name = "ID_MMFR0", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 4,
+              .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_mmfr0 },
-            { .name = "ID_MMFR1", .cp = 15, .crn = 0, .crm = 1,
-              .opc1 = 0, .opc2 = 5, .access = PL1_R, .type = ARM_CP_CONST,
+            { .name = "ID_MMFR1", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 5,
+              .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_mmfr1 },
-            { .name = "ID_MMFR2", .cp = 15, .crn = 0, .crm = 1,
-              .opc1 = 0, .opc2 = 6, .access = PL1_R, .type = ARM_CP_CONST,
+            { .name = "ID_MMFR2", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 6,
+              .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_mmfr2 },
-            { .name = "ID_MMFR3", .cp = 15, .crn = 0, .crm = 1,
-              .opc1 = 0, .opc2 = 7, .access = PL1_R, .type = ARM_CP_CONST,
+            { .name = "ID_MMFR3", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 7,
+              .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_mmfr3 },
-            { .name = "ID_ISAR0", .cp = 15, .crn = 0, .crm = 2,
-              .opc1 = 0, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST,
+            { .name = "ID_ISAR0", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 0,
+              .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_isar0 },
-            { .name = "ID_ISAR1", .cp = 15, .crn = 0, .crm = 2,
-              .opc1 = 0, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST,
+            { .name = "ID_ISAR1", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 1,
+              .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_isar1 },
-            { .name = "ID_ISAR2", .cp = 15, .crn = 0, .crm = 2,
-              .opc1 = 0, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST,
+            { .name = "ID_ISAR2", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 2,
+              .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_isar2 },
-            { .name = "ID_ISAR3", .cp = 15, .crn = 0, .crm = 2,
-              .opc1 = 0, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST,
+            { .name = "ID_ISAR3", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 3,
+              .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_isar3 },
-            { .name = "ID_ISAR4", .cp = 15, .crn = 0, .crm = 2,
-              .opc1 = 0, .opc2 = 4, .access = PL1_R, .type = ARM_CP_CONST,
+            { .name = "ID_ISAR4", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 4,
+              .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_isar4 },
-            { .name = "ID_ISAR5", .cp = 15, .crn = 0, .crm = 2,
-              .opc1 = 0, .opc2 = 5, .access = PL1_R, .type = ARM_CP_CONST,
+            { .name = "ID_ISAR5", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 5,
+              .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_isar5 },
             /* 6..7 are as yet unallocated and must RAZ */
             { .name = "ID_ISAR6", .cp = 15, .crn = 0, .crm = 2,
@@ -2014,7 +2247,12 @@
             { .name = "ID_AA64DFR0_EL1", .state = ARM_CP_STATE_AA64,
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 0,
               .access = PL1_R, .type = ARM_CP_CONST,
-              .resetvalue = cpu->id_aa64dfr0 },
+              /* We mask out the PMUVer field, beacuse we don't currently
+               * implement the PMU. Not advertising it prevents the guest
+               * from trying to use it and getting UNDEFs on registers we
+               * don't implement.
+               */
+              .resetvalue = cpu->id_aa64dfr0 & ~0xf00 },
             { .name = "ID_AA64DFR1_EL1", .state = ARM_CP_STATE_AA64,
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 1,
               .access = PL1_R, .type = ARM_CP_CONST,
@@ -2043,8 +2281,26 @@
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 1,
               .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_aa64mmfr1 },
+            { .name = "MVFR0_EL1", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 0,
+              .access = PL1_R, .type = ARM_CP_CONST,
+              .resetvalue = cpu->mvfr0 },
+            { .name = "MVFR1_EL1", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 1,
+              .access = PL1_R, .type = ARM_CP_CONST,
+              .resetvalue = cpu->mvfr1 },
+            { .name = "MVFR2_EL1", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 2,
+              .access = PL1_R, .type = ARM_CP_CONST,
+              .resetvalue = cpu->mvfr2 },
             REGINFO_SENTINEL
         };
+        ARMCPRegInfo rvbar = {
+            .name = "RVBAR_EL1", .state = ARM_CP_STATE_AA64,
+            .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 2,
+            .type = ARM_CP_CONST, .access = PL1_R, .resetvalue = cpu->rvbar
+        };
+        define_one_arm_cp_reg(cpu, &rvbar);
         define_arm_cp_regs(cpu, v8_idregs);
         define_arm_cp_regs(cpu, v8_cp_reginfo);
         define_aarch64_debug_regs(cpu);
@@ -2098,8 +2354,9 @@
      * be read-only (ie write causes UNDEF exception).
      */
     {
-        ARMCPRegInfo id_cp_reginfo[] = {
-            /* Note that the MIDR isn't a simple constant register because
+        ARMCPRegInfo id_pre_v8_midr_cp_reginfo[] = {
+            /* Pre-v8 MIDR space.
+             * Note that the MIDR isn't a simple constant register because
              * of the TI925 behaviour where writes to another register can
              * cause the MIDR value to change.
              *
@@ -2113,22 +2370,6 @@
               .writefn = arm_cp_write_ignore, .raw_writefn = raw_write,
               .fieldoffset = offsetof(CPUARMState, cp15.c0_cpuid),
               .type = ARM_CP_OVERRIDE },
-            { .name = "MIDR_EL1", .state = ARM_CP_STATE_AA64,
-              .opc0 = 3, .opc1 = 0, .opc2 = 0, .crn = 0, .crm = 0,
-              .access = PL1_R, .resetvalue = cpu->midr, .type = ARM_CP_CONST },
-            { .name = "CTR",
-              .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 1,
-              .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->ctr },
-            { .name = "CTR_EL0", .state = ARM_CP_STATE_AA64,
-              .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 0, .crm = 0,
-              .access = PL0_R, .accessfn = ctr_el0_access,
-              .type = ARM_CP_CONST, .resetvalue = cpu->ctr },
-            { .name = "TCMTR",
-              .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 2,
-              .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
-            { .name = "TLBTR",
-              .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 3,
-              .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
             /* crn = 0 op1 = 0 crm = 3..7 : currently unassigned; we RAZ. */
             { .name = "DUMMY",
               .cp = 15, .crn = 0, .crm = 3, .opc1 = 0, .opc2 = CP_ANY,
@@ -2147,6 +2388,37 @@
               .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
             REGINFO_SENTINEL
         };
+        ARMCPRegInfo id_v8_midr_cp_reginfo[] = {
+            /* v8 MIDR -- the wildcard isn't necessary, and nor is the
+             * variable-MIDR TI925 behaviour. Instead we have a single
+             * (strictly speaking IMPDEF) alias of the MIDR, REVIDR.
+             */
+            { .name = "MIDR_EL1", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 0, .opc2 = 0,
+              .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->midr },
+            { .name = "REVIDR_EL1", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 0, .opc2 = 6,
+              .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->midr },
+            REGINFO_SENTINEL
+        };
+        ARMCPRegInfo id_cp_reginfo[] = {
+            /* These are common to v8 and pre-v8 */
+            { .name = "CTR",
+              .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 1,
+              .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->ctr },
+            { .name = "CTR_EL0", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 0, .crm = 0,
+              .access = PL0_R, .accessfn = ctr_el0_access,
+              .type = ARM_CP_CONST, .resetvalue = cpu->ctr },
+            /* TCMTR and TLBTR exist in v8 but have no 64-bit versions */
+            { .name = "TCMTR",
+              .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 2,
+              .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+            { .name = "TLBTR",
+              .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 3,
+              .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+            REGINFO_SENTINEL
+        };
         ARMCPRegInfo crn0_wi_reginfo = {
             .name = "CRN0_WI", .cp = 15, .crn = 0, .crm = CP_ANY,
             .opc1 = CP_ANY, .opc2 = CP_ANY, .access = PL1_W,
@@ -2161,10 +2433,19 @@
              * UNDEF.
              */
             define_one_arm_cp_reg(cpu, &crn0_wi_reginfo);
+            for (r = id_pre_v8_midr_cp_reginfo;
+                 r->type != ARM_CP_SENTINEL; r++) {
+                r->access = PL1_RW;
+            }
             for (r = id_cp_reginfo; r->type != ARM_CP_SENTINEL; r++) {
                 r->access = PL1_RW;
             }
         }
+        if (arm_feature(env, ARM_FEATURE_V8)) {
+            define_arm_cp_regs(cpu, id_v8_midr_cp_reginfo);
+        } else {
+            define_arm_cp_regs(cpu, id_pre_v8_midr_cp_reginfo);
+        }
         define_arm_cp_regs(cpu, id_cp_reginfo);
     }
 
@@ -2174,7 +2455,8 @@
 
     if (arm_feature(env, ARM_FEATURE_AUXCR)) {
         ARMCPRegInfo auxcr = {
-            .name = "AUXCR", .cp = 15, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 1,
+            .name = "ACTLR_EL1", .state = ARM_CP_STATE_BOTH,
+            .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 1,
             .access = PL1_RW, .type = ARM_CP_CONST,
             .resetvalue = cpu->reset_auxcr
         };
@@ -2182,12 +2464,39 @@
     }
 
     if (arm_feature(env, ARM_FEATURE_CBAR)) {
-        ARMCPRegInfo cbar = {
-            .name = "CBAR", .cp = 15, .crn = 15, .crm = 0, .opc1 = 4, .opc2 = 0,
-            .access = PL1_R|PL3_W, .resetvalue = cpu->reset_cbar,
-            .fieldoffset = offsetof(CPUARMState, cp15.c15_config_base_address)
-        };
-        define_one_arm_cp_reg(cpu, &cbar);
+        if (arm_feature(env, ARM_FEATURE_AARCH64)) {
+            /* 32 bit view is [31:18] 0...0 [43:32]. */
+            uint32_t cbar32 = (extract64(cpu->reset_cbar, 18, 14) << 18)
+                | extract64(cpu->reset_cbar, 32, 12);
+            ARMCPRegInfo cbar_reginfo[] = {
+                { .name = "CBAR",
+                  .type = ARM_CP_CONST,
+                  .cp = 15, .crn = 15, .crm = 0, .opc1 = 4, .opc2 = 0,
+                  .access = PL1_R, .resetvalue = cpu->reset_cbar },
+                { .name = "CBAR_EL1", .state = ARM_CP_STATE_AA64,
+                  .type = ARM_CP_CONST,
+                  .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 3, .opc2 = 0,
+                  .access = PL1_R, .resetvalue = cbar32 },
+                REGINFO_SENTINEL
+            };
+            /* We don't implement a r/w 64 bit CBAR currently */
+            assert(arm_feature(env, ARM_FEATURE_CBAR_RO));
+            define_arm_cp_regs(cpu, cbar_reginfo);
+        } else {
+            ARMCPRegInfo cbar = {
+                .name = "CBAR",
+                .cp = 15, .crn = 15, .crm = 0, .opc1 = 4, .opc2 = 0,
+                .access = PL1_R|PL3_W, .resetvalue = cpu->reset_cbar,
+                .fieldoffset = offsetof(CPUARMState,
+                                        cp15.c15_config_base_address)
+            };
+            if (arm_feature(env, ARM_FEATURE_CBAR_RO)) {
+                cbar.access = PL1_R;
+                cbar.fieldoffset = 0;
+                cbar.type = ARM_CP_CONST;
+            }
+            define_one_arm_cp_reg(cpu, &cbar);
+        }
     }
 
     /* Generic registers whose values depend on the implementation */
@@ -2684,12 +2993,11 @@
     ARMCPU *cpu = ARM_CPU(cs);
     CPUARMState *env = &cpu->env;
 
+    env->exception.vaddress = address;
     if (rw == 2) {
         cs->exception_index = EXCP_PREFETCH_ABORT;
-        env->cp15.c6_insn = address;
     } else {
         cs->exception_index = EXCP_DATA_ABORT;
-        env->cp15.c6_data = address;
     }
     return 1;
 }
@@ -2846,37 +3154,6 @@
        pointer.  */
 }
 
-/* Exception names for debug logging; note that not all of these
- * precisely correspond to architectural exceptions.
- */
-static const char * const excnames[] = {
-    [EXCP_UDEF] = "Undefined Instruction",
-    [EXCP_SWI] = "SVC",
-    [EXCP_PREFETCH_ABORT] = "Prefetch Abort",
-    [EXCP_DATA_ABORT] = "Data Abort",
-    [EXCP_IRQ] = "IRQ",
-    [EXCP_FIQ] = "FIQ",
-    [EXCP_BKPT] = "Breakpoint",
-    [EXCP_EXCEPTION_EXIT] = "QEMU v7M exception exit",
-    [EXCP_KERNEL_TRAP] = "QEMU intercept of kernel commpage",
-    [EXCP_STREX] = "QEMU intercept of STREX",
-};
-
-static inline void arm_log_exception(int idx)
-{
-    if (qemu_loglevel_mask(CPU_LOG_INT)) {
-        const char *exc = NULL;
-
-        if (idx >= 0 && idx < ARRAY_SIZE(excnames)) {
-            exc = excnames[idx];
-        }
-        if (!exc) {
-            exc = "unknown";
-        }
-        qemu_log_mask(CPU_LOG_INT, "Taking exception %d [%s]\n", idx, exc);
-    }
-}
-
 void arm_v7m_cpu_do_interrupt(CPUState *cs)
 {
     ARMCPU *cpu = ARM_CPU(cs);
@@ -2907,6 +3184,9 @@
         return;
     case EXCP_PREFETCH_ABORT:
     case EXCP_DATA_ABORT:
+        /* TODO: if we implemented the MPU registers, this is where we
+         * should set the MMFAR, etc from exception.fsr and exception.vaddress.
+         */
         armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_MEM);
         return;
     case EXCP_BKPT:
@@ -3021,19 +3301,26 @@
                 return;
             }
         }
-        env->cp15.c5_insn = 2;
+        env->exception.fsr = 2;
         /* Fall through to prefetch abort.  */
     case EXCP_PREFETCH_ABORT:
+        env->cp15.ifsr_el2 = env->exception.fsr;
+        env->cp15.far_el1 = deposit64(env->cp15.far_el1, 32, 32,
+                                      env->exception.vaddress);
         qemu_log_mask(CPU_LOG_INT, "...with IFSR 0x%x IFAR 0x%x\n",
-                      env->cp15.c5_insn, env->cp15.c6_insn);
+                      env->cp15.ifsr_el2, (uint32_t)env->exception.vaddress);
         new_mode = ARM_CPU_MODE_ABT;
         addr = 0x0c;
         mask = CPSR_A | CPSR_I;
         offset = 4;
         break;
     case EXCP_DATA_ABORT:
+        env->cp15.esr_el1 = env->exception.fsr;
+        env->cp15.far_el1 = deposit64(env->cp15.far_el1, 0, 32,
+                                      env->exception.vaddress);
         qemu_log_mask(CPU_LOG_INT, "...with DFSR 0x%x DFAR 0x%x\n",
-                      env->cp15.c5_data, env->cp15.c6_data);
+                      (uint32_t)env->cp15.esr_el1,
+                      (uint32_t)env->exception.vaddress);
         new_mode = ARM_CPU_MODE_ABT;
         addr = 0x10;
         mask = CPSR_A | CPSR_I;
@@ -3375,7 +3662,7 @@
     permission_fault = 3,
 } MMUFaultType;
 
-static int get_phys_addr_lpae(CPUARMState *env, uint32_t address,
+static int get_phys_addr_lpae(CPUARMState *env, target_ulong address,
                               int access_type, int is_user,
                               hwaddr *phys_ptr, int *prot,
                               target_ulong *page_size_ptr)
@@ -3385,26 +3672,46 @@
     MMUFaultType fault_type = translation_fault;
     uint32_t level = 1;
     uint32_t epd;
-    uint32_t tsz;
+    int32_t tsz;
+    uint32_t tg;
     uint64_t ttbr;
     int ttbr_select;
-    int n;
-    hwaddr descaddr;
+    hwaddr descaddr, descmask;
     uint32_t tableattrs;
     target_ulong page_size;
     uint32_t attrs;
+    int32_t granule_sz = 9;
+    int32_t va_size = 32;
+    int32_t tbi = 0;
+
+    if (arm_el_is_aa64(env, 1)) {
+        va_size = 64;
+        if (extract64(address, 55, 1))
+            tbi = extract64(env->cp15.c2_control, 38, 1);
+        else
+            tbi = extract64(env->cp15.c2_control, 37, 1);
+        tbi *= 8;
+    }
 
     /* Determine whether this address is in the region controlled by
      * TTBR0 or TTBR1 (or if it is in neither region and should fault).
      * This is a Non-secure PL0/1 stage 1 translation, so controlled by
      * TTBCR/TTBR0/TTBR1 in accordance with ARM ARM DDI0406C table B-32:
      */
-    uint32_t t0sz = extract32(env->cp15.c2_control, 0, 3);
-    uint32_t t1sz = extract32(env->cp15.c2_control, 16, 3);
-    if (t0sz && !extract32(address, 32 - t0sz, t0sz)) {
+    uint32_t t0sz = extract32(env->cp15.c2_control, 0, 6);
+    if (arm_el_is_aa64(env, 1)) {
+        t0sz = MIN(t0sz, 39);
+        t0sz = MAX(t0sz, 16);
+    }
+    uint32_t t1sz = extract32(env->cp15.c2_control, 16, 6);
+    if (arm_el_is_aa64(env, 1)) {
+        t1sz = MIN(t1sz, 39);
+        t1sz = MAX(t1sz, 16);
+    }
+    if (t0sz && !extract64(address, va_size - t0sz, t0sz - tbi)) {
         /* there is a ttbr0 region and we are in it (high bits all zero) */
         ttbr_select = 0;
-    } else if (t1sz && !extract32(~address, 32 - t1sz, t1sz)) {
+    } else if (t1sz && !extract64(~address, va_size - t1sz, t1sz - tbi)) {
         /* there is a ttbr1 region and we are in it (high bits all one) */
         ttbr_select = 1;
     } else if (!t0sz) {
@@ -3430,10 +3737,26 @@
         ttbr = env->cp15.ttbr0_el1;
         epd = extract32(env->cp15.c2_control, 7, 1);
         tsz = t0sz;
+
+        tg = extract32(env->cp15.c2_control, 14, 2);
+        if (tg == 1) { /* 64KB pages */
+            granule_sz = 13;
+        }
+        if (tg == 2) { /* 16KB pages */
+            granule_sz = 11;
+        }
     } else {
         ttbr = env->cp15.ttbr1_el1;
         epd = extract32(env->cp15.c2_control, 23, 1);
         tsz = t1sz;
+
+        tg = extract32(env->cp15.c2_control, 30, 2);
+        if (tg == 3)  { /* 64KB pages */
+            granule_sz = 13;
+        }
+        if (tg == 1) { /* 16KB pages */
+            granule_sz = 11;
+        }
     }
 
     if (epd) {
@@ -3441,34 +3764,37 @@
         goto do_fault;
     }
 
-    /* If the region is small enough we will skip straight to a 2nd level
-     * lookup. This affects the number of bits of the address used in
-     * combination with the TTBR to find the first descriptor. ('n' here
-     * matches the usage in the ARM ARM sB3.6.6, where bits [39..n] are
-     * from the TTBR, [n-1..3] from the vaddr, and [2..0] always zero).
+    /* The starting level depends on the virtual address size which can be
+     * up to 48-bits and the translation granule size.
      */
-    if (tsz > 1) {
-        level = 2;
-        n = 14 - tsz;
+    if ((va_size - tsz) > (granule_sz * 4 + 3)) {
+        level = 0;
+    } else if ((va_size - tsz) > (granule_sz * 3 + 3)) {
+        level = 1;
     } else {
-        n = 5 - tsz;
+        level = 2;
     }
 
     /* Clear the vaddr bits which aren't part of the within-region address,
      * so that we don't have to special case things when calculating the
      * first descriptor address.
      */
-    address &= (0xffffffffU >> tsz);
+    if (tsz) {
+        address &= (1ULL << (va_size - tsz)) - 1;
+    }
+
+    descmask = (1ULL << (granule_sz + 3)) - 1;
 
     /* Now we can extract the actual base address from the TTBR */
-    descaddr = extract64(ttbr, 0, 40);
-    descaddr &= ~((1ULL << n) - 1);
+    descaddr = extract64(ttbr, 0, 48);
+    descaddr &= ~((1ULL << (va_size - tsz - (granule_sz * (4 - level)))) - 1);
 
     tableattrs = 0;
     for (;;) {
         uint64_t descriptor;
 
-        descaddr |= ((address >> (9 * (4 - level))) & 0xff8);
+        descaddr |= (address >> (granule_sz * (4 - level))) & descmask;
+        descaddr &= ~7ULL;
         descriptor = ldq_phys(cs->as, descaddr);
         if (!(descriptor & 1) ||
             (!(descriptor & 2) && (level == 3))) {
@@ -3491,11 +3817,16 @@
          * These are basically the same thing, although the number
          * of bits we pull in from the vaddr varies.
          */
-        page_size = (1 << (39 - (9 * level)));
+        page_size = (1 << ((granule_sz * (4 - level)) + 3));
         descaddr |= (address & (page_size - 1));
         /* Extract attributes from the descriptor and merge with table attrs */
-        attrs = extract64(descriptor, 2, 10)
-            | (extract64(descriptor, 52, 12) << 10);
+        if (arm_feature(env, ARM_FEATURE_V8)) {
+            attrs = extract64(descriptor, 2, 10)
+                | (extract64(descriptor, 53, 11) << 10);
+        } else {
+            attrs = extract64(descriptor, 2, 10)
+                | (extract64(descriptor, 52, 12) << 10);
+        }
         attrs |= extract32(tableattrs, 0, 2) << 11; /* XN, PXN */
         attrs |= extract32(tableattrs, 3, 1) << 5; /* APTable[1] => AP[2] */
         /* The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1
@@ -3569,9 +3900,9 @@
 	return 2;
 
     if (access_type == 2) {
-	mask = env->cp15.c5_insn;
+        mask = env->cp15.pmsav5_insn_ap;
     } else {
-	mask = env->cp15.c5_data;
+        mask = env->cp15.pmsav5_data_ap;
     }
     mask = (mask >> (n * 4)) & 0xf;
     switch (mask) {
@@ -3629,7 +3960,7 @@
  * @prot: set to the permissions for the page containing phys_ptr
  * @page_size: set to the size of the page containing phys_ptr
  */
-static inline int get_phys_addr(CPUARMState *env, uint32_t address,
+static inline int get_phys_addr(CPUARMState *env, target_ulong address,
                                 int access_type, int is_user,
                                 hwaddr *phys_ptr, int *prot,
                                 target_ulong *page_size)
@@ -3669,6 +4000,8 @@
     target_ulong page_size;
     int prot;
     int ret, is_user;
+    uint32_t syn;
+    bool same_el = (arm_current_pl(env) != 0);
 
     is_user = mmu_idx == MMU_USER_IDX;
     ret = get_phys_addr(env, address, access_type, is_user, &phys_addr, &prot,
@@ -3676,22 +4009,31 @@
     if (ret == 0) {
         /* Map a single [sub]page.  */
         phys_addr &= ~(hwaddr)0x3ff;
-        address &= ~(uint32_t)0x3ff;
+        address &= ~(target_ulong)0x3ff;
         tlb_set_page(cs, address, phys_addr, prot, mmu_idx, page_size);
         return 0;
     }
 
+    /* AArch64 syndrome does not have an LPAE bit */
+    syn = ret & ~(1 << 9);
+
+    /* For insn and data aborts we assume there is no instruction syndrome
+     * information; this is always true for exceptions reported to EL1.
+     */
     if (access_type == 2) {
-        env->cp15.c5_insn = ret;
-        env->cp15.c6_insn = address;
+        syn = syn_insn_abort(same_el, 0, 0, syn);
         cs->exception_index = EXCP_PREFETCH_ABORT;
     } else {
-        env->cp15.c5_data = ret;
-        if (access_type == 1 && arm_feature(env, ARM_FEATURE_V6))
-            env->cp15.c5_data |= (1 << 11);
-        env->cp15.c6_data = address;
+        syn = syn_data_abort(same_el, 0, 0, 0, access_type == 1, syn);
+        if (access_type == 1 && arm_feature(env, ARM_FEATURE_V6)) {
+            ret |= (1 << 11);
+        }
         cs->exception_index = EXCP_DATA_ABORT;
     }
+
+    env->exception.syndrome = syn;
+    env->exception.vaddress = address;
+    env->exception.fsr = ret;
     return 1;
 }
 
@@ -3842,6 +4184,88 @@
 
 #endif
 
+void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
+{
+    /* Implement DC ZVA, which zeroes a fixed-length block of memory.
+     * Note that we do not implement the (architecturally mandated)
+     * alignment fault for attempts to use this on Device memory
+     * (which matches the usual QEMU behaviour of not implementing either
+     * alignment faults or any memory attribute handling).
+     */
+
+    ARMCPU *cpu = arm_env_get_cpu(env);
+    uint64_t blocklen = 4 << cpu->dcz_blocksize;
+    uint64_t vaddr = vaddr_in & ~(blocklen - 1);
+
+#ifndef CONFIG_USER_ONLY
+    {
+        /* Slightly awkwardly, QEMU's TARGET_PAGE_SIZE may be less than
+         * the block size so we might have to do more than one TLB lookup.
+         * We know that in fact for any v8 CPU the page size is at least 4K
+         * and the block size must be 2K or less, but TARGET_PAGE_SIZE is only
+         * 1K as an artefact of legacy v5 subpage support being present in the
+         * same QEMU executable.
+         */
+        int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE);
+        void *hostaddr[maxidx];
+        int try, i;
+
+        for (try = 0; try < 2; try++) {
+
+            for (i = 0; i < maxidx; i++) {
+                hostaddr[i] = tlb_vaddr_to_host(env,
+                                                vaddr + TARGET_PAGE_SIZE * i,
+                                                1, cpu_mmu_index(env));
+                if (!hostaddr[i]) {
+                    break;
+                }
+            }
+            if (i == maxidx) {
+                /* If it's all in the TLB it's fair game for just writing to;
+                 * we know we don't need to update dirty status, etc.
+                 */
+                for (i = 0; i < maxidx - 1; i++) {
+                    memset(hostaddr[i], 0, TARGET_PAGE_SIZE);
+                }
+                memset(hostaddr[i], 0, blocklen - (i * TARGET_PAGE_SIZE));
+                return;
+            }
+            /* OK, try a store and see if we can populate the tlb. This
+             * might cause an exception if the memory isn't writable,
+             * in which case we will longjmp out of here. We must for
+             * this purpose use the actual register value passed to us
+             * so that we get the fault address right.
+             */
+            helper_ret_stb_mmu(env, vaddr_in, 0, cpu_mmu_index(env), GETRA());
+            /* Now we can populate the other TLB entries, if any */
+            for (i = 0; i < maxidx; i++) {
+                uint64_t va = vaddr + TARGET_PAGE_SIZE * i;
+                if (va != (vaddr_in & TARGET_PAGE_MASK)) {
+                    helper_ret_stb_mmu(env, va, 0, cpu_mmu_index(env), GETRA());
+                }
+            }
+        }
+
+        /* Slow path (probably attempt to do this to an I/O device or
+         * similar, or clearing of a block of code we have translations
+         * cached for). Just do a series of byte writes as the architecture
+         * demands. It's not worth trying to use a cpu_physical_memory_map(),
+         * memset(), unmap() sequence here because:
+         *  + we'd need to account for the blocksize being larger than a page
+         *  + the direct-RAM access case is almost always going to be dealt
+         *    with in the fastpath code above, so there's no speed benefit
+         *  + we would have to deal with the map returning NULL because the
+         *    bounce buffer was in use
+         */
+        for (i = 0; i < blocklen; i++) {
+            helper_ret_stb_mmu(env, vaddr + i, 0, cpu_mmu_index(env), GETRA());
+        }
+    }
+#else
+    memset(g2h(vaddr), 0, blocklen);
+#endif
+}
+
 /* Note that signed overflow is undefined in C.  The following routines are
    careful to use unsigned types where modulo arithmetic is required.
    Failure to do so _will_ break on newer gcc.  */
diff --git a/target-arm/helper.h b/target-arm/helper.h
index 366c1b3..a5449e7 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -48,7 +48,8 @@
 
 DEF_HELPER_FLAGS_3(sel_flags, TCG_CALL_NO_RWG_SE,
                    i32, i32, i32, i32)
-DEF_HELPER_2(exception, void, env, i32)
+DEF_HELPER_2(exception_internal, void, env, i32)
+DEF_HELPER_3(exception_with_syndrome, void, env, i32, i32)
 DEF_HELPER_1(wfi, void, env)
 DEF_HELPER_1(wfe, void, env)
 
@@ -58,13 +59,14 @@
 DEF_HELPER_3(v7m_msr, void, env, i32, i32)
 DEF_HELPER_2(v7m_mrs, i32, env, i32)
 
-DEF_HELPER_2(access_check_cp_reg, void, env, ptr)
+DEF_HELPER_3(access_check_cp_reg, void, env, ptr, i32)
 DEF_HELPER_3(set_cp_reg, void, env, ptr, i32)
 DEF_HELPER_2(get_cp_reg, i32, env, ptr)
 DEF_HELPER_3(set_cp_reg64, void, env, ptr, i64)
 DEF_HELPER_2(get_cp_reg64, i64, env, ptr)
 
 DEF_HELPER_3(msr_i_pstate, void, env, i32, i32)
+DEF_HELPER_1(exception_return, void, env)
 
 DEF_HELPER_2(get_r13_banked, i32, env, i32)
 DEF_HELPER_3(set_r13_banked, void, env, i32, i32)
@@ -514,6 +516,7 @@
 
 DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
 DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
+DEF_HELPER_2(dc_zva, void, env, i64)
 
 #ifdef TARGET_AARCH64
 #include "helper-a64.h"
diff --git a/target-arm/internals.h b/target-arm/internals.h
new file mode 100644
index 0000000..d63a975
--- /dev/null
+++ b/target-arm/internals.h
@@ -0,0 +1,267 @@
+/*
+ * QEMU ARM CPU -- internal functions and types
+ *
+ * Copyright (c) 2014 Linaro Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see
+ * <http://www.gnu.org/licenses/gpl-2.0.html>
+ *
+ * This header defines functions, types, etc which need to be shared
+ * between different source files within target-arm/ but which are
+ * private to it and not required by the rest of QEMU.
+ */
+
+#ifndef TARGET_ARM_INTERNALS_H
+#define TARGET_ARM_INTERNALS_H
+
+static inline bool excp_is_internal(int excp)
+{
+    /* Return true if this exception number represents a QEMU-internal
+     * exception that will not be passed to the guest.
+     */
+    return excp == EXCP_INTERRUPT
+        || excp == EXCP_HLT
+        || excp == EXCP_DEBUG
+        || excp == EXCP_HALTED
+        || excp == EXCP_EXCEPTION_EXIT
+        || excp == EXCP_KERNEL_TRAP
+        || excp == EXCP_STREX;
+}
+
+/* Exception names for debug logging; note that not all of these
+ * precisely correspond to architectural exceptions.
+ */
+static const char * const excnames[] = {
+    [EXCP_UDEF] = "Undefined Instruction",
+    [EXCP_SWI] = "SVC",
+    [EXCP_PREFETCH_ABORT] = "Prefetch Abort",
+    [EXCP_DATA_ABORT] = "Data Abort",
+    [EXCP_IRQ] = "IRQ",
+    [EXCP_FIQ] = "FIQ",
+    [EXCP_BKPT] = "Breakpoint",
+    [EXCP_EXCEPTION_EXIT] = "QEMU v7M exception exit",
+    [EXCP_KERNEL_TRAP] = "QEMU intercept of kernel commpage",
+    [EXCP_STREX] = "QEMU intercept of STREX",
+};
+
+static inline void arm_log_exception(int idx)
+{
+    if (qemu_loglevel_mask(CPU_LOG_INT)) {
+        const char *exc = NULL;
+
+        if (idx >= 0 && idx < ARRAY_SIZE(excnames)) {
+            exc = excnames[idx];
+        }
+        if (!exc) {
+            exc = "unknown";
+        }
+        qemu_log_mask(CPU_LOG_INT, "Taking exception %d [%s]\n", idx, exc);
+    }
+}
+
+/* Scale factor for generic timers, ie number of ns per tick.
+ * This gives a 62.5MHz timer.
+ */
+#define GTIMER_SCALE 16
+
+int bank_number(int mode);
+void switch_mode(CPUARMState *, int);
+void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu);
+void arm_translate_init(void);
+
+enum arm_fprounding {
+    FPROUNDING_TIEEVEN,
+    FPROUNDING_POSINF,
+    FPROUNDING_NEGINF,
+    FPROUNDING_ZERO,
+    FPROUNDING_TIEAWAY,
+    FPROUNDING_ODD
+};
+
+int arm_rmode_to_sf(int rmode);
+
+static inline void update_spsel(CPUARMState *env, uint32_t imm)
+{
+    /* Update PSTATE SPSel bit; this requires us to update the
+     * working stack pointer in xregs[31].
+     */
+    if (!((imm ^ env->pstate) & PSTATE_SP)) {
+        return;
+    }
+    env->pstate = deposit32(env->pstate, 0, 1, imm);
+
+    /* EL0 has no access rights to update SPSel, and this code
+     * assumes we are updating SP for EL1 while running as EL1.
+     */
+    assert(arm_current_pl(env) == 1);
+    if (env->pstate & PSTATE_SP) {
+        /* Switch from using SP_EL0 to using SP_ELx */
+        env->sp_el[0] = env->xregs[31];
+        env->xregs[31] = env->sp_el[1];
+    } else {
+        /* Switch from SP_EL0 to SP_ELx */
+        env->sp_el[1] = env->xregs[31];
+        env->xregs[31] = env->sp_el[0];
+    }
+}
+
+/* Valid Syndrome Register EC field values */
+enum arm_exception_class {
+    EC_UNCATEGORIZED          = 0x00,
+    EC_WFX_TRAP               = 0x01,
+    EC_CP15RTTRAP             = 0x03,
+    EC_CP15RRTTRAP            = 0x04,
+    EC_CP14RTTRAP             = 0x05,
+    EC_CP14DTTRAP             = 0x06,
+    EC_ADVSIMDFPACCESSTRAP    = 0x07,
+    EC_FPIDTRAP               = 0x08,
+    EC_CP14RRTTRAP            = 0x0c,
+    EC_ILLEGALSTATE           = 0x0e,
+    EC_AA32_SVC               = 0x11,
+    EC_AA32_HVC               = 0x12,
+    EC_AA32_SMC               = 0x13,
+    EC_AA64_SVC               = 0x15,
+    EC_AA64_HVC               = 0x16,
+    EC_AA64_SMC               = 0x17,
+    EC_SYSTEMREGISTERTRAP     = 0x18,
+    EC_INSNABORT              = 0x20,
+    EC_INSNABORT_SAME_EL      = 0x21,
+    EC_PCALIGNMENT            = 0x22,
+    EC_DATAABORT              = 0x24,
+    EC_DATAABORT_SAME_EL      = 0x25,
+    EC_SPALIGNMENT            = 0x26,
+    EC_AA32_FPTRAP            = 0x28,
+    EC_AA64_FPTRAP            = 0x2c,
+    EC_SERROR                 = 0x2f,
+    EC_BREAKPOINT             = 0x30,
+    EC_BREAKPOINT_SAME_EL     = 0x31,
+    EC_SOFTWARESTEP           = 0x32,
+    EC_SOFTWARESTEP_SAME_EL   = 0x33,
+    EC_WATCHPOINT             = 0x34,
+    EC_WATCHPOINT_SAME_EL     = 0x35,
+    EC_AA32_BKPT              = 0x38,
+    EC_VECTORCATCH            = 0x3a,
+    EC_AA64_BKPT              = 0x3c,
+};
+
+#define ARM_EL_EC_SHIFT 26
+#define ARM_EL_IL_SHIFT 25
+#define ARM_EL_IL (1 << ARM_EL_IL_SHIFT)
+
+/* Utility functions for constructing various kinds of syndrome value.
+ * Note that in general we follow the AArch64 syndrome values; in a
+ * few cases the value in HSR for exceptions taken to AArch32 Hyp
+ * mode differs slightly, so if we ever implemented Hyp mode then the
+ * syndrome value would need some massaging on exception entry.
+ * (One example of this is that AArch64 defaults to IL bit set for
+ * exceptions which don't specifically indicate information about the
+ * trapping instruction, whereas AArch32 defaults to IL bit clear.)
+ */
+static inline uint32_t syn_uncategorized(void)
+{
+    return (EC_UNCATEGORIZED << ARM_EL_EC_SHIFT) | ARM_EL_IL;
+}
+
+static inline uint32_t syn_aa64_svc(uint32_t imm16)
+{
+    return (EC_AA64_SVC << ARM_EL_EC_SHIFT) | ARM_EL_IL | (imm16 & 0xffff);
+}
+
+static inline uint32_t syn_aa32_svc(uint32_t imm16, bool is_thumb)
+{
+    return (EC_AA32_SVC << ARM_EL_EC_SHIFT) | (imm16 & 0xffff)
+        | (is_thumb ? 0 : ARM_EL_IL);
+}
+
+static inline uint32_t syn_aa64_bkpt(uint32_t imm16)
+{
+    return (EC_AA64_BKPT << ARM_EL_EC_SHIFT) | ARM_EL_IL | (imm16 & 0xffff);
+}
+
+static inline uint32_t syn_aa32_bkpt(uint32_t imm16, bool is_thumb)
+{
+    return (EC_AA32_BKPT << ARM_EL_EC_SHIFT) | (imm16 & 0xffff)
+        | (is_thumb ? 0 : ARM_EL_IL);
+}
+
+static inline uint32_t syn_aa64_sysregtrap(int op0, int op1, int op2,
+                                           int crn, int crm, int rt,
+                                           int isread)
+{
+    return (EC_SYSTEMREGISTERTRAP << ARM_EL_EC_SHIFT) | ARM_EL_IL
+        | (op0 << 20) | (op2 << 17) | (op1 << 14) | (crn << 10) | (rt << 5)
+        | (crm << 1) | isread;
+}
+
+static inline uint32_t syn_cp14_rt_trap(int cv, int cond, int opc1, int opc2,
+                                        int crn, int crm, int rt, int isread,
+                                        bool is_thumb)
+{
+    return (EC_CP14RTTRAP << ARM_EL_EC_SHIFT)
+        | (is_thumb ? 0 : ARM_EL_IL)
+        | (cv << 24) | (cond << 20) | (opc2 << 17) | (opc1 << 14)
+        | (crn << 10) | (rt << 5) | (crm << 1) | isread;
+}
+
+static inline uint32_t syn_cp15_rt_trap(int cv, int cond, int opc1, int opc2,
+                                        int crn, int crm, int rt, int isread,
+                                        bool is_thumb)
+{
+    return (EC_CP15RTTRAP << ARM_EL_EC_SHIFT)
+        | (is_thumb ? 0 : ARM_EL_IL)
+        | (cv << 24) | (cond << 20) | (opc2 << 17) | (opc1 << 14)
+        | (crn << 10) | (rt << 5) | (crm << 1) | isread;
+}
+
+static inline uint32_t syn_cp14_rrt_trap(int cv, int cond, int opc1, int crm,
+                                         int rt, int rt2, int isread,
+                                         bool is_thumb)
+{
+    return (EC_CP14RRTTRAP << ARM_EL_EC_SHIFT)
+        | (is_thumb ? 0 : ARM_EL_IL)
+        | (cv << 24) | (cond << 20) | (opc1 << 16)
+        | (rt2 << 10) | (rt << 5) | (crm << 1) | isread;
+}
+
+static inline uint32_t syn_cp15_rrt_trap(int cv, int cond, int opc1, int crm,
+                                         int rt, int rt2, int isread,
+                                         bool is_thumb)
+{
+    return (EC_CP15RRTTRAP << ARM_EL_EC_SHIFT)
+        | (is_thumb ? 0 : ARM_EL_IL)
+        | (cv << 24) | (cond << 20) | (opc1 << 16)
+        | (rt2 << 10) | (rt << 5) | (crm << 1) | isread;
+}
+
+static inline uint32_t syn_fp_access_trap(int cv, int cond, bool is_thumb)
+{
+    return (EC_ADVSIMDFPACCESSTRAP << ARM_EL_EC_SHIFT)
+        | (is_thumb ? 0 : ARM_EL_IL)
+        | (cv << 24) | (cond << 20);
+}
+
+static inline uint32_t syn_insn_abort(int same_el, int ea, int s1ptw, int fsc)
+{
+    return (EC_INSNABORT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT)
+        | (ea << 9) | (s1ptw << 7) | fsc;
+}
+
+static inline uint32_t syn_data_abort(int same_el, int ea, int cm, int s1ptw,
+                                      int wnr, int fsc)
+{
+    return (EC_DATAABORT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT)
+        | (ea << 9) | (cm << 8) | (s1ptw << 7) | (wnr << 6) | fsc;
+}
+
+#endif
diff --git a/target-arm/kvm32.c b/target-arm/kvm32.c
index a4fde07..a690d99 100644
--- a/target-arm/kvm32.c
+++ b/target-arm/kvm32.c
@@ -21,6 +21,7 @@
 #include "sysemu/kvm.h"
 #include "kvm_arm.h"
 #include "cpu.h"
+#include "internals.h"
 #include "hw/arm/arm.h"
 
 static inline void set_feature(uint64_t *features, int feature)
@@ -294,6 +295,14 @@
         offsetof(CPUARMState, vfp.xregs[ARM_VFP_##R])      \
     }
 
+/* Like COREREG, but handle fields which are in a uint64_t in CPUARMState. */
+#define COREREG64(KERNELNAME, QEMUFIELD)                     \
+    {                                                        \
+        KVM_REG_ARM | KVM_REG_SIZE_U32 |                     \
+        KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(KERNELNAME), \
+        offsetoflow32(CPUARMState, QEMUFIELD)                \
+    }
+
 static const Reg regs[] = {
     /* R0_usr .. R14_usr */
     COREREG(usr_regs.uregs[0], regs[0]),
@@ -314,16 +323,16 @@
     /* R13, R14, SPSR for SVC, ABT, UND, IRQ banks */
     COREREG(svc_regs[0], banked_r13[1]),
     COREREG(svc_regs[1], banked_r14[1]),
-    COREREG(svc_regs[2], banked_spsr[1]),
+    COREREG64(svc_regs[2], banked_spsr[1]),
     COREREG(abt_regs[0], banked_r13[2]),
     COREREG(abt_regs[1], banked_r14[2]),
-    COREREG(abt_regs[2], banked_spsr[2]),
+    COREREG64(abt_regs[2], banked_spsr[2]),
     COREREG(und_regs[0], banked_r13[3]),
     COREREG(und_regs[1], banked_r14[3]),
-    COREREG(und_regs[2], banked_spsr[3]),
+    COREREG64(und_regs[2], banked_spsr[3]),
     COREREG(irq_regs[0], banked_r13[4]),
     COREREG(irq_regs[1], banked_r14[4]),
-    COREREG(irq_regs[2], banked_spsr[4]),
+    COREREG64(irq_regs[2], banked_spsr[4]),
     /* R8_fiq .. R14_fiq and SPSR_fiq */
     COREREG(fiq_regs[0], fiq_regs[0]),
     COREREG(fiq_regs[1], fiq_regs[1]),
@@ -332,7 +341,7 @@
     COREREG(fiq_regs[4], fiq_regs[4]),
     COREREG(fiq_regs[5], banked_r13[5]),
     COREREG(fiq_regs[6], banked_r14[5]),
-    COREREG(fiq_regs[7], banked_spsr[5]),
+    COREREG64(fiq_regs[7], banked_spsr[5]),
     /* R15 */
     COREREG(usr_regs.uregs[15], regs[15]),
     /* VFP system registers */
diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c
index 1b7ca90..e115879 100644
--- a/target-arm/kvm64.c
+++ b/target-arm/kvm64.c
@@ -121,8 +121,24 @@
         }
     }
 
+    /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the
+     * QEMU side we keep the current SP in xregs[31] as well.
+     */
+    if (env->pstate & PSTATE_SP) {
+        env->sp_el[1] = env->xregs[31];
+    } else {
+        env->sp_el[0] = env->xregs[31];
+    }
+
     reg.id = AARCH64_CORE_REG(regs.sp);
-    reg.addr = (uintptr_t) &env->xregs[31];
+    reg.addr = (uintptr_t) &env->sp_el[0];
+    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
+    if (ret) {
+        return ret;
+    }
+
+    reg.id = AARCH64_CORE_REG(sp_el1);
+    reg.addr = (uintptr_t) &env->sp_el[1];
     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
     if (ret) {
         return ret;
@@ -144,10 +160,23 @@
         return ret;
     }
 
+    reg.id = AARCH64_CORE_REG(elr_el1);
+    reg.addr = (uintptr_t) &env->elr_el1;
+    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
+    if (ret) {
+        return ret;
+    }
+
+    for (i = 0; i < KVM_NR_SPSR; i++) {
+        reg.id = AARCH64_CORE_REG(spsr[i]);
+        reg.addr = (uintptr_t) &env->banked_spsr[i - 1];
+        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
+        if (ret) {
+            return ret;
+        }
+    }
+
     /* TODO:
-     * SP_EL1
-     * ELR_EL1
-     * SPSR[]
      * FP state
      * system registers
      */
@@ -174,7 +203,14 @@
     }
 
     reg.id = AARCH64_CORE_REG(regs.sp);
-    reg.addr = (uintptr_t) &env->xregs[31];
+    reg.addr = (uintptr_t) &env->sp_el[0];
+    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
+    if (ret) {
+        return ret;
+    }
+
+    reg.id = AARCH64_CORE_REG(sp_el1);
+    reg.addr = (uintptr_t) &env->sp_el[1];
     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
     if (ret) {
         return ret;
@@ -188,6 +224,15 @@
     }
     pstate_write(env, val);
 
+    /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the
+     * QEMU side we keep the current SP in xregs[31] as well.
+     */
+    if (env->pstate & PSTATE_SP) {
+        env->xregs[31] = env->sp_el[1];
+    } else {
+        env->xregs[31] = env->sp_el[0];
+    }
+
     reg.id = AARCH64_CORE_REG(regs.pc);
     reg.addr = (uintptr_t) &env->pc;
     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
@@ -195,6 +240,22 @@
         return ret;
     }
 
+    reg.id = AARCH64_CORE_REG(elr_el1);
+    reg.addr = (uintptr_t) &env->elr_el1;
+    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
+    if (ret) {
+        return ret;
+    }
+
+    for (i = 0; i < KVM_NR_SPSR; i++) {
+        reg.id = AARCH64_CORE_REG(spsr[i]);
+        reg.addr = (uintptr_t) &env->banked_spsr[i - 1];
+        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
+        if (ret) {
+            return ret;
+        }
+    }
+
     /* TODO: other registers */
     return ret;
 }
diff --git a/target-arm/machine.c b/target-arm/machine.c
index 7ced87a..b967223 100644
--- a/target-arm/machine.c
+++ b/target-arm/machine.c
@@ -222,9 +222,9 @@
 
 const VMStateDescription vmstate_arm_cpu = {
     .name = "cpu",
-    .version_id = 14,
-    .minimum_version_id = 14,
-    .minimum_version_id_old = 14,
+    .version_id = 17,
+    .minimum_version_id = 17,
+    .minimum_version_id_old = 17,
     .pre_save = cpu_pre_save,
     .post_load = cpu_post_load,
     .fields = (VMStateField[]) {
@@ -238,11 +238,13 @@
             .offset = 0,
         },
         VMSTATE_UINT32(env.spsr, ARMCPU),
-        VMSTATE_UINT32_ARRAY(env.banked_spsr, ARMCPU, 6),
+        VMSTATE_UINT64_ARRAY(env.banked_spsr, ARMCPU, 6),
         VMSTATE_UINT32_ARRAY(env.banked_r13, ARMCPU, 6),
         VMSTATE_UINT32_ARRAY(env.banked_r14, ARMCPU, 6),
         VMSTATE_UINT32_ARRAY(env.usr_regs, ARMCPU, 5),
         VMSTATE_UINT32_ARRAY(env.fiq_regs, ARMCPU, 5),
+        VMSTATE_UINT64(env.elr_el1, ARMCPU),
+        VMSTATE_UINT64_ARRAY(env.sp_el, ARMCPU, 2),
         /* The length-check must come before the arrays to avoid
          * incoming data possibly overflowing the array.
          */
@@ -257,6 +259,9 @@
         VMSTATE_UINT64(env.exclusive_val, ARMCPU),
         VMSTATE_UINT64(env.exclusive_high, ARMCPU),
         VMSTATE_UINT64(env.features, ARMCPU),
+        VMSTATE_UINT32(env.exception.syndrome, ARMCPU),
+        VMSTATE_UINT32(env.exception.fsr, ARMCPU),
+        VMSTATE_UINT64(env.exception.vaddress, ARMCPU),
         VMSTATE_TIMER(gt_timer[GTIMER_PHYS], ARMCPU),
         VMSTATE_TIMER(gt_timer[GTIMER_VIRT], ARMCPU),
         VMSTATE_END_OF_LIST()
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index 21ff58e..57e7d9c 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -18,6 +18,7 @@
  */
 #include "cpu.h"
 #include "helper.h"
+#include "internals.h"
 
 #define SIGNBIT (uint32_t)0x80000000
 #define SIGNBIT64 ((uint64_t)1 << 63)
@@ -243,14 +244,33 @@
     cpu_loop_exit(cs);
 }
 
-void HELPER(exception)(CPUARMState *env, uint32_t excp)
+/* Raise an internal-to-QEMU exception. This is limited to only
+ * those EXCP values which are special cases for QEMU to interrupt
+ * execution and not to be used for exceptions which are passed to
+ * the guest (those must all have syndrome information and thus should
+ * use exception_with_syndrome).
+ */
+void HELPER(exception_internal)(CPUARMState *env, uint32_t excp)
 {
     CPUState *cs = CPU(arm_env_get_cpu(env));
 
+    assert(excp_is_internal(excp));
     cs->exception_index = excp;
     cpu_loop_exit(cs);
 }
 
+/* Raise an exception with the specified syndrome register value */
+void HELPER(exception_with_syndrome)(CPUARMState *env, uint32_t excp,
+                                     uint32_t syndrome)
+{
+    CPUState *cs = CPU(arm_env_get_cpu(env));
+
+    assert(!excp_is_internal(excp));
+    cs->exception_index = excp;
+    env->exception.syndrome = syndrome;
+    cpu_loop_exit(cs);
+}
+
 uint32_t HELPER(cpsr_read)(CPUARMState *env)
 {
     return cpsr_read(env) & ~CPSR_EXEC;
@@ -293,17 +313,17 @@
     }
 }
 
-void HELPER(access_check_cp_reg)(CPUARMState *env, void *rip)
+void HELPER(access_check_cp_reg)(CPUARMState *env, void *rip, uint32_t syndrome)
 {
     const ARMCPRegInfo *ri = rip;
     switch (ri->accessfn(env, ri)) {
     case CP_ACCESS_OK:
         return;
     case CP_ACCESS_TRAP:
+        env->exception.syndrome = syndrome;
+        break;
     case CP_ACCESS_TRAP_UNCATEGORIZED:
-        /* These cases will eventually need to generate different
-         * syndrome information.
-         */
+        env->exception.syndrome = syn_uncategorized();
         break;
     default:
         g_assert_not_reached();
@@ -351,7 +371,7 @@
 
     switch (op) {
     case 0x05: /* SPSel */
-        env->pstate = deposit32(env->pstate, 0, 1, imm);
+        update_spsel(env, imm);
         break;
     case 0x1e: /* DAIFSet */
         env->daif |= (imm << 6) & PSTATE_DAIF;
@@ -364,6 +384,66 @@
     }
 }
 
+void HELPER(exception_return)(CPUARMState *env)
+{
+    uint32_t spsr = env->banked_spsr[0];
+    int new_el, i;
+
+    if (env->pstate & PSTATE_SP) {
+        env->sp_el[1] = env->xregs[31];
+    } else {
+        env->sp_el[0] = env->xregs[31];
+    }
+
+    env->exclusive_addr = -1;
+
+    if (spsr & PSTATE_nRW) {
+        env->aarch64 = 0;
+        new_el = 0;
+        env->uncached_cpsr = 0x10;
+        cpsr_write(env, spsr, ~0);
+        for (i = 0; i < 15; i++) {
+            env->regs[i] = env->xregs[i];
+        }
+
+        env->regs[15] = env->elr_el1 & ~0x1;
+    } else {
+        new_el = extract32(spsr, 2, 2);
+        if (new_el > 1) {
+            /* Return to unimplemented EL */
+            goto illegal_return;
+        }
+        if (extract32(spsr, 1, 1)) {
+            /* Return with reserved M[1] bit set */
+            goto illegal_return;
+        }
+        if (new_el == 0 && (spsr & PSTATE_SP)) {
+            /* Return to EL1 with M[0] bit set */
+            goto illegal_return;
+        }
+        env->aarch64 = 1;
+        pstate_write(env, spsr);
+        env->xregs[31] = env->sp_el[new_el];
+        env->pc = env->elr_el1;
+    }
+
+    return;
+
+illegal_return:
+    /* Illegal return events of various kinds have architecturally
+     * mandated behaviour:
+     * restore NZCV and DAIF from SPSR_ELx
+     * set PSTATE.IL
+     * restore PC from ELR_ELx
+     * no change to exception level, execution state or stack pointer
+     */
+    env->pstate |= PSTATE_IL;
+    env->pc = env->elr_el1;
+    spsr &= PSTATE_NZCV | PSTATE_DAIF;
+    spsr |= pstate_read(env) & ~(PSTATE_NZCV | PSTATE_DAIF);
+    pstate_write(env, spsr);
+}
+
 /* ??? Flag setting arithmetic is awkward because we need to do comparisons.
    The only way to do that in TCG is a conditional branch, which clobbers
    all our temporaries.  For now implement these as helper functions.  */
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 9175e48..d86b8ff 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -26,6 +26,7 @@
 #include "tcg-op.h"
 #include "qemu/log.h"
 #include "translate.h"
+#include "internals.h"
 #include "qemu/host-utils.h"
 
 #include "exec/gen-icount.h"
@@ -175,18 +176,37 @@
     tcg_gen_movi_i64(cpu_pc, val);
 }
 
-static void gen_exception(int excp)
+static void gen_exception_internal(int excp)
 {
-    TCGv_i32 tmp = tcg_temp_new_i32();
-    tcg_gen_movi_i32(tmp, excp);
-    gen_helper_exception(cpu_env, tmp);
-    tcg_temp_free_i32(tmp);
+    TCGv_i32 tcg_excp = tcg_const_i32(excp);
+
+    assert(excp_is_internal(excp));
+    gen_helper_exception_internal(cpu_env, tcg_excp);
+    tcg_temp_free_i32(tcg_excp);
 }
 
-static void gen_exception_insn(DisasContext *s, int offset, int excp)
+static void gen_exception(int excp, uint32_t syndrome)
+{
+    TCGv_i32 tcg_excp = tcg_const_i32(excp);
+    TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
+
+    gen_helper_exception_with_syndrome(cpu_env, tcg_excp, tcg_syn);
+    tcg_temp_free_i32(tcg_syn);
+    tcg_temp_free_i32(tcg_excp);
+}
+
+static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
 {
     gen_a64_set_pc_im(s->pc - offset);
-    gen_exception(excp);
+    gen_exception_internal(excp);
+    s->is_jmp = DISAS_EXC;
+}
+
+static void gen_exception_insn(DisasContext *s, int offset, int excp,
+                               uint32_t syndrome)
+{
+    gen_a64_set_pc_im(s->pc - offset);
+    gen_exception(excp, syndrome);
     s->is_jmp = DISAS_EXC;
 }
 
@@ -218,7 +238,7 @@
     } else {
         gen_a64_set_pc_im(dest);
         if (s->singlestep_enabled) {
-            gen_exception(EXCP_DEBUG);
+            gen_exception_internal(EXCP_DEBUG);
         }
         tcg_gen_exit_tb(0);
         s->is_jmp = DISAS_JUMP;
@@ -227,7 +247,8 @@
 
 static void unallocated_encoding(DisasContext *s)
 {
-    gen_exception_insn(s, 4, EXCP_UDEF);
+    /* Unallocated and reserved encodings are uncategorized */
+    gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized());
 }
 
 #define unsupported_encoding(s, insn)                                    \
@@ -332,11 +353,30 @@
     return v;
 }
 
+/* We should have at some point before trying to access an FP register
+ * done the necessary access check, so assert that
+ * (a) we did the check and
+ * (b) we didn't then just plough ahead anyway if it failed.
+ * Print the instruction pattern in the abort message so we can figure
+ * out what we need to fix if a user encounters this problem in the wild.
+ */
+static inline void assert_fp_access_checked(DisasContext *s)
+{
+#ifdef CONFIG_DEBUG_TCG
+    if (unlikely(!s->fp_access_checked || !s->cpacr_fpen)) {
+        fprintf(stderr, "target-arm: FP access check missing for "
+                "instruction 0x%08x\n", s->insn);
+        abort();
+    }
+#endif
+}
+
 /* Return the offset into CPUARMState of an element of specified
  * size, 'element' places in from the least significant end of
  * the FP/vector register Qn.
  */
-static inline int vec_reg_offset(int regno, int element, TCGMemOp size)
+static inline int vec_reg_offset(DisasContext *s, int regno,
+                                 int element, TCGMemOp size)
 {
     int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
 #ifdef HOST_WORDS_BIGENDIAN
@@ -351,6 +391,7 @@
 #else
     offs += element * (1 << size);
 #endif
+    assert_fp_access_checked(s);
     return offs;
 }
 
@@ -359,18 +400,20 @@
  * Dn, Sn, Hn or Bn).
  * (Note that this is not the same mapping as for A32; see cpu.h)
  */
-static inline int fp_reg_offset(int regno, TCGMemOp size)
+static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
 {
     int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
 #ifdef HOST_WORDS_BIGENDIAN
     offs += (8 - (1 << size));
 #endif
+    assert_fp_access_checked(s);
     return offs;
 }
 
 /* Offset of the high half of the 128 bit vector Qn */
-static inline int fp_reg_hi_offset(int regno)
+static inline int fp_reg_hi_offset(DisasContext *s, int regno)
 {
+    assert_fp_access_checked(s);
     return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]);
 }
 
@@ -384,7 +427,7 @@
 {
     TCGv_i64 v = tcg_temp_new_i64();
 
-    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(reg, MO_64));
+    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
     return v;
 }
 
@@ -392,7 +435,7 @@
 {
     TCGv_i32 v = tcg_temp_new_i32();
 
-    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(reg, MO_32));
+    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
     return v;
 }
 
@@ -400,8 +443,8 @@
 {
     TCGv_i64 tcg_zero = tcg_const_i64(0);
 
-    tcg_gen_st_i64(v, cpu_env, fp_reg_offset(reg, MO_64));
-    tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(reg));
+    tcg_gen_st_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
+    tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(s, reg));
     tcg_temp_free_i64(tcg_zero);
 }
 
@@ -672,14 +715,14 @@
 {
     /* This writes the bottom N bits of a 128 bit wide vector to memory */
     TCGv_i64 tmp = tcg_temp_new_i64();
-    tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(srcidx, MO_64));
+    tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
     if (size < 4) {
         tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TE + size);
     } else {
         TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
         tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TEQ);
         tcg_gen_qemu_st64(tmp, tcg_addr, get_mem_index(s));
-        tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(srcidx));
+        tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
         tcg_gen_qemu_st_i64(tmp, tcg_hiaddr, get_mem_index(s), MO_TEQ);
         tcg_temp_free_i64(tcg_hiaddr);
@@ -712,8 +755,8 @@
         tcg_temp_free_i64(tcg_hiaddr);
     }
 
-    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(destidx, MO_64));
-    tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(destidx));
+    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
+    tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
 
     tcg_temp_free_i64(tmplo);
     tcg_temp_free_i64(tmphi);
@@ -735,7 +778,7 @@
 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                              int element, TCGMemOp memop)
 {
-    int vect_off = vec_reg_offset(srcidx, element, memop & MO_SIZE);
+    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
     switch (memop) {
     case MO_8:
         tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
@@ -767,7 +810,7 @@
 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                  int element, TCGMemOp memop)
 {
-    int vect_off = vec_reg_offset(srcidx, element, memop & MO_SIZE);
+    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
     switch (memop) {
     case MO_8:
         tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
@@ -794,7 +837,7 @@
 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                               int element, TCGMemOp memop)
 {
-    int vect_off = vec_reg_offset(destidx, element, memop & MO_SIZE);
+    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
     switch (memop) {
     case MO_8:
         tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
@@ -816,7 +859,7 @@
 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                   int destidx, int element, TCGMemOp memop)
 {
-    int vect_off = vec_reg_offset(destidx, element, memop & MO_SIZE);
+    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
     switch (memop) {
     case MO_8:
         tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
@@ -869,6 +912,26 @@
     tcg_temp_free_i64(tcg_tmp);
 }
 
+/* Check that FP/Neon access is enabled. If it is, return
+ * true. If not, emit code to generate an appropriate exception,
+ * and return false; the caller should not emit any code for
+ * the instruction. Note that this check must happen after all
+ * unallocated-encoding checks (otherwise the syndrome information
+ * for the resulting exception will be incorrect).
+ */
+static inline bool fp_access_check(DisasContext *s)
+{
+    assert(!s->fp_access_checked);
+    s->fp_access_checked = true;
+
+    if (s->cpacr_fpen) {
+        return true;
+    }
+
+    gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false));
+    return false;
+}
+
 /*
  * This utility function is for doing register extension with an
  * optional shift. You will likely want to pass a temporary for the
@@ -1241,10 +1304,16 @@
          * runtime; this may result in an exception.
          */
         TCGv_ptr tmpptr;
+        TCGv_i32 tcg_syn;
+        uint32_t syndrome;
+
         gen_a64_set_pc_im(s->pc - 4);
         tmpptr = tcg_const_ptr(ri);
-        gen_helper_access_check_cp_reg(cpu_env, tmpptr);
+        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
+        tcg_syn = tcg_const_i32(syndrome);
+        gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn);
         tcg_temp_free_ptr(tmpptr);
+        tcg_temp_free_i32(tcg_syn);
     }
 
     /* Handle special cases first */
@@ -1266,6 +1335,11 @@
         tcg_rt = cpu_reg(s, rt);
         tcg_gen_movi_i64(tcg_rt, s->current_pl << 2);
         return;
+    case ARM_CP_DC_ZVA:
+        /* Writes clear the aligned block of memory which rt points into. */
+        tcg_rt = cpu_reg(s, rt);
+        gen_helper_dc_zva(cpu_env, tcg_rt);
+        return;
     default:
         break;
     }
@@ -1366,6 +1440,7 @@
 {
     int opc = extract32(insn, 21, 3);
     int op2_ll = extract32(insn, 0, 5);
+    int imm16 = extract32(insn, 5, 16);
 
     switch (opc) {
     case 0:
@@ -1376,7 +1451,7 @@
             unallocated_encoding(s);
             break;
         }
-        gen_exception_insn(s, 0, EXCP_SWI);
+        gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16));
         break;
     case 1:
         if (op2_ll != 0) {
@@ -1384,7 +1459,7 @@
             break;
         }
         /* BRK */
-        gen_exception_insn(s, 0, EXCP_BKPT);
+        gen_exception_insn(s, 0, EXCP_BKPT, syn_aa64_bkpt(imm16));
         break;
     case 2:
         if (op2_ll != 0) {
@@ -1437,6 +1512,9 @@
         tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
         break;
     case 4: /* ERET */
+        gen_helper_exception_return(cpu_env);
+        s->is_jmp = DISAS_JUMP;
+        return;
     case 5: /* DRPS */
         if (rn != 0x1f) {
             unallocated_encoding(s);
@@ -1533,7 +1611,7 @@
     tcg_gen_mov_i64(cpu_exclusive_test, addr);
     tcg_gen_movi_i32(cpu_exclusive_info,
                      size | is_pair << 2 | (rd << 4) | (rt << 9) | (rt2 << 14));
-    gen_exception_insn(s, 4, EXCP_STREX);
+    gen_exception_internal_insn(s, 4, EXCP_STREX);
 }
 #else
 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
@@ -1700,6 +1778,9 @@
             return;
         }
         size = 2 + opc;
+        if (!fp_access_check(s)) {
+            return;
+        }
     } else {
         if (opc == 3) {
             /* PRFM (literal) : prefetch */
@@ -1809,6 +1890,10 @@
         break;
     }
 
+    if (is_vector && !fp_access_check(s)) {
+        return;
+    }
+
     offset <<= size;
 
     if (rn == 31) {
@@ -1902,6 +1987,9 @@
             return;
         }
         is_store = ((opc & 1) == 0);
+        if (!fp_access_check(s)) {
+            return;
+        }
     } else {
         if (size == 3 && opc == 2) {
             /* PRFM - prefetch */
@@ -2022,6 +2110,9 @@
             return;
         }
         is_store = !extract32(opc, 0, 1);
+        if (!fp_access_check(s)) {
+            return;
+        }
     } else {
         if (size == 3 && opc == 2) {
             /* PRFM - prefetch */
@@ -2102,6 +2193,9 @@
             return;
         }
         is_store = !extract32(opc, 0, 1);
+        if (!fp_access_check(s)) {
+            return;
+        }
     } else {
         if (size == 3 && opc == 2) {
             /* PRFM - prefetch */
@@ -2244,6 +2338,10 @@
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     if (rn == 31) {
         gen_check_sp_alignment(s);
     }
@@ -2370,6 +2468,10 @@
         g_assert_not_reached();
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     ebytes = 1 << scale;
 
     if (rn == 31) {
@@ -3846,6 +3948,10 @@
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
 }
 
@@ -3874,6 +3980,10 @@
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     if (cond < 0x0e) { /* not always */
         int label_match = gen_new_label();
         label_continue = gen_new_label();
@@ -3930,6 +4040,10 @@
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     if (cond < 0x0e) { /* not always */
         int label_match = gen_new_label();
         label_continue = gen_new_label();
@@ -4147,6 +4261,10 @@
             unallocated_encoding(s);
             return;
         }
+        if (!fp_access_check(s)) {
+            return;
+        }
+
         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
         break;
     }
@@ -4156,9 +4274,17 @@
         /* 32-to-32 and 64-to-64 ops */
         switch (type) {
         case 0:
+            if (!fp_access_check(s)) {
+                return;
+            }
+
             handle_fp_1src_single(s, opcode, rd, rn);
             break;
         case 1:
+            if (!fp_access_check(s)) {
+                return;
+            }
+
             handle_fp_1src_double(s, opcode, rd, rn);
             break;
         default:
@@ -4298,9 +4424,15 @@
 
     switch (type) {
     case 0:
+        if (!fp_access_check(s)) {
+            return;
+        }
         handle_fp_2src_single(s, opcode, rd, rn, rm);
         break;
     case 1:
+        if (!fp_access_check(s)) {
+            return;
+        }
         handle_fp_2src_double(s, opcode, rd, rn, rm);
         break;
     default:
@@ -4402,9 +4534,15 @@
 
     switch (type) {
     case 0:
+        if (!fp_access_check(s)) {
+            return;
+        }
         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
         break;
     case 1:
+        if (!fp_access_check(s)) {
+            return;
+        }
         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
         break;
     default:
@@ -4431,6 +4569,10 @@
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     /* The imm8 encodes the sign bit, enough bits to represent
      * an exponent in the range 01....1xx to 10....0xx,
      * and the most significant 4 bits of the mantissa; see
@@ -4617,6 +4759,10 @@
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
 }
 
@@ -4635,9 +4781,9 @@
             /* 32 bit */
             TCGv_i64 tmp = tcg_temp_new_i64();
             tcg_gen_ext32u_i64(tmp, tcg_rn);
-            tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(rd, MO_64));
+            tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64));
             tcg_gen_movi_i64(tmp, 0);
-            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(rd));
+            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
             tcg_temp_free_i64(tmp);
             break;
         }
@@ -4645,14 +4791,14 @@
         {
             /* 64 bit */
             TCGv_i64 tmp = tcg_const_i64(0);
-            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(rd, MO_64));
-            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(rd));
+            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64));
+            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
             tcg_temp_free_i64(tmp);
             break;
         }
         case 2:
             /* 64 bit to top half. */
-            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(rd));
+            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
             break;
         }
     } else {
@@ -4661,15 +4807,15 @@
         switch (type) {
         case 0:
             /* 32 bit */
-            tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(rn, MO_32));
+            tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
             break;
         case 1:
             /* 64 bit */
-            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(rn, MO_64));
+            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
             break;
         case 2:
             /* 64 bits from top half */
-            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(rn));
+            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
             break;
         }
     }
@@ -4716,6 +4862,9 @@
             break;
         }
 
+        if (!fp_access_check(s)) {
+            return;
+        }
         handle_fmov(s, rd, rn, type, itof);
     } else {
         /* actual FP conversions */
@@ -4726,6 +4875,9 @@
             return;
         }
 
+        if (!fp_access_check(s)) {
+            return;
+        }
         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
     }
 }
@@ -4826,6 +4978,10 @@
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     tcg_resh = tcg_temp_new_i64();
     tcg_resl = tcg_temp_new_i64();
 
@@ -4896,6 +5052,10 @@
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     /* This does a table lookup: for every byte element in the input
      * we index into a table formed from up to four vector registers,
      * and then the output is the result of the lookups. Our helper
@@ -4966,6 +5126,10 @@
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     tcg_resl = tcg_const_i64(0);
     tcg_resh = tcg_const_i64(0);
     tcg_res = tcg_temp_new_i64();
@@ -5099,6 +5263,10 @@
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     esize = 8 << size;
     elements = (is_q ? 128 : 64) / esize;
 
@@ -5231,6 +5399,10 @@
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     index = imm5 >> (size + 1);
 
     tmp = tcg_temp_new_i64();
@@ -5265,6 +5437,10 @@
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     index = imm5 >> (size + 1);
 
     /* This instruction just extracts the specified element and
@@ -5297,6 +5473,11 @@
         unallocated_encoding(s);
         return;
     }
+
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     for (i = 0; i < elements; i++) {
         write_vec_element(s, cpu_reg(s, rn), rd, i, size);
     }
@@ -5326,6 +5507,11 @@
         unallocated_encoding(s);
         return;
     }
+
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     dst_index = extract32(imm5, 1+size, 5);
     src_index = extract32(imm4, size, 4);
 
@@ -5358,6 +5544,10 @@
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     idx = extract32(imm5, 1 + size, 4 - size);
     write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
 }
@@ -5395,6 +5585,11 @@
             return;
         }
     }
+
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     element = extract32(imm5, 1+size, 4);
 
     tcg_rd = cpu_reg(s, rd);
@@ -5487,6 +5682,10 @@
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     /* See AdvSIMDExpandImm() in ARM ARM */
     switch (cmode_3_1) {
     case 0: /* Replicate(Zeros(24):imm8, 2) */
@@ -5561,7 +5760,7 @@
     tcg_rd = new_tmp_a64(s);
 
     for (i = 0; i < 2; i++) {
-        int foffs = i ? fp_reg_hi_offset(rd) : fp_reg_offset(rd, MO_64);
+        int foffs = i ? fp_reg_hi_offset(s, rd) : fp_reg_offset(s, rd, MO_64);
 
         if (i == 1 && !is_q) {
             /* non-quad ops clear high half of vector */
@@ -5635,6 +5834,10 @@
             unallocated_encoding(s);
             return;
         }
+        if (!fp_access_check(s)) {
+            return;
+        }
+
         TCGV_UNUSED_PTR(fpst);
         break;
     case 0xc: /* FMAXNMP */
@@ -5647,6 +5850,10 @@
             unallocated_encoding(s);
             return;
         }
+        if (!fp_access_check(s)) {
+            return;
+        }
+
         size = extract32(size, 0, 1) ? 3 : 2;
         fpst = get_fpstatus_ptr();
         break;
@@ -5865,6 +6072,10 @@
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     switch (opcode) {
     case 0x02: /* SSRA / USRA (accumulate) */
         accumulate = true;
@@ -5922,6 +6133,10 @@
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     tcg_rn = read_fp_dreg(s, rn);
     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
 
@@ -5977,6 +6192,10 @@
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     if (is_u_shift) {
         narrowfn = unsigned_narrow_fns[size];
     } else {
@@ -6059,6 +6278,10 @@
         }
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     if (size == 3) {
         TCGv_i64 tcg_shift = tcg_const_i64(shift);
         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
@@ -6219,6 +6442,11 @@
             return;
         }
     }
+
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     /* immh == 0 would be a failure of the decode logic */
     g_assert(immh);
 
@@ -6247,6 +6475,10 @@
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     assert(!(is_scalar && is_q));
 
     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
@@ -6410,6 +6642,10 @@
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     if (size == 2) {
         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
@@ -6794,6 +7030,10 @@
             return;
         }
 
+        if (!fp_access_check(s)) {
+            return;
+        }
+
         handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
         return;
     }
@@ -6826,6 +7066,10 @@
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     tcg_rd = tcg_temp_new_i64();
 
     if (size == 3) {
@@ -7029,7 +7273,13 @@
                                    int size, int rn, int rd)
 {
     bool is_double = (size == 3);
-    TCGv_ptr fpst = get_fpstatus_ptr();
+    TCGv_ptr fpst;
+
+    if (!fp_access_check(s)) {
+        return;
+    }
+
+    fpst = get_fpstatus_ptr();
 
     if (is_double) {
         TCGv_i64 tcg_op = tcg_temp_new_i64();
@@ -7436,6 +7686,9 @@
 
     switch (opcode) {
     case 0x3: /* USQADD / SUQADD*/
+        if (!fp_access_check(s)) {
+            return;
+        }
         handle_2misc_satacc(s, true, u, false, size, rn, rd);
         return;
     case 0x7: /* SQABS / SQNEG */
@@ -7455,7 +7708,7 @@
         }
         break;
     case 0x12: /* SQXTUN */
-        if (u) {
+        if (!u) {
             unallocated_encoding(s);
             return;
         }
@@ -7465,6 +7718,9 @@
             unallocated_encoding(s);
             return;
         }
+        if (!fp_access_check(s)) {
+            return;
+        }
         handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
         return;
     case 0xc ... 0xf:
@@ -7487,12 +7743,18 @@
         case 0x5d: /* UCVTF */
         {
             bool is_signed = (opcode == 0x1d);
+            if (!fp_access_check(s)) {
+                return;
+            }
             handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
             return;
         }
         case 0x3d: /* FRECPE */
         case 0x3f: /* FRECPX */
         case 0x7d: /* FRSQRTE */
+            if (!fp_access_check(s)) {
+                return;
+            }
             handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
             return;
         case 0x1a: /* FCVTNS */
@@ -7517,6 +7779,9 @@
                 unallocated_encoding(s);
                 return;
             }
+            if (!fp_access_check(s)) {
+                return;
+            }
             handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
             return;
         default:
@@ -7529,6 +7794,10 @@
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     if (is_fcvt) {
         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
@@ -7632,6 +7901,10 @@
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     switch (opcode) {
     case 0x02: /* SSRA / USRA (accumulate) */
         accumulate = true;
@@ -7703,6 +7976,10 @@
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     for (i = 0; i < elements; i++) {
         read_vec_element(s, tcg_rn, rn, i, size);
         if (insert) {
@@ -7738,6 +8015,10 @@
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     /* For the LL variants the store is larger than the load,
      * so if rd == rn we would overwrite parts of our input.
      * So load everything right now and use shifts in the main loop.
@@ -7772,6 +8053,10 @@
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     tcg_rn = tcg_temp_new_i64();
     tcg_rd = tcg_temp_new_i64();
     tcg_final = tcg_temp_new_i64();
@@ -8268,6 +8553,9 @@
             unallocated_encoding(s);
             return;
         }
+        if (!fp_access_check(s)) {
+            return;
+        }
         handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
         break;
     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
@@ -8277,6 +8565,9 @@
             unallocated_encoding(s);
             return;
         }
+        if (!fp_access_check(s)) {
+            return;
+        }
         handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
         break;
     case 14: /* PMULL, PMULL2 */
@@ -8289,6 +8580,9 @@
                 unallocated_encoding(s);
                 return;
             }
+            if (!fp_access_check(s)) {
+                return;
+            }
             handle_pmull_64(s, is_q, rd, rn, rm);
             return;
         }
@@ -8314,6 +8608,10 @@
             return;
         }
     is_widening:
+        if (!fp_access_check(s)) {
+            return;
+        }
+
         handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
         break;
     default:
@@ -8332,11 +8630,15 @@
     int size = extract32(insn, 22, 2);
     bool is_u = extract32(insn, 29, 1);
     bool is_q = extract32(insn, 30, 1);
-    TCGv_i64 tcg_op1 = tcg_temp_new_i64();
-    TCGv_i64 tcg_op2 = tcg_temp_new_i64();
-    TCGv_i64 tcg_res[2];
+    TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
     int pass;
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
+    tcg_op1 = tcg_temp_new_i64();
+    tcg_op2 = tcg_temp_new_i64();
     tcg_res[0] = tcg_temp_new_i64();
     tcg_res[1] = tcg_temp_new_i64();
 
@@ -8439,6 +8741,10 @@
         TCGV_UNUSED_PTR(fpst);
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     /* These operations work on the concatenated rm:rn, with each pair of
      * adjacent elements being operated on to produce an element in the result.
      */
@@ -8631,6 +8937,10 @@
     case 0x5f: /* FDIV */
     case 0x7a: /* FABD */
     case 0x7c: /* FCMGT */
+        if (!fp_access_check(s)) {
+            return;
+        }
+
         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
         return;
     default:
@@ -8685,6 +8995,10 @@
         break;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     if (size == 3) {
         for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
@@ -9049,6 +9363,10 @@
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     if (size == 0) {
         /* Special case bytes, use bswap op on each group of elements */
         int groups = dsize / (8 << grp_size);
@@ -9251,6 +9569,10 @@
             unallocated_encoding(s);
             return;
         }
+        if (!fp_access_check(s)) {
+            return;
+        }
+
         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
         return;
     case 0x4: /* CLS, CLZ */
@@ -9265,6 +9587,9 @@
             unallocated_encoding(s);
             return;
         }
+        if (!fp_access_check(s)) {
+            return;
+        }
         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
         return;
     case 0x13: /* SHLL, SHLL2 */
@@ -9272,6 +9597,9 @@
             unallocated_encoding(s);
             return;
         }
+        if (!fp_access_check(s)) {
+            return;
+        }
         handle_shll(s, is_q, size, rn, rd);
         return;
     case 0xa: /* CMLT */
@@ -9293,6 +9621,9 @@
             unallocated_encoding(s);
             return;
         }
+        if (!fp_access_check(s)) {
+            return;
+        }
         handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
         return;
     case 0x7: /* SQABS, SQNEG */
@@ -9328,6 +9659,9 @@
                 unallocated_encoding(s);
                 return;
             }
+            if (!fp_access_check(s)) {
+                return;
+            }
             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
             return;
         }
@@ -9386,6 +9720,9 @@
                 unallocated_encoding(s);
                 return;
             }
+            if (!fp_access_check(s)) {
+                return;
+            }
             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
             return;
         case 0x56: /* FCVTXN, FCVTXN2 */
@@ -9398,9 +9735,15 @@
             /* handle_2misc_narrow does a 2*size -> size operation, but these
              * instructions encode the source size rather than dest size.
              */
+            if (!fp_access_check(s)) {
+                return;
+            }
             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
             return;
         case 0x17: /* FCVTL, FCVTL2 */
+            if (!fp_access_check(s)) {
+                return;
+            }
             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
             return;
         case 0x18: /* FRINTN */
@@ -9445,6 +9788,10 @@
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     if (need_fpstatus) {
         tcg_fpstatus = get_fpstatus_ptr();
     } else {
@@ -9808,6 +10155,10 @@
         }
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     if (is_fp) {
         fpst = get_fpstatus_ptr();
     } else {
@@ -10239,6 +10590,8 @@
     s->insn = insn;
     s->pc += 4;
 
+    s->fp_access_checked = false;
+
     switch (extract32(insn, 25, 4)) {
     case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
         unallocated_encoding(s);
@@ -10306,7 +10659,7 @@
 #if !defined(CONFIG_USER_ONLY)
     dc->user = (ARM_TBFLAG_AA64_EL(tb->flags) == 0);
 #endif
-    dc->vfp_enabled = 0;
+    dc->cpacr_fpen = ARM_TBFLAG_AA64_FPEN(tb->flags);
     dc->vec_len = 0;
     dc->vec_stride = 0;
     dc->cp_regs = cpu->cp_regs;
@@ -10331,7 +10684,7 @@
         if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
             QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
                 if (bp->pc == dc->pc) {
-                    gen_exception_insn(dc, 0, EXCP_DEBUG);
+                    gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
                     /* Advance PC so that clearing the breakpoint will
                        invalidate this TB.  */
                     dc->pc += 2;
@@ -10394,7 +10747,7 @@
         if (dc->is_jmp != DISAS_JUMP) {
             gen_a64_set_pc_im(dc->pc);
         }
-        gen_exception(EXCP_DEBUG);
+        gen_exception_internal(EXCP_DEBUG);
     } else {
         switch (dc->is_jmp) {
         case DISAS_NEXT:
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 56e3b4b..a4d920b 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -25,6 +25,7 @@
 #include <inttypes.h>
 
 #include "cpu.h"
+#include "internals.h"
 #include "disas/disas.h"
 #include "tcg-op.h"
 #include "qemu/log.h"
@@ -182,12 +183,23 @@
 /* Set NZCV flags from the high 4 bits of var.  */
 #define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
 
-static void gen_exception(int excp)
+static void gen_exception_internal(int excp)
 {
-    TCGv_i32 tmp = tcg_temp_new_i32();
-    tcg_gen_movi_i32(tmp, excp);
-    gen_helper_exception(cpu_env, tmp);
-    tcg_temp_free_i32(tmp);
+    TCGv_i32 tcg_excp = tcg_const_i32(excp);
+
+    assert(excp_is_internal(excp));
+    gen_helper_exception_internal(cpu_env, tcg_excp);
+    tcg_temp_free_i32(tcg_excp);
+}
+
+static void gen_exception(int excp, uint32_t syndrome)
+{
+    TCGv_i32 tcg_excp = tcg_const_i32(excp);
+    TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
+
+    gen_helper_exception_with_syndrome(cpu_env, tcg_excp, tcg_syn);
+    tcg_temp_free_i32(tcg_syn);
+    tcg_temp_free_i32(tcg_excp);
 }
 
 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
@@ -899,6 +911,33 @@
     tcg_gen_movi_i32(cpu_R[15], val);
 }
 
+static inline void
+gen_set_condexec (DisasContext *s)
+{
+    if (s->condexec_mask) {
+        uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
+        TCGv_i32 tmp = tcg_temp_new_i32();
+        tcg_gen_movi_i32(tmp, val);
+        store_cpu_field(tmp, condexec_bits);
+    }
+}
+
+static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
+{
+    gen_set_condexec(s);
+    gen_set_pc_im(s, s->pc - offset);
+    gen_exception_internal(excp);
+    s->is_jmp = DISAS_JUMP;
+}
+
+static void gen_exception_insn(DisasContext *s, int offset, int excp, int syn)
+{
+    gen_set_condexec(s);
+    gen_set_pc_im(s, s->pc - offset);
+    gen_exception(excp, syn);
+    s->is_jmp = DISAS_JUMP;
+}
+
 /* Force a TB lookup after an instruction that changes the CPU state.  */
 static inline void gen_lookup_tb(DisasContext *s)
 {
@@ -2913,14 +2952,25 @@
     if (!arm_feature(env, ARM_FEATURE_VFP))
         return 1;
 
+    /* FIXME: this access check should not take precedence over UNDEF
+     * for invalid encodings; we will generate incorrect syndrome information
+     * for attempts to execute invalid vfp/neon encodings with FP disabled.
+     */
+    if (!s->cpacr_fpen) {
+        gen_exception_insn(s, 4, EXCP_UDEF,
+                           syn_fp_access_trap(1, 0xe, s->thumb));
+        return 0;
+    }
+
     if (!s->vfp_enabled) {
         /* VFP disabled.  Only allow fmxr/fmrx to/from some control regs.  */
         if ((insn & 0x0fe00fff) != 0x0ee00a10)
             return 1;
         rn = (insn >> 16) & 0xf;
-        if (rn != ARM_VFP_FPSID && rn != ARM_VFP_FPEXC
-            && rn != ARM_VFP_MVFR1 && rn != ARM_VFP_MVFR0)
+        if (rn != ARM_VFP_FPSID && rn != ARM_VFP_FPEXC && rn != ARM_VFP_MVFR2
+            && rn != ARM_VFP_MVFR1 && rn != ARM_VFP_MVFR0) {
             return 1;
+        }
     }
 
     if (extract32(insn, 28, 4) == 0xf) {
@@ -3066,6 +3116,11 @@
                                 gen_helper_vfp_get_fpscr(tmp, cpu_env);
                             }
                             break;
+                        case ARM_VFP_MVFR2:
+                            if (!arm_feature(env, ARM_FEATURE_V8)) {
+                                return 1;
+                            }
+                            /* fall through */
                         case ARM_VFP_MVFR0:
                         case ARM_VFP_MVFR1:
                             if (IS_USER(s)
@@ -3912,25 +3967,6 @@
     s->is_jmp = DISAS_UPDATE;
 }
 
-static inline void
-gen_set_condexec (DisasContext *s)
-{
-    if (s->condexec_mask) {
-        uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
-        TCGv_i32 tmp = tcg_temp_new_i32();
-        tcg_gen_movi_i32(tmp, val);
-        store_cpu_field(tmp, condexec_bits);
-    }
-}
-
-static void gen_exception_insn(DisasContext *s, int offset, int excp)
-{
-    gen_set_condexec(s);
-    gen_set_pc_im(s, s->pc - offset);
-    gen_exception(excp);
-    s->is_jmp = DISAS_JUMP;
-}
-
 static void gen_nop_hint(DisasContext *s, int val)
 {
     switch (val) {
@@ -4212,6 +4248,16 @@
     TCGv_i32 tmp2;
     TCGv_i64 tmp64;
 
+    /* FIXME: this access check should not take precedence over UNDEF
+     * for invalid encodings; we will generate incorrect syndrome information
+     * for attempts to execute invalid vfp/neon encodings with FP disabled.
+     */
+    if (!s->cpacr_fpen) {
+        gen_exception_insn(s, 4, EXCP_UDEF,
+                           syn_fp_access_trap(1, 0xe, s->thumb));
+        return 0;
+    }
+
     if (!s->vfp_enabled)
       return 1;
     VFP_DREG_D(rd, insn);
@@ -4934,6 +4980,16 @@
     TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
     TCGv_i64 tmp64;
 
+    /* FIXME: this access check should not take precedence over UNDEF
+     * for invalid encodings; we will generate incorrect syndrome information
+     * for attempts to execute invalid vfp/neon encodings with FP disabled.
+     */
+    if (!s->cpacr_fpen) {
+        gen_exception_insn(s, 4, EXCP_UDEF,
+                           syn_fp_access_trap(1, 0xe, s->thumb));
+        return 0;
+    }
+
     if (!s->vfp_enabled)
       return 1;
     q = (insn & (1 << 6)) != 0;
@@ -6861,10 +6917,53 @@
              * runtime; this may result in an exception.
              */
             TCGv_ptr tmpptr;
+            TCGv_i32 tcg_syn;
+            uint32_t syndrome;
+
+            /* Note that since we are an implementation which takes an
+             * exception on a trapped conditional instruction only if the
+             * instruction passes its condition code check, we can take
+             * advantage of the clause in the ARM ARM that allows us to set
+             * the COND field in the instruction to 0xE in all cases.
+             * We could fish the actual condition out of the insn (ARM)
+             * or the condexec bits (Thumb) but it isn't necessary.
+             */
+            switch (cpnum) {
+            case 14:
+                if (is64) {
+                    syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
+                                                 isread, s->thumb);
+                } else {
+                    syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
+                                                rt, isread, s->thumb);
+                }
+                break;
+            case 15:
+                if (is64) {
+                    syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
+                                                 isread, s->thumb);
+                } else {
+                    syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
+                                                rt, isread, s->thumb);
+                }
+                break;
+            default:
+                /* ARMv8 defines that only coprocessors 14 and 15 exist,
+                 * so this can only happen if this is an ARMv7 or earlier CPU,
+                 * in which case the syndrome information won't actually be
+                 * guest visible.
+                 */
+                assert(!arm_feature(env, ARM_FEATURE_V8));
+                syndrome = syn_uncategorized();
+                break;
+            }
+
             gen_set_pc_im(s, s->pc);
             tmpptr = tcg_const_ptr(ri);
-            gen_helper_access_check_cp_reg(cpu_env, tmpptr);
+            tcg_syn = tcg_const_i32(syndrome);
+            gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn);
             tcg_temp_free_ptr(tmpptr);
+            tcg_temp_free_i32(tcg_syn);
         }
 
         /* Handle special cases first */
@@ -7116,7 +7215,7 @@
     tcg_gen_extu_i32_i64(cpu_exclusive_test, addr);
     tcg_gen_movi_i32(cpu_exclusive_info,
                      size | (rd << 4) | (rt << 8) | (rt2 << 12));
-    gen_exception_insn(s, 4, EXCP_STREX);
+    gen_exception_internal_insn(s, 4, EXCP_STREX);
 }
 #else
 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
@@ -7626,6 +7725,8 @@
             store_reg(s, rd, tmp);
             break;
         case 7:
+        {
+            int imm16 = extract32(insn, 0, 4) | (extract32(insn, 8, 12) << 4);
             /* SMC instruction (op1 == 3)
                and undefined instructions (op1 == 0 || op1 == 2)
                will trap */
@@ -7634,8 +7735,9 @@
             }
             /* bkpt */
             ARCH(5);
-            gen_exception_insn(s, 4, EXCP_BKPT);
+            gen_exception_insn(s, 4, EXCP_BKPT, syn_aa32_bkpt(imm16, false));
             break;
+        }
         case 0x8: /* signed multiply */
         case 0xa:
         case 0xc:
@@ -8328,27 +8430,39 @@
                         if (insn & (1 << 5))
                             gen_swap_half(tmp2);
                         gen_smul_dual(tmp, tmp2);
-                        if (insn & (1 << 6)) {
-                            /* This subtraction cannot overflow. */
-                            tcg_gen_sub_i32(tmp, tmp, tmp2);
-                        } else {
-                            /* This addition cannot overflow 32 bits;
-                             * however it may overflow considered as a signed
-                             * operation, in which case we must set the Q flag.
-                             */
-                            gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
-                        }
-                        tcg_temp_free_i32(tmp2);
                         if (insn & (1 << 22)) {
                             /* smlald, smlsld */
+                            TCGv_i64 tmp64_2;
+
                             tmp64 = tcg_temp_new_i64();
+                            tmp64_2 = tcg_temp_new_i64();
                             tcg_gen_ext_i32_i64(tmp64, tmp);
+                            tcg_gen_ext_i32_i64(tmp64_2, tmp2);
                             tcg_temp_free_i32(tmp);
+                            tcg_temp_free_i32(tmp2);
+                            if (insn & (1 << 6)) {
+                                tcg_gen_sub_i64(tmp64, tmp64, tmp64_2);
+                            } else {
+                                tcg_gen_add_i64(tmp64, tmp64, tmp64_2);
+                            }
+                            tcg_temp_free_i64(tmp64_2);
                             gen_addq(s, tmp64, rd, rn);
                             gen_storeq_reg(s, rd, rn, tmp64);
                             tcg_temp_free_i64(tmp64);
                         } else {
                             /* smuad, smusd, smlad, smlsd */
+                            if (insn & (1 << 6)) {
+                                /* This subtraction cannot overflow. */
+                                tcg_gen_sub_i32(tmp, tmp, tmp2);
+                            } else {
+                                /* This addition cannot overflow 32 bits;
+                                 * however it may overflow considered as a
+                                 * signed operation, in which case we must set
+                                 * the Q flag.
+                                 */
+                                gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
+                            }
+                            tcg_temp_free_i32(tmp2);
                             if (rd != 15)
                               {
                                 tmp2 = load_reg(s, rd);
@@ -8642,11 +8756,12 @@
         case 0xf:
             /* swi */
             gen_set_pc_im(s, s->pc);
+            s->svc_imm = extract32(insn, 0, 24);
             s->is_jmp = DISAS_SWI;
             break;
         default:
         illegal_op:
-            gen_exception_insn(s, 4, EXCP_UDEF);
+            gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized());
             break;
         }
     }
@@ -10457,9 +10572,12 @@
             break;
 
         case 0xe: /* bkpt */
+        {
+            int imm8 = extract32(insn, 0, 8);
             ARCH(5);
-            gen_exception_insn(s, 2, EXCP_BKPT);
+            gen_exception_insn(s, 2, EXCP_BKPT, syn_aa32_bkpt(imm8, true));
             break;
+        }
 
         case 0xa: /* rev */
             ARCH(6);
@@ -10576,6 +10694,7 @@
         if (cond == 0xf) {
             /* swi */
             gen_set_pc_im(s, s->pc);
+            s->svc_imm = extract32(insn, 0, 8);
             s->is_jmp = DISAS_SWI;
             break;
         }
@@ -10611,11 +10730,11 @@
     }
     return;
 undef32:
-    gen_exception_insn(s, 4, EXCP_UDEF);
+    gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized());
     return;
 illegal_op:
 undef:
-    gen_exception_insn(s, 2, EXCP_UDEF);
+    gen_exception_insn(s, 2, EXCP_UDEF, syn_uncategorized());
 }
 
 /* generate intermediate code in gen_opc_buf and gen_opparam_buf for
@@ -10665,6 +10784,7 @@
 #if !defined(CONFIG_USER_ONLY)
     dc->user = (ARM_TBFLAG_PRIV(tb->flags) == 0);
 #endif
+    dc->cpacr_fpen = ARM_TBFLAG_CPACR_FPEN(tb->flags);
     dc->vfp_enabled = ARM_TBFLAG_VFPEN(tb->flags);
     dc->vec_len = ARM_TBFLAG_VECLEN(tb->flags);
     dc->vec_stride = ARM_TBFLAG_VECSTRIDE(tb->flags);
@@ -10736,7 +10856,7 @@
         if (dc->pc >= 0xffff0000) {
             /* We always get here via a jump, so know we are not in a
                conditional execution block.  */
-            gen_exception(EXCP_KERNEL_TRAP);
+            gen_exception_internal(EXCP_KERNEL_TRAP);
             dc->is_jmp = DISAS_UPDATE;
             break;
         }
@@ -10744,7 +10864,7 @@
         if (dc->pc >= 0xfffffff0 && IS_M(env)) {
             /* We always get here via a jump, so know we are not in a
                conditional execution block.  */
-            gen_exception(EXCP_EXCEPTION_EXIT);
+            gen_exception_internal(EXCP_EXCEPTION_EXIT);
             dc->is_jmp = DISAS_UPDATE;
             break;
         }
@@ -10753,7 +10873,7 @@
         if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
             QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
                 if (bp->pc == dc->pc) {
-                    gen_exception_insn(dc, 0, EXCP_DEBUG);
+                    gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
                     /* Advance PC so that clearing the breakpoint will
                        invalidate this TB.  */
                     dc->pc += 2;
@@ -10833,9 +10953,9 @@
         if (dc->condjmp) {
             gen_set_condexec(dc);
             if (dc->is_jmp == DISAS_SWI) {
-                gen_exception(EXCP_SWI);
+                gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
             } else {
-                gen_exception(EXCP_DEBUG);
+                gen_exception_internal(EXCP_DEBUG);
             }
             gen_set_label(dc->condlabel);
         }
@@ -10845,11 +10965,11 @@
         }
         gen_set_condexec(dc);
         if (dc->is_jmp == DISAS_SWI && !dc->condjmp) {
-            gen_exception(EXCP_SWI);
+            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
         } else {
             /* FIXME: Single stepping a WFI insn will not halt
                the CPU.  */
-            gen_exception(EXCP_DEBUG);
+            gen_exception_internal(EXCP_DEBUG);
         }
     } else {
         /* While branches must always occur at the end of an IT block,
@@ -10881,7 +11001,7 @@
             gen_helper_wfe(cpu_env);
             break;
         case DISAS_SWI:
-            gen_exception(EXCP_SWI);
+            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
             break;
         }
         if (dc->condjmp) {
@@ -10939,6 +11059,11 @@
     int i;
     uint32_t psr;
 
+    if (is_a64(env)) {
+        aarch64_cpu_dump_state(cs, f, cpu_fprintf, flags);
+        return;
+    }
+
     for(i=0;i<16;i++) {
         cpu_fprintf(f, "R%02d=%08x", i, env->regs[i]);
         if ((i % 4) == 3)
diff --git a/target-arm/translate.h b/target-arm/translate.h
index 3525ffc..34328f4 100644
--- a/target-arm/translate.h
+++ b/target-arm/translate.h
@@ -20,13 +20,26 @@
 #if !defined(CONFIG_USER_ONLY)
     int user;
 #endif
-    int vfp_enabled;
+    bool cpacr_fpen; /* FP enabled via CPACR.FPEN */
+    bool vfp_enabled; /* FP enabled via FPSCR.EN */
     int vec_len;
     int vec_stride;
+    /* Immediate value in AArch32 SVC insn; must be set if is_jmp == DISAS_SWI
+     * so that top level loop can generate correct syndrome information.
+     */
+    uint32_t svc_imm;
     int aarch64;
     int current_pl;
     GHashTable *cp_regs;
     uint64_t features; /* CPU features bits */
+    /* Because unallocated encodings generate different exception syndrome
+     * information from traps due to FP being disabled, we can't do a single
+     * "is fp access disabled" check at a high level in the decode tree.
+     * To help in catching bugs where the access check was forgotten in some
+     * code path, we set this flag when the access check is done, and assert
+     * that it is set at the point where we actually touch the FP regs.
+     */
+    bool fp_access_checked;
 #define TMP_A64_MAX 16
     int tmp_a64_count;
     TCGv_i64 tmp_a64[TMP_A64_MAX];
@@ -59,6 +72,8 @@
                                         TranslationBlock *tb,
                                         bool search_pc);
 void gen_a64_set_pc_im(uint64_t val);
+void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
+                            fprintf_function cpu_fprintf, int flags);
 #else
 static inline void a64_translate_init(void)
 {
@@ -73,6 +88,12 @@
 static inline void gen_a64_set_pc_im(uint64_t val)
 {
 }
+
+static inline void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
+                                          fprintf_function cpu_fprintf,
+                                          int flags)
+{
+}
 #endif
 
 void arm_gen_test_cc(int cc, int label);
diff --git a/vl.c b/vl.c
index 9975e5a..db9ea90 100644
--- a/vl.c
+++ b/vl.c
@@ -2740,7 +2740,7 @@
                 if (!accel_list[i].available()) {
                     printf("%s not supported for this target\n",
                            accel_list[i].name);
-                    continue;
+                    break;
                 }
                 *(accel_list[i].allowed) = true;
                 ret = accel_list[i].init(machine);
diff --git a/xbzrle.c b/xbzrle.c
index fbcb35d..8e220bf 100644
--- a/xbzrle.c
+++ b/xbzrle.c
@@ -28,7 +28,7 @@
 {
     uint32_t zrun_len = 0, nzrun_len = 0;
     int d = 0, i = 0;
-    long res, xor;
+    long res;
     uint8_t *nzrun_start = NULL;
 
     g_assert(!(((uintptr_t)old_buf | (uintptr_t)new_buf | slen) %
@@ -93,9 +93,11 @@
         /* word at a time for speed, use of 32-bit long okay */
         if (!res) {
             /* truncation to 32-bit long okay */
-            long mask = (long)0x0101010101010101ULL;
+            unsigned long mask = (unsigned long)0x0101010101010101ULL;
             while (i < slen) {
-                xor = *(long *)(old_buf + i) ^ *(long *)(new_buf + i);
+                unsigned long xor;
+                xor = *(unsigned long *)(old_buf + i)
+                    ^ *(unsigned long *)(new_buf + i);
                 if ((xor - mask) & ~xor & (mask << 7)) {
                     /* found the end of an nzrun within the current long */
                     while (old_buf[i] != new_buf[i]) {