Merge remote-tracking branch 'remotes/borntraeger/tags/kvm-s390-20140227' into staging

Several features, fixes and cleanups for kvm/s390:

- sclp event facility: clean up the structure. This allows the use of
  realize/unrealize as well as migration support via vmsd (a minimal
  sketch of that pattern follows this list)
- reboot: Two fixes that make reboot much more reliable
- ipl: make ELF loading more robust
- flic interrupt controller: This allows floating interrupts to be
  migrated, as well as cleared on reset, etc.
- enable the async_pf feature of KVM on s390
- several sclp fixes and cleanups
- several sigp fixes and cleanups

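A minimal sketch of the realize + vmsd wiring referenced above, following
the same DeviceClass pattern the cg3 and hda-codec hunks in this merge use
(FooState, foo_realize and vmstate_foo are illustrative names only, not
code from this series):

    static void foo_realize(DeviceState *dev, Error **errp)
    {
        /* set up device resources here; report failures through errp */
    }

    static const VMStateDescription vmstate_foo = {
        .name = "foo",
        .version_id = 1,
        .minimum_version_id = 1,
        .fields = (VMStateField[]) {
            VMSTATE_UINT16(width, FooState), /* assumes a uint16_t 'width' member */
            VMSTATE_END_OF_LIST()
        }
    };

    static void foo_class_init(ObjectClass *klass, void *data)
    {
        DeviceClass *dc = DEVICE_CLASS(klass);

        dc->realize = foo_realize;   /* replaces legacy init/exit hooks */
        dc->vmsd    = &vmstate_foo;  /* device state migrated via vmsd */
    }
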
* remotes/borntraeger/tags/kvm-s390-20140227: (22 commits)
  s390x/ipl: Fix crash of ELF images with arbitrary entry points
  s390x/kvm: Rework priv instruction handlers
  s390x/kvm: Add missing SIGP CPU RESET order
  s390x/kvm: Rework SIGP INITIAL CPU RESET handler
  s390x/cpu: Use ioctl to reset state in the kernel
  s390-ccw.img: new binary rom to match latest fixes
  s390-ccw.img: Fix sporadic errors with ccw boot image - initialize css
  s390-ccw.img: Fix sporadic reboot hangs: Initialize next_idx
  s390x/event-facility: exploit realize/unrealize
  s390x/event-facility: add support for live migration
  s390x/event-facility: code restructure
  s390x/event-facility: some renaming
  s390x/sclp: Fixed setting of condition code register
  s390x/sclp: Add missing checks to SCLP handler
  s390x/sclp: Fixed the size of sccb and code parameter
  s390x/eventfacility: mask out commands
  s390x/virtio-hcall: Specification exception for illegal subcodes
  s390x/virtio-hcall: Add range check for hypervisor call
  s390x/kvm: Fixed bad SIGP SET-ARCHITECTURE handler
  s390x/async_pf: Check for apf extension and enable pfault
  ...

Conflicts:
	linux-headers/linux/kvm.h

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/Makefile b/Makefile
index a28a3c8..ea6c712 100644
--- a/Makefile
+++ b/Makefile
@@ -159,6 +159,7 @@
 SUBDIR_RULES=$(patsubst %,subdir-%, $(TARGET_DIRS))
 SOFTMMU_SUBDIR_RULES=$(filter %-softmmu,$(SUBDIR_RULES))
 
+$(SOFTMMU_SUBDIR_RULES): $(block-obj-y)
 $(SOFTMMU_SUBDIR_RULES): config-all-devices.mak
 
 subdir-%:
@@ -319,7 +320,7 @@
 BLOBS=bios.bin bios-256k.bin sgabios.bin vgabios.bin vgabios-cirrus.bin \
 vgabios-stdvga.bin vgabios-vmware.bin vgabios-qxl.bin \
 acpi-dsdt.aml q35-acpi-dsdt.aml \
-ppc_rom.bin openbios-sparc32 openbios-sparc64 openbios-ppc QEMU,tcx.bin \
+ppc_rom.bin openbios-sparc32 openbios-sparc64 openbios-ppc QEMU,tcx.bin QEMU,cgthree.bin \
 pxe-e1000.rom pxe-eepro100.rom pxe-ne2k_pci.rom \
 pxe-pcnet.rom pxe-rtl8139.rom pxe-virtio.rom \
 efi-e1000.rom efi-eepro100.rom efi-ne2k_pci.rom \
diff --git a/arch_init.c b/arch_init.c
index 80574a0..fe17279 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -122,7 +122,6 @@
 #define RAM_SAVE_FLAG_XBZRLE   0x40
 /* 0x80 is reserved in migration.h start with 0x100 next */
 
-
 static struct defconfig_file {
     const char *filename;
     /* Indicates it is an user config file (disabled by -no-user-config) */
@@ -133,6 +132,7 @@
     { NULL }, /* end of list */
 };
 
+static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];
 
 int qemu_read_default_config_files(bool userconfig)
 {
@@ -273,6 +273,34 @@
     return size;
 }
 
+/* This is the last block that we have visited searching for dirty pages
+ */
+static RAMBlock *last_seen_block;
+/* This is the last block from where we have sent data */
+static RAMBlock *last_sent_block;
+static ram_addr_t last_offset;
+static unsigned long *migration_bitmap;
+static uint64_t migration_dirty_pages;
+static uint32_t last_version;
+static bool ram_bulk_stage;
+
+/* Update the xbzrle cache to reflect a page that's been sent as all 0.
+ * The important thing is that a stale (not-yet-0'd) page be replaced
+ * by the new data.
+ * As a bonus, if the page wasn't in the cache it gets added so that
+ * when a small write is made into the 0'd page it gets XBZRLE sent
+ */
+static void xbzrle_cache_zero_page(ram_addr_t current_addr)
+{
+    if (ram_bulk_stage || !migrate_use_xbzrle()) {
+        return;
+    }
+
+    /* We don't care if this fails to allocate a new cache page
+     * as long as it updated an old one */
+    cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE);
+}
+
 #define ENCODING_FLAG_XBZRLE 0x1
 
 static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
@@ -329,18 +357,6 @@
     return bytes_sent;
 }
 
-
-/* This is the last block that we have visited serching for dirty pages
- */
-static RAMBlock *last_seen_block;
-/* This is the last block from where we have sent data */
-static RAMBlock *last_sent_block;
-static ram_addr_t last_offset;
-static unsigned long *migration_bitmap;
-static uint64_t migration_dirty_pages;
-static uint32_t last_version;
-static bool ram_bulk_stage;
-
 static inline
 ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
                                                  ram_addr_t start)
@@ -512,6 +528,7 @@
         } else {
             int ret;
             uint8_t *p;
+            bool send_async = true;
             int cont = (block == last_sent_block) ?
                 RAM_SAVE_FLAG_CONTINUE : 0;
 
@@ -522,6 +539,7 @@
             ret = ram_control_save_page(f, block->offset,
                                offset, TARGET_PAGE_SIZE, &bytes_sent);
 
+            current_addr = block->offset + offset;
             if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
                 if (ret != RAM_SAVE_CONTROL_DELAYED) {
                     if (bytes_sent > 0) {
@@ -536,19 +554,35 @@
                                             RAM_SAVE_FLAG_COMPRESS);
                 qemu_put_byte(f, 0);
                 bytes_sent++;
+                /* Must let xbzrle know, otherwise a previous (now 0'd) cached
+                 * page would be stale
+                 */
+                xbzrle_cache_zero_page(current_addr);
             } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
-                current_addr = block->offset + offset;
                 bytes_sent = save_xbzrle_page(f, p, current_addr, block,
                                               offset, cont, last_stage);
                 if (!last_stage) {
+                    /* We must send exactly what's in the xbzrle cache
+                     * even if the page wasn't xbzrle compressed, so that
+                     * it's right next time.
+                     */
                     p = get_cached_data(XBZRLE.cache, current_addr);
+
+                    /* Can't send this cached data async, since the cache page
+                     * might get updated before it gets to the wire
+                     */
+                    send_async = false;
                 }
             }
 
             /* XBZRLE overflow or normal page */
             if (bytes_sent == -1) {
                 bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
-                qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
+                if (send_async) {
+                    qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
+                } else {
+                    qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
+                }
                 bytes_sent += TARGET_PAGE_SIZE;
                 acct_info.norm_pages++;
             }
diff --git a/block/iscsi.c b/block/iscsi.c
index 41ec097..0a15f53 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -145,12 +145,13 @@
 
     if (iTask->retries-- > 0 && status == SCSI_STATUS_CHECK_CONDITION
         && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
+        error_report("iSCSI CheckCondition: %s", iscsi_get_error(iscsi));
         iTask->do_retry = 1;
         goto out;
     }
 
     if (status != SCSI_STATUS_GOOD) {
-        error_report("iSCSI: Failure. %s", iscsi_get_error(iscsi));
+        error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
     }
 
 out:
@@ -325,6 +326,7 @@
     }
 
     if (iTask.do_retry) {
+        iTask.complete = 0;
         goto retry;
     }
 
@@ -399,6 +401,7 @@
     }
 
     if (iTask.do_retry) {
+        iTask.complete = 0;
         goto retry;
     }
 
@@ -433,6 +436,7 @@
     }
 
     if (iTask.do_retry) {
+        iTask.complete = 0;
         goto retry;
     }
 
@@ -683,6 +687,7 @@
             scsi_free_scsi_task(iTask.task);
             iTask.task = NULL;
         }
+        iTask.complete = 0;
         goto retry;
     }
 
@@ -767,6 +772,7 @@
     }
 
     if (iTask.do_retry) {
+        iTask.complete = 0;
         goto retry;
     }
 
@@ -830,24 +836,26 @@
         qemu_coroutine_yield();
     }
 
+    if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
+        iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
+        iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE) {
+        /* WRITE SAME is not supported by the target */
+        iscsilun->has_write_same = false;
+        scsi_free_scsi_task(iTask.task);
+        return -ENOTSUP;
+    }
+
     if (iTask.task != NULL) {
         scsi_free_scsi_task(iTask.task);
         iTask.task = NULL;
     }
 
     if (iTask.do_retry) {
+        iTask.complete = 0;
         goto retry;
     }
 
     if (iTask.status != SCSI_STATUS_GOOD) {
-        if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
-            iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
-            iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE) {
-            /* WRITE SAME is not supported by the target */
-            iscsilun->has_write_same = false;
-            return -ENOTSUP;
-        }
-
         return -EIO;
     }
 
@@ -1060,7 +1068,7 @@
 };
 
 static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
-                                          int evpd, int pc, Error **errp)
+                                          int evpd, int pc, void **inq, Error **errp)
 {
     int full_size;
     struct scsi_task *task = NULL;
@@ -1079,14 +1087,19 @@
         }
     }
 
+    *inq = scsi_datain_unmarshall(task);
+    if (*inq == NULL) {
+        error_setg(errp, "iSCSI: failed to unmarshall inquiry datain blob");
+        goto fail;
+    }
+
     return task;
 
 fail:
     error_setg(errp, "iSCSI: Inquiry command failed : %s",
                iscsi_get_error(iscsi));
-    if (task) {
+    if (task != NULL) {
         scsi_free_scsi_task(task);
-        return NULL;
     }
     return NULL;
 }
@@ -1107,11 +1120,12 @@
     struct iscsi_url *iscsi_url = NULL;
     struct scsi_task *task = NULL;
     struct scsi_inquiry_standard *inq = NULL;
+    struct scsi_inquiry_supported_pages *inq_vpd;
     char *initiator_name = NULL;
     QemuOpts *opts;
     Error *local_err = NULL;
     const char *filename;
-    int ret;
+    int i, ret;
 
     if ((BDRV_SECTOR_SIZE % 512) != 0) {
         error_setg(errp, "iSCSI: Invalid BDRV_SECTOR_SIZE. "
@@ -1197,25 +1211,18 @@
 
     iscsilun->iscsi = iscsi;
     iscsilun->lun   = iscsi_url->lun;
-
-    task = iscsi_inquiry_sync(iscsi, iscsilun->lun, 0, 0, 36);
-
-    if (task == NULL || task->status != SCSI_STATUS_GOOD) {
-        error_setg(errp, "iSCSI: failed to send inquiry command.");
-        ret = -EINVAL;
-        goto out;
-    }
-
-    inq = scsi_datain_unmarshall(task);
-    if (inq == NULL) {
-        error_setg(errp, "iSCSI: Failed to unmarshall inquiry data.");
-        ret = -EINVAL;
-        goto out;
-    }
-
-    iscsilun->type = inq->periperal_device_type;
     iscsilun->has_write_same = true;
 
+    task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
+                            (void **) &inq, errp);
+    if (task == NULL) {
+        ret = -EINVAL;
+        goto out;
+    }
+    iscsilun->type = inq->periperal_device_type;
+    scsi_free_scsi_task(task);
+    task = NULL;
+
     iscsi_readcapacity_sync(iscsilun, &local_err);
     if (local_err != NULL) {
         error_propagate(errp, local_err);
@@ -1233,46 +1240,48 @@
         bs->sg = 1;
     }
 
-    if (iscsilun->lbpme) {
+    task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
+                            SCSI_INQUIRY_PAGECODE_SUPPORTED_VPD_PAGES,
+                            (void **) &inq_vpd, errp);
+    if (task == NULL) {
+        ret = -EINVAL;
+        goto out;
+    }
+    for (i = 0; i < inq_vpd->num_pages; i++) {
+        struct scsi_task *inq_task;
         struct scsi_inquiry_logical_block_provisioning *inq_lbp;
-        task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
-                                SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
-                                errp);
-        if (task == NULL) {
-            ret = -EINVAL;
-            goto out;
-        }
-        inq_lbp = scsi_datain_unmarshall(task);
-        if (inq_lbp == NULL) {
-            error_setg(errp, "iSCSI: failed to unmarshall inquiry datain blob");
-            ret = -EINVAL;
-            goto out;
-        }
-        memcpy(&iscsilun->lbp, inq_lbp,
-               sizeof(struct scsi_inquiry_logical_block_provisioning));
-        scsi_free_scsi_task(task);
-        task = NULL;
-    }
-
-    if (iscsilun->lbp.lbpu || iscsilun->lbp.lbpws) {
         struct scsi_inquiry_block_limits *inq_bl;
-        task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
-                                SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS, errp);
-        if (task == NULL) {
-            ret = -EINVAL;
-            goto out;
+        switch (inq_vpd->pages[i]) {
+        case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING:
+            inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
+                                        SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
+                                        (void **) &inq_lbp, errp);
+            if (inq_task == NULL) {
+                ret = -EINVAL;
+                goto out;
+            }
+            memcpy(&iscsilun->lbp, inq_lbp,
+                   sizeof(struct scsi_inquiry_logical_block_provisioning));
+            scsi_free_scsi_task(inq_task);
+            break;
+        case SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS:
+            inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
+                                    SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS,
+                                    (void **) &inq_bl, errp);
+            if (inq_task == NULL) {
+                ret = -EINVAL;
+                goto out;
+            }
+            memcpy(&iscsilun->bl, inq_bl,
+                   sizeof(struct scsi_inquiry_block_limits));
+            scsi_free_scsi_task(inq_task);
+            break;
+        default:
+            break;
         }
-        inq_bl = scsi_datain_unmarshall(task);
-        if (inq_bl == NULL) {
-            error_setg(errp, "iSCSI: failed to unmarshall inquiry datain blob");
-            ret = -EINVAL;
-            goto out;
-        }
-        memcpy(&iscsilun->bl, inq_bl,
-               sizeof(struct scsi_inquiry_block_limits));
-        scsi_free_scsi_task(task);
-        task = NULL;
     }
+    scsi_free_scsi_task(task);
+    task = NULL;
 
 #if defined(LIBISCSI_FEATURE_NOP_COUNTER)
     /* Set up a timer for sending out iSCSI NOPs */
diff --git a/configure b/configure
index 423f435..8ad03ea 100755
--- a/configure
+++ b/configure
@@ -1657,7 +1657,7 @@
             "Make sure to have the zlib libs and headers installed."
     fi
 fi
-libs_softmmu="$libs_softmmu -lz"
+LIBS="$LIBS -lz"
 
 ##########################################
 # libseccomp check
@@ -2249,13 +2249,21 @@
 fi
 
 ##########################################
-# netmap headers probe
+# netmap support probe
+# Apart from looking for netmap headers, we make sure that the host API version
+# supports the netmap backend (>=11). The upper bound (15) is meant to simulate
+# a minor/major version number. Minor new features will be marked with values up
+# to 15, and if something happens that requires a change to the backend we will
+# move above 15, submit the backend fixes and modify these two bounds.
 if test "$netmap" != "no" ; then
   cat > $TMPC << EOF
 #include <inttypes.h>
 #include <net/if.h>
 #include <net/netmap.h>
 #include <net/netmap_user.h>
+#if (NETMAP_API < 11) || (NETMAP_API > 15)
+#error
+#endif
 int main(void) { return 0; }
 EOF
   if compile_prog "" "" ; then
diff --git a/cpu-exec.c b/cpu-exec.c
index 8943493..1b0f617 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -477,7 +477,7 @@
                     }
 #elif defined(TARGET_ARM)
                     if (interrupt_request & CPU_INTERRUPT_FIQ
-                        && !(env->uncached_cpsr & CPSR_F)) {
+                        && !(env->daif & PSTATE_F)) {
                         env->exception_index = EXCP_FIQ;
                         cc->do_interrupt(cpu);
                         next_tb = 0;
@@ -493,7 +493,7 @@
                        pc contains a magic address.  */
                     if (interrupt_request & CPU_INTERRUPT_HARD
                         && ((IS_M(env) && env->regs[15] < 0xfffffff0)
-                            || !(env->uncached_cpsr & CPSR_I))) {
+                            || !(env->daif & PSTATE_I))) {
                         env->exception_index = EXCP_IRQ;
                         cc->do_interrupt(cpu);
                         next_tb = 0;
diff --git a/default-configs/sparc-softmmu.mak b/default-configs/sparc-softmmu.mak
index 8fc93dd..ab796b3 100644
--- a/default-configs/sparc-softmmu.mak
+++ b/default-configs/sparc-softmmu.mak
@@ -10,6 +10,7 @@
 CONFIG_PCNET_COMMON=y
 CONFIG_LANCE=y
 CONFIG_TCX=y
+CONFIG_CG3=y
 CONFIG_SLAVIO=y
 CONFIG_CS4231=y
 CONFIG_GRLIB=y
diff --git a/docs/rdma.txt b/docs/rdma.txt
index 2aca63b..1f5d9e9 100644
--- a/docs/rdma.txt
+++ b/docs/rdma.txt
@@ -66,7 +66,7 @@
 high-performance RDMA hardware using the following command:
 
 QEMU Monitor Command:
-$ migrate_set_capability x-rdma-pin-all on # disabled by default
+$ migrate_set_capability rdma-pin-all on # disabled by default
 
 Performing this action will cause all 8GB to be pinned, so if that's
 not what you want, then please ignore this step altogether.
@@ -93,12 +93,12 @@
 
 Next, on the destination machine, add the following to the QEMU command line:
 
-qemu ..... -incoming x-rdma:host:port
+qemu ..... -incoming rdma:host:port
 
 Finally, perform the actual migration on the source machine:
 
 QEMU Monitor Command:
-$ migrate -d x-rdma:host:port
+$ migrate -d rdma:host:port
 
 PERFORMANCE
 ===========
@@ -120,8 +120,8 @@
 active use and the VM itself is completely idle using the same 40 gbps
 infiniband link:
 
-1. x-rdma-pin-all disabled total time: approximately 7.5 seconds @ 9.5 Gbps
-2. x-rdma-pin-all enabled total time: approximately 4 seconds @ 26 Gbps
+1. rdma-pin-all disabled total time: approximately 7.5 seconds @ 9.5 Gbps
+2. rdma-pin-all enabled total time: approximately 4 seconds @ 26 Gbps
 
 These numbers would of course scale up to whatever size virtual machine
 you have to migrate using RDMA.
@@ -407,18 +407,14 @@
 
 TODO:
 =====
-1. 'migrate x-rdma:host:port' and '-incoming x-rdma' options will be
-   renamed to 'rdma' after the experimental phase of this work has
-   completed upstream.
-2. Currently, 'ulimit -l' mlock() limits as well as cgroups swap limits
+1. Currently, 'ulimit -l' mlock() limits as well as cgroups swap limits
    are not compatible with infinband memory pinning and will result in
    an aborted migration (but with the source VM left unaffected).
-3. Use of the recent /proc/<pid>/pagemap would likely speed up
+2. Use of the recent /proc/<pid>/pagemap would likely speed up
    the use of KSM and ballooning while using RDMA.
-4. Also, some form of balloon-device usage tracking would also
+3. Also, some form of balloon-device usage tracking would also
    help alleviate some issues.
-5. Move UNREGISTER requests to a separate thread.
-6. Use LRU to provide more fine-grained direction of UNREGISTER
+4. Use LRU to provide more fine-grained direction of UNREGISTER
    requests for unpinning memory in an overcommitted environment.
-7. Expose UNREGISTER support to the user by way of workload-specific
+5. Expose UNREGISTER support to the user by way of workload-specific
    hints about application behavior.
diff --git a/hw/arm/musicpal.c b/hw/arm/musicpal.c
index 50a3b8f..cce7127 100644
--- a/hw/arm/musicpal.c
+++ b/hw/arm/musicpal.c
@@ -92,8 +92,6 @@
 #define MP_ETH_CRDP3            0x4AC
 #define MP_ETH_CTDP0            0x4E0
 #define MP_ETH_CTDP1            0x4E4
-#define MP_ETH_CTDP2            0x4E8
-#define MP_ETH_CTDP3            0x4EC
 
 /* MII PHY access */
 #define MP_ETH_SMIR_DATA        0x0000FFFF
@@ -308,7 +306,7 @@
     case MP_ETH_CRDP0 ... MP_ETH_CRDP3:
         return s->rx_queue[(offset - MP_ETH_CRDP0)/4];
 
-    case MP_ETH_CTDP0 ... MP_ETH_CTDP3:
+    case MP_ETH_CTDP0 ... MP_ETH_CTDP1:
         return s->tx_queue[(offset - MP_ETH_CTDP0)/4];
 
     default:
@@ -362,7 +360,7 @@
             s->cur_rx[(offset - MP_ETH_CRDP0)/4] = value;
         break;
 
-    case MP_ETH_CTDP0 ... MP_ETH_CTDP3:
+    case MP_ETH_CTDP0 ... MP_ETH_CTDP1:
         s->tx_queue[(offset - MP_ETH_CTDP0)/4] = value;
         break;
     }
diff --git a/hw/arm/pxa2xx.c b/hw/arm/pxa2xx.c
index 45a99c8..904277a 100644
--- a/hw/arm/pxa2xx.c
+++ b/hw/arm/pxa2xx.c
@@ -272,11 +272,11 @@
         goto message;
 
     case 3:
-        s->cpu->env.uncached_cpsr =
-            ARM_CPU_MODE_SVC | CPSR_A | CPSR_F | CPSR_I;
+        s->cpu->env.uncached_cpsr = ARM_CPU_MODE_SVC;
+        s->cpu->env.daif = PSTATE_A | PSTATE_F | PSTATE_I;
         s->cpu->env.cp15.c1_sys = 0;
         s->cpu->env.cp15.c1_coproc = 0;
-        s->cpu->env.cp15.c2_base0 = 0;
+        s->cpu->env.cp15.ttbr0_el1 = 0;
         s->cpu->env.cp15.c3 = 0;
         s->pm_regs[PSSR >> 2] |= 0x8; /* Set STS */
         s->pm_regs[RCSR >> 2] |= 0x8; /* Set GPR */
diff --git a/hw/audio/hda-codec.c b/hw/audio/hda-codec.c
index 986f2a9..a67ca91 100644
--- a/hw/audio/hda-codec.c
+++ b/hw/audio/hda-codec.c
@@ -157,6 +157,9 @@
     uint32_t bpos;
 };
 
+#define TYPE_HDA_AUDIO "hda-audio"
+#define HDA_AUDIO(obj) OBJECT_CHECK(HDAAudioState, (obj), TYPE_HDA_AUDIO)
+
 struct HDAAudioState {
     HDACodecDevice hda;
     const char *name;
@@ -288,7 +291,7 @@
 
 static void hda_audio_command(HDACodecDevice *hda, uint32_t nid, uint32_t data)
 {
-    HDAAudioState *a = DO_UPCAST(HDAAudioState, hda, hda);
+    HDAAudioState *a = HDA_AUDIO(hda);
     HDAAudioStream *st;
     const desc_node *node = NULL;
     const desc_param *param;
@@ -448,7 +451,7 @@
 
 static void hda_audio_stream(HDACodecDevice *hda, uint32_t stnr, bool running, bool output)
 {
-    HDAAudioState *a = DO_UPCAST(HDAAudioState, hda, hda);
+    HDAAudioState *a = HDA_AUDIO(hda);
     int s;
 
     a->running_compat[stnr] = running;
@@ -469,7 +472,7 @@
 
 static int hda_audio_init(HDACodecDevice *hda, const struct desc_codec *desc)
 {
-    HDAAudioState *a = DO_UPCAST(HDAAudioState, hda, hda);
+    HDAAudioState *a = HDA_AUDIO(hda);
     HDAAudioStream *st;
     const desc_node *node;
     const desc_param *param;
@@ -514,7 +517,7 @@
 
 static int hda_audio_exit(HDACodecDevice *hda)
 {
-    HDAAudioState *a = DO_UPCAST(HDAAudioState, hda, hda);
+    HDAAudioState *a = HDA_AUDIO(hda);
     HDAAudioStream *st;
     int i;
 
@@ -561,7 +564,7 @@
 
 static void hda_audio_reset(DeviceState *dev)
 {
-    HDAAudioState *a = DO_UPCAST(HDAAudioState, hda.qdev, dev);
+    HDAAudioState *a = HDA_AUDIO(dev);
     HDAAudioStream *st;
     int i;
 
@@ -613,7 +616,7 @@
 
 static int hda_audio_init_output(HDACodecDevice *hda)
 {
-    HDAAudioState *a = DO_UPCAST(HDAAudioState, hda, hda);
+    HDAAudioState *a = HDA_AUDIO(hda);
 
     if (!a->mixer) {
         return hda_audio_init(hda, &output_nomixemu);
@@ -624,7 +627,7 @@
 
 static int hda_audio_init_duplex(HDACodecDevice *hda)
 {
-    HDAAudioState *a = DO_UPCAST(HDAAudioState, hda, hda);
+    HDAAudioState *a = HDA_AUDIO(hda);
 
     if (!a->mixer) {
         return hda_audio_init(hda, &duplex_nomixemu);
@@ -635,7 +638,7 @@
 
 static int hda_audio_init_micro(HDACodecDevice *hda)
 {
-    HDAAudioState *a = DO_UPCAST(HDAAudioState, hda, hda);
+    HDAAudioState *a = HDA_AUDIO(hda);
 
     if (!a->mixer) {
         return hda_audio_init(hda, &micro_nomixemu);
@@ -644,25 +647,39 @@
     }
 }
 
+static void hda_audio_base_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    HDACodecDeviceClass *k = HDA_CODEC_DEVICE_CLASS(klass);
+
+    k->exit = hda_audio_exit;
+    k->command = hda_audio_command;
+    k->stream = hda_audio_stream;
+    set_bit(DEVICE_CATEGORY_SOUND, dc->categories);
+    dc->reset = hda_audio_reset;
+    dc->vmsd = &vmstate_hda_audio;
+    dc->props = hda_audio_properties;
+}
+
+static const TypeInfo hda_audio_info = {
+    .name          = TYPE_HDA_AUDIO,
+    .parent        = TYPE_HDA_CODEC_DEVICE,
+    .class_init    = hda_audio_base_class_init,
+    .abstract      = true,
+};
+
 static void hda_audio_output_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     HDACodecDeviceClass *k = HDA_CODEC_DEVICE_CLASS(klass);
 
     k->init = hda_audio_init_output;
-    k->exit = hda_audio_exit;
-    k->command = hda_audio_command;
-    k->stream = hda_audio_stream;
-    set_bit(DEVICE_CATEGORY_SOUND, dc->categories);
     dc->desc = "HDA Audio Codec, output-only (line-out)";
-    dc->reset = hda_audio_reset;
-    dc->vmsd = &vmstate_hda_audio;
-    dc->props = hda_audio_properties;
 }
 
 static const TypeInfo hda_audio_output_info = {
     .name          = "hda-output",
-    .parent        = TYPE_HDA_CODEC_DEVICE,
+    .parent        = TYPE_HDA_AUDIO,
     .instance_size = sizeof(HDAAudioState),
     .class_init    = hda_audio_output_class_init,
 };
@@ -673,19 +690,12 @@
     HDACodecDeviceClass *k = HDA_CODEC_DEVICE_CLASS(klass);
 
     k->init = hda_audio_init_duplex;
-    k->exit = hda_audio_exit;
-    k->command = hda_audio_command;
-    k->stream = hda_audio_stream;
-    set_bit(DEVICE_CATEGORY_SOUND, dc->categories);
     dc->desc = "HDA Audio Codec, duplex (line-out, line-in)";
-    dc->reset = hda_audio_reset;
-    dc->vmsd = &vmstate_hda_audio;
-    dc->props = hda_audio_properties;
 }
 
 static const TypeInfo hda_audio_duplex_info = {
     .name          = "hda-duplex",
-    .parent        = TYPE_HDA_CODEC_DEVICE,
+    .parent        = TYPE_HDA_AUDIO,
     .instance_size = sizeof(HDAAudioState),
     .class_init    = hda_audio_duplex_class_init,
 };
@@ -696,25 +706,19 @@
     HDACodecDeviceClass *k = HDA_CODEC_DEVICE_CLASS(klass);
 
     k->init = hda_audio_init_micro;
-    k->exit = hda_audio_exit;
-    k->command = hda_audio_command;
-    k->stream = hda_audio_stream;
-    set_bit(DEVICE_CATEGORY_SOUND, dc->categories);
     dc->desc = "HDA Audio Codec, duplex (speaker, microphone)";
-    dc->reset = hda_audio_reset;
-    dc->vmsd = &vmstate_hda_audio;
-    dc->props = hda_audio_properties;
 }
 
 static const TypeInfo hda_audio_micro_info = {
     .name          = "hda-micro",
-    .parent        = TYPE_HDA_CODEC_DEVICE,
+    .parent        = TYPE_HDA_AUDIO,
     .instance_size = sizeof(HDAAudioState),
     .class_init    = hda_audio_micro_class_init,
 };
 
 static void hda_audio_register_types(void)
 {
+    type_register_static(&hda_audio_info);
     type_register_static(&hda_audio_output_info);
     type_register_static(&hda_audio_duplex_info);
     type_register_static(&hda_audio_micro_info);
diff --git a/hw/display/Makefile.objs b/hw/display/Makefile.objs
index 540df82..7ed76a9 100644
--- a/hw/display/Makefile.objs
+++ b/hw/display/Makefile.objs
@@ -28,6 +28,7 @@
 obj-$(CONFIG_PXA2XX) += pxa2xx_lcd.o
 obj-$(CONFIG_SM501) += sm501.o
 obj-$(CONFIG_TCX) += tcx.o
+obj-$(CONFIG_CG3) += cg3.o
 
 obj-$(CONFIG_VGA) += vga.o
 
diff --git a/hw/display/cg3.c b/hw/display/cg3.c
new file mode 100644
index 0000000..6db8ca3
--- /dev/null
+++ b/hw/display/cg3.c
@@ -0,0 +1,385 @@
+/*
+ * QEMU CG3 Frame buffer
+ *
+ * Copyright (c) 2012 Bob Breuer
+ * Copyright (c) 2013 Mark Cave-Ayland
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+#include "ui/console.h"
+#include "hw/sysbus.h"
+#include "hw/loader.h"
+
+/* Change to 1 to enable debugging */
+#define DEBUG_CG3 0
+
+#define CG3_ROM_FILE  "QEMU,cgthree.bin"
+#define FCODE_MAX_ROM_SIZE 0x10000
+
+#define CG3_REG_SIZE            0x20
+
+#define CG3_REG_BT458_ADDR      0x0
+#define CG3_REG_BT458_COLMAP    0x4
+#define CG3_REG_FBC_CTRL        0x10
+#define CG3_REG_FBC_STATUS      0x11
+#define CG3_REG_FBC_CURSTART    0x12
+#define CG3_REG_FBC_CUREND      0x13
+#define CG3_REG_FBC_VCTRL       0x14
+
+/* Control register flags */
+#define CG3_CR_ENABLE_INTS      0x80
+
+/* Status register flags */
+#define CG3_SR_PENDING_INT      0x80
+#define CG3_SR_1152_900_76_B    0x60
+#define CG3_SR_ID_COLOR         0x01
+
+#define CG3_VRAM_SIZE 0x100000
+#define CG3_VRAM_OFFSET 0x800000
+
+#define DPRINTF(fmt, ...) do { \
+    if (DEBUG_CG3) { \
+        printf("CG3: " fmt , ## __VA_ARGS__); \
+    } \
+} while (0);
+
+#define TYPE_CG3 "cgthree"
+#define CG3(obj) OBJECT_CHECK(CG3State, (obj), TYPE_CG3)
+
+typedef struct CG3State {
+    SysBusDevice parent_obj;
+
+    QemuConsole *con;
+    qemu_irq irq;
+    hwaddr prom_addr;
+    MemoryRegion vram_mem;
+    MemoryRegion rom;
+    MemoryRegion reg;
+    uint32_t vram_size;
+    int full_update;
+    uint8_t regs[16];
+    uint8_t r[256], g[256], b[256];
+    uint16_t width, height, depth;
+    uint8_t dac_index, dac_state;
+} CG3State;
+
+static void cg3_update_display(void *opaque)
+{
+    CG3State *s = opaque;
+    DisplaySurface *surface = qemu_console_surface(s->con);
+    const uint8_t *pix;
+    uint32_t *data;
+    uint32_t dval;
+    int x, y, y_start;
+    unsigned int width, height;
+    ram_addr_t page, page_min, page_max;
+
+    if (surface_bits_per_pixel(surface) != 32) {
+        return;
+    }
+    width = s->width;
+    height = s->height;
+
+    y_start = -1;
+    page_min = -1;
+    page_max = 0;
+    page = 0;
+    pix = memory_region_get_ram_ptr(&s->vram_mem);
+    data = (uint32_t *)surface_data(surface);
+
+    for (y = 0; y < height; y++) {
+        int update = s->full_update;
+
+        page = (y * width) & TARGET_PAGE_MASK;
+        update |= memory_region_get_dirty(&s->vram_mem, page, page + width,
+                                          DIRTY_MEMORY_VGA);
+        if (update) {
+            if (y_start < 0) {
+                y_start = y;
+            }
+            if (page < page_min) {
+                page_min = page;
+            }
+            if (page > page_max) {
+                page_max = page;
+            }
+
+            for (x = 0; x < width; x++) {
+                dval = *pix++;
+                dval = (s->r[dval] << 16) | (s->g[dval] << 8) | s->b[dval];
+                *data++ = dval;
+            }
+        } else {
+            if (y_start >= 0) {
+                dpy_gfx_update(s->con, 0, y_start, s->width, y - y_start);
+                y_start = -1;
+            }
+            pix += width;
+            data += width;
+        }
+    }
+    s->full_update = 0;
+    if (y_start >= 0) {
+        dpy_gfx_update(s->con, 0, y_start, s->width, y - y_start);
+    }
+    if (page_max >= page_min) {
+        memory_region_reset_dirty(&s->vram_mem,
+                              page_min, page_max - page_min + TARGET_PAGE_SIZE,
+                              DIRTY_MEMORY_VGA);
+    }
+    /* vsync interrupt? */
+    if (s->regs[0] & CG3_CR_ENABLE_INTS) {
+        s->regs[1] |= CG3_SR_PENDING_INT;
+        qemu_irq_raise(s->irq);
+    }
+}
+
+static void cg3_invalidate_display(void *opaque)
+{
+    CG3State *s = opaque;
+
+    memory_region_set_dirty(&s->vram_mem, 0, CG3_VRAM_SIZE);
+}
+
+static uint64_t cg3_reg_read(void *opaque, hwaddr addr, unsigned size)
+{
+    CG3State *s = opaque;
+    int val;
+
+    switch (addr) {
+    case CG3_REG_BT458_ADDR:
+    case CG3_REG_BT458_COLMAP:
+        val = 0;
+        break;
+    case CG3_REG_FBC_CTRL:
+        val = s->regs[0];
+        break;
+    case CG3_REG_FBC_STATUS:
+        /* monitor ID 6, board type = 1 (color) */
+        val = s->regs[1] | CG3_SR_1152_900_76_B | CG3_SR_ID_COLOR;
+        break;
+    case CG3_REG_FBC_CURSTART ... CG3_REG_SIZE:
+        val = s->regs[addr - 0x10];
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP,
+                  "cg3: Unimplemented register read "
+                  "reg 0x%" HWADDR_PRIx " size 0x%x\n",
+                  addr, size);
+        val = 0;
+        break;
+    }
+    DPRINTF("read %02x from reg %" HWADDR_PRIx "\n", val, addr);
+    return val;
+}
+
+static void cg3_reg_write(void *opaque, hwaddr addr, uint64_t val,
+                          unsigned size)
+{
+    CG3State *s = opaque;
+    uint8_t regval;
+    int i;
+
+    DPRINTF("write %" PRIx64 " to reg %" HWADDR_PRIx " size %d\n",
+            val, addr, size);
+
+    switch (addr) {
+    case CG3_REG_BT458_ADDR:
+        s->dac_index = val;
+        s->dac_state = 0;
+        break;
+    case CG3_REG_BT458_COLMAP:
+        /* This register can be written to as either a long word or a byte */
+        if (size == 1) {
+            val <<= 24;
+        }
+
+        for (i = 0; i < size; i++) {
+            regval = val >> 24;
+
+            switch (s->dac_state) {
+            case 0:
+                s->r[s->dac_index] = regval;
+                s->dac_state++;
+                break;
+            case 1:
+                s->g[s->dac_index] = regval;
+                s->dac_state++;
+                break;
+            case 2:
+                s->b[s->dac_index] = regval;
+                /* Index autoincrement */
+                s->dac_index = (s->dac_index + 1) & 0xff;
+            default:
+                s->dac_state = 0;
+                break;
+            }
+            val <<= 8;
+        }
+        s->full_update = 1;
+        break;
+    case CG3_REG_FBC_CTRL:
+        s->regs[0] = val;
+        break;
+    case CG3_REG_FBC_STATUS:
+        if (s->regs[1] & CG3_SR_PENDING_INT) {
+            /* clear interrupt */
+            s->regs[1] &= ~CG3_SR_PENDING_INT;
+            qemu_irq_lower(s->irq);
+        }
+        break;
+    case CG3_REG_FBC_CURSTART ... CG3_REG_SIZE:
+        s->regs[addr - 0x10] = val;
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP,
+                  "cg3: Unimplemented register write "
+                  "reg 0x%" HWADDR_PRIx " size 0x%x value 0x%" PRIx64 "\n",
+                  addr, size, val);
+        break;
+    }
+}
+
+static const MemoryRegionOps cg3_reg_ops = {
+    .read = cg3_reg_read,
+    .write = cg3_reg_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 1,
+        .max_access_size = 4,
+    },
+};
+
+static const GraphicHwOps cg3_ops = {
+    .invalidate = cg3_invalidate_display,
+    .gfx_update = cg3_update_display,
+};
+
+static void cg3_realizefn(DeviceState *dev, Error **errp)
+{
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+    CG3State *s = CG3(dev);
+    int ret;
+    char *fcode_filename;
+
+    /* FCode ROM */
+    memory_region_init_ram(&s->rom, NULL, "cg3.prom", FCODE_MAX_ROM_SIZE);
+    vmstate_register_ram_global(&s->rom);
+    memory_region_set_readonly(&s->rom, true);
+    sysbus_init_mmio(sbd, &s->rom);
+
+    fcode_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, CG3_ROM_FILE);
+    if (fcode_filename) {
+        ret = load_image_targphys(fcode_filename, s->prom_addr,
+                                  FCODE_MAX_ROM_SIZE);
+        if (ret < 0 || ret > FCODE_MAX_ROM_SIZE) {
+            error_report("cg3: could not load prom '%s'", CG3_ROM_FILE);
+        }
+    }
+
+    memory_region_init_io(&s->reg, NULL, &cg3_reg_ops, s, "cg3.reg",
+                          CG3_REG_SIZE);
+    sysbus_init_mmio(sbd, &s->reg);
+
+    memory_region_init_ram(&s->vram_mem, NULL, "cg3.vram", s->vram_size);
+    vmstate_register_ram_global(&s->vram_mem);
+    sysbus_init_mmio(sbd, &s->vram_mem);
+
+    sysbus_init_irq(sbd, &s->irq);
+
+    s->con = graphic_console_init(DEVICE(dev), &cg3_ops, s);
+    qemu_console_resize(s->con, s->width, s->height);
+}
+
+static int vmstate_cg3_post_load(void *opaque, int version_id)
+{
+    CG3State *s = opaque;
+
+    cg3_invalidate_display(s);
+
+    return 0;
+}
+
+static const VMStateDescription vmstate_cg3 = {
+    .name = "cg3",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .post_load = vmstate_cg3_post_load,
+    .fields    = (VMStateField[]) {
+        VMSTATE_UINT16(height, CG3State),
+        VMSTATE_UINT16(width, CG3State),
+        VMSTATE_UINT16(depth, CG3State),
+        VMSTATE_BUFFER(r, CG3State),
+        VMSTATE_BUFFER(g, CG3State),
+        VMSTATE_BUFFER(b, CG3State),
+        VMSTATE_UINT8(dac_index, CG3State),
+        VMSTATE_UINT8(dac_state, CG3State),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void cg3_reset(DeviceState *d)
+{
+    CG3State *s = CG3(d);
+
+    /* Initialize palette */
+    memset(s->r, 0, 256);
+    memset(s->g, 0, 256);
+    memset(s->b, 0, 256);
+
+    s->dac_state = 0;
+    s->full_update = 1;
+    qemu_irq_lower(s->irq);
+}
+
+static Property cg3_properties[] = {
+    DEFINE_PROP_UINT32("vram-size",    CG3State, vram_size, -1),
+    DEFINE_PROP_UINT16("width",        CG3State, width,     -1),
+    DEFINE_PROP_UINT16("height",       CG3State, height,    -1),
+    DEFINE_PROP_UINT16("depth",        CG3State, depth,     -1),
+    DEFINE_PROP_UINT64("prom-addr",    CG3State, prom_addr, -1),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void cg3_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->realize = cg3_realizefn;
+    dc->reset = cg3_reset;
+    dc->vmsd = &vmstate_cg3;
+    dc->props = cg3_properties;
+}
+
+static const TypeInfo cg3_info = {
+    .name          = TYPE_CG3,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(CG3State),
+    .class_init    = cg3_class_init,
+};
+
+static void cg3_register_types(void)
+{
+    type_register_static(&cg3_info);
+}
+
+type_init(cg3_register_types)
diff --git a/hw/dma/pl330.c b/hw/dma/pl330.c
index 401399d..608a58c 100644
--- a/hw/dma/pl330.c
+++ b/hw/dma/pl330.c
@@ -227,7 +227,8 @@
 };
 
 struct PL330State {
-    SysBusDevice busdev;
+    SysBusDevice parent_obj;
+
     MemoryRegion iomem;
     qemu_irq irq_abort;
     qemu_irq *irq;
@@ -577,7 +578,7 @@
 
 static inline void pl330_fault(PL330Chan *ch, uint32_t flags)
 {
-    DB_PRINT("ch: %p, flags: %x\n", ch, flags);
+    DB_PRINT("ch: %p, flags: %" PRIx32 "\n", ch, flags);
     ch->fault_type |= flags;
     if (ch->state == pl330_chan_fault) {
         return;
@@ -600,10 +601,12 @@
  *   LEN - number of elements in ARGS array
  */
 
-static void pl330_dmaaddh(PL330Chan *ch, uint8_t opcode, uint8_t *args, int len)
+static void pl330_dmaadxh(PL330Chan *ch, uint8_t *args, bool ra, bool neg)
 {
-    uint16_t im = (((uint16_t)args[1]) << 8) | ((uint16_t)args[0]);
-    uint8_t ra = (opcode >> 1) & 1;
+    uint32_t im = (args[1] << 8) | args[0];
+    if (neg) {
+        im |= 0xffffu << 16;
+    }
 
     if (ch->is_manager) {
         pl330_fault(ch, PL330_FAULT_UNDEF_INSTR);
@@ -616,6 +619,16 @@
     }
 }
 
+static void pl330_dmaaddh(PL330Chan *ch, uint8_t opcode, uint8_t *args, int len)
+{
+    pl330_dmaadxh(ch, args, extract32(opcode, 1, 1), false);
+}
+
+static void pl330_dmaadnh(PL330Chan *ch, uint8_t opcode, uint8_t *args, int len)
+{
+    pl330_dmaadxh(ch, args, extract32(opcode, 1, 1), true);
+}
+
 static void pl330_dmaend(PL330Chan *ch, uint8_t opcode,
                          uint8_t *args, int len)
 {
@@ -723,7 +736,8 @@
     ch->stall = pl330_queue_put_insn(&ch->parent->read_queue, ch->src,
                                     size, num, inc, 0, ch->tag);
     if (!ch->stall) {
-        DB_PRINT("channel:%d address:%08x size:%d num:%d %c\n",
+        DB_PRINT("channel:%" PRId8 " address:%08" PRIx32 " size:%" PRIx32
+                 " num:%" PRId32 " %c\n",
                  ch->tag, ch->src, size, num, inc ? 'Y' : 'N');
         ch->src += inc ? size * num - (ch->src & (size - 1)) : 0;
     }
@@ -868,9 +882,10 @@
     }
     if (ch->parent->inten & (1 << ev_id)) {
         ch->parent->int_status |= (1 << ev_id);
-        DB_PRINT("event interrupt raised %d\n", ev_id);
+        DB_PRINT("event interrupt raised %" PRId8 "\n", ev_id);
         qemu_irq_raise(ch->parent->irq[ev_id]);
     }
+    DB_PRINT("event raised %" PRId8 "\n", ev_id);
     ch->parent->ev_status |= (1 << ev_id);
 }
 
@@ -895,7 +910,8 @@
     ch->stall = pl330_queue_put_insn(&ch->parent->write_queue, ch->dst,
                                     size, num, inc, 0, ch->tag);
     if (!ch->stall) {
-        DB_PRINT("channel:%d address:%08x size:%d num:%d %c\n",
+        DB_PRINT("channel:%" PRId8 " address:%08" PRIx32 " size:%" PRIx32
+                 " num:%" PRId32 " %c\n",
                  ch->tag, ch->dst, size, num, inc ? 'Y' : 'N');
         ch->dst += inc ? size * num - (ch->dst & (size - 1)) : 0;
     }
@@ -972,6 +988,7 @@
             }
         }
         ch->parent->ev_status &= ~(1 << ev_id);
+        DB_PRINT("event lowered %" PRIx8 "\n", ev_id);
     } else {
         ch->stall = 1;
     }
@@ -1037,6 +1054,7 @@
 /* NULL terminated array of the instruction descriptions. */
 static const PL330InsnDesc insn_desc[] = {
     { .opcode = 0x54, .opmask = 0xFD, .size = 3, .exec = pl330_dmaaddh, },
+    { .opcode = 0x5c, .opmask = 0xFD, .size = 3, .exec = pl330_dmaadnh, },
     { .opcode = 0x00, .opmask = 0xFF, .size = 1, .exec = pl330_dmaend, },
     { .opcode = 0x35, .opmask = 0xFF, .size = 2, .exec = pl330_dmaflushp, },
     { .opcode = 0xA0, .opmask = 0xFD, .size = 6, .exec = pl330_dmago, },
@@ -1108,7 +1126,6 @@
             ch->state != pl330_chan_waiting_periph &&
             ch->state != pl330_chan_at_barrier &&
             ch->state != pl330_chan_waiting_event) {
-        DB_PRINT("%d\n", ch->state);
         return 0;
     }
     ch->stall = 0;
@@ -1155,7 +1172,7 @@
 
         dma_memory_read(&address_space_memory, q->addr, buf, len);
         if (PL330_ERR_DEBUG > 1) {
-            DB_PRINT("PL330 read from memory @%08x (size = %08x):\n",
+            DB_PRINT("PL330 read from memory @%08" PRIx32 " (size = %08x):\n",
                       q->addr, len);
             qemu_hexdump((char *)buf, stderr, "", len);
         }
@@ -1187,8 +1204,8 @@
         if (fifo_res == PL330_FIFO_OK || q->z) {
             dma_memory_write(&address_space_memory, q->addr, buf, len);
             if (PL330_ERR_DEBUG > 1) {
-                DB_PRINT("PL330 read from memory @%08x (size = %08x):\n",
-                         q->addr, len);
+                DB_PRINT("PL330 read from memory @%08" PRIx32
+                         " (size = %08x):\n", q->addr, len);
                 qemu_hexdump((char *)buf, stderr, "", len);
             }
             if (q->inc) {
@@ -1277,7 +1294,7 @@
     args[2] = (s->dbg[1] >>  8) & 0xff;
     args[3] = (s->dbg[1] >> 16) & 0xff;
     args[4] = (s->dbg[1] >> 24) & 0xff;
-    DB_PRINT("chan id: %d\n", chan_id);
+    DB_PRINT("chan id: %" PRIx8 "\n", chan_id);
     if (s->dbg[0] & 1) {
         ch = &s->chan[chan_id];
     } else {
@@ -1311,7 +1328,7 @@
                               uint64_t value, unsigned size)
 {
     PL330State *s = (PL330State *) opaque;
-    uint32_t i;
+    int i;
 
     DB_PRINT("addr: %08x data: %08x\n", (unsigned)offset, (unsigned)value);
 
@@ -1467,8 +1484,8 @@
 static uint64_t pl330_iomem_read(void *opaque, hwaddr offset,
         unsigned size)
 {
-    int ret = pl330_iomem_read_imp(opaque, offset);
-    DB_PRINT("addr: %08x data: %08x\n", (unsigned)offset, ret);
+    uint32_t ret = pl330_iomem_read_imp(opaque, offset);
+    DB_PRINT("addr: %08" HWADDR_PRIx " data: %08" PRIx32 "\n", offset, ret);
     return ret;
 }
 
@@ -1554,7 +1571,7 @@
         s->cfg[1] |= 5;
         break;
     default:
-        error_setg(errp, "Bad value for i-cache_len property: %d\n",
+        error_setg(errp, "Bad value for i-cache_len property: %" PRIx8 "\n",
                    s->i_cache_len);
         return;
     }
@@ -1589,7 +1606,7 @@
         s->cfg[CFG_CRD] |= 0x4;
         break;
     default:
-        error_setg(errp, "Bad value for data_width property: %d\n",
+        error_setg(errp, "Bad value for data_width property: %" PRIx8 "\n",
                    s->data_width);
         return;
     }
@@ -1602,7 +1619,7 @@
 
     pl330_queue_init(&s->read_queue, s->rd_q_dep, s);
     pl330_queue_init(&s->write_queue, s->wr_q_dep, s);
-    pl330_fifo_init(&s->fifo, s->data_buffer_dep);
+    pl330_fifo_init(&s->fifo, s->data_width / 4 * s->data_buffer_dep);
 }
 
 static Property pl330_properties[] = {
diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
index 9686801..a825871 100644
--- a/hw/i386/kvm/pci-assign.c
+++ b/hw/i386/kvm/pci-assign.c
@@ -743,6 +743,7 @@
         goto fail;
     }
 
+    driver[r] = 0;
     ns = strrchr(driver, '/');
     if (!ns) {
         goto fail;
diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c
index 59a3da5..100b6bf 100644
--- a/hw/intc/arm_gic_kvm.c
+++ b/hw/intc/arm_gic_kvm.c
@@ -3,6 +3,7 @@
  *
  * Copyright (c) 2012 Linaro Limited
  * Written by Peter Maydell
+ * Save/Restore logic added by Christoffer Dall.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -23,6 +24,20 @@
 #include "kvm_arm.h"
 #include "gic_internal.h"
 
+//#define DEBUG_GIC_KVM
+
+#ifdef DEBUG_GIC_KVM
+static const int debug_gic_kvm = 1;
+#else
+static const int debug_gic_kvm = 0;
+#endif
+
+#define DPRINTF(fmt, ...) do { \
+        if (debug_gic_kvm) { \
+            printf("arm_gic: " fmt , ## __VA_ARGS__); \
+        } \
+    } while (0)
+
 #define TYPE_KVM_ARM_GIC "kvm-arm-gic"
 #define KVM_ARM_GIC(obj) \
      OBJECT_CHECK(GICState, (obj), TYPE_KVM_ARM_GIC)
@@ -72,14 +87,419 @@
     kvm_set_irq(kvm_state, kvm_irq, !!level);
 }
 
+static bool kvm_arm_gic_can_save_restore(GICState *s)
+{
+    return s->dev_fd >= 0;
+}
+
+static void kvm_gic_access(GICState *s, int group, int offset,
+                                   int cpu, uint32_t *val, bool write)
+{
+    struct kvm_device_attr attr;
+    int type;
+    int err;
+
+    cpu = cpu & 0xff;
+
+    attr.flags = 0;
+    attr.group = group;
+    attr.attr = (((uint64_t)cpu << KVM_DEV_ARM_VGIC_CPUID_SHIFT) &
+                 KVM_DEV_ARM_VGIC_CPUID_MASK) |
+                (((uint64_t)offset << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) &
+                 KVM_DEV_ARM_VGIC_OFFSET_MASK);
+    attr.addr = (uintptr_t)val;
+
+    if (write) {
+        type = KVM_SET_DEVICE_ATTR;
+    } else {
+        type = KVM_GET_DEVICE_ATTR;
+    }
+
+    err = kvm_device_ioctl(s->dev_fd, type, &attr);
+    if (err < 0) {
+        fprintf(stderr, "KVM_{SET/GET}_DEVICE_ATTR failed: %s\n",
+                strerror(-err));
+        abort();
+    }
+}
+
+static void kvm_gicd_access(GICState *s, int offset, int cpu,
+                            uint32_t *val, bool write)
+{
+    kvm_gic_access(s, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+                   offset, cpu, val, write);
+}
+
+static void kvm_gicc_access(GICState *s, int offset, int cpu,
+                            uint32_t *val, bool write)
+{
+    kvm_gic_access(s, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
+                   offset, cpu, val, write);
+}
+
+#define for_each_irq_reg(_ctr, _max_irq, _field_width) \
+    for (_ctr = 0; _ctr < ((_max_irq) / (32 / (_field_width))); _ctr++)
+
+/*
+ * Translate from the in-kernel field for an IRQ value to/from the qemu
+ * representation.
+ */
+typedef void (*vgic_translate_fn)(GICState *s, int irq, int cpu,
+                                  uint32_t *field, bool to_kernel);
+
+/* synthetic translate function used for clear/set registers to completely
+ * clear a setting using a clear-register before setting the remaining bits
+ * using a set-register */
+static void translate_clear(GICState *s, int irq, int cpu,
+                            uint32_t *field, bool to_kernel)
+{
+    if (to_kernel) {
+        *field = ~0;
+    } else {
+        /* does not make sense: qemu model doesn't use set/clear regs */
+        abort();
+    }
+}
+
+static void translate_enabled(GICState *s, int irq, int cpu,
+                              uint32_t *field, bool to_kernel)
+{
+    int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK;
+
+    if (to_kernel) {
+        *field = GIC_TEST_ENABLED(irq, cm);
+    } else {
+        if (*field & 1) {
+            GIC_SET_ENABLED(irq, cm);
+        }
+    }
+}
+
+static void translate_pending(GICState *s, int irq, int cpu,
+                              uint32_t *field, bool to_kernel)
+{
+    int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK;
+
+    if (to_kernel) {
+        *field = gic_test_pending(s, irq, cm);
+    } else {
+        if (*field & 1) {
+            GIC_SET_PENDING(irq, cm);
+            /* TODO: Capture if level-line is held high in the kernel */
+        }
+    }
+}
+
+static void translate_active(GICState *s, int irq, int cpu,
+                             uint32_t *field, bool to_kernel)
+{
+    int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK;
+
+    if (to_kernel) {
+        *field = GIC_TEST_ACTIVE(irq, cm);
+    } else {
+        if (*field & 1) {
+            GIC_SET_ACTIVE(irq, cm);
+        }
+    }
+}
+
+static void translate_trigger(GICState *s, int irq, int cpu,
+                              uint32_t *field, bool to_kernel)
+{
+    if (to_kernel) {
+        *field = (GIC_TEST_EDGE_TRIGGER(irq)) ? 0x2 : 0x0;
+    } else {
+        if (*field & 0x2) {
+            GIC_SET_EDGE_TRIGGER(irq);
+        }
+    }
+}
+
+static void translate_priority(GICState *s, int irq, int cpu,
+                               uint32_t *field, bool to_kernel)
+{
+    if (to_kernel) {
+        *field = GIC_GET_PRIORITY(irq, cpu) & 0xff;
+    } else {
+        gic_set_priority(s, cpu, irq, *field & 0xff);
+    }
+}
+
+static void translate_targets(GICState *s, int irq, int cpu,
+                              uint32_t *field, bool to_kernel)
+{
+    if (to_kernel) {
+        *field = s->irq_target[irq] & 0xff;
+    } else {
+        s->irq_target[irq] = *field & 0xff;
+    }
+}
+
+static void translate_sgisource(GICState *s, int irq, int cpu,
+                                uint32_t *field, bool to_kernel)
+{
+    if (to_kernel) {
+        *field = s->sgi_pending[irq][cpu] & 0xff;
+    } else {
+        s->sgi_pending[irq][cpu] = *field & 0xff;
+    }
+}
+
+/* Read a register group from the kernel VGIC */
+static void kvm_dist_get(GICState *s, uint32_t offset, int width,
+                         int maxirq, vgic_translate_fn translate_fn)
+{
+    uint32_t reg;
+    int i;
+    int j;
+    int irq;
+    int cpu;
+    int regsz = 32 / width; /* irqs per kernel register */
+    uint32_t field;
+
+    for_each_irq_reg(i, maxirq, width) {
+        irq = i * regsz;
+        cpu = 0;
+        while ((cpu < s->num_cpu && irq < GIC_INTERNAL) || cpu == 0) {
+            kvm_gicd_access(s, offset, cpu, &reg, false);
+            for (j = 0; j < regsz; j++) {
+                field = extract32(reg, j * width, width);
+                translate_fn(s, irq + j, cpu, &field, false);
+            }
+
+            cpu++;
+        }
+        offset += 4;
+    }
+}
+
+/* Write a register group to the kernel VGIC */
+static void kvm_dist_put(GICState *s, uint32_t offset, int width,
+                         int maxirq, vgic_translate_fn translate_fn)
+{
+    uint32_t reg;
+    int i;
+    int j;
+    int irq;
+    int cpu;
+    int regsz = 32 / width; /* irqs per kernel register */
+    uint32_t field;
+
+    for_each_irq_reg(i, maxirq, width) {
+        irq = i * regsz;
+        cpu = 0;
+        while ((cpu < s->num_cpu && irq < GIC_INTERNAL) || cpu == 0) {
+            reg = 0;
+            for (j = 0; j < regsz; j++) {
+                translate_fn(s, irq + j, cpu, &field, true);
+                reg = deposit32(reg, j * width, width, field);
+            }
+            kvm_gicd_access(s, offset, cpu, &reg, true);
+
+            cpu++;
+        }
+        offset += 4;
+    }
+}
+
 static void kvm_arm_gic_put(GICState *s)
 {
-    /* TODO: there isn't currently a kernel interface to set the GIC state */
+    uint32_t reg;
+    int i;
+    int cpu;
+    int num_cpu;
+    int num_irq;
+
+    if (!kvm_arm_gic_can_save_restore(s)) {
+            DPRINTF("Cannot put kernel gic state, no kernel interface");
+            return;
+    }
+
+    /* Note: We do the restore in a slightly different order than the save
+     * (where the order doesn't matter and is simply ordered according to the
+     * register offset values) */
+
+    /*****************************************************************
+     * Distributor State
+     */
+
+    /* s->enabled -> GICD_CTLR */
+    reg = s->enabled;
+    kvm_gicd_access(s, 0x0, 0, &reg, true);
+
+    /* Sanity checking on GICD_TYPER and s->num_irq, s->num_cpu */
+    kvm_gicd_access(s, 0x4, 0, &reg, false);
+    num_irq = ((reg & 0x1f) + 1) * 32;
+    num_cpu = ((reg & 0xe0) >> 5) + 1;
+
+    if (num_irq < s->num_irq) {
+            fprintf(stderr, "Restoring %u IRQs, but kernel supports max %d\n",
+                    s->num_irq, num_irq);
+            abort();
+    } else if (num_cpu != s->num_cpu) {
+            fprintf(stderr, "Restoring %u CPU interfaces, kernel only has %d\n",
+                    s->num_cpu, num_cpu);
+            /* Did we not create the VCPUs in the kernel yet? */
+            abort();
+    }
+
+    /* TODO: Consider checking compatibility with the IIDR ? */
+
+    /* irq_state[n].enabled -> GICD_ISENABLERn */
+    kvm_dist_put(s, 0x180, 1, s->num_irq, translate_clear);
+    kvm_dist_put(s, 0x100, 1, s->num_irq, translate_enabled);
+
+    /* s->irq_target[irq] -> GICD_ITARGETSRn
+     * (restore targets before pending to ensure the pending state is set on
+     * the appropriate CPU interfaces in the kernel) */
+    kvm_dist_put(s, 0x800, 8, s->num_irq, translate_targets);
+
+    /* irq_state[n].pending + irq_state[n].level -> GICD_ISPENDRn */
+    kvm_dist_put(s, 0x280, 1, s->num_irq, translate_clear);
+    kvm_dist_put(s, 0x200, 1, s->num_irq, translate_pending);
+
+    /* irq_state[n].active -> GICD_ISACTIVERn */
+    kvm_dist_put(s, 0x380, 1, s->num_irq, translate_clear);
+    kvm_dist_put(s, 0x300, 1, s->num_irq, translate_active);
+
+    /* irq_state[n].trigger -> GICD_ICFRn */
+    kvm_dist_put(s, 0xc00, 2, s->num_irq, translate_trigger);
+
+    /* s->priorityX[irq] -> ICD_IPRIORITYRn */
+    kvm_dist_put(s, 0x400, 8, s->num_irq, translate_priority);
+
+    /* s->sgi_pending -> ICD_CPENDSGIRn */
+    kvm_dist_put(s, 0xf10, 8, GIC_NR_SGIS, translate_clear);
+    kvm_dist_put(s, 0xf20, 8, GIC_NR_SGIS, translate_sgisource);
+
+
+    /*****************************************************************
+     * CPU Interface(s) State
+     */
+
+    for (cpu = 0; cpu < s->num_cpu; cpu++) {
+        /* s->cpu_enabled[cpu] -> GICC_CTLR */
+        reg = s->cpu_enabled[cpu];
+        kvm_gicc_access(s, 0x00, cpu, &reg, true);
+
+        /* s->priority_mask[cpu] -> GICC_PMR */
+        reg = (s->priority_mask[cpu] & 0xff);
+        kvm_gicc_access(s, 0x04, cpu, &reg, true);
+
+        /* s->bpr[cpu] -> GICC_BPR */
+        reg = (s->bpr[cpu] & 0x7);
+        kvm_gicc_access(s, 0x08, cpu, &reg, true);
+
+        /* s->abpr[cpu] -> GICC_ABPR */
+        reg = (s->abpr[cpu] & 0x7);
+        kvm_gicc_access(s, 0x1c, cpu, &reg, true);
+
+        /* s->apr[n][cpu] -> GICC_APRn */
+        for (i = 0; i < 4; i++) {
+            reg = s->apr[i][cpu];
+            kvm_gicc_access(s, 0xd0 + i * 4, cpu, &reg, true);
+        }
+    }
 }
 
 static void kvm_arm_gic_get(GICState *s)
 {
-    /* TODO: there isn't currently a kernel interface to get the GIC state */
+    uint32_t reg;
+    int i;
+    int cpu;
+
+    if (!kvm_arm_gic_can_save_restore(s)) {
+        DPRINTF("Cannot get kernel gic state, no kernel interface\n");
+        return;
+    }
+
+    /*****************************************************************
+     * Distributor State
+     */
+
+    /* GICD_CTLR -> s->enabled */
+    kvm_gicd_access(s, 0x0, 0, &reg, false);
+    s->enabled = reg & 1;
+
+    /* Sanity checking on GICD_TYPER -> s->num_irq, s->num_cpu */
+    kvm_gicd_access(s, 0x4, 0, &reg, false);
+    s->num_irq = ((reg & 0x1f) + 1) * 32;
+    s->num_cpu = ((reg & 0xe0) >> 5) + 1;
+
+    if (s->num_irq > GIC_MAXIRQ) {
+        fprintf(stderr, "Too many IRQs reported from the kernel: %d\n",
+                s->num_irq);
+        abort();
+    }
+
+    /* GICD_IIDR -> ? */
+    kvm_gicd_access(s, 0x8, 0, &reg, false);
+
+    /* Verify no GROUP 1 interrupts configured in the kernel */
+    for_each_irq_reg(i, s->num_irq, 1) {
+        kvm_gicd_access(s, 0x80 + (i * 4), 0, &reg, false);
+        if (reg != 0) {
+            fprintf(stderr, "Unsupported GICD_IGROUPRn value: %08x\n",
+                    reg);
+            abort();
+        }
+    }
+
+    /* Clear all the IRQ settings */
+    for (i = 0; i < s->num_irq; i++) {
+        memset(&s->irq_state[i], 0, sizeof(s->irq_state[0]));
+    }
+
+    /* GICD_ISENABLERn -> irq_state[n].enabled */
+    kvm_dist_get(s, 0x100, 1, s->num_irq, translate_enabled);
+
+    /* GICD_ISPENDRn -> irq_state[n].pending + irq_state[n].level */
+    kvm_dist_get(s, 0x200, 1, s->num_irq, translate_pending);
+
+    /* GICD_ISACTIVERn -> irq_state[n].active */
+    kvm_dist_get(s, 0x300, 1, s->num_irq, translate_active);
+
+    /* GICD_ICFGRn -> irq_state[n].trigger */
+    kvm_dist_get(s, 0xc00, 2, s->num_irq, translate_trigger);
+
+    /* GICD_IPRIORITYRn -> s->priorityX[irq] */
+    kvm_dist_get(s, 0x400, 8, s->num_irq, translate_priority);
+
+    /* GICD_ITARGETSRn -> s->irq_target[irq] */
+    kvm_dist_get(s, 0x800, 8, s->num_irq, translate_targets);
+
+    /* GICD_CPENDSGIRn -> s->sgi_pending */
+    kvm_dist_get(s, 0xf10, 8, GIC_NR_SGIS, translate_sgisource);
+
+
+    /*****************************************************************
+     * CPU Interface(s) State
+     */
+
+    for (cpu = 0; cpu < s->num_cpu; cpu++) {
+        /* GICC_CTLR -> s->cpu_enabled[cpu] */
+        kvm_gicc_access(s, 0x00, cpu, &reg, false);
+        s->cpu_enabled[cpu] = (reg & 1);
+
+        /* GICC_PMR -> s->priority_mask[cpu] */
+        kvm_gicc_access(s, 0x04, cpu, &reg, false);
+        s->priority_mask[cpu] = (reg & 0xff);
+
+        /* GICC_BPR -> s->bpr[cpu] */
+        kvm_gicc_access(s, 0x08, cpu, &reg, false);
+        s->bpr[cpu] = (reg & 0x7);
+
+        /* GICC_ABPR -> s->abpr[cpu] */
+        kvm_gicc_access(s, 0x1c, cpu, &reg, false);
+        s->abpr[cpu] = (reg & 0x7);
+
+        /* GICC_APRn -> s->apr[n][cpu] */
+        for (i = 0; i < 4; i++) {
+            kvm_gicc_access(s, 0xd0 + i * 4, cpu, &reg, false);
+            s->apr[i][cpu] = reg;
+        }
+    }
 }
 
 static void kvm_arm_gic_reset(DeviceState *dev)
@@ -97,6 +517,7 @@
     GICState *s = KVM_ARM_GIC(dev);
     SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
     KVMARMGICClass *kgc = KVM_ARM_GIC_GET_CLASS(s);
+    int ret;
 
     kgc->parent_realize(dev, errp);
     if (error_is_set(errp)) {
@@ -119,13 +540,27 @@
     for (i = 0; i < s->num_cpu; i++) {
         sysbus_init_irq(sbd, &s->parent_irq[i]);
     }
+
+    /* Try to create the device via the device control API */
+    s->dev_fd = -1;
+    ret = kvm_create_device(kvm_state, KVM_DEV_TYPE_ARM_VGIC_V2, false);
+    if (ret >= 0) {
+        s->dev_fd = ret;
+    } else if (ret != -ENODEV && ret != -ENOTSUP) {
+        error_setg_errno(errp, -ret, "error creating in-kernel VGIC");
+        return;
+    }
+
     /* Distributor */
     memory_region_init_reservation(&s->iomem, OBJECT(s),
                                    "kvm-gic_dist", 0x1000);
     sysbus_init_mmio(sbd, &s->iomem);
     kvm_arm_register_device(&s->iomem,
                             (KVM_ARM_DEVICE_VGIC_V2 << KVM_ARM_DEVICE_ID_SHIFT)
-                            | KVM_VGIC_V2_ADDR_TYPE_DIST);
+                            | KVM_VGIC_V2_ADDR_TYPE_DIST,
+                            KVM_DEV_ARM_VGIC_GRP_ADDR,
+                            KVM_VGIC_V2_ADDR_TYPE_DIST,
+                            s->dev_fd);
     /* CPU interface for current core. Unlike arm_gic, we don't
      * provide the "interface for core #N" memory regions, because
      * cores with a VGIC don't have those.
@@ -135,7 +570,10 @@
     sysbus_init_mmio(sbd, &s->cpuiomem[0]);
     kvm_arm_register_device(&s->cpuiomem[0],
                             (KVM_ARM_DEVICE_VGIC_V2 << KVM_ARM_DEVICE_ID_SHIFT)
-                            | KVM_VGIC_V2_ADDR_TYPE_CPU);
+                            | KVM_VGIC_V2_ADDR_TYPE_CPU,
+                            KVM_DEV_ARM_VGIC_GRP_ADDR,
+                            KVM_VGIC_V2_ADDR_TYPE_CPU,
+                            s->dev_fd);
 }
 
 static void kvm_arm_gic_class_init(ObjectClass *klass, void *data)
diff --git a/hw/intc/exynos4210_combiner.c b/hw/intc/exynos4210_combiner.c
index ef5e8eb..3287479 100644
--- a/hw/intc/exynos4210_combiner.c
+++ b/hw/intc/exynos4210_combiner.c
@@ -418,7 +418,7 @@
     qdev_init_gpio_in(dev, exynos4210_combiner_handler, IIC_NIRQ);
 
     /* Connect SysBusDev irqs to device specific irqs */
-    for (i = 0; i < IIC_NIRQ; i++) {
+    for (i = 0; i < IIC_NGRP; i++) {
         sysbus_init_irq(sbd, &s->output_irq[i]);
     }
 
diff --git a/hw/intc/gic_internal.h b/hw/intc/gic_internal.h
index 92a6f7a..48a58d7 100644
--- a/hw/intc/gic_internal.h
+++ b/hw/intc/gic_internal.h
@@ -40,7 +40,7 @@
 #define GIC_SET_MODEL(irq) s->irq_state[irq].model = true
 #define GIC_CLEAR_MODEL(irq) s->irq_state[irq].model = false
 #define GIC_TEST_MODEL(irq) s->irq_state[irq].model
-#define GIC_SET_LEVEL(irq, cm) s->irq_state[irq].level = (cm)
+#define GIC_SET_LEVEL(irq, cm) s->irq_state[irq].level |= (cm)
 #define GIC_CLEAR_LEVEL(irq, cm) s->irq_state[irq].level &= ~(cm)
 #define GIC_TEST_LEVEL(irq, cm) ((s->irq_state[irq].level & (cm)) != 0)
 #define GIC_SET_EDGE_TRIGGER(irq) s->irq_state[irq].edge_trigger = true
diff --git a/hw/misc/arm_sysctl.c b/hw/misc/arm_sysctl.c
index 0fc26d2..3fad6f8 100644
--- a/hw/misc/arm_sysctl.c
+++ b/hw/misc/arm_sysctl.c
@@ -276,7 +276,7 @@
         }
         break;
     case SYS_CFG_OSC:
-        if (site == SYS_CFG_SITE_MB && device < sizeof(s->mb_clock)) {
+        if (site == SYS_CFG_SITE_MB && device < ARRAY_SIZE(s->mb_clock)) {
             /* motherboard clock */
             *val = s->mb_clock[device];
             return true;
@@ -324,7 +324,7 @@
 
     switch (function) {
     case SYS_CFG_OSC:
-        if (site == SYS_CFG_SITE_MB && device < sizeof(s->mb_clock)) {
+        if (site == SYS_CFG_SITE_MB && device < ARRAY_SIZE(s->mb_clock)) {
             /* motherboard clock */
             s->mb_clock[device] = val;
             return true;
diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index 8db182f..c2c688c 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -209,6 +209,29 @@
     QLIST_ENTRY(VFIOGroup) container_next;
 } VFIOGroup;
 
+typedef struct VFIORomBlacklistEntry {
+    uint16_t vendor_id;
+    uint16_t device_id;
+} VFIORomBlacklistEntry;
+
+/*
+ * List of vendor id/device id pairs for which option ROM loading
+ * is disabled. This avoids guest hangs during ROM execution, as
+ * observed with the BCM 57810 card, for lack of a better way to
+ * handle such issues.
+ * The user can still override by specifying a romfile or
+ * rombar=1.
+ * Please see https://bugs.launchpad.net/qemu/+bug/1284874
+ * for an analysis of the 57810 card hang. When adding a new
+ * vendor id/device id combination below, please also add your
+ * card/environment details and any information that could help
+ * with debugging to the bug report tracking this issue.
+ */
+static const VFIORomBlacklistEntry romblacklist[] = {
+    /* Broadcom BCM 57810 */
+    { 0x14e4, 0x168e }
+};
+
 #define MSIX_CAP_LENGTH 12
 
 static QLIST_HEAD(, VFIOContainer)
@@ -1197,13 +1220,43 @@
     .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
+static bool vfio_blacklist_opt_rom(VFIODevice *vdev)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    uint16_t vendor_id, device_id;
+    int count = 0;
+
+    vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID);
+    device_id = pci_get_word(pdev->config + PCI_DEVICE_ID);
+
+    while (count < ARRAY_SIZE(romblacklist)) {
+        if (romblacklist[count].vendor_id == vendor_id &&
+            romblacklist[count].device_id == device_id) {
+            return true;
+        }
+        count++;
+    }
+
+    return false;
+}
+
 static void vfio_pci_size_rom(VFIODevice *vdev)
 {
     uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK);
     off_t offset = vdev->config_offset + PCI_ROM_ADDRESS;
+    DeviceState *dev = DEVICE(vdev);
     char name[32];
 
     if (vdev->pdev.romfile || !vdev->pdev.rom_bar) {
+        /* Since pci handles romfile, just print a message and return */
+        if (vfio_blacklist_opt_rom(vdev) && vdev->pdev.romfile) {
+            error_printf("Warning : Device at %04x:%02x:%02x.%x "
+                         "is known to cause system instability issues during "
+                         "option rom execution. "
+                         "Proceeding anyway since user specified romfile\n",
+                         vdev->host.domain, vdev->host.bus, vdev->host.slot,
+                         vdev->host.function);
+        }
         return;
     }
 
@@ -1227,6 +1280,26 @@
         return;
     }
 
+    if (vfio_blacklist_opt_rom(vdev)) {
+        if (dev->opts && qemu_opt_get(dev->opts, "rombar")) {
+            error_printf("Warning : Device at %04x:%02x:%02x.%x "
+                         "is known to cause system instability issues during "
+                         "option rom execution. "
+                         "Proceeding anyway since user specified non zero value for "
+                         "rombar\n",
+                         vdev->host.domain, vdev->host.bus, vdev->host.slot,
+                         vdev->host.function);
+        } else {
+            error_printf("Warning : Rom loading for device at "
+                         "%04x:%02x:%02x.%x has been disabled due to "
+                         "system instability issues. "
+                         "Specify rombar=1 or romfile to force\n",
+                         vdev->host.domain, vdev->host.bus, vdev->host.slot,
+                         vdev->host.function);
+            return;
+        }
+    }
+
     DPRINTF("%04x:%02x:%02x.%x ROM size 0x%x\n", vdev->host.domain,
             vdev->host.bus, vdev->host.slot, vdev->host.function, size);
 
@@ -3681,10 +3754,10 @@
 
     strncat(path, "iommu_group", sizeof(path) - strlen(path) - 1);
 
-    len = readlink(path, iommu_group_path, PATH_MAX);
-    if (len <= 0) {
+    len = readlink(path, iommu_group_path, sizeof(path));
+    if (len <= 0 || len >= sizeof(path)) {
         error_report("vfio: error no iommu_group for device");
-        return -errno;
+        return len < 0 ? -errno : -ENAMETOOLONG;
     }
 
     iommu_group_path[len] = 0;
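
For illustration only, not part of the patch above: with the ROM blacklist in place, a user who still wants the option ROM for a blacklisted device can force it explicitly, for example with a made-up host address such as

    -device vfio-pci,host=0000:01:00.0,rombar=1

or by pointing romfile= at a known-good ROM image; the warnings added above are then printed, but the ROM is still exposed to the guest.
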
diff --git a/hw/net/stellaris_enet.c b/hw/net/stellaris_enet.c
index 9dd77f7..d04e6a4 100644
--- a/hw/net/stellaris_enet.c
+++ b/hw/net/stellaris_enet.c
@@ -176,7 +176,8 @@
         return val;
     case 0x14: /* IA0 */
         return s->conf.macaddr.a[0] | (s->conf.macaddr.a[1] << 8)
-               | (s->conf.macaddr.a[2] << 16) | (s->conf.macaddr.a[3] << 24);
+            | (s->conf.macaddr.a[2] << 16)
+            | ((uint32_t)s->conf.macaddr.a[3] << 24);
     case 0x18: /* IA1 */
         return s->conf.macaddr.a[4] | (s->conf.macaddr.a[5] << 8);
     case 0x1c: /* THR */
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 854997d..a1de2f4 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -106,7 +106,7 @@
         goto fail;
     }
     net->nc = backend;
-    net->dev.backend_features = tap_has_vnet_hdr(backend) ? 0 :
+    net->dev.backend_features = qemu_has_vnet_hdr(backend) ? 0 :
         (1 << VHOST_NET_F_VIRTIO_NET_HDR);
     net->backend = r;
 
@@ -117,8 +117,8 @@
     if (r < 0) {
         goto fail;
     }
-    if (!tap_has_vnet_hdr_len(backend,
-                              sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
+    if (!qemu_has_vnet_hdr_len(backend,
+                               sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
         net->dev.features &= ~(1 << VIRTIO_NET_F_MRG_RXBUF);
     }
     if (~net->dev.features & net->dev.backend_features) {
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 3626608..3c0342e 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -325,11 +325,7 @@
         return;
     }
 
-    if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
-        return;
-    }
-
-    n->has_vnet_hdr = tap_has_vnet_hdr(nc->peer);
+    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
 }
 
 static int peer_has_vnet_hdr(VirtIONet *n)
@@ -342,7 +338,7 @@
     if (!peer_has_vnet_hdr(n))
         return 0;
 
-    n->has_ufo = tap_has_ufo(qemu_get_queue(n->nic)->peer);
+    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
 
     return n->has_ufo;
 }
@@ -361,8 +357,8 @@
         nc = qemu_get_subqueue(n->nic, i);
 
         if (peer_has_vnet_hdr(n) &&
-            tap_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
-            tap_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
+            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
+            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
             n->host_hdr_len = n->guest_hdr_len;
         }
     }
@@ -463,7 +459,7 @@
 
 static void virtio_net_apply_guest_offloads(VirtIONet *n)
 {
-    tap_set_offload(qemu_get_subqueue(n->nic, 0)->peer,
+    qemu_set_offload(qemu_get_queue(n->nic)->peer,
             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
@@ -1544,7 +1540,7 @@
     peer_test_vnet_hdr(n);
     if (peer_has_vnet_hdr(n)) {
         for (i = 0; i < n->max_queues; i++) {
-            tap_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
+            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
         }
         n->host_hdr_len = sizeof(struct virtio_net_hdr);
     } else {
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 19687aa..5be807c 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -1290,12 +1290,12 @@
               s->lro_supported, rxcso_supported,
               s->rx_vlan_stripping);
     if (s->peer_has_vhdr) {
-        tap_set_offload(qemu_get_queue(s->nic)->peer,
-                        rxcso_supported,
-                        s->lro_supported,
-                        s->lro_supported,
-                        0,
-                        0);
+        qemu_set_offload(qemu_get_queue(s->nic)->peer,
+                         rxcso_supported,
+                         s->lro_supported,
+                         s->lro_supported,
+                         0,
+                         0);
     }
 }
 
@@ -1883,11 +1883,9 @@
 
 static bool vmxnet3_peer_has_vnet_hdr(VMXNET3State *s)
 {
-    NetClientState *peer = qemu_get_queue(s->nic)->peer;
+    NetClientState *nc = qemu_get_queue(s->nic);
 
-    if ((NULL != peer)                              &&
-        (peer->info->type == NET_CLIENT_OPTIONS_KIND_TAP)   &&
-        tap_has_vnet_hdr(peer)) {
+    if (qemu_has_vnet_hdr(nc->peer)) {
         return true;
     }
 
@@ -1935,10 +1933,10 @@
     s->lro_supported = false;
 
     if (s->peer_has_vhdr) {
-        tap_set_vnet_hdr_len(qemu_get_queue(s->nic)->peer,
+        qemu_set_vnet_hdr_len(qemu_get_queue(s->nic)->peer,
             sizeof(struct virtio_net_hdr));
 
-        tap_using_vnet_hdr(qemu_get_queue(s->nic)->peer, 1);
+        qemu_using_vnet_hdr(qemu_get_queue(s->nic)->peer, 1);
     }
 
     qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
index 50b89ad..50a0acf 100644
--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
@@ -909,7 +909,7 @@
     case VERIFY_16:
         if ((buf[1] & 2) == 0) {
             cmd->xfer = 0;
-        } else if ((buf[1] & 4) == 1) {
+        } else if ((buf[1] & 4) != 0) {
             cmd->xfer = 1;
         }
         cmd->xfer *= dev->blocksize;
@@ -1367,6 +1367,11 @@
     .key = DATA_PROTECT, .asc = 0x27, .ascq = 0x00
 };
 
+/* Data Protection, Space Allocation Failed Write Protect */
+const struct SCSISense sense_code_SPACE_ALLOC_FAILED = {
+    .key = DATA_PROTECT, .asc = 0x27, .ascq = 0x07
+};
+
 /*
  * scsi_build_sense
  *
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index b4fadd2..48a28ae 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -75,6 +75,8 @@
     bool media_event;
     bool eject_request;
     uint64_t wwn;
+    uint64_t port_wwn;
+    uint16_t port_index;
     uint64_t max_unmap_size;
     QEMUBH *bh;
     char *version;
@@ -428,6 +430,9 @@
         case EINVAL:
             scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
             break;
+        case ENOSPC:
+            scsi_check_condition(r, SENSE_CODE(SPACE_ALLOC_FAILED));
+            break;
         default:
             scsi_check_condition(r, SENSE_CODE(IO_ERROR));
             break;
@@ -617,6 +622,24 @@
                 stq_be_p(&outbuf[buflen], s->wwn);
                 buflen += 8;
             }
+
+            if (s->port_wwn) {
+                outbuf[buflen++] = 0x61; // SAS / Binary
+                outbuf[buflen++] = 0x93; // PIV / Target port / NAA
+                outbuf[buflen++] = 0;    // reserved
+                outbuf[buflen++] = 8;
+                stq_be_p(&outbuf[buflen], s->port_wwn);
+                buflen += 8;
+            }
+
+            if (s->port_index) {
+                outbuf[buflen++] = 0x61; // SAS / Binary
+                outbuf[buflen++] = 0x94; // PIV / Target port / relative target port
+                outbuf[buflen++] = 0;    // reserved
+                outbuf[buflen++] = 4;
+                stw_be_p(&outbuf[buflen + 2], s->port_index);
+                buflen += 4;
+            }
             break;
         }
         case 0xb0: /* block limits */
@@ -2536,6 +2559,8 @@
     DEFINE_PROP_BIT("dpofua", SCSIDiskState, features,
                     SCSI_DISK_F_DPOFUA, false),
     DEFINE_PROP_UINT64("wwn", SCSIDiskState, wwn, 0),
+    DEFINE_PROP_UINT64("port_wwn", SCSIDiskState, port_wwn, 0),
+    DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0),
     DEFINE_PROP_UINT64("max_unmap_size", SCSIDiskState, max_unmap_size,
                        DEFAULT_MAX_UNMAP_SIZE),
     DEFINE_BLOCK_CHS_PROPERTIES(SCSIDiskState, qdev.conf),
@@ -2584,6 +2609,8 @@
 static Property scsi_cd_properties[] = {
     DEFINE_SCSI_DISK_PROPERTIES(),
     DEFINE_PROP_UINT64("wwn", SCSIDiskState, wwn, 0),
+    DEFINE_PROP_UINT64("port_wwn", SCSIDiskState, port_wwn, 0),
+    DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -2647,6 +2674,8 @@
     DEFINE_PROP_BIT("dpofua", SCSIDiskState, features,
                     SCSI_DISK_F_DPOFUA, false),
     DEFINE_PROP_UINT64("wwn", SCSIDiskState, wwn, 0),
+    DEFINE_PROP_UINT64("port_wwn", SCSIDiskState, port_wwn, 0),
+    DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0),
     DEFINE_PROP_UINT64("max_unmap_size", SCSIDiskState, max_unmap_size,
                        DEFAULT_MAX_UNMAP_SIZE),
     DEFINE_PROP_END_OF_LIST(),
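
For illustration only, not part of the patch above: the new port_wwn and port_index properties are set alongside the existing wwn property, e.g.

    -device scsi-hd,drive=disk0,wwn=0x5000c50015ea71ac,port_wwn=0x5000c50015ea71ad,port_index=1

(the drive name and WWN values here are made up), so that the guest sees the corresponding NAA target-port and relative-target-port designators in the VPD page 0x83 data built above.
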
diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
index f08b64e..8d92e0d 100644
--- a/hw/scsi/scsi-generic.c
+++ b/hw/scsi/scsi-generic.c
@@ -37,8 +37,6 @@
 #include <scsi/sg.h>
 #include "block/scsi.h"
 
-#define SCSI_SENSE_BUF_SIZE 96
-
 #define SG_ERR_DRIVER_TIMEOUT  0x06
 #define SG_ERR_DRIVER_SENSE    0x08
 
diff --git a/hw/scsi/spapr_vscsi.c b/hw/scsi/spapr_vscsi.c
index c0c46d7..e8bca39 100644
--- a/hw/scsi/spapr_vscsi.c
+++ b/hw/scsi/spapr_vscsi.c
@@ -60,7 +60,6 @@
 #define VSCSI_MAX_SECTORS       4096
 #define VSCSI_REQ_LIMIT         24
 
-#define SCSI_SENSE_BUF_SIZE     96
 #define SRP_RSP_SENSE_DATA_LEN  18
 
 typedef union vscsi_crq {
diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c
index 2957d90..75adb68 100644
--- a/hw/sparc/sun4m.c
+++ b/hw/sparc/sun4m.c
@@ -22,6 +22,7 @@
  * THE SOFTWARE.
  */
 #include "hw/sysbus.h"
+#include "qemu/error-report.h"
 #include "qemu/timer.h"
 #include "hw/sparc/sun4m.h"
 #include "hw/timer/m48t59.h"
@@ -561,6 +562,31 @@
     }
 }
 
+static void cg3_init(hwaddr addr, qemu_irq irq, int vram_size, int width,
+                     int height, int depth)
+{
+    DeviceState *dev;
+    SysBusDevice *s;
+
+    dev = qdev_create(NULL, "cgthree");
+    qdev_prop_set_uint32(dev, "vram-size", vram_size);
+    qdev_prop_set_uint16(dev, "width", width);
+    qdev_prop_set_uint16(dev, "height", height);
+    qdev_prop_set_uint16(dev, "depth", depth);
+    qdev_prop_set_uint64(dev, "prom-addr", addr);
+    qdev_init_nofail(dev);
+    s = SYS_BUS_DEVICE(dev);
+
+    /* FCode ROM */
+    sysbus_mmio_map(s, 0, addr);
+    /* DAC */
+    sysbus_mmio_map(s, 1, addr + 0x400000ULL);
+    /* 8-bit plane */
+    sysbus_mmio_map(s, 2, addr + 0x800000ULL);
+
+    sysbus_connect_irq(s, 0, irq);
+}
+
 /* NCR89C100/MACIO Internal ID register */
 
 #define TYPE_MACIO_ID_REGISTER "macio_idreg"
@@ -914,13 +940,43 @@
                              slavio_irq[16], iommu, &ledma_irq, 1);
 
     if (graphic_depth != 8 && graphic_depth != 24) {
-        fprintf(stderr, "qemu: Unsupported depth: %d\n", graphic_depth);
+        error_report("Unsupported depth: %d", graphic_depth);
         exit (1);
     }
     num_vsimms = 0;
     if (num_vsimms == 0) {
-        tcx_init(hwdef->tcx_base, 0x00100000, graphic_width, graphic_height,
-                 graphic_depth);
+        if (vga_interface_type == VGA_CG3) {
+            if (graphic_depth != 8) {
+                error_report("Unsupported depth: %d", graphic_depth);
+                exit(1);
+            }
+
+            if (!(graphic_width == 1024 && graphic_height == 768) &&
+                !(graphic_width == 1152 && graphic_height == 900)) {
+                error_report("Unsupported resolution: %d x %d", graphic_width,
+                             graphic_height);
+                exit(1);
+            }
+
+            /* sbus irq 5 */
+            cg3_init(hwdef->tcx_base, slavio_irq[11], 0x00100000,
+                     graphic_width, graphic_height, graphic_depth);
+        } else {
+            /* If no display specified, default to TCX */
+            if (graphic_depth != 8 && graphic_depth != 24) {
+                error_report("Unsupported depth: %d", graphic_depth);
+                exit(1);
+            }
+
+            if (!(graphic_width == 1024 && graphic_height == 768)) {
+                error_report("Unsupported resolution: %d x %d",
+                             graphic_width, graphic_height);
+                exit(1);
+            }
+
+            tcx_init(hwdef->tcx_base, 0x00100000, graphic_width, graphic_height,
+                     graphic_depth);
+        }
     }
 
     for (i = num_vsimms; i < MAX_VSIMMS; i++) {
diff --git a/hw/timer/arm_timer.c b/hw/timer/arm_timer.c
index a47afde..fb0a45c 100644
--- a/hw/timer/arm_timer.c
+++ b/hw/timer/arm_timer.c
@@ -320,6 +320,7 @@
     n = offset >> 8;
     if (n > 2) {
         qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad timer %d\n", __func__, n);
+        return 0;
     }
 
     return arm_timer_read(s->timer[n], offset & 0xff);
@@ -334,6 +335,7 @@
     n = offset >> 8;
     if (n > 2) {
         qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad timer %d\n", __func__, n);
+        return;
     }
 
     arm_timer_write(s->timer[n], offset & 0xff, value);
diff --git a/hw/timer/slavio_timer.c b/hw/timer/slavio_timer.c
index f75b914..e4dccea 100644
--- a/hw/timer/slavio_timer.c
+++ b/hw/timer/slavio_timer.c
@@ -51,7 +51,7 @@
     ptimer_state *timer;
     uint32_t count, counthigh, reached;
     /* processor only */
-    uint32_t running;
+    uint32_t run;
     uint64_t limit;
 } CPUTimerState;
 
@@ -177,7 +177,7 @@
         // only available in processor counter/timer
         // read start/stop status
         if (timer_index > 0) {
-            ret = t->running;
+            ret = t->run;
         } else {
             ret = 0;
         }
@@ -260,16 +260,15 @@
     case TIMER_STATUS:
         if (slavio_timer_is_user(tc)) {
             // start/stop user counter
-            if ((val & 1) && !t->running) {
+            if (val & 1) {
                 trace_slavio_timer_mem_writel_status_start(timer_index);
                 ptimer_run(t->timer, 0);
-                t->running = 1;
-            } else if (!(val & 1) && t->running) {
+            } else {
                 trace_slavio_timer_mem_writel_status_stop(timer_index);
                 ptimer_stop(t->timer);
-                t->running = 0;
             }
         }
+        t->run = val & 1;
         break;
     case TIMER_MODE:
         if (timer_index == 0) {
@@ -284,8 +283,9 @@
                     if (val & processor) { // counter -> user timer
                         qemu_irq_lower(curr_timer->irq);
                         // counters are always running
-                        ptimer_stop(curr_timer->timer);
-                        curr_timer->running = 0;
+                        if (!curr_timer->run) {
+                            ptimer_stop(curr_timer->timer);
+                        }
                         // user timer limit is always the same
                         curr_timer->limit = TIMER_MAX_COUNT64;
                         ptimer_set_limit(curr_timer->timer,
@@ -296,13 +296,8 @@
                         s->cputimer_mode |= processor;
                         trace_slavio_timer_mem_writel_mode_user(timer_index);
                     } else { // user timer -> counter
-                        // stop the user timer if it is running
-                        if (curr_timer->running) {
-                            ptimer_stop(curr_timer->timer);
-                        }
                         // start the counter
                         ptimer_run(curr_timer->timer, 0);
-                        curr_timer->running = 1;
                         // clear this processors user timer bit in config
                         // register
                         s->cputimer_mode &= ~processor;
@@ -340,7 +335,7 @@
         VMSTATE_UINT32(count, CPUTimerState),
         VMSTATE_UINT32(counthigh, CPUTimerState),
         VMSTATE_UINT32(reached, CPUTimerState),
-        VMSTATE_UINT32(running, CPUTimerState),
+        VMSTATE_UINT32(run, CPUTimerState),
         VMSTATE_PTIMER(timer, CPUTimerState),
         VMSTATE_END_OF_LIST()
     }
@@ -373,7 +368,7 @@
             ptimer_set_limit(curr_timer->timer,
                              LIMIT_TO_PERIODS(TIMER_MAX_COUNT32), 1);
             ptimer_run(curr_timer->timer, 0);
-            curr_timer->running = 1;
+            curr_timer->run = 1;
         }
     }
     s->cputimer_mode = 0;
diff --git a/include/hw/intc/arm_gic_common.h b/include/hw/intc/arm_gic_common.h
index 89384c2..f6887ed 100644
--- a/include/hw/intc/arm_gic_common.h
+++ b/include/hw/intc/arm_gic_common.h
@@ -104,6 +104,7 @@
     MemoryRegion cpuiomem[GIC_NCPU + 1]; /* CPU interfaces */
     uint32_t num_irq;
     uint32_t revision;
+    int dev_fd; /* kvm device fd if backed by kvm vgic support */
 } GICState;
 
 #define TYPE_ARM_GIC_COMMON "arm_gic_common"
diff --git a/include/hw/nvram/openbios_firmware_abi.h b/include/hw/nvram/openbios_firmware_abi.h
index 5e6e5d4..c66ee22 100644
--- a/include/hw/nvram/openbios_firmware_abi.h
+++ b/include/hw/nvram/openbios_firmware_abi.h
@@ -62,6 +62,8 @@
     header->type = 1;
     header->machine_id = machine_id & 0xff;
     memcpy(&header->macaddr, macaddr, 6);
+    memcpy(&header->hostid, &macaddr[3], 3);
+
     /* Calculate checksum */
     tmp = 0;
     tmpptr = (uint8_t *)header;
diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
index bf6da3d..e5fc39d 100644
--- a/include/hw/scsi/scsi.h
+++ b/include/hw/scsi/scsi.h
@@ -31,7 +31,7 @@
     uint8_t ascq;
 } SCSISense;
 
-#define SCSI_SENSE_BUF_SIZE 96
+#define SCSI_SENSE_BUF_SIZE 252
 
 struct SCSICommand {
     uint8_t buf[SCSI_CMD_BUF_SIZE];
@@ -223,6 +223,8 @@
 extern const struct SCSISense sense_code_DEVICE_INTERNAL_RESET;
 /* Data Protection, Write Protected */
 extern const struct SCSISense sense_code_WRITE_PROTECTED;
+/* Data Protection, Space Allocation Failed Write Protect */
+extern const struct SCSISense sense_code_SPACE_ALLOC_FAILED;
 
 #define SENSE_CODE(x) sense_code_ ## x
 
diff --git a/include/migration/page_cache.h b/include/migration/page_cache.h
index d156f0d..2d5ce2d 100644
--- a/include/migration/page_cache.h
+++ b/include/migration/page_cache.h
@@ -66,7 +66,7 @@
  * @addr: page address
  * @pdata: pointer to the page
  */
-int cache_insert(PageCache *cache, uint64_t addr, uint8_t *pdata);
+int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata);
 
 /**
  * cache_resize: resize the page cache. In case of size reduction the extra
diff --git a/include/net/net.h b/include/net/net.h
index 11e1468..8166345 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -50,6 +50,12 @@
 typedef void (LinkStatusChanged)(NetClientState *);
 typedef void (NetClientDestructor)(NetClientState *);
 typedef RxFilterInfo *(QueryRxFilter)(NetClientState *);
+typedef bool (HasUfo)(NetClientState *);
+typedef bool (HasVnetHdr)(NetClientState *);
+typedef bool (HasVnetHdrLen)(NetClientState *, int);
+typedef void (UsingVnetHdr)(NetClientState *, bool);
+typedef void (SetOffload)(NetClientState *, int, int, int, int, int);
+typedef void (SetVnetHdrLen)(NetClientState *, int);
 
 typedef struct NetClientInfo {
     NetClientOptionsKind type;
@@ -62,6 +68,12 @@
     LinkStatusChanged *link_status_changed;
     QueryRxFilter *query_rx_filter;
     NetPoll *poll;
+    HasUfo *has_ufo;
+    HasVnetHdr *has_vnet_hdr;
+    HasVnetHdrLen *has_vnet_hdr_len;
+    UsingVnetHdr *using_vnet_hdr;
+    SetOffload *set_offload;
+    SetVnetHdrLen *set_vnet_hdr_len;
 } NetClientInfo;
 
 struct NetClientState {
@@ -120,6 +132,13 @@
 void qemu_purge_queued_packets(NetClientState *nc);
 void qemu_flush_queued_packets(NetClientState *nc);
 void qemu_format_nic_info_str(NetClientState *nc, uint8_t macaddr[6]);
+bool qemu_has_ufo(NetClientState *nc);
+bool qemu_has_vnet_hdr(NetClientState *nc);
+bool qemu_has_vnet_hdr_len(NetClientState *nc, int len);
+void qemu_using_vnet_hdr(NetClientState *nc, bool enable);
+void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
+                      int ecn, int ufo);
+void qemu_set_vnet_hdr_len(NetClientState *nc, int len);
 void qemu_macaddr_default_if_unset(MACAddr *macaddr);
 int qemu_show_nic_models(const char *arg, const char *const *models);
 void qemu_check_nic_model(NICInfo *nd, const char *model);
diff --git a/include/net/tap.h b/include/net/tap.h
index a994f20..6daeb42 100644
--- a/include/net/tap.h
+++ b/include/net/tap.h
@@ -29,12 +29,6 @@
 #include "qemu-common.h"
 #include "qapi-types.h"
 
-bool tap_has_ufo(NetClientState *nc);
-int tap_has_vnet_hdr(NetClientState *nc);
-int tap_has_vnet_hdr_len(NetClientState *nc, int len);
-void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr);
-void tap_set_offload(NetClientState *nc, int csum, int tso4, int tso6, int ecn, int ufo);
-void tap_set_vnet_hdr_len(NetClientState *nc, int len);
 int tap_enable(NetClientState *nc);
 int tap_disable(NetClientState *nc);
 
diff --git a/include/qemu/crc32c.h b/include/qemu/crc32c.h
index 56d1c3b..dafb6a1 100644
--- a/include/qemu/crc32c.h
+++ b/include/qemu/crc32c.h
@@ -25,8 +25,8 @@
  *
  */
 
-#ifndef QEMU_CRC32_H
-#define QEMU_CRC32_H
+#ifndef QEMU_CRC32C_H
+#define QEMU_CRC32C_H
 
 #include "qemu-common.h"
 
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 3b25f27..a02d67c 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -194,6 +194,28 @@
 
 int kvm_vcpu_ioctl(CPUState *cpu, int type, ...);
 
+/**
+ * kvm_device_ioctl - call an ioctl on a kvm device
+ * @fd: The KVM device file descriptor as returned from KVM_CREATE_DEVICE
+ * @type: The device-ctrl ioctl number
+ *
+ * Returns: -errno on error, nonnegative on success
+ */
+int kvm_device_ioctl(int fd, int type, ...);
+
+/**
+ * kvm_create_device - create a KVM device for the device control API
+ * @s: The KVMState pointer
+ * @type: The KVM device type (see Documentation/virtual/kvm/devices in the
+ *        kernel source)
+ * @test: If true, only test if device can be created, but don't actually
+ *        create the device.
+ *
+ * Returns: -errno on error, nonnegative on success: @test ? 0 : device fd;
+ */
+int kvm_create_device(KVMState *s, uint64_t type, bool test);
+
+
 /* Arch specific hooks */
 
 extern const KVMCapabilityInfo kvm_arch_required_capabilities[];
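
For illustration only, not part of the patch above: a minimal sketch of how kvm_create_device() and kvm_device_ioctl() are meant to be combined. The attribute group/id values below are placeholders; real callers (such as the VGIC code earlier in this series) pass the device-specific constants from the kernel headers.

    /* Sketch: create a device via the device control API, then program
     * one of its attributes with KVM_SET_DEVICE_ATTR.
     */
    int fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_ARM_VGIC_V2, false);
    if (fd >= 0) {
        uint64_t val = 0;               /* value to write, device specific */
        struct kvm_device_attr attr = {
            .group = 0,                 /* placeholder attribute group */
            .attr  = 0,                 /* placeholder attribute id */
            .addr  = (uintptr_t)&val,
        };
        if (kvm_device_ioctl(fd, KVM_SET_DEVICE_ATTR, &attr) < 0) {
            /* report the error or fall back as appropriate */
        }
    }
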
@@ -319,4 +341,16 @@
 void kvm_pc_gsi_handler(void *opaque, int n, int level);
 void kvm_pc_setup_irq_routing(bool pci_enabled);
 void kvm_init_irq_routing(KVMState *s);
+
+/**
+ * kvm_arch_irqchip_create:
+ * @s: The KVMState pointer
+ *
+ * Allow architectures to create an in-kernel irq chip themselves.
+ *
+ * Returns: < 0: error
+ *            0: irq chip was not created
+ *          > 0: irq chip was created
+ */
+int kvm_arch_irqchip_create(KVMState *s);
 #endif
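
For illustration only, not part of the patch above: one plausible arch-side implementation of this hook, assuming the device control API probe added in this series; the actual target code may differ.

    int kvm_arch_irqchip_create(KVMState *s)
    {
        /* If the device control API can create the in-kernel GIC, report
         * success and let the GIC device create it at realize time;
         * returning 0 falls back to the generic KVM_CREATE_IRQCHIP path.
         */
        if (kvm_create_device(s, KVM_DEV_TYPE_ARM_VGIC_V2, true) == 0) {
            return 1;
        }
        return 0;
    }
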
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 495dae8..b90df9a 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -104,6 +104,7 @@
 
 typedef enum {
     VGA_NONE, VGA_STD, VGA_CIRRUS, VGA_VMWARE, VGA_XENFB, VGA_QXL,
+    VGA_TCX, VGA_CG3,
 } VGAInterfaceType;
 
 extern int vga_interface_type;
diff --git a/kvm-all.c b/kvm-all.c
index 2ca9143..fd8157a 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1298,10 +1298,17 @@
         return 0;
     }
 
-    ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP);
+    /* First probe and see if there's an arch-specific hook to create the
+     * in-kernel irqchip for us */
+    ret = kvm_arch_irqchip_create(s);
     if (ret < 0) {
-        fprintf(stderr, "Create kernel irqchip failed\n");
         return ret;
+    } else if (ret == 0) {
+        ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP);
+        if (ret < 0) {
+            fprintf(stderr, "Create kernel irqchip failed\n");
+            return ret;
+        }
     }
 
     kvm_kernel_irqchip = true;
@@ -1428,7 +1435,7 @@
     } while (ret == -EINTR);
 
     if (ret < 0) {
-        fprintf(stderr, "ioctl(KVM_CREATE_VM) failed: %d %s\n", -s->vmfd,
+        fprintf(stderr, "ioctl(KVM_CREATE_VM) failed: %d %s\n", -ret,
                 strerror(-ret));
 
 #ifdef TARGET_S390X
@@ -1777,6 +1784,24 @@
     return ret;
 }
 
+int kvm_device_ioctl(int fd, int type, ...)
+{
+    int ret;
+    void *arg;
+    va_list ap;
+
+    va_start(ap, type);
+    arg = va_arg(ap, void *);
+    va_end(ap);
+
+    trace_kvm_device_ioctl(fd, type, arg);
+    ret = ioctl(fd, type, arg);
+    if (ret == -1) {
+        ret = -errno;
+    }
+    return ret;
+}
+
 int kvm_has_sync_mmu(void)
 {
     return kvm_check_extension(kvm_state, KVM_CAP_SYNC_MMU);
@@ -2058,3 +2083,24 @@
 {
     return kvm_arch_on_sigbus(code, addr);
 }
+
+int kvm_create_device(KVMState *s, uint64_t type, bool test)
+{
+    int ret;
+    struct kvm_create_device create_dev;
+
+    create_dev.type = type;
+    create_dev.fd = -1;
+    create_dev.flags = test ? KVM_CREATE_DEVICE_TEST : 0;
+
+    if (!kvm_check_extension(s, KVM_CAP_DEVICE_CTRL)) {
+        return -ENOTSUP;
+    }
+
+    ret = kvm_vm_ioctl(s, KVM_CREATE_DEVICE, &create_dev);
+    if (ret) {
+        return ret;
+    }
+
+    return test ? 0 : create_dev.fd;
+}
diff --git a/migration-rdma.c b/migration-rdma.c
index f94f3b4..eeb4302 100644
--- a/migration-rdma.c
+++ b/migration-rdma.c
@@ -3412,7 +3412,7 @@
     }
 
     ret = qemu_rdma_source_init(rdma, &local_err,
-        s->enabled_capabilities[MIGRATION_CAPABILITY_X_RDMA_PIN_ALL]);
+        s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL]);
 
     if (ret) {
         goto err;
diff --git a/migration.c b/migration.c
index 25add6f..14235b2 100644
--- a/migration.c
+++ b/migration.c
@@ -82,7 +82,7 @@
     if (strstart(uri, "tcp:", &p))
         tcp_start_incoming_migration(p, errp);
 #ifdef CONFIG_RDMA
-    else if (strstart(uri, "x-rdma:", &p))
+    else if (strstart(uri, "rdma:", &p))
         rdma_start_incoming_migration(p, errp);
 #endif
 #if !defined(WIN32)
@@ -438,7 +438,7 @@
     if (strstart(uri, "tcp:", &p)) {
         tcp_start_outgoing_migration(s, p, &local_err);
 #ifdef CONFIG_RDMA
-    } else if (strstart(uri, "x-rdma:", &p)) {
+    } else if (strstart(uri, "rdma:", &p)) {
         rdma_start_outgoing_migration(s, p, &local_err);
 #endif
 #if !defined(WIN32)
@@ -532,7 +532,7 @@
 
     s = migrate_get_current();
 
-    return s->enabled_capabilities[MIGRATION_CAPABILITY_X_RDMA_PIN_ALL];
+    return s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL];
 }
 
 bool migrate_auto_converge(void)
diff --git a/net/net.c b/net/net.c
index 41b3883..e3ef1e4 100644
--- a/net/net.c
+++ b/net/net.c
@@ -378,6 +378,61 @@
     }
 }
 
+bool qemu_has_ufo(NetClientState *nc)
+{
+    if (!nc || !nc->info->has_ufo) {
+        return false;
+    }
+
+    return nc->info->has_ufo(nc);
+}
+
+bool qemu_has_vnet_hdr(NetClientState *nc)
+{
+    if (!nc || !nc->info->has_vnet_hdr) {
+        return false;
+    }
+
+    return nc->info->has_vnet_hdr(nc);
+}
+
+bool qemu_has_vnet_hdr_len(NetClientState *nc, int len)
+{
+    if (!nc || !nc->info->has_vnet_hdr_len) {
+        return false;
+    }
+
+    return nc->info->has_vnet_hdr_len(nc, len);
+}
+
+void qemu_using_vnet_hdr(NetClientState *nc, bool enable)
+{
+    if (!nc || !nc->info->using_vnet_hdr) {
+        return;
+    }
+
+    nc->info->using_vnet_hdr(nc, enable);
+}
+
+void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
+                      int ecn, int ufo)
+{
+    if (!nc || !nc->info->set_offload) {
+        return;
+    }
+
+    nc->info->set_offload(nc, csum, tso4, tso6, ecn, ufo);
+}
+
+void qemu_set_vnet_hdr_len(NetClientState *nc, int len)
+{
+    if (!nc || !nc->info->set_vnet_hdr_len) {
+        return;
+    }
+
+    nc->info->set_vnet_hdr_len(nc, len);
+}
+
 int qemu_can_send_packet(NetClientState *sender)
 {
     if (!sender->peer) {
diff --git a/net/netmap.c b/net/netmap.c
index 0ccc497..8213304 100644
--- a/net/netmap.c
+++ b/net/netmap.c
@@ -27,10 +27,13 @@
 #include <net/if.h>
 #include <sys/mman.h>
 #include <stdint.h>
+#include <stdio.h>
+#define NETMAP_WITH_LIBS
 #include <net/netmap.h>
 #include <net/netmap_user.h>
 
 #include "net/net.h"
+#include "net/tap.h"
 #include "clients.h"
 #include "sysemu/sysemu.h"
 #include "qemu/error-report.h"
@@ -54,33 +57,9 @@
     bool                read_poll;
     bool                write_poll;
     struct iovec        iov[IOV_MAX];
+    int                 vnet_hdr_len;  /* Current virtio-net header length. */
 } NetmapState;
 
-#define D(format, ...)                                          \
-    do {                                                        \
-        struct timeval __xxts;                                  \
-        gettimeofday(&__xxts, NULL);                            \
-        printf("%03d.%06d %s [%d] " format "\n",                \
-                (int)__xxts.tv_sec % 1000, (int)__xxts.tv_usec, \
-                __func__, __LINE__, ##__VA_ARGS__);         \
-    } while (0)
-
-/* Rate limited version of "D", lps indicates how many per second */
-#define RD(lps, format, ...)                                    \
-    do {                                                        \
-        static int t0, __cnt;                                   \
-        struct timeval __xxts;                                  \
-        gettimeofday(&__xxts, NULL);                            \
-        if (t0 != __xxts.tv_sec) {                              \
-            t0 = __xxts.tv_sec;                                 \
-            __cnt = 0;                                          \
-        }                                                       \
-        if (__cnt++ < lps) {                                    \
-            D(format, ##__VA_ARGS__);                           \
-        }                                                       \
-    } while (0)
-
-
 #ifndef __FreeBSD__
 #define pkt_copy bcopy
 #else
@@ -237,7 +216,7 @@
         return size;
     }
 
-    if (ring->avail == 0) {
+    if (nm_ring_empty(ring)) {
         /* No available slots in the netmap TX ring. */
         netmap_write_poll(s, true);
         return 0;
@@ -250,8 +229,7 @@
     ring->slot[i].len = size;
     ring->slot[i].flags = 0;
     pkt_copy(buf, dst, size);
-    ring->cur = NETMAP_RING_NEXT(ring, i);
-    ring->avail--;
+    ring->cur = ring->head = nm_ring_next(ring, i);
     ioctl(s->me.fd, NIOCTXSYNC, NULL);
 
     return size;
@@ -267,17 +245,15 @@
     uint8_t *dst;
     int j;
     uint32_t i;
-    uint32_t avail;
 
     if (unlikely(!ring)) {
         /* Drop the packet. */
         return iov_size(iov, iovcnt);
     }
 
-    i = ring->cur;
-    avail = ring->avail;
+    last = i = ring->cur;
 
-    if (avail < iovcnt) {
+    if (nm_ring_space(ring) < iovcnt) {
         /* Not enough netmap slots. */
         netmap_write_poll(s, true);
         return 0;
@@ -293,7 +269,7 @@
         while (iov_frag_size) {
             nm_frag_size = MIN(iov_frag_size, ring->nr_buf_size);
 
-            if (unlikely(avail == 0)) {
+            if (unlikely(nm_ring_empty(ring))) {
                 /* We run out of netmap slots while splitting the
                    iovec fragments. */
                 netmap_write_poll(s, true);
@@ -308,8 +284,7 @@
             pkt_copy(iov[j].iov_base + offset, dst, nm_frag_size);
 
             last = i;
-            i = NETMAP_RING_NEXT(ring, i);
-            avail--;
+            i = nm_ring_next(ring, i);
 
             offset += nm_frag_size;
             iov_frag_size -= nm_frag_size;
@@ -318,9 +293,8 @@
     /* The last slot must not have NS_MOREFRAG set. */
     ring->slot[last].flags &= ~NS_MOREFRAG;
 
-    /* Now update ring->cur and ring->avail. */
-    ring->cur = i;
-    ring->avail = avail;
+    /* Now update ring->cur and ring->head. */
+    ring->cur = ring->head = i;
 
     ioctl(s->me.fd, NIOCTXSYNC, NULL);
 
@@ -343,7 +317,7 @@
 
     /* Keep sending while there are available packets into the netmap
        RX ring and the forwarding path towards the peer is open. */
-    while (ring->avail > 0 && qemu_can_send_packet(&s->nc)) {
+    while (!nm_ring_empty(ring) && qemu_can_send_packet(&s->nc)) {
         uint32_t i;
         uint32_t idx;
         bool morefrag;
@@ -358,11 +332,10 @@
             s->iov[iovcnt].iov_len = ring->slot[i].len;
             iovcnt++;
 
-            ring->cur = NETMAP_RING_NEXT(ring, i);
-            ring->avail--;
-        } while (ring->avail && morefrag);
+            ring->cur = ring->head = nm_ring_next(ring, i);
+        } while (!nm_ring_empty(ring) && morefrag);
 
-        if (unlikely(!ring->avail && morefrag)) {
+        if (unlikely(nm_ring_empty(ring) && morefrag)) {
             RD(5, "[netmap_send] ran out of slots, with a pending"
                    "incomplete packet\n");
         }
@@ -394,6 +367,63 @@
     s->me.fd = -1;
 }
 
+/* Offloading manipulation support callbacks. */
+static bool netmap_has_ufo(NetClientState *nc)
+{
+    return true;
+}
+
+static bool netmap_has_vnet_hdr(NetClientState *nc)
+{
+    return true;
+}
+
+static bool netmap_has_vnet_hdr_len(NetClientState *nc, int len)
+{
+    return len == 0 || len == sizeof(struct virtio_net_hdr) ||
+                len == sizeof(struct virtio_net_hdr_mrg_rxbuf);
+}
+
+static void netmap_using_vnet_hdr(NetClientState *nc, bool enable)
+{
+}
+
+static void netmap_set_vnet_hdr_len(NetClientState *nc, int len)
+{
+    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
+    int err;
+    struct nmreq req;
+
+    /* Issue a NETMAP_BDG_VNET_HDR command to change the virtio-net header
+     * length for the netmap adapter associated with 's->me.ifname'.
+     */
+    memset(&req, 0, sizeof(req));
+    pstrcpy(req.nr_name, sizeof(req.nr_name), s->me.ifname);
+    req.nr_version = NETMAP_API;
+    req.nr_cmd = NETMAP_BDG_VNET_HDR;
+    req.nr_arg1 = len;
+    err = ioctl(s->me.fd, NIOCREGIF, &req);
+    if (err) {
+        error_report("Unable to execute NETMAP_BDG_VNET_HDR on %s: %s",
+                     s->me.ifname, strerror(errno));
+    } else {
+        /* Keep track of the current length. */
+        s->vnet_hdr_len = len;
+    }
+}
+
+static void netmap_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
+                               int ecn, int ufo)
+{
+    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
+
+    /* Setting a virtio-net header length greater than zero automatically
+     * enables the offloads.
+     */
+    if (!s->vnet_hdr_len) {
+        netmap_set_vnet_hdr_len(nc, sizeof(struct virtio_net_hdr));
+    }
+}
 
 /* NetClientInfo methods */
 static NetClientInfo net_netmap_info = {
@@ -403,6 +433,12 @@
     .receive_iov = netmap_receive_iov,
     .poll = netmap_poll,
     .cleanup = netmap_cleanup,
+    .has_ufo = netmap_has_ufo,
+    .has_vnet_hdr = netmap_has_vnet_hdr,
+    .has_vnet_hdr_len = netmap_has_vnet_hdr_len,
+    .using_vnet_hdr = netmap_using_vnet_hdr,
+    .set_offload = netmap_set_offload,
+    .set_vnet_hdr_len = netmap_set_vnet_hdr_len,
 };
 
 /* The exported init function
@@ -428,6 +464,7 @@
     nc = qemu_new_net_client(&net_netmap_info, peer, "netmap", name);
     s = DO_UPCAST(NetmapState, nc, nc);
     s->me = me;
+    s->vnet_hdr_len = 0;
     netmap_read_poll(s, true); /* Initially only poll for reads. */
 
     return 0;
diff --git a/net/tap-win32.c b/net/tap-win32.c
index 91e9e84..8aee611 100644
--- a/net/tap-win32.c
+++ b/net/tap-win32.c
@@ -669,11 +669,60 @@
     }
 }
 
+static bool tap_has_ufo(NetClientState *nc)
+{
+    return false;
+}
+
+static bool tap_has_vnet_hdr(NetClientState *nc)
+{
+    return false;
+}
+
+int tap_probe_vnet_hdr_len(int fd, int len)
+{
+    return 0;
+}
+
+void tap_fd_set_vnet_hdr_len(int fd, int len)
+{
+}
+
+static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
+{
+}
+
+static void tap_set_offload(NetClientState *nc, int csum, int tso4,
+                     int tso6, int ecn, int ufo)
+{
+}
+
+struct vhost_net *tap_get_vhost_net(NetClientState *nc)
+{
+    return NULL;
+}
+
+static bool tap_has_vnet_hdr_len(NetClientState *nc, int len)
+{
+    return false;
+}
+
+static void tap_set_vnet_hdr_len(NetClientState *nc, int len)
+{
+    abort();
+}
+
 static NetClientInfo net_tap_win32_info = {
     .type = NET_CLIENT_OPTIONS_KIND_TAP,
     .size = sizeof(TAPState),
     .receive = tap_receive,
     .cleanup = tap_cleanup,
+    .has_ufo = tap_has_ufo,
+    .has_vnet_hdr = tap_has_vnet_hdr,
+    .has_vnet_hdr_len = tap_has_vnet_hdr_len,
+    .using_vnet_hdr = tap_using_vnet_hdr,
+    .set_offload = tap_set_offload,
+    .set_vnet_hdr_len = tap_set_vnet_hdr_len,
 };
 
 static int tap_win32_init(NetClientState *peer, const char *model,
@@ -722,49 +771,6 @@
     return 0;
 }
 
-bool tap_has_ufo(NetClientState *nc)
-{
-    return false;
-}
-
-int tap_has_vnet_hdr(NetClientState *nc)
-{
-    return 0;
-}
-
-int tap_probe_vnet_hdr_len(int fd, int len)
-{
-    return 0;
-}
-
-void tap_fd_set_vnet_hdr_len(int fd, int len)
-{
-}
-
-void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
-{
-}
-
-void tap_set_offload(NetClientState *nc, int csum, int tso4,
-                     int tso6, int ecn, int ufo)
-{
-}
-
-struct vhost_net *tap_get_vhost_net(NetClientState *nc)
-{
-    return NULL;
-}
-
-int tap_has_vnet_hdr_len(NetClientState *nc, int len)
-{
-    return 0;
-}
-
-void tap_set_vnet_hdr_len(NetClientState *nc, int len)
-{
-    abort();
-}
-
 int tap_enable(NetClientState *nc)
 {
     abort();
diff --git a/net/tap.c b/net/tap.c
index 39c1cda..2d5099b 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -210,7 +210,7 @@
     } while (size > 0 && qemu_can_send_packet(&s->nc));
 }
 
-bool tap_has_ufo(NetClientState *nc)
+static bool tap_has_ufo(NetClientState *nc)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
 
@@ -219,7 +219,7 @@
     return s->has_ufo;
 }
 
-int tap_has_vnet_hdr(NetClientState *nc)
+static bool tap_has_vnet_hdr(NetClientState *nc)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
 
@@ -228,16 +228,16 @@
     return !!s->host_vnet_hdr_len;
 }
 
-int tap_has_vnet_hdr_len(NetClientState *nc, int len)
+static bool tap_has_vnet_hdr_len(NetClientState *nc, int len)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
 
     assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
 
-    return tap_probe_vnet_hdr_len(s->fd, len);
+    return !!tap_probe_vnet_hdr_len(s->fd, len);
 }
 
-void tap_set_vnet_hdr_len(NetClientState *nc, int len)
+static void tap_set_vnet_hdr_len(NetClientState *nc, int len)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
 
@@ -249,7 +249,7 @@
     s->host_vnet_hdr_len = len;
 }
 
-void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
+static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
 
@@ -259,7 +259,7 @@
     s->using_vnet_hdr = using_vnet_hdr;
 }
 
-void tap_set_offload(NetClientState *nc, int csum, int tso4,
+static void tap_set_offload(NetClientState *nc, int csum, int tso4,
                      int tso6, int ecn, int ufo)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
@@ -314,6 +314,12 @@
     .receive_iov = tap_receive_iov,
     .poll = tap_poll,
     .cleanup = tap_cleanup,
+    .has_ufo = tap_has_ufo,
+    .has_vnet_hdr = tap_has_vnet_hdr,
+    .has_vnet_hdr_len = tap_has_vnet_hdr_len,
+    .using_vnet_hdr = tap_using_vnet_hdr,
+    .set_offload = tap_set_offload,
+    .set_vnet_hdr_len = tap_set_vnet_hdr_len,
 };
 
 static TAPState *net_tap_fd_init(NetClientState *peer,
diff --git a/page_cache.c b/page_cache.c
index 3ef6ee7..b033681 100644
--- a/page_cache.c
+++ b/page_cache.c
@@ -150,7 +150,7 @@
     return cache_get_by_addr(cache, addr)->it_data;
 }
 
-int cache_insert(PageCache *cache, uint64_t addr, uint8_t *pdata)
+int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata)
 {
 
     CacheItem *it = NULL;
diff --git a/pc-bios/QEMU,cgthree.bin b/pc-bios/QEMU,cgthree.bin
new file mode 100644
index 0000000..6fec946
--- /dev/null
+++ b/pc-bios/QEMU,cgthree.bin
Binary files differ
diff --git a/pc-bios/README b/pc-bios/README
index f190068..5914200 100644
--- a/pc-bios/README
+++ b/pc-bios/README
@@ -11,8 +11,8 @@
   firmware implementation. The goal is to implement a 100% IEEE
   1275-1994 (referred to as Open Firmware) compliant firmware.
   The included images for PowerPC (for 32 and 64 bit PPC CPUs),
-  Sparc32 (including QEMU,tcx.bin) and Sparc64 are built from OpenBIOS SVN
-  revision 1246.
+  Sparc32 (including QEMU,tcx.bin and QEMU,cgthree.bin) and Sparc64 are built
+  from OpenBIOS SVN revision 1246.
 
 - SLOF (Slimline Open Firmware) is a free IEEE 1275 Open Firmware
   implementation for certain IBM POWER hardware.  The sources are at
diff --git a/qapi-schema.json b/qapi-schema.json
index fcb22800..ac8ad24 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -751,10 +751,9 @@
 #          This feature allows us to minimize migration traffic for certain work
 #          loads, by sending compressed difference of the pages
 #
-# @x-rdma-pin-all: Controls whether or not the entire VM memory footprint is
+# @rdma-pin-all: Controls whether or not the entire VM memory footprint is
 #          mlock()'d on demand or all at once. Refer to docs/rdma.txt for usage.
-#          Disabled by default. Experimental: may (or may not) be renamed after
-#          further testing is complete. (since 1.6)
+#          Disabled by default. (since 2.0)
 #
 # @zero-blocks: During storage migration encode blocks of zeroes efficiently. This
 #          essentially saves 1MB of zeroes per block on the wire. Enabling requires
@@ -768,7 +767,7 @@
 # Since: 1.2
 ##
 { 'enum': 'MigrationCapability',
-  'data': ['xbzrle', 'x-rdma-pin-all', 'auto-converge', 'zero-blocks'] }
+  'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks'] }
 
 ##
 # @MigrationCapabilityStatus
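
For illustration only, not part of the patch above: after dropping the x- prefix, an RDMA migration is driven from the monitor roughly as

    (qemu) migrate_set_capability rdma-pin-all on
    (qemu) migrate -d rdma:192.0.2.1:4444

with the destination started with -incoming rdma:192.0.2.1:4444 (host address and port are made up); see docs/rdma.txt for details.
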
diff --git a/qdev-monitor.c b/qdev-monitor.c
index 3a7dc0d..6673e3c 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -548,16 +548,18 @@
                                   OBJECT(dev), NULL);
         g_free(name);
     }
+
+    dev->opts = opts;
     object_property_set_bool(OBJECT(dev), true, "realized", &err);
     if (err != NULL) {
         qerror_report_err(err);
         error_free(err);
+        dev->opts = NULL;
         object_unparent(OBJECT(dev));
         object_unref(OBJECT(dev));
         qerror_report(QERR_DEVICE_INIT_FAILED, driver);
         return NULL;
     }
-    dev->opts = opts;
     return dev;
 }
 
diff --git a/qemu-file.c b/qemu-file.c
index 9473b67..f074af1 100644
--- a/qemu-file.c
+++ b/qemu-file.c
@@ -100,7 +100,14 @@
                             int size)
 {
     QEMUFileStdio *s = opaque;
-    return fwrite(buf, 1, size, s->stdio_file);
+    int res;
+
+    res = fwrite(buf, 1, size, s->stdio_file);
+
+    if (res != size) {
+        return -EIO; /* fake errno value */
+    }
+    return res;
 }
 
 static int stdio_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs
index df92fe5..df3aa7a 100644
--- a/stubs/Makefile.objs
+++ b/stubs/Makefile.objs
@@ -27,3 +27,4 @@
 stub-obj-y += vmstate.o
 stub-obj-$(CONFIG_WIN32) += fd-register.o
 stub-obj-y += cpus.o
+stub-obj-y += kvm.o
diff --git a/stubs/kvm.c b/stubs/kvm.c
new file mode 100644
index 0000000..e7c60b6
--- /dev/null
+++ b/stubs/kvm.c
@@ -0,0 +1,7 @@
+#include "qemu-common.h"
+#include "sysemu/kvm.h"
+
+int kvm_arch_irqchip_create(KVMState *s)
+{
+    return 0;
+}
diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h
index afbd422..00234e1 100644
--- a/target-arm/cpu-qom.h
+++ b/target-arm/cpu-qom.h
@@ -132,6 +132,16 @@
     uint32_t id_isar3;
     uint32_t id_isar4;
     uint32_t id_isar5;
+    uint64_t id_aa64pfr0;
+    uint64_t id_aa64pfr1;
+    uint64_t id_aa64dfr0;
+    uint64_t id_aa64dfr1;
+    uint64_t id_aa64afr0;
+    uint64_t id_aa64afr1;
+    uint64_t id_aa64isar0;
+    uint64_t id_aa64isar1;
+    uint64_t id_aa64mmfr0;
+    uint64_t id_aa64mmfr1;
     uint32_t clidr;
     /* The elements of this array are the CCSIDR values for each cache,
      * in the order L1DCache, L1ICache, L2DCache, L2ICache, etc.
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 6e7ce89..1ce8a9b 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -60,7 +60,7 @@
         return;
     }
 
-    if (ri->type & ARM_CP_64BIT) {
+    if (cpreg_field_is_64bit(ri)) {
         CPREG_FIELD64(&cpu->env, ri) = ri->resetvalue;
     } else {
         CPREG_FIELD32(&cpu->env, ri) = ri->resetvalue;
@@ -91,9 +91,10 @@
         env->aarch64 = 1;
 #if defined(CONFIG_USER_ONLY)
         env->pstate = PSTATE_MODE_EL0t;
+        /* Userspace expects access to CTR_EL0 and the cache ops */
+        env->cp15.c1_sys |= SCTLR_UCT | SCTLR_UCI;
 #else
-        env->pstate = PSTATE_D | PSTATE_A | PSTATE_I | PSTATE_F
-            | PSTATE_MODE_EL1h;
+        env->pstate = PSTATE_MODE_EL1h;
 #endif
     }
 
@@ -108,13 +109,14 @@
     }
 #else
     /* SVC mode with interrupts disabled.  */
-    env->uncached_cpsr = ARM_CPU_MODE_SVC | CPSR_A | CPSR_F | CPSR_I;
+    env->uncached_cpsr = ARM_CPU_MODE_SVC;
+    env->daif = PSTATE_D | PSTATE_A | PSTATE_I | PSTATE_F;
     /* On ARMv7-M the CPSR_I is the value of the PRIMASK register, and is
        clear at reset.  Initial SP and PC are loaded from ROM.  */
     if (IS_M(env)) {
         uint32_t pc;
         uint8_t *rom;
-        env->uncached_cpsr &= ~CPSR_I;
+        env->daif &= ~PSTATE_I;
         rom = rom_ptr(0);
         if (rom) {
             /* We should really use ldl_phys here, in case the guest
@@ -922,6 +924,7 @@
     set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
     set_feature(&cpu->env, ARM_FEATURE_ARM_DIV);
     set_feature(&cpu->env, ARM_FEATURE_V7MP);
+    set_feature(&cpu->env, ARM_FEATURE_CRC);
 #ifdef TARGET_AARCH64
     set_feature(&cpu->env, ARM_FEATURE_AARCH64);
 #endif
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 3c8a2db..49fef3f 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -74,8 +74,10 @@
  */
 #ifdef HOST_WORDS_BIGENDIAN
 #define offsetoflow32(S, M) (offsetof(S, M) + sizeof(uint32_t))
+#define offsetofhigh32(S, M) offsetof(S, M)
 #else
 #define offsetoflow32(S, M) offsetof(S, M)
+#define offsetofhigh32(S, M) (offsetof(S, M) + sizeof(uint32_t))
 #endif
 
 /* Meanings of the ARMCPU object's two inbound GPIO lines */
@@ -102,7 +104,7 @@
 /* CPU state for each instance of a generic timer (in cp15 c14) */
 typedef struct ARMGenericTimer {
     uint64_t cval; /* Timer CompareValue register */
-    uint32_t ctl; /* Timer Control register */
+    uint64_t ctl; /* Timer Control register */
 } ARMGenericTimer;
 
 #define GTIMER_PHYS 0
@@ -133,6 +135,7 @@
      *  NZCV are kept in the split out env->CF/VF/NF/ZF, (which have the same
      *    semantics as for AArch32, as described in the comments on each field)
      *  nRW (also known as M[4]) is kept, inverted, in env->aarch64
+     *  DAIF (exception masks) are kept in env->daif
      *  all other bits are stored in their correct places in env->pstate
      */
     uint32_t pstate;
@@ -162,20 +165,19 @@
     uint32_t GE; /* cpsr[19:16] */
     uint32_t thumb; /* cpsr[5]. 0 = arm mode, 1 = thumb mode. */
     uint32_t condexec_bits; /* IT bits.  cpsr[15:10,26:25].  */
+    uint32_t daif; /* exception masks, in the bits they are in PSTATE */
 
     /* System control coprocessor (cp15) */
     struct {
         uint32_t c0_cpuid;
-        uint32_t c0_cssel; /* Cache size selection.  */
-        uint32_t c1_sys; /* System control register.  */
-        uint32_t c1_coproc; /* Coprocessor access register.  */
+        uint64_t c0_cssel; /* Cache size selection.  */
+        uint64_t c1_sys; /* System control register.  */
+        uint64_t c1_coproc; /* Coprocessor access register.  */
         uint32_t c1_xscaleauxcr; /* XScale auxiliary control register.  */
         uint32_t c1_scr; /* secure config register.  */
-        uint32_t c2_base0; /* MMU translation table base 0.  */
-        uint32_t c2_base0_hi; /* MMU translation table base 0, high 32 bits */
-        uint32_t c2_base1; /* MMU translation table base 0.  */
-        uint32_t c2_base1_hi; /* MMU translation table base 1, high 32 bits */
-        uint32_t c2_control; /* MMU translation table base control.  */
+        uint64_t ttbr0_el1; /* MMU translation table base 0. */
+        uint64_t ttbr1_el1; /* MMU translation table base 1. */
+        uint64_t c2_control; /* MMU translation table base control.  */
         uint32_t c2_mask; /* MMU translation table base selection mask.  */
         uint32_t c2_base_mask; /* MMU translation table base 0 mask. */
         uint32_t c2_data; /* MPU data cachable bits.  */
@@ -197,14 +199,15 @@
         uint32_t c9_pmxevtyper; /* perf monitor event type */
         uint32_t c9_pmuserenr; /* perf monitor user enable */
         uint32_t c9_pminten; /* perf monitor interrupt enables */
-        uint32_t c12_vbar; /* vector base address register */
+        uint64_t mair_el1;
+        uint64_t c12_vbar; /* vector base address register */
         uint32_t c13_fcse; /* FCSE PID.  */
         uint32_t c13_context; /* Context ID.  */
         uint64_t tpidr_el0; /* User RW Thread register.  */
         uint64_t tpidrro_el0; /* User RO Thread register.  */
         uint64_t tpidr_el1; /* Privileged Thread register.  */
-        uint32_t c14_cntfrq; /* Counter Frequency register */
-        uint32_t c14_cntkctl; /* Timer Control register */
+        uint64_t c14_cntfrq; /* Counter Frequency register */
+        uint64_t c14_cntkctl; /* Timer Control register */
         ARMGenericTimer c14_timer[NUM_GTIMERS];
         uint32_t c15_cpar; /* XScale Coprocessor Access Register */
         uint32_t c15_ticonfig; /* TI925T configuration byte.  */
@@ -215,6 +218,10 @@
         uint32_t c15_diagnostic; /* diagnostic register */
         uint32_t c15_power_diagnostic;
         uint32_t c15_power_control; /* power control */
+        uint64_t dbgbvr[16]; /* breakpoint value registers */
+        uint64_t dbgbcr[16]; /* breakpoint control registers */
+        uint64_t dbgwvr[16]; /* watchpoint value registers */
+        uint64_t dbgwcr[16]; /* watchpoint control registers */
     } cp15;
 
     struct {
@@ -401,9 +408,11 @@
 #define CPSR_Z (1U << 30)
 #define CPSR_N (1U << 31)
 #define CPSR_NZCV (CPSR_N | CPSR_Z | CPSR_C | CPSR_V)
+#define CPSR_AIF (CPSR_A | CPSR_I | CPSR_F)
 
 #define CPSR_IT (CPSR_IT_0_1 | CPSR_IT_2_7)
-#define CACHED_CPSR_BITS (CPSR_T | CPSR_GE | CPSR_IT | CPSR_Q | CPSR_NZCV)
+#define CACHED_CPSR_BITS (CPSR_T | CPSR_AIF | CPSR_GE | CPSR_IT | CPSR_Q \
+    | CPSR_NZCV)
 /* Bits writable in user mode.  */
 #define CPSR_USER (CPSR_NZCV | CPSR_Q | CPSR_GE)
 /* Execution state bits.  MRS read as zero, MSR writes ignored.  */
@@ -426,7 +435,8 @@
 #define PSTATE_Z (1U << 30)
 #define PSTATE_N (1U << 31)
 #define PSTATE_NZCV (PSTATE_N | PSTATE_Z | PSTATE_C | PSTATE_V)
-#define CACHED_PSTATE_BITS (PSTATE_NZCV)
+#define PSTATE_DAIF (PSTATE_D | PSTATE_A | PSTATE_I | PSTATE_F)
+#define CACHED_PSTATE_BITS (PSTATE_NZCV | PSTATE_DAIF)
 /* Mode values for AArch64 */
 #define PSTATE_MODE_EL3h 13
 #define PSTATE_MODE_EL3t 12
@@ -447,7 +457,7 @@
     ZF = (env->ZF == 0);
     return (env->NF & 0x80000000) | (ZF << 30)
         | (env->CF << 29) | ((env->VF & 0x80000000) >> 3)
-        | env->pstate;
+        | env->pstate | env->daif;
 }
 
 static inline void pstate_write(CPUARMState *env, uint32_t val)
@@ -456,6 +466,7 @@
     env->NF = val;
     env->CF = (val >> 29) & 1;
     env->VF = (val << 3) & 0x80000000;
+    env->daif = val & PSTATE_DAIF;
     env->pstate = val & ~CACHED_PSTATE_BITS;
 }
 
@@ -615,6 +626,7 @@
     ARM_FEATURE_AARCH64, /* supports 64 bit mode */
     ARM_FEATURE_V8_AES, /* implements AES part of v8 Crypto Extensions */
     ARM_FEATURE_CBAR, /* has cp15 CBAR */
+    ARM_FEATURE_CRC, /* ARMv8 CRC instructions */
 };
 
 static inline int arm_feature(CPUARMState *env, int feature)
@@ -622,6 +634,22 @@
     return (env->features & (1ULL << feature)) != 0;
 }
 
+/* Return true if the specified exception level is running in AArch64 state. */
+static inline bool arm_el_is_aa64(CPUARMState *env, int el)
+{
+    /* We don't currently support EL2 or EL3, and this isn't valid for EL0
+     * (if we're in EL0, is_a64() is what you want, and if we're not in EL0
+     * then the state of EL0 isn't well defined.)
+     */
+    assert(el == 1);
+    /* AArch64-capable CPUs always run with EL1 in AArch64 mode. This
+     * is a QEMU-imposed simplification which we may wish to change later.
+     * If we in future support EL2 and/or EL3, then the state of lower
+     * exception levels is controlled by the HCR.RW and SCR.RW bits.
+     */
+    return arm_feature(env, ARM_FEATURE_AARCH64);
+}
+
 void arm_cpu_list(FILE *f, fprintf_function cpu_fprintf);
 
 /* Interface between CPU and Interrupt controller.  */
@@ -731,7 +759,8 @@
 #define ARM_CP_NOP (ARM_CP_SPECIAL | (1 << 8))
 #define ARM_CP_WFI (ARM_CP_SPECIAL | (2 << 8))
 #define ARM_CP_NZCV (ARM_CP_SPECIAL | (3 << 8))
-#define ARM_LAST_SPECIAL ARM_CP_NZCV
+#define ARM_CP_CURRENTEL (ARM_CP_SPECIAL | (4 << 8))
+#define ARM_LAST_SPECIAL ARM_CP_CURRENTEL
 /* Used only as a terminator for ARMCPRegInfo lists */
 #define ARM_CP_SENTINEL 0xffff
 /* Mask of only the flag bits in a type field */
@@ -959,6 +988,14 @@
  */
 void arm_cp_reset_ignore(CPUARMState *env, const ARMCPRegInfo *opaque);
 
+/* Return true if this reginfo struct's field in the cpu state struct
+ * is 64 bits wide.
+ */
+static inline bool cpreg_field_is_64bit(const ARMCPRegInfo *ri)
+{
+    return (ri->state == ARM_CP_STATE_AA64) || (ri->type & ARM_CP_64BIT);
+}
+
 static inline bool cp_access_ok(int current_pl,
                                 const ARMCPRegInfo *ri, int isread)
 {
@@ -1043,7 +1080,7 @@
 #define MMU_USER_IDX 1
 static inline int cpu_mmu_index (CPUARMState *env)
 {
-    return (env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR ? 1 : 0;
+    return arm_current_pl(env) ? 0 : 1;
 }
 
 #include "exec/cpu-all.h"
@@ -1070,7 +1107,9 @@
 #define ARM_TBFLAG_BSWAP_CODE_SHIFT 16
 #define ARM_TBFLAG_BSWAP_CODE_MASK  (1 << ARM_TBFLAG_BSWAP_CODE_SHIFT)
 
-/* Bit usage when in AArch64 state: currently no bits defined */
+/* Bit usage when in AArch64 state */
+#define ARM_TBFLAG_AA64_EL_SHIFT    0
+#define ARM_TBFLAG_AA64_EL_MASK     (0x3 << ARM_TBFLAG_AA64_EL_SHIFT)
 
 /* some convenience accessor macros */
 #define ARM_TBFLAG_AARCH64_STATE(F) \
@@ -1089,13 +1128,16 @@
     (((F) & ARM_TBFLAG_CONDEXEC_MASK) >> ARM_TBFLAG_CONDEXEC_SHIFT)
 #define ARM_TBFLAG_BSWAP_CODE(F) \
     (((F) & ARM_TBFLAG_BSWAP_CODE_MASK) >> ARM_TBFLAG_BSWAP_CODE_SHIFT)
+#define ARM_TBFLAG_AA64_EL(F) \
+    (((F) & ARM_TBFLAG_AA64_EL_MASK) >> ARM_TBFLAG_AA64_EL_SHIFT)
 
 static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
                                         target_ulong *cs_base, int *flags)
 {
     if (is_a64(env)) {
         *pc = env->pc;
-        *flags = ARM_TBFLAG_AARCH64_STATE_MASK;
+        *flags = ARM_TBFLAG_AARCH64_STATE_MASK
+            | (arm_current_pl(env) << ARM_TBFLAG_AA64_EL_SHIFT);
     } else {
         int privmode;
         *pc = env->regs[15];
diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c
index a639c2e..8426bf1 100644
--- a/target-arm/cpu64.c
+++ b/target-arm/cpu64.c
@@ -45,6 +45,7 @@
     set_feature(&cpu->env, ARM_FEATURE_ARM_DIV);
     set_feature(&cpu->env, ARM_FEATURE_V7MP);
     set_feature(&cpu->env, ARM_FEATURE_AARCH64);
+    cpu->ctr = 0x80030003; /* 32 byte I and D cacheline size, VIPT icache */
 }
 #endif
 
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 1b111b6..90f85f1 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -5,6 +5,8 @@
 #include "sysemu/arch_init.h"
 #include "sysemu/sysemu.h"
 #include "qemu/bitops.h"
+#include "qemu/crc32c.h"
+#include <zlib.h> /* For crc32 */
 
 #ifndef CONFIG_USER_ONLY
 static inline int get_phys_addr(CPUARMState *env, uint32_t address,
@@ -109,7 +111,7 @@
 
 static uint64_t raw_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
-    if (ri->type & ARM_CP_64BIT) {
+    if (cpreg_field_is_64bit(ri)) {
         return CPREG_FIELD64(env, ri);
     } else {
         return CPREG_FIELD32(env, ri);
@@ -119,7 +121,7 @@
 static void raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
                       uint64_t value)
 {
-    if (ri->type & ARM_CP_64BIT) {
+    if (cpreg_field_is_64bit(ri)) {
         CPREG_FIELD64(env, ri) = value;
     } else {
         CPREG_FIELD32(env, ri) = value;
@@ -458,7 +460,8 @@
      */
     { .name = "WFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 1,
       .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, },
-    { .name = "CPACR", .cp = 15, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 2,
+    { .name = "CPACR", .state = ARM_CP_STATE_BOTH, .opc0 = 3,
+      .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 2,
       .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c1_coproc),
       .resetvalue = 0, .writefn = cpacr_write },
     REGINFO_SENTINEL
@@ -533,6 +536,12 @@
 static void vbar_write(CPUARMState *env, const ARMCPRegInfo *ri,
                        uint64_t value)
 {
+    /* Note that even though the AArch64 view of this register has bits
+     * [10:0] all RES0 we can only mask the bottom 5, to comply with the
+     * architectural requirements for bits which are RES0 only in some
+     * contexts. (ARMv8 would permit us to do no masking at all, but ARMv7
+     * requires the bottom five bits to be RAZ/WI because they're UNK/SBZP.)
+     */
     env->cp15.c12_vbar = value & ~0x1Ful;
 }
 
@@ -622,16 +631,19 @@
       .access = PL1_RW, .type = ARM_CP_NO_MIGRATE,
       .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten),
       .resetvalue = 0, .writefn = pmintenclr_write, },
-    { .name = "VBAR", .cp = 15, .crn = 12, .crm = 0, .opc1 = 0, .opc2 = 0,
+    { .name = "VBAR", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .crn = 12, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW, .writefn = vbar_write,
       .fieldoffset = offsetof(CPUARMState, cp15.c12_vbar),
       .resetvalue = 0 },
     { .name = "SCR", .cp = 15, .crn = 1, .crm = 1, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c1_scr),
       .resetvalue = 0, },
-    { .name = "CCSIDR", .cp = 15, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 0,
+    { .name = "CCSIDR", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 0,
       .access = PL1_R, .readfn = ccsidr_read, .type = ARM_CP_NO_MIGRATE },
-    { .name = "CSSELR", .cp = 15, .crn = 0, .crm = 0, .opc1 = 2, .opc2 = 0,
+    { .name = "CSSELR", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 2, .opc2 = 0,
       .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c0_cssel),
       .writefn = csselr_write, .resetvalue = 0 },
     /* Auxiliary ID register: this actually has an IMPDEF value but for now
@@ -639,6 +651,26 @@
      */
     { .name = "AIDR", .cp = 15, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 7,
       .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+    /* MAIR can just read-as-written because we don't implement caches
+     * and so don't need to care about memory attributes.
+     */
+    { .name = "MAIR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 0, .crn = 10, .crm = 2, .opc2 = 0,
+      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.mair_el1),
+      .resetvalue = 0 },
+    /* For non-long-descriptor page tables these are PRRR and NMRR;
+     * regardless they still act as reads-as-written for QEMU.
+     * The override is necessary because of the overly-broad TLB_LOCKDOWN
+     * definition.
+     */
+    { .name = "MAIR0", .state = ARM_CP_STATE_AA32, .type = ARM_CP_OVERRIDE,
+      .cp = 15, .opc1 = 0, .crn = 10, .crm = 2, .opc2 = 0, .access = PL1_RW,
+      .fieldoffset = offsetoflow32(CPUARMState, cp15.mair_el1),
+      .resetfn = arm_cp_reset_ignore },
+    { .name = "MAIR1", .state = ARM_CP_STATE_AA32, .type = ARM_CP_OVERRIDE,
+      .cp = 15, .opc1 = 0, .crn = 10, .crm = 2, .opc2 = 1, .access = PL1_RW,
+      .fieldoffset = offsetofhigh32(CPUARMState, cp15.mair_el1),
+      .resetfn = arm_cp_reset_ignore },
     REGINFO_SENTINEL
 };
 
@@ -872,30 +904,55 @@
      * Our reset value matches the fixed frequency we implement the timer at.
      */
     { .name = "CNTFRQ", .cp = 15, .crn = 14, .crm = 0, .opc1 = 0, .opc2 = 0,
-      .access = PL1_RW | PL0_R,
+      .type = ARM_CP_NO_MIGRATE,
+      .access = PL1_RW | PL0_R, .accessfn = gt_cntfrq_access,
+      .fieldoffset = offsetoflow32(CPUARMState, cp15.c14_cntfrq),
+      .resetfn = arm_cp_reset_ignore,
+    },
+    { .name = "CNTFRQ_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 0,
+      .access = PL1_RW | PL0_R, .accessfn = gt_cntfrq_access,
       .fieldoffset = offsetof(CPUARMState, cp15.c14_cntfrq),
       .resetvalue = (1000 * 1000 * 1000) / GTIMER_SCALE,
-      .accessfn = gt_cntfrq_access,
     },
     /* overall control: mostly access permissions */
-    { .name = "CNTKCTL", .cp = 15, .crn = 14, .crm = 1, .opc1 = 0, .opc2 = 0,
+    { .name = "CNTKCTL", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .opc1 = 0, .crn = 14, .crm = 1, .opc2 = 0,
       .access = PL1_RW,
       .fieldoffset = offsetof(CPUARMState, cp15.c14_cntkctl),
       .resetvalue = 0,
     },
     /* per-timer control */
     { .name = "CNTP_CTL", .cp = 15, .crn = 14, .crm = 2, .opc1 = 0, .opc2 = 1,
+      .type = ARM_CP_IO | ARM_CP_NO_MIGRATE, .access = PL1_RW | PL0_R,
+      .accessfn = gt_ptimer_access,
+      .fieldoffset = offsetoflow32(CPUARMState,
+                                   cp15.c14_timer[GTIMER_PHYS].ctl),
+      .resetfn = arm_cp_reset_ignore,
+      .writefn = gt_ctl_write, .raw_writefn = raw_write,
+    },
+    { .name = "CNTP_CTL_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 2, .opc2 = 1,
       .type = ARM_CP_IO, .access = PL1_RW | PL0_R,
+      .accessfn = gt_ptimer_access,
       .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].ctl),
       .resetvalue = 0,
-      .accessfn = gt_ptimer_access,
       .writefn = gt_ctl_write, .raw_writefn = raw_write,
     },
     { .name = "CNTV_CTL", .cp = 15, .crn = 14, .crm = 3, .opc1 = 0, .opc2 = 1,
+      .type = ARM_CP_IO | ARM_CP_NO_MIGRATE, .access = PL1_RW | PL0_R,
+      .accessfn = gt_vtimer_access,
+      .fieldoffset = offsetoflow32(CPUARMState,
+                                   cp15.c14_timer[GTIMER_VIRT].ctl),
+      .resetfn = arm_cp_reset_ignore,
+      .writefn = gt_ctl_write, .raw_writefn = raw_write,
+    },
+    { .name = "CNTV_CTL_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 3, .opc2 = 1,
       .type = ARM_CP_IO, .access = PL1_RW | PL0_R,
+      .accessfn = gt_vtimer_access,
       .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].ctl),
       .resetvalue = 0,
-      .accessfn = gt_vtimer_access,
       .writefn = gt_ctl_write, .raw_writefn = raw_write,
     },
     /* TimerValue views: a 32 bit downcounting view of the underlying state */
@@ -904,37 +961,73 @@
       .accessfn = gt_ptimer_access,
       .readfn = gt_tval_read, .writefn = gt_tval_write,
     },
+    { .name = "CNTP_TVAL_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 2, .opc2 = 0,
+      .type = ARM_CP_NO_MIGRATE | ARM_CP_IO, .access = PL1_RW | PL0_R,
+      .readfn = gt_tval_read, .writefn = gt_tval_write,
+    },
     { .name = "CNTV_TVAL", .cp = 15, .crn = 14, .crm = 3, .opc1 = 0, .opc2 = 0,
       .type = ARM_CP_NO_MIGRATE | ARM_CP_IO, .access = PL1_RW | PL0_R,
       .accessfn = gt_vtimer_access,
       .readfn = gt_tval_read, .writefn = gt_tval_write,
     },
+    { .name = "CNTV_TVAL_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 3, .opc2 = 0,
+      .type = ARM_CP_NO_MIGRATE | ARM_CP_IO, .access = PL1_RW | PL0_R,
+      .readfn = gt_tval_read, .writefn = gt_tval_write,
+    },
     /* The counter itself */
     { .name = "CNTPCT", .cp = 15, .crm = 14, .opc1 = 0,
       .access = PL0_R, .type = ARM_CP_64BIT | ARM_CP_NO_MIGRATE | ARM_CP_IO,
       .accessfn = gt_pct_access,
+      .readfn = gt_cnt_read, .resetfn = arm_cp_reset_ignore,
+    },
+    { .name = "CNTPCT_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 1,
+      .access = PL0_R, .type = ARM_CP_NO_MIGRATE | ARM_CP_IO,
+      .accessfn = gt_pct_access,
       .readfn = gt_cnt_read, .resetfn = gt_cnt_reset,
     },
     { .name = "CNTVCT", .cp = 15, .crm = 14, .opc1 = 1,
       .access = PL0_R, .type = ARM_CP_64BIT | ARM_CP_NO_MIGRATE | ARM_CP_IO,
       .accessfn = gt_vct_access,
+      .readfn = gt_cnt_read, .resetfn = arm_cp_reset_ignore,
+    },
+    { .name = "CNTVCT_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 2,
+      .access = PL0_R, .type = ARM_CP_NO_MIGRATE | ARM_CP_IO,
+      .accessfn = gt_vct_access,
       .readfn = gt_cnt_read, .resetfn = gt_cnt_reset,
     },
     /* Comparison value, indicating when the timer goes off */
     { .name = "CNTP_CVAL", .cp = 15, .crm = 14, .opc1 = 2,
       .access = PL1_RW | PL0_R,
-      .type = ARM_CP_64BIT | ARM_CP_IO,
+      .type = ARM_CP_64BIT | ARM_CP_IO | ARM_CP_NO_MIGRATE,
       .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].cval),
-      .resetvalue = 0,
-      .accessfn = gt_ptimer_access,
+      .accessfn = gt_ptimer_access, .resetfn = arm_cp_reset_ignore,
+      .writefn = gt_cval_write, .raw_writefn = raw_write,
+    },
+    { .name = "CNTP_CVAL_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 2, .opc2 = 2,
+      .access = PL1_RW | PL0_R,
+      .type = ARM_CP_IO,
+      .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].cval),
+      .resetvalue = 0, .accessfn = gt_ptimer_access,
       .writefn = gt_cval_write, .raw_writefn = raw_write,
     },
     { .name = "CNTV_CVAL", .cp = 15, .crm = 14, .opc1 = 3,
       .access = PL1_RW | PL0_R,
-      .type = ARM_CP_64BIT | ARM_CP_IO,
+      .type = ARM_CP_64BIT | ARM_CP_IO | ARM_CP_NO_MIGRATE,
       .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].cval),
-      .resetvalue = 0,
-      .accessfn = gt_vtimer_access,
+      .accessfn = gt_vtimer_access, .resetfn = arm_cp_reset_ignore,
+      .writefn = gt_cval_write, .raw_writefn = raw_write,
+    },
+    { .name = "CNTV_CVAL_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 3, .opc2 = 2,
+      .access = PL1_RW | PL0_R,
+      .type = ARM_CP_IO,
+      .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].cval),
+      .resetvalue = 0, .accessfn = gt_vtimer_access,
       .writefn = gt_cval_write, .raw_writefn = raw_write,
     },
     REGINFO_SENTINEL
@@ -1031,8 +1124,8 @@
                 env->cp15.c7_par = phys_addr & 0xfffff000;
             }
         } else {
-            env->cp15.c7_par = ((ret & (10 << 1)) >> 5) |
-                ((ret & (12 << 1)) >> 6) |
+            env->cp15.c7_par = ((ret & (1 << 10)) >> 5) |
+                ((ret & (1 << 12)) >> 6) |
                 ((ret & 0xf) << 1) | 1;
         }
         env->cp15.c7_par_hi = 0;
@@ -1193,6 +1286,26 @@
     env->cp15.c2_mask = 0;
 }
 
+static void vmsa_tcr_el1_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                               uint64_t value)
+{
+    /* For AArch64 the A1 bit could result in a change of ASID, so TLB flush. */
+    tlb_flush(env, 1);
+    env->cp15.c2_control = value;
+}
+
+static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                            uint64_t value)
+{
+    /* 64 bit accesses to the TTBRs can change the ASID and so we
+     * must flush the TLB.
+     */
+    if (cpreg_field_is_64bit(ri)) {
+        tlb_flush(env, 1);
+    }
+    raw_write(env, ri, value);
+}
+
 static const ARMCPRegInfo vmsa_cp_reginfo[] = {
     { .name = "DFSR", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW,
@@ -1200,16 +1313,23 @@
     { .name = "IFSR", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 1,
       .access = PL1_RW,
       .fieldoffset = offsetof(CPUARMState, cp15.c5_insn), .resetvalue = 0, },
-    { .name = "TTBR0", .cp = 15, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 0,
-      .access = PL1_RW,
-      .fieldoffset = offsetof(CPUARMState, cp15.c2_base0), .resetvalue = 0, },
-    { .name = "TTBR1", .cp = 15, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 1,
-      .access = PL1_RW,
-      .fieldoffset = offsetof(CPUARMState, cp15.c2_base1), .resetvalue = 0, },
-    { .name = "TTBCR", .cp = 15, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 2,
-      .access = PL1_RW, .writefn = vmsa_ttbcr_write,
-      .resetfn = vmsa_ttbcr_reset, .raw_writefn = vmsa_ttbcr_raw_write,
+    { .name = "TTBR0_EL1", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 0,
+      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el1),
+      .writefn = vmsa_ttbr_write, .resetvalue = 0 },
+    { .name = "TTBR1_EL1", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 1,
+      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.ttbr1_el1),
+      .writefn = vmsa_ttbr_write, .resetvalue = 0 },
+    { .name = "TCR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 2,
+      .access = PL1_RW, .writefn = vmsa_tcr_el1_write,
+      .resetfn = vmsa_ttbcr_reset, .raw_writefn = raw_write,
       .fieldoffset = offsetof(CPUARMState, cp15.c2_control) },
+    { .name = "TTBCR", .cp = 15, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 2,
+      .access = PL1_RW, .type = ARM_CP_NO_MIGRATE, .writefn = vmsa_ttbcr_write,
+      .resetfn = arm_cp_reset_ignore, .raw_writefn = vmsa_ttbcr_raw_write,
+      .fieldoffset = offsetoflow32(CPUARMState, cp15.c2_control) },
     { .name = "DFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c6_data),
       .resetvalue = 0, },
@@ -1379,7 +1499,8 @@
 {
     CPUState *cs = CPU(arm_env_get_cpu(env));
     uint32_t mpidr = cs->cpu_index;
-    /* We don't support setting cluster ID ([8..11])
+    /* We don't support setting cluster ID ([8..11]) (known as Aff1
+     * in later ARM ARM versions), or any of the higher affinity level fields,
      * so these bits always RAZ.
      */
     if (arm_feature(env, ARM_FEATURE_V7MP)) {
@@ -1394,7 +1515,8 @@
 }
 
 static const ARMCPRegInfo mpidr_cp_reginfo[] = {
-    { .name = "MPIDR", .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 5,
+    { .name = "MPIDR", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 5,
       .access = PL1_R, .readfn = mpidr_read, .type = ARM_CP_NO_MIGRATE },
     REGINFO_SENTINEL
 };
@@ -1417,57 +1539,15 @@
     env->cp15.c7_par = 0;
 }
 
-static uint64_t ttbr064_read(CPUARMState *env, const ARMCPRegInfo *ri)
-{
-    return ((uint64_t)env->cp15.c2_base0_hi << 32) | env->cp15.c2_base0;
-}
-
-static void ttbr064_raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                              uint64_t value)
-{
-    env->cp15.c2_base0_hi = value >> 32;
-    env->cp15.c2_base0 = value;
-}
-
-static void ttbr064_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                          uint64_t value)
-{
-    /* Writes to the 64 bit format TTBRs may change the ASID */
-    tlb_flush(env, 1);
-    ttbr064_raw_write(env, ri, value);
-}
-
-static void ttbr064_reset(CPUARMState *env, const ARMCPRegInfo *ri)
-{
-    env->cp15.c2_base0_hi = 0;
-    env->cp15.c2_base0 = 0;
-}
-
-static uint64_t ttbr164_read(CPUARMState *env, const ARMCPRegInfo *ri)
-{
-    return ((uint64_t)env->cp15.c2_base1_hi << 32) | env->cp15.c2_base1;
-}
-
-static void ttbr164_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                          uint64_t value)
-{
-    env->cp15.c2_base1_hi = value >> 32;
-    env->cp15.c2_base1 = value;
-}
-
-static void ttbr164_reset(CPUARMState *env, const ARMCPRegInfo *ri)
-{
-    env->cp15.c2_base1_hi = 0;
-    env->cp15.c2_base1 = 0;
-}
-
 static const ARMCPRegInfo lpae_cp_reginfo[] = {
     /* NOP AMAIR0/1: the override is because these clash with the rather
      * broadly specified TLB_LOCKDOWN entry in the generic cp_reginfo.
      */
-    { .name = "AMAIR0", .cp = 15, .crn = 10, .crm = 3, .opc1 = 0, .opc2 = 0,
+    { .name = "AMAIR0", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .crn = 10, .crm = 3, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_OVERRIDE,
       .resetvalue = 0 },
+    /* AMAIR1 is mapped to AMAIR_EL1[63:32] */
     { .name = "AMAIR1", .cp = 15, .crn = 10, .crm = 3, .opc1 = 0, .opc2 = 1,
       .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_OVERRIDE,
       .resetvalue = 0 },
@@ -1480,12 +1560,13 @@
       .access = PL1_RW, .type = ARM_CP_64BIT,
       .readfn = par64_read, .writefn = par64_write, .resetfn = par64_reset },
     { .name = "TTBR0", .cp = 15, .crm = 2, .opc1 = 0,
-      .access = PL1_RW, .type = ARM_CP_64BIT, .readfn = ttbr064_read,
-      .writefn = ttbr064_write, .raw_writefn = ttbr064_raw_write,
-      .resetfn = ttbr064_reset },
+      .access = PL1_RW, .type = ARM_CP_64BIT | ARM_CP_NO_MIGRATE,
+      .fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el1),
+      .writefn = vmsa_ttbr_write, .resetfn = arm_cp_reset_ignore },
     { .name = "TTBR1", .cp = 15, .crm = 2, .opc1 = 1,
-      .access = PL1_RW, .type = ARM_CP_64BIT, .readfn = ttbr164_read,
-      .writefn = ttbr164_write, .resetfn = ttbr164_reset },
+      .access = PL1_RW, .type = ARM_CP_64BIT | ARM_CP_NO_MIGRATE,
+      .fieldoffset = offsetof(CPUARMState, cp15.ttbr1_el1),
+      .writefn = vmsa_ttbr_write, .resetfn = arm_cp_reset_ignore },
     REGINFO_SENTINEL
 };
 
@@ -1511,6 +1592,42 @@
     vfp_set_fpsr(env, value);
 }
 
+static CPAccessResult aa64_cacheop_access(CPUARMState *env,
+                                          const ARMCPRegInfo *ri)
+{
+    /* Cache invalidate/clean: NOP, but EL0 must UNDEF unless
+     * SCTLR_EL1.UCI is set.
+     */
+    if (arm_current_pl(env) == 0 && !(env->cp15.c1_sys & SCTLR_UCI)) {
+        return CP_ACCESS_TRAP;
+    }
+    return CP_ACCESS_OK;
+}
+
+static void tlbi_aa64_va_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                               uint64_t value)
+{
+    /* Invalidate by VA (AArch64 version) */
+    uint64_t pageaddr = value << 12;
+    tlb_flush_page(env, pageaddr);
+}
+
+static void tlbi_aa64_vaa_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                                uint64_t value)
+{
+    /* Invalidate by VA, all ASIDs (AArch64 version) */
+    uint64_t pageaddr = value << 12;
+    tlb_flush_page(env, pageaddr);
+}
+
+static void tlbi_aa64_asid_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                                 uint64_t value)
+{
+    /* Invalidate by ASID (AArch64 version) */
+    int asid = extract64(value, 48, 16);
+    tlb_flush(env, asid == 0);
+}
+
 static const ARMCPRegInfo v8_cp_reginfo[] = {
     /* Minimal set of EL0-visible registers. This will need to be expanded
      * significantly for system emulation of AArch64 CPUs.
@@ -1524,13 +1641,6 @@
     { .name = "FPSR", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 4, .crm = 4,
       .access = PL0_RW, .readfn = aa64_fpsr_read, .writefn = aa64_fpsr_write },
-    /* This claims a 32 byte cacheline size for icache and dcache, VIPT icache.
-     * It will eventually need to have a CPU-specified reset value.
-     */
-    { .name = "CTR_EL0", .state = ARM_CP_STATE_AA64,
-      .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 0, .crm = 0,
-      .access = PL0_R, .type = ARM_CP_CONST,
-      .resetvalue = 0x80030003 },
     /* Prohibit use of DC ZVA. OPTME: implement DC ZVA and allow its use.
      * For system mode the DZP bit here will need to be computed, not constant.
      */
@@ -1538,6 +1648,103 @@
       .opc0 = 3, .opc1 = 3, .opc2 = 7, .crn = 0, .crm = 0,
       .access = PL0_R, .type = ARM_CP_CONST,
       .resetvalue = 0x10 },
+    { .name = "CURRENTEL", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 0, .opc2 = 2, .crn = 4, .crm = 2,
+      .access = PL1_R, .type = ARM_CP_CURRENTEL },
+    /* Cache ops: all NOPs since we don't emulate caches */
+    { .name = "IC_IALLUIS", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 0,
+      .access = PL1_W, .type = ARM_CP_NOP },
+    { .name = "IC_IALLU", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 0,
+      .access = PL1_W, .type = ARM_CP_NOP },
+    { .name = "IC_IVAU", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 5, .opc2 = 1,
+      .access = PL0_W, .type = ARM_CP_NOP,
+      .accessfn = aa64_cacheop_access },
+    { .name = "DC_IVAC", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 1,
+      .access = PL1_W, .type = ARM_CP_NOP },
+    { .name = "DC_ISW", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 2,
+      .access = PL1_W, .type = ARM_CP_NOP },
+    { .name = "DC_CVAC", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 1,
+      .access = PL0_W, .type = ARM_CP_NOP,
+      .accessfn = aa64_cacheop_access },
+    { .name = "DC_CSW", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 2,
+      .access = PL1_W, .type = ARM_CP_NOP },
+    { .name = "DC_CVAU", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 11, .opc2 = 1,
+      .access = PL0_W, .type = ARM_CP_NOP,
+      .accessfn = aa64_cacheop_access },
+    { .name = "DC_CIVAC", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 1,
+      .access = PL0_W, .type = ARM_CP_NOP,
+      .accessfn = aa64_cacheop_access },
+    { .name = "DC_CISW", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 2,
+      .access = PL1_W, .type = ARM_CP_NOP },
+    /* TLBI operations */
+    { .name = "TLBI_VMALLE1IS", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 0,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+      .writefn = tlbiall_write },
+    { .name = "TLBI_VAE1IS", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 1,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+      .writefn = tlbi_aa64_va_write },
+    { .name = "TLBI_ASIDE1IS", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 2,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+      .writefn = tlbi_aa64_asid_write },
+    { .name = "TLBI_VAAE1IS", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 3,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+      .writefn = tlbi_aa64_vaa_write },
+    { .name = "TLBI_VALE1IS", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 5,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+      .writefn = tlbi_aa64_va_write },
+    { .name = "TLBI_VAALE1IS", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 7,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+      .writefn = tlbi_aa64_vaa_write },
+    { .name = "TLBI_VMALLE1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 0,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+      .writefn = tlbiall_write },
+    { .name = "TLBI_VAE1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 1,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+      .writefn = tlbi_aa64_va_write },
+    { .name = "TLBI_ASIDE1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 2,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+      .writefn = tlbi_aa64_asid_write },
+    { .name = "TLBI_VAAE1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 3,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+      .writefn = tlbi_aa64_vaa_write },
+    { .name = "TLBI_VALE1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 5,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+      .writefn = tlbi_aa64_va_write },
+    { .name = "TLBI_VAALE1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 7,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+      .writefn = tlbi_aa64_vaa_write },
+    /* Dummy implementation of monitor debug system control register:
+     * we don't support debug.
+     */
+    { .name = "MDSCR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 2,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    /* We define a dummy WI OSLAR_EL1, because Linux writes to it. */
+    { .name = "OSLAR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 4,
+      .access = PL1_W, .type = ARM_CP_NOP },
     REGINFO_SENTINEL
 };
 
@@ -1550,6 +1757,48 @@
     tlb_flush(env, 1);
 }
 
+static CPAccessResult ctr_el0_access(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    /* Only accessible in EL0 if SCTLR.UCT is set (and only in AArch64,
+     * but the AArch32 CTR has its own reginfo struct)
+     */
+    if (arm_current_pl(env) == 0 && !(env->cp15.c1_sys & SCTLR_UCT)) {
+        return CP_ACCESS_TRAP;
+    }
+    return CP_ACCESS_OK;
+}
+
+static void define_aarch64_debug_regs(ARMCPU *cpu)
+{
+    /* Define breakpoint and watchpoint registers. These do nothing
+     * but read as written, for now.
+     */
+    int i;
+
+    for (i = 0; i < 16; i++) {
+        ARMCPRegInfo dbgregs[] = {
+            { .name = "DBGBVR", .state = ARM_CP_STATE_AA64,
+              .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 4,
+              .access = PL1_RW,
+              .fieldoffset = offsetof(CPUARMState, cp15.dbgbvr[i]) },
+            { .name = "DBGBCR", .state = ARM_CP_STATE_AA64,
+              .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 5,
+              .access = PL1_RW,
+              .fieldoffset = offsetof(CPUARMState, cp15.dbgbcr[i]) },
+            { .name = "DBGWVR", .state = ARM_CP_STATE_AA64,
+              .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 6,
+              .access = PL1_RW,
+              .fieldoffset = offsetof(CPUARMState, cp15.dbgwvr[i]) },
+            { .name = "DBGWCR", .state = ARM_CP_STATE_AA64,
+              .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 7,
+              .access = PL1_RW,
+              .fieldoffset = offsetof(CPUARMState, cp15.dbgwcr[i]) },
+            REGINFO_SENTINEL
+        };
+        define_arm_cp_regs(cpu, dbgregs);
+    }
+}
+
 void register_cp_regs_for_features(ARMCPU *cpu)
 {
     /* Register all the coprocessor registers based on feature bits */
@@ -1634,7 +1883,8 @@
             .raw_writefn = raw_write,
         };
         ARMCPRegInfo clidr = {
-            .name = "CLIDR", .cp = 15, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 1,
+            .name = "CLIDR", .state = ARM_CP_STATE_BOTH,
+            .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 1,
             .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->clidr
         };
         define_one_arm_cp_reg(cpu, &pmcr);
@@ -1644,7 +1894,53 @@
         define_arm_cp_regs(cpu, not_v7_cp_reginfo);
     }
     if (arm_feature(env, ARM_FEATURE_V8)) {
+        /* AArch64 ID registers, which all have impdef reset values */
+        ARMCPRegInfo v8_idregs[] = {
+            { .name = "ID_AA64PFR0_EL1", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 0,
+              .access = PL1_R, .type = ARM_CP_CONST,
+              .resetvalue = cpu->id_aa64pfr0 },
+            { .name = "ID_AA64PFR1_EL1", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 1,
+              .access = PL1_R, .type = ARM_CP_CONST,
+              .resetvalue = cpu->id_aa64pfr1},
+            { .name = "ID_AA64DFR0_EL1", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 0,
+              .access = PL1_R, .type = ARM_CP_CONST,
+              .resetvalue = cpu->id_aa64dfr0 },
+            { .name = "ID_AA64DFR1_EL1", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 1,
+              .access = PL1_R, .type = ARM_CP_CONST,
+              .resetvalue = cpu->id_aa64dfr1 },
+            { .name = "ID_AA64AFR0_EL1", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 4,
+              .access = PL1_R, .type = ARM_CP_CONST,
+              .resetvalue = cpu->id_aa64afr0 },
+            { .name = "ID_AA64AFR1_EL1", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 5,
+              .access = PL1_R, .type = ARM_CP_CONST,
+              .resetvalue = cpu->id_aa64afr1 },
+            { .name = "ID_AA64ISAR0_EL1", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 0,
+              .access = PL1_R, .type = ARM_CP_CONST,
+              .resetvalue = cpu->id_aa64isar0 },
+            { .name = "ID_AA64ISAR1_EL1", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 1,
+              .access = PL1_R, .type = ARM_CP_CONST,
+              .resetvalue = cpu->id_aa64isar1 },
+            { .name = "ID_AA64MMFR0_EL1", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 0,
+              .access = PL1_R, .type = ARM_CP_CONST,
+              .resetvalue = cpu->id_aa64mmfr0 },
+            { .name = "ID_AA64MMFR1_EL1", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 1,
+              .access = PL1_R, .type = ARM_CP_CONST,
+              .resetvalue = cpu->id_aa64mmfr1 },
+            REGINFO_SENTINEL
+        };
+        define_arm_cp_regs(cpu, v8_idregs);
         define_arm_cp_regs(cpu, v8_cp_reginfo);
+        define_aarch64_debug_regs(cpu);
     }
     if (arm_feature(env, ARM_FEATURE_MPU)) {
         /* These are the MPU registers prior to PMSAv6. Any new
@@ -1710,9 +2006,16 @@
               .writefn = arm_cp_write_ignore, .raw_writefn = raw_write,
               .fieldoffset = offsetof(CPUARMState, cp15.c0_cpuid),
               .type = ARM_CP_OVERRIDE },
+            { .name = "MIDR_EL1", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 0, .opc2 = 0, .crn = 0, .crm = 0,
+              .access = PL1_R, .resetvalue = cpu->midr, .type = ARM_CP_CONST },
             { .name = "CTR",
               .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 1,
               .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->ctr },
+            { .name = "CTR_EL0", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 0, .crm = 0,
+              .access = PL0_R, .accessfn = ctr_el0_access,
+              .type = ARM_CP_CONST, .resetvalue = cpu->ctr },
             { .name = "TCMTR",
               .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 2,
               .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
@@ -1783,7 +2086,8 @@
     /* Generic registers whose values depend on the implementation */
     {
         ARMCPRegInfo sctlr = {
-            .name = "SCTLR", .cp = 15, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 0,
+            .name = "SCTLR", .state = ARM_CP_STATE_BOTH,
+            .opc0 = 3, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 0,
             .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c1_sys),
             .writefn = sctlr_write, .resetvalue = cpu->reset_sctlr,
             .raw_writefn = raw_write,
@@ -1962,6 +2266,10 @@
     if (opaque) {
         r2->opaque = opaque;
     }
+    /* reginfo passed to helpers is correct for the actual access,
+     * and is never ARM_CP_STATE_BOTH:
+     */
+    r2->state = state;
     /* Make sure reginfo passed to helpers for wildcarded regs
      * has the correct crm/opc1/opc2 for this reg, not CP_ANY:
      */
@@ -2170,7 +2478,7 @@
         (env->CF << 29) | ((env->VF & 0x80000000) >> 3) | (env->QF << 27)
         | (env->thumb << 5) | ((env->condexec_bits & 3) << 25)
         | ((env->condexec_bits & 0xfc) << 8)
-        | (env->GE << 16);
+        | (env->GE << 16) | env->daif;
 }
 
 void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask)
@@ -2197,6 +2505,9 @@
         env->GE = (val >> 16) & 0xf;
     }
 
+    env->daif &= ~(CPSR_AIF & mask);
+    env->daif |= val & CPSR_AIF & mask;
+
     if ((env->uncached_cpsr ^ val) & mask & CPSR_M) {
         if (bad_mode_switch(env, val & CPSR_M)) {
             /* Attempt to switch to an invalid mode: this is UNPREDICTABLE.
@@ -2658,7 +2969,7 @@
     env->condexec_bits = 0;
     /* Switch to the new mode, and to the correct instruction set.  */
     env->uncached_cpsr = (env->uncached_cpsr & ~CPSR_M) | new_mode;
-    env->uncached_cpsr |= mask;
+    env->daif |= mask;
     /* this is a lie, as there was no c1_sys on V4T/V5, but who cares
      * and we should just guard the thumb mode on V4 */
     if (arm_feature(env, ARM_FEATURE_V4T)) {
@@ -2730,9 +3041,9 @@
     uint32_t table;
 
     if (address & env->cp15.c2_mask)
-        table = env->cp15.c2_base1 & 0xffffc000;
+        table = env->cp15.ttbr1_el1 & 0xffffc000;
     else
-        table = env->cp15.c2_base0 & env->cp15.c2_base_mask;
+        table = env->cp15.ttbr0_el1 & env->cp15.c2_base_mask;
 
     table |= (address >> 18) & 0x3ffc;
     return table;
@@ -2798,7 +3109,7 @@
             break;
         case 2: /* 4k page.  */
             phys_addr = (desc & 0xfffff000) | (address & 0xfff);
-            ap = (desc >> (4 + ((address >> 13) & 6))) & 3;
+            ap = (desc >> (4 + ((address >> 9) & 6))) & 3;
             *page_size = 0x1000;
             break;
         case 3: /* 1k page.  */
@@ -3008,11 +3319,11 @@
      * we will always flush the TLB any time the ASID is changed).
      */
     if (ttbr_select == 0) {
-        ttbr = ((uint64_t)env->cp15.c2_base0_hi << 32) | env->cp15.c2_base0;
+        ttbr = env->cp15.ttbr0_el1;
         epd = extract32(env->cp15.c2_control, 7, 1);
         tsz = t0sz;
     } else {
-        ttbr = ((uint64_t)env->cp15.c2_base1_hi << 32) | env->cp15.c2_base1;
+        ttbr = env->cp15.ttbr1_el1;
         epd = extract32(env->cp15.c2_control, 23, 1);
         tsz = t1sz;
     }
@@ -3331,12 +3642,12 @@
     case 9: /* PSP */
         return env->v7m.current_sp ? env->regs[13] : env->v7m.other_sp;
     case 16: /* PRIMASK */
-        return (env->uncached_cpsr & CPSR_I) != 0;
+        return (env->daif & PSTATE_I) != 0;
     case 17: /* BASEPRI */
     case 18: /* BASEPRI_MAX */
         return env->v7m.basepri;
     case 19: /* FAULTMASK */
-        return (env->uncached_cpsr & CPSR_F) != 0;
+        return (env->daif & PSTATE_F) != 0;
     case 20: /* CONTROL */
         return env->v7m.control;
     default:
@@ -3383,10 +3694,11 @@
             env->v7m.other_sp = val;
         break;
     case 16: /* PRIMASK */
-        if (val & 1)
-            env->uncached_cpsr |= CPSR_I;
-        else
-            env->uncached_cpsr &= ~CPSR_I;
+        if (val & 1) {
+            env->daif |= PSTATE_I;
+        } else {
+            env->daif &= ~PSTATE_I;
+        }
         break;
     case 17: /* BASEPRI */
         env->v7m.basepri = val & 0xff;
@@ -3397,10 +3709,11 @@
             env->v7m.basepri = val;
         break;
     case 19: /* FAULTMASK */
-        if (val & 1)
-            env->uncached_cpsr |= CPSR_F;
-        else
-            env->uncached_cpsr &= ~CPSR_F;
+        if (val & 1) {
+            env->daif |= PSTATE_F;
+        } else {
+            env->daif &= ~PSTATE_F;
+        }
         break;
     case 20: /* CONTROL */
         env->v7m.control = val & 3;
@@ -4392,3 +4705,40 @@
     }
     return rmode;
 }
+
+static void crc_init_buffer(uint8_t *buf, uint32_t val, uint32_t bytes)
+{
+    memset(buf, 0, 4);
+
+    if (bytes == 1) {
+        buf[0] = val & 0xff;
+    } else if (bytes == 2) {
+        buf[0] = val & 0xff;
+        buf[1] = (val >> 8) & 0xff;
+    } else {
+        buf[0] = val & 0xff;
+        buf[1] = (val >> 8) & 0xff;
+        buf[2] = (val >> 16) & 0xff;
+        buf[3] = (val >> 24) & 0xff;
+    }
+}
+
+uint32_t HELPER(crc32)(uint32_t acc, uint32_t val, uint32_t bytes)
+{
+    uint8_t buf[4];
+
+    crc_init_buffer(buf, val, bytes);
+
+    /* zlib crc32 converts the accumulator and output to one's complement.  */
+    return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
+}
+
+uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes)
+{
+    uint8_t buf[4];
+
+    crc_init_buffer(buf, val, bytes);
+
+    /* Linux crc32c converts the output to one's complement.  */
+    return crc32c(acc, buf, bytes) ^ 0xffffffff;
+}
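
The two helpers above exist to bridge conventions: zlib's crc32() inverts the accumulator on entry and the result on exit, the crc32c() used here inverts only the result, while the ARMv8 CRC32/CRC32C instructions operate on the raw accumulator with no inversion at all. A standalone sketch of the same wrapping for the plain-CRC32 case, assuming only zlib:

    #include <stdint.h>
    #include <zlib.h>

    /* One ARM-style CRC32 step over the low 'bytes' little-endian bytes of
     * 'val': undo zlib's one's-complement convention on the way in and out. */
    static uint32_t arm_crc32_step(uint32_t acc, uint32_t val, unsigned bytes)
    {
        uint8_t buf[4] = { val, val >> 8, val >> 16, val >> 24 };
        return crc32(acc ^ 0xffffffffu, buf, bytes) ^ 0xffffffffu;
    }
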
diff --git a/target-arm/helper.h b/target-arm/helper.h
index 19bd620..276f3a9 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -63,6 +63,8 @@
 DEF_HELPER_3(set_cp_reg64, void, env, ptr, i64)
 DEF_HELPER_2(get_cp_reg64, i64, env, ptr)
 
+DEF_HELPER_3(msr_i_pstate, void, env, i32, i32)
+
 DEF_HELPER_2(get_r13_banked, i32, env, i32)
 DEF_HELPER_3(set_r13_banked, void, env, i32, i32)
 
@@ -497,6 +499,9 @@
 DEF_HELPER_4(crypto_aese, void, env, i32, i32, i32)
 DEF_HELPER_4(crypto_aesmc, void, env, i32, i32, i32)
 
+DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
+DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
+
 #ifdef TARGET_AARCH64
 #include "helper-a64.h"
 #endif
diff --git a/target-arm/kvm.c b/target-arm/kvm.c
index 1d2688d..39202d7 100644
--- a/target-arm/kvm.c
+++ b/target-arm/kvm.c
@@ -165,8 +165,10 @@
  */
 typedef struct KVMDevice {
     struct kvm_arm_device_addr kda;
+    struct kvm_device_attr kdattr;
     MemoryRegion *mr;
     QSLIST_ENTRY(KVMDevice) entries;
+    int dev_fd;
 } KVMDevice;
 
 static QSLIST_HEAD(kvm_devices_head, KVMDevice) kvm_devices_head;
@@ -200,6 +202,29 @@
     .region_del = kvm_arm_devlistener_del,
 };
 
+static void kvm_arm_set_device_addr(KVMDevice *kd)
+{
+    struct kvm_device_attr *attr = &kd->kdattr;
+    int ret;
+
+    /* If the device control API is available and we have a device fd on the
+     * KVMDevice struct, let's use the newer API
+     */
+    if (kd->dev_fd >= 0) {
+        uint64_t addr = kd->kda.addr;
+        attr->addr = (uintptr_t)&addr;
+        ret = kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr);
+    } else {
+        ret = kvm_vm_ioctl(kvm_state, KVM_ARM_SET_DEVICE_ADDR, &kd->kda);
+    }
+
+    if (ret < 0) {
+        fprintf(stderr, "Failed to set device address: %s\n",
+                strerror(-ret));
+        abort();
+    }
+}
+
 static void kvm_arm_machine_init_done(Notifier *notifier, void *data)
 {
     KVMDevice *kd, *tkd;
@@ -207,12 +232,7 @@
     memory_listener_unregister(&devlistener);
     QSLIST_FOREACH_SAFE(kd, &kvm_devices_head, entries, tkd) {
         if (kd->kda.addr != -1) {
-            if (kvm_vm_ioctl(kvm_state, KVM_ARM_SET_DEVICE_ADDR,
-                             &kd->kda) < 0) {
-                fprintf(stderr, "KVM_ARM_SET_DEVICE_ADDRESS failed: %s\n",
-                        strerror(errno));
-                abort();
-            }
+            kvm_arm_set_device_addr(kd);
         }
         memory_region_unref(kd->mr);
         g_free(kd);
@@ -223,7 +243,8 @@
     .notify = kvm_arm_machine_init_done,
 };
 
-void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid)
+void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group,
+                             uint64_t attr, int dev_fd)
 {
     KVMDevice *kd;
 
@@ -239,6 +260,10 @@
     kd->mr = mr;
     kd->kda.id = devid;
     kd->kda.addr = -1;
+    kd->kdattr.flags = 0;
+    kd->kdattr.group = group;
+    kd->kdattr.attr = attr;
+    kd->dev_fd = dev_fd;
     QSLIST_INSERT_HEAD(&kvm_devices_head, kd, entries);
     memory_region_ref(kd->mr);
 }
@@ -389,3 +414,19 @@
 void kvm_arch_init_irq_routing(KVMState *s)
 {
 }
+
+int kvm_arch_irqchip_create(KVMState *s)
+{
+    int ret;
+
+    /* If we can create the VGIC using the newer device control API, we
+     * let the device do this when it initializes itself, otherwise we
+     * fall back to the old API */
+
+    ret = kvm_create_device(s, KVM_DEV_TYPE_ARM_VGIC_V2, true);
+    if (ret == 0) {
+        return 1;
+    }
+
+    return 0;
+}
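
kvm_create_device(s, KVM_DEV_TYPE_ARM_VGIC_V2, true) is a probe: with the test flag set, the kernel only reports whether it can create that device type and instantiates nothing. At the raw KVM device control API level that corresponds roughly to the following (illustrative sketch; the struct and flag come from the kernel headers, not from this patch):

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Ask the kernel whether an in-kernel VGICv2 could be created, without
     * actually creating one. */
    static int vgic_v2_supported(int vmfd)
    {
        struct kvm_create_device cd = {
            .type  = KVM_DEV_TYPE_ARM_VGIC_V2,
            .flags = KVM_CREATE_DEVICE_TEST, /* probe only */
        };
        return ioctl(vmfd, KVM_CREATE_DEVICE, &cd) == 0;
    }
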
diff --git a/target-arm/kvm_arm.h b/target-arm/kvm_arm.h
index cd3d13c..137c567 100644
--- a/target-arm/kvm_arm.h
+++ b/target-arm/kvm_arm.h
@@ -18,16 +18,21 @@
  * kvm_arm_register_device:
  * @mr: memory region for this device
  * @devid: the KVM device ID
+ * @group: device control API group for setting addresses
+ * @attr: device control API address type
+ * @dev_fd: device control device file descriptor (or -1 if not supported)
  *
  * Remember the memory region @mr, and when it is mapped by the
  * machine model, tell the kernel that base address using the
- * KVM_SET_DEVICE_ADDRESS ioctl. @devid should be the ID of
- * the device as defined by KVM_SET_DEVICE_ADDRESS.
- * The machine model may map and unmap the device multiple times;
- * the kernel will only be told the final address at the point
- * where machine init is complete.
+ * KVM_ARM_SET_DEVICE_ADDRESS ioctl or the newer device control API.  @devid
+ * should be the ID of the device as defined by KVM_ARM_SET_DEVICE_ADDRESS or
+ * the arm-vgic device in the device control API.
+ * The machine model may map
+ * and unmap the device multiple times; the kernel will only be told the final
+ * address at the point where machine init is complete.
  */
-void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid);
+void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group,
+                             uint64_t attr, int dev_fd);
 
 /**
  * write_list_to_kvmstate:
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index eb0fccd..7d06d2f 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -319,6 +319,31 @@
     return ri->readfn(env, ri);
 }
 
+void HELPER(msr_i_pstate)(CPUARMState *env, uint32_t op, uint32_t imm)
+{
+    /* MSR_i to update PSTATE. This is OK from EL0 only if UMA is set.
+     * Note that SPSel is never OK from EL0; we rely on handle_msr_i()
+     * to catch that case at translate time.
+     */
+    if (arm_current_pl(env) == 0 && !(env->cp15.c1_sys & SCTLR_UMA)) {
+        raise_exception(env, EXCP_UDEF);
+    }
+
+    switch (op) {
+    case 0x05: /* SPSel */
+        env->pstate = deposit32(env->pstate, 0, 1, imm);
+        break;
+    case 0x1e: /* DAIFSet */
+        env->daif |= (imm << 6) & PSTATE_DAIF;
+        break;
+    case 0x1f: /* DAIFClear */
+        env->daif &= ~((imm << 6) & PSTATE_DAIF);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
 /* ??? Flag setting arithmetic is awkward because we need to do comparisons.
    The only way to do that in TCG is a conditional branch, which clobbers
    all our temporaries.  For now implement these as helper functions.  */
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 8752e7e..08ac659 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -1080,9 +1080,11 @@
     switch (selector) {
     case 0: /* NOP */
         return;
+    case 3: /* WFI */
+        s->is_jmp = DISAS_WFI;
+        return;
     case 1: /* YIELD */
     case 2: /* WFE */
-    case 3: /* WFI */
     case 4: /* SEV */
     case 5: /* SEVL */
         /* we treat all as NOP at least for now */
@@ -1126,7 +1128,30 @@
 static void handle_msr_i(DisasContext *s, uint32_t insn,
                          unsigned int op1, unsigned int op2, unsigned int crm)
 {
-    unsupported_encoding(s, insn);
+    int op = op1 << 3 | op2;
+    switch (op) {
+    case 0x05: /* SPSel */
+        if (s->current_pl == 0) {
+            unallocated_encoding(s);
+            return;
+        }
+        /* fall through */
+    case 0x1e: /* DAIFSet */
+    case 0x1f: /* DAIFClear */
+    {
+        TCGv_i32 tcg_imm = tcg_const_i32(crm);
+        TCGv_i32 tcg_op = tcg_const_i32(op);
+        gen_a64_set_pc_im(s->pc - 4);
+        gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm);
+        tcg_temp_free_i32(tcg_imm);
+        tcg_temp_free_i32(tcg_op);
+        s->is_jmp = DISAS_UPDATE;
+        break;
+    }
+    default:
+        unallocated_encoding(s);
+        return;
+    }
 }
 
 static void gen_get_nzcv(TCGv_i64 tcg_rt)
@@ -1231,6 +1256,13 @@
             gen_set_nzcv(tcg_rt);
         }
         return;
+    case ARM_CP_CURRENTEL:
+        /* Reads as current EL value from pstate, which is
+         * guaranteed to be constant by the tb flags.
+         */
+        tcg_rt = cpu_reg(s, rt);
+        tcg_gen_movi_i64(tcg_rt, s->current_pl << 2);
+        return;
     default:
         break;
     }
@@ -9006,7 +9038,7 @@
     dc->condexec_mask = 0;
     dc->condexec_cond = 0;
 #if !defined(CONFIG_USER_ONLY)
-    dc->user = 0;
+    dc->user = (ARM_TBFLAG_AA64_EL(tb->flags) == 0);
 #endif
     dc->vfp_enabled = 0;
     dc->vec_len = 0;
@@ -9117,6 +9149,7 @@
             /* This is a special case because we don't want to just halt the CPU
              * if trying to debug across a WFI.
              */
+            gen_a64_set_pc_im(dc->pc);
             gen_helper_wfi(cpu_env);
             break;
         }
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 6ccf0ba..253d2a1 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -7561,6 +7561,36 @@
             store_reg(s, 14, tmp2);
             gen_bx(s, tmp);
             break;
+        case 0x4:
+        {
+            /* crc32/crc32c */
+            uint32_t c = extract32(insn, 8, 4);
+
+            /* Check this CPU supports ARMv8 CRC instructions.
+             * op1 == 3 is UNPREDICTABLE but handle as UNDEFINED.
+             * Bits 8, 10 and 11 should be zero.
+             */
+            if (!arm_feature(env, ARM_FEATURE_CRC) || op1 == 0x3 ||
+                (c & 0xd) != 0) {
+                goto illegal_op;
+            }
+
+            rn = extract32(insn, 16, 4);
+            rd = extract32(insn, 12, 4);
+
+            tmp = load_reg(s, rn);
+            tmp2 = load_reg(s, rm);
+            tmp3 = tcg_const_i32(1 << op1);
+            if (c & 0x2) {
+                gen_helper_crc32c(tmp, tmp, tmp2, tmp3);
+            } else {
+                gen_helper_crc32(tmp, tmp, tmp2, tmp3);
+            }
+            tcg_temp_free_i32(tmp2);
+            tcg_temp_free_i32(tmp3);
+            store_reg(s, rd, tmp);
+            break;
+        }
         case 0x5: /* saturating add/subtract */
             ARCH(5TE);
             rd = (insn >> 12) & 0xf;
@@ -9145,6 +9175,32 @@
                 case 0x18: /* clz */
                     gen_helper_clz(tmp, tmp);
                     break;
+                case 0x20:
+                case 0x21:
+                case 0x22:
+                case 0x28:
+                case 0x29:
+                case 0x2a:
+                {
+                    /* crc32/crc32c */
+                    uint32_t sz = op & 0x3;
+                    uint32_t c = op & 0x8;
+
+                    if (!arm_feature(env, ARM_FEATURE_CRC)) {
+                        goto illegal_op;
+                    }
+
+                    tmp2 = load_reg(s, rm);
+                    tmp3 = tcg_const_i32(1 << sz);
+                    if (c) {
+                        gen_helper_crc32c(tmp, tmp, tmp2, tmp3);
+                    } else {
+                        gen_helper_crc32(tmp, tmp, tmp2, tmp3);
+                    }
+                    tcg_temp_free_i32(tmp2);
+                    tcg_temp_free_i32(tmp3);
+                    break;
+                }
                 default:
                     goto illegal_op;
                 }
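
Both the ARM and Thumb decoders above hand the accumulator register, the value register and a byte count of 1 << sz to gen_helper_crc32 / gen_helper_crc32c; the helpers themselves are introduced elsewhere in this series. As a rough sketch of the intended semantics of the plain CRC-32 variant, assuming zlib's crc32() is available (illustrative, not the patch's code):

    #include <stdint.h>
    #include <zlib.h>

    /* Sketch: fold 'bytes' (1, 2 or 4) little-endian bytes of 'val' into
     * the accumulator.  The ARM CRC32 instructions omit zlib's
     * one's-complement pre/post conditioning, so it is undone here.
     */
    static uint32_t crc32_arm_sketch(uint32_t acc, uint32_t val, uint32_t bytes)
    {
        uint8_t buf[4];

        buf[0] = val;
        buf[1] = val >> 8;
        buf[2] = val >> 16;
        buf[3] = val >> 24;

        return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
    }

The CRC32C variant is analogous but uses the Castagnoli polynomial.
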
diff --git a/trace-events b/trace-events
index 8bc7cc4..d86f98c 100644
--- a/trace-events
+++ b/trace-events
@@ -1175,6 +1175,7 @@
 kvm_vm_ioctl(int type, void *arg) "type 0x%x, arg %p"
 kvm_vcpu_ioctl(int cpu_index, int type, void *arg) "cpu_index %d, type 0x%x, arg %p"
 kvm_run_exit(int cpu_index, uint32_t reason) "cpu_index %d, reason %d"
+kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p"
 
 # memory.c
 memory_region_ops_read(void *mr, uint64_t addr, uint64_t value, unsigned size) "mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
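
The added trace-events entry follows the file's usual shape: a C-like prototype followed by a printf-style format string, from which the build generates a trace_kvm_device_ioctl() helper matching the existing kvm_vm_ioctl and kvm_vcpu_ioctl trace points. Roughly, a caller would look like this (illustrative call site, not taken from this diff):

    /* Sketch of a caller: trace the per-device ioctl right before it is
     * issued; 'fd', 'type' and 'arg' name the trace point's parameters.
     */
    trace_kvm_device_ioctl(fd, type, arg);
    ret = ioctl(fd, type, arg);
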
diff --git a/util/module.c b/util/module.c
index dc08c16..863a8a3 100644
--- a/util/module.c
+++ b/util/module.c
@@ -14,10 +14,10 @@
  */
 
 #include <stdlib.h>
+#include "qemu-common.h"
 #ifdef CONFIG_MODULES
 #include <gmodule.h>
 #endif
-#include "qemu-common.h"
 #include "qemu/queue.h"
 #include "qemu/module.h"
 
diff --git a/vl.c b/vl.c
index 1d27b34..52ee67e 100644
--- a/vl.c
+++ b/vl.c
@@ -2031,6 +2031,16 @@
     return object_class_by_name("qxl-vga");
 }
 
+static bool tcx_vga_available(void)
+{
+    return object_class_by_name("SUNW,tcx");
+}
+
+static bool cg3_vga_available(void)
+{
+    return object_class_by_name("cgthree");
+}
+
 static void select_vgahw (const char *p)
 {
     const char *opts;
@@ -2066,6 +2076,20 @@
             fprintf(stderr, "Error: QXL VGA not available\n");
             exit(0);
         }
+    } else if (strstart(p, "tcx", &opts)) {
+        if (tcx_vga_available()) {
+            vga_interface_type = VGA_TCX;
+        } else {
+            fprintf(stderr, "Error: TCX framebuffer not available\n");
+            exit(0);
+        }
+    } else if (strstart(p, "cg3", &opts)) {
+        if (cg3_vga_available()) {
+            vga_interface_type = VGA_CG3;
+        } else {
+            fprintf(stderr, "Error: CG3 framebuffer not available\n");
+            exit(0);
+        }
     } else if (!strstart(p, "none", &opts)) {
     invalid_vga:
         fprintf(stderr, "Unknown vga type: %s\n", p);
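
With these two branches in place, the SPARC framebuffers become selectable through the generic -vga option; the strings looked up ("SUNW,tcx" and "cgthree") are the QOM type names registered by the framebuffer devices, so the check succeeds only when the corresponding device model is compiled into the binary. A hypothetical invocation (machine name and disk are examples only):

    qemu-system-sparc -M SS-5 -vga cg3 -hda disk.img
    qemu-system-sparc -M SS-5 -vga tcx -hda disk.img
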
diff --git a/vmstate.c b/vmstate.c
index 284b080..d1f5eb0 100644
--- a/vmstate.c
+++ b/vmstate.c
@@ -321,23 +321,24 @@
     .put  = put_int32,
 };
 
-/* 32 bit int. See that the received value is the less or the same
-   than the one in the field */
+/* 32 bit int. Check that the received value is less than or equal to
+   the one in the field */
 
 static int get_int32_le(QEMUFile *f, void *pv, size_t size)
 {
-    int32_t *old = pv;
-    int32_t new;
-    qemu_get_sbe32s(f, &new);
+    int32_t *cur = pv;
+    int32_t loaded;
+    qemu_get_sbe32s(f, &loaded);
 
-    if (*old <= new) {
+    if (loaded <= *cur) {
+        *cur = loaded;
         return 0;
     }
     return -EINVAL;
 }
 
 const VMStateInfo vmstate_info_int32_le = {
-    .name = "int32 equal",
+    .name = "int32 le",
     .get  = get_int32_le,
     .put  = put_int32,
 };
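
Beyond the renames, get_int32_le() changes behaviour: the old code accepted the incoming value whenever the destination field was already less than or equal to it and never stored what it read, while the new code requires the loaded value to be less than or equal to the field's current value and writes it back on success, which is what the comment describes. A hedged usage sketch (device, field and state names are made up; VMSTATE_SINGLE is the generic wrapper for a custom VMStateInfo):

    #include "migration/vmstate.h"

    /* Illustrative only: a field that may shrink, but never grow,
     * across migration, e.g. a ring size fixed at realize time.
     */
    typedef struct ExampleState {
        int32_t ring_size;
    } ExampleState;

    static const VMStateDescription vmstate_example = {
        .name = "example-device",
        .version_id = 1,
        .minimum_version_id = 1,
        .fields = (VMStateField[]) {
            VMSTATE_SINGLE(ring_size, ExampleState, 1,
                           vmstate_info_int32_le, int32_t),
            VMSTATE_END_OF_LIST()
        }
    };
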