Merge remote-tracking branch 'stefanha/net' into staging

# By Jason Wang (2) and others
# Via Stefan Hajnoczi
* stefanha/net:
  qmp: netdev_add is like -netdev, not -net, fix documentation
  doc: document -netdev hubport
  net: reduce the unnecessary memory allocation of multiqueue
  tap: set IFF_ONE_QUEUE per default
  tap: forbid creating multiqueue tap when hub is used
  net: fix unbounded NetQueue
  net: fix qemu_flush_queued_packets() in presence of a hub
diff --git a/cpu-exec.c b/cpu-exec.c
index afbe497..9092145 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -51,13 +51,34 @@
 }
 #endif
 
+/* Execute a TB, and fix up the CPU state afterwards if necessary */
+static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, uint8_t *tb_ptr)
+{
+    CPUArchState *env = cpu->env_ptr;
+    tcg_target_ulong next_tb = tcg_qemu_tb_exec(env, tb_ptr);
+    if ((next_tb & TB_EXIT_MASK) > TB_EXIT_IDX1) {
+        /* We didn't start executing this TB (eg because the instruction
+         * counter hit zero); we must restore the guest PC to the address
+         * of the start of the TB.
+         */
+        TranslationBlock *tb = (TranslationBlock *)(next_tb & ~TB_EXIT_MASK);
+        cpu_pc_from_tb(env, tb);
+    }
+    if ((next_tb & TB_EXIT_MASK) == TB_EXIT_REQUESTED) {
+        /* We were asked to stop executing TBs (probably a pending
+         * interrupt. We've now stopped, so clear the flag.
+         */
+        cpu->tcg_exit_req = 0;
+    }
+    return next_tb;
+}
+
 /* Execute the code without caching the generated code. An interpreter
    could be used if available. */
 static void cpu_exec_nocache(CPUArchState *env, int max_cycles,
                              TranslationBlock *orig_tb)
 {
     CPUState *cpu = ENV_GET_CPU(env);
-    tcg_target_ulong next_tb;
     TranslationBlock *tb;
 
     /* Should never happen.
@@ -69,14 +90,8 @@
                      max_cycles);
     cpu->current_tb = tb;
     /* execute the generated code */
-    next_tb = tcg_qemu_tb_exec(env, tb->tc_ptr);
+    cpu_tb_exec(cpu, tb->tc_ptr);
     cpu->current_tb = NULL;
-
-    if ((next_tb & 3) == 2) {
-        /* Restore PC.  This may happen if async event occurs before
-           the TB starts executing.  */
-        cpu_pc_from_tb(env, tb);
-    }
     tb_phys_invalidate(tb, -1);
     tb_free(tb);
 }
@@ -584,7 +599,8 @@
                    spans two pages, we cannot safely do a direct
                    jump. */
                 if (next_tb != 0 && tb->page_addr[1] == -1) {
-                    tb_add_jump((TranslationBlock *)(next_tb & ~3), next_tb & 3, tb);
+                    tb_add_jump((TranslationBlock *)(next_tb & ~TB_EXIT_MASK),
+                                next_tb & TB_EXIT_MASK, tb);
                 }
                 spin_unlock(&tcg_ctx.tb_ctx.tb_lock);
 
@@ -597,13 +613,24 @@
                 if (likely(!cpu->exit_request)) {
                     tc_ptr = tb->tc_ptr;
                     /* execute the generated code */
-                    next_tb = tcg_qemu_tb_exec(env, tc_ptr);
-                    if ((next_tb & 3) == 2) {
+                    next_tb = cpu_tb_exec(cpu, tc_ptr);
+                    switch (next_tb & TB_EXIT_MASK) {
+                    case TB_EXIT_REQUESTED:
+                        /* Something asked us to stop executing
+                         * chained TBs; just continue round the main
+                         * loop. Whatever requested the exit will also
+                         * have set something else (eg exit_request or
+                         * interrupt_request) which we will handle
+                         * next time around the loop.
+                         */
+                        tb = (TranslationBlock *)(next_tb & ~TB_EXIT_MASK);
+                        next_tb = 0;
+                        break;
+                    case TB_EXIT_ICOUNT_EXPIRED:
+                    {
                         /* Instruction counter expired.  */
                         int insns_left;
-                        tb = (TranslationBlock *)(next_tb & ~3);
-                        /* Restore PC.  */
-                        cpu_pc_from_tb(env, tb);
+                        tb = (TranslationBlock *)(next_tb & ~TB_EXIT_MASK);
                         insns_left = env->icount_decr.u32;
                         if (env->icount_extra && insns_left >= 0) {
                             /* Refill decrementer and continue execution.  */
@@ -624,6 +651,10 @@
                             next_tb = 0;
                             cpu_loop_exit(env);
                         }
+                        break;
+                    }
+                    default:
+                        break;
                     }
                 }
                 cpu->current_tb = NULL;
diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak
index b40f7b0..68b2045 100644
--- a/default-configs/arm-softmmu.mak
+++ b/default-configs/arm-softmmu.mak
@@ -47,3 +47,5 @@
 
 CONFIG_VERSATILE_PCI=y
 CONFIG_VERSATILE_I2C=y
+
+CONFIG_SDHCI=y
diff --git a/exec.c b/exec.c
index a41bcb8..46a2830 100644
--- a/exec.c
+++ b/exec.c
@@ -495,7 +495,7 @@
     CPUState *cpu = ENV_GET_CPU(env);
 
     cpu->exit_request = 1;
-    cpu_unlink_tb(cpu);
+    cpu->tcg_exit_req = 1;
 }
 
 void cpu_abort(CPUArchState *env, const char *fmt, ...)
diff --git a/hw/Makefile.objs b/hw/Makefile.objs
index a1f3a80..40ebe46 100644
--- a/hw/Makefile.objs
+++ b/hw/Makefile.objs
@@ -43,6 +43,7 @@
 common-obj-$(CONFIG_SMARTCARD_NSS) += ccid-card-emulated.o
 endif
 common-obj-$(CONFIG_I8259) += i8259_common.o i8259.o
+common-obj-$(CONFIG_SDHCI) += sdhci.o
 common-obj-y += fifo.o
 common-obj-y += pam.o
 
diff --git a/hw/a9mpcore.c b/hw/a9mpcore.c
index 673bbd8..01aee02 100644
--- a/hw/a9mpcore.c
+++ b/hw/a9mpcore.c
@@ -10,126 +10,27 @@
 
 #include "sysbus.h"
 
-/* A9MP private memory region.  */
-
-typedef struct a9mp_priv_state {
+typedef struct A9MPPrivState {
     SysBusDevice busdev;
-    uint32_t scu_control;
-    uint32_t scu_status;
-    uint32_t old_timer_status[8];
     uint32_t num_cpu;
-    MemoryRegion scu_iomem;
     MemoryRegion container;
     DeviceState *mptimer;
+    DeviceState *wdt;
     DeviceState *gic;
+    DeviceState *scu;
     uint32_t num_irq;
-} a9mp_priv_state;
-
-static uint64_t a9_scu_read(void *opaque, hwaddr offset,
-                            unsigned size)
-{
-    a9mp_priv_state *s = (a9mp_priv_state *)opaque;
-    switch (offset) {
-    case 0x00: /* Control */
-        return s->scu_control;
-    case 0x04: /* Configuration */
-        return (((1 << s->num_cpu) - 1) << 4) | (s->num_cpu - 1);
-    case 0x08: /* CPU Power Status */
-        return s->scu_status;
-    case 0x09: /* CPU status.  */
-        return s->scu_status >> 8;
-    case 0x0a: /* CPU status.  */
-        return s->scu_status >> 16;
-    case 0x0b: /* CPU status.  */
-        return s->scu_status >> 24;
-    case 0x0c: /* Invalidate All Registers In Secure State */
-        return 0;
-    case 0x40: /* Filtering Start Address Register */
-    case 0x44: /* Filtering End Address Register */
-        /* RAZ/WI, like an implementation with only one AXI master */
-        return 0;
-    case 0x50: /* SCU Access Control Register */
-    case 0x54: /* SCU Non-secure Access Control Register */
-        /* unimplemented, fall through */
-    default:
-        return 0;
-    }
-}
-
-static void a9_scu_write(void *opaque, hwaddr offset,
-                         uint64_t value, unsigned size)
-{
-    a9mp_priv_state *s = (a9mp_priv_state *)opaque;
-    uint32_t mask;
-    uint32_t shift;
-    switch (size) {
-    case 1:
-        mask = 0xff;
-        break;
-    case 2:
-        mask = 0xffff;
-        break;
-    case 4:
-        mask = 0xffffffff;
-        break;
-    default:
-        fprintf(stderr, "Invalid size %u in write to a9 scu register %x\n",
-                size, (unsigned)offset);
-        return;
-    }
-
-    switch (offset) {
-    case 0x00: /* Control */
-        s->scu_control = value & 1;
-        break;
-    case 0x4: /* Configuration: RO */
-        break;
-    case 0x08: case 0x09: case 0x0A: case 0x0B: /* Power Control */
-        shift = (offset - 0x8) * 8;
-        s->scu_status &= ~(mask << shift);
-        s->scu_status |= ((value & mask) << shift);
-        break;
-    case 0x0c: /* Invalidate All Registers In Secure State */
-        /* no-op as we do not implement caches */
-        break;
-    case 0x40: /* Filtering Start Address Register */
-    case 0x44: /* Filtering End Address Register */
-        /* RAZ/WI, like an implementation with only one AXI master */
-        break;
-    case 0x50: /* SCU Access Control Register */
-    case 0x54: /* SCU Non-secure Access Control Register */
-        /* unimplemented, fall through */
-    default:
-        break;
-    }
-}
-
-static const MemoryRegionOps a9_scu_ops = {
-    .read = a9_scu_read,
-    .write = a9_scu_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
-};
-
-static void a9mp_priv_reset(DeviceState *dev)
-{
-    a9mp_priv_state *s = FROM_SYSBUS(a9mp_priv_state, SYS_BUS_DEVICE(dev));
-    int i;
-    s->scu_control = 0;
-    for (i = 0; i < ARRAY_SIZE(s->old_timer_status); i++) {
-        s->old_timer_status[i] = 0;
-    }
-}
+} A9MPPrivState;
 
 static void a9mp_priv_set_irq(void *opaque, int irq, int level)
 {
-    a9mp_priv_state *s = (a9mp_priv_state *)opaque;
+    A9MPPrivState *s = (A9MPPrivState *)opaque;
     qemu_set_irq(qdev_get_gpio_in(s->gic, irq), level);
 }
 
 static int a9mp_priv_init(SysBusDevice *dev)
 {
-    a9mp_priv_state *s = FROM_SYSBUS(a9mp_priv_state, dev);
-    SysBusDevice *busdev, *gicbusdev;
+    A9MPPrivState *s = FROM_SYSBUS(A9MPPrivState, dev);
+    SysBusDevice *timerbusdev, *wdtbusdev, *gicbusdev, *scubusdev;
     int i;
 
     s->gic = qdev_create(NULL, "arm_gic");
@@ -144,10 +45,20 @@
     /* Pass through inbound GPIO lines to the GIC */
     qdev_init_gpio_in(&s->busdev.qdev, a9mp_priv_set_irq, s->num_irq - 32);
 
+    s->scu = qdev_create(NULL, "a9-scu");
+    qdev_prop_set_uint32(s->scu, "num-cpu", s->num_cpu);
+    qdev_init_nofail(s->scu);
+    scubusdev = SYS_BUS_DEVICE(s->scu);
+
     s->mptimer = qdev_create(NULL, "arm_mptimer");
     qdev_prop_set_uint32(s->mptimer, "num-cpu", s->num_cpu);
     qdev_init_nofail(s->mptimer);
-    busdev = SYS_BUS_DEVICE(s->mptimer);
+    timerbusdev = SYS_BUS_DEVICE(s->mptimer);
+
+    s->wdt = qdev_create(NULL, "arm_mptimer");
+    qdev_prop_set_uint32(s->wdt, "num-cpu", s->num_cpu);
+    qdev_init_nofail(s->wdt);
+    wdtbusdev = SYS_BUS_DEVICE(s->wdt);
 
     /* Memory map (addresses are offsets from PERIPHBASE):
      *  0x0000-0x00ff -- Snoop Control Unit
@@ -161,8 +72,8 @@
      * We should implement the global timer but don't currently do so.
      */
     memory_region_init(&s->container, "a9mp-priv-container", 0x2000);
-    memory_region_init_io(&s->scu_iomem, &a9_scu_ops, s, "a9mp-scu", 0x100);
-    memory_region_add_subregion(&s->container, 0, &s->scu_iomem);
+    memory_region_add_subregion(&s->container, 0,
+                                sysbus_mmio_get_region(scubusdev, 0));
     /* GIC CPU interface */
     memory_region_add_subregion(&s->container, 0x100,
                                 sysbus_mmio_get_region(gicbusdev, 1));
@@ -170,9 +81,9 @@
      * memory region, not the "timer/watchdog for core X" ones 11MPcore has.
      */
     memory_region_add_subregion(&s->container, 0x600,
-                                sysbus_mmio_get_region(busdev, 0));
+                                sysbus_mmio_get_region(timerbusdev, 0));
     memory_region_add_subregion(&s->container, 0x620,
-                                sysbus_mmio_get_region(busdev, 1));
+                                sysbus_mmio_get_region(wdtbusdev, 0));
     memory_region_add_subregion(&s->container, 0x1000,
                                 sysbus_mmio_get_region(gicbusdev, 0));
 
@@ -183,35 +94,23 @@
      */
     for (i = 0; i < s->num_cpu; i++) {
         int ppibase = (s->num_irq - 32) + i * 32;
-        sysbus_connect_irq(busdev, i * 2,
+        sysbus_connect_irq(timerbusdev, i,
                            qdev_get_gpio_in(s->gic, ppibase + 29));
-        sysbus_connect_irq(busdev, i * 2 + 1,
+        sysbus_connect_irq(wdtbusdev, i,
                            qdev_get_gpio_in(s->gic, ppibase + 30));
     }
     return 0;
 }
 
-static const VMStateDescription vmstate_a9mp_priv = {
-    .name = "a9mpcore_priv",
-    .version_id = 2,
-    .minimum_version_id = 1,
-    .fields = (VMStateField[]) {
-        VMSTATE_UINT32(scu_control, a9mp_priv_state),
-        VMSTATE_UINT32_ARRAY(old_timer_status, a9mp_priv_state, 8),
-        VMSTATE_UINT32_V(scu_status, a9mp_priv_state, 2),
-        VMSTATE_END_OF_LIST()
-    }
-};
-
 static Property a9mp_priv_properties[] = {
-    DEFINE_PROP_UINT32("num-cpu", a9mp_priv_state, num_cpu, 1),
+    DEFINE_PROP_UINT32("num-cpu", A9MPPrivState, num_cpu, 1),
     /* The Cortex-A9MP may have anything from 0 to 224 external interrupt
      * IRQ lines (with another 32 internal). We default to 64+32, which
      * is the number provided by the Cortex-A9MP test chip in the
      * Realview PBX-A9 and Versatile Express A9 development boards.
      * Other boards may differ and should set this property appropriately.
      */
-    DEFINE_PROP_UINT32("num-irq", a9mp_priv_state, num_irq, 96),
+    DEFINE_PROP_UINT32("num-irq", A9MPPrivState, num_irq, 96),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -222,14 +121,12 @@
 
     k->init = a9mp_priv_init;
     dc->props = a9mp_priv_properties;
-    dc->vmsd = &vmstate_a9mp_priv;
-    dc->reset = a9mp_priv_reset;
 }
 
 static const TypeInfo a9mp_priv_info = {
     .name          = "a9mpcore_priv",
     .parent        = TYPE_SYS_BUS_DEVICE,
-    .instance_size = sizeof(a9mp_priv_state),
+    .instance_size = sizeof(A9MPPrivState),
     .class_init    = a9mp_priv_class_init,
 };
 
diff --git a/hw/a9scu.c b/hw/a9scu.c
new file mode 100644
index 0000000..0e9e54d
--- /dev/null
+++ b/hw/a9scu.c
@@ -0,0 +1,164 @@
+/*
+ * Cortex-A9MPCore Snoop Control Unit (SCU) emulation.
+ *
+ * Copyright (c) 2009 CodeSourcery.
+ * Copyright (c) 2011 Linaro Limited.
+ * Written by Paul Brook, Peter Maydell.
+ *
+ * This code is licensed under the GPL.
+ */
+
+#include "sysbus.h"
+
+/* A9MP private memory region.  */
+
+typedef struct A9SCUState {
+    SysBusDevice busdev;
+    MemoryRegion iomem;
+    uint32_t control;
+    uint32_t status;
+    uint32_t num_cpu;
+} A9SCUState;
+
+#define TYPE_A9_SCU "a9-scu"
+#define A9_SCU(obj) OBJECT_CHECK(A9SCUState, (obj), TYPE_A9_SCU)
+
+static uint64_t a9_scu_read(void *opaque, hwaddr offset,
+                            unsigned size)
+{
+    A9SCUState *s = (A9SCUState *)opaque;
+    switch (offset) {
+    case 0x00: /* Control */
+        return s->control;
+    case 0x04: /* Configuration */
+        return (((1 << s->num_cpu) - 1) << 4) | (s->num_cpu - 1);
+    case 0x08: /* CPU Power Status */
+        return s->status;
+    case 0x09: /* CPU status.  */
+        return s->status >> 8;
+    case 0x0a: /* CPU status.  */
+        return s->status >> 16;
+    case 0x0b: /* CPU status.  */
+        return s->status >> 24;
+    case 0x0c: /* Invalidate All Registers In Secure State */
+        return 0;
+    case 0x40: /* Filtering Start Address Register */
+    case 0x44: /* Filtering End Address Register */
+        /* RAZ/WI, like an implementation with only one AXI master */
+        return 0;
+    case 0x50: /* SCU Access Control Register */
+    case 0x54: /* SCU Non-secure Access Control Register */
+        /* unimplemented, fall through */
+    default:
+        return 0;
+    }
+}
+
+static void a9_scu_write(void *opaque, hwaddr offset,
+                         uint64_t value, unsigned size)
+{
+    A9SCUState *s = (A9SCUState *)opaque;
+    uint32_t mask;
+    uint32_t shift;
+    switch (size) {
+    case 1:
+        mask = 0xff;
+        break;
+    case 2:
+        mask = 0xffff;
+        break;
+    case 4:
+        mask = 0xffffffff;
+        break;
+    default:
+        fprintf(stderr, "Invalid size %u in write to a9 scu register %x\n",
+                size, (unsigned)offset);
+        return;
+    }
+
+    switch (offset) {
+    case 0x00: /* Control */
+        s->control = value & 1;
+        break;
+    case 0x4: /* Configuration: RO */
+        break;
+    case 0x08: case 0x09: case 0x0A: case 0x0B: /* Power Control */
+        shift = (offset - 0x8) * 8;
+        s->status &= ~(mask << shift);
+        s->status |= ((value & mask) << shift);
+        break;
+    case 0x0c: /* Invalidate All Registers In Secure State */
+        /* no-op as we do not implement caches */
+        break;
+    case 0x40: /* Filtering Start Address Register */
+    case 0x44: /* Filtering End Address Register */
+        /* RAZ/WI, like an implementation with only one AXI master */
+        break;
+    case 0x50: /* SCU Access Control Register */
+    case 0x54: /* SCU Non-secure Access Control Register */
+        /* unimplemented, fall through */
+    default:
+        break;
+    }
+}
+
+static const MemoryRegionOps a9_scu_ops = {
+    .read = a9_scu_read,
+    .write = a9_scu_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static void a9_scu_reset(DeviceState *dev)
+{
+    A9SCUState *s = A9_SCU(dev);
+    s->control = 0;
+}
+
+static void a9_scu_realize(DeviceState *dev, Error ** errp)
+{
+    A9SCUState *s = A9_SCU(dev);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+
+    memory_region_init_io(&s->iomem, &a9_scu_ops, s, "a9-scu", 0x100);
+    sysbus_init_mmio(sbd, &s->iomem);
+}
+
+static const VMStateDescription vmstate_a9_scu = {
+    .name = "a9-scu",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(control, A9SCUState),
+        VMSTATE_UINT32(status, A9SCUState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static Property a9_scu_properties[] = {
+    DEFINE_PROP_UINT32("num-cpu", A9SCUState, num_cpu, 1),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void a9_scu_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->realize = a9_scu_realize;
+    dc->props = a9_scu_properties;
+    dc->vmsd = &vmstate_a9_scu;
+    dc->reset = a9_scu_reset;
+}
+
+static const TypeInfo a9_scu_info = {
+    .name          = TYPE_A9_SCU,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(A9SCUState),
+    .class_init    = a9_scu_class_init,
+};
+
+static void a9mp_register_types(void)
+{
+    type_register_static(&a9_scu_info);
+}
+
+type_init(a9mp_register_types)
diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs
index 6d049e7..4c10985 100644
--- a/hw/arm/Makefile.objs
+++ b/hw/arm/Makefile.objs
@@ -3,6 +3,7 @@
 obj-y += xilinx_zynq.o zynq_slcr.o
 obj-y += xilinx_spips.o
 obj-y += arm_gic.o arm_gic_common.o
+obj-y += a9scu.o
 obj-y += realview_gic.o realview.o arm_sysctl.o arm11mpcore.o a9mpcore.o
 obj-y += exynos4210_gic.o exynos4210_combiner.o exynos4210.o
 obj-y += exynos4_boards.o exynos4210_uart.o exynos4210_pwm.o
diff --git a/hw/arm11mpcore.c b/hw/arm11mpcore.c
index 324e503..ca49948 100644
--- a/hw/arm11mpcore.c
+++ b/hw/arm11mpcore.c
@@ -12,7 +12,7 @@
 
 /* MPCore private memory region.  */
 
-typedef struct mpcore_priv_state {
+typedef struct ARM11MPCorePriveState {
     SysBusDevice busdev;
     uint32_t scu_control;
     int iomemtype;
@@ -21,16 +21,17 @@
     MemoryRegion iomem;
     MemoryRegion container;
     DeviceState *mptimer;
+    DeviceState *wdtimer;
     DeviceState *gic;
     uint32_t num_irq;
-} mpcore_priv_state;
+} ARM11MPCorePriveState;
 
 /* Per-CPU private memory mapped IO.  */
 
 static uint64_t mpcore_scu_read(void *opaque, hwaddr offset,
                                 unsigned size)
 {
-    mpcore_priv_state *s = (mpcore_priv_state *)opaque;
+    ARM11MPCorePriveState *s = (ARM11MPCorePriveState *)opaque;
     int id;
     /* SCU */
     switch (offset) {
@@ -53,7 +54,7 @@
 static void mpcore_scu_write(void *opaque, hwaddr offset,
                              uint64_t value, unsigned size)
 {
-    mpcore_priv_state *s = (mpcore_priv_state *)opaque;
+    ARM11MPCorePriveState *s = (ARM11MPCorePriveState *)opaque;
     /* SCU */
     switch (offset) {
     case 0: /* Control register.  */
@@ -76,15 +77,16 @@
 
 static void mpcore_priv_set_irq(void *opaque, int irq, int level)
 {
-    mpcore_priv_state *s = (mpcore_priv_state *)opaque;
+    ARM11MPCorePriveState *s = (ARM11MPCorePriveState *)opaque;
     qemu_set_irq(qdev_get_gpio_in(s->gic, irq), level);
 }
 
-static void mpcore_priv_map_setup(mpcore_priv_state *s)
+static void mpcore_priv_map_setup(ARM11MPCorePriveState *s)
 {
     int i;
     SysBusDevice *gicbusdev = SYS_BUS_DEVICE(s->gic);
-    SysBusDevice *busdev = SYS_BUS_DEVICE(s->mptimer);
+    SysBusDevice *timerbusdev = SYS_BUS_DEVICE(s->mptimer);
+    SysBusDevice *wdtbusdev = SYS_BUS_DEVICE(s->wdtimer);
     memory_region_init(&s->container, "mpcode-priv-container", 0x2000);
     memory_region_init_io(&s->iomem, &mpcore_scu_ops, s, "mpcore-scu", 0x100);
     memory_region_add_subregion(&s->container, 0, &s->iomem);
@@ -99,11 +101,13 @@
     /* Add the regions for timer and watchdog for "current CPU" and
      * for each specific CPU.
      */
-    for (i = 0; i < (s->num_cpu + 1) * 2; i++) {
+    for (i = 0; i < (s->num_cpu + 1); i++) {
         /* Timers at 0x600, 0x700, ...; watchdogs at 0x620, 0x720, ... */
-        hwaddr offset = 0x600 + (i >> 1) * 0x100 + (i & 1) * 0x20;
+        hwaddr offset = 0x600 + i * 0x100;
         memory_region_add_subregion(&s->container, offset,
-                                    sysbus_mmio_get_region(busdev, i));
+                                    sysbus_mmio_get_region(timerbusdev, i));
+        memory_region_add_subregion(&s->container, offset + 0x20,
+                                    sysbus_mmio_get_region(wdtbusdev, i));
     }
     memory_region_add_subregion(&s->container, 0x1000,
                                 sysbus_mmio_get_region(gicbusdev, 0));
@@ -112,16 +116,16 @@
      */
     for (i = 0; i < s->num_cpu; i++) {
         int ppibase = (s->num_irq - 32) + i * 32;
-        sysbus_connect_irq(busdev, i * 2,
+        sysbus_connect_irq(timerbusdev, i,
                            qdev_get_gpio_in(s->gic, ppibase + 29));
-        sysbus_connect_irq(busdev, i * 2 + 1,
+        sysbus_connect_irq(wdtbusdev, i,
                            qdev_get_gpio_in(s->gic, ppibase + 30));
     }
 }
 
 static int mpcore_priv_init(SysBusDevice *dev)
 {
-    mpcore_priv_state *s = FROM_SYSBUS(mpcore_priv_state, dev);
+    ARM11MPCorePriveState *s = FROM_SYSBUS(ARM11MPCorePriveState, dev);
 
     s->gic = qdev_create(NULL, "arm_gic");
     qdev_prop_set_uint32(s->gic, "num-cpu", s->num_cpu);
@@ -139,6 +143,11 @@
     s->mptimer = qdev_create(NULL, "arm_mptimer");
     qdev_prop_set_uint32(s->mptimer, "num-cpu", s->num_cpu);
     qdev_init_nofail(s->mptimer);
+
+    s->wdtimer = qdev_create(NULL, "arm_mptimer");
+    qdev_prop_set_uint32(s->wdtimer, "num-cpu", s->num_cpu);
+    qdev_init_nofail(s->wdtimer);
+
     mpcore_priv_map_setup(s);
     sysbus_init_mmio(dev, &s->container);
     return 0;
@@ -230,7 +239,7 @@
 };
 
 static Property mpcore_priv_properties[] = {
-    DEFINE_PROP_UINT32("num-cpu", mpcore_priv_state, num_cpu, 1),
+    DEFINE_PROP_UINT32("num-cpu", ARM11MPCorePriveState, num_cpu, 1),
     /* The ARM11 MPCORE TRM says the on-chip controller may have
      * anything from 0 to 224 external interrupt IRQ lines (with another
      * 32 internal). We default to 32+32, which is the number provided by
@@ -239,7 +248,7 @@
      * appropriately. Some Linux kernels may not boot if the hardware
      * has more IRQ lines than the kernel expects.
      */
-    DEFINE_PROP_UINT32("num-irq", mpcore_priv_state, num_irq, 64),
+    DEFINE_PROP_UINT32("num-irq", ARM11MPCorePriveState, num_irq, 64),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -255,7 +264,7 @@
 static const TypeInfo mpcore_priv_info = {
     .name          = "arm11mpcore_priv",
     .parent        = TYPE_SYS_BUS_DEVICE,
-    .instance_size = sizeof(mpcore_priv_state),
+    .instance_size = sizeof(ARM11MPCorePriveState),
     .class_init    = mpcore_priv_class_init,
 };
 
diff --git a/hw/arm_mptimer.c b/hw/arm_mptimer.c
index 32817d3..7b08aa3 100644
--- a/hw/arm_mptimer.c
+++ b/hw/arm_mptimer.c
@@ -38,16 +38,16 @@
     QEMUTimer *timer;
     qemu_irq irq;
     MemoryRegion iomem;
-} timerblock;
+} TimerBlock;
 
 typedef struct {
     SysBusDevice busdev;
     uint32_t num_cpu;
-    timerblock timerblock[MAX_CPUS * 2];
-    MemoryRegion iomem[2];
-} arm_mptimer_state;
+    TimerBlock timerblock[MAX_CPUS];
+    MemoryRegion iomem;
+} ARMMPTimerState;
 
-static inline int get_current_cpu(arm_mptimer_state *s)
+static inline int get_current_cpu(ARMMPTimerState *s)
 {
     CPUState *cpu_single_cpu = ENV_GET_CPU(cpu_single_env);
 
@@ -58,18 +58,18 @@
     return cpu_single_cpu->cpu_index;
 }
 
-static inline void timerblock_update_irq(timerblock *tb)
+static inline void timerblock_update_irq(TimerBlock *tb)
 {
     qemu_set_irq(tb->irq, tb->status);
 }
 
 /* Return conversion factor from mpcore timer ticks to qemu timer ticks.  */
-static inline uint32_t timerblock_scale(timerblock *tb)
+static inline uint32_t timerblock_scale(TimerBlock *tb)
 {
     return (((tb->control >> 8) & 0xff) + 1) * 10;
 }
 
-static void timerblock_reload(timerblock *tb, int restart)
+static void timerblock_reload(TimerBlock *tb, int restart)
 {
     if (tb->count == 0) {
         return;
@@ -83,7 +83,7 @@
 
 static void timerblock_tick(void *opaque)
 {
-    timerblock *tb = (timerblock *)opaque;
+    TimerBlock *tb = (TimerBlock *)opaque;
     tb->status = 1;
     if (tb->control & 2) {
         tb->count = tb->load;
@@ -97,7 +97,7 @@
 static uint64_t timerblock_read(void *opaque, hwaddr addr,
                                 unsigned size)
 {
-    timerblock *tb = (timerblock *)opaque;
+    TimerBlock *tb = (TimerBlock *)opaque;
     int64_t val;
     switch (addr) {
     case 0: /* Load */
@@ -125,7 +125,7 @@
 static void timerblock_write(void *opaque, hwaddr addr,
                              uint64_t value, unsigned size)
 {
-    timerblock *tb = (timerblock *)opaque;
+    TimerBlock *tb = (TimerBlock *)opaque;
     int64_t old;
     switch (addr) {
     case 0: /* Load */
@@ -164,33 +164,17 @@
 static uint64_t arm_thistimer_read(void *opaque, hwaddr addr,
                                    unsigned size)
 {
-    arm_mptimer_state *s = (arm_mptimer_state *)opaque;
+    ARMMPTimerState *s = (ARMMPTimerState *)opaque;
     int id = get_current_cpu(s);
-    return timerblock_read(&s->timerblock[id * 2], addr, size);
+    return timerblock_read(&s->timerblock[id], addr, size);
 }
 
 static void arm_thistimer_write(void *opaque, hwaddr addr,
                                 uint64_t value, unsigned size)
 {
-    arm_mptimer_state *s = (arm_mptimer_state *)opaque;
+    ARMMPTimerState *s = (ARMMPTimerState *)opaque;
     int id = get_current_cpu(s);
-    timerblock_write(&s->timerblock[id * 2], addr, value, size);
-}
-
-static uint64_t arm_thiswdog_read(void *opaque, hwaddr addr,
-                                  unsigned size)
-{
-    arm_mptimer_state *s = (arm_mptimer_state *)opaque;
-    int id = get_current_cpu(s);
-    return timerblock_read(&s->timerblock[id * 2 + 1], addr, size);
-}
-
-static void arm_thiswdog_write(void *opaque, hwaddr addr,
-                               uint64_t value, unsigned size)
-{
-    arm_mptimer_state *s = (arm_mptimer_state *)opaque;
-    int id = get_current_cpu(s);
-    timerblock_write(&s->timerblock[id * 2 + 1], addr, value, size);
+    timerblock_write(&s->timerblock[id], addr, value, size);
 }
 
 static const MemoryRegionOps arm_thistimer_ops = {
@@ -203,16 +187,6 @@
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static const MemoryRegionOps arm_thiswdog_ops = {
-    .read = arm_thiswdog_read,
-    .write = arm_thiswdog_write,
-    .valid = {
-        .min_access_size = 4,
-        .max_access_size = 4,
-    },
-    .endianness = DEVICE_NATIVE_ENDIAN,
-};
-
 static const MemoryRegionOps timerblock_ops = {
     .read = timerblock_read,
     .write = timerblock_write,
@@ -223,7 +197,7 @@
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static void timerblock_reset(timerblock *tb)
+static void timerblock_reset(TimerBlock *tb)
 {
     tb->count = 0;
     tb->load = 0;
@@ -237,12 +211,9 @@
 
 static void arm_mptimer_reset(DeviceState *dev)
 {
-    arm_mptimer_state *s =
-        FROM_SYSBUS(arm_mptimer_state, SYS_BUS_DEVICE(dev));
+    ARMMPTimerState *s =
+        FROM_SYSBUS(ARMMPTimerState, SYS_BUS_DEVICE(dev));
     int i;
-    /* We reset every timer in the array, not just the ones we're using,
-     * because vmsave will look at every array element.
-     */
     for (i = 0; i < ARRAY_SIZE(s->timerblock); i++) {
         timerblock_reset(&s->timerblock[i]);
     }
@@ -250,35 +221,26 @@
 
 static int arm_mptimer_init(SysBusDevice *dev)
 {
-    arm_mptimer_state *s = FROM_SYSBUS(arm_mptimer_state, dev);
+    ARMMPTimerState *s = FROM_SYSBUS(ARMMPTimerState, dev);
     int i;
     if (s->num_cpu < 1 || s->num_cpu > MAX_CPUS) {
         hw_error("%s: num-cpu must be between 1 and %d\n", __func__, MAX_CPUS);
     }
-    /* We implement one timer and one watchdog block per CPU, and
-     * expose multiple MMIO regions:
+    /* We implement one timer block per CPU, and expose multiple MMIO regions:
      *  * region 0 is "timer for this core"
-     *  * region 1 is "watchdog for this core"
-     *  * region 2 is "timer for core 0"
-     *  * region 3 is "watchdog for core 0"
-     *  * region 4 is "timer for core 1"
-     *  * region 5 is "watchdog for core 1"
+     *  * region 1 is "timer for core 0"
+     *  * region 2 is "timer for core 1"
      * and so on.
      * The outgoing interrupt lines are
      *  * timer for core 0
-     *  * watchdog for core 0
      *  * timer for core 1
-     *  * watchdog for core 1
      * and so on.
      */
-    memory_region_init_io(&s->iomem[0], &arm_thistimer_ops, s,
+    memory_region_init_io(&s->iomem, &arm_thistimer_ops, s,
                           "arm_mptimer_timer", 0x20);
-    sysbus_init_mmio(dev, &s->iomem[0]);
-    memory_region_init_io(&s->iomem[1], &arm_thiswdog_ops, s,
-                          "arm_mptimer_wdog", 0x20);
-    sysbus_init_mmio(dev, &s->iomem[1]);
-    for (i = 0; i < (s->num_cpu * 2); i++) {
-        timerblock *tb = &s->timerblock[i];
+    sysbus_init_mmio(dev, &s->iomem);
+    for (i = 0; i < s->num_cpu; i++) {
+        TimerBlock *tb = &s->timerblock[i];
         tb->timer = qemu_new_timer_ns(vm_clock, timerblock_tick, tb);
         sysbus_init_irq(dev, &tb->irq);
         memory_region_init_io(&tb->iomem, &timerblock_ops, tb,
@@ -294,28 +256,28 @@
     .version_id = 1,
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
-        VMSTATE_UINT32(count, timerblock),
-        VMSTATE_UINT32(load, timerblock),
-        VMSTATE_UINT32(control, timerblock),
-        VMSTATE_UINT32(status, timerblock),
-        VMSTATE_INT64(tick, timerblock),
+        VMSTATE_UINT32(count, TimerBlock),
+        VMSTATE_UINT32(load, TimerBlock),
+        VMSTATE_UINT32(control, TimerBlock),
+        VMSTATE_UINT32(status, TimerBlock),
+        VMSTATE_INT64(tick, TimerBlock),
         VMSTATE_END_OF_LIST()
     }
 };
 
 static const VMStateDescription vmstate_arm_mptimer = {
     .name = "arm_mptimer",
-    .version_id = 1,
-    .minimum_version_id = 1,
+    .version_id = 2,
+    .minimum_version_id = 2,
     .fields = (VMStateField[]) {
-        VMSTATE_STRUCT_ARRAY(timerblock, arm_mptimer_state, (MAX_CPUS * 2),
-                             1, vmstate_timerblock, timerblock),
+        VMSTATE_STRUCT_VARRAY_UINT32(timerblock, ARMMPTimerState, num_cpu,
+                                     2, vmstate_timerblock, TimerBlock),
         VMSTATE_END_OF_LIST()
     }
 };
 
 static Property arm_mptimer_properties[] = {
-    DEFINE_PROP_UINT32("num-cpu", arm_mptimer_state, num_cpu, 0),
+    DEFINE_PROP_UINT32("num-cpu", ARMMPTimerState, num_cpu, 0),
     DEFINE_PROP_END_OF_LIST()
 };
 
@@ -334,7 +296,7 @@
 static const TypeInfo arm_mptimer_info = {
     .name          = "arm_mptimer",
     .parent        = TYPE_SYS_BUS_DEVICE,
-    .instance_size = sizeof(arm_mptimer_state),
+    .instance_size = sizeof(ARMMPTimerState),
     .class_init    = arm_mptimer_class_init,
 };
 
diff --git a/hw/cadence_gem.c b/hw/cadence_gem.c
index ab86c17..de7d15a 100644
--- a/hw/cadence_gem.c
+++ b/hw/cadence_gem.c
@@ -427,32 +427,9 @@
  */
 static void gem_update_int_status(GemState *s)
 {
-    uint32_t new_interrupts = 0;
-    /* Packet transmitted ? */
-    if (s->regs[GEM_TXSTATUS] & GEM_TXSTATUS_TXCMPL) {
-        new_interrupts |= GEM_INT_TXCMPL;
-    }
-    /* End of TX ring ? */
-    if (s->regs[GEM_TXSTATUS] & GEM_TXSTATUS_USED) {
-        new_interrupts |= GEM_INT_TXUSED;
-    }
-
-    /* Frame received ? */
-    if (s->regs[GEM_RXSTATUS] & GEM_RXSTATUS_FRMRCVD) {
-        new_interrupts |= GEM_INT_RXCMPL;
-    }
-    /* RX ring full ? */
-    if (s->regs[GEM_RXSTATUS] & GEM_RXSTATUS_NOBUF) {
-        new_interrupts |= GEM_INT_RXUSED;
-    }
-
-    s->regs[GEM_ISR] |= new_interrupts & ~(s->regs[GEM_IMR]);
-
     if (s->regs[GEM_ISR]) {
         DB_PRINT("asserting int. (0x%08x)\n", s->regs[GEM_ISR]);
         qemu_set_irq(s->irq, 1);
-    } else {
-        qemu_set_irq(s->irq, 0);
     }
 }
 
@@ -615,7 +592,7 @@
     s = qemu_get_nic_opaque(nc);
 
     /* Do nothing if receive is not enabled. */
-    if (!(s->regs[GEM_NWCTRL] & GEM_NWCTRL_RXENA)) {
+    if (!gem_can_receive(nc)) {
         return -1;
     }
 
@@ -697,6 +674,7 @@
             DB_PRINT("descriptor 0x%x owned by sw.\n",
                      (unsigned)packet_desc_addr);
             s->regs[GEM_RXSTATUS] |= GEM_RXSTATUS_NOBUF;
+            s->regs[GEM_ISR] |= GEM_INT_RXUSED & ~(s->regs[GEM_IMR]);
             /* Handle interrupt consequences */
             gem_update_int_status(s);
             return -1;
@@ -746,7 +724,9 @@
     s->rx_desc_addr = last_desc_addr;
     if (rx_desc_get_wrap(desc)) {
         s->rx_desc_addr = s->regs[GEM_RXQBASE];
+        DB_PRINT("wrapping RX descriptor list\n");
     } else {
+        DB_PRINT("incrementing RX descriptor list\n");
         s->rx_desc_addr += 8;
     }
 
@@ -765,6 +745,7 @@
                               (uint8_t *)&desc[0], sizeof(desc));
 
     s->regs[GEM_RXSTATUS] |= GEM_RXSTATUS_FRMRCVD;
+    s->regs[GEM_ISR] |= GEM_INT_RXCMPL & ~(s->regs[GEM_IMR]);
 
     /* Handle interrupt consequences */
     gem_update_int_status(s);
@@ -894,6 +875,7 @@
             DB_PRINT("TX descriptor next: 0x%08x\n", s->tx_desc_addr);
 
             s->regs[GEM_TXSTATUS] |= GEM_TXSTATUS_TXCMPL;
+            s->regs[GEM_ISR] |= GEM_INT_TXCMPL & ~(s->regs[GEM_IMR]);
 
             /* Handle interrupt consequences */
             gem_update_int_status(s);
@@ -931,6 +913,7 @@
 
     if (tx_desc_get_used(desc)) {
         s->regs[GEM_TXSTATUS] |= GEM_TXSTATUS_USED;
+        s->regs[GEM_ISR] |= GEM_INT_TXUSED & ~(s->regs[GEM_IMR]);
         gem_update_int_status(s);
     }
 }
@@ -1102,9 +1085,8 @@
             /* Reset to start of Q when transmit disabled. */
             s->tx_desc_addr = s->regs[GEM_TXQBASE];
         }
-        if (!(val & GEM_NWCTRL_RXENA)) {
-            /* Reset to start of Q when receive disabled. */
-            s->rx_desc_addr = s->regs[GEM_RXQBASE];
+        if (val & GEM_NWCTRL_RXENA) {
+            qemu_flush_queued_packets(qemu_get_queue(s->nic));
         }
         break;
 
diff --git a/hw/ds1338.c b/hw/ds1338.c
index 6f70538..1da0f96 100644
--- a/hw/ds1338.c
+++ b/hw/ds1338.c
@@ -59,8 +59,8 @@
     s->nvram[1] = to_bcd(now.tm_min);
     if (s->nvram[2] & HOURS_12) {
         int tmp = now.tm_hour;
-        if (tmp == 0) {
-            tmp = 24;
+        if (tmp % 12 == 0) {
+            tmp += 12;
         }
         if (tmp <= 12) {
             s->nvram[2] = HOURS_12 | to_bcd(tmp);
@@ -145,8 +145,8 @@
                 if (data & HOURS_PM) {
                     tmp += 12;
                 }
-                if (tmp == 24) {
-                    tmp = 0;
+                if (tmp % 12 == 0) {
+                    tmp -= 12;
                 }
                 now.tm_hour = tmp;
             } else {
diff --git a/hw/m25p80.c b/hw/m25p80.c
index 461b41c..1372d06 100644
--- a/hw/m25p80.c
+++ b/hw/m25p80.c
@@ -178,8 +178,6 @@
 
     /* Numonyx -- n25q128 */
     { INFO("n25q128",      0x20ba18,      0,  64 << 10, 256, 0) },
-
-    { },
 };
 
 typedef enum {
@@ -236,11 +234,23 @@
 
     int64_t dirty_page;
 
-    char *part_name;
     const FlashPartInfo *pi;
 
 } Flash;
 
+typedef struct M25P80Class {
+    SSISlaveClass parent_class;
+    FlashPartInfo *pi;
+} M25P80Class;
+
+#define TYPE_M25P80 "m25p80-generic"
+#define M25P80(obj) \
+     OBJECT_CHECK(Flash, (obj), TYPE_M25P80)
+#define M25P80_CLASS(klass) \
+     OBJECT_CLASS_CHECK(M25P80Class, (klass), TYPE_M25P80)
+#define M25P80_GET_CLASS(obj) \
+     OBJECT_GET_CLASS(M25P80Class, (obj), TYPE_M25P80)
+
 static void bdrv_sync_complete(void *opaque, int ret)
 {
     /* do nothing. Masters do not directly interact with the backing store,
@@ -571,23 +581,9 @@
 {
     DriveInfo *dinfo;
     Flash *s = FROM_SSI_SLAVE(Flash, ss);
-    const FlashPartInfo *i;
+    M25P80Class *mc = M25P80_GET_CLASS(s);
 
-    if (!s->part_name) { /* default to actual m25p80 if no partname given */
-        s->part_name = (char *)"m25p80";
-    }
-
-    i = known_devices;
-    for (i = known_devices;; i++) {
-        assert(i);
-        if (!i->part_name) {
-            fprintf(stderr, "Unknown SPI flash part: \"%s\"\n", s->part_name);
-            return 1;
-        } else if (!strcmp(i->part_name, s->part_name)) {
-            s->pi = i;
-            break;
-        }
-    }
+    s->pi = mc->pi;
 
     s->size = s->pi->sector_size * s->pi->n_sectors;
     s->dirty_page = -1;
@@ -635,34 +631,42 @@
     }
 };
 
-static Property m25p80_properties[] = {
-    DEFINE_PROP_STRING("partname", Flash, part_name),
-    DEFINE_PROP_END_OF_LIST(),
-};
-
 static void m25p80_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     SSISlaveClass *k = SSI_SLAVE_CLASS(klass);
+    M25P80Class *mc = M25P80_CLASS(klass);
 
     k->init = m25p80_init;
     k->transfer = m25p80_transfer8;
     k->set_cs = m25p80_cs;
     k->cs_polarity = SSI_CS_LOW;
-    dc->props = m25p80_properties;
     dc->vmsd = &vmstate_m25p80;
+    mc->pi = data;
 }
 
 static const TypeInfo m25p80_info = {
-    .name           = "m25p80",
+    .name           = TYPE_M25P80,
     .parent         = TYPE_SSI_SLAVE,
     .instance_size  = sizeof(Flash),
-    .class_init     = m25p80_class_init,
+    .class_size     = sizeof(M25P80Class),
+    .abstract       = true,
 };
 
 static void m25p80_register_types(void)
 {
+    int i;
+
     type_register_static(&m25p80_info);
+    for (i = 0; i < ARRAY_SIZE(known_devices); ++i) {
+        TypeInfo ti = {
+            .name       = known_devices[i].part_name,
+            .parent     = TYPE_M25P80,
+            .class_init = m25p80_class_init,
+            .class_data = (void *)&known_devices[i],
+        };
+        type_register(&ti);
+    }
 }
 
 type_init(m25p80_register_types)
diff --git a/hw/petalogix_ml605_mmu.c b/hw/petalogix_ml605_mmu.c
index 82d7183..fe7a932 100644
--- a/hw/petalogix_ml605_mmu.c
+++ b/hw/petalogix_ml605_mmu.c
@@ -158,8 +158,7 @@
         for (i = 0; i < NUM_SPI_FLASHES; i++) {
             qemu_irq cs_line;
 
-            dev = ssi_create_slave_no_init(spi, "m25p80");
-            qdev_prop_set_string(dev, "partname", "n25q128");
+            dev = ssi_create_slave_no_init(spi, "n25q128");
             qdev_init_nofail(dev);
             cs_line = qdev_get_gpio_in(dev, 0);
             sysbus_connect_irq(busdev, i+1, cs_line);
diff --git a/hw/pflash_cfi01.c b/hw/pflash_cfi01.c
index 9e6ff52..123b006 100644
--- a/hw/pflash_cfi01.c
+++ b/hw/pflash_cfi01.c
@@ -122,6 +122,12 @@
             __func__, offset, pfl->cmd, width);
 #endif
     switch (pfl->cmd) {
+    default:
+        /* This should never happen : reset state & treat it as a read */
+        DPRINTF("%s: unknown command state: %x\n", __func__, pfl->cmd);
+        pfl->wcycle = 0;
+        pfl->cmd = 0;
+        /* fall through to read code */
     case 0x00:
         /* Flash area read */
         p = pfl->storage;
@@ -162,7 +168,10 @@
         }
 
         break;
+    case 0x10: /* Single byte program */
     case 0x20: /* Block erase */
+    case 0x28: /* Block erase */
+    case 0x40: /* single byte program */
     case 0x50: /* Clear status register */
     case 0x60: /* Block /un)lock */
     case 0x70: /* Status Register */
@@ -194,11 +203,6 @@
         else
             ret = pfl->cfi_table[boff];
         break;
-    default:
-        /* This should never happen : reset state & treat it as a read */
-        DPRINTF("%s: unknown command state: %x\n", __func__, pfl->cmd);
-        pfl->wcycle = 0;
-        pfl->cmd = 0;
     }
     return ret;
 }
diff --git a/hw/sdhci.c b/hw/sdhci.c
new file mode 100644
index 0000000..e535df9
--- /dev/null
+++ b/hw/sdhci.c
@@ -0,0 +1,1300 @@
+/*
+ * SD Association Host Standard Specification v2.0 controller emulation
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Mitsyanko Igor <i.mitsyanko@samsung.com>
+ * Peter A.G. Crosthwaite <peter.crosthwaite@petalogix.com>
+ *
+ * Based on MMC controller for Samsung S5PC1xx-based board emulation
+ * by Alexey Merkulov and Vladimir Monakhov.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw.h"
+#include "sysemu/blockdev.h"
+#include "sysemu/dma.h"
+#include "qemu/timer.h"
+#include "block/block_int.h"
+#include "qemu/bitops.h"
+
+#include "sdhci.h"
+
+/* host controller debug messages */
+#ifndef SDHC_DEBUG
+#define SDHC_DEBUG                        0
+#endif
+
+#if SDHC_DEBUG == 0
+    #define DPRINT_L1(fmt, args...)       do { } while (0)
+    #define DPRINT_L2(fmt, args...)       do { } while (0)
+    #define ERRPRINT(fmt, args...)        do { } while (0)
+#elif SDHC_DEBUG == 1
+    #define DPRINT_L1(fmt, args...)       \
+        do {fprintf(stderr, "QEMU SDHC: "fmt, ## args); } while (0)
+    #define DPRINT_L2(fmt, args...)       do { } while (0)
+    #define ERRPRINT(fmt, args...)        \
+        do {fprintf(stderr, "QEMU SDHC ERROR: "fmt, ## args); } while (0)
+#else
+    #define DPRINT_L1(fmt, args...)       \
+        do {fprintf(stderr, "QEMU SDHC: "fmt, ## args); } while (0)
+    #define DPRINT_L2(fmt, args...)       \
+        do {fprintf(stderr, "QEMU SDHC: "fmt, ## args); } while (0)
+    #define ERRPRINT(fmt, args...)        \
+        do {fprintf(stderr, "QEMU SDHC ERROR: "fmt, ## args); } while (0)
+#endif
+
+/* Default SD/MMC host controller features information, which will be
+ * presented in CAPABILITIES register of generic SD host controller at reset.
+ * If not stated otherwise:
+ * 0 - not supported, 1 - supported, other - prohibited.
+ */
+#define SDHC_CAPAB_64BITBUS       0ul        /* 64-bit System Bus Support */
+#define SDHC_CAPAB_18V            1ul        /* Voltage support 1.8v */
+#define SDHC_CAPAB_30V            0ul        /* Voltage support 3.0v */
+#define SDHC_CAPAB_33V            1ul        /* Voltage support 3.3v */
+#define SDHC_CAPAB_SUSPRESUME     0ul        /* Suspend/resume support */
+#define SDHC_CAPAB_SDMA           1ul        /* SDMA support */
+#define SDHC_CAPAB_HIGHSPEED      1ul        /* High speed support */
+#define SDHC_CAPAB_ADMA1          1ul        /* ADMA1 support */
+#define SDHC_CAPAB_ADMA2          1ul        /* ADMA2 support */
+/* Maximum host controller R/W buffers size
+ * Possible values: 512, 1024, 2048 bytes */
+#define SDHC_CAPAB_MAXBLOCKLENGTH 512ul
+/* Maximum clock frequency for SDclock in MHz
+ * value in range 10-63 MHz, 0 - not defined */
+#define SDHC_CAPAB_BASECLKFREQ    0ul
+#define SDHC_CAPAB_TOUNIT         1ul  /* Timeout clock unit 0 - kHz, 1 - MHz */
+/* Timeout clock frequency 1-63, 0 - not defined */
+#define SDHC_CAPAB_TOCLKFREQ      0ul
+
+/* Now check all parameters and calculate CAPABILITIES REGISTER value */
+#if SDHC_CAPAB_64BITBUS > 1 || SDHC_CAPAB_18V > 1 || SDHC_CAPAB_30V > 1 ||     \
+    SDHC_CAPAB_33V > 1 || SDHC_CAPAB_SUSPRESUME > 1 || SDHC_CAPAB_SDMA > 1 ||  \
+    SDHC_CAPAB_HIGHSPEED > 1 || SDHC_CAPAB_ADMA2 > 1 || SDHC_CAPAB_ADMA1 > 1 ||\
+    SDHC_CAPAB_TOUNIT > 1
+#error Capabilities features can have value 0 or 1 only!
+#endif
+
+#if SDHC_CAPAB_MAXBLOCKLENGTH == 512
+#define MAX_BLOCK_LENGTH 0ul
+#elif SDHC_CAPAB_MAXBLOCKLENGTH == 1024
+#define MAX_BLOCK_LENGTH 1ul
+#elif SDHC_CAPAB_MAXBLOCKLENGTH == 2048
+#define MAX_BLOCK_LENGTH 2ul
+#else
+#error Max host controller block size can have value 512, 1024 or 2048 only!
+#endif
+
+#if (SDHC_CAPAB_BASECLKFREQ > 0 && SDHC_CAPAB_BASECLKFREQ < 10) || \
+    SDHC_CAPAB_BASECLKFREQ > 63
+#error SDclock frequency can have value in range 0, 10-63 only!
+#endif
+
+#if SDHC_CAPAB_TOCLKFREQ > 63
+#error Timeout clock frequency can have value in range 0-63 only!
+#endif
+
+#define SDHC_CAPAB_REG_DEFAULT                                 \
+   ((SDHC_CAPAB_64BITBUS << 28) | (SDHC_CAPAB_18V << 26) |     \
+    (SDHC_CAPAB_30V << 25) | (SDHC_CAPAB_33V << 24) |          \
+    (SDHC_CAPAB_SUSPRESUME << 23) | (SDHC_CAPAB_SDMA << 22) |  \
+    (SDHC_CAPAB_HIGHSPEED << 21) | (SDHC_CAPAB_ADMA1 << 20) |  \
+    (SDHC_CAPAB_ADMA2 << 19) | (MAX_BLOCK_LENGTH << 16) |      \
+    (SDHC_CAPAB_BASECLKFREQ << 8) | (SDHC_CAPAB_TOUNIT << 7) | \
+    (SDHC_CAPAB_TOCLKFREQ))
+
+#define MASKED_WRITE(reg, mask, val)  (reg = (reg & (mask)) | (val))
+
+static uint8_t sdhci_slotint(SDHCIState *s)
+{
+    return (s->norintsts & s->norintsigen) || (s->errintsts & s->errintsigen) ||
+         ((s->norintsts & SDHC_NIS_INSERT) && (s->wakcon & SDHC_WKUP_ON_INS)) ||
+         ((s->norintsts & SDHC_NIS_REMOVE) && (s->wakcon & SDHC_WKUP_ON_RMV));
+}
+
+static inline void sdhci_update_irq(SDHCIState *s)
+{
+    qemu_set_irq(s->irq, sdhci_slotint(s));
+}
+
+static void sdhci_raise_insertion_irq(void *opaque)
+{
+    SDHCIState *s = (SDHCIState *)opaque;
+
+    if (s->norintsts & SDHC_NIS_REMOVE) {
+        qemu_mod_timer(s->insert_timer,
+                       qemu_get_clock_ns(vm_clock) + SDHC_INSERTION_DELAY);
+    } else {
+        s->prnsts = 0x1ff0000;
+        if (s->norintstsen & SDHC_NISEN_INSERT) {
+            s->norintsts |= SDHC_NIS_INSERT;
+        }
+        sdhci_update_irq(s);
+    }
+}
+
+static void sdhci_insert_eject_cb(void *opaque, int irq, int level)
+{
+    SDHCIState *s = (SDHCIState *)opaque;
+    DPRINT_L1("Card state changed: %s!\n", level ? "insert" : "eject");
+
+    if ((s->norintsts & SDHC_NIS_REMOVE) && level) {
+        /* Give target some time to notice card ejection */
+        qemu_mod_timer(s->insert_timer,
+                       qemu_get_clock_ns(vm_clock) + SDHC_INSERTION_DELAY);
+    } else {
+        if (level) {
+            s->prnsts = 0x1ff0000;
+            if (s->norintstsen & SDHC_NISEN_INSERT) {
+                s->norintsts |= SDHC_NIS_INSERT;
+            }
+        } else {
+            s->prnsts = 0x1fa0000;
+            s->pwrcon &= ~SDHC_POWER_ON;
+            s->clkcon &= ~SDHC_CLOCK_SDCLK_EN;
+            if (s->norintstsen & SDHC_NISEN_REMOVE) {
+                s->norintsts |= SDHC_NIS_REMOVE;
+            }
+        }
+        sdhci_update_irq(s);
+    }
+}
+
+static void sdhci_card_readonly_cb(void *opaque, int irq, int level)
+{
+    SDHCIState *s = (SDHCIState *)opaque;
+
+    if (level) {
+        s->prnsts &= ~SDHC_WRITE_PROTECT;
+    } else {
+        /* Write enabled */
+        s->prnsts |= SDHC_WRITE_PROTECT;
+    }
+}
+
+static void sdhci_reset(SDHCIState *s)
+{
+    qemu_del_timer(s->insert_timer);
+    qemu_del_timer(s->transfer_timer);
+    /* Set all registers to 0. Capabilities registers are not cleared
+     * and assumed to always preserve their value, given to them during
+     * initialization */
+    memset(&s->sdmasysad, 0, (uintptr_t)&s->capareg - (uintptr_t)&s->sdmasysad);
+
+    sd_set_cb(s->card, s->ro_cb, s->eject_cb);
+    s->data_count = 0;
+    s->stopped_state = sdhc_not_stopped;
+}
+
+static void sdhci_do_data_transfer(void *opaque)
+{
+    SDHCIState *s = (SDHCIState *)opaque;
+
+    SDHCI_GET_CLASS(s)->data_transfer(s);
+}
+
+static void sdhci_send_command(SDHCIState *s)
+{
+    SDRequest request;
+    uint8_t response[16];
+    int rlen;
+
+    s->errintsts = 0;
+    s->acmd12errsts = 0;
+    request.cmd = s->cmdreg >> 8;
+    request.arg = s->argument;
+    DPRINT_L1("sending CMD%u ARG[0x%08x]\n", request.cmd, request.arg);
+    rlen = sd_do_command(s->card, &request, response);
+
+    if (s->cmdreg & SDHC_CMD_RESPONSE) {
+        if (rlen == 4) {
+            s->rspreg[0] = (response[0] << 24) | (response[1] << 16) |
+                           (response[2] << 8)  |  response[3];
+            s->rspreg[1] = s->rspreg[2] = s->rspreg[3] = 0;
+            DPRINT_L1("Response: RSPREG[31..0]=0x%08x\n", s->rspreg[0]);
+        } else if (rlen == 16) {
+            s->rspreg[0] = (response[11] << 24) | (response[12] << 16) |
+                           (response[13] << 8) |  response[14];
+            s->rspreg[1] = (response[7] << 24) | (response[8] << 16) |
+                           (response[9] << 8)  |  response[10];
+            s->rspreg[2] = (response[3] << 24) | (response[4] << 16) |
+                           (response[5] << 8)  |  response[6];
+            s->rspreg[3] = (response[0] << 16) | (response[1] << 8) |
+                            response[2];
+            DPRINT_L1("Response received:\n RSPREG[127..96]=0x%08x, RSPREG[95.."
+                  "64]=0x%08x,\n RSPREG[63..32]=0x%08x, RSPREG[31..0]=0x%08x\n",
+                  s->rspreg[3], s->rspreg[2], s->rspreg[1], s->rspreg[0]);
+        } else {
+            ERRPRINT("Timeout waiting for command response\n");
+            if (s->errintstsen & SDHC_EISEN_CMDTIMEOUT) {
+                s->errintsts |= SDHC_EIS_CMDTIMEOUT;
+                s->norintsts |= SDHC_NIS_ERR;
+            }
+        }
+
+        if ((s->norintstsen & SDHC_NISEN_TRSCMP) &&
+            (s->cmdreg & SDHC_CMD_RESPONSE) == SDHC_CMD_RSP_WITH_BUSY) {
+            s->norintsts |= SDHC_NIS_TRSCMP;
+        }
+    } else if (rlen != 0 && (s->errintstsen & SDHC_EISEN_CMDIDX)) {
+        s->errintsts |= SDHC_EIS_CMDIDX;
+        s->norintsts |= SDHC_NIS_ERR;
+    }
+
+    if (s->norintstsen & SDHC_NISEN_CMDCMP) {
+        s->norintsts |= SDHC_NIS_CMDCMP;
+    }
+
+    sdhci_update_irq(s);
+
+    if (s->blksize && (s->cmdreg & SDHC_CMD_DATA_PRESENT)) {
+        sdhci_do_data_transfer(s);
+    }
+}
+
+static void sdhci_end_transfer(SDHCIState *s)
+{
+    /* Automatically send CMD12 to stop transfer if AutoCMD12 enabled */
+    if ((s->trnmod & SDHC_TRNS_ACMD12) != 0) {
+        SDRequest request;
+        uint8_t response[16];
+
+        request.cmd = 0x0C;
+        request.arg = 0;
+        DPRINT_L1("Automatically issue CMD%d %08x\n", request.cmd, request.arg);
+        sd_do_command(s->card, &request, response);
+        /* Auto CMD12 response goes to the upper Response register */
+        s->rspreg[3] = (response[0] << 24) | (response[1] << 16) |
+                (response[2] << 8) | response[3];
+    }
+
+    s->prnsts &= ~(SDHC_DOING_READ | SDHC_DOING_WRITE |
+            SDHC_DAT_LINE_ACTIVE | SDHC_DATA_INHIBIT |
+            SDHC_SPACE_AVAILABLE | SDHC_DATA_AVAILABLE);
+
+    if (s->norintstsen & SDHC_NISEN_TRSCMP) {
+        s->norintsts |= SDHC_NIS_TRSCMP;
+    }
+
+    sdhci_update_irq(s);
+}
+
+/*
+ * Programmed i/o data transfer
+ */
+
+/* Fill host controller's read buffer with BLKSIZE bytes of data from card */
+static void sdhci_read_block_from_card(SDHCIState *s)
+{
+    int index = 0;
+
+    if ((s->trnmod & SDHC_TRNS_MULTI) &&
+            (s->trnmod & SDHC_TRNS_BLK_CNT_EN) && (s->blkcnt == 0)) {
+        return;
+    }
+
+    for (index = 0; index < (s->blksize & 0x0fff); index++) {
+        s->fifo_buffer[index] = sd_read_data(s->card);
+    }
+
+    /* New data now available for READ through Buffer Port Register */
+    s->prnsts |= SDHC_DATA_AVAILABLE;
+    if (s->norintstsen & SDHC_NISEN_RBUFRDY) {
+        s->norintsts |= SDHC_NIS_RBUFRDY;
+    }
+
+    /* Clear DAT line active status if that was the last block */
+    if ((s->trnmod & SDHC_TRNS_MULTI) == 0 ||
+            ((s->trnmod & SDHC_TRNS_MULTI) && s->blkcnt == 1)) {
+        s->prnsts &= ~SDHC_DAT_LINE_ACTIVE;
+    }
+
+    /* If stop at block gap request was set and it's not the last block of
+     * data - generate Block Event interrupt */
+    if (s->stopped_state == sdhc_gap_read && (s->trnmod & SDHC_TRNS_MULTI) &&
+            s->blkcnt != 1)    {
+        s->prnsts &= ~SDHC_DAT_LINE_ACTIVE;
+        if (s->norintstsen & SDHC_EISEN_BLKGAP) {
+            s->norintsts |= SDHC_EIS_BLKGAP;
+        }
+    }
+
+    sdhci_update_irq(s);
+}
+
+/* Read @size byte of data from host controller @s BUFFER DATA PORT register */
+static uint32_t sdhci_read_dataport(SDHCIState *s, unsigned size)
+{
+    uint32_t value = 0;
+    int i;
+
+    /* first check that a valid data exists in host controller input buffer */
+    if ((s->prnsts & SDHC_DATA_AVAILABLE) == 0) {
+        ERRPRINT("Trying to read from empty buffer\n");
+        return 0;
+    }
+
+    for (i = 0; i < size; i++) {
+        value |= s->fifo_buffer[s->data_count] << i * 8;
+        s->data_count++;
+        /* check if we've read all valid data (blksize bytes) from buffer */
+        if ((s->data_count) >= (s->blksize & 0x0fff)) {
+            DPRINT_L2("All %u bytes of data have been read from input buffer\n",
+                    s->data_count);
+            s->prnsts &= ~SDHC_DATA_AVAILABLE; /* no more data in a buffer */
+            s->data_count = 0;  /* next buff read must start at position [0] */
+
+            if (s->trnmod & SDHC_TRNS_BLK_CNT_EN) {
+                s->blkcnt--;
+            }
+
+            /* if that was the last block of data */
+            if ((s->trnmod & SDHC_TRNS_MULTI) == 0 ||
+                ((s->trnmod & SDHC_TRNS_BLK_CNT_EN) && (s->blkcnt == 0)) ||
+                 /* stop at gap request */
+                (s->stopped_state == sdhc_gap_read &&
+                 !(s->prnsts & SDHC_DAT_LINE_ACTIVE))) {
+                SDHCI_GET_CLASS(s)->end_data_transfer(s);
+            } else { /* if there are more data, read next block from card */
+                SDHCI_GET_CLASS(s)->read_block_from_card(s);
+            }
+            break;
+        }
+    }
+
+    return value;
+}
+
+/* Write data from host controller FIFO to card */
+static void sdhci_write_block_to_card(SDHCIState *s)
+{
+    int index = 0;
+
+    if (s->prnsts & SDHC_SPACE_AVAILABLE) {
+        if (s->norintstsen & SDHC_NISEN_WBUFRDY) {
+            s->norintsts |= SDHC_NIS_WBUFRDY;
+        }
+        sdhci_update_irq(s);
+        return;
+    }
+
+    if (s->trnmod & SDHC_TRNS_BLK_CNT_EN) {
+        if (s->blkcnt == 0) {
+            return;
+        } else {
+            s->blkcnt--;
+        }
+    }
+
+    for (index = 0; index < (s->blksize & 0x0fff); index++) {
+        sd_write_data(s->card, s->fifo_buffer[index]);
+    }
+
+    /* Next data can be written through BUFFER DATORT register */
+    s->prnsts |= SDHC_SPACE_AVAILABLE;
+    if (s->norintstsen & SDHC_NISEN_WBUFRDY) {
+        s->norintsts |= SDHC_NIS_WBUFRDY;
+    }
+
+    /* Finish transfer if that was the last block of data */
+    if ((s->trnmod & SDHC_TRNS_MULTI) == 0 ||
+            ((s->trnmod & SDHC_TRNS_MULTI) &&
+            (s->trnmod & SDHC_TRNS_BLK_CNT_EN) && (s->blkcnt == 0))) {
+        SDHCI_GET_CLASS(s)->end_data_transfer(s);
+    }
+
+    /* Generate Block Gap Event if requested and if not the last block */
+    if (s->stopped_state == sdhc_gap_write && (s->trnmod & SDHC_TRNS_MULTI) &&
+            s->blkcnt > 0) {
+        s->prnsts &= ~SDHC_DOING_WRITE;
+        if (s->norintstsen & SDHC_EISEN_BLKGAP) {
+            s->norintsts |= SDHC_EIS_BLKGAP;
+        }
+        SDHCI_GET_CLASS(s)->end_data_transfer(s);
+    }
+
+    sdhci_update_irq(s);
+}
+
+/* Write @size bytes of @value data to host controller @s Buffer Data Port
+ * register */
+static void sdhci_write_dataport(SDHCIState *s, uint32_t value, unsigned size)
+{
+    unsigned i;
+
+    /* Check that there is free space left in a buffer */
+    if (!(s->prnsts & SDHC_SPACE_AVAILABLE)) {
+        ERRPRINT("Can't write to data buffer: buffer full\n");
+        return;
+    }
+
+    for (i = 0; i < size; i++) {
+        s->fifo_buffer[s->data_count] = value & 0xFF;
+        s->data_count++;
+        value >>= 8;
+        if (s->data_count >= (s->blksize & 0x0fff)) {
+            DPRINT_L2("write buffer filled with %u bytes of data\n",
+                    s->data_count);
+            s->data_count = 0;
+            s->prnsts &= ~SDHC_SPACE_AVAILABLE;
+            if (s->prnsts & SDHC_DOING_WRITE) {
+                SDHCI_GET_CLASS(s)->write_block_to_card(s);
+            }
+        }
+    }
+}
+
+/*
+ * Single DMA data transfer
+ */
+
+/* Multi block SDMA transfer */
+static void sdhci_sdma_transfer_multi_blocks(SDHCIState *s)
+{
+    bool page_aligned = false;
+    unsigned int n, begin;
+    const uint16_t block_size = s->blksize & 0x0fff;
+    uint32_t boundary_chk = 1 << (((s->blksize & 0xf000) >> 12) + 12);
+    uint32_t boundary_count = boundary_chk - (s->sdmasysad % boundary_chk);
+
+    /* XXX: Some sd/mmc drivers (for example, u-boot-slp) do not account for
+     * possible stop at page boundary if initial address is not page aligned,
+     * allow them to work properly */
+    if ((s->sdmasysad % boundary_chk) == 0) {
+        page_aligned = true;
+    }
+
+    if (s->trnmod & SDHC_TRNS_READ) {
+        s->prnsts |= SDHC_DOING_READ | SDHC_DATA_INHIBIT |
+                SDHC_DAT_LINE_ACTIVE;
+        while (s->blkcnt) {
+            if (s->data_count == 0) {
+                for (n = 0; n < block_size; n++) {
+                    s->fifo_buffer[n] = sd_read_data(s->card);
+                }
+            }
+            begin = s->data_count;
+            if (((boundary_count + begin) < block_size) && page_aligned) {
+                s->data_count = boundary_count + begin;
+                boundary_count = 0;
+             } else {
+                s->data_count = block_size;
+                boundary_count -= block_size - begin;
+                if (s->trnmod & SDHC_TRNS_BLK_CNT_EN) {
+                    s->blkcnt--;
+                }
+            }
+            dma_memory_write(&dma_context_memory, s->sdmasysad,
+                             &s->fifo_buffer[begin], s->data_count - begin);
+            s->sdmasysad += s->data_count - begin;
+            if (s->data_count == block_size) {
+                s->data_count = 0;
+            }
+            if (page_aligned && boundary_count == 0) {
+                break;
+            }
+        }
+    } else {
+        s->prnsts |= SDHC_DOING_WRITE | SDHC_DATA_INHIBIT |
+                SDHC_DAT_LINE_ACTIVE;
+        while (s->blkcnt) {
+            begin = s->data_count;
+            if (((boundary_count + begin) < block_size) && page_aligned) {
+                s->data_count = boundary_count + begin;
+                boundary_count = 0;
+             } else {
+                s->data_count = block_size;
+                boundary_count -= block_size - begin;
+            }
+            dma_memory_read(&dma_context_memory, s->sdmasysad,
+                            &s->fifo_buffer[begin], s->data_count);
+            s->sdmasysad += s->data_count - begin;
+            if (s->data_count == block_size) {
+                for (n = 0; n < block_size; n++) {
+                    sd_write_data(s->card, s->fifo_buffer[n]);
+                }
+                s->data_count = 0;
+                if (s->trnmod & SDHC_TRNS_BLK_CNT_EN) {
+                    s->blkcnt--;
+                }
+            }
+            if (page_aligned && boundary_count == 0) {
+                break;
+            }
+        }
+    }
+
+    if (s->blkcnt == 0) {
+        SDHCI_GET_CLASS(s)->end_data_transfer(s);
+    } else {
+        if (s->norintstsen & SDHC_NISEN_DMA) {
+            s->norintsts |= SDHC_NIS_DMA;
+        }
+        sdhci_update_irq(s);
+    }
+}
+
+/* single block SDMA transfer */
+
+static void sdhci_sdma_transfer_single_block(SDHCIState *s)
+{
+    int n;
+    uint32_t datacnt = s->blksize & 0x0fff;
+
+    if (s->trnmod & SDHC_TRNS_READ) {
+        for (n = 0; n < datacnt; n++) {
+            s->fifo_buffer[n] = sd_read_data(s->card);
+        }
+        dma_memory_write(&dma_context_memory, s->sdmasysad, s->fifo_buffer,
+                         datacnt);
+    } else {
+        dma_memory_read(&dma_context_memory, s->sdmasysad, s->fifo_buffer,
+                        datacnt);
+        for (n = 0; n < datacnt; n++) {
+            sd_write_data(s->card, s->fifo_buffer[n]);
+        }
+    }
+
+    if (s->trnmod & SDHC_TRNS_BLK_CNT_EN) {
+        s->blkcnt--;
+    }
+
+    SDHCI_GET_CLASS(s)->end_data_transfer(s);
+}
+
+typedef struct ADMADescr {
+    hwaddr addr;
+    uint16_t length;
+    uint8_t attr;
+    uint8_t incr;
+} ADMADescr;
+
+static void get_adma_description(SDHCIState *s, ADMADescr *dscr)
+{
+    uint32_t adma1 = 0;
+    uint64_t adma2 = 0;
+    hwaddr entry_addr = (hwaddr)s->admasysaddr;
+    switch (SDHC_DMA_TYPE(s->hostctl)) {
+    case SDHC_CTRL_ADMA2_32:
+        dma_memory_read(&dma_context_memory, entry_addr, (uint8_t *)&adma2,
+                        sizeof(adma2));
+        adma2 = le64_to_cpu(adma2);
+        /* The spec does not specify endianness of descriptor table.
+         * We currently assume that it is LE.
+         */
+        dscr->addr = (hwaddr)extract64(adma2, 32, 32) & ~0x3ull;
+        dscr->length = (uint16_t)extract64(adma2, 16, 16);
+        dscr->attr = (uint8_t)extract64(adma2, 0, 7);
+        dscr->incr = 8;
+        break;
+    case SDHC_CTRL_ADMA1_32:
+        dma_memory_read(&dma_context_memory, entry_addr, (uint8_t *)&adma1,
+                        sizeof(adma1));
+        adma1 = le32_to_cpu(adma1);
+        dscr->addr = (hwaddr)(adma1 & 0xFFFFF000);
+        dscr->attr = (uint8_t)extract32(adma1, 0, 7);
+        dscr->incr = 4;
+        if ((dscr->attr & SDHC_ADMA_ATTR_ACT_MASK) == SDHC_ADMA_ATTR_SET_LEN) {
+            dscr->length = (uint16_t)extract32(adma1, 12, 16);
+        } else {
+            dscr->length = 4096;
+        }
+        break;
+    case SDHC_CTRL_ADMA2_64:
+        dma_memory_read(&dma_context_memory, entry_addr,
+                        (uint8_t *)(&dscr->attr), 1);
+        dma_memory_read(&dma_context_memory, entry_addr + 2,
+                        (uint8_t *)(&dscr->length), 2);
+        dscr->length = le16_to_cpu(dscr->length);
+        dma_memory_read(&dma_context_memory, entry_addr + 4,
+                        (uint8_t *)(&dscr->addr), 8);
+        dscr->attr = le64_to_cpu(dscr->attr);
+        dscr->attr &= 0xfffffff8;
+        dscr->incr = 12;
+        break;
+    }
+}
+
+/* Advanced DMA data transfer */
+
+static void sdhci_do_adma(SDHCIState *s)
+{
+    unsigned int n, begin, length;
+    const uint16_t block_size = s->blksize & 0x0fff;
+    ADMADescr dscr;
+    int i;
+
+    for (i = 0; i < SDHC_ADMA_DESCS_PER_DELAY; ++i) {
+        s->admaerr &= ~SDHC_ADMAERR_LENGTH_MISMATCH;
+
+        get_adma_description(s, &dscr);
+        DPRINT_L2("ADMA loop: addr=" TARGET_FMT_plx ", len=%d, attr=%x\n",
+                dscr.addr, dscr.length, dscr.attr);
+
+        if ((dscr.attr & SDHC_ADMA_ATTR_VALID) == 0) {
+            /* Indicate that error occurred in ST_FDS state */
+            s->admaerr &= ~SDHC_ADMAERR_STATE_MASK;
+            s->admaerr |= SDHC_ADMAERR_STATE_ST_FDS;
+
+            /* Generate ADMA error interrupt */
+            if (s->errintstsen & SDHC_EISEN_ADMAERR) {
+                s->errintsts |= SDHC_EIS_ADMAERR;
+                s->norintsts |= SDHC_NIS_ERR;
+            }
+
+            sdhci_update_irq(s);
+            return;
+        }
+
+        length = dscr.length ? dscr.length : 65536;
+
+        switch (dscr.attr & SDHC_ADMA_ATTR_ACT_MASK) {
+        case SDHC_ADMA_ATTR_ACT_TRAN:  /* data transfer */
+
+            if (s->trnmod & SDHC_TRNS_READ) {
+                while (length) {
+                    if (s->data_count == 0) {
+                        for (n = 0; n < block_size; n++) {
+                            s->fifo_buffer[n] = sd_read_data(s->card);
+                        }
+                    }
+                    begin = s->data_count;
+                    if ((length + begin) < block_size) {
+                        s->data_count = length + begin;
+                        length = 0;
+                     } else {
+                        s->data_count = block_size;
+                        length -= block_size - begin;
+                    }
+                    dma_memory_write(&dma_context_memory, dscr.addr,
+                                     &s->fifo_buffer[begin],
+                                     s->data_count - begin);
+                    dscr.addr += s->data_count - begin;
+                    if (s->data_count == block_size) {
+                        s->data_count = 0;
+                        if (s->trnmod & SDHC_TRNS_BLK_CNT_EN) {
+                            s->blkcnt--;
+                            if (s->blkcnt == 0) {
+                                break;
+                            }
+                        }
+                    }
+                }
+            } else {
+                while (length) {
+                    begin = s->data_count;
+                    if ((length + begin) < block_size) {
+                        s->data_count = length + begin;
+                        length = 0;
+                     } else {
+                        s->data_count = block_size;
+                        length -= block_size - begin;
+                    }
+                    dma_memory_read(&dma_context_memory, dscr.addr,
+                                    &s->fifo_buffer[begin], s->data_count);
+                    dscr.addr += s->data_count - begin;
+                    if (s->data_count == block_size) {
+                        for (n = 0; n < block_size; n++) {
+                            sd_write_data(s->card, s->fifo_buffer[n]);
+                        }
+                        s->data_count = 0;
+                        if (s->trnmod & SDHC_TRNS_BLK_CNT_EN) {
+                            s->blkcnt--;
+                            if (s->blkcnt == 0) {
+                                break;
+                            }
+                        }
+                    }
+                }
+            }
+            s->admasysaddr += dscr.incr;
+            break;
+        case SDHC_ADMA_ATTR_ACT_LINK:   /* link to next descriptor table */
+            s->admasysaddr = dscr.addr;
+            DPRINT_L1("ADMA link: admasysaddr=0x%lx\n", s->admasysaddr);
+            break;
+        default:
+            s->admasysaddr += dscr.incr;
+            break;
+        }
+
+        /* ADMA transfer terminates if blkcnt == 0 or by END attribute */
+        if (((s->trnmod & SDHC_TRNS_BLK_CNT_EN) &&
+                    (s->blkcnt == 0)) || (dscr.attr & SDHC_ADMA_ATTR_END)) {
+            DPRINT_L2("ADMA transfer completed\n");
+            if (length || ((dscr.attr & SDHC_ADMA_ATTR_END) &&
+                (s->trnmod & SDHC_TRNS_BLK_CNT_EN) &&
+                s->blkcnt != 0)) {
+                ERRPRINT("SD/MMC host ADMA length mismatch\n");
+                s->admaerr |= SDHC_ADMAERR_LENGTH_MISMATCH |
+                        SDHC_ADMAERR_STATE_ST_TFR;
+                if (s->errintstsen & SDHC_EISEN_ADMAERR) {
+                    ERRPRINT("Set ADMA error flag\n");
+                    s->errintsts |= SDHC_EIS_ADMAERR;
+                    s->norintsts |= SDHC_NIS_ERR;
+                }
+
+                sdhci_update_irq(s);
+            }
+            SDHCI_GET_CLASS(s)->end_data_transfer(s);
+            return;
+        }
+
+        if (dscr.attr & SDHC_ADMA_ATTR_INT) {
+            DPRINT_L1("ADMA interrupt: admasysaddr=0x%lx\n", s->admasysaddr);
+            if (s->norintstsen & SDHC_NISEN_DMA) {
+                s->norintsts |= SDHC_NIS_DMA;
+            }
+
+            sdhci_update_irq(s);
+            return;
+        }
+    }
+
+    /* we have unfinished bussiness - reschedule to continue ADMA */
+    qemu_mod_timer(s->transfer_timer,
+                   qemu_get_clock_ns(vm_clock) + SDHC_TRANSFER_DELAY);
+}
+
+/* Perform data transfer according to controller configuration */
+
+static void sdhci_data_transfer(SDHCIState *s)
+{
+    SDHCIClass *k = SDHCI_GET_CLASS(s);
+    s->data_count = 0;
+
+    if (s->trnmod & SDHC_TRNS_DMA) {
+        switch (SDHC_DMA_TYPE(s->hostctl)) {
+        case SDHC_CTRL_SDMA:
+            if ((s->trnmod & SDHC_TRNS_MULTI) &&
+                    (!(s->trnmod & SDHC_TRNS_BLK_CNT_EN) || s->blkcnt == 0)) {
+                break;
+            }
+
+            if ((s->blkcnt == 1) || !(s->trnmod & SDHC_TRNS_MULTI)) {
+                k->do_sdma_single(s);
+            } else {
+                k->do_sdma_multi(s);
+            }
+
+            break;
+        case SDHC_CTRL_ADMA1_32:
+            if (!(s->capareg & SDHC_CAN_DO_ADMA1)) {
+                ERRPRINT("ADMA1 not supported\n");
+                break;
+            }
+
+            k->do_adma(s);
+            break;
+        case SDHC_CTRL_ADMA2_32:
+            if (!(s->capareg & SDHC_CAN_DO_ADMA2)) {
+                ERRPRINT("ADMA2 not supported\n");
+                break;
+            }
+
+            k->do_adma(s);
+            break;
+        case SDHC_CTRL_ADMA2_64:
+            if (!(s->capareg & SDHC_CAN_DO_ADMA2) ||
+                    !(s->capareg & SDHC_64_BIT_BUS_SUPPORT)) {
+                ERRPRINT("64 bit ADMA not supported\n");
+                break;
+            }
+
+            k->do_adma(s);
+            break;
+        default:
+            ERRPRINT("Unsupported DMA type\n");
+            break;
+        }
+    } else {
+        if ((s->trnmod & SDHC_TRNS_READ) && sd_data_ready(s->card)) {
+            s->prnsts |= SDHC_DOING_READ | SDHC_DATA_INHIBIT |
+                    SDHC_DAT_LINE_ACTIVE;
+            SDHCI_GET_CLASS(s)->read_block_from_card(s);
+        } else {
+            s->prnsts |= SDHC_DOING_WRITE | SDHC_DAT_LINE_ACTIVE |
+                    SDHC_SPACE_AVAILABLE | SDHC_DATA_INHIBIT;
+            SDHCI_GET_CLASS(s)->write_block_to_card(s);
+        }
+    }
+}
+
+static bool sdhci_can_issue_command(SDHCIState *s)
+{
+    if (!SDHC_CLOCK_IS_ON(s->clkcon) || !(s->pwrcon & SDHC_POWER_ON) ||
+        (((s->prnsts & SDHC_DATA_INHIBIT) || s->stopped_state) &&
+        ((s->cmdreg & SDHC_CMD_DATA_PRESENT) ||
+        ((s->cmdreg & SDHC_CMD_RESPONSE) == SDHC_CMD_RSP_WITH_BUSY &&
+        !(SDHC_COMMAND_TYPE(s->cmdreg) == SDHC_CMD_ABORT))))) {
+        return false;
+    }
+
+    return true;
+}
+
+/* The Buffer Data Port register must be accessed in sequential and
+ * continuous manner */
+static inline bool
+sdhci_buff_access_is_sequential(SDHCIState *s, unsigned byte_num)
+{
+    if ((s->data_count & 0x3) != byte_num) {
+        ERRPRINT("Non-sequential access to Buffer Data Port register"
+                "is prohibited\n");
+        return false;
+    }
+    return true;
+}
+
+static uint32_t sdhci_read(SDHCIState *s, unsigned int offset, unsigned size)
+{
+    uint32_t ret = 0;
+
+    switch (offset & ~0x3) {
+    case SDHC_SYSAD:
+        ret = s->sdmasysad;
+        break;
+    case SDHC_BLKSIZE:
+        ret = s->blksize | (s->blkcnt << 16);
+        break;
+    case SDHC_ARGUMENT:
+        ret = s->argument;
+        break;
+    case SDHC_TRNMOD:
+        ret = s->trnmod | (s->cmdreg << 16);
+        break;
+    case SDHC_RSPREG0 ... SDHC_RSPREG3:
+        ret = s->rspreg[((offset & ~0x3) - SDHC_RSPREG0) >> 2];
+        break;
+    case  SDHC_BDATA:
+        if (sdhci_buff_access_is_sequential(s, offset - SDHC_BDATA)) {
+            ret = SDHCI_GET_CLASS(s)->bdata_read(s, size);
+            DPRINT_L2("read %ub: addr[0x%04x] -> %u\n", size, offset, ret);
+            return ret;
+        }
+        break;
+    case SDHC_PRNSTS:
+        ret = s->prnsts;
+        break;
+    case SDHC_HOSTCTL:
+        ret = s->hostctl | (s->pwrcon << 8) | (s->blkgap << 16) |
+              (s->wakcon << 24);
+        break;
+    case SDHC_CLKCON:
+        ret = s->clkcon | (s->timeoutcon << 16);
+        break;
+    case SDHC_NORINTSTS:
+        ret = s->norintsts | (s->errintsts << 16);
+        break;
+    case SDHC_NORINTSTSEN:
+        ret = s->norintstsen | (s->errintstsen << 16);
+        break;
+    case SDHC_NORINTSIGEN:
+        ret = s->norintsigen | (s->errintsigen << 16);
+        break;
+    case SDHC_ACMD12ERRSTS:
+        ret = s->acmd12errsts;
+        break;
+    case SDHC_CAPAREG:
+        ret = s->capareg;
+        break;
+    case SDHC_MAXCURR:
+        ret = s->maxcurr;
+        break;
+    case SDHC_ADMAERR:
+        ret =  s->admaerr;
+        break;
+    case SDHC_ADMASYSADDR:
+        ret = (uint32_t)s->admasysaddr;
+        break;
+    case SDHC_ADMASYSADDR + 4:
+        ret = (uint32_t)(s->admasysaddr >> 32);
+        break;
+    case SDHC_SLOT_INT_STATUS:
+        ret = (SD_HOST_SPECv2_VERS << 16) | sdhci_slotint(s);
+        break;
+    default:
+        ERRPRINT("bad %ub read: addr[0x%04x]\n", size, offset);
+        break;
+    }
+
+    ret >>= (offset & 0x3) * 8;
+    ret &= (1ULL << (size * 8)) - 1;
+    DPRINT_L2("read %ub: addr[0x%04x] -> %u(0x%x)\n", size, offset, ret, ret);
+    return ret;
+}
+
+static inline void sdhci_blkgap_write(SDHCIState *s, uint8_t value)
+{
+    if ((value & SDHC_STOP_AT_GAP_REQ) && (s->blkgap & SDHC_STOP_AT_GAP_REQ)) {
+        return;
+    }
+    s->blkgap = value & SDHC_STOP_AT_GAP_REQ;
+
+    if ((value & SDHC_CONTINUE_REQ) && s->stopped_state &&
+            (s->blkgap & SDHC_STOP_AT_GAP_REQ) == 0) {
+        if (s->stopped_state == sdhc_gap_read) {
+            s->prnsts |= SDHC_DAT_LINE_ACTIVE | SDHC_DOING_READ;
+            SDHCI_GET_CLASS(s)->read_block_from_card(s);
+        } else {
+            s->prnsts |= SDHC_DAT_LINE_ACTIVE | SDHC_DOING_WRITE;
+            SDHCI_GET_CLASS(s)->write_block_to_card(s);
+        }
+        s->stopped_state = sdhc_not_stopped;
+    } else if (!s->stopped_state && (value & SDHC_STOP_AT_GAP_REQ)) {
+        if (s->prnsts & SDHC_DOING_READ) {
+            s->stopped_state = sdhc_gap_read;
+        } else if (s->prnsts & SDHC_DOING_WRITE) {
+            s->stopped_state = sdhc_gap_write;
+        }
+    }
+}
+
+static inline void sdhci_reset_write(SDHCIState *s, uint8_t value)
+{
+    switch (value) {
+    case SDHC_RESET_ALL:
+        DEVICE_GET_CLASS(s)->reset(DEVICE(s));
+        break;
+    case SDHC_RESET_CMD:
+        s->prnsts &= ~SDHC_CMD_INHIBIT;
+        s->norintsts &= ~SDHC_NIS_CMDCMP;
+        break;
+    case SDHC_RESET_DATA:
+        s->data_count = 0;
+        s->prnsts &= ~(SDHC_SPACE_AVAILABLE | SDHC_DATA_AVAILABLE |
+                SDHC_DOING_READ | SDHC_DOING_WRITE |
+                SDHC_DATA_INHIBIT | SDHC_DAT_LINE_ACTIVE);
+        s->blkgap &= ~(SDHC_STOP_AT_GAP_REQ | SDHC_CONTINUE_REQ);
+        s->stopped_state = sdhc_not_stopped;
+        s->norintsts &= ~(SDHC_NIS_WBUFRDY | SDHC_NIS_RBUFRDY |
+                SDHC_NIS_DMA | SDHC_NIS_TRSCMP | SDHC_NIS_BLKGAP);
+        break;
+    }
+}
+
+static void
+sdhci_write(SDHCIState *s, unsigned int offset, uint32_t value, unsigned size)
+{
+    unsigned shift =  8 * (offset & 0x3);
+    uint32_t mask = ~(((1ULL << (size * 8)) - 1) << shift);
+    value <<= shift;
+
+    switch (offset & ~0x3) {
+    case SDHC_SYSAD:
+        s->sdmasysad = (s->sdmasysad & mask) | value;
+        MASKED_WRITE(s->sdmasysad, mask, value);
+        /* Writing to last byte of sdmasysad might trigger transfer */
+        if (!(mask & 0xFF000000) && TRANSFERRING_DATA(s->prnsts) && s->blkcnt &&
+                s->blksize && SDHC_DMA_TYPE(s->hostctl) == SDHC_CTRL_SDMA) {
+            SDHCI_GET_CLASS(s)->do_sdma_multi(s);
+        }
+        break;
+    case SDHC_BLKSIZE:
+        if (!TRANSFERRING_DATA(s->prnsts)) {
+            MASKED_WRITE(s->blksize, mask, value);
+            MASKED_WRITE(s->blkcnt, mask >> 16, value >> 16);
+        }
+        break;
+    case SDHC_ARGUMENT:
+        MASKED_WRITE(s->argument, mask, value);
+        break;
+    case SDHC_TRNMOD:
+        /* DMA can be enabled only if it is supported as indicated by
+         * capabilities register */
+        if (!(s->capareg & SDHC_CAN_DO_DMA)) {
+            value &= ~SDHC_TRNS_DMA;
+        }
+        MASKED_WRITE(s->trnmod, mask, value);
+        MASKED_WRITE(s->cmdreg, mask >> 16, value >> 16);
+
+        /* Writing to the upper byte of CMDREG triggers SD command generation */
+        if ((mask & 0xFF000000) || !SDHCI_GET_CLASS(s)->can_issue_command(s)) {
+            break;
+        }
+
+        SDHCI_GET_CLASS(s)->send_command(s);
+        break;
+    case  SDHC_BDATA:
+        if (sdhci_buff_access_is_sequential(s, offset - SDHC_BDATA)) {
+            SDHCI_GET_CLASS(s)->bdata_write(s, value >> shift, size);
+        }
+        break;
+    case SDHC_HOSTCTL:
+        if (!(mask & 0xFF0000)) {
+            sdhci_blkgap_write(s, value >> 16);
+        }
+        MASKED_WRITE(s->hostctl, mask, value);
+        MASKED_WRITE(s->pwrcon, mask >> 8, value >> 8);
+        MASKED_WRITE(s->wakcon, mask >> 24, value >> 24);
+        if (!(s->prnsts & SDHC_CARD_PRESENT) || ((s->pwrcon >> 1) & 0x7) < 5 ||
+                !(s->capareg & (1 << (31 - ((s->pwrcon >> 1) & 0x7))))) {
+            s->pwrcon &= ~SDHC_POWER_ON;
+        }
+        break;
+    case SDHC_CLKCON:
+        if (!(mask & 0xFF000000)) {
+            sdhci_reset_write(s, value >> 24);
+        }
+        MASKED_WRITE(s->clkcon, mask, value);
+        MASKED_WRITE(s->timeoutcon, mask >> 16, value >> 16);
+        if (s->clkcon & SDHC_CLOCK_INT_EN) {
+            s->clkcon |= SDHC_CLOCK_INT_STABLE;
+        } else {
+            s->clkcon &= ~SDHC_CLOCK_INT_STABLE;
+        }
+        break;
+    case SDHC_NORINTSTS:
+        if (s->norintstsen & SDHC_NISEN_CARDINT) {
+            value &= ~SDHC_NIS_CARDINT;
+        }
+        s->norintsts &= mask | ~value;
+        s->errintsts &= (mask >> 16) | ~(value >> 16);
+        if (s->errintsts) {
+            s->norintsts |= SDHC_NIS_ERR;
+        } else {
+            s->norintsts &= ~SDHC_NIS_ERR;
+        }
+        sdhci_update_irq(s);
+        break;
+    case SDHC_NORINTSTSEN:
+        MASKED_WRITE(s->norintstsen, mask, value);
+        MASKED_WRITE(s->errintstsen, mask >> 16, value >> 16);
+        s->norintsts &= s->norintstsen;
+        s->errintsts &= s->errintstsen;
+        if (s->errintsts) {
+            s->norintsts |= SDHC_NIS_ERR;
+        } else {
+            s->norintsts &= ~SDHC_NIS_ERR;
+        }
+        sdhci_update_irq(s);
+        break;
+    case SDHC_NORINTSIGEN:
+        MASKED_WRITE(s->norintsigen, mask, value);
+        MASKED_WRITE(s->errintsigen, mask >> 16, value >> 16);
+        sdhci_update_irq(s);
+        break;
+    case SDHC_ADMAERR:
+        MASKED_WRITE(s->admaerr, mask, value);
+        break;
+    case SDHC_ADMASYSADDR:
+        s->admasysaddr = (s->admasysaddr & (0xFFFFFFFF00000000ULL |
+                (uint64_t)mask)) | (uint64_t)value;
+        break;
+    case SDHC_ADMASYSADDR + 4:
+        s->admasysaddr = (s->admasysaddr & (0x00000000FFFFFFFFULL |
+                ((uint64_t)mask << 32))) | ((uint64_t)value << 32);
+        break;
+    case SDHC_FEAER:
+        s->acmd12errsts |= value;
+        s->errintsts |= (value >> 16) & s->errintstsen;
+        if (s->acmd12errsts) {
+            s->errintsts |= SDHC_EIS_CMD12ERR;
+        }
+        if (s->errintsts) {
+            s->norintsts |= SDHC_NIS_ERR;
+        }
+        sdhci_update_irq(s);
+        break;
+    default:
+        ERRPRINT("bad %ub write offset: addr[0x%04x] <- %u(0x%x)\n",
+                size, offset, value >> shift, value >> shift);
+        break;
+    }
+    DPRINT_L2("write %ub: addr[0x%04x] <- %u(0x%x)\n",
+            size, offset, value >> shift, value >> shift);
+}
+
+static uint64_t
+sdhci_readfn(void *opaque, hwaddr offset, unsigned size)
+{
+    SDHCIState *s = (SDHCIState *)opaque;
+
+    return SDHCI_GET_CLASS(s)->mem_read(s, offset, size);
+}
+
+static void
+sdhci_writefn(void *opaque, hwaddr off, uint64_t val, unsigned sz)
+{
+    SDHCIState *s = (SDHCIState *)opaque;
+
+    SDHCI_GET_CLASS(s)->mem_write(s, off, val, sz);
+}
+
+static const MemoryRegionOps sdhci_mmio_ops = {
+    .read = sdhci_readfn,
+    .write = sdhci_writefn,
+    .valid = {
+        .min_access_size = 1,
+        .max_access_size = 4,
+        .unaligned = false
+    },
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static inline unsigned int sdhci_get_fifolen(SDHCIState *s)
+{
+    switch (SDHC_CAPAB_BLOCKSIZE(s->capareg)) {
+    case 0:
+        return 512;
+    case 1:
+        return 1024;
+    case 2:
+        return 2048;
+    default:
+        hw_error("SDHC: unsupported value for maximum block size\n");
+        return 0;
+    }
+}
+
+static void sdhci_initfn(Object *obj)
+{
+    SDHCIState *s = SDHCI(obj);
+    DriveInfo *di;
+
+    di = drive_get_next(IF_SD);
+    s->card = sd_init(di ? di->bdrv : NULL, 0);
+    s->eject_cb = qemu_allocate_irqs(sdhci_insert_eject_cb, s, 1)[0];
+    s->ro_cb = qemu_allocate_irqs(sdhci_card_readonly_cb, s, 1)[0];
+    sd_set_cb(s->card, s->ro_cb, s->eject_cb);
+
+    s->insert_timer = qemu_new_timer_ns(vm_clock, sdhci_raise_insertion_irq, s);
+    s->transfer_timer = qemu_new_timer_ns(vm_clock, sdhci_do_data_transfer, s);
+}
+
+static void sdhci_uninitfn(Object *obj)
+{
+    SDHCIState *s = SDHCI(obj);
+
+    qemu_del_timer(s->insert_timer);
+    qemu_free_timer(s->insert_timer);
+    qemu_del_timer(s->transfer_timer);
+    qemu_free_timer(s->transfer_timer);
+    qemu_free_irqs(&s->eject_cb);
+    qemu_free_irqs(&s->ro_cb);
+
+    if (s->fifo_buffer) {
+        g_free(s->fifo_buffer);
+        s->fifo_buffer = NULL;
+    }
+}
+
+const VMStateDescription sdhci_vmstate = {
+    .name = "sdhci",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields      = (VMStateField[]) {
+        VMSTATE_UINT32(sdmasysad, SDHCIState),
+        VMSTATE_UINT16(blksize, SDHCIState),
+        VMSTATE_UINT16(blkcnt, SDHCIState),
+        VMSTATE_UINT32(argument, SDHCIState),
+        VMSTATE_UINT16(trnmod, SDHCIState),
+        VMSTATE_UINT16(cmdreg, SDHCIState),
+        VMSTATE_UINT32_ARRAY(rspreg, SDHCIState, 4),
+        VMSTATE_UINT32(prnsts, SDHCIState),
+        VMSTATE_UINT8(hostctl, SDHCIState),
+        VMSTATE_UINT8(pwrcon, SDHCIState),
+        VMSTATE_UINT8(blkgap, SDHCIState),
+        VMSTATE_UINT8(wakcon, SDHCIState),
+        VMSTATE_UINT16(clkcon, SDHCIState),
+        VMSTATE_UINT8(timeoutcon, SDHCIState),
+        VMSTATE_UINT8(admaerr, SDHCIState),
+        VMSTATE_UINT16(norintsts, SDHCIState),
+        VMSTATE_UINT16(errintsts, SDHCIState),
+        VMSTATE_UINT16(norintstsen, SDHCIState),
+        VMSTATE_UINT16(errintstsen, SDHCIState),
+        VMSTATE_UINT16(norintsigen, SDHCIState),
+        VMSTATE_UINT16(errintsigen, SDHCIState),
+        VMSTATE_UINT16(acmd12errsts, SDHCIState),
+        VMSTATE_UINT16(data_count, SDHCIState),
+        VMSTATE_UINT64(admasysaddr, SDHCIState),
+        VMSTATE_UINT8(stopped_state, SDHCIState),
+        VMSTATE_VBUFFER_UINT32(fifo_buffer, SDHCIState, 1, NULL, 0, buf_maxsz),
+        VMSTATE_TIMER(insert_timer, SDHCIState),
+        VMSTATE_TIMER(transfer_timer, SDHCIState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/* Capabilities registers provide information on supported features of this
+ * specific host controller implementation */
+static Property sdhci_properties[] = {
+    DEFINE_PROP_HEX32("capareg", SDHCIState, capareg,
+            SDHC_CAPAB_REG_DEFAULT),
+    DEFINE_PROP_HEX32("maxcurr", SDHCIState, maxcurr, 0),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void sdhci_realize(DeviceState *dev, Error ** errp)
+{
+    SDHCIState *s = SDHCI(dev);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+
+    s->buf_maxsz = sdhci_get_fifolen(s);
+    s->fifo_buffer = g_malloc0(s->buf_maxsz);
+    sysbus_init_irq(sbd, &s->irq);
+    memory_region_init_io(&s->iomem, &sdhci_mmio_ops, s, "sdhci",
+            SDHC_REGISTERS_MAP_SIZE);
+    sysbus_init_mmio(sbd, &s->iomem);
+}
+
+static void sdhci_generic_reset(DeviceState *ds)
+{
+    SDHCIState *s = SDHCI(ds);
+    SDHCI_GET_CLASS(s)->reset(s);
+}
+
+static void sdhci_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    SDHCIClass *k = SDHCI_CLASS(klass);
+
+    dc->vmsd = &sdhci_vmstate;
+    dc->props = sdhci_properties;
+    dc->reset = sdhci_generic_reset;
+    dc->realize = sdhci_realize;
+
+    k->reset = sdhci_reset;
+    k->mem_read = sdhci_read;
+    k->mem_write = sdhci_write;
+    k->send_command = sdhci_send_command;
+    k->can_issue_command = sdhci_can_issue_command;
+    k->data_transfer = sdhci_data_transfer;
+    k->end_data_transfer = sdhci_end_transfer;
+    k->do_sdma_single = sdhci_sdma_transfer_single_block;
+    k->do_sdma_multi = sdhci_sdma_transfer_multi_blocks;
+    k->do_adma = sdhci_do_adma;
+    k->read_block_from_card = sdhci_read_block_from_card;
+    k->write_block_to_card = sdhci_write_block_to_card;
+    k->bdata_read = sdhci_read_dataport;
+    k->bdata_write = sdhci_write_dataport;
+}
+
+static const TypeInfo sdhci_type_info = {
+    .name = TYPE_SDHCI,
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(SDHCIState),
+    .instance_init = sdhci_initfn,
+    .instance_finalize = sdhci_uninitfn,
+    .class_init = sdhci_class_init,
+    .class_size = sizeof(SDHCIClass)
+};
+
+static void sdhci_register_types(void)
+{
+    type_register_static(&sdhci_type_info);
+}
+
+type_init(sdhci_register_types)
diff --git a/hw/sdhci.h b/hw/sdhci.h
new file mode 100644
index 0000000..931d740
--- /dev/null
+++ b/hw/sdhci.h
@@ -0,0 +1,312 @@
+/*
+ * SD Association Host Standard Specification v2.0 controller emulation
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Mitsyanko Igor <i.mitsyanko@samsung.com>
+ * Peter A.G. Crosthwaite <peter.crosthwaite@petalogix.com>
+ *
+ * Based on MMC controller for Samsung S5PC1xx-based board emulation
+ * by Alexey Merkulov and Vladimir Monakhov.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef SDHCI_H
+#define SDHCI_H
+
+#include "qemu-common.h"
+#include "sysbus.h"
+#include "sd.h"
+
+/* R/W SDMA System Address register 0x0 */
+#define SDHC_SYSAD                     0x00
+
+/* R/W Host DMA Buffer Boundary and Transfer Block Size Register 0x0 */
+#define SDHC_BLKSIZE                   0x04
+
+/* R/W Blocks count for current transfer 0x0 */
+#define SDHC_BLKCNT                    0x06
+
+/* R/W Command Argument Register 0x0 */
+#define SDHC_ARGUMENT                  0x08
+
+/* R/W Transfer Mode Setting Register 0x0 */
+#define SDHC_TRNMOD                    0x0C
+#define SDHC_TRNS_DMA                  0x0001
+#define SDHC_TRNS_BLK_CNT_EN           0x0002
+#define SDHC_TRNS_ACMD12               0x0004
+#define SDHC_TRNS_READ                 0x0010
+#define SDHC_TRNS_MULTI                0x0020
+
+/* R/W Command Register 0x0 */
+#define SDHC_CMDREG                    0x0E
+#define SDHC_CMD_RSP_WITH_BUSY         (3 << 0)
+#define SDHC_CMD_DATA_PRESENT          (1 << 5)
+#define SDHC_CMD_SUSPEND               (1 << 6)
+#define SDHC_CMD_RESUME                (1 << 7)
+#define SDHC_CMD_ABORT                 ((1 << 6)|(1 << 7))
+#define SDHC_CMD_TYPE_MASK             ((1 << 6)|(1 << 7))
+#define SDHC_COMMAND_TYPE(x)           ((x) & SDHC_CMD_TYPE_MASK)
+
+/* ROC Response Register 0 0x0 */
+#define SDHC_RSPREG0                   0x10
+/* ROC Response Register 1 0x0 */
+#define SDHC_RSPREG1                   0x14
+/* ROC Response Register 2 0x0 */
+#define SDHC_RSPREG2                   0x18
+/* ROC Response Register 3 0x0 */
+#define SDHC_RSPREG3                   0x1C
+
+/* R/W Buffer Data Register 0x0 */
+#define SDHC_BDATA                     0x20
+
+/* R/ROC Present State Register 0x000A0000 */
+#define SDHC_PRNSTS                    0x24
+#define SDHC_CMD_INHIBIT               0x00000001
+#define SDHC_DATA_INHIBIT              0x00000002
+#define SDHC_DAT_LINE_ACTIVE           0x00000004
+#define SDHC_DOING_WRITE               0x00000100
+#define SDHC_DOING_READ                0x00000200
+#define SDHC_SPACE_AVAILABLE           0x00000400
+#define SDHC_DATA_AVAILABLE            0x00000800
+#define SDHC_CARD_PRESENT              0x00010000
+#define SDHC_CARD_DETECT               0x00040000
+#define SDHC_WRITE_PROTECT             0x00080000
+#define TRANSFERRING_DATA(x)           \
+    ((x) & (SDHC_DOING_READ | SDHC_DOING_WRITE))
+
+/* R/W Host control Register 0x0 */
+#define SDHC_HOSTCTL                   0x28
+#define SDHC_CTRL_DMA_CHECK_MASK       0x18
+#define SDHC_CTRL_SDMA                 0x00
+#define SDHC_CTRL_ADMA1_32             0x08
+#define SDHC_CTRL_ADMA2_32             0x10
+#define SDHC_CTRL_ADMA2_64             0x18
+#define SDHC_DMA_TYPE(x)               ((x) & SDHC_CTRL_DMA_CHECK_MASK)
+
+/* R/W Power Control Register 0x0 */
+#define SDHC_PWRCON                    0x29
+#define SDHC_POWER_ON                  (1 << 0)
+
+/* R/W Block Gap Control Register 0x0 */
+#define SDHC_BLKGAP                    0x2A
+#define SDHC_STOP_AT_GAP_REQ           0x01
+#define SDHC_CONTINUE_REQ              0x02
+
+/* R/W WakeUp Control Register 0x0 */
+#define SDHC_WAKCON                    0x2B
+#define SDHC_WKUP_ON_INS               (1 << 1)
+#define SDHC_WKUP_ON_RMV               (1 << 2)
+
+/* CLKCON */
+#define SDHC_CLKCON                    0x2C
+#define SDHC_CLOCK_INT_STABLE          0x0002
+#define SDHC_CLOCK_INT_EN              0x0001
+#define SDHC_CLOCK_SDCLK_EN            (1 << 2)
+#define SDHC_CLOCK_CHK_MASK            0x0007
+#define SDHC_CLOCK_IS_ON(x)            \
+    (((x) & SDHC_CLOCK_CHK_MASK) == SDHC_CLOCK_CHK_MASK)
+
+/* R/W Timeout Control Register 0x0 */
+#define SDHC_TIMEOUTCON                0x2E
+
+/* R/W Software Reset Register 0x0 */
+#define SDHC_SWRST                     0x2F
+#define SDHC_RESET_ALL                 0x01
+#define SDHC_RESET_CMD                 0x02
+#define SDHC_RESET_DATA                0x04
+
+/* ROC/RW1C Normal Interrupt Status Register 0x0 */
+#define SDHC_NORINTSTS                 0x30
+#define SDHC_NIS_ERR                   0x8000
+#define SDHC_NIS_CMDCMP                0x0001
+#define SDHC_NIS_TRSCMP                0x0002
+#define SDHC_NIS_BLKGAP                0x0004
+#define SDHC_NIS_DMA                   0x0008
+#define SDHC_NIS_WBUFRDY               0x0010
+#define SDHC_NIS_RBUFRDY               0x0020
+#define SDHC_NIS_INSERT                0x0040
+#define SDHC_NIS_REMOVE                0x0080
+#define SDHC_NIS_CARDINT               0x0100
+
+/* ROC/RW1C Error Interrupt Status Register 0x0 */
+#define SDHC_ERRINTSTS                 0x32
+#define SDHC_EIS_CMDTIMEOUT            0x0001
+#define SDHC_EIS_BLKGAP                0x0004
+#define SDHC_EIS_CMDIDX                0x0008
+#define SDHC_EIS_CMD12ERR              0x0100
+#define SDHC_EIS_ADMAERR               0x0200
+
+/* R/W Normal Interrupt Status Enable Register 0x0 */
+#define SDHC_NORINTSTSEN               0x34
+#define SDHC_NISEN_CMDCMP              0x0001
+#define SDHC_NISEN_TRSCMP              0x0002
+#define SDHC_NISEN_DMA                 0x0008
+#define SDHC_NISEN_WBUFRDY             0x0010
+#define SDHC_NISEN_RBUFRDY             0x0020
+#define SDHC_NISEN_INSERT              0x0040
+#define SDHC_NISEN_REMOVE              0x0080
+#define SDHC_NISEN_CARDINT             0x0100
+
+/* R/W Error Interrupt Status Enable Register 0x0 */
+#define SDHC_ERRINTSTSEN               0x36
+#define SDHC_EISEN_CMDTIMEOUT          0x0001
+#define SDHC_EISEN_BLKGAP              0x0004
+#define SDHC_EISEN_CMDIDX              0x0008
+#define SDHC_EISEN_ADMAERR             0x0200
+
+/* R/W Normal Interrupt Signal Enable Register 0x0 */
+#define SDHC_NORINTSIGEN               0x38
+#define SDHC_NORINTSIG_INSERT          (1 << 6)
+#define SDHC_NORINTSIG_REMOVE          (1 << 7)
+
+/* R/W Error Interrupt Signal Enable Register 0x0 */
+#define SDHC_ERRINTSIGEN               0x3A
+
+/* ROC Auto CMD12 error status register 0x0 */
+#define SDHC_ACMD12ERRSTS              0x3C
+
+/* HWInit Capabilities Register 0x05E80080 */
+#define SDHC_CAPAREG                   0x40
+#define SDHC_CAN_DO_DMA                0x00400000
+#define SDHC_CAN_DO_ADMA2              0x00080000
+#define SDHC_CAN_DO_ADMA1              0x00100000
+#define SDHC_64_BIT_BUS_SUPPORT        (1 << 28)
+#define SDHC_CAPAB_BLOCKSIZE(x)        (((x) >> 16) & 0x3)
+
+/* HWInit Maximum Current Capabilities Register 0x0 */
+#define SDHC_MAXCURR                   0x48
+
+/* W Force Event Auto CMD12 Error Interrupt Register 0x0000 */
+#define SDHC_FEAER                     0x50
+/* W Force Event Error Interrupt Register Error Interrupt 0x0000 */
+#define SDHC_FEERR                     0x52
+
+/* R/W ADMA Error Status Register 0x00 */
+#define SDHC_ADMAERR                   0x54
+#define SDHC_ADMAERR_LENGTH_MISMATCH   (1 << 2)
+#define SDHC_ADMAERR_STATE_ST_STOP     (0 << 0)
+#define SDHC_ADMAERR_STATE_ST_FDS      (1 << 0)
+#define SDHC_ADMAERR_STATE_ST_TFR      (3 << 0)
+#define SDHC_ADMAERR_STATE_MASK        (3 << 0)
+
+/* R/W ADMA System Address Register 0x00 */
+#define SDHC_ADMASYSADDR               0x58
+#define SDHC_ADMA_ATTR_SET_LEN         (1 << 4)
+#define SDHC_ADMA_ATTR_ACT_TRAN        (1 << 5)
+#define SDHC_ADMA_ATTR_ACT_LINK        (3 << 4)
+#define SDHC_ADMA_ATTR_INT             (1 << 2)
+#define SDHC_ADMA_ATTR_END             (1 << 1)
+#define SDHC_ADMA_ATTR_VALID           (1 << 0)
+#define SDHC_ADMA_ATTR_ACT_MASK        ((1 << 4)|(1 << 5))
+
+/* Slot interrupt status */
+#define SDHC_SLOT_INT_STATUS            0xFC
+
+/* HWInit Host Controller Version Register 0x0401 */
+#define SDHC_HCVER                      0xFE
+#define SD_HOST_SPECv2_VERS             0x2401
+
+#define SDHC_REGISTERS_MAP_SIZE         0x100
+#define SDHC_INSERTION_DELAY            (get_ticks_per_sec())
+#define SDHC_TRANSFER_DELAY             100
+#define SDHC_ADMA_DESCS_PER_DELAY       5
+#define SDHC_CMD_RESPONSE               (3 << 0)
+
+enum {
+    sdhc_not_stopped = 0, /* normal SDHC state */
+    sdhc_gap_read   = 1,  /* SDHC stopped at block gap during read operation */
+    sdhc_gap_write  = 2   /* SDHC stopped at block gap during write operation */
+};
+
+/* SD/MMC host controller state */
+typedef struct SDHCIState {
+    SysBusDevice busdev;
+    SDState *card;
+    MemoryRegion iomem;
+
+    QEMUTimer *insert_timer;       /* timer for 'changing' sd card. */
+    QEMUTimer *transfer_timer;
+    qemu_irq eject_cb;
+    qemu_irq ro_cb;
+    qemu_irq irq;
+
+    uint32_t sdmasysad;    /* SDMA System Address register */
+    uint16_t blksize;      /* Host DMA Buff Boundary and Transfer BlkSize Reg */
+    uint16_t blkcnt;       /* Blocks count for current transfer */
+    uint32_t argument;     /* Command Argument Register */
+    uint16_t trnmod;       /* Transfer Mode Setting Register */
+    uint16_t cmdreg;       /* Command Register */
+    uint32_t rspreg[4];    /* Response Registers 0-3 */
+    uint32_t prnsts;       /* Present State Register */
+    uint8_t  hostctl;      /* Host Control Register */
+    uint8_t  pwrcon;       /* Power control Register */
+    uint8_t  blkgap;       /* Block Gap Control Register */
+    uint8_t  wakcon;       /* WakeUp Control Register */
+    uint16_t clkcon;       /* Clock control Register */
+    uint8_t  timeoutcon;   /* Timeout Control Register */
+    uint8_t  admaerr;      /* ADMA Error Status Register */
+    uint16_t norintsts;    /* Normal Interrupt Status Register */
+    uint16_t errintsts;    /* Error Interrupt Status Register */
+    uint16_t norintstsen;  /* Normal Interrupt Status Enable Register */
+    uint16_t errintstsen;  /* Error Interrupt Status Enable Register */
+    uint16_t norintsigen;  /* Normal Interrupt Signal Enable Register */
+    uint16_t errintsigen;  /* Error Interrupt Signal Enable Register */
+    uint16_t acmd12errsts; /* Auto CMD12 error status register */
+    uint64_t admasysaddr;  /* ADMA System Address Register */
+
+    uint32_t capareg;      /* Capabilities Register */
+    uint32_t maxcurr;      /* Maximum Current Capabilities Register */
+    uint8_t  *fifo_buffer; /* SD host i/o FIFO buffer */
+    uint32_t buf_maxsz;
+    uint16_t data_count;   /* current element in FIFO buffer */
+    uint8_t  stopped_state;/* Current SDHC state */
+    /* Buffer Data Port Register - virtual access point to R and W buffers */
+    /* Software Reset Register - always reads as 0 */
+    /* Force Event Auto CMD12 Error Interrupt Reg - write only */
+    /* Force Event Error Interrupt Register- write only */
+    /* RO Host Controller Version Register always reads as 0x2401 */
+} SDHCIState;
+
+typedef struct SDHCIClass {
+    SysBusDeviceClass busdev_class;
+
+    void (*reset)(SDHCIState *s);
+    uint32_t (*mem_read)(SDHCIState *s, unsigned int offset, unsigned size);
+    void (*mem_write)(SDHCIState *s, unsigned int offset, uint32_t value,
+            unsigned size);
+    void (*send_command)(SDHCIState *s);
+    bool (*can_issue_command)(SDHCIState *s);
+    void (*data_transfer)(SDHCIState *s);
+    void (*end_data_transfer)(SDHCIState *s);
+    void (*do_sdma_single)(SDHCIState *s);
+    void (*do_sdma_multi)(SDHCIState *s);
+    void (*do_adma)(SDHCIState *s);
+    void (*read_block_from_card)(SDHCIState *s);
+    void (*write_block_to_card)(SDHCIState *s);
+    uint32_t (*bdata_read)(SDHCIState *s, unsigned size);
+    void (*bdata_write)(SDHCIState *s, uint32_t value, unsigned size);
+} SDHCIClass;
+
+extern const VMStateDescription sdhci_vmstate;
+
+#define TYPE_SDHCI            "generic-sdhci"
+#define SDHCI(obj)            \
+     OBJECT_CHECK(SDHCIState, (obj), TYPE_SDHCI)
+#define SDHCI_CLASS(klass)    \
+     OBJECT_CLASS_CHECK(SDHCIClass, (klass), TYPE_SDHCI)
+#define SDHCI_GET_CLASS(obj)  \
+     OBJECT_GET_CLASS(SDHCIClass, (obj), TYPE_SDHCI)
+
+#endif /* SDHCI_H */
diff --git a/hw/xilinx_zynq.c b/hw/xilinx_zynq.c
index 311f791..2f67d90 100644
--- a/hw/xilinx_zynq.c
+++ b/hw/xilinx_zynq.c
@@ -82,8 +82,7 @@
         spi = (SSIBus *)qdev_get_child_bus(dev, bus_name);
 
         for (j = 0; j < num_ss; ++j) {
-            flash_dev = ssi_create_slave_no_init(spi, "m25p80");
-            qdev_prop_set_string(flash_dev, "partname", "n25q128");
+            flash_dev = ssi_create_slave_no_init(spi, "n25q128");
             qdev_init_nofail(flash_dev);
 
             cs_line = qdev_get_gpio_in(flash_dev, 0);
@@ -187,6 +186,16 @@
         }
     }
 
+    dev = qdev_create(NULL, "generic-sdhci");
+    qdev_init_nofail(dev);
+    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0xE0100000);
+    sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[56-IRQ_OFFSET]);
+
+    dev = qdev_create(NULL, "generic-sdhci");
+    qdev_init_nofail(dev);
+    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0xE0101000);
+    sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[79-IRQ_OFFSET]);
+
     zynq_binfo.ram_size = ram_size;
     zynq_binfo.kernel_filename = kernel_filename;
     zynq_binfo.kernel_cmdline = kernel_cmdline;
diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
index 8043b3b..4e3b17b 100644
--- a/include/exec/gen-icount.h
+++ b/include/exec/gen-icount.h
@@ -7,10 +7,19 @@
 
 static TCGArg *icount_arg;
 static int icount_label;
+static int exitreq_label;
 
-static inline void gen_icount_start(void)
+static inline void gen_tb_start(void)
 {
     TCGv_i32 count;
+    TCGv_i32 flag;
+
+    exitreq_label = gen_new_label();
+    flag = tcg_temp_local_new_i32();
+    tcg_gen_ld_i32(flag, cpu_env,
+                   offsetof(CPUState, tcg_exit_req) - ENV_OFFSET);
+    tcg_gen_brcondi_i32(TCG_COND_NE, flag, 0, exitreq_label);
+    tcg_temp_free_i32(flag);
 
     if (!use_icount)
         return;
@@ -27,12 +36,15 @@
     tcg_temp_free_i32(count);
 }
 
-static void gen_icount_end(TranslationBlock *tb, int num_insns)
+static void gen_tb_end(TranslationBlock *tb, int num_insns)
 {
+    gen_set_label(exitreq_label);
+    tcg_gen_exit_tb((tcg_target_long)tb + TB_EXIT_REQUESTED);
+
     if (use_icount) {
         *icount_arg = num_insns;
         gen_set_label(icount_label);
-        tcg_gen_exit_tb((tcg_target_long)tb + 2);
+        tcg_gen_exit_tb((tcg_target_long)tb + TB_EXIT_ICOUNT_EXPIRED);
     }
 }
 
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index ee1a7c8..ab2657c 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -71,6 +71,8 @@
  * @created: Indicates whether the CPU thread has been successfully created.
  * @stop: Indicates a pending stop request.
  * @stopped: Indicates the CPU has been artificially stopped.
+ * @tcg_exit_req: Set to force TCG to stop executing linked TBs for this
+ *           CPU and return to its top level loop.
  * @env_ptr: Pointer to subclass-specific CPUArchState field.
  * @current_tb: Currently executing TB.
  * @kvm_fd: vCPU file descriptor for KVM.
@@ -100,6 +102,7 @@
     bool stop;
     bool stopped;
     volatile sig_atomic_t exit_request;
+    volatile sig_atomic_t tcg_exit_req;
 
     void *env_ptr; /* CPUArchState */
     struct TranslationBlock *current_tb;
diff --git a/target-alpha/cpu-qom.h b/target-alpha/cpu-qom.h
index c0f6c6d..252bd14 100644
--- a/target-alpha/cpu-qom.h
+++ b/target-alpha/cpu-qom.h
@@ -72,5 +72,6 @@
 
 #define ENV_GET_CPU(e) CPU(alpha_env_get_cpu(e))
 
+#define ENV_OFFSET offsetof(AlphaCPU, env)
 
 #endif
diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index f8f7695..657f5e1 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -3411,7 +3411,7 @@
     if (max_insns == 0)
         max_insns = CF_COUNT_MASK;
 
-    gen_icount_start();
+    gen_tb_start();
     do {
         if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) {
             QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
@@ -3478,7 +3478,7 @@
         abort();
     }
 
-    gen_icount_end(tb, num_insns);
+    gen_tb_end(tb, num_insns);
     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
     if (search_pc) {
         j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h
index aff7bf3..7539727 100644
--- a/target-arm/cpu-qom.h
+++ b/target-arm/cpu-qom.h
@@ -109,6 +109,8 @@
 
 #define ENV_GET_CPU(e) CPU(arm_env_get_cpu(e))
 
+#define ENV_OFFSET offsetof(ARMCPU, env)
+
 void register_cp_regs_for_features(ARMCPU *cpu);
 
 #endif
diff --git a/target-arm/translate.c b/target-arm/translate.c
index f2f649d..db63c6e 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -9808,7 +9808,7 @@
     if (max_insns == 0)
         max_insns = CF_COUNT_MASK;
 
-    gen_icount_start();
+    gen_tb_start();
 
     tcg_clear_temp_count();
 
@@ -10011,7 +10011,7 @@
     }
 
 done_generating:
-    gen_icount_end(tb, num_insns);
+    gen_tb_end(tb, num_insns);
     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
 
 #ifdef DEBUG_DISAS
diff --git a/target-cris/cpu-qom.h b/target-cris/cpu-qom.h
index 2bac71f..11e5286 100644
--- a/target-cris/cpu-qom.h
+++ b/target-cris/cpu-qom.h
@@ -71,5 +71,6 @@
 
 #define ENV_GET_CPU(e) CPU(cris_env_get_cpu(e))
 
+#define ENV_OFFSET offsetof(CRISCPU, env)
 
 #endif
diff --git a/target-cris/translate.c b/target-cris/translate.c
index 14c167f..ec71ef4 100644
--- a/target-cris/translate.c
+++ b/target-cris/translate.c
@@ -3250,7 +3250,7 @@
         max_insns = CF_COUNT_MASK;
     }
 
-    gen_icount_start();
+    gen_tb_start();
     do {
         check_breakpoint(env, dc);
 
@@ -3391,7 +3391,7 @@
             break;
         }
     }
-    gen_icount_end(tb, num_insns);
+    gen_tb_end(tb, num_insns);
     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
     if (search_pc) {
         j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
diff --git a/target-i386/cpu-qom.h b/target-i386/cpu-qom.h
index 48e6b54..b7bdcb6 100644
--- a/target-i386/cpu-qom.h
+++ b/target-i386/cpu-qom.h
@@ -74,5 +74,6 @@
 
 #define ENV_GET_CPU(e) CPU(x86_env_get_cpu(e))
 
+#define ENV_OFFSET offsetof(X86CPU, env)
 
 #endif
diff --git a/target-i386/helper.h b/target-i386/helper.h
index 26a0cc8..d6974df 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -14,12 +14,8 @@
 DEF_HELPER_2(divl_EAX, void, env, tl)
 DEF_HELPER_2(idivl_EAX, void, env, tl)
 #ifdef TARGET_X86_64
-DEF_HELPER_2(mulq_EAX_T0, void, env, tl)
-DEF_HELPER_2(imulq_EAX_T0, void, env, tl)
-DEF_HELPER_3(imulq_T0_T1, tl, env, tl, tl)
 DEF_HELPER_2(divq_EAX, void, env, tl)
 DEF_HELPER_2(idivq_EAX, void, env, tl)
-DEF_HELPER_FLAGS_2(umulh, TCG_CALL_NO_RWG_SE, tl, tl, tl)
 #endif
 
 DEF_HELPER_2(aam, void, env, int)
diff --git a/target-i386/int_helper.c b/target-i386/int_helper.c
index 3b56075..74c7c36 100644
--- a/target-i386/int_helper.c
+++ b/target-i386/int_helper.c
@@ -374,46 +374,6 @@
     return 0;
 }
 
-void helper_mulq_EAX_T0(CPUX86State *env, target_ulong t0)
-{
-    uint64_t r0, r1;
-
-    mulu64(&r0, &r1, EAX, t0);
-    EAX = r0;
-    EDX = r1;
-    CC_DST = r0;
-    CC_SRC = r1;
-}
-
-target_ulong helper_umulh(target_ulong t0, target_ulong t1)
-{
-    uint64_t h, l;
-    mulu64(&l, &h, t0, t1);
-    return h;
-}
-
-void helper_imulq_EAX_T0(CPUX86State *env, target_ulong t0)
-{
-    uint64_t r0, r1;
-
-    muls64(&r0, &r1, EAX, t0);
-    EAX = r0;
-    EDX = r1;
-    CC_DST = r0;
-    CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63));
-}
-
-target_ulong helper_imulq_T0_T1(CPUX86State *env, target_ulong t0,
-                                target_ulong t1)
-{
-    uint64_t r0, r1;
-
-    muls64(&r0, &r1, t0, t1);
-    CC_DST = r0;
-    CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63));
-    return r0;
-}
-
 void helper_divq_EAX(CPUX86State *env, target_ulong t0)
 {
     uint64_t r0, r1;
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 605cd88..705147a 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -4111,31 +4111,18 @@
                 ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                 switch (ot) {
-                    TCGv_i64 t0, t1;
                 default:
-                    t0 = tcg_temp_new_i64();
-                    t1 = tcg_temp_new_i64();
-#ifdef TARGET_X86_64
-                    tcg_gen_ext32u_i64(t0, cpu_T[0]);
-                    tcg_gen_ext32u_i64(t1, cpu_regs[R_EDX]);
-#else
-                    tcg_gen_extu_i32_i64(t0, cpu_T[0]);
-                    tcg_gen_extu_i32_i64(t0, cpu_regs[R_EDX]);
-#endif
-                    tcg_gen_mul_i64(t0, t0, t1);
-                    tcg_gen_trunc_i64_tl(cpu_T[0], t0);
-                    tcg_gen_shri_i64(t0, t0, 32);
-                    tcg_gen_trunc_i64_tl(cpu_T[1], t0);
-                    tcg_temp_free_i64(t0);
-                    tcg_temp_free_i64(t1);
-                    gen_op_mov_reg_T0(OT_LONG, s->vex_v);
-                    gen_op_mov_reg_T1(OT_LONG, reg);
+                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EDX]);
+                    tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
+                                      cpu_tmp2_i32, cpu_tmp3_i32);
+                    tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], cpu_tmp2_i32);
+                    tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp3_i32);
                     break;
 #ifdef TARGET_X86_64
                 case OT_QUAD:
-                    tcg_gen_mov_tl(cpu_T[1], cpu_regs[R_EDX]);
-                    tcg_gen_mul_tl(cpu_regs[s->vex_v], cpu_T[0], cpu_T[1]);
-                    gen_helper_umulh(cpu_regs[reg], cpu_T[0], cpu_T[1]);
+                    tcg_gen_mulu2_i64(cpu_regs[s->vex_v], cpu_regs[reg],
+                                      cpu_T[0], cpu_regs[R_EDX]);
                     break;
 #endif
                 }
@@ -5032,39 +5019,22 @@
                 break;
             default:
             case OT_LONG:
-#ifdef TARGET_X86_64
-                gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
-                tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
-                tcg_gen_ext32u_tl(cpu_T[1], cpu_T[1]);
-                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
-                gen_op_mov_reg_T0(OT_LONG, R_EAX);
-                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
-                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
-                gen_op_mov_reg_T0(OT_LONG, R_EDX);
-                tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
-#else
-                {
-                    TCGv_i64 t0, t1;
-                    t0 = tcg_temp_new_i64();
-                    t1 = tcg_temp_new_i64();
-                    gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
-                    tcg_gen_extu_i32_i64(t0, cpu_T[0]);
-                    tcg_gen_extu_i32_i64(t1, cpu_T[1]);
-                    tcg_gen_mul_i64(t0, t0, t1);
-                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
-                    gen_op_mov_reg_T0(OT_LONG, R_EAX);
-                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
-                    tcg_gen_shri_i64(t0, t0, 32);
-                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
-                    gen_op_mov_reg_T0(OT_LONG, R_EDX);
-                    tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
-                }
-#endif
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
+                tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
+                                  cpu_tmp2_i32, cpu_tmp3_i32);
+                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
+                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
+                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
                 set_cc_op(s, CC_OP_MULL);
                 break;
 #ifdef TARGET_X86_64
             case OT_QUAD:
-                gen_helper_mulq_EAX_T0(cpu_env, cpu_T[0]);
+                tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
+                                  cpu_T[0], cpu_regs[R_EAX]);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
+                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
                 set_cc_op(s, CC_OP_MULQ);
                 break;
 #endif
@@ -5100,41 +5070,25 @@
                 break;
             default:
             case OT_LONG:
-#ifdef TARGET_X86_64
-                gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
-                tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
-                tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
-                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
-                gen_op_mov_reg_T0(OT_LONG, R_EAX);
-                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
-                tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
-                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
-                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
-                gen_op_mov_reg_T0(OT_LONG, R_EDX);
-#else
-                {
-                    TCGv_i64 t0, t1;
-                    t0 = tcg_temp_new_i64();
-                    t1 = tcg_temp_new_i64();
-                    gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
-                    tcg_gen_ext_i32_i64(t0, cpu_T[0]);
-                    tcg_gen_ext_i32_i64(t1, cpu_T[1]);
-                    tcg_gen_mul_i64(t0, t0, t1);
-                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
-                    gen_op_mov_reg_T0(OT_LONG, R_EAX);
-                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
-                    tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
-                    tcg_gen_shri_i64(t0, t0, 32);
-                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
-                    gen_op_mov_reg_T0(OT_LONG, R_EDX);
-                    tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
-                }
-#endif
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
+                tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
+                                  cpu_tmp2_i32, cpu_tmp3_i32);
+                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
+                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
+                tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
+                tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
+                tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
                 set_cc_op(s, CC_OP_MULL);
                 break;
 #ifdef TARGET_X86_64
             case OT_QUAD:
-                gen_helper_imulq_EAX_T0(cpu_env, cpu_T[0]);
+                tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
+                                  cpu_T[0], cpu_regs[R_EAX]);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
+                tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
+                tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
                 set_cc_op(s, CC_OP_MULQ);
                 break;
 #endif
@@ -5389,37 +5343,27 @@
         } else {
             gen_op_mov_TN_reg(ot, 1, reg);
         }
-
+        switch (ot) {
 #ifdef TARGET_X86_64
-        if (ot == OT_QUAD) {
-            gen_helper_imulq_T0_T1(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
-        } else
+        case OT_QUAD:
+            tcg_gen_muls2_i64(cpu_regs[reg], cpu_T[1], cpu_T[0], cpu_T[1]);
+            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
+            tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
+            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_T[1]);
+            break;
 #endif
-        if (ot == OT_LONG) {
-#ifdef TARGET_X86_64
-                tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
-                tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
-                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
-                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
-                tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
-                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
-#else
-                {
-                    TCGv_i64 t0, t1;
-                    t0 = tcg_temp_new_i64();
-                    t1 = tcg_temp_new_i64();
-                    tcg_gen_ext_i32_i64(t0, cpu_T[0]);
-                    tcg_gen_ext_i32_i64(t1, cpu_T[1]);
-                    tcg_gen_mul_i64(t0, t0, t1);
-                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
-                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
-                    tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
-                    tcg_gen_shri_i64(t0, t0, 32);
-                    tcg_gen_trunc_i64_i32(cpu_T[1], t0);
-                    tcg_gen_sub_tl(cpu_cc_src, cpu_T[1], cpu_tmp0);
-                }
-#endif
-        } else {
+        case OT_LONG:
+            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+            tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
+            tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
+                              cpu_tmp2_i32, cpu_tmp3_i32);
+            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
+            tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
+            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
+            tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
+            tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
+            break;
+        default:
             tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
             tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
             /* XXX: use 32 bit mul which could be faster */
@@ -5427,8 +5371,9 @@
             tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
             tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
             tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+            gen_op_mov_reg_T0(ot, reg);
+            break;
         }
-        gen_op_mov_reg_T0(ot, reg);
         set_cc_op(s, CC_OP_MULB + ot);
         break;
     case 0x1c0:
@@ -8379,7 +8324,7 @@
     if (max_insns == 0)
         max_insns = CF_COUNT_MASK;
 
-    gen_icount_start();
+    gen_tb_start();
     for(;;) {
         if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) {
             QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
@@ -8437,7 +8382,7 @@
     }
     if (tb->cflags & CF_LAST_IO)
         gen_io_end();
-    gen_icount_end(tb, num_insns);
+    gen_tb_end(tb, num_insns);
     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
     /* we don't forget to fill the last values */
     if (search_pc) {
diff --git a/target-lm32/cpu-qom.h b/target-lm32/cpu-qom.h
index d7525b3..c0b6ce5 100644
--- a/target-lm32/cpu-qom.h
+++ b/target-lm32/cpu-qom.h
@@ -69,5 +69,6 @@
 
 #define ENV_GET_CPU(e) CPU(lm32_env_get_cpu(e))
 
+#define ENV_OFFSET offsetof(LM32CPU, env)
 
 #endif
diff --git a/target-lm32/translate.c b/target-lm32/translate.c
index ccaf838..695d9c5 100644
--- a/target-lm32/translate.c
+++ b/target-lm32/translate.c
@@ -1040,7 +1040,7 @@
         max_insns = CF_COUNT_MASK;
     }
 
-    gen_icount_start();
+    gen_tb_start();
     do {
         check_breakpoint(env, dc);
 
@@ -1102,7 +1102,7 @@
         }
     }
 
-    gen_icount_end(tb, num_insns);
+    gen_tb_end(tb, num_insns);
     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
     if (search_pc) {
         j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
diff --git a/target-m68k/cpu-qom.h b/target-m68k/cpu-qom.h
index 20e5684..f4c33b2 100644
--- a/target-m68k/cpu-qom.h
+++ b/target-m68k/cpu-qom.h
@@ -68,5 +68,6 @@
 
 #define ENV_GET_CPU(e) CPU(m68k_env_get_cpu(e))
 
+#define ENV_OFFSET offsetof(M68kCPU, env)
 
 #endif
diff --git a/target-m68k/translate.c b/target-m68k/translate.c
index 3f1478c..20a86d8 100644
--- a/target-m68k/translate.c
+++ b/target-m68k/translate.c
@@ -2999,7 +2999,7 @@
     if (max_insns == 0)
         max_insns = CF_COUNT_MASK;
 
-    gen_icount_start();
+    gen_tb_start();
     do {
         pc_offset = dc->pc - pc_start;
         gen_throws_exception = NULL;
@@ -3063,7 +3063,7 @@
             break;
         }
     }
-    gen_icount_end(tb, num_insns);
+    gen_tb_end(tb, num_insns);
     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
 
 #ifdef DEBUG_DISAS
diff --git a/target-microblaze/cpu-qom.h b/target-microblaze/cpu-qom.h
index 5ea911c..a0248a5 100644
--- a/target-microblaze/cpu-qom.h
+++ b/target-microblaze/cpu-qom.h
@@ -68,5 +68,6 @@
 
 #define ENV_GET_CPU(e) CPU(mb_env_get_cpu(e))
 
+#define ENV_OFFSET offsetof(MicroBlazeCPU, env)
 
 #endif
diff --git a/target-microblaze/translate.c b/target-microblaze/translate.c
index 687b7d1..a74da8e 100644
--- a/target-microblaze/translate.c
+++ b/target-microblaze/translate.c
@@ -1770,7 +1770,7 @@
     if (max_insns == 0)
         max_insns = CF_COUNT_MASK;
 
-    gen_icount_start();
+    gen_tb_start();
     do
     {
 #if SIM_COMPAT
@@ -1894,7 +1894,7 @@
                 break;
         }
     }
-    gen_icount_end(tb, num_insns);
+    gen_tb_end(tb, num_insns);
     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
     if (search_pc) {
         j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
diff --git a/target-mips/cpu-qom.h b/target-mips/cpu-qom.h
index 55aa692..c6bcddf 100644
--- a/target-mips/cpu-qom.h
+++ b/target-mips/cpu-qom.h
@@ -72,5 +72,6 @@
 
 #define ENV_GET_CPU(e) CPU(mips_env_get_cpu(e))
 
+#define ENV_OFFSET offsetof(MIPSCPU, env)
 
 #endif
diff --git a/target-mips/translate.c b/target-mips/translate.c
index f10a533..6ce2f03 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -15596,7 +15596,7 @@
     if (max_insns == 0)
         max_insns = CF_COUNT_MASK;
     LOG_DISAS("\ntb %p idx %d hflags %04x\n", tb, ctx.mem_idx, ctx.hflags);
-    gen_icount_start();
+    gen_tb_start();
     while (ctx.bstate == BS_NONE) {
         if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) {
             QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
@@ -15694,7 +15694,7 @@
         }
     }
 done_generating:
-    gen_icount_end(tb, num_insns);
+    gen_tb_end(tb, num_insns);
     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
     if (search_pc) {
         j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
diff --git a/target-openrisc/cpu.h b/target-openrisc/cpu.h
index 419f007..4cfd1c7 100644
--- a/target-openrisc/cpu.h
+++ b/target-openrisc/cpu.h
@@ -340,6 +340,8 @@
 
 #define ENV_GET_CPU(e) CPU(openrisc_env_get_cpu(e))
 
+#define ENV_OFFSET offsetof(OpenRISCCPU, env)
+
 OpenRISCCPU *cpu_openrisc_init(const char *cpu_model);
 
 void cpu_openrisc_list(FILE *f, fprintf_function cpu_fprintf);
diff --git a/target-openrisc/translate.c b/target-openrisc/translate.c
index 23e853e..0eafd02 100644
--- a/target-openrisc/translate.c
+++ b/target-openrisc/translate.c
@@ -1696,7 +1696,7 @@
         max_insns = CF_COUNT_MASK;
     }
 
-    gen_icount_start();
+    gen_tb_start();
 
     do {
         check_breakpoint(cpu, dc);
@@ -1779,7 +1779,7 @@
         }
     }
 
-    gen_icount_end(tb, num_insns);
+    gen_tb_end(tb, num_insns);
     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
     if (search_pc) {
         j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
diff --git a/target-ppc/cpu-qom.h b/target-ppc/cpu-qom.h
index 2b82cdb..4e8ceca 100644
--- a/target-ppc/cpu-qom.h
+++ b/target-ppc/cpu-qom.h
@@ -78,7 +78,8 @@
 
 #define ENV_GET_CPU(e) CPU(ppc_env_get_cpu(e))
 
-PowerPCCPUClass *ppc_cpu_class_by_pvr(uint32_t pvr);
+#define ENV_OFFSET offsetof(PowerPCCPU, env)
 
+PowerPCCPUClass *ppc_cpu_class_by_pvr(uint32_t pvr);
 
 #endif
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 80d5366..fa9e9e3 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -9557,7 +9557,7 @@
     if (max_insns == 0)
         max_insns = CF_COUNT_MASK;
 
-    gen_icount_start();
+    gen_tb_start();
     /* Set env in case of segfault during code fetch */
     while (ctx.exception == POWERPC_EXCP_NONE
             && tcg_ctx.gen_opc_ptr < gen_opc_end) {
@@ -9669,7 +9669,7 @@
         /* Generate the return instruction */
         tcg_gen_exit_tb(0);
     }
-    gen_icount_end(tb, num_insns);
+    gen_tb_end(tb, num_insns);
     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
     if (unlikely(search_pc)) {
         j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
diff --git a/target-s390x/cpu-qom.h b/target-s390x/cpu-qom.h
index 237184f..f6e5145 100644
--- a/target-s390x/cpu-qom.h
+++ b/target-s390x/cpu-qom.h
@@ -69,5 +69,6 @@
 
 #define ENV_GET_CPU(e) CPU(s390_env_get_cpu(e))
 
+#define ENV_OFFSET offsetof(S390CPU, env)
 
 #endif
diff --git a/target-s390x/translate.c b/target-s390x/translate.c
index bdf69a3..88e481c 100644
--- a/target-s390x/translate.c
+++ b/target-s390x/translate.c
@@ -4769,7 +4769,7 @@
         max_insns = CF_COUNT_MASK;
     }
 
-    gen_icount_start();
+    gen_tb_start();
 
     do {
         if (search_pc) {
@@ -4845,7 +4845,7 @@
         abort();
     }
 
-    gen_icount_end(tb, num_insns);
+    gen_tb_end(tb, num_insns);
     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
     if (search_pc) {
         j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
diff --git a/target-sh4/cpu-qom.h b/target-sh4/cpu-qom.h
index d368db1..f4e8976 100644
--- a/target-sh4/cpu-qom.h
+++ b/target-sh4/cpu-qom.h
@@ -68,5 +68,6 @@
 
 #define ENV_GET_CPU(e) CPU(sh_env_get_cpu(e))
 
+#define ENV_OFFSET offsetof(SuperHCPU, env)
 
 #endif
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index d255066..7f300e3 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -1959,7 +1959,7 @@
     max_insns = tb->cflags & CF_COUNT_MASK;
     if (max_insns == 0)
         max_insns = CF_COUNT_MASK;
-    gen_icount_start();
+    gen_tb_start();
     while (ctx.bstate == BS_NONE && tcg_ctx.gen_opc_ptr < gen_opc_end) {
         if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) {
             QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
@@ -2029,7 +2029,7 @@
 	}
     }
 
-    gen_icount_end(tb, num_insns);
+    gen_tb_end(tb, num_insns);
     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
     if (search_pc) {
         i = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
diff --git a/target-sparc/cpu-qom.h b/target-sparc/cpu-qom.h
index 89cd1cf..efeeca0 100644
--- a/target-sparc/cpu-qom.h
+++ b/target-sparc/cpu-qom.h
@@ -73,5 +73,6 @@
 
 #define ENV_GET_CPU(e) CPU(sparc_env_get_cpu(e))
 
+#define ENV_OFFSET offsetof(SPARCCPU, env)
 
 #endif
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 12276d5..eb6e800 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -5249,7 +5249,7 @@
     max_insns = tb->cflags & CF_COUNT_MASK;
     if (max_insns == 0)
         max_insns = CF_COUNT_MASK;
-    gen_icount_start();
+    gen_tb_start();
     do {
         if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) {
             QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
@@ -5319,7 +5319,7 @@
             tcg_gen_exit_tb(0);
         }
     }
-    gen_icount_end(tb, num_insns);
+    gen_tb_end(tb, num_insns);
     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
     if (spc) {
         j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
diff --git a/target-unicore32/cpu-qom.h b/target-unicore32/cpu-qom.h
index 625c614..c6590bd 100644
--- a/target-unicore32/cpu-qom.h
+++ b/target-unicore32/cpu-qom.h
@@ -58,5 +58,6 @@
 
 #define ENV_GET_CPU(e) CPU(uc32_env_get_cpu(e))
 
+#define ENV_OFFSET offsetof(UniCore32CPU, env)
 
 #endif
diff --git a/target-unicore32/translate.c b/target-unicore32/translate.c
index d5039e2..151e35e 100644
--- a/target-unicore32/translate.c
+++ b/target-unicore32/translate.c
@@ -1921,7 +1921,7 @@
     }
 #endif
 
-    gen_icount_start();
+    gen_tb_start();
     do {
         if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) {
             QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
@@ -2041,7 +2041,7 @@
     }
 
 done_generating:
-    gen_icount_end(tb, num_insns);
+    gen_tb_end(tb, num_insns);
     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
 
 #ifdef DEBUG_DISAS
diff --git a/target-xtensa/cpu-qom.h b/target-xtensa/cpu-qom.h
index 270de16..c78136b 100644
--- a/target-xtensa/cpu-qom.h
+++ b/target-xtensa/cpu-qom.h
@@ -78,5 +78,6 @@
 
 #define ENV_GET_CPU(e) CPU(xtensa_env_get_cpu(e))
 
+#define ENV_OFFSET offsetof(XtensaCPU, env)
 
 #endif
diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index 11e06a3..06d68db 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -2913,7 +2913,7 @@
         dc.next_icount = tcg_temp_local_new_i32();
     }
 
-    gen_icount_start();
+    gen_tb_start();
 
     if (env->singlestep_enabled && env->exception_taken) {
         env->exception_taken = 0;
@@ -2991,7 +2991,7 @@
     if (dc.is_jmp == DISAS_NEXT) {
         gen_jumpi(&dc, dc.pc, 0);
     }
-    gen_icount_end(tb, insn_count);
+    gen_tb_end(tb, insn_count);
     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
 
     if (search_pc) {
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index 6d489fc..025af9b 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -776,6 +776,7 @@
         break;
     }
 }
+#endif
 
 static void tcg_out_addsub2(TCGContext *s, TCGArg rl, TCGArg rh,
                             TCGArg al, TCGArg ah, TCGArg bl, int blconst,
@@ -792,7 +793,6 @@
     tcg_out_arithc(s, rh, ah, bh, bhconst, oph);
     tcg_out_mov(s, TCG_TYPE_I32, rl, tmp);
 }
-#endif
 
 /* Generate global QEMU prologue and epilogue code */
 static void tcg_target_qemu_prologue(TCGContext *s)
diff --git a/tcg/tcg.h b/tcg/tcg.h
index b195396..df375cf 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -677,7 +677,54 @@
 TCGv_i32 tcg_const_local_i32(int32_t val);
 TCGv_i64 tcg_const_local_i64(int64_t val);
 
-/* TCG targets may use a different definition of tcg_qemu_tb_exec. */
+/**
+ * tcg_qemu_tb_exec:
+ * @env: CPUArchState * for the CPU
+ * @tb_ptr: address of generated code for the TB to execute
+ *
+ * Start executing code from a given translation block.
+ * Where translation blocks have been linked, execution
+ * may proceed from the given TB into successive ones.
+ * Control eventually returns only when some action is needed
+ * from the top-level loop: either control must pass to a TB
+ * which has not yet been directly linked, or an asynchronous
+ * event such as an interrupt needs handling.
+ *
+ * The return value is a pointer to the next TB to execute
+ * (if known; otherwise zero). This pointer is assumed to be
+ * 4-aligned, and the bottom two bits are used to return further
+ * information:
+ *  0, 1: the link between this TB and the next is via the specified
+ *        TB index (0 or 1). That is, we left the TB via (the equivalent
+ *        of) "goto_tb <index>". The main loop uses this to determine
+ *        how to link the TB just executed to the next.
+ *  2:    we are using instruction counting code generation, and we
+ *        did not start executing this TB because the instruction counter
+ *        would hit zero midway through it. In this case the next-TB pointer
+ *        returned is the TB we were about to execute, and the caller must
+ *        arrange to execute the remaining count of instructions.
+ *  3:    we stopped because the CPU's exit_request flag was set
+ *        (usually meaning that there is an interrupt that needs to be
+ *        handled). The next-TB pointer returned is the TB we were
+ *        about to execute when we noticed the pending exit request.
+ *
+ * If the bottom two bits indicate an exit-via-index then the CPU
+ * state is correctly synchronised and ready for execution of the next
+ * TB (and in particular the guest PC is the address to execute next).
+ * Otherwise, we gave up on execution of this TB before it started, and
+ * the caller must fix up the CPU state by calling cpu_pc_from_tb()
+ * with the next-TB pointer we return.
+ *
+ * Note that TCG targets may use a different definition of tcg_qemu_tb_exec
+ * to this default (which just calls the prologue.code emitted by
+ * tcg_target_qemu_prologue()).
+ */
+#define TB_EXIT_MASK 3
+#define TB_EXIT_IDX0 0
+#define TB_EXIT_IDX1 1
+#define TB_EXIT_ICOUNT_EXPIRED 2
+#define TB_EXIT_REQUESTED 3
+
 #if !defined(tcg_qemu_tb_exec)
 # define tcg_qemu_tb_exec(env, tb_ptr) \
     ((tcg_target_ulong (*)(void *, void *))tcg_ctx.code_gen_prologue)(env, \
diff --git a/translate-all.c b/translate-all.c
index b50fb89..90ea002 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -1349,55 +1349,6 @@
     return &tcg_ctx.tb_ctx.tbs[m_max];
 }
 
-static void tb_reset_jump_recursive(TranslationBlock *tb);
-
-static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
-{
-    TranslationBlock *tb1, *tb_next, **ptb;
-    unsigned int n1;
-
-    tb1 = tb->jmp_next[n];
-    if (tb1 != NULL) {
-        /* find head of list */
-        for (;;) {
-            n1 = (uintptr_t)tb1 & 3;
-            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
-            if (n1 == 2) {
-                break;
-            }
-            tb1 = tb1->jmp_next[n1];
-        }
-        /* we are now sure now that tb jumps to tb1 */
-        tb_next = tb1;
-
-        /* remove tb from the jmp_first list */
-        ptb = &tb_next->jmp_first;
-        for (;;) {
-            tb1 = *ptb;
-            n1 = (uintptr_t)tb1 & 3;
-            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
-            if (n1 == n && tb1 == tb) {
-                break;
-            }
-            ptb = &tb1->jmp_next[n1];
-        }
-        *ptb = tb->jmp_next[n];
-        tb->jmp_next[n] = NULL;
-
-        /* suppress the jump to next tb in generated code */
-        tb_reset_jump(tb, n);
-
-        /* suppress jumps in the tb on which we could have jumped */
-        tb_reset_jump_recursive(tb_next);
-    }
-}
-
-static void tb_reset_jump_recursive(TranslationBlock *tb)
-{
-    tb_reset_jump_recursive2(tb, 0);
-    tb_reset_jump_recursive2(tb, 1);
-}
-
 #if defined(TARGET_HAS_ICE) && !defined(CONFIG_USER_ONLY)
 void tb_invalidate_phys_addr(hwaddr addr)
 {
@@ -1416,26 +1367,6 @@
 }
 #endif /* TARGET_HAS_ICE && !defined(CONFIG_USER_ONLY) */
 
-void cpu_unlink_tb(CPUState *cpu)
-{
-    /* FIXME: TB unchaining isn't SMP safe.  For now just ignore the
-       problem and hope the cpu will stop of its own accord.  For userspace
-       emulation this often isn't actually as bad as it sounds.  Often
-       signals are used primarily to interrupt blocking syscalls.  */
-    TranslationBlock *tb;
-    static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
-
-    spin_lock(&interrupt_lock);
-    tb = cpu->current_tb;
-    /* if the cpu is currently executing code, we must unlink it and
-       all the potentially executing TB */
-    if (tb) {
-        cpu->current_tb = NULL;
-        tb_reset_jump_recursive(tb);
-    }
-    spin_unlock(&interrupt_lock);
-}
-
 void tb_check_watchpoint(CPUArchState *env)
 {
     TranslationBlock *tb;
@@ -1475,7 +1406,7 @@
             cpu_abort(env, "Raised interrupt while not in I/O function");
         }
     } else {
-        cpu_unlink_tb(cpu);
+        cpu->tcg_exit_req = 1;
     }
 }
 
@@ -1626,7 +1557,7 @@
     CPUState *cpu = ENV_GET_CPU(env);
 
     env->interrupt_request |= mask;
-    cpu_unlink_tb(cpu);
+    cpu->tcg_exit_req = 1;
 }
 
 /*
diff --git a/vl.c b/vl.c
index febd2ea..c03edf1 100644
--- a/vl.c
+++ b/vl.c
@@ -2994,7 +2994,7 @@
                 drive_add(IF_MTD, -1, optarg, MTD_OPTS);
                 break;
             case QEMU_OPTION_sd:
-                drive_add(IF_SD, 0, optarg, SD_OPTS);
+                drive_add(IF_SD, -1, optarg, SD_OPTS);
                 break;
             case QEMU_OPTION_pflash:
                 drive_add(IF_PFLASH, -1, optarg, PFLASH_OPTS);