Merge remote-tracking branch 'kwolf/for-anthony' into staging
diff --git a/async.c b/async.c
index 57ac3a8..fd313df 100644
--- a/async.c
+++ b/async.c
@@ -137,11 +137,12 @@
 
 int qemu_bh_poll(void)
 {
-    QEMUBH *bh, **bhp;
+    QEMUBH *bh, **bhp, *next;
     int ret;
 
     ret = 0;
-    for (bh = async_context->first_bh; bh; bh = bh->next) {
+    for (bh = async_context->first_bh; bh; bh = next) {
+        next = bh->next;
         if (!bh->deleted && bh->scheduled) {
             bh->scheduled = 0;
             if (!bh->idle)
diff --git a/block.h b/block.h
index da7d39c..859d1d9 100644
--- a/block.h
+++ b/block.h
@@ -110,7 +110,7 @@
 typedef struct BlockDriverAIOCB BlockDriverAIOCB;
 typedef void BlockDriverCompletionFunc(void *opaque, int ret);
 typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector,
-				     int sector_num);
+                                     int sector_num);
 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
                                  QEMUIOVector *iov, int nb_sectors,
                                  BlockDriverCompletionFunc *cb, void *opaque);
@@ -118,7 +118,7 @@
                                   QEMUIOVector *iov, int nb_sectors,
                                   BlockDriverCompletionFunc *cb, void *opaque);
 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
-				 BlockDriverCompletionFunc *cb, void *opaque);
+                                 BlockDriverCompletionFunc *cb, void *opaque);
 void bdrv_aio_cancel(BlockDriverAIOCB *acb);
 
 typedef struct BlockRequest {
@@ -150,7 +150,7 @@
 int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
 int bdrv_has_zero_init(BlockDriverState *bs);
 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
-	int *pnum);
+                      int *pnum);
 
 #define BIOS_ATA_TRANSLATION_AUTO   0
 #define BIOS_ATA_TRANSLATION_NONE   1
diff --git a/block/qcow.c b/block/qcow.c
index a26c886..227b104 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -496,6 +496,8 @@
     uint64_t cluster_offset;
     uint8_t *cluster_data;
     struct iovec hd_iov;
+    bool is_write;
+    QEMUBH *bh;
     QEMUIOVector hd_qiov;
     BlockDriverAIOCB *hd_aiocb;
 } QCowAIOCB;
@@ -525,6 +527,8 @@
     acb->hd_aiocb = NULL;
     acb->sector_num = sector_num;
     acb->qiov = qiov;
+    acb->is_write = is_write;
+
     if (qiov->niov > 1) {
         acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size);
         if (is_write)
@@ -538,6 +542,38 @@
     return acb;
 }
 
+static void qcow_aio_read_cb(void *opaque, int ret);
+static void qcow_aio_write_cb(void *opaque, int ret);
+
+static void qcow_aio_rw_bh(void *opaque)
+{
+    QCowAIOCB *acb = opaque;
+    qemu_bh_delete(acb->bh);
+    acb->bh = NULL;
+
+    if (acb->is_write) {
+        qcow_aio_write_cb(opaque, 0);
+    } else {
+        qcow_aio_read_cb(opaque, 0);
+    }
+}
+
+static int qcow_schedule_bh(QEMUBHFunc *cb, QCowAIOCB *acb)
+{
+    if (acb->bh) {
+        return -EIO;
+    }
+
+    acb->bh = qemu_bh_new(cb, acb);
+    if (!acb->bh) {
+        return -EIO;
+    }
+
+    qemu_bh_schedule(acb->bh);
+
+    return 0;
+}
+
 static void qcow_aio_read_cb(void *opaque, int ret)
 {
     QCowAIOCB *acb = opaque;
@@ -640,12 +676,21 @@
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     QCowAIOCB *acb;
+    int ret;
 
     acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
     if (!acb)
         return NULL;
 
-    qcow_aio_read_cb(acb, 0);
+    ret = qcow_schedule_bh(qcow_aio_rw_bh, acb);
+    if (ret < 0) {
+        if (acb->qiov->niov > 1) {
+            qemu_vfree(acb->orig_buf);
+        }
+        qemu_aio_release(acb);
+        return NULL;
+    }
+
     return &acb->common;
 }
 
@@ -725,6 +770,7 @@
 {
     BDRVQcowState *s = bs->opaque;
     QCowAIOCB *acb;
+    int ret;
 
     s->cluster_cache_offset = -1; /* disable compressed cache */
 
@@ -733,7 +779,15 @@
         return NULL;
 
 
-    qcow_aio_write_cb(acb, 0);
+    ret = qcow_schedule_bh(qcow_aio_rw_bh, acb);
+    if (ret < 0) {
+        if (acb->qiov->niov > 1) {
+            qemu_vfree(acb->orig_buf);
+        }
+        qemu_aio_release(acb);
+        return NULL;
+    }
+
     return &acb->common;
 }
 
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index c9e7bbd..882f50a 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -796,8 +796,8 @@
                 m->depends_on = old_alloc;
                 m->nb_clusters = 0;
                 *num = 0;
-                ret = 0;
-                goto fail;
+
+                goto out_wait_dependency;
             }
         }
     }
@@ -812,7 +812,6 @@
 
     cluster_offset = qcow2_alloc_clusters(bs, nb_clusters * s->cluster_size);
     if (cluster_offset < 0) {
-        QLIST_REMOVE(m, next_in_flight);
         ret = cluster_offset;
         goto fail;
     }
@@ -825,7 +824,7 @@
 out:
     ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
     if (ret < 0) {
-        return ret;
+        goto fail_put;
     }
 
     m->nb_available = MIN(nb_clusters << (s->cluster_bits - 9), n_end);
@@ -835,8 +834,13 @@
 
     return 0;
 
+out_wait_dependency:
+    return qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+
 fail:
     qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+fail_put:
+    QLIST_REMOVE(m, next_in_flight);
     return ret;
 }
 
diff --git a/block/qcow2.c b/block/qcow2.c
index 8451ded..2c51e7c 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -378,6 +378,7 @@
     uint64_t bytes_done;
     uint64_t cluster_offset;
     uint8_t *cluster_data;
+    bool is_write;
     BlockDriverAIOCB *hd_aiocb;
     QEMUIOVector hd_qiov;
     QEMUBH *bh;
@@ -399,12 +400,19 @@
 };
 
 static void qcow2_aio_read_cb(void *opaque, int ret);
-static void qcow2_aio_read_bh(void *opaque)
+static void qcow2_aio_write_cb(void *opaque, int ret);
+
+static void qcow2_aio_rw_bh(void *opaque)
 {
     QCowAIOCB *acb = opaque;
     qemu_bh_delete(acb->bh);
     acb->bh = NULL;
-    qcow2_aio_read_cb(opaque, 0);
+
+    if (acb->is_write) {
+        qcow2_aio_write_cb(opaque, 0);
+    } else {
+        qcow2_aio_read_cb(opaque, 0);
+    }
 }
 
 static int qcow2_schedule_bh(QEMUBHFunc *cb, QCowAIOCB *acb)
@@ -493,14 +501,14 @@
                     goto done;
                 }
             } else {
-                ret = qcow2_schedule_bh(qcow2_aio_read_bh, acb);
+                ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb);
                 if (ret < 0)
                     goto done;
             }
         } else {
             /* Note: in this case, no need to wait */
             qemu_iovec_memset(&acb->hd_qiov, 0, 512 * acb->cur_nr_sectors);
-            ret = qcow2_schedule_bh(qcow2_aio_read_bh, acb);
+            ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb);
             if (ret < 0)
                 goto done;
         }
@@ -515,7 +523,7 @@
             s->cluster_cache + index_in_cluster * 512,
             512 * acb->cur_nr_sectors);
 
-        ret = qcow2_schedule_bh(qcow2_aio_read_bh, acb);
+        ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb);
         if (ret < 0)
             goto done;
     } else {
@@ -572,6 +580,7 @@
     acb->hd_aiocb = NULL;
     acb->sector_num = sector_num;
     acb->qiov = qiov;
+    acb->is_write = is_write;
 
     qemu_iovec_init(&acb->hd_qiov, qiov->niov);
 
@@ -591,17 +600,22 @@
                                          void *opaque)
 {
     QCowAIOCB *acb;
+    int ret;
 
     acb = qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
     if (!acb)
         return NULL;
 
-    qcow2_aio_read_cb(acb, 0);
+    ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb);
+    if (ret < 0) {
+        qemu_iovec_destroy(&acb->hd_qiov);
+        qemu_aio_release(acb);
+        return NULL;
+    }
+
     return &acb->common;
 }
 
-static void qcow2_aio_write_cb(void *opaque, int ret);
-
 static void run_dependent_requests(QCowL2Meta *m)
 {
     QCowAIOCB *req;
@@ -724,6 +738,7 @@
 {
     BDRVQcowState *s = bs->opaque;
     QCowAIOCB *acb;
+    int ret;
 
     s->cluster_cache_offset = -1; /* disable compressed cache */
 
@@ -731,7 +746,13 @@
     if (!acb)
         return NULL;
 
-    qcow2_aio_write_cb(acb, 0);
+    ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb);
+    if (ret < 0) {
+        qemu_iovec_destroy(&acb->hd_qiov);
+        qemu_aio_release(acb);
+        return NULL;
+    }
+
     return &acb->common;
 }
 
diff --git a/block/rbd.c b/block/rbd.c
index bdc448a..d5659cd 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -227,7 +227,6 @@
     char name[RBD_MAX_IMAGE_NAME_SIZE];
     char snap_buf[RBD_MAX_SNAP_NAME_SIZE];
     char conf[RBD_MAX_CONF_SIZE];
-    char *snap = NULL;
     rados_t cluster;
     rados_ioctx_t io_ctx;
     int ret;
@@ -238,9 +237,6 @@
                            conf, sizeof(conf)) < 0) {
         return -EINVAL;
     }
-    if (snap_buf[0] != '\0') {
-        snap = snap_buf;
-    }
 
     /* Read out options */
     while (options && options->name) {
diff --git a/block/vdi.c b/block/vdi.c
index 4c9e201..261cf9b 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -152,6 +152,7 @@
     /* Buffer for new allocated block. */
     void *block_buffer;
     void *orig_buf;
+    bool is_write;
     int header_modified;
     BlockDriverAIOCB *hd_aiocb;
     struct iovec hd_iov;
@@ -504,6 +505,8 @@
         acb->hd_aiocb = NULL;
         acb->sector_num = sector_num;
         acb->qiov = qiov;
+        acb->is_write = is_write;
+
         if (qiov->niov > 1) {
             acb->buf = qemu_blockalign(bs, qiov->size);
             acb->orig_buf = acb->buf;
@@ -542,14 +545,20 @@
 }
 
 static void vdi_aio_read_cb(void *opaque, int ret);
+static void vdi_aio_write_cb(void *opaque, int ret);
 
-static void vdi_aio_read_bh(void *opaque)
+static void vdi_aio_rw_bh(void *opaque)
 {
     VdiAIOCB *acb = opaque;
     logout("\n");
     qemu_bh_delete(acb->bh);
     acb->bh = NULL;
-    vdi_aio_read_cb(opaque, 0);
+
+    if (acb->is_write) {
+        vdi_aio_write_cb(opaque, 0);
+    } else {
+        vdi_aio_read_cb(opaque, 0);
+    }
 }
 
 static void vdi_aio_read_cb(void *opaque, int ret)
@@ -597,7 +606,7 @@
     if (bmap_entry == VDI_UNALLOCATED) {
         /* Block not allocated, return zeros, no need to wait. */
         memset(acb->buf, 0, n_sectors * SECTOR_SIZE);
-        ret = vdi_schedule_bh(vdi_aio_read_bh, acb);
+        ret = vdi_schedule_bh(vdi_aio_rw_bh, acb);
         if (ret < 0) {
             goto done;
         }
@@ -630,12 +639,23 @@
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     VdiAIOCB *acb;
+    int ret;
+
     logout("\n");
     acb = vdi_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
     if (!acb) {
         return NULL;
     }
-    vdi_aio_read_cb(acb, 0);
+
+    ret = vdi_schedule_bh(vdi_aio_rw_bh, acb);
+    if (ret < 0) {
+        if (acb->qiov->niov > 1) {
+            qemu_vfree(acb->orig_buf);
+        }
+        qemu_aio_release(acb);
+        return NULL;
+    }
+
     return &acb->common;
 }
 
@@ -789,12 +809,23 @@
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     VdiAIOCB *acb;
+    int ret;
+
     logout("\n");
     acb = vdi_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
     if (!acb) {
         return NULL;
     }
-    vdi_aio_write_cb(acb, 0);
+
+    ret = vdi_schedule_bh(vdi_aio_rw_bh, acb);
+    if (ret < 0) {
+        if (acb->qiov->niov > 1) {
+            qemu_vfree(acb->orig_buf);
+        }
+        qemu_aio_release(acb);
+        return NULL;
+    }
+
     return &acb->common;
 }
 
diff --git a/block_int.h b/block_int.h
index fa91337..1e265d2 100644
--- a/block_int.h
+++ b/block_int.h
@@ -203,8 +203,8 @@
     void *private;
 };
 
-#define CHANGE_MEDIA	0x01
-#define CHANGE_SIZE	0x02
+#define CHANGE_MEDIA    0x01
+#define CHANGE_SIZE     0x02
 
 struct BlockDriverAIOCB {
     AIOPool *pool;
diff --git a/dma-helpers.c b/dma-helpers.c
index 712ed89..ba7f897 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -47,6 +47,7 @@
     target_phys_addr_t sg_cur_byte;
     QEMUIOVector iov;
     QEMUBH *bh;
+    DMAIOFunc *io_func;
 } DMAAIOCB;
 
 static void dma_bdrv_cb(void *opaque, int ret);
@@ -116,13 +117,8 @@
         return;
     }
 
-    if (dbs->is_write) {
-        dbs->acb = bdrv_aio_writev(dbs->bs, dbs->sector_num, &dbs->iov,
-                                   dbs->iov.size / 512, dma_bdrv_cb, dbs);
-    } else {
-        dbs->acb = bdrv_aio_readv(dbs->bs, dbs->sector_num, &dbs->iov,
-                                  dbs->iov.size / 512, dma_bdrv_cb, dbs);
-    }
+    dbs->acb = dbs->io_func(dbs->bs, dbs->sector_num, &dbs->iov,
+                            dbs->iov.size / 512, dma_bdrv_cb, dbs);
     if (!dbs->acb) {
         dma_bdrv_unmap(dbs);
         qemu_iovec_destroy(&dbs->iov);
@@ -144,12 +140,12 @@
     .cancel             = dma_aio_cancel,
 };
 
-static BlockDriverAIOCB *dma_bdrv_io(
+BlockDriverAIOCB *dma_bdrv_io(
     BlockDriverState *bs, QEMUSGList *sg, uint64_t sector_num,
-    BlockDriverCompletionFunc *cb, void *opaque,
-    int is_write)
+    DMAIOFunc *io_func, BlockDriverCompletionFunc *cb,
+    void *opaque, int is_write)
 {
-    DMAAIOCB *dbs =  qemu_aio_get(&dma_aio_pool, bs, cb, opaque);
+    DMAAIOCB *dbs = qemu_aio_get(&dma_aio_pool, bs, cb, opaque);
 
     dbs->acb = NULL;
     dbs->bs = bs;
@@ -158,6 +154,7 @@
     dbs->sg_cur_index = 0;
     dbs->sg_cur_byte = 0;
     dbs->is_write = is_write;
+    dbs->io_func = io_func;
     dbs->bh = NULL;
     qemu_iovec_init(&dbs->iov, sg->nsg);
     dma_bdrv_cb(dbs, 0);
@@ -173,12 +170,12 @@
                                 QEMUSGList *sg, uint64_t sector,
                                 void (*cb)(void *opaque, int ret), void *opaque)
 {
-    return dma_bdrv_io(bs, sg, sector, cb, opaque, 0);
+    return dma_bdrv_io(bs, sg, sector, bdrv_aio_readv, cb, opaque, 0);
 }
 
 BlockDriverAIOCB *dma_bdrv_write(BlockDriverState *bs,
                                  QEMUSGList *sg, uint64_t sector,
                                  void (*cb)(void *opaque, int ret), void *opaque)
 {
-    return dma_bdrv_io(bs, sg, sector, cb, opaque, 1);
+    return dma_bdrv_io(bs, sg, sector, bdrv_aio_writev, cb, opaque, 1);
 }
diff --git a/dma.h b/dma.h
index f3bb275..3d8324b 100644
--- a/dma.h
+++ b/dma.h
@@ -32,6 +32,14 @@
                      target_phys_addr_t len);
 void qemu_sglist_destroy(QEMUSGList *qsg);
 
+typedef BlockDriverAIOCB *DMAIOFunc(BlockDriverState *bs, int64_t sector_num,
+                                 QEMUIOVector *iov, int nb_sectors,
+                                 BlockDriverCompletionFunc *cb, void *opaque);
+
+BlockDriverAIOCB *dma_bdrv_io(BlockDriverState *bs,
+                              QEMUSGList *sg, uint64_t sector_num,
+                              DMAIOFunc *io_func, BlockDriverCompletionFunc *cb,
+                              void *opaque, int is_write);
 BlockDriverAIOCB *dma_bdrv_read(BlockDriverState *bs,
                                 QEMUSGList *sg, uint64_t sector,
                                 BlockDriverCompletionFunc *cb, void *opaque);
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 95beb17..ca17a43 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -78,7 +78,7 @@
 {
     uint16_t *p;
     unsigned int oldsize;
-    IDEDevice *dev;
+    IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master;
 
     if (s->identify_set) {
 	memcpy(s->io_buffer, s->identify_data, sizeof(s->identify_data));
@@ -124,6 +124,9 @@
     put_le16(p + 66, 120);
     put_le16(p + 67, 120);
     put_le16(p + 68, 120);
+    if (dev && dev->conf.discard_granularity) {
+        put_le16(p + 69, (1 << 14)); /* determinate TRIM behavior */
+    }
 
     if (s->ncq_queues) {
         put_le16(p + 75, s->ncq_queues - 1);
@@ -154,9 +157,12 @@
     put_le16(p + 101, s->nb_sectors >> 16);
     put_le16(p + 102, s->nb_sectors >> 32);
     put_le16(p + 103, s->nb_sectors >> 48);
-    dev = s->unit ? s->bus->slave : s->bus->master;
+
     if (dev && dev->conf.physical_block_size)
         put_le16(p + 106, 0x6000 | get_physical_block_exp(&dev->conf));
+    if (dev && dev->conf.discard_granularity) {
+        put_le16(p + 169, 1); /* TRIM support */
+    }
 
     memcpy(s->identify_data, p, sizeof(s->identify_data));
     s->identify_set = 1;
@@ -299,6 +305,74 @@
     }
 }
 
+typedef struct TrimAIOCB {
+    BlockDriverAIOCB common;
+    QEMUBH *bh;
+    int ret;
+} TrimAIOCB;
+
+static void trim_aio_cancel(BlockDriverAIOCB *acb)
+{
+    TrimAIOCB *iocb = container_of(acb, TrimAIOCB, common);
+
+    qemu_bh_delete(iocb->bh);
+    iocb->bh = NULL;
+    qemu_aio_release(iocb);
+}
+
+static AIOPool trim_aio_pool = {
+    .aiocb_size         = sizeof(TrimAIOCB),
+    .cancel             = trim_aio_cancel,
+};
+
+static void ide_trim_bh_cb(void *opaque)
+{
+    TrimAIOCB *iocb = opaque;
+
+    iocb->common.cb(iocb->common.opaque, iocb->ret);
+
+    qemu_bh_delete(iocb->bh);
+    iocb->bh = NULL;
+
+    qemu_aio_release(iocb);
+}
+
+BlockDriverAIOCB *ide_issue_trim(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    TrimAIOCB *iocb;
+    int i, j, ret;
+
+    iocb = qemu_aio_get(&trim_aio_pool, bs, cb, opaque);
+    iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb);
+    iocb->ret = 0;
+
+    for (j = 0; j < qiov->niov; j++) {
+        uint64_t *buffer = qiov->iov[j].iov_base;
+
+        for (i = 0; i < qiov->iov[j].iov_len / 8; i++) {
+            /* 6-byte LBA + 2-byte range per entry */
+            uint64_t entry = le64_to_cpu(buffer[i]);
+            uint64_t sector = entry & 0x0000ffffffffffffULL;
+            uint16_t count = entry >> 48;
+
+            if (count == 0) {
+                break;
+            }
+
+            ret = bdrv_discard(bs, sector, count);
+            if (!iocb->ret) {
+                iocb->ret = ret;
+            }
+        }
+    }
+
+    qemu_bh_schedule(iocb->bh);
+
+    return &iocb->common;
+}
+
 static inline void ide_abort_command(IDEState *s)
 {
     s->status = READY_STAT | ERR_STAT;
@@ -446,7 +520,7 @@
     if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC)
             || action == BLOCK_ERR_STOP_ANY) {
         s->bus->dma->ops->set_unit(s->bus->dma, s->unit);
-        s->bus->dma->ops->add_status(s->bus->dma, op);
+        s->bus->error_status = op;
         bdrv_mon_event(s->bs, BDRV_ACTION_STOP, is_read);
         vm_stop(VMSTOP_DISKFULL);
     } else {
@@ -472,8 +546,11 @@
     if (ret < 0) {
         int op = BM_STATUS_DMA_RETRY;
 
-        if (s->is_read)
+        if (s->dma_cmd == IDE_DMA_READ)
             op |= BM_STATUS_RETRY_READ;
+        else if (s->dma_cmd == IDE_DMA_TRIM)
+            op |= BM_STATUS_RETRY_TRIM;
+
         if (ide_handle_rw_error(s, -ret, op)) {
             return;
         }
@@ -482,7 +559,7 @@
     n = s->io_buffer_size >> 9;
     sector_num = ide_get_sector(s);
     if (n > 0) {
-        dma_buf_commit(s, s->is_read);
+        dma_buf_commit(s, ide_cmd_is_read(s));
         sector_num += n;
         ide_set_sector(s, sector_num);
         s->nsector -= n;
@@ -499,23 +576,30 @@
     n = s->nsector;
     s->io_buffer_index = 0;
     s->io_buffer_size = n * 512;
-    if (s->bus->dma->ops->prepare_buf(s->bus->dma, s->is_read) == 0) {
+    if (s->bus->dma->ops->prepare_buf(s->bus->dma, ide_cmd_is_read(s)) == 0) {
         /* The PRDs were too short. Reset the Active bit, but don't raise an
          * interrupt. */
         goto eot;
     }
 
 #ifdef DEBUG_AIO
-    printf("ide_dma_cb: sector_num=%" PRId64 " n=%d, is_read=%d\n",
-           sector_num, n, s->is_read);
+    printf("ide_dma_cb: sector_num=%" PRId64 " n=%d, cmd_cmd=%d\n",
+           sector_num, n, s->dma_cmd);
 #endif
 
-    if (s->is_read) {
+    switch (s->dma_cmd) {
+    case IDE_DMA_READ:
         s->bus->dma->aiocb = dma_bdrv_read(s->bs, &s->sg, sector_num,
                                            ide_dma_cb, s);
-    } else {
+        break;
+    case IDE_DMA_WRITE:
         s->bus->dma->aiocb = dma_bdrv_write(s->bs, &s->sg, sector_num,
                                             ide_dma_cb, s);
+        break;
+    case IDE_DMA_TRIM:
+        s->bus->dma->aiocb = dma_bdrv_io(s->bs, &s->sg, sector_num,
+                                         ide_issue_trim, ide_dma_cb, s, 1);
+        break;
     }
 
     if (!s->bus->dma->aiocb) {
@@ -528,12 +612,12 @@
    ide_set_inactive(s);
 }
 
-static void ide_sector_start_dma(IDEState *s, int is_read)
+static void ide_sector_start_dma(IDEState *s, enum ide_dma_cmd dma_cmd)
 {
     s->status = READY_STAT | SEEK_STAT | DRQ_STAT | BUSY_STAT;
     s->io_buffer_index = 0;
     s->io_buffer_size = 0;
-    s->is_read = is_read;
+    s->dma_cmd = dma_cmd;
     s->bus->dma->ops->start_dma(s->bus->dma, s, ide_dma_cb);
 }
 
@@ -815,6 +899,18 @@
         return;
 
     switch(val) {
+    case WIN_DSM:
+        switch (s->feature) {
+        case DSM_TRIM:
+            if (!s->bs) {
+                goto abort_cmd;
+            }
+            ide_sector_start_dma(s, IDE_DMA_TRIM);
+            break;
+        default:
+            goto abort_cmd;
+        }
+        break;
     case WIN_IDENTIFY:
         if (s->bs && s->drive_kind != IDE_CD) {
             if (s->drive_kind != IDE_CFATA)
@@ -916,7 +1012,7 @@
         if (!s->bs)
             goto abort_cmd;
 	ide_cmd_lba48_transform(s, lba48);
-        ide_sector_start_dma(s, 1);
+        ide_sector_start_dma(s, IDE_DMA_READ);
         break;
 	case WIN_WRITEDMA_EXT:
 	lba48 = 1;
@@ -925,7 +1021,7 @@
         if (!s->bs)
             goto abort_cmd;
 	ide_cmd_lba48_transform(s, lba48);
-        ide_sector_start_dma(s, 0);
+        ide_sector_start_dma(s, IDE_DMA_WRITE);
         s->media_changed = 1;
         break;
     case WIN_READ_NATIVE_MAX_EXT:
@@ -1837,7 +1933,8 @@
 {
     IDEState *s = opaque;
 
-    return (s->status & DRQ_STAT) != 0;
+    return ((s->status & DRQ_STAT) != 0)
+        || (s->bus->error_status & BM_STATUS_PIO_RETRY);
 }
 
 static bool ide_atapi_gesn_needed(void *opaque)
@@ -1847,6 +1944,13 @@
     return s->events.new_media || s->events.eject_request;
 }
 
+static bool ide_error_needed(void *opaque)
+{
+    IDEBus *bus = opaque;
+
+    return (bus->error_status != 0);
+}
+
 /* Fields for GET_EVENT_STATUS_NOTIFICATION ATAPI command */
 const VMStateDescription vmstate_ide_atapi_gesn_state = {
     .name ="ide_drive/atapi/gesn_state",
@@ -1856,6 +1960,7 @@
     .fields = (VMStateField []) {
         VMSTATE_BOOL(events.new_media, IDEState),
         VMSTATE_BOOL(events.eject_request, IDEState),
+        VMSTATE_END_OF_LIST()
     }
 };
 
@@ -1921,6 +2026,17 @@
     }
 };
 
+const VMStateDescription vmstate_ide_error_status = {
+    .name ="ide_bus/error",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields = (VMStateField []) {
+        VMSTATE_INT32(error_status, IDEBus),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 const VMStateDescription vmstate_ide_bus = {
     .name = "ide_bus",
     .version_id = 1,
@@ -1930,6 +2046,14 @@
         VMSTATE_UINT8(cmd, IDEBus),
         VMSTATE_UINT8(unit, IDEBus),
         VMSTATE_END_OF_LIST()
+    },
+    .subsections = (VMStateSubsection []) {
+        {
+            .vmsd = &vmstate_ide_error_status,
+            .needed = ide_error_needed,
+        }, {
+            /* empty */
+        }
     }
 };
 
diff --git a/hw/ide/internal.h b/hw/ide/internal.h
index c2b35ec..02e805f 100644
--- a/hw/ide/internal.h
+++ b/hw/ide/internal.h
@@ -62,7 +62,11 @@
  */
 #define CFA_REQ_EXT_ERROR_CODE		0x03 /* CFA Request Extended Error Code */
 /*
- *	0x04->0x07 Reserved
+ *      0x04->0x05 Reserved
+ */
+#define WIN_DSM                         0x06
+/*
+ *      0x07 Reserved
  */
 #define WIN_SRST			0x08 /* ATAPI soft reset command */
 #define WIN_DEVICE_RESET		0x08
@@ -190,6 +194,9 @@
 
 #define IDE_DMA_BUF_SECTORS 256
 
+/* feature values for Data Set Management */
+#define DSM_TRIM                        0x01
+
 #if (IDE_DMA_BUF_SECTORS < MAX_MULT_SECTORS)
 #error "IDE_DMA_BUF_SECTORS must be bigger or equal to MAX_MULT_SECTORS"
 #endif
@@ -379,6 +386,15 @@
     bool new_media;
 };
 
+enum ide_dma_cmd {
+    IDE_DMA_READ,
+    IDE_DMA_WRITE,
+    IDE_DMA_TRIM,
+};
+
+#define ide_cmd_is_read(s) \
+	((s)->dma_cmd == IDE_DMA_READ)
+
 /* NOTE: IDEState represents in fact one drive */
 struct IDEState {
     IDEBus *bus;
@@ -446,7 +462,7 @@
     uint32_t mdata_size;
     uint8_t *mdata_storage;
     int media_changed;
-    int is_read;
+    enum ide_dma_cmd dma_cmd;
     /* SMART */
     uint8_t smart_enabled;
     uint8_t smart_autosave;
@@ -486,6 +502,8 @@
     uint8_t unit;
     uint8_t cmd;
     qemu_irq irq;
+
+    int error_status;
 };
 
 struct IDEDevice {
@@ -505,10 +523,17 @@
 #define BM_STATUS_DMAING 0x01
 #define BM_STATUS_ERROR  0x02
 #define BM_STATUS_INT    0x04
+
+/* FIXME These are not status register bits */
 #define BM_STATUS_DMA_RETRY  0x08
 #define BM_STATUS_PIO_RETRY  0x10
 #define BM_STATUS_RETRY_READ  0x20
 #define BM_STATUS_RETRY_FLUSH 0x40
+#define BM_STATUS_RETRY_TRIM 0x80
+
+#define BM_MIGRATION_COMPAT_STATUS_BITS \
+        (BM_STATUS_DMA_RETRY | BM_STATUS_PIO_RETRY | \
+        BM_STATUS_RETRY_READ | BM_STATUS_RETRY_FLUSH)
 
 #define BM_CMD_START     0x01
 #define BM_CMD_READ      0x08
@@ -575,6 +600,9 @@
                         EndTransferFunc *end_transfer_func);
 void ide_transfer_stop(IDEState *s);
 void ide_set_inactive(IDEState *s);
+BlockDriverAIOCB *ide_issue_trim(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque);
 
 /* hw/ide/atapi.c */
 void ide_atapi_cmd(IDEState *s);
diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index 7107f6b..7daeb31 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c
@@ -145,12 +145,21 @@
     io->addr += io->len;
     io->len = 0;
 
-    if (s->is_read)
+    switch (s->dma_cmd) {
+    case IDE_DMA_READ:
         m->aiocb = dma_bdrv_read(s->bs, &s->sg, sector_num,
 		                 pmac_ide_transfer_cb, io);
-    else
+        break;
+    case IDE_DMA_WRITE:
         m->aiocb = dma_bdrv_write(s->bs, &s->sg, sector_num,
 		                  pmac_ide_transfer_cb, io);
+        break;
+    case IDE_DMA_TRIM:
+        m->aiocb = dma_bdrv_io(s->bs, &s->sg, sector_num,
+                               ide_issue_trim, pmac_ide_transfer_cb, s, 1);
+        break;
+    }
+
     if (!m->aiocb)
         pmac_ide_transfer_cb(io, -1);
 }
diff --git a/hw/ide/pci.c b/hw/ide/pci.c
index a4726ad..9f3050a 100644
--- a/hw/ide/pci.c
+++ b/hw/ide/pci.c
@@ -169,7 +169,7 @@
     return 0;
 }
 
-static void bmdma_restart_dma(BMDMAState *bm, int is_read)
+static void bmdma_restart_dma(BMDMAState *bm, enum ide_dma_cmd dma_cmd)
 {
     IDEState *s = bmdma_active_if(bm);
 
@@ -177,33 +177,48 @@
     s->io_buffer_index = 0;
     s->io_buffer_size = 0;
     s->nsector = bm->nsector;
-    s->is_read = is_read;
+    s->dma_cmd = dma_cmd;
     bm->cur_addr = bm->addr;
     bm->dma_cb = ide_dma_cb;
     bmdma_start_dma(&bm->dma, s, bm->dma_cb);
 }
 
+/* TODO This should be common IDE code */
 static void bmdma_restart_bh(void *opaque)
 {
     BMDMAState *bm = opaque;
+    IDEBus *bus = bm->bus;
     int is_read;
+    int error_status;
 
     qemu_bh_delete(bm->bh);
     bm->bh = NULL;
 
-    is_read = !!(bm->status & BM_STATUS_RETRY_READ);
+    if (bm->unit == (uint8_t) -1) {
+        return;
+    }
 
-    if (bm->status & BM_STATUS_DMA_RETRY) {
-        bm->status &= ~(BM_STATUS_DMA_RETRY | BM_STATUS_RETRY_READ);
-        bmdma_restart_dma(bm, is_read);
-    } else if (bm->status & BM_STATUS_PIO_RETRY) {
-        bm->status &= ~(BM_STATUS_PIO_RETRY | BM_STATUS_RETRY_READ);
+    is_read = !!(bus->error_status & BM_STATUS_RETRY_READ);
+
+    /* The error status must be cleared before resubmitting the request: The
+     * request may fail again, and this case can only be distinguished if the
+     * called function can set a new error status. */
+    error_status = bus->error_status;
+    bus->error_status = 0;
+
+    if (error_status & BM_STATUS_DMA_RETRY) {
+        if (error_status & BM_STATUS_RETRY_TRIM) {
+            bmdma_restart_dma(bm, IDE_DMA_TRIM);
+        } else {
+            bmdma_restart_dma(bm, is_read ? IDE_DMA_READ : IDE_DMA_WRITE);
+        }
+    } else if (error_status & BM_STATUS_PIO_RETRY) {
         if (is_read) {
             ide_sector_read(bmdma_active_if(bm));
         } else {
             ide_sector_write(bmdma_active_if(bm));
         }
-    } else if (bm->status & BM_STATUS_RETRY_FLUSH) {
+    } else if (error_status & BM_STATUS_RETRY_FLUSH) {
         ide_flush_cache(bmdma_active_if(bm));
     }
 }
@@ -351,6 +366,43 @@
     return (bm->cur_prd_len != 0);
 }
 
+static bool ide_bmdma_status_needed(void *opaque)
+{
+    BMDMAState *bm = opaque;
+
+    /* Older versions abused some bits in the status register for internal
+     * error state. If any of these bits are set, we must add a subsection to
+     * transfer the real status register */
+    uint8_t abused_bits = BM_MIGRATION_COMPAT_STATUS_BITS;
+
+    return ((bm->status & abused_bits) != 0);
+}
+
+static void ide_bmdma_pre_save(void *opaque)
+{
+    BMDMAState *bm = opaque;
+    uint8_t abused_bits = BM_MIGRATION_COMPAT_STATUS_BITS;
+
+    bm->migration_compat_status =
+        (bm->status & ~abused_bits) | (bm->bus->error_status & abused_bits);
+}
+
+/* This function accesses bm->bus->error_status which is loaded only after
+ * BMDMA itself. This is why the function is called from ide_pci_post_load
+ * instead of being registered with VMState where it would run too early. */
+static int ide_bmdma_post_load(void *opaque, int version_id)
+{
+    BMDMAState *bm = opaque;
+    uint8_t abused_bits = BM_MIGRATION_COMPAT_STATUS_BITS;
+
+    if (bm->status == 0) {
+        bm->status = bm->migration_compat_status & ~abused_bits;
+        bm->bus->error_status |= bm->migration_compat_status & abused_bits;
+    }
+
+    return 0;
+}
+
 static const VMStateDescription vmstate_bmdma_current = {
     .name = "ide bmdma_current",
     .version_id = 1,
@@ -365,15 +417,26 @@
     }
 };
 
+const VMStateDescription vmstate_bmdma_status = {
+    .name ="ide bmdma/status",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields = (VMStateField []) {
+        VMSTATE_UINT8(status, BMDMAState),
+        VMSTATE_END_OF_LIST()
+    }
+};
 
 static const VMStateDescription vmstate_bmdma = {
     .name = "ide bmdma",
     .version_id = 3,
     .minimum_version_id = 0,
     .minimum_version_id_old = 0,
+    .pre_save  = ide_bmdma_pre_save,
     .fields      = (VMStateField []) {
         VMSTATE_UINT8(cmd, BMDMAState),
-        VMSTATE_UINT8(status, BMDMAState),
+        VMSTATE_UINT8(migration_compat_status, BMDMAState),
         VMSTATE_UINT32(addr, BMDMAState),
         VMSTATE_INT64(sector_num, BMDMAState),
         VMSTATE_UINT32(nsector, BMDMAState),
@@ -385,6 +448,9 @@
             .vmsd = &vmstate_bmdma_current,
             .needed = ide_bmdma_current_needed,
         }, {
+            .vmsd = &vmstate_bmdma_status,
+            .needed = ide_bmdma_status_needed,
+        }, {
             /* empty */
         }
     }
@@ -399,7 +465,9 @@
         /* current versions always store 0/1, but older version
            stored bigger values. We only need last bit */
         d->bmdma[i].unit &= 1;
+        ide_bmdma_post_load(&d->bmdma[i], -1);
     }
+
     return 0;
 }
 
diff --git a/hw/ide/pci.h b/hw/ide/pci.h
index cd72cba..b4f3691 100644
--- a/hw/ide/pci.h
+++ b/hw/ide/pci.h
@@ -22,6 +22,10 @@
     IORange addr_ioport;
     QEMUBH *bh;
     qemu_irq irq;
+
+    /* Bit 0-2 and 7:   BM status register
+     * Bit 3-6:         bus->error_status */
+    uint8_t migration_compat_status;
 } BMDMAState;
 
 typedef struct PCIIDEState {
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
index 3f9dc89..d9b8f24 100644
--- a/hw/ide/qdev.c
+++ b/hw/ide/qdev.c
@@ -125,6 +125,11 @@
     const char *serial;
     DriveInfo *dinfo;
 
+    if (dev->conf.discard_granularity && dev->conf.discard_granularity != 512) {
+        error_report("discard_granularity must be 512 for ide");
+        return -1;
+    }
+
     serial = dev->serial;
     if (!serial) {
         /* try to fall back to value set with legacy -drive serial=... */