Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging

# gpg: Signature made Fri May 22 10:00:53 2015 BST using RSA key ID 81AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"

* remotes/stefanha/tags/block-pull-request: (38 commits)
  block: get_block_status: use "else" when testing the opposite condition
  qemu-iotests: Test unaligned sub-block zero write
  block: Fix NULL deference for unaligned write if qiov is NULL
  Revert "block: Fix unaligned zero write"
  block: align bounce buffers to page
  block: minimal bounce buffer alignment
  block: return EPERM on writes or discards to read-only devices
  configure: Add workaround for ccache and clang
  configure: silence glib unknown attribute __alloc_size__
  configure: factor out supported flag check
  configure: handle clang -nopie argument warning
  block/parallels: improve image writing performance further
  block/parallels: optimize linear image expansion
  block/parallels: add prealloc-mode and prealloc-size open paramemets
  block/parallels: delay writing to BAT till bdrv_co_flush_to_os
  block/parallels: create bat_entry_off helper
  block/parallels: improve image reading performance
  iotests, parallels: check for incorrectly closed image in tests
  block/parallels: implement incorrect close detection
  block/parallels: implement parallels_check method of block driver
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/block.c b/block.c
index 7904098..325f727 100644
--- a/block.c
+++ b/block.c
@@ -106,13 +106,23 @@
 size_t bdrv_opt_mem_align(BlockDriverState *bs)
 {
     if (!bs || !bs->drv) {
-        /* 4k should be on the safe side */
-        return 4096;
+        /* page size or 4k (hdd sector size) should be on the safe side */
+        return MAX(4096, getpagesize());
     }
 
     return bs->bl.opt_mem_alignment;
 }
 
+size_t bdrv_min_mem_align(BlockDriverState *bs)
+{
+    if (!bs || !bs->drv) {
+        /* page size or 4k (hdd sector size) should be on the safe side */
+        return MAX(4096, getpagesize());
+    }
+
+    return bs->bl.min_mem_alignment;
+}
+
 /* check if the path starts with "<protocol>:" */
 int path_has_protocol(const char *path)
 {
@@ -890,6 +900,7 @@
     }
 
     assert(bdrv_opt_mem_align(bs) != 0);
+    assert(bdrv_min_mem_align(bs) != 0);
     assert((bs->request_alignment != 0) || bs->sg);
     return 0;
 
diff --git a/block/io.c b/block/io.c
index 1ce62c4..e394d92 100644
--- a/block/io.c
+++ b/block/io.c
@@ -201,9 +201,11 @@
         }
         bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
         bs->bl.max_transfer_length = bs->file->bl.max_transfer_length;
+        bs->bl.min_mem_alignment = bs->file->bl.min_mem_alignment;
         bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
     } else {
-        bs->bl.opt_mem_alignment = 512;
+        bs->bl.min_mem_alignment = 512;
+        bs->bl.opt_mem_alignment = getpagesize();
     }
 
     if (bs->backing_hd) {
@@ -221,6 +223,9 @@
         bs->bl.opt_mem_alignment =
             MAX(bs->bl.opt_mem_alignment,
                 bs->backing_hd->bl.opt_mem_alignment);
+        bs->bl.min_mem_alignment =
+            MAX(bs->bl.min_mem_alignment,
+                bs->backing_hd->bl.min_mem_alignment);
     }
 
     /* Then let the driver override it */
@@ -929,19 +934,6 @@
     return ret;
 }
 
-static inline uint64_t bdrv_get_align(BlockDriverState *bs)
-{
-    /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
-    return MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
-}
-
-static inline bool bdrv_req_is_aligned(BlockDriverState *bs,
-                                       int64_t offset, size_t bytes)
-{
-    int64_t align = bdrv_get_align(bs);
-    return !(offset & (align - 1) || (bytes & (align - 1)));
-}
-
 /*
  * Handle a read request in coroutine context
  */
@@ -952,7 +944,8 @@
     BlockDriver *drv = bs->drv;
     BdrvTrackedRequest req;
 
-    uint64_t align = bdrv_get_align(bs);
+    /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
+    uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
     uint8_t *head_buf = NULL;
     uint8_t *tail_buf = NULL;
     QEMUIOVector local_qiov;
@@ -1186,6 +1179,94 @@
     return ret;
 }
 
+static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
+                                                int64_t offset,
+                                                unsigned int bytes,
+                                                BdrvRequestFlags flags,
+                                                BdrvTrackedRequest *req)
+{
+    uint8_t *buf = NULL;
+    QEMUIOVector local_qiov;
+    struct iovec iov;
+    uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
+    unsigned int head_padding_bytes, tail_padding_bytes;
+    int ret = 0;
+
+    head_padding_bytes = offset & (align - 1);
+    tail_padding_bytes = align - ((offset + bytes) & (align - 1));
+
+
+    assert(flags & BDRV_REQ_ZERO_WRITE);
+    if (head_padding_bytes || tail_padding_bytes) {
+        buf = qemu_blockalign(bs, align);
+        iov = (struct iovec) {
+            .iov_base   = buf,
+            .iov_len    = align,
+        };
+        qemu_iovec_init_external(&local_qiov, &iov, 1);
+    }
+    if (head_padding_bytes) {
+        uint64_t zero_bytes = MIN(bytes, align - head_padding_bytes);
+
+        /* RMW the unaligned part before head. */
+        mark_request_serialising(req, align);
+        wait_serialising_requests(req);
+        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
+        ret = bdrv_aligned_preadv(bs, req, offset & ~(align - 1), align,
+                                  align, &local_qiov, 0);
+        if (ret < 0) {
+            goto fail;
+        }
+        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
+
+        memset(buf + head_padding_bytes, 0, zero_bytes);
+        ret = bdrv_aligned_pwritev(bs, req, offset & ~(align - 1), align,
+                                   &local_qiov,
+                                   flags & ~BDRV_REQ_ZERO_WRITE);
+        if (ret < 0) {
+            goto fail;
+        }
+        offset += zero_bytes;
+        bytes -= zero_bytes;
+    }
+
+    assert(!bytes || (offset & (align - 1)) == 0);
+    if (bytes >= align) {
+        /* Write the aligned part in the middle. */
+        uint64_t aligned_bytes = bytes & ~(align - 1);
+        ret = bdrv_aligned_pwritev(bs, req, offset, aligned_bytes,
+                                   NULL, flags);
+        if (ret < 0) {
+            goto fail;
+        }
+        bytes -= aligned_bytes;
+        offset += aligned_bytes;
+    }
+
+    assert(!bytes || (offset & (align - 1)) == 0);
+    if (bytes) {
+        assert(align == tail_padding_bytes + bytes);
+        /* RMW the unaligned part after tail. */
+        mark_request_serialising(req, align);
+        wait_serialising_requests(req);
+        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
+        ret = bdrv_aligned_preadv(bs, req, offset, align,
+                                  align, &local_qiov, 0);
+        if (ret < 0) {
+            goto fail;
+        }
+        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
+
+        memset(buf, 0, bytes);
+        ret = bdrv_aligned_pwritev(bs, req, offset, align,
+                                   &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE);
+    }
+fail:
+    qemu_vfree(buf);
+    return ret;
+
+}
+
 /*
  * Handle a write request in coroutine context
  */
@@ -1194,7 +1275,8 @@
     BdrvRequestFlags flags)
 {
     BdrvTrackedRequest req;
-    uint64_t align = bdrv_get_align(bs);
+    /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
+    uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
     uint8_t *head_buf = NULL;
     uint8_t *tail_buf = NULL;
     QEMUIOVector local_qiov;
@@ -1205,7 +1287,7 @@
         return -ENOMEDIUM;
     }
     if (bs->read_only) {
-        return -EACCES;
+        return -EPERM;
     }
 
     ret = bdrv_check_byte_request(bs, offset, bytes);
@@ -1225,6 +1307,11 @@
      */
     tracked_request_begin(&req, bs, offset, bytes, true);
 
+    if (!qiov) {
+        ret = bdrv_co_do_zero_pwritev(bs, offset, bytes, flags, &req);
+        goto out;
+    }
+
     if (offset & (align - 1)) {
         QEMUIOVector head_qiov;
         struct iovec head_iov;
@@ -1293,23 +1380,19 @@
         bytes = ROUND_UP(bytes, align);
     }
 
-    if (use_local_qiov) {
-        /* Local buffer may have non-zero data. */
-        flags &= ~BDRV_REQ_ZERO_WRITE;
-    }
     ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
                                use_local_qiov ? &local_qiov : qiov,
                                flags);
 
 fail:
-    tracked_request_end(&req);
 
     if (use_local_qiov) {
         qemu_iovec_destroy(&local_qiov);
     }
     qemu_vfree(head_buf);
     qemu_vfree(tail_buf);
-
+out:
+    tracked_request_end(&req);
     return ret;
 }
 
@@ -1337,32 +1420,14 @@
                                       int64_t sector_num, int nb_sectors,
                                       BdrvRequestFlags flags)
 {
-    int ret;
-
     trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
 
     if (!(bs->open_flags & BDRV_O_UNMAP)) {
         flags &= ~BDRV_REQ_MAY_UNMAP;
     }
-    if (bdrv_req_is_aligned(bs, sector_num << BDRV_SECTOR_BITS,
-                            nb_sectors << BDRV_SECTOR_BITS)) {
-        ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
-                                BDRV_REQ_ZERO_WRITE | flags);
-    } else {
-        uint8_t *buf;
-        QEMUIOVector local_qiov;
-        size_t bytes = nb_sectors << BDRV_SECTOR_BITS;
 
-        buf = qemu_memalign(bdrv_opt_mem_align(bs), bytes);
-        memset(buf, 0, bytes);
-        qemu_iovec_init(&local_qiov, 1);
-        qemu_iovec_add(&local_qiov, buf, bytes);
-
-        ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, &local_qiov,
-                                BDRV_REQ_ZERO_WRITE | flags);
-        qemu_vfree(buf);
-    }
-    return ret;
+    return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
+                             BDRV_REQ_ZERO_WRITE | flags);
 }
 
 int bdrv_flush_all(void)
@@ -1456,9 +1521,7 @@
 
     if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
         ret |= BDRV_BLOCK_ALLOCATED;
-    }
-
-    if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
+    } else {
         if (bdrv_unallocated_blocks_are_zero(bs)) {
             ret |= BDRV_BLOCK_ZERO;
         } else if (bs->backing_hd) {
@@ -2340,7 +2403,7 @@
     if (ret < 0) {
         return ret;
     } else if (bs->read_only) {
-        return -EROFS;
+        return -EPERM;
     }
 
     bdrv_reset_dirty(bs, sector_num, nb_sectors);
@@ -2489,7 +2552,7 @@
 bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
 {
     int i;
-    size_t alignment = bdrv_opt_mem_align(bs);
+    size_t alignment = bdrv_min_mem_align(bs);
 
     for (i = 0; i < qiov->niov; i++) {
         if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
diff --git a/block/parallels.c b/block/parallels.c
index 4f9cd8d..046b568 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -2,8 +2,12 @@
  * Block driver for Parallels disk image format
  *
  * Copyright (c) 2007 Alex Beregszaszi
+ * Copyright (c) 2015 Denis V. Lunev <den@openvz.org>
  *
- * This code is based on comparing different disk images created by Parallels.
+ * This code was originally based on comparing different disk images created
+ * by Parallels. Currently it is based on opened OpenVZ sources
+ * available at
+ *     http://git.openvz.org/?p=ploop;a=summary
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -26,63 +30,539 @@
 #include "qemu-common.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
+#include "qemu/bitmap.h"
+#include "qapi/util.h"
 
 /**************************************************************/
 
 #define HEADER_MAGIC "WithoutFreeSpace"
 #define HEADER_MAGIC2 "WithouFreSpacExt"
 #define HEADER_VERSION 2
-#define HEADER_SIZE 64
+#define HEADER_INUSE_MAGIC  (0x746F6E59)
+
+#define DEFAULT_CLUSTER_SIZE 1048576        /* 1 MiB */
+
 
 // always little-endian
-struct parallels_header {
+typedef struct ParallelsHeader {
     char magic[16]; // "WithoutFreeSpace"
     uint32_t version;
     uint32_t heads;
     uint32_t cylinders;
     uint32_t tracks;
-    uint32_t catalog_entries;
+    uint32_t bat_entries;
     uint64_t nb_sectors;
     uint32_t inuse;
     uint32_t data_off;
     char padding[12];
-} QEMU_PACKED;
+} QEMU_PACKED ParallelsHeader;
+
+
+typedef enum ParallelsPreallocMode {
+    PRL_PREALLOC_MODE_FALLOCATE = 0,
+    PRL_PREALLOC_MODE_TRUNCATE = 1,
+    PRL_PREALLOC_MODE_MAX = 2,
+} ParallelsPreallocMode;
+
+static const char *prealloc_mode_lookup[] = {
+    "falloc",
+    "truncate",
+    NULL,
+};
+
 
 typedef struct BDRVParallelsState {
+    /** Locking is conservative, the lock protects
+     *   - image file extending (truncate, fallocate)
+     *   - any access to block allocation table
+     */
     CoMutex lock;
 
-    uint32_t *catalog_bitmap;
-    unsigned int catalog_size;
+    ParallelsHeader *header;
+    uint32_t header_size;
+    bool header_unclean;
+
+    unsigned long *bat_dirty_bmap;
+    unsigned int  bat_dirty_block;
+
+    uint32_t *bat_bitmap;
+    unsigned int bat_size;
+
+    int64_t  data_end;
+    uint64_t prealloc_size;
+    ParallelsPreallocMode prealloc_mode;
 
     unsigned int tracks;
 
     unsigned int off_multiplier;
 } BDRVParallelsState;
 
-static int parallels_probe(const uint8_t *buf, int buf_size, const char *filename)
+
+#define PARALLELS_OPT_PREALLOC_MODE     "prealloc-mode"
+#define PARALLELS_OPT_PREALLOC_SIZE     "prealloc-size"
+
+static QemuOptsList parallels_runtime_opts = {
+    .name = "parallels",
+    .head = QTAILQ_HEAD_INITIALIZER(parallels_runtime_opts.head),
+    .desc = {
+        {
+            .name = PARALLELS_OPT_PREALLOC_SIZE,
+            .type = QEMU_OPT_SIZE,
+            .help = "Preallocation size on image expansion",
+            .def_value_str = "128MiB",
+        },
+        {
+            .name = PARALLELS_OPT_PREALLOC_MODE,
+            .type = QEMU_OPT_STRING,
+            .help = "Preallocation mode on image expansion "
+                    "(allowed values: falloc, truncate)",
+            .def_value_str = "falloc",
+        },
+        { /* end of list */ },
+    },
+};
+
+
+static int64_t bat2sect(BDRVParallelsState *s, uint32_t idx)
 {
-    const struct parallels_header *ph = (const void *)buf;
+    return (uint64_t)le32_to_cpu(s->bat_bitmap[idx]) * s->off_multiplier;
+}
 
-    if (buf_size < HEADER_SIZE)
+static uint32_t bat_entry_off(uint32_t idx)
+{
+    return sizeof(ParallelsHeader) + sizeof(uint32_t) * idx;
+}
+
+static int64_t seek_to_sector(BDRVParallelsState *s, int64_t sector_num)
+{
+    uint32_t index, offset;
+
+    index = sector_num / s->tracks;
+    offset = sector_num % s->tracks;
+
+    /* not allocated */
+    if ((index >= s->bat_size) || (s->bat_bitmap[index] == 0)) {
+        return -1;
+    }
+    return bat2sect(s, index) + offset;
+}
+
+static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num,
+        int nb_sectors)
+{
+    int ret = s->tracks - sector_num % s->tracks;
+    return MIN(nb_sectors, ret);
+}
+
+static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
+                            int nb_sectors, int *pnum)
+{
+    int64_t start_off = -2, prev_end_off = -2;
+
+    *pnum = 0;
+    while (nb_sectors > 0 || start_off == -2) {
+        int64_t offset = seek_to_sector(s, sector_num);
+        int to_end;
+
+        if (start_off == -2) {
+            start_off = offset;
+            prev_end_off = offset;
+        } else if (offset != prev_end_off) {
+            break;
+        }
+
+        to_end = cluster_remainder(s, sector_num, nb_sectors);
+        nb_sectors -= to_end;
+        sector_num += to_end;
+        *pnum += to_end;
+
+        if (offset > 0) {
+            prev_end_off += to_end;
+        }
+    }
+    return start_off;
+}
+
+static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
+                                 int nb_sectors, int *pnum)
+{
+    BDRVParallelsState *s = bs->opaque;
+    uint32_t idx, to_allocate, i;
+    int64_t pos, space;
+
+    pos = block_status(s, sector_num, nb_sectors, pnum);
+    if (pos > 0) {
+        return pos;
+    }
+
+    idx = sector_num / s->tracks;
+    if (idx >= s->bat_size) {
+        return -EINVAL;
+    }
+
+    to_allocate = (sector_num + *pnum + s->tracks - 1) / s->tracks - idx;
+    space = to_allocate * s->tracks;
+    if (s->data_end + space > bdrv_getlength(bs->file) >> BDRV_SECTOR_BITS) {
+        int ret;
+        space += s->prealloc_size;
+        if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
+            ret = bdrv_write_zeroes(bs->file, s->data_end, space, 0);
+        } else {
+            ret = bdrv_truncate(bs->file,
+                                (s->data_end + space) << BDRV_SECTOR_BITS);
+        }
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    for (i = 0; i < to_allocate; i++) {
+        s->bat_bitmap[idx + i] = cpu_to_le32(s->data_end / s->off_multiplier);
+        s->data_end += s->tracks;
+        bitmap_set(s->bat_dirty_bmap,
+                   bat_entry_off(idx) / s->bat_dirty_block, 1);
+    }
+
+    return bat2sect(s, idx) + sector_num % s->tracks;
+}
+
+
+static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs)
+{
+    BDRVParallelsState *s = bs->opaque;
+    unsigned long size = DIV_ROUND_UP(s->header_size, s->bat_dirty_block);
+    unsigned long bit;
+
+    qemu_co_mutex_lock(&s->lock);
+
+    bit = find_first_bit(s->bat_dirty_bmap, size);
+    while (bit < size) {
+        uint32_t off = bit * s->bat_dirty_block;
+        uint32_t to_write = s->bat_dirty_block;
+        int ret;
+
+        if (off + to_write > s->header_size) {
+            to_write = s->header_size - off;
+        }
+        ret = bdrv_pwrite(bs->file, off, (uint8_t *)s->header + off, to_write);
+        if (ret < 0) {
+            qemu_co_mutex_unlock(&s->lock);
+            return ret;
+        }
+        bit = find_next_bit(s->bat_dirty_bmap, size, bit + 1);
+    }
+    bitmap_zero(s->bat_dirty_bmap, size);
+
+    qemu_co_mutex_unlock(&s->lock);
+    return 0;
+}
+
+
+static int64_t coroutine_fn parallels_co_get_block_status(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, int *pnum)
+{
+    BDRVParallelsState *s = bs->opaque;
+    int64_t offset;
+
+    qemu_co_mutex_lock(&s->lock);
+    offset = block_status(s, sector_num, nb_sectors, pnum);
+    qemu_co_mutex_unlock(&s->lock);
+
+    if (offset < 0) {
         return 0;
+    }
 
-    if ((!memcmp(ph->magic, HEADER_MAGIC, 16) ||
-        !memcmp(ph->magic, HEADER_MAGIC2, 16)) &&
-        (le32_to_cpu(ph->version) == HEADER_VERSION))
-        return 100;
+    return (offset << BDRV_SECTOR_BITS) |
+        BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
+}
+
+static coroutine_fn int parallels_co_writev(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+    BDRVParallelsState *s = bs->opaque;
+    uint64_t bytes_done = 0;
+    QEMUIOVector hd_qiov;
+    int ret = 0;
+
+    qemu_iovec_init(&hd_qiov, qiov->niov);
+
+    while (nb_sectors > 0) {
+        int64_t position;
+        int n, nbytes;
+
+        qemu_co_mutex_lock(&s->lock);
+        position = allocate_clusters(bs, sector_num, nb_sectors, &n);
+        qemu_co_mutex_unlock(&s->lock);
+        if (position < 0) {
+            ret = (int)position;
+            break;
+        }
+
+        nbytes = n << BDRV_SECTOR_BITS;
+
+        qemu_iovec_reset(&hd_qiov);
+        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
+
+        ret = bdrv_co_writev(bs->file, position, n, &hd_qiov);
+        if (ret < 0) {
+            break;
+        }
+
+        nb_sectors -= n;
+        sector_num += n;
+        bytes_done += nbytes;
+    }
+
+    qemu_iovec_destroy(&hd_qiov);
+    return ret;
+}
+
+static coroutine_fn int parallels_co_readv(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+    BDRVParallelsState *s = bs->opaque;
+    uint64_t bytes_done = 0;
+    QEMUIOVector hd_qiov;
+    int ret = 0;
+
+    qemu_iovec_init(&hd_qiov, qiov->niov);
+
+    while (nb_sectors > 0) {
+        int64_t position;
+        int n, nbytes;
+
+        qemu_co_mutex_lock(&s->lock);
+        position = block_status(s, sector_num, nb_sectors, &n);
+        qemu_co_mutex_unlock(&s->lock);
+
+        nbytes = n << BDRV_SECTOR_BITS;
+
+        if (position < 0) {
+            qemu_iovec_memset(qiov, bytes_done, 0, nbytes);
+        } else {
+            qemu_iovec_reset(&hd_qiov);
+            qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
+
+            ret = bdrv_co_readv(bs->file, position, n, &hd_qiov);
+            if (ret < 0) {
+                break;
+            }
+        }
+
+        nb_sectors -= n;
+        sector_num += n;
+        bytes_done += nbytes;
+    }
+
+    qemu_iovec_destroy(&hd_qiov);
+    return ret;
+}
+
+
+static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
+                           BdrvCheckMode fix)
+{
+    BDRVParallelsState *s = bs->opaque;
+    int64_t size, prev_off, high_off;
+    int ret;
+    uint32_t i;
+    bool flush_bat = false;
+    int cluster_size = s->tracks << BDRV_SECTOR_BITS;
+
+    size = bdrv_getlength(bs->file);
+    if (size < 0) {
+        res->check_errors++;
+        return size;
+    }
+
+    if (s->header_unclean) {
+        fprintf(stderr, "%s image was not closed correctly\n",
+                fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR");
+        res->corruptions++;
+        if (fix & BDRV_FIX_ERRORS) {
+            /* parallels_close will do the job right */
+            res->corruptions_fixed++;
+            s->header_unclean = false;
+        }
+    }
+
+    res->bfi.total_clusters = s->bat_size;
+    res->bfi.compressed_clusters = 0; /* compression is not supported */
+
+    high_off = 0;
+    prev_off = 0;
+    for (i = 0; i < s->bat_size; i++) {
+        int64_t off = bat2sect(s, i) << BDRV_SECTOR_BITS;
+        if (off == 0) {
+            prev_off = 0;
+            continue;
+        }
+
+        /* cluster outside the image */
+        if (off > size) {
+            fprintf(stderr, "%s cluster %u is outside image\n",
+                    fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
+            res->corruptions++;
+            if (fix & BDRV_FIX_ERRORS) {
+                prev_off = 0;
+                s->bat_bitmap[i] = 0;
+                res->corruptions_fixed++;
+                flush_bat = true;
+                continue;
+            }
+        }
+
+        res->bfi.allocated_clusters++;
+        if (off > high_off) {
+            high_off = off;
+        }
+
+        if (prev_off != 0 && (prev_off + cluster_size) != off) {
+            res->bfi.fragmented_clusters++;
+        }
+        prev_off = off;
+    }
+
+    if (flush_bat) {
+        ret = bdrv_pwrite_sync(bs->file, 0, s->header, s->header_size);
+        if (ret < 0) {
+            res->check_errors++;
+            return ret;
+        }
+    }
+
+    res->image_end_offset = high_off + cluster_size;
+    if (size > res->image_end_offset) {
+        int64_t count;
+        count = DIV_ROUND_UP(size - res->image_end_offset, cluster_size);
+        fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n",
+                fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR",
+                size - res->image_end_offset);
+        res->leaks += count;
+        if (fix & BDRV_FIX_LEAKS) {
+            ret = bdrv_truncate(bs->file, res->image_end_offset);
+            if (ret < 0) {
+                res->check_errors++;
+                return ret;
+            }
+            res->leaks_fixed += count;
+        }
+    }
 
     return 0;
 }
 
+
+static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
+{
+    int64_t total_size, cl_size;
+    uint8_t tmp[BDRV_SECTOR_SIZE];
+    Error *local_err = NULL;
+    BlockDriverState *file;
+    uint32_t bat_entries, bat_sectors;
+    ParallelsHeader header;
+    int ret;
+
+    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+                          BDRV_SECTOR_SIZE);
+    cl_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE,
+                          DEFAULT_CLUSTER_SIZE), BDRV_SECTOR_SIZE);
+
+    ret = bdrv_create_file(filename, opts, &local_err);
+    if (ret < 0) {
+        error_propagate(errp, local_err);
+        return ret;
+    }
+
+    file = NULL;
+    ret = bdrv_open(&file, filename, NULL, NULL,
+                    BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err);
+    if (ret < 0) {
+        error_propagate(errp, local_err);
+        return ret;
+    }
+    ret = bdrv_truncate(file, 0);
+    if (ret < 0) {
+        goto exit;
+    }
+
+    bat_entries = DIV_ROUND_UP(total_size, cl_size);
+    bat_sectors = DIV_ROUND_UP(bat_entry_off(bat_entries), cl_size);
+    bat_sectors = (bat_sectors *  cl_size) >> BDRV_SECTOR_BITS;
+
+    memset(&header, 0, sizeof(header));
+    memcpy(header.magic, HEADER_MAGIC2, sizeof(header.magic));
+    header.version = cpu_to_le32(HEADER_VERSION);
+    /* don't care much about geometry, it is not used on image level */
+    header.heads = cpu_to_le32(16);
+    header.cylinders = cpu_to_le32(total_size / BDRV_SECTOR_SIZE / 16 / 32);
+    header.tracks = cpu_to_le32(cl_size >> BDRV_SECTOR_BITS);
+    header.bat_entries = cpu_to_le32(bat_entries);
+    header.nb_sectors = cpu_to_le64(DIV_ROUND_UP(total_size, BDRV_SECTOR_SIZE));
+    header.data_off = cpu_to_le32(bat_sectors);
+
+    /* write all the data */
+    memset(tmp, 0, sizeof(tmp));
+    memcpy(tmp, &header, sizeof(header));
+
+    ret = bdrv_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE);
+    if (ret < 0) {
+        goto exit;
+    }
+    ret = bdrv_write_zeroes(file, 1, bat_sectors - 1, 0);
+    if (ret < 0) {
+        goto exit;
+    }
+    ret = 0;
+
+done:
+    bdrv_unref(file);
+    return ret;
+
+exit:
+    error_setg_errno(errp, -ret, "Failed to create Parallels image");
+    goto done;
+}
+
+
+static int parallels_probe(const uint8_t *buf, int buf_size,
+                           const char *filename)
+{
+    const ParallelsHeader *ph = (const void *)buf;
+
+    if (buf_size < sizeof(ParallelsHeader)) {
+        return 0;
+    }
+
+    if ((!memcmp(ph->magic, HEADER_MAGIC, 16) ||
+           !memcmp(ph->magic, HEADER_MAGIC2, 16)) &&
+           (le32_to_cpu(ph->version) == HEADER_VERSION)) {
+        return 100;
+    }
+
+    return 0;
+}
+
+static int parallels_update_header(BlockDriverState *bs)
+{
+    BDRVParallelsState *s = bs->opaque;
+    unsigned size = MAX(bdrv_opt_mem_align(bs->file), sizeof(ParallelsHeader));
+
+    if (size > s->header_size) {
+        size = s->header_size;
+    }
+    return bdrv_pwrite_sync(bs->file, 0, s->header, size);
+}
+
 static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
                           Error **errp)
 {
     BDRVParallelsState *s = bs->opaque;
-    int i;
-    struct parallels_header ph;
-    int ret;
-
-    bs->read_only = 1; // no write support yet
+    ParallelsHeader ph;
+    int ret, size, i;
+    QemuOpts *opts = NULL;
+    Error *local_err = NULL;
+    char *buf;
 
     ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph));
     if (ret < 0) {
@@ -115,25 +595,90 @@
         goto fail;
     }
 
-    s->catalog_size = le32_to_cpu(ph.catalog_entries);
-    if (s->catalog_size > INT_MAX / 4) {
+    s->bat_size = le32_to_cpu(ph.bat_entries);
+    if (s->bat_size > INT_MAX / sizeof(uint32_t)) {
         error_setg(errp, "Catalog too large");
         ret = -EFBIG;
         goto fail;
     }
-    s->catalog_bitmap = g_try_new(uint32_t, s->catalog_size);
-    if (s->catalog_size && s->catalog_bitmap == NULL) {
+
+    size = bat_entry_off(s->bat_size);
+    s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file));
+    s->header = qemu_try_blockalign(bs->file, s->header_size);
+    if (s->header == NULL) {
         ret = -ENOMEM;
         goto fail;
     }
+    s->data_end = le32_to_cpu(ph.data_off);
+    if (s->data_end == 0) {
+        s->data_end = ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE);
+    }
+    if (s->data_end < s->header_size) {
+        /* there is not enough unused space to fit to block align between BAT
+           and actual data. We can't avoid read-modify-write... */
+        s->header_size = size;
+    }
 
-    ret = bdrv_pread(bs->file, 64, s->catalog_bitmap, s->catalog_size * 4);
+    ret = bdrv_pread(bs->file, 0, s->header, s->header_size);
     if (ret < 0) {
         goto fail;
     }
+    s->bat_bitmap = (uint32_t *)(s->header + 1);
 
-    for (i = 0; i < s->catalog_size; i++)
-        le32_to_cpus(&s->catalog_bitmap[i]);
+    for (i = 0; i < s->bat_size; i++) {
+        int64_t off = bat2sect(s, i);
+        if (off >= s->data_end) {
+            s->data_end = off + s->tracks;
+        }
+    }
+
+    if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) {
+        /* Image was not closed correctly. The check is mandatory */
+        s->header_unclean = true;
+        if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
+            error_setg(errp, "parallels: Image was not closed correctly; "
+                       "cannot be opened read/write");
+            ret = -EACCES;
+            goto fail;
+        }
+    }
+
+    opts = qemu_opts_create(&parallels_runtime_opts, NULL, 0, &local_err);
+    if (local_err != NULL) {
+        goto fail_options;
+    }
+
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (local_err != NULL) {
+        goto fail_options;
+    }
+
+    s->prealloc_size =
+        qemu_opt_get_size_del(opts, PARALLELS_OPT_PREALLOC_SIZE, 0);
+    s->prealloc_size = MAX(s->tracks, s->prealloc_size >> BDRV_SECTOR_BITS);
+    buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE);
+    s->prealloc_mode = qapi_enum_parse(prealloc_mode_lookup, buf,
+            PRL_PREALLOC_MODE_MAX, PRL_PREALLOC_MODE_FALLOCATE, &local_err);
+    g_free(buf);
+    if (local_err != NULL) {
+        goto fail_options;
+    }
+    if (!bdrv_has_zero_init(bs->file) ||
+            bdrv_truncate(bs->file, bdrv_getlength(bs->file)) != 0) {
+        s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
+    }
+
+    if (flags & BDRV_O_RDWR) {
+        s->header->inuse = cpu_to_le32(HEADER_INUSE_MAGIC);
+        ret = parallels_update_header(bs);
+        if (ret < 0) {
+            goto fail;
+        }
+    }
+
+    s->bat_dirty_block = 4 * getpagesize();
+    s->bat_dirty_bmap =
+        bitmap_new(DIV_ROUND_UP(s->header_size, s->bat_dirty_block));
 
     qemu_co_mutex_init(&s->lock);
     return 0;
@@ -142,67 +687,67 @@
     error_setg(errp, "Image not in Parallels format");
     ret = -EINVAL;
 fail:
-    g_free(s->catalog_bitmap);
+    qemu_vfree(s->header);
     return ret;
+
+fail_options:
+    error_propagate(errp, local_err);
+    ret = -EINVAL;
+    goto fail;
 }
 
-static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
-{
-    BDRVParallelsState *s = bs->opaque;
-    uint32_t index, offset;
-
-    index = sector_num / s->tracks;
-    offset = sector_num % s->tracks;
-
-    /* not allocated */
-    if ((index >= s->catalog_size) || (s->catalog_bitmap[index] == 0))
-        return -1;
-    return
-        ((uint64_t)s->catalog_bitmap[index] * s->off_multiplier + offset) * 512;
-}
-
-static int parallels_read(BlockDriverState *bs, int64_t sector_num,
-                    uint8_t *buf, int nb_sectors)
-{
-    while (nb_sectors > 0) {
-        int64_t position = seek_to_sector(bs, sector_num);
-        if (position >= 0) {
-            if (bdrv_pread(bs->file, position, buf, 512) != 512)
-                return -1;
-        } else {
-            memset(buf, 0, 512);
-        }
-        nb_sectors--;
-        sector_num++;
-        buf += 512;
-    }
-    return 0;
-}
-
-static coroutine_fn int parallels_co_read(BlockDriverState *bs, int64_t sector_num,
-                                          uint8_t *buf, int nb_sectors)
-{
-    int ret;
-    BDRVParallelsState *s = bs->opaque;
-    qemu_co_mutex_lock(&s->lock);
-    ret = parallels_read(bs, sector_num, buf, nb_sectors);
-    qemu_co_mutex_unlock(&s->lock);
-    return ret;
-}
 
 static void parallels_close(BlockDriverState *bs)
 {
     BDRVParallelsState *s = bs->opaque;
-    g_free(s->catalog_bitmap);
+
+    if (bs->open_flags & BDRV_O_RDWR) {
+        s->header->inuse = 0;
+        parallels_update_header(bs);
+    }
+
+    if (bs->open_flags & BDRV_O_RDWR) {
+        bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS);
+    }
+
+    g_free(s->bat_dirty_bmap);
+    qemu_vfree(s->header);
 }
 
+static QemuOptsList parallels_create_opts = {
+    .name = "parallels-create-opts",
+    .head = QTAILQ_HEAD_INITIALIZER(parallels_create_opts.head),
+    .desc = {
+        {
+            .name = BLOCK_OPT_SIZE,
+            .type = QEMU_OPT_SIZE,
+            .help = "Virtual disk size",
+        },
+        {
+            .name = BLOCK_OPT_CLUSTER_SIZE,
+            .type = QEMU_OPT_SIZE,
+            .help = "Parallels image cluster size",
+            .def_value_str = stringify(DEFAULT_CLUSTER_SIZE),
+        },
+        { /* end of list */ }
+    }
+};
+
 static BlockDriver bdrv_parallels = {
     .format_name	= "parallels",
     .instance_size	= sizeof(BDRVParallelsState),
     .bdrv_probe		= parallels_probe,
     .bdrv_open		= parallels_open,
-    .bdrv_read          = parallels_co_read,
     .bdrv_close		= parallels_close,
+    .bdrv_co_get_block_status = parallels_co_get_block_status,
+    .bdrv_has_zero_init       = bdrv_has_zero_init_1,
+    .bdrv_co_flush_to_os      = parallels_co_flush_to_os,
+    .bdrv_co_readv  = parallels_co_readv,
+    .bdrv_co_writev = parallels_co_writev,
+
+    .bdrv_create    = parallels_create,
+    .bdrv_check     = parallels_check,
+    .create_opts    = &parallels_create_opts,
 };
 
 static void bdrv_parallels_init(void)
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 24d8582..2990e95 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -301,6 +301,7 @@
 {
     BDRVRawState *s = bs->opaque;
     char *buf;
+    size_t max_align = MAX(MAX_BLOCKSIZE, getpagesize());
 
     /* For /dev/sg devices the alignment is not really used.
        With buffered I/O, we don't have any restrictions. */
@@ -330,9 +331,9 @@
     /* If we could not get the sizes so far, we can only guess them */
     if (!s->buf_align) {
         size_t align;
-        buf = qemu_memalign(MAX_BLOCKSIZE, 2 * MAX_BLOCKSIZE);
-        for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
-            if (raw_is_io_aligned(fd, buf + align, MAX_BLOCKSIZE)) {
+        buf = qemu_memalign(max_align, 2 * max_align);
+        for (align = 512; align <= max_align; align <<= 1) {
+            if (raw_is_io_aligned(fd, buf + align, max_align)) {
                 s->buf_align = align;
                 break;
             }
@@ -342,8 +343,8 @@
 
     if (!bs->request_alignment) {
         size_t align;
-        buf = qemu_memalign(s->buf_align, MAX_BLOCKSIZE);
-        for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
+        buf = qemu_memalign(s->buf_align, max_align);
+        for (align = 512; align <= max_align; align <<= 1) {
             if (raw_is_io_aligned(fd, buf, align)) {
                 bs->request_alignment = align;
                 break;
@@ -725,7 +726,8 @@
     BDRVRawState *s = bs->opaque;
 
     raw_probe_alignment(bs, s->fd, errp);
-    bs->bl.opt_mem_alignment = s->buf_align;
+    bs->bl.min_mem_alignment = s->buf_align;
+    bs->bl.opt_mem_alignment = MAX(s->buf_align, getpagesize());
 }
 
 static int check_for_dasd(int fd)
diff --git a/configure b/configure
index 1f0f485..f758f32 100755
--- a/configure
+++ b/configure
@@ -103,7 +103,8 @@
 }
 
 compile_object() {
-  do_cc $QEMU_CFLAGS -c -o $TMPO $TMPC
+  local_cflags="$1"
+  do_cc $QEMU_CFLAGS $local_cflags -c -o $TMPO $TMPC
 }
 
 compile_prog() {
@@ -436,6 +437,12 @@
   compile_object
 }
 
+write_c_skeleton() {
+    cat > $TMPC <<EOF
+int main(void) { return 0; }
+EOF
+}
+
 if check_define __linux__ ; then
   targetos="Linux"
 elif check_define _WIN32 ; then
@@ -705,9 +712,7 @@
   # enable C99/POSIX format strings (needs mingw32-runtime 3.15 or later)
   QEMU_CFLAGS="-D__USE_MINGW_ANSI_STDIO=1 $QEMU_CFLAGS"
   LIBS="-lwinmm -lws2_32 -liphlpapi $LIBS"
-cat > $TMPC << EOF
-int main(void) { return 0; }
-EOF
+  write_c_skeleton;
   if compile_prog "" "-liberty" ; then
     LIBS="-liberty $LIBS"
   fi
@@ -1445,10 +1450,7 @@
 fi
 
 # check that the C compiler works.
-cat > $TMPC <<EOF
-int main(void) { return 0; }
-EOF
-
+write_c_skeleton;
 if compile_object ; then
   : C compiler works ok
 else
@@ -1496,16 +1498,20 @@
 # enable it for all configure tests. If a configure test failed due
 # to -Werror this would just silently disable some features,
 # so it's too error prone.
-cat > $TMPC << EOF
-int main(void) { return 0; }
-EOF
-for flag in $gcc_flags; do
+
+cc_has_warning_flag() {
+    write_c_skeleton;
+
     # Use the positive sense of the flag when testing for -Wno-wombat
     # support (gcc will happily accept the -Wno- form of unknown
     # warning options).
-    optflag="$(echo $flag | sed -e 's/^-Wno-/-W/')"
-    if compile_prog "-Werror $optflag" "" ; then
-	QEMU_CFLAGS="$QEMU_CFLAGS $flag"
+    optflag="$(echo $1 | sed -e 's/^-Wno-/-W/')"
+    compile_prog "-Werror $optflag" ""
+}
+
+for flag in $gcc_flags; do
+    if cc_has_warning_flag $flag ; then
+        QEMU_CFLAGS="$QEMU_CFLAGS $flag"
     fi
 done
 
@@ -1607,7 +1613,7 @@
     fi
   fi
 
-  if compile_prog "-fno-pie" "-nopie"; then
+  if compile_prog "-Werror -fno-pie" "-nopie"; then
     CFLAGS_NOPIE="-fno-pie"
     LDFLAGS_NOPIE="-nopie"
   fi
@@ -2802,6 +2808,18 @@
     glib_subprocess=no
 fi
 
+# Silence clang 3.5.0 warnings about glib attribute __alloc_size__ usage
+cat > $TMPC << EOF
+#include <glib.h>
+int main(void) { return 0; }
+EOF
+if ! compile_prog "$glib_cflags -Werror" "$glib_libs" ; then
+    if cc_has_warning_flag "-Wno-unknown-attributes"; then
+        glib_cflags="-Wno-unknown-attributes $glib_cflags"
+        CFLAGS="-Wno-unknown-attributes $CFLAGS"
+    fi
+fi
+
 ##########################################
 # SHA command probe for modules
 if test "$modules" = yes; then
@@ -4192,6 +4210,33 @@
     getauxval=yes
 fi
 
+########################################
+# check if ccache is interfering with
+# semantic analysis of macros
+
+ccache_cpp2=no
+cat > $TMPC << EOF
+static const int Z = 1;
+#define fn() ({ Z; })
+#define TAUT(X) ((X) == Z)
+#define PAREN(X, Y) (X == Y)
+#define ID(X) (X)
+int main(int argc, char *argv[])
+{
+    int x = 0, y = 0;
+    x = ID(x);
+    x = fn();
+    fn();
+    if (PAREN(x, y)) return 0;
+    if (TAUT(Z)) return 0;
+    return 0;
+}
+EOF
+
+if ! compile_object "-Werror"; then
+    ccache_cpp2=yes
+fi
+
 ##########################################
 # End of CC checks
 # After here, no more $cc or $ld runs
@@ -5485,6 +5530,10 @@
   echo "CONFIG_NUMA=y" >> $config_host_mak
 fi
 
+if test "$ccache_cpp2" = "yes"; then
+  echo "export CCACHE_CPP2=y" >> $config_host_mak
+fi
+
 # build tree in object directory in case the source is not in the current directory
 DIRS="tests tests/tcg tests/tcg/cris tests/tcg/lm32 tests/libqos tests/qapi-schema tests/tcg/xtensa tests/qemu-iotests"
 DIRS="$DIRS fsdev"
diff --git a/include/block/block.h b/include/block/block.h
index 7d1a717..c1c963e 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -440,6 +440,8 @@
 
 /* Returns the alignment in bytes that is required so that no bounce buffer
  * is required throughout the stack */
+size_t bdrv_min_mem_align(BlockDriverState *bs);
+/* Returns optimal alignment in bytes for bounce buffer */
 size_t bdrv_opt_mem_align(BlockDriverState *bs);
 void bdrv_set_guest_block_size(BlockDriverState *bs, int align);
 void *qemu_blockalign(BlockDriverState *bs, size_t size);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index db29b74..f004378 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -313,6 +313,9 @@
     int max_transfer_length;
 
     /* memory alignment so that no bounce buffer is needed */
+    size_t min_mem_alignment;
+
+    /* memory alignment for bounce buffer */
     size_t opt_mem_alignment;
 } BlockLimits;
 
diff --git a/tests/qemu-iotests/033 b/tests/qemu-iotests/033
index 4008f10..a61d8ce 100755
--- a/tests/qemu-iotests/033
+++ b/tests/qemu-iotests/033
@@ -78,6 +78,19 @@
 	echo
 	echo "== verifying patterns (2) =="
 	do_test $align "read -P 0x0 0x400 0x20000" "$TEST_IMG" | _filter_qemu_io
+
+	echo
+	echo "== rewriting unaligned zeroes =="
+	do_test $align "write -P 0xb 0x0 0x1000" "$TEST_IMG" | _filter_qemu_io
+	do_test $align "write -z 0x200 0x200" "$TEST_IMG" | _filter_qemu_io
+
+	echo
+	echo "== verifying patterns (3) =="
+	do_test $align "read -P 0xb 0x0 0x200" "$TEST_IMG" | _filter_qemu_io
+	do_test $align "read -P 0x0 0x200 0x200" "$TEST_IMG" | _filter_qemu_io
+	do_test $align "read -P 0xb 0x400 0xc00" "$TEST_IMG" | _filter_qemu_io
+
+	echo
 done
 
 # success, all done
diff --git a/tests/qemu-iotests/033.out b/tests/qemu-iotests/033.out
index 305949f..c3d18aa 100644
--- a/tests/qemu-iotests/033.out
+++ b/tests/qemu-iotests/033.out
@@ -27,6 +27,21 @@
 read 131072/131072 bytes at offset 1024
 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 
+== rewriting unaligned zeroes ==
+wrote 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 512/512 bytes at offset 512
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== verifying patterns (3) ==
+read 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 512
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 3072/3072 bytes at offset 1024
+3 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+
 == preparing image ==
 wrote 1024/1024 bytes at offset 512
 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
@@ -52,4 +67,19 @@
 == verifying patterns (2) ==
 read 131072/131072 bytes at offset 1024
 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== rewriting unaligned zeroes ==
+wrote 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 512/512 bytes at offset 512
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== verifying patterns (3) ==
+read 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 512
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 3072/3072 bytes at offset 1024
+3 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
 *** done
diff --git a/tests/qemu-iotests/076 b/tests/qemu-iotests/076
index ed2be35..c9b55a9 100755
--- a/tests/qemu-iotests/076
+++ b/tests/qemu-iotests/076
@@ -49,31 +49,36 @@
 echo
 echo "== Read from a valid v1 image =="
 _use_sample_img parallels-v1.bz2
-{ $QEMU_IO -c "read -P 0x11 0 64k" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir
+{ $QEMU_IO -c "read -P 0x11 0 64k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
 
 echo
 echo "== Negative catalog size =="
 _use_sample_img parallels-v1.bz2
 poke_file "$TEST_IMG" "$catalog_entries_offset" "\xff\xff\xff\xff"
-{ $QEMU_IO -c "read 0 512" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir
+{ $QEMU_IO -c "read 0 512" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
 
 echo
 echo "== Overflow in catalog allocation =="
 _use_sample_img parallels-v1.bz2
 poke_file "$TEST_IMG" "$nb_sectors_offset" "\xff\xff\xff\xff"
 poke_file "$TEST_IMG" "$catalog_entries_offset" "\x01\x00\x00\x40"
-{ $QEMU_IO -c "read 64M 64M" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir
+{ $QEMU_IO -c "read 64M 64M" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
 
 echo
 echo "== Zero sectors per track =="
 _use_sample_img parallels-v1.bz2
 poke_file "$TEST_IMG" "$tracks_offset" "\x00\x00\x00\x00"
-{ $QEMU_IO -c "read 0 512" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir
+{ $QEMU_IO -c "read 0 512" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
 
 echo
 echo "== Read from a valid v2 image =="
 _use_sample_img parallels-v2.bz2
-{ $QEMU_IO -c "read -P 0x11 0 64k" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir
+{ $QEMU_IO -c "read -P 0x11 0 64k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+{ $QEMU_IO -c "write -P 0x21 1024k 1k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+{ $QEMU_IO -c "write -P 0x22 1025k 1k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+{ $QEMU_IO -c "read -P 0x21 1024k 1k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+{ $QEMU_IO -c "read -P 0x22 1025k 1k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+{ $QEMU_IO -c "read -P 0 1026k 62k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
 
 # success, all done
 echo "*** done"
diff --git a/tests/qemu-iotests/076.out b/tests/qemu-iotests/076.out
index 32ade08..b0000ae 100644
--- a/tests/qemu-iotests/076.out
+++ b/tests/qemu-iotests/076.out
@@ -19,4 +19,14 @@
 == Read from a valid v2 image ==
 read 65536/65536 bytes at offset 0
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 1024/1024 bytes at offset 1048576
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 1024/1024 bytes at offset 1049600
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1024/1024 bytes at offset 1048576
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1024/1024 bytes at offset 1049600
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 63488/63488 bytes at offset 1050624
+62 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 *** done
diff --git a/tests/qemu-iotests/131 b/tests/qemu-iotests/131
new file mode 100755
index 0000000..4873f40
--- /dev/null
+++ b/tests/qemu-iotests/131
@@ -0,0 +1,77 @@
+#!/bin/bash
+#
+# parallels format validation tests (created by QEMU)
+#
+# Copyright (C) 2014 Denis V. Lunev <den@openvz.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=den@openvz.org
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1	# failure is the default!
+
+_cleanup()
+{
+    _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt parallels
+_supported_proto file
+_supported_os Linux
+
+inuse_offset=$((0x2c))
+
+size=64M
+CLUSTER_SIZE=64k
+IMGFMT=parallels
+_make_test_img $size
+
+echo == read empty image ==
+{ $QEMU_IO -c "read -P 0 32k 64k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+echo == write more than 1 block in a row ==
+{ $QEMU_IO -c "write -P 0x11 32k 128k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+echo == read less than block ==
+{ $QEMU_IO -c "read -P 0x11 32k 32k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+echo == read exactly 1 block ==
+{ $QEMU_IO -c "read -P 0x11 64k 64k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+echo == read more than 1 block ==
+{ $QEMU_IO -c "read -P 0x11 32k 128k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+echo == check that there is no trash after written ==
+{ $QEMU_IO -c "read -P 0 160k 32k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+echo == check that there is no trash before written ==
+{ $QEMU_IO -c "read -P 0 0 32k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== Corrupt image =="
+poke_file "$TEST_IMG" "$inuse_offset" "\x59\x6e\x6f\x74"
+{ $QEMU_IO -c "read -P 0x11 64k 64k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+_check_test_img
+_check_test_img -r all
+{ $QEMU_IO -c "read -P 0x11 64k 64k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/131.out b/tests/qemu-iotests/131.out
new file mode 100644
index 0000000..021a04c
--- /dev/null
+++ b/tests/qemu-iotests/131.out
@@ -0,0 +1,41 @@
+QA output created by 131
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+== read empty image ==
+read 65536/65536 bytes at offset 32768
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== write more than 1 block in a row ==
+wrote 131072/131072 bytes at offset 32768
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== read less than block ==
+read 32768/32768 bytes at offset 32768
+32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== read exactly 1 block ==
+read 65536/65536 bytes at offset 65536
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== read more than 1 block ==
+read 131072/131072 bytes at offset 32768
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check that there is no trash after written ==
+read 32768/32768 bytes at offset 163840
+32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check that there is no trash before written ==
+read 32768/32768 bytes at offset 0
+32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== Corrupt image ==
+qemu-io: can't open device TEST_DIR/t.parallels: parallels: Image was not closed correctly; cannot be opened read/write
+no file open, try 'help open'
+ERROR image was not closed correctly
+
+1 errors were found on the image.
+Data may be corrupted, or further writes to the image may corrupt it.
+Repairing image was not closed correctly
+The following inconsistencies were found and repaired:
+
+    0 leaked clusters
+    1 corruptions
+
+Double checking the fixed image now...
+No errors were found on the image.
+read 65536/65536 bytes at offset 65536
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+*** done
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 6ca3466..34b16cb 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -128,3 +128,4 @@
 128 rw auto quick
 129 rw auto quick
 130 rw auto quick
+131 rw auto quick