Merge remote-tracking branch 'agraf/tags/signed-ppc-for-upstream-1.7' into staging

Patch queue for ppc - 2013-11-08

These are two patches that will hopefully make it into 1.7. The SLOF update
fixes -append kernel command line argument passing into the guest kernel. The
other patch makes VIO devices appear when using -device '?'.

# gpg: Signature made Thu 07 Nov 2013 07:34:54 PM PST using RSA key ID 03FEDC60
# gpg: Can't check signature: public key not found

# By Alexey Kardashevskiy
# Via Alexander Graf
* agraf/tags/signed-ppc-for-upstream-1.7:
  pseries: Update SLOF firmware image
  spapr: add vio-bus devices to categories

Message-id: 1383881766-13958-1-git-send-email-agraf@suse.de
Signed-off-by: Anthony Liguori <aliguori@amazon.com>
diff --git a/MAINTAINERS b/MAINTAINERS
index 77edacf..02b85ee 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -699,6 +699,7 @@
 F: monitor.c
 F: hmp.c
 F: hmp-commands.hx
+T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp
 
 Network device layer
 M: Anthony Liguori <aliguori@amazon.com>
@@ -720,6 +721,7 @@
 M: Michael Roth <mdroth@linux.vnet.ibm.com>
 S: Supported
 F: qapi/
+T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp
 
 QAPI Schema
 M: Eric Blake <eblake@redhat.com>
@@ -727,6 +729,7 @@
 M: Markus Armbruster <armbru@redhat.com>
 S: Supported
 F: qapi-schema.json
+T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp
 
 QMP
 M: Luiz Capitulino <lcapitulino@redhat.com>
@@ -735,6 +738,7 @@
 F: monitor.c
 F: qmp-commands.hx
 F: QMP/
+T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp
 
 SLIRP
 M: Jan Kiszka <jan.kiszka@siemens.com>
diff --git a/Makefile b/Makefile
index b15003f..073f18b 100644
--- a/Makefile
+++ b/Makefile
@@ -28,7 +28,14 @@
 include $(SRC_PATH)/rules.mak
 config-host.mak: $(SRC_PATH)/configure
 	@echo $@ is out-of-date, running configure
-	@sed -n "/.*Configured with/s/[^:]*: //p" $@ | sh
+	@# TODO: The next lines include code which supports a smooth
+	@# transition from old configurations without config.status.
+	@# This code can be removed after QEMU 1.7.
+	@if test -x config.status; then \
+	    ./config.status; \
+        else \
+	    sed -n "/.*Configured with/s/[^:]*: //p" $@ | sh; \
+	fi
 else
 config-host.mak:
 ifneq ($(filter-out %clean,$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
diff --git a/audio/ossaudio.c b/audio/ossaudio.c
index 3e04a58..5a73716 100644
--- a/audio/ossaudio.c
+++ b/audio/ossaudio.c
@@ -849,6 +849,10 @@
 
 static void *oss_audio_init (void)
 {
+    if (access(conf.devpath_in, R_OK | W_OK) < 0 ||
+        access(conf.devpath_out, R_OK | W_OK) < 0) {
+        return NULL;
+    }
     return &conf;
 }
 
@@ -932,7 +936,7 @@
     .init           = oss_audio_init,
     .fini           = oss_audio_fini,
     .pcm_ops        = &oss_pcm_ops,
-    .can_be_default = 0,
+    .can_be_default = 1,
     .max_voices_out = INT_MAX,
     .max_voices_in  = INT_MAX,
     .voice_size_out = sizeof (OSSVoiceOut),
diff --git a/block.c b/block.c
index 58efb5b..6d5c804 100644
--- a/block.c
+++ b/block.c
@@ -640,7 +640,7 @@
         if (length < 0) {
             return length;
         }
-        hint = length >> BDRV_SECTOR_BITS;
+        hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
     }
 
     bs->total_sectors = hint;
@@ -1084,8 +1084,8 @@
             snprintf(backing_filename, sizeof(backing_filename),
                      "%s", filename);
         } else if (!realpath(filename, backing_filename)) {
-            error_setg_errno(errp, errno, "Could not resolve path '%s'", filename);
             ret = -errno;
+            error_setg_errno(errp, errno, "Could not resolve path '%s'", filename);
             goto fail;
         }
 
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 3bb85b5..f43ecbc 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -2,7 +2,7 @@
 block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
-block-obj-y += vhdx.o
+block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
 block-obj-y += parallels.o blkdebug.o blkverify.o
 block-obj-y += snapshot.o qapi.o
 block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
diff --git a/block/raw-posix.c b/block/raw-posix.c
index f6d48bb..ace5d96 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -1842,7 +1842,8 @@
 #endif /* __linux__ */
 
 #if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
-static int cdrom_open(BlockDriverState *bs, QDict *options, int flags)
+static int cdrom_open(BlockDriverState *bs, QDict *options, int flags,
+                      Error **errp)
 {
     BDRVRawState *s = bs->opaque;
     Error *local_err = NULL;
diff --git a/block/vhdx-endian.c b/block/vhdx-endian.c
new file mode 100644
index 0000000..fe879ed
--- /dev/null
+++ b/block/vhdx-endian.c
@@ -0,0 +1,216 @@
+/*
+ * Block driver for Hyper-V VHDX Images
+ *
+ * Copyright (c) 2013 Red Hat, Inc.,
+ *
+ * Authors:
+ *  Jeff Cody <jcody@redhat.com>
+ *
+ *  This is based on the "VHDX Format Specification v1.00", published 8/25/2012
+ *  by Microsoft:
+ *      https://www.microsoft.com/en-us/download/details.aspx?id=34750
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "block/vhdx.h"
+
+#include <uuid/uuid.h>
+
+
+/*
+ * All the VHDX formats on disk are little endian - the following
+ * are helper import/export functions to correctly convert
+ * endianness from disk read to native cpu format, and back again.
+ */
+
+
+/* VHDX File Header */
+
+
+void vhdx_header_le_import(VHDXHeader *h)
+{
+    assert(h != NULL);
+
+    le32_to_cpus(&h->signature);
+    le32_to_cpus(&h->checksum);
+    le64_to_cpus(&h->sequence_number);
+
+    leguid_to_cpus(&h->file_write_guid);
+    leguid_to_cpus(&h->data_write_guid);
+    leguid_to_cpus(&h->log_guid);
+
+    le16_to_cpus(&h->log_version);
+    le16_to_cpus(&h->version);
+    le32_to_cpus(&h->log_length);
+    le64_to_cpus(&h->log_offset);
+}
+
+void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h)
+{
+    assert(orig_h != NULL);
+    assert(new_h != NULL);
+
+    new_h->signature       = cpu_to_le32(orig_h->signature);
+    new_h->checksum        = cpu_to_le32(orig_h->checksum);
+    new_h->sequence_number = cpu_to_le64(orig_h->sequence_number);
+
+    new_h->file_write_guid = orig_h->file_write_guid;
+    new_h->data_write_guid = orig_h->data_write_guid;
+    new_h->log_guid        = orig_h->log_guid;
+
+    cpu_to_leguids(&new_h->file_write_guid);
+    cpu_to_leguids(&new_h->data_write_guid);
+    cpu_to_leguids(&new_h->log_guid);
+
+    new_h->log_version     = cpu_to_le16(orig_h->log_version);
+    new_h->version         = cpu_to_le16(orig_h->version);
+    new_h->log_length      = cpu_to_le32(orig_h->log_length);
+    new_h->log_offset      = cpu_to_le64(orig_h->log_offset);
+}
+
+
+/* VHDX Log Headers */
+
+
+void vhdx_log_desc_le_import(VHDXLogDescriptor *d)
+{
+    assert(d != NULL);
+
+    le32_to_cpus(&d->signature);
+    le32_to_cpus(&d->trailing_bytes);
+    le64_to_cpus(&d->leading_bytes);
+    le64_to_cpus(&d->file_offset);
+    le64_to_cpus(&d->sequence_number);
+}
+
+void vhdx_log_desc_le_export(VHDXLogDescriptor *d)
+{
+    assert(d != NULL);
+
+    cpu_to_le32s(&d->signature);
+    cpu_to_le32s(&d->trailing_bytes);
+    cpu_to_le64s(&d->leading_bytes);
+    cpu_to_le64s(&d->file_offset);
+    cpu_to_le64s(&d->sequence_number);
+}
+
+void vhdx_log_data_le_export(VHDXLogDataSector *d)
+{
+    assert(d != NULL);
+
+    cpu_to_le32s(&d->data_signature);
+    cpu_to_le32s(&d->sequence_high);
+    cpu_to_le32s(&d->sequence_low);
+}
+
+void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr)
+{
+    assert(hdr != NULL);
+
+    le32_to_cpus(&hdr->signature);
+    le32_to_cpus(&hdr->checksum);
+    le32_to_cpus(&hdr->entry_length);
+    le32_to_cpus(&hdr->tail);
+    le64_to_cpus(&hdr->sequence_number);
+    le32_to_cpus(&hdr->descriptor_count);
+    leguid_to_cpus(&hdr->log_guid);
+    le64_to_cpus(&hdr->flushed_file_offset);
+    le64_to_cpus(&hdr->last_file_offset);
+}
+
+void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr)
+{
+    assert(hdr != NULL);
+
+    cpu_to_le32s(&hdr->signature);
+    cpu_to_le32s(&hdr->checksum);
+    cpu_to_le32s(&hdr->entry_length);
+    cpu_to_le32s(&hdr->tail);
+    cpu_to_le64s(&hdr->sequence_number);
+    cpu_to_le32s(&hdr->descriptor_count);
+    cpu_to_leguids(&hdr->log_guid);
+    cpu_to_le64s(&hdr->flushed_file_offset);
+    cpu_to_le64s(&hdr->last_file_offset);
+}
+
+
+/* Region table entries */
+void vhdx_region_header_le_import(VHDXRegionTableHeader *hdr)
+{
+    assert(hdr != NULL);
+
+    le32_to_cpus(&hdr->signature);
+    le32_to_cpus(&hdr->checksum);
+    le32_to_cpus(&hdr->entry_count);
+}
+
+void vhdx_region_header_le_export(VHDXRegionTableHeader *hdr)
+{
+    assert(hdr != NULL);
+
+    cpu_to_le32s(&hdr->signature);
+    cpu_to_le32s(&hdr->checksum);
+    cpu_to_le32s(&hdr->entry_count);
+}
+
+void vhdx_region_entry_le_import(VHDXRegionTableEntry *e)
+{
+    assert(e != NULL);
+
+    leguid_to_cpus(&e->guid);
+    le64_to_cpus(&e->file_offset);
+    le32_to_cpus(&e->length);
+    le32_to_cpus(&e->data_bits);
+}
+
+void vhdx_region_entry_le_export(VHDXRegionTableEntry *e)
+{
+    assert(e != NULL);
+
+    cpu_to_leguids(&e->guid);
+    cpu_to_le64s(&e->file_offset);
+    cpu_to_le32s(&e->length);
+    cpu_to_le32s(&e->data_bits);
+}
+
+
+/* Metadata headers & table */
+void vhdx_metadata_header_le_import(VHDXMetadataTableHeader *hdr)
+{
+    assert(hdr != NULL);
+
+    le64_to_cpus(&hdr->signature);
+    le16_to_cpus(&hdr->entry_count);
+}
+
+void vhdx_metadata_header_le_export(VHDXMetadataTableHeader *hdr)
+{
+    assert(hdr != NULL);
+
+    cpu_to_le64s(&hdr->signature);
+    cpu_to_le16s(&hdr->entry_count);
+}
+
+void vhdx_metadata_entry_le_import(VHDXMetadataTableEntry *e)
+{
+    assert(e != NULL);
+
+    leguid_to_cpus(&e->item_id);
+    le32_to_cpus(&e->offset);
+    le32_to_cpus(&e->length);
+    le32_to_cpus(&e->data_bits);
+}
+void vhdx_metadata_entry_le_export(VHDXMetadataTableEntry *e)
+{
+    assert(e != NULL);
+
+    cpu_to_leguids(&e->item_id);
+    cpu_to_le32s(&e->offset);
+    cpu_to_le32s(&e->length);
+    cpu_to_le32s(&e->data_bits);
+}
diff --git a/block/vhdx-log.c b/block/vhdx-log.c
new file mode 100644
index 0000000..ee5583c
--- /dev/null
+++ b/block/vhdx-log.c
@@ -0,0 +1,1010 @@
+/*
+ * Block driver for Hyper-V VHDX Images
+ *
+ * Copyright (c) 2013 Red Hat, Inc.,
+ *
+ * Authors:
+ *  Jeff Cody <jcody@redhat.com>
+ *
+ *  This is based on the "VHDX Format Specification v1.00", published 8/25/2012
+ *  by Microsoft:
+ *      https://www.microsoft.com/en-us/download/details.aspx?id=34750
+ *
+ * This file covers the functionality of the metadata log writing, parsing, and
+ * replay.
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "qemu/module.h"
+#include "block/vhdx.h"
+
+
+typedef struct VHDXLogSequence {
+    bool valid;
+    uint32_t count;
+    VHDXLogEntries log;
+    VHDXLogEntryHeader hdr;
+} VHDXLogSequence;
+
+typedef struct VHDXLogDescEntries {
+    VHDXLogEntryHeader hdr;
+    VHDXLogDescriptor desc[];
+} VHDXLogDescEntries;
+
+static const MSGUID zero_guid = { 0 };
+
+/* The log located on the disk is circular buffer containing
+ * sectors of 4096 bytes each.
+ *
+ * It is assumed for the read/write functions below that the
+ * circular buffer scheme uses a 'one sector open' to indicate
+ * the buffer is full.  Given the validation methods used for each
+ * sector, this method should be compatible with other methods that
+ * do not waste a sector.
+ */
+
+
+/* Allow peeking at the hdr entry at the beginning of the current
+ * read index, without advancing the read index */
+static int vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log,
+                             VHDXLogEntryHeader *hdr)
+{
+    int ret = 0;
+    uint64_t offset;
+    uint32_t read;
+
+    assert(hdr != NULL);
+
+    /* peek is only supported on sector boundaries */
+    if (log->read % VHDX_LOG_SECTOR_SIZE) {
+        ret = -EFAULT;
+        goto exit;
+    }
+
+    read = log->read;
+    /* we are guaranteed that a) log sectors are 4096 bytes,
+     * and b) the log length is a multiple of 1MB. So, there
+     * is always a round number of sectors in the buffer */
+    if ((read + sizeof(VHDXLogEntryHeader)) > log->length) {
+        read = 0;
+    }
+
+    if (read == log->write) {
+        ret = -EINVAL;
+        goto exit;
+    }
+
+    offset = log->offset + read;
+
+    ret = bdrv_pread(bs->file, offset, hdr, sizeof(VHDXLogEntryHeader));
+    if (ret < 0) {
+        goto exit;
+    }
+
+exit:
+    return ret;
+}
+
+/* Index increment for log, based on sector boundaries */
+static int vhdx_log_inc_idx(uint32_t idx, uint64_t length)
+{
+    idx += VHDX_LOG_SECTOR_SIZE;
+    /* we are guaranteed that a) log sectors are 4096 bytes,
+     * and b) the log length is a multiple of 1MB. So, there
+     * is always a round number of sectors in the buffer */
+    return idx >= length ? 0 : idx;
+}
+
+
+/* Reset the log to empty */
+static void vhdx_log_reset(BlockDriverState *bs, BDRVVHDXState *s)
+{
+    MSGUID guid = { 0 };
+    s->log.read = s->log.write = 0;
+    /* a log guid of 0 indicates an empty log to any parser of v0
+     * VHDX logs */
+    vhdx_update_headers(bs, s, false, &guid);
+}
+
+/* Reads num_sectors from the log (all log sectors are 4096 bytes),
+ * into buffer 'buffer'.  Upon return, *sectors_read will contain
+ * the number of sectors successfully read.
+ *
+ * It is assumed that 'buffer' is already allocated, and of sufficient
+ * size (i.e. >= 4096*num_sectors).
+ *
+ * If 'peek' is true, then the tail (read) pointer for the circular buffer is
+ * not modified.
+ *
+ * 0 is returned on success, -errno otherwise.  */
+static int vhdx_log_read_sectors(BlockDriverState *bs, VHDXLogEntries *log,
+                                 uint32_t *sectors_read, void *buffer,
+                                 uint32_t num_sectors, bool peek)
+{
+    int ret = 0;
+    uint64_t offset;
+    uint32_t read;
+
+    read = log->read;
+
+    *sectors_read = 0;
+    while (num_sectors) {
+        if (read == log->write) {
+            /* empty */
+            break;
+        }
+        offset = log->offset + read;
+
+        ret = bdrv_pread(bs->file, offset, buffer, VHDX_LOG_SECTOR_SIZE);
+        if (ret < 0) {
+            goto exit;
+        }
+        read = vhdx_log_inc_idx(read, log->length);
+
+        *sectors_read = *sectors_read + 1;
+        num_sectors--;
+    }
+
+exit:
+    if (!peek) {
+        log->read = read;
+    }
+    return ret;
+}
+
+/* Writes num_sectors to the log (all log sectors are 4096 bytes),
+ * from buffer 'buffer'.  Upon return, *sectors_written will contain
+ * the number of sectors successfully written.
+ *
+ * It is assumed that 'buffer' is at least 4096*num_sectors large.
+ *
+ * 0 is returned on success, -errno otherwise */
+static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log,
+                                  uint32_t *sectors_written, void *buffer,
+                                  uint32_t num_sectors)
+{
+    int ret = 0;
+    uint64_t offset;
+    uint32_t write;
+    void *buffer_tmp;
+    BDRVVHDXState *s = bs->opaque;
+
+    ret = vhdx_user_visible_write(bs, s);
+    if (ret < 0) {
+        goto exit;
+    }
+
+    write = log->write;
+
+    buffer_tmp = buffer;
+    while (num_sectors) {
+
+        offset = log->offset + write;
+        write = vhdx_log_inc_idx(write, log->length);
+        if (write == log->read) {
+            /* full */
+            break;
+        }
+        ret = bdrv_pwrite(bs->file, offset, buffer_tmp, VHDX_LOG_SECTOR_SIZE);
+        if (ret < 0) {
+            goto exit;
+        }
+        buffer_tmp += VHDX_LOG_SECTOR_SIZE;
+
+        log->write = write;
+        *sectors_written = *sectors_written + 1;
+        num_sectors--;
+    }
+
+exit:
+    return ret;
+}
+
+
+/* Validates a log entry header */
+static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr,
+                                  BDRVVHDXState *s)
+{
+    int valid = false;
+
+    if (memcmp(&hdr->signature, "loge", 4)) {
+        goto exit;
+    }
+
+    /* if the individual entry length is larger than the whole log
+     * buffer, that is obviously invalid */
+    if (log->length < hdr->entry_length) {
+        goto exit;
+    }
+
+    /* length of entire entry must be in units of 4KB (log sector size) */
+    if (hdr->entry_length % (VHDX_LOG_SECTOR_SIZE)) {
+        goto exit;
+    }
+
+    /* per spec, sequence # must be > 0 */
+    if (hdr->sequence_number == 0) {
+        goto exit;
+    }
+
+    /* log entries are only valid if they match the file-wide log guid
+     * found in the active header */
+    if (!guid_eq(hdr->log_guid, s->headers[s->curr_header]->log_guid)) {
+        goto exit;
+    }
+
+    if (hdr->descriptor_count * sizeof(VHDXLogDescriptor) > hdr->entry_length) {
+        goto exit;
+    }
+
+    valid = true;
+
+exit:
+    return valid;
+}
+
+/*
+ * Given a log header, this will validate that the descriptors and the
+ * corresponding data sectors (if applicable)
+ *
+ * Validation consists of:
+ *      1. Making sure the sequence numbers matches the entry header
+ *      2. Verifying a valid signature ('zero' or 'desc' for descriptors)
+ *      3. File offset field is a multiple of 4KB
+ *      4. If a data descriptor, the corresponding data sector
+ *         has its signature ('data') and matching sequence number
+ *
+ * @desc: the data buffer containing the descriptor
+ * @hdr:  the log entry header
+ *
+ * Returns true if valid
+ */
+static bool vhdx_log_desc_is_valid(VHDXLogDescriptor *desc,
+                                   VHDXLogEntryHeader *hdr)
+{
+    bool ret = false;
+
+    if (desc->sequence_number != hdr->sequence_number) {
+        goto exit;
+    }
+    if (desc->file_offset % VHDX_LOG_SECTOR_SIZE) {
+        goto exit;
+    }
+
+    if (!memcmp(&desc->signature, "zero", 4)) {
+        if (desc->zero_length % VHDX_LOG_SECTOR_SIZE == 0) {
+            /* valid */
+            ret = true;
+        }
+    } else if (!memcmp(&desc->signature, "desc", 4)) {
+            /* valid */
+            ret = true;
+    }
+
+exit:
+    return ret;
+}
+
+
+/* Prior to sector data for a log entry, there is the header
+ * and the descriptors referenced in the header:
+ *
+ * [] = 4KB sector
+ *
+ * [ hdr, desc ][   desc   ][ ... ][ data ][ ... ]
+ *
+ * The first sector in a log entry has a 64 byte header, and
+ * up to 126 32-byte descriptors.  If more descriptors than
+ * 126 are required, then subsequent sectors can have up to 128
+ * descriptors.  Each sector is 4KB.  Data follows the descriptor
+ * sectors.
+ *
+ * This will return the number of sectors needed to encompass
+ * the passed number of descriptors in desc_cnt.
+ *
+ * This will never return 0, even if desc_cnt is 0.
+ */
+static int vhdx_compute_desc_sectors(uint32_t desc_cnt)
+{
+    uint32_t desc_sectors;
+
+    desc_cnt += 2; /* account for header in first sector */
+    desc_sectors = desc_cnt / 128;
+    if (desc_cnt % 128) {
+        desc_sectors++;
+    }
+
+    return desc_sectors;
+}
+
+
+/* Reads the log header, and subsequent descriptors (if any).  This
+ * will allocate all the space for buffer, which must be NULL when
+ * passed into this function. Each descriptor will also be validated,
+ * and error returned if any are invalid. */
+static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
+                              VHDXLogEntries *log, VHDXLogDescEntries **buffer)
+{
+    int ret = 0;
+    uint32_t desc_sectors;
+    uint32_t sectors_read;
+    VHDXLogEntryHeader hdr;
+    VHDXLogDescEntries *desc_entries = NULL;
+    int i;
+
+    assert(*buffer == NULL);
+
+    ret = vhdx_log_peek_hdr(bs, log, &hdr);
+    if (ret < 0) {
+        goto exit;
+    }
+    vhdx_log_entry_hdr_le_import(&hdr);
+    if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
+        ret = -EINVAL;
+        goto exit;
+    }
+
+    desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
+    desc_entries = qemu_blockalign(bs, desc_sectors * VHDX_LOG_SECTOR_SIZE);
+
+    ret = vhdx_log_read_sectors(bs, log, &sectors_read, desc_entries,
+                                desc_sectors, false);
+    if (ret < 0) {
+        goto free_and_exit;
+    }
+    if (sectors_read != desc_sectors) {
+        ret = -EINVAL;
+        goto free_and_exit;
+    }
+
+    /* put in proper endianness, and validate each desc */
+    for (i = 0; i < hdr.descriptor_count; i++) {
+        vhdx_log_desc_le_import(&desc_entries->desc[i]);
+        if (vhdx_log_desc_is_valid(&desc_entries->desc[i], &hdr) == false) {
+            ret = -EINVAL;
+            goto free_and_exit;
+        }
+    }
+
+    *buffer = desc_entries;
+    goto exit;
+
+free_and_exit:
+    qemu_vfree(desc_entries);
+exit:
+    return ret;
+}
+
+
+/* Flushes the descriptor described by desc to the VHDX image file.
+ * If the descriptor is a data descriptor, than 'data' must be non-NULL,
+ * and >= 4096 bytes (VHDX_LOG_SECTOR_SIZE), containing the data to be
+ * written.
+ *
+ * Verification is performed to make sure the sequence numbers of a data
+ * descriptor match the sequence number in the desc.
+ *
+ * For a zero descriptor, it may describe multiple sectors to fill with zeroes.
+ * In this case, it should be noted that zeroes are written to disk, and the
+ * image file is not extended as a sparse file.  */
+static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,
+                               VHDXLogDataSector *data)
+{
+    int ret = 0;
+    uint64_t seq, file_offset;
+    uint32_t offset = 0;
+    void *buffer = NULL;
+    uint64_t count = 1;
+    int i;
+
+    buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
+
+    if (!memcmp(&desc->signature, "desc", 4)) {
+        /* data sector */
+        if (data == NULL) {
+            ret = -EFAULT;
+            goto exit;
+        }
+
+        /* The sequence number of the data sector must match that
+         * in the descriptor */
+        seq = data->sequence_high;
+        seq <<= 32;
+        seq |= data->sequence_low & 0xffffffff;
+
+        if (seq != desc->sequence_number) {
+            ret = -EINVAL;
+            goto exit;
+        }
+
+        /* Each data sector is in total 4096 bytes, however the first
+         * 8 bytes, and last 4 bytes, are located in the descriptor */
+        memcpy(buffer, &desc->leading_bytes, 8);
+        offset += 8;
+
+        memcpy(buffer+offset, data->data, 4084);
+        offset += 4084;
+
+        memcpy(buffer+offset, &desc->trailing_bytes, 4);
+
+    } else if (!memcmp(&desc->signature, "zero", 4)) {
+        /* write 'count' sectors of sector */
+        memset(buffer, 0, VHDX_LOG_SECTOR_SIZE);
+        count = desc->zero_length / VHDX_LOG_SECTOR_SIZE;
+    }
+
+    file_offset = desc->file_offset;
+
+    /* count is only > 1 if we are writing zeroes */
+    for (i = 0; i < count; i++) {
+        ret = bdrv_pwrite_sync(bs->file, file_offset, buffer,
+                               VHDX_LOG_SECTOR_SIZE);
+        if (ret < 0) {
+            goto exit;
+        }
+        file_offset += VHDX_LOG_SECTOR_SIZE;
+    }
+
+exit:
+    qemu_vfree(buffer);
+    return ret;
+}
+
+/* Flush the entire log (as described by 'logs') to the VHDX image
+ * file, and then set the log to 'empty' status once complete.
+ *
+ * The log entries should be validate prior to flushing */
+static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
+                          VHDXLogSequence *logs)
+{
+    int ret = 0;
+    int i;
+    uint32_t cnt, sectors_read;
+    uint64_t new_file_size;
+    void *data = NULL;
+    VHDXLogDescEntries *desc_entries = NULL;
+    VHDXLogEntryHeader hdr_tmp = { 0 };
+
+    cnt = logs->count;
+
+    data = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
+
+    ret = vhdx_user_visible_write(bs, s);
+    if (ret < 0) {
+        goto exit;
+    }
+
+    /* each iteration represents one log sequence, which may span multiple
+     * sectors */
+    while (cnt--) {
+        ret = vhdx_log_peek_hdr(bs, &logs->log, &hdr_tmp);
+        if (ret < 0) {
+            goto exit;
+        }
+        /* if the log shows a FlushedFileOffset larger than our current file
+         * size, then that means the file has been truncated / corrupted, and
+         * we must refused to open it / use it */
+        if (hdr_tmp.flushed_file_offset > bdrv_getlength(bs->file)) {
+            ret = -EINVAL;
+            goto exit;
+        }
+
+        ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries);
+        if (ret < 0) {
+            goto exit;
+        }
+
+        for (i = 0; i < desc_entries->hdr.descriptor_count; i++) {
+            if (!memcmp(&desc_entries->desc[i].signature, "desc", 4)) {
+                /* data sector, so read a sector to flush */
+                ret = vhdx_log_read_sectors(bs, &logs->log, &sectors_read,
+                                            data, 1, false);
+                if (ret < 0) {
+                    goto exit;
+                }
+                if (sectors_read != 1) {
+                    ret = -EINVAL;
+                    goto exit;
+                }
+            }
+
+            ret = vhdx_log_flush_desc(bs, &desc_entries->desc[i], data);
+            if (ret < 0) {
+                goto exit;
+            }
+        }
+        if (bdrv_getlength(bs->file) < desc_entries->hdr.last_file_offset) {
+            new_file_size = desc_entries->hdr.last_file_offset;
+            if (new_file_size % (1024*1024)) {
+                /* round up to nearest 1MB boundary */
+                new_file_size = ((new_file_size >> 20) + 1) << 20;
+                bdrv_truncate(bs->file, new_file_size);
+            }
+        }
+        qemu_vfree(desc_entries);
+        desc_entries = NULL;
+    }
+
+    bdrv_flush(bs);
+    /* once the log is fully flushed, indicate that we have an empty log
+     * now.  This also sets the log guid to 0, to indicate an empty log */
+    vhdx_log_reset(bs, s);
+
+exit:
+    qemu_vfree(data);
+    qemu_vfree(desc_entries);
+    return ret;
+}
+
+static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s,
+                                   VHDXLogEntries *log, uint64_t seq,
+                                   bool *valid, VHDXLogEntryHeader *entry)
+{
+    int ret = 0;
+    VHDXLogEntryHeader hdr;
+    void *buffer = NULL;
+    uint32_t i, desc_sectors, total_sectors, crc;
+    uint32_t sectors_read = 0;
+    VHDXLogDescEntries *desc_buffer = NULL;
+
+    *valid = false;
+
+    ret = vhdx_log_peek_hdr(bs, log, &hdr);
+    if (ret < 0) {
+        goto inc_and_exit;
+    }
+
+    vhdx_log_entry_hdr_le_import(&hdr);
+
+
+    if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
+        goto inc_and_exit;
+    }
+
+    if (seq > 0) {
+        if (hdr.sequence_number != seq + 1) {
+            goto inc_and_exit;
+        }
+    }
+
+    desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
+
+    /* Read desc sectors, and calculate log checksum */
+
+    total_sectors = hdr.entry_length / VHDX_LOG_SECTOR_SIZE;
+
+
+    /* read_desc() will incrememnt the read idx */
+    ret = vhdx_log_read_desc(bs, s, log, &desc_buffer);
+    if (ret < 0) {
+        goto free_and_exit;
+    }
+
+    crc = vhdx_checksum_calc(0xffffffff, (void *)desc_buffer,
+                            desc_sectors * VHDX_LOG_SECTOR_SIZE, 4);
+    crc ^= 0xffffffff;
+
+    buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
+    if (total_sectors > desc_sectors) {
+        for (i = 0; i < total_sectors - desc_sectors; i++) {
+            sectors_read = 0;
+            ret = vhdx_log_read_sectors(bs, log, &sectors_read, buffer,
+                                        1, false);
+            if (ret < 0 || sectors_read != 1) {
+                goto free_and_exit;
+            }
+            crc = vhdx_checksum_calc(crc, buffer, VHDX_LOG_SECTOR_SIZE, -1);
+            crc ^= 0xffffffff;
+        }
+    }
+    crc ^= 0xffffffff;
+    if (crc != desc_buffer->hdr.checksum) {
+        goto free_and_exit;
+    }
+
+    *valid = true;
+    *entry = hdr;
+    goto free_and_exit;
+
+inc_and_exit:
+    log->read = vhdx_log_inc_idx(log->read, log->length);
+
+free_and_exit:
+    qemu_vfree(buffer);
+    qemu_vfree(desc_buffer);
+    return ret;
+}
+
+/* Search through the log circular buffer, and find the valid, active
+ * log sequence, if any exists
+ * */
+static int vhdx_log_search(BlockDriverState *bs, BDRVVHDXState *s,
+                           VHDXLogSequence *logs)
+{
+    int ret = 0;
+    uint32_t tail;
+    bool seq_valid = false;
+    VHDXLogSequence candidate = { 0 };
+    VHDXLogEntryHeader hdr = { 0 };
+    VHDXLogEntries curr_log;
+
+    memcpy(&curr_log, &s->log, sizeof(VHDXLogEntries));
+    curr_log.write = curr_log.length;   /* assume log is full */
+    curr_log.read = 0;
+
+
+    /* now we will go through the whole log sector by sector, until
+     * we find a valid, active log sequence, or reach the end of the
+     * log buffer */
+    for (;;) {
+        uint64_t curr_seq = 0;
+        VHDXLogSequence current = { 0 };
+
+        tail = curr_log.read;
+
+        ret = vhdx_validate_log_entry(bs, s, &curr_log, curr_seq,
+                                      &seq_valid, &hdr);
+        if (ret < 0) {
+            goto exit;
+        }
+
+        if (seq_valid) {
+            current.valid     = true;
+            current.log       = curr_log;
+            current.log.read  = tail;
+            current.log.write = curr_log.read;
+            current.count     = 1;
+            current.hdr       = hdr;
+
+
+            for (;;) {
+                ret = vhdx_validate_log_entry(bs, s, &curr_log, curr_seq,
+                                              &seq_valid, &hdr);
+                if (ret < 0) {
+                    goto exit;
+                }
+                if (seq_valid == false) {
+                    break;
+                }
+                current.log.write = curr_log.read;
+                current.count++;
+
+                curr_seq = hdr.sequence_number;
+            }
+        }
+
+        if (current.valid) {
+            if (candidate.valid == false ||
+                current.hdr.sequence_number > candidate.hdr.sequence_number) {
+                candidate = current;
+            }
+        }
+
+        if (curr_log.read < tail) {
+            break;
+        }
+    }
+
+    *logs = candidate;
+
+    if (candidate.valid) {
+        /* this is the next sequence number, for writes */
+        s->log.sequence = candidate.hdr.sequence_number + 1;
+    }
+
+
+exit:
+    return ret;
+}
+
+/* Parse the replay log.  Per the VHDX spec, if the log is present
+ * it must be replayed prior to opening the file, even read-only.
+ *
+ * If read-only, we must replay the log in RAM (or refuse to open
+ * a dirty VHDX file read-only) */
+int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed)
+{
+    int ret = 0;
+    VHDXHeader *hdr;
+    VHDXLogSequence logs = { 0 };
+
+    hdr = s->headers[s->curr_header];
+
+    *flushed = false;
+
+    /* s->log.hdr is freed in vhdx_close() */
+    if (s->log.hdr == NULL) {
+        s->log.hdr = qemu_blockalign(bs, sizeof(VHDXLogEntryHeader));
+    }
+
+    s->log.offset = hdr->log_offset;
+    s->log.length = hdr->log_length;
+
+    if (s->log.offset < VHDX_LOG_MIN_SIZE ||
+        s->log.offset % VHDX_LOG_MIN_SIZE) {
+        ret = -EINVAL;
+        goto exit;
+    }
+
+    /* per spec, only log version of 0 is supported */
+    if (hdr->log_version != 0) {
+        ret = -EINVAL;
+        goto exit;
+    }
+
+    /* If either the log guid, or log length is zero,
+     * then a replay log is not present */
+    if (guid_eq(hdr->log_guid, zero_guid)) {
+        goto exit;
+    }
+
+    if (hdr->log_length == 0) {
+        goto exit;
+    }
+
+    if (hdr->log_length % VHDX_LOG_MIN_SIZE) {
+        ret = -EINVAL;
+        goto exit;
+    }
+
+
+    /* The log is present, we need to find if and where there is an active
+     * sequence of valid entries present in the log.  */
+
+    ret = vhdx_log_search(bs, s, &logs);
+    if (ret < 0) {
+        goto exit;
+    }
+
+    if (logs.valid) {
+        /* now flush the log */
+        ret = vhdx_log_flush(bs, s, &logs);
+        if (ret < 0) {
+            goto exit;
+        }
+        *flushed = true;
+    }
+
+
+exit:
+    return ret;
+}
+
+
+
+static void vhdx_log_raw_to_le_sector(VHDXLogDescriptor *desc,
+                                      VHDXLogDataSector *sector, void *data,
+                                      uint64_t seq)
+{
+    /* 8 + 4084 + 4 = 4096, 1 log sector */
+    memcpy(&desc->leading_bytes, data, 8);
+    data += 8;
+    cpu_to_le64s(&desc->leading_bytes);
+    memcpy(sector->data, data, 4084);
+    data += 4084;
+    memcpy(&desc->trailing_bytes, data, 4);
+    cpu_to_le32s(&desc->trailing_bytes);
+    data += 4;
+
+    sector->sequence_high  = (uint32_t) (seq >> 32);
+    sector->sequence_low   = (uint32_t) (seq & 0xffffffff);
+    sector->data_signature = VHDX_LOG_DATA_SIGNATURE;
+
+    vhdx_log_desc_le_export(desc);
+    vhdx_log_data_le_export(sector);
+}
+
+
+static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
+                          void *data, uint32_t length, uint64_t offset)
+{
+    int ret = 0;
+    void *buffer = NULL;
+    void *merged_sector = NULL;
+    void *data_tmp, *sector_write;
+    unsigned int i;
+    int sector_offset;
+    uint32_t desc_sectors, sectors, total_length;
+    uint32_t sectors_written = 0;
+    uint32_t aligned_length;
+    uint32_t leading_length = 0;
+    uint32_t trailing_length = 0;
+    uint32_t partial_sectors = 0;
+    uint32_t bytes_written = 0;
+    uint64_t file_offset;
+    VHDXHeader *header;
+    VHDXLogEntryHeader new_hdr;
+    VHDXLogDescriptor *new_desc = NULL;
+    VHDXLogDataSector *data_sector = NULL;
+    MSGUID new_guid = { 0 };
+
+    header = s->headers[s->curr_header];
+
+    /* need to have offset read data, and be on 4096 byte boundary */
+
+    if (length > header->log_length) {
+        /* no log present.  we could create a log here instead of failing */
+        ret = -EINVAL;
+        goto exit;
+    }
+
+    if (guid_eq(header->log_guid, zero_guid)) {
+        vhdx_guid_generate(&new_guid);
+        vhdx_update_headers(bs, s, false, &new_guid);
+    } else {
+        /* currently, we require that the log be flushed after
+         * every write. */
+        ret = -ENOTSUP;
+        goto exit;
+    }
+
+    /* 0 is an invalid sequence number, but may also represent the first
+     * log write (or a wrapped seq) */
+    if (s->log.sequence == 0) {
+        s->log.sequence = 1;
+    }
+
+    sector_offset = offset % VHDX_LOG_SECTOR_SIZE;
+    file_offset = (offset / VHDX_LOG_SECTOR_SIZE) * VHDX_LOG_SECTOR_SIZE;
+
+    aligned_length = length;
+
+    /* add in the unaligned head and tail bytes */
+    if (sector_offset) {
+        leading_length = (VHDX_LOG_SECTOR_SIZE - sector_offset);
+        leading_length = leading_length > length ? length : leading_length;
+        aligned_length -= leading_length;
+        partial_sectors++;
+    }
+
+    sectors = aligned_length / VHDX_LOG_SECTOR_SIZE;
+    trailing_length = aligned_length - (sectors * VHDX_LOG_SECTOR_SIZE);
+    if (trailing_length) {
+        partial_sectors++;
+    }
+
+    sectors += partial_sectors;
+
+    /* sectors is now how many sectors the data itself takes, not
+     * including the header and descriptor metadata */
+
+    new_hdr = (VHDXLogEntryHeader) {
+                .signature           = VHDX_LOG_SIGNATURE,
+                .tail                = s->log.tail,
+                .sequence_number     = s->log.sequence,
+                .descriptor_count    = sectors,
+                .reserved            = 0,
+                .flushed_file_offset = bdrv_getlength(bs->file),
+                .last_file_offset    = bdrv_getlength(bs->file),
+              };
+
+    new_hdr.log_guid = header->log_guid;
+
+    desc_sectors = vhdx_compute_desc_sectors(new_hdr.descriptor_count);
+
+    total_length = (desc_sectors + sectors) * VHDX_LOG_SECTOR_SIZE;
+    new_hdr.entry_length = total_length;
+
+    vhdx_log_entry_hdr_le_export(&new_hdr);
+
+    buffer = qemu_blockalign(bs, total_length);
+    memcpy(buffer, &new_hdr, sizeof(new_hdr));
+
+    new_desc = (VHDXLogDescriptor *) (buffer + sizeof(new_hdr));
+    data_sector = buffer + (desc_sectors * VHDX_LOG_SECTOR_SIZE);
+    data_tmp = data;
+
+    /* All log sectors are 4KB, so for any partial sectors we must
+     * merge the data with preexisting data from the final file
+     * destination */
+    merged_sector = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
+
+    for (i = 0; i < sectors; i++) {
+        new_desc->signature       = VHDX_LOG_DESC_SIGNATURE;
+        new_desc->sequence_number = s->log.sequence;
+        new_desc->file_offset     = file_offset;
+
+        if (i == 0 && leading_length) {
+            /* partial sector at the front of the buffer */
+            ret = bdrv_pread(bs->file, file_offset, merged_sector,
+                             VHDX_LOG_SECTOR_SIZE);
+            if (ret < 0) {
+                goto exit;
+            }
+            memcpy(merged_sector + sector_offset, data_tmp, leading_length);
+            bytes_written = leading_length;
+            sector_write = merged_sector;
+        } else if (i == sectors - 1 && trailing_length) {
+            /* partial sector at the end of the buffer */
+            ret = bdrv_pread(bs->file,
+                            file_offset,
+                            merged_sector + trailing_length,
+                            VHDX_LOG_SECTOR_SIZE - trailing_length);
+            if (ret < 0) {
+                goto exit;
+            }
+            memcpy(merged_sector, data_tmp, trailing_length);
+            bytes_written = trailing_length;
+            sector_write = merged_sector;
+        } else {
+            bytes_written = VHDX_LOG_SECTOR_SIZE;
+            sector_write = data_tmp;
+        }
+
+        /* populate the raw sector data into the proper structures,
+         * as well as update the descriptor, and convert to proper
+         * endianness */
+        vhdx_log_raw_to_le_sector(new_desc, data_sector, sector_write,
+                                  s->log.sequence);
+
+        data_tmp += bytes_written;
+        data_sector++;
+        new_desc++;
+        file_offset += VHDX_LOG_SECTOR_SIZE;
+    }
+
+    /* checksum covers entire entry, from the log header through the
+     * last data sector */
+    vhdx_update_checksum(buffer, total_length,
+                         offsetof(VHDXLogEntryHeader, checksum));
+    cpu_to_le32s((uint32_t *)(buffer + 4));
+
+    /* now write to the log */
+    vhdx_log_write_sectors(bs, &s->log, &sectors_written, buffer,
+                           desc_sectors + sectors);
+    if (ret < 0) {
+        goto exit;
+    }
+
+    if (sectors_written != desc_sectors + sectors) {
+        /* instead of failing, we could flush the log here */
+        ret = -EINVAL;
+        goto exit;
+    }
+
+    s->log.sequence++;
+    /* write new tail */
+    s->log.tail = s->log.write;
+
+exit:
+    qemu_vfree(buffer);
+    qemu_vfree(merged_sector);
+    return ret;
+}
+
+/* Perform a log write, and then immediately flush the entire log */
+int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
+                             void *data, uint32_t length, uint64_t offset)
+{
+    int ret = 0;
+    VHDXLogSequence logs = { .valid = true,
+                             .count = 1,
+                             .hdr = { 0 } };
+
+
+    /* Make sure data written (new and/or changed blocks) is stable
+     * on disk, before creating log entry */
+    bdrv_flush(bs);
+    ret = vhdx_log_write(bs, s, data, length, offset);
+    if (ret < 0) {
+        goto exit;
+    }
+    logs.log = s->log;
+
+    /* Make sure log is stable on disk */
+    bdrv_flush(bs);
+    ret = vhdx_log_flush(bs, s, &logs);
+    if (ret < 0) {
+        goto exit;
+    }
+
+    s->log = logs.log;
+
+exit:
+    return ret;
+}
+
diff --git a/block/vhdx.c b/block/vhdx.c
index 6cb0412..7d1af96 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -6,9 +6,9 @@
  * Authors:
  *  Jeff Cody <jcody@redhat.com>
  *
- *  This is based on the "VHDX Format Specification v0.95", published 4/12/2012
+ *  This is based on the "VHDX Format Specification v1.00", published 8/25/2012
  *  by Microsoft:
- *      https://www.microsoft.com/en-us/download/details.aspx?id=29681
+ *      https://www.microsoft.com/en-us/download/details.aspx?id=34750
  *
  * This work is licensed under the terms of the GNU LGPL, version 2 or later.
  * See the COPYING.LIB file in the top-level directory.
@@ -22,6 +22,20 @@
 #include "block/vhdx.h"
 #include "migration/migration.h"
 
+#include <uuid/uuid.h>
+#include <glib.h>
+
+/* Options for VHDX creation */
+
+#define VHDX_BLOCK_OPT_LOG_SIZE   "log_size"
+#define VHDX_BLOCK_OPT_BLOCK_SIZE "block_size"
+#define VHDX_BLOCK_OPT_ZERO "block_state_zero"
+
+typedef enum VHDXImageType {
+    VHDX_TYPE_DYNAMIC = 0,
+    VHDX_TYPE_FIXED,
+    VHDX_TYPE_DIFFERENCING,   /* Currently unsupported */
+} VHDXImageType;
 
 /* Several metadata and region table data entries are identified by
  * guids in  a MS-specific GUID format. */
@@ -104,16 +118,6 @@
      META_PAGE_83_PRESENT | META_LOGICAL_SECTOR_SIZE_PRESENT | \
      META_PHYS_SECTOR_SIZE_PRESENT)
 
-typedef struct VHDXMetadataEntries {
-    VHDXMetadataTableEntry file_parameters_entry;
-    VHDXMetadataTableEntry virtual_disk_size_entry;
-    VHDXMetadataTableEntry page83_data_entry;
-    VHDXMetadataTableEntry logical_sector_size_entry;
-    VHDXMetadataTableEntry phys_sector_size_entry;
-    VHDXMetadataTableEntry parent_locator_entry;
-    uint16_t present;
-} VHDXMetadataEntries;
-
 
 typedef struct VHDXSectorInfo {
     uint32_t bat_idx;       /* BAT entry index */
@@ -124,44 +128,31 @@
     uint64_t block_offset;  /* block offset, in bytes */
 } VHDXSectorInfo;
 
+/* Calculates new checksum.
+ *
+ * Zero is substituted during crc calculation for the original crc field
+ * crc_offset: byte offset in buf of the buffer crc
+ * buf: buffer pointer
+ * size: size of buffer (must be > crc_offset+4)
+ *
+ * Note: The resulting checksum is in the CPU endianness, not necessarily
+ *       in the file format endianness (LE).  Any header export to disk should
+ *       make sure that vhdx_header_le_export() is used to convert to the
+ *       correct endianness
+ */
+uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset)
+{
+    uint32_t crc;
 
+    assert(buf != NULL);
+    assert(size > (crc_offset + sizeof(crc)));
 
-typedef struct BDRVVHDXState {
-    CoMutex lock;
+    memset(buf + crc_offset, 0, sizeof(crc));
+    crc =  crc32c(0xffffffff, buf, size);
+    memcpy(buf + crc_offset, &crc, sizeof(crc));
 
-    int curr_header;
-    VHDXHeader *headers[2];
-
-    VHDXRegionTableHeader rt;
-    VHDXRegionTableEntry bat_rt;         /* region table for the BAT */
-    VHDXRegionTableEntry metadata_rt;    /* region table for the metadata */
-
-    VHDXMetadataTableHeader metadata_hdr;
-    VHDXMetadataEntries metadata_entries;
-
-    VHDXFileParameters params;
-    uint32_t block_size;
-    uint32_t block_size_bits;
-    uint32_t sectors_per_block;
-    uint32_t sectors_per_block_bits;
-
-    uint64_t virtual_disk_size;
-    uint32_t logical_sector_size;
-    uint32_t physical_sector_size;
-
-    uint64_t chunk_ratio;
-    uint32_t chunk_ratio_bits;
-    uint32_t logical_sector_size_bits;
-
-    uint32_t bat_entries;
-    VHDXBatEntry *bat;
-    uint64_t bat_offset;
-
-    VHDXParentLocatorHeader parent_header;
-    VHDXParentLocatorEntry *parent_entries;
-
-    Error *migration_blocker;
-} BDRVVHDXState;
+    return crc;
+}
 
 uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size,
                             int crc_offset)
@@ -214,6 +205,71 @@
 
 
 /*
+ * This generates a UUID that is compliant with the MS GUIDs used
+ * in the VHDX spec (and elsewhere).
+ */
+void vhdx_guid_generate(MSGUID *guid)
+{
+    uuid_t uuid;
+    assert(guid != NULL);
+
+    uuid_generate(uuid);
+    memcpy(guid, uuid, sizeof(MSGUID));
+}
+
+/* Check for region overlaps inside the VHDX image */
+static int vhdx_region_check(BDRVVHDXState *s, uint64_t start, uint64_t length)
+{
+    int ret = 0;
+    uint64_t end;
+    VHDXRegionEntry *r;
+
+    end = start + length;
+    QLIST_FOREACH(r, &s->regions, entries) {
+        if (!((start >= r->end) || (end <= r->start))) {
+            ret = -EINVAL;
+            goto exit;
+        }
+    }
+
+exit:
+    return ret;
+}
+
+/* Register a region for future checks */
+static void vhdx_region_register(BDRVVHDXState *s,
+                                 uint64_t start, uint64_t length)
+{
+    VHDXRegionEntry *r;
+
+    r = g_malloc0(sizeof(*r));
+
+    r->start = start;
+    r->end = start + length;
+
+    QLIST_INSERT_HEAD(&s->regions, r, entries);
+}
+
+/* Free all registered regions */
+static void vhdx_region_unregister_all(BDRVVHDXState *s)
+{
+    VHDXRegionEntry *r, *r_next;
+
+    QLIST_FOREACH_SAFE(r, &s->regions, entries, r_next) {
+        QLIST_REMOVE(r, entries);
+        g_free(r);
+    }
+}
+
+static void vhdx_set_shift_bits(BDRVVHDXState *s)
+{
+    s->logical_sector_size_bits = 31 - clz32(s->logical_sector_size);
+    s->sectors_per_block_bits =   31 - clz32(s->sectors_per_block);
+    s->chunk_ratio_bits =         63 - clz64(s->chunk_ratio);
+    s->block_size_bits =          31 - clz32(s->block_size);
+}
+
+/*
  * Per the MS VHDX Specification, for every VHDX file:
  *      - The header section is fixed size - 1 MB
  *      - The header section is always the first "object"
@@ -232,25 +288,118 @@
     return 0;
 }
 
-/* All VHDX structures on disk are little endian */
-static void vhdx_header_le_import(VHDXHeader *h)
+/*
+ * Writes the header to the specified offset.
+ *
+ * This will optionally read in buffer data from disk (otherwise zero-fill),
+ * and then update the header checksum.  Header is converted to proper
+ * endianness before being written to the specified file offset
+ */
+static int vhdx_write_header(BlockDriverState *bs_file, VHDXHeader *hdr,
+                             uint64_t offset, bool read)
 {
-    assert(h != NULL);
+    uint8_t *buffer = NULL;
+    int ret;
+    VHDXHeader header_le;
 
-    le32_to_cpus(&h->signature);
-    le32_to_cpus(&h->checksum);
-    le64_to_cpus(&h->sequence_number);
+    assert(bs_file != NULL);
+    assert(hdr != NULL);
 
-    leguid_to_cpus(&h->file_write_guid);
-    leguid_to_cpus(&h->data_write_guid);
-    leguid_to_cpus(&h->log_guid);
+    /* the header checksum is not over just the packed size of VHDXHeader,
+     * but rather over the entire 'reserved' range for the header, which is
+     * 4KB (VHDX_HEADER_SIZE). */
 
-    le16_to_cpus(&h->log_version);
-    le16_to_cpus(&h->version);
-    le32_to_cpus(&h->log_length);
-    le64_to_cpus(&h->log_offset);
+    buffer = qemu_blockalign(bs_file, VHDX_HEADER_SIZE);
+    if (read) {
+        /* if true, we can't assume the extra reserved bytes are 0 */
+        ret = bdrv_pread(bs_file, offset, buffer, VHDX_HEADER_SIZE);
+        if (ret < 0) {
+            goto exit;
+        }
+    } else {
+        memset(buffer, 0, VHDX_HEADER_SIZE);
+    }
+
+    /* overwrite the actual VHDXHeader portion */
+    memcpy(buffer, hdr, sizeof(VHDXHeader));
+    hdr->checksum = vhdx_update_checksum(buffer, VHDX_HEADER_SIZE,
+                                         offsetof(VHDXHeader, checksum));
+    vhdx_header_le_export(hdr, &header_le);
+    ret = bdrv_pwrite_sync(bs_file, offset, &header_le, sizeof(VHDXHeader));
+
+exit:
+    qemu_vfree(buffer);
+    return ret;
 }
 
+/* Update the VHDX headers
+ *
+ * This follows the VHDX spec procedures for header updates.
+ *
+ *  - non-current header is updated with largest sequence number
+ */
+static int vhdx_update_header(BlockDriverState *bs, BDRVVHDXState *s,
+                              bool generate_data_write_guid, MSGUID *log_guid)
+{
+    int ret = 0;
+    int hdr_idx = 0;
+    uint64_t header_offset = VHDX_HEADER1_OFFSET;
+
+    VHDXHeader *active_header;
+    VHDXHeader *inactive_header;
+
+    /* operate on the non-current header */
+    if (s->curr_header == 0) {
+        hdr_idx = 1;
+        header_offset = VHDX_HEADER2_OFFSET;
+    }
+
+    active_header   = s->headers[s->curr_header];
+    inactive_header = s->headers[hdr_idx];
+
+    inactive_header->sequence_number = active_header->sequence_number + 1;
+
+    /* a new file guid must be generated before any file write, including
+     * headers */
+    inactive_header->file_write_guid = s->session_guid;
+
+    /* a new data guid only needs to be generated before any guest-visible
+     * writes (i.e. something observable via virtual disk read) */
+    if (generate_data_write_guid) {
+        vhdx_guid_generate(&inactive_header->data_write_guid);
+    }
+
+    /* update the log guid if present */
+    if (log_guid) {
+        inactive_header->log_guid = *log_guid;
+    }
+
+    vhdx_write_header(bs->file, inactive_header, header_offset, true);
+    if (ret < 0) {
+        goto exit;
+    }
+    s->curr_header = hdr_idx;
+
+exit:
+    return ret;
+}
+
+/*
+ * The VHDX spec calls for header updates to be performed twice, so that both
+ * the current and non-current header have valid info
+ */
+int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s,
+                        bool generate_data_write_guid, MSGUID *log_guid)
+{
+    int ret;
+
+    ret = vhdx_update_header(bs, s, generate_data_write_guid, log_guid);
+    if (ret < 0) {
+        return ret;
+    }
+    ret = vhdx_update_header(bs, s, generate_data_write_guid, log_guid);
+    return ret;
+}
 
 /* opens the specified header block from the VHDX file header section */
 static int vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s)
@@ -264,6 +413,7 @@
     uint64_t h2_seq = 0;
     uint8_t *buffer;
 
+    /* header1 & header2 are freed in vhdx_close() */
     header1 = qemu_blockalign(bs, sizeof(VHDXHeader));
     header2 = qemu_blockalign(bs, sizeof(VHDXHeader));
 
@@ -328,6 +478,9 @@
         }
     }
 
+    vhdx_region_register(s, s->headers[s->curr_header]->log_offset,
+                            s->headers[s->curr_header]->log_length);
+
     ret = 0;
 
     goto exit;
@@ -364,10 +517,7 @@
         goto fail;
     }
     memcpy(&s->rt, buffer, sizeof(s->rt));
-    le32_to_cpus(&s->rt.signature);
-    le32_to_cpus(&s->rt.checksum);
-    le32_to_cpus(&s->rt.entry_count);
-    le32_to_cpus(&s->rt.reserved);
+    vhdx_region_header_le_import(&s->rt);
     offset += sizeof(s->rt);
 
     if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4) ||
@@ -386,10 +536,16 @@
         memcpy(&rt_entry, buffer + offset, sizeof(rt_entry));
         offset += sizeof(rt_entry);
 
-        leguid_to_cpus(&rt_entry.guid);
-        le64_to_cpus(&rt_entry.file_offset);
-        le32_to_cpus(&rt_entry.length);
-        le32_to_cpus(&rt_entry.data_bits);
+        vhdx_region_entry_le_import(&rt_entry);
+
+        /* check for region overlap between these entries, and any
+         * other memory regions in the file */
+        ret = vhdx_region_check(s, rt_entry.file_offset, rt_entry.length);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        vhdx_region_register(s, rt_entry.file_offset, rt_entry.length);
 
         /* see if we recognize the entry */
         if (guid_eq(rt_entry.guid, bat_guid)) {
@@ -421,6 +577,12 @@
             goto fail;
         }
     }
+
+    if (!bat_rt_found || !metadata_rt_found) {
+        ret = -EINVAL;
+        goto fail;
+    }
+
     ret = 0;
 
 fail:
@@ -464,9 +626,7 @@
     memcpy(&s->metadata_hdr, buffer, sizeof(s->metadata_hdr));
     offset += sizeof(s->metadata_hdr);
 
-    le64_to_cpus(&s->metadata_hdr.signature);
-    le16_to_cpus(&s->metadata_hdr.reserved);
-    le16_to_cpus(&s->metadata_hdr.entry_count);
+    vhdx_metadata_header_le_import(&s->metadata_hdr);
 
     if (memcmp(&s->metadata_hdr.signature, "metadata", 8)) {
         ret = -EINVAL;
@@ -485,11 +645,7 @@
         memcpy(&md_entry, buffer + offset, sizeof(md_entry));
         offset += sizeof(md_entry);
 
-        leguid_to_cpus(&md_entry.item_id);
-        le32_to_cpus(&md_entry.offset);
-        le32_to_cpus(&md_entry.length);
-        le32_to_cpus(&md_entry.data_bits);
-        le32_to_cpus(&md_entry.reserved2);
+        vhdx_metadata_entry_le_import(&md_entry);
 
         if (guid_eq(md_entry.item_id, file_param_guid)) {
             if (s->metadata_entries.present & META_FILE_PARAMETER_PRESENT) {
@@ -662,10 +818,7 @@
         goto exit;
     }
 
-    s->logical_sector_size_bits = 31 - clz32(s->logical_sector_size);
-    s->sectors_per_block_bits =   31 - clz32(s->sectors_per_block);
-    s->chunk_ratio_bits =         63 - clz64(s->chunk_ratio);
-    s->block_size_bits =          31 - clz32(s->block_size);
+    vhdx_set_shift_bits(s);
 
     ret = 0;
 
@@ -674,98 +827,14 @@
     return ret;
 }
 
-/* Parse the replay log.  Per the VHDX spec, if the log is present
- * it must be replayed prior to opening the file, even read-only.
- *
- * If read-only, we must replay the log in RAM (or refuse to open
- * a dirty VHDX file read-only */
-static int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s)
+/*
+ * Calculate the number of BAT entries, including sector
+ * bitmap entries.
+ */
+static void vhdx_calc_bat_entries(BDRVVHDXState *s)
 {
-    int ret = 0;
-    int i;
-    VHDXHeader *hdr;
-
-    hdr = s->headers[s->curr_header];
-
-    /* either the log guid, or log length is zero,
-     * then a replay log is present */
-    for (i = 0; i < sizeof(hdr->log_guid.data4); i++) {
-        ret |= hdr->log_guid.data4[i];
-    }
-    if (hdr->log_guid.data1 == 0 &&
-        hdr->log_guid.data2 == 0 &&
-        hdr->log_guid.data3 == 0 &&
-        ret == 0) {
-        goto exit;
-    }
-
-    /* per spec, only log version of 0 is supported */
-    if (hdr->log_version != 0) {
-        ret = -EINVAL;
-        goto exit;
-    }
-
-    if (hdr->log_length == 0) {
-        goto exit;
-    }
-
-    /* We currently do not support images with logs to replay */
-    ret = -ENOTSUP;
-
-exit:
-    return ret;
-}
-
-
-static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
-                     Error **errp)
-{
-    BDRVVHDXState *s = bs->opaque;
-    int ret = 0;
-    uint32_t i;
-    uint64_t signature;
     uint32_t data_blocks_cnt, bitmap_blocks_cnt;
 
-
-    s->bat = NULL;
-
-    qemu_co_mutex_init(&s->lock);
-
-    /* validate the file signature */
-    ret = bdrv_pread(bs->file, 0, &signature, sizeof(uint64_t));
-    if (ret < 0) {
-        goto fail;
-    }
-    if (memcmp(&signature, "vhdxfile", 8)) {
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    ret = vhdx_parse_header(bs, s);
-    if (ret) {
-        goto fail;
-    }
-
-    ret = vhdx_parse_log(bs, s);
-    if (ret) {
-        goto fail;
-    }
-
-    ret = vhdx_open_region_tables(bs, s);
-    if (ret) {
-        goto fail;
-    }
-
-    ret = vhdx_parse_metadata(bs, s);
-    if (ret) {
-        goto fail;
-    }
-    s->block_size = s->params.block_size;
-
-    /* the VHDX spec dictates that virtual_disk_size is always a multiple of
-     * logical_sector_size */
-    bs->total_sectors = s->virtual_disk_size >> s->logical_sector_size_bits;
-
     data_blocks_cnt = s->virtual_disk_size >> s->block_size_bits;
     if (s->virtual_disk_size - (data_blocks_cnt << s->block_size_bits)) {
         data_blocks_cnt++;
@@ -782,6 +851,85 @@
                          ((data_blocks_cnt - 1) >> s->chunk_ratio_bits);
     }
 
+}
+
+static void vhdx_close(BlockDriverState *bs)
+{
+    BDRVVHDXState *s = bs->opaque;
+    qemu_vfree(s->headers[0]);
+    s->headers[0] = NULL;
+    qemu_vfree(s->headers[1]);
+    s->headers[1] = NULL;
+    qemu_vfree(s->bat);
+    s->bat = NULL;
+    qemu_vfree(s->parent_entries);
+    s->parent_entries = NULL;
+    migrate_del_blocker(s->migration_blocker);
+    error_free(s->migration_blocker);
+    qemu_vfree(s->log.hdr);
+    s->log.hdr = NULL;
+    vhdx_region_unregister_all(s);
+}
+
+static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
+                     Error **errp)
+{
+    BDRVVHDXState *s = bs->opaque;
+    int ret = 0;
+    uint32_t i;
+    uint64_t signature;
+    bool log_flushed = false;
+
+
+    s->bat = NULL;
+    s->first_visible_write = true;
+
+    qemu_co_mutex_init(&s->lock);
+    QLIST_INIT(&s->regions);
+
+    /* validate the file signature */
+    ret = bdrv_pread(bs->file, 0, &signature, sizeof(uint64_t));
+    if (ret < 0) {
+        goto fail;
+    }
+    if (memcmp(&signature, "vhdxfile", 8)) {
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    /* This is used for any header updates, for the file_write_guid.
+     * The spec dictates that a new value should be used for the first
+     * header update */
+    vhdx_guid_generate(&s->session_guid);
+
+    ret = vhdx_parse_header(bs, s);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    ret = vhdx_parse_log(bs, s, &log_flushed);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    ret = vhdx_open_region_tables(bs, s);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    ret = vhdx_parse_metadata(bs, s);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    s->block_size = s->params.block_size;
+
+    /* the VHDX spec dictates that virtual_disk_size is always a multiple of
+     * logical_sector_size */
+    bs->total_sectors = s->virtual_disk_size >> s->logical_sector_size_bits;
+
+    vhdx_calc_bat_entries(s);
+
     s->bat_offset = s->bat_rt.file_offset;
 
     if (s->bat_entries > s->bat_rt.length / sizeof(VHDXBatEntry)) {
@@ -790,6 +938,7 @@
         goto fail;
     }
 
+    /* s->bat is freed in vhdx_close() */
     s->bat = qemu_blockalign(bs, s->bat_rt.length);
 
     ret = bdrv_pread(bs->file, s->bat_offset, s->bat, s->bat_rt.length);
@@ -797,16 +946,36 @@
         goto fail;
     }
 
+    uint64_t payblocks = s->chunk_ratio;
+    /* endian convert, and verify populated BAT field file offsets against
+     * region table and log entries */
     for (i = 0; i < s->bat_entries; i++) {
         le64_to_cpus(&s->bat[i]);
+        if (payblocks--) {
+            /* payload bat entries */
+            if ((s->bat[i] & VHDX_BAT_STATE_BIT_MASK) ==
+                    PAYLOAD_BLOCK_FULLY_PRESENT) {
+                ret = vhdx_region_check(s, s->bat[i] & VHDX_BAT_FILE_OFF_MASK,
+                                        s->block_size);
+                if (ret < 0) {
+                    goto fail;
+                }
+            }
+        } else {
+            payblocks = s->chunk_ratio;
+            /* Once differencing files are supported, verify sector bitmap
+             * blocks here */
+        }
     }
 
     if (flags & BDRV_O_RDWR) {
-        ret = -ENOTSUP;
-        goto fail;
+        ret = vhdx_update_headers(bs, s, false, NULL);
+        if (ret < 0) {
+            goto fail;
+        }
     }
 
-    /* TODO: differencing files, write */
+    /* TODO: differencing files */
 
     /* Disable migration when VHDX images are used */
     error_set(&s->migration_blocker,
@@ -816,10 +985,7 @@
 
     return 0;
 fail:
-    qemu_vfree(s->headers[0]);
-    qemu_vfree(s->headers[1]);
-    qemu_vfree(s->bat);
-    qemu_vfree(s->parent_entries);
+    vhdx_close(bs);
     return ret;
 }
 
@@ -859,7 +1025,7 @@
 
     sinfo->bytes_avail = sinfo->sectors_avail << s->logical_sector_size_bits;
 
-    sinfo->file_offset = s->bat[sinfo->bat_idx] >> VHDX_BAT_FILE_OFF_BITS;
+    sinfo->file_offset = s->bat[sinfo->bat_idx] & VHDX_BAT_FILE_OFF_MASK;
 
     sinfo->block_offset = block_offset << s->logical_sector_size_bits;
 
@@ -873,7 +1039,6 @@
      * in the block, and add in the payload data block offset
      * in the file, in bytes, to get the final read address */
 
-    sinfo->file_offset <<= 20;  /* now in bytes, rather than 1MB units */
     sinfo->file_offset += sinfo->block_offset;
 }
 
@@ -914,7 +1079,7 @@
                 /* return zero */
                 qemu_iovec_memset(&hd_qiov, 0, 0, sinfo.bytes_avail);
                 break;
-            case PAYLOAD_BLOCK_FULL_PRESENT:
+            case PAYLOAD_BLOCK_FULLY_PRESENT:
                 qemu_co_mutex_unlock(&s->lock);
                 ret = bdrv_co_readv(bs->file,
                                     sinfo.file_offset >> BDRV_SECTOR_BITS,
@@ -944,26 +1109,772 @@
     return ret;
 }
 
+/*
+ * Allocate a new payload block at the end of the file.
+ *
+ * Allocation will happen at 1MB alignment inside the file
+ *
+ * Returns the file offset start of the new payload block
+ */
+static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
+                                    uint64_t *new_offset)
+{
+    *new_offset = bdrv_getlength(bs->file);
 
+    /* per the spec, the address for a block is in units of 1MB */
+    *new_offset = ROUND_UP(*new_offset, 1024 * 1024);
+
+    return bdrv_truncate(bs->file, *new_offset + s->block_size);
+}
+
+/*
+ * Update the BAT table entry with the new file offset, and the new entry
+ * state */
+static void vhdx_update_bat_table_entry(BlockDriverState *bs, BDRVVHDXState *s,
+                                       VHDXSectorInfo *sinfo,
+                                       uint64_t *bat_entry_le,
+                                       uint64_t *bat_offset, int state)
+{
+    /* The BAT entry is a uint64, with 44 bits for the file offset in units of
+     * 1MB, and 3 bits for the block state. */
+    s->bat[sinfo->bat_idx]  = sinfo->file_offset;
+
+    s->bat[sinfo->bat_idx] |= state & VHDX_BAT_STATE_BIT_MASK;
+
+    *bat_entry_le = cpu_to_le64(s->bat[sinfo->bat_idx]);
+    *bat_offset = s->bat_offset + sinfo->bat_idx * sizeof(VHDXBatEntry);
+
+}
+
+/* Per the spec, on the first write of guest-visible data to the file the
+ * data write guid must be updated in the header */
+int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s)
+{
+    int ret = 0;
+    if (s->first_visible_write) {
+        s->first_visible_write = false;
+        ret = vhdx_update_headers(bs, s, true, NULL);
+    }
+    return ret;
+}
 
 static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
                                       int nb_sectors, QEMUIOVector *qiov)
 {
-    return -ENOTSUP;
-}
-
-
-static void vhdx_close(BlockDriverState *bs)
-{
+    int ret = -ENOTSUP;
     BDRVVHDXState *s = bs->opaque;
-    qemu_vfree(s->headers[0]);
-    qemu_vfree(s->headers[1]);
-    qemu_vfree(s->bat);
-    qemu_vfree(s->parent_entries);
-    migrate_del_blocker(s->migration_blocker);
-    error_free(s->migration_blocker);
+    VHDXSectorInfo sinfo;
+    uint64_t bytes_done = 0;
+    uint64_t bat_entry = 0;
+    uint64_t bat_entry_offset = 0;
+    QEMUIOVector hd_qiov;
+    struct iovec iov1 = { 0 };
+    struct iovec iov2 = { 0 };
+    int sectors_to_write;
+    int bat_state;
+    uint64_t bat_prior_offset = 0;
+    bool bat_update = false;
+
+    qemu_iovec_init(&hd_qiov, qiov->niov);
+
+    qemu_co_mutex_lock(&s->lock);
+
+    ret = vhdx_user_visible_write(bs, s);
+    if (ret < 0) {
+        goto exit;
+    }
+
+    while (nb_sectors > 0) {
+        bool use_zero_buffers = false;
+        bat_update = false;
+        if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) {
+            /* not supported yet */
+            ret = -ENOTSUP;
+            goto exit;
+        } else {
+            vhdx_block_translate(s, sector_num, nb_sectors, &sinfo);
+            sectors_to_write = sinfo.sectors_avail;
+
+            qemu_iovec_reset(&hd_qiov);
+            /* check the payload block state */
+            bat_state = s->bat[sinfo.bat_idx] & VHDX_BAT_STATE_BIT_MASK;
+            switch (bat_state) {
+            case PAYLOAD_BLOCK_ZERO:
+                /* in this case, we need to preserve zero writes for
+                 * data that is not part of this write, so we must pad
+                 * the rest of the buffer to zeroes */
+
+                /* if we are on a posix system with ftruncate() that extends
+                 * a file, then it is zero-filled for us.  On Win32, the raw
+                 * layer uses SetFilePointer and SetFileEnd, which does not
+                 * zero fill AFAIK */
+
+                /* Queue another write of zero buffers if the underlying file
+                 * does not zero-fill on file extension */
+
+                if (bdrv_has_zero_init(bs->file) == 0) {
+                    use_zero_buffers = true;
+
+                    /* zero fill the front, if any */
+                    if (sinfo.block_offset) {
+                        iov1.iov_len = sinfo.block_offset;
+                        iov1.iov_base = qemu_blockalign(bs, iov1.iov_len);
+                        memset(iov1.iov_base, 0, iov1.iov_len);
+                        qemu_iovec_concat_iov(&hd_qiov, &iov1, 1, 0,
+                                              sinfo.block_offset);
+                        sectors_to_write += iov1.iov_len >> BDRV_SECTOR_BITS;
+                    }
+
+                    /* our actual data */
+                    qemu_iovec_concat(&hd_qiov, qiov,  bytes_done,
+                                      sinfo.bytes_avail);
+
+                    /* zero fill the back, if any */
+                    if ((sinfo.bytes_avail - sinfo.block_offset) <
+                         s->block_size) {
+                        iov2.iov_len = s->block_size -
+                                      (sinfo.bytes_avail + sinfo.block_offset);
+                        iov2.iov_base = qemu_blockalign(bs, iov2.iov_len);
+                        memset(iov2.iov_base, 0, iov2.iov_len);
+                        qemu_iovec_concat_iov(&hd_qiov, &iov2, 1, 0,
+                                              sinfo.block_offset);
+                        sectors_to_write += iov2.iov_len >> BDRV_SECTOR_BITS;
+                    }
+                }
+
+                /* fall through */
+            case PAYLOAD_BLOCK_NOT_PRESENT: /* fall through */
+            case PAYLOAD_BLOCK_UNMAPPED:    /* fall through */
+            case PAYLOAD_BLOCK_UNDEFINED:   /* fall through */
+                bat_prior_offset = sinfo.file_offset;
+                ret = vhdx_allocate_block(bs, s, &sinfo.file_offset);
+                if (ret < 0) {
+                    goto exit;
+                }
+                /* once we support differencing files, this may also be
+                 * partially present */
+                /* update block state to the newly specified state */
+                vhdx_update_bat_table_entry(bs, s, &sinfo, &bat_entry,
+                                            &bat_entry_offset,
+                                            PAYLOAD_BLOCK_FULLY_PRESENT);
+                bat_update = true;
+                /* since we just allocated a block, file_offset is the
+                 * beginning of the payload block. It needs to be the
+                 * write address, which includes the offset into the block */
+                if (!use_zero_buffers) {
+                    sinfo.file_offset += sinfo.block_offset;
+                }
+                /* fall through */
+            case PAYLOAD_BLOCK_FULLY_PRESENT:
+                /* if the file offset address is in the header zone,
+                 * there is a problem */
+                if (sinfo.file_offset < (1024 * 1024)) {
+                    ret = -EFAULT;
+                    goto error_bat_restore;
+                }
+
+                if (!use_zero_buffers) {
+                    qemu_iovec_concat(&hd_qiov, qiov,  bytes_done,
+                                      sinfo.bytes_avail);
+                }
+                /* block exists, so we can just overwrite it */
+                qemu_co_mutex_unlock(&s->lock);
+                ret = bdrv_co_writev(bs->file,
+                                    sinfo.file_offset >> BDRV_SECTOR_BITS,
+                                    sectors_to_write, &hd_qiov);
+                qemu_co_mutex_lock(&s->lock);
+                if (ret < 0) {
+                    goto error_bat_restore;
+                }
+                break;
+            case PAYLOAD_BLOCK_PARTIALLY_PRESENT:
+                /* we don't yet support difference files, fall through
+                 * to error */
+            default:
+                ret = -EIO;
+                goto exit;
+                break;
+            }
+
+            if (bat_update) {
+                /* this will update the BAT entry into the log journal, and
+                 * then flush the log journal out to disk */
+                ret =  vhdx_log_write_and_flush(bs, s, &bat_entry,
+                                                sizeof(VHDXBatEntry),
+                                                bat_entry_offset);
+                if (ret < 0) {
+                    goto exit;
+                }
+            }
+
+            nb_sectors -= sinfo.sectors_avail;
+            sector_num += sinfo.sectors_avail;
+            bytes_done += sinfo.bytes_avail;
+
+        }
+    }
+
+    goto exit;
+
+error_bat_restore:
+    if (bat_update) {
+        /* keep metadata in sync, and restore the bat entry state
+         * if error. */
+        sinfo.file_offset = bat_prior_offset;
+        vhdx_update_bat_table_entry(bs, s, &sinfo, &bat_entry,
+                                    &bat_entry_offset, bat_state);
+    }
+exit:
+    qemu_vfree(iov1.iov_base);
+    qemu_vfree(iov2.iov_base);
+    qemu_co_mutex_unlock(&s->lock);
+    qemu_iovec_destroy(&hd_qiov);
+    return ret;
 }
 
+
+
+/*
+ * Create VHDX Headers
+ *
+ * There are 2 headers, and the highest sequence number will represent
+ * the active header
+ */
+static int vhdx_create_new_headers(BlockDriverState *bs, uint64_t image_size,
+                                   uint32_t log_size)
+{
+    int ret = 0;
+    VHDXHeader *hdr = NULL;
+
+    hdr = g_malloc0(sizeof(VHDXHeader));
+
+    hdr->signature       = VHDX_HEADER_SIGNATURE;
+    hdr->sequence_number = g_random_int();
+    hdr->log_version     = 0;
+    hdr->version         = 1;
+    hdr->log_length      = log_size;
+    hdr->log_offset      = VHDX_HEADER_SECTION_END;
+    vhdx_guid_generate(&hdr->file_write_guid);
+    vhdx_guid_generate(&hdr->data_write_guid);
+
+    ret = vhdx_write_header(bs, hdr, VHDX_HEADER1_OFFSET, false);
+    if (ret < 0) {
+        goto exit;
+    }
+    hdr->sequence_number++;
+    ret = vhdx_write_header(bs, hdr, VHDX_HEADER2_OFFSET, false);
+    if (ret < 0) {
+        goto exit;
+    }
+
+exit:
+    g_free(hdr);
+    return ret;
+}
+
+
+/*
+ * Create the Metadata entries.
+ *
+ * For more details on the entries, see section 3.5 (pg 29) in the
+ * VHDX 1.00 specification.
+ *
+ * We support 5 metadata entries (all required by spec):
+ *          File Parameters,
+ *          Virtual Disk Size,
+ *          Page 83 Data,
+ *          Logical Sector Size,
+ *          Physical Sector Size
+ *
+ * The first 64KB of the Metadata section is reserved for the metadata
+ * header and entries; beyond that, the metadata items themselves reside.
+ */
+static int vhdx_create_new_metadata(BlockDriverState *bs,
+                                    uint64_t image_size,
+                                    uint32_t block_size,
+                                    uint32_t sector_size,
+                                    uint64_t metadata_offset,
+                                    VHDXImageType type)
+{
+    int ret = 0;
+    uint32_t offset = 0;
+    void *buffer = NULL;
+    void *entry_buffer;
+    VHDXMetadataTableHeader *md_table;;
+    VHDXMetadataTableEntry  *md_table_entry;
+
+    /* Metadata entries */
+    VHDXFileParameters     *mt_file_params;
+    VHDXVirtualDiskSize    *mt_virtual_size;
+    VHDXPage83Data         *mt_page83;
+    VHDXVirtualDiskLogicalSectorSize  *mt_log_sector_size;
+    VHDXVirtualDiskPhysicalSectorSize *mt_phys_sector_size;
+
+    entry_buffer = g_malloc0(sizeof(VHDXFileParameters)               +
+                             sizeof(VHDXVirtualDiskSize)              +
+                             sizeof(VHDXPage83Data)                   +
+                             sizeof(VHDXVirtualDiskLogicalSectorSize) +
+                             sizeof(VHDXVirtualDiskPhysicalSectorSize));
+
+    mt_file_params = entry_buffer;
+    offset += sizeof(VHDXFileParameters);
+    mt_virtual_size = entry_buffer + offset;
+    offset += sizeof(VHDXVirtualDiskSize);
+    mt_page83 = entry_buffer + offset;
+    offset += sizeof(VHDXPage83Data);
+    mt_log_sector_size = entry_buffer + offset;
+    offset += sizeof(VHDXVirtualDiskLogicalSectorSize);
+    mt_phys_sector_size = entry_buffer + offset;
+
+    mt_file_params->block_size = cpu_to_le32(block_size);
+    if (type == VHDX_TYPE_FIXED) {
+        mt_file_params->data_bits |= VHDX_PARAMS_LEAVE_BLOCKS_ALLOCED;
+        cpu_to_le32s(&mt_file_params->data_bits);
+    }
+
+    vhdx_guid_generate(&mt_page83->page_83_data);
+    cpu_to_leguids(&mt_page83->page_83_data);
+    mt_virtual_size->virtual_disk_size        = cpu_to_le64(image_size);
+    mt_log_sector_size->logical_sector_size   = cpu_to_le32(sector_size);
+    mt_phys_sector_size->physical_sector_size = cpu_to_le32(sector_size);
+
+    buffer = g_malloc0(VHDX_HEADER_BLOCK_SIZE);
+    md_table = buffer;
+
+    md_table->signature   = VHDX_METADATA_SIGNATURE;
+    md_table->entry_count = 5;
+    vhdx_metadata_header_le_export(md_table);
+
+
+    /* This will reference beyond the reserved table portion */
+    offset = 64 * KiB;
+
+    md_table_entry = buffer + sizeof(VHDXMetadataTableHeader);
+
+    md_table_entry[0].item_id = file_param_guid;
+    md_table_entry[0].offset  = offset;
+    md_table_entry[0].length  = sizeof(VHDXFileParameters);
+    md_table_entry[0].data_bits |= VHDX_META_FLAGS_IS_REQUIRED;
+    offset += md_table_entry[0].length;
+    vhdx_metadata_entry_le_export(&md_table_entry[0]);
+
+    md_table_entry[1].item_id = virtual_size_guid;
+    md_table_entry[1].offset  = offset;
+    md_table_entry[1].length  = sizeof(VHDXVirtualDiskSize);
+    md_table_entry[1].data_bits |= VHDX_META_FLAGS_IS_REQUIRED |
+                                   VHDX_META_FLAGS_IS_VIRTUAL_DISK;
+    offset += md_table_entry[1].length;
+    vhdx_metadata_entry_le_export(&md_table_entry[1]);
+
+    md_table_entry[2].item_id = page83_guid;
+    md_table_entry[2].offset  = offset;
+    md_table_entry[2].length  = sizeof(VHDXPage83Data);
+    md_table_entry[2].data_bits |= VHDX_META_FLAGS_IS_REQUIRED |
+                                   VHDX_META_FLAGS_IS_VIRTUAL_DISK;
+    offset += md_table_entry[2].length;
+    vhdx_metadata_entry_le_export(&md_table_entry[2]);
+
+    md_table_entry[3].item_id = logical_sector_guid;
+    md_table_entry[3].offset  = offset;
+    md_table_entry[3].length  = sizeof(VHDXVirtualDiskLogicalSectorSize);
+    md_table_entry[3].data_bits |= VHDX_META_FLAGS_IS_REQUIRED |
+                                   VHDX_META_FLAGS_IS_VIRTUAL_DISK;
+    offset += md_table_entry[3].length;
+    vhdx_metadata_entry_le_export(&md_table_entry[3]);
+
+    md_table_entry[4].item_id = phys_sector_guid;
+    md_table_entry[4].offset  = offset;
+    md_table_entry[4].length  = sizeof(VHDXVirtualDiskPhysicalSectorSize);
+    md_table_entry[4].data_bits |= VHDX_META_FLAGS_IS_REQUIRED |
+                                   VHDX_META_FLAGS_IS_VIRTUAL_DISK;
+    vhdx_metadata_entry_le_export(&md_table_entry[4]);
+
+    ret = bdrv_pwrite(bs, metadata_offset, buffer, VHDX_HEADER_BLOCK_SIZE);
+    if (ret < 0) {
+        goto exit;
+    }
+
+    ret = bdrv_pwrite(bs, metadata_offset + (64 * KiB), entry_buffer,
+                      VHDX_HEADER_BLOCK_SIZE);
+    if (ret < 0) {
+        goto exit;
+    }
+
+
+exit:
+    g_free(buffer);
+    g_free(entry_buffer);
+    return ret;
+}
+
+/* This create the actual BAT itself.  We currently only support
+ * 'Dynamic' and 'Fixed' image types.
+ *
+ *  Dynamic images: default state of the BAT is all zeroes.
+ *
+ *  Fixed images: default state of the BAT is fully populated, with
+ *                file offsets and state PAYLOAD_BLOCK_FULLY_PRESENT.
+ */
+static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
+                           uint64_t image_size, VHDXImageType type,
+                           bool use_zero_blocks, VHDXRegionTableEntry *rt_bat)
+{
+    int ret = 0;
+    uint64_t data_file_offset;
+    uint64_t total_sectors = 0;
+    uint64_t sector_num = 0;
+    uint64_t unused;
+    int block_state;
+    VHDXSectorInfo sinfo;
+
+    assert(s->bat == NULL);
+
+    /* this gives a data start after BAT/bitmap entries, and well
+     * past any metadata entries (with a 4 MB buffer for future
+     * expansion */
+    data_file_offset = rt_bat->file_offset + rt_bat->length + 5 * MiB;
+    total_sectors = image_size >> s->logical_sector_size_bits;
+
+    if (type == VHDX_TYPE_DYNAMIC) {
+        /* All zeroes, so we can just extend the file - the end of the BAT
+         * is the furthest thing we have written yet */
+        ret = bdrv_truncate(bs, data_file_offset);
+        if (ret < 0) {
+            goto exit;
+        }
+    } else if (type == VHDX_TYPE_FIXED) {
+        ret = bdrv_truncate(bs, data_file_offset + image_size);
+        if (ret < 0) {
+            goto exit;
+        }
+    } else {
+        ret = -ENOTSUP;
+        goto exit;
+    }
+
+    if (type == VHDX_TYPE_FIXED ||
+                use_zero_blocks ||
+                bdrv_has_zero_init(bs) == 0) {
+        /* for a fixed file, the default BAT entry is not zero */
+        s->bat = g_malloc0(rt_bat->length);
+        block_state = type == VHDX_TYPE_FIXED ? PAYLOAD_BLOCK_FULLY_PRESENT :
+                                                PAYLOAD_BLOCK_NOT_PRESENT;
+        block_state = use_zero_blocks ? PAYLOAD_BLOCK_ZERO : block_state;
+        /* fill the BAT by emulating sector writes of sectors_per_block size */
+        while (sector_num < total_sectors) {
+            vhdx_block_translate(s, sector_num, s->sectors_per_block, &sinfo);
+            sinfo.file_offset = data_file_offset +
+                                (sector_num << s->logical_sector_size_bits);
+            sinfo.file_offset = ROUND_UP(sinfo.file_offset, MiB);
+            vhdx_update_bat_table_entry(bs, s, &sinfo, &unused, &unused,
+                                        block_state);
+            cpu_to_le64s(&s->bat[sinfo.bat_idx]);
+            sector_num += s->sectors_per_block;
+        }
+        ret = bdrv_pwrite(bs, rt_bat->file_offset, s->bat, rt_bat->length);
+        if (ret < 0) {
+            goto exit;
+        }
+    }
+
+
+
+exit:
+    g_free(s->bat);
+    return ret;
+}
+
+/* Creates the region table header, and region table entries.
+ * There are 2 supported region table entries: BAT, and Metadata/
+ *
+ * As the calculations for the BAT region table are also needed
+ * to create the BAT itself, we will also cause the BAT to be
+ * created.
+ */
+static int vhdx_create_new_region_table(BlockDriverState *bs,
+                                        uint64_t image_size,
+                                        uint32_t block_size,
+                                        uint32_t sector_size,
+                                        uint32_t log_size,
+                                        bool use_zero_blocks,
+                                        VHDXImageType type,
+                                        uint64_t *metadata_offset)
+{
+    int ret = 0;
+    uint32_t offset = 0;
+    void *buffer = NULL;
+    BDRVVHDXState *s = NULL;
+    VHDXRegionTableHeader *region_table;
+    VHDXRegionTableEntry *rt_bat;
+    VHDXRegionTableEntry *rt_metadata;
+
+    assert(metadata_offset != NULL);
+
+    /* Populate enough of the BDRVVHDXState to be able to use the
+     * pre-existing BAT calculation, translation, and update functions */
+    s = g_malloc0(sizeof(BDRVVHDXState));
+
+    s->chunk_ratio = (VHDX_MAX_SECTORS_PER_BLOCK) *
+                     (uint64_t) sector_size / (uint64_t) block_size;
+
+    s->sectors_per_block = block_size / sector_size;
+    s->virtual_disk_size = image_size;
+    s->block_size = block_size;
+    s->logical_sector_size = sector_size;
+
+    vhdx_set_shift_bits(s);
+
+    vhdx_calc_bat_entries(s);
+
+    /* At this point the VHDX state is populated enough for creation */
+
+    /* a single buffer is used so we can calculate the checksum over the
+     * entire 64KB block */
+    buffer = g_malloc0(VHDX_HEADER_BLOCK_SIZE);
+    region_table = buffer;
+    offset += sizeof(VHDXRegionTableHeader);
+    rt_bat = buffer + offset;
+    offset += sizeof(VHDXRegionTableEntry);
+    rt_metadata  = buffer + offset;
+
+    region_table->signature = VHDX_REGION_SIGNATURE;
+    region_table->entry_count = 2;   /* BAT and Metadata */
+
+    rt_bat->guid        = bat_guid;
+    rt_bat->length      = ROUND_UP(s->bat_entries * sizeof(VHDXBatEntry), MiB);
+    rt_bat->file_offset = ROUND_UP(VHDX_HEADER_SECTION_END + log_size, MiB);
+    s->bat_offset = rt_bat->file_offset;
+
+    rt_metadata->guid        = metadata_guid;
+    rt_metadata->file_offset = ROUND_UP(rt_bat->file_offset + rt_bat->length,
+                                        MiB);
+    rt_metadata->length      = 1 * MiB; /* min size, and more than enough */
+    *metadata_offset = rt_metadata->file_offset;
+
+    vhdx_update_checksum(buffer, VHDX_HEADER_BLOCK_SIZE,
+                         offsetof(VHDXRegionTableHeader, checksum));
+
+
+    /* The region table gives us the data we need to create the BAT,
+     * so do that now */
+    ret = vhdx_create_bat(bs, s, image_size, type, use_zero_blocks, rt_bat);
+
+    /* Now write out the region headers to disk */
+    vhdx_region_header_le_export(region_table);
+    vhdx_region_entry_le_export(rt_bat);
+    vhdx_region_entry_le_export(rt_metadata);
+
+    ret = bdrv_pwrite(bs, VHDX_REGION_TABLE_OFFSET, buffer,
+                      VHDX_HEADER_BLOCK_SIZE);
+    if (ret < 0) {
+        goto exit;
+    }
+
+    ret = bdrv_pwrite(bs, VHDX_REGION_TABLE2_OFFSET, buffer,
+                      VHDX_HEADER_BLOCK_SIZE);
+    if (ret < 0) {
+        goto exit;
+    }
+
+
+exit:
+    g_free(s);
+    g_free(buffer);
+    return ret;
+}
+
+/* We need to create the following elements:
+ *
+ *    .-----------------------------------------------------------------.
+ *    |   (A)    |   (B)    |    (C)    |     (D)       |     (E)       |
+ *    |  File ID |  Header1 |  Header 2 |  Region Tbl 1 |  Region Tbl 2 |
+ *    |          |          |           |               |               |
+ *    .-----------------------------------------------------------------.
+ *    0         64KB      128KB       192KB           256KB           320KB
+ *
+ *
+ *    .---- ~ ----------- ~ ------------ ~ ---------------- ~ -----------.
+ *    |     (F)     |     (G)       |    (H)    |                        |
+ *    | Journal Log |  BAT / Bitmap |  Metadata |  .... data ......      |
+ *    |             |               |           |                        |
+ *    .---- ~ ----------- ~ ------------ ~ ---------------- ~ -----------.
+ *   1MB
+ */
+static int vhdx_create(const char *filename, QEMUOptionParameter *options,
+                       Error **errp)
+{
+    int ret = 0;
+    uint64_t image_size = (uint64_t) 2 * GiB;
+    uint32_t log_size   = 1 * MiB;
+    uint32_t block_size = 0;
+    uint64_t signature;
+    uint64_t metadata_offset;
+    bool use_zero_blocks = false;
+
+    gunichar2 *creator = NULL;
+    glong creator_items;
+    BlockDriverState *bs;
+    const char *type = NULL;
+    VHDXImageType image_type;
+    Error *local_err = NULL;
+
+    while (options && options->name) {
+        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+            image_size = options->value.n;
+        } else if (!strcmp(options->name, VHDX_BLOCK_OPT_LOG_SIZE)) {
+            log_size = options->value.n;
+        } else if (!strcmp(options->name, VHDX_BLOCK_OPT_BLOCK_SIZE)) {
+            block_size = options->value.n;
+        } else if (!strcmp(options->name, BLOCK_OPT_SUBFMT)) {
+            type = options->value.s;
+        } else if (!strcmp(options->name, VHDX_BLOCK_OPT_ZERO)) {
+            use_zero_blocks = options->value.n != 0;
+        }
+        options++;
+    }
+
+    if (image_size > VHDX_MAX_IMAGE_SIZE) {
+        error_setg_errno(errp, EINVAL, "Image size too large; max of 64TB");
+        ret = -EINVAL;
+        goto exit;
+    }
+
+    if (type == NULL) {
+        type = "dynamic";
+    }
+
+    if (!strcmp(type, "dynamic")) {
+        image_type = VHDX_TYPE_DYNAMIC;
+    } else if (!strcmp(type, "fixed")) {
+        image_type = VHDX_TYPE_FIXED;
+    } else if (!strcmp(type, "differencing")) {
+        error_setg_errno(errp, ENOTSUP,
+                         "Differencing files not yet supported");
+        ret = -ENOTSUP;
+        goto exit;
+    } else {
+        ret = -EINVAL;
+        goto exit;
+    }
+
+    /* These are pretty arbitrary, and mainly designed to keep the BAT
+     * size reasonable to load into RAM */
+    if (block_size == 0) {
+        if (image_size > 32 * TiB) {
+            block_size = 64 * MiB;
+        } else if (image_size > (uint64_t) 100 * GiB) {
+            block_size = 32 * MiB;
+        } else if (image_size > 1 * GiB) {
+            block_size = 16 * MiB;
+        } else {
+            block_size = 8 * MiB;
+        }
+    }
+
+
+    /* make the log size close to what was specified, but must be
+     * min 1MB, and multiple of 1MB */
+    log_size = ROUND_UP(log_size, MiB);
+
+    block_size = ROUND_UP(block_size, MiB);
+    block_size = block_size > VHDX_BLOCK_SIZE_MAX ? VHDX_BLOCK_SIZE_MAX :
+                                                    block_size;
+
+    ret = bdrv_create_file(filename, options, &local_err);
+    if (ret < 0) {
+        error_propagate(errp, local_err);
+        goto exit;
+    }
+
+    ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR, &local_err);
+    if (ret < 0) {
+        error_propagate(errp, local_err);
+        goto exit;
+    }
+
+    /* Create (A) */
+
+    /* The creator field is optional, but may be useful for
+     * debugging / diagnostics */
+    creator = g_utf8_to_utf16("QEMU v" QEMU_VERSION, -1, NULL,
+                              &creator_items, NULL);
+    signature = cpu_to_le64(VHDX_FILE_SIGNATURE);
+    bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature));
+    if (ret < 0) {
+        goto delete_and_exit;
+    }
+    if (creator) {
+        bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET + sizeof(signature), creator,
+                    creator_items * sizeof(gunichar2));
+        if (ret < 0) {
+            goto delete_and_exit;
+        }
+    }
+
+
+    /* Creates (B),(C) */
+    ret = vhdx_create_new_headers(bs, image_size, log_size);
+    if (ret < 0) {
+        goto delete_and_exit;
+    }
+
+    /* Creates (D),(E),(G) explicitly. (F) created as by-product */
+    ret = vhdx_create_new_region_table(bs, image_size, block_size, 512,
+                                       log_size, use_zero_blocks, image_type,
+                                       &metadata_offset);
+    if (ret < 0) {
+        goto delete_and_exit;
+    }
+
+    /* Creates (H) */
+    ret = vhdx_create_new_metadata(bs, image_size, block_size, 512,
+                                   metadata_offset, image_type);
+    if (ret < 0) {
+        goto delete_and_exit;
+    }
+
+
+
+delete_and_exit:
+    bdrv_unref(bs);
+exit:
+    g_free(creator);
+    return ret;
+}
+
+static QEMUOptionParameter vhdx_create_options[] = {
+    {
+        .name = BLOCK_OPT_SIZE,
+        .type = OPT_SIZE,
+        .help = "Virtual disk size; max of 64TB."
+    },
+    {
+        .name = VHDX_BLOCK_OPT_LOG_SIZE,
+        .type = OPT_SIZE,
+        .value.n = 1 * MiB,
+        .help = "Log size; min 1MB."
+    },
+    {
+        .name = VHDX_BLOCK_OPT_BLOCK_SIZE,
+        .type = OPT_SIZE,
+        .value.n = 0,
+        .help = "Block Size; min 1MB, max 256MB. " \
+                "0 means auto-calculate based on image size."
+    },
+    {
+        .name = BLOCK_OPT_SUBFMT,
+        .type = OPT_STRING,
+        .help = "VHDX format type, can be either 'dynamic' or 'fixed'. "\
+                "Default is 'dynamic'."
+    },
+    {
+        .name = VHDX_BLOCK_OPT_ZERO,
+        .type = OPT_FLAG,
+        .help = "Force use of payload blocks of type 'ZERO'.  Non-standard."
+    },
+    { NULL }
+};
+
 static BlockDriver bdrv_vhdx = {
     .format_name            = "vhdx",
     .instance_size          = sizeof(BDRVVHDXState),
@@ -973,6 +1884,9 @@
     .bdrv_reopen_prepare    = vhdx_reopen_prepare,
     .bdrv_co_readv          = vhdx_co_readv,
     .bdrv_co_writev         = vhdx_co_writev,
+    .bdrv_create            = vhdx_create,
+
+    .create_options         = vhdx_create_options,
 };
 
 static void bdrv_vhdx_init(void)
diff --git a/block/vhdx.h b/block/vhdx.h
index fb687ed..51183b2 100644
--- a/block/vhdx.h
+++ b/block/vhdx.h
@@ -6,9 +6,9 @@
  * Authors:
  *  Jeff Cody <jcody@redhat.com>
  *
- *  This is based on the "VHDX Format Specification v0.95", published 4/12/2012
+ *  This is based on the "VHDX Format Specification v1.00", published 8/25/2012
  *  by Microsoft:
- *      https://www.microsoft.com/en-us/download/details.aspx?id=29681
+ *      https://www.microsoft.com/en-us/download/details.aspx?id=34750
  *
  * This work is licensed under the terms of the GNU LGPL, version 2 or later.
  * See the COPYING.LIB file in the top-level directory.
@@ -18,6 +18,11 @@
 #ifndef BLOCK_VHDX_H
 #define BLOCK_VHDX_H
 
+#define KiB              (1 * 1024)
+#define MiB            (KiB * 1024)
+#define GiB            (MiB * 1024)
+#define TiB ((uint64_t) GiB * 1024)
+
 /* Structures and fields present in the VHDX file */
 
 /* The header section has the following blocks,
@@ -30,14 +35,15 @@
  * 0.........64KB...........128KB........192KB..........256KB................1MB
  */
 
-#define VHDX_HEADER_BLOCK_SIZE      (64*1024)
+#define VHDX_HEADER_BLOCK_SIZE      (64 * 1024)
 
 #define VHDX_FILE_ID_OFFSET         0
-#define VHDX_HEADER1_OFFSET         (VHDX_HEADER_BLOCK_SIZE*1)
-#define VHDX_HEADER2_OFFSET         (VHDX_HEADER_BLOCK_SIZE*2)
-#define VHDX_REGION_TABLE_OFFSET    (VHDX_HEADER_BLOCK_SIZE*3)
+#define VHDX_HEADER1_OFFSET         (VHDX_HEADER_BLOCK_SIZE * 1)
+#define VHDX_HEADER2_OFFSET         (VHDX_HEADER_BLOCK_SIZE * 2)
+#define VHDX_REGION_TABLE_OFFSET    (VHDX_HEADER_BLOCK_SIZE * 3)
+#define VHDX_REGION_TABLE2_OFFSET   (VHDX_HEADER_BLOCK_SIZE * 4)
 
-
+#define VHDX_HEADER_SECTION_END     (1 * MiB)
 /*
  * A note on the use of MS-GUID fields.  For more details on the GUID,
  * please see: https://en.wikipedia.org/wiki/Globally_unique_identifier.
@@ -55,10 +61,11 @@
 /* These structures are ones that are defined in the VHDX specification
  * document */
 
+#define VHDX_FILE_SIGNATURE 0x656C696678646876  /* "vhdxfile" in ASCII */
 typedef struct VHDXFileIdentifier {
     uint64_t    signature;              /* "vhdxfile" in ASCII */
     uint16_t    creator[256];           /* optional; utf-16 string to identify
-                                           the vhdx file creator.  Diagnotistic
+                                           the vhdx file creator.  Diagnostic
                                            only */
 } VHDXFileIdentifier;
 
@@ -67,7 +74,7 @@
  * Microsoft is not just 16 bytes though - it is a structure that is defined,
  * so we need to follow it here so that endianness does not trip us up */
 
-typedef struct MSGUID {
+typedef struct QEMU_PACKED MSGUID {
     uint32_t  data1;
     uint16_t  data2;
     uint16_t  data3;
@@ -77,14 +84,15 @@
 #define guid_eq(a, b) \
     (memcmp(&(a), &(b), sizeof(MSGUID)) == 0)
 
-#define VHDX_HEADER_SIZE (4*1024)   /* although the vhdx_header struct in disk
-                                       is only 582 bytes, for purposes of crc
-                                       the header is the first 4KB of the 64KB
-                                       block */
+#define VHDX_HEADER_SIZE (4 * 1024)   /* although the vhdx_header struct in disk
+                                         is only 582 bytes, for purposes of crc
+                                         the header is the first 4KB of the 64KB
+                                         block */
 
 /* The full header is 4KB, although the actual header data is much smaller.
  * But for the checksum calculation, it is over the entire 4KB structure,
  * not just the defined portion of it */
+#define VHDX_HEADER_SIGNATURE 0x64616568
 typedef struct QEMU_PACKED VHDXHeader {
     uint32_t    signature;              /* "head" in ASCII */
     uint32_t    checksum;               /* CRC-32C hash of the whole header */
@@ -92,7 +100,7 @@
                                            VHDX file has 2 of these headers,
                                            and only the header with the highest
                                            sequence number is valid */
-    MSGUID      file_write_guid;       /* 128 bit unique identifier. Must be
+    MSGUID      file_write_guid;        /* 128 bit unique identifier. Must be
                                            updated to new, unique value before
                                            the first modification is made to
                                            file */
@@ -114,9 +122,9 @@
                                            there is no valid log. If non-zero,
                                            log entries with this guid are
                                            valid. */
-    uint16_t    log_version;            /* version of the log format. Mustn't be
-                                           zero, unless log_guid is also zero */
-    uint16_t    version;                /* version of th evhdx file.  Currently,
+    uint16_t    log_version;            /* version of the log format. Must be
+                                           set to zero */
+    uint16_t    version;                /* version of the vhdx file.  Currently,
                                            only supported version is "1" */
     uint32_t    log_length;             /* length of the log.  Must be multiple
                                            of 1MB */
@@ -125,6 +133,7 @@
 } VHDXHeader;
 
 /* Header for the region table block */
+#define VHDX_REGION_SIGNATURE  0x69676572  /* "regi" in ASCII */
 typedef struct QEMU_PACKED VHDXRegionTableHeader {
     uint32_t    signature;              /* "regi" in ASCII */
     uint32_t    checksum;               /* CRC-32C hash of the 64KB table */
@@ -151,7 +160,10 @@
 
 
 /* ---- LOG ENTRY STRUCTURES ---- */
+#define VHDX_LOG_MIN_SIZE (1024 * 1024)
+#define VHDX_LOG_SECTOR_SIZE 4096
 #define VHDX_LOG_HDR_SIZE 64
+#define VHDX_LOG_SIGNATURE 0x65676f6c
 typedef struct QEMU_PACKED VHDXLogEntryHeader {
     uint32_t    signature;              /* "loge" in ASCII */
     uint32_t    checksum;               /* CRC-32C hash of the 64KB table */
@@ -174,7 +186,8 @@
 } VHDXLogEntryHeader;
 
 #define VHDX_LOG_DESC_SIZE 32
-
+#define VHDX_LOG_DESC_SIGNATURE 0x63736564
+#define VHDX_LOG_ZERO_SIGNATURE 0x6f72657a
 typedef struct QEMU_PACKED VHDXLogDescriptor {
     uint32_t    signature;              /* "zero" or "desc" in ASCII */
     union  {
@@ -194,6 +207,7 @@
                                            vhdx_log_entry_header */
 } VHDXLogDescriptor;
 
+#define VHDX_LOG_DATA_SIGNATURE 0x61746164
 typedef struct QEMU_PACKED VHDXLogDataSector {
     uint32_t    data_signature;         /* "data" in ASCII */
     uint32_t    sequence_high;          /* 4 MSB of 8 byte sequence_number */
@@ -212,19 +226,19 @@
 #define PAYLOAD_BLOCK_UNDEFINED         1
 #define PAYLOAD_BLOCK_ZERO              2
 #define PAYLOAD_BLOCK_UNMAPPED          5
-#define PAYLOAD_BLOCK_FULL_PRESENT      6
+#define PAYLOAD_BLOCK_FULLY_PRESENT     6
 #define PAYLOAD_BLOCK_PARTIALLY_PRESENT 7
 
 #define SB_BLOCK_NOT_PRESENT    0
 #define SB_BLOCK_PRESENT        6
 
 /* per the spec */
-#define VHDX_MAX_SECTORS_PER_BLOCK  (1<<23)
+#define VHDX_MAX_SECTORS_PER_BLOCK  (1 << 23)
 
 /* upper 44 bits are the file offset in 1MB units lower 3 bits are the state
    other bits are reserved */
 #define VHDX_BAT_STATE_BIT_MASK 0x07
-#define VHDX_BAT_FILE_OFF_BITS (64-44)
+#define VHDX_BAT_FILE_OFF_MASK  0xFFFFFFFFFFF00000 /* upper 44 bits */
 typedef uint64_t VHDXBatEntry;
 
 /* ---- METADATA REGION STRUCTURES ---- */
@@ -233,6 +247,7 @@
 #define VHDX_METADATA_MAX_ENTRIES 2047  /* not including the header */
 #define VHDX_METADATA_TABLE_MAX_SIZE \
     (VHDX_METADATA_ENTRY_SIZE * (VHDX_METADATA_MAX_ENTRIES+1))
+#define VHDX_METADATA_SIGNATURE 0x617461646174656D  /* "metadata" in ASCII */
 typedef struct QEMU_PACKED VHDXMetadataTableHeader {
     uint64_t    signature;              /* "metadata" in ASCII */
     uint16_t    reserved;
@@ -252,8 +267,8 @@
                                            metadata region */
                                         /* note: if length = 0, so is offset */
     uint32_t    length;                 /* length of metadata. <= 1MB. */
-    uint32_t    data_bits;      /* least-significant 3 bits are flags, the
-                                   rest are reserved (see above) */
+    uint32_t    data_bits;              /* least-significant 3 bits are flags,
+                                           the rest are reserved (see above) */
     uint32_t    reserved2;
 } VHDXMetadataTableEntry;
 
@@ -262,13 +277,16 @@
                                                    If set indicates a fixed
                                                    size VHDX file */
 #define VHDX_PARAMS_HAS_PARENT           0x02    /* has parent / backing file */
+#define VHDX_BLOCK_SIZE_MIN             (1   * MiB)
+#define VHDX_BLOCK_SIZE_MAX             (256 * MiB)
 typedef struct QEMU_PACKED VHDXFileParameters {
     uint32_t    block_size;             /* size of each payload block, always
                                            power of 2, <= 256MB and >= 1MB. */
-    uint32_t data_bits;     /* least-significant 2 bits are flags, the rest
-                               are reserved (see above) */
+    uint32_t data_bits;                 /* least-significant 2 bits are flags,
+                                           the rest are reserved (see above) */
 } VHDXFileParameters;
 
+#define VHDX_MAX_IMAGE_SIZE  ((uint64_t) 64 * TiB)
 typedef struct QEMU_PACKED VHDXVirtualDiskSize {
     uint64_t    virtual_disk_size;      /* Size of the virtual disk, in bytes.
                                            Must be multiple of the sector size,
@@ -276,7 +294,7 @@
 } VHDXVirtualDiskSize;
 
 typedef struct QEMU_PACKED VHDXPage83Data {
-    MSGUID      page_83_data[16];       /* unique id for scsi devices that
+    MSGUID      page_83_data;           /* unique id for scsi devices that
                                            support page 0x83 */
 } VHDXPage83Data;
 
@@ -291,7 +309,7 @@
 } VHDXVirtualDiskPhysicalSectorSize;
 
 typedef struct QEMU_PACKED VHDXParentLocatorHeader {
-    MSGUID      locator_type[16];       /* type of the parent virtual disk. */
+    MSGUID      locator_type;           /* type of the parent virtual disk. */
     uint16_t    reserved;
     uint16_t    key_value_count;        /* number of key/value pairs for this
                                            locator */
@@ -308,18 +326,122 @@
 
 /* ----- END VHDX SPECIFICATION STRUCTURES ---- */
 
+typedef struct VHDXMetadataEntries {
+    VHDXMetadataTableEntry file_parameters_entry;
+    VHDXMetadataTableEntry virtual_disk_size_entry;
+    VHDXMetadataTableEntry page83_data_entry;
+    VHDXMetadataTableEntry logical_sector_size_entry;
+    VHDXMetadataTableEntry phys_sector_size_entry;
+    VHDXMetadataTableEntry parent_locator_entry;
+    uint16_t present;
+} VHDXMetadataEntries;
 
+typedef struct VHDXLogEntries {
+    uint64_t offset;
+    uint64_t length;
+    uint32_t write;
+    uint32_t read;
+    VHDXLogEntryHeader *hdr;
+    void *desc_buffer;
+    uint64_t sequence;
+    uint32_t tail;
+} VHDXLogEntries;
+
+typedef struct VHDXRegionEntry {
+    uint64_t start;
+    uint64_t end;
+    QLIST_ENTRY(VHDXRegionEntry) entries;
+} VHDXRegionEntry;
+
+typedef struct BDRVVHDXState {
+    CoMutex lock;
+
+    int curr_header;
+    VHDXHeader *headers[2];
+
+    VHDXRegionTableHeader rt;
+    VHDXRegionTableEntry bat_rt;         /* region table for the BAT */
+    VHDXRegionTableEntry metadata_rt;    /* region table for the metadata */
+
+    VHDXMetadataTableHeader metadata_hdr;
+    VHDXMetadataEntries metadata_entries;
+
+    VHDXFileParameters params;
+    uint32_t block_size;
+    uint32_t block_size_bits;
+    uint32_t sectors_per_block;
+    uint32_t sectors_per_block_bits;
+
+    uint64_t virtual_disk_size;
+    uint32_t logical_sector_size;
+    uint32_t physical_sector_size;
+
+    uint64_t chunk_ratio;
+    uint32_t chunk_ratio_bits;
+    uint32_t logical_sector_size_bits;
+
+    uint32_t bat_entries;
+    VHDXBatEntry *bat;
+    uint64_t bat_offset;
+
+    bool first_visible_write;
+    MSGUID session_guid;
+
+    VHDXLogEntries log;
+
+    VHDXParentLocatorHeader parent_header;
+    VHDXParentLocatorEntry *parent_entries;
+
+    Error *migration_blocker;
+
+    QLIST_HEAD(VHDXRegionHead, VHDXRegionEntry) regions;
+} BDRVVHDXState;
+
+void vhdx_guid_generate(MSGUID *guid);
+
+int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s, bool rw,
+                        MSGUID *log_guid);
+
+uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset);
 uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size,
                             int crc_offset);
 
 bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset);
 
+int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed);
 
-static void leguid_to_cpus(MSGUID *guid)
+int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
+                             void *data, uint32_t length, uint64_t offset);
+
+static inline void leguid_to_cpus(MSGUID *guid)
 {
     le32_to_cpus(&guid->data1);
     le16_to_cpus(&guid->data2);
     le16_to_cpus(&guid->data3);
 }
 
+static inline void cpu_to_leguids(MSGUID *guid)
+{
+    cpu_to_le32s(&guid->data1);
+    cpu_to_le16s(&guid->data2);
+    cpu_to_le16s(&guid->data3);
+}
+
+void vhdx_header_le_import(VHDXHeader *h);
+void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h);
+void vhdx_log_desc_le_import(VHDXLogDescriptor *d);
+void vhdx_log_desc_le_export(VHDXLogDescriptor *d);
+void vhdx_log_data_le_export(VHDXLogDataSector *d);
+void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr);
+void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr);
+void vhdx_region_header_le_import(VHDXRegionTableHeader *hdr);
+void vhdx_region_header_le_export(VHDXRegionTableHeader *hdr);
+void vhdx_region_entry_le_import(VHDXRegionTableEntry *e);
+void vhdx_region_entry_le_export(VHDXRegionTableEntry *e);
+void vhdx_metadata_header_le_import(VHDXMetadataTableHeader *hdr);
+void vhdx_metadata_header_le_export(VHDXMetadataTableHeader *hdr);
+void vhdx_metadata_entry_le_import(VHDXMetadataTableEntry *e);
+void vhdx_metadata_entry_le_export(VHDXMetadataTableEntry *e);
+int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s);
+
 #endif
diff --git a/block/vpc.c b/block/vpc.c
index 627d11c..577cc45 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -211,6 +211,15 @@
     bs->total_sectors = (int64_t)
         be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
 
+    /* images created with disk2vhd report a far higher virtual size
+     * than expected with the cyls * heads * sectors_per_cyl formula.
+     * use the footer->size instead if the image was created with
+     * disk2vhd.
+     */
+    if (!strncmp(footer->creator_app, "d2v", 4)) {
+        bs->total_sectors = be64_to_cpu(footer->size) / BDRV_SECTOR_SIZE;
+    }
+
     /* Allow a maximum disk size of approximately 2 TB */
     if (bs->total_sectors >= 65535LL * 255 * 255) {
         ret = -EFBIG;
diff --git a/blockdev.c b/blockdev.c
index b260477..86e6bff 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -341,7 +341,7 @@
     qemu_opts_absorb_qdict(opts, bs_opts, &error);
     if (error_is_set(&error)) {
         error_propagate(errp, error);
-        return NULL;
+        goto early_err;
     }
 
     if (id) {
@@ -361,7 +361,7 @@
     if ((buf = qemu_opt_get(opts, "discard")) != NULL) {
         if (bdrv_parse_discard_flags(buf, &bdrv_flags) != 0) {
             error_setg(errp, "invalid discard option");
-            return NULL;
+            goto early_err;
         }
     }
 
@@ -383,7 +383,7 @@
             /* this is the default */
         } else {
            error_setg(errp, "invalid aio option");
-           return NULL;
+           goto early_err;
         }
     }
 #endif
@@ -393,13 +393,13 @@
             error_printf("Supported formats:");
             bdrv_iterate_format(bdrv_format_print, NULL);
             error_printf("\n");
-            return NULL;
+            goto early_err;
         }
 
         drv = bdrv_find_format(buf);
         if (!drv) {
             error_setg(errp, "'%s' invalid format", buf);
-            return NULL;
+            goto early_err;
         }
     }
 
@@ -435,20 +435,20 @@
 
     if (!check_throttle_config(&cfg, &error)) {
         error_propagate(errp, error);
-        return NULL;
+        goto early_err;
     }
 
     on_write_error = BLOCKDEV_ON_ERROR_ENOSPC;
     if ((buf = qemu_opt_get(opts, "werror")) != NULL) {
         if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO && type != IF_NONE) {
             error_setg(errp, "werror is not supported by this bus type");
-            return NULL;
+            goto early_err;
         }
 
         on_write_error = parse_block_error_action(buf, 0, &error);
         if (error_is_set(&error)) {
             error_propagate(errp, error);
-            return NULL;
+            goto early_err;
         }
     }
 
@@ -456,13 +456,13 @@
     if ((buf = qemu_opt_get(opts, "rerror")) != NULL) {
         if (type != IF_IDE && type != IF_VIRTIO && type != IF_SCSI && type != IF_NONE) {
             error_report("rerror is not supported by this bus type");
-            return NULL;
+            goto early_err;
         }
 
         on_read_error = parse_block_error_action(buf, 1, &error);
         if (error_is_set(&error)) {
             error_propagate(errp, error);
-            return NULL;
+            goto early_err;
         }
     }
 
@@ -491,6 +491,8 @@
         if (has_driver_specific_opts) {
             file = NULL;
         } else {
+            QDECREF(bs_opts);
+            qemu_opts_del(opts);
             return dinfo;
         }
     }
@@ -529,12 +531,13 @@
     return dinfo;
 
 err:
-    qemu_opts_del(opts);
-    QDECREF(bs_opts);
     bdrv_unref(dinfo->bdrv);
     g_free(dinfo->id);
     QTAILQ_REMOVE(&drives, dinfo, next);
     g_free(dinfo);
+early_err:
+    QDECREF(bs_opts);
+    qemu_opts_del(opts);
     return NULL;
 }
 
diff --git a/configure b/configure
index 9addff1..9a02610 100755
--- a/configure
+++ b/configure
@@ -27,6 +27,19 @@
 echo >> config.log
 echo "#" >> config.log
 
+# Save the configure command line for later reuse.
+cat <<EOD >config.status
+#!/bin/sh
+# Generated by configure.
+# Run this file to recreate the current configuration.
+# Compiler output produced by configure, useful for debugging
+# configure, is in config.log if it exists.
+EOD
+printf "exec" >>config.status
+printf " '%s'" "$0" "$@" >>config.status
+echo >>config.status
+chmod +x config.status
+
 error_exit() {
     echo
     echo "ERROR: $1"
@@ -247,6 +260,7 @@
 gtkabi="2.0"
 tpm="no"
 libssh2=""
+vhdx=""
 
 # parse CC options first
 for opt do
@@ -972,6 +986,10 @@
   ;;
   --enable-libssh2) libssh2="yes"
   ;;
+  --enable-vhdx) vhdx="yes"
+  ;;
+  --disable-vhdx) vhdx="no"
+  ;;
   *) echo "ERROR: unknown option $opt"; show_help="yes"
   ;;
   esac
@@ -1204,6 +1222,8 @@
 echo "  --enable-tpm             enable TPM support"
 echo "  --disable-libssh2        disable ssh block device support"
 echo "  --enable-libssh2         enable ssh block device support"
+echo "  --disable-vhdx           disables support for the Microsoft VHDX image format"
+echo "  --enable-vhdx            enable support for the Microsoft VHDX image format"
 echo ""
 echo "NOTE: The object files are built at the place where configure is launched"
 exit 1
@@ -2004,6 +2024,18 @@
   fi
 fi
 
+if test "$vhdx" = "yes" ; then
+    if test "$uuid" = "no" ; then
+        error_exit "uuid required for VHDX support"
+    fi
+elif test "$vhdx" != "no" ; then
+    if test "$uuid" = "yes" ; then
+        vhdx=yes
+    else
+        vhdx=no
+    fi
+fi
+
 ##########################################
 # xfsctl() probe, used for raw-posix
 if test "$xfs" != "no" ; then
@@ -3747,6 +3779,7 @@
 echo "libssh2 support   $libssh2"
 echo "TPM passthrough   $tpm_passthrough"
 echo "QOM debugging     $qom_cast_debug"
+echo "vhdx              $vhdx"
 
 if test "$sdl_too_old" = "yes"; then
 echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -3757,8 +3790,6 @@
 echo "# Automatically generated by configure - do not modify" >config-all-disas.mak
 
 echo "# Automatically generated by configure - do not modify" > $config_host_mak
-printf "# Configured with:" >> $config_host_mak
-printf " '%s'" "$0" "$@" >> $config_host_mak
 echo >> $config_host_mak
 
 echo all: >> $config_host_mak
@@ -4141,6 +4172,10 @@
   echo 'CONFIG_VIRTIO_BLK_DATA_PLANE=$(CONFIG_VIRTIO)' >> $config_host_mak
 fi
 
+if test "$vhdx" = "yes" ; then
+  echo "CONFIG_VHDX=y" >> $config_host_mak
+fi
+
 # USB host support
 if test "$libusb" = "yes"; then
   echo "HOST_USB=libusb legacy" >> $config_host_mak
diff --git a/exec.c b/exec.c
index 79610ce..95c4356 100644
--- a/exec.c
+++ b/exec.c
@@ -409,8 +409,10 @@
 #else
 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 {
-    tb_invalidate_phys_addr(cpu_get_phys_page_debug(cpu, pc) |
-            (pc & ~TARGET_PAGE_MASK));
+    hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
+    if (phys != -1) {
+        tb_invalidate_phys_addr(phys | (pc & ~TARGET_PAGE_MASK));
+    }
 }
 #endif
 #endif /* TARGET_HAS_ICE */
@@ -1741,12 +1743,7 @@
 static void memory_map_init(void)
 {
     system_memory = g_malloc(sizeof(*system_memory));
-
-    assert(TARGET_PHYS_ADDR_SPACE_BITS <= 64);
-
-    memory_region_init(system_memory, NULL, "system",
-                       TARGET_PHYS_ADDR_SPACE_BITS == 64 ?
-                       UINT64_MAX : (0x1ULL << TARGET_PHYS_ADDR_SPACE_BITS));
+    memory_region_init(system_memory, NULL, "system", INT64_MAX);
     address_space_init(&address_space_memory, system_memory, "memory");
 
     system_io = g_malloc(sizeof(*system_io));
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 6cfa044..486e705 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -1182,6 +1182,11 @@
         return;
     }
 
+    if (!acpi_enabled) {
+        ACPI_BUILD_DPRINTF(3, "ACPI disabled. Bailing out.\n");
+        return;
+    }
+
     build_state = g_malloc0(sizeof *build_state);
 
     build_state->guest_info = guest_info;
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index ae51d96..613f144 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -657,7 +657,8 @@
     }
 
     if (n->mac_table.in_use + mac_data.entries <= MAC_TABLE_ENTRIES) {
-        s = iov_to_buf(iov, iov_cnt, 0, n->mac_table.macs,
+        s = iov_to_buf(iov, iov_cnt, 0,
+                       &n->mac_table.macs[n->mac_table.in_use * ETH_ALEN],
                        mac_data.entries * ETH_ALEN);
         if (s != mac_data.entries * ETH_ALEN) {
             goto error;
diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c
index bad3953..edc974e 100644
--- a/hw/pci-host/piix.c
+++ b/hw/pci-host/piix.c
@@ -48,6 +48,7 @@
     PCIHostState parent_obj;
     PcPciInfo pci_info;
     uint64_t pci_hole64_size;
+    uint32_t short_root_bus;
 } I440FXState;
 
 #define PIIX_NUM_PIC_IRQS       16      /* i8259 * 2 */
@@ -720,13 +721,19 @@
 static const char *i440fx_pcihost_root_bus_path(PCIHostState *host_bridge,
                                                 PCIBus *rootbus)
 {
+    I440FXState *s = I440FX_PCI_HOST_BRIDGE(host_bridge);
+
     /* For backwards compat with old device paths */
-    return "0000";
+    if (s->short_root_bus) {
+        return "0000";
+    }
+    return "0000:00";
 }
 
 static Property i440fx_props[] = {
     DEFINE_PROP_SIZE(PCI_HOST_PROP_PCI_HOLE64_SIZE, I440FXState,
                      pci_hole64_size, DEFAULT_PCI_HOLE64_SIZE),
+    DEFINE_PROP_UINT32("short_root_bus", I440FXState, short_root_bus, 0),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index b8feed1..c043998 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -61,8 +61,13 @@
 static const char *q35_host_root_bus_path(PCIHostState *host_bridge,
                                           PCIBus *rootbus)
 {
-    /* For backwards compat with old device paths */
-    return "0000";
+    Q35PCIHost *s = Q35_HOST_DEVICE(host_bridge);
+
+     /* For backwards compat with old device paths */
+    if (s->mch.short_root_bus) {
+        return "0000";
+    }
+    return "0000:00";
 }
 
 static void q35_host_get_pci_hole_start(Object *obj, Visitor *v,
@@ -124,6 +129,7 @@
                         MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT),
     DEFINE_PROP_SIZE(PCI_HOST_PROP_PCI_HOLE64_SIZE, Q35PCIHost,
                      mch.pci_hole64_size, DEFAULT_PCI_HOLE64_SIZE),
+    DEFINE_PROP_UINT32("short_root_bus", Q35PCIHost, mch.short_root_bus, 0),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index a98c8a0..ed32059 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -283,24 +283,6 @@
     return rootbus->qbus.name;
 }
 
-static uint64_t master_abort_mem_read(void *opaque, hwaddr addr, unsigned size)
-{
-   return -1ULL;
-}
-
-static void master_abort_mem_write(void *opaque, hwaddr addr, uint64_t val,
-                                   unsigned size)
-{
-}
-
-static const MemoryRegionOps master_abort_mem_ops = {
-    .read = master_abort_mem_read,
-    .write = master_abort_mem_write,
-    .endianness = DEVICE_LITTLE_ENDIAN,
-};
-
-#define MASTER_ABORT_MEM_PRIORITY INT_MIN
-
 static void pci_bus_init(PCIBus *bus, DeviceState *parent,
                          const char *name,
                          MemoryRegion *address_space_mem,
@@ -312,14 +294,6 @@
     bus->address_space_mem = address_space_mem;
     bus->address_space_io = address_space_io;
 
-
-    memory_region_init_io(&bus->master_abort_mem, OBJECT(bus),
-                          &master_abort_mem_ops, bus, "pci-master-abort",
-                          memory_region_size(bus->address_space_mem));
-    memory_region_add_subregion_overlap(bus->address_space_mem,
-                                        0, &bus->master_abort_mem,
-                                        MASTER_ABORT_MEM_PRIORITY);
-
     /* host bridge */
     QLIST_INIT(&bus->child);
 
diff --git a/hw/ppc/ppc440_bamboo.c b/hw/ppc/ppc440_bamboo.c
index 655e499..67597df 100644
--- a/hw/ppc/ppc440_bamboo.c
+++ b/hw/ppc/ppc440_bamboo.c
@@ -110,8 +110,9 @@
     qemu_devtree_setprop_cell(fdt, "/cpus/cpu@0", "timebase-frequency",
                               tb_freq);
 
-    ret = rom_add_blob_fixed(BINARY_DEVICE_TREE_FILE, fdt, fdt_size, addr);
+    rom_add_blob_fixed(BINARY_DEVICE_TREE_FILE, fdt, fdt_size, addr);
     g_free(fdt);
+    return 0;
 
 out:
 
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 03cc0ba..57e8d16 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -260,6 +260,14 @@
             .driver   = "qemu32-" TYPE_X86_CPU,\
             .property = "model",\
             .value    = stringify(3),\
+        },{\
+            .driver   = "i440FX-pcihost",\
+            .property = "short_root_bus",\
+            .value    = stringify(1),\
+        },{\
+            .driver   = "q35-pcihost",\
+            .property = "short_root_bus",\
+            .value    = stringify(1),\
         }
 
 #define PC_COMPAT_1_5 \
@@ -296,6 +304,14 @@
             .driver = TYPE_X86_CPU,\
             .property = "pmu",\
             .value = "on",\
+        },{\
+            .driver   = "i440FX-pcihost",\
+            .property = "short_root_bus",\
+            .value    = stringify(0),\
+        },{\
+            .driver   = "q35-pcihost",\
+            .property = "short_root_bus",\
+            .value    = stringify(0),\
         }
 
 #define PC_COMPAT_1_4 \
diff --git a/include/hw/loader.h b/include/hw/loader.h
index 58eca98..7a23d6b 100644
--- a/include/hw/loader.h
+++ b/include/hw/loader.h
@@ -55,7 +55,7 @@
 #define rom_add_file_fixed(_f, _a, _i)          \
     rom_add_file(_f, NULL, _a, _i)
 #define rom_add_blob_fixed(_f, _b, _l, _a)      \
-    (rom_add_blob(_f, _b, _l, _a, NULL, NULL, NULL) ? 0 : -1)
+    rom_add_blob(_f, _b, _l, _a, NULL, NULL, NULL)
 
 #define PC_ROM_MIN_VGA     0xc0000
 #define PC_ROM_MIN_OPTION  0xc8000
diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h
index aee91aa..309065f 100644
--- a/include/hw/pci-host/q35.h
+++ b/include/hw/pci-host/q35.h
@@ -61,6 +61,7 @@
     ram_addr_t above_4g_mem_size;
     uint64_t pci_hole64_size;
     PcGuestInfo *guest_info;
+    uint32_t short_root_bus;
 } MCHPCIState;
 
 typedef struct Q35PCIHost {
diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
index 2ad5edb..9df1788 100644
--- a/include/hw/pci/pci_bus.h
+++ b/include/hw/pci/pci_bus.h
@@ -23,7 +23,6 @@
     PCIDevice *parent_dev;
     MemoryRegion *address_space_mem;
     MemoryRegion *address_space_io;
-    MemoryRegion master_abort_mem;
 
     QLIST_HEAD(, PCIBus) child; /* this will be replaced by qdev later */
     QLIST_ENTRY(PCIBus) sibling;/* this will be replaced by qdev later */
diff --git a/include/net/eth.h b/include/net/eth.h
index 1d48e06..b3273b8 100644
--- a/include/net/eth.h
+++ b/include/net/eth.h
@@ -84,7 +84,7 @@
 } ip_pseudo_header;
 
 /* IPv6 address */
-struct in6_addr {
+struct in6_address {
     union {
         uint8_t __u6_addr8[16];
     } __in6_u;
@@ -105,8 +105,8 @@
             uint8_t  ip6_un3_ecn;  /* 2 bits ECN, top 6 bits payload length */
         } ip6_un3;
     } ip6_ctlun;
-    struct in6_addr ip6_src;     /* source address */
-    struct in6_addr ip6_dst;     /* destination address */
+    struct in6_address ip6_src;    /* source address */
+    struct in6_address ip6_dst;    /* destination address */
 };
 
 struct ip6_ext_hdr {
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 4a14a43..eaaf00d 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -4870,10 +4870,10 @@
     } else
 #endif
     {
-#if TARGET_ABI_BITS == 64 && !defined(TARGET_ALPHA)
-        struct target_stat *target_st;
-#else
+#if defined(TARGET_HAS_STRUCT_STAT64)
         struct target_stat64 *target_st;
+#else
+        struct target_stat *target_st;
 #endif
 
         if (!lock_user_struct(VERIFY_WRITE, target_st, target_addr, 0))
diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h
index 5f53a28..fe540f6 100644
--- a/linux-user/syscall_defs.h
+++ b/linux-user/syscall_defs.h
@@ -1178,6 +1178,7 @@
 /* This matches struct stat64 in glibc2.1, hence the absolutely
  * insane amounts of padding around dev_t's.
  */
+#define TARGET_HAS_STRUCT_STAT64
 struct target_stat64 {
 	unsigned short	st_dev;
 	unsigned char	__pad0[10];
@@ -1213,6 +1214,7 @@
 } QEMU_PACKED;
 
 #ifdef TARGET_ARM
+#define TARGET_HAS_STRUCT_STAT64
 struct target_eabi_stat64 {
         unsigned long long st_dev;
         unsigned int    __pad1;
@@ -1262,6 +1264,7 @@
 	abi_ulong	__unused4[2];
 };
 
+#define TARGET_HAS_STRUCT_STAT64
 struct target_stat64 {
 	unsigned char	__pad0[6];
 	unsigned short	st_dev;
@@ -1317,6 +1320,7 @@
 	abi_ulong	__unused4[2];
 };
 
+#define TARGET_HAS_STRUCT_STAT64
 struct target_stat64 {
 	unsigned char	__pad0[6];
 	unsigned short	st_dev;
@@ -1384,6 +1388,8 @@
 #endif
 };
 
+#if !defined(TARGET_PPC64) || defined(TARGET_ABI32)
+#define TARGET_HAS_STRUCT_STAT64
 struct QEMU_PACKED target_stat64 {
 	unsigned long long st_dev;
         unsigned long long st_ino;
@@ -1406,6 +1412,7 @@
         unsigned int   __unused4;
         unsigned int   __unused5;
 };
+#endif
 
 #elif defined(TARGET_MICROBLAZE)
 
@@ -1431,6 +1438,7 @@
 };
 
 /* FIXME: Microblaze no-mmu user-space has a difference stat64 layout...  */
+#define TARGET_HAS_STRUCT_STAT64
 struct QEMU_PACKED target_stat64 {
 	uint64_t st_dev;
 #define TARGET_STAT64_HAS_BROKEN_ST_INO 1
@@ -1486,6 +1494,7 @@
 /* This matches struct stat64 in glibc2.1, hence the absolutely
  * insane amounts of padding around dev_t's.
  */
+#define TARGET_HAS_STRUCT_STAT64
 struct target_stat64 {
 	unsigned long long	st_dev;
 	unsigned char	__pad1[2];
@@ -1594,6 +1603,7 @@
  * struct stat of the 64-bit kernel.
  */
 
+#define TARGET_HAS_STRUCT_STAT64
 struct target_stat64 {
 	unsigned int	st_dev;
 	unsigned int	st_pad0[3];	/* Reserved for st_dev expansion  */
@@ -1665,6 +1675,7 @@
  * struct stat of the 64-bit kernel.
  */
 
+#define TARGET_HAS_STRUCT_STAT64
 struct target_stat64 {
 	abi_ulong	st_dev;
 	abi_ulong	st_pad0[3];	/* Reserved for st_dev expansion  */
@@ -1721,6 +1732,7 @@
        unsigned int    st_gen;
 };
 
+#define TARGET_HAS_STRUCT_STAT64
 struct target_stat64 {
        abi_ulong    st_dev;
        abi_ulong    st_ino;
@@ -1770,6 +1782,7 @@
 /* This matches struct stat64 in glibc2.1, hence the absolutely
  * insane amounts of padding around dev_t's.
  */
+#define TARGET_HAS_STRUCT_STAT64
 struct QEMU_PACKED target_stat64 {
 	unsigned long long	st_dev;
 	unsigned char	__pad0[4];
@@ -1897,6 +1910,7 @@
     unsigned int __unused5;
 };
 
+#define TARGET_HAS_STRUCT_STAT64
 struct target_stat64 {
     uint64_t st_dev;
     uint64_t st_ino;
diff --git a/net/net.c b/net/net.c
index c330c9a..0a88e68 100644
--- a/net/net.c
+++ b/net/net.c
@@ -27,6 +27,7 @@
 #include "clients.h"
 #include "hub.h"
 #include "net/slirp.h"
+#include "net/eth.h"
 #include "util.h"
 
 #include "monitor/monitor.h"
@@ -442,7 +443,6 @@
         if (net_hub_flush(nc->peer)) {
             qemu_notify_event();
         }
-        return;
     }
     if (qemu_net_queue_flush(nc->incoming_queue)) {
         /* We emptied the queue successfully, signal to the IO thread to repoll
@@ -689,6 +689,11 @@
         error_report("invalid syntax for ethernet address");
         return -1;
     }
+    if (nic->has_macaddr &&
+        is_multicast_ether_addr(nd->macaddr.a)) {
+        error_report("NIC cannot have multicast MAC address (odd 1st byte)");
+        return -1;
+    }
     qemu_macaddr_default_if_unset(&nd->macaddr);
 
     if (nic->has_vectors) {
diff --git a/qapi-schema.json b/qapi-schema.json
index 81a375b..76c98a7 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -227,7 +227,7 @@
 ##
 # @ImageInfoSpecificVmdk:
 #
-# @create_type: The create type of VMDK image
+# @create-type: The create type of VMDK image
 #
 # @cid: Content id of image
 #
diff --git a/qemu.nsi b/qemu.nsi
index 0dc1f52..cc5fafd 100644
--- a/qemu.nsi
+++ b/qemu.nsi
@@ -60,7 +60,11 @@
 
 ; Registry key to check for directory (so if you install again, it will
 ; overwrite the old one automatically)
-InstallDirRegKey HKLM "Software\qemu" "Install_Dir"
+!ifdef W64
+InstallDirRegKey HKLM "Software\qemu64" "Install_Dir"
+!else
+InstallDirRegKey HKLM "Software\qemu32" "Install_Dir"
+!endif
 
 ; Request administrator privileges for Windows Vista.
 RequestExecutionLevel admin
diff --git a/scripts/qapi-visit.py b/scripts/qapi-visit.py
index c39e628..65f1a54 100644
--- a/scripts/qapi-visit.py
+++ b/scripts/qapi-visit.py
@@ -20,7 +20,10 @@
 def generate_visit_struct_fields(name, field_prefix, fn_prefix, members, base = None):
     substructs = []
     ret = ''
-    full_name = name if not fn_prefix else "%s_%s" % (name, fn_prefix)
+    if not fn_prefix:
+        full_name = name
+    else:
+        full_name = "%s_%s" % (name, fn_prefix)
 
     for argname, argentry, optional, structured in parse_args(members):
         if structured:
@@ -97,7 +100,10 @@
 ''')
     push_indent()
 
-    full_name = name if not field_prefix else "%s_%s" % (field_prefix, name)
+    if not field_prefix:
+        full_name = name
+    else:
+        full_name = "%s_%s" % (field_prefix, name)
 
     if len(field_prefix):
         ret += mcgen('''
@@ -283,12 +289,17 @@
             name=name)
 
     pop_indent()
+
+    if not discriminator:
+        desc_type = "type"
+    else:
+        desc_type = discriminator
     ret += mcgen('''
         visit_type_%(name)sKind(m, &(*obj)->kind, "%(type)s", &err);
         if (!err) {
             switch ((*obj)->kind) {
 ''',
-                 name=name, type="type" if not discriminator else discriminator)
+                 name=name, type=desc_type)
 
     for key in members:
         if not discriminator:
diff --git a/target-xtensa/core-dc233c.c b/target-xtensa/core-dc233c.c
index 11acbf3..738d543 100644
--- a/target-xtensa/core-dc233c.c
+++ b/target-xtensa/core-dc233c.c
@@ -49,6 +49,7 @@
     EXCEPTIONS_SECTION,
     INTERRUPTS_SECTION,
     TLB_SECTION,
+    DEBUG_SECTION,
     .clock_freq_khz = 10000,
 };
 
diff --git a/tests/Makefile b/tests/Makefile
index f414f2c..379cdd9 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -68,6 +68,8 @@
 check-qtest-i386-y += tests/i440fx-test$(EXESUF)
 check-qtest-i386-y += tests/fw_cfg-test$(EXESUF)
 check-qtest-i386-y += tests/qom-test$(EXESUF)
+check-qtest-i386-y += tests/blockdev-test$(EXESUF)
+check-qtest-i386-y += tests/qdev-monitor-test$(EXESUF)
 check-qtest-x86_64-y = $(check-qtest-i386-y)
 gcov-files-i386-y += i386-softmmu/hw/mc146818rtc.c
 gcov-files-x86_64-y = $(subst i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y))
@@ -200,6 +202,8 @@
 tests/i440fx-test$(EXESUF): tests/i440fx-test.o $(libqos-pc-obj-y)
 tests/fw_cfg-test$(EXESUF): tests/fw_cfg-test.o $(libqos-pc-obj-y)
 tests/qom-test$(EXESUF): tests/qom-test.o
+tests/blockdev-test$(EXESUF): tests/blockdev-test.o $(libqos-pc-obj-y)
+tests/qdev-monitor-test$(EXESUF): tests/qdev-monitor-test.o $(libqos-pc-obj-y)
 tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o
 
 # QTest rules
diff --git a/tests/blockdev-test.c b/tests/blockdev-test.c
new file mode 100644
index 0000000..c940e00
--- /dev/null
+++ b/tests/blockdev-test.c
@@ -0,0 +1,59 @@
+/*
+ * blockdev.c test cases
+ *
+ * Copyright (C) 2013 Red Hat Inc.
+ *
+ * Authors:
+ *  Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include <glib.h>
+#include <string.h>
+#include "libqtest.h"
+
+static void test_drive_add_empty(void)
+{
+    QDict *response;
+    const char *response_return;
+
+    /* Start with an empty drive */
+    qtest_start("-drive if=none,id=drive0");
+
+    /* Delete the drive */
+    response = qmp("{\"execute\": \"human-monitor-command\","
+                   " \"arguments\": {"
+                   "   \"command-line\": \"drive_del drive0\""
+                   "}}");
+    g_assert(response);
+    response_return = qdict_get_try_str(response, "return");
+    g_assert(response_return);
+    g_assert(strcmp(response_return, "") == 0);
+    QDECREF(response);
+
+    /* Ensure re-adding the drive works - there should be no duplicate ID error
+     * because the old drive must be gone.
+     */
+    response = qmp("{\"execute\": \"human-monitor-command\","
+                   " \"arguments\": {"
+                   "   \"command-line\": \"drive_add 0 if=none,id=drive0\""
+                   "}}");
+    g_assert(response);
+    response_return = qdict_get_try_str(response, "return");
+    g_assert(response_return);
+    g_assert(strcmp(response_return, "OK\r\n") == 0);
+    QDECREF(response);
+
+    qtest_end();
+}
+
+int main(int argc, char **argv)
+{
+    g_test_init(&argc, &argv, NULL);
+
+    qtest_add_func("/qmp/drive_add_empty", test_drive_add_empty);
+
+    return g_test_run();
+}
diff --git a/tests/boot-order-test.c b/tests/boot-order-test.c
index 4b233d0..da158c3 100644
--- a/tests/boot-order-test.c
+++ b/tests/boot-order-test.c
@@ -41,12 +41,12 @@
     qtest_start(args);
     actual = read_boot_order();
     g_assert_cmphex(actual, ==, expected_boot);
-    qmp("{ 'execute': 'system_reset' }");
+    qmp_discard_response("{ 'execute': 'system_reset' }");
     /*
      * system_reset only requests reset.  We get a RESET event after
      * the actual reset completes.  Need to wait for that.
      */
-    qmp("");                    /* HACK: wait for event */
+    qmp_discard_response("");   /* HACK: wait for event */
     actual = read_boot_order();
     g_assert_cmphex(actual, ==, expected_reboot);
     qtest_quit(global_qtest);
diff --git a/tests/fdc-test.c b/tests/fdc-test.c
index fd198dc..38b5b17 100644
--- a/tests/fdc-test.c
+++ b/tests/fdc-test.c
@@ -290,10 +290,12 @@
 
     /* Insert media in drive. DSKCHK should not be reset until a step pulse
      * is sent. */
-    qmp("{'execute':'change', 'arguments':{ 'device':'floppy0', "
-        "'target': '%s' }}", test_image);
-    qmp(""); /* ignore event (FIXME open -> open transition?!) */
-    qmp(""); /* ignore event */
+    qmp_discard_response("{'execute':'change', 'arguments':{"
+                         " 'device':'floppy0', 'target': '%s' }}",
+                         test_image);
+    qmp_discard_response(""); /* ignore event
+                                 (FIXME open -> open transition?!) */
+    qmp_discard_response(""); /* ignore event */
 
     dir = inb(FLOPPY_BASE + reg_dir);
     assert_bit_set(dir, DSKCHG);
@@ -322,8 +324,9 @@
 
     /* Eject the floppy and check that DSKCHG is set. Reading it out doesn't
      * reset the bit. */
-    qmp("{'execute':'eject', 'arguments':{ 'device':'floppy0' }}");
-    qmp(""); /* ignore event */
+    qmp_discard_response("{'execute':'eject', 'arguments':{"
+                         " 'device':'floppy0' }}");
+    qmp_discard_response(""); /* ignore event */
 
     dir = inb(FLOPPY_BASE + reg_dir);
     assert_bit_set(dir, DSKCHG);
diff --git a/tests/ide-test.c b/tests/ide-test.c
index bc824a8..d5cec5a 100644
--- a/tests/ide-test.c
+++ b/tests/ide-test.c
@@ -460,8 +460,9 @@
         tmp_path);
 
     /* Delay the completion of the flush request until we explicitly do it */
-    qmp("{'execute':'human-monitor-command', 'arguments': { "
-        "'command-line': 'qemu-io ide0-hd0 \"break flush_to_os A\"'} }");
+    qmp_discard_response("{'execute':'human-monitor-command', 'arguments': {"
+                         " 'command-line':"
+                         " 'qemu-io ide0-hd0 \"break flush_to_os A\"'} }");
 
     /* FLUSH CACHE command on device 0*/
     outb(IDE_BASE + reg_device, 0);
@@ -473,8 +474,9 @@
     assert_bit_clear(data, DF | ERR | DRQ);
 
     /* Complete the command */
-    qmp("{'execute':'human-monitor-command', 'arguments': { "
-        "'command-line': 'qemu-io ide0-hd0 \"resume A\"'} }");
+    qmp_discard_response("{'execute':'human-monitor-command', 'arguments': {"
+                         " 'command-line':"
+                         " 'qemu-io ide0-hd0 \"resume A\"'} }");
 
     /* Check registers */
     data = inb(IDE_BASE + reg_device);
diff --git a/tests/libqtest.c b/tests/libqtest.c
index bb82069..83424c3 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -30,6 +30,8 @@
 
 #include "qemu/compiler.h"
 #include "qemu/osdep.h"
+#include "qapi/qmp/json-streamer.h"
+#include "qapi/qmp/json-parser.h"
 
 #define MAX_IRQ 256
 
@@ -151,8 +153,8 @@
     }
 
     /* Read the QMP greeting and then do the handshake */
-    qtest_qmp(s, "");
-    qtest_qmp(s, "{ 'execute': 'qmp_capabilities' }");
+    qtest_qmp_discard_response(s, "");
+    qtest_qmp_discard_response(s, "{ 'execute': 'qmp_capabilities' }");
 
     if (getenv("QTEST_STOP")) {
         kill(qtest_qemu_pid(s), SIGSTOP);
@@ -291,16 +293,38 @@
     return words;
 }
 
-void qtest_qmpv(QTestState *s, const char *fmt, va_list ap)
+typedef struct {
+    JSONMessageParser parser;
+    QDict *response;
+} QMPResponseParser;
+
+static void qmp_response(JSONMessageParser *parser, QList *tokens)
 {
-    bool has_reply = false;
-    int nesting = 0;
+    QMPResponseParser *qmp = container_of(parser, QMPResponseParser, parser);
+    QObject *obj;
+
+    obj = json_parser_parse(tokens, NULL);
+    if (!obj) {
+        fprintf(stderr, "QMP JSON response parsing failed\n");
+        exit(1);
+    }
+
+    g_assert(qobject_type(obj) == QTYPE_QDICT);
+    g_assert(!qmp->response);
+    qmp->response = (QDict *)obj;
+}
+
+QDict *qtest_qmpv(QTestState *s, const char *fmt, va_list ap)
+{
+    QMPResponseParser qmp;
 
     /* Send QMP request */
     socket_sendf(s->qmp_fd, fmt, ap);
 
     /* Receive reply */
-    while (!has_reply || nesting > 0) {
+    qmp.response = NULL;
+    json_message_parser_init(&qmp.parser, qmp_response);
+    while (!qmp.response) {
         ssize_t len;
         char c;
 
@@ -314,25 +338,39 @@
             exit(1);
         }
 
-        switch (c) {
-        case '{':
-            nesting++;
-            has_reply = true;
-            break;
-        case '}':
-            nesting--;
-            break;
-        }
+        json_message_parser_feed(&qmp.parser, &c, 1);
     }
+    json_message_parser_destroy(&qmp.parser);
+
+    return qmp.response;
 }
 
-void qtest_qmp(QTestState *s, const char *fmt, ...)
+QDict *qtest_qmp(QTestState *s, const char *fmt, ...)
 {
     va_list ap;
+    QDict *response;
 
     va_start(ap, fmt);
-    qtest_qmpv(s, fmt, ap);
+    response = qtest_qmpv(s, fmt, ap);
     va_end(ap);
+    return response;
+}
+
+void qtest_qmpv_discard_response(QTestState *s, const char *fmt, va_list ap)
+{
+    QDict *response = qtest_qmpv(s, fmt, ap);
+    QDECREF(response);
+}
+
+void qtest_qmp_discard_response(QTestState *s, const char *fmt, ...)
+{
+    va_list ap;
+    QDict *response;
+
+    va_start(ap, fmt);
+    response = qtest_qmpv(s, fmt, ap);
+    va_end(ap);
+    QDECREF(response);
 }
 
 const char *qtest_get_arch(void)
diff --git a/tests/libqtest.h b/tests/libqtest.h
index a6e99bd..9deebdc 100644
--- a/tests/libqtest.h
+++ b/tests/libqtest.h
@@ -22,6 +22,7 @@
 #include <stdbool.h>
 #include <stdarg.h>
 #include <sys/types.h>
+#include "qapi/qmp/qdict.h"
 
 typedef struct QTestState QTestState;
 
@@ -44,13 +45,32 @@
 void qtest_quit(QTestState *s);
 
 /**
+ * qtest_qmp_discard_response:
+ * @s: #QTestState instance to operate on.
+ * @fmt...: QMP message to send to qemu
+ *
+ * Sends a QMP message to QEMU and consumes the response.
+ */
+void qtest_qmp_discard_response(QTestState *s, const char *fmt, ...);
+
+/**
  * qtest_qmp:
  * @s: #QTestState instance to operate on.
  * @fmt...: QMP message to send to qemu
  *
- * Sends a QMP message to QEMU
+ * Sends a QMP message to QEMU and returns the response.
  */
-void qtest_qmp(QTestState *s, const char *fmt, ...);
+QDict *qtest_qmp(QTestState *s, const char *fmt, ...);
+
+/**
+ * qtest_qmpv_discard_response:
+ * @s: #QTestState instance to operate on.
+ * @fmt: QMP message to send to QEMU
+ * @ap: QMP message arguments
+ *
+ * Sends a QMP message to QEMU and consumes the response.
+ */
+void qtest_qmpv_discard_response(QTestState *s, const char *fmt, va_list ap);
 
 /**
  * qtest_qmpv:
@@ -58,9 +78,9 @@
  * @fmt: QMP message to send to QEMU
  * @ap: QMP message arguments
  *
- * Sends a QMP message to QEMU.
+ * Sends a QMP message to QEMU and returns the response.
  */
-void qtest_qmpv(QTestState *s, const char *fmt, va_list ap);
+QDict *qtest_qmpv(QTestState *s, const char *fmt, va_list ap);
 
 /**
  * qtest_get_irq:
@@ -334,14 +354,31 @@
  * qmp:
  * @fmt...: QMP message to send to qemu
  *
- * Sends a QMP message to QEMU
+ * Sends a QMP message to QEMU and returns the response.
  */
-static inline void qmp(const char *fmt, ...)
+static inline QDict *qmp(const char *fmt, ...)
+{
+    va_list ap;
+    QDict *response;
+
+    va_start(ap, fmt);
+    response = qtest_qmpv(global_qtest, fmt, ap);
+    va_end(ap);
+    return response;
+}
+
+/**
+ * qmp_discard_response:
+ * @fmt...: QMP message to send to qemu
+ *
+ * Sends a QMP message to QEMU and consumes the response.
+ */
+static inline void qmp_discard_response(const char *fmt, ...)
 {
     va_list ap;
 
     va_start(ap, fmt);
-    qtest_qmpv(global_qtest, fmt, ap);
+    qtest_qmpv_discard_response(global_qtest, fmt, ap);
     va_end(ap);
 }
 
diff --git a/tests/qdev-monitor-test.c b/tests/qdev-monitor-test.c
new file mode 100644
index 0000000..33a8ea4
--- /dev/null
+++ b/tests/qdev-monitor-test.c
@@ -0,0 +1,81 @@
+/*
+ * qdev-monitor.c test cases
+ *
+ * Copyright (C) 2013 Red Hat Inc.
+ *
+ * Authors:
+ *  Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include <string.h>
+#include <glib.h>
+#include "libqtest.h"
+#include "qapi/qmp/qjson.h"
+
+static void test_device_add(void)
+{
+    QDict *response;
+    QDict *error;
+
+    qtest_start("-drive if=none,id=drive0");
+
+    /* Make device_add fail.  If this leaks the virtio-blk-pci device then a
+     * reference to drive0 will also be held (via qdev properties).
+     */
+    response = qmp("{\"execute\": \"device_add\","
+                   " \"arguments\": {"
+                   "   \"driver\": \"virtio-blk-pci\","
+                   "   \"drive\": \"drive0\""
+                   "}}");
+    g_assert(response);
+    error = qdict_get_qdict(response, "error");
+    g_assert(!strcmp(qdict_get_try_str(error, "class") ?: "",
+                     "GenericError"));
+    g_assert(!strcmp(qdict_get_try_str(error, "desc") ?: "",
+                     "Device initialization failed."));
+    QDECREF(response);
+
+    /* Delete the drive */
+    response = qmp("{\"execute\": \"human-monitor-command\","
+                   " \"arguments\": {"
+                   "   \"command-line\": \"drive_del drive0\""
+                   "}}");
+    g_assert(response);
+    g_assert(!strcmp(qdict_get_try_str(response, "return") ?: "(null)", ""));
+    QDECREF(response);
+
+    /* Try to re-add the drive.  This fails with duplicate IDs if a leaked
+     * virtio-blk-pci exists that holds a reference to the old drive0.
+     */
+    response = qmp("{\"execute\": \"human-monitor-command\","
+                   " \"arguments\": {"
+                   "   \"command-line\": \"drive_add pci-addr=auto if=none,id=drive0\""
+                   "}}");
+    g_assert(response);
+    g_assert(!strcmp(qdict_get_try_str(response, "return") ?: "",
+                     "OK\r\n"));
+    QDECREF(response);
+
+    qtest_end();
+}
+
+int main(int argc, char **argv)
+{
+    const char *arch = qtest_get_arch();
+
+    /* Check architecture */
+    if (strcmp(arch, "i386") && strcmp(arch, "x86_64")) {
+        g_test_message("Skipping test for non-x86\n");
+        return 0;
+    }
+
+    /* Run the tests */
+    g_test_init(&argc, &argv, NULL);
+
+    qtest_add_func("/qmp/device_add", test_device_add);
+
+    return g_test_run();
+}
diff --git a/tests/qemu-iotests/017 b/tests/qemu-iotests/017
index 45f2c0b..aba3faf 100755
--- a/tests/qemu-iotests/017
+++ b/tests/qemu-iotests/017
@@ -66,7 +66,7 @@
 echo
 
 TEST_IMG=$TEST_IMG_SAVE
-_make_test_img -b $TEST_IMG.base 6G
+_make_test_img -b "$TEST_IMG.base" 6G
 
 echo "Filling test image"
 echo
diff --git a/tests/qemu-iotests/019 b/tests/qemu-iotests/019
index cd3582c..5bb18d0 100755
--- a/tests/qemu-iotests/019
+++ b/tests/qemu-iotests/019
@@ -90,12 +90,12 @@
 # Test the conversion twice: One test with the old-style -B option and another
 # one with -o backing_file
 
-for backing_option in "-B $TEST_IMG.base" "-o backing_file=$TEST_IMG.base"; do
+for backing_option in "-B " "-o backing_file="; do
 
     echo
-    echo Testing conversion with $backing_option | _filter_testdir | _filter_imgfmt
+    echo Testing conversion with $backing_option$TEST_IMG.base | _filter_testdir | _filter_imgfmt
     echo
-    $QEMU_IMG convert -O $IMGFMT $backing_option "$TEST_IMG.orig" "$TEST_IMG"
+    $QEMU_IMG convert -O $IMGFMT $backing_option"$TEST_IMG.base" "$TEST_IMG.orig" "$TEST_IMG"
 
     echo "Checking if backing clusters are allocated when they shouldn't"
     echo
diff --git a/tests/qemu-iotests/039 b/tests/qemu-iotests/039
index f85b4ce..8bade92 100755
--- a/tests/qemu-iotests/039
+++ b/tests/qemu-iotests/039
@@ -54,7 +54,7 @@
 IMGOPTS="compat=1.1,lazy_refcounts=on"
 _make_test_img $size
 
-$QEMU_IO -c "write -P 0x5a 0 512" ""$TEST_IMG"" | _filter_qemu_io
+$QEMU_IO -c "write -P 0x5a 0 512" "$TEST_IMG" | _filter_qemu_io
 
 # The dirty bit must not be set
 ./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051
index 356c375..0a4971d 100755
--- a/tests/qemu-iotests/051
+++ b/tests/qemu-iotests/051
@@ -64,9 +64,9 @@
 size=128M
 
 _make_test_img $size
-cp $TEST_IMG $TEST_IMG.orig
-mv $TEST_IMG $TEST_IMG.base
-_make_test_img -b $TEST_IMG.base $size
+cp "$TEST_IMG" "$TEST_IMG.orig"
+mv "$TEST_IMG" "$TEST_IMG.base"
+_make_test_img -b "$TEST_IMG.base" $size
 
 echo
 echo === Unknown option ===
@@ -81,7 +81,7 @@
 echo === Overriding backing file ===
 echo
 
-echo "info block" | run_qemu -drive file=$TEST_IMG,driver=qcow2,backing.file.filename=$TEST_IMG.orig -nodefaults
+echo "info block" | run_qemu -drive file="$TEST_IMG",driver=qcow2,backing.file.filename="$TEST_IMG.orig" -nodefaults
 
 echo
 echo === Enable and disable lazy refcounting on the command line, plus some invalid values ===
diff --git a/tests/qemu-iotests/061 b/tests/qemu-iotests/061
index fa9319d..e42f9bd 100755
--- a/tests/qemu-iotests/061
+++ b/tests/qemu-iotests/061
@@ -163,7 +163,7 @@
 echo
 IMGOPTS="compat=1.1" TEST_IMG="$TEST_IMG.base" _make_test_img 64M
 $QEMU_IO -c "write -P 0x2a 0 128k" "$TEST_IMG.base" | _filter_qemu_io
-IMGOPTS="compat=1.1,backing_file=$TEST_IMG.base" _make_test_img 64M
+IMGOPTS="compat=1.1" _make_test_img -b "$TEST_IMG.base" 64M
 $QEMU_IO -c "read -P 0x2a 0 128k" -c "write -z 0 64k" "$TEST_IMG" | _filter_qemu_io
 $QEMU_IMG amend -o "compat=0.10" "$TEST_IMG"
 _check_test_img
@@ -174,7 +174,7 @@
 echo
 IMGOPTS="compat=1.1" TEST_IMG="$TEST_IMG.base" _make_test_img 64M
 $QEMU_IO -c "write -P 0x2a 0 128k" "$TEST_IMG.base" | _filter_qemu_io
-IMGOPTS="compat=1.1,backing_file=$TEST_IMG.base" _make_test_img 64M
+IMGOPTS="compat=1.1" _make_test_img -b "$TEST_IMG.base" 64M
 $QEMU_IO -c "write -z 0 64k" "$TEST_IMG" | _filter_qemu_io
 $QEMU_IMG snapshot -c foo "$TEST_IMG"
 $QEMU_IO -c "write -P 0x42 0 128k" "$TEST_IMG" | _filter_qemu_io
@@ -190,7 +190,7 @@
 echo
 IMGOPTS="compat=1.1" TEST_IMG="$TEST_IMG.base" _make_test_img 64M
 $QEMU_IO -c "write -P 0x2a 0 128k" "$TEST_IMG.base" | _filter_qemu_io
-IMGOPTS="compat=1.1,backing_file=$TEST_IMG.base" _make_test_img 64M
+IMGOPTS="compat=1.1" _make_test_img -b "$TEST_IMG.base" 64M
 $QEMU_IO -c "write -z 0 128k" "$TEST_IMG" | _filter_qemu_io
 $QEMU_IMG snapshot -c foo "$TEST_IMG"
 $QEMU_IMG amend -o "compat=0.10" "$TEST_IMG"
diff --git a/tests/qemu-iotests/064 b/tests/qemu-iotests/064
index 6789aa6..1c74c31 100755
--- a/tests/qemu-iotests/064
+++ b/tests/qemu-iotests/064
@@ -56,6 +56,17 @@
 echo "=== Verify pattern 0x00, 66M - 1024M ==="
 $QEMU_IO -r -c "read -pP 0x00 66M 958M" "$TEST_IMG" | _filter_qemu_io
 
+echo
+echo "=== Verify pattern write, 0xc3 99M-157M ==="
+$QEMU_IO -c "write -pP 0xc3 99M 58M" "$TEST_IMG" | _filter_qemu_io
+# first verify we didn't write where we should not have
+$QEMU_IO -c "read -pP 0xa5 0 33M" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -pP 0x96 33M 33M" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -pP 0x00 66M 33M" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -pP 0x00 157MM 867MM" "$TEST_IMG" | _filter_qemu_io
+# now verify what we should have actually written
+$QEMU_IO -c "read -pP 0xc3 99M 58M" "$TEST_IMG" | _filter_qemu_io
+
 # success, all done
 echo "*** done"
 rm -f $seq.full
diff --git a/tests/qemu-iotests/064.out b/tests/qemu-iotests/064.out
index b9e8e4a..5346a4e 100644
--- a/tests/qemu-iotests/064.out
+++ b/tests/qemu-iotests/064.out
@@ -11,4 +11,18 @@
 === Verify pattern 0x00, 66M - 1024M ===
 read 1004535808/1004535808 bytes at offset 69206016
 958 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+=== Verify pattern write, 0xc3 99M-157M ===
+wrote 60817408/60817408 bytes at offset 103809024
+58 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 34603008/34603008 bytes at offset 0
+33 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 34603008/34603008 bytes at offset 34603008
+33 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 34603008/34603008 bytes at offset 69206016
+33 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 909115392/909115392 bytes at offset 164626432
+867 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 60817408/60817408 bytes at offset 103809024
+58 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 *** done
diff --git a/tests/qemu-iotests/067 b/tests/qemu-iotests/067
index 79dc38b..d025192 100755
--- a/tests/qemu-iotests/067
+++ b/tests/qemu-iotests/067
@@ -45,7 +45,7 @@
 
 function run_qemu()
 {
-    do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp
+    do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | sed -e 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g'
 }
 
 size=128M
diff --git a/tests/qemu-iotests/067.out b/tests/qemu-iotests/067.out
index 4bb9ff9..8d271cc 100644
--- a/tests/qemu-iotests/067.out
+++ b/tests/qemu-iotests/067.out
@@ -6,7 +6,7 @@
 Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk -device virtio-blk-pci,drive=disk,id=virtio0
 QMP_VERSION
 {"return": {}}
-{"return": [{"io-status": "ok", "device": "disk", "locked": false, "removable": false, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": 139264, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "type": "unknown"}, {"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}]}
+{"return": [{"io-status": "ok", "device": "disk", "locked": false, "removable": false, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": SIZE, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "type": "unknown"}, {"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}]}
 {"return": {}}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "DEVICE_DELETED", "data": {"path": "/machine/peripheral/virtio0/virtio-backend"}}
@@ -24,7 +24,7 @@
 Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk
 QMP_VERSION
 {"return": {}}
-{"return": [{"device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": 139264, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}, {"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}]}
+{"return": [{"device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": SIZE, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}, {"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}]}
 {"return": {}}
 {"return": {}}
 {"return": {}}
@@ -44,7 +44,7 @@
 QMP_VERSION
 {"return": {}}
 {"return": "OK\r\n"}
-{"return": [{"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": 139264, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}]}
+{"return": [{"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": SIZE, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}]}
 {"return": {}}
 {"return": {}}
 {"return": {}}
@@ -64,14 +64,14 @@
 QMP_VERSION
 {"return": {}}
 {"return": {}}
-{"return": [{"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": 139264, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}]}
+{"return": [{"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": SIZE, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}]}
 {"return": {}}
 {"return": {}}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "DEVICE_DELETED", "data": {"path": "/machine/peripheral/virtio0/virtio-backend"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "DEVICE_DELETED", "data": {"device": "virtio0", "path": "/machine/peripheral/virtio0"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "RESET"}
-{"return": [{"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"io-status": "ok", "device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": 139264, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}]}
+{"return": [{"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"io-status": "ok", "device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": SIZE, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}]}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN"}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "ide1-cd0", "tray-open": true}}
diff --git a/tests/qemu-iotests/070 b/tests/qemu-iotests/070
new file mode 100755
index 0000000..41bf100
--- /dev/null
+++ b/tests/qemu-iotests/070
@@ -0,0 +1,67 @@
+#!/bin/bash
+#
+# Test VHDX log replay from an image with a journal that needs to be
+# replayed
+#
+# Copyright (C) 2013 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=jcody@redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1	# failure is the default!
+
+_cleanup()
+{
+    _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt vhdx
+_supported_proto generic
+_supported_os Linux
+
+# With the log replayed, the pattern 0xa5 extends to 0xc025000
+# If the log was not replayed, it would only extend to 0xc000000
+#
+# This image is a 10G dynamic image, with 4M block size, and 1 unplayed
+# data sector in the log
+#
+# This image was created with qemu-img, however it was verified using
+# Hyper-V to properly replay the logs and give the same post-replay
+# image as qemu.
+_use_sample_img iotest-dirtylog-10G-4M.vhdx.bz2
+
+echo
+echo "=== Verify open image read-only fails, due to dirty log ==="
+$QEMU_IO -r -c "read -pP 0xa5 0 18M" "$TEST_IMG" 2>&1 | grep -o "Permission denied"
+
+echo "=== Verify open image replays log  ==="
+$QEMU_IO  -c "read -pP 0xa5 0 18M" "$TEST_IMG" | _filter_qemu_io
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/070.out b/tests/qemu-iotests/070.out
new file mode 100644
index 0000000..9db8ff2
--- /dev/null
+++ b/tests/qemu-iotests/070.out
@@ -0,0 +1,8 @@
+QA output created by 070
+
+=== Verify open image read-only fails, due to dirty log ===
+Permission denied
+=== Verify open image replays log  ===
+read 18874368/18874368 bytes at offset 0
+18 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+*** done
diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common
index 2932e14..8cde7f1 100644
--- a/tests/qemu-iotests/common
+++ b/tests/qemu-iotests/common
@@ -200,7 +200,6 @@
         -vhdx)
             IMGFMT=vhdx
             xpand=false
-            IMGFMT_GENERIC=false
             ;;
 
         -rbd)
diff --git a/tests/qemu-iotests/common.pattern b/tests/qemu-iotests/common.pattern
index 00e0f60..ddfbca1 100644
--- a/tests/qemu-iotests/common.pattern
+++ b/tests/qemu-iotests/common.pattern
@@ -28,7 +28,7 @@
 }
 
 function is_allocated() {
-    do_is_allocated "$@" | $QEMU_IO $TEST_IMG | _filter_qemu_io
+    do_is_allocated "$@" | $QEMU_IO "$TEST_IMG" | _filter_qemu_io
 }
 
 function do_io() {
@@ -46,18 +46,18 @@
 }
 
 function io_pattern() {
-    do_io "$@" | $QEMU_IO $TEST_IMG | _filter_qemu_io
+    do_io "$@" | $QEMU_IO "$TEST_IMG" | _filter_qemu_io
 }
 
 function io() {
     local start=$2
     local pattern=$(( (start >> 9) % 256 ))
 
-    do_io "$@" $pattern | $QEMU_IO $TEST_IMG | _filter_qemu_io
+    do_io "$@" $pattern | $QEMU_IO "$TEST_IMG" | _filter_qemu_io
 }
 
 function io_zero() {
-    do_io "$@" 0 | $QEMU_IO $TEST_IMG | _filter_qemu_io
+    do_io "$@" 0 | $QEMU_IO "$TEST_IMG" | _filter_qemu_io
 }
 
 function io_test() {
@@ -117,8 +117,8 @@
     echo === Clusters to be compressed [3]
     io_pattern writev $((offset + 8 * $cluster_size)) $cluster_size $((9 * $cluster_size)) $num 165
 
-    mv $TEST_IMG $TEST_IMG.orig
-    $QEMU_IMG convert -f $IMGFMT -O $IMGFMT -c $TEST_IMG.orig $TEST_IMG
+    mv "$TEST_IMG" "$TEST_IMG.orig"
+    $QEMU_IMG convert -f $IMGFMT -O $IMGFMT -c "$TEST_IMG.orig" "$TEST_IMG"
 
     # Write the used clusters
     echo === Used clusters [1]
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index 4e82604..7f62457 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -111,6 +111,8 @@
     local image_size=$*
     local optstr=""
     local img_name=""
+    local use_backing=0
+    local backing_file=""
 
     if [ -n "$TEST_IMG_FILE" ]; then
         img_name=$TEST_IMG_FILE
@@ -123,7 +125,8 @@
     fi
 
     if [ "$1" = "-b" ]; then
-        extra_img_options="$1 $2"
+        use_backing=1
+        backing_file=$2
         image_size=$3
     fi
     if [ \( "$IMGFMT" = "qcow2" -o "$IMGFMT" = "qed" \) -a -n "$CLUSTER_SIZE" ]; then
@@ -135,7 +138,13 @@
     fi
 
     # XXX(hch): have global image options?
-    $QEMU_IMG create -f $IMGFMT $extra_img_options $img_name $image_size 2>&1 | \
+    (
+     if [ $use_backing = 1 ]; then
+        $QEMU_IMG create -f $IMGFMT $extra_img_options -b "$backing_file" "$img_name" $image_size 2>&1
+     else
+        $QEMU_IMG create -f $IMGFMT $extra_img_options "$img_name" $image_size 2>&1
+     fi
+    ) | \
         sed -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \
             -e "s#$TEST_DIR#TEST_DIR#g" \
             -e "s#$IMGFMT#IMGFMT#g" \
@@ -148,7 +157,10 @@
             -e "s# zeroed_grain=\\(on\\|off\\)##g" \
             -e "s# subformat='[^']*'##g" \
             -e "s# adapter_type='[^']*'##g" \
-            -e "s# lazy_refcounts=\\(on\\|off\\)##g"
+            -e "s# lazy_refcounts=\\(on\\|off\\)##g" \
+            -e "s# block_size=[0-9]\\+##g" \
+            -e "s# block_state_zero=\\(on\\|off\\)##g" \
+            -e "s# log_size=[0-9]\\+##g"
 
     # Start an NBD server on the image file, which is what we'll be talking to
     if [ $IMGPROTO = "nbd" ]; then
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index c57ff35..b18b241 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -75,3 +75,4 @@
 067 rw auto
 068 rw auto
 069 rw auto
+070 rw auto
diff --git a/tests/qemu-iotests/sample_images/iotest-dirtylog-10G-4M.vhdx.bz2 b/tests/qemu-iotests/sample_images/iotest-dirtylog-10G-4M.vhdx.bz2
new file mode 100644
index 0000000..4b91cfc
--- /dev/null
+++ b/tests/qemu-iotests/sample_images/iotest-dirtylog-10G-4M.vhdx.bz2
Binary files differ