Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
# gpg: Signature made Mon 03 Nov 2014 11:50:53 GMT using RSA key ID 81AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg: aka "Stefan Hajnoczi <stefanha@gmail.com>"
* remotes/stefanha/tags/block-pull-request: (53 commits)
block: declare blockjobs and dataplane friends!
block: let commit blockjob run in BDS AioContext
block: let mirror blockjob run in BDS AioContext
block: let stream blockjob run in BDS AioContext
block: let backup blockjob run in BDS AioContext
block: add bdrv_drain()
blockjob: add block_job_defer_to_main_loop()
blockdev: add note that block_job_cb() must be thread-safe
blockdev: acquire AioContext in blockdev_mark_auto_del()
blockdev: acquire AioContext in do_qmp_query_block_jobs_one()
block: acquire AioContext in generic blockjob QMP commands
iotests: Expand test 061
block/qcow2: Simplify shared L2 handling in amend
block/qcow2: Make get_refcount() global
block/qcow2: Implement status CB for amend
qemu-img: Fix insignificant memleak
qemu-img: Add progress output for amend
block: Add status callback to bdrv_amend_options()
block: qemu-iotest 107 supports NFS
iotests: Add test for qcow2's bdrv_make_empty
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/block.c b/block.c
index 88f6d9b..dacd881 100644
--- a/block.c
+++ b/block.c
@@ -519,6 +519,7 @@
return;
}
bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
+ bs->bl.max_transfer_length = bs->file->bl.max_transfer_length;
bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
} else {
bs->bl.opt_mem_alignment = 512;
@@ -533,6 +534,9 @@
bs->bl.opt_transfer_length =
MAX(bs->bl.opt_transfer_length,
bs->backing_hd->bl.opt_transfer_length);
+ bs->bl.max_transfer_length =
+ MIN_NON_ZERO(bs->bl.max_transfer_length,
+ bs->backing_hd->bl.max_transfer_length);
bs->bl.opt_mem_alignment =
MAX(bs->bl.opt_mem_alignment,
bs->backing_hd->bl.opt_mem_alignment);
@@ -1900,6 +1904,34 @@
return false;
}
+static bool bdrv_drain_one(BlockDriverState *bs)
+{
+ bool bs_busy;
+
+ bdrv_flush_io_queue(bs);
+ bdrv_start_throttled_reqs(bs);
+ bs_busy = bdrv_requests_pending(bs);
+ bs_busy |= aio_poll(bdrv_get_aio_context(bs), bs_busy);
+ return bs_busy;
+}
+
+/*
+ * Wait for pending requests to complete on a single BlockDriverState subtree
+ *
+ * See the warning in bdrv_drain_all(). This function can only be called if
+ * you are sure nothing can generate I/O because you have op blockers
+ * installed.
+ *
+ * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
+ * AioContext.
+ */
+void bdrv_drain(BlockDriverState *bs)
+{
+ while (bdrv_drain_one(bs)) {
+ /* Keep iterating */
+ }
+}
+
/*
* Wait for pending requests to complete across all BlockDriverStates
*
@@ -1923,16 +1955,10 @@
QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
AioContext *aio_context = bdrv_get_aio_context(bs);
- bool bs_busy;
aio_context_acquire(aio_context);
- bdrv_flush_io_queue(bs);
- bdrv_start_throttled_reqs(bs);
- bs_busy = bdrv_requests_pending(bs);
- bs_busy |= aio_poll(aio_context, bs_busy);
+ busy |= bdrv_drain_one(bs);
aio_context_release(aio_context);
-
- busy |= bs_busy;
}
}
}
@@ -3540,10 +3566,10 @@
BlockErrorAction action,
bool is_read, int error)
{
- BlockErrorAction ac;
+ IoOperationType optype;
- ac = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
- qapi_event_send_block_io_error(bdrv_get_device_name(bs), ac, action,
+ optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
+ qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
bdrv_iostatus_is_enabled(bs),
error == ENOSPC, strerror(error),
&error_abort);
@@ -4442,6 +4468,11 @@
merge = 0;
}
+ if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors +
+ reqs[i].nb_sectors > bs->bl.max_transfer_length) {
+ merge = 0;
+ }
+
if (merge) {
size_t size;
QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
@@ -5750,12 +5781,13 @@
notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
}
-int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts)
+int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
+ BlockDriverAmendStatusCB *status_cb)
{
if (!bs->drv->bdrv_amend_options) {
return -ENOTSUP;
}
- return bs->drv->bdrv_amend_options(bs, opts);
+ return bs->drv->bdrv_amend_options(bs, opts, status_cb);
}
/* This function will be called by the bdrv_recurse_is_first_non_filter method
@@ -5818,13 +5850,19 @@
BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
{
BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
+ AioContext *aio_context;
+
if (!to_replace_bs) {
error_setg(errp, "Node name '%s' not found", node_name);
return NULL;
}
+ aio_context = bdrv_get_aio_context(to_replace_bs);
+ aio_context_acquire(aio_context);
+
if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
- return NULL;
+ to_replace_bs = NULL;
+ goto out;
}
/* We don't want arbitrary node of the BDS chain to be replaced only the top
@@ -5834,9 +5872,12 @@
*/
if (!bdrv_is_first_non_filter(to_replace_bs)) {
error_setg(errp, "Only top most non filter can be replaced");
- return NULL;
+ to_replace_bs = NULL;
+ goto out;
}
+out:
+ aio_context_release(aio_context);
return to_replace_bs;
}
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 27911b6..04b0e43 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -9,7 +9,7 @@
block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
block-obj-$(CONFIG_POSIX) += raw-posix.o
block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
-block-obj-y += null.o
+block-obj-y += null.o mirror.o
block-obj-y += nbd.o nbd-client.o sheepdog.o
block-obj-$(CONFIG_LIBISCSI) += iscsi.o
@@ -23,7 +23,6 @@
common-obj-y += stream.o
common-obj-y += commit.o
-common-obj-y += mirror.o
common-obj-y += backup.o
iscsi.o-cflags := $(LIBISCSI_CFLAGS)
diff --git a/block/backup.c b/block/backup.c
index e334740..792e655 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -227,9 +227,25 @@
}
}
+typedef struct {
+ int ret;
+} BackupCompleteData;
+
+static void backup_complete(BlockJob *job, void *opaque)
+{
+ BackupBlockJob *s = container_of(job, BackupBlockJob, common);
+ BackupCompleteData *data = opaque;
+
+ bdrv_unref(s->target);
+
+ block_job_completed(job, data->ret);
+ g_free(data);
+}
+
static void coroutine_fn backup_run(void *opaque)
{
BackupBlockJob *job = opaque;
+ BackupCompleteData *data;
BlockDriverState *bs = job->common.bs;
BlockDriverState *target = job->target;
BlockdevOnError on_target_error = job->on_target_error;
@@ -344,9 +360,10 @@
hbitmap_free(job->bitmap);
bdrv_iostatus_disable(target);
- bdrv_unref(target);
- block_job_completed(&job->common, ret);
+ data = g_malloc(sizeof(*data));
+ data->ret = ret;
+ block_job_defer_to_main_loop(&job->common, backup_complete, data);
}
void backup_start(BlockDriverState *bs, BlockDriverState *target,
diff --git a/block/blkdebug.c b/block/blkdebug.c
index e046b92..862d93b 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -195,6 +195,8 @@
[BLKDBG_PWRITEV] = "pwritev",
[BLKDBG_PWRITEV_ZERO] = "pwritev_zero",
[BLKDBG_PWRITEV_DONE] = "pwritev_done",
+
+ [BLKDBG_EMPTY_IMAGE_PREPARE] = "empty_image_prepare",
};
static int get_event_by_name(const char *name, BlkDebugEvent *event)
diff --git a/block/commit.c b/block/commit.c
index 60a2acc..cfa2bbe 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -60,17 +60,50 @@
return 0;
}
-static void coroutine_fn commit_run(void *opaque)
+typedef struct {
+ int ret;
+} CommitCompleteData;
+
+static void commit_complete(BlockJob *job, void *opaque)
{
- CommitBlockJob *s = opaque;
+ CommitBlockJob *s = container_of(job, CommitBlockJob, common);
+ CommitCompleteData *data = opaque;
BlockDriverState *active = s->active;
BlockDriverState *top = s->top;
BlockDriverState *base = s->base;
BlockDriverState *overlay_bs;
+ int ret = data->ret;
+
+ if (!block_job_is_cancelled(&s->common) && ret == 0) {
+ /* success */
+ ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str);
+ }
+
+ /* restore base open flags here if appropriate (e.g., change the base back
+ * to r/o). These reopens do not need to be atomic, since we won't abort
+ * even on failure here */
+ if (s->base_flags != bdrv_get_flags(base)) {
+ bdrv_reopen(base, s->base_flags, NULL);
+ }
+ overlay_bs = bdrv_find_overlay(active, top);
+ if (overlay_bs && s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) {
+ bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
+ }
+ g_free(s->backing_file_str);
+ block_job_completed(&s->common, ret);
+ g_free(data);
+}
+
+static void coroutine_fn commit_run(void *opaque)
+{
+ CommitBlockJob *s = opaque;
+ CommitCompleteData *data;
+ BlockDriverState *top = s->top;
+ BlockDriverState *base = s->base;
int64_t sector_num, end;
int ret = 0;
int n = 0;
- void *buf;
+ void *buf = NULL;
int bytes_written = 0;
int64_t base_len;
@@ -78,18 +111,18 @@
if (s->common.len < 0) {
- goto exit_restore_reopen;
+ goto out;
}
ret = base_len = bdrv_getlength(base);
if (base_len < 0) {
- goto exit_restore_reopen;
+ goto out;
}
if (base_len < s->common.len) {
ret = bdrv_truncate(base, s->common.len);
if (ret) {
- goto exit_restore_reopen;
+ goto out;
}
}
@@ -128,7 +161,7 @@
if (s->on_error == BLOCKDEV_ON_ERROR_STOP ||
s->on_error == BLOCKDEV_ON_ERROR_REPORT||
(s->on_error == BLOCKDEV_ON_ERROR_ENOSPC && ret == -ENOSPC)) {
- goto exit_free_buf;
+ goto out;
} else {
n = 0;
continue;
@@ -140,27 +173,12 @@
ret = 0;
- if (!block_job_is_cancelled(&s->common) && sector_num == end) {
- /* success */
- ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str);
- }
-
-exit_free_buf:
+out:
qemu_vfree(buf);
-exit_restore_reopen:
- /* restore base open flags here if appropriate (e.g., change the base back
- * to r/o). These reopens do not need to be atomic, since we won't abort
- * even on failure here */
- if (s->base_flags != bdrv_get_flags(base)) {
- bdrv_reopen(base, s->base_flags, NULL);
- }
- overlay_bs = bdrv_find_overlay(active, top);
- if (overlay_bs && s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) {
- bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
- }
- g_free(s->backing_file_str);
- block_job_completed(&s->common, ret);
+ data = g_malloc(sizeof(*data));
+ data->ret = ret;
+ block_job_defer_to_main_loop(&s->common, commit_complete, data);
}
static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp)
diff --git a/block/curl.c b/block/curl.c
index b4157cc..bbee3ca 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -64,6 +64,7 @@
#define SECTOR_SIZE 512
#define READ_AHEAD_DEFAULT (256 * 1024)
#define CURL_TIMEOUT_DEFAULT 5
+#define CURL_TIMEOUT_MAX 10000
#define FIND_RET_NONE 0
#define FIND_RET_OK 1
@@ -112,7 +113,7 @@
char *url;
size_t readahead_size;
bool sslverify;
- int timeout;
+ uint64_t timeout;
char *cookie;
bool accept_range;
AioContext *aio_context;
@@ -390,7 +391,7 @@
if (s->cookie) {
curl_easy_setopt(state->curl, CURLOPT_COOKIE, s->cookie);
}
- curl_easy_setopt(state->curl, CURLOPT_TIMEOUT, s->timeout);
+ curl_easy_setopt(state->curl, CURLOPT_TIMEOUT, (long)s->timeout);
curl_easy_setopt(state->curl, CURLOPT_WRITEFUNCTION,
(void *)curl_read_cb);
curl_easy_setopt(state->curl, CURLOPT_WRITEDATA, (void *)state);
@@ -546,6 +547,10 @@
s->timeout = qemu_opt_get_number(opts, CURL_BLOCK_OPT_TIMEOUT,
CURL_TIMEOUT_DEFAULT);
+ if (s->timeout > CURL_TIMEOUT_MAX) {
+ error_setg(errp, "timeout parameter is too large or negative");
+ goto out_noclean;
+ }
s->sslverify = qemu_opt_get_bool(opts, CURL_BLOCK_OPT_SSLVERIFY, true);
diff --git a/block/iscsi.c b/block/iscsi.c
index 3485d62..ed375fc 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -362,6 +362,12 @@
return -EINVAL;
}
+ if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
+ error_report("iSCSI Error: Write of %d sectors exceeds max_xfer_len "
+ "of %d sectors", nb_sectors, bs->bl.max_transfer_length);
+ return -EINVAL;
+ }
+
lba = sector_qemu2lun(sector_num, iscsilun);
num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
iscsi_co_init_iscsitask(iscsilun, &iTask);
@@ -529,6 +535,12 @@
return -EINVAL;
}
+ if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
+ error_report("iSCSI Error: Read of %d sectors exceeds max_xfer_len "
+ "of %d sectors", nb_sectors, bs->bl.max_transfer_length);
+ return -EINVAL;
+ }
+
if (iscsilun->lbprz && nb_sectors >= ISCSI_CHECKALLOC_THRES &&
!iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
int64_t ret;
@@ -1489,31 +1501,44 @@
memset(iscsilun, 0, sizeof(IscsiLun));
}
+static int sector_limits_lun2qemu(int64_t sector, IscsiLun *iscsilun)
+{
+ return MIN(sector_lun2qemu(sector, iscsilun), INT_MAX / 2 + 1);
+}
+
static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
{
- IscsiLun *iscsilun = bs->opaque;
-
/* We don't actually refresh here, but just return data queried in
* iscsi_open(): iscsi targets don't change their limits. */
+
+ IscsiLun *iscsilun = bs->opaque;
+ uint32_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff;
+
+ if (iscsilun->bl.max_xfer_len) {
+ max_xfer_len = MIN(max_xfer_len, iscsilun->bl.max_xfer_len);
+ }
+
+ bs->bl.max_transfer_length = sector_limits_lun2qemu(max_xfer_len, iscsilun);
+
if (iscsilun->lbp.lbpu) {
if (iscsilun->bl.max_unmap < 0xffffffff) {
- bs->bl.max_discard = sector_lun2qemu(iscsilun->bl.max_unmap,
- iscsilun);
+ bs->bl.max_discard =
+ sector_limits_lun2qemu(iscsilun->bl.max_unmap, iscsilun);
}
- bs->bl.discard_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran,
- iscsilun);
+ bs->bl.discard_alignment =
+ sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
}
if (iscsilun->bl.max_ws_len < 0xffffffff) {
- bs->bl.max_write_zeroes = sector_lun2qemu(iscsilun->bl.max_ws_len,
- iscsilun);
+ bs->bl.max_write_zeroes =
+ sector_limits_lun2qemu(iscsilun->bl.max_ws_len, iscsilun);
}
if (iscsilun->lbp.lbpws) {
- bs->bl.write_zeroes_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran,
- iscsilun);
+ bs->bl.write_zeroes_alignment =
+ sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
}
- bs->bl.opt_transfer_length = sector_lun2qemu(iscsilun->bl.opt_xfer_len,
- iscsilun);
+ bs->bl.opt_transfer_length =
+ sector_limits_lun2qemu(iscsilun->bl.opt_xfer_len, iscsilun);
}
/* Since iscsi_open() ignores bdrv_flags, there is nothing to do here in
diff --git a/block/mirror.c b/block/mirror.c
index e8a43eb..2c6dd2a 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -45,6 +45,7 @@
int64_t sector_num;
int64_t granularity;
size_t buf_size;
+ int64_t bdev_length;
unsigned long *cow_bitmap;
BdrvDirtyBitmap *dirty_bitmap;
HBitmapIter hbi;
@@ -54,6 +55,7 @@
unsigned long *in_flight_bitmap;
int in_flight;
+ int sectors_in_flight;
int ret;
} MirrorBlockJob;
@@ -87,6 +89,7 @@
trace_mirror_iteration_done(s, op->sector_num, op->nb_sectors, ret);
s->in_flight--;
+ s->sectors_in_flight -= op->nb_sectors;
iov = op->qiov.iov;
for (i = 0; i < op->qiov.niov; i++) {
MirrorBuffer *buf = (MirrorBuffer *) iov[i].iov_base;
@@ -98,8 +101,11 @@
chunk_num = op->sector_num / sectors_per_chunk;
nb_chunks = op->nb_sectors / sectors_per_chunk;
bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks);
- if (s->cow_bitmap && ret >= 0) {
- bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
+ if (ret >= 0) {
+ if (s->cow_bitmap) {
+ bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
+ }
+ s->common.offset += (uint64_t)op->nb_sectors * BDRV_SECTOR_SIZE;
}
qemu_iovec_destroy(&op->qiov);
@@ -172,7 +178,7 @@
hbitmap_next_sector = s->sector_num;
sector_num = s->sector_num;
sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
- end = s->common.len >> BDRV_SECTOR_BITS;
+ end = s->bdev_length / BDRV_SECTOR_SIZE;
/* Extend the QEMUIOVector to include all adjacent blocks that will
* be copied in this operation.
@@ -284,6 +290,7 @@
/* Copy the dirty cluster. */
s->in_flight++;
+ s->sectors_in_flight += nb_sectors;
trace_mirror_one_iteration(s, sector_num, nb_sectors);
bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
mirror_read_complete, op);
@@ -314,9 +321,56 @@
}
}
+typedef struct {
+ int ret;
+} MirrorExitData;
+
+static void mirror_exit(BlockJob *job, void *opaque)
+{
+ MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+ MirrorExitData *data = opaque;
+ AioContext *replace_aio_context = NULL;
+
+ if (s->to_replace) {
+ replace_aio_context = bdrv_get_aio_context(s->to_replace);
+ aio_context_acquire(replace_aio_context);
+ }
+
+ if (s->should_complete && data->ret == 0) {
+ BlockDriverState *to_replace = s->common.bs;
+ if (s->to_replace) {
+ to_replace = s->to_replace;
+ }
+ if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
+ bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
+ }
+ bdrv_swap(s->target, to_replace);
+ if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
+ /* drop the bs loop chain formed by the swap: break the loop then
+ * trigger the unref from the top one */
+ BlockDriverState *p = s->base->backing_hd;
+ bdrv_set_backing_hd(s->base, NULL);
+ bdrv_unref(p);
+ }
+ }
+ if (s->to_replace) {
+ bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
+ error_free(s->replace_blocker);
+ bdrv_unref(s->to_replace);
+ }
+ if (replace_aio_context) {
+ aio_context_release(replace_aio_context);
+ }
+ g_free(s->replaces);
+ bdrv_unref(s->target);
+ block_job_completed(&s->common, data->ret);
+ g_free(data);
+}
+
static void coroutine_fn mirror_run(void *opaque)
{
MirrorBlockJob *s = opaque;
+ MirrorExitData *data;
BlockDriverState *bs = s->common.bs;
int64_t sector_num, end, sectors_per_chunk, length;
uint64_t last_pause_ns;
@@ -329,11 +383,11 @@
goto immediate_exit;
}
- s->common.len = bdrv_getlength(bs);
- if (s->common.len < 0) {
- ret = s->common.len;
+ s->bdev_length = bdrv_getlength(bs);
+ if (s->bdev_length < 0) {
+ ret = s->bdev_length;
goto immediate_exit;
- } else if (s->common.len == 0) {
+ } else if (s->bdev_length == 0) {
/* Report BLOCK_JOB_READY and wait for complete. */
block_job_event_ready(&s->common);
s->synced = true;
@@ -344,7 +398,7 @@
goto immediate_exit;
}
- length = DIV_ROUND_UP(s->common.len, s->granularity);
+ length = DIV_ROUND_UP(s->bdev_length, s->granularity);
s->in_flight_bitmap = bitmap_new(length);
/* If we have no backing file yet in the destination, we cannot let
@@ -364,7 +418,7 @@
}
}
- end = s->common.len >> BDRV_SECTOR_BITS;
+ end = s->bdev_length / BDRV_SECTOR_SIZE;
s->buf = qemu_try_blockalign(bs, s->buf_size);
if (s->buf == NULL) {
ret = -ENOMEM;
@@ -409,6 +463,12 @@
}
cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
+ /* s->common.offset contains the number of bytes already processed so
+ * far, cnt is the number of dirty sectors remaining and
+ * s->sectors_in_flight is the number of sectors currently being
+ * processed; together those are the current total operation length */
+ s->common.len = s->common.offset +
+ (cnt + s->sectors_in_flight) * BDRV_SECTOR_SIZE;
/* Note that even when no rate limit is applied we need to yield
* periodically with no pending I/O so that qemu_aio_flush() returns.
@@ -445,7 +505,6 @@
* report completion. This way, block-job-cancel will leave
* the target in a consistent state.
*/
- s->common.offset = end * BDRV_SECTOR_SIZE;
if (!s->synced) {
block_job_event_ready(&s->common);
s->synced = true;
@@ -467,15 +526,13 @@
* mirror_populate runs.
*/
trace_mirror_before_drain(s, cnt);
- bdrv_drain_all();
+ bdrv_drain(bs);
cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
}
ret = 0;
trace_mirror_before_sleep(s, cnt, s->synced, delay_ns);
if (!s->synced) {
- /* Publish progress */
- s->common.offset = (end - cnt) * BDRV_SECTOR_SIZE;
block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
if (block_job_is_cancelled(&s->common)) {
break;
@@ -510,31 +567,10 @@
g_free(s->in_flight_bitmap);
bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
bdrv_iostatus_disable(s->target);
- if (s->should_complete && ret == 0) {
- BlockDriverState *to_replace = s->common.bs;
- if (s->to_replace) {
- to_replace = s->to_replace;
- }
- if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
- bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
- }
- bdrv_swap(s->target, to_replace);
- if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
- /* drop the bs loop chain formed by the swap: break the loop then
- * trigger the unref from the top one */
- BlockDriverState *p = s->base->backing_hd;
- bdrv_set_backing_hd(s->base, NULL);
- bdrv_unref(p);
- }
- }
- if (s->to_replace) {
- bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
- error_free(s->replace_blocker);
- bdrv_unref(s->to_replace);
- }
- g_free(s->replaces);
- bdrv_unref(s->target);
- block_job_completed(&s->common, ret);
+
+ data = g_malloc(sizeof(*data));
+ data->ret = ret;
+ block_job_defer_to_main_loop(&s->common, mirror_exit, data);
}
static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
@@ -574,16 +610,23 @@
/* check the target bs is not blocked and block all operations on it */
if (s->replaces) {
+ AioContext *replace_aio_context;
+
s->to_replace = check_to_replace_node(s->replaces, &local_err);
if (!s->to_replace) {
error_propagate(errp, local_err);
return;
}
+ replace_aio_context = bdrv_get_aio_context(s->to_replace);
+ aio_context_acquire(replace_aio_context);
+
error_setg(&s->replace_blocker,
"block device is in use by block-job-complete");
bdrv_op_block_all(s->to_replace, s->replace_blocker);
bdrv_ref(s->to_replace);
+
+ aio_context_release(replace_aio_context);
}
s->should_complete = true;
diff --git a/block/parallels.c b/block/parallels.c
index 2a814f3..4f9cd8d 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -155,7 +155,7 @@
offset = sector_num % s->tracks;
/* not allocated */
- if ((index > s->catalog_size) || (s->catalog_bitmap[index] == 0))
+ if ((index >= s->catalog_size) || (s->catalog_bitmap[index] == 0))
return -1;
return
((uint64_t)s->catalog_bitmap[index] * s->off_multiplier + offset) * 512;
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 4d888c7..df0b2c9 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1414,7 +1414,7 @@
* clusters.
*/
static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
- unsigned int nb_clusters, enum qcow2_discard_type type)
+ unsigned int nb_clusters, enum qcow2_discard_type type, bool full_discard)
{
BDRVQcowState *s = bs->opaque;
uint64_t *l2_table;
@@ -1436,23 +1436,30 @@
old_l2_entry = be64_to_cpu(l2_table[l2_index + i]);
/*
- * Make sure that a discarded area reads back as zeroes for v3 images
- * (we cannot do it for v2 without actually writing a zero-filled
- * buffer). We can skip the operation if the cluster is already marked
- * as zero, or if it's unallocated and we don't have a backing file.
+ * If full_discard is false, make sure that a discarded area reads back
+ * as zeroes for v3 images (we cannot do it for v2 without actually
+ * writing a zero-filled buffer). We can skip the operation if the
+ * cluster is already marked as zero, or if it's unallocated and we
+ * don't have a backing file.
*
* TODO We might want to use bdrv_get_block_status(bs) here, but we're
* holding s->lock, so that doesn't work today.
+ *
+ * If full_discard is true, the sector should not read back as zeroes,
+ * but rather fall through to the backing file.
*/
switch (qcow2_get_cluster_type(old_l2_entry)) {
case QCOW2_CLUSTER_UNALLOCATED:
- if (!bs->backing_hd) {
+ if (full_discard || !bs->backing_hd) {
continue;
}
break;
case QCOW2_CLUSTER_ZERO:
- continue;
+ if (!full_discard) {
+ continue;
+ }
+ break;
case QCOW2_CLUSTER_NORMAL:
case QCOW2_CLUSTER_COMPRESSED:
@@ -1464,7 +1471,7 @@
/* First remove L2 entries */
qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
- if (s->qcow_version >= 3) {
+ if (!full_discard && s->qcow_version >= 3) {
l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
} else {
l2_table[l2_index + i] = cpu_to_be64(0);
@@ -1483,7 +1490,7 @@
}
int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
- int nb_sectors, enum qcow2_discard_type type)
+ int nb_sectors, enum qcow2_discard_type type, bool full_discard)
{
BDRVQcowState *s = bs->opaque;
uint64_t end_offset;
@@ -1506,7 +1513,7 @@
/* Each L2 table is handled by its own loop iteration */
while (nb_clusters > 0) {
- ret = discard_single_l2(bs, offset, nb_clusters, type);
+ ret = discard_single_l2(bs, offset, nb_clusters, type, full_discard);
if (ret < 0) {
goto fail;
}
@@ -1606,15 +1613,14 @@
* Expands all zero clusters in a specific L1 table (or deallocates them, for
* non-backed non-pre-allocated zero clusters).
*
- * expanded_clusters is a bitmap where every bit corresponds to one cluster in
- * the image file; a bit gets set if the corresponding cluster has been used for
- * zero expansion (i.e., has been filled with zeroes and is referenced from an
- * L2 table). nb_clusters contains the total cluster count of the image file,
- * i.e., the number of bits in expanded_clusters.
+ * l1_entries and *visited_l1_entries are used to keep track of progress for
+ * status_cb(). l1_entries contains the total number of L1 entries and
+ * *visited_l1_entries counts all visited L1 entries.
*/
static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
- int l1_size, uint8_t **expanded_clusters,
- uint64_t *nb_clusters)
+ int l1_size, int64_t *visited_l1_entries,
+ int64_t l1_entries,
+ BlockDriverAmendStatusCB *status_cb)
{
BDRVQcowState *s = bs->opaque;
bool is_active_l1 = (l1_table == s->l1_table);
@@ -1634,9 +1640,14 @@
for (i = 0; i < l1_size; i++) {
uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK;
bool l2_dirty = false;
+ int l2_refcount;
if (!l2_offset) {
/* unallocated */
+ (*visited_l1_entries)++;
+ if (status_cb) {
+ status_cb(bs, *visited_l1_entries, l1_entries);
+ }
continue;
}
@@ -1653,33 +1664,19 @@
goto fail;
}
+ l2_refcount = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits);
+ if (l2_refcount < 0) {
+ ret = l2_refcount;
+ goto fail;
+ }
+
for (j = 0; j < s->l2_size; j++) {
uint64_t l2_entry = be64_to_cpu(l2_table[j]);
- int64_t offset = l2_entry & L2E_OFFSET_MASK, cluster_index;
+ int64_t offset = l2_entry & L2E_OFFSET_MASK;
int cluster_type = qcow2_get_cluster_type(l2_entry);
bool preallocated = offset != 0;
- if (cluster_type == QCOW2_CLUSTER_NORMAL) {
- cluster_index = offset >> s->cluster_bits;
- assert((cluster_index >= 0) && (cluster_index < *nb_clusters));
- if ((*expanded_clusters)[cluster_index / 8] &
- (1 << (cluster_index % 8))) {
- /* Probably a shared L2 table; this cluster was a zero
- * cluster which has been expanded, its refcount
- * therefore most likely requires an update. */
- ret = qcow2_update_cluster_refcount(bs, cluster_index, 1,
- QCOW2_DISCARD_NEVER);
- if (ret < 0) {
- goto fail;
- }
- /* Since we just increased the refcount, the COPIED flag may
- * no longer be set. */
- l2_table[j] = cpu_to_be64(l2_entry & ~QCOW_OFLAG_COPIED);
- l2_dirty = true;
- }
- continue;
- }
- else if (qcow2_get_cluster_type(l2_entry) != QCOW2_CLUSTER_ZERO) {
+ if (cluster_type != QCOW2_CLUSTER_ZERO) {
continue;
}
@@ -1697,6 +1694,19 @@
ret = offset;
goto fail;
}
+
+ if (l2_refcount > 1) {
+ /* For shared L2 tables, set the refcount accordingly (it is
+ * already 1 and needs to be l2_refcount) */
+ ret = qcow2_update_cluster_refcount(bs,
+ offset >> s->cluster_bits, l2_refcount - 1,
+ QCOW2_DISCARD_OTHER);
+ if (ret < 0) {
+ qcow2_free_clusters(bs, offset, s->cluster_size,
+ QCOW2_DISCARD_OTHER);
+ goto fail;
+ }
+ }
}
ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size);
@@ -1718,29 +1728,12 @@
goto fail;
}
- l2_table[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED);
- l2_dirty = true;
-
- cluster_index = offset >> s->cluster_bits;
-
- if (cluster_index >= *nb_clusters) {
- uint64_t old_bitmap_size = (*nb_clusters + 7) / 8;
- uint64_t new_bitmap_size;
- /* The offset may lie beyond the old end of the underlying image
- * file for growable files only */
- assert(bs->file->growable);
- *nb_clusters = size_to_clusters(s, bs->file->total_sectors *
- BDRV_SECTOR_SIZE);
- new_bitmap_size = (*nb_clusters + 7) / 8;
- *expanded_clusters = g_realloc(*expanded_clusters,
- new_bitmap_size);
- /* clear the newly allocated space */
- memset(&(*expanded_clusters)[old_bitmap_size], 0,
- new_bitmap_size - old_bitmap_size);
+ if (l2_refcount == 1) {
+ l2_table[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED);
+ } else {
+ l2_table[j] = cpu_to_be64(offset);
}
-
- assert((cluster_index >= 0) && (cluster_index < *nb_clusters));
- (*expanded_clusters)[cluster_index / 8] |= 1 << (cluster_index % 8);
+ l2_dirty = true;
}
if (is_active_l1) {
@@ -1769,6 +1762,11 @@
}
}
}
+
+ (*visited_l1_entries)++;
+ if (status_cb) {
+ status_cb(bs, *visited_l1_entries, l1_entries);
+ }
}
ret = 0;
@@ -1795,25 +1793,25 @@
* allocation for pre-allocated ones). This is important for downgrading to a
* qcow2 version which doesn't yet support metadata zero clusters.
*/
-int qcow2_expand_zero_clusters(BlockDriverState *bs)
+int qcow2_expand_zero_clusters(BlockDriverState *bs,
+ BlockDriverAmendStatusCB *status_cb)
{
BDRVQcowState *s = bs->opaque;
uint64_t *l1_table = NULL;
- uint64_t nb_clusters;
- uint8_t *expanded_clusters;
+ int64_t l1_entries = 0, visited_l1_entries = 0;
int ret;
int i, j;
- nb_clusters = size_to_clusters(s, bs->file->total_sectors *
- BDRV_SECTOR_SIZE);
- expanded_clusters = g_try_malloc0((nb_clusters + 7) / 8);
- if (expanded_clusters == NULL) {
- ret = -ENOMEM;
- goto fail;
+ if (status_cb) {
+ l1_entries = s->l1_size;
+ for (i = 0; i < s->nb_snapshots; i++) {
+ l1_entries += s->snapshots[i].l1_size;
+ }
}
ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size,
- &expanded_clusters, &nb_clusters);
+ &visited_l1_entries, l1_entries,
+ status_cb);
if (ret < 0) {
goto fail;
}
@@ -1847,7 +1845,8 @@
}
ret = expand_zero_clusters_in_l1(bs, l1_table, s->snapshots[i].l1_size,
- &expanded_clusters, &nb_clusters);
+ &visited_l1_entries, l1_entries,
+ status_cb);
if (ret < 0) {
goto fail;
}
@@ -1856,7 +1855,6 @@
ret = 0;
fail:
- g_free(expanded_clusters);
g_free(l1_table);
return ret;
}
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 1477031..9afdb40 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -91,7 +91,7 @@
* return value is the refcount of the cluster, negative values are -errno
* and indicate an error.
*/
-static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
+int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index)
{
BDRVQcowState *s = bs->opaque;
uint64_t refcount_table_index, block_index;
@@ -629,8 +629,7 @@
}
/*
- * Increases or decreases the refcount of a given cluster by one.
- * addend must be 1 or -1.
+ * Increases or decreases the refcount of a given cluster.
*
* If the return value is non-negative, it is the new refcount of the cluster.
* If it is negative, it is -errno and indicates an error.
@@ -649,7 +648,7 @@
return ret;
}
- return get_refcount(bs, cluster_index);
+ return qcow2_get_refcount(bs, cluster_index);
}
@@ -670,7 +669,7 @@
retry:
for(i = 0; i < nb_clusters; i++) {
uint64_t next_cluster_index = s->free_cluster_index++;
- refcount = get_refcount(bs, next_cluster_index);
+ refcount = qcow2_get_refcount(bs, next_cluster_index);
if (refcount < 0) {
return refcount;
@@ -734,7 +733,7 @@
/* Check how many clusters there are free */
cluster_index = offset >> s->cluster_bits;
for(i = 0; i < nb_clusters; i++) {
- refcount = get_refcount(bs, cluster_index++);
+ refcount = qcow2_get_refcount(bs, cluster_index++);
if (refcount < 0) {
return refcount;
@@ -981,7 +980,7 @@
cluster_index, addend,
QCOW2_DISCARD_SNAPSHOT);
} else {
- refcount = get_refcount(bs, cluster_index);
+ refcount = qcow2_get_refcount(bs, cluster_index);
}
if (refcount < 0) {
@@ -1021,7 +1020,7 @@
refcount = qcow2_update_cluster_refcount(bs, l2_offset >>
s->cluster_bits, addend, QCOW2_DISCARD_SNAPSHOT);
} else {
- refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
+ refcount = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits);
}
if (refcount < 0) {
ret = refcount;
@@ -1332,8 +1331,8 @@
* Checks the OFLAG_COPIED flag for all L1 and L2 entries.
*
* This function does not print an error message nor does it increment
- * check_errors if get_refcount fails (this is because such an error will have
- * been already detected and sufficiently signaled by the calling function
+ * check_errors if qcow2_get_refcount fails (this is because such an error will
+ * have been already detected and sufficiently signaled by the calling function
* (qcow2_check_refcounts) by the time this function is called).
*/
static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
@@ -1354,7 +1353,7 @@
continue;
}
- refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
+ refcount = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits);
if (refcount < 0) {
/* don't print message nor increment check_errors */
continue;
@@ -1396,7 +1395,8 @@
if ((cluster_type == QCOW2_CLUSTER_NORMAL) ||
((cluster_type == QCOW2_CLUSTER_ZERO) && (data_offset != 0))) {
- refcount = get_refcount(bs, data_offset >> s->cluster_bits);
+ refcount = qcow2_get_refcount(bs,
+ data_offset >> s->cluster_bits);
if (refcount < 0) {
/* don't print message nor increment check_errors */
continue;
@@ -1632,7 +1632,7 @@
int refcount1, refcount2, ret;
for (i = 0, *highest_cluster = 0; i < nb_clusters; i++) {
- refcount1 = get_refcount(bs, i);
+ refcount1 = qcow2_get_refcount(bs, i);
if (refcount1 < 0) {
fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
i, strerror(-refcount1));
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index f52d7fd..5b3903c 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -441,7 +441,7 @@
qcow2_discard_clusters(bs, qcow2_vm_state_offset(s),
align_offset(sn->vm_state_size, s->cluster_size)
>> BDRV_SECTOR_BITS,
- QCOW2_DISCARD_NEVER);
+ QCOW2_DISCARD_NEVER, false);
#ifdef DEBUG_ALLOC
{
diff --git a/block/qcow2.c b/block/qcow2.c
index d031515..d120494 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2089,7 +2089,7 @@
qemu_co_mutex_lock(&s->lock);
ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS,
- nb_sectors, QCOW2_DISCARD_REQUEST);
+ nb_sectors, QCOW2_DISCARD_REQUEST, false);
qemu_co_mutex_unlock(&s->lock);
return ret;
}
@@ -2230,6 +2230,195 @@
return ret;
}
+static int make_completely_empty(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ int ret, l1_clusters;
+ int64_t offset;
+ uint64_t *new_reftable = NULL;
+ uint64_t rt_entry, l1_size2;
+ struct {
+ uint64_t l1_offset;
+ uint64_t reftable_offset;
+ uint32_t reftable_clusters;
+ } QEMU_PACKED l1_ofs_rt_ofs_cls;
+
+ ret = qcow2_cache_empty(bs, s->l2_table_cache);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ ret = qcow2_cache_empty(bs, s->refcount_block_cache);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* Refcounts will be broken utterly */
+ ret = qcow2_mark_dirty(bs);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
+
+ l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
+ l1_size2 = (uint64_t)s->l1_size * sizeof(uint64_t);
+
+ /* After this call, neither the in-memory nor the on-disk refcount
+ * information accurately describe the actual references */
+
+ ret = bdrv_write_zeroes(bs->file, s->l1_table_offset / BDRV_SECTOR_SIZE,
+ l1_clusters * s->cluster_sectors, 0);
+ if (ret < 0) {
+ goto fail_broken_refcounts;
+ }
+ memset(s->l1_table, 0, l1_size2);
+
+ BLKDBG_EVENT(bs->file, BLKDBG_EMPTY_IMAGE_PREPARE);
+
+ /* Overwrite enough clusters at the beginning of the sectors to place
+ * the refcount table, a refcount block and the L1 table in; this may
+ * overwrite parts of the existing refcount and L1 table, which is not
+ * an issue because the dirty flag is set, complete data loss is in fact
+ * desired and partial data loss is consequently fine as well */
+ ret = bdrv_write_zeroes(bs->file, s->cluster_size / BDRV_SECTOR_SIZE,
+ (2 + l1_clusters) * s->cluster_size /
+ BDRV_SECTOR_SIZE, 0);
+ /* This call (even if it failed overall) may have overwritten on-disk
+ * refcount structures; in that case, the in-memory refcount information
+ * will probably differ from the on-disk information which makes the BDS
+ * unusable */
+ if (ret < 0) {
+ goto fail_broken_refcounts;
+ }
+
+ BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
+ BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE);
+
+ /* "Create" an empty reftable (one cluster) directly after the image
+ * header and an empty L1 table three clusters after the image header;
+ * the cluster between those two will be used as the first refblock */
+ cpu_to_be64w(&l1_ofs_rt_ofs_cls.l1_offset, 3 * s->cluster_size);
+ cpu_to_be64w(&l1_ofs_rt_ofs_cls.reftable_offset, s->cluster_size);
+ cpu_to_be32w(&l1_ofs_rt_ofs_cls.reftable_clusters, 1);
+ ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset),
+ &l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls));
+ if (ret < 0) {
+ goto fail_broken_refcounts;
+ }
+
+ s->l1_table_offset = 3 * s->cluster_size;
+
+ new_reftable = g_try_new0(uint64_t, s->cluster_size / sizeof(uint64_t));
+ if (!new_reftable) {
+ ret = -ENOMEM;
+ goto fail_broken_refcounts;
+ }
+
+ s->refcount_table_offset = s->cluster_size;
+ s->refcount_table_size = s->cluster_size / sizeof(uint64_t);
+
+ g_free(s->refcount_table);
+ s->refcount_table = new_reftable;
+ new_reftable = NULL;
+
+ /* Now the in-memory refcount information again corresponds to the on-disk
+ * information (reftable is empty and no refblocks (the refblock cache is
+ * empty)); however, this means some clusters (e.g. the image header) are
+ * referenced, but not refcounted, but the normal qcow2 code assumes that
+ * the in-memory information is always correct */
+
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);
+
+ /* Enter the first refblock into the reftable */
+ rt_entry = cpu_to_be64(2 * s->cluster_size);
+ ret = bdrv_pwrite_sync(bs->file, s->cluster_size,
+ &rt_entry, sizeof(rt_entry));
+ if (ret < 0) {
+ goto fail_broken_refcounts;
+ }
+ s->refcount_table[0] = 2 * s->cluster_size;
+
+ s->free_cluster_index = 0;
+ assert(3 + l1_clusters <= s->refcount_block_size);
+ offset = qcow2_alloc_clusters(bs, 3 * s->cluster_size + l1_size2);
+ if (offset < 0) {
+ ret = offset;
+ goto fail_broken_refcounts;
+ } else if (offset > 0) {
+ error_report("First cluster in emptied image is in use");
+ abort();
+ }
+
+ /* Now finally the in-memory information corresponds to the on-disk
+ * structures and is correct */
+ ret = qcow2_mark_clean(bs);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ return 0;
+
+fail_broken_refcounts:
+ /* The BDS is unusable at this point. If we wanted to make it usable, we
+ * would have to call qcow2_refcount_close(), qcow2_refcount_init(),
+ * qcow2_check_refcounts(), qcow2_refcount_close() and qcow2_refcount_init()
+ * again. However, because the functions which could have caused this error
+ * path to be taken are used by those functions as well, it's very likely
+ * that that sequence will fail as well. Therefore, just eject the BDS. */
+ bs->drv = NULL;
+
+fail:
+ g_free(new_reftable);
+ return ret;
+}
+
+static int qcow2_make_empty(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t start_sector;
+ int sector_step = INT_MAX / BDRV_SECTOR_SIZE;
+ int l1_clusters, ret = 0;
+
+ l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
+
+ if (s->qcow_version >= 3 && !s->snapshots &&
+ 3 + l1_clusters <= s->refcount_block_size) {
+ /* The following function only works for qcow2 v3 images (it requires
+ * the dirty flag) and only as long as there are no snapshots (because
+ * it completely empties the image). Furthermore, the L1 table and three
+ * additional clusters (image header, refcount table, one refcount
+ * block) have to fit inside one refcount block. */
+ return make_completely_empty(bs);
+ }
+
+ /* This fallback code simply discards every active cluster; this is slow,
+ * but works in all cases */
+ for (start_sector = 0; start_sector < bs->total_sectors;
+ start_sector += sector_step)
+ {
+ /* As this function is generally used after committing an external
+ * snapshot, QCOW2_DISCARD_SNAPSHOT seems appropriate. Also, the
+ * default action for this kind of discard is to pass the discard,
+ * which will ideally result in an actually smaller image file, as
+ * is probably desired. */
+ ret = qcow2_discard_clusters(bs, start_sector * BDRV_SECTOR_SIZE,
+ MIN(sector_step,
+ bs->total_sectors - start_sector),
+ QCOW2_DISCARD_SNAPSHOT, true);
+ if (ret < 0) {
+ break;
+ }
+ }
+
+ return ret;
+}
+
static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
@@ -2361,7 +2550,8 @@
* Downgrades an image's version. To achieve this, any incompatible features
* have to be removed.
*/
-static int qcow2_downgrade(BlockDriverState *bs, int target_version)
+static int qcow2_downgrade(BlockDriverState *bs, int target_version,
+ BlockDriverAmendStatusCB *status_cb)
{
BDRVQcowState *s = bs->opaque;
int current_version = s->qcow_version;
@@ -2410,7 +2600,7 @@
/* clearing autoclear features is trivial */
s->autoclear_features = 0;
- ret = qcow2_expand_zero_clusters(bs);
+ ret = qcow2_expand_zero_clusters(bs, status_cb);
if (ret < 0) {
return ret;
}
@@ -2424,7 +2614,8 @@
return 0;
}
-static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts)
+static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
+ BlockDriverAmendStatusCB *status_cb)
{
BDRVQcowState *s = bs->opaque;
int old_version = s->qcow_version, new_version = old_version;
@@ -2502,7 +2693,7 @@
return ret;
}
} else {
- ret = qcow2_downgrade(bs, new_version);
+ ret = qcow2_downgrade(bs, new_version, status_cb);
if (ret < 0) {
return ret;
}
@@ -2676,6 +2867,7 @@
.bdrv_co_discard = qcow2_co_discard,
.bdrv_truncate = qcow2_truncate,
.bdrv_write_compressed = qcow2_write_compressed,
+ .bdrv_make_empty = qcow2_make_empty,
.bdrv_snapshot_create = qcow2_snapshot_create,
.bdrv_snapshot_goto = qcow2_snapshot_goto,
diff --git a/block/qcow2.h b/block/qcow2.h
index 577ccd1..6e39a1b 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -487,6 +487,8 @@
int qcow2_refcount_init(BlockDriverState *bs);
void qcow2_refcount_close(BlockDriverState *bs);
+int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index);
+
int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index,
int addend, enum qcow2_discard_type type);
@@ -534,10 +536,11 @@
int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
- int nb_sectors, enum qcow2_discard_type type);
+ int nb_sectors, enum qcow2_discard_type type, bool full_discard);
int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors);
-int qcow2_expand_zero_clusters(BlockDriverState *bs);
+int qcow2_expand_zero_clusters(BlockDriverState *bs,
+ BlockDriverAmendStatusCB *status_cb);
/* qcow2-snapshot.c functions */
int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 475cf74..e100ae2 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -1481,12 +1481,12 @@
return result;
}
-static int64_t try_fiemap(BlockDriverState *bs, off_t start, off_t *data,
- off_t *hole, int nb_sectors, int *pnum)
+static int try_fiemap(BlockDriverState *bs, off_t start, off_t *data,
+ off_t *hole, int nb_sectors)
{
#ifdef CONFIG_FIEMAP
BDRVRawState *s = bs->opaque;
- int64_t ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
+ int ret = 0;
struct {
struct fiemap fm;
struct fiemap_extent fe;
@@ -1527,18 +1527,14 @@
#endif
}
-static int64_t try_seek_hole(BlockDriverState *bs, off_t start, off_t *data,
- off_t *hole, int *pnum)
+static int try_seek_hole(BlockDriverState *bs, off_t start, off_t *data,
+ off_t *hole)
{
#if defined SEEK_HOLE && defined SEEK_DATA
BDRVRawState *s = bs->opaque;
*hole = lseek(s->fd, start, SEEK_HOLE);
if (*hole == -1) {
- /* -ENXIO indicates that sector_num was past the end of the file.
- * There is a virtual hole there. */
- assert(errno != -ENXIO);
-
return -errno;
}
@@ -1552,7 +1548,7 @@
}
}
- return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
+ return 0;
#else
return -ENOTSUP;
#endif
@@ -1578,7 +1574,8 @@
int nb_sectors, int *pnum)
{
off_t start, data = 0, hole = 0;
- int64_t ret;
+ int64_t total_size;
+ int ret;
ret = fd_open(bs);
if (ret < 0) {
@@ -1586,29 +1583,38 @@
}
start = sector_num * BDRV_SECTOR_SIZE;
+ total_size = bdrv_getlength(bs);
+ if (total_size < 0) {
+ return total_size;
+ } else if (start >= total_size) {
+ *pnum = 0;
+ return 0;
+ } else if (start + nb_sectors * BDRV_SECTOR_SIZE > total_size) {
+ nb_sectors = DIV_ROUND_UP(total_size - start, BDRV_SECTOR_SIZE);
+ }
- ret = try_seek_hole(bs, start, &data, &hole, pnum);
+ ret = try_seek_hole(bs, start, &data, &hole);
if (ret < 0) {
- ret = try_fiemap(bs, start, &data, &hole, nb_sectors, pnum);
+ ret = try_fiemap(bs, start, &data, &hole, nb_sectors);
if (ret < 0) {
/* Assume everything is allocated. */
data = 0;
hole = start + nb_sectors * BDRV_SECTOR_SIZE;
- ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
+ ret = 0;
}
}
+ assert(ret >= 0);
+
if (data <= start) {
/* On a data extent, compute sectors to the end of the extent. */
*pnum = MIN(nb_sectors, (hole - start) / BDRV_SECTOR_SIZE);
+ return ret | BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
} else {
/* On a hole, compute sectors to the beginning of the next extent. */
*pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE);
- ret &= ~BDRV_BLOCK_DATA;
- ret |= BDRV_BLOCK_ZERO;
+ return ret | BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID | start;
}
-
- return ret;
}
static coroutine_fn BlockAIOCB *raw_aio_discard(BlockDriverState *bs,
diff --git a/block/rbd.c b/block/rbd.c
index 47cab8b..5b5a64a 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -887,6 +887,18 @@
}
#endif
+#ifdef LIBRBD_SUPPORTS_INVALIDATE
+static void qemu_rbd_invalidate_cache(BlockDriverState *bs,
+ Error **errp)
+{
+ BDRVRBDState *s = bs->opaque;
+ int r = rbd_invalidate_cache(s->image);
+ if (r < 0) {
+ error_setg_errno(errp, -r, "Failed to invalidate the cache");
+ }
+}
+#endif
+
static QemuOptsList qemu_rbd_create_opts = {
.name = "rbd-create-opts",
.head = QTAILQ_HEAD_INITIALIZER(qemu_rbd_create_opts.head),
@@ -936,6 +948,9 @@
.bdrv_snapshot_delete = qemu_rbd_snap_remove,
.bdrv_snapshot_list = qemu_rbd_snap_list,
.bdrv_snapshot_goto = qemu_rbd_snap_rollback,
+#ifdef LIBRBD_SUPPORTS_INVALIDATE
+ .bdrv_invalidate_cache = qemu_rbd_invalidate_cache,
+#endif
};
static void bdrv_rbd_init(void)
diff --git a/block/snapshot.c b/block/snapshot.c
index 85c52ff..698e1a1 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -236,6 +236,10 @@
error_setg(errp, "snapshot_id and name are both NULL");
return -EINVAL;
}
+
+ /* drain all pending i/o before deleting snapshot */
+ bdrv_drain_all();
+
if (drv->bdrv_snapshot_delete) {
return drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp);
}
diff --git a/block/stream.c b/block/stream.c
index a1dc8da..a628901 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -79,9 +79,39 @@
bdrv_refresh_limits(top, NULL);
}
+typedef struct {
+ int ret;
+ bool reached_end;
+} StreamCompleteData;
+
+static void stream_complete(BlockJob *job, void *opaque)
+{
+ StreamBlockJob *s = container_of(job, StreamBlockJob, common);
+ StreamCompleteData *data = opaque;
+ BlockDriverState *base = s->base;
+
+ if (!block_job_is_cancelled(&s->common) && data->reached_end &&
+ data->ret == 0) {
+ const char *base_id = NULL, *base_fmt = NULL;
+ if (base) {
+ base_id = s->backing_file_str;
+ if (base->drv) {
+ base_fmt = base->drv->format_name;
+ }
+ }
+ data->ret = bdrv_change_backing_file(job->bs, base_id, base_fmt);
+ close_unused_images(job->bs, base, base_id);
+ }
+
+ g_free(s->backing_file_str);
+ block_job_completed(&s->common, data->ret);
+ g_free(data);
+}
+
static void coroutine_fn stream_run(void *opaque)
{
StreamBlockJob *s = opaque;
+ StreamCompleteData *data;
BlockDriverState *bs = s->common.bs;
BlockDriverState *base = s->base;
int64_t sector_num, end;
@@ -183,21 +213,13 @@
/* Do not remove the backing file if an error was there but ignored. */
ret = error;
- if (!block_job_is_cancelled(&s->common) && sector_num == end && ret == 0) {
- const char *base_id = NULL, *base_fmt = NULL;
- if (base) {
- base_id = s->backing_file_str;
- if (base->drv) {
- base_fmt = base->drv->format_name;
- }
- }
- ret = bdrv_change_backing_file(bs, base_id, base_fmt);
- close_unused_images(bs, base, base_id);
- }
-
qemu_vfree(buf);
- g_free(s->backing_file_str);
- block_job_completed(&s->common, ret);
+
+ /* Modify backing chain and close BDSes in main loop */
+ data = g_malloc(sizeof(*data));
+ data->ret = ret;
+ data->reached_end = sector_num == end;
+ block_job_defer_to_main_loop(&s->common, stream_complete, data);
}
static void stream_set_speed(BlockJob *job, int64_t speed, Error **errp)
diff --git a/blockdev.c b/blockdev.c
index eb743a9..57910b8 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -115,14 +115,21 @@
{
DriveInfo *dinfo = blk_legacy_dinfo(blk);
BlockDriverState *bs = blk_bs(blk);
+ AioContext *aio_context;
if (!dinfo) {
return;
}
+ aio_context = bdrv_get_aio_context(bs);
+ aio_context_acquire(aio_context);
+
if (bs->job) {
block_job_cancel(bs->job);
}
+
+ aio_context_release(aio_context);
+
dinfo->auto_del = 1;
}
@@ -1922,6 +1929,11 @@
static void block_job_cb(void *opaque, int ret)
{
+ /* Note that this function may be executed from another AioContext besides
+ * the QEMU main loop. If you need to access anything that assumes the
+ * QEMU global mutex, use a BH or introduce a mutex.
+ */
+
BlockDriverState *bs = opaque;
const char *msg = NULL;
@@ -1951,6 +1963,7 @@
{
BlockDriverState *bs;
BlockDriverState *base_bs = NULL;
+ AioContext *aio_context;
Error *local_err = NULL;
const char *base_name = NULL;
@@ -1964,16 +1977,20 @@
return;
}
+ aio_context = bdrv_get_aio_context(bs);
+ aio_context_acquire(aio_context);
+
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_STREAM, errp)) {
- return;
+ goto out;
}
if (has_base) {
base_bs = bdrv_find_backing_image(bs, base);
if (base_bs == NULL) {
error_set(errp, QERR_BASE_NOT_FOUND, base);
- return;
+ goto out;
}
+ assert(bdrv_get_aio_context(base_bs) == aio_context);
base_name = base;
}
@@ -1982,7 +1999,7 @@
if (base_bs == NULL && has_backing_file) {
error_setg(errp, "backing file specified, but streaming the "
"entire chain");
- return;
+ goto out;
}
/* backing_file string overrides base bs filename */
@@ -1992,10 +2009,13 @@
on_error, block_job_cb, bs, &local_err);
if (local_err) {
error_propagate(errp, local_err);
- return;
+ goto out;
}
trace_qmp_block_stream(bs, bs->job);
+
+out:
+ aio_context_release(aio_context);
}
void qmp_block_commit(const char *device,
@@ -2007,6 +2027,7 @@
{
BlockDriverState *bs;
BlockDriverState *base_bs, *top_bs;
+ AioContext *aio_context;
Error *local_err = NULL;
/* This will be part of the QMP command, if/when the
* BlockdevOnError change for blkmirror makes it in
@@ -2017,9 +2038,6 @@
speed = 0;
}
- /* drain all i/o before commits */
- bdrv_drain_all();
-
/* Important Note:
* libvirt relies on the DeviceNotFound error class in order to probe for
* live commit feature versions; for this to work, we must make sure to
@@ -2031,8 +2049,14 @@
return;
}
+ aio_context = bdrv_get_aio_context(bs);
+ aio_context_acquire(aio_context);
+
+ /* drain all i/o before commits */
+ bdrv_drain_all();
+
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, errp)) {
- return;
+ goto out;
}
/* default top_bs is the active layer */
@@ -2046,9 +2070,11 @@
if (top_bs == NULL) {
error_setg(errp, "Top image file %s not found", top ? top : "NULL");
- return;
+ goto out;
}
+ assert(bdrv_get_aio_context(top_bs) == aio_context);
+
if (has_base && base) {
base_bs = bdrv_find_backing_image(top_bs, base);
} else {
@@ -2057,20 +2083,22 @@
if (base_bs == NULL) {
error_set(errp, QERR_BASE_NOT_FOUND, base ? base : "NULL");
- return;
+ goto out;
}
+ assert(bdrv_get_aio_context(base_bs) == aio_context);
+
/* Do not allow attempts to commit an image into itself */
if (top_bs == base_bs) {
error_setg(errp, "cannot commit an image into itself");
- return;
+ goto out;
}
if (top_bs == bs) {
if (has_backing_file) {
error_setg(errp, "'backing-file' specified,"
" but 'top' is the active layer");
- return;
+ goto out;
}
commit_active_start(bs, base_bs, speed, on_error, block_job_cb,
bs, &local_err);
@@ -2080,8 +2108,11 @@
}
if (local_err != NULL) {
error_propagate(errp, local_err);
- return;
+ goto out;
}
+
+out:
+ aio_context_release(aio_context);
}
void qmp_drive_backup(const char *device, const char *target,
@@ -2096,6 +2127,7 @@
BlockDriverState *bs;
BlockDriverState *target_bs;
BlockDriverState *source = NULL;
+ AioContext *aio_context;
BlockDriver *drv = NULL;
Error *local_err = NULL;
int flags;
@@ -2121,9 +2153,12 @@
return;
}
+ aio_context = bdrv_get_aio_context(bs);
+ aio_context_acquire(aio_context);
+
if (!bdrv_is_inserted(bs)) {
error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
- return;
+ goto out;
}
if (!has_format) {
@@ -2133,12 +2168,12 @@
drv = bdrv_find_format(format);
if (!drv) {
error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
- return;
+ goto out;
}
}
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
- return;
+ goto out;
}
flags = bs->open_flags | BDRV_O_RDWR;
@@ -2158,7 +2193,7 @@
size = bdrv_getlength(bs);
if (size < 0) {
error_setg_errno(errp, -size, "bdrv_getlength failed");
- return;
+ goto out;
}
if (mode != NEW_IMAGE_MODE_EXISTING) {
@@ -2175,23 +2210,28 @@
if (local_err) {
error_propagate(errp, local_err);
- return;
+ goto out;
}
target_bs = NULL;
ret = bdrv_open(&target_bs, target, NULL, NULL, flags, drv, &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
- return;
+ goto out;
}
+ bdrv_set_aio_context(target_bs, aio_context);
+
backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
block_job_cb, bs, &local_err);
if (local_err != NULL) {
bdrv_unref(target_bs);
error_propagate(errp, local_err);
- return;
+ goto out;
}
+
+out:
+ aio_context_release(aio_context);
}
BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp)
@@ -2216,6 +2256,7 @@
{
BlockDriverState *bs;
BlockDriverState *source, *target_bs;
+ AioContext *aio_context;
BlockDriver *drv = NULL;
Error *local_err = NULL;
QDict *options = NULL;
@@ -2258,9 +2299,12 @@
return;
}
+ aio_context = bdrv_get_aio_context(bs);
+ aio_context_acquire(aio_context);
+
if (!bdrv_is_inserted(bs)) {
error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
- return;
+ goto out;
}
if (!has_format) {
@@ -2270,12 +2314,12 @@
drv = bdrv_find_format(format);
if (!drv) {
error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
- return;
+ goto out;
}
}
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_MIRROR, errp)) {
- return;
+ goto out;
}
flags = bs->open_flags | BDRV_O_RDWR;
@@ -2290,29 +2334,36 @@
size = bdrv_getlength(bs);
if (size < 0) {
error_setg_errno(errp, -size, "bdrv_getlength failed");
- return;
+ goto out;
}
if (has_replaces) {
BlockDriverState *to_replace_bs;
+ AioContext *replace_aio_context;
+ int64_t replace_size;
if (!has_node_name) {
error_setg(errp, "a node-name must be provided when replacing a"
" named node of the graph");
- return;
+ goto out;
}
to_replace_bs = check_to_replace_node(replaces, &local_err);
if (!to_replace_bs) {
error_propagate(errp, local_err);
- return;
+ goto out;
}
- if (size != bdrv_getlength(to_replace_bs)) {
+ replace_aio_context = bdrv_get_aio_context(to_replace_bs);
+ aio_context_acquire(replace_aio_context);
+ replace_size = bdrv_getlength(to_replace_bs);
+ aio_context_release(replace_aio_context);
+
+ if (size != replace_size) {
error_setg(errp, "cannot replace image with a mirror image of "
"different size");
- return;
+ goto out;
}
}
@@ -2341,7 +2392,7 @@
if (local_err) {
error_propagate(errp, local_err);
- return;
+ goto out;
}
if (has_node_name) {
@@ -2357,9 +2408,11 @@
flags | BDRV_O_NO_BACKING, drv, &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
- return;
+ goto out;
}
+ bdrv_set_aio_context(target_bs, aio_context);
+
/* pass the node name to replace to mirror start since it's loose coupling
* and will allow to check whether the node still exist at mirror completion
*/
@@ -2371,24 +2424,42 @@
if (local_err != NULL) {
bdrv_unref(target_bs);
error_propagate(errp, local_err);
- return;
+ goto out;
}
+
+out:
+ aio_context_release(aio_context);
}
-static BlockJob *find_block_job(const char *device)
+/* Get the block job for a given device name and acquire its AioContext */
+static BlockJob *find_block_job(const char *device, AioContext **aio_context)
{
BlockDriverState *bs;
bs = bdrv_find(device);
- if (!bs || !bs->job) {
- return NULL;
+ if (!bs) {
+ goto notfound;
}
+
+ *aio_context = bdrv_get_aio_context(bs);
+ aio_context_acquire(*aio_context);
+
+ if (!bs->job) {
+ aio_context_release(*aio_context);
+ goto notfound;
+ }
+
return bs->job;
+
+notfound:
+ *aio_context = NULL;
+ return NULL;
}
void qmp_block_job_set_speed(const char *device, int64_t speed, Error **errp)
{
- BlockJob *job = find_block_job(device);
+ AioContext *aio_context;
+ BlockJob *job = find_block_job(device, &aio_context);
if (!job) {
error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
@@ -2396,34 +2467,40 @@
}
block_job_set_speed(job, speed, errp);
+ aio_context_release(aio_context);
}
void qmp_block_job_cancel(const char *device,
bool has_force, bool force, Error **errp)
{
- BlockJob *job = find_block_job(device);
-
- if (!has_force) {
- force = false;
- }
+ AioContext *aio_context;
+ BlockJob *job = find_block_job(device, &aio_context);
if (!job) {
error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
return;
}
+
+ if (!has_force) {
+ force = false;
+ }
+
if (job->paused && !force) {
error_setg(errp, "The block job for device '%s' is currently paused",
device);
- return;
+ goto out;
}
trace_qmp_block_job_cancel(job);
block_job_cancel(job);
+out:
+ aio_context_release(aio_context);
}
void qmp_block_job_pause(const char *device, Error **errp)
{
- BlockJob *job = find_block_job(device);
+ AioContext *aio_context;
+ BlockJob *job = find_block_job(device, &aio_context);
if (!job) {
error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
@@ -2432,11 +2509,13 @@
trace_qmp_block_job_pause(job);
block_job_pause(job);
+ aio_context_release(aio_context);
}
void qmp_block_job_resume(const char *device, Error **errp)
{
- BlockJob *job = find_block_job(device);
+ AioContext *aio_context;
+ BlockJob *job = find_block_job(device, &aio_context);
if (!job) {
error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
@@ -2445,11 +2524,13 @@
trace_qmp_block_job_resume(job);
block_job_resume(job);
+ aio_context_release(aio_context);
}
void qmp_block_job_complete(const char *device, Error **errp)
{
- BlockJob *job = find_block_job(device);
+ AioContext *aio_context;
+ BlockJob *job = find_block_job(device, &aio_context);
if (!job) {
error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
@@ -2458,6 +2539,7 @@
trace_qmp_block_job_complete(job);
block_job_complete(job, errp);
+ aio_context_release(aio_context);
}
void qmp_change_backing_file(const char *device,
@@ -2602,12 +2684,18 @@
BlockDriverState *bs;
for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) {
+ AioContext *aio_context = bdrv_get_aio_context(bs);
+
+ aio_context_acquire(aio_context);
+
if (bs->job) {
BlockJobInfoList *elem = g_new0(BlockJobInfoList, 1);
elem->value = block_job_query(bs->job);
*p_next = elem;
p_next = &elem->next;
}
+
+ aio_context_release(aio_context);
}
return head;
diff --git a/blockjob.c b/blockjob.c
index ff0908a..ba2255d 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -50,6 +50,7 @@
error_setg(&job->blocker, "block device is in use by block job: %s",
BlockJobType_lookup[driver->job_type]);
bdrv_op_block_all(bs, job->blocker);
+ bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
job->driver = driver;
job->bs = bs;
@@ -153,7 +154,7 @@
}
}
-struct BlockCancelData {
+struct BlockFinishData {
BlockJob *job;
BlockCompletionFunc *cb;
void *opaque;
@@ -161,19 +162,22 @@
int ret;
};
-static void block_job_cancel_cb(void *opaque, int ret)
+static void block_job_finish_cb(void *opaque, int ret)
{
- struct BlockCancelData *data = opaque;
+ struct BlockFinishData *data = opaque;
data->cancelled = block_job_is_cancelled(data->job);
data->ret = ret;
data->cb(data->opaque, ret);
}
-int block_job_cancel_sync(BlockJob *job)
+static int block_job_finish_sync(BlockJob *job,
+ void (*finish)(BlockJob *, Error **errp),
+ Error **errp)
{
- struct BlockCancelData data;
+ struct BlockFinishData data;
BlockDriverState *bs = job->bs;
+ Error *local_err = NULL;
assert(bs->job == job);
@@ -184,15 +188,37 @@
data.cb = job->cb;
data.opaque = job->opaque;
data.ret = -EINPROGRESS;
- job->cb = block_job_cancel_cb;
+ job->cb = block_job_finish_cb;
job->opaque = &data;
- block_job_cancel(job);
+ finish(job, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return -EBUSY;
+ }
while (data.ret == -EINPROGRESS) {
aio_poll(bdrv_get_aio_context(bs), true);
}
return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
}
+/* A wrapper around block_job_cancel() taking an Error ** parameter so it may be
+ * used with block_job_finish_sync() without the need for (rather nasty)
+ * function pointer casts there. */
+static void block_job_cancel_err(BlockJob *job, Error **errp)
+{
+ block_job_cancel(job);
+}
+
+int block_job_cancel_sync(BlockJob *job)
+{
+ return block_job_finish_sync(job, &block_job_cancel_err, NULL);
+}
+
+int block_job_complete_sync(BlockJob *job, Error **errp)
+{
+ return block_job_finish_sync(job, &block_job_complete, errp);
+}
+
void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns)
{
assert(job->busy);
@@ -236,6 +262,7 @@
info->offset = job->offset;
info->speed = job->speed;
info->io_status = job->iostatus;
+ info->ready = job->ready;
return info;
}
@@ -271,6 +298,8 @@
void block_job_event_ready(BlockJob *job)
{
+ job->ready = true;
+
qapi_event_send_block_job_ready(job->driver->job_type,
bdrv_get_device_name(job->bs),
job->len,
@@ -314,3 +343,48 @@
}
return action;
}
+
+typedef struct {
+ BlockJob *job;
+ QEMUBH *bh;
+ AioContext *aio_context;
+ BlockJobDeferToMainLoopFn *fn;
+ void *opaque;
+} BlockJobDeferToMainLoopData;
+
+static void block_job_defer_to_main_loop_bh(void *opaque)
+{
+ BlockJobDeferToMainLoopData *data = opaque;
+ AioContext *aio_context;
+
+ qemu_bh_delete(data->bh);
+
+ /* Prevent race with block_job_defer_to_main_loop() */
+ aio_context_acquire(data->aio_context);
+
+ /* Fetch BDS AioContext again, in case it has changed */
+ aio_context = bdrv_get_aio_context(data->job->bs);
+ aio_context_acquire(aio_context);
+
+ data->fn(data->job, data->opaque);
+
+ aio_context_release(aio_context);
+
+ aio_context_release(data->aio_context);
+
+ g_free(data);
+}
+
+void block_job_defer_to_main_loop(BlockJob *job,
+ BlockJobDeferToMainLoopFn *fn,
+ void *opaque)
+{
+ BlockJobDeferToMainLoopData *data = g_malloc(sizeof(*data));
+ data->job = job;
+ data->bh = qemu_bh_new(block_job_defer_to_main_loop_bh, data);
+ data->aio_context = bdrv_get_aio_context(job->bs);
+ data->fn = fn;
+ data->opaque = opaque;
+
+ qemu_bh_schedule(data->bh);
+}
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 45b1164..1222a37 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -196,6 +196,11 @@
blk_op_block_all(conf->conf.blk, s->blocker);
blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_RESIZE, s->blocker);
blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_DRIVE_DEL, s->blocker);
+ blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_BACKUP_SOURCE, s->blocker);
+ blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_COMMIT, s->blocker);
+ blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_MIRROR, s->blocker);
+ blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_STREAM, s->blocker);
+ blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_REPLACE, s->blocker);
*dataplane = s;
}
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 70958e3..61dbed1 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -49,6 +49,9 @@
static void ahci_reset_port(AHCIState *s, int port);
static void ahci_write_fis_d2h(AHCIDevice *ad, uint8_t *cmd_fis);
static void ahci_init_d2h(AHCIDevice *ad);
+static int ahci_dma_prepare_buf(IDEDMA *dma, int is_write);
+static void ahci_commit_buf(IDEDMA *dma, uint32_t tx_bytes);
+
static uint32_t ahci_port_read(AHCIState *s, int port, int offset)
{
@@ -567,24 +570,24 @@
AHCIDevice *ad = &s->dev[port];
AHCIPortRegs *pr = &ad->port_regs;
IDEState *ide_state;
- uint8_t *sdb_fis;
+ SDBFIS *sdb_fis;
if (!s->dev[port].res_fis ||
!(pr->cmd & PORT_CMD_FIS_RX)) {
return;
}
- sdb_fis = &ad->res_fis[RES_FIS_SDBFIS];
+ sdb_fis = (SDBFIS *)&ad->res_fis[RES_FIS_SDBFIS];
ide_state = &ad->port.ifs[0];
- /* clear memory */
- *(uint32_t*)sdb_fis = 0;
-
- /* write values */
- sdb_fis[0] = ide_state->error;
- sdb_fis[2] = ide_state->status & 0x77;
+ sdb_fis->type = 0xA1;
+ /* Interrupt pending & Notification bit */
+ sdb_fis->flags = (ad->hba->control_regs.irqstatus ? (1 << 6) : 0);
+ sdb_fis->status = ide_state->status & 0x77;
+ sdb_fis->error = ide_state->error;
+ /* update SAct field in SDB_FIS */
s->dev[port].finished |= finished;
- *(uint32_t*)(sdb_fis + 4) = cpu_to_le32(ad->finished);
+ sdb_fis->payload = cpu_to_le32(ad->finished);
/* Update shadow registers (except BSY 0x80 and DRQ 0x08) */
pr->tfdata = (ad->port.ifs[0].error << 8) |
@@ -600,6 +603,7 @@
uint8_t *pio_fis, *cmd_fis;
uint64_t tbl_addr;
dma_addr_t cmd_len = 0x80;
+ IDEState *s = &ad->port.ifs[0];
if (!ad->res_fis || !(pr->cmd & PORT_CMD_FIS_RX)) {
return;
@@ -629,21 +633,21 @@
pio_fis[0] = 0x5f;
pio_fis[1] = (ad->hba->control_regs.irqstatus ? (1 << 6) : 0);
- pio_fis[2] = ad->port.ifs[0].status;
- pio_fis[3] = ad->port.ifs[0].error;
+ pio_fis[2] = s->status;
+ pio_fis[3] = s->error;
- pio_fis[4] = cmd_fis[4];
- pio_fis[5] = cmd_fis[5];
- pio_fis[6] = cmd_fis[6];
- pio_fis[7] = cmd_fis[7];
- pio_fis[8] = cmd_fis[8];
- pio_fis[9] = cmd_fis[9];
- pio_fis[10] = cmd_fis[10];
- pio_fis[11] = cmd_fis[11];
+ pio_fis[4] = s->sector;
+ pio_fis[5] = s->lcyl;
+ pio_fis[6] = s->hcyl;
+ pio_fis[7] = s->select;
+ pio_fis[8] = s->hob_sector;
+ pio_fis[9] = s->hob_lcyl;
+ pio_fis[10] = s->hob_hcyl;
+ pio_fis[11] = 0;
pio_fis[12] = cmd_fis[12];
pio_fis[13] = cmd_fis[13];
pio_fis[14] = 0;
- pio_fis[15] = ad->port.ifs[0].status;
+ pio_fis[15] = s->status;
pio_fis[16] = len & 255;
pio_fis[17] = len >> 8;
pio_fis[18] = 0;
@@ -670,6 +674,7 @@
int i;
dma_addr_t cmd_len = 0x80;
int cmd_mapped = 0;
+ IDEState *s = &ad->port.ifs[0];
if (!ad->res_fis || !(pr->cmd & PORT_CMD_FIS_RX)) {
return;
@@ -687,17 +692,17 @@
d2h_fis[0] = 0x34;
d2h_fis[1] = (ad->hba->control_regs.irqstatus ? (1 << 6) : 0);
- d2h_fis[2] = ad->port.ifs[0].status;
- d2h_fis[3] = ad->port.ifs[0].error;
+ d2h_fis[2] = s->status;
+ d2h_fis[3] = s->error;
- d2h_fis[4] = cmd_fis[4];
- d2h_fis[5] = cmd_fis[5];
- d2h_fis[6] = cmd_fis[6];
- d2h_fis[7] = cmd_fis[7];
- d2h_fis[8] = cmd_fis[8];
- d2h_fis[9] = cmd_fis[9];
- d2h_fis[10] = cmd_fis[10];
- d2h_fis[11] = cmd_fis[11];
+ d2h_fis[4] = s->sector;
+ d2h_fis[5] = s->lcyl;
+ d2h_fis[6] = s->hcyl;
+ d2h_fis[7] = s->select;
+ d2h_fis[8] = s->hob_sector;
+ d2h_fis[9] = s->hob_lcyl;
+ d2h_fis[10] = s->hob_hcyl;
+ d2h_fis[11] = 0;
d2h_fis[12] = cmd_fis[12];
d2h_fis[13] = cmd_fis[13];
for (i = 14; i < 20; i++) {
@@ -1103,16 +1108,12 @@
}
}
- /* update number of transferred bytes */
- ad->cur_cmd->status = cpu_to_le32(le32_to_cpu(ad->cur_cmd->status) + size);
-
out:
/* declare that we processed everything */
s->data_ptr = s->data_end;
- if (has_sglist) {
- qemu_sglist_destroy(&s->sg);
- }
+ /* Update number of transferred bytes, destroy sglist */
+ ahci_commit_buf(dma, size);
s->end_transfer_func(s);
@@ -1133,6 +1134,11 @@
dma_cb(s, 0);
}
+/**
+ * Called in DMA R/W chains to read the PRDT, utilizing ahci_populate_sglist.
+ * Not currently invoked by PIO R/W chains,
+ * which invoke ahci_populate_sglist via ahci_start_transfer.
+ */
static int ahci_dma_prepare_buf(IDEDMA *dma, int is_write)
{
AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
@@ -1145,6 +1151,24 @@
return s->io_buffer_size != 0;
}
+/**
+ * Destroys the scatter-gather list,
+ * and updates the command header with a bytes-read value.
+ * called explicitly via ahci_dma_rw_buf (ATAPI DMA),
+ * and ahci_start_transfer (PIO R/W),
+ * and called via callback from ide_dma_cb for DMA R/W paths.
+ */
+static void ahci_commit_buf(IDEDMA *dma, uint32_t tx_bytes)
+{
+ AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
+ IDEState *s = &ad->port.ifs[0];
+
+ tx_bytes += le32_to_cpu(ad->cur_cmd->status);
+ ad->cur_cmd->status = cpu_to_le32(tx_bytes);
+
+ qemu_sglist_destroy(&s->sg);
+}
+
static int ahci_dma_rw_buf(IDEDMA *dma, int is_write)
{
AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
@@ -1162,11 +1186,9 @@
dma_buf_write(p, l, &s->sg);
}
- /* free sglist that was created in ahci_populate_sglist() */
- qemu_sglist_destroy(&s->sg);
+ /* free sglist, update byte count */
+ ahci_commit_buf(dma, l);
- /* update number of transferred bytes */
- ad->cur_cmd->status = cpu_to_le32(le32_to_cpu(ad->cur_cmd->status) + l);
s->io_buffer_index += l;
s->io_buffer_offset += l;
@@ -1209,6 +1231,7 @@
.start_dma = ahci_start_dma,
.start_transfer = ahci_start_transfer,
.prepare_buf = ahci_dma_prepare_buf,
+ .commit_buf = ahci_commit_buf,
.rw_buf = ahci_dma_rw_buf,
.set_unit = ahci_dma_set_unit,
.cmd_done = ahci_cmd_done,
diff --git a/hw/ide/ahci.h b/hw/ide/ahci.h
index e8ea34a..b123237 100644
--- a/hw/ide/ahci.h
+++ b/hw/ide/ahci.h
@@ -327,6 +327,14 @@
uint8_t reserved10;
} QEMU_PACKED NCQFrame;
+typedef struct SDBFIS {
+ uint8_t type;
+ uint8_t flags;
+ uint8_t status;
+ uint8_t error;
+ uint32_t payload;
+} QEMU_PACKED SDBFIS;
+
void ahci_init(AHCIState *s, DeviceState *qdev, AddressSpace *as, int ports);
void ahci_uninit(AHCIState *s);
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 44e3d50..d316ccf 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -634,8 +634,11 @@
ide_sector_read_cb, s);
}
-static void dma_buf_commit(IDEState *s)
+static void dma_buf_commit(IDEState *s, uint32_t tx_bytes)
{
+ if (s->bus->dma->ops->commit_buf) {
+ s->bus->dma->ops->commit_buf(s->bus->dma, tx_bytes);
+ }
qemu_sglist_destroy(&s->sg);
}
@@ -650,6 +653,7 @@
void ide_dma_error(IDEState *s)
{
+ dma_buf_commit(s, 0);
ide_abort_command(s);
ide_set_inactive(s, false);
ide_set_irq(s->bus);
@@ -665,7 +669,6 @@
s->bus->error_status = op;
} else if (action == BLOCK_ERROR_ACTION_REPORT) {
if (op & IDE_RETRY_DMA) {
- dma_buf_commit(s);
ide_dma_error(s);
} else {
ide_rw_error(s);
@@ -709,7 +712,8 @@
sector_num = ide_get_sector(s);
if (n > 0) {
- dma_buf_commit(s);
+ assert(s->io_buffer_size == s->sg.size);
+ dma_buf_commit(s, s->io_buffer_size);
sector_num += n;
ide_set_sector(s, sector_num);
s->nsector -= n;
@@ -740,7 +744,6 @@
if ((s->dma_cmd == IDE_DMA_READ || s->dma_cmd == IDE_DMA_WRITE) &&
!ide_sect_range_ok(s, sector_num, n)) {
- dma_buf_commit(s);
ide_dma_error(s);
return;
}
diff --git a/hw/ide/internal.h b/hw/ide/internal.h
index 68ac610..907493d 100644
--- a/hw/ide/internal.h
+++ b/hw/ide/internal.h
@@ -322,6 +322,7 @@
typedef void DMAStartFunc(IDEDMA *, IDEState *, BlockCompletionFunc *);
typedef void DMAVoidFunc(IDEDMA *);
typedef int DMAIntFunc(IDEDMA *, int);
+typedef void DMAu32Func(IDEDMA *, uint32_t);
typedef void DMAStopFunc(IDEDMA *, bool);
typedef void DMARestartFunc(void *, int, RunState);
@@ -430,6 +431,7 @@
DMAStartFunc *start_dma;
DMAVoidFunc *start_transfer;
DMAIntFunc *prepare_buf;
+ DMAu32Func *commit_buf;
DMAIntFunc *rw_buf;
DMAIntFunc *set_unit;
DMAStopFunc *set_inactive;
diff --git a/include/block/block.h b/include/block/block.h
index 341054d..13e4537 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -268,7 +268,13 @@
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix);
-int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts);
+/* The units of offset and total_work_size may be chosen arbitrarily by the
+ * block driver; total_work_size may change during the course of the amendment
+ * operation */
+typedef void BlockDriverAmendStatusCB(BlockDriverState *bs, int64_t offset,
+ int64_t total_work_size);
+int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts,
+ BlockDriverAmendStatusCB *status_cb);
/* external snapshots */
bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
@@ -328,6 +334,7 @@
int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
int bdrv_flush_all(void);
void bdrv_close_all(void);
+void bdrv_drain(BlockDriverState *bs);
void bdrv_drain_all(void);
int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
@@ -498,6 +505,8 @@
BLKDBG_PWRITEV_ZERO,
BLKDBG_PWRITEV_DONE,
+ BLKDBG_EMPTY_IMAGE_PREPARE,
+
BLKDBG_EVENT_MAX,
} BlkDebugEvent;
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 8898c6c..a1c17b9 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -232,7 +232,8 @@
int (*bdrv_check)(BlockDriverState* bs, BdrvCheckResult *result,
BdrvCheckMode fix);
- int (*bdrv_amend_options)(BlockDriverState *bs, QemuOpts *opts);
+ int (*bdrv_amend_options)(BlockDriverState *bs, QemuOpts *opts,
+ BlockDriverAmendStatusCB *status_cb);
void (*bdrv_debug_event)(BlockDriverState *bs, BlkDebugEvent event);
@@ -289,6 +290,9 @@
/* optimal transfer length in sectors */
int opt_transfer_length;
+ /* maximal transfer length in sectors */
+ int max_transfer_length;
+
/* memory alignment so that no bounce buffer is needed */
size_t opt_mem_alignment;
} BlockLimits;
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index acb399f..b6d4ebb 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -91,6 +91,11 @@
*/
bool busy;
+ /**
+ * Set to true when the job is ready to be completed.
+ */
+ bool ready;
+
/** Status that is published by the query-block-jobs QMP API */
BlockDeviceIoStatus iostatus;
@@ -273,6 +278,21 @@
int block_job_cancel_sync(BlockJob *job);
/**
+ * block_job_complete_sync:
+ * @job: The job to be completed.
+ * @errp: Error object which may be set by block_job_complete(); this is not
+ * necessarily set on every error, the job return value has to be
+ * checked as well.
+ *
+ * Synchronously complete the job. The completion callback is called before the
+ * function returns, unless it is NULL (which is permissible when using this
+ * function).
+ *
+ * Returns the return value from the job.
+ */
+int block_job_complete_sync(BlockJob *job, Error **errp);
+
+/**
* block_job_iostatus_reset:
* @job: The job whose I/O status should be reset.
*
@@ -295,4 +315,23 @@
BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
BlockdevOnError on_err,
int is_read, int error);
+
+typedef void BlockJobDeferToMainLoopFn(BlockJob *job, void *opaque);
+
+/**
+ * block_job_defer_to_main_loop:
+ * @job: The job
+ * @fn: The function to run in the main loop
+ * @opaque: The opaque value that is passed to @fn
+ *
+ * Execute a given function in the main loop with the BlockDriverState
+ * AioContext acquired. Block jobs must call bdrv_unref(), bdrv_close(), and
+ * anything that uses bdrv_drain_all() in the main loop.
+ *
+ * The @job AioContext is held while @fn executes.
+ */
+void block_job_defer_to_main_loop(BlockJob *job,
+ BlockJobDeferToMainLoopFn *fn,
+ void *opaque);
+
#endif
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 1565404..c032434 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -68,6 +68,12 @@
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#endif
+/* Minimum function that returns zero only iff both values are zero.
+ * Intended for use with unsigned values only. */
+#ifndef MIN_NON_ZERO
+#define MIN_NON_ZERO(a, b) (((a) != 0 && (a) < (b)) ? (a) : (b))
+#endif
+
#ifndef ROUND_UP
#define ROUND_UP(n,d) (((n) + (d) - 1) & -(d))
#endif
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 8f7089e..77a0cfb 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -514,12 +514,14 @@
#
# @io-status: the status of the job (since 1.3)
#
+# @ready: true if the job may be completed (since 2.2)
+#
# Since: 1.1
##
{ 'type': 'BlockJobInfo',
'data': {'type': 'str', 'device': 'str', 'len': 'int',
'offset': 'int', 'busy': 'bool', 'paused': 'bool', 'speed': 'int',
- 'io-status': 'BlockDeviceIoStatus'} }
+ 'io-status': 'BlockDeviceIoStatus', 'ready': 'bool'} }
##
# @query-block-jobs:
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index 55aec6b..9567774 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -22,9 +22,9 @@
ETEXI
DEF("commit", img_commit,
- "commit [-q] [-f fmt] [-t cache] filename")
+ "commit [-q] [-f fmt] [-t cache] [-b base] [-d] [-p] filename")
STEXI
-@item commit [-q] [-f @var{fmt}] [-t @var{cache}] @var{filename}
+@item commit [-q] [-f @var{fmt}] [-t @var{cache}] [-b @var{base}] [-d] [-p] @var{filename}
ETEXI
DEF("compare", img_compare,
@@ -70,8 +70,8 @@
ETEXI
DEF("amend", img_amend,
- "amend [-q] [-f fmt] [-t cache] -o options filename")
+ "amend [-p] [-q] [-f fmt] [-t cache] -o options filename")
STEXI
-@item amend [-q] [-f @var{fmt}] [-t @var{cache}] -o @var{options} @var{filename}
+@item amend [-p] [-q] [-f @var{fmt}] [-t @var{cache}] -o @var{options} @var{filename}
@end table
ETEXI
diff --git a/qemu-img.c b/qemu-img.c
index 731502c..66a7eb4 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -31,6 +31,7 @@
#include "sysemu/sysemu.h"
#include "sysemu/block-backend.h"
#include "block/block_int.h"
+#include "block/blockjob.h"
#include "block/qapi.h"
#include <getopt.h>
@@ -722,18 +723,54 @@
return ret;
}
+typedef struct CommonBlockJobCBInfo {
+ BlockDriverState *bs;
+ Error **errp;
+} CommonBlockJobCBInfo;
+
+static void common_block_job_cb(void *opaque, int ret)
+{
+ CommonBlockJobCBInfo *cbi = opaque;
+
+ if (ret < 0) {
+ error_setg_errno(cbi->errp, -ret, "Block job failed");
+ }
+
+ /* Drop this block job's reference */
+ bdrv_unref(cbi->bs);
+}
+
+static void run_block_job(BlockJob *job, Error **errp)
+{
+ AioContext *aio_context = bdrv_get_aio_context(job->bs);
+
+ do {
+ aio_poll(aio_context, true);
+ qemu_progress_print((float)job->offset / job->len * 100.f, 0);
+ } while (!job->ready);
+
+ block_job_complete_sync(job, errp);
+
+ /* A block job may finish instantaneously without publishing any progress,
+ * so just signal completion here */
+ qemu_progress_print(100.f, 0);
+}
+
static int img_commit(int argc, char **argv)
{
int c, ret, flags;
- const char *filename, *fmt, *cache;
+ const char *filename, *fmt, *cache, *base;
BlockBackend *blk;
- BlockDriverState *bs;
- bool quiet = false;
+ BlockDriverState *bs, *base_bs;
+ bool progress = false, quiet = false, drop = false;
+ Error *local_err = NULL;
+ CommonBlockJobCBInfo cbi;
fmt = NULL;
cache = BDRV_DEFAULT_CACHE;
+ base = NULL;
for(;;) {
- c = getopt(argc, argv, "f:ht:q");
+ c = getopt(argc, argv, "f:ht:b:dpq");
if (c == -1) {
break;
}
@@ -748,17 +785,34 @@
case 't':
cache = optarg;
break;
+ case 'b':
+ base = optarg;
+ /* -b implies -d */
+ drop = true;
+ break;
+ case 'd':
+ drop = true;
+ break;
+ case 'p':
+ progress = true;
+ break;
case 'q':
quiet = true;
break;
}
}
+
+ /* Progress is not shown in Quiet mode */
+ if (quiet) {
+ progress = false;
+ }
+
if (optind != argc - 1) {
error_exit("Expecting one image file name");
}
filename = argv[optind++];
- flags = BDRV_O_RDWR;
+ flags = BDRV_O_RDWR | BDRV_O_UNMAP;
ret = bdrv_parse_cache_flags(cache, &flags);
if (ret < 0) {
error_report("Invalid cache option: %s", cache);
@@ -771,29 +825,76 @@
}
bs = blk_bs(blk);
- ret = bdrv_commit(bs);
- switch(ret) {
- case 0:
- qprintf(quiet, "Image committed.\n");
- break;
- case -ENOENT:
- error_report("No disk inserted");
- break;
- case -EACCES:
- error_report("Image is read-only");
- break;
- case -ENOTSUP:
- error_report("Image is already committed");
- break;
- default:
- error_report("Error while committing image");
- break;
+ qemu_progress_init(progress, 1.f);
+ qemu_progress_print(0.f, 100);
+
+ if (base) {
+ base_bs = bdrv_find_backing_image(bs, base);
+ if (!base_bs) {
+ error_set(&local_err, QERR_BASE_NOT_FOUND, base);
+ goto done;
+ }
+ } else {
+ /* This is different from QMP, which by default uses the deepest file in
+ * the backing chain (i.e., the very base); however, the traditional
+ * behavior of qemu-img commit is using the immediate backing file. */
+ base_bs = bs->backing_hd;
+ if (!base_bs) {
+ error_setg(&local_err, "Image does not have a backing file");
+ goto done;
+ }
}
+ cbi = (CommonBlockJobCBInfo){
+ .errp = &local_err,
+ .bs = bs,
+ };
+
+ commit_active_start(bs, base_bs, 0, BLOCKDEV_ON_ERROR_REPORT,
+ common_block_job_cb, &cbi, &local_err);
+ if (local_err) {
+ goto done;
+ }
+
+ /* The block job will swap base_bs and bs (which is not what we really want
+ * here, but okay) and unref base_bs (after the swap, i.e., the old top
+ * image). In order to still be able to empty that top image afterwards,
+ * increment the reference counter here preemptively. */
+ if (!drop) {
+ bdrv_ref(base_bs);
+ }
+
+ run_block_job(bs->job, &local_err);
+ if (local_err) {
+ goto unref_backing;
+ }
+
+ if (!drop && base_bs->drv->bdrv_make_empty) {
+ ret = base_bs->drv->bdrv_make_empty(base_bs);
+ if (ret) {
+ error_setg_errno(&local_err, -ret, "Could not empty %s",
+ filename);
+ goto unref_backing;
+ }
+ }
+
+unref_backing:
+ if (!drop) {
+ bdrv_unref(base_bs);
+ }
+
+done:
+ qemu_progress_end();
+
blk_unref(blk);
- if (ret) {
+
+ if (local_err) {
+ qerror_report_err(local_err);
+ error_free(local_err);
return 1;
}
+
+ qprintf(quiet, "Image committed.\n");
return 0;
}
@@ -2770,6 +2871,12 @@
return 0;
}
+static void amend_status_cb(BlockDriverState *bs,
+ int64_t offset, int64_t total_work_size)
+{
+ qemu_progress_print(100.f * offset / total_work_size, 0);
+}
+
static int img_amend(int argc, char **argv)
{
int c, ret = 0;
@@ -2778,13 +2885,13 @@
QemuOpts *opts = NULL;
const char *fmt = NULL, *filename, *cache;
int flags;
- bool quiet = false;
+ bool quiet = false, progress = false;
BlockBackend *blk = NULL;
BlockDriverState *bs = NULL;
cache = BDRV_DEFAULT_CACHE;
for (;;) {
- c = getopt(argc, argv, "ho:f:t:q");
+ c = getopt(argc, argv, "ho:f:t:pq");
if (c == -1) {
break;
}
@@ -2814,6 +2921,9 @@
case 't':
cache = optarg;
break;
+ case 'p':
+ progress = true;
+ break;
case 'q':
quiet = true;
break;
@@ -2824,6 +2934,11 @@
error_exit("Must specify options (-o)");
}
+ if (quiet) {
+ progress = false;
+ }
+ qemu_progress_init(progress, 1.0);
+
filename = (optind == argc - 1) ? argv[argc - 1] : NULL;
if (fmt && has_help_option(options)) {
/* If a format is explicitly specified (and possibly no filename is
@@ -2833,7 +2948,9 @@
}
if (optind != argc - 1) {
- error_exit("Expecting one image file name");
+ error_report("Expecting one image file name");
+ ret = -1;
+ goto out;
}
flags = BDRV_O_FLAGS | BDRV_O_RDWR;
@@ -2867,13 +2984,18 @@
goto out;
}
- ret = bdrv_amend_options(bs, opts);
+ /* In case the driver does not call amend_status_cb() */
+ qemu_progress_print(0.f, 0);
+ ret = bdrv_amend_options(bs, opts, &amend_status_cb);
+ qemu_progress_print(100.f, 0);
if (ret < 0) {
error_report("Error while amending options: %s", strerror(-ret));
goto out;
}
out:
+ qemu_progress_end();
+
blk_unref(blk);
qemu_opts_del(opts);
qemu_opts_free(create_opts);
diff --git a/qemu-img.texi b/qemu-img.texi
index e3ef4a0..0a1ab35 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -167,7 +167,7 @@
The size can also be specified using the @var{size} option with @code{-o},
it doesn't need to be specified separately in this case.
-@item commit [-f @var{fmt}] [-t @var{cache}] @var{filename}
+@item commit [-q] [-f @var{fmt}] [-t @var{cache}] [-b @var{base}] [-d] [-p] @var{filename}
Commit the changes recorded in @var{filename} in its base image or backing file.
If the backing file is smaller than the snapshot, then the backing file will be
@@ -176,6 +176,20 @@
backing file to match the size of the smaller snapshot, you can safely truncate
it yourself once the commit operation successfully completes.
+The image @var{filename} is emptied after the operation has succeeded. If you do
+not need @var{filename} afterwards and intend to drop it, you may skip emptying
+@var{filename} by specifying the @code{-d} flag.
+
+If the backing chain of the given image file @var{filename} has more than one
+layer, the backing file into which the changes will be committed may be
+specified as @var{base} (which has to be part of @var{filename}'s backing
+chain). If @var{base} is not specified, the immediate backing file of the top
+image (which is @var{filename}) will be used. For reasons of consistency,
+explicitly specifying @var{base} will always imply @code{-d} (since emptying an
+image after committing to an indirect backing file would lead to different data
+being read from the image due to content in the intermediate backing chain
+overruling the commit target).
+
@item compare [-f @var{fmt}] [-F @var{fmt}] [-T @var{src_cache}] [-p] [-s] [-q] @var{filename1} @var{filename2}
Check if two images have the same content. You can compare images with
@@ -398,7 +412,7 @@
partitioning tools inside the VM to actually begin using the new space on the
device.
-@item amend [-f @var{fmt}] [-t @var{cache}] -o @var{options} @var{filename}
+@item amend [-p] [-f @var{fmt}] [-t @var{cache}] -o @var{options} @var{filename}
Amends the image format specific @var{options} for the image file
@var{filename}. Not all file formats support this operation.
diff --git a/savevm.c b/savevm.c
index 2d8eb96..08ec678 100644
--- a/savevm.c
+++ b/savevm.c
@@ -1246,7 +1246,7 @@
void do_delvm(Monitor *mon, const QDict *qdict)
{
BlockDriverState *bs;
- Error *err = NULL;
+ Error *err;
const char *name = qdict_get_str(qdict, "name");
if (!find_vmstate_bs()) {
@@ -1257,6 +1257,7 @@
bs = NULL;
while ((bs = bdrv_next(bs))) {
if (bdrv_can_snapshot(bs)) {
+ err = NULL;
bdrv_snapshot_delete_by_id_or_name(bs, name, &err);
if (err) {
monitor_printf(mon,
diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040
index f1e16c1..2b432ad 100755
--- a/tests/qemu-iotests/040
+++ b/tests/qemu-iotests/040
@@ -43,8 +43,7 @@
if event['event'] == 'BLOCK_JOB_COMPLETED':
self.assert_qmp(event, 'data/type', 'commit')
self.assert_qmp(event, 'data/device', 'drive0')
- self.assert_qmp(event, 'data/offset', self.image_len)
- self.assert_qmp(event, 'data/len', self.image_len)
+ self.assert_qmp(event, 'data/offset', event['data']['len'])
if need_ready:
self.assertTrue(ready, "Expecting BLOCK_JOB_COMPLETED event")
completed = True
@@ -52,7 +51,6 @@
ready = True
self.assert_qmp(event, 'data/type', 'commit')
self.assert_qmp(event, 'data/device', 'drive0')
- self.assert_qmp(event, 'data/len', self.image_len)
self.vm.qmp('block-job-complete', device='drive0')
self.assert_no_active_block_jobs()
diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index 5dbd4ee..59a8f73 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -52,8 +52,7 @@
event = self.cancel_and_wait(drive=drive)
self.assertEquals(event['event'], 'BLOCK_JOB_COMPLETED')
self.assert_qmp(event, 'data/type', 'mirror')
- self.assert_qmp(event, 'data/offset', self.image_len)
- self.assert_qmp(event, 'data/len', self.image_len)
+ self.assert_qmp(event, 'data/offset', event['data']['len'])
def complete_and_wait(self, drive='drive0', wait_ready=True):
'''Complete a block job and wait for it to finish'''
@@ -417,7 +416,6 @@
self.assert_qmp(event, 'data/type', 'mirror')
self.assert_qmp(event, 'data/device', 'drive0')
self.assert_qmp(event, 'data/error', 'Input/output error')
- self.assert_qmp(event, 'data/len', self.image_len)
completed = True
self.assert_no_active_block_jobs()
@@ -568,7 +566,6 @@
self.assert_qmp(event, 'data/type', 'mirror')
self.assert_qmp(event, 'data/device', 'drive0')
self.assert_qmp(event, 'data/error', 'Input/output error')
- self.assert_qmp(event, 'data/len', self.image_len)
completed = True
self.assert_no_active_block_jobs()
diff --git a/tests/qemu-iotests/061 b/tests/qemu-iotests/061
index ab98def..8d37f8a 100755
--- a/tests/qemu-iotests/061
+++ b/tests/qemu-iotests/061
@@ -209,6 +209,31 @@
_check_test_img
$QEMU_IO -c "read -P 0 0 64M" "$TEST_IMG" | _filter_qemu_io
+echo
+echo "=== Testing progress report without snapshot ==="
+echo
+IMGOPTS="compat=1.1" TEST_IMG="$TEST_IMG.base" _make_test_img 4G
+IMGOPTS="compat=1.1" _make_test_img -b "$TEST_IMG.base" 4G
+$QEMU_IO -c "write -z 0 64k" \
+ -c "write -z 1G 64k" \
+ -c "write -z 2G 64k" \
+ -c "write -z 3G 64k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IMG amend -p -o "compat=0.10" "$TEST_IMG"
+_check_test_img
+
+echo
+echo "=== Testing progress report with snapshot ==="
+echo
+IMGOPTS="compat=1.1" TEST_IMG="$TEST_IMG.base" _make_test_img 4G
+IMGOPTS="compat=1.1" _make_test_img -b "$TEST_IMG.base" 4G
+$QEMU_IO -c "write -z 0 64k" \
+ -c "write -z 1G 64k" \
+ -c "write -z 2G 64k" \
+ -c "write -z 3G 64k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IMG snapshot -c foo "$TEST_IMG"
+$QEMU_IMG amend -p -o "compat=0.10" "$TEST_IMG"
+_check_test_img
+
# success, all done
echo "*** done"
rm -f $seq.full
diff --git a/tests/qemu-iotests/061.out b/tests/qemu-iotests/061.out
index 4ae6472..9045544 100644
--- a/tests/qemu-iotests/061.out
+++ b/tests/qemu-iotests/061.out
Binary files differ
diff --git a/tests/qemu-iotests/076 b/tests/qemu-iotests/076
index bc47457..ed2be35 100755
--- a/tests/qemu-iotests/076
+++ b/tests/qemu-iotests/076
@@ -47,29 +47,34 @@
nb_sectors_offset=$((0x24))
echo
-echo "== Read from a valid (enough) image =="
-_use_sample_img fake.parallels.bz2
+echo "== Read from a valid v1 image =="
+_use_sample_img parallels-v1.bz2
{ $QEMU_IO -c "read -P 0x11 0 64k" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir
echo
echo "== Negative catalog size =="
-_use_sample_img fake.parallels.bz2
+_use_sample_img parallels-v1.bz2
poke_file "$TEST_IMG" "$catalog_entries_offset" "\xff\xff\xff\xff"
{ $QEMU_IO -c "read 0 512" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir
echo
echo "== Overflow in catalog allocation =="
-_use_sample_img fake.parallels.bz2
+_use_sample_img parallels-v1.bz2
poke_file "$TEST_IMG" "$nb_sectors_offset" "\xff\xff\xff\xff"
poke_file "$TEST_IMG" "$catalog_entries_offset" "\x01\x00\x00\x40"
{ $QEMU_IO -c "read 64M 64M" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir
echo
echo "== Zero sectors per track =="
-_use_sample_img fake.parallels.bz2
+_use_sample_img parallels-v1.bz2
poke_file "$TEST_IMG" "$tracks_offset" "\x00\x00\x00\x00"
{ $QEMU_IO -c "read 0 512" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir
+echo
+echo "== Read from a valid v2 image =="
+_use_sample_img parallels-v2.bz2
+{ $QEMU_IO -c "read -P 0x11 0 64k" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir
+
# success, all done
echo "*** done"
rm -f $seq.full
diff --git a/tests/qemu-iotests/076.out b/tests/qemu-iotests/076.out
index f7745d8..32ade08 100644
--- a/tests/qemu-iotests/076.out
+++ b/tests/qemu-iotests/076.out
@@ -1,18 +1,22 @@
QA output created by 076
-== Read from a valid (enough) image ==
+== Read from a valid v1 image ==
read 65536/65536 bytes at offset 0
64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
== Negative catalog size ==
-qemu-io: can't open device TEST_DIR/fake.parallels: Catalog too large
+qemu-io: can't open device TEST_DIR/parallels-v1: Catalog too large
no file open, try 'help open'
== Overflow in catalog allocation ==
-qemu-io: can't open device TEST_DIR/fake.parallels: Catalog too large
+qemu-io: can't open device TEST_DIR/parallels-v1: Catalog too large
no file open, try 'help open'
== Zero sectors per track ==
-qemu-io: can't open device TEST_DIR/fake.parallels: Invalid image: Zero sectors per track
+qemu-io: can't open device TEST_DIR/parallels-v1: Invalid image: Zero sectors per track
no file open, try 'help open'
+
+== Read from a valid v2 image ==
+read 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
*** done
diff --git a/tests/qemu-iotests/097 b/tests/qemu-iotests/097
new file mode 100755
index 0000000..c7a613b
--- /dev/null
+++ b/tests/qemu-iotests/097
@@ -0,0 +1,122 @@
+#!/bin/bash
+#
+# Commit changes into backing chains and empty the top image if the
+# backing image is not explicitly specified
+#
+# Copyright (C) 2014 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=mreitz@redhat.com
+
+seq="$(basename $0)"
+echo "QA output created by $seq"
+
+here="$PWD"
+tmp=/tmp/$$
+status=1 # failure is the default!
+
+_cleanup()
+{
+ _cleanup_test_img
+ _rm_test_img "$TEST_IMG.itmd"
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+. ./common.pattern
+
+# Any format supporting backing files and bdrv_make_empty
+_supported_fmt qcow qcow2
+_supported_proto file
+_supported_os Linux
+
+
+# Four passes:
+# 0: Two-layer backing chain, commit to upper backing file (implicitly)
+# (in this case, the top image will be emptied)
+# 1: Two-layer backing chain, commit to upper backing file (explicitly)
+# (in this case, the top image will implicitly stay unchanged)
+# 2: Two-layer backing chain, commit to upper backing file (implicitly with -d)
+# (in this case, the top image will explicitly stay unchanged)
+# 3: Two-layer backing chain, commit to lower backing file
+# (in this case, the top image will implicitly stay unchanged)
+#
+# 020 already tests committing, so this only tests whether image chains are
+# working properly and that all images above the base are emptied; therefore,
+# no complicated patterns are necessary
+for i in 0 1 2 3; do
+
+echo
+echo "=== Test pass $i ==="
+echo
+
+TEST_IMG="$TEST_IMG.base" _make_test_img 64M
+TEST_IMG="$TEST_IMG.itmd" _make_test_img -b "$TEST_IMG.base" 64M
+_make_test_img -b "$TEST_IMG.itmd" 64M
+
+$QEMU_IO -c 'write -P 1 0 192k' "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c 'write -P 2 64k 128k' "$TEST_IMG.itmd" | _filter_qemu_io
+$QEMU_IO -c 'write -P 3 128k 64k' "$TEST_IMG" | _filter_qemu_io
+
+if [ $i -lt 3 ]; then
+ if [ $i == 0 ]; then
+ # -b "$TEST_IMG.itmd" should be the default (that is, committing to the
+ # first backing file in the chain)
+ $QEMU_IMG commit "$TEST_IMG"
+ elif [ $i == 1 ]; then
+ # explicitly specify the commit target (this should imply -d)
+ $QEMU_IMG commit -b "$TEST_IMG.itmd" "$TEST_IMG"
+ else
+ # do not explicitly specify the commit target, but use -d to leave the
+ # top image unchanged
+ $QEMU_IMG commit -d "$TEST_IMG"
+ fi
+
+ # Bottom should be unchanged
+ $QEMU_IO -c 'read -P 1 0 192k' "$TEST_IMG.base" | _filter_qemu_io
+
+ # Intermediate should contain changes from top
+ $QEMU_IO -c 'read -P 1 0 64k' "$TEST_IMG.itmd" | _filter_qemu_io
+ $QEMU_IO -c 'read -P 2 64k 64k' "$TEST_IMG.itmd" | _filter_qemu_io
+ $QEMU_IO -c 'read -P 3 128k 64k' "$TEST_IMG.itmd" | _filter_qemu_io
+
+ # And in pass 0, the top image should be empty, whereas in both other passes
+ # it should be unchanged (which is both checked by qemu-img map)
+else
+ $QEMU_IMG commit -b "$TEST_IMG.base" "$TEST_IMG"
+
+ # Bottom should contain all changes
+ $QEMU_IO -c 'read -P 1 0 64k' "$TEST_IMG.base" | _filter_qemu_io
+ $QEMU_IO -c 'read -P 2 64k 64k' "$TEST_IMG.base" | _filter_qemu_io
+ $QEMU_IO -c 'read -P 3 128k 64k' "$TEST_IMG.base" | _filter_qemu_io
+
+ # Both top and intermediate should be unchanged
+fi
+
+$QEMU_IMG map "$TEST_IMG.base" | _filter_qemu_img_map
+$QEMU_IMG map "$TEST_IMG.itmd" | _filter_qemu_img_map
+$QEMU_IMG map "$TEST_IMG" | _filter_qemu_img_map
+
+done
+
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/097.out b/tests/qemu-iotests/097.out
new file mode 100644
index 0000000..1cb7764
--- /dev/null
+++ b/tests/qemu-iotests/097.out
@@ -0,0 +1,119 @@
+QA output created by 097
+
+=== Test pass 0 ===
+
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864
+Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.base'
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.itmd'
+wrote 196608/196608 bytes at offset 0
+192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 131072/131072 bytes at offset 65536
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 65536/65536 bytes at offset 131072
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Image committed.
+read 196608/196608 bytes at offset 0
+192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 65536
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 131072
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Offset Length File
+0 0x30000 TEST_DIR/t.IMGFMT.base
+Offset Length File
+0 0x10000 TEST_DIR/t.IMGFMT.base
+0x10000 0x20000 TEST_DIR/t.IMGFMT.itmd
+Offset Length File
+0 0x10000 TEST_DIR/t.IMGFMT.base
+0x10000 0x20000 TEST_DIR/t.IMGFMT.itmd
+
+=== Test pass 1 ===
+
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864
+Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.base'
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.itmd'
+wrote 196608/196608 bytes at offset 0
+192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 131072/131072 bytes at offset 65536
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 65536/65536 bytes at offset 131072
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Image committed.
+read 196608/196608 bytes at offset 0
+192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 65536
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 131072
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Offset Length File
+0 0x30000 TEST_DIR/t.IMGFMT.base
+Offset Length File
+0 0x10000 TEST_DIR/t.IMGFMT.base
+0x10000 0x20000 TEST_DIR/t.IMGFMT.itmd
+Offset Length File
+0 0x10000 TEST_DIR/t.IMGFMT.base
+0x10000 0x10000 TEST_DIR/t.IMGFMT.itmd
+0x20000 0x10000 TEST_DIR/t.IMGFMT
+
+=== Test pass 2 ===
+
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864
+Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.base'
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.itmd'
+wrote 196608/196608 bytes at offset 0
+192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 131072/131072 bytes at offset 65536
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 65536/65536 bytes at offset 131072
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Image committed.
+read 196608/196608 bytes at offset 0
+192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 65536
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 131072
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Offset Length File
+0 0x30000 TEST_DIR/t.IMGFMT.base
+Offset Length File
+0 0x10000 TEST_DIR/t.IMGFMT.base
+0x10000 0x20000 TEST_DIR/t.IMGFMT.itmd
+Offset Length File
+0 0x10000 TEST_DIR/t.IMGFMT.base
+0x10000 0x10000 TEST_DIR/t.IMGFMT.itmd
+0x20000 0x10000 TEST_DIR/t.IMGFMT
+
+=== Test pass 3 ===
+
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864
+Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.base'
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.itmd'
+wrote 196608/196608 bytes at offset 0
+192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 131072/131072 bytes at offset 65536
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 65536/65536 bytes at offset 131072
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Image committed.
+read 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 65536
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 131072
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Offset Length File
+0 0x30000 TEST_DIR/t.IMGFMT.base
+Offset Length File
+0 0x10000 TEST_DIR/t.IMGFMT.base
+0x10000 0x20000 TEST_DIR/t.IMGFMT.itmd
+Offset Length File
+0 0x10000 TEST_DIR/t.IMGFMT.base
+0x10000 0x10000 TEST_DIR/t.IMGFMT.itmd
+0x20000 0x10000 TEST_DIR/t.IMGFMT
+*** done
diff --git a/tests/qemu-iotests/098 b/tests/qemu-iotests/098
new file mode 100755
index 0000000..e2230ad
--- /dev/null
+++ b/tests/qemu-iotests/098
@@ -0,0 +1,82 @@
+#!/bin/bash
+#
+# Test qcow2's bdrv_make_empty for images without internal snapshots
+#
+# Copyright (C) 2014 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=mreitz@redhat.com
+
+seq="$(basename $0)"
+echo "QA output created by $seq"
+
+here="$PWD"
+tmp=/tmp/$$
+status=1 # failure is the default!
+
+_cleanup()
+{
+ _cleanup_test_img
+ rm -f "$TEST_DIR/blkdebug.conf"
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+. ./common.pattern
+
+_supported_fmt qcow2
+_supported_proto file
+_supported_os Linux
+
+IMGOPTS="compat=1.1"
+
+for event in l1_update empty_image_prepare reftable_update refblock_alloc; do
+
+echo
+echo "=== $event ==="
+echo
+
+TEST_IMG="$TEST_IMG.base" _make_test_img 64M
+_make_test_img -b "$TEST_IMG.base" 64M
+
+# Some data that can be leaked when emptying the top image
+$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
+
+cat > "$TEST_DIR/blkdebug.conf" <<EOF
+[inject-error]
+event = "$event"
+EOF
+
+$QEMU_IMG commit "blkdebug:$TEST_DIR/blkdebug.conf:$TEST_IMG" 2>&1 \
+ | _filter_testdir | _filter_imgfmt
+
+# There may be errors, but they should be fixed by opening the image
+$QEMU_IO -c close "$TEST_IMG"
+
+_check_test_img
+
+done
+
+
+rm -f "$TEST_DIR/blkdebug.conf"
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/098.out b/tests/qemu-iotests/098.out
new file mode 100644
index 0000000..586dfc8
--- /dev/null
+++ b/tests/qemu-iotests/098.out
@@ -0,0 +1,52 @@
+QA output created by 098
+
+=== l1_update ===
+
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.base'
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-img: Could not empty blkdebug:TEST_DIR/blkdebug.conf:TEST_DIR/t.IMGFMT: Input/output error
+No errors were found on the image.
+
+=== empty_image_prepare ===
+
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.base'
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-img: Could not empty blkdebug:TEST_DIR/blkdebug.conf:TEST_DIR/t.IMGFMT: Input/output error
+Leaked cluster 4 refcount=1 reference=0
+Leaked cluster 5 refcount=1 reference=0
+Repairing cluster 4 refcount=1 reference=0
+Repairing cluster 5 refcount=1 reference=0
+No errors were found on the image.
+
+=== reftable_update ===
+
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.base'
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-img: Could not empty blkdebug:TEST_DIR/blkdebug.conf:TEST_DIR/t.IMGFMT: Input/output error
+ERROR cluster 0 refcount=0 reference=1
+ERROR cluster 1 refcount=0 reference=1
+ERROR cluster 3 refcount=0 reference=1
+Rebuilding refcount structure
+Repairing cluster 1 refcount=1 reference=0
+No errors were found on the image.
+
+=== refblock_alloc ===
+
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.base'
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-img: Could not empty blkdebug:TEST_DIR/blkdebug.conf:TEST_DIR/t.IMGFMT: Input/output error
+ERROR cluster 0 refcount=0 reference=1
+ERROR cluster 1 refcount=0 reference=1
+ERROR cluster 3 refcount=0 reference=1
+Rebuilding refcount structure
+Repairing cluster 1 refcount=1 reference=0
+No errors were found on the image.
+*** done
diff --git a/tests/qemu-iotests/102 b/tests/qemu-iotests/102
index 34b363f..161b197 100755
--- a/tests/qemu-iotests/102
+++ b/tests/qemu-iotests/102
@@ -34,9 +34,10 @@
}
trap "_cleanup; exit \$status" 0 1 2 3 15
-# get standard environment, filters and checks
+# get standard environment, filters and qemu instance handling
. ./common.rc
. ./common.filter
+. ./common.qemu
_supported_fmt qcow2
_supported_proto file
@@ -53,11 +54,27 @@
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
# Remove data cluster from image (first cluster: image header, second: reftable,
# third: refblock, fourth: L1 table, fifth: L2 table)
-truncate -s $((5 * 64 * 1024)) "$TEST_IMG"
+$QEMU_IMG resize -f raw "$TEST_IMG" $((5 * 64 * 1024))
$QEMU_IO -c map "$TEST_IMG"
$QEMU_IMG map "$TEST_IMG"
+echo
+echo '=== Testing map on an image file truncated outside of qemu ==='
+echo
+
+# Same as above, only now we concurrently truncate and map the image
+_make_test_img $IMG_SIZE
+$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
+
+qemu_comm_method=monitor _launch_qemu -drive if=none,file="$TEST_IMG",id=drv0
+
+$QEMU_IMG resize -f raw "$TEST_IMG" $((5 * 64 * 1024))
+
+_send_qemu_cmd $QEMU_HANDLE 'qemu-io drv0 map' 'allocated' \
+ | sed -e 's/^(qemu).*qemu-io drv0 map...$/(qemu) qemu-io drv0 map/'
+_send_qemu_cmd $QEMU_HANDLE 'quit' ''
+
# success, all done
echo '*** done'
rm -f $seq.full
diff --git a/tests/qemu-iotests/102.out b/tests/qemu-iotests/102.out
index e0e9cdc..eecde16 100644
--- a/tests/qemu-iotests/102.out
+++ b/tests/qemu-iotests/102.out
@@ -5,6 +5,17 @@
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=65536
wrote 65536/65536 bytes at offset 0
64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Image resized.
[ 0] 128/ 128 sectors allocated at offset 0 bytes (1)
Offset Length Mapped to File
+
+=== Testing map on an image file truncated outside of qemu ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=65536
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Image resized.
+QEMU X.Y.Z monitor - type 'help' for more information
+(qemu) qemu-io drv0 map
+[ 0] 128/ 128 sectors allocated at offset 0 bytes (1)
*** done
diff --git a/tests/qemu-iotests/107 b/tests/qemu-iotests/107
index cad1cf9..9862030 100755
--- a/tests/qemu-iotests/107
+++ b/tests/qemu-iotests/107
@@ -39,7 +39,7 @@
. ./common.filter
_supported_fmt qcow2
-_supported_proto file
+_supported_proto file nfs
_supported_os Linux
diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
index f69cb6b..3acdb30 100644
--- a/tests/qemu-iotests/common.filter
+++ b/tests/qemu-iotests/common.filter
@@ -213,5 +213,12 @@
-e "s/archipelago:a/TEST_DIR\//g"
}
+# filter out offsets and file names from qemu-img map
+_filter_qemu_img_map()
+{
+ sed -e 's/\([0-9a-fx]* *[0-9a-fx]* *\)[0-9a-fx]* */\1/g' \
+ -e 's/Mapped to *//' | _filter_testdir | _filter_imgfmt
+}
+
# make sure this script returns success
/bin/true
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 9bbd5d3..7b2c666 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -67,7 +67,7 @@
058 rw auto quick
059 rw auto quick
060 rw auto quick
-061 rw auto quick
+061 rw auto
062 rw auto quick
063 rw auto quick
064 rw auto quick
@@ -100,6 +100,8 @@
091 rw auto quick
092 rw auto quick
095 rw auto quick
+097 rw auto backing
+098 rw auto backing quick
099 rw auto quick
100 rw auto quick
101 rw auto quick
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 39a4cfc..f57f154 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -267,8 +267,7 @@
self.assert_qmp(event, 'data/device', drive)
self.assert_qmp_absent(event, 'data/error')
if check_offset:
- self.assert_qmp(event, 'data/offset', self.image_len)
- self.assert_qmp(event, 'data/len', self.image_len)
+ self.assert_qmp(event, 'data/offset', event['data']['len'])
completed = True
self.assert_no_active_block_jobs()
diff --git a/tests/qemu-iotests/sample_images/fake.parallels.bz2 b/tests/qemu-iotests/sample_images/fake.parallels.bz2
deleted file mode 100644
index ffb5f13..0000000
--- a/tests/qemu-iotests/sample_images/fake.parallels.bz2
+++ /dev/null
Binary files differ
diff --git a/tests/qemu-iotests/sample_images/parallels-v1.bz2 b/tests/qemu-iotests/sample_images/parallels-v1.bz2
new file mode 100644
index 0000000..d1ef142
--- /dev/null
+++ b/tests/qemu-iotests/sample_images/parallels-v1.bz2
Binary files differ
diff --git a/tests/qemu-iotests/sample_images/parallels-v2.bz2 b/tests/qemu-iotests/sample_images/parallels-v2.bz2
new file mode 100644
index 0000000..fd8614d
--- /dev/null
+++ b/tests/qemu-iotests/sample_images/parallels-v2.bz2
Binary files differ